julius.jconf
5.81 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
######################################################################
# Julius Conf File
#
# Option file for the Julius speech recognizer. Each line holds one
# command-line style option; everything after '#' is a comment.
# Lines starting with '#-' are disabled alternatives kept for reference.
######################################################################
# chmod 644 (presumably the intended permission of this file -- confirm)

#### Input detection / segmentation ####
-lv 3000 # amplitude level threshold (0-32767)
-zc 150 # zero-cross threshold (crossings per second)
-headmargin 600 # silence margin kept before detected speech (msec)
-tailmargin 1000 # silence margin kept after detected speech (msec)
-rejectshort 50 # reject input shorter than this (msec)
# Exactly one of the two options below should be active.
#-realtime # force real-time processing
-norealtime # force non real-time processing
#### Acoustic HMM file ####
# HMM acoustic model. The active entry is the LaPSAM 1.5 binary model for
# pt-BR; the commented-out paths are earlier models kept for reference.
# NOTE(review): the active paths are relative -- confirm which directory
# Julius resolves them against in this deployment.
#-h ../Adaptation/AMs/LaPSAM1.3_patrick/MMF
-h ./julius/acoustic_model_pt-BR/x64/LaPSAM1.5.am.bin
#-h /projetos/bravoz/LVCSR-Project/lvcsr_patrick/htkEmbedded/AMs/LapsAM-1.5-Renan/MMF.in2
# A triphone model needs an HMMList that maps logical triphones to physical ones.
# The list must belong to the same model as the -h file above.
#-hlist ../Adaptation/AMs/LapsAM1.3_patrick/tiedlist
#-hlist /projetos/bravoz/LVCSR-Project/lvcsr_patrick/htkEmbedded/AMs/LapsAM-1.5-Renan/tiedlist
-hlist ./julius/acoustic_model_pt-BR/x64/LaPSAM1.5.tiedlist
# Name of the short-pause model that is to be treated specially.
-spmodel "sp" # HMM model name
#-iwspword
# Context-dependency handling is normally enabled according to the model type.
# Use one of the options below if Julius wrongly detects the type of the hmmdefs.
#
#-no_ccd # disable context-dependency handling
-force_ccd # enable context-dependency handling
# Disabled: external HTK configuration file.
#-htkconf ../../confs/edaz_mod.conf
#### Language Model ####
# Binary LM. The active entry is the LaPSLM 1.5 model; the first line is an
# earlier model kept for reference.
#-d LM/MIX.4gram.julius.bin
-d ./julius/acoustic_model_pt-BR/x64/LaPSLM1.5.lm.bin
# 1st LM (disabled)
#-nlr /home/04080002801/lvcsr1.1/LM/HLMToolkit/lms/bigram_HLM_dictionary.lm
# 2nd LM (disabled)
#-nrl /home/04080002801/lvcsr1.1/LM/HLMToolkit/lms/bigram_HLM_dictionary.rev.lm
# Disabled grammar (-dfa) and dictionary (-v) pair from another setup.
##-dfa simon/model.dfa
##-v simon/model.dict
# Dictionary
# NOTE(review): "dic.temp" looks like a generated/temporary file name --
# confirm it is created before Julius is started.
-v ./julius/acoustic_model_pt-BR/x64/dic.temp
#-looktrellis
# LM weights and word insertion penalties: "<weight> <penalty>".
# -lmp is presumably the 1st-pass pair and -lmp2 the 2nd-pass pair -- confirm
# against the Julius option reference.
-lmp 15.0 0.0
-lmp2 15.0 0.0
#-transp -20.0
# Envelope beam width (number of hypotheses) - larger values make the
# decoder much slower!
# These are the active beam settings; a second, fully commented-out list of
# typical values appears in the "Search Parameters" section further down.
-b 3000 # beam width on 1st pass (#nodes)
-b2 150 # beam width on 2nd pass (#words)
-sb 100 # score beam envelope threshold
# The number of candidates Julius tries to find.
#-n 5
## If Julius goes wrong when checking the parameter type, try the option below.
##
#-notypecheck
#
## (PTM/triphone) switch computation method of IWCD on 1st pass
## Only one -iwcd1 line should be active.
##
-iwcd1 best 6 # assign average of N-best likelihood of the same context (N = 6 here)
#-iwcd1 max # assign maximum likelihood of the same context (tries hardest to find the context)
#-iwcd1 avg # assign average likelihood of the same context (default)
#### Gaussian Pruning ####
## Number of mixtures to select in a mixture pdf.
## This default value is optimized for IPA99's PTM,
## with 64 Gaussians per codebook
#-tmix 2
## Select Gaussian pruning algorithm (only one -gprune line should be active)
## default: beam (standard setting), safe (others)
-gprune safe # safe pruning, accurate but slow (prunes conservatively, hence slower)
#-gprune heuristic # heuristic pruning
#-gprune beam # beam pruning, fast but sensitive
#-gprune none # no pruning
#### Search Parameters ####
# The -b / -b2 / -sb lines below are disabled reference values; the values
# actually in effect are set earlier in this file (-b 3000, -b2 150, -sb 100).
#-b 400 # beam width on 1st pass (#nodes) for monophone
#-b 800 # beam width on 1st pass (#nodes) for triphone,PTM
#-b 1000 # beam width on 1st pass (#nodes) for triphone,PTM,engine=v2.1
#-s 50 # Stack size
#-b2 200 # beam width on 2nd pass (#words)
#-sb 200.0 # score beam envelope threshold
#-s 500 # hypotheses stack size on 2nd pass (#hypo), i.e. max number of hypotheses on the stack
-m 2000 # hypotheses overflow threshold (#hypo)
#-lookuprange 5 # lookup range for word expansion (#frame)
#-n 1 # num of sentences to find (#sentence)
#-n 50 # (default for 'standard' configuration); searches for the best hypothesis
#-output 1 # num of found sentences to output (#sentence)
#-looktrellis # search within only backtrellis words
## For insertion of context-free short-term inter-word pauses between words
## (multi-path version only)
##
-iwsp # append a skippable sp model at all word ends
#-iwsppenalty -5.0 # transition penalty for the appended sp models
######################################################################
#### Speech Input Source
######################################################################
## select one (default: mfcfile)
#-input mfcfile # MFCC file in HTK parameter file format
-input rawfile # raw wavefile (auto-detect format)
# WAV(16bit) or
# RAW(16bit(signed short),mono,big-endian)
# AIFF,AU (with libsndfile extension)
# other than 16kHz, sampling rate should be specified
# by "-smpFreq" option
#-input mic # direct microphone input
# device name can be specified via env. val. "AUDIODEV"
#-input netaudio -NA host:0 # direct input from DatLink(NetAudio) host
#-input adinnet -adport portnum # via adinnet network client
#-input stdin # from standard tty input (pipe)
# "audiolist" is the file that lists the audio files to recognize.
-filelist audiolist # specify file list to be recognized in batch mode
#-nostrip # switch OFF dropping of invalid input segment.
# (default: strip off invalid segment (0 sequence etc.)
# NOTE(review): both -zmeanframe and -zmean are enabled below. The Julius
# option reference reportedly states that the two cannot be used together --
# confirm which DC offset removal mode matches the acoustic model's training
# setup and disable the other one.
-zmeanframe # frame-wise DC offset removal (per Julius option reference -- confirm)
-zmean # enable DC offset removal (invalid for mfcfile input)
######################################################################
#### Acoustic Analysis
######################################################################
# Only -smpFreq is active; the remaining lines are disabled sample values.
-smpFreq 22050 # sampling rate (Hz)
# The formula below implies units of 100 ns, not ns. The sample value 227
# corresponds to about 44100 Hz; for 22050 Hz it would be about 454.
#-smpPeriod 227 # sampling period (100 ns units) (= 10000000 / smpFreq)
# NOTE(review): 400 / 160 samples equal 25 ms / 10 ms only at 16 kHz. At
# 22050 Hz, confirm that the window and shift in effect match the values the
# acoustic model was trained with.
#-fsize 400 # window size (samples)
#-fshift 160 # frame shift (samples)
#-delwin 2 # delta window (frames)
#-hifreq -1 # cut-off hi frequency (Hz) (-1: disable)
#-lofreq -1 # cut-off low frequency (Hz) (-1: disable)
#-cmnsave filename # save CMN param to file (update per input)
#-cmnload filename # load initial CMN param from file on startup