Commit 0e854e1e1dab832696362b8210d202475bd32de7
1 parent
e1cf2b2f
Exists in
master
and in
1 other branch
Remove arquivos binários (dependências) desnecessários
Showing
22 changed files
with
0 additions
and
9606 deletions
Show diff stats
aelius-install.tar.gz
No preview for this file type
aelius_install.sh
... | ... | @@ -1,36 +0,0 @@ |
1 | -#!/bin/bash | |
2 | - | |
3 | -#Instalador do Tradutor Aelius | |
4 | - | |
5 | -cd ~/vlibras-core | |
6 | - | |
7 | -echo "\n# Extraindo...\n" | |
8 | -tar -xf aelius-install.tar.gz -C ~/. | |
9 | - | |
10 | -echo "# Instalando dependências...\n" | |
11 | -sudo apt-get install python-dev python-yaml python-numpy python-matplotlib | |
12 | - | |
13 | -cd ~/nltk-2.0.1rc1/ | |
14 | - | |
15 | -sudo python setup.py install | |
16 | - | |
17 | -echo "\n# Finalizando...\n" | |
18 | - | |
19 | -# path to HunPos binaries | |
20 | -echo "\nPATH=\"${PATH}:$HOME/Applications/bin\"" >> ~/.bashrc | |
21 | -echo "export PATH\n" >> ~/.bashrc | |
22 | - | |
23 | -# path to Aelius and Translate package | |
24 | -echo "PYTHONPATH=\"${PYTHONPATH}:$HOME/Applications:$HOME/vlibras-core/tradutor/src/py\"" >> ~/.bashrc | |
25 | -echo "export PYTHONPATH\n" >> ~/.bashrc | |
26 | - | |
27 | -cd .. | |
28 | - | |
29 | -sudo mv usr-local-bin.tar.gz /usr/local/bin/ | |
30 | - | |
31 | -cd /usr/local/bin/ | |
32 | - | |
33 | -sudo tar -xf usr-local-bin.tar.gz | |
34 | -sudo rm usr-local-bin.tar.gz | |
35 | - | |
36 | -echo "### Instalação finalizada! \n## Execute o seguinte comando para concluir:\n\n$ source ~/.bashrc\n" |
recognize/src/julius/doc/Juliusbook-4.1.5.pdf
No preview for this file type
recognize/src/julius/doc/manuals/accept_check.txt
... | ... | @@ -1,80 +0,0 @@ |
1 | - accept_check | |
2 | - | |
3 | -ACCEPT_CHECK(1) ACCEPT_CHECK(1) | |
4 | - | |
5 | - | |
6 | - | |
7 | -NAME | |
8 | - accept_check | |
9 | - - Check whether a grammar accepts / rejects given word sequences | |
10 | - | |
11 | -SYNOPSIS | |
12 | - accept_check [-t] [-s spname] [-v] {prefix} | |
13 | - | |
14 | -DESCRIPTION | |
15 | - accept_check is a tool to check whether a sentence can be accepted or | |
16 | - rejected on a grammar (prefix.dfa and prefix.dict). The sentence should | |
17 | - be given from standard input. You can do a batch check by preparing all | |
18 | - test sentence at each line of a text file, and give it as standard | |
19 | - input of accept_check. | |
20 | - | |
21 | - This tool needs .dfa, .dict and .term files. You should convert a | |
22 | - written grammar file to generate them by mkdfa.pl. | |
23 | - | |
24 | - A sentence should be given as space-separated word sequence. It may be | |
25 | - required to add head / tail silence word like sil, depending on your | |
26 | - grammar. It should not contain a short-pause word. | |
27 | - | |
28 | - When a word belongs to various category in a grammar, accept_check will | |
29 | - check all the possible sentence patterns, and accept it if any of those | |
30 | - is acceptable. | |
31 | - | |
32 | -OPTIONS | |
33 | - -t | |
34 | - Use category name as input instead of word. | |
35 | - | |
36 | - -s spname | |
37 | - Short-pause word name to be skipped. (default: "sp") | |
38 | - | |
39 | - -v | |
40 | - Debug output. | |
41 | - | |
42 | -EXAMPLES | |
43 | - An output for "date" grammar: | |
44 | - | |
45 | - % echo '<s> NEXT SUNDAY </s>' | accept_check date | |
46 | - Reading in dictionary... | |
47 | - 143 words...done | |
48 | - Reading in DFA grammar...done | |
49 | - Mapping dict item <-> DFA terminal (category)...done | |
50 | - Reading in term file (optional)...done | |
51 | - 27 categories, 143 words | |
52 | - DFA has 35 nodes and 71 arcs | |
53 | - ----- | |
54 | - wseq: <s> NEXT SUNDAY </s> | |
55 | - cate: NS_B (NEXT|NEXT) (DAYOFWEEK|DAYOFWEEK|DAY|DAY) NS_E | |
56 | - accepted | |
57 | - | |
58 | - | |
59 | -SEE ALSO | |
60 | - mkdfa.pl ( 1 ) , | |
61 | - generate ( 1 ) , | |
62 | - nextword ( 1 ) | |
63 | - | |
64 | -COPYRIGHT | |
65 | - Copyright (c) 1997-2000 Information-technology Promotion Agency, Japan | |
66 | - | |
67 | - Copyright (c) 1991-2008 Kawahara Lab., Kyoto University | |
68 | - | |
69 | - Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and | |
70 | - Technology | |
71 | - | |
72 | - Copyright (c) 2005-2008 Julius project team, Nagoya Institute of | |
73 | - Technology | |
74 | - | |
75 | -LICENSE | |
76 | - The same as Julius. | |
77 | - | |
78 | - | |
79 | - | |
80 | - 10/02/2008 ACCEPT_CHECK(1) |
recognize/src/julius/doc/manuals/adinrec.txt
... | ... | @@ -1,130 +0,0 @@ |
1 | - adinrec | |
2 | - | |
3 | -ADINREC(1) ADINREC(1) | |
4 | - | |
5 | - | |
6 | - | |
7 | -NAME | |
8 | - adinrec | |
9 | - - record audio device and save one utterance to a file | |
10 | - | |
11 | -SYNOPSIS | |
12 | - adinrec [options...] {filename} | |
13 | - | |
14 | -DESCRIPTION | |
15 | - adinrec opens an audio stream, detects an utterance input and stores it | |
16 | - to a specified file. The utterance detection is done by level and | |
17 | - zero-cross thresholds. Default input device is microphone, but other | |
18 | - audio input source, including Julius A/D-in plugin, can be used by | |
19 | - using "-input" option. | |
20 | - | |
21 | - The audio format is 16 bit, 1 channel, in Microsoft WAV format. If the | |
22 | - given filename already exists, it will be overwritten. | |
23 | - | |
24 | - If filename is "-" , the captured data will be streamed into standard | |
25 | - out, with no header (raw format). | |
26 | - | |
27 | -OPTIONS | |
28 | - adinrec uses JuliusLib and adopts Julius options. Below is a list of | |
29 | - valid options. | |
30 | - | |
31 | - adinrec specific options | |
32 | - -freq Hz | |
33 | - Set sampling rate in Hz. (default: 16,000) | |
34 | - | |
35 | - -raw | |
36 | - Output in raw file format. | |
37 | - | |
38 | - JuliusLib options | |
39 | - -input {mic|rawfile|adinnet|stdin|netaudio|esd|alsa|oss} | |
40 | - Choose speech input source. Specify 'file' or 'rawfile' for waveform | |
41 | - file. On file input, users will be prompted to enter the file name | |
42 | - from stdin. | |
43 | - | |
44 | - 'mic' is to get audio input from a default live microphone device, | |
45 | - and 'adinnet' means receiving waveform data via tcpip network from | |
46 | - an adinnet client. 'netaudio' is from DatLink/NetAudio input, and | |
47 | - 'stdin' means data input from standard input. | |
48 | - | |
49 | - At Linux, you can choose API at run time by specifying alsa, oss and | |
50 | - esd. | |
51 | - | |
52 | - -lv thres | |
53 | - Level threshold for speech input detection. Values should be in | |
54 | - range from 0 to 32767. (default: 2000) | |
55 | - | |
56 | - -zc thres | |
57 | - Zero crossing threshold per second. Only input that goes over the | |
58 | - level threshold (-lv) will be counted. (default: 60) | |
59 | - | |
60 | - -headmargin msec | |
61 | - Silence margin at the start of speech segment in milliseconds. | |
62 | - (default: 300) | |
63 | - | |
64 | - -tailmargin msec | |
65 | - Silence margin at the end of speech segment in milliseconds. | |
66 | - (default: 400) | |
67 | - | |
68 | - -zmean | |
69 | - This option enables DC offset removal. | |
70 | - | |
71 | - -smpFreq Hz | |
72 | - Set sampling rate in Hz. (default: 16,000) | |
73 | - | |
74 | - -48 | |
75 | - Record input with 48kHz sampling, and down-sample it to 16kHz | |
76 | - on-the-fly. This option is valid for 16kHz model only. The | |
77 | - down-sampling routine was ported from sptk. (Rev. 4.0) | |
78 | - | |
79 | - -NA devicename | |
80 | - Host name for DatLink server input (-input netaudio). | |
81 | - | |
82 | - -adport port_number | |
83 | - With -input adinnet, specify adinnet port number to listen. | |
84 | - (default: 5530) | |
85 | - | |
86 | - -nostrip | |
87 | - Julius by default removes successive zero samples in input speech | |
88 | - data. This option stops it. | |
89 | - | |
90 | - -C jconffile | |
91 | - Load a jconf file at here. The content of the jconffile will be | |
92 | - expanded at this point. | |
93 | - | |
94 | - -plugindir dirlist | |
95 | - Specify which directories to load plugin. If several directories | |
96 | - exist, specify them by colon-separated list. | |
97 | - | |
98 | -ENVIRONMENT VARIABLES | |
99 | - ALSADEV | |
100 | - Device name string for ALSA. (default: "default") | |
101 | - | |
102 | - AUDIODEV | |
103 | - Device name string for OSS. (default: "/dev/dsp") | |
104 | - | |
105 | - LATENCY_MSEC | |
106 | - Input latency of microphone input in milliseconds. Smaller value | |
107 | - will shorten latency but sometimes make process unstable. Default | |
108 | - value will depend on the running OS. | |
109 | - | |
110 | -SEE ALSO | |
111 | - julius ( 1 ) , | |
112 | - adintool ( 1 ) | |
113 | - | |
114 | -COPYRIGHT | |
115 | - Copyright (c) 1997-2000 Information-technology Promotion Agency, Japan | |
116 | - | |
117 | - Copyright (c) 1991-2008 Kawahara Lab., Kyoto University | |
118 | - | |
119 | - Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and | |
120 | - Technology | |
121 | - | |
122 | - Copyright (c) 2005-2008 Julius project team, Nagoya Institute of | |
123 | - Technology | |
124 | - | |
125 | -LICENSE | |
126 | - The same as Julius. | |
127 | - | |
128 | - | |
129 | - | |
130 | - 10/02/2008 ADINREC(1) |
recognize/src/julius/doc/manuals/adintool.txt
... | ... | @@ -1,214 +0,0 @@ |
1 | - adintool | |
2 | - | |
3 | -ADINTOOL(1) ADINTOOL(1) | |
4 | - | |
5 | - | |
6 | - | |
7 | -NAME | |
8 | - adintool | |
9 | - - a tool to record / split / send / receive audio streams | |
10 | - | |
11 | -SYNOPSIS | |
12 | - adintool {-in inputdev} {-out outputdev} [options...] | |
13 | - | |
14 | -DESCRIPTION | |
15 | - adintool analyzes speech input, finds speech segments skipping silence, | |
16 | - and records the detected segments in various ways. It performs speech | |
17 | - detection based on zerocross number and power (level), and records the | |
18 | - detected parts to files or other output devices successively. | |
19 | - | |
20 | - | |
21 | - adintool is an enhanced version of adinrec with various functions. | |
22 | - Supported input devices are: microphone input, a speech file, standard | |
23 | - tty input, and network socket (called adin-net server mode). Julius | |
24 | - plugin can be also used. Detected speech segments will be saved to | |
25 | - output devices: speech files, standard tty output, and network socket | |
26 | - (called adin-net client mode). For example, you can split the incoming | |
27 | - speech to segments and send them to Julius to be recognized. | |
28 | - | |
29 | - Output format is WAV, 16bit (signed short), monaural. If the file | |
30 | - already exists, it will be overwritten. | |
31 | - | |
32 | -OPTIONS | |
33 | - All Julius options can be set. Only audio input related options are | |
34 | - treated and others are silently skipped. Below is a list of options. | |
35 | - | |
36 | - adintool specific options | |
37 | - -freq Hz | |
38 | - Set sampling rate in Hz. (default: 16,000) | |
39 | - | |
40 | - -in inputdev | |
41 | - Audio input device. "mic" to capture via microphone input, "file" | |
42 | - for audio file input, and "stdin" to read raw data from | |
43 | - standard-input. For file input, file name prompt will appear after | |
44 | - startup. Use "adinnet" to make adintool as "adinnet server", | |
45 | - receiving data from client via network socket. Default port number | |
46 | - is 5530, which can be altered by option "-inport". | |
47 | - | |
48 | - Alternatively, input device can be set by "-input" option, in which | |
49 | - case you can use plugin input. | |
50 | - | |
51 | - -out outputdev | |
52 | - Audio output device store the data. Specify "file" to save to file, | |
53 | - in which the output filename should be given by "-filename". Use | |
54 | - "stdout" to standard out. "adinnet" will make adintool to be an | |
55 | - adinnet client, sending speech data to a server via tcp/ip socket. | |
56 | - When using "adinnet" output, the server name to send data should be | |
57 | - specified by "-server". The default port number is 5530, which can | |
58 | - be changed by "-port" option. | |
59 | - | |
60 | - -inport num | |
61 | - When adintool becomes adinnet server to receive data (-in adinnet), | |
62 | - set the port number to listen. (default: 5530) | |
63 | - | |
64 | - -server [host] [,host...] | |
65 | - When output to adinnet server (-out adinnet), set the hostname. You | |
66 | - can send to multiple hosts by specifying their hostnames as | |
67 | - comma-delimited list like "host1,host2,host3". | |
68 | - | |
69 | - -port [num] [,num...] | |
70 | - When adintool send a data to adinnet server (-out adinnet), set the | |
71 | - port number to connect. (default: 5530) For multiple servers, | |
72 | - specify port numbers for all servers like "5530,5530,5531". | |
73 | - | |
74 | - -filename file | |
75 | - When output to file (-out file), set the output filename. The actual | |
76 | - file name will be as "file.0000.wav" , "file.0001.wav" and so on, | |
77 | - where the four digit number increases as speech segment detected. | |
78 | - The initial number will be set to 0 by default, which can be changed | |
79 | - by "-startid" option. When using "-oneshot" option to save only the | |
80 | - first segment, the input will be saved as "file". | |
81 | - | |
82 | - -startid number | |
83 | - At file output, set the initial file number. (default: 0) | |
84 | - | |
85 | - -oneshot | |
86 | - Exit after the end of first speech segment. | |
87 | - | |
88 | - -nosegment | |
89 | - Do not perform speech detection for input, just treat all the input | |
90 | - as a single valid segment. | |
91 | - | |
92 | - -raw | |
93 | - Output as RAW file (no header). | |
94 | - | |
95 | - -autopause | |
96 | - When output to adinnet server, adintool enter pause state at every | |
97 | - end of speech segment. It will restart when the destination adinnet | |
98 | - server sends it a resume signal. | |
99 | - | |
100 | - -loosesync | |
101 | - When output to multiple adinnet server, not to do strict | |
102 | - synchronization for restart. By default, when adintool has entered | |
103 | - pause state, it will not restart until resume commands are received | |
104 | - from all servers. This option will allow restart at least one | |
105 | - restart command has arrived. | |
106 | - | |
107 | - -rewind msec | |
108 | - When input is a live microphone device, and there has been some | |
109 | - continuing input at the moment adintool resumes, it start recording | |
110 | - backtracking by the specified milliseconds. | |
111 | - | |
112 | - Concerning Julius options | |
113 | - -input {mic|rawfile|adinnet|stdin|netaudio|esd|alsa|oss} | |
114 | - Choose speech input source. Specify 'file' or 'rawfile' for waveform | |
115 | - file. On file input, users will be prompted to enter the file name | |
116 | - from stdin. | |
117 | - | |
118 | - 'mic' is to get audio input from a default live microphone device, | |
119 | - and 'adinnet' means receiving waveform data via tcpip network from | |
120 | - an adinnet client. 'netaudio' is from DatLink/NetAudio input, and | |
121 | - 'stdin' means data input from standard input. | |
122 | - | |
123 | - At Linux, you can choose API at run time by specifying alsa, oss and | |
124 | - esd. | |
125 | - | |
126 | - -lv thres | |
127 | - Level threshold for speech input detection. Values should be in | |
128 | - range from 0 to 32767. (default: 2000) | |
129 | - | |
130 | - -zc thres | |
131 | - Zero crossing threshold per second. Only input that goes over the | |
132 | - level threshold (-lv) will be counted. (default: 60) | |
133 | - | |
134 | - -headmargin msec | |
135 | - Silence margin at the start of speech segment in milliseconds. | |
136 | - (default: 300) | |
137 | - | |
138 | - -tailmargin msec | |
139 | - Silence margin at the end of speech segment in milliseconds. | |
140 | - (default: 400) | |
141 | - | |
142 | - -zmean | |
143 | - This option enables DC offset removal. | |
144 | - | |
145 | - -smpFreq Hz | |
146 | - Set sampling rate in Hz. (default: 16,000) | |
147 | - | |
148 | - -48 | |
149 | - Record input with 48kHz sampling, and down-sample it to 16kHz | |
150 | - on-the-fly. This option is valid for 16kHz model only. The | |
151 | - down-sampling routine was ported from sptk. (Rev. 4.0) | |
152 | - | |
153 | - -NA devicename | |
154 | - Host name for DatLink server input (-input netaudio). | |
155 | - | |
156 | - -adport port_number | |
157 | - With -input adinnet, specify adinnet port number to listen. | |
158 | - (default: 5530) | |
159 | - | |
160 | - -nostrip | |
161 | - Julius by default removes successive zero samples in input speech | |
162 | - data. This option stops it. | |
163 | - | |
164 | - -C jconffile | |
165 | - Load a jconf file at here. The content of the jconffile will be | |
166 | - expanded at this point. | |
167 | - | |
168 | - -plugindir dirlist | |
169 | - Specify which directories to load plugin. If several directories | |
170 | - exist, specify them by colon-separated list. | |
171 | - | |
172 | -ENVIRONMENT VARIABLES | |
173 | - ALSADEV | |
174 | - (using mic input with alsa device) specify a capture device name. If | |
175 | - not specified, "default" will be used. | |
176 | - | |
177 | - AUDIODEV | |
178 | - (using mic input with oss device) specify a capture device path. If | |
179 | - not specified, "/dev/dsp" will be used. | |
180 | - | |
181 | - LATENCY_MSEC | |
182 | - Try to set input latency of microphone input in milliseconds. | |
183 | - Smaller value will shorten latency but sometimes make process | |
184 | - unstable. Default value will depend on the running OS. | |
185 | - | |
186 | -EXAMPLES | |
187 | - Record microphone input to files: "data.0000.wav", "data.0001.wav" and | |
188 | - so on: | |
189 | - Split a long speech file "foobar.raw" into "foobar.1500.wav", | |
190 | - "foobar.1501.wav" ...: | |
191 | - Copy an entire audio file via network socket. | |
192 | - Detect speech segment, send to Julius via network and recognize it: | |
193 | - | |
194 | -SEE ALSO | |
195 | - julius ( 1 ) , | |
196 | - adinrec ( 1 ) | |
197 | - | |
198 | -COPYRIGHT | |
199 | - Copyright (c) 1997-2000 Information-technology Promotion Agency, Japan | |
200 | - | |
201 | - Copyright (c) 1991-2008 Kawahara Lab., Kyoto University | |
202 | - | |
203 | - Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and | |
204 | - Technology | |
205 | - | |
206 | - Copyright (c) 2005-2008 Julius project team, Nagoya Institute of | |
207 | - Technology | |
208 | - | |
209 | -LICENSE | |
210 | - The same as Julius. | |
211 | - | |
212 | - | |
213 | - | |
214 | - 10/02/2008 ADINTOOL(1) |
recognize/src/julius/doc/manuals/dfa_determinize.txt
... | ... | @@ -1,51 +0,0 @@ |
1 | - dfa_determinize | |
2 | - | |
3 | -DFA_DETERMINIZE(1) DFA_DETERMINIZE(1) | |
4 | - | |
5 | - | |
6 | - | |
7 | -NAME | |
8 | - dfa_determinize | |
9 | - - Determinize NFA grammar network. | |
10 | - | |
11 | -SYNOPSIS | |
12 | - dfa_determinize [-o outfile] {dfafile} | |
13 | - | |
14 | -DESCRIPTION | |
15 | - dfa_determinize converts a non-deterministic .dfa file into | |
16 | - deterministic DFA. Output to standard output, or file specified by "-o" | |
17 | - option. | |
18 | - | |
19 | - This additional tool is not necessary on a grammar building procedure | |
20 | - in Julius, since the grammar network generated by mkdfa.pl is always | |
21 | - determinized. | |
22 | - | |
23 | -OPTIONS | |
24 | - -o outfile | |
25 | - Output file. If not specified, output to stdout. | |
26 | - | |
27 | -EXAMPLES | |
28 | - Determinize foo.dfa to bar.dfa: | |
29 | - Another way: | |
30 | - | |
31 | -SEE ALSO | |
32 | - mkdfa.pl ( 1 ) , | |
33 | - dfa_minimize ( 1 ) | |
34 | - | |
35 | -COPYRIGHT | |
36 | - Copyright (c) 1997-2000 Information-technology Promotion Agency, Japan | |
37 | - | |
38 | - Copyright (c) 1991-2008 Kawahara Lab., Kyoto University | |
39 | - | |
40 | - Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and | |
41 | - Technology | |
42 | - | |
43 | - Copyright (c) 2005-2008 Julius project team, Nagoya Institute of | |
44 | - Technology | |
45 | - | |
46 | -LICENSE | |
47 | - The same as Julius. | |
48 | - | |
49 | - | |
50 | - | |
51 | - 10/02/2008 DFA_DETERMINIZE(1) |
recognize/src/julius/doc/manuals/dfa_minimize.txt
... | ... | @@ -1,49 +0,0 @@ |
1 | - dfa_minimize | |
2 | - | |
3 | -DFA_MINIMIZE(1) DFA_MINIMIZE(1) | |
4 | - | |
5 | - | |
6 | - | |
7 | -NAME | |
8 | - dfa_minimize | |
9 | - - Minimize a DFA grammar network | |
10 | - | |
11 | -SYNOPSIS | |
12 | - dfa_minimize [-o outfile] {dfafile} | |
13 | - | |
14 | -DESCRIPTION | |
15 | - dfa_minimize will convert an .dfa file to an equivalent minimal form. | |
16 | - Output to standard output, or to a file specified by "-o" option. | |
17 | - | |
18 | - On version 3.5.3 and later, mkdfa.pl invokes this tool inside, and the | |
19 | - output .dfa file will be always minimized, so you do not need to use | |
20 | - this manually. | |
21 | - | |
22 | -OPTIONS | |
23 | - -o outfile | |
24 | - Output file. If not specified output to standard output. | |
25 | - | |
26 | -EXAMPLES | |
27 | - Minimize foo.dfa to bar.dfa: | |
28 | - Another way: | |
29 | - | |
30 | -SEE ALSO | |
31 | - mkdfa.pl ( 1 ) | |
32 | - | |
33 | -COPYRIGHT | |
34 | - Copyright (c) 1997-2000 Information-technology Promotion Agency, Japan | |
35 | - | |
36 | - Copyright (c) 1991-2008 Kawahara Lab., Kyoto University | |
37 | - | |
38 | - Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and | |
39 | - Technology | |
40 | - | |
41 | - Copyright (c) 2005-2008 Julius project team, Nagoya Institute of | |
42 | - Technology | |
43 | - | |
44 | -LICENSE | |
45 | - The same as Julius. | |
46 | - | |
47 | - | |
48 | - | |
49 | - 10/02/2008 DFA_MINIMIZE(1) |
recognize/src/julius/doc/manuals/generate-ngram.txt
... | ... | @@ -1,62 +0,0 @@ |
1 | - generate-ngram | |
2 | - | |
3 | -GENERATE-NGRAM(1) GENERATE-NGRAM(1) | |
4 | - | |
5 | - | |
6 | - | |
7 | -NAME | |
8 | - generate-ngram | |
9 | - - random sentence generator from N-gram | |
10 | - | |
11 | -SYNOPSIS | |
12 | - generate-ngram [options...] {binary_ngram} | |
13 | - | |
14 | -DESCRIPTION | |
15 | - generate-ngram is a tool to generate sentences randomly according to | |
16 | - the given N-gram language model. The N-gram model file binary_ngram | |
17 | - should be in binary format. | |
18 | - | |
19 | -OPTIONS | |
20 | - -n num | |
21 | - Number of sentences to generate (default: 10) | |
22 | - | |
23 | - -N | |
24 | - Specify which length of N-gram to use (default: available max in the | |
25 | - given model) | |
26 | - | |
27 | - -bos | |
28 | - Beginning-of-sentence word (default: "<s>") | |
29 | - | |
30 | - -eos | |
31 | - End-of-sentence word (default: "</s>") | |
32 | - | |
33 | - -ignore | |
34 | - Specify a word to be suppressed from output (default: "<UNK>") | |
35 | - | |
36 | - -v | |
37 | - Verbose output. | |
38 | - | |
39 | - -debug | |
40 | - Debug output. | |
41 | - | |
42 | -SEE ALSO | |
43 | - julius ( 1 ) , | |
44 | - mkbingram ( 1 ) | |
45 | - | |
46 | -COPYRIGHT | |
47 | - Copyright (c) 1997-2000 Information-technology Promotion Agency, Japan | |
48 | - | |
49 | - Copyright (c) 1991-2008 Kawahara Lab., Kyoto University | |
50 | - | |
51 | - Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and | |
52 | - Technology | |
53 | - | |
54 | - Copyright (c) 2005-2008 Julius project team, Nagoya Institute of | |
55 | - Technology | |
56 | - | |
57 | -LICENSE | |
58 | - The same as Julius. | |
59 | - | |
60 | - | |
61 | - | |
62 | - 10/02/2008 GENERATE-NGRAM(1) |
recognize/src/julius/doc/manuals/generate.txt
... | ... | @@ -1,76 +0,0 @@ |
1 | - generate | |
2 | - | |
3 | -GENERATE(1) GENERATE(1) | |
4 | - | |
5 | - | |
6 | - | |
7 | -NAME | |
8 | - generate | |
9 | - - random sentence generator from a grammar | |
10 | - | |
11 | -SYNOPSIS | |
12 | - generate [-v] [-t] [-n num] [-s spname] {prefix} | |
13 | - | |
14 | -DESCRIPTION | |
15 | - This small program randomly generates sentences that are acceptable by | |
16 | - the given grammar. | |
17 | - | |
18 | - | |
19 | - .dfa, .dict and .term files are needed to execute. They can be | |
20 | - generated from .grammar and .voca file by mkdfa.pl. | |
21 | - | |
22 | -OPTIONS | |
23 | - -t | |
24 | - Output in word's category name. | |
25 | - | |
26 | - -n num | |
27 | - Set number of sentences to be generated (default: 10) | |
28 | - | |
29 | - -s spname | |
30 | - the name string of short-pause word to be suppressed (default: "sp") | |
31 | - | |
32 | - -v | |
33 | - Debug output mode. | |
34 | - | |
35 | -EXAMPLES | |
36 | - Example output of a sample grammar "fruit": | |
37 | - | |
38 | - % generate fruit | |
39 | - Stat: init_voca: read 36 words | |
40 | - Reading in term file (optional)...done | |
41 | - 15 categories, 36 words | |
42 | - DFA has 26 nodes and 42 arcs | |
43 | - ----- | |
44 | - <s> I WANT ONE APPLE </s> | |
45 | - <s> I WANT TEN PEARS </s> | |
46 | - <s> CAN I HAVE A PINEAPPLE </s> | |
47 | - <s> I WANT ONE PEAR </s> | |
48 | - <s> COULD I HAVE A BANANA </s> | |
49 | - <s> I WANT ONE APPLE PLEASE </s> | |
50 | - <s> I WANT NINE APPLES </s> | |
51 | - <s> NINE APPLES </s> | |
52 | - <s> I WANT ONE PINEAPPLE </s> | |
53 | - <s> I WANT A PEAR </s> | |
54 | - | |
55 | - | |
56 | -SEE ALSO | |
57 | - mkdfa.pl ( 1 ) , | |
58 | - generate-ngram ( 1 ) | |
59 | - | |
60 | -COPYRIGHT | |
61 | - Copyright (c) 1997-2000 Information-technology Promotion Agency, Japan | |
62 | - | |
63 | - Copyright (c) 1991-2008 Kawahara Lab., Kyoto University | |
64 | - | |
65 | - Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and | |
66 | - Technology | |
67 | - | |
68 | - Copyright (c) 2005-2008 Julius project team, Nagoya Institute of | |
69 | - Technology | |
70 | - | |
71 | -LICENSE | |
72 | - The same as Julius. | |
73 | - | |
74 | - | |
75 | - | |
76 | - 10/02/2008 GENERATE(1) |
recognize/src/julius/doc/manuals/gram2sapixml.pl.txt
... | ... | @@ -1,47 +0,0 @@ |
1 | - gram2sapixml.pl | |
2 | - | |
3 | -GRAM2SAPIXML.PL(1) GRAM2SAPIXML.PL(1) | |
4 | - | |
5 | - | |
6 | - | |
7 | -NAME | |
8 | - gram2sapixml.pl | |
9 | - - convert Julius grammar to SAPI XML grammar format | |
10 | - | |
11 | -SYNOPSIS | |
12 | - gram2sapixml.pl [prefix...] | |
13 | - | |
14 | -DESCRIPTION | |
15 | - gram2sapixml.pl converts a recognition grammar file of Julius | |
16 | - (.grammar, .voca) to Microsoft SAPI XML grammar format. prefix should | |
17 | - be a file name of target grammar, excluding suffixes. If multiple | |
18 | - arguments are given, each will be processed sequentially in turn. | |
19 | - | |
20 | - The internal character set should be in UTF-8 format. By default | |
21 | - gram2sapixml.pl assume input in EUC-JP encoding and tries to convert it | |
22 | - to UTF-8 using iconv. You may want to disable this feature within the | |
23 | - script. | |
24 | - | |
25 | - It will fail to convert a left recursive rule in the grammar. When | |
26 | - fails, it will leave the source rules in the target .xml file, so you | |
27 | - should modify the output manually to solve it. | |
28 | - | |
29 | -SEE ALSO | |
30 | - mkdfa.pl ( 1 ) | |
31 | - | |
32 | -DIAGNOSTICS | |
33 | - The conversion procedure is somewhat dumb one, only converting the | |
34 | - non-terminal symbols and terminal symbols (=word category name) into | |
35 | - corresponding rules one by one. This is only a help tool, and you will | |
36 | - need a manual inspection and editing to use it on a real SAPI | |
37 | - application. | |
38 | - | |
39 | -COPYRIGHT | |
40 | - Copyright (c) 2002 Takashi Sumiyoshi | |
41 | - | |
42 | -LICENSE | |
43 | - The same as Julius. | |
44 | - | |
45 | - | |
46 | - | |
47 | - 10/02/2008 GRAM2SAPIXML.PL(1) |
recognize/src/julius/doc/manuals/jclient.pl.txt
... | ... | @@ -1,46 +0,0 @@ |
1 | - jclient.pl | |
2 | - | |
3 | -JCLIENT.PL(1) JCLIENT.PL(1) | |
4 | - | |
5 | - | |
6 | - | |
7 | -NAME | |
8 | - jclient.pl | |
9 | - - sample client for module mode (perl version) | |
10 | - | |
11 | -SYNOPSIS | |
12 | - jclient.pl | |
13 | - | |
14 | -DESCRIPTION | |
15 | - This is yet another sample client written in perl. It will connect to | |
16 | - Julius running in module mode, receive recognition results from Julius, | |
17 | - and can send commands to control Julius. | |
18 | - | |
19 | - This is a tiny program with only 57 lines. You can use it for free. | |
20 | - | |
21 | -EXAMPLES | |
22 | - Invoke Julius with module mode by specifying "-module" option: | |
23 | - Then, at other terminal or other host, invoke jclient.pl like below. | |
24 | - The default hostname is "localhost", and port number is 10500. You can | |
25 | - change them by editing the top part of the script. | |
26 | - It will then receive the outputs of Julius and output the raw message | |
27 | - to standard out. Also, by inputting a raw module command to the | |
28 | - standard input of jclient.pl, it will be sent to Julius. See manuals | |
29 | - for the specification of module mode. | |
30 | - | |
31 | -SEE ALSO | |
32 | - julius ( 1 ) , | |
33 | - jcontrol ( 1 ) | |
34 | - | |
35 | -COPYRIGHT | |
36 | - "jclient.pl" has been developed by Dr. Ryuichi Nisimura | |
37 | - (nisimura@sys.wakayama-u.ac.jp). Use at your own risk. | |
38 | - | |
39 | - If you have any feedback, comment or request, please contact the E-mail | |
40 | - address above, or look at the Web page below. | |
41 | - | |
42 | - http://w3voice.jp/ | |
43 | - | |
44 | - | |
45 | - | |
46 | - 10/02/2008 JCLIENT.PL(1) |
recognize/src/julius/doc/manuals/jcontrol.txt
... | ... | @@ -1,173 +0,0 @@ |
1 | - jcontrol | |
2 | - | |
3 | -JCONTROL(1) JCONTROL(1) | |
4 | - | |
5 | - | |
6 | - | |
7 | -NAME | |
8 | - jcontrol | |
9 | - - a sample module client written in C | |
10 | - | |
11 | -SYNOPSIS | |
12 | - jcontrol {hostname} [portnum] | |
13 | - | |
14 | -DESCRIPTION | |
15 | - jcontrol is a simple console program to control julius running on other | |
16 | - host via network API. It can send command to Julius, and receive | |
17 | - messages from Julius. | |
18 | - | |
19 | - When invoked, jcontrol tries to connect to Julius running in "module | |
20 | - mode" on specified hostname. After connection established, jcontrol | |
21 | - waits for user commands from standard input. | |
22 | - | |
23 | - When user types a command to jcontrol, it will be interpreted and cor- | |
24 | - responding API command will be sent to Julius. When a message is | |
25 | - received from Julius, its content will be output to standard output. | |
26 | - | |
27 | - For the details about the API, see the related documents. | |
28 | - | |
29 | -OPTIONS | |
30 | - hostname | |
31 | - Host name where Julius is running in module mode. | |
32 | - | |
33 | - portnum | |
34 | - port number (default: 10500) | |
35 | - | |
36 | -COMMANDS | |
37 | - jcontrol interprets commands from standard input. Below is a list of | |
38 | - all commands. | |
39 | - | |
40 | - Engine control | |
41 | - pause | |
42 | - Stop Julius and enter into paused status. In paused status, Julius | |
43 | - will not run recognition even if speech input occurs. When this | |
44 | - command is issued while recognition is running, Julius will stop | |
45 | - after the recognition has been finished. | |
46 | - | |
47 | - terminate | |
48 | - Same as pause, but discard the current speech input when received | |
49 | - command in the middle of recognition process. | |
50 | - | |
51 | - resume | |
52 | - Restart Julius that has been paused or terminated. | |
53 | - | |
54 | - inputparam arg | |
55 | - Tell Julius how to deal with speech input in case grammar is changed | |
56 | - just when recognition is running. Specify one: "TERMINATE", "PAUSE" | |
57 | - or "WAIT". | |
58 | - | |
59 | - version | |
60 | - Tell Julius to send version description string. | |
61 | - | |
62 | - status | |
63 | - Tell Julius to send the system status (active / sleep) | |
64 | - | |
65 | - Grammar handling | |
66 | - changegram prefix | |
67 | - Send a new grammar "prefix.dfa" and "prefix.dict", and tell julius | |
68 | - to use it as a new grammar. All the current grammars used in the | |
69 | - current process of Julius will be deleted and replaced with the | |
70 | - specified grammar. | |
71 | - | |
72 | - addgram prefix | |
73 | - Send a new grammar "prefix.dfa" and "prefix.dict" and add it to the | |
74 | - current grammar. | |
75 | - | |
76 | - deletegram gramlist | |
77 | - Tell Julius to delete existing grammar. The grammar can be specified | |
78 | - by either prefix name or number ID. The number ID can be determined | |
79 | - from the message sent from Julius at each time grammar information | |
80 | - has changed. To delete more than one grammar, specify all of them | |
81 | - as a comma-separated list. | |
82 | - | |
83 | - deactivategram gramlist | |
84 | - Tell Julius to de-activate a specified grammar. The specified | |
85 | - grammar will still be kept but will not be used for recognition. | |
86 | - | |
87 | - The target grammar can be specified by either prefix name or number | |
88 | - ID. The number ID can be determined from the message sent from | |
89 | - Julius at each time grammar information has changed. To de-activate | |
90 | - more than one grammar, specify all of them as a comma-separated list. | |
91 | - | |
92 | - activategram gramlist | |
93 | - Tell Julius to activate previously de-activated grammar. The target | |
94 | - grammar can be specified by either prefix name or number ID. The | |
95 | - number ID can be determined from the message sent from Julius at | |
96 | - each time grammar information has changed. To activate more than | |
97 | - one grammar, specify all of them as a comma-separated list. | |
98 | - | |
99 | - addword grammar_name_or_id dictfile | |
100 | - Add the recognition word entries in the specified dictfile to the | |
101 | - specified grammar on current process. | |
102 | - | |
103 | - syncgram | |
104 | - Force synchronize grammar status, like unix command "sync". | |
105 | - | |
106 | - Process management | |
107 | - Julius-4 supports multi-model recognition and multi decoding. In this | |
108 | - case it is possible to control each recognition process, as defined by | |
109 | - "-SR" option, from module client. | |
110 | - | |
111 | - In multi decoding mode, the module client holds "current process", and | |
112 | - the process commands and grammar related commands will be issued toward | |
113 | - the current process. | |
114 | - | |
115 | - listprocess | |
116 | - Tell Julius to send the list of existing recognition process. | |
117 | - | |
118 | - currentprocess procname | |
119 | - Switch the current process to the process specified by the name. | |
120 | - | |
121 | - shiftprocess | |
122 | - Rotate the current process. At each call the current process will be | |
123 | - changed to the next one. | |
124 | - | |
125 | - addprocess jconffile | |
126 | - Tell Julius to load a new recognition process into engine. The | |
127 | - argument jconffile should be a jconf file that contains only one set | |
128 | - of LM options and one SR definition. Note that the file should be | |
129 | - visible on the running Julius, since jcontrol only sends the path | |
130 | - name and Julius actually reads the jconf file. | |
131 | - | |
132 | - The new LM and SR process will have the name of the jconffile. | |
133 | - | |
134 | - delprocess procname | |
135 | - Delete the specified recognition process from the engine. | |
136 | - | |
137 | - deactivateprocess procname | |
138 | - Tell Julius to temporarily stop the specified recognition process. The | |
139 | - stopped process will not be executed for the input until activated | |
140 | - again. | |
141 | - | |
142 | - activateprocess procname | |
143 | - Tell Julius to activate the temporarily stopped process. | |
144 | - | |
145 | -EXAMPLES | |
146 | - The dump messages from Julius are output to tty with prefix ">" | |
147 | - appended to each line. Julius can be started in module mode like this: | |
148 | - jcontrol can be launched with the host name: | |
149 | - It will then receive the outputs of Julius and output the raw message | |
150 | - to standard out. Also, by inputting the commands above to the standard | |
151 | - input of jcontrol, it will be sent to Julius. See manuals for the | |
152 | - specification of module mode. | |
153 | - | |
154 | -SEE ALSO | |
155 | - julius ( 1 ) | |
156 | - | |
157 | -COPYRIGHT | |
158 | - Copyright (c) 1997-2000 Information-technology Promotion Agency, Japan | |
159 | - | |
160 | - Copyright (c) 1991-2008 Kawahara Lab., Kyoto University | |
161 | - | |
162 | - Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and | |
163 | - Technology | |
164 | - | |
165 | - Copyright (c) 2005-2008 Julius project team, Nagoya Institute of | |
166 | - Technology | |
167 | - | |
168 | -LICENSE | |
169 | - The same as Julius. | |
170 | - | |
171 | - | |
172 | - | |
173 | - 10/02/2008 JCONTROL(1) |
recognize/src/julius/doc/manuals/julius.txt
... | ... | @@ -1,1287 +0,0 @@ |
1 | - julius | |
2 | - | |
3 | -JULIUS(1) JULIUS(1) | |
4 | - | |
5 | - | |
6 | - | |
7 | -NAME | |
8 | - julius | |
9 | - - open source multi-purpose LVCSR engine | |
10 | - | |
11 | -SYNOPSIS | |
12 | - julius [-C jconffile] [options...] | |
13 | - | |
14 | -DESCRIPTION | |
15 | - julius is a high-performance, multi-purpose, open-source speech | |
16 | - recognition engine for researchers and developers. It is capable of | |
17 | - performing almost real-time recognition of continuous speech with over | |
18 | - 60k-word 3-gram language model and triphone HMM model, on most current | |
19 | - PCs. julius can perform recognition on audio files, live microphone | |
20 | - input, network input and feature parameter files. | |
21 | - | |
22 | - The core recognition module is implemented as C library called | |
23 | - "JuliusLib". It can also be extended by plug-in facility. | |
24 | - | |
25 | - Supported Models | |
26 | - julius needs a language model and an acoustic model to run as a speech | |
27 | - recognizer. julius supports the following models. | |
28 | - | |
29 | - Acoustic model | |
30 | - Sub-word HMM (Hidden Markov Model) in HTK ascii format are | |
31 | - supported. Phoneme models (monophone), context dependent phoneme | |
32 | - models (triphone), tied-mixture and phonetic tied-mixture models | |
33 | - of any unit can be used. When using context dependent models, | |
34 | - inter-word context dependency is also handled. Multi-stream | |
35 | - feature and MSD-HMM is also supported. You can further use a | |
36 | - tool mkbinhmm to convert the ascii HMM file to a compact binary | |
37 | - format for faster loading. | |
38 | - | |
39 | - Note that julius itself can only extract MFCC features from | |
40 | - speech data. If you use acoustic HMM trained for other feature, | |
41 | - you should give the input in HTK parameter file of the same | |
42 | - feature type. | |
43 | - | |
44 | - Language model: word N-gram | |
45 | - Word N-gram language model, up to 10-gram, is supported. Julius | |
46 | - uses different N-gram for each pass: left-to-right 2-gram on 1st | |
47 | - pass, and right-to-left N-gram on 2nd pass. It is recommended to | |
48 | - use both LR 2-gram and RL N-gram for Julius. However, you can | |
49 | - use only single LR N-gram or RL N-gram. In such case, | |
50 | - approximated LR 2-gram computed from the given N-gram will be | |
51 | - applied at the first pass. | |
52 | - | |
53 | - The Standard ARPA format is supported. In addition, a binary | |
54 | - format is also supported for efficiency. The tool mkbingram(1) | |
55 | - can convert ARPA format N-gram to binary format. | |
56 | - | |
57 | - Language model: grammar | |
58 | - The grammar format is an original one, and tools to create a | |
59 | - recognition grammar are included in the distribution. A grammar | |
60 | - consists of two files: one is a 'grammar' file that describes | |
61 | - sentence structures in a BNF style, using word 'category' name | |
62 | - as terminal symbols. Another is a 'voca' file that defines | |
63 | - words with its pronunciations (i.e. phoneme sequences) for each | |
64 | - category. They should be converted by mkdfa.pl(1) to a | |
65 | - deterministic finite automaton file (.dfa) and a dictionary file | |
66 | - (.dict), respectively. You can also use multiple grammars. | |
67 | - | |
68 | - Language model: isolated word | |
69 | - You can perform isolated word recognition using only word | |
70 | - dictionary. With this model type, Julius will perform rapid one | |
71 | - pass recognition with static context handling. Silence models | |
72 | - will be added at both head and tail of each word. You can also | |
73 | - use multiple dictionaries in a process. | |
74 | - | |
75 | - Search Algorithm | |
76 | - Recognition algorithm of julius is based on a two-pass strategy. Word | |
77 | - 2-gram and reverse word 3-gram is used on the respective passes. The | |
78 | - entire input is processed on the first pass, and then the final | |
79 | - searching process is performed again for the input, using the result of | |
80 | - the first pass to narrow the search space. Specifically, the | |
81 | - recognition algorithm is based on a tree-trellis heuristic search | |
82 | - combined with left-to-right frame-synchronous beam search and | |
83 | - right-to-left stack decoding search. | |
84 | - | |
85 | - When using context dependent phones (triphones), interword contexts are | |
86 | - taken into consideration. For tied-mixture and phonetic tied-mixture | |
87 | - models, high-speed acoustic likelihood calculation is possible using | |
88 | - gaussian pruning. | |
89 | - | |
90 | - For more details, see the related documents. | |
91 | - | |
92 | -OPTIONS | |
93 | - These options specify the models, system behaviors and various search | |
94 | - parameters to Julius. These options can be set at the command line, but | |
95 | - it is recommended that you write them in a text file as a "jconf file", | |
96 | - and specify it by "-C" option. | |
97 | - | |
98 | - Applications incorporating JuliusLib also use these options to set the | |
99 | - parameters of core recognition engine. For example, a jconf file can be | |
100 | - loaded to the engine by calling j_config_load_file_new() with the jconf | |
101 | - file name as argument. | |
102 | - | |
103 | - Please note that relative paths in a jconf file should be relative to | |
104 | - the jconf file itself, not the current working directory. | |
105 | - | |
106 | - Below are the details of all options, gathered by group. | |
107 | - | |
108 | - Julius application option | |
109 | - These are application options of Julius, outside of JuliusLib. It | |
110 | - contains parameters and switches for result output, character set | |
111 | - conversion, log level, and module mode options. These options are | |
112 | - specific to Julius, and cannot be used at applications using JuliusLib | |
113 | - other than Julius. | |
114 | - | |
115 | - -outfile | |
116 | - On file input, this option write the recognition result of each file | |
117 | - to a separate file. The output file of an input file will be the | |
118 | - same name but the suffix will be changed to ".out". (rev.4.0) | |
119 | - | |
120 | - -separatescore | |
121 | - Output the language and acoustic scores separately. | |
122 | - | |
123 | - -callbackdebug | |
124 | - Print the callback names at each call for debug. (rev.4.0) | |
125 | - | |
126 | - -charconv from to | |
127 | - Print with character set conversion. from is the source character | |
128 | - set used in the language model, and to is the target character set | |
129 | - you want to get. | |
130 | - | |
131 | - On Linux, the arguments should be a code name. You can obtain the | |
132 | - list of available code names by invoking the command "iconv --list". | |
133 | - On Windows, the arguments should be a code name or codepage number. | |
134 | - Code name should be one of "ansi", "mac", "oem", "utf-7", "utf-8", | |
135 | - "sjis", "euc". Or you can specify any codepage number supported at | |
136 | - your environment. | |
137 | - | |
138 | - -nocharconv | |
139 | - Disable character conversion. | |
140 | - | |
141 | - -module [port] | |
142 | - Run Julius on "Server Module Mode". After startup, Julius waits for | |
143 | - tcp/ip connection from client. Once connection is established, | |
144 | - Julius starts communication with the client to process incoming | |
145 | - commands from the client, or to output recognition results, input | |
146 | - trigger information and other system status to the client. The | |
147 | - default port number is 10500. | |
148 | - | |
149 | - -record dir | |
150 | - Auto-save all input speech data into the specified directory. Each | |
151 | - segmented input is recorded one by one. The file name of the | |
152 | - recorded data is generated from system time when the input ends, in | |
153 | - a style of YYYY.MMDD.HHMMSS.wav. File format is 16bit monaural WAV. | |
154 | - Invalid for mfcfile input. | |
155 | - | |
156 | - With input rejection by -rejectshort, the rejected input will also | |
157 | - be recorded even if they are rejected. | |
158 | - | |
159 | - -logfile file | |
160 | - Save all log output to a file instead of standard output. (Rev.4.0) | |
161 | - | |
162 | - -nolog | |
163 | - Disable all log output. (Rev.4.0) | |
164 | - | |
165 | - -help | |
166 | - Output help message and exit. | |
167 | - | |
168 | - Global options | |
169 | - These are model-/search-dependent options relating audio input, sound | |
170 | - detection, GMM, decoding algorithm, plugin facility, and others. Global | |
171 | - options should be placed before any instance declaration (-AM, -LM, or | |
172 | - -SR), or just after "-GLOBAL" option. | |
173 | - | |
174 | - Audio input | |
175 | - -input | |
176 | - {mic|rawfile|mfcfile|adinnet|stdin|netaudio|alsa|oss|esd} | |
177 | - Choose speech input source. Specify 'file' or 'rawfile' for | |
178 | - waveform file, 'htkparam' or 'mfcfile' for HTK parameter | |
179 | - file. On file input, users will be prompted to enter the file | |
180 | - name from stdin, or you can use -filelist option to specify | |
181 | - list of files to process. | |
182 | - | |
183 | - 'mic' is to get audio input from a default live microphone | |
184 | - device, and 'adinnet' means receiving waveform data via tcpip | |
185 | - network from an adinnet client. 'netaudio' is from | |
186 | - DatLink/NetAudio input, and 'stdin' means data input from | |
187 | - standard input. | |
188 | - | |
189 | - For waveform file input, only WAV (no compression) and RAW | |
190 | - (noheader, 16bit, big endian) are supported by default. Other | |
191 | - format can be read when compiled with libsnd library. To see | |
192 | - what format is actually supported, see the help message using | |
193 | - option -help. For stdin input, only WAV and RAW is supported. | |
194 | - (default: mfcfile) | |
195 | - | |
196 | - At Linux, you can choose API at run time by specifying alsa, | |
197 | - oss and esd. | |
198 | - | |
199 | - -filelist filename | |
200 | - (With -input rawfile|mfcfile) perform recognition on all | |
201 | - files listed in the file. The file should contain input file | |
202 | - per line. Engine will end when all of the files are | |
203 | - processed. | |
204 | - | |
205 | - -notypecheck | |
206 | - By default, Julius checks the input parameter type whether it | |
207 | - matches the AM or not. This option will disable the check and | |
208 | - force engine to use the input vector as is. | |
209 | - | |
210 | - -48 | |
211 | - Record input with 48kHz sampling, and down-sample it to 16kHz | |
212 | - on-the-fly. This option is valid for 16kHz model only. The | |
213 | - down-sampling routine was ported from sptk. (Rev. 4.0) | |
214 | - | |
215 | - -NA devicename | |
216 | - Host name for DatLink server input (-input netaudio). | |
217 | - | |
218 | - -adport port_number | |
219 | - With -input adinnet, specify adinnet port number to listen. | |
220 | - (default: 5530) | |
221 | - | |
222 | - -nostrip | |
223 | - Julius by default removes successive zero samples in input | |
224 | - speech data. This option inhibits the removal. | |
225 | - | |
226 | - -zmean , -nozmean | |
227 | - This option enables/disables DC offset removal of input | |
228 | - waveform. Offset will be estimated from the whole input. For | |
229 | - microphone / network input, zero mean of the first 48000 | |
230 | - samples (3 seconds in 16kHz sampling) will be used for the | |
231 | - estimation. (default: disabled) | |
232 | - | |
233 | - This option uses static offset for the channel. See also | |
234 | - -zmeansource for frame-wise offset removal. | |
235 | - | |
236 | - Speech detection by level and zero-cross | |
237 | - -cutsilence , -nocutsilence | |
238 | - Turn on / off the speech detection by level and zero-cross. | |
239 | - Default is on for mic / adinnet input, and off for files. | |
240 | - | |
241 | - -lv thres | |
242 | - Level threshold for speech input detection. Values should be | |
243 | - in range from 0 to 32767. (default: 2000) | |
244 | - | |
245 | - -zc thres | |
246 | - Zero crossing threshold per second. Only input that goes over | |
247 | - the level threshold (-lv) will be counted. (default: 60) | |
248 | - | |
249 | - -headmargin msec | |
250 | - Silence margin at the start of speech segment in | |
251 | - milliseconds. (default: 300) | |
252 | - | |
253 | - -tailmargin msec | |
254 | - Silence margin at the end of speech segment in milliseconds. | |
255 | - (default: 400) | |
256 | - | |
257 | - Input rejection | |
258 | - Two simple front-end input rejection methods are implemented, | |
259 | - based on input length and average power of detected segment. The | |
260 | - rejection by average power is experimental, and can be enabled | |
261 | - by --enable-power-reject on compilation. Valid for MFCC feature | |
262 | - with power coefficient and real-time input only. | |
263 | - | |
264 | - For GMM-based input rejection see the GMM section below. | |
265 | - | |
266 | - -rejectshort msec | |
267 | - Reject input shorter than specified milliseconds. Search will | |
268 | - be terminated and no result will be output. | |
269 | - | |
270 | - -powerthres thres | |
271 | - Reject the inputted segment by its average energy. If the | |
272 | - average energy of the last recognized input is below the | |
273 | - threshold, Julius will reject the input. (Rev.4.0) | |
274 | - | |
275 | - This option is valid when --enable-power-reject is specified | |
276 | - at compilation time. | |
277 | - | |
278 | - Gaussian mixture model / GMM-VAD | |
279 | - GMM will be used for input rejection by accumulated score, or | |
280 | - for front-end GMM-based VAD when --enable-gmm-vad is specified. | |
281 | - | |
282 | - NOTE: You should also set the proper MFCC parameters required | |
283 | - for the GMM, specifying the acoustic parameters described in AM | |
284 | - section -AM_GMM. | |
285 | - | |
286 | - When GMM-based VAD is enabled, the voice activity score will be | |
287 | - calculated at each frame as front-end processing. The value will | |
288 | - be computed as \[ \max_{m \in M_v} p(x|m) - \max_{m \in M_n} | |
289 | - p(x|m) \] where $M_v$ is a set of voice GMM, and $M_n$ is a set | |
290 | - of noise GMM whose names should be specified by -gmmreject. The | |
291 | - activity score will be then averaged for the last N frames, | |
292 | - where N is specified by -gmmmargin. Julius updates the averaged | |
293 | - activity score at each frame, and detects speech up-trigger when | |
294 | - the value gets higher than a value specified by -gmmup, and | |
295 | - detects down-trigger when it gets lower than a value of | |
296 | - -gmmdown. | |
297 | - | |
298 | - -gmm hmmdefs_file | |
299 | - GMM definition file in HTK format. If specified, GMM-based | |
300 | - input verification will be performed concurrently with the | |
301 | - 1st pass, and you can reject the input according to the | |
302 | - result as specified by -gmmreject. The GMM should be defined | |
303 | - as one-state HMMs. | |
304 | - | |
305 | - -gmmnum number | |
306 | - Number of Gaussian components to be computed per frame on GMM | |
307 | - calculation. Only the N-best Gaussians will be computed for | |
308 | - rapid calculation. The default is 10 and specifying smaller | |
309 | - value will speed up GMM calculation, but too small value (1 | |
310 | - or 2) may cause degradation of identification performance. | |
311 | - | |
312 | - -gmmreject string | |
313 | - Comma-separated list of GMM names to be rejected as invalid | |
314 | - input. When recognition, the log likelihoods of GMMs | |
315 | - accumulated for the entire input will be computed | |
316 | - concurrently with the 1st pass. If the GMM name of the | |
317 | - maximum score is within this string, the 2nd pass will not be | |
318 | - executed and the input will be rejected. | |
319 | - | |
320 | - -gmmmargin frames | |
321 | - (GMM_VAD) Head margin in frames. When a speech trigger | |
322 | - detected by GMM, recognition will start from current frame | |
323 | - minus this value. (Rev.4.0) | |
324 | - | |
325 | - This option will be valid only if compiled with | |
326 | - --enable-gmm-vad. | |
327 | - | |
328 | - -gmmup value | |
329 | - (GMM_VAD) Up trigger threshold of voice activity score. | |
330 | - (Rev.4.1) | |
331 | - | |
332 | - This option will be valid only if compiled with | |
333 | - --enable-gmm-vad. | |
334 | - | |
335 | - -gmmdown value | |
336 | - (GMM_VAD) Down trigger threshold of voice activity score. | |
337 | - (Rev.4.1) | |
338 | - | |
339 | - This option will be valid only if compiled with | |
340 | - --enable-gmm-vad. | |
341 | - | |
342 | - Decoding option | |
343 | - Real-time processing means concurrent processing of MFCC | |
344 | - computation and 1st pass decoding. By default, real-time processing | |
345 | - on the pass is on for microphone / adinnet / netaudio input, and | |
346 | - off for others. | |
347 | - | |
348 | - -realtime , -norealtime | |
349 | - Explicitly switch on / off real-time (pipe-line) processing | |
350 | - on the first pass. The default is off for file input, and on | |
351 | - for microphone, adinnet and NetAudio input. This option | |
352 | - relates to the way CMN and energy normalization is performed: | |
353 | - if off, they will be done using average features of whole | |
354 | - input. If on, MAP-CMN and energy normalization will be used to do | |
355 | - real-time processing. | |
356 | - | |
357 | - Misc. options | |
358 | - -C jconffile | |
359 | - Load a jconf file at here. The content of the jconffile will | |
360 | - be expanded at this point. | |
361 | - | |
362 | - -version | |
363 | - Print version information to standard error, and exit. | |
364 | - | |
365 | - -setting | |
366 | - Print engine setting information to standard error, and exit. | |
367 | - | |
368 | - -quiet | |
369 | - Output less log. For result, only the best word sequence will | |
370 | - be printed. | |
371 | - | |
372 | - -debug | |
373 | - (For debug) output enormous internal message and debug | |
374 | - information to log. | |
375 | - | |
376 | - -check {wchmm|trellis|triphone} | |
377 | - For debug, enter interactive check mode. | |
378 | - | |
379 | - -plugindir dirlist | |
380 | - Specify directory to load plugin. If several directories | |
381 | - exist, specify them by colon-separated list. | |
382 | - | |
383 | - Instance declaration for multi decoding | |
384 | - The following arguments will create a new configuration set with | |
385 | - default parameters, and switch current set to it. Jconf parameters | |
386 | - specified after the option will be set into the current set. | |
387 | - | |
388 | - To do multi-model decoding, these argument should be specified at the | |
389 | - first of each model / search instances with different names. Any | |
390 | - options before the first instance definition will be IGNORED. | |
391 | - | |
392 | - When no instance definition is found (as older version of Julius), all | |
393 | - the options are assigned to a default instance named _default. | |
394 | - | |
395 | - Please note that decoding with a single LM and multiple AMs is not | |
396 | - fully supported. For example, you may want to construct the jconf file | |
397 | - as following. | |
398 | - This type of model sharing is not supported yet, since some part of LM | |
399 | - processing depends on the assigned AM. Instead, you can get the same | |
400 | - result by defining the same LMs for each AM, like this: | |
401 | - | |
402 | - -AM name | |
403 | - Create a new AM configuration set, and switch current to the new | |
404 | - one. You should give a unique name. (Rev.4.0) | |
405 | - | |
406 | - -LM name | |
407 | - Create a new LM configuration set, and switch current to the new | |
408 | - one. You should give a unique name. (Rev.4.0) | |
409 | - | |
410 | - -SR name am_name lm_name | |
411 | - Create a new search configuration set, and switch current to the new | |
412 | - one. The specified AM and LM will be assigned to it. The am_name and | |
413 | - lm_name can be either name or ID number. You should give a unique | |
414 | - name. (Rev.4.0) | |
415 | - | |
416 | - -AM_GMM | |
417 | - When using GMM for front-end processing, you can specify | |
418 | - GMM-specific acoustic parameters after this option. If you do not | |
419 | - specify -AM_GMM with GMM, the GMM will share the same parameter | |
420 | - vector as the last AM. The current AM will be switched to the GMM | |
421 | - one, so be careful not to confuse with normal AM configurations. | |
422 | - (Rev.4.0) | |
423 | - | |
424 | - -GLOBAL | |
425 | - Start a global section. The global options should be placed before | |
426 | - any instance declaration, or after this option on multiple model | |
427 | - recognition. This can be used multiple times. (Rev.4.1) | |
428 | - | |
429 | - -nosectioncheck , -sectioncheck | |
430 | - Disable / enable option location check in multi-model decoding. When | |
431 | - enabled, the options between instance declaration is treated as | |
432 | - "sections" and only the belonging option types can be written. For | |
433 | - example, when an option -AM is specified, only the AM related option | |
434 | - can be placed after the option until other declaration is found. | |
435 | - Also, global options should be placed at top, before any instance | |
436 | - declaration. This is enabled by default. (Rev.4.1) | |
437 | - | |
438 | - Language model (-LM) | |
439 | - This group contains options for model definition of each language model | |
440 | - type. When using multiple LM, one instance can have only one LM. | |
441 | - | |
442 | - Only one type of LM can be specified for a LM configuration. If you | |
443 | - want to use multi model, you should define each one as a new LM. | |
444 | - | |
445 | - N-gram | |
446 | - -d bingram_file | |
447 | - Use binary format N-gram. An ARPA N-gram file can be | |
448 | - converted to Julius binary format by mkbingram. | |
449 | - | |
450 | - -nlr arpa_ngram_file | |
451 | - A forward, left-to-right N-gram language model in standard | |
452 | - ARPA format. When both a forward N-gram and backward N-gram | |
453 | - are specified, Julius uses this forward 2-gram for the 1st | |
454 | - pass, and the backward N-gram for the 2nd pass. | |
455 | - | |
456 | - Since ARPA file often gets huge and requires a lot of time to | |
457 | - load, it may be better to convert the ARPA file to Julius | |
458 | - binary format by mkbingram. Note that if both forward and | |
459 | - backward N-gram is used for recognition, they together will | |
460 | - be converted to a single binary. | |
461 | - | |
462 | - When only a forward N-gram is specified by this option and no | |
463 | - backward N-gram specified by -nrl, Julius performs | |
464 | - recognition with only the forward N-gram. The 1st pass will | |
465 | - use the 2-gram entry in the given N-gram, and The 2nd pass | |
466 | - will use the given N-gram, with converting forward | |
467 | - probabilities to backward probabilities by Bayes rule. | |
468 | - (Rev.4.0) | |
469 | - | |
470 | - -nrl arpa_ngram_file | |
471 | - A backward, right-to-left N-gram language model in standard | |
472 | - ARPA format. When both a forward N-gram and backward N-gram | |
473 | - are specified, Julius uses the forward 2-gram for the 1st | |
474 | - pass, and this backward N-gram for the 2nd pass. | |
475 | - | |
476 | - Since ARPA file often gets huge and requires a lot of time to | |
477 | - load, it may be better to convert the ARPA file to Julius | |
478 | - binary format by mkbingram. Note that if both forward and | |
479 | - backward N-gram is used for recognition, they together will | |
480 | - be converted to a single binary. | |
481 | - | |
482 | - When only a backward N-gram is specified by this option and | |
483 | - no forward N-gram specified by -nlr, Julius performs | |
484 | - recognition with only the backward N-gram. The 1st pass will | |
485 | - use the forward 2-gram probability computed from the backward | |
486 | - 2-gram using Bayes rule. The 2nd pass fully uses the given | |
487 | - backward N-gram. (Rev.4.0) | |
488 | - | |
489 | - -v dict_file | |
490 | - Word dictionary file. | |
491 | - | |
492 | - -silhead word_string -siltail word_string | |
493 | - Silence word defined in the dictionary, for silences at the | |
494 | - beginning of sentence and end of sentence. (default: "<s>", | |
495 | - "</s>") | |
496 | - | |
497 | - -mapunk word_string | |
498 | - Specify unknown word. Default is "<unk>" or "<UNK>". This | |
499 | - will be used to assign word probability on unknown words, | |
500 | - i.e. words in dictionary that are not in N-gram vocabulary. | |
501 | - | |
502 | - -iwspword | |
503 | - Add a word entry to the dictionary that should correspond to | |
504 | - inter-word pauses. This may improve recognition accuracy in | |
505 | - some language model that has no explicit inter-word pause | |
506 | - modeling. The word entry to be added can be changed by | |
507 | - -iwspentry. | |
508 | - | |
509 | - -iwspentry word_entry_string | |
510 | - Specify the word entry that will be added by -iwspword. | |
511 | - (default: "<UNK> [sp] sp sp") | |
512 | - | |
513 | - -sepnum number | |
514 | - Number of high frequency words to be isolated from the | |
515 | - lexicon tree, to ease approximation error that may be caused | |
516 | - by the one-best approximation on 1st pass. (default: 150) | |
517 | - | |
518 | - Grammar | |
519 | - Multiple grammars can be specified by repeating -gram and | |
520 | - -gramlist. Note that this is unusual behavior from other options | |
521 | - (in normal Julius option, last one will override previous ones). | |
522 | - You can use -nogram to reset the grammars already specified | |
523 | - before the point. | |
524 | - | |
525 | - -gram gramprefix1[,gramprefix2[,gramprefix3,...]] | |
526 | - Comma-separated list of grammars to be used. the argument | |
527 | - should be a prefix of a grammar, i.e. if you have foo.dfa and | |
528 | - foo.dict, you should specify them with a single argument foo. | |
529 | - Multiple grammars can be specified at a time as a | |
530 | - comma-separated list. | |
531 | - | |
532 | - -gramlist list_file | |
533 | - Specify a grammar list file that contains list of grammars to | |
534 | - be used. The list file should contain the prefixes of | |
535 | - grammars, each per line. A relative path in the list file | |
536 | - will be treated as relative to the file, not the current path | |
537 | - or configuration file. | |
538 | - | |
539 | - -dfa dfa_file -v dict_file | |
540 | - An old way of specifying grammar files separately. This is | |
541 | - bogus, and should not be used any more. | |
542 | - | |
543 | - -nogram | |
544 | - Remove the current list of grammars already specified by | |
545 | - -gram, -gramlist, -dfa and -v. | |
546 | - | |
547 | - Isolated word | |
548 | - Dictionary can be specified by using -w and -wlist. When you | |
549 | - specify multiple times, all of them will be read at startup. You | |
550 | - can use -nogram to reset the already specified dictionaries at | |
551 | - that point. | |
552 | - | |
553 | - -w dict_file | |
554 | - Word dictionary for isolated word recognition. File format is | |
555 | - the same as other LM. (Rev.4.0) | |
556 | - | |
557 | - -wlist list_file | |
558 | - Specify a dictionary list file that contains list of | |
559 | - dictionaries to be used. The list file should contain the | |
560 | - file name of dictionaries, each per line. A relative path in | |
561 | - the list file will be treated as relative to the list file, | |
562 | - not the current path or configuration file. (Rev.4.0) | |
563 | - | |
564 | - -nogram | |
565 | - Remove the current list of dictionaries already specified by | |
566 | - -w and -wlist. | |
567 | - | |
568 | - -wsil head_sil_model_name tail_sil_model_name sil_context_name | |
569 | - On isolated word recognition, silence models will be appended | |
570 | - to the head and tail of each word at recognition. This option | |
571 | - specifies the silence models to be appended. | |
572 | - sil_context_name is the name of the head sil model and tail | |
573 | - sil model as a context of word head phone and tail phone. For | |
574 | - example, if you specify -wsil silB silE sp, a word with phone | |
575 | - sequence b eh t will be translated as silB sp-b+eh b-eh+t | |
576 | - eh-t+sp silE. (Rev.4.0) | |
577 | - | |
578 | - User-defined LM | |
579 | - -userlm | |
580 | - Declare to use user LM functions in the program. This option | |
581 | - should be specified if you use user-defined LM functions. | |
582 | - (Rev.4.0) | |
583 | - | |
584 | - Misc. LM options | |
585 | - -forcedict | |
586 | - Skip error words in dictionary and force running. | |
587 | - | |
588 | - Acoustic model and feature analysis (-AM) (-AM_GMM) | |
589 | - This section is about options for acoustic model, feature extraction, | |
590 | - feature normalizations and spectral subtraction. | |
591 | - | |
592 | - After -AM name, an acoustic model and related specification should be | |
593 | - written. You can use multiple AMs trained with different MFCC types. | |
594 | - For GMM, the required parameter conditions should be specified in the | |
595 | - same way as for AMs, after -AM_GMM. | |
596 | - | |
597 | - When using multiple AMs, the values of -smpPeriod, -smpFreq, -fsize and | |
598 | - -fshift should be the same among all AMs. | |
599 | - | |
600 | - Acoustic HMM | |
601 | - -h hmmdef_file | |
602 | - Acoustic HMM definition file. It should be in HTK ascii | |
603 | - format, or Julius binary format. You can convert HTK ascii | |
604 | - format to Julius binary format using mkbinhmm. | |
605 | - | |
606 | - -hlist hmmlist_file | |
607 | - HMMList file for phone mapping. This file provides mapping | |
608 | - between logical triphone names generated in the dictionary | |
609 | - and the defined HMM names in hmmdefs. This option should be | |
610 | - specified for context-dependent model. | |
611 | - | |
612 | - -tmix number | |
613 | - Specify the number of top Gaussians to be calculated in a | |
614 | - mixture codebook. Small number will speed up the acoustic | |
615 | - computation, but AM accuracy may get worse with too small | |
616 | - value. See also -gprune. (default: 2) | |
617 | - | |
618 | - -spmodel name | |
619 | - Specify HMM model name that corresponds to short-pause in an | |
620 | - utterance. The short-pause model name will be used in | |
621 | - recognition: short-pause skipping on grammar recognition, | |
622 | - word-end short-pause model insertion with -iwsp on N-gram, or | |
623 | - short-pause segmentation (-spsegment). (default: "sp") | |
624 | - | |
625 | - -multipath | |
626 | - Enable multi-path mode. To make decoding faster, Julius by | |
627 | - default imposes a limit on HMM transitions that each model | |
628 | - should have only one transition from initial state and to end | |
629 | - state. On multi-path mode, Julius does extra handling on | |
630 | - inter-model transition to allow model-skipping transition | |
631 | - and multiple output/input transitions. Note that specifying | |
632 | - this option will make Julius a bit slower, and the larger | |
633 | - beam width may be required. | |
634 | - | |
635 | - This function was a compilation-time option on Julius 3.x, | |
636 | - and now becomes a run-time option. By default (without this | |
637 | - option), Julius checks the transition type of specified HMMs, | |
638 | - and enables the multi-path mode if required. You can force | |
639 | - multi-path mode with this option. (rev.4.0) | |
640 | - | |
641 | - -gprune {safe|heuristic|beam|none|default} | |
642 | - Set Gaussian pruning algorithm to use. For tied-mixture | |
643 | - model, Julius performs Gaussian pruning to reduce acoustic | |
644 | - computation, by calculating only the top N Gaussians in each | |
645 | - codebook at each frame. The default setting will be set | |
646 | - according to the model type and engine setting. default will | |
647 | - force accepting the default setting. Set this to none to | |
648 | - disable pruning and perform full computation. safe | |
649 | - guarantees the top N Gaussians to be computed. heuristic and | |
650 | - beam do more aggressive computational cost reduction, but may | |
651 | - result in a small loss of accuracy. (default: safe | |
652 | - (standard), beam (fast) for tied mixture model, none for non | |
653 | - tied-mixture model). | |
654 | - | |
655 | - -iwcd1 {max|avg|best number} | |
656 | - Select method to approximate inter-word triphone on the head | |
657 | - and tail of a word in the first pass. | |
658 | - | |
659 | - | |
660 | - max will apply the maximum likelihood of the same context | |
661 | - triphones. avg will apply the average likelihood of the same | |
662 | - context triphones. best number will apply the average of top | |
663 | - N-best likelihoods of the same context triphone. | |
664 | - | |
665 | - Default is best 3 for use with N-gram, and avg for grammar | |
666 | - and word. When this AM is shared by LMs of both type, latter | |
667 | - one will be chosen. | |
668 | - | |
669 | - -iwsppenalty float | |
670 | - Insertion penalty for word-end short pauses appended by | |
671 | - -iwsp. | |
672 | - | |
673 | - -gshmm hmmdef_file | |
674 | - If this option is specified, Julius performs Gaussian Mixture | |
675 | - Selection for efficient decoding. The hmmdefs should be a | |
676 | - monophone model generated from an ordinary monophone HMM | |
677 | - model, using mkgshmm. | |
678 | - | |
679 | - -gsnum number | |
680 | - On GMS, specify number of monophone states to compute | |
681 | - corresponding triphones in detail. (default: 24) | |
682 | - | |
683 | - Speech analysis | |
684 | - Only MFCC feature extraction is supported in current Julius. | |
685 | - Thus when recognizing a waveform input from file or microphone, | |
686 | - AM must be trained by MFCC. The parameter condition should also | |
687 | - be set as exactly the same as the training condition by the | |
688 | - options below. | |
689 | - | |
690 | - When you give an input in HTK Parameter file, you can use any | |
691 | - parameter type for AM. In this case Julius does not care about | |
692 | - the type of input feature and AM, just read them as vector | |
693 | - sequence and match them to the given AM. Julius only checks | |
694 | - whether the parameter types are the same. If it does not work | |
695 | - well, you can disable this checking by -notypecheck. | |
696 | - | |
697 | - In Julius, the parameter kind and qualifiers (as TARGETKIND in | |
698 | - HTK) and the number of cepstral parameters (NUMCEPS) will be set | |
699 | - automatically from the content of the AM header, so you need not | |
700 | - specify them by options. | |
701 | - | |
702 | - Other parameters should be set exactly the same as training | |
703 | - condition. You can also give a HTK Config file which you used to | |
704 | - train AM to Julius by -htkconf. When this option is applied, | |
705 | - Julius will parse the Config file and set appropriate parameter. | |
706 | - | |
707 | - You can further embed those analysis parameter settings to a | |
708 | - binary HMM file using mkbinhmm. | |
709 | - | |
710 | - If options are specified in several ways, they will be evaluated in | |
711 | - the order below. The AM embedded parameters will be loaded first | |
712 | - if any. Then, the HTK config file given by -htkconf will be | |
713 | - parsed. If a value was already set by an AM embedded value, the HTK | |
714 | - config will override it. Finally, the direct options will be loaded, | |
715 | - which will override settings loaded before. Note that, when the | |
716 | - same options are specified several times, later will override | |
717 | - previous, except that -htkconf will be evaluated first as | |
718 | - described above. | |
719 | - | |
720 | - -smpPeriod period | |
721 | - Sampling period of input speech, in unit of 100 nanoseconds. | |
722 | - Sampling rate can also be specified by -smpFreq. Please note | |
723 | - that the input frequency should be set equal to the training | |
724 | - conditions of AM. (default: 625, corresponds to 16,000Hz) | |
725 | - | |
726 | - This option corresponds to the HTK Option SOURCERATE. The | |
727 | - same value can be given to this option. | |
728 | - | |
729 | - When using multiple AM, this value should be the same among | |
730 | - all AMs. | |
731 | - | |
732 | - -smpFreq Hz | |
733 | - Set sampling frequency of input speech in Hz. Sampling rate | |
734 | - can also be specified using -smpPeriod. Please note that this | |
735 | - frequency should be set equal to the training conditions of | |
736 | - AM. (default: 16,000) | |
737 | - | |
738 | - When using multiple AM, this value should be the same among | |
739 | - all AMs. | |
740 | - | |
741 | - -fsize sample_num | |
742 | - Window size in number of samples. (default: 400) | |
743 | - | |
744 | - This option corresponds to the HTK Option WINDOWSIZE, but | |
745 | - value should be in samples (HTK value / smpPeriod). | |
746 | - | |
747 | - When using multiple AM, this value should be the same among | |
748 | - all AMs. | |
749 | - | |
750 | - -fshift sample_num | |
751 | - Frame shift in number of samples. (default: 160) | |
752 | - | |
753 | - This option corresponds to the HTK Option TARGETRATE, but | |
754 | - value should be in samples (HTK value / smpPeriod). | |
755 | - | |
756 | - When using multiple AM, this value should be the same among | |
757 | - all AMs. | |
758 | - | |
759 | - -preemph float | |
760 | - Pre-emphasis coefficient. (default: 0.97) | |
761 | - | |
762 | - This option corresponds to the HTK Option PREEMCOEF. The same | |
763 | - value can be given to this option. | |
764 | - | |
765 | - -fbank num | |
766 | - Number of filterbank channels. (default: 24) | |
767 | - | |
768 | - This option corresponds to the HTK Option NUMCHANS. The same | |
769 | - value can be given to this option. Be aware that the default | |
770 | - value is not the same as in HTK (22). | |
771 | - | |
772 | - -ceplif num | |
773 | - Cepstral liftering coefficient. (default: 22) | |
774 | - | |
775 | - This option corresponds to the HTK Option CEPLIFTER. The same | |
776 | - value can be given to this option. | |
777 | - | |
778 | - -rawe , -norawe | |
779 | - Enable/disable using raw energy before pre-emphasis (default: | |
780 | - disabled) | |
781 | - | |
782 | - This option corresponds to the HTK Option RAWENERGY. Be aware | |
783 | - that the default value differs from HTK (enabled at HTK, | |
784 | - disabled at Julius). | |
785 | - | |
786 | - -enormal , -noenormal | |
787 | - Enable/disable normalizing log energy. On live input, this | |
788 | - normalization will be approximated from the average of last | |
789 | - input. (default: disabled) | |
790 | - | |
791 | - This option corresponds to the HTK Option ENORMALISE. Be | |
792 | - aware that the default value differs from HTK (enabled at | |
793 | - HTK, disabled at Julius). | |
794 | - | |
795 | - -escale float_scale | |
796 | - Scaling factor of log energy when normalizing log energy. | |
797 | - (default: 1.0) | |
798 | - | |
799 | - This option corresponds to the HTK Option ESCALE. Be aware | |
800 | - that the default value differs from HTK (0.1). | |
801 | - | |
802 | - -silfloor float | |
803 | - Energy silence floor in dB when normalizing log energy. | |
804 | - (default: 50.0) | |
805 | - | |
806 | - This option corresponds to the HTK Option SILFLOOR. | |
807 | - | |
808 | - -delwin frame | |
809 | - Delta window size in number of frames. (default: 2) | |
810 | - | |
811 | - This option corresponds to the HTK Option DELTAWINDOW. The | |
812 | - same value can be given to this option. | |
813 | - | |
814 | - -accwin frame | |
815 | - Acceleration window size in number of frames. (default: 2) | |
816 | - | |
817 | - This option corresponds to the HTK Option ACCWINDOW. The same | |
818 | - value can be given to this option. | |
819 | - | |
820 | - -hifreq Hz | |
821 | - Enable band-limiting for MFCC filterbank computation: set | |
822 | - upper frequency cut-off. Value of -1 will disable it. | |
823 | - (default: -1) | |
824 | - | |
825 | - This option corresponds to the HTK Option HIFREQ. The same | |
826 | - value can be given to this option. | |
827 | - | |
828 | - -lofreq Hz | |
829 | - Enable band-limiting for MFCC filterbank computation: set | |
830 | - lower frequency cut-off. Value of -1 will disable it. | |
831 | - (default: -1) | |
832 | - | |
833 | - This option corresponds to the HTK Option LOFREQ. The same | |
834 | - value can be given to this option. | |
835 | - | |
836 | - -zmeanframe , -nozmeanframe | |
837 | - With speech input, this option enables/disables frame-wise DC | |
838 | - offset removal. This corresponds to HTK configuration | |
839 | - ZMEANSOURCE. This cannot be used together with -zmean. | |
840 | - (default: disabled) | |
841 | - | |
842 | - -usepower | |
843 | - Use power instead of magnitude on filterbank analysis. | |
844 | - (default: disabled) | |
845 | - | |
846 | - Normalization | |
847 | - Julius can perform cepstral mean normalization (CMN) for inputs. | |
848 | - CMN will be activated when the given AM was trained with CMN | |
849 | - (i.e. has "_Z" qualifier in the header). | |
850 | - | |
851 | - The cepstral mean will be estimated in different way according | |
852 | - to the input type. On file input, the mean will be computed from | |
853 | - the whole input. On live input such as microphone and network | |
854 | - input, the cepstral mean of the input is unknown at the start. So | |
855 | - MAP-CMN will be used. On MAP-CMN, an initial mean vector will be | |
856 | - applied at the beginning, and the mean vector will be smeared to | |
857 | - the mean of the incrementing input vector as input goes. Options | |
858 | - below can control the behavior of MAP-CMN. | |
859 | - | |
860 | - -cvn | |
861 | - Enable cepstral variance normalization. At file input, the | |
862 | - variance of whole input will be calculated and then applied. | |
863 | - At live microphone input, variance of the last input will be | |
864 | - applied. CVN is only supported for an audio input. | |
865 | - | |
866 | - -vtln alpha lowcut hicut | |
867 | - Do frequency warping, typically for a vocal tract length | |
868 | - normalization (VTLN). Arguments are warping factor, low | |
869 | - frequency cut-off and high frequency cut-off. They correspond to | |
870 | - HTK Config values, WARPFREQ, WARPLCUTOFF and WARPHCUTOFF. | |
871 | - | |
872 | - -cmnload file | |
873 | - Load initial cepstral mean vector from file on startup. The | |
874 | - file should be one saved by -cmnsave. Loading an initial | |
875 | - cepstral mean enables Julius to better recognize the first | |
876 | - utterance on a real-time input. When used together with | |
877 | - -cmnnoupdate, this initial value will be used for all input. | |
878 | - | |
879 | - -cmnsave file | |
880 | - Save the calculated cepstral mean vector into file. The | |
881 | - parameters will be saved at each input end. If the output | |
882 | - file already exists, it will be overridden. | |
883 | - | |
884 | - -cmnupdate -cmnnoupdate | |
885 | - Control whether to update the cepstral mean at each input on | |
886 | - real-time input. Disabling this and specifying -cmnload will | |
887 | - make the engine always use the loaded static initial cepstral | |
888 | - mean. | |
889 | - | |
890 | - -cmnmapweight float | |
891 | - Specify the weight of initial cepstral mean for MAP-CMN. | |
892 | - Specify larger value to retain the initial cepstral mean for | |
893 | - a longer period, and smaller value to make the cepstral mean | |
894 | - rely more on the current input. (default: 100.0) | |
895 | - | |
896 | - Front-end processing | |
897 | - Julius can perform spectral subtraction to reduce some | |
898 | - stationary noise from audio input. Though it is not a powerful | |
899 | - method, it may work in some situations. Julius has two ways | |
900 | - to estimate noise spectrum. One way is to assume that the first | |
901 | - short segment of a speech input is a noise segment, and estimate | |
902 | - the noise spectrum as the average of the segment. Another way is | |
903 | - to calculate average spectrum from noise-only input using other | |
904 | - tool mkss, and load it in Julius. The former one is popular for | |
905 | - speech file input, and latter should be used in live input. The | |
906 | - options below will switch / control the behavior. | |
907 | - | |
908 | - -sscalc | |
909 | - Perform spectral subtraction using head part of each file as | |
910 | - silence part. The head part length should be specified by | |
911 | - -sscalclen. Valid only for file input. Conflict with -ssload. | |
912 | - | |
913 | - -sscalclen msec | |
914 | - With -sscalc, specify the length of head silence for noise | |
915 | - spectrum estimation in milliseconds. (default: 300) | |
916 | - | |
917 | - -ssload file | |
918 | - Perform spectral subtraction for speech input using | |
919 | - pre-estimated noise spectrum loaded from file. The noise | |
920 | - spectrum file can be made by mkss. Valid for all speech | |
921 | - input. Conflict with -sscalc. | |
922 | - | |
923 | - -ssalpha float | |
924 | - Alpha coefficient of spectral subtraction for -sscalc and | |
925 | - -ssload. Noise will be subtracted stronger as this value gets | |
926 | - larger, but distortion of the resulting signal also becomes | |
927 | - remarkable. (default: 2.0) | |
928 | - | |
929 | - -ssfloor float | |
930 | - Flooring coefficient of spectral subtraction. The spectral | |
931 | - power that goes below zero after subtraction will be | |
932 | - substituted by the source signal with this coefficient | |
933 | - multiplied. (default: 0.5) | |
934 | - | |
935 | - Misc. AM options | |
936 | - -htkconf file | |
937 | - Parse the given HTK Config file, and set corresponding | |
938 | - parameters to Julius. When using this option, the default | |
939 | - parameter values are switched from Julius defaults to HTK | |
940 | - defaults. | |
941 | - | |
942 | - Recognition process and search (-SR) | |
943 | - This section contains options for search parameters on the 1st / 2nd | |
944 | - pass such as beam width and LM weights, configurations for short-pause | |
945 | - segmentation, switches for word lattice output and confusion network | |
946 | - output, forced alignments, and other options relating to the recognition | |
947 | - process and result output. | |
948 | - | |
949 | - Default values for beam width and LM weights will change according to | |
950 | - compile-time setup of JuliusLib , AM model type, and LM size. Please | |
951 | - see the startup log for the actual values. | |
952 | - | |
953 | - 1st pass parameters | |
954 | - -lmp weight penalty | |
955 | - (N-gram) Language model weights and word insertion penalties | |
956 | - for the first pass. | |
957 | - | |
958 | - -penalty1 penalty | |
959 | - (Grammar) word insertion penalty for the first pass. | |
960 | - (default: 0.0) | |
961 | - | |
962 | - -b width | |
963 | - Beam width in number of HMM nodes for rank beaming on the | |
964 | - first pass. This value defines search width on the 1st pass, | |
965 | - and has dominant effect on the total processing time. Smaller | |
966 | - width will speed up the decoding, but too small value will | |
967 | - result in a substantial increase of recognition errors due to | |
968 | - search failure. Larger value will make the search stable and | |
969 | - will lead to failure-free search, but processing time will | |
970 | - grow in proportion to the width. | |
971 | - | |
972 | - The default value is dependent on acoustic model type: 400 | |
973 | - (monophone), 800 (triphone), or 1000 (triphone, setup=v2.1) | |
974 | - | |
975 | - -nlimit num | |
976 | - Upper limit of token per node. This option is valid when | |
977 | - --enable-wpair and --enable-wpair-nlimit are enabled at | |
978 | - compilation time. | |
979 | - | |
980 | - -progout | |
981 | - Enable progressive output of the partial results on the first | |
982 | - pass. | |
983 | - | |
984 | - -proginterval msec | |
985 | - Set the time interval for -progout in milliseconds. (default: | |
986 | - 300) | |
987 | - | |
988 | - 2nd pass parameters | |
989 | - -lmp2 weight penalty | |
990 | - (N-gram) Language model weights and word insertion penalties | |
991 | - for the second pass. | |
992 | - | |
993 | - -penalty2 penalty | |
994 | - (Grammar) word insertion penalty for the second pass. | |
995 | - (default: 0.0) | |
996 | - | |
997 | - -b2 width | |
998 | - Envelope beam width (number of hypotheses) at the second | |
999 | - pass. If the count of word expansions at a certain hypothesis | |
1000 | - length reaches this limit during search, shorter hypotheses | |
1001 | - are not expanded further. This prevents the search from falling into a | |
1002 | - breadth-first-like situation stacking on the same position, | |
1003 | - and reduces search failures, mostly for large vocabulary | |
1004 | - condition. (default: 30) | |
1005 | - | |
1006 | - -sb float | |
1007 | - Score envelope width for enveloped scoring. When calculating | |
1008 | - hypothesis score for each generated hypothesis, its trellis | |
1009 | - expansion and Viterbi operation will be pruned in the middle | |
1010 | - of the speech if score on a frame goes under the width. | |
1011 | - Giving small value makes the second pass faster, but | |
1012 | - computation error may occur. (default: 80.0) | |
1013 | - | |
1014 | - -s num | |
1015 | - Stack size, i.e. the maximum number of hypotheses that can be | |
1016 | - stored on the stack during the search. A larger value may | |
1017 | - give more stable results, but increases the amount of memory | |
1018 | - required. (default: 500) | |
1019 | - | |
1020 | - -m count | |
1021 | - Number of expanded hypotheses required to discontinue the | |
1022 | - search. If the number of expanded hypotheses is greater than | |
1023 | - this threshold, the search is discontinued at that | |
1024 | - point. The larger this value is, the longer Julius takes to | |
1025 | - give up the search. (default: 2000) | |
1026 | - | |
1027 | - -n num | |
1028 | - The number of candidates Julius tries to find. The search | |
1029 | - continues till this number of sentence hypotheses have been | |
1030 | - found. The obtained sentence hypotheses are sorted by score, | |
1031 | - and final result is displayed in the order (see also the | |
1032 | - -output). The possibility that the optimum hypothesis is | |
1033 | - correctly found increases as this value gets increased, but | |
1034 | - the processing time also becomes longer. The default value | |
1035 | - depends on the engine setup on compilation time: 10 | |
1036 | - (standard) or 1 (fast or v2.1) | |
1037 | - | |
1038 | - -output num | |
1039 | - The top N sentence hypotheses to be output at the end of | |
1040 | - search. Use with -n (default: 1) | |
1041 | - | |
1042 | - -lookuprange frame | |
1043 | - Set the number of frames before and after to look up next | |
1044 | - word hypotheses in the word trellis on the second pass. This | |
1045 | - prevents the omission of short words, but with a large value, | |
1046 | - the number of expanded hypotheses increases and system | |
1047 | - becomes slow. (default: 5) | |
1048 | - | |
1049 | - -looktrellis | |
1050 | - (Grammar) Expand only the words survived on the first pass | |
1051 | - instead of expanding all the words predicted by grammar. This | |
1052 | - option makes second pass decoding faster especially for large | |
1053 | - vocabulary condition, but may increase deletion error of | |
1054 | - short words. (default: disabled) | |
1055 | - | |
1056 | - Short-pause segmentation / decoder-VAD | |
1057 | - When compiled with --enable-decoder-vad, the short-pause | |
1058 | - segmentation will be extended to support decoder-based VAD. | |
1059 | - | |
1060 | - -spsegment | |
1061 | - Enable short-pause segmentation mode. Input will be segmented | |
1062 | - when a short pause word (word with only silence model in | |
1063 | - pronunciation) gets the highest likelihood at certain | |
1064 | - successive frames on the first pass. When a segment end is | |
1065 | - detected, Julius stops the 1st pass at that point, performs the 2nd pass, | |
1066 | - and continues with the next segment. The word context will be | |
1067 | - considered among segments. (Rev.4.0) | |
1068 | - | |
1069 | - When compiled with --enable-decoder-vad, this option enables | |
1070 | - decoder-based VAD, to skip long silence. | |
1071 | - | |
1072 | - -spdur frame | |
1073 | - Short pause duration length to detect end of input segment, | |
1074 | - in number of frames. (default: 10) | |
1075 | - | |
1076 | - -pausemodels string | |
1077 | - A comma-separated list of pause model names to be used at | |
1078 | - short-pause segmentation. The word whose pronunciation | |
1079 | - consists of only the pause models will be treated as "pause | |
1080 | - word" and used for pause detection. If not specified, name of | |
1081 | - -spmodel, -silhead and -siltail will be used. (Rev.4.0) | |
1082 | - | |
1083 | - -spmargin frame | |
1084 | - Back step margin at trigger up for decoder-based VAD. When | |
1085 | - speech up-trigger found by decoder-VAD, Julius will rewind | |
1086 | - the input parameter by this value, and start recognition at | |
1087 | - the point. (Rev.4.0) | |
1088 | - | |
1089 | - This option will be valid only if compiled with | |
1090 | - --enable-decoder-vad. | |
1091 | - | |
1092 | - -spdelay frame | |
1093 | - Trigger decision delay frame at trigger up for decoder-based | |
1094 | - VAD. (Rev.4.0) | |
1095 | - | |
1096 | - This option will be valid only if compiled with | |
1097 | - --enable-decoder-vad. | |
1098 | - | |
1099 | - Word lattice / confusion network output | |
1100 | - -lattice , -nolattice | |
1101 | - Enable / disable generation of word graph. Search algorithm | |
1102 | - also has changed to optimize for better word graph | |
1103 | - generation, so the sentence result may not be the same as | |
1104 | - normal N-best recognition. (Rev.4.0) | |
1105 | - | |
1106 | - -confnet , -noconfnet | |
1107 | - Enable / disable generation of confusion network. Enabling | |
1108 | - this will also activate -lattice internally. (Rev.4.0) | |
1109 | - | |
1110 | - -graphrange frame | |
1111 | - Merge same words at neighbor position at graph generation. If | |
1112 | - the beginning time and ending time of two word candidates of | |
1113 | - the same word is within the specified range, they will be | |
1114 | - merged. The default is 0 (allow merging same words on exactly | |
1115 | - the same location) and specifying larger value will result in | |
1116 | - smaller graph output. Setting this value to -1 will disable | |
1117 | - merging, in that case same words on the same location of | |
1118 | - different scores will be left as they are. (default: 0) | |
1119 | - | |
1120 | - -graphcut depth | |
1121 | - Cut the resulting graph by its word depth at post-processing | |
1122 | - stage. The depth value is the number of words to be allowed | |
1123 | - at a frame. Setting to -1 disables this feature. (default: | |
1124 | - 80) | |
1125 | - | |
1126 | - -graphboundloop count | |
1127 | - Limit the number of boundary adjustment loop at | |
1128 | - post-processing stage. This parameter prevents Julius from | |
1129 | - blocking by infinite adjustment loop by short word | |
1130 | - oscillation. (default: 20) | |
1131 | - | |
1132 | - -graphsearchdelay , -nographsearchdelay | |
1133 | - When this option is enabled, Julius modifies its graph | |
1134 | - generation algorithm on the 2nd pass not to terminate search | |
1135 | - by graph merging, until the first sentence candidate is | |
1136 | - found. This option may improve graph accuracy, especially | |
1137 | - when you are going to generate a huge word graph by setting | |
1138 | - broad search. Namely, it may result in better graph accuracy | |
1139 | - when you set wide beams on both 1st pass -b and 2nd pass -b2, | |
1140 | - and large number for -n. (default: disabled) | |
1141 | - | |
1142 | - Multi-gram / multi-dic recognition | |
1143 | - -multigramout , -nomultigramout | |
1144 | - On grammar recognition using multiple grammars, Julius will | |
1145 | - output only the best result among all grammars. Enabling this | |
1146 | - option will make Julius output a result for each grammar. | |
1147 | - (default: disabled) | |
1148 | - | |
1149 | - Forced alignment | |
1150 | - -walign | |
1151 | - Do viterbi alignment per word units for the recognition | |
1152 | - result. The word boundary frames and the average acoustic | |
1153 | - scores per frame will be calculated. | |
1154 | - | |
1155 | - -palign | |
1156 | - Do viterbi alignment per phone units for the recognition | |
1157 | - result. The phone boundary frames and the average acoustic | |
1158 | - scores per frame will be calculated. | |
1159 | - | |
1160 | - -salign | |
1161 | - Do viterbi alignment per state for the recognition result. | |
1162 | - The state boundary frames and the average acoustic scores per | |
1163 | - frame will be calculated. | |
1164 | - | |
1165 | - Misc. search options | |
1166 | - -inactive | |
1167 | - Start this recognition process instance with inactive state. | |
1168 | - (Rev.4.0) | |
1169 | - | |
1170 | - -1pass | |
1171 | - Perform only the first pass. | |
1172 | - | |
1173 | - -fallback1pass | |
1174 | - When the 2nd pass fails, Julius finishes the recognition with no | |
1175 | - result. This option tells Julius to output the 1st pass result | |
1176 | - as a final result when the 2nd pass fails. Note that some | |
1177 | - score output (confidence etc.) may not be useful. This was | |
1178 | - the default behavior of Julius-3.x. | |
1179 | - | |
1180 | - -no_ccd , -force_ccd | |
1181 | - Explicitly switch phone context handling at search. Normally | |
1182 | - Julius determines whether the AM in use is a context-dependent | |
1183 | - model or not from the model names, i.e., whether the names | |
1184 | - contain character + and -. This option will override the | |
1185 | - automatic detection. | |
1186 | - | |
1187 | - -cmalpha float | |
1188 | - Smoothing parameter for confidence scoring. (default: 0.05) | |
1189 | - | |
1190 | - -iwsp | |
1191 | - (Multi-path mode only) Enable inter-word context-free short | |
1192 | - pause insertion. This option appends a skippable short pause | |
1193 | - model for every word end. The short-pause model can be | |
1194 | - specified by -spmodel. | |
1195 | - | |
1196 | - -transp float | |
1197 | - Additional insertion penalty for transparent words. (default: | |
1198 | - 0.0) | |
1199 | - | |
1200 | - -demo | |
1201 | - Equivalent to -progout -quiet. | |
1202 | - | |
1203 | -ENVIRONMENT VARIABLES | |
1204 | - ALSADEV | |
1205 | - (using mic input with alsa device) specify a capture device name. If | |
1206 | - not specified, "default" will be used. | |
1207 | - | |
1208 | - AUDIODEV | |
1209 | - (using mic input with oss device) specify a capture device path. If | |
1210 | - not specified, "/dev/dsp" will be used. | |
1211 | - | |
1212 | - LATENCY_MSEC | |
1213 | - Try to set input latency of microphone input in milliseconds. | |
1214 | - Smaller value will shorten latency but sometimes make process | |
1215 | - unstable. Default value will depend on the running OS. | |
1216 | - | |
1217 | -EXAMPLES | |
1218 | - For examples of system usage, refer to the tutorial section in the | |
1219 | - Julius documents. | |
1220 | - | |
1221 | -NOTICE | |
1222 | - Note about jconf files: relative paths in a jconf file are interpreted | |
1223 | - as relative to the jconf file itself, not to the current directory. | |
1224 | - | |
1225 | -SEE ALSO | |
1226 | - julian(1), jcontrol(1), adinrec(1), adintool(1), mkbingram(1), | |
1227 | - mkbinhmm(1), mkgshmm(1), wav2mfcc(1), mkss(1) | |
1228 | - | |
1229 | - http://julius.sourceforge.jp/en/ | |
1230 | - | |
1231 | -DIAGNOSTICS | |
1232 | - Julius normally will return the exit status 0. If an error occurs, | |
1233 | - Julius exits abnormally with exit status 1. If an input file cannot be | |
1234 | - found or cannot be loaded for some reason then Julius will skip | |
1235 | - processing for that file. | |
1236 | - | |
1237 | -BUGS | |
1238 | - There are some restrictions to the type and size of the models Julius | |
1239 | - can use. For a detailed explanation refer to the Julius documentation. | |
1240 | - For bug reports, inquiries and comments please contact julius-info at | |
1241 | - lists.sourceforge.jp. | |
1242 | - | |
1243 | -COPYRIGHT | |
1244 | - Copyright (c) 1991-2008 Kawahara Lab., Kyoto University | |
1245 | - | |
1246 | - Copyright (c) 1997-2000 Information-technology Promotion Agency, Japan | |
1247 | - | |
1248 | - Copyright (c) 2000-2008 Shikano Lab., Nara Institute of Science and | |
1249 | - Technology | |
1250 | - | |
1251 | - Copyright (c) 2005-2008 Julius project team, Nagoya Institute of | |
1252 | - Technology | |
1253 | - | |
1254 | -AUTHORS | |
1255 | - Rev.1.0 (1998/02/20) | |
1256 | - Designed by Tatsuya KAWAHARA and Akinobu LEE (Kyoto University) | |
1257 | - | |
1258 | - Development by Akinobu LEE (Kyoto University) | |
1259 | - | |
1260 | - Rev.1.1 (1998/04/14), Rev.1.2 (1998/10/31), Rev.2.0 (1999/02/20), | |
1261 | - Rev.2.1 (1999/04/20), Rev.2.2 (1999/10/04), Rev.3.0 (2000/02/14), | |
1262 | - Rev.3.1 (2000/05/11) | |
1263 | - Development of above versions by Akinobu LEE (Kyoto University) | |
1264 | - | |
1265 | - Rev.3.2 (2001/08/15), Rev.3.3 (2002/09/11), Rev.3.4 (2003/10/01), | |
1266 | - Rev.3.4.1 (2004/02/25), Rev.3.4.2 (2004/04/30) | |
1267 | - Development of above versions by Akinobu LEE (Nara Institute of | |
1268 | - Science and Technology) | |
1269 | - | |
1270 | - Rev.3.5 (2005/11/11), Rev.3.5.1 (2006/03/31), Rev.3.5.2 (2006/07/31), | |
1271 | - Rev.3.5.3 (2006/12/29), Rev.4.0 (2007/12/19), Rev.4.1 (2008/10/03) | |
1272 | - Development of above versions by Akinobu LEE (Nagoya Institute of | |
1273 | - Technology) | |
1274 | - | |
1275 | -THANKS TO | |
1276 | - From rev.3.2, Julius is released by the "Information Processing | |
1277 | - Society, Continuous Speech Consortium". | |
1278 | - | |
1279 | - The Windows DLL version was developed and released by Hideki BANNO | |
1280 | - (Nagoya University). | |
1281 | - | |
1282 | - The Windows Microsoft Speech API compatible version was developed by | |
1283 | - Takashi SUMIYOSHI (Kyoto University). | |
1284 | - | |
1285 | - | |
1286 | - | |
1287 | - 02/11/2009 JULIUS(1) |
recognize/src/julius/doc/manuals/mkbingram.txt
... | ... | @@ -1,97 +0,0 @@ |
1 | - mkbingram | |
2 | - | |
3 | -MKBINGRAM(1) MKBINGRAM(1) | |
4 | - | |
5 | - | |
6 | - | |
7 | -NAME | |
8 | - mkbingram | |
9 | - - make binary N-gram from ARPA N-gram file | |
10 | - | |
11 | -SYNOPSIS | |
12 | - mkbingram [-nlr forward_ngram.arpa] [-nrl backward_ngram.arpa] | |
13 | - [-d old_bingram_file] {output_bingram_file} | |
14 | - | |
15 | -DESCRIPTION | |
16 | - mkbingram is a tool to convert N-gram definition file(s) in ARPA | |
17 | - standard format to a compact Julius binary format. It will greatly reduce | |
18 | - the initial loading time of the N-gram. It can read gzipped files | |
19 | - directly. | |
20 | - | |
21 | - From rev.4.0, Julius can deal with forward N-gram, backward N-gram and | |
22 | - their combinations. So, mkbingram now generates binary N-gram file from | |
23 | - one of them, or by combining the two to produce one binary N-gram. | |
24 | - | |
25 | - When only a forward N-gram is specified, mkbingram generates binary | |
26 | - N-gram from only the forward N-gram. When using this binary N-gram at | |
27 | - Julius, it performs the 1st pass with the 2-gram probabilities in the | |
28 | - N-gram, and run the 2nd pass with the given N-gram fully, with | |
29 | - converting forward probabilities to backward probabilities by Bayes | |
30 | - rule. | |
31 | - | |
32 | - When only a backward N-gram is specified, mkbingram generates a binary | |
33 | - N-gram file that contains only the backward N-gram. The 1st pass will | |
34 | - use forward 2-gram probabilities that can be computed from the backward | |
35 | - 2-gram using Bayes rule, and the 2nd pass use the given backward N-gram | |
36 | - fully. | |
37 | - | |
38 | - When both forward and backward N-grams are specified, the 2-gram part | |
39 | - in the forward N-gram and all backward N-gram will be combined into | |
40 | - single bingram file. The forward 2-gram will be applied for the 1st | |
41 | - pass and backward N-gram for the 2nd pass. Note that both N-gram should | |
42 | - be trained in the same corpus with same parameters (i.e. cut-off | |
43 | - thresholds), with same vocabulary. | |
44 | - | |
45 | - The old binary N-gram produced by mkbingram of version 3.x and earlier | |
46 | - can be used in Julius-4, but you can convert the old version to the new | |
47 | - version by specifying it as input of current mkbingram by option "-d". | |
48 | - | |
49 | - Please note that binary N-gram file converted by mkbingram of version | |
50 | - 4.0 and later cannot be read by older Julius 3.x. | |
51 | - | |
52 | -OPTIONS | |
53 | - -nlr forward_ngram.arpa | |
54 | - Read in a forward (left-to-right) word N-gram file in ARPA standard | |
55 | - format. | |
56 | - | |
57 | - -nrl backward_ngram.arpa | |
58 | - Read in a backward (right-to-left) word N-gram file in ARPA standard | |
59 | - format. | |
60 | - | |
61 | - -d old_bingram_file | |
62 | - Read in a binary N-gram file. | |
63 | - | |
64 | - -swap | |
65 | - Swap BOS word <s> and EOS word </s> in N-gram. | |
66 | - | |
67 | - output_bingram_file | |
68 | - binary N-gram file name to output. | |
69 | - | |
70 | -EXAMPLES | |
71 | - Convert a set of forward and backward N-gram in ARPA format into Julius | |
72 | - binary form: | |
73 | - Convert a single forward 4-gram in ARPA format into a binary file: | |
74 | - Convert old binary N-gram file to current format: | |
75 | - | |
76 | -SEE ALSO | |
77 | - julius ( 1 ) , | |
78 | - mkbinhmm ( 1 ) , | |
79 | - mkbinhmmlist ( 1 ) | |
80 | - | |
81 | -COPYRIGHT | |
82 | - Copyright (c) 1997-2000 Information-technology Promotion Agency, Japan | |
83 | - | |
84 | - Copyright (c) 1991-2008 Kawahara Lab., Kyoto University | |
85 | - | |
86 | - Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and | |
87 | - Technology | |
88 | - | |
89 | - Copyright (c) 2005-2008 Julius project team, Nagoya Institute of | |
90 | - Technology | |
91 | - | |
92 | -LICENSE | |
93 | - The same as Julius. | |
94 | - | |
95 | - | |
96 | - | |
97 | - 02/11/2009 MKBINGRAM(1) |
recognize/src/julius/doc/manuals/mkbinhmm.txt
... | ... | @@ -1,78 +0,0 @@ |
1 | - mkbinhmm | |
2 | - | |
3 | -MKBINHMM(1) MKBINHMM(1) | |
4 | - | |
5 | - | |
6 | - | |
7 | -NAME | |
8 | - mkbinhmm | |
9 | - - convert HMM definition file in HTK ascii format to Julius binary | |
10 | - format | |
11 | - | |
12 | -SYNOPSIS | |
13 | - mkbinhmm [-htkconf HTKConfigFile] {hmmdefs_file} {binhmm_file} | |
14 | - | |
15 | -DESCRIPTION | |
16 | - mkbinhmm converts an HMM definition file in HTK ascii format into a | |
17 | - binary HMM file for Julius. It will greatly speed up the launch | |
18 | - process. | |
19 | - | |
20 | - You can also embed acoustic analysis condition parameters needed for | |
21 | - recognition into the output file. To embed the parameters, specify the | |
22 | - HTK Config file you have used to extract acoustic features for training | |
23 | - the HMM by the option "-htkconf". | |
24 | - | |
25 | - The embedded parameters in a binary HMM format will be loaded into | |
26 | - Julius automatically, so you do not need to specify the acoustic | |
27 | - feature options at run time. It will be convenient when you deliver an | |
28 | - acoustic model. | |
29 | - | |
30 | - You can also specify binary file as the input. This can be used to | |
31 | - update the old binary format into new one, or to embed the config | |
32 | - parameters into the already existing binary files. If the input binhmm | |
33 | - already has acoustic analysis parameters embedded, they will be | |
34 | - overridden by the specified values. | |
35 | - | |
36 | - | |
37 | - mkbinhmm can read gzipped file as input. | |
38 | - | |
39 | -OPTIONS | |
40 | - -htkconf HTKConfigFile | |
41 | - HTK Config file you used at training time. If specified, the values | |
42 | - are embedded to the output file. | |
43 | - | |
44 | - hmmdefs_file | |
45 | - The source HMM definition file in HTK ascii format or Julius binary | |
46 | - format. | |
47 | - | |
48 | - binhmm_file | |
49 | - Output file. | |
50 | - | |
51 | -EXAMPLES | |
52 | - Convert HTK ascii format HMM definition file into Julius binary file: | |
53 | - Furthermore, embed acoustic feature parameters as specified by Config | |
54 | - file | |
55 | - Embed the acoustic parameters into an existing binary file | |
56 | - | |
57 | -SEE ALSO | |
58 | - julius ( 1 ) , | |
59 | - mkbingram ( 1 ) , | |
60 | - mkbinhmmlist ( 1 ) | |
61 | - | |
62 | -COPYRIGHT | |
63 | - Copyright (c) 1997-2000 Information-technology Promotion Agency, Japan | |
64 | - | |
65 | - Copyright (c) 1991-2008 Kawahara Lab., Kyoto University | |
66 | - | |
67 | - Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and | |
68 | - Technology | |
69 | - | |
70 | - Copyright (c) 2005-2008 Julius project team, Nagoya Institute of | |
71 | - Technology | |
72 | - | |
73 | -LICENSE | |
74 | - The same as Julius. | |
75 | - | |
76 | - | |
77 | - | |
78 | - 10/02/2008 MKBINHMM(1) |
recognize/src/julius/doc/manuals/mkbinhmmlist.txt
... | ... | @@ -1,64 +0,0 @@ |
1 | - mkbinhmmlist | |
2 | - | |
3 | -MKBINHMMLIST(1) MKBINHMMLIST(1) | |
4 | - | |
5 | - | |
6 | - | |
7 | -NAME | |
8 | - mkbinhmmlist | |
9 | - - convert HMMList file into binary format | |
10 | - | |
11 | -SYNOPSIS | |
12 | - mkbinhmmlist {hmmdefs_file} {HMMList_file} {output_binhmmlist_file} | |
13 | - | |
14 | -DESCRIPTION | |
15 | - mkbinhmmlist converts a HMMList file to binary format. Since the index | |
16 | - trees for lookup are also stored in the binary format, it will speed up | |
17 | - the startup of Julius, namely when using big HMMList file. | |
18 | - | |
19 | - For conversion, HMM definition file hmmdefs_file that will be used | |
20 | - together at Julius needs to be specified. The format of the HMM | |
21 | - definition file can be either ascii or Julius binary format. | |
22 | - | |
23 | - The output binary file can be used in Julius in the same way, via | |
24 | - "-hlist". The format will be auto-detected by Julius. | |
25 | - | |
26 | - | |
27 | - mkbinhmmlist can read gzipped file. | |
28 | - | |
29 | -OPTIONS | |
30 | - hmmdefs_file | |
31 | - Acoustic HMM definition file, in HMM ascii format or Julius binary | |
32 | - format. | |
33 | - | |
34 | - HMMList_file | |
35 | - Source HMMList file | |
36 | - | |
37 | - output_binhmmlist_file | |
38 | - Output file, will be overwritten if it already exists. | |
39 | - | |
40 | -EXAMPLES | |
41 | - Convert a HMMList file logicalTri into binary format and store to | |
42 | - logicalTri.bin: | |
43 | - | |
44 | -SEE ALSO | |
45 | - julius ( 1 ) , | |
46 | - mkbinhmm ( 1 ) | |
47 | - | |
48 | -COPYRIGHT | |
49 | - Copyright (c) 1997-2000 Information-technology Promotion Agency, Japan | |
50 | - | |
51 | - Copyright (c) 1991-2008 Kawahara Lab., Kyoto University | |
52 | - | |
53 | - Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and | |
54 | - Technology | |
55 | - | |
56 | - Copyright (c) 2005-2008 Julius project team, Nagoya Institute of | |
57 | - Technology | |
58 | - | |
59 | -LICENSE | |
60 | - The same as Julius. | |
61 | - | |
62 | - | |
63 | - | |
64 | - 10/02/2008 MKBINHMMLIST(1) |
recognize/src/julius/doc/manuals/mkdfa.pl.txt
... | ... | @@ -1,67 +0,0 @@ |
1 | - mkdfa.pl | |
2 | - | |
3 | -MKDFA.PL(1) MKDFA.PL(1) | |
4 | - | |
5 | - | |
6 | - | |
7 | -NAME | |
8 | - mkdfa.pl | |
9 | - - grammar compiler | |
10 | - | |
11 | -SYNOPSIS | |
12 | - mkdfa.pl [options...] {prefix} | |
13 | - | |
14 | -DESCRIPTION | |
15 | - mkdfa.pl compiles the Julian format grammar (.grammar and .voca) to | |
16 | - Julian native formats (.dfa and .dict). In addition, ".term" will be | |
17 | - also generated that stores correspondence of category ID used in the | |
18 | - output files to the source category name. | |
19 | - | |
20 | - | |
21 | - prefix should be the common file name prefix of ".grammar" and ".voca" | |
22 | - file. From prefix.grammar and prefix.voca file, prefix.dfa, prefix.dict | |
23 | - and prefix.term will be output. | |
24 | - | |
25 | -OPTIONS | |
26 | - -n | |
27 | - Do not process the dictionary. You can only convert .grammar file to .dfa | |
28 | - file without .voca file. | |
29 | - | |
30 | -ENVIRONMENT VARIABLES | |
31 | - TMP or TEMP | |
32 | - Set directory to store temporary files. If not specified, one of them | |
33 | - on the following list will be used: /tmp, /var/tmp, /WINDOWS/Temp, | |
34 | - /WINNT/Temp. | |
35 | - | |
36 | -EXAMPLES | |
37 | - Convert a grammar foo.grammar and foo.voca to foo.dfa, foo.dict and | |
38 | - foo.term. | |
39 | - | |
40 | -SEE ALSO | |
41 | - julius ( 1 ) , | |
42 | - generate ( 1 ) , | |
43 | - nextword ( 1 ) , | |
44 | - accept_check ( 1 ) , | |
45 | - dfa_minimize ( 1 ) | |
46 | - | |
47 | -DIAGNOSTICS | |
48 | - mkdfa.pl invokes mkfa and dfa_minimize internally. They should be | |
49 | - placed at the same directory as mkdfa.pl. | |
50 | - | |
51 | -COPYRIGHT | |
52 | - Copyright (c) 1997-2000 Information-technology Promotion Agency, Japan | |
53 | - | |
54 | - Copyright (c) 1991-2008 Kawahara Lab., Kyoto University | |
55 | - | |
56 | - Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and | |
57 | - Technology | |
58 | - | |
59 | - Copyright (c) 2005-2008 Julius project team, Nagoya Institute of | |
60 | - Technology | |
61 | - | |
62 | -LICENSE | |
63 | - The same as Julius. | |
64 | - | |
65 | - | |
66 | - | |
67 | - 10/02/2008 MKDFA.PL(1) |
recognize/src/julius/doc/manuals/mkgshmm.txt
... | ... | @@ -1,50 +0,0 @@ |
1 | - mkgshmm | |
2 | - | |
3 | -MKGSHMM(1) MKGSHMM(1) | |
4 | - | |
5 | - | |
6 | - | |
7 | -NAME | |
8 | - mkgshmm | |
9 | - - convert monophone HMM to GS HMM for Julius | |
10 | - | |
11 | -SYNOPSIS | |
12 | - mkgshmm {monophone_hmmdefs} | |
13 | - > | |
14 | - {outputfile} | |
15 | - | |
16 | -DESCRIPTION | |
17 | - mkgshmm converts monophone HMM definition file in HTK format into a | |
18 | - special format for Gaussian Mixture Selection (GMS) in Julius. | |
19 | - | |
20 | - GMS is an algorithm to reduce the amount of acoustic computation with | |
21 | - triphone HMM, by pre-selection of promising gaussian mixtures using | |
22 | - likelihoods of corresponding monophone mixtures. | |
23 | - | |
24 | -EXAMPLES | |
25 | - (1) Prepare a monophone model which was trained by the same corpus as | |
26 | - target triphone model. | |
27 | - | |
28 | - (2) Convert the monophone model using mkgshmm. | |
29 | - (3) Specify the output file in Julius with option "-gshmm" | |
30 | - | |
31 | -SEE ALSO | |
32 | - julius ( 1 ) | |
33 | - | |
34 | -COPYRIGHT | |
35 | - Copyright (c) 1997-2000 Information-technology Promotion Agency, Japan | |
36 | - | |
37 | - Copyright (c) 1991-2008 Kawahara Lab., Kyoto University | |
38 | - | |
39 | - Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and | |
40 | - Technology | |
41 | - | |
42 | - Copyright (c) 2005-2008 Julius project team, Nagoya Institute of | |
43 | - Technology | |
44 | - | |
45 | -LICENSE | |
46 | - The same as Julius. | |
47 | - | |
48 | - | |
49 | - | |
50 | - 10/02/2008 MKGSHMM(1) |
recognize/src/julius/doc/manuals/mkss.txt
... | ... | @@ -1,55 +0,0 @@ |
1 | - mkss | |
2 | - | |
3 | -MKSS(1) MKSS(1) | |
4 | - | |
5 | - | |
6 | - | |
7 | -NAME | |
8 | - mkss | |
9 | - - calculate average spectrum for spectral subtraction | |
10 | - | |
11 | -SYNOPSIS | |
12 | - mkss [options...] {filename} | |
13 | - | |
14 | -DESCRIPTION | |
15 | - mkss is a tool to estimate noise spectrum for spectral subtraction on | |
16 | - Julius. It reads a few seconds of sound data from microphone input, | |
17 | - calculate the average spectrum and save it to a file. The output file | |
18 | - can be used as a noise spectrum data in Julius (option "-ssload"). | |
19 | - | |
20 | - The recording will start immediately after startup. Sampling format is | |
21 | - 16bit, monaural. If the output file already exists, it will be overwritten. | |
22 | - | |
23 | -OPTIONS | |
24 | - -freq Hz | |
25 | - Sampling frequency in Hz (default: 16,000) | |
26 | - | |
27 | - -len msec | |
28 | - capture length in milliseconds (default: 3000) | |
29 | - | |
30 | - -fsize sample_num | |
31 | - frame size in number of samples (default: 400) | |
32 | - | |
33 | - -fshift sample_num | |
34 | - frame shift in number of samples (default: 160) | |
35 | - | |
36 | -SEE ALSO | |
37 | - julius ( 1 ) | |
38 | - | |
39 | -COPYRIGHT | |
40 | - Copyright (c) 1997-2000 Information-technology Promotion Agency, Japan | |
41 | - | |
42 | - Copyright (c) 1991-2008 Kawahara Lab., Kyoto University | |
43 | - | |
44 | - Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and | |
45 | - Technology | |
46 | - | |
47 | - Copyright (c) 2005-2008 Julius project team, Nagoya Institute of | |
48 | - Technology | |
49 | - | |
50 | -LICENSE | |
51 | - The same as Julius. | |
52 | - | |
53 | - | |
54 | - | |
55 | - 10/02/2008 MKSS(1) |
recognize/src/julius/doc/manuals/nextword.txt
... | ... | @@ -1,82 +0,0 @@ |
1 | - nextword | |
2 | - | |
3 | -NEXTWORD(1) NEXTWORD(1) | |
4 | - | |
5 | - | |
6 | - | |
7 | -NAME | |
8 | - nextword | |
9 | - - display next predicted words (in reverse order) | |
10 | - | |
11 | -SYNOPSIS | |
12 | - nextword [-t] [-r] [-s spname] [-v] {prefix} | |
13 | - | |
14 | -DESCRIPTION | |
15 | - Given a partial (part of) sentence from the end, it outputs the next | |
16 | - words allowed in the specified grammar. | |
17 | - | |
18 | - | |
19 | - .dfa, .dict and .term files are needed to execute. They can be | |
20 | - generated from .grammar and .voca file by mkdfa.pl. | |
21 | - | |
22 | - Please note that the latter part of sentence should be given, since the | |
23 | - main 2nd pass does a right-to-left parsing. | |
24 | - | |
25 | -OPTIONS | |
26 | - -t | |
27 | - Input / Output in category name. (default: word) | |
28 | - | |
29 | - -r | |
30 | - Enter in reverse order | |
31 | - | |
32 | - -s spname | |
33 | - the name string of short-pause word to be suppressed (default: "sp") | |
34 | - | |
35 | - -v | |
36 | - Debug output. | |
37 | - | |
38 | -EXAMPLES | |
39 | - Example output of a sample grammar "fruit": | |
40 | - | |
41 | - % nextword fruit | |
42 | - Stat: init_voca: read 36 words | |
43 | - Reading in term file (optional)...done | |
44 | - 15 categories, 36 words | |
45 | - DFA has 26 nodes and 42 arcs | |
46 | - ----- | |
47 | - command completion is disabled | |
48 | - ----- | |
49 | - wseq > A BANANA </s> | |
50 | - [wseq: A BANANA </s>] | |
51 | - [cate: (NUM_1|NUM_1|A|A) FRUIT_SINGULAR NS_E] | |
52 | - PREDICTED CATEGORIES/WORDS: | |
53 | - NS_B (<s> ) | |
54 | - HAVE (HAVE ) | |
55 | - WANT (WANT ) | |
56 | - NS_B (<s> ) | |
57 | - HAVE (HAVE ) | |
58 | - WANT (WANT ) | |
59 | - | |
60 | - | |
61 | -SEE ALSO | |
62 | - mkdfa.pl ( 1 ) , | |
63 | - generate ( 1 ) , | |
64 | - accept_check ( 1 ) | |
65 | - | |
66 | -COPYRIGHT | |
67 | - Copyright (c) 1997-2000 Information-technology Promotion Agency, Japan | |
68 | - | |
69 | - Copyright (c) 1991-2008 Kawahara Lab., Kyoto University | |
70 | - | |
71 | - Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and | |
72 | - Technology | |
73 | - | |
74 | - Copyright (c) 2005-2008 Julius project team, Nagoya Institute of | |
75 | - Technology | |
76 | - | |
77 | -LICENSE | |
78 | - The same as Julius. | |
79 | - | |
80 | - | |
81 | - | |
82 | - 10/02/2008 NEXTWORD(1) |
recognize/src/julius/install/julius-4.2.2.tar.gz
No preview for this file type