Commit 0e854e1e1dab832696362b8210d202475bd32de7
1 parent
e1cf2b2f
Exists in
master
and in
1 other branch
Remove arquivos binários (dependências) desnecessarios
Showing
22 changed files
with
0 additions
and
9606 deletions
Show diff stats
aelius-install.tar.gz
No preview for this file type
aelius_install.sh
| @@ -1,36 +0,0 @@ | @@ -1,36 +0,0 @@ | ||
| 1 | -#!/bin/bash | ||
| 2 | - | ||
| 3 | -#Instalador do Tradutor Aelius | ||
| 4 | - | ||
| 5 | -cd ~/vlibras-core | ||
| 6 | - | ||
| 7 | -echo "\n# Extraindo...\n" | ||
| 8 | -tar -xf aelius-install.tar.gz -C ~/. | ||
| 9 | - | ||
| 10 | -echo "# Instalando dependências...\n" | ||
| 11 | -sudo apt-get install python-dev python-yaml python-numpy python-matplotlib | ||
| 12 | - | ||
| 13 | -cd ~/nltk-2.0.1rc1/ | ||
| 14 | - | ||
| 15 | -sudo python setup.py install | ||
| 16 | - | ||
| 17 | -echo "\n# Finalizando...\n" | ||
| 18 | - | ||
| 19 | -# path to HunPos binaries | ||
| 20 | -echo "\nPATH=\"${PATH}:$HOME/Applications/bin\"" >> ~/.bashrc | ||
| 21 | -echo "export PATH\n" >> ~/.bashrc | ||
| 22 | - | ||
| 23 | -# path to Aelius and Translate package | ||
| 24 | -echo "PYTHONPATH=\"${PYTHONPATH}:$HOME/Applications:$HOME/vlibras-core/tradutor/src/py\"" >> ~/.bashrc | ||
| 25 | -echo "export PYTHONPATH\n" >> ~/.bashrc | ||
| 26 | - | ||
| 27 | -cd .. | ||
| 28 | - | ||
| 29 | -sudo mv usr-local-bin.tar.gz /usr/local/bin/ | ||
| 30 | - | ||
| 31 | -cd /usr/local/bin/ | ||
| 32 | - | ||
| 33 | -sudo tar -xf usr-local-bin.tar.gz | ||
| 34 | -sudo rm usr-local-bin.tar.gz | ||
| 35 | - | ||
| 36 | -echo "### Instalação finalizada! \n## Execute o seguinte comando para concluir:\n\n$ source ~/.bashrc\n" |
recognize/src/julius/doc/Juliusbook-4.1.5.pdf
No preview for this file type
recognize/src/julius/doc/manuals/accept_check.txt
| @@ -1,80 +0,0 @@ | @@ -1,80 +0,0 @@ | ||
| 1 | - accept_check | ||
| 2 | - | ||
| 3 | -ACCEPT_CHECK(1) ACCEPT_CHECK(1) | ||
| 4 | - | ||
| 5 | - | ||
| 6 | - | ||
| 7 | -NAME | ||
| 8 | - accept_check | ||
| 9 | - - Check whether a grammar accepts / rejects given word sequences | ||
| 10 | - | ||
| 11 | -SYNOPSIS | ||
| 12 | - accept_check [-t] [-s spname] [-v] {prefix} | ||
| 13 | - | ||
| 14 | -DESCRIPTION | ||
| 15 | - accept_check is a tool to check whether a sentence can be accepted or | ||
| 16 | - rejected on a grammar (prefix.dfa and prefix.dict). The sentence should | ||
| 17 | - be given from standard input. You can do a batch check by putting one | ||
| 18 | - test sentence on each line of a text file, and giving it as standard | ||
| 19 | - input of accept_check. | ||
| 20 | - | ||
| 21 | - This tool needs .dfa, .dict and .term files. You should convert a | ||
| 22 | - written grammar file to generate them by mkdfa.pl. | ||
| 23 | - | ||
| 24 | - A sentence should be given as space-separated word sequence. It may be | ||
| 25 | - required to add head / tail silence word like sil, depending on your | ||
| 26 | - grammar. It should not contain a short-pause word. | ||
| 27 | - | ||
| 28 | - When a word belongs to several categories in a grammar, accept_check will | ||
| 29 | - check all the possible sentence patterns, and accept it if any of those | ||
| 30 | - is acceptable. | ||
| 31 | - | ||
| 32 | -OPTIONS | ||
| 33 | - -t | ||
| 34 | - Use category name as input instead of word. | ||
| 35 | - | ||
| 36 | - -s spname | ||
| 37 | - Short-pause word name to be skipped. (default: "sp") | ||
| 38 | - | ||
| 39 | - -v | ||
| 40 | - Debug output. | ||
| 41 | - | ||
| 42 | -EXAMPLES | ||
| 43 | - An output for "date" grammar: | ||
| 44 | - | ||
| 45 | - % echo '<s> NEXT SUNDAY </s>' | accept_check date | ||
| 46 | - Reading in dictionary... | ||
| 47 | - 143 words...done | ||
| 48 | - Reading in DFA grammar...done | ||
| 49 | - Mapping dict item <-> DFA terminal (category)...done | ||
| 50 | - Reading in term file (optional)...done | ||
| 51 | - 27 categories, 143 words | ||
| 52 | - DFA has 35 nodes and 71 arcs | ||
| 53 | - ----- | ||
| 54 | - wseq: <s> NEXT SUNDAY </s> | ||
| 55 | - cate: NS_B (NEXT|NEXT) (DAYOFWEEK|DAYOFWEEK|DAY|DAY) NS_E | ||
| 56 | - accepted | ||
| 57 | - | ||
| 58 | - | ||
| 59 | -SEE ALSO | ||
| 60 | - mkdfa.pl ( 1 ) , | ||
| 61 | - generate ( 1 ) , | ||
| 62 | - nextword ( 1 ) | ||
| 63 | - | ||
| 64 | -COPYRIGHT | ||
| 65 | - Copyright (c) 1997-2000 Information-technology Promotion Agency, Japan | ||
| 66 | - | ||
| 67 | - Copyright (c) 1991-2008 Kawahara Lab., Kyoto University | ||
| 68 | - | ||
| 69 | - Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and | ||
| 70 | - Technology | ||
| 71 | - | ||
| 72 | - Copyright (c) 2005-2008 Julius project team, Nagoya Institute of | ||
| 73 | - Technology | ||
| 74 | - | ||
| 75 | -LICENSE | ||
| 76 | - The same as Julius. | ||
| 77 | - | ||
| 78 | - | ||
| 79 | - | ||
| 80 | - 10/02/2008 ACCEPT_CHECK(1) |
recognize/src/julius/doc/manuals/adinrec.txt
| @@ -1,130 +0,0 @@ | @@ -1,130 +0,0 @@ | ||
| 1 | - adinrec | ||
| 2 | - | ||
| 3 | -ADINREC(1) ADINREC(1) | ||
| 4 | - | ||
| 5 | - | ||
| 6 | - | ||
| 7 | -NAME | ||
| 8 | - adinrec | ||
| 9 | - - record audio device and save one utterance to a file | ||
| 10 | - | ||
| 11 | -SYNOPSIS | ||
| 12 | - adinrec [options...] {filename} | ||
| 13 | - | ||
| 14 | -DESCRIPTION | ||
| 15 | - adinrec opens an audio stream, detects an utterance input and stores it | ||
| 16 | - to a specified file. The utterance detection is done by level and | ||
| 17 | - zero-cross thresholds. Default input device is microphone, but other | ||
| 18 | - audio input source, including Julius A/D-in plugin, can be used by | ||
| 19 | - using "-input" option. | ||
| 20 | - | ||
| 21 | - The audio format is 16 bit, 1 channel, in Microsoft WAV format. If the | ||
| 22 | - given filename already exists, it will be overwritten. | ||
| 23 | - | ||
| 24 | - If filename is "-" , the captured data will be streamed into standard | ||
| 25 | - out, with no header (raw format). | ||
| 26 | - | ||
| 27 | -OPTIONS | ||
| 28 | - adinrec uses JuliusLib and adopts Julius options. Below is a list of | ||
| 29 | - valid options. | ||
| 30 | - | ||
| 31 | - adinrec specific options | ||
| 32 | - -freq Hz | ||
| 33 | - Set sampling rate in Hz. (default: 16,000) | ||
| 34 | - | ||
| 35 | - -raw | ||
| 36 | - Output in raw file format. | ||
| 37 | - | ||
| 38 | - JuliusLib options | ||
| 39 | - -input {mic|rawfile|adinnet|stdin|netaudio|esd|alsa|oss} | ||
| 40 | - Choose speech input source. Specify 'file' or 'rawfile' for waveform | ||
| 41 | - file. On file input, users will be prompted to enter the file name | ||
| 42 | - from stdin. | ||
| 43 | - | ||
| 44 | - 'mic' is to get audio input from a default live microphone device, | ||
| 45 | - and 'adinnet' means receiving waveform data via tcpip network from | ||
| 46 | - an adinnet client. 'netaudio' is from DatLink/NetAudio input, and | ||
| 47 | - 'stdin' means data input from standard input. | ||
| 48 | - | ||
| 49 | - At Linux, you can choose API at run time by specifying alsa, oss and | ||
| 50 | - esd. | ||
| 51 | - | ||
| 52 | - -lv thres | ||
| 53 | - Level threshold for speech input detection. Values should be in | ||
| 54 | - range from 0 to 32767. (default: 2000) | ||
| 55 | - | ||
| 56 | - -zc thres | ||
| 57 | - Zero crossing threshold per second. Only input that goes over the | ||
| 58 | - level threshold (-lv) will be counted. (default: 60) | ||
| 59 | - | ||
| 60 | - -headmargin msec | ||
| 61 | - Silence margin at the start of speech segment in milliseconds. | ||
| 62 | - (default: 300) | ||
| 63 | - | ||
| 64 | - -tailmargin msec | ||
| 65 | - Silence margin at the end of speech segment in milliseconds. | ||
| 66 | - (default: 400) | ||
| 67 | - | ||
| 68 | - -zmean | ||
| 69 | - This option enables DC offset removal. | ||
| 70 | - | ||
| 71 | - -smpFreq Hz | ||
| 72 | - Set sampling rate in Hz. (default: 16,000) | ||
| 73 | - | ||
| 74 | - -48 | ||
| 75 | - Record input with 48kHz sampling, and down-sample it to 16kHz | ||
| 76 | - on-the-fly. This option is valid for 16kHz model only. The | ||
| 77 | - down-sampling routine was ported from sptk. (Rev. 4.0) | ||
| 78 | - | ||
| 79 | - -NA devicename | ||
| 80 | - Host name for DatLink server input (-input netaudio). | ||
| 81 | - | ||
| 82 | - -adport port_number | ||
| 83 | - With -input adinnet, specify adinnet port number to listen. | ||
| 84 | - (default: 5530) | ||
| 85 | - | ||
| 86 | - -nostrip | ||
| 87 | - Julius by default removes successive zero samples in input speech | ||
| 88 | - data. This option stops it. | ||
| 89 | - | ||
| 90 | - -C jconffile | ||
| 91 | - Load a jconf file at here. The content of the jconffile will be | ||
| 92 | - expanded at this point. | ||
| 93 | - | ||
| 94 | - -plugindir dirlist | ||
| 95 | - Specify the directories from which to load plugins. If several directories | ||
| 96 | - exist, specify them as a colon-separated list. | ||
| 97 | - | ||
| 98 | -ENVIRONMENT VARIABLES | ||
| 99 | - ALSADEV | ||
| 100 | - Device name string for ALSA. (default: "default") | ||
| 101 | - | ||
| 102 | - AUDIODEV | ||
| 103 | - Device name string for OSS. (default: "/dev/dsp") | ||
| 104 | - | ||
| 105 | - LATENCY_MSEC | ||
| 106 | - Input latency of microphone input in milliseconds. Smaller value | ||
| 107 | - will shorten latency but sometimes make process unstable. Default | ||
| 108 | - value will depend on the running OS. | ||
| 109 | - | ||
| 110 | -SEE ALSO | ||
| 111 | - julius ( 1 ) , | ||
| 112 | - adintool ( 1 ) | ||
| 113 | - | ||
| 114 | -COPYRIGHT | ||
| 115 | - Copyright (c) 1997-2000 Information-technology Promotion Agency, Japan | ||
| 116 | - | ||
| 117 | - Copyright (c) 1991-2008 Kawahara Lab., Kyoto University | ||
| 118 | - | ||
| 119 | - Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and | ||
| 120 | - Technology | ||
| 121 | - | ||
| 122 | - Copyright (c) 2005-2008 Julius project team, Nagoya Institute of | ||
| 123 | - Technology | ||
| 124 | - | ||
| 125 | -LICENSE | ||
| 126 | - The same as Julius. | ||
| 127 | - | ||
| 128 | - | ||
| 129 | - | ||
| 130 | - 10/02/2008 ADINREC(1) |
recognize/src/julius/doc/manuals/adintool.txt
| @@ -1,214 +0,0 @@ | @@ -1,214 +0,0 @@ | ||
| 1 | - adintool | ||
| 2 | - | ||
| 3 | -ADINTOOL(1) ADINTOOL(1) | ||
| 4 | - | ||
| 5 | - | ||
| 6 | - | ||
| 7 | -NAME | ||
| 8 | - adintool | ||
| 9 | - - a tool to record / split / send / receive audio streams | ||
| 10 | - | ||
| 11 | -SYNOPSIS | ||
| 12 | - adintool {-in inputdev} {-out outputdev} [options...] | ||
| 13 | - | ||
| 14 | -DESCRIPTION | ||
| 15 | - adintool analyzes speech input, finds speech segments skipping silence, | ||
| 16 | - and records the detected segments in various ways. It performs speech | ||
| 17 | - detection based on zerocross number and power (level), and records the | ||
| 18 | - detected parts to files or other output devices successively. | ||
| 19 | - | ||
| 20 | - | ||
| 21 | - adintool is an enhanced version of adinrec with various functions. | ||
| 22 | - Supported input device are: microphone input, a speech file, standard | ||
| 23 | - tty input, and network socket (called adin-net server mode). Julius | ||
| 24 | - plugin can be also used. Detected speech segments will be saved to | ||
| 25 | - output devices: speech files, standard tty output, and network socket | ||
| 26 | - (called adin-net client mode). For example, you can split the incoming | ||
| 27 | - speech to segments and send them to Julius to be recognized. | ||
| 28 | - | ||
| 29 | - Output format is WAV, 16bit (signed short), monaural. If the file | ||
| 30 | - already exists, it will be overwritten. | ||
| 31 | - | ||
| 32 | -OPTIONS | ||
| 33 | - All Julius options can be set. Only audio input related options are | ||
| 34 | - treated and others are silently skipped. Below is a list of options. | ||
| 35 | - | ||
| 36 | - adintool specific options | ||
| 37 | - -freq Hz | ||
| 38 | - Set sampling rate in Hz. (default: 16,000) | ||
| 39 | - | ||
| 40 | - -in inputdev | ||
| 41 | - Audio input device. "mic" to capture via microphone input, "file" | ||
| 42 | - for audio file input, and "stdin" to read raw data from | ||
| 43 | - standard-input. For file input, file name prompt will appear after | ||
| 44 | - startup. Use "adinnet" to make adintool as "adinnet server", | ||
| 45 | - receiving data from client via network socket. Default port number | ||
| 46 | - is 5530, which can be altered by option "-inport". | ||
| 47 | - | ||
| 48 | - Alternatively, input device can be set by "-input" option, in which | ||
| 49 | - case you can use plugin input. | ||
| 50 | - | ||
| 51 | - -out outputdev | ||
| 52 | - Audio output device store the data. Specify "file" to save to file, | ||
| 53 | - in which the output filename should be given by "-filename". Use | ||
| 54 | - "stdout" to standard out. "adinnet" will make adintool to be an | ||
| 55 | - adinnet client, sending speech data to a server via tcp/ip socket. | ||
| 56 | - When using "adinnet" output, the server name to send data should be | ||
| 57 | - specified by "-server". The default port number is 5530, which can | ||
| 58 | - be changed by "-port" option. | ||
| 59 | - | ||
| 60 | - -inport num | ||
| 61 | - When adintool becomes adinnet server to receive data (-in adinnet), | ||
| 62 | - set the port number to listen. (default: 5530) | ||
| 63 | - | ||
| 64 | - -server [host] [,host...] | ||
| 65 | - When output to adinnet server (-out adinnet), set the hostname. You | ||
| 66 | - can send to multiple hosts by specifying their hostnames as | ||
| 67 | - comma-delimited list like "host1,host2,host3". | ||
| 68 | - | ||
| 69 | - -port [num] [,num...] | ||
| 70 | - When adintool send a data to adinnet server (-out adinnet), set the | ||
| 71 | - port number to connect. (default: 5530) For multiple servers, | ||
| 72 | - specify port numbers for all servers like "5530,5530,5531". | ||
| 73 | - | ||
| 74 | - -filename file | ||
| 75 | - When output to file (-out file), set the output filename. The actual | ||
| 76 | - file name will be as "file.0000.wav" , "file.0001.wav" and so on, | ||
| 77 | - where the four digit number increases as speech segment detected. | ||
| 78 | - The initial number will be set to 0 by default, which can be changed | ||
| 79 | - by "-startid" option. When using "-oneshot" option to save only the | ||
| 80 | - first segment, the input will be saved as "file". | ||
| 81 | - | ||
| 82 | - -startid number | ||
| 83 | - At file output, set the initial file number. (default: 0) | ||
| 84 | - | ||
| 85 | - -oneshot | ||
| 86 | - Exit after the end of first speech segment. | ||
| 87 | - | ||
| 88 | - -nosegment | ||
| 89 | - Do not perform speech detection for input, just treat all the input | ||
| 90 | - as a single valid segment. | ||
| 91 | - | ||
| 92 | - -raw | ||
| 93 | - Output as RAW file (no header). | ||
| 94 | - | ||
| 95 | - -autopause | ||
| 96 | - When outputting to an adinnet server, adintool enters pause state at the | ||
| 97 | - end of every speech segment. It will restart when the destination adinnet | ||
| 98 | - server sends it a resume signal. | ||
| 99 | - | ||
| 100 | - -loosesync | ||
| 101 | - When outputting to multiple adinnet servers, do not require strict | ||
| 102 | - synchronization for restart. By default, when adintool has entered | ||
| 103 | - pause state, it will not restart until resume commands are received | ||
| 104 | - from all servers. This option allows it to restart as soon as at | ||
| 105 | - least one resume command has arrived. | ||
| 106 | - | ||
| 107 | - -rewind msec | ||
| 108 | - When input is a live microphone device, and there has been some | ||
| 109 | - continuing input at the moment adintool resumes, it starts recording | ||
| 110 | - by backtracking the specified number of milliseconds. | ||
| 111 | - | ||
| 112 | - Concerning Julius options | ||
| 113 | - -input {mic|rawfile|adinnet|stdin|netaudio|esd|alsa|oss} | ||
| 114 | - Choose speech input source. Specify 'file' or 'rawfile' for waveform | ||
| 115 | - file. On file input, users will be prompted to enter the file name | ||
| 116 | - from stdin. | ||
| 117 | - | ||
| 118 | - 'mic' is to get audio input from a default live microphone device, | ||
| 119 | - and 'adinnet' means receiving waveform data via tcpip network from | ||
| 120 | - an adinnet client. 'netaudio' is from DatLink/NetAudio input, and | ||
| 121 | - 'stdin' means data input from standard input. | ||
| 122 | - | ||
| 123 | - At Linux, you can choose API at run time by specifying alsa, oss and | ||
| 124 | - esd. | ||
| 125 | - | ||
| 126 | - -lv thres | ||
| 127 | - Level threshold for speech input detection. Values should be in | ||
| 128 | - range from 0 to 32767. (default: 2000) | ||
| 129 | - | ||
| 130 | - -zc thres | ||
| 131 | - Zero crossing threshold per second. Only input that goes over the | ||
| 132 | - level threshold (-lv) will be counted. (default: 60) | ||
| 133 | - | ||
| 134 | - -headmargin msec | ||
| 135 | - Silence margin at the start of speech segment in milliseconds. | ||
| 136 | - (default: 300) | ||
| 137 | - | ||
| 138 | - -tailmargin msec | ||
| 139 | - Silence margin at the end of speech segment in milliseconds. | ||
| 140 | - (default: 400) | ||
| 141 | - | ||
| 142 | - -zmean | ||
| 143 | - This option enables DC offset removal. | ||
| 144 | - | ||
| 145 | - -smpFreq Hz | ||
| 146 | - Set sampling rate in Hz. (default: 16,000) | ||
| 147 | - | ||
| 148 | - -48 | ||
| 149 | - Record input with 48kHz sampling, and down-sample it to 16kHz | ||
| 150 | - on-the-fly. This option is valid for 16kHz model only. The | ||
| 151 | - down-sampling routine was ported from sptk. (Rev. 4.0) | ||
| 152 | - | ||
| 153 | - -NA devicename | ||
| 154 | - Host name for DatLink server input (-input netaudio). | ||
| 155 | - | ||
| 156 | - -adport port_number | ||
| 157 | - With -input adinnet, specify adinnet port number to listen. | ||
| 158 | - (default: 5530) | ||
| 159 | - | ||
| 160 | - -nostrip | ||
| 161 | - Julius by default removes successive zero samples in input speech | ||
| 161 | - data. This option stops it. | ||
| 163 | - | ||
| 164 | - -C jconffile | ||
| 165 | - Load a jconf file at here. The content of the jconffile will be | ||
| 166 | - expanded at this point. | ||
| 167 | - | ||
| 168 | - -plugindir dirlist | ||
| 169 | - Specify the directories from which to load plugins. If several directories | ||
| 170 | - exist, specify them as a colon-separated list. | ||
| 171 | - | ||
| 172 | -ENVIRONMENT VARIABLES | ||
| 173 | - ALSADEV | ||
| 174 | - (using mic input with alsa device) specify a capture device name. If | ||
| 175 | - not specified, "default" will be used. | ||
| 176 | - | ||
| 177 | - AUDIODEV | ||
| 178 | - (using mic input with oss device) specify a capture device path. If | ||
| 179 | - not specified, "/dev/dsp" will be used. | ||
| 180 | - | ||
| 181 | - LATENCY_MSEC | ||
| 182 | - Try to set input latency of microphone input in milliseconds. | ||
| 183 | - Smaller value will shorten latency but sometimes make process | ||
| 184 | - unstable. Default value will depend on the running OS. | ||
| 185 | - | ||
| 186 | -EXAMPLES | ||
| 187 | - Record microphone input to files: "data.0000.wav", "data.0001.wav" and | ||
| 188 | - so on: | ||
| 189 | - Split a long speech file "foobar.raw" into "foobar.1500.wav", | ||
| 190 | - "foobar.1501.wav" ...: | ||
| 191 | - Copy an entire audio file via network socket. | ||
| 192 | - Detect speech segment, send to Julius via network and recognize it: | ||
| 193 | - | ||
| 194 | -SEE ALSO | ||
| 195 | - julius ( 1 ) , | ||
| 196 | - adinrec ( 1 ) | ||
| 197 | - | ||
| 198 | -COPYRIGHT | ||
| 199 | - Copyright (c) 1997-2000 Information-technology Promotion Agency, Japan | ||
| 200 | - | ||
| 201 | - Copyright (c) 1991-2008 Kawahara Lab., Kyoto University | ||
| 202 | - | ||
| 203 | - Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and | ||
| 204 | - Technology | ||
| 205 | - | ||
| 206 | - Copyright (c) 2005-2008 Julius project team, Nagoya Institute of | ||
| 207 | - Technology | ||
| 208 | - | ||
| 209 | -LICENSE | ||
| 210 | - The same as Julius. | ||
| 211 | - | ||
| 212 | - | ||
| 213 | - | ||
| 214 | - 10/02/2008 ADINTOOL(1) |
recognize/src/julius/doc/manuals/dfa_determinize.txt
| @@ -1,51 +0,0 @@ | @@ -1,51 +0,0 @@ | ||
| 1 | - dfa_determinize | ||
| 2 | - | ||
| 3 | -DFA_DETERMINIZE(1) DFA_DETERMINIZE(1) | ||
| 4 | - | ||
| 5 | - | ||
| 6 | - | ||
| 7 | -NAME | ||
| 8 | - dfa_determinize | ||
| 9 | - - Determinize NFA grammar network. | ||
| 10 | - | ||
| 11 | -SYNOPSIS | ||
| 12 | - dfa_determinize [-o outfile] {dfafile} | ||
| 13 | - | ||
| 14 | -DESCRIPTION | ||
| 15 | - dfa_determinize converts a non-deterministic .dfa file into | ||
| 16 | - deterministic DFA. Output to standard output, or file specified by "-o" | ||
| 17 | - option. | ||
| 18 | - | ||
| 19 | - This additional tool is not necessary on a grammar building procedure | ||
| 20 | - in Julius, since the grammar network generated by mkdfa.pl is always | ||
| 21 | - determinized. | ||
| 22 | - | ||
| 23 | -OPTIONS | ||
| 24 | - -o outfile | ||
| 25 | - Output file. If not specified, output to stdout. | ||
| 26 | - | ||
| 27 | -EXAMPLES | ||
| 28 | - Determinize foo.dfa to bar.dfa: | ||
| 29 | - Another way: | ||
| 30 | - | ||
| 31 | -SEE ALSO | ||
| 32 | - mkdfa.pl ( 1 ) , | ||
| 33 | - dfa_minimize ( 1 ) | ||
| 34 | - | ||
| 35 | -COPYRIGHT | ||
| 36 | - Copyright (c) 1997-2000 Information-technology Promotion Agency, Japan | ||
| 37 | - | ||
| 38 | - Copyright (c) 1991-2008 Kawahara Lab., Kyoto University | ||
| 39 | - | ||
| 40 | - Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and | ||
| 41 | - Technology | ||
| 42 | - | ||
| 43 | - Copyright (c) 2005-2008 Julius project team, Nagoya Institute of | ||
| 44 | - Technology | ||
| 45 | - | ||
| 46 | -LICENSE | ||
| 47 | - The same as Julius. | ||
| 48 | - | ||
| 49 | - | ||
| 50 | - | ||
| 51 | - 10/02/2008 DFA_DETERMINIZE(1) |
recognize/src/julius/doc/manuals/dfa_minimize.txt
| @@ -1,49 +0,0 @@ | @@ -1,49 +0,0 @@ | ||
| 1 | - dfa_minimize | ||
| 2 | - | ||
| 3 | -DFA_MINIMIZE(1) DFA_MINIMIZE(1) | ||
| 4 | - | ||
| 5 | - | ||
| 6 | - | ||
| 7 | -NAME | ||
| 8 | - dfa_minimize | ||
| 9 | - - Minimize a DFA grammar network | ||
| 10 | - | ||
| 11 | -SYNOPSIS | ||
| 12 | - dfa_minimize [-o outfile] {dfafile} | ||
| 13 | - | ||
| 14 | -DESCRIPTION | ||
| 15 | - dfa_minimize will convert a .dfa file to an equivalent minimal form. | ||
| 16 | - Output to standard output, or to a file specified by "-o" option. | ||
| 17 | - | ||
| 18 | - On version 3.5.3 and later, mkdfa.pl invokes this tool inside, and the | ||
| 19 | - output .dfa file will be always minimized, so you do not need to use | ||
| 20 | - this manually. | ||
| 21 | - | ||
| 22 | -OPTIONS | ||
| 23 | - -o outfile | ||
| 24 | - Output file. If not specified output to standard output. | ||
| 25 | - | ||
| 26 | -EXAMPLES | ||
| 27 | - Minimize foo.dfa to bar.dfa: | ||
| 28 | - Another way: | ||
| 29 | - | ||
| 30 | -SEE ALSO | ||
| 31 | - mkdfa.pl ( 1 ) | ||
| 32 | - | ||
| 33 | -COPYRIGHT | ||
| 34 | - Copyright (c) 1997-2000 Information-technology Promotion Agency, Japan | ||
| 35 | - | ||
| 36 | - Copyright (c) 1991-2008 Kawahara Lab., Kyoto University | ||
| 37 | - | ||
| 38 | - Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and | ||
| 39 | - Technology | ||
| 40 | - | ||
| 41 | - Copyright (c) 2005-2008 Julius project team, Nagoya Institute of | ||
| 42 | - Technology | ||
| 43 | - | ||
| 44 | -LICENSE | ||
| 45 | - The same as Julius. | ||
| 46 | - | ||
| 47 | - | ||
| 48 | - | ||
| 49 | - 10/02/2008 DFA_MINIMIZE(1) |
recognize/src/julius/doc/manuals/generate-ngram.txt
| @@ -1,62 +0,0 @@ | @@ -1,62 +0,0 @@ | ||
| 1 | - generate-ngram | ||
| 2 | - | ||
| 3 | -GENERATE-NGRAM(1) GENERATE-NGRAM(1) | ||
| 4 | - | ||
| 5 | - | ||
| 6 | - | ||
| 7 | -NAME | ||
| 8 | - generate-ngram | ||
| 9 | - - random sentence generator from N-gram | ||
| 10 | - | ||
| 11 | -SYNOPSIS | ||
| 12 | - generate-ngram [options...] {binary_ngram} | ||
| 13 | - | ||
| 14 | -DESCRIPTION | ||
| 15 | - generate-ngram is a tool to generate sentences randomly according to | ||
| 16 | - the given N-gram language model. The N-gram model file binary_ngram | ||
| 17 | - should be in binary format. | ||
| 18 | - | ||
| 19 | -OPTIONS | ||
| 20 | - -n num | ||
| 21 | - Number of sentences to generate (default: 10) | ||
| 22 | - | ||
| 23 | - -N | ||
| 24 | - Specify which length of N-gram to use (default: available max in the | ||
| 25 | - given model) | ||
| 26 | - | ||
| 27 | - -bos | ||
| 28 | - Beginning-of-sentence word (default: "<s>") | ||
| 29 | - | ||
| 30 | - -eos | ||
| 31 | - End-of-sentence word (default: "</s>") | ||
| 32 | - | ||
| 33 | - -ignore | ||
| 34 | - Specify a word to be suppressed from output (default: "<UNK>") | ||
| 35 | - | ||
| 36 | - -v | ||
| 37 | - Verbose output. | ||
| 38 | - | ||
| 39 | - -debug | ||
| 40 | - Debug output. | ||
| 41 | - | ||
| 42 | -SEE ALSO | ||
| 43 | - julius ( 1 ) , | ||
| 44 | - mkbingram ( 1 ) | ||
| 45 | - | ||
| 46 | -COPYRIGHT | ||
| 47 | - Copyright (c) 1997-2000 Information-technology Promotion Agency, Japan | ||
| 48 | - | ||
| 49 | - Copyright (c) 1991-2008 Kawahara Lab., Kyoto University | ||
| 50 | - | ||
| 51 | - Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and | ||
| 52 | - Technology | ||
| 53 | - | ||
| 54 | - Copyright (c) 2005-2008 Julius project team, Nagoya Institute of | ||
| 55 | - Technology | ||
| 56 | - | ||
| 57 | -LICENSE | ||
| 58 | - The same as Julius. | ||
| 59 | - | ||
| 60 | - | ||
| 61 | - | ||
| 62 | - 10/02/2008 GENERATE-NGRAM(1) |
recognize/src/julius/doc/manuals/generate.txt
| @@ -1,76 +0,0 @@ | @@ -1,76 +0,0 @@ | ||
| 1 | - generate | ||
| 2 | - | ||
| 3 | -GENERATE(1) GENERATE(1) | ||
| 4 | - | ||
| 5 | - | ||
| 6 | - | ||
| 7 | -NAME | ||
| 8 | - generate | ||
| 9 | - - random sentence generator from a grammar | ||
| 10 | - | ||
| 11 | -SYNOPSIS | ||
| 12 | - generate [-v] [-t] [-n num] [-s spname] {prefix} | ||
| 13 | - | ||
| 14 | -DESCRIPTION | ||
| 15 | - This small program randomly generates sentences that are acceptable by | ||
| 16 | - the given grammar. | ||
| 17 | - | ||
| 18 | - | ||
| 19 | - .dfa, .dict and .term files are needed to execute. They can be | ||
| 20 | - generated from .grammar and .voca file by mkdfa.pl. | ||
| 21 | - | ||
| 22 | -OPTIONS | ||
| 23 | - -t | ||
| 24 | - Output in word's category name. | ||
| 25 | - | ||
| 26 | - -n num | ||
| 27 | - Set number of sentences to be generated (default: 10) | ||
| 28 | - | ||
| 29 | - -s spname | ||
| 30 | - the name string of short-pause word to be suppressed (default: "sp") | ||
| 31 | - | ||
| 32 | - -v | ||
| 33 | - Debug output mode. | ||
| 34 | - | ||
| 35 | -EXAMPLES | ||
| 36 | - Example output of a sample grammar "fruit": | ||
| 37 | - | ||
| 38 | - % generate fruit | ||
| 39 | - Stat: init_voca: read 36 words | ||
| 40 | - Reading in term file (optional)...done | ||
| 41 | - 15 categories, 36 words | ||
| 42 | - DFA has 26 nodes and 42 arcs | ||
| 43 | - ----- | ||
| 44 | - <s> I WANT ONE APPLE </s> | ||
| 45 | - <s> I WANT TEN PEARS </s> | ||
| 46 | - <s> CAN I HAVE A PINEAPPLE </s> | ||
| 47 | - <s> I WANT ONE PEAR </s> | ||
| 48 | - <s> COULD I HAVE A BANANA </s> | ||
| 49 | - <s> I WANT ONE APPLE PLEASE </s> | ||
| 50 | - <s> I WANT NINE APPLES </s> | ||
| 51 | - <s> NINE APPLES </s> | ||
| 52 | - <s> I WANT ONE PINEAPPLE </s> | ||
| 53 | - <s> I WANT A PEAR </s> | ||
| 54 | - | ||
| 55 | - | ||
| 56 | -SEE ALSO | ||
| 57 | - mkdfa.pl ( 1 ) , | ||
| 58 | - generate-ngram ( 1 ) | ||
| 59 | - | ||
| 60 | -COPYRIGHT | ||
| 61 | - Copyright (c) 1997-2000 Information-technology Promotion Agency, Japan | ||
| 62 | - | ||
| 63 | - Copyright (c) 1991-2008 Kawahara Lab., Kyoto University | ||
| 64 | - | ||
| 65 | - Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and | ||
| 66 | - Technology | ||
| 67 | - | ||
| 68 | - Copyright (c) 2005-2008 Julius project team, Nagoya Institute of | ||
| 69 | - Technology | ||
| 70 | - | ||
| 71 | -LICENSE | ||
| 72 | - The same as Julius. | ||
| 73 | - | ||
| 74 | - | ||
| 75 | - | ||
| 76 | - 10/02/2008 GENERATE(1) |
recognize/src/julius/doc/manuals/gram2sapixml.pl.txt
| @@ -1,47 +0,0 @@ | @@ -1,47 +0,0 @@ | ||
| 1 | - gram2sapixml.pl | ||
| 2 | - | ||
| 3 | -GRAM2SAPIXML.PL(1) GRAM2SAPIXML.PL(1) | ||
| 4 | - | ||
| 5 | - | ||
| 6 | - | ||
| 7 | -NAME | ||
| 8 | - gram2sapixml.pl | ||
| 9 | - - convert Julius grammar to SAPI XML grammar format | ||
| 10 | - | ||
| 11 | -SYNOPSIS | ||
| 12 | - gram2sapixml.pl [prefix...] | ||
| 13 | - | ||
| 14 | -DESCRIPTION | ||
| 15 | - gram2sapixml.pl converts a recognition grammar file of Julius | ||
| 16 | - (.grammar, .voca) to Microsoft SAPI XML grammar format. prefix should | ||
| 17 | - be a file name of target grammar, excluding suffixes. If multiple | ||
| 18 | - arguments are given, each will be processed sequentially in turn. | ||
| 19 | - | ||
| 20 | - The internal character set should be in UTF-8 format. By default | ||
| 21 | - gram2sapixml.pl assumes input in EUC-JP encoding and tries to convert it | ||
| 22 | - to UTF-8 using iconv. You may want to disable this feature within the | ||
| 23 | - script. | ||
| 24 | - | ||
| 25 | - It will fail to convert a left recursive rule in the grammar. When | ||
| 26 | - fails, it will leave the source rules in the target .xml file, so you | ||
| 27 | - should modify the output manually to solve it. | ||
| 28 | - | ||
| 29 | -SEE ALSO | ||
| 30 | - mkdfa.pl ( 1 ) | ||
| 31 | - | ||
| 32 | -DIAGNOSTICS | ||
| 33 | - The conversion procedure is somewhat dumb one, only converting the | ||
| 34 | - non-terminal symbols and terminal symbols (=word category name) into | ||
| 35 | - corresponding rules one by one. This is only a help tool, and you will | ||
| 36 | - need a manual inspection and editing to use it on a real SAPI | ||
| 37 | - application. | ||
| 38 | - | ||
| 39 | -COPYRIGHT | ||
| 40 | - Copyright (c) 2002 Takashi Sumiyoshi | ||
| 41 | - | ||
| 42 | -LICENSE | ||
| 43 | - The same as Julius. | ||
| 44 | - | ||
| 45 | - | ||
| 46 | - | ||
| 47 | - 10/02/2008 GRAM2SAPIXML.PL(1) |
recognize/src/julius/doc/manuals/jclient.pl.txt
| @@ -1,46 +0,0 @@ | @@ -1,46 +0,0 @@ | ||
| 1 | - jclient.pl | ||
| 2 | - | ||
| 3 | -JCLIENT.PL(1) JCLIENT.PL(1) | ||
| 4 | - | ||
| 5 | - | ||
| 6 | - | ||
| 7 | -NAME | ||
| 8 | - jclient.pl | ||
| 9 | - - sample client for module mode (perl version) | ||
| 10 | - | ||
| 11 | -SYNOPSIS | ||
| 12 | - jclient.pl | ||
| 13 | - | ||
| 14 | -DESCRIPTION | ||
| 15 | - This is yet another sample client written in perl. It will connect to | ||
| 16 | - Julius running in module mode, receive recognition results from Julius, | ||
| 17 | - and can send commands to control Julius. | ||
| 18 | - | ||
| 19 | - This is a tiny program with only 57 lines. You can use it for free. | ||
| 20 | - | ||
| 21 | -EXAMPLES | ||
| 22 | - Invoke Julius with module mode by specifying "-module" option: | ||
| 23 | - Then, at other terminal or other host, invoke jclient.pl like below. | ||
| 24 | - The default hostname is "localhost", and port number is 10500. You can | ||
| 25 | - change them by editing the top part of the script. | ||
| 26 | - It will then receive the outputs of Julius and output the raw message | ||
| 27 | - to standard out. Also, by inputting a raw module command to the | ||
| 28 | - standard input of jclient.pl, it will be sent to Julius. See manuals | ||
| 29 | - for the specification of module mode. | ||
| 30 | - | ||
| 31 | -SEE ALSO | ||
| 32 | - julius ( 1 ) , | ||
| 33 | - jcontrol ( 1 ) | ||
| 34 | - | ||
| 35 | -COPYRIGHT | ||
| 36 | - "jclient.pl" has been developed by Dr. Ryuichi Nisimura | ||
| 37 | - (nisimura@sys.wakayama-u.ac.jp). Use at your own risk. | ||
| 38 | - | ||
| 39 | - If you have any feedback, comment or request, please contact the E-mail | ||
| 40 | - address above, or look at the Web page below. | ||
| 41 | - | ||
| 42 | - http://w3voice.jp/ | ||
| 43 | - | ||
| 44 | - | ||
| 45 | - | ||
| 46 | - 10/02/2008 JCLIENT.PL(1) |
recognize/src/julius/doc/manuals/jcontrol.txt
| @@ -1,173 +0,0 @@ | @@ -1,173 +0,0 @@ | ||
| 1 | - jcontrol | ||
| 2 | - | ||
| 3 | -JCONTROL(1) JCONTROL(1) | ||
| 4 | - | ||
| 5 | - | ||
| 6 | - | ||
| 7 | -NAME | ||
| 8 | - jcontrol | ||
| 9 | - - a sample module client written in C | ||
| 10 | - | ||
| 11 | -SYNOPSIS | ||
| 12 | - jcontrol {hostname} [portnum] | ||
| 13 | - | ||
| 14 | -DESCRIPTION | ||
| 15 | - jcontrol is a simple console program to control julius running on other | ||
| 16 | - host via network API. It can send command to Julius, and receive | ||
| 17 | - messages from Julius. | ||
| 18 | - | ||
| 19 | - When invoked, jcontrol tries to connect to Julius running in "module | ||
| 20 | - mode" on specified hostname. After connection established, jcontrol | ||
| 21 | - waits for user commands from standard input. | ||
| 22 | - | ||
| 23 | - When user types a command to jcontrol, it will be interpreted and cor- | ||
| 24 | - responding API command will be sent to Julius. When a message is | ||
| 25 | - received from Julius, its content will be output to standard output. | ||
| 26 | - | ||
| 27 | - For the details about the API, see the related documents. | ||
| 28 | - | ||
| 29 | -OPTIONS | ||
| 30 | - hostname | ||
| 31 | - Host name where Julius is running in module mode. | ||
| 32 | - | ||
| 33 | - portnum | ||
| 34 | - port number (default: 10500) | ||
| 35 | - | ||
| 36 | -COMMANDS | ||
| 37 | - jcontrol interprets commands from standard input. Below is a list of | ||
| 38 | - all commands. | ||
| 39 | - | ||
| 40 | - Engine control | ||
| 41 | - pause | ||
| 42 | - Stop Julius and enter into paused status. In paused status, Julius | ||
| 43 | - will not run recognition even if speech input occurs. When this | ||
| 44 | - command is issued while recognition is running, Julius will stop | ||
| 45 | - after the recognition has been finished. | ||
| 46 | - | ||
| 47 | - terminate | ||
| 48 | - Same as pause, but discard the current speech input when received | ||
| 49 | - command in the middle of recognition process. | ||
| 50 | - | ||
| 51 | - resume | ||
| 52 | - Restart Julius that has been paused or terminated. | ||
| 53 | - | ||
| 54 | - inputparam arg | ||
| 55 | - Tell Julius how to deal with speech input in case grammar is changed | ||
| 56 | - just when recognition is running. Specify one: "TERMINATE", "PAUSE" | ||
| 57 | - or "WAIT". | ||
| 58 | - | ||
| 59 | - version | ||
| 60 | - Tell Julius to send version description string. | ||
| 61 | - | ||
| 62 | - status | ||
| 63 | - Tell Julius to send the system status (active / sleep) | ||
| 64 | - | ||
| 65 | - Grammar handling | ||
| 66 | - changegram prefix | ||
| 67 | - Send a new grammar "prefix.dfa" and "prefix.dict", and tell julius | ||
| 68 | - to use it as a new grammar. All the current grammars used in the | ||
| 69 | - current process of Julius will be deleted and replaced with the | ||
| 70 | - specified grammar. | ||
| 71 | - | ||
| 72 | - addgram prefix | ||
| 73 | - Send a new grammar "prefix.dfa" and "prefix.dict" and add it to the | ||
| 74 | - current grammar. | ||
| 75 | - | ||
| 76 | - deletegram gramlist | ||
| 77 | - Tell Julius to delete existing grammar. The grammar can be specified | ||
| 78 | - by either prefix name or number ID. The number ID can be determined | ||
| 79 | - from the message sent from Julius at each time grammar information | ||
| 80 | - has changed. When want to delete more than one grammar, specify all | ||
| 81 | - of them as comma-separated. | ||
| 82 | - | ||
| 83 | - deactivategram gramlist | ||
| 84 | - Tell Julius to de-activate a specified grammar. The specified | ||
| 85 | - grammar will still be kept but will not be used for recognition. | ||
| 86 | - | ||
| 87 | - The target grammar can be specified by either prefix name or number | ||
| 88 | - ID. The number ID can be determined from the message sent from | ||
| 89 | - Julius at each time grammar information has changed. When want to | ||
| 90 | - delete more than one grammar, specify all of them as comma-separated. | ||
| 91 | - | ||
| 92 | - activategram gramlist | ||
| 93 | - Tell Julius to activate previously de-activated grammar. The target | ||
| 94 | - grammar can be specified by either prefix name or number ID. The | ||
| 95 | - number ID can be determined from the message sent from Julius at | ||
| 96 | - each time grammar information has changed. When want to delete more | ||
| 97 | - than one grammar, specify all of them as comma-separated. | ||
| 98 | - | ||
| 99 | - addword grammar_name_or_id dictfile | ||
| 100 | - Add the recognition word entries in the specified dictfile to the | ||
| 101 | - specified grammar on current process. | ||
| 102 | - | ||
| 103 | - syncgram | ||
| 104 | - Force synchronize grammar status, like unix command "sync". | ||
| 105 | - | ||
| 106 | - Process management | ||
| 107 | - Julius-4 supports multi-model recognition and multi decoding. In this | ||
| 108 | - case it is possible to control each recognition process, as defined by | ||
| 109 | - "-SR" option, from module client. | ||
| 110 | - | ||
| 111 | - In multi decoding mode, the module client holds "current process", and | ||
| 112 | - the process commands and grammar related commands will be issued toward | ||
| 113 | - the current process. | ||
| 114 | - | ||
| 115 | - listprocess | ||
| 116 | - Tell Julius to send the list of existing recognition process. | ||
| 117 | - | ||
| 118 | - currentprocess procname | ||
| 119 | - Switch the current process to the process specified by the name. | ||
| 120 | - | ||
| 121 | - shiftprocess | ||
| 122 | - Rotate the current process. At each call the current process will be | ||
| 123 | - changed to the next one. | ||
| 124 | - | ||
| 125 | - addprocess jconffile | ||
| 126 | - Tell Julius to load a new recognition process into engine. The | ||
| 127 | - argument jconffile should be a jconf file that contains only one set | ||
| 128 | - of LM options and one SR definition. Note that the file should be | ||
| 129 | - visible on the running Julius, since jcontrol only send the path | ||
| 130 | - name and Julius actually read the jconf file. | ||
| 131 | - | ||
| 132 | - The new LM and SR process will have the name of the jconffile. | ||
| 133 | - | ||
| 134 | - delprocess procname | ||
| 135 | - Delete the specified recognition process from the engine. | ||
| 136 | - | ||
| 137 | - deactivateprocess procname | ||
| 138 | - Tell Julius to temporary stop the specified recognition process. The | ||
| 139 | - stopped process will not be executed for the input until activated | ||
| 140 | - again. | ||
| 141 | - | ||
| 142 | - activateprocess procname | ||
| 143 | - Tell Julius to activate the temporarily stopped process. | ||
| 144 | - | ||
| 145 | -EXAMPLES | ||
| 146 | - The dump messages from Julius are output to tty with prefix ">" | ||
| 147 | - appended to each line. Julius can be started in module mode like this: | ||
| 148 | - jcontrol can be launched with the host name: | ||
| 149 | - It will then receive the outputs of Julius and output the raw message | ||
| 150 | - to standard out. Also, by inputting the commands above to the standard | ||
| 151 | - input of jcontrol, it will be sent to Julius. See manuals for the | ||
| 152 | - specification of module mode. | ||
| 153 | - | ||
| 154 | -SEE ALSO | ||
| 155 | - julius ( 1 ) | ||
| 156 | - | ||
| 157 | -COPYRIGHT | ||
| 158 | - Copyright (c) 1997-2000 Information-technology Promotion Agency, Japan | ||
| 159 | - | ||
| 160 | - Copyright (c) 1991-2008 Kawahara Lab., Kyoto University | ||
| 161 | - | ||
| 162 | - Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and | ||
| 163 | - Technology | ||
| 164 | - | ||
| 165 | - Copyright (c) 2005-2008 Julius project team, Nagoya Institute of | ||
| 166 | - Technology | ||
| 167 | - | ||
| 168 | -LICENSE | ||
| 169 | - The same as Julius. | ||
| 170 | - | ||
| 171 | - | ||
| 172 | - | ||
| 173 | - 10/02/2008 JCONTROL(1) |
recognize/src/julius/doc/manuals/julius.txt
| @@ -1,1287 +0,0 @@ | @@ -1,1287 +0,0 @@ | ||
| 1 | - julius | ||
| 2 | - | ||
| 3 | -JULIUS(1) JULIUS(1) | ||
| 4 | - | ||
| 5 | - | ||
| 6 | - | ||
| 7 | -NAME | ||
| 8 | - julius | ||
| 9 | - - open source multi-purpose LVCSR engine | ||
| 10 | - | ||
| 11 | -SYNOPSIS | ||
| 12 | - julius [-C jconffile] [options...] | ||
| 13 | - | ||
| 14 | -DESCRIPTION | ||
| 15 | - julius is a high-performance, multi-purpose, open-source speech | ||
| 16 | - recognition engine for researchers and developers. It is capable of | ||
| 17 | - performing almost real-time recognition of continuous speech with over | ||
| 18 | - 60k-word 3-gram language model and triphone HMM model, on most current | ||
| 19 | - PCs. julius can perform recognition on audio files, live microphone | ||
| 20 | - input, network input and feature parameter files. | ||
| 21 | - | ||
| 22 | - The core recognition module is implemented as C library called | ||
| 23 | - "JuliusLib". It can also be extended by plug-in facility. | ||
| 24 | - | ||
| 25 | - Supported Models | ||
| 26 | - julius needs a language model and an acoustic model to run as a speech | ||
| 27 | - recognizer. julius supports the following models. | ||
| 28 | - | ||
| 29 | - Acoustic model | ||
| 30 | - Sub-word HMM (Hidden Markov Model) in HTK ascii format are | ||
| 31 | - supported. Phoneme models (monophone), context dependent phoneme | ||
| 32 | - models (triphone), tied-mixture and phonetic tied-mixture models | ||
| 33 | - of any unit can be used. When using context dependent models, | ||
| 34 | - inter-word context dependency is also handled. Multi-stream | ||
| 35 | - feature and MSD-HMM is also supported. You can further use a | ||
| 36 | - tool mkbinhmm to convert the ascii HMM file to a compact binary | ||
| 37 | - format for faster loading. | ||
| 38 | - | ||
| 39 | - Note that julius itself can only extract MFCC features from | ||
| 40 | - speech data. If you use acoustic HMM trained for other feature, | ||
| 41 | - you should give the input in HTK parameter file of the same | ||
| 42 | - feature type. | ||
| 43 | - | ||
| 44 | - Language model: word N-gram | ||
| 45 | - Word N-gram language model, up to 10-gram, is supported. Julius | ||
| 46 | - uses different N-gram for each pass: left-to-right 2-gram on 1st | ||
| 47 | - pass, and right-to-left N-gram on 2nd pass. It is recommended to | ||
| 48 | - use both LR 2-gram and RL N-gram for Julius. However, you can | ||
| 49 | - use only single LR N-gram or RL N-gram. In such case, | ||
| 50 | - approximated LR 2-gram computed from the given N-gram will be | ||
| 51 | - applied at the first pass. | ||
| 52 | - | ||
| 53 | - The Standard ARPA format is supported. In addition, a binary | ||
| 54 | - format is also supported for efficiency. The tool mkbingram(1) | ||
| 55 | - can convert ARPA format N-gram to binary format. | ||
| 56 | - | ||
| 57 | - Language model: grammar | ||
| 58 | - The grammar format is an original one, and tools to create a | ||
| 59 | - recognition grammar are included in the distribution. A grammar | ||
| 60 | - consists of two files: one is a 'grammar' file that describes | ||
| 61 | - sentence structures in a BNF style, using word 'category' name | ||
| 62 | - as terminal symbols. Another is a 'voca' file that defines | ||
| 63 | - words with its pronunciations (i.e. phoneme sequences) for each | ||
| 64 | - category. They should be converted by mkdfa.pl(1) to a | ||
| 65 | - deterministic finite automaton file (.dfa) and a dictionary file | ||
| 66 | - (.dict), respectively. You can also use multiple grammars. | ||
| 67 | - | ||
| 68 | - Language model: isolated word | ||
| 69 | - You can perform isolated word recognition using only word | ||
| 70 | - dictionary. With this model type, Julius will perform rapid one | ||
| 71 | - pass recognition with static context handling. Silence models | ||
| 72 | - will be added at both head and tail of each word. You can also | ||
| 73 | - use multiple dictionaries in a process. | ||
| 74 | - | ||
| 75 | - Search Algorithm | ||
| 76 | - Recognition algorithm of julius is based on a two-pass strategy. Word | ||
| 77 | - 2-gram and reverse word 3-gram is used on the respective passes. The | ||
| 78 | - entire input is processed on the first pass, and again the final | ||
| 79 | - searching process is performed again for the input, using the result of | ||
| 80 | - the first pass to narrow the search space. Specifically, the | ||
| 81 | - recognition algorithm is based on a tree-trellis heuristic search | ||
| 82 | - combined with left-to-right frame-synchronous beam search and | ||
| 83 | - right-to-left stack decoding search. | ||
| 84 | - | ||
| 85 | - When using context dependent phones (triphones), interword contexts are | ||
| 86 | - taken into consideration. For tied-mixture and phonetic tied-mixture | ||
| 87 | - models, high-speed acoustic likelihood calculation is possible using | ||
| 88 | - gaussian pruning. | ||
| 89 | - | ||
| 90 | - For more details, see the related documents. | ||
| 91 | - | ||
| 92 | -OPTIONS | ||
| 93 | - These options specify the models, system behaviors and various search | ||
| 94 | - parameters to Julius. These option can be set at the command line, but | ||
| 95 | - it is recommended that you write them in a text file as a "jconf file", | ||
| 96 | - and specify it by "-C" option. | ||
| 97 | - | ||
| 98 | - Applications incorporating JuliusLib also use these options to set the | ||
| 99 | - parameters of core recognition engine. For example, a jconf file can be | ||
| 100 | - loaded to the engine by calling j_config_load_file_new() with the jconf | ||
| 101 | - file name as argument. | ||
| 102 | - | ||
| 103 | - Please note that relative paths in a jconf file should be relative to | ||
| 104 | - the jconf file itself, not the current working directory. | ||
| 105 | - | ||
| 106 | - Below are the details of all options, gathered by group. | ||
| 107 | - | ||
| 108 | - Julius application option | ||
| 109 | - These are application options of Julius, outside of JuliusLib. It | ||
| 110 | - contains parameters and switches for result output, character set | ||
| 111 | - conversion, log level, and module mode options. These option are | ||
| 112 | - specific to Julius, and cannot be used at applications using JuliusLib | ||
| 113 | - other than Julius. | ||
| 114 | - | ||
| 115 | - -outfile | ||
| 116 | - On file input, this option write the recognition result of each file | ||
| 117 | - to a separate file. The output file of an input file will be the | ||
| 118 | - same name but the suffix will be changed to ".out". (rev.4.0) | ||
| 119 | - | ||
| 120 | - -separatescore | ||
| 121 | - Output the language and acoustic scores separately. | ||
| 122 | - | ||
| 123 | - -callbackdebug | ||
| 124 | - Print the callback names at each call for debug. (rev.4.0) | ||
| 125 | - | ||
| 126 | - -charconv from to | ||
| 127 | - Print with character set conversion. from is the source character | ||
| 128 | - set used in the language model, and to is the target character set | ||
| 129 | - you want to get. | ||
| 130 | - | ||
| 131 | - On Linux, the arguments should be a code name. You can obtain the | ||
| 132 | - list of available code names by invoking the command "iconv --list". | ||
| 133 | - On Windows, the arguments should be a code name or codepage number. | ||
| 134 | - Code name should be one of "ansi", "mac", "oem", "utf-7", "utf-8", | ||
| 135 | - "sjis", "euc". Or you can specify any codepage number supported at | ||
| 136 | - your environment. | ||
| 137 | - | ||
| 138 | - -nocharconv | ||
| 139 | - Disable character conversion. | ||
| 140 | - | ||
| 141 | - -module [port] | ||
| 142 | - Run Julius on "Server Module Mode". After startup, Julius waits for | ||
| 143 | - tcp/ip connection from client. Once connection is established, | ||
| 144 | - Julius start communication with the client to process incoming | ||
| 145 | - commands from the client, or to output recognition results, input | ||
| 146 | - trigger information and other system status to the client. The | ||
| 147 | - default port number is 10500. | ||
| 148 | - | ||
| 149 | - -record dir | ||
| 150 | - Auto-save all input speech data into the specified directory. Each | ||
| 151 | - segmented inputs are recorded each by one. The file name of the | ||
| 152 | - recorded data is generated from system time when the input ends, in | ||
| 153 | - a style of YYYY.MMDD.HHMMSS.wav. File format is 16bit monaural WAV. | ||
| 154 | - Invalid for mfcfile input. | ||
| 155 | - | ||
| 156 | - With input rejection by -rejectshort, the rejected input will also | ||
| 157 | - be recorded even if they are rejected. | ||
| 158 | - | ||
| 159 | - -logfile file | ||
| 160 | - Save all log output to a file instead of standard output. (Rev.4.0) | ||
| 161 | - | ||
| 162 | - -nolog | ||
| 163 | - Disable all log output. (Rev.4.0) | ||
| 164 | - | ||
| 165 | - -help | ||
| 166 | - Output help message and exit. | ||
| 167 | - | ||
| 168 | - Global options | ||
| 169 | - These are model-/search-dependent options relating audio input, sound | ||
| 170 | - detection, GMM, decoding algorithm, plugin facility, and others. Global | ||
| 171 | - options should be placed before any instance declaration (-AM, -LM, or | ||
| 172 | - -SR), or just after "-GLOBAL" option. | ||
| 173 | - | ||
| 174 | - Audio input | ||
| 175 | - -input | ||
| 176 | - {mic|rawfile|mfcfile|adinnet|stdin|netaudio|alsa|oss|esd} | ||
| 177 | - Choose speech input source. Specify 'file' or 'rawfile' for | ||
| 178 | - waveform file, 'htkparam' or 'mfcfile' for HTK parameter | ||
| 179 | - file. On file input, users will be prompted to enter the file | ||
| 180 | - name from stdin, or you can use -filelist option to specify | ||
| 181 | - list of files to process. | ||
| 182 | - | ||
| 183 | - 'mic' is to get audio input from a default live microphone | ||
| 184 | - device, and 'adinnet' means receiving waveform data via tcpip | ||
| 185 | - network from an adinnet client. 'netaudio' is from | ||
| 186 | - DatLink/NetAudio input, and 'stdin' means data input from | ||
| 187 | - standard input. | ||
| 188 | - | ||
| 189 | - For waveform file input, only WAV (no compression) and RAW | ||
| 190 | - (noheader, 16bit, big endian) are supported by default. Other | ||
| 191 | - format can be read when compiled with libsnd library. To see | ||
| 192 | - what format is actually supported, see the help message using | ||
| 193 | - option -help. For stdin input, only WAV and RAW is supported. | ||
| 194 | - (default: mfcfile) | ||
| 195 | - | ||
| 196 | - At Linux, you can choose API at run time by specifying alsa, | ||
| 197 | - oss and esd. | ||
| 198 | - | ||
| 199 | - -filelist filename | ||
| 200 | - (With -input rawfile|mfcfile) perform recognition on all | ||
| 201 | - files listed in the file. The file should contain input file | ||
| 202 | - per line. Engine will end when all of the files are | ||
| 203 | - processed. | ||
| 204 | - | ||
| 205 | - -notypecheck | ||
| 206 | - By default, Julius checks the input parameter type whether it | ||
| 207 | - matches the AM or not. This option will disable the check and | ||
| 208 | - force engine to use the input vector as is. | ||
| 209 | - | ||
| 210 | - -48 | ||
| 211 | - Record input with 48kHz sampling, and down-sample it to 16kHz | ||
| 212 | - on-the-fly. This option is valid for 16kHz model only. The | ||
| 213 | - down-sampling routine was ported from sptk. (Rev. 4.0) | ||
| 214 | - | ||
| 215 | - -NA devicename | ||
| 216 | - Host name for DatLink server input (-input netaudio). | ||
| 217 | - | ||
| 218 | - -adport port_number | ||
| 219 | - With -input adinnet, specify adinnet port number to listen. | ||
| 220 | - (default: 5530) | ||
| 221 | - | ||
| 222 | - -nostrip | ||
| 223 | - Julius by default removes successive zero samples in input | ||
| 224 | - speech data. This option inhibits the removal. | ||
| 225 | - | ||
| 226 | - -zmean , -nozmean | ||
| 227 | - This option enables/disables DC offset removal of input | ||
| 228 | - waveform. Offset will be estimated from the whole input. For | ||
| 229 | - microphone / network input, zero mean of the first 48000 | ||
| 230 | - samples (3 seconds in 16kHz sampling) will be used for the | ||
| 231 | - estimation. (default: disabled) | ||
| 232 | - | ||
| 233 | - This option uses static offset for the channel. See also | ||
| 234 | - -zmeansource for frame-wise offset removal. | ||
| 235 | - | ||
| 236 | - Speech detection by level and zero-cross | ||
| 237 | - -cutsilence , -nocutsilence | ||
| 238 | - Turn on / off the speech detection by level and zero-cross. | ||
| 239 | - Default is on for mic / adinnet input, and off for files. | ||
| 240 | - | ||
| 241 | - -lv thres | ||
| 242 | - Level threshold for speech input detection. Values should be | ||
| 243 | - in range from 0 to 32767. (default: 2000) | ||
| 244 | - | ||
| 245 | - -zc thres | ||
| 246 | - Zero crossing threshold per second. Only input that goes over | ||
| 247 | - the level threshold (-lv) will be counted. (default: 60) | ||
| 248 | - | ||
| 249 | - -headmargin msec | ||
| 250 | - Silence margin at the start of speech segment in | ||
| 251 | - milliseconds. (default: 300) | ||
| 252 | - | ||
| 253 | - -tailmargin msec | ||
| 254 | - Silence margin at the end of speech segment in milliseconds. | ||
| 255 | - (default: 400) | ||
| 256 | - | ||
| 257 | - Input rejection | ||
| 258 | - Two simple front-end input rejection methods are implemented, | ||
| 259 | - based on input length and average power of detected segment. The | ||
| 260 | - rejection by average power is experimental, and can be enabled | ||
| 261 | - by --enable-power-reject on compilation. Valid for MFCC feature | ||
| 262 | - with power coefficient and real-time input only. | ||
| 263 | - | ||
| 264 | - For GMM-based input rejection see the GMM section below. | ||
| 265 | - | ||
| 266 | - -rejectshort msec | ||
| 267 | - Reject input shorter than specified milliseconds. Search will | ||
| 268 | - be terminated and no result will be output. | ||
| 269 | - | ||
| 270 | - -powerthres thres | ||
| 271 | - Reject the inputted segment by its average energy. If the | ||
| 272 | - average energy of the last recognized input is below the | ||
| 273 | - threshold, Julius will reject the input. (Rev.4.0) | ||
| 274 | - | ||
| 275 | - This option is valid when --enable-power-reject is specified | ||
| 276 | - at compilation time. | ||
| 277 | - | ||
| 278 | - Gaussian mixture model / GMM-VAD | ||
| 279 | - GMM will be used for input rejection by accumulated score, or | ||
| 280 | - for front-end GMM-based VAD when --enable-gmm-vad is specified. | ||
| 281 | - | ||
| 282 | - NOTE: You should also set the proper MFCC parameters required | ||
| 283 | - for the GMM, specifying the acoustic parameters described in AM | ||
| 284 | - section -AM_GMM. | ||
| 285 | - | ||
| 286 | - When GMM-based VAD is enabled, the voice activity score will be | ||
| 287 | - calculated at each frame as front-end processing. The value will | ||
| 288 | - be computed as \[ \max_{m \in M_v} p(x|m) - \max_{m \in M_n} | ||
| 289 | - p(x|m) \] where $M_v$ is a set of voice GMM, and $M_n$ is a set | ||
| 290 | - of noise GMM whose names should be specified by -gmmreject. The | ||
| 291 | - activity score will be then averaged for the last N frames, | ||
| 292 | - where N is specified by -gmmmargin. Julius updates the averaged | ||
| 293 | - activity score at each frame, and detect speech up-trigger when | ||
| 294 | - the value gets higher than a value specified by -gmmup, and | ||
| 295 | - detect down-trigger when it gets lower than a value of | ||
| 296 | - -gmmdown. | ||
| 297 | - | ||
| 298 | - -gmm hmmdefs_file | ||
| 299 | - GMM definition file in HTK format. If specified, GMM-based | ||
| 300 | - input verification will be performed concurrently with the | ||
| 301 | - 1st pass, and you can reject the input according to the | ||
| 302 | - result as specified by -gmmreject. The GMM should be defined | ||
| 303 | - as one-state HMMs. | ||
| 304 | - | ||
| 305 | - -gmmnum number | ||
| 306 | - Number of Gaussian components to be computed per frame on GMM | ||
| 307 | - calculation. Only the N-best Gaussians will be computed for | ||
| 308 | - rapid calculation. The default is 10 and specifying smaller | ||
| 309 | - value will speed up GMM calculation, but too small value (1 | ||
| 310 | - or 2) may cause degradation of identification performance. | ||
| 311 | - | ||
| 312 | - -gmmreject string | ||
| 313 | - Comma-separated list of GMM names to be rejected as invalid | ||
| 314 | - input. When recognition, the log likelihoods of GMMs | ||
| 315 | - accumulated for the entire input will be computed | ||
| 316 | - concurrently with the 1st pass. If the GMM name of the | ||
| 317 | - maximum score is within this string, the 2nd pass will not be | ||
| 318 | - executed and the input will be rejected. | ||
| 319 | - | ||
| 320 | - -gmmmargin frames | ||
| 321 | - (GMM_VAD) Head margin in frames. When a speech trigger | ||
| 322 | - detected by GMM, recognition will start from current frame | ||
| 323 | - minus this value. (Rev.4.0) | ||
| 324 | - | ||
| 325 | - This option will be valid only if compiled with | ||
| 326 | - --enable-gmm-vad. | ||
| 327 | - | ||
| 328 | - -gmmup value | ||
| 329 | - (GMM_VAD) Up trigger threshold of voice activity score. | ||
| 330 | - (Rev.4.1) | ||
| 331 | - | ||
| 332 | - This option will be valid only if compiled with | ||
| 333 | - --enable-gmm-vad. | ||
| 334 | - | ||
| 335 | - -gmmdown value | ||
| 336 | - (GMM_VAD) Down trigger threshold of voice activity score. | ||
| 337 | - (Rev.4.1) | ||
| 338 | - | ||
| 339 | - This option will be valid only if compiled with | ||
| 340 | - --enable-gmm-vad. | ||
| 341 | - | ||
| 342 | - Decoding option | ||
| 343 | - Real-time processing means concurrent processing of MFCC | ||
| 344 | - computation and 1st pass decoding. By default, real-time processing | ||
| 345 | - on the pass is on for microphone / adinnet / netaudio input, and | ||
| 346 | - off for others. | ||
| 347 | - | ||
| 348 | - -realtime , -norealtime | ||
| 349 | - Explicitly switch on / off real-time (pipe-line) processing | ||
| 350 | - on the first pass. The default is off for file input, and on | ||
| 351 | - for microphone, adinnet and NetAudio input. This option | ||
| 352 | - relates to the way CMN and energy normalization is performed: | ||
| 353 | - if off, they will be done using average features of whole | ||
| 354 | - input. If on, MAP-CMN and energy normalization to do | ||
| 355 | - real-time processing. | ||
| 356 | - | ||
| 357 | - Misc. options | ||
| 358 | - -C jconffile | ||
| 359 | - Load a jconf file at here. The content of the jconffile will | ||
| 360 | - be expanded at this point. | ||
| 361 | - | ||
| 362 | - -version | ||
| 363 | - Print version information to standard error, and exit. | ||
| 364 | - | ||
| 365 | - -setting | ||
| 366 | - Print engine setting information to standard error, and exit. | ||
| 367 | - | ||
| 368 | - -quiet | ||
| 369 | - Output less log. For result, only the best word sequence will | ||
| 370 | - be printed. | ||
| 371 | - | ||
| 372 | - -debug | ||
| 373 | - (For debug) output enormous internal message and debug | ||
| 374 | - information to log. | ||
| 375 | - | ||
| 376 | - -check {wchmm|trellis|triphone} | ||
| 377 | - For debug, enter interactive check mode. | ||
| 378 | - | ||
| 379 | - -plugindir dirlist | ||
| 380 | - Specify directory to load plugin. If several directories | ||
| 381 | - exist, specify them by colon-separated list. | ||
| 382 | - | ||
| 383 | - Instance declaration for multi decoding | ||
| 384 | - The following arguments will create a new configuration set with | ||
| 385 | - default parameters, and switch current set to it. Jconf parameters | ||
| 386 | - specified after the option will be set into the current set. | ||
| 387 | - | ||
| 388 | - To do multi-model decoding, these argument should be specified at the | ||
| 389 | - first of each model / search instances with different names. Any | ||
| 390 | - options before the first instance definition will be IGNORED. | ||
| 391 | - | ||
| 392 | - When no instance definition is found (as older version of Julius), all | ||
| 393 | - the options are assigned to a default instance named _default. | ||
| 394 | - | ||
| 395 | - Please note that decoding with a single LM and multiple AMs is not | ||
| 396 | - fully supported. For example, you may want to construct the jconf file | ||
| 397 | - as following. | ||
| 398 | - This type of model sharing is not supported yet, since some part of LM | ||
| 399 | - processing depends on the assigned AM. Instead, you can get the same | ||
| 400 | - result by defining the same LMs for each AM, like this: | ||
| 401 | - | ||
| 402 | - -AM name | ||
| 403 | - Create a new AM configuration set, and switch current to the new | ||
| 404 | - one. You should give a unique name. (Rev.4.0) | ||
| 405 | - | ||
| 406 | - -LM name | ||
| 407 | - Create a new LM configuration set, and switch current to the new | ||
| 408 | - one. You should give a unique name. (Rev.4.0) | ||
| 409 | - | ||
| 410 | - -SR name am_name lm_name | ||
| 411 | - Create a new search configuration set, and switch current to the new | ||
| 412 | - one. The specified AM and LM will be assigned to it. The am_name and | ||
| 413 | - lm_name can be either name or ID number. You should give a unique | ||
| 414 | - name. (Rev.4.0) | ||
| 415 | - | ||
| 416 | - -AM_GMM | ||
| 417 | - When using GMM for front-end processing, you can specify | ||
| 418 | - GMM-specific acoustic parameters after this option. If you do not | ||
| 419 | - specify -AM_GMM with GMM, the GMM will share the same parameter | ||
| 420 | - vector as the last AM. The current AM will be switched to the GMM | ||
| 421 | - one, so be careful not to confuse with normal AM configurations. | ||
| 422 | - (Rev.4.0) | ||
| 423 | - | ||
| 424 | - -GLOBAL | ||
| 425 | - Start a global section. The global options should be placed before | ||
| 426 | - any instance declaration, or after this option on multiple model | ||
| 427 | - recognition. This can be used multiple times. (Rev.4.1) | ||
| 428 | - | ||
| 429 | - -nosectioncheck , -sectioncheck | ||
| 430 | - Disable / enable option location check in multi-model decoding. When | ||
| 431 | - enabled, the options between instance declaration is treated as | ||
| 432 | - "sections" and only the belonging option types can be written. For | ||
| 433 | - example, when an option -AM is specified, only the AM related option | ||
| 434 | - can be placed after the option until other declaration is found. | ||
| 435 | - Also, global options should be placed at top, before any instance | ||
| 436 | - declaration. This is enabled by default. (Rev.4.1) | ||
| 437 | - | ||
| 438 | - Language model (-LM) | ||
| 439 | - This group contains options for model definition of each language model | ||
| 440 | - type. When using multiple LM, one instance can have only one LM. | ||
| 441 | - | ||
| 442 | - Only one type of LM can be specified for a LM configuration. If you | ||
| 443 | - want to use multi model, you should define them one as a new LM. | ||
| 444 | - | ||
| 445 | - N-gram | ||
| 446 | - -d bingram_file | ||
| 447 | - Use binary format N-gram. An ARPA N-gram file can be | ||
| 448 | - converted to Julius binary format by mkbingram. | ||
| 449 | - | ||
| 450 | - -nlr arpa_ngram_file | ||
| 451 | - A forward, left-to-right N-gram language model in standard | ||
| 452 | - ARPA format. When both a forward N-gram and backward N-gram | ||
| 453 | - are specified, Julius uses this forward 2-gram for the 1st | ||
| 454 | - pass, and the backward N-gram for the 2nd pass. | ||
| 455 | - | ||
| 456 | - Since ARPA file often gets huge and requires a lot of time to | ||
| 457 | - load, it may be better to convert the ARPA file to Julius | ||
| 458 | - binary format by mkbingram. Note that if both forward and | ||
| 459 | - backward N-grams are used for recognition, they together will | ||
| 460 | - be converted to a single binary. | ||
| 461 | - | ||
| 462 | - When only a forward N-gram is specified by this option and no | ||
| 463 | - backward N-gram specified by -nrl, Julius performs | ||
| 464 | - recognition with only the forward N-gram. The 1st pass will | ||
| 465 | - use the 2-gram entry in the given N-gram, and the 2nd pass | ||
| 466 | - will use the given N-gram, with converting forward | ||
| 467 | - probabilities to backward probabilities by Bayes rule. | ||
| 468 | - (Rev.4.0) | ||
| 469 | - | ||
| 470 | - -nrl arpa_ngram_file | ||
| 471 | - A backward, right-to-left N-gram language model in standard | ||
| 472 | - ARPA format. When both a forward N-gram and backward N-gram | ||
| 473 | - are specified, Julius uses the forward 2-gram for the 1st | ||
| 474 | - pass, and this backward N-gram for the 2nd pass. | ||
| 475 | - | ||
| 476 | - Since ARPA file often gets huge and requires a lot of time to | ||
| 477 | - load, it may be better to convert the ARPA file to Julius | ||
| 478 | - binary format by mkbingram. Note that if both forward and | ||
| 479 | - backward N-grams are used for recognition, they together will | ||
| 480 | - be converted to a single binary. | ||
| 481 | - | ||
| 482 | - When only a backward N-gram is specified by this option and | ||
| 483 | - no forward N-gram specified by -nlr, Julius performs | ||
| 484 | - recognition with only the backward N-gram. The 1st pass will | ||
| 485 | - use the forward 2-gram probability computed from the backward | ||
| 486 | - 2-gram using Bayes rule. The 2nd pass fully uses the given | ||
| 487 | - backward N-gram. (Rev.4.0) | ||
| 488 | - | ||
| 489 | - -v dict_file | ||
| 490 | - Word dictionary file. | ||
| 491 | - | ||
| 492 | - -silhead word_string -siltail word_string | ||
| 493 | - Silence word defined in the dictionary, for silences at the | ||
| 494 | - beginning of sentence and end of sentence. (default: "<s>", | ||
| 495 | - "</s>") | ||
| 496 | - | ||
| 497 | - -mapunk word_string | ||
| 498 | - Specify unknown word. Default is "<unk>" or "<UNK>". This | ||
| 499 | - will be used to assign word probability on unknown words, | ||
| 500 | - i.e. words in dictionary that are not in N-gram vocabulary. | ||
| 501 | - | ||
| 502 | - -iwspword | ||
| 503 | - Add a word entry to the dictionary that should correspond to | ||
| 504 | - inter-word pauses. This may improve recognition accuracy in | ||
| 505 | - some language model that has no explicit inter-word pause | ||
| 506 | - modeling. The word entry to be added can be changed by | ||
| 507 | - -iwspentry. | ||
| 508 | - | ||
| 509 | - -iwspentry word_entry_string | ||
| 510 | - Specify the word entry that will be added by -iwspword. | ||
| 511 | - (default: "<UNK> [sp] sp sp") | ||
| 512 | - | ||
| 513 | - -sepnum number | ||
| 514 | - Number of high frequency words to be isolated from the | ||
| 515 | - lexicon tree, to ease approximation error that may be caused | ||
| 516 | - by the one-best approximation on 1st pass. (default: 150) | ||
| 517 | - | ||
| 518 | - Grammar | ||
| 519 | - Multiple grammars can be specified by repeating -gram and | ||
| 520 | - -gramlist. Note that this is unusual behavior from other options | ||
| 521 | - (in normal Julius option, last one will override previous ones). | ||
| 522 | - You can use -nogram to reset the grammars already specified | ||
| 523 | - before the point. | ||
| 524 | - | ||
| 525 | - -gram gramprefix1[,gramprefix2[,gramprefix3,...]] | ||
| 526 | - Comma-separated list of grammars to be used. The argument | ||
| 527 | - should be a prefix of a grammar, i.e. if you have foo.dfa and | ||
| 528 | - foo.dict, you should specify them with a single argument foo. | ||
| 529 | - Multiple grammars can be specified at a time as a | ||
| 530 | - comma-separated list. | ||
| 531 | - | ||
| 532 | - -gramlist list_file | ||
| 533 | - Specify a grammar list file that contains list of grammars to | ||
| 534 | - be used. The list file should contain the prefixes of | ||
| 535 | - grammars, each per line. A relative path in the list file | ||
| 536 | - will be treated as relative to the file, not the current path | ||
| 537 | - or configuration file. | ||
| 538 | - | ||
| 539 | - -dfa dfa_file -v dict_file | ||
| 540 | - An old way of specifying grammar files separately. This is | ||
| 541 | - bogus, and should not be used any more. | ||
| 542 | - | ||
| 543 | - -nogram | ||
| 544 | - Remove the current list of grammars already specified by | ||
| 545 | - -gram, -gramlist, -dfa and -v. | ||
| 546 | - | ||
| 547 | - Isolated word | ||
| 548 | - Dictionary can be specified by using -w and -wlist. When you | ||
| 549 | - specify multiple times, all of them will be read at startup. You | ||
| 550 | - can use -nogram to reset the already specified dictionaries at | ||
| 551 | - that point. | ||
| 552 | - | ||
| 553 | - -w dict_file | ||
| 554 | - Word dictionary for isolated word recognition. File format is | ||
| 555 | - the same as other LM. (Rev.4.0) | ||
| 556 | - | ||
| 557 | - -wlist list_file | ||
| 558 | - Specify a dictionary list file that contains list of | ||
| 559 | - dictionaries to be used. The list file should contain the | ||
| 560 | - file name of dictionaries, each per line. A relative path in | ||
| 561 | - the list file will be treated as relative to the list file, | ||
| 562 | - not the current path or configuration file. (Rev.4.0) | ||
| 563 | - | ||
| 564 | - -nogram | ||
| 565 | - Remove the current list of dictionaries already specified by | ||
| 566 | - -w and -wlist. | ||
| 567 | - | ||
| 568 | - -wsil head_sil_model_name tail_sil_model_name sil_context_name | ||
| 569 | - On isolated word recognition, silence models will be appended | ||
| 570 | - to the head and tail of each word at recognition. This option | ||
| 571 | - specifies the silence models to be appended. | ||
| 572 | - sil_context_name is the name of the head sil model and tail | ||
| 573 | - sil model as a context of word head phone and tail phone. For | ||
| 574 | - example, if you specify -wsil silB silE sp, a word with phone | ||
| 575 | - sequence b eh t will be translated as silB sp-b+eh b-eh+t | ||
| 576 | - eh-t+sp silE. (Rev.4.0) | ||
| 577 | - | ||
| 578 | - User-defined LM | ||
| 579 | - -userlm | ||
| 580 | - Declare to use user LM functions in the program. This option | ||
| 581 | - should be specified if you use user-defined LM functions. | ||
| 582 | - (Rev.4.0) | ||
| 583 | - | ||
| 584 | - Misc. LM options | ||
| 585 | - -forcedict | ||
| 586 | - Skip error words in dictionary and force running. | ||
| 587 | - | ||
| 588 | - Acoustic model and feature analysis (-AM) (-AM_GMM) | ||
| 589 | - This section is about options for acoustic model, feature extraction, | ||
| 590 | - feature normalizations and spectral subtraction. | ||
| 591 | - | ||
| 592 | - After -AM name, an acoustic model and related specification should be | ||
| 593 | - written. You can use multiple AMs trained with different MFCC types. | ||
| 594 | - For GMM, the required parameter condition should be specified just as | ||
| 595 | - same as AMs after -AM_GMM. | ||
| 596 | - | ||
| 597 | - When using multiple AMs, the values of -smpPeriod, -smpFreq, -fsize and | ||
| 598 | - -fshift should be the same among all AMs. | ||
| 599 | - | ||
| 600 | - Acoustic HMM | ||
| 601 | - -h hmmdef_file | ||
| 602 | - Acoustic HMM definition file. It should be in HTK ascii | ||
| 603 | - format, or Julius binary format. You can convert HTK ascii | ||
| 604 | - format to Julius binary format using mkbinhmm. | ||
| 605 | - | ||
| 606 | - -hlist hmmlist_file | ||
| 607 | - HMMList file for phone mapping. This file provides mapping | ||
| 608 | - between logical triphone names generated in the dictionary | ||
| 609 | - and the defined HMM names in hmmdefs. This option should be | ||
| 610 | - specified for context-dependent model. | ||
| 611 | - | ||
| 612 | - -tmix number | ||
| 613 | - Specify the number of top Gaussians to be calculated in a | ||
| 614 | - mixture codebook. Small number will speed up the acoustic | ||
| 615 | - computation, but AM accuracy may get worse with too small | ||
| 616 | - value. See also -gprune. (default: 2) | ||
| 617 | - | ||
| 618 | - -spmodel name | ||
| 619 | - Specify HMM model name that corresponds to short-pause in an | ||
| 620 | - utterance. The short-pause model name will be used in | ||
| 621 | - recognition: short-pause skipping on grammar recognition, | ||
| 622 | - word-end short-pause model insertion with -iwsp on N-gram, or | ||
| 623 | - short-pause segmentation (-spsegment). (default: "sp") | ||
| 624 | - | ||
| 625 | - -multipath | ||
| 626 | - Enable multi-path mode. To make decoding faster, Julius by | ||
| 627 | - default imposes a limit on HMM transitions that each model | ||
| 628 | - should have only one transition from initial state and to end | ||
| 629 | - state. On multi-path mode, Julius does extra handling on | ||
| 630 | - inter-model transition to allow model-skipping transition | ||
| 631 | - and multiple output/input transitions. Note that specifying | ||
| 632 | - this option will make Julius a bit slower, and the larger | ||
| 633 | - beam width may be required. | ||
| 634 | - | ||
| 635 | - This function was a compilation-time option on Julius 3.x, | ||
| 636 | - and now becomes a run-time option. By default (without this | ||
| 637 | - option), Julius checks the transition type of specified HMMs, | ||
| 638 | - and enables the multi-path mode if required. You can force | ||
| 639 | - multi-path mode with this option. (rev.4.0) | ||
| 640 | - | ||
| 641 | - -gprune {safe|heuristic|beam|none|default} | ||
| 642 | - Set Gaussian pruning algorithm to use. For tied-mixture | ||
| 643 | - model, Julius performs Gaussian pruning to reduce acoustic | ||
| 644 | - computation, by calculating only the top N Gaussians in each | ||
| 645 | - codebook at each frame. The default setting will be set | ||
| 646 | - according to the model type and engine setting. default will | ||
| 647 | - force accepting the default setting. Set this to none to | ||
| 648 | - disable pruning and perform full computation. safe | ||
| 649 | - guarantees the top N Gaussians to be computed. heuristic and | ||
| 650 | - beam do more aggressive computational cost reduction, but may | ||
| 651 | - result in a small loss of accuracy (default: safe | ||
| 652 | - (standard), beam (fast) for tied mixture model, none for non | ||
| 653 | - tied-mixture model). | ||
| 654 | - | ||
| 655 | - -iwcd1 {max|avg|best number} | ||
| 656 | - Select method to approximate inter-word triphone on the head | ||
| 657 | - and tail of a word in the first pass. | ||
| 658 | - | ||
| 659 | - | ||
| 660 | - max will apply the maximum likelihood of the same context | ||
| 661 | - triphones. avg will apply the average likelihood of the same | ||
| 662 | - context triphones. best number will apply the average of top | ||
| 663 | - N-best likelihoods of the same context triphone. | ||
| 664 | - | ||
| 665 | - Default is best 3 for use with N-gram, and avg for grammar | ||
| 666 | - and word. When this AM is shared by LMs of both type, latter | ||
| 667 | - one will be chosen. | ||
| 668 | - | ||
| 669 | - -iwsppenalty float | ||
| 670 | - Insertion penalty for word-end short pauses appended by | ||
| 671 | - -iwsp. | ||
| 672 | - | ||
| 673 | - -gshmm hmmdef_file | ||
| 674 | - If this option is specified, Julius performs Gaussian Mixture | ||
| 675 | - Selection for efficient decoding. The hmmdefs should be a | ||
| 676 | - monophone model generated from an ordinary monophone HMM | ||
| 677 | - model, using mkgshmm. | ||
| 678 | - | ||
| 679 | - -gsnum number | ||
| 680 | - On GMS, specify number of monophone states to compute | ||
| 681 | - corresponding triphones in detail. (default: 24) | ||
| 682 | - | ||
| 683 | - Speech analysis | ||
| 684 | - Only MFCC feature extraction is supported in current Julius. | ||
| 685 | - Thus when recognizing a waveform input from file or microphone, | ||
| 686 | - AM must be trained by MFCC. The parameter condition should also | ||
| 687 | - be set as exactly the same as the training condition by the | ||
| 688 | - options below. | ||
| 689 | - | ||
| 690 | - When you give an input in HTK Parameter file, you can use any | ||
| 691 | - parameter type for AM. In this case Julius does not care about | ||
| 692 | - the type of input feature and AM, just read them as vector | ||
| 693 | - sequence and match them to the given AM. Julius only checks | ||
| 694 | - whether the parameter types are the same. If it does not work | ||
| 695 | - well, you can disable this checking by -notypecheck. | ||
| 696 | - | ||
| 697 | - In Julius, the parameter kind and qualifiers (as TARGETKIND in | ||
| 698 | - HTK) and the number of cepstral parameters (NUMCEPS) will be set | ||
| 699 | - automatically from the content of the AM header, so you need not | ||
| 700 | - specify them by options. | ||
| 701 | - | ||
| 702 | - Other parameters should be set exactly the same as training | ||
| 703 | - condition. You can also give a HTK Config file which you used to | ||
| 704 | - train AM to Julius by -htkconf. When this option is applied, | ||
| 705 | - Julius will parse the Config file and set appropriate parameter. | ||
| 706 | - | ||
| 707 | - You can further embed those analysis parameter settings to a | ||
| 708 | - binary HMM file using mkbinhmm. | ||
| 709 | - | ||
| 710 | - If options are specified in several ways, they will be evaluated in | ||
| 711 | - the order below. The AM embedded parameter will be loaded first | ||
| 712 | - if any. Then, the HTK config file given by -htkconf will be | ||
| 713 | - parsed. If a value was already set by an AM embedded value, the HTK | ||
| 714 | - config will override it. At last, the direct options will be loaded, | ||
| 715 | - which will override settings loaded before. Note that, when the | ||
| 716 | - same options are specified several times, later will override | ||
| 717 | - previous, except that -htkconf will be evaluated first as | ||
| 718 | - described above. | ||
| 719 | - | ||
| 720 | - -smpPeriod period | ||
| 721 | - Sampling period of input speech, in unit of 100 nanoseconds. | ||
| 722 | - Sampling rate can also be specified by -smpFreq. Please note | ||
| 723 | - that the input frequency should be set equal to the training | ||
| 724 | - conditions of AM. (default: 625, corresponds to 16,000Hz) | ||
| 725 | - | ||
| 726 | - This option corresponds to the HTK Option SOURCERATE. The | ||
| 727 | - same value can be given to this option. | ||
| 728 | - | ||
| 729 | - When using multiple AM, this value should be the same among | ||
| 730 | - all AMs. | ||
| 731 | - | ||
| 732 | - -smpFreq Hz | ||
| 733 | - Set sampling frequency of input speech in Hz. Sampling rate | ||
| 734 | - can also be specified using -smpPeriod. Please note that this | ||
| 735 | - frequency should be set equal to the training conditions of | ||
| 736 | - AM. (default: 16,000) | ||
| 737 | - | ||
| 738 | - When using multiple AM, this value should be the same among | ||
| 739 | - all AMs. | ||
| 740 | - | ||
| 741 | - -fsize sample_num | ||
| 742 | - Window size in number of samples. (default: 400) | ||
| 743 | - | ||
| 744 | - This option corresponds to the HTK Option WINDOWSIZE, but | ||
| 745 | - value should be in samples (HTK value / smpPeriod). | ||
| 746 | - | ||
| 747 | - When using multiple AM, this value should be the same among | ||
| 748 | - all AMs. | ||
| 749 | - | ||
| 750 | - -fshift sample_num | ||
| 751 | - Frame shift in number of samples. (default: 160) | ||
| 752 | - | ||
| 753 | - This option corresponds to the HTK Option TARGETRATE, but | ||
| 754 | - value should be in samples (HTK value / smpPeriod). | ||
| 755 | - | ||
| 756 | - When using multiple AM, this value should be the same among | ||
| 757 | - all AMs. | ||
| 758 | - | ||
| 759 | - -preemph float | ||
| 760 | - Pre-emphasis coefficient. (default: 0.97) | ||
| 761 | - | ||
| 762 | - This option corresponds to the HTK Option PREEMCOEF. The same | ||
| 763 | - value can be given to this option. | ||
| 764 | - | ||
| 765 | - -fbank num | ||
| 766 | - Number of filterbank channels. (default: 24) | ||
| 767 | - | ||
| 768 | - This option corresponds to the HTK Option NUMCHANS. The same | ||
| 769 | - value can be given to this option. Be aware that the default | ||
| 770 | - value is not the same as in HTK (22). | ||
| 771 | - | ||
| 772 | - -ceplif num | ||
| 773 | - Cepstral liftering coefficient. (default: 22) | ||
| 774 | - | ||
| 775 | - This option corresponds to the HTK Option CEPLIFTER. The same | ||
| 776 | - value can be given to this option. | ||
| 777 | - | ||
| 778 | - -rawe , -norawe | ||
| 779 | - Enable/disable using raw energy before pre-emphasis (default: | ||
| 780 | - disabled) | ||
| 781 | - | ||
| 782 | - This option corresponds to the HTK Option RAWENERGY. Be aware | ||
| 783 | - that the default value differs from HTK (enabled at HTK, | ||
| 784 | - disabled at Julius). | ||
| 785 | - | ||
| 786 | - -enormal , -noenormal | ||
| 787 | - Enable/disable normalizing log energy. On live input, this | ||
| 788 | - normalization will be approximated from the average of last | ||
| 789 | - input. (default: disabled) | ||
| 790 | - | ||
| 791 | - This option corresponds to the HTK Option ENORMALISE. Be | ||
| 792 | - aware that the default value differs from HTK (enabled at | ||
| 793 | - HTK, disabled at Julius). | ||
| 794 | - | ||
| 795 | - -escale float_scale | ||
| 796 | - Scaling factor of log energy when normalizing log energy. | ||
| 797 | - (default: 1.0) | ||
| 798 | - | ||
| 799 | - This option corresponds to the HTK Option ESCALE. Be aware | ||
| 800 | - that the default value differs from HTK (0.1). | ||
| 801 | - | ||
| 802 | - -silfloor float | ||
| 803 | - Energy silence floor in dB when normalizing log energy. | ||
| 804 | - (default: 50.0) | ||
| 805 | - | ||
| 806 | - This option corresponds to the HTK Option SILFLOOR. | ||
| 807 | - | ||
| 808 | - -delwin frame | ||
| 809 | - Delta window size in number of frames. (default: 2) | ||
| 810 | - | ||
| 811 | - This option corresponds to the HTK Option DELTAWINDOW. The | ||
| 812 | - same value can be given to this option. | ||
| 813 | - | ||
| 814 | - -accwin frame | ||
| 815 | - Acceleration window size in number of frames. (default: 2) | ||
| 816 | - | ||
| 817 | - This option corresponds to the HTK Option ACCWINDOW. The same | ||
| 818 | - value can be given to this option. | ||
| 819 | - | ||
| 820 | - -hifreq Hz | ||
| 821 | - Enable band-limiting for MFCC filterbank computation: set | ||
| 822 | - upper frequency cut-off. Value of -1 will disable it. | ||
| 823 | - (default: -1) | ||
| 824 | - | ||
| 825 | - This option corresponds to the HTK Option HIFREQ. The same | ||
| 826 | - value can be given to this option. | ||
| 827 | - | ||
| 828 | - -lofreq Hz | ||
| 829 | - Enable band-limiting for MFCC filterbank computation: set | ||
| 830 | - lower frequency cut-off. Value of -1 will disable it. | ||
| 831 | - (default: -1) | ||
| 832 | - | ||
| 833 | - This option corresponds to the HTK Option LOFREQ. The same | ||
| 834 | - value can be given to this option. | ||
| 835 | - | ||
| 836 | - -zmeanframe , -nozmeanframe | ||
| 837 | - With speech input, this option enables/disables frame-wise DC | ||
| 838 | - offset removal. This corresponds to HTK configuration | ||
| 839 | - ZMEANSOURCE. This cannot be used together with -zmean. | ||
| 840 | - (default: disabled) | ||
| 841 | - | ||
| 842 | - -usepower | ||
| 843 | - Use power instead of magnitude on filterbank analysis. | ||
| 844 | - (default: disabled) | ||
| 845 | - | ||
| 846 | - Normalization | ||
| 847 | - Julius can perform cepstral mean normalization (CMN) for inputs. | ||
| 848 | - CMN will be activated when the given AM was trained with CMN | ||
| 849 | - (i.e. has "_Z" qualifier in the header). | ||
| 850 | - | ||
| 851 | - The cepstral mean will be estimated in different way according | ||
| 852 | - to the input type. On file input, the mean will be computed from | ||
| 853 | - the whole input. On live input such as microphone and network | ||
| 854 | - input, the cepstral mean of the input is unknown at the start. So | ||
| 855 | - MAP-CMN will be used. On MAP-CMN, an initial mean vector will be | ||
| 856 | - applied at the beginning, and the mean vector will be smeared to | ||
| 857 | - the mean of the incrementing input vector as input goes. Options | ||
| 858 | - below can control the behavior of MAP-CMN. | ||
| 859 | - | ||
| 860 | - -cvn | ||
| 861 | - Enable cepstral variance normalization. At file input, the | ||
| 862 | - variance of whole input will be calculated and then applied. | ||
| 863 | - At live microphone input, variance of the last input will be | ||
| 864 | - applied. CVN is only supported for an audio input. | ||
| 865 | - | ||
| 866 | - -vtln alpha lowcut hicut | ||
| 867 | - Do frequency warping, typically for a vocal tract length | ||
| 868 | - normalization (VTLN). Arguments are warping factor, low | ||
| 869 | - frequency cut-off and high frequency cut-off. They correspond to | ||
| 870 | - HTK Config values WARPFREQ, WARPLCUTOFF and WARPHCUTOFF. | ||
| 871 | - | ||
| 872 | - -cmnload file | ||
| 873 | - Load initial cepstral mean vector from file on startup. The | ||
| 874 | - file should be one saved by -cmnsave. Loading an initial | ||
| 875 | - cepstral mean enables Julius to better recognize the first | ||
| 876 | - utterance on a real-time input. When used together with | ||
| 877 | - -cmnnoupdate, this initial value will be used for all input. | ||
| 878 | - | ||
| 879 | - -cmnsave file | ||
| 880 | - Save the calculated cepstral mean vector into file. The | ||
| 881 | - parameters will be saved at each input end. If the output | ||
| 882 | - file already exists, it will be overwritten. | ||
| 883 | - | ||
| 884 | - -cmnupdate -cmnnoupdate | ||
| 885 | - Control whether to update the cepstral mean at each input on | ||
| 886 | - real-time input. Disabling this and specifying -cmnload will | ||
| 887 | - make the engine always use the loaded static initial cepstral | ||
| 888 | - mean. | ||
| 889 | - | ||
| 890 | - -cmnmapweight float | ||
| 891 | - Specify the weight of initial cepstral mean for MAP-CMN. | ||
| 892 | - Specify larger value to retain the initial cepstral mean for | ||
| 893 | - a longer period, and smaller value to make the cepstral mean | ||
| 894 | - rely more on the current input. (default: 100.0) | ||
| 895 | - | ||
| 896 | - Front-end processing | ||
| 897 | - Julius can perform spectral subtraction to reduce some | ||
| 898 | - stationary noise from audio input. Though it is not a powerful | ||
| 899 | - method, it may work in some situations. Julius has two ways | ||
| 900 | - to estimate noise spectrum. One way is to assume that the first | ||
| 901 | - short segment of a speech input is a noise segment, and estimate | ||
| 902 | - the noise spectrum as the average of the segment. Another way is | ||
| 903 | - to calculate average spectrum from noise-only input using other | ||
| 904 | - tool mkss, and load it in Julius. The former one is popular for | ||
| 905 | - speech file input, and latter should be used in live input. The | ||
| 906 | - options below will switch / control the behavior. | ||
| 907 | - | ||
| 908 | - -sscalc | ||
| 909 | - Perform spectral subtraction using head part of each file as | ||
| 910 | - silence part. The head part length should be specified by | ||
| 911 | - -sscalclen. Valid only for file input. Conflict with -ssload. | ||
| 912 | - | ||
| 913 | - -sscalclen msec | ||
| 914 | - With -sscalc, specify the length of head silence for noise | ||
| 915 | - spectrum estimation in milliseconds. (default: 300) | ||
| 916 | - | ||
| 917 | - -ssload file | ||
| 918 | - Perform spectral subtraction for speech input using | ||
| 919 | - pre-estimated noise spectrum loaded from file. The noise | ||
| 920 | - spectrum file can be made by mkss. Valid for all speech | ||
| 921 | - input. Conflict with -sscalc. | ||
| 922 | - | ||
| 923 | - -ssalpha float | ||
| 924 | - Alpha coefficient of spectral subtraction for -sscalc and | ||
| 925 | - -ssload. Noise will be subtracted stronger as this value gets | ||
| 926 | - larger, but distortion of the resulting signal also becomes | ||
| 927 | - remarkable. (default: 2.0) | ||
| 928 | - | ||
| 929 | - -ssfloor float | ||
| 930 | - Flooring coefficient of spectral subtraction. The spectral | ||
| 931 | - power that goes below zero after subtraction will be | ||
| 932 | - substituted by the source signal with this coefficient | ||
| 933 | - multiplied. (default: 0.5) | ||
| 934 | - | ||
| 935 | - Misc. AM options | ||
| 936 | - -htkconf file | ||
| 937 | - Parse the given HTK Config file, and set corresponding | ||
| 938 | - parameters to Julius. When using this option, the default | ||
| 939 | - parameter values are switched from Julius defaults to HTK | ||
| 940 | - defaults. | ||
| 941 | - | ||
| 942 | - Recognition process and search (-SR) | ||
| 943 | - This section contains options for search parameters on the 1st / 2nd | ||
| 944 | - pass such as beam width and LM weights, configurations for short-pause | ||
| 945 | - segmentation, switches for word lattice output and confusion network | ||
| 946 | - output, forced alignments, and other options relating to the recognition | ||
| 947 | - process and result output. | ||
| 948 | - | ||
| 949 | - Default values for beam width and LM weights will change according to | ||
| 950 | - compile-time setup of JuliusLib , AM model type, and LM size. Please | ||
| 951 | - see the startup log for the actual values. | ||
| 952 | - | ||
| 953 | - 1st pass parameters | ||
| 954 | - -lmp weight penalty | ||
| 955 | - (N-gram) Language model weights and word insertion penalties | ||
| 956 | - for the first pass. | ||
| 957 | - | ||
| 958 | - -penalty1 penalty | ||
| 959 | - (Grammar) word insertion penalty for the first pass. | ||
| 960 | - (default: 0.0) | ||
| 961 | - | ||
| 962 | - -b width | ||
| 963 | - Beam width in number of HMM nodes for rank beaming on the | ||
| 964 | - first pass. This value defines search width on the 1st pass, | ||
| 965 | - and has dominant effect on the total processing time. Smaller | ||
| 966 | - width will speed up the decoding, but too small value will | ||
| 967 | - result in a substantial increase of recognition errors due to | ||
| 968 | - search failure. Larger value will make the search stable and | ||
| 969 | - will lead to failure-free search, but processing time will | ||
| 970 | - grow in proportion to the width. | ||
| 971 | - | ||
| 972 | - The default value is dependent on acoustic model type: 400 | ||
| 973 | - (monophone), 800 (triphone), or 1000 (triphone, setup=v2.1) | ||
| 974 | - | ||
| 975 | - -nlimit num | ||
| 976 | - Upper limit of token per node. This option is valid when | ||
| 977 | - --enable-wpair and --enable-wpair-nlimit are enabled at | ||
| 978 | - compilation time. | ||
| 979 | - | ||
| 980 | - -progout | ||
| 981 | - Enable progressive output of the partial results on the first | ||
| 982 | - pass. | ||
| 983 | - | ||
| 984 | - -proginterval msec | ||
| 985 | - Set the time interval for -progout in milliseconds. (default: | ||
| 986 | - 300) | ||
| 987 | - | ||
| 988 | - 2nd pass parameters | ||
| 989 | - -lmp2 weight penalty | ||
| 990 | - (N-gram) Language model weights and word insertion penalties | ||
| 991 | - for the second pass. | ||
| 992 | - | ||
| 993 | - -penalty2 penalty | ||
| 994 | - (Grammar) word insertion penalty for the second pass. | ||
| 995 | - (default: 0.0) | ||
| 996 | - | ||
| 997 | - -b2 width | ||
| 998 | - Envelope beam width (number of hypotheses) at the second | ||
| 999 | - pass. If the count of word expansion at a certain hypothesis | ||
| 1000 | - length reaches this limit while search, shorter hypotheses | ||
| 1001 | - are not expanded further. This prevents search to fall in | ||
| 1002 | - breadth-first-like situation stacking on the same position, | ||
| 1003 | - and improve search failure mostly for large vocabulary | ||
| 1004 | - condition. (default: 30) | ||
| 1005 | - | ||
| 1006 | - -sb float | ||
| 1007 | - Score envelope width for enveloped scoring. When calculating | ||
| 1008 | - hypothesis score for each generated hypothesis, its trellis | ||
| 1009 | - expansion and Viterbi operation will be pruned in the middle | ||
| 1010 | - of the speech if score on a frame goes under the width. | ||
| 1011 | - Giving small value makes the second pass faster, but | ||
| 1012 | - computation error may occur. (default: 80.0) | ||
| 1013 | - | ||
| 1014 | - -s num | ||
| 1015 | - Stack size, i.e. the maximum number of hypotheses that can be | ||
| 1016 | - stored on the stack during the search. A larger value may | ||
| 1017 | - give more stable results, but increases the amount of memory | ||
| 1018 | - required. (default: 500) | ||
| 1019 | - | ||
| 1020 | - -m count | ||
| 1021 | - Number of expanded hypotheses required to discontinue the | ||
| 1022 | - search. If the number of expanded hypotheses is greater than | ||
| 1023 | - this threshold, the search is discontinued at that | ||
| 1024 | - point. The larger this value is, the longer Julius takes to | ||
| 1025 | - give up the search. (default: 2000) | ||
| 1026 | - | ||
| 1027 | - -n num | ||
| 1028 | - The number of candidates Julius tries to find. The search | ||
| 1029 | - continues till this number of sentence hypotheses have been | ||
| 1030 | - found. The obtained sentence hypotheses are sorted by score, | ||
| 1031 | - and final result is displayed in the order (see also the | ||
| 1032 | - -output). The possibility that the optimum hypothesis is | ||
| 1033 | - correctly found increases as this value gets increased, but | ||
| 1034 | - the processing time also becomes longer. The default value | ||
| 1035 | - depends on the engine setup on compilation time: 10 | ||
| 1036 | - (standard) or 1 (fast or v2.1) | ||
| 1037 | - | ||
| 1038 | - -output num | ||
| 1039 | - The top N sentence hypotheses to be output at the end of | ||
| 1040 | - search. Use with -n (default: 1) | ||
| 1041 | - | ||
| 1042 | - -lookuprange frame | ||
| 1043 | - Set the number of frames before and after to look up next | ||
| 1044 | - word hypotheses in the word trellis on the second pass. This | ||
| 1045 | - prevents the omission of short words, but with a large value, | ||
| 1046 | - the number of expanded hypotheses increases and system | ||
| 1047 | - becomes slow. (default: 5) | ||
| 1048 | - | ||
| 1049 | - -looktrellis | ||
| 1050 | - (Grammar) Expand only the words survived on the first pass | ||
| 1051 | - instead of expanding all the words predicted by grammar. This | ||
| 1052 | - option makes second pass decoding faster especially for large | ||
| 1053 | - vocabulary condition, but may increase deletion error of | ||
| 1054 | - short words. (default: disabled) | ||
| 1055 | - | ||
| 1056 | - Short-pause segmentation / decoder-VAD | ||
| 1057 | - When compiled with --enable-decoder-vad, the short-pause | ||
| 1058 | - segmentation will be extended to support decoder-based VAD. | ||
| 1059 | - | ||
| 1060 | - -spsegment | ||
| 1061 | - Enable short-pause segmentation mode. Input will be segmented | ||
| 1062 | - when a short pause word (word with only silence model in | ||
| 1063 | - pronunciation) gets the highest likelihood at certain | ||
| 1064 | - successive frames on the first pass. When a segment end is | ||
| 1065 | - detected, Julius stops the 1st pass at that point, performs the 2nd | ||
| 1066 | - pass, and continues with the next segment. The word context will be | ||
| 1067 | - considered among segments. (Rev.4.0) | ||
| 1068 | - | ||
| 1069 | - When compiled with --enable-decoder-vad, this option enables | ||
| 1070 | - decoder-based VAD, to skip long silence. | ||
| 1071 | - | ||
| 1072 | - -spdur frame | ||
| 1073 | - Short pause duration length to detect end of input segment, | ||
| 1074 | - in number of frames. (default: 10) | ||
| 1075 | - | ||
| 1076 | - -pausemodels string | ||
| 1077 | - A comma-separated list of pause model names to be used at | ||
| 1078 | - short-pause segmentation. The word whose pronunciation | ||
| 1079 | - consists of only the pause models will be treated as "pause | ||
| 1080 | - word" and used for pause detection. If not specified, name of | ||
| 1081 | - -spmodel, -silhead and -siltail will be used. (Rev.4.0) | ||
| 1082 | - | ||
| 1083 | - -spmargin frame | ||
| 1084 | - Back step margin at trigger up for decoder-based VAD. When | ||
| 1085 | - speech up-trigger found by decoder-VAD, Julius will rewind | ||
| 1086 | - the input parameter by this value, and start recognition at | ||
| 1087 | - the point. (Rev.4.0) | ||
| 1088 | - | ||
| 1089 | - This option will be valid only if compiled with | ||
| 1090 | - --enable-decoder-vad. | ||
| 1091 | - | ||
| 1092 | - -spdelay frame | ||
| 1093 | - Trigger decision delay frame at trigger up for decoder-based | ||
| 1094 | - VAD. (Rev.4.0) | ||
| 1095 | - | ||
| 1096 | - This option will be valid only if compiled with | ||
| 1097 | - --enable-decoder-vad. | ||
| 1098 | - | ||
| 1099 | - Word lattice / confusion network output | ||
| 1100 | - -lattice , -nolattice | ||
| 1101 | - Enable / disable generation of word graph. Search algorithm | ||
| 1102 | - also has changed to optimize for better word graph | ||
| 1103 | - generation, so the sentence result may not be the same as | ||
| 1104 | - normal N-best recognition. (Rev.4.0) | ||
| 1105 | - | ||
| 1106 | - -confnet , -noconfnet | ||
| 1107 | - Enable / disable generation of confusion network. Enabling | ||
| 1108 | - this will also activate -lattice internally. (Rev.4.0) | ||
| 1109 | - | ||
| 1110 | - -graphrange frame | ||
| 1111 | - Merge same words at neighbor position at graph generation. If | ||
| 1112 | - the beginning time and ending time of two word candidates of | ||
| 1113 | - the same word is within the specified range, they will be | ||
| 1114 | - merged. The default is 0 (allow merging same words on exactly | ||
| 1115 | - the same location) and specifying larger value will result in | ||
| 1116 | - smaller graph output. Setting this value to -1 will disable | ||
| 1117 | - merging, in that case same words on the same location of | ||
| 1118 | - different scores will be left as they are. (default: 0) | ||
| 1119 | - | ||
| 1120 | - -graphcut depth | ||
| 1121 | - Cut the resulting graph by its word depth at post-processing | ||
| 1122 | - stage. The depth value is the number of words to be allowed | ||
| 1123 | - at a frame. Setting to -1 disables this feature. (default: | ||
| 1124 | - 80) | ||
| 1125 | - | ||
| 1126 | - -graphboundloop count | ||
| 1127 | - Limit the number of boundary adjustment loop at | ||
| 1128 | - post-processing stage. This parameter prevents Julius from | ||
| 1129 | - blocking by infinite adjustment loop by short word | ||
| 1130 | - oscillation. (default: 20) | ||
| 1131 | - | ||
| 1132 | - -graphsearchdelay , -nographsearchdelay | ||
| 1133 | - When this option is enabled, Julius modifies its graph | ||
| 1134 | - generation algorithm on the 2nd pass not to terminate search | ||
| 1135 | - by graph merging, until the first sentence candidate is | ||
| 1136 | - found. This option may improve graph accuracy, especially | ||
| 1137 | - when you are going to generate a huge word graph by setting | ||
| 1138 | - broad search. Namely, it may result in better graph accuracy | ||
| 1139 | - when you set wide beams on both 1st pass -b and 2nd pass -b2, | ||
| 1140 | - and large number for -n. (default: disabled) | ||
| 1141 | - | ||
| 1142 | - Multi-gram / multi-dic recognition | ||
| 1143 | - -multigramout , -nomultigramout | ||
| 1144 | - On grammar recognition using multiple grammars, Julius will | ||
| 1145 | - output only the best result among all grammars. Enabling this | ||
| 1146 | - option will make Julius output a result for each grammar. | ||
| 1147 | - (default: disabled) | ||
| 1148 | - | ||
| 1149 | - Forced alignment | ||
| 1150 | - -walign | ||
| 1151 | - Do viterbi alignment per word units for the recognition | ||
| 1152 | - result. The word boundary frames and the average acoustic | ||
| 1153 | - scores per frame will be calculated. | ||
| 1154 | - | ||
| 1155 | - -palign | ||
| 1156 | - Do viterbi alignment per phone units for the recognition | ||
| 1157 | - result. The phone boundary frames and the average acoustic | ||
| 1158 | - scores per frame will be calculated. | ||
| 1159 | - | ||
| 1160 | - -salign | ||
| 1161 | - Do viterbi alignment per state for the recognition result. | ||
| 1162 | - The state boundary frames and the average acoustic scores per | ||
| 1163 | - frame will be calculated. | ||
| 1164 | - | ||
| 1165 | - Misc. search options | ||
| 1166 | - -inactive | ||
| 1167 | - Start this recognition process instance with inactive state. | ||
| 1168 | - (Rev.4.0) | ||
| 1169 | - | ||
| 1170 | - -1pass | ||
| 1171 | - Perform only the first pass. | ||
| 1172 | - | ||
| 1173 | - -fallback1pass | ||
| 1174 | - When the 2nd pass fails, Julius finishes the recognition with no | ||
| 1175 | - result. This option tells Julius to output the 1st pass result | ||
| 1176 | - as a final result when the 2nd pass fails. Note that some | ||
| 1177 | - score output (confidence etc.) may not be useful. This was | ||
| 1178 | - the default behavior of Julius-3.x. | ||
| 1179 | - | ||
| 1180 | - -no_ccd , -force_ccd | ||
| 1181 | - Explicitly switch phone context handling at search. Normally | ||
| 1182 | - Julius determines whether the AM in use is a context-dependent | ||
| 1183 | - model or not from the model names, i.e., whether the names | ||
| 1184 | - contain character + and -. This option will override the | ||
| 1185 | - automatic detection. | ||
| 1186 | - | ||
| 1187 | - -cmalpha float | ||
| 1188 | - Smoothing parameter for confidence scoring. (default: 0.05) | ||
| 1189 | - | ||
| 1190 | - -iwsp | ||
| 1191 | - (Multi-path mode only) Enable inter-word context-free short | ||
| 1192 | - pause insertion. This option appends a skippable short pause | ||
| 1193 | - model for every word end. The short-pause model can be | ||
| 1194 | - specified by -spmodel. | ||
| 1195 | - | ||
| 1196 | - -transp float | ||
| 1197 | - Additional insertion penalty for transparent words. (default: | ||
| 1198 | - 0.0) | ||
| 1199 | - | ||
| 1200 | - -demo | ||
| 1201 | - Equivalent to -progout -quiet. | ||
| 1202 | - | ||
| 1203 | -ENVIRONMENT VARIABLES | ||
| 1204 | - ALSADEV | ||
| 1205 | - (using mic input with alsa device) specify a capture device name. If | ||
| 1206 | - not specified, "default" will be used. | ||
| 1207 | - | ||
| 1208 | - AUDIODEV | ||
| 1209 | - (using mic input with oss device) specify a capture device path. If | ||
| 1210 | - not specified, "/dev/dsp" will be used. | ||
| 1211 | - | ||
| 1212 | - LATENCY_MSEC | ||
| 1213 | - Try to set input latency of microphone input in milliseconds. | ||
| 1214 | - Smaller value will shorten latency but sometimes make process | ||
| 1215 | - unstable. Default value will depend on the running OS. | ||
| 1216 | - | ||
| 1217 | -EXAMPLES | ||
| 1218 | - For examples of system usage, refer to the tutorial section in the | ||
| 1219 | - Julius documents. | ||
| 1220 | - | ||
| 1221 | -NOTICE | ||
| 1222 | - Note about jconf files: relative paths in a jconf file are interpreted | ||
| 1223 | - as relative to the jconf file itself, not to the current directory. | ||
| 1224 | - | ||
| 1225 | -SEE ALSO | ||
| 1226 | - julian(1), jcontrol(1), adinrec(1), adintool(1), mkbingram(1), | ||
| 1227 | - mkbinhmm(1), mkgsmm(1), wav2mfcc(1), mkss(1) | ||
| 1228 | - | ||
| 1229 | - http://julius.sourceforge.jp/en/ | ||
| 1230 | - | ||
| 1231 | -DIAGNOSTICS | ||
| 1232 | - Julius normally will return the exit status 0. If an error occurs, | ||
| 1233 | - Julius exits abnormally with exit status 1. If an input file cannot be | ||
| 1234 | - found or cannot be loaded for some reason then Julius will skip | ||
| 1235 | - processing for that file. | ||
| 1236 | - | ||
| 1237 | -BUGS | ||
| 1238 | - There are some restrictions to the type and size of the models Julius | ||
| 1239 | - can use. For a detailed explanation refer to the Julius documentation. | ||
| 1240 | - For bug-reports, inquiries and comments please contact julius-info at | ||
| 1241 | - lists.sourceforge.jp. | ||
| 1242 | - | ||
| 1243 | -COPYRIGHT | ||
| 1244 | - Copyright (c) 1991-2008 Kawahara Lab., Kyoto University | ||
| 1245 | - | ||
| 1246 | - Copyright (c) 1997-2000 Information-technology Promotion Agency, Japan | ||
| 1247 | - | ||
| 1248 | - Copyright (c) 2000-2008 Shikano Lab., Nara Institute of Science and | ||
| 1249 | - Technology | ||
| 1250 | - | ||
| 1251 | - Copyright (c) 2005-2008 Julius project team, Nagoya Institute of | ||
| 1252 | - Technology | ||
| 1253 | - | ||
| 1254 | -AUTHORS | ||
| 1255 | - Rev.1.0 (1998/02/20) | ||
| 1256 | - Designed by Tatsuya KAWAHARA and Akinobu LEE (Kyoto University) | ||
| 1257 | - | ||
| 1258 | - Development by Akinobu LEE (Kyoto University) | ||
| 1259 | - | ||
| 1260 | - Rev.1.1 (1998/04/14), Rev.1.2 (1998/10/31), Rev.2.0 (1999/02/20), | ||
| 1261 | - Rev.2.1 (1999/04/20), Rev.2.2 (1999/10/04), Rev.3.0 (2000/02/14), | ||
| 1262 | - Rev.3.1 (2000/05/11) | ||
| 1263 | - Development of above versions by Akinobu LEE (Kyoto University) | ||
| 1264 | - | ||
| 1265 | - Rev.3.2 (2001/08/15), Rev.3.3 (2002/09/11), Rev.3.4 (2003/10/01), | ||
| 1266 | - Rev.3.4.1 (2004/02/25), Rev.3.4.2 (2004/04/30) | ||
| 1267 | - Development of above versions by Akinobu LEE (Nara Institute of | ||
| 1268 | - Science and Technology) | ||
| 1269 | - | ||
| 1270 | - Rev.3.5 (2005/11/11), Rev.3.5.1 (2006/03/31), Rev.3.5.2 (2006/07/31), | ||
| 1271 | - Rev.3.5.3 (2006/12/29), Rev.4.0 (2007/12/19), Rev.4.1 (2008/10/03) | ||
| 1272 | - Development of above versions by Akinobu LEE (Nagoya Institute of | ||
| 1273 | - Technology) | ||
| 1274 | - | ||
| 1275 | -THANKS TO | ||
| 1276 | - From rev.3.2, Julius is released by the "Information Processing | ||
| 1277 | - Society, Continuous Speech Consortium". | ||
| 1278 | - | ||
| 1279 | - The Windows DLL version was developed and released by Hideki BANNO | ||
| 1280 | - (Nagoya University). | ||
| 1281 | - | ||
| 1282 | - The Windows Microsoft Speech API compatible version was developed by | ||
| 1283 | - Takashi SUMIYOSHI (Kyoto University). | ||
| 1284 | - | ||
| 1285 | - | ||
| 1286 | - | ||
| 1287 | - 02/11/2009 JULIUS(1) |
recognize/src/julius/doc/manuals/mkbingram.txt
| @@ -1,97 +0,0 @@ | @@ -1,97 +0,0 @@ | ||
| 1 | - mkbingram | ||
| 2 | - | ||
| 3 | -MKBINGRAM(1) MKBINGRAM(1) | ||
| 4 | - | ||
| 5 | - | ||
| 6 | - | ||
| 7 | -NAME | ||
| 8 | - mkbingram | ||
| 9 | - - make binary N-gram from ARPA N-gram file | ||
| 10 | - | ||
| 11 | -SYNOPSIS | ||
| 12 | - mkbingram [-nlr forward_ngram.arpa] [-nrl backward_ngram.arpa] | ||
| 13 | - [-d old_bingram_file] {output_bingram_file} | ||
| 14 | - | ||
| 15 | -DESCRIPTION | ||
| 16 | - mkbingram is a tool to convert N-gram definition file(s) in ARPA | ||
| 17 | - standard format to a compact Julius binary format. It greatly reduces the | ||
| 18 | - initial loading time of the N-gram. It can read gzipped files | ||
| 19 | - directly. | ||
| 20 | - | ||
| 21 | - From rev.4.0, Julius can deal with forward N-gram, backward N-gram and | ||
| 22 | - their combinations. So, mkbingram now generates binary N-gram file from | ||
| 23 | - one of them, or combining them two to produce one binary N-gram. | ||
| 24 | - | ||
| 25 | - When only a forward N-gram is specified, mkbingram generates binary | ||
| 26 | - N-gram from only the forward N-gram. When using this binary N-gram at | ||
| 27 | - Julius, it performs the 1st pass with the 2-gram probabilities in the | ||
| 28 | - N-gram, and run the 2nd pass with the given N-gram fully, with | ||
| 29 | - converting forward probabilities to backward probabilities by Bayes | ||
| 30 | - rule. | ||
| 31 | - | ||
| 32 | - When only a backward N-gram is specified, mkbingram generates a binary | ||
| 33 | - N-gram file that contains only the backward N-gram. The 1st pass will | ||
| 34 | - use forward 2-gram probabilities that can be computed from the backward | ||
| 35 | - 2-gram using Bayes rule, and the 2nd pass use the given backward N-gram | ||
| 36 | - fully. | ||
| 37 | - | ||
| 38 | - When both forward and backward N-grams are specified, the 2-gram part | ||
| 39 | - in the forward N-gram and all backward N-gram will be combined into | ||
| 40 | - single bingram file. The forward 2-gram will be applied for the 1st | ||
| 41 | - pass and backward N-gram for the 2nd pass. Note that both N-gram should | ||
| 42 | - be trained in the same corpus with same parameters (i.e. cut-off | ||
| 43 | - thresholds), with same vocabulary. | ||
| 44 | - | ||
| 45 | - The old binary N-gram produced by mkbingram of version 3.x and earlier | ||
| 46 | - can be used in Julius-4, but you can convert the old version to the new | ||
| 47 | - version by specifying it as input of current mkbingram by option "-d". | ||
| 48 | - | ||
| 49 | - Please note that binary N-gram file converted by mkbingram of version | ||
| 50 | - 4.0 and later cannot be read by older Julius 3.x. | ||
| 51 | - | ||
| 52 | -OPTIONS | ||
| 53 | - -nlr forward_ngram.arpa | ||
| 54 | - Read in a forward (left-to-right) word N-gram file in ARPA standard | ||
| 55 | - format. | ||
| 56 | - | ||
| 57 | - -nrl backward_ngram.arpa | ||
| 58 | - Read in a backward (right-to-left) word N-gram file in ARPA standard | ||
| 59 | - format. | ||
| 60 | - | ||
| 61 | - -d old_bingram_file | ||
| 62 | - Read in a binary N-gram file. | ||
| 63 | - | ||
| 64 | - -swap | ||
| 65 | - Swap BOS word <s> and EOS word </s> in N-gram. | ||
| 66 | - | ||
| 67 | - output_bingram_file | ||
| 68 | - binary N-gram file name to output. | ||
| 69 | - | ||
| 70 | -EXAMPLES | ||
| 71 | - Convert a set of forward and backward N-gram in ARPA format into Julius | ||
| 72 | - binary form: | ||
| 73 | - Convert a single forward 4-gram in ARPA format into a binary file: | ||
| 74 | - Convert old binary N-gram file to current format: | ||
| 75 | - | ||
| 76 | -SEE ALSO | ||
| 77 | - julius ( 1 ) , | ||
| 78 | - mkbinhmm ( 1 ) , | ||
| 79 | - mkbinhmmlist ( 1 ) | ||
| 80 | - | ||
| 81 | -COPYRIGHT | ||
| 82 | - Copyright (c) 1997-2000 Information-technology Promotion Agency, Japan | ||
| 83 | - | ||
| 84 | - Copyright (c) 1991-2008 Kawahara Lab., Kyoto University | ||
| 85 | - | ||
| 86 | - Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and | ||
| 87 | - Technology | ||
| 88 | - | ||
| 89 | - Copyright (c) 2005-2008 Julius project team, Nagoya Institute of | ||
| 90 | - Technology | ||
| 91 | - | ||
| 92 | -LICENSE | ||
| 93 | - The same as Julius. | ||
| 94 | - | ||
| 95 | - | ||
| 96 | - | ||
| 97 | - 02/11/2009 MKBINGRAM(1) |
recognize/src/julius/doc/manuals/mkbinhmm.txt
| @@ -1,78 +0,0 @@ | @@ -1,78 +0,0 @@ | ||
| 1 | - mkbinhmm | ||
| 2 | - | ||
| 3 | -MKBINHMM(1) MKBINHMM(1) | ||
| 4 | - | ||
| 5 | - | ||
| 6 | - | ||
| 7 | -NAME | ||
| 8 | - mkbinhmm | ||
| 9 | - - convert HMM definition file in HTK ascii format to Julius binary | ||
| 10 | - format | ||
| 11 | - | ||
| 12 | -SYNOPSIS | ||
| 13 | - mkbinhmm [-htkconf HTKConfigFile] {hmmdefs_file} {binhmm_file} | ||
| 14 | - | ||
| 15 | -DESCRIPTION | ||
| 16 | - mkbinhmm converts an HMM definition file in HTK ascii format into a | ||
| 17 | - binary HMM file for Julius. It will greatly speed up the launch | ||
| 18 | - process. | ||
| 19 | - | ||
| 20 | - You can also embed acoustic analysis condition parameters needed for | ||
| 21 | - recognition into the output file. To embed the parameters, specify the | ||
| 22 | - HTK Config file you have used to extract acoustic features for training | ||
| 23 | - the HMM by the option "-htkconf". | ||
| 24 | - | ||
| 25 | - The embedded parameters in a binary HMM format will be loaded into | ||
| 26 | - Julius automatically, so you do not need to specify the acoustic | ||
| 27 | - feature options at run time. It will be convenient when you deliver an | ||
| 28 | - acoustic model. | ||
| 29 | - | ||
| 30 | - You can also specify binary file as the input. This can be used to | ||
| 31 | - update the old binary format into new one, or to embed the config | ||
| 32 | - parameters into the already existing binary files. If the input binhmm | ||
| 33 | - already has acoustic analysis parameters embedded, they will be | ||
| 34 | - overridden by the specified values. | ||
| 35 | - | ||
| 36 | - | ||
| 37 | - mkbinhmm can read gzipped file as input. | ||
| 38 | - | ||
| 39 | -OPTIONS | ||
| 40 | - -htkconf HTKConfigFile | ||
| 41 | - HTK Config file you used at training time. If specified, the values | ||
| 42 | - are embedded to the output file. | ||
| 43 | - | ||
| 44 | - hmmdefs_file | ||
| 45 | - The source HMM definition file in HTK ascii format or Julius binary | ||
| 46 | - format. | ||
| 47 | - | ||
| 48 | - binhmm_file | ||
| 49 | - Output file. | ||
| 50 | - | ||
| 51 | -EXAMPLES | ||
| 52 | - Convert HTK ascii format HMM definition file into Julius binary file: | ||
| 53 | - Furthermore, embed acoustic feature parameters as specified by Config | ||
| 54 | - file | ||
| 55 | - Embed the acoustic parameters into an existing binary file | ||
| 56 | - | ||
| 57 | -SEE ALSO | ||
| 58 | - julius ( 1 ) , | ||
| 59 | - mkbingram ( 1 ) , | ||
| 60 | - mkbinhmmlist ( 1 ) | ||
| 61 | - | ||
| 62 | -COPYRIGHT | ||
| 63 | - Copyright (c) 1997-2000 Information-technology Promotion Agency, Japan | ||
| 64 | - | ||
| 65 | - Copyright (c) 1991-2008 Kawahara Lab., Kyoto University | ||
| 66 | - | ||
| 67 | - Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and | ||
| 68 | - Technology | ||
| 69 | - | ||
| 70 | - Copyright (c) 2005-2008 Julius project team, Nagoya Institute of | ||
| 71 | - Technology | ||
| 72 | - | ||
| 73 | -LICENSE | ||
| 74 | - The same as Julius. | ||
| 75 | - | ||
| 76 | - | ||
| 77 | - | ||
| 78 | - 10/02/2008 MKBINHMM(1) |
recognize/src/julius/doc/manuals/mkbinhmmlist.txt
| @@ -1,64 +0,0 @@ | @@ -1,64 +0,0 @@ | ||
| 1 | - mkbinhmmlist | ||
| 2 | - | ||
| 3 | -MKBINHMMLIST(1) MKBINHMMLIST(1) | ||
| 4 | - | ||
| 5 | - | ||
| 6 | - | ||
| 7 | -NAME | ||
| 8 | - mkbinhmmlist | ||
| 9 | - - convert HMMList file into binary format | ||
| 10 | - | ||
| 11 | -SYNOPSIS | ||
| 12 | - mkbinhmmlist {hmmdefs_file} {HMMList_file} {output_binhmmlist_file} | ||
| 13 | - | ||
| 14 | -DESCRIPTION | ||
| 15 | - mkbinhmmlist converts a HMMList file to binary format. Since the index | ||
| 16 | - trees for lookup are also stored in the binary format, it will speed up | ||
| 17 | - the startup of Julius, namely when using big HMMList file. | ||
| 18 | - | ||
| 19 | - For conversion, HMM definition file hmmdefs_file that will be used | ||
| 20 | - together at Julius needs to be specified. The format of the HMM | ||
| 21 | - definition file can be either ascii or Julius binary format. | ||
| 22 | - | ||
| 23 | - The output binary file can be used in Julius as the same by "-hlist". | ||
| 24 | - The format will be auto-detected by Julius. | ||
| 25 | - | ||
| 26 | - | ||
| 27 | - mkbinhmmlist can read gzipped file. | ||
| 28 | - | ||
| 29 | -OPTIONS | ||
| 30 | - hmmdefs_file | ||
| 31 | - Acoustic HMM definition file, in HMM ascii format or Julius binary | ||
| 32 | - format. | ||
| 33 | - | ||
| 34 | - HMMList_file | ||
| 35 | - Source HMMList file | ||
| 36 | - | ||
| 37 | - output_binhmmlist_file | ||
| 38 | - Output file, will be overwritten if it already exists. | ||
| 39 | - | ||
| 40 | -EXAMPLES | ||
| 41 | - Convert a HMMList file logicalTri into binary format and store to | ||
| 42 | - logicalTri.bin: | ||
| 43 | - | ||
| 44 | -SEE ALSO | ||
| 45 | - julius ( 1 ) , | ||
| 46 | - mkbinhmm ( 1 ) | ||
| 47 | - | ||
| 48 | -COPYRIGHT | ||
| 49 | - Copyright (c) 1997-2000 Information-technology Promotion Agency, Japan | ||
| 50 | - | ||
| 51 | - Copyright (c) 1991-2008 Kawahara Lab., Kyoto University | ||
| 52 | - | ||
| 53 | - Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and | ||
| 54 | - Technology | ||
| 55 | - | ||
| 56 | - Copyright (c) 2005-2008 Julius project team, Nagoya Institute of | ||
| 57 | - Technology | ||
| 58 | - | ||
| 59 | -LICENSE | ||
| 60 | - The same as Julius. | ||
| 61 | - | ||
| 62 | - | ||
| 63 | - | ||
| 64 | - 10/02/2008 MKBINHMMLIST(1) |
recognize/src/julius/doc/manuals/mkdfa.pl.txt
| @@ -1,67 +0,0 @@ | @@ -1,67 +0,0 @@ | ||
| 1 | - mkdfa.pl | ||
| 2 | - | ||
| 3 | -MKDFA.PL(1) MKDFA.PL(1) | ||
| 4 | - | ||
| 5 | - | ||
| 6 | - | ||
| 7 | -NAME | ||
| 8 | - mkdfa.pl | ||
| 9 | - - grammar compiler | ||
| 10 | - | ||
| 11 | -SYNOPSIS | ||
| 12 | - mkdfa.pl [options...] {prefix} | ||
| 13 | - | ||
| 14 | -DESCRIPTION | ||
| 15 | - mkdfa.pl compiles the Julian format grammar (.grammar and .voca) to | ||
| 16 | - Julian native formats (.dfa and .dict). In addition, ".term" will be | ||
| 17 | - also generated that stores correspondence of category ID used in the | ||
| 18 | - output files to the source category name. | ||
| 19 | - | ||
| 20 | - | ||
| 21 | - prefix should be the common file name prefix of ".grammar" and ".voca" | ||
| 22 | - file. From prefix.grammar and prefix.voca file, prefix.dfa, prefix.dict | ||
| 23 | - and prefix.term will be output. | ||
| 24 | - | ||
| 25 | -OPTIONS | ||
| 26 | - -n | ||
| 27 | - Not process dictionary. You can only convert .grammar file to .dfa | ||
| 28 | - file without .voca file. | ||
| 29 | - | ||
| 30 | -ENVIRONMENT VARIABLES | ||
| 31 | - TMP or TEMP | ||
| 32 | - Set directory to store temporary files. If not specified, one of them | ||
| 33 | - on the following list will be used: /tmp, /var/tmp, /WINDOWS/Temp, | ||
| 34 | - /WINNT/Temp. | ||
| 35 | - | ||
| 36 | -EXAMPLES | ||
| 37 | - Convert a grammar foo.grammar and foo.voca to foo.dfa, foo.dict and | ||
| 38 | - foo.term. | ||
| 39 | - | ||
| 40 | -SEE ALSO | ||
| 41 | - julius ( 1 ) , | ||
| 42 | - generate ( 1 ) , | ||
| 43 | - nextword ( 1 ) , | ||
| 44 | - accept_check ( 1 ) , | ||
| 45 | - dfa_minimize ( 1 ) | ||
| 46 | - | ||
| 47 | -DIAGNOSTICS | ||
| 48 | - mkdfa.pl invokes mkfa and dfa_minimize internally. They should be | ||
| 49 | - placed at the same directory as mkdfa.pl. | ||
| 50 | - | ||
| 51 | -COPYRIGHT | ||
| 52 | - Copyright (c) 1997-2000 Information-technology Promotion Agency, Japan | ||
| 53 | - | ||
| 54 | - Copyright (c) 1991-2008 Kawahara Lab., Kyoto University | ||
| 55 | - | ||
| 56 | - Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and | ||
| 57 | - Technology | ||
| 58 | - | ||
| 59 | - Copyright (c) 2005-2008 Julius project team, Nagoya Institute of | ||
| 60 | - Technology | ||
| 61 | - | ||
| 62 | -LICENSE | ||
| 63 | - The same as Julius. | ||
| 64 | - | ||
| 65 | - | ||
| 66 | - | ||
| 67 | - 10/02/2008 MKDFA.PL(1) |
recognize/src/julius/doc/manuals/mkgshmm.txt
| @@ -1,50 +0,0 @@ | @@ -1,50 +0,0 @@ | ||
| 1 | - mkgshmm | ||
| 2 | - | ||
| 3 | -MKGSHMM(1) MKGSHMM(1) | ||
| 4 | - | ||
| 5 | - | ||
| 6 | - | ||
| 7 | -NAME | ||
| 8 | - mkgshmm | ||
| 9 | - - convert monophone HMM to GS HMM for Julius | ||
| 10 | - | ||
| 11 | -SYNOPSIS | ||
| 12 | - mkgshmm {monophone_hmmdefs} | ||
| 13 | - > | ||
| 14 | - {outputfile} | ||
| 15 | - | ||
| 16 | -DESCRIPTION | ||
| 17 | - mkgshmm converts monophone HMM definition file in HTK format into a | ||
| 18 | - special format for Gaussian Mixture Selection (GMS) in Julius. | ||
| 19 | - | ||
| 20 | - GMS is an algorithm to reduce the amount of acoustic computation with | ||
| 21 | - triphone HMM, by pre-selection of promising gaussian mixtures using | ||
| 22 | - likelihoods of corresponding monophone mixtures. | ||
| 23 | - | ||
| 24 | -EXAMPLES | ||
| 25 | - (1) Prepare a monophone model which was trained by the same corpus as | ||
| 26 | - target triphone model. | ||
| 27 | - | ||
| 28 | - (2) Convert the monophone model using mkgshmm. | ||
| 29 | - (3) Specify the output file in Julius with option "-gshmm" | ||
| 30 | - | ||
| 31 | -SEE ALSO | ||
| 32 | - julius ( 1 ) | ||
| 33 | - | ||
| 34 | -COPYRIGHT | ||
| 35 | - Copyright (c) 1997-2000 Information-technology Promotion Agency, Japan | ||
| 36 | - | ||
| 37 | - Copyright (c) 1991-2008 Kawahara Lab., Kyoto University | ||
| 38 | - | ||
| 39 | - Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and | ||
| 40 | - Technology | ||
| 41 | - | ||
| 42 | - Copyright (c) 2005-2008 Julius project team, Nagoya Institute of | ||
| 43 | - Technology | ||
| 44 | - | ||
| 45 | -LICENSE | ||
| 46 | - The same as Julius. | ||
| 47 | - | ||
| 48 | - | ||
| 49 | - | ||
| 50 | - 10/02/2008 MKGSHMM(1) |
recognize/src/julius/doc/manuals/mkss.txt
| @@ -1,55 +0,0 @@ | @@ -1,55 +0,0 @@ | ||
| 1 | - mkss | ||
| 2 | - | ||
| 3 | -MKSS(1) MKSS(1) | ||
| 4 | - | ||
| 5 | - | ||
| 6 | - | ||
| 7 | -NAME | ||
| 8 | - mkss | ||
| 9 | - - calculate average spectrum for spectral subtraction | ||
| 10 | - | ||
| 11 | -SYNOPSIS | ||
| 12 | - mkss [options...] {filename} | ||
| 13 | - | ||
| 14 | -DESCRIPTION | ||
| 15 | - mkss is a tool to estimate noise spectrum for spectral subtraction on | ||
| 16 | - Julius. It reads a few seconds of sound data from microphone input, | ||
| 17 | - calculate the average spectrum and save it to a file. The output file | ||
| 18 | - can be used as a noise spectrum data in Julius (option "-ssload"). | ||
| 19 | - | ||
| 20 | - The recording will start immediately after startup. Sampling format is | ||
| 21 | - 16bit, monaural. If the output file already exists, it will be overwritten. | ||
| 22 | - | ||
| 23 | -OPTIONS | ||
| 24 | - -freq Hz | ||
| 25 | - Sampling frequency in Hz (default: 16,000) | ||
| 26 | - | ||
| 27 | - -len msec | ||
| 28 | - capture length in milliseconds (default: 3000) | ||
| 29 | - | ||
| 30 | - -fsize sample_num | ||
| 31 | - frame size in number of samples (default: 400) | ||
| 32 | - | ||
| 33 | - -fshift sample_num | ||
| 34 | - frame shift in number of samples (default: 160) | ||
| 35 | - | ||
| 36 | -SEE ALSO | ||
| 37 | - julius ( 1 ) | ||
| 38 | - | ||
| 39 | -COPYRIGHT | ||
| 40 | - Copyright (c) 1997-2000 Information-technology Promotion Agency, Japan | ||
| 41 | - | ||
| 42 | - Copyright (c) 1991-2008 Kawahara Lab., Kyoto University | ||
| 43 | - | ||
| 44 | - Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and | ||
| 45 | - Technology | ||
| 46 | - | ||
| 47 | - Copyright (c) 2005-2008 Julius project team, Nagoya Institute of | ||
| 48 | - Technology | ||
| 49 | - | ||
| 50 | -LICENSE | ||
| 51 | - The same as Julius. | ||
| 52 | - | ||
| 53 | - | ||
| 54 | - | ||
| 55 | - 10/02/2008 MKSS(1) |
recognize/src/julius/doc/manuals/nextword.txt
| @@ -1,82 +0,0 @@ | @@ -1,82 +0,0 @@ | ||
| 1 | - nextword | ||
| 2 | - | ||
| 3 | -NEXTWORD(1) NEXTWORD(1) | ||
| 4 | - | ||
| 5 | - | ||
| 6 | - | ||
| 7 | -NAME | ||
| 8 | - nextword | ||
| 9 | - - display next predicted words (in reverse order) | ||
| 10 | - | ||
| 11 | -SYNOPSIS | ||
| 12 | - nextword [-t] [-r] [-s spname] [-v] {prefix} | ||
| 13 | - | ||
| 14 | -DESCRIPTION | ||
| 15 | - Given a partial (part of) sentence from the end, it outputs the next | ||
| 16 | - words allowed in the specified grammar. | ||
| 17 | - | ||
| 18 | - | ||
| 19 | - .dfa, .dict and .term files are needed to execute. They can be | ||
| 20 | - generated from .grammar and .voca file by mkdfa.pl. | ||
| 21 | - | ||
| 22 | - Please note that the latter part of sentence should be given, since the | ||
| 23 | - main 2nd pass does a right-to-left parsing. | ||
| 24 | - | ||
| 25 | -OPTIONS | ||
| 26 | - -t | ||
| 27 | - Input / Output in category name. (default: word) | ||
| 28 | - | ||
| 29 | - -r | ||
| 30 | - Enter in reverse order | ||
| 31 | - | ||
| 32 | - -s spname | ||
| 33 | - the name string of short-pause word to be suppressed (default: "sp") | ||
| 34 | - | ||
| 35 | - -v | ||
| 36 | - Debug output. | ||
| 37 | - | ||
| 38 | -EXAMPLES | ||
| 39 | - Example output of a sample grammar "fruit": | ||
| 40 | - | ||
| 41 | - % nextword fruit | ||
| 42 | - Stat: init_voca: read 36 words | ||
| 43 | - Reading in term file (optional)...done | ||
| 44 | - 15 categories, 36 words | ||
| 45 | - DFA has 26 nodes and 42 arcs | ||
| 46 | - ----- | ||
| 47 | - command completion is disabled | ||
| 48 | - ----- | ||
| 49 | - wseq > A BANANA </s> | ||
| 50 | - [wseq: A BANANA </s>] | ||
| 51 | - [cate: (NUM_1|NUM_1|A|A) FRUIT_SINGULAR NS_E] | ||
| 52 | - PREDICTED CATEGORIES/WORDS: | ||
| 53 | - NS_B (<s> ) | ||
| 54 | - HAVE (HAVE ) | ||
| 55 | - WANT (WANT ) | ||
| 56 | - NS_B (<s> ) | ||
| 57 | - HAVE (HAVE ) | ||
| 58 | - WANT (WANT ) | ||
| 59 | - | ||
| 60 | - | ||
| 61 | -SEE ALSO | ||
| 62 | - mkdfa.pl ( 1 ) , | ||
| 63 | - generate ( 1 ) , | ||
| 64 | - accept_check ( 1 ) | ||
| 65 | - | ||
| 66 | -COPYRIGHT | ||
| 67 | - Copyright (c) 1997-2000 Information-technology Promotion Agency, Japan | ||
| 68 | - | ||
| 69 | - Copyright (c) 1991-2008 Kawahara Lab., Kyoto University | ||
| 70 | - | ||
| 71 | - Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and | ||
| 72 | - Technology | ||
| 73 | - | ||
| 74 | - Copyright (c) 2005-2008 Julius project team, Nagoya Institute of | ||
| 75 | - Technology | ||
| 76 | - | ||
| 77 | -LICENSE | ||
| 78 | - The same as Julius. | ||
| 79 | - | ||
| 80 | - | ||
| 81 | - | ||
| 82 | - 10/02/2008 NEXTWORD(1) |
recognize/src/julius/install/julius-4.2.2.tar.gz
No preview for this file type