translate.h 31.9 KB
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779
/*
 * Copyright (C) 2005 to 2014 by Jonathan Duddington
 * email: jonsd@users.sourceforge.net
 * Copyright (C) 2015 Reece H. Dunn
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see: <http://www.gnu.org/licenses/>.
 */

#ifdef __cplusplus
extern "C"
{
#endif

#define L(c1, c2) (c1<<8)+c2 // combine two characters into an integer for translator name

#define CTRL_EMBEDDED    0x01 // control character at the start of an embedded command
#define REPLACED_E       'E' // 'e' replaced by silent e

#define N_WORD_PHONEMES  200 // max phonemes in a word
#define N_WORD_BYTES     160 // max bytes for the UTF8 characters in a word
#define N_CLAUSE_WORDS   300 // max words in a clause
#define N_TR_SOURCE      800 // the source text of a single clause (UTF8 bytes)

#define N_RULE_GROUP2    120 // max num of two-letter rule chains
#define N_HASH_DICT     1024
#define N_CHARSETS        20
#define N_LETTER_GROUPS   95 // maximum is 127-32

// dictionary flags, word 1
// bits 0-3  stressed syllable,  bit 6=unstressed
#define FLAG_SKIPWORDS        0x80
#define FLAG_PREPAUSE        0x100

#define FLAG_STRESS_END      0x200 // full stress if at end of clause
#define FLAG_STRESS_END2     0x400 // full stress if at end of clause, or only followed by unstressed
#define FLAG_UNSTRESS_END    0x800 // reduce stress at end of clause
#define FLAG_SPELLWORD      0x1000 // re-translate the word as individual letters, separated by spaces
#define FLAG_ACCENT_BEFORE  0x1000 // say this accent name before the letter name
#define FLAG_ABBREV         0x2000 // spell as letters, even with a vowel, OR use specified pronunciation rather than split into letters
#define FLAG_DOUBLING       0x4000 // doubles the following consonant

#define BITNUM_FLAG_ALT         14 // bit number of FLAG_ALT_TRANS - 1
#define FLAG_ALT_TRANS      0x8000 // language specific
#define FLAG_ALT2_TRANS    0x10000 // language specific
#define FLAG_ALT3_TRANS    0x20000 // language specific
#define FLAG_ALT4_TRANS    0x40000 // language specific
#define FLAG_ALT5_TRANS    0x80000 // language specific
#define FLAG_ALT6_TRANS   0x100000 // language specific
#define FLAG_ALT7_TRANS   0x200000 // language specific

#define FLAG_COMBINE      0x800000 // combine with the next word
#define FLAG_ALLOW_DOT  0x01000000 // ignore '.' after word (abbreviation)
#define FLAG_NEEDS_DOT  0x02000000 // only if the word is followed by a dot
#define FLAG_WAS_UNPRONOUNCABLE  0x04000000  // the unpronounceable routine was used
#define FLAG_MAX3       0x08000000 // limit to 3 repeats
#define FLAG_PAUSE1     0x10000000 // shorter prepause
#define FLAG_TEXTMODE   0x20000000 // word translates to replacement text, not phonemes
#define BITNUM_FLAG_TEXTMODE    29

#define FLAG_FOUND_ATTRIBUTES 0x40000000 // word was found in the dictionary list (has attributes)
#define FLAG_FOUND            0x80000000 // pronunciation was found in the dictionary list

// dictionary flags, word 2
#define FLAG_VERBF             0x1 // verb follows
#define FLAG_VERBSF            0x2 // verb follows, may have -s suffix
#define FLAG_NOUNF             0x4 // noun follows
#define FLAG_PASTF             0x8 // past tense follows
#define FLAG_VERB             0x10 // pronunciation for verb
#define FLAG_NOUN             0x20 // pronunciation for noun
#define FLAG_PAST             0x40 // pronunciation for past tense
#define FLAG_VERB_EXT        0x100 // extend the 'verb follows'
#define FLAG_CAPITAL         0x200 // pronunciation if initial letter is upper case
#define FLAG_ALLCAPS         0x400 // only if the word is all capitals
#define FLAG_ACCENT          0x800 // character name is base-character name + accent name
#define FLAG_HYPHENATED     0x1000 // multiple-words, but needs hyphen between parts 1 and 2
#define FLAG_SENTENCE       0x2000 // only if the clause is a sentence
#define FLAG_ONLY           0x4000
#define FLAG_ONLY_S         0x8000
#define FLAG_STEM          0x10000 // must have a suffix
#define FLAG_ATEND         0x20000 // use this pronunciation if at end of clause
#define FLAG_ATSTART       0x40000 // use this pronunciation if at start of clause
#define FLAG_NATIVE        0x80000 // not if we've switched translators
#define FLAG_LOOKUP_SYMBOL 0x40000000 // to indicate called from Lookup()

#define BITNUM_FLAG_ALLCAPS    0x2a
#define BITNUM_FLAG_HYPHENATED 0x2c
#define BITNUM_FLAG_ONLY       0x2e
#define BITNUM_FLAG_ONLY_S     0x2f

// wordflags, flags in source word
#define FLAG_ALL_UPPER     0x1   // no lower case letters in the word
#define FLAG_FIRST_UPPER   0x2   // first letter is upper case
#define FLAG_UPPERS        0x3   // FLAG_ALL_UPPER | FLAG_FIRST_UPPER
#define FLAG_HAS_PLURAL    0x4   // upper-case word with s or 's lower-case ending
#define FLAG_PHONEMES      0x8   // word is phonemes
#define FLAG_LAST_WORD     0x10  // last word in clause
#define FLAG_EMBEDDED      0x40  // word is preceded by embedded commands
#define FLAG_HYPHEN        0x80
#define FLAG_NOSPACE       0x100 // word is not seperated from previous word by a space
#define FLAG_FIRST_WORD    0x200 // first word in clause
#define FLAG_FOCUS         0x400 // the focus word of a clause
#define FLAG_EMPHASIZED    0x800
#define FLAG_EMPHASIZED2   0xc00 // FLAG_FOCUS | FLAG_EMPHASIZED
#define FLAG_DONT_SWITCH_TRANSLATOR  0x1000
#define FLAG_SUFFIX_REMOVED  0x2000
#define FLAG_HYPHEN_AFTER    0x4000
#define FLAG_ORDINAL       0x8000   // passed to TranslateNumber() to indicate an ordinal number
#define FLAG_HAS_DOT       0x10000  // dot after this word
#define FLAG_COMMA_AFTER   0x20000  // comma after this word
#define FLAG_MULTIPLE_SPACES 0x40000  // word is preceded by multiple spaces, newline, or tab
#define FLAG_INDIVIDUAL_DIGITS 0x80000  // speak number as individual digits
#define FLAG_DELETE_WORD     0x100000   // don't speak this word, it has been spoken as part of the previous word
#define FLAG_CHAR_REPLACED   0x200000   // characters have been replaced by .replace in the *_rules
#define FLAG_TRANSLATOR2     0x400000   // retranslating using a different language
#define FLAG_PREFIX_REMOVED  0x800000   // a prefix has been removed from this word

#define FLAG_SUFFIX_VOWEL  0x08000000 // remember an initial vowel from the suffix
#define FLAG_NO_TRACE      0x10000000 // passed to TranslateRules() to suppress dictionary lookup printout
#define FLAG_NO_PREFIX     0x20000000
#define FLAG_UNPRON_TEST   0x80000000 // do unpronounability test on the beginning of the word

// prefix/suffix flags (bits 8 to 14, bits 16 to 22) don't use 0x8000, 0x800000
#define SUFX_E        0x0100   // e may have been added
#define SUFX_I        0x0200   // y may have been changed to i
#define SUFX_P        0x0400   // prefix
#define SUFX_V        0x0800   // suffix means use the verb form pronunciation
#define SUFX_D        0x1000   // previous letter may have been doubled
#define SUFX_F        0x2000   // verb follows
#define SUFX_Q        0x4000   // don't retranslate
#define SUFX_T        0x10000   // don't affect the stress position in the stem
#define SUFX_B        0x20000  // break, this character breaks the word into stem and suffix (used with SUFX_P)
#define SUFX_A        0x40000  // remember that the suffix starts with a vowel
#define SUFX_M        0x80000  // bit 19, allow multiple suffixes

#define SUFX_UNPRON     0x8000   // used to return $unpron flag from *_rules

#define FLAG_ALLOW_TEXTMODE  0x02  // allow dictionary to translate to text rather than phonemes
#define FLAG_SUFX       0x04
#define FLAG_SUFX_S     0x08
#define FLAG_SUFX_E_ADDED 0x10

// codes in dictionary rules
#define RULE_PRE         1
#define RULE_POST        2
#define RULE_PHONEMES    3
#define RULE_PH_COMMON   4 // At start of rule. Its phoneme string is used by subsequent rules
#define RULE_CONDITION   5 // followed by condition number (byte)
#define RULE_GROUP_START 6
#define RULE_GROUP_END   7
#define RULE_PRE_ATSTART 8 // as RULE_PRE but also match with 'start of word'
#define RULE_LINENUM     9 // next 2 bytes give a line number, for debugging purposes

#define RULE_SPACE        32 // ascii space
#define RULE_SYLLABLE     21 // @
#define RULE_STRESSED     10 // &
#define RULE_DOUBLE       11 // %
#define RULE_INC_SCORE    12 // +
#define RULE_DEL_FWD      13 // #
#define RULE_ENDING       14 // S
#define RULE_DIGIT        15 // D digit
#define RULE_NONALPHA     16 // Z non-alpha
#define RULE_LETTERGP     17 // A B C H F G Y   letter group number
#define RULE_LETTERGP2    18 // L + letter group number
#define RULE_CAPITAL      19 // !   word starts with a capital letter
#define RULE_REPLACEMENTS 20 // section for character replacements
#define RULE_SKIPCHARS    23 // J
#define RULE_NO_SUFFIX    24 // N
#define RULE_NOTVOWEL     25 // K
#define RULE_IFVERB       26 // V
#define RULE_DOLLAR       28 // $ commands
#define RULE_NOVOWELS     29 // X no vowels up to word boundary
#define RULE_SPELLING     31 // W while spelling letter-by-letter
#define RULE_LAST_RULE    31

#define DOLLAR_UNPR     0x01
#define DOLLAR_NOPREFIX 0x02
#define DOLLAR_LIST     0x03

#define LETTERGP_A      0
#define LETTERGP_B      1
#define LETTERGP_C      2
#define LETTERGP_H      3
#define LETTERGP_F      4
#define LETTERGP_G      5
#define LETTERGP_Y      6
#define LETTERGP_VOWEL2 7

// Punctuation types  returned by ReadClause()
// bits 0-11 pause x 10mS
// bits12-14 intonation type
// bit 15- don't need space after the punctuation
// bit 19=sentence, bit 18=clause,  bits 17=voice change
// bit 16 used to distinguish otherwise identical types
// bit 20= punctuation character can be inside a word (Armenian)
// bit 21= speak the name of the punctuation character
// bit 22= dot after the last word
// bit 23= pause is x 320mS (not x 10mS)

#define CLAUSE_BIT_SENTENCE     0x80000
#define CLAUSE_BIT_CLAUSE       0x40000
#define CLAUSE_BIT_VOICE        0x20000
#define CLAUSE_BITS_INTONATION   0x7000
#define PUNCT_IN_WORD          0x100000
#define PUNCT_SAY_NAME         0x200000
#define CLAUSE_DOT             0x400000
#define CLAUSE_PAUSE_LONG      0x800000

#define CLAUSE_NONE        ( 0 + 0x04000)
#define CLAUSE_PARAGRAPH   (70 + 0x80000)
#define CLAUSE_EOF         (40 + 0x90000)
#define CLAUSE_VOICE       ( 0 + 0x24000)
#define CLAUSE_PERIOD      (40 + 0x80000)
#define CLAUSE_COMMA       (20 + 0x41000)
#define CLAUSE_SHORTCOMMA  ( 4 + 0x41000)
#define CLAUSE_SHORTFALL   ( 4 + 0x40000)
#define CLAUSE_QUESTION    (40 + 0x82000)
#define CLAUSE_EXCLAMATION (45 + 0x83000)
#define CLAUSE_COLON       (30 + 0x40000)
#define CLAUSE_SEMICOLON   (30 + 0x41000)

#define SAYAS_CHARS        0x12
#define SAYAS_GLYPHS       0x13
#define SAYAS_SINGLE_CHARS 0x14
#define SAYAS_KEY          0x24
#define SAYAS_DIGITS       0x40 // + number of digits
#define SAYAS_DIGITS1      0xc1

#define CHAR_EMPHASIS    0x0530 // this is an unused character code
#define CHAR_COMMA_BREAK 0x0557 // unused character code

// Rule:
// [4] [match] [1 pre] [2 post] [3 phonemes] 0
//     match 1 pre 2 post 0     - use common phoneme string
//     match 1 pre 2 post 3 0   - empty phoneme string

typedef const char *constcharptr;

typedef struct {
	int points;
	const char *phonemes;
	int end_type;
	char *del_fwd;
} MatchRecord;

// used to mark words with the source[] buffer
typedef struct {
	unsigned int flags;
	unsigned short start;
	unsigned char pre_pause;
	unsigned char wmark;
	unsigned short sourceix;
	unsigned char length;
} WORD_TAB;

typedef struct {
	int type;
	int parameter[N_SPEECH_PARAM];
} PARAM_STACK;

extern PARAM_STACK param_stack[];
extern const int param_defaults[N_SPEECH_PARAM];

typedef struct {
	const char *name;
	int offset;
	unsigned short range_min, range_max;
	int language;
	int flags;
} ALPHABET;

extern ALPHABET alphabets[];
extern ALPHABET *current_alphabet;
// alphabet flags
#define AL_DONT_NAME    0x01 // don't speak the alphabet name
#define AL_NOT_LETTERS  0x02 // don't use the language for speaking letters
#define AL_WORDS        0x04 // use the language to speak words
#define AL_NOT_CODE     0x08 // don't speak the character code
#define AL_NO_SYMBOL    0x10 // don't repeat "symbol" or "character"

#define N_LOPTS       21
#define LOPT_DIERESES  1
// 1=remove [:] from unstressed syllables, 2= remove from unstressed or non-penultimate syllables
// bit 4=0, if stress < 4,  bit 4=1, if not the highest stress in the word
#define LOPT_IT_LENGTHEN 2

// 1=german
#define LOPT_PREFIXES 3

// non-zero, change voiced/unoiced to match last consonant in a cluster
// bit 0=use regressive voicing
// bit 1=LANG=cz,bg  don't propagate over [v]
// bit 2=don't propagate acress word boundaries
// bit 3=LANG=pl,  propagate over liquids and nasals
// bit 4=LANG=cz,sk  don't progagate to [v]
// bit 8=devoice word-final consonants
#define LOPT_REGRESSIVE_VOICING 4

// 0=default, 1=no check, other allow this character as an extra initial letter (default is 's')
#define LOPT_UNPRONOUNCABLE 5

// select length_mods tables,  (length_mod_tab) + (length_mod_tab0 * 100)
#define LOPT_LENGTH_MODS 6

// increase this to prevent sonorants being shortened before shortened (eg. unstressed) vowels
#define LOPT_SONORANT_MIN 7

// bit 0: don't break vowels at word boundary
#define LOPT_WORD_MERGE 8

// max. amplitude for vowel at the end of a clause
#define LOPT_MAXAMP_EOC 9

// bit 0=reduce even if phonemes are specified in the **_list file
// bit 1=don't reduce the strongest vowel in a word which is marked 'unstressed'
#define LOPT_REDUCE 10

// LANG=cs,sk  combine some prepositions with the following word, if the combination has N or fewer syllables
// bits 0-3  N syllables
// bit 4=only if the second word has $alt attribute
// bit 5=not if the second word is end-of-sentence
#define LOPT_COMBINE_WORDS 11

// change [t] when followed by unstressed vowel
#define LOPT_REDUCE_T 12

// 1 = allow capitals inside a word
// 2 = stressed syllable is indicated by capitals
#define LOPT_CAPS_IN_WORD 13

// bit 0=Italian "syntactic doubling" of consoants in the word after a word marked with $double attribute
// bit 1=also after a word which ends with a stressed vowel
#define LOPT_IT_DOUBLING 14

// Call ApplySpecialAttributes() if $alt or $alt2 is set for a word
// bit 1: stressed syllable: $alt change [e],[o] to [E],[O],  $alt2 change [E],[O] to [e],[o]
#define LOPT_ALT 15

// pause for bracket (default=4), pause when annoucing bracket names (default=2)
#define LOPT_BRACKET_PAUSE 16

// bit 1, don't break clause before annoucning . ? !
#define LOPT_ANNOUNCE_PUNCT 17

// recognize long vowels (0 = don't recognize)
#define LOPT_LONG_VOWEL_THRESHOLD 18

// bit 0:  Don't allow suffices if there is no previous syllable
#define LOPT_SUFFIX 19

// bit 0  Apostrophe at start of word is part of the word
// bit 1  Apostrophe at end of word is part of the word
#define LOPT_APOSTROPHE 20

// stress_rule
#define STRESSPOSN_1L 0 // 1st syllable
#define STRESSPOSN_2L 1 // 2nd syllable
#define STRESSPOSN_2R 2 // penultimate
#define STRESSPOSN_1R 3 // final syllable
#define STRESSPOSN_3R 4 // antipenultimate

typedef struct {
// bits0-2  separate words with (1=pause_vshort, 2=pause_short, 3=pause, 4=pause_long 5=[?] phonemme)
// bit 3=don't use linking phoneme
// bit4=longer pause before STOP, VSTOP,FRIC
// bit5=length of a final vowel doesn't depend on the next phoneme
	int word_gap;
	int vowel_pause;
	int stress_rule; // 1=first syllable, 2=penultimate,  3=last

#define S_NO_DIM            0x02
#define S_FINAL_DIM         0x04
#define S_FINAL_DIM_ONLY    0x06
// bit1=don't set diminished stress,
// bit2=mark unstressed final syllables as diminished

// bit3=set consecutive unstressed syllables in unstressed words to diminished, but not in stressed words

#define S_FINAL_NO_2        0x10
// bit4=don't allow secondary stress on last syllable

#define S_NO_AUTO_2         0x20
// bit5-don't use automatic secondary stress

#define S_2_TO_HEAVY        0x40
// bit6=light syllable followed by heavy, move secondary stress to the heavy syllable. LANG=Finnish

#define S_FIRST_PRIMARY     0x80
// bit7=if more than one primary stress, make the subsequent primaries to secondary stress

#define S_FINAL_VOWEL_UNSTRESSED    0x100
// bit8=don't apply default stress to a word-final vowel

#define S_FINAL_SPANISH     0x200
// bit9=stress last syllable if it doesn't end in vowel or "s" or "n"  LANG=Spanish

#define S_2_SYL_2           0x1000
// bit12= In a 2-syllable word, if one has primary stress then give the other secondary stress

#define S_INITIAL_2         0x2000
// bit13= If there is only one syllable before the primary stress, give it a secondary stress

#define S_MID_DIM           0x10000
// bit 16= Set (not first or last) syllables to diminished stress

#define S_PRIORITY_STRESS   0x20000
// bit17= "priority" stress reduces other primary stress to "unstressed" not "secondary"

#define S_EO_CLAUSE1        0x40000
// bit18= don't lengthen short vowels more than long vowels at end-of-clause

#define S_FINAL_LONG         0x80000
// bit19=stress on final syllable if it has a long vowel, but previous syllable has a short vowel


#define S_HYPEN_UNSTRESS    0x100000
// bit20= hyphenated words, 2nd part is unstressed

#define S_NO_EOC_LENGTHEN   0x200000
// bit21= don't lengthen vowels at end-of-clause

// bit15= Give stress to the first unstressed syllable

	int stress_flags;
	int unstressed_wd1; // stress for $u word of 1 syllable
	int unstressed_wd2; // stress for $u word of >1 syllable
	int param[N_LOPTS];
	int param2[N_LOPTS];
	unsigned char *length_mods;
	unsigned char *length_mods0;

#define NUM_THOUS_SPACE  0x4
#define NUM_DECIMAL_COMMA 0x8
#define NUM_SWAP_TENS    0x10
#define NUM_AND_UNITS    0x20
#define NUM_HUNDRED_AND  0x40
#define NUM_SINGLE_AND   0x80
#define NUM_SINGLE_STRESS 0x100
#define NUM_SINGLE_VOWEL 0x200
#define NUM_OMIT_1_HUNDRED 0x400
#define NUM_1900         0x800
#define NUM_ALLOW_SPACE  0x1000
#define NUM_DFRACTION_1  0x2000
#define NUM_DFRACTION_2  0x4000
#define NUM_DFRACTION_3  0x6000
#define NUM_DFRACTION_4  0x8000
#define NUM_DFRACTION_5  0xa000
#define NUM_DFRACTION_6  0xc000
#define NUM_DFRACTION_7  0xe000    // lang=si, alternative form of number for decimal fraction digits (except the last)
#define NUM_ORDINAL_DOT   0x10000
#define NUM_NOPAUSE       0x20000
#define NUM_AND_HUNDRED   0x40000
#define NUM_THOUSAND_AND  0x80000
#define NUM_VIGESIMAL       0x100000
#define NUM_OMIT_1_THOUSAND 0x200000
#define NUM_ZERO_HUNDRED    0x400000
#define NUM_HUNDRED_AND_DIGIT   0x800000
#define NUM_ROMAN          0x1000000
#define NUM_ROMAN_CAPITALS 0x2000000
#define NUM_ROMAN_AFTER    0x4000000
#define NUM_ROMAN_ORDINAL  0x8000000
#define NUM_SINGLE_STRESS_L  0x10000000

	// bits0-1=which numbers routine to use.
	// bit2=  thousands separator must be space
	// bit3=  , decimal separator, not .
	// bit4=use three-and-twenty rather than twenty-three
	// bit5='and' between tens and units
	// bit6=add "and" after hundred or thousand
	// bit7=don't have "and" both after hundreds and also between tens and units
	// bit8=only one primary stress in tens+units
	// bit9=only one vowel betwen tens and units
	// bit10=omit "one" before "hundred"
	// bit11=say 19** as nineteen hundred
	// bit12=allow space as thousands separator (in addition to langopts.thousands_sep)
	// bits13-15  post-decimal-digits 0=single digits, 1=(LANG=it) 2=(LANG=pl) 3=(LANG=ro)

	// bit16= dot after number indicates ordinal
	// bit17= don't add pause after a number
	// bit18= 'and' before hundreds
	// bit19= 'and' after thousands if there are no hundreds
	// bit20= vigesimal number, if tens are not found
	// bit21= omit "one" before "thousand"
	// bit22= say "zero" before hundred
	// bit23= add "and" after hundreds and thousands, only if there are digits and no tens

	// bit24= recognize roman numbers
	// bit25= Roman numbers only if upper case
	// bit26= say "roman" after the number, not before
	// bit27= Roman numbers are ordinal numbers
	// bit28= only one primary stress in tens+units (on the tens)
	int numbers;

#define NUM2_THOUSANDS_VAR1     0x40
#define NUM2_THOUSANDS_VAR2     0x80
#define NUM2_THOUSANDS_VAR3     0xc0
#define NUM2_THOUSANDS_VAR4     0x100
#define NUM2_THOUSANDS_VAR5     0x140

#define NUM2_SWAP_THOUSANDS     0x200
#define NUM2_ORDINAL_NO_AND     0x800
#define NUM2_MULTIPLE_ORDINAL   0x1000
#define NUM2_NO_TEEN_ORDINALS   0x2000
#define NUM2_MYRIADS            0x4000
#define NUM2_ENGLISH_NUMERALS   0x8000
#define NUM2_PERCENT_BEFORE     0x10000
#define NUM2_OMIT_1_HUNDRED_ONLY 0x20000
#define NUM2_ORDINAL_AND_THOUSANDS 0x40000
#define NUM2_ORDINAL_DROP_VOWEL  0x80000 // currently only for tens and units
#define NUM2_ZERO_TENS          0x100000
	// bits 1-4  use variant form of numbers before thousands,millions,etc.
	// bits 6-8  use different forms of thousand, million, etc (M MA MB)
	// bit9=(LANG=rw) say "thousand" and "million" before its number, not after
	// bit11=(LANG=es,an) don't say 'and' between tens and units for ordinal numbers
	// bit12=(LANG=el,es) use ordinal form of hundreds and tens as well as units
	// bit13=(LANG=pt) don't use 11-19 numbers to make ordinals
	// bit14=(LANG=ko)  use myriads (groups of 4 digits) not thousands (groups of 3)
	// bit15=(LANG=ne)  speak (non-replaced) English numerals in English
	// bit16=(LANG=si)  say "%" before the number
	// bit17=(LANG=ml)  omit "one" before hundred only if there are no previous digits
	// bit18=(LANG=ta)  same variant for ordinals and thousands (#o = #a)
	// bit19=(LANG=te)  drop final vowel from cardial number before adding ordinal suffix
	// bit20=(LANG=zh)  say zero tens
	int numbers2;

#define BREAK_THOUSANDS   0x49249248
	int break_numbers;  // which digits to break the number into thousands, millions, etc (Hindi has 100,000 not 1,000,000)
	int max_roman;
	int min_roman;
	int thousands_sep;
	int decimal_sep;
	int max_digits;    // max number of digits which can be spoken as an integer number (rather than individual digits)
	const char *ordinal_indicator;   // UTF-8 string
	const unsigned char *roman_suffix;    // add this (ordinal) suffix to Roman numbers (LANG=an)

	// bit 0, accent name before the letter name, bit 1 "capital" after letter name
	int accents;

	int tone_language;          // 1=tone language
	int intonation_group;
	unsigned char tunes[6];
	int long_stop;          // extra mS pause for a lengthened stop
	int phoneme_change;     // TEST, change phonemes, after translation
	char max_initial_consonants;
	char spelling_stress;   // 0=default, 1=stress first letter
	char tone_numbers;
	char ideographs;      // treat as separate words
	char textmode;          // the meaning of FLAG_TEXTMODE is reversed (to save data when *_list file is compiled)
	char dotless_i;         // uses letter U+0131
	int testing;            // testing options: bit 1= specify stressed syllable in the form:  "outdoor/2"
	int listx;    // compile *_listx after *list
	const unsigned int *replace_chars;      // characters to be substitutes
	char ascii_language[8];  // switch to this language for Latin characters
	int our_alphabet;           // offset for main alphabet (if not set in letter_bits_offset)
	int alt_alphabet;       // offset for another language to recognize
	int alt_alphabet_lang;  // language for the alt_alphabet
	int max_lengthmod;
	int lengthen_tonic;   // lengthen the tonic syllable
	int suffix_add_e;      // replace a suffix (which has the SUFX_E flag) with this character

#define DICTDIALECT_EN_US  1  // bit number
#define DICTDIALECT_ES_LA  2
	int dict_dialect;         // bitmap, use a dialect for foreign words
} LANGUAGE_OPTIONS;

// a parameter of ChangePhonemes()
typedef struct {
	int flags;
	unsigned char stress;          // stress level of this vowel
	unsigned char stress_highest;  // the highest stress level of a vowel in this word
	unsigned char n_vowels;        // number of vowels in the word
	unsigned char vowel_this;      // syllable number of this vowel (counting from 1)
	unsigned char vowel_stressed;  // syllable number of the highest stressed vowel
} CHANGEPH;

typedef struct {
	LANGUAGE_OPTIONS langopts;
	int translator_name;
	int transpose_max;
	int transpose_min;
	const char *transpose_map;
	char dictionary_name[40];

	char phonemes_repeat[20];
	int phonemes_repeat_count;
	int phoneme_tab_ix;

	unsigned char stress_amps[8];
	unsigned char stress_amps_r[8];
	short stress_lengths[8];
	int dict_condition;    // conditional apply some pronunciation rules and dict.lookups
	int dict_min_size;
	const unsigned short *charset_a0;   // unicodes for characters 0xa0 to oxff
	const wchar_t *char_plus_apostrophe;  // single chars + apostrophe treated as words
	const wchar_t *punct_within_word;   // allow these punctuation characters within words
	const unsigned short *chars_ignore;

// holds properties of characters: vowel, consonant, etc for pronunciation rules
	unsigned char letter_bits[256];
	int letter_bits_offset;
	const wchar_t *letter_groups[8];

	/* index1=option, index2 by 0=. 1=, 2=?, 3=! 4=none */
	#define INTONATION_TYPES 8
	#define PUNCT_INTONATIONS 6
	unsigned char punct_to_tone[INTONATION_TYPES][PUNCT_INTONATIONS];

	char *data_dictrules;     // language_1   translation rules file
	char *data_dictlist;      // language_2   dictionary lookup file
	char *dict_hashtab[N_HASH_DICT];   // hash table to index dictionary lookup file
	char *letterGroups[N_LETTER_GROUPS];

	// groups1 and groups2 are indexes into data_dictrules, set up by InitGroups()
	// the two-letter rules for each letter must be consecutive in the language_rules source

	char *groups1[256];         // translation rule lists, index by single letter
	char *groups3[128];         // index by offset letter
	char *groups2[N_RULE_GROUP2];   // translation rule lists, indexed by two-letter pairs
	unsigned int groups2_name[N_RULE_GROUP2];  // the two letter pairs for groups2[]
	int n_groups2;              // number of groups2[] entries used

	unsigned char groups2_count[256];    // number of 2 letter groups for this initial letter
	unsigned char groups2_start[256];    // index into groups2
	const short *frequent_pairs;   // list of frequent pairs of letters, for use in compressed *_list

	int expect_verb;
	int expect_past;    // expect past tense
	int expect_verb_s;
	int expect_noun;
	int prev_last_stress;
	char *clause_end;

	int word_vowel_count;     // number of vowels so far
	int word_stressed_count;  // number of vowels so far which could be stressed

	int clause_upper_count;   // number of upper case letters in the clause
	int clause_lower_count;   // number of lower case letters in the clause

	int prepause_timeout;
	int end_stressed_vowel;  // word ends with stressed vowel
	int prev_dict_flags[2];     // dictionary flags from previous word
	int clause_terminator;
} Translator;

extern int option_tone2;
#define OPTION_EMPHASIZE_ALLCAPS  0x100
#define OPTION_EMPHASIZE_PENULTIMATE 0x200
extern int option_tone_flags;
extern int option_phonemes;
extern int option_phoneme_events;
extern int option_linelength;     // treat lines shorter than this as end-of-clause
extern int option_multibyte;
extern int option_capitals;
extern int option_punctuation;
extern int option_endpause;
extern int option_ssml;
extern int option_phoneme_input;   // allow [[phonemes]] in input text
extern int option_phoneme_variants;
extern int option_sayas;
extern int option_wordgap;

extern int count_characters;
extern int count_words;
extern int count_sentences;
extern int skip_characters;
extern int skip_words;
extern int skip_sentences;
extern int skipping_text;
extern int end_character_position;
extern int clause_start_char;
extern int clause_start_word;
extern char *namedata;
extern int pre_pause;

#define N_MARKER_LENGTH 50   // max.length of a mark name
extern char skip_marker[N_MARKER_LENGTH];

#define N_PUNCTLIST  60
extern wchar_t option_punctlist[N_PUNCTLIST];  // which punctuation characters to announce
extern unsigned char punctuation_to_tone[INTONATION_TYPES][PUNCT_INTONATIONS];

extern Translator *translator;
extern Translator *translator2;
extern const unsigned short *charsets[N_CHARSETS];
extern char dictionary_name[40];
extern char ctrl_embedded;    // to allow an alternative CTRL for embedded commands
extern unsigned char *p_textinput;
extern wchar_t *p_wchar_input;
extern int dictionary_skipwords;

extern int (*uri_callback)(int, const char *, const char *);
extern int (*phoneme_callback)(const char *);
extern void SetLengthMods(Translator *tr, int value);

void LoadConfig(void);
int TransposeAlphabet(Translator *tr, char *text);
ESPEAK_NG_API int utf8_in(int *c, const char *buf);
int utf8_in2(int *c, const char *buf, int backwards);
int utf8_out(unsigned int c, char *buf);
int utf8_nbytes(const char *buf);
int lookupwchar(const unsigned short *list, int c);
int lookupwchar2(const unsigned short *list, int c);
int Eof(void);
char *strchr_w(const char *s, int c);
int IsBracket(int c);
void InitNamedata(void);
void InitText(int flags);
void InitText2(void);
int IsDigit(unsigned int c);
int IsDigit09(unsigned int c);
int IsAlpha(unsigned int c);
int IsVowel(Translator *tr, int c);
int IsSuperscript(int letter);
int iswalpha2(int c);
int isspace2(unsigned int c);
int iswlower2(int c);
int iswupper2(int c);
int towlower2(unsigned int c);
int towupper2(unsigned int c);
const char *GetTranslatedPhonemeString(int phoneme_mode);
const char *WordToString2(unsigned int word);
ALPHABET *AlphabetFromChar(int c);
ALPHABET *AlphabetFromName(const char *name);

Translator *SelectTranslator(const char *name);
int SetTranslator2(const char *name);
void DeleteTranslator(Translator *tr);
void ProcessLanguageOptions(LANGUAGE_OPTIONS *langopts);
int Lookup(Translator *tr, const char *word, char *ph_out);
int LookupFlags(Translator *tr, const char *word, unsigned int **flags_out);

int TranslateNumber(Translator *tr, char *word1, char *ph_out, unsigned int *flags, WORD_TAB *wtab, int control);
int TranslateRoman(Translator *tr, char *word, char *ph_out, WORD_TAB *wtab);

void ChangeWordStress(Translator *tr, char *word, int new_stress);
void SetSpellingStress(Translator *tr, char *phonemes, int control, int n_chars);
int TranslateLetter(Translator *tr, char *letter, char *phonemes, int control);
void LookupLetter(Translator *tr, unsigned int letter, int next_byte, char *ph_buf, int control);
void LookupAccentedLetter(Translator *tr, unsigned int letter, char *ph_buf);

int LoadDictionary(Translator *tr, const char *name, int no_error);
int LookupDictList(Translator *tr, char **wordptr, char *ph_out, unsigned int *flags, int end_flags, WORD_TAB *wtab);

void MakePhonemeList(Translator *tr, int post_pause, int new_sentence);
int ChangePhonemes_ru(Translator *tr, PHONEME_LIST2 *phlist, int n_ph, int index, PHONEME_TAB *ph, CHANGEPH *ch);
void ApplySpecialAttribute2(Translator *tr, char *phonemes, int dict_flags);
void AppendPhonemes(Translator *tr, char *string, int size, const char *ph);

void CalcLengths(Translator *tr);
void CalcPitches(Translator *tr, int clause_tone);

int RemoveEnding(Translator *tr, char *word, int end_type, char *word_copy);
int Unpronouncable(Translator *tr, char *word, int posn);
void SetWordStress(Translator *tr, char *output, unsigned int *dictionary_flags, int tonic, int prev_stress);
int TranslateRules(Translator *tr, char *p, char *phonemes, int size, char *end_phonemes, int end_flags, unsigned int *dict_flags);
int TranslateWord(Translator *tr, char *word1, WORD_TAB *wtab, char *word_out);
void *TranslateClause(Translator *tr, FILE *f_text, const void *vp_input, int *tone, char **voice_change);
int ReadClause(Translator *tr, FILE *f_in, char *buf, short *charix, int *charix_top, int n_buf, int *tone_type, char *voice_change);

void SetVoiceStack(espeak_VOICE *v, const char *variant_name);
void InterpretPhoneme(Translator *tr, int control, PHONEME_LIST *plist, PHONEME_DATA *phdata, WORD_PH_DATA *worddata);
void InterpretPhoneme2(int phcode, PHONEME_DATA *phdata);
char *WritePhMnemonic(char *phon_out, PHONEME_TAB *ph, PHONEME_LIST *plist, int use_ipa, int *flags);

extern FILE *f_trans; // for logging

#ifdef __cplusplus
}
#endif