speechDictHandler.py
3.88 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
#speechDictHandler.py
#A part of NonVisual Desktop Access (NVDA)
#Copyright (C) 2006-2007 NVDA Contributors <http://www.nvda-project.org/>
#This file is covered by the GNU General Public License.
#See the file COPYING for more details.
import re
import globalVars
from logHandler import log
import os
import codecs
import api
import config
# Maps each name in dictTypes to its SpeechDict instance; populated by initialize().
dictionaries = {}
dictTypes = ("temp", "voice", "default", "builtin") # ordered by their priority, e.g. the voice-specific speech dictionary is processed before the default one
# Directory under the configuration path from which the default and per-voice dictionary files are loaded.
speechDictsPath=os.path.join(globalVars.appArgs.configPath, "speechDicts")
# Types of speech dictionary entries:
ENTRY_TYPE_ANYWHERE = 0  # String can match anywhere
ENTRY_TYPE_REGEXP = 1  # Regular expression
ENTRY_TYPE_WORD = 2  # String must have word boundaries on both sides to match


class SpeechDictEntry:
    """A single pattern -> replacement rule of a speech dictionary.

    The pattern is compiled once at construction time; sub() then applies
    the rule to arbitrary text.
    """

    def __init__(self, pattern, replacement, comment, caseSensitive=True, type=ENTRY_TYPE_ANYWHERE):
        """Compile the rule.

        pattern: the text (or regular expression source) to look for.
        replacement: the text substituted for every match.
        comment: free-form comment stored alongside the entry.
        caseSensitive: when False the pattern is compiled with re.IGNORECASE.
        type: one of the ENTRY_TYPE_* constants; any other value is treated
            as ENTRY_TYPE_ANYWHERE. (The name shadows the builtin but is kept
            because callers pass it by keyword.)

        An invalid regular expression makes re.compile raise.
        """
        self.pattern = pattern
        flags = re.U
        if not caseSensitive:
            flags |= re.IGNORECASE
        if type == ENTRY_TYPE_REGEXP:
            # The user supplied a real regular expression: use it verbatim.
            tempPattern = pattern
        elif type == ENTRY_TYPE_WORD:
            # Literal text that must be delimited by word boundaries.
            tempPattern = r"\b" + re.escape(pattern) + r"\b"
        else:
            # Literal text that may match anywhere.
            tempPattern = re.escape(pattern)
            type = ENTRY_TYPE_ANYWHERE  # Ensure sane values.
        self.compiled = re.compile(tempPattern, flags)
        self.replacement = replacement
        self.comment = comment
        self.caseSensitive = caseSensitive
        self.type = type

    def sub(self, text):
        """Return text with every match of this entry replaced."""
        if self.type == ENTRY_TYPE_REGEXP:
            # Regular expression entries may legitimately use group
            # references such as \1 in their replacement.
            replacement = self.replacement
        else:
            # For literal entries the replacement is literal too. re.sub
            # treats its replacement as a template in which the backslash is
            # special, so a literal backslash has to be doubled; otherwise
            # it is misread as an escape or group reference.
            replacement = self.replacement.replace("\\", "\\\\")
        return self.compiled.sub(replacement, text)
class SpeechDict(list):
    """An ordered list of dictionary entries that can be loaded from and saved to a file.

    File format, as read by load() and written by save(): UTF-8 with a
    signature, one entry per line made of four tab-separated fields
    (pattern, replacement, case sensitivity as 0/1, entry type as an
    integer). Lines starting with '#' are comments that are attached to the
    next entry; a blank line discards any pending comment. A '#' inside a
    pattern or replacement is stored preceded by a backslash.
    """

    def load(self, fileName):
        """Replace the current contents with the entries stored in fileName.

        The file name is remembered in self.fileName so that save() can be
        called without arguments later. A missing file leaves the
        dictionary empty. Lines that cannot be parsed, and entries that
        cannot be constructed, are logged and skipped.
        """
        self.fileName = fileName
        comment = ""
        del self[:]
        log.debug("Loading speech dictionary '%s'..." % fileName)
        if not os.path.isfile(fileName):
            log.debug("file '%s' not found." % fileName)
            return
        # The with-statement guarantees the file is closed even if reading
        # or decoding fails part-way through.
        with codecs.open(fileName, "r", "utf_8_sig", errors="replace") as dictFile:
            for line in dictFile:
                if line.isspace():
                    # A blank line separates a comment from whatever follows.
                    comment = ""
                    continue
                line = line.rstrip('\r\n')
                if line.startswith('#'):
                    # Consecutive comment lines are joined with a space.
                    if comment:
                        comment += " "
                    comment += line[1:]
                    continue
                fields = line.split("\t")
                if len(fields) != 4:
                    log.warning("can't parse line '%s'" % line)
                    continue
                pattern = fields[0].replace(r'\#', '#')
                replace = fields[1].replace(r'\#', '#')
                try:
                    dictionaryEntry = SpeechDictEntry(pattern, replace, comment, caseSensitive=bool(int(fields[2])), type=int(fields[3]))
                    self.append(dictionaryEntry)
                except Exception as e:
                    # Deliberately broad: one bad entry (invalid regular
                    # expression, non-numeric field, ...) must not prevent
                    # the rest of the dictionary from loading.
                    log.exception("Dictionary (\"%s\") entry invalid for \"%s\" error raised: \"%s\"" % (fileName, line, e))
                # The comment belonged to this entry, whether or not it loaded.
                comment = ""
        log.debug("%d loaded records." % len(self))
        return

    def save(self, fileName=None):
        """Write all entries to fileName, or to the file last loaded when omitted.

        Does nothing when no file name is known. The containing directory
        is created when it does not exist yet.
        """
        if not fileName:
            fileName = getattr(self, 'fileName', None)
        if not fileName:
            return
        dirName = os.path.dirname(fileName)
        # dirName is empty for a bare file name (relative to the current
        # directory); there is nothing to create then and os.makedirs("")
        # would raise.
        if dirName and not os.path.isdir(dirName):
            os.makedirs(dirName)
        with codecs.open(fileName, "w", "utf_8_sig", errors="replace") as dictFile:
            for entry in self:
                if entry.comment:
                    dictFile.write("#%s\r\n" % entry.comment)
                dictFile.write("%s\t%s\t%s\t%s\r\n" % (
                    entry.pattern.replace('#', r'\#'),
                    entry.replacement.replace('#', r'\#'),
                    int(entry.caseSensitive),
                    entry.type,
                ))

    def sub(self, text):
        """Return text after applying every entry, in list order."""
        for entry in self:
            text = entry.sub(text)
        return text
def processText(text):
    """Apply the speech dictionaries to text and return the result.

    The dictionaries are applied in the order given by dictTypes. When
    globalVars.speechDictionaryProcessing is false the text is returned
    unchanged.
    """
    if globalVars.speechDictionaryProcessing:
        for dictType in dictTypes:
            text = dictionaries[dictType].sub(text)
    return text
def initialize():
    """Create an empty dictionary for every type, then load the default and builtin ones."""
    dictionaries.update((dictType, SpeechDict()) for dictType in dictTypes)
    defaultDictFile = os.path.join(speechDictsPath, "default.dic")
    dictionaries["default"].load(defaultDictFile)
    # The builtin dictionary is addressed by a bare, relative file name.
    dictionaries["builtin"].load("builtin.dic")
def loadVoiceDict(synth):
    """Load the voice dictionary that belongs to the given synthesizer.

    When the synthesizer supports the "voice" setting, the file is named
    after both the synthesizer and its current voice (the voice name is
    passed through api.filterFileName first); otherwise it is named after
    the synthesizer alone. The file is looked up in speechDictsPath and
    loaded into dictionaries["voice"].
    """
    if synth.isSupported("voice"):
        voiceName = synth.availableVoices[synth.voice].name
        baseName = "%s-%s.dic" % (synth.name, api.filterFileName(voiceName))
    else:
        baseName = "%s.dic" % (synth.name,)
    # Build the path with os.path.join, like the other dictionary paths in
    # this module, instead of hard-coding the separator.
    dictionaries["voice"].load(os.path.join(speechDictsPath, baseName))