XMLFormatting.py
1.75 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
from xml.parsers import expat
import textInfos
from logHandler import log
class XMLTextParser(object):
    """Turn an XML description of text into a flat list of commands.

    Three element names are recognised: ``control``, ``text`` and ``unich``.
    The resulting list holds ``textInfos.FieldCommand`` objects
    ("controlStart", "formatChange", "controlEnd") interleaved with strings
    of character data; adjacent pieces of character data are merged into a
    single string.
    """

    # ``unichr`` and ``basestring`` only exist on Python 2; fall back to the
    # Python 3 equivalents so the class works on either major version.
    try:
        _textTypes = basestring
        _codePointToChar = staticmethod(unichr)
    except NameError:
        _textTypes = str
        _codePointToChar = staticmethod(chr)

    def __init__(self):
        """Create the expat parser and hook up this object's handlers."""
        self.parser = expat.ParserCreate('utf-8')
        self.parser.StartElementHandler = self._startElementHandler
        self.parser.EndElementHandler = self._EndElementHandler
        self.parser.CharacterDataHandler = self._CharacterDataHandler
        self._commandList = []

    def _startElementHandler(self, tagName, attrs):
        """Handle an opening tag.

        ``unich`` carries a character as a decimal code point in its
        ``value`` attribute; it is emitted as character data (U+FFFD when
        the value cannot be converted). ``control`` and ``text`` append a
        "controlStart" / "formatChange" FieldCommand built from ``attrs``.

        Raises:
            ValueError: if ``tagName`` is not one of the recognised names.
        """
        if tagName == 'unich':
            data = attrs.get('value', None)
            if data is not None:
                try:
                    data = self._codePointToChar(int(data))
                except (ValueError, OverflowError):
                    # Not a number, or outside the range the character
                    # conversion accepts: substitute the replacement
                    # character rather than aborting the whole parse.
                    data = u'\ufffd'
                self._CharacterDataHandler(data)
            # A unich element never produces a field, with or without value.
            return
        elif tagName == 'control':
            newAttrs = textInfos.ControlField(attrs)
            self._commandList.append(
                textInfos.FieldCommand("controlStart", newAttrs))
        elif tagName == 'text':
            newAttrs = textInfos.FormatField(attrs)
            self._commandList.append(
                textInfos.FieldCommand("formatChange", newAttrs))
        else:
            raise ValueError("Unknown tag name: %s" % tagName)
        # Normalise attributes common to both field types: the XML carries
        # them as the string "1" (anything else is treated as false).
        for flagName in ("_startOfNode", "_endOfNode"):
            try:
                newAttrs[flagName] = newAttrs[flagName] == "1"
            except KeyError:
                pass

    def _EndElementHandler(self, tagName):
        """Handle a closing tag.

        Closing ``control`` appends a "controlEnd" FieldCommand; closing
        ``text`` or ``unich`` produces nothing.

        Raises:
            ValueError: if ``tagName`` is not one of the recognised names.
        """
        if tagName == "control":
            self._commandList.append(
                textInfos.FieldCommand("controlEnd", None))
        elif tagName in ("text", "unich"):
            pass
        else:
            raise ValueError("unknown tag name: %s" % tagName)

    def _CharacterDataHandler(self, data):
        """Append character data, merging it into a preceding string."""
        cmdList = self._commandList
        if cmdList and isinstance(cmdList[-1], self._textTypes):
            cmdList[-1] += data
        else:
            cmdList.append(data)

    def parse(self, XMLText):
        """Parse ``XMLText`` and return the command list.

        The text is encoded to UTF-8 before being handed to expat. Any
        error raised while parsing is logged together with the offending
        XML, and whatever was collected up to that point is returned.

        The returned list is the instance's own list, which is created once
        in ``__init__`` and never reset, so repeated calls keep appending.
        """
        try:
            self.parser.Parse(XMLText.encode('utf-8'))
        except Exception:
            # Best effort: report the failure but still hand back what was
            # parsed. KeyboardInterrupt/SystemExit are allowed to propagate.
            log.error("XML: %s"%XMLText,exc_info=True)
        return self._commandList