Trie.py
1.69 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
#!/usr/bin/python
# -*- coding: utf-8 -*-
class Node:
def __init__(self, end = False):
self.end = end
self.children = {}
class Trie:
def __init__(self, characters):
self.characters = characters
self._parse_characters()
self.root = Node()
def _parse_characters(self):
self.characters_keys = {}
for i in xrange(len(self.characters)):
self.characters_keys[self.characters[i]] = i
def _key_of(self, item, i):
return self.characters_keys[item[i].encode('utf-8')]
def _add(self, item):
node = self.root
item = list(item.decode('utf-8'))
for i in xrange(len(item)):
key = self._key_of(item, i)
if not node.children.has_key(key):
node.children[key] = Node()
node = node.children[key]
node.end = True
def _to_json(self, node):
keys = [];
children = {}
for key, value in node.children.iteritems():
keys.append(key)
children[key] = self._to_json(value)
keys.sort()
return { 'end': node.end, 'keys': keys, 'children': children }
def add(self, data):
if type(data) is list:
for d in data:
if not d in ignore:
self._add(d)
else:
self._add(data)
def to_json(self):
return self._to_json(self.root)
chars = ["'", '$', ',', '_', '%', '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
'A', 'Á', 'Â', 'Ã', 'À', 'B', 'C', 'Ç', 'D', 'E', 'É', 'Ê', 'F', 'G', 'H',
'I', 'Í', 'J', 'K', 'L', 'M', 'N', 'O', 'Ó', 'Ô', 'Õ', 'P', 'Q', 'R', 'S',
'T', 'U', 'Ú', 'V', 'W', 'X', 'Y', 'Z']
ignore = ["[INTERROGAÇÃO]", "[EXCLAMAÇÃO]", "[PONTO]"]
def gen(data):
trie = Trie(chars)
trie.add(data)
return { 'characters': chars, 'keys': trie.characters_keys, 'trie': trie.to_json() }