Trie.py 1.69 KB
#!/usr/bin/python
# -*- coding: utf-8 -*-

class Node:
	def __init__(self, end = False):
		self.end = end
		self.children = {}

class Trie:
	def __init__(self, characters):
		self.characters = characters
		self._parse_characters()
		self.root = Node()

	def _parse_characters(self):
		self.characters_keys = {}
		for i in xrange(len(self.characters)):
			self.characters_keys[self.characters[i]] = i

	def _key_of(self, item, i):
		return self.characters_keys[item[i].encode('utf-8')]

	def _add(self, item):
		node = self.root
		item = list(item.decode('utf-8'))
		for i in xrange(len(item)):
			key = self._key_of(item, i)
			if not node.children.has_key(key):
				node.children[key] = Node()
			node = node.children[key]
		node.end = True

	def _to_json(self, node):
		keys = [];
		children = {}
		for key, value in node.children.iteritems():
			keys.append(key)
			children[key] = self._to_json(value)
		keys.sort()
		return { 'end': node.end, 'keys': keys, 'children': children }

	def add(self, data):
		if type(data) is list:
			for d in data:
				if not d in ignore:
					self._add(d)
		else:
			self._add(data)

	def to_json(self):
		return self._to_json(self.root)

chars = ["'", '$', ',', '_', '%', '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
		'A', 'Á', 'Â', 'Ã', 'À', 'B', 'C', 'Ç', 'D', 'E', 'É', 'Ê', 'F', 'G', 'H',
		'I', 'Í', 'J', 'K', 'L', 'M', 'N', 'O', 'Ó', 'Ô', 'Õ', 'P', 'Q', 'R', 'S',
		'T', 'U', 'Ú', 'V', 'W', 'X', 'Y', 'Z']

ignore = ["[INTERROGAÇÃO]", "[EXCLAMAÇÃO]", "[PONTO]"]

def gen(data):
	trie = Trie(chars)
	trie.add(data)
	return { 'characters': chars, 'keys': trie.characters_keys, 'trie': trie.to_json() }