Deixa versao antiga como default

Erickson Silva
1 parent b559a77b
Showing 32 changed files with 1134 additions and 1251 deletions Show diff stats
modules/c++/include/pyTradutor.h
modules/c++/pyTradutor.cpp
modules/python/translate.py
src/AplicaRegras.py
src/AplicaSinonimos.py
src/AplicadorRegras.py
src/Classificador.py
src/Iterator.py
src/LeitorDicionarios.py
src/ModuleTranslate.py
src/Output.py
src/Simplificador.py
src/StringAux.py
src/Tradutor.py
src/WorkCSV.py
src/alexp.py
src/new/AplicaRegras.py
src/new/AplicaSinonimos.py
src/new/Iterator.py
src/new/LeitorDicionarios.py
@@ -1,31 +0,0 @@
-/**
- * @author Erickson Silva
- * @date 14/10/2013
- *
- */
-
-#include "Python.h"
-#include "dprintf.h"
-
-#ifndef _PYTRADUTOR_H
-#define _PYTRADUTOR_H
-
-namespace Tradutor {
-	class PyTradutor{
-		public: 
-                    PyTradutor();
-                    ~PyTradutor();
-                    char * convertStringToGlosa(const char * input);
-                    PyObject * pName;
-                    PyObject * pModule;
-                    PyObject * pDict;	
-                    PyObject * pFunc;
-                    PyObject * pArgs;
-                    PyObject * pResult;
-                    bool isRunning;
-     };
-}
-
-#endif
-
-
@@ -1,58 +0,0 @@
-/**
- * Essa classe invoca os metodos do tradutor em Python
- * Onde efetua a tradução do texto passado por parametro
- *
- * @author Erickson Silva
- * @date 14/10/2013
- *
- */
-
-
-#include "pyTradutor.h"
-
-namespace Tradutor {
-    PyTradutor::PyTradutor() {
-	    DPRINTF("Done!\n");
-    }
-    PyTradutor::~PyTradutor() {
-    	Py_DECREF(pName); 
-	    Py_DECREF(pModule); 
-	    Py_DECREF(pDict);
-	    Py_DECREF(pFunc); 
-	    Py_DECREF(pArgs);
-	    Py_DECREF(pResult);
-    	DDDPRINTF("PyTranslator finalized!\n");
-    }
-
-/**
-* Traduz um texto (char * input) para uma string contendo a
-* traducao para glosa
-*
-* @param input texto de entrada
-* @return string contendo os tokens em glosa traduzidos.
-**/	
-	char * PyTradutor::convertStringToGlosa(const char * input) {
-	   	if(!isRunning){
-   			Py_Initialize();
-	      	pName = PyString_FromString("ModuleTranslate");
-	      	assert(pName!=NULL);
-	      	pModule = PyImport_Import(pName);
-    	    PyErr_Print();
-	      	assert(pModule!=NULL);
-	      	pDict = PyModule_GetDict(pModule);
-	      	PyErr_Print();
-	      	assert(pDict!=NULL);
-	      	pFunc = PyDict_GetItemString(pDict, "iniciar");
-	      	PyErr_Print();
-	      	assert(pFunc!=NULL);
-	      	isRunning = 1;
-	    }
-    	pArgs = PyTuple_Pack(1,PyString_FromString(input));
-    	PyErr_Print();
-        assert(pArgs!=NULL);
-    	pResult = PyObject_CallObject(pFunc, pArgs);
-    	PyErr_Print();
-  	    assert(pResult!=NULL);
-    	return PyString_AsString(pResult);
-	}
-}            
 \ No newline at end of file
@@ -1,14 +0,0 @@
-#!/usr/bin/python
-# -*- coding: utf-8 -*-
-
-from Tradutor import *
-
-tradutor = Tradutor()
-
-def iniciar(x):
-	try:
-		text = x.decode("utf-8")
-	except:
-		text = x.decode("iso-8859-1")
-		
-	return tradutor.traduzir(text)
@@ -1,161 +0,0 @@
-#!/usr/bin/python
-# -*- coding: utf-8 -*-
-
-#Autor: Erickson Silva 
-#Email: <erickson.silva@lavid.ufpb.br> <ericksonsilva@live.com>
-
-#LAViD - Laboratório de Aplicações de Vídeo Digital
-
-from LeitorDicionarios import *
-from Iterator import *
-from StringAux import *
-
-class AplicaRegras(object):
-
-	# inicializa todos as variaveis
-	def __init__(self):	
-		self.__dicionarios = LeitorDicionarios()
-
-	# retira artigos e preposicoes; passa verbos para infinitivo e verificar se há sinonimos
-	def simplificar(self, texto):
-		it = Iterator()
-		it.load(texto)
-		self.__ts = []
-		self.__verb = False
-		self.__adv = False;
-		self.__countVerb = 0
-		self.__countAdv = 0
-		while(it.hasNext()):
-			token = it.getAtualW()
-			tag = it.getAtualT()
-			self.__b = False
-			if self.__dicionarios.hasPalavraIgnorada(tag) == False: # verifica se nao eh artigo/preposicao
-
-				#VERIFICA SE É ADVERBIO E CONTA A QUANTIDADE
-				if tag[:3] == "ADV":
-					self.__adv = True
-					self.__countAdv += 1					
-				
-				if tag[:2] == "VB":
-
-					#VERIFICA SE É VERBO NO INFINITIVO
-					if self.__dicionarios.hasVerboInfinitivo(token):		# verifica se ha um verbo infinitivo desse token
-						verboInfinitivo = self.__dicionarios.getVerboInfinitivo(token)		# se sim, adiciona numa string aux			
-						self.__ts.append([verboInfinitivo,tag])  # caso contrario, adiciona so o verbo infinitivo msm  		
-						self.__b = True
-
-					#VERIFICA SE É VERBO DE TEMPO E CONTA A QUANTIDADE
-					if tag == "VB-P" or tag == "VB-D" or tag == "VB-R":
-						self.__verb = True
-						self.__countVerb += 1
-
-
-				#VERIFICA SE É SUBTANTIVO COMUM DOS 2 GENEROS
-				if self.__dicionarios.hasSubst2Genero(token):
-					#del self.__ts[-1]
-					lenTicket = len(it.getAntT())
-					if ((self.__dicionarios.hasPalavraIgnorada(it.getAntT())) and (it.getAntT()[lenTicket-1:] == "F") or (it.getAntT()[lenTicket-3:] == "F-P")):
-						self.__ts.append(["MULHER ", "2GEN"])
-						self.__ts.append([token,tag])
-					else:
-						self.__ts.append(["HOMEM ", "2GEN"])
-						self.__ts.append([token,tag])
-					self.__b = True          
-				
-				#VERIFICA SE É PLURAL
-				#if tag[-2:] == "-P":
-				#	token = self.pluralAnalysis(token)
-
-				#SE NÃO HOUVE NENHUM ALTERAÇÃO, OU SEJA, NÃO APLICOU NENHUMA REGRA, ADICIONA O TOKEN ORIGINAL
-				if self.__b == False:             	# verifica se nao encontrou nem verbo infinito ou sinonimo
-					self.__ts.append([token,tag])
-
-		#SE ENCONTROU VERBO, ENTÃO ANALISA a SENTENCA NOVAMENTE (again?)
-		if self.__verb == True:
-			return self.verbalAnalysis(self.__ts)
-		
-		return self.__ts
-
-
-	# converte romano para numero
-	def auxConvert(self, tag):
-		try:
-			return roman_to_int(tag)
-		except:
-			return tag
-
-	def verbalAnalysis(self, lista):
-		lv = []
-		it = Iterator()
-		it.load(lista)
-		hasFut = False
-		hasPas = False
-		count = 0
-		while(it.hasNext()):
-			token = it.getAtualW().upper()
-			tag = it.getAtualT()
-
-			if(tag[:3] == "ADV"):
-				if (self.__dicionarios.hasTempoVerbal(token)):
-					it.reset()
-					#print "ADV: retornou lista original"
-					return lista
-			
-			if(tag == "VB-P"):
-				if (self.__countVerb > 1):
-					count += 1
-					#print "VB-P: Incrementou"
-					if(count == self.__countVerb):
-						#print "VB-P Adicionou " + token
-						lv.append([token,tag])
-				else:
-					#print "VB-P: retornou lista original"
-					it.reset()
-					return lista
-			elif(tag == "VB-D"):
-				count += 1
-				hasPas = True
-				#print "VB-D: Incrementou"
-				if(count == self.__countVerb):
-					#print "VB-D Adicionou " + token
-					lv.append([token,tag])
-			elif(tag == "VB-R"):
-				count += 1
-				hasFut = True
-				#print "VB-R: Incrementou"
-				if(count == self.__countVerb):
-					#print "VB-R Adicionou " + token
-					lv.append([token,tag])
-			else:
-				lv.append([token,tag])	
-		if (hasFut):
-			lv.append(["FUTURO", "T-VB"])
-		elif (hasPas):
-			lv.append(["PASSADO", "T-VB"])
-		it.reset()
-		return lv
-
-
-	def pluralAnalysis(self, word):
-
-		if(word[-3:] == "OES" or word[-2:] == "AES" or word[-2:] == "AOS"):
-			return word[0:-3]+"AO"
-		elif(word[-3:] == "RES" or word[-2:] == "ZES" or word[-2:] == "NES"):
-			return word[0:-2]
-		elif(word[-3:] == "SES"):
-			#TODO: Algumas palavras possuem marcações gráficas na raiz singular. Ex: Gás – Gases
-			return word[0:-2]
-		elif(word[-2:] == "NS"):
-			return word[0:-2]+"M"
-		elif(word[-3:] == "EIS"):
-			return word[0:-3]+"IL"
-		elif(word[-2:] == "IS"):
-			if(word[-3] == "A" or word[-3] == "E" or word[-3] == "O" or word[-3] == "U"):
-				return word[0:-2]+"L"	
-			else:
-				return word	
-		elif(word[-1] == "S"):
-	    	#TODO: Palavras paroxítonas ou proparoxítonas terminadas em S. Ex: lápis, vírus, tagênis, ônibus, etc
-			return word[0:-1]
-		else:
-			return word
 \ No newline at end of file
@@ -1,48 +0,0 @@
-#!/usr/bin/python
-# -*- coding: utf-8 -*-
-
-#Autor: Erickson Silva 
-#Email: <erickson.silva@lavid.ufpb.br> <ericksonsilva@live.com>
-
-#LAViD - Laboratório de Aplicações de Vídeo Digital
-
-import os, csv, sys
-from nltk.tree import *
-from LeitorDicionarios import *
-
-class AplicaSinonimos(object):
-
-	 # Define e inicializa os atributos
-	def __init__(self):
-		self.__dicionarios = LeitorDicionarios()
-
-	def sinonimosMorfologico(self, texto):
-		lista = texto
-		for i, elem in enumerate(lista):
-			token = self.verificaPalavra(elem[0])
-			listmp = list(elem)
-			listmp[0] = token 
-			lista[i] = listmp
-		return lista
-
-
-	def dicionarioSinonimoFolhas(self, folhas):
-		dic = {}
-		for f in folhas:
-			token = self.verificaPalavra(f)
-			dic[f] = token
-		return dic
-
-	def sinonimosSintatico(self, texto):
-		folhas = Tree.leaves(texto)
-		dic = self.dicionarioSinonimoFolhas(folhas)
-		stringTree = str(texto)
-		for t in folhas:
-			stringTree.replace(t, dic[t])
-		tree = Tree.fromstring(stringTree, brackets='()')
-		return tree
-
-	def verificaPalavra(self, token):
-		if self.__dicionarios.hasSinonimo(token):
-			return self.__dicionarios.getSinonimo(token)
-		return token
 \ No newline at end of file
@@ -0,0 +1,60 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+
+#Autor: Erickson Silva <erickson.silva@lavid.ufpb.br> <ericksonsilva@live.com>
+
+import xml.etree.ElementTree as ET
+import os
+ 
+class AplicadorRegras(object):
+
+    # inicializacao das variaves
+    def __init__(self):  
+        self.__tree = ET.parse('vlibras_user/vlibras-core/data/regras.xml')
+        self.__root = self.__tree.getroot()
+        self.__tAux = []
+        self.__dAux = {}
+
+    # aplica as regras
+    def aplicarRegras(self, ts):
+        self.__n = len(ts) # quantidade de tokens
+        for i in range(0,self.__n):
+            self.__tAux.append(self.__n)
+        self.__name = self.getNameRule(ts) # todos os etiquetadores numa so string (ver linha 35)
+        for morpho in self.__root:
+            for rule in morpho.findall('rule'): # procura a tag rule 
+                if rule.get('name') == self.__name: # procura o atributo name na tag rule (ver linha 17)
+                    if rule.find('active').text == "true": # verifica se a regra esta ativa
+                        self.__c = 0
+                    for classe in rule.iter('class'): # for nas tags class
+                        self.__dAux[self.__c] = int(classe.find('newpos').text) # preenche dicionario com a ordem atual e futura das palavras
+                        self.__c += 1
+                    self.__c = 0
+                    for w,t in ts:
+                        i = self.__dAux.get(self.__c) # pega o indice de onde ficara a palavra
+                        self.__tAux[i] = ([w,t]) # preenche a lista com a palavra+etiqueta na posicao correta (segundo o arquivo regras.xml)
+                        self.__c += 1
+                    return self.__tAux # retorna nova lista (ordenada)       
+        return ts # retorna a lista sem alteracoes (nao existe regra)                     
+
+    def getNameRule(self, ts):
+        self.__name = ""
+        for w,t in ts:
+            if t[:2] != "VB":
+                self.__name += t
+            else: 
+                self.__name += t[:2]
+        return self.__name
+
+
+
+
+
+
+
+
+
+
+
+
+
@@ -0,0 +1,44 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+
+#Autor: Erickson Silva <erickson.silva@lavid.ufpb.br> <ericksonsilva@live.com>
+
+from Aelius import AnotaCorpus, Toqueniza, Extras
+
+class Classificador(object):
+
+	# inicializacao das variaveis
+	def __init__(self):	
+		self.__h = Extras.carrega("AeliusHunPos") # carrega o modelo de idioma (passado por parametro ao instanciar)
+		
+	def anotaSentencas(self, str):
+		self.__t = ""
+		self.__tokens = ""
+		#try:
+		# tokenizae
+		self.__tokens = Toqueniza.TOK_PORT.tokenize(str)
+
+		# realiza a classificacao morfologica
+		self.__t = AnotaCorpus.anota_sentencas([self.__tokens],self.__h,'hunpos')
+			
+		return self.listClean(self.__t)
+		#except:
+		#	print "Erro ao efetuar a classificação morfologica."
+
+
+	def listClean(self, l):
+		lClean = []
+		for w,t in l[0]:
+			lClean.append([w,t])
+		return lClean
+
+	# faz a impressao (usado apenas pra testes)
+	def imprimeSentencas(self):
+		for w,t in self.t[0]:
+			print "%s_%s " % (w,t),
+
+
+
+
+
+
 #!/usr/bin/python
 # -*- coding: utf-8 -*-
  
-#Autor: Erickson Silva 
-#Email: <erickson.silva@lavid.ufpb.br> <ericksonsilva@live.com>
+#Autor: Erickson Silva <erickson.silva@lavid.ufpb.br> <ericksonsilva@live.com>
  
-#LAViD - Laboratório de Aplicações de Vídeo Digital
+from StringAux import *
  
 class Iterator(object):
  
@@ -36,22 +35,22 @@ class Iterator(object):
 			return self.__list[self.count]
  
 	def getAtualW(self):
-		return self.getToken(0)[0].upper()
+		return remover_acentos(self.getToken(0)[0].upper().encode('utf-8'))
  
 	def getAtualT(self):
-		return self.getToken(0)[1]
+		return self.getToken(0)[1].upper().encode('utf-8')
  
 	def getProxW(self):
-		return self.getToken("+")[0].upper()
+		return remover_acentos(self.getToken("+")[0].upper().encode('utf-8'))
  
 	def getProxT(self):
-		return self.getToken("+")[1]		
+		return self.getToken("+")[1].upper().encode('utf-8')		
  
 	def getAntW(self):
-		return self.getToken("-")[0].upper()
+		return remover_acentos(self.getToken("-")[0].upper().encode('utf-8'))
  
 	def getAntT(self):
-		return self.getToken("-")[1]
+		return self.getToken("-")[1].upper().encode('utf-8')
  
 	def hasNext(self):
 		if(self.count < self.size-1):
@@ -1,139 +0,0 @@
-#!/usr/bin/python
-# -*- coding: utf-8 -*-
-
-#Autor: Erickson Silva 
-#Email: <erickson.silva@lavid.ufpb.br> <ericksonsilva@live.com>
-
-#LAViD - Laboratório de Aplicações de Vídeo Digital
-
-import os, csv, sys
-
-class LeitorDicionarios(object):
-   #_iInstance = None
-
-   #class Singleton:
-   #     def __init__(self):
-   #        self.LeitorDicionarios = None
- 
-   #def __init__( self ):
-   #     if LeitorDicionarios._iInstance is None:
-   #         LeitorDicionarios._iInstance = LeitorDicionarios.Singleton()
- 
-   #     self._EventHandler_instance = LeitorDicionarios._iInstance
- 
-   #def __getattr__(self, aAttr):
-   #   return getattr(self._iInstance, aAttr)
-
-   #def __setattr__(self, aAttr, aValue):
-   #     return setattr(self._iInstance, aAttr, aValue)
-
-   # Define e inicializa os atributos
-   def __init__(self):
-      self.__path = "/home/erickson/vlibras-translate/data/"
-      self.__dicInf = {}
-      self.__dicSin = {}
-      self.__dicWords = {}
-      self.__dic2Gen = {}      
-      self.__dicTemVerbs = {}  
-      self.__fileDic = ''   
-      self.carregarVerbosInfinitivos()
-      self.carregarSinonimos()
-      self.carregarPalavrasIgnoradas()
-      self.carregarSubst2Generos()
-      self.carregarTemposVerbais() 
-
-   # Abre o self.__fileDic que contem os verbos no infinitivo e preenche o dicionario com os mesmos
-   def carregarVerbosInfinitivos(self):
-      try:
-         self.__fileDic = csv.reader(open(self.__path+"dicPortGlosa.csv"), delimiter=";")
-      except IOError, (errno, strerror): 
-         print "I/O error(%s): %s" % (errno, strerror)
-         print "carregarVerbosInfinitivos"
-
-      for row in self.__fileDic:
-         if row[1] != "": 
-            try:
-               self.__dicInf[row[0].decode("utf-8")] = row[1].decode("utf-8")  
-            except UnicodeDecodeError:
-               self.__dicInf[row[0].decode('iso8859-1').encode('utf-8')] = row[1].decode('iso8859-1').encode('utf-8')
-         
-   # Abre o self.__fileDic que contem os sinonimos e preenche o dicionario com os mesmos
-   def carregarSinonimos(self):
-      try:
-         self.__fileDic = csv.reader(open(self.__path+"portuguesGlosa.csv"), delimiter=";")
-      except IOError, (errno, strerror):
-         print "I/O error(%s): %s" % (errno, strerror)
-         print "carregarSinonimos"
-   
-      for row in self.__fileDic:
-         if row[1] != "":
-            self.__dicSin[row[0].decode("utf-8")] = row[1].decode("utf-8")
-    
-
-    # Abre o self.__fileDic que contem os tempos verbais
-   def carregarTemposVerbais(self):
-      try:
-         self.__fileDic = csv.reader(open(self.__path+"temposVerbais.csv"), delimiter=";")
-      except IOError, (errno, strerror):
-         print "I/O error(%s): %s" % (errno, strerror)
-         print "carregarTemposVerbais"
-         
-      for row in self.__fileDic:
-         self.__dicTemVerbs[row[0].decode("utf-8")] = row[0].decode("utf-8")     
-
-   # Abre o self.__fileDic que contem os artigos e preposicoes de acordo com o modelo de idioma passado na chamada e preenche o dicionario com os mesmos
-   def carregarPalavrasIgnoradas(self):
-      try:
-         self.__fileDic = csv.reader(open(self.__path+"hWordsRemove.csv"), delimiter=";")
-      except IOError, (errno, strerror):
-         print "I/O error(%s): %s" % (errno, strerror)
-         print "carregarPalavrasIgnoradas"
-
-      for row in self.__fileDic:
-         self.__dicWords[row[0].decode("utf-8")] = row[0].decode("utf-8")
-        
-   # Abre o self.__fileDic que contem os substantivos que sao comuns dos 2 generos e preenche o dicionario com os mesmos
-   def carregarSubst2Generos(self):
-      try:
-         self.__fileDic = csv.reader(open(self.__path+"subs2Generos.csv"), delimiter=";")
-      except IOError, (errno, strerror):
-         print "I/O error(%s): %s" % (errno, strerror)
-         print "carregarSubst2Generos" 
-           
-      for row in self.__fileDic:
-         self.__dic2Gen[row[0].decode("utf-8")] = row[0].decode("utf-8")    
-
-   # Retorna o dicionario dos verbos no infinitivo
-   def getVerboInfinitivo(self, token):
-      return self.__dicInf[token]
-
-   # Retorna o dicionario dos sinonimos
-   def getSinonimo(self, token):
-      return self.__dicSin[token]
-
-   # Retorna o dicionario dos artigos e preposicoes a serem removidos pelo simplificador
-   def getPalavraIgnorada(self, token):
-      return self.__dicWords[token]
-
-   # Retorna o dicionario dos substantivos a serem analisados pelo simplificador
-   def getSubst2Generos(self, token):
-      return self.__dic2Gen[token]
-
-   # Retorna o dicionario dos tempos verbais
-   def getTempoVerbal(self, token):
-      return self.__dicTemVerbs[token]  
-
-   def hasVerboInfinitivo(self, token):
-      return self.__dicInf.has_key(token)
-
-   def hasSinonimo(self, token):
-      return self.__dicSin.has_key(token)
-
-   def hasPalavraIgnorada(self, token):
-      return self.__dicWords.has_key(token)
-
-   def hasSubst2Genero(self, token):
-      return self.__dic2Gen.has_key(token)
-
-   def hasTempoVerbal(self, token):
-      return self.__dicTemVerbs.has_key(token)
@@ -0,0 +1,14 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+
+from Tradutor import *
+
+tradutor = Tradutor()
+
+def iniciar(x):
+	try:
+		text = x.decode("utf-8")
+	except:
+		text = x.decode("iso-8859-1")
+		
+	return tradutor.traduzir(text)
@@ -0,0 +1,24 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+
+#Autor: Erickson Silva <erickson.silva@lavid.ufpb.br> <ericksonsilva@live.com>
+
+import sys
+from Iterator import *
+from StringAux import *
+
+class Output(object):
+
+	# inicializa a variavel com o valor passado por paramentro ao instanciar
+	def __init__(self):
+		self.it = Iterator()
+
+	# executa a saida
+	def executeOut(self, ts):
+		self.__glosa = []
+		self.it.load(ts)
+		while(self.it.hasNext()):
+			self.__glosa.append(self.it.getAtualW())
+		self.it.reset()
+		return ' '.join(self.__glosa)
+
@@ -0,0 +1,164 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+
+#Autor: Erickson Silva <erickson.silva@lavid.ufpb.br> <ericksonsilva@live.com>
+
+from WorkCSV import *
+from Iterator import *
+from StringAux import *
+
+class Simplificador(object):
+
+	# inicializa todos as variaveis
+	def __init__(self):	
+		self.it = Iterator()
+		self.__csv = WorkCSV()
+		self.__dicInf = {}
+		self.__dicSin = {}
+		self.__dicWords = {}
+		self.__dic2Gen = {}
+		self.__dicTemVerbs = {}
+		self.executeWorkCSV()
+
+	# retira artigos e preposicoes; passa verbos para infinitivo e verificar se há sinonimos
+	def simplificar(self, texto):
+		self.__ts = []
+		self.it.load(texto)
+		self.__verb = False
+		self.__adv = False;
+		self.__countVerb = 0
+		self.__countAdv = 0
+		countWords = 0
+		while(self.it.hasNext()):
+			w = self.auxConvert(self.it.getAtualW())
+			t = self.it.getAtualT()
+			self.__b = False
+			if self.__dicWords.has_key(t) == False: # verifica se nao eh artigo/preposicao
+				wu = w.upper() 						# deixa o token maiusculo
+				#if t[:2] == "VB":
+				if t[-2:] == "-P":
+					wu = self.pluralAnalysis(w)
+				if t == "VB-P" or t == "VB-D" or t == "VB-R":
+					self.__verb = True
+					self.__countVerb += 1
+				if t[:3] == "ADV":
+					self.__adv = True
+					self.__countAdv += 1					
+				if self.__dicInf.has_key(wu):		# verifica se ha um verbo infinitivo desse token
+					sAux = self.__dicInf[wu]		# se sim, adiciona numa string aux
+					if self.__dicSin.has_key(sAux):	# verifica se ha um sinonimo para esse verbo infinitivo
+						self.__ts.append([self.__dicSin[sAux],t]) # se sim, entao adiciona na lista
+						self.__b = True          
+					else:					
+						self.__ts.append([sAux,t])  # caso contrario, adiciona so o verbo infinitivo msm
+						self.__b = True
+				if self.__b == False and self.__dicSin.has_key(wu):	# verifica se nao foi encontrado verbo infinitivo e se ha sinonimo
+					self.__ts.append([self.__dicSin[wu],t]) # adiciona na o sinonimo lista
+					self.__b = True   		
+
+				if self.__dic2Gen.has_key(wu):
+					del self.__ts[-1]
+					lenTicket = len(self.it.getAntT())
+					if ((self.__dicWords.has_key(self.it.getAntT())) and (self.it.getAntT()[lenTicket-1:] == "F") or (self.it.getAntT()[lenTicket-3:] == "F-P")):
+						self.__ts.append(["MULHER " + wu,t])
+					else:
+						self.__ts.append(["HOMEM " + wu,t])
+					self.__b = True          
+				if self.__b == False:             	# verifica se nao encontrou nem verbo infinito ou sinonimo
+					self.__ts.append([wu,t])
+			countWords += 1
+		self.it.reset()
+		if self.__verb == True:
+			return self.verbalAnalysis(self.__ts)
+		return self.__ts
+
+	# cria e recupera todos os dicionarios (verbos inf., sinonimos e artigos/preposicoes)
+	def executeWorkCSV(self):
+		self.__dicInf = self.__csv.getDicInf()
+		self.__dicSin = self.__csv.getDicSin()
+		self.__dicWords = self.__csv.getDicWords()
+		self.__dic2Gen = self.__csv.getDic2Gen()
+		self.__dicTemVerbs = self.__csv.getDicTemVerbs()
+
+	# converte romano para numero
+	def auxConvert(self, t):
+		try:
+			return roman_to_int(t)
+		except:
+			return t
+
+
+	def verbalAnalysis(self, lista):
+		"""Reduce the tensed verbs of ``lista`` and append a tense marker.
+
+		``lista`` is a list of [word, tag] pairs. Relies on self.__countVerb,
+		which simplificar() sets before calling this method.
+
+		Returns ``lista`` unchanged when it contains an adverb listed in the
+		verbal-tense dictionary, or a VB-P token while the verb count is not
+		greater than 1. Otherwise returns a new list in which a VB-P/VB-D/VB-R
+		token is kept only when the running count reaches self.__countVerb,
+		followed by ["FUTURO", "TVB"] if a VB-R was seen, else
+		["PASSADO", "TVB"] if a VB-D was seen.
+		"""
+		lv = []
+		self.it.load(lista)
+		hasFut = False
+		hasPas = False
+		# Number of VB-P/VB-D/VB-R tokens counted so far.
+		count = 0
+		while(self.it.hasNext()):
+			w = self.it.getAtualW().upper()
+			t = self.it.getAtualT()
+
+			if(t[:3] == "ADV"):
+				if (self.__dicTemVerbs.has_key(w)):
+					self.it.reset()
+					# Adverb found in the tense dictionary: give back the original list.
+					return lista
+			
+			if(t == "VB-P"):
+				if (self.__countVerb > 1):
+					count += 1
+					# Keep this verb only when the count reaches the total.
+					if(count == self.__countVerb):
+						lv.append([w,t])
+				else:
+					# At most one tensed verb and it is VB-P: give back the original list.
+					self.it.reset()
+					return lista
+			elif(t == "VB-D"):
+				count += 1
+				hasPas = True
+				# Keep this verb only when the count reaches the total.
+				if(count == self.__countVerb):
+					lv.append([w,t])
+			elif(t == "VB-R"):
+				count += 1
+				hasFut = True
+				# Keep this verb only when the count reaches the total.
+				if(count == self.__countVerb):
+					lv.append([w,t])
+			else:
+				# Any other tag (including ADV tokens not caught above) is kept.
+				lv.append([w,t])	
+		# The future marker takes precedence over the past marker.
+		if (hasFut):
+			lv.append(["FUTURO", "TVB"])
+		elif (hasPas):
+			lv.append(["PASSADO", "TVB"])
+		self.it.reset()
+		return lv
+
+
+	def pluralAnalysis(self, word):
+
+		if(word[-3:] == "OES" or word[-2:] == "AES" or word[-2:] == "AOS"):
+			return word[0:-3]+"AO"
+		elif(word[-3:] == "RES" or word[-2:] == "ZES" or word[-2:] == "NES"):
+			return word[0:-2]
+		elif(word[-3:] == "SES"):
+			#TODO: Algumas palavras possuem marcações gráficas na raiz singular. Ex: Gás – Gases
+			return word[0:-2]
+		elif(word[-2:] == "NS"):
+			return word[0:-2]+"M"
+		elif(word[-3:] == "EIS"):
+			return word[0:-3]+"IL"
+		elif(word[-2:] == "IS"):
+			if(word[-3] == "A" or word[-3] == "E" or word[-3] == "O" or word[-3] == "U"):
+				return word[0:-2]+"L"	
+			else:
+				return word	
+		elif(word[-1] == "S"):
+	    	#TODO: Palavras paroxítonas ou proparoxítonas terminadas em S. Ex: lápis, vírus, tênis, ônibus, etc
+			return word[0:-1]
+		else:
+			return word
 \ No newline at end of file
@@ -0,0 +1,83 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+
+from unicodedata import normalize
+
+ext = {1:"um", 2:"dois", 3:"três", 4:"quatro", 5:"cinco", 6:"seis", 7:"sete", 8:"oito", 9:"nove", 0:"zero"}
+
+def extenso(n):
+    strn = str(n)
+    sizen = len(strn)
+    tokens = []
+    for i in range (0, sizen):
+        x = int(strn[i])
+        tokens.append(ext[x])
+    return ' '.join(tokens)
+
+"""
+def extenso(n):
+    strn = str(n)
+    sizen = len(strn)
+    tokens = []
+    for i in range (0, sizen):
+        tokens.append(strn[i])
+    return ' '.join(tokens)
+"""    
+
+def remover_acentos(txt):
+
+    """ Devolve cópia de uma str substituindo os caracteres 
+        acentuados pelos seus equivalentes não acentuados.
+    
+    ATENÇÃO: carateres gráficos não ASCII e não alfa-numéricos,
+    tais como bullets, travessões, aspas assimétricas, etc. 
+    são simplesmente removidos!
+    
+    >>> remover_acentos('[ACENTUAÇÃO] ç: áàãâä! éèêë? íì&#297;îï, óòõôö; úù&#361;ûü.')
+    '[ACENTUACAO] c: aaaaa! eeee? iiiii, ooooo; uuuuu.'
+    
+    """
+    try:
+        return normalize('NFKD', txt.decode('utf-8')).encode('ASCII','ignore')
+    except:
+        return normalize('NFKD', txt.decode('iso-8859-1')).encode('ASCII','ignore')
+
+
+def roman_to_int(input):
+    if not isinstance(input, type("")):
+        raise TypeError, "expected string, got %s" % type(input)
+    input = input.upper( )
+    nums = {'M':1000,
+            'D':500,
+            'C':100,
+            'L':50,
+            'X':10,
+            'V':5,
+            'I':1}
+    sum = 0
+    for i in range(len(input)):
+        try:
+            value = nums[input[i]]
+            if i+1 < len(input) and nums[input[i+1]] > value:
+                sum -= value
+            else: sum += value
+        except KeyError:
+            raise ValueError, 'input is not a valid Roman numeral: %s' % input
+        
+    if int_to_roman(sum) == input: return sum
+    else:        raise ValueError, 'input is not a valid Roman numeral: %s' % input
+
+def int_to_roman(input):
+    if not isinstance(input, type(1)):
+        raise TypeError, "expected integer, got %s" % type(input)
+    if not 0 < input < 4000:
+        raise ValueError, "Argument must be between 1 and 3999"
+    ints = (1000, 900, 500, 400, 100, 90, 50, 40, 10, 9, 5, 4, 1)
+    nums = ('M', 'CM', 'D', 'CD','C', 'XC','L','XL','X','IX','V','IV','I')
+    result = []
+
+    for i in range(len(ints)):
+        count = int(input / ints[i])
+        result.append(nums[i] * count)
+        input -= ints[i] * count
+    return ''.join(result)
 \ No newline at end of file
 #!/usr/bin/python
 # -*- coding: utf-8 -*-
  
-#Autor: Erickson Silva 
-#Email: <erickson.silva@lavid.ufpb.br> <ericksonsilva@live.com>
+#Autor: Erickson Silva <erickson.silva@lavid.ufpb.br> <ericksonsilva@live.com>
  
-#LAViD - Laboratório de Aplicações de Vídeo Digital
+from Classificador import *
+from Simplificador import *
+from AplicadorRegras import * 
+from Output import *
+from StringAux import *
  
-import alexp
-from AplicaSinonimos import *
-from AplicaRegras import * 
  
-sin = AplicaSinonimos()
-reg = AplicaRegras()
+class Tradutor(object):
  
+	def __init__(self):
+		#instanciando os objetos
+		self.__classificador = Classificador()
+		self.__simplificador = Simplificador()
+		self.__regras = AplicadorRegras()
+		self.__out = Output()
  
-def iniciarTraducao(texto):
-	textoDividido = texto.split(".")
-	for w in textoDividido:
-		if len(w) > 0:
-			gerarAnalise(w)
  
-def gerarAnalise(sentenca):
-	'''tokens = alexp.toqueniza(sentenca)
-	etiquetadas = alexp.etiquetaSentenca(tokens)
-	analiseMorf = analiseMorfologica(etiquetadas)
-	print analiseMorf'''
+	def traduzir(self, txt):
+		self.__tr = None
  
-	analise = alexp.run(sentenca)
-
-	if (isinstance(analise,type(None))):
-		analise = alexp.getAnaliseMorfologica()
-		print analiseMorfologica(analise)
-	else:
-		print analiseSintatica(analise)
+		#faz a tokenizacao e a classificacao
+		self.__t = self.__classificador.anotaSentencas(txt)
  
+		#retira artigos e preposicoes
+		self.__ts = self.__simplificador.simplificar(self.__t)
+		self.__t = None
+		
+		#aplica as regras
+		#self.__tr = self.__regras.aplicarRegras(self.__ts)
+		#self.__ts = None
  
-def analiseMorfologica(sentenca):
-	proc = reg.simplificar(sentenca)
-	return sin.sinonimosMorfologico(proc)
-
-        
-def analiseSintatica(sentenca):
-	analise = sin.sinonimosSintatico(sentenca)
-	return analise
 \ No newline at end of file
+		#executa a saida
+		return self.__out.executeOut(self.__ts).encode("utf-8")
@@ -0,0 +1,125 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+
+#Autor: Erickson Silva <erickson.silva@lavid.ufpb.br> <ericksonsilva@live.com>
+
+import os, csv, sys
+
+class WorkCSV(object):
+   _iInstance = None
+
+   class Singleton:
+      def __init__(self):
+         self.LeitorDicionarios = None
+
+      def __init__( self ):
+         if LeitorDicionarios._iInstance is None:
+            LeitorDicionarios._iInstance = LeitorDicionarios.Singleton()
+
+         self._EventHandler_instance = LeitorDicionarios._iInstance
+
+      def __getattr__(self, aAttr):
+         return getattr(self._iInstance, aAttr)
+
+      def __setattr__(self, aAttr, aValue):
+         return setattr(self._iInstance, aAttr, aValue) 
+         
+   # Define e inicializa os atributos
+   def __init__(self):
+      self.__path = "vlibras_user/vlibras-core/data/"
+      self.__fileInf = ''
+      self.__dicInf = {}
+      self.__fileSin = ''
+      self.__dicSin = {}
+      self.__fileWords = ''
+      self.__dicWords = {}
+      self.__file2Gen = ''
+      self.__dic2Gen = {}      
+      self.__fileTemVerbs = ''
+      self.__dicTemVerbs = {}     
+      self.createDicInf()
+      self.createDicSin()
+      self.createDicWords()
+      self.createDic2Gen()
+      self.createDicTemVerbs() 
+
+   # Abre o arquivo que contem os verbos no infinitivo e preenche o dicionario com os mesmos
+   def createDicInf(self):
+      try:
+         self.__fileInf = csv.reader(open(self.__path+"dicPortGlosa.csv"), delimiter=";")
+      except IOError, (errno, strerror): 
+         print "I/O error(%s): %s" % (errno, strerror)
+         print "createDicInf"
+
+      for row in self.__fileInf:
+         if row[1] != "": 
+            try:
+               self.__dicInf[row[0].decode("utf-8")] = row[1].decode("utf-8")  
+            except UnicodeDecodeError:
+               self.__dicInf[row[0].decode('iso8859-1').encode('utf-8')] = row[1].decode('iso8859-1').encode('utf-8')
+         
+   # Abre o arquivo que contem os sinonimos e preenche o dicionario com os mesmos
+   def createDicSin(self):
+      try:
+         self.__fileSin = csv.reader(open(self.__path+"portuguesGlosa.csv"), delimiter=";")
+      except IOError, (errno, strerror):
+         print "I/O error(%s): %s" % (errno, strerror)
+         print "createDicSin"
+   
+      for row in self.__fileSin:
+         if row[1] != "":
+            self.__dicSin[row[0].decode("utf-8")] = row[1].decode("utf-8")
+    
+
+    # Abre o arquivo que contem os tempos verbais
+   def createDicTemVerbs(self):
+      try:
+         self.__fileTemVerbs = csv.reader(open(self.__path+"temposVerbais.csv"), delimiter=";")
+      except IOError, (errno, strerror):
+         print "I/O error(%s): %s" % (errno, strerror)
+         print "createDicTemVerbs"
+   
+      for row in self.__fileTemVerbs:
+         self.__dicTemVerbs[row[0].decode("utf-8")] = row[0].decode("utf-8")     
+
+   # Abre o arquivo que contem os artigos e preposicoes de acordo com o modelo de idioma passado na chamada e preenche o dicionario com os mesmos
+   def createDicWords(self):
+      try:
+         self.__fileWords = csv.reader(open(self.__path+"hWordsRemove.csv"), delimiter=";")
+      except IOError, (errno, strerror):
+         print "I/O error(%s): %s" % (errno, strerror)
+         print "createDicWords"
+
+      for row in self.__fileWords:
+         self.__dicWords[row[0].decode("utf-8")] = row[0].decode("utf-8")
+        
+   # Abre o arquivo que contem os substantivos que sao comuns dos 2 generos e preenche o dicionario com os mesmos
+   def createDic2Gen(self):
+      try:
+         self.__file2Gen = csv.reader(open(self.__path+"subs2Generos.csv"), delimiter=";")
+      except IOError, (errno, strerror):
+         print "I/O error(%s): %s" % (errno, strerror)
+         print "createDic2Gen" 
+           
+      for row in self.__file2Gen:
+         self.__dic2Gen[row[0].decode("utf-8")] = row[0].decode("utf-8")    
+
+   # Retorna o dicionario dos verbos no infinitivo
+   def getDicInf(self):
+      return self.__dicInf
+
+   # Retorna o dicionario dos sinonimos
+   def getDicSin(self):
+      return self.__dicSin
+
+   # Retorna o dicionario dos artigos e preposicoes a serem removidos pelo simplificador
+   def getDicWords(self):
+      return self.__dicWords
+
+   # Retorna o dicionario dos substantivos a serem analisados pelo simplificador
+   def getDic2Gen(self):
+      return self.__dic2Gen
+
+   # Retorna o dicionario dos tempos verbais
+   def getDicTemVerbs(self):
+      return self.__dicTemVerbs   
 \ No newline at end of file
@@ -1,133 +0,0 @@
-#! /usr/bin/env python2.6
-# -*- coding: utf-8 -*-
-
-#---------------------------------
-
-# Editado:
-
-#Autor: Erickson Silva 
-#Email: <erickson.silva@lavid.ufpb.br> <ericksonsilva@live.com>
-
-#LAViD - Laboratório de Aplicações de Vídeo Digital
-
-#---------------------------------
-
-
-# Donatus Brazilian Portuguese Parser
-#
-# Copyright (C) 2010-2013 Leonel F. de Alencar
-#
-# Author: Leonel F. de Alencar <leonel.de.alencar@ufc.br>
-# Homepage: <http://www.leonel.profusehost.net/>
-#
-# Project's URL: <http://sourceforge.net/projects/donatus/>
-# For license information, see LICENSE.TXT
-#
-# $Id: alexp.py $
-
-"""Este módulo contém funções que permitem utilizar o Aelius para etiquetar uma sentença, construindo entradas lexicais com base nas etiquetas atribuídas às palavras da sentença. Essas entradas lexicais são integradas em uma gramática CFG dada, que é transformada em um parser, utilizado para gerar uma árvore de estrutura sintagmática da sentença. 
-"""
-import re, string,nltk,os
-from Aelius.Extras import carrega
-from Aelius import AnotaCorpus
-
-# definição de algumas variáveis globais para
-# facilitar utilização das diferentes funções do módulo
-
-# sintaxe default em subpasta de nltk_data
-DIR="/vlibras-translate/data/cfg.syn.nltk"
-
-# eventualmente será preciso incluir aqui outros sinais
-# de pontuação, como o travessão
-PUNCT=string.punctuation
-
-SENTENCA_ANOTADA=""
-
-
-def toqueniza(s):
-	"""Decodifica string utilizando utf-8, retornando uma lista de tokens em unicode.
-	"""
-	decodificada=s.decode("utf-8")
-	return AnotaCorpus.TOK_PORT.tokenize(decodificada)
-
-def getAnaliseMorfologica():
-	return SENTENCA_ANOTADA
-
-def etiquetaSentenca(s):
-	"""Aplica um dos etiquetadores do Aelius na etiquetagem da sentença dada como lista de tokens.
-	"""
-	etiquetador = list((carrega("AeliusHunPos"),"nltk"))
-	anotada = AnotaCorpus.anota_sentencas([s],etiquetador,"hunpos")[0]
-	return anotada
-
-def geraEntradasLexicais(lista):
-	"""Gera entradas lexicais no formato CFG do NLTK a partir de lista de pares constituídos de tokens e suas etiquetas.
-	"""
-	entradas=[]
-	for e in lista:
-		# é necessário substituir símbolos como "-" e "+" do CHPTB
-		# que não são aceitos pelo NLTK como símbolos não terminais
-		c=re.sub(r"[-+]","_",e[1])
-		c=re.sub(r"\$","_S",c)
-		entradas.append("%s -> '%s'" % (c, e[0].lower()))
-	return entradas
-
-def corrigeAnotacao(lista):
-	"""Esta função deverá corrigir alguns dos erros de anotação mais comuns do Aelius. No momento, apenas é corrigida VB-AN depois de TR.
-	"""
-	i=1
-	while i < len(lista):
-		if lista[i][1] == "VB-AN" and lista[i-1][1].startswith("TR"):
-			lista[i]=(lista[i][0],"VB-PP")
-		i+=1
-
-# a função abaixo parece muito restritiva; talvez não seja necessário
-# que o arquivo esteja no diretório nltk_data
-def encontraArquivo(caminho=DIR):
-	"""Encontra arquivo na pasta vlibras-translate.
-	"""
-	home = os.path.expanduser("~")
-	path = os.path.realpath(home+caminho)
-	return path
-
-def extraiSintaxe(caminho=DIR):
-	"""Extrai gramática armazenada em arquivo cujo caminho é definido relativamente ao diretório nltk_data.
-	"""
-	arquivo=encontraArquivo(caminho)
-	if arquivo:
-		f=open(arquivo,"rU")
-		sintaxe=f.read()
-		f.close()
-		return sintaxe
-	else:
-		print "Arquivo %s não encontrado em nenhum dos diretórios de dados do NLTK:\n%s" % (caminho,"\n".join(nltk.data.path))
-	
-
-def analisaSentenca(sentenca):
-	"""Retorna lista de árvores de estrutura sintagmática para a sentença dada sob a forma de uma lista de tokens, com base na gramática CFG cujo caminho é especificado como segundo argumento da função. Esse caminho é relativo à pasta nltk_data da instalação local do NLTK. A partir da etiquetagem morfossintática da sentença são geradas entradas lexicais que passam a integrar a gramática CFG. O caminho da gramática e o parser gerado são armazenados como tupla na variável ANALISADORES.
-	"""
-	parser=constroiAnalisador(sentenca)
-	codificada=[w.encode("utf-8") for w in sentenca]
-	trees=parser.parse_one(codificada)
-	return trees
-
-def constroiAnalisador(s):
-	"""Constrói analisador a partir de uma única sentença não anotada, dada como lista de tokens, e uma lista de regras sintáticas no formato CFG, armazenadas em arquivo. Esta função tem um bug, causado pela maneira como o Aelius etiqueta sentenças usando o módulo ProcessaNomesProprios: quando a sentença se inicia por paravra com inicial minúscula, essa palavra não é incorporada ao léxico, mas a versão com inicial maiúscula.
-	"""
-	global SENTENCA_ANOTADA
-	SENTENCA_ANOTADA=etiquetaSentenca(s)
-	corrigeAnotacao(SENTENCA_ANOTADA)
-	entradas=geraEntradasLexicais(SENTENCA_ANOTADA)
-	lexico="\n".join(entradas)
-	gramatica="%s\n%s" % (extraiSintaxe(DIR).strip(),lexico)
-	cfg=nltk.CFG.fromstring(gramatica)
-	return nltk.ChartParser(cfg)
-
-def exibeArvores(arvores):
-	"""Função 'wrapper' para a função de exibição de árvores do NLTK"""
-	nltk.draw.draw_trees(*arvores)
-
-def run(sentenca):
-	tokens=toqueniza(sentenca)
-	trees=analisaSentenca(tokens)
-	return trees
 \ No newline at end of file
@@ -0,0 +1,161 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+
+#Autor: Erickson Silva 
+#Email: <erickson.silva@lavid.ufpb.br> <ericksonsilva@live.com>
+
+#LAViD - Laboratório de Aplicações de Vídeo Digital
+
+from LeitorDicionarios import *
+from Iterator import *
+from StringAux import *
+
+class AplicaRegras(object):
+	"""Applies simplification rules to a tagged sentence.
+
+	A sentence is a sequence of (token, tag) pairs. simplificar() drops tokens
+	whose tag is listed as ignored, swaps verbs for their dictionary infinitive
+	and marks common-gender nouns; verbalAnalysis() then reduces tense-tagged
+	verbs and appends a FUTURO/PASSADO marker.
+	"""
+
+	# Loads the dictionaries used by every rule.
+	def __init__(self):
+		self.__dicionarios = LeitorDicionarios()
+		# Defined up front so verbalAnalysis() can be called before simplificar().
+		self.__countVerb = 0
+
+	def simplificar(self, texto):
+		"""Simplify texto, a sequence of (token, tag) pairs.
+
+		Returns a list of [token, tag] lists with upper-cased tokens. When at
+		least one verb tagged VB-P, VB-D or VB-R was seen, the list is passed
+		through verbalAnalysis() before being returned.
+		"""
+		it = Iterator()
+		it.load(texto)
+		self.__ts = []
+		self.__verb = False
+		self.__adv = False
+		self.__countVerb = 0
+		self.__countAdv = 0
+		while it.hasNext():
+			token = it.getAtualW()
+			tag = it.getAtualT()
+			self.__b = False  # becomes True once some rule has emitted this token
+
+			# Tokens whose tag is in the ignored list (articles/prepositions) are dropped.
+			if self.__dicionarios.hasPalavraIgnorada(tag):
+				continue
+
+			# Count adverbs.
+			if tag[:3] == "ADV":
+				self.__adv = True
+				self.__countAdv += 1
+
+			if tag[:2] == "VB":
+				# Emit the infinitive when the dictionary has one for this verb.
+				if self.__dicionarios.hasVerboInfinitivo(token):
+					self.__ts.append([self.__dicionarios.getVerboInfinitivo(token), tag])
+					self.__b = True
+
+				# Count verbs that carry a tense tag; verbalAnalysis() needs the total.
+				if tag in ("VB-P", "VB-D", "VB-R"):
+					self.__verb = True
+					self.__countVerb += 1
+
+			# Common-gender noun: prefix it with MULHER or HOMEM.
+			if self.__dicionarios.hasSubst2Genero(token):
+				feminino = False
+				# The first token has no predecessor; asking the iterator for one
+				# would wrap around to the last token of the sentence.
+				if it.getCount() > 0:
+					tagAnterior = it.getAntT()
+					# Feminine only when the previous token is an ignored word
+					# (article) whose tag ends in "F" or "F-P".
+					# NOTE(review): the original had no inner grouping, so any
+					# "F-P" tag matched on its own; confirm this is the intent.
+					feminino = (self.__dicionarios.hasPalavraIgnorada(tagAnterior)
+						and (tagAnterior.endswith("F") or tagAnterior.endswith("F-P")))
+				if feminino:
+					self.__ts.append(["MULHER ", "2GEN"])
+				else:
+					self.__ts.append(["HOMEM ", "2GEN"])
+				self.__ts.append([token, tag])
+				self.__b = True
+
+			# Plural handling is currently disabled:
+			#if tag[-2:] == "-P":
+			#	token = self.pluralAnalysis(token)
+
+			# No rule fired: keep the original token.
+			if not self.__b:
+				self.__ts.append([token, tag])
+
+		# Tense-tagged verbs were found: run the verbal pass over the result.
+		if self.__verb:
+			return self.verbalAnalysis(self.__ts)
+
+		return self.__ts
+
+
+	def auxConvert(self, tag):
+		"""Convert a roman numeral with roman_to_int; return tag unchanged on failure."""
+		try:
+			return roman_to_int(tag)
+		except Exception:
+			# Not a roman numeral (or conversion failed). A bare "except:" would
+			# also swallow KeyboardInterrupt/SystemExit.
+			return tag
+
+	def verbalAnalysis(self, lista):
+		"""Reduce tense-tagged verbs in lista and append a tense marker.
+
+		lista is returned untouched when it contains an adverb present in the
+		tense dictionary, or a VB-P verb while at most one tense verb was counted
+		by simplificar(). Otherwise a new list is built: among the VB-P/VB-D/VB-R
+		verbs only the one that brings the running count up to that total is
+		kept, every other token is copied, and ["FUTURO", "T-VB"] (if a VB-R was
+		seen) or ["PASSADO", "T-VB"] (if a VB-D was seen) is appended.
+		"""
+		lv = []
+		it = Iterator()
+		it.load(lista)
+		hasFut = False
+		hasPas = False
+		count = 0
+		while it.hasNext():
+			token = it.getAtualW().upper()
+			tag = it.getAtualT()
+
+			# A temporal adverb already expresses the tense: leave the sentence alone.
+			if tag[:3] == "ADV" and self.__dicionarios.hasTempoVerbal(token):
+				return lista
+
+			if tag == "VB-P":
+				if self.__countVerb <= 1:
+					return lista
+				count += 1
+				if count == self.__countVerb:
+					lv.append([token, tag])
+			elif tag == "VB-D":
+				count += 1
+				hasPas = True
+				if count == self.__countVerb:
+					lv.append([token, tag])
+			elif tag == "VB-R":
+				count += 1
+				hasFut = True
+				if count == self.__countVerb:
+					lv.append([token, tag])
+			else:
+				lv.append([token, tag])
+
+		if hasFut:
+			lv.append(["FUTURO", "T-VB"])
+		elif hasPas:
+			lv.append(["PASSADO", "T-VB"])
+		return lv
+
+
+	def pluralAnalysis(self, word):
+		"""Return a singular form of the upper-case, unaccented word.
+
+		Suffix rules are tried in order; a word matching none of them (including
+		the empty string) is returned unchanged.
+		"""
+		sufixo3 = word[-3:]
+		sufixo2 = word[-2:]
+
+		# All three-letter suffixes are compared against the last THREE characters.
+		if sufixo3 in ("OES", "AES", "AOS"):
+			return word[:-3] + "AO"
+		if sufixo3 in ("RES", "ZES", "NES"):
+			return word[:-2]
+		if sufixo3 == "SES":
+			#TODO: some words carry an accent in the singular root, e.g. Gás - Gases
+			return word[:-2]
+		if sufixo2 == "NS":
+			return word[:-2] + "M"
+		if sufixo3 == "EIS":
+			return word[:-3] + "IL"
+		if sufixo2 == "IS":
+			# Guard the length: word[-3] does not exist for a two-letter word.
+			if len(word) > 2 and word[-3] in ("A", "E", "O", "U"):
+				return word[:-2] + "L"
+			return word
+		if word[-1:] == "S":
+			#TODO: paroxytone/proparoxytone words ending in S, e.g. lápis, vírus, ônibus
+			return word[:-1]
+		return word
 \ No newline at end of file
@@ -0,0 +1,48 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+
+#Autor: Erickson Silva 
+#Email: <erickson.silva@lavid.ufpb.br> <ericksonsilva@live.com>
+
+#LAViD - Laboratório de Aplicações de Vídeo Digital
+
+import os, csv, sys
+from nltk.tree import *
+from LeitorDicionarios import *
+
+class AplicaSinonimos(object):
+	"""Replaces tokens by the synonym registered for them in the dictionaries."""
+
+	# Loads the dictionaries used for the synonym lookups.
+	def __init__(self):
+		self.__dicionarios = LeitorDicionarios()
+
+	def sinonimosMorfologico(self, texto):
+		"""Replace the token of every (token, tag) entry of texto by its synonym.
+
+		texto is modified in place (each entry becomes a list) and returned.
+		"""
+		for i, elem in enumerate(texto):
+			par = list(elem)
+			par[0] = self.verificaPalavra(elem[0])
+			texto[i] = par
+		return texto
+
+
+	def dicionarioSinonimoFolhas(self, folhas):
+		"""Return a dict mapping every leaf in folhas to its synonym (or to itself)."""
+		return dict((f, self.verificaPalavra(f)) for f in folhas)
+
+	def sinonimosSintatico(self, texto):
+		"""Return a copy of the tree texto whose leaves are replaced by their synonyms.
+
+		The input tree is not modified. Leaves are rewritten by position rather
+		than by substituting text in the printed tree: str.replace returns a new
+		string (its result used to be discarded, so nothing was ever replaced)
+		and a textual substitution would also hit labels and parts of other words.
+		"""
+		tree = texto.copy(deep=True)
+		for posicao in tree.treepositions('leaves'):
+			tree[posicao] = self.verificaPalavra(tree[posicao])
+		return tree
+
+	def verificaPalavra(self, token):
+		"""Return the synonym registered for token, or token itself when there is none."""
+		if self.__dicionarios.hasSinonimo(token):
+			return self.__dicionarios.getSinonimo(token)
+		return token
 \ No newline at end of file
@@ -0,0 +1,60 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+
+#Autor: Erickson Silva 
+#Email: <erickson.silva@lavid.ufpb.br> <ericksonsilva@live.com>
+
+#LAViD - Laboratório de Aplicações de Vídeo Digital
+
+class Iterator(object):
+	"""Cursor over a list of (word, tag) pairs.
+
+	The cursor starts before the first element; hasNext() both tests for and
+	moves to the next element. Words are returned upper-cased.
+	"""
+
+	# The cursor starts before the first element.
+	def __init__(self):
+		self.count = -1
+
+	def load(self, lista):
+		"""Copy lista into the iterator and record its size (the cursor is not moved)."""
+		self.__list = list(lista)
+		self.size = len(lista)
+
+	def reset(self):
+		"""Move the cursor back to before the first element."""
+		self.count = -1
+
+	def getSize(self):
+		"""Return the number of loaded elements."""
+		return self.size
+
+	def getCount(self):
+		"""Return the current cursor position (-1 before the first element)."""
+		return self.count
+
+	def getToken(self, i):
+		"""Return the next ("+"), previous ("-") or, for any other i, current element.
+
+		Plain list indexing is used, so asking for the previous element at
+		position 0 yields the last element of the list.
+		"""
+		if i == "+":
+			deslocamento = 1
+		elif i == "-":
+			deslocamento = -1
+		else:
+			deslocamento = 0
+		return self.__list[self.count + deslocamento]
+
+	def getAtualW(self):
+		"""Return the current word, upper-cased."""
+		atual = self.getToken(0)
+		return atual[0].upper()
+
+	def getAtualT(self):
+		"""Return the current tag."""
+		atual = self.getToken(0)
+		return atual[1]
+
+	def getProxW(self):
+		"""Return the next word, upper-cased."""
+		proximo = self.getToken("+")
+		return proximo[0].upper()
+
+	def getProxT(self):
+		"""Return the next tag."""
+		proximo = self.getToken("+")
+		return proximo[1]
+
+	def getAntW(self):
+		"""Return the previous word, upper-cased."""
+		anterior = self.getToken("-")
+		return anterior[0].upper()
+
+	def getAntT(self):
+		"""Return the previous tag."""
+		anterior = self.getToken("-")
+		return anterior[1]
+
+	def hasNext(self):
+		"""Advance to the next element and return True, or return False at the end."""
+		if self.count >= self.size-1:
+			return False
+		self.count += 1
+		return True
 \ No newline at end of file
@@ -0,0 +1,139 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+
+#Autor: Erickson Silva 
+#Email: <erickson.silva@lavid.ufpb.br> <ericksonsilva@live.com>
+
+#LAViD - Laboratório de Aplicações de Vídeo Digital
+
+import os, csv, sys
+
+class LeitorDicionarios(object):
+   """Loads the translation dictionaries from CSV files and answers lookups on them.
+
+   All CSV files are read once, in the constructor, from the data directory.
+   Keys and values are stored as unicode text.
+   """
+   # NOTE(review): every instance re-reads all CSV files, and AplicaRegras and
+   # AplicaSinonimos each create their own instance; a shared instance would
+   # avoid the duplicate loading (an earlier singleton attempt was left
+   # commented out here).
+
+   # Defines the attributes and loads every dictionary.
+   def __init__(self):
+      # Per-user data directory, built the same way alexp.encontraArquivo does.
+      self.__path = os.path.expanduser("~") + "/vlibras-translate/data/"
+      if not os.path.isdir(self.__path):
+         # Fall back to the location that used to be hard-coded.
+         self.__path = "/home/erickson/vlibras-translate/data/"
+      self.__dicInf = {}
+      self.__dicSin = {}
+      self.__dicWords = {}
+      self.__dic2Gen = {}
+      self.__dicTemVerbs = {}
+      self.carregarVerbosInfinitivos()
+      self.carregarSinonimos()
+      self.carregarPalavrasIgnoradas()
+      self.carregarSubst2Generos()
+      self.carregarTemposVerbais()
+
+   def __lerLinhas(self, nomeArquivo, origem):
+      """Return the non-empty rows of the ';'-separated file nomeArquivo.
+
+      If the file cannot be opened, the I/O error and origem (the name of the
+      calling loader) are printed and an empty list is returned.
+      """
+      try:
+         arquivo = open(self.__path+nomeArquivo)
+      except IOError as erro:
+         print("I/O error(%s): %s" % (erro.errno, erro.strerror))
+         print(origem)
+         return []
+      try:
+         # csv.reader yields [] for blank lines; drop them so row[0] is always valid.
+         return [row for row in csv.reader(arquivo, delimiter=";") if row]
+      finally:
+         arquivo.close()
+
+   def __decodificar(self, texto):
+      """Return texto as unicode: UTF-8 first, ISO-8859-1 as a fallback."""
+      if not isinstance(texto, bytes):
+         return texto
+      try:
+         return texto.decode("utf-8")
+      except UnicodeDecodeError:
+         # Keep the result as unicode. Re-encoding it to UTF-8 bytes would create
+         # a key that never matches the unicode tokens used in lookups.
+         return texto.decode("iso8859-1")
+
+   # Reads dicPortGlosa.csv and fills the infinitive-verb dictionary.
+   def carregarVerbosInfinitivos(self):
+      for row in self.__lerLinhas("dicPortGlosa.csv", "carregarVerbosInfinitivos"):
+         # Rows without a second column, or with an empty one, carry no infinitive.
+         if len(row) > 1 and row[1] != "":
+            self.__dicInf[self.__decodificar(row[0])] = self.__decodificar(row[1])
+
+   # Reads portuguesGlosa.csv and fills the synonym dictionary.
+   def carregarSinonimos(self):
+      for row in self.__lerLinhas("portuguesGlosa.csv", "carregarSinonimos"):
+         if len(row) > 1 and row[1] != "":
+            self.__dicSin[self.__decodificar(row[0])] = self.__decodificar(row[1])
+
+   # Reads temposVerbais.csv and fills the verb-tense dictionary.
+   def carregarTemposVerbais(self):
+      for row in self.__lerLinhas("temposVerbais.csv", "carregarTemposVerbais"):
+         tempo = self.__decodificar(row[0])
+         self.__dicTemVerbs[tempo] = tempo
+
+   # Reads hWordsRemove.csv (articles and prepositions) and fills the ignored-word dictionary.
+   def carregarPalavrasIgnoradas(self):
+      for row in self.__lerLinhas("hWordsRemove.csv", "carregarPalavrasIgnoradas"):
+         palavra = self.__decodificar(row[0])
+         self.__dicWords[palavra] = palavra
+
+   # Reads subs2Generos.csv and fills the common-gender noun dictionary.
+   def carregarSubst2Generos(self):
+      for row in self.__lerLinhas("subs2Generos.csv", "carregarSubst2Generos"):
+         substantivo = self.__decodificar(row[0])
+         self.__dic2Gen[substantivo] = substantivo
+
+   # Returns the infinitive registered for token (KeyError if there is none).
+   def getVerboInfinitivo(self, token):
+      return self.__dicInf[token]
+
+   # Returns the synonym registered for token (KeyError if there is none).
+   def getSinonimo(self, token):
+      return self.__dicSin[token]
+
+   # Returns the ignored-word entry for token (KeyError if there is none).
+   def getPalavraIgnorada(self, token):
+      return self.__dicWords[token]
+
+   # Returns the common-gender noun entry for token (KeyError if there is none).
+   def getSubst2Generos(self, token):
+      return self.__dic2Gen[token]
+
+   # Returns the verb-tense entry for token (KeyError if there is none).
+   def getTempoVerbal(self, token):
+      return self.__dicTemVerbs[token]
+
+   # The has* methods report whether token is a key of the matching dictionary.
+   def hasVerboInfinitivo(self, token):
+      return token in self.__dicInf
+
+   def hasSinonimo(self, token):
+      return token in self.__dicSin
+
+   def hasPalavraIgnorada(self, token):
+      return token in self.__dicWords
+
+   def hasSubst2Genero(self, token):
+      return token in self.__dic2Gen
+
+   def hasTempoVerbal(self, token):
+      return token in self.__dicTemVerbs
@@ -0,0 +1,45 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+
+#Autor: Erickson Silva 
+#Email: <erickson.silva@lavid.ufpb.br> <ericksonsilva@live.com>
+
+#LAViD - Laboratório de Aplicações de Vídeo Digital
+
+import alexp
+from AplicaSinonimos import *
+from AplicaRegras import * 
+
+# Shared by every translation call; both objects are created once, when the module is imported.
+sin = AplicaSinonimos()
+reg = AplicaRegras()
+
+
+def iniciarTraducao(texto):
+	"""Split texto on "." and run gerarAnalise on every fragment that has content.
+
+	Nothing is returned; gerarAnalise prints each result.
+	"""
+	for sentenca in texto.split("."):
+		# Skip empty and whitespace-only fragments (e.g. what follows a trailing
+		# ". "): they hold no tokens to analyse.
+		if sentenca.strip():
+			gerarAnalise(sentenca)
+
+def gerarAnalise(sentenca):
+	"""Analyse one sentence and print the result.
+
+	When alexp.run yields no tree (None), the sentence annotated by alexp is
+	sent through analiseMorfologica; otherwise the tree goes through
+	analiseSintatica.
+	"""
+	arvore = alexp.run(sentenca)
+
+	if arvore is None:
+		anotada = alexp.getAnaliseMorfologica()
+		print(analiseMorfologica(anotada))
+		return
+
+	print(analiseSintatica(arvore))
+		
+
+def analiseMorfologica(sentenca):
+	"""Apply the simplification rules to sentenca, then the synonym replacement."""
+	return sin.sinonimosMorfologico(reg.simplificar(sentenca))
+
+        
+def analiseSintatica(sentenca):
+	"""Return the result of the synonym replacement on the syntactic tree sentenca."""
+	return sin.sinonimosSintatico(sentenca)
 \ No newline at end of file
@@ -0,0 +1,133 @@
+#! /usr/bin/env python2.6
+# -*- coding: utf-8 -*-
+
+#---------------------------------
+
+# Editado:
+
+#Autor: Erickson Silva 
+#Email: <erickson.silva@lavid.ufpb.br> <ericksonsilva@live.com>
+
+#LAViD - Laboratório de Aplicações de Vídeo Digital
+
+#---------------------------------
+
+
+# Donatus Brazilian Portuguese Parser
+#
+# Copyright (C) 2010-2013 Leonel F. de Alencar
+#
+# Author: Leonel F. de Alencar <leonel.de.alencar@ufc.br>
+# Homepage: <http://www.leonel.profusehost.net/>
+#
+# Project's URL: <http://sourceforge.net/projects/donatus/>
+# For license information, see LICENSE.TXT
+#
+# $Id: alexp.py $
+
+"""Este módulo contém funções que permitem utilizar o Aelius para etiquetar uma sentença, construindo entradas lexicais com base nas etiquetas atribuídas às palavras da sentença. Essas entradas lexicais são integradas em uma gramática CFG dada, que é transformada em um parser, utilizado para gerar uma árvore de estrutura sintagmática da sentença. 
+"""
+import re, string,nltk,os
+from Aelius.Extras import carrega
+from Aelius import AnotaCorpus
+
+# definição de algumas variáveis globais para
+# facilitar utilização das diferentes funções do módulo
+
+# sintaxe default em subpasta de nltk_data
+DIR="/vlibras-translate/data/cfg.syn.nltk"
+
+# eventualmente será preciso incluir aqui outros sinais
+# de pontuação, como o travessão
+PUNCT=string.punctuation
+
+SENTENCA_ANOTADA=""
+
+
+def toqueniza(s):
+	"""Decode the UTF-8 string s and return it as a list of unicode tokens."""
+	return AnotaCorpus.TOK_PORT.tokenize(s.decode("utf-8"))
+
+def getAnaliseMorfologica():
+	"""Return the module-level SENTENCA_ANOTADA, which constroiAnalisador sets to the last tagged sentence."""
+	return SENTENCA_ANOTADA
+
+def etiquetaSentenca(s):
+	"""Tag the sentence s, given as a list of tokens, with the Aelius HunPos tagger."""
+	modelo = [carrega("AeliusHunPos"), "nltk"]
+	sentencas = AnotaCorpus.anota_sentencas([s], modelo, "hunpos")
+	return sentencas[0]
+
+def geraEntradasLexicais(lista):
+	"""Build NLTK CFG lexical entries from a list of (token, tag) pairs.
+
+	Each entry has the form "TAG -> 'token'" with the token lower-cased.
+	"""
+	def simbolo(etiqueta):
+		# "-" and "+" (CHPTB tags) and "$" are not accepted by NLTK in
+		# nonterminal symbols, so they are rewritten.
+		return re.sub(r"\$", "_S", re.sub(r"[-+]", "_", etiqueta))
+
+	return ["%s -> '%s'" % (simbolo(par[1]), par[0].lower()) for par in lista]
+
+def corrigeAnotacao(lista):
+	"""Fix common Aelius tagging errors in place.
+
+	For now only one case is handled: a VB-AN tag right after a tag starting
+	with TR becomes VB-PP.
+	"""
+	for atual in range(1, len(lista)):
+		if lista[atual][1] != "VB-AN":
+			continue
+		if lista[atual-1][1].startswith("TR"):
+			lista[atual] = (lista[atual][0], "VB-PP")
+
+# a função abaixo parece muito restritiva; talvez não seja necessário
+# que o arquivo esteja no diretório nltk_data
+def encontraArquivo(caminho=DIR):
+	"""Return the canonical path obtained by appending caminho to the user's home directory."""
+	return os.path.realpath(os.path.expanduser("~") + caminho)
+
+def extraiSintaxe(caminho=DIR):
+	"""Return the text of the grammar file located by encontraArquivo(caminho).
+
+	Raises IOError when the file cannot be opened; a diagnostic is printed first.
+	"""
+	arquivo=encontraArquivo(caminho)
+	try:
+		f=open(arquivo,"rU")
+	except IOError:
+		# encontraArquivo always returns a non-empty path, so a missing file can
+		# only be detected here, when opening it.
+		print("Arquivo %s não encontrado em nenhum dos diretórios de dados do NLTK:\n%s" % (caminho,"\n".join(nltk.data.path)))
+		raise
+	# The with-block closes the file even if read() fails.
+	with f:
+		return f.read()
+	
+
+def analisaSentenca(sentenca):
+	"""Parse sentenca, given as a list of unicode tokens.
+
+	A parser is built for the sentence by constroiAnalisador (the sentence's
+	tags become lexical entries added to the CFG grammar); the tokens are then
+	encoded as UTF-8 and the value of parser.parse_one is returned.
+	"""
+	parser = constroiAnalisador(sentenca)
+	return parser.parse_one([palavra.encode("utf-8") for palavra in sentenca])
+
+def constroiAnalisador(s):
+	"""Build a chart parser for the untagged sentence s, given as a list of tokens.
+
+	The sentence is tagged and stored in the module-level SENTENCA_ANOTADA, its
+	lexical entries are appended to the CFG rules read from the grammar file,
+	and a parser is created from the combined grammar.
+
+	Known bug, caused by the way Aelius tags sentences through the
+	ProcessaNomesProprios module: when the sentence starts with a lower-case
+	word, the capitalised version of that word is added to the lexicon instead
+	of the word itself.
+	"""
+	global SENTENCA_ANOTADA
+	SENTENCA_ANOTADA = etiquetaSentenca(s)
+	corrigeAnotacao(SENTENCA_ANOTADA)
+	lexico = "\n".join(geraEntradasLexicais(SENTENCA_ANOTADA))
+	regras = extraiSintaxe(DIR).strip()
+	return nltk.ChartParser(nltk.CFG.fromstring("%s\n%s" % (regras, lexico)))
+
+def exibeArvores(arvores):
+	"""Wrapper around NLTK's tree-drawing function; each tree in arvores is passed as a separate argument."""
+	nltk.draw.draw_trees(*arvores)
+
+def run(sentenca):
+	"""Tokenize sentenca and return the result of analisaSentenca on the tokens."""
+	return analisaSentenca(toqueniza(sentenca))
 \ No newline at end of file
@@ -1,60 +0,0 @@
-#!/usr/bin/python
-# -*- coding: utf-8 -*-
-
-#Autor: Erickson Silva <erickson.silva@lavid.ufpb.br> <ericksonsilva@live.com>
-
-import xml.etree.ElementTree as ET
-import os
- 
-class AplicadorRegras(object):
-
-    # inicializacao das variaves
-    def __init__(self):  
-        self.__tree = ET.parse('vlibras_user/vlibras-core/data/regras.xml')
-        self.__root = self.__tree.getroot()
-        self.__tAux = []
-        self.__dAux = {}
-
-    # aplica as regras
-    def aplicarRegras(self, ts):
-        self.__n = len(ts) # quantidade de tokens
-        for i in range(0,self.__n):
-            self.__tAux.append(self.__n)
-        self.__name = self.getNameRule(ts) # todos os etiquetadores numa so string (ver linha 35)
-        for morpho in self.__root:
-            for rule in morpho.findall('rule'): # procura a tag rule 
-                if rule.get('name') == self.__name: # procura o atributo name na tag rule (ver linha 17)
-                    if rule.find('active').text == "true": # verifica se a regra esta ativa
-                        self.__c = 0
-                    for classe in rule.iter('class'): # for nas tags class
-                        self.__dAux[self.__c] = int(classe.find('newpos').text) # preenche dicionario com a ordem atual e futura das palavras
-                        self.__c += 1
-                    self.__c = 0
-                    for w,t in ts:
-                        i = self.__dAux.get(self.__c) # pega o indice de onde ficara a palavra
-                        self.__tAux[i] = ([w,t]) # preenche a lista com a palavra+etiqueta na posicao correta (segundo o arquivo regras.xml)
-                        self.__c += 1
-                    return self.__tAux # retorna nova lista (ordenada)       
-        return ts # retorna a lista sem alteracoes (nao existe regra)                     
-
-    def getNameRule(self, ts):
-        self.__name = ""
-        for w,t in ts:
-            if t[:2] != "VB":
-                self.__name += t
-            else: 
-                self.__name += t[:2]
-        return self.__name
-
-
-
-
-
-
-
-
-
-
-
-
-
@@ -1,44 +0,0 @@
-#!/usr/bin/python
-# -*- coding: utf-8 -*-
-
-#Autor: Erickson Silva <erickson.silva@lavid.ufpb.br> <ericksonsilva@live.com>
-
-from Aelius import AnotaCorpus, Toqueniza, Extras
-
-class Classificador(object):
-
-	# inicializacao das variaveis
-	def __init__(self):	
-		self.__h = Extras.carrega("AeliusHunPos") # carrega o modelo de idioma (passado por parametro ao instanciar)
-		
-	def anotaSentencas(self, str):
-		self.__t = ""
-		self.__tokens = ""
-		#try:
-		# tokenizae
-		self.__tokens = Toqueniza.TOK_PORT.tokenize(str)
-
-		# realiza a classificacao morfologica
-		self.__t = AnotaCorpus.anota_sentencas([self.__tokens],self.__h,'hunpos')
-			
-		return self.listClean(self.__t)
-		#except:
-		#	print "Erro ao efetuar a classificação morfologica."
-
-
-	def listClean(self, l):
-		lClean = []
-		for w,t in l[0]:
-			lClean.append([w,t])
-		return lClean
-
-	# faz a impressao (usado apenas pra testes)
-	def imprimeSentencas(self):
-		for w,t in self.t[0]:
-			print "%s_%s " % (w,t),
-
-
-
-
-
-
@@ -1,59 +0,0 @@
-#!/usr/bin/python
-# -*- coding: utf-8 -*-
-
-#Autor: Erickson Silva <erickson.silva@lavid.ufpb.br> <ericksonsilva@live.com>
-
-from StringAux import *
-
-class Iterator(object):
-
-	# inicializacao das variaveis
-	def __init__(self):	
-		self.count = -1
-		
-	def load(self, lista):
-		self.__list = list(lista);
-		self.size = len(lista)
-
-	def reset(self):
-		self.count = -1
-
-	def getSize(self):
-		return self.size
-
-	def getCount(self):
-		return self.count
-
-	def getToken(self, i):
-		if(i == "+"):
-			return self.__list[self.count+1]
-
-		elif(i == "-"):
-			return self.__list[self.count-1]
-
-		else:
-			return self.__list[self.count]
-
-	def getAtualW(self):
-		return remover_acentos(self.getToken(0)[0].upper().encode('utf-8'))
-
-	def getAtualT(self):
-		return self.getToken(0)[1].upper().encode('utf-8')
-
-	def getProxW(self):
-		return remover_acentos(self.getToken("+")[0].upper().encode('utf-8'))
-
-	def getProxT(self):
-		return self.getToken("+")[1].upper().encode('utf-8')		
-
-	def getAntW(self):
-		return remover_acentos(self.getToken("-")[0].upper().encode('utf-8'))
-
-	def getAntT(self):
-		return self.getToken("-")[1].upper().encode('utf-8')
-
-	def hasNext(self):
-		if(self.count < self.size-1):
-			self.count += 1
-			return True
-		return False
 \ No newline at end of file
@@ -1,14 +0,0 @@
-#!/usr/bin/python
-# -*- coding: utf-8 -*-
-
-from Tradutor import *
-
-tradutor = Tradutor()
-
-def iniciar(x):
-	try:
-		text = x.decode("utf-8")
-	except:
-		text = x.decode("iso-8859-1")
-		
-	return tradutor.traduzir(text)
@@ -1,24 +0,0 @@
-#!/usr/bin/python
-# -*- coding: utf-8 -*-
-
-#Autor: Erickson Silva <erickson.silva@lavid.ufpb.br> <ericksonsilva@live.com>
-
-import sys
-from Iterator import *
-from StringAux import *
-
-class Output(object):
-
-	# inicializa a variavel com o valor passado por paramentro ao instanciar
-	def __init__(self):
-		self.it = Iterator()
-
-	# executa a saida
-	def executeOut(self, ts):
-		self.__glosa = []
-		self.it.load(ts)
-		while(self.it.hasNext()):
-			self.__glosa.append(self.it.getAtualW())
-		self.it.reset()
-		return ' '.join(self.__glosa)
-
@@ -1,164 +0,0 @@
-#!/usr/bin/python
-# -*- coding: utf-8 -*-
-
-#Autor: Erickson Silva <erickson.silva@lavid.ufpb.br> <ericksonsilva@live.com>
-
-from WorkCSV import *
-from Iterator import *
-from StringAux import *
-
-class Simplificador(object):
-
-	# inicializa todos as variaveis
-	def __init__(self):	
-		self.it = Iterator()
-		self.__csv = WorkCSV()
-		self.__dicInf = {}
-		self.__dicSin = {}
-		self.__dicWords = {}
-		self.__dic2Gen = {}
-		self.__dicTemVerbs = {}
-		self.executeWorkCSV()
-
-	# retira artigos e preposicoes; passa verbos para infinitivo e verificar se há sinonimos
-	def simplificar(self, texto):
-		self.__ts = []
-		self.it.load(texto)
-		self.__verb = False
-		self.__adv = False;
-		self.__countVerb = 0
-		self.__countAdv = 0
-		countWords = 0
-		while(self.it.hasNext()):
-			w = self.auxConvert(self.it.getAtualW())
-			t = self.it.getAtualT()
-			self.__b = False
-			if self.__dicWords.has_key(t) == False: # verifica se nao eh artigo/preposicao
-				wu = w.upper() 						# deixa o token maiusculo
-				#if t[:2] == "VB":
-				if t[-2:] == "-P":
-					wu = self.pluralAnalysis(w)
-				if t == "VB-P" or t == "VB-D" or t == "VB-R":
-					self.__verb = True
-					self.__countVerb += 1
-				if t[:3] == "ADV":
-					self.__adv = True
-					self.__countAdv += 1					
-				if self.__dicInf.has_key(wu):		# verifica se ha um verbo infinitivo desse token
-					sAux = self.__dicInf[wu]		# se sim, adiciona numa string aux
-					if self.__dicSin.has_key(sAux):	# verifica se ha um sinonimo para esse verbo infinitivo
-						self.__ts.append([self.__dicSin[sAux],t]) # se sim, entao adiciona na lista
-						self.__b = True          
-					else:					
-						self.__ts.append([sAux,t])  # caso contrario, adiciona so o verbo infinitivo msm
-						self.__b = True
-				if self.__b == False and self.__dicSin.has_key(wu):	# verifica se nao foi encontrado verbo infinitivo e se ha sinonimo
-					self.__ts.append([self.__dicSin[wu],t]) # adiciona na o sinonimo lista
-					self.__b = True   		
-
-				if self.__dic2Gen.has_key(wu):
-					del self.__ts[-1]
-					lenTicket = len(self.it.getAntT())
-					if ((self.__dicWords.has_key(self.it.getAntT())) and (self.it.getAntT()[lenTicket-1:] == "F") or (self.it.getAntT()[lenTicket-3:] == "F-P")):
-						self.__ts.append(["MULHER " + wu,t])
-					else:
-						self.__ts.append(["HOMEM " + wu,t])
-					self.__b = True          
-				if self.__b == False:             	# verifica se nao encontrou nem verbo infinito ou sinonimo
-					self.__ts.append([wu,t])
-			countWords += 1
-		self.it.reset()
-		if self.__verb == True:
-			return self.verbalAnalysis(self.__ts)
-		return self.__ts
-
-	# cria e recupera todos os dicionarios (verbos inf., sinonimos e artigos/preposicoes)
-	def executeWorkCSV(self):
-		self.__dicInf = self.__csv.getDicInf()
-		self.__dicSin = self.__csv.getDicSin()
-		self.__dicWords = self.__csv.getDicWords()
-		self.__dic2Gen = self.__csv.getDic2Gen()
-		self.__dicTemVerbs = self.__csv.getDicTemVerbs()
-
-	# converte romano para numero
-	def auxConvert(self, t):
-		try:
-			return roman_to_int(t)
-		except:
-			return t
-
-
-	def verbalAnalysis(self, lista):
-		lv = []
-		self.it.load(lista)
-		hasFut = False
-		hasPas = False
-		count = 0
-		while(self.it.hasNext()):
-			w = self.it.getAtualW().upper()
-			t = self.it.getAtualT()
-
-			if(t[:3] == "ADV"):
-				if (self.__dicTemVerbs.has_key(w)):
-					self.it.reset()
-					#print "ADV: retornou lista original"
-					return lista
-			
-			if(t == "VB-P"):
-				if (self.__countVerb > 1):
-					count += 1
-					#print "VB-P: Incrementou"
-					if(count == self.__countVerb):
-						#print "VB-P Adicionou " + w
-						lv.append([w,t])
-				else:
-					#print "VB-P: retornou lista original"
-					self.it.reset()
-					return lista
-			elif(t == "VB-D"):
-				count += 1
-				hasPas = True
-				#print "VB-D: Incrementou"
-				if(count == self.__countVerb):
-					#print "VB-D Adicionou " + w
-					lv.append([w,t])
-			elif(t == "VB-R"):
-				count += 1
-				hasFut = True
-				#print "VB-R: Incrementou"
-				if(count == self.__countVerb):
-					#print "VB-R Adicionou " + w
-					lv.append([w,t])
-			else:
-				lv.append([w,t])	
-		if (hasFut):
-			lv.append(["FUTURO", "TVB"])
-		elif (hasPas):
-			lv.append(["PASSADO", "TVB"])
-		self.it.reset()
-		return lv
-
-
-	def pluralAnalysis(self, word):
-
-		if(word[-3:] == "OES" or word[-2:] == "AES" or word[-2:] == "AOS"):
-			return word[0:-3]+"AO"
-		elif(word[-3:] == "RES" or word[-2:] == "ZES" or word[-2:] == "NES"):
-			return word[0:-2]
-		elif(word[-3:] == "SES"):
-			#TODO: Algumas palavras possuem marcações gráficas na raiz singular. Ex: Gás – Gases
-			return word[0:-2]
-		elif(word[-2:] == "NS"):
-			return word[0:-2]+"M"
-		elif(word[-3:] == "EIS"):
-			return word[0:-3]+"IL"
-		elif(word[-2:] == "IS"):
-			if(word[-3] == "A" or word[-3] == "E" or word[-3] == "O" or word[-3] == "U"):
-				return word[0:-2]+"L"	
-			else:
-				return word	
-		elif(word[-1] == "S"):
-	    	#TODO: Palavras paroxítonas ou proparoxítonas terminadas em S. Ex: lápis, vírus, tênis, ônibus, etc
-			return word[0:-1]
-		else:
-			return word
 \ No newline at end of file
@@ -1,83 +0,0 @@
-#!/usr/bin/python
-# -*- coding: utf-8 -*-
-
-from unicodedata import normalize
-
-ext = {1:"um", 2:"dois", 3:"três", 4:"quatro", 5:"cinco", 6:"seis", 7:"sete", 8:"oito", 9:"nove", 0:"zero"}
-
-def extenso(n):
-    strn = str(n)
-    sizen = len(strn)
-    tokens = []
-    for i in range (0, sizen):
-        x = int(strn[i])
-        tokens.append(ext[x])
-    return ' '.join(tokens)
-
-"""
-def extenso(n):
-    strn = str(n)
-    sizen = len(strn)
-    tokens = []
-    for i in range (0, sizen):
-        tokens.append(strn[i])
-    return ' '.join(tokens)
-"""    
-
-def remover_acentos(txt):
-
-    """ Devolve cópia de uma str substituindo os caracteres 
-        acentuados pelos seus equivalentes não acentuados.
-    
-    ATENÇÃO: carateres gráficos não ASCII e não alfa-numéricos,
-    tais como bullets, travessões, aspas assimétricas, etc. 
-    são simplesmente removidos!
-    
-    >>> remover_acentos('[ACENTUAÇÃO] ç: áàãâä! éèêë? íì&#297;îï, óòõôö; úù&#361;ûü.')
-    '[ACENTUACAO] c: aaaaa! eeee? iiiii, ooooo; uuuuu.'
-    
-    """
-    try:
-        return normalize('NFKD', txt.decode('utf-8')).encode('ASCII','ignore')
-    except:
-        return normalize('NFKD', txt.decode('iso-8859-1')).encode('ASCII','ignore')
-
-
-def roman_to_int(input):
-    if not isinstance(input, type("")):
-        raise TypeError, "expected string, got %s" % type(input)
-    input = input.upper( )
-    nums = {'M':1000,
-            'D':500,
-            'C':100,
-            'L':50,
-            'X':10,
-            'V':5,
-            'I':1}
-    sum = 0
-    for i in range(len(input)):
-        try:
-            value = nums[input[i]]
-            if i+1 < len(input) and nums[input[i+1]] > value:
-                sum -= value
-            else: sum += value
-        except KeyError:
-            raise ValueError, 'input is not a valid Roman numeral: %s' % input
-        
-    if int_to_roman(sum) == input: return sum
-    else:        raise ValueError, 'input is not a valid Roman numeral: %s' % input
-
-def int_to_roman(input):
-    if not isinstance(input, type(1)):
-        raise TypeError, "expected integer, got %s" % type(input)
-    if not 0 < input < 4000:
-        raise ValueError, "Argument must be between 1 and 3999"
-    ints = (1000, 900, 500, 400, 100, 90, 50, 40, 10, 9, 5, 4, 1)
-    nums = ('M', 'CM', 'D', 'CD','C', 'XC','L','XL','X','IX','V','IV','I')
-    result = []
-
-    for i in range(len(ints)):
-        count = int(input / ints[i])
-        result.append(nums[i] * count)
-        input -= ints[i] * count
-    return ''.join(result)
 \ No newline at end of file
@@ -1,38 +0,0 @@
-#!/usr/bin/python
-# -*- coding: utf-8 -*-
-
-#Autor: Erickson Silva <erickson.silva@lavid.ufpb.br> <ericksonsilva@live.com>
-
-from Classificador import *
-from Simplificador import *
-from AplicadorRegras import * 
-from Output import *
-from StringAux import *
-
-
-class Tradutor(object):
-
-	def __init__(self):
-		#instanciando os objetos
-		self.__classificador = Classificador()
-		self.__simplificador = Simplificador()
-		self.__regras = AplicadorRegras()
-		self.__out = Output()
-
-
-	def traduzir(self, txt):
-		self.__tr = None
-
-		#faz a tokenizacao e a classificacao
-		self.__t = self.__classificador.anotaSentencas(txt)
-		
-		#retira artigos e preposicoes
-		self.__ts = self.__simplificador.simplificar(self.__t)
-		self.__t = None
-		
-		#aplica as regras
-		self.__tr = self.__regras.aplicarRegras(self.__ts)
-		self.__ts = None
-
-		#executa a saida
-		return self.__out.executeOut(self.__tr).encode("utf-8")
@@ -1,125 +0,0 @@
-#!/usr/bin/python
-# -*- coding: utf-8 -*-
-
-#Autor: Erickson Silva <erickson.silva@lavid.ufpb.br> <ericksonsilva@live.com>
-
-import os, csv, sys
-
-class WorkCSV(object):
-   _iInstance = None
-
-   class Singleton:
-      def __init__(self):
-         self.LeitorDicionarios = None
-
-      def __init__( self ):
-         if LeitorDicionarios._iInstance is None:
-            LeitorDicionarios._iInstance = LeitorDicionarios.Singleton()
-
-         self._EventHandler_instance = LeitorDicionarios._iInstance
-
-      def __getattr__(self, aAttr):
-         return getattr(self._iInstance, aAttr)
-
-      def __setattr__(self, aAttr, aValue):
-         return setattr(self._iInstance, aAttr, aValue) 
-         
-   # Define e inicializa os atributos
-   def __init__(self):
-      self.__path = "vlibras_user/vlibras-core/data/"
-      self.__fileInf = ''
-      self.__dicInf = {}
-      self.__fileSin = ''
-      self.__dicSin = {}
-      self.__fileWords = ''
-      self.__dicWords = {}
-      self.__file2Gen = ''
-      self.__dic2Gen = {}      
-      self.__fileTemVerbs = ''
-      self.__dicTemVerbs = {}     
-      self.createDicInf()
-      self.createDicSin()
-      self.createDicWords()
-      self.createDic2Gen()
-      self.createDicTemVerbs() 
-
-   # Abre o arquivo que contem os verbos no infinitivo e preenche o dicionario com os mesmos
-   def createDicInf(self):
-      try:
-         self.__fileInf = csv.reader(open(self.__path+"dicPortGlosa.csv"), delimiter=";")
-      except IOError, (errno, strerror): 
-         print "I/O error(%s): %s" % (errno, strerror)
-         print "createDicInf"
-
-      for row in self.__fileInf:
-         if row[1] != "": 
-            try:
-               self.__dicInf[row[0].decode("utf-8")] = row[1].decode("utf-8")  
-            except UnicodeDecodeError:
-               self.__dicInf[row[0].decode('iso8859-1').encode('utf-8')] = row[1].decode('iso8859-1').encode('utf-8')
-         
-   # Abre o arquivo que contem os sinonimos e preenche o dicionario com os mesmos
-   def createDicSin(self):
-      try:
-         self.__fileSin = csv.reader(open(self.__path+"portuguesGlosa.csv"), delimiter=";")
-      except IOError, (errno, strerror):
-         print "I/O error(%s): %s" % (errno, strerror)
-         print "createDicSin"
-   
-      for row in self.__fileSin:
-         if row[1] != "":
-            self.__dicSin[row[0].decode("utf-8")] = row[1].decode("utf-8")
-    
-
-    # Abre o arquivo que contem os tempos verbais
-   def createDicTemVerbs(self):
-      try:
-         self.__fileTemVerbs = csv.reader(open(self.__path+"temposVerbais.csv"), delimiter=";")
-      except IOError, (errno, strerror):
-         print "I/O error(%s): %s" % (errno, strerror)
-         print "createDicTemVerbs"
-   
-      for row in self.__fileTemVerbs:
-         self.__dicTemVerbs[row[0].decode("utf-8")] = row[0].decode("utf-8")     
-
-   # Abre o arquivo que contem os artigos e preposicoes de acordo com o modelo de idioma passado na chamada e preenche o dicionario com os mesmos
-   def createDicWords(self):
-      try:
-         self.__fileWords = csv.reader(open(self.__path+"hWordsRemove.csv"), delimiter=";")
-      except IOError, (errno, strerror):
-         print "I/O error(%s): %s" % (errno, strerror)
-         print "createDicWords"
-
-      for row in self.__fileWords:
-         self.__dicWords[row[0].decode("utf-8")] = row[0].decode("utf-8")
-        
-   # Abre o arquivo que contem os substantivos que sao comuns dos 2 generos e preenche o dicionario com os mesmos
-   def createDic2Gen(self):
-      try:
-         self.__file2Gen = csv.reader(open(self.__path+"subs2Generos.csv"), delimiter=";")
-      except IOError, (errno, strerror):
-         print "I/O error(%s): %s" % (errno, strerror)
-         print "createDic2Gen" 
-           
-      for row in self.__file2Gen:
-         self.__dic2Gen[row[0].decode("utf-8")] = row[0].decode("utf-8")    
-
-   # Retorna o dicionario dos verbos no infinitivo
-   def getDicInf(self):
-      return self.__dicInf
-
-   # Retorna o dicionario dos sinonimos
-   def getDicSin(self):
-      return self.__dicSin
-
-   # Retorna o dicionario dos artigos e preposicoes a serem removidos pelo simplificador
-   def getDicWords(self):
-      return self.__dicWords
-
-   # Retorna o dicionario dos substantivos a serem analisados pelo simplificador
-   def getDic2Gen(self):
-      return self.__dic2Gen
-
-   # Retorna o dicionario dos tempos verbais
-   def getDicTemVerbs(self):
-      return self.__dicTemVerbs   
 \ No newline at end of file
@@ -1,14 +0,0 @@
-#!/usr/bin/python
-# -*- coding: utf-8 -*-
-
-from Tradutor import *
-
-tradutor = Tradutor()
-
-def iniciar(x):
-	try:
-		text = x.decode("utf-8")
-	except:
-		text = x.decode("iso-8859-1")
-		
-	return tradutor.traduzir(text)
...	...	@@ -1,31 +0,0 @@
1		-/**
2		- * @author Erickson Silva
3		- * @date 14/10/2013
4		- *
5		- */
6		-
7		-#include "Python.h"
8		-#include "dprintf.h"
9		-
10		-#ifndef _PYTRADUTOR_H
11		-#define _PYTRADUTOR_H
12		-
13		-namespace Tradutor {
14		- class PyTradutor{
15		- public:
16		- PyTradutor();
17		- ~PyTradutor();
18		- char * convertStringToGlosa(const char * input);
19		- PyObject * pName;
20		- PyObject * pModule;
21		- PyObject * pDict;
22		- PyObject * pFunc;
23		- PyObject * pArgs;
24		- PyObject * pResult;
25		- bool isRunning;
26		- };
27		-}
28		-
29		-#endif
30		-
31		-
...	...	@@ -1,58 +0,0 @@
1		-/**
2		- * Essa classe invoca os metodos do tradutor em Python
3		- * Onde efetua a tradução do texto passado por parametro
4		- *
5		- * @author Erickson Silva
6		- * @date 14/10/2013
7		- *
8		- */
9		-
10		-
11		-#include "pyTradutor.h"
12		-
13		-namespace Tradutor {
14		- PyTradutor::PyTradutor() {
15		- DPRINTF("Done!\n");
16		- }
17		- PyTradutor::~PyTradutor() {
18		- Py_DECREF(pName);
19		- Py_DECREF(pModule);
20		- Py_DECREF(pDict);
21		- Py_DECREF(pFunc);
22		- Py_DECREF(pArgs);
23		- Py_DECREF(pResult);
24		- DDDPRINTF("PyTranslator finalized!\n");
25		- }
26		-
27		-/**
28		-* Traduz um texto (char * input) para uma string contendo a
29		-* traducao para glosa
30		-*
31		-* @param input texto de entrada
32		-* @return string contendo os tokens em glosa traduzidos.
33		-**/
34		- char * PyTradutor::convertStringToGlosa(const char * input) {
35		- if(!isRunning){
36		- Py_Initialize();
37		- pName = PyString_FromString("ModuleTranslate");
38		- assert(pName!=NULL);
39		- pModule = PyImport_Import(pName);
40		- PyErr_Print();
41		- assert(pModule!=NULL);
42		- pDict = PyModule_GetDict(pModule);
43		- PyErr_Print();
44		- assert(pDict!=NULL);
45		- pFunc = PyDict_GetItemString(pDict, "iniciar");
46		- PyErr_Print();
47		- assert(pFunc!=NULL);
48		- isRunning = 1;
49		- }
50		- pArgs = PyTuple_Pack(1,PyString_FromString(input));
51		- PyErr_Print();
52		- assert(pArgs!=NULL);
53		- pResult = PyObject_CallObject(pFunc, pArgs);
54		- PyErr_Print();
55		- assert(pResult!=NULL);
56		- return PyString_AsString(pResult);
57		- }
58		-}
59	0	\ No newline at end of file
...	...	@@ -1,14 +0,0 @@
1		-#!/usr/bin/python
2		-# -- coding: utf-8 --
3		-
4		-from Tradutor import *
5		-
6		-tradutor = Tradutor()
7		-
8		-def iniciar(x):
9		- try:
10		- text = x.decode("utf-8")
11		- except:
12		- text = x.decode("iso-8859-1")
13		-
14		- return tradutor.traduzir(text)
...	...	@@ -1,161 +0,0 @@
1		-#!/usr/bin/python
2		-# -- coding: utf-8 --
3		-
4		-#Autor: Erickson Silva
5		-#Email: <erickson.silva@lavid.ufpb.br> <ericksonsilva@live.com>
6		-
7		-#LAViD - Laboratório de Aplicações de Vídeo Digital
8		-
9		-from LeitorDicionarios import *
10		-from Iterator import *
11		-from StringAux import *
12		-
13		-class AplicaRegras(object):
14		-
15		- # inicializa todos as variaveis
16		- def __init__(self):
17		- self.__dicionarios = LeitorDicionarios()
18		-
19		- # retira artigos e preposicoes; passa verbos para infinitivo e verificar se há sinonimos
20		- def simplificar(self, texto):
21		- it = Iterator()
22		- it.load(texto)
23		- self.__ts = []
24		- self.__verb = False
25		- self.__adv = False;
26		- self.__countVerb = 0
27		- self.__countAdv = 0
28		- while(it.hasNext()):
29		- token = it.getAtualW()
30		- tag = it.getAtualT()
31		- self.__b = False
32		- if self.__dicionarios.hasPalavraIgnorada(tag) == False: # verifica se nao eh artigo/preposicao
33		-
34		- #VERIFICA SE É ADVERBIO E CONTA A QUANTIDADE
35		- if tag[:3] == "ADV":
36		- self.__adv = True
37		- self.__countAdv += 1
38		-
39		- if tag[:2] == "VB":
40		-
41		- #VERIFICA SE É VERBO NO INFINITIVO
42		- if self.__dicionarios.hasVerboInfinitivo(token): # verifica se ha um verbo infinitivo desse token
43		- verboInfinitivo = self.__dicionarios.getVerboInfinitivo(token) # se sim, adiciona numa string aux
44		- self.__ts.append([verboInfinitivo,tag]) # caso contrario, adiciona so o verbo infinitivo msm
45		- self.__b = True
46		-
47		- #VERIFICA SE É VERBO DE TEMPO E CONTA A QUANTIDADE
48		- if tag == "VB-P" or tag == "VB-D" or tag == "VB-R":
49		- self.__verb = True
50		- self.__countVerb += 1
51		-
52		-
53		- #VERIFICA SE É SUBTANTIVO COMUM DOS 2 GENEROS
54		- if self.__dicionarios.hasSubst2Genero(token):
55		- #del self.__ts[-1]
56		- lenTicket = len(it.getAntT())
57		- if ((self.__dicionarios.hasPalavraIgnorada(it.getAntT())) and (it.getAntT()[lenTicket-1:] == "F") or (it.getAntT()[lenTicket-3:] == "F-P")):
58		- self.__ts.append(["MULHER ", "2GEN"])
59		- self.__ts.append([token,tag])
60		- else:
61		- self.__ts.append(["HOMEM ", "2GEN"])
62		- self.__ts.append([token,tag])
63		- self.__b = True
64		-
65		- #VERIFICA SE É PLURAL
66		- #if tag[-2:] == "-P":
67		- # token = self.pluralAnalysis(token)
68		-
69		- #SE NÃO HOUVE NENHUM ALTERAÇÃO, OU SEJA, NÃO APLICOU NENHUMA REGRA, ADICIONA O TOKEN ORIGINAL
70		- if self.__b == False: # verifica se nao encontrou nem verbo infinito ou sinonimo
71		- self.__ts.append([token,tag])
72		-
73		- #SE ENCONTROU VERBO, ENTÃO ANALISA a SENTENCA NOVAMENTE (again?)
74		- if self.__verb == True:
75		- return self.verbalAnalysis(self.__ts)
76		-
77		- return self.__ts
78		-
79		-
80		- # converte romano para numero
81		- def auxConvert(self, tag):
82		- try:
83		- return roman_to_int(tag)
84		- except:
85		- return tag
86		-
87		- def verbalAnalysis(self, lista):
88		- lv = []
89		- it = Iterator()
90		- it.load(lista)
91		- hasFut = False
92		- hasPas = False
93		- count = 0
94		- while(it.hasNext()):
95		- token = it.getAtualW().upper()
96		- tag = it.getAtualT()
97		-
98		- if(tag[:3] == "ADV"):
99		- if (self.__dicionarios.hasTempoVerbal(token)):
100		- it.reset()
101		- #print "ADV: retornou lista original"
102		- return lista
103		-
104		- if(tag == "VB-P"):
105		- if (self.__countVerb > 1):
106		- count += 1
107		- #print "VB-P: Incrementou"
108		- if(count == self.__countVerb):
109		- #print "VB-P Adicionou " + token
110		- lv.append([token,tag])
111		- else:
112		- #print "VB-P: retornou lista original"
113		- it.reset()
114		- return lista
115		- elif(tag == "VB-D"):
116		- count += 1
117		- hasPas = True
118		- #print "VB-D: Incrementou"
119		- if(count == self.__countVerb):
120		- #print "VB-D Adicionou " + token
121		- lv.append([token,tag])
122		- elif(tag == "VB-R"):
123		- count += 1
124		- hasFut = True
125		- #print "VB-R: Incrementou"
126		- if(count == self.__countVerb):
127		- #print "VB-R Adicionou " + token
128		- lv.append([token,tag])
129		- else:
130		- lv.append([token,tag])
131		- if (hasFut):
132		- lv.append(["FUTURO", "T-VB"])
133		- elif (hasPas):
134		- lv.append(["PASSADO", "T-VB"])
135		- it.reset()
136		- return lv
137		-
138		-
139		- def pluralAnalysis(self, word):
140		-
141		- if(word[-3:] == "OES" or word[-2:] == "AES" or word[-2:] == "AOS"):
142		- return word[0:-3]+"AO"
143		- elif(word[-3:] == "RES" or word[-2:] == "ZES" or word[-2:] == "NES"):
144		- return word[0:-2]
145		- elif(word[-3:] == "SES"):
146		- #TODO: Algumas palavras possuem marcações gráficas na raiz singular. Ex: Gás – Gases
147		- return word[0:-2]
148		- elif(word[-2:] == "NS"):
149		- return word[0:-2]+"M"
150		- elif(word[-3:] == "EIS"):
151		- return word[0:-3]+"IL"
152		- elif(word[-2:] == "IS"):
153		- if(word[-3] == "A" or word[-3] == "E" or word[-3] == "O" or word[-3] == "U"):
154		- return word[0:-2]+"L"
155		- else:
156		- return word
157		- elif(word[-1] == "S"):
158		- #TODO: Palavras paroxítonas ou proparoxítonas terminadas em S. Ex: lápis, vírus, tagênis, ônibus, etc
159		- return word[0:-1]
160		- else:
161		- return word
162	0	\ No newline at end of file
...	...	@@ -1,48 +0,0 @@
1		-#!/usr/bin/python
2		-# -- coding: utf-8 --
3		-
4		-#Autor: Erickson Silva
5		-#Email: <erickson.silva@lavid.ufpb.br> <ericksonsilva@live.com>
6		-
7		-#LAViD - Laboratório de Aplicações de Vídeo Digital
8		-
9		-import os, csv, sys
10		-from nltk.tree import *
11		-from LeitorDicionarios import *
12		-
13		-class AplicaSinonimos(object):
14		-
15		- # Define e inicializa os atributos
16		- def __init__(self):
17		- self.__dicionarios = LeitorDicionarios()
18		-
19		- def sinonimosMorfologico(self, texto):
20		- lista = texto
21		- for i, elem in enumerate(lista):
22		- token = self.verificaPalavra(elem[0])
23		- listmp = list(elem)
24		- listmp[0] = token
25		- lista[i] = listmp
26		- return lista
27		-
28		-
29		- def dicionarioSinonimoFolhas(self, folhas):
30		- dic = {}
31		- for f in folhas:
32		- token = self.verificaPalavra(f)
33		- dic[f] = token
34		- return dic
35		-
36		- def sinonimosSintatico(self, texto):
37		- folhas = Tree.leaves(texto)
38		- dic = self.dicionarioSinonimoFolhas(folhas)
39		- stringTree = str(texto)
40		- for t in folhas:
41		- stringTree.replace(t, dic[t])
42		- tree = Tree.fromstring(stringTree, brackets='()')
43		- return tree
44		-
45		- def verificaPalavra(self, token):
46		- if self.__dicionarios.hasSinonimo(token):
47		- return self.__dicionarios.getSinonimo(token)
48		- return token
49	0	\ No newline at end of file
...	...	@@ -0,0 +1,60 @@
	1	+#!/usr/bin/python
	2	+# -- coding: utf-8 --
	3	+
	4	+#Autor: Erickson Silva <erickson.silva@lavid.ufpb.br> <ericksonsilva@live.com>
	5	+
	6	+import xml.etree.ElementTree as ET
	7	+import os
	8	+
	9	+class AplicadorRegras(object):
	10	+
	11	+ # inicializacao das variaves
	12	+ def __init__(self):
	13	+ self.__tree = ET.parse('vlibras_user/vlibras-core/data/regras.xml')
	14	+ self.__root = self.__tree.getroot()
	15	+ self.__tAux = []
	16	+ self.__dAux = {}
	17	+
	18	+ # aplica as regras
	19	+ def aplicarRegras(self, ts):
	20	+ self.__n = len(ts) # quantidade de tokens
	21	+ for i in range(0,self.__n):
	22	+ self.__tAux.append(self.__n)
	23	+ self.__name = self.getNameRule(ts) # todos os etiquetadores numa so string (ver linha 35)
	24	+ for morpho in self.__root:
	25	+ for rule in morpho.findall('rule'): # procura a tag rule
	26	+ if rule.get('name') == self.__name: # procura o atributo name na tag rule (ver linha 17)
	27	+ if rule.find('active').text == "true": # verifica se a regra esta ativa
	28	+ self.__c = 0
	29	+ for classe in rule.iter('class'): # for nas tags class
	30	+ self.__dAux[self.__c] = int(classe.find('newpos').text) # preenche dicionario com a ordem atual e futura das palavras
	31	+ self.__c += 1
	32	+ self.__c = 0
	33	+ for w,t in ts:
	34	+ i = self.__dAux.get(self.__c) # pega o indice de onde ficara a palavra
	35	+ self.__tAux[i] = ([w,t]) # preenche a lista com a palavra+etiqueta na posicao correta (segundo o arquivo regras.xml)
	36	+ self.__c += 1
	37	+ return self.__tAux # retorna nova lista (ordenada)
	38	+ return ts # retorna a lista sem alteracoes (nao existe regra)
	39	+
	40	+ def getNameRule(self, ts):
	41	+ self.__name = ""
	42	+ for w,t in ts:
	43	+ if t[:2] != "VB":
	44	+ self.__name += t
	45	+ else:
	46	+ self.__name += t[:2]
	47	+ return self.__name
	48	+
	49	+
	50	+
	51	+
	52	+
	53	+
	54	+
	55	+
	56	+
	57	+
	58	+
	59	+
	60	+
...	...
...	...	@@ -0,0 +1,44 @@
	1	+#!/usr/bin/python
	2	+# -- coding: utf-8 --
	3	+
	4	+#Autor: Erickson Silva <erickson.silva@lavid.ufpb.br> <ericksonsilva@live.com>
	5	+
	6	+from Aelius import AnotaCorpus, Toqueniza, Extras
	7	+
	8	+class Classificador(object):
	9	+
	10	+ # inicializacao das variaveis
	11	+ def __init__(self):
	12	+ self.__h = Extras.carrega("AeliusHunPos") # carrega o modelo de idioma (passado por parametro ao instanciar)
	13	+
	14	+ def anotaSentencas(self, str):
	15	+ self.__t = ""
	16	+ self.__tokens = ""
	17	+ #try:
	18	+ # tokenizae
	19	+ self.__tokens = Toqueniza.TOK_PORT.tokenize(str)
	20	+
	21	+ # realiza a classificacao morfologica
	22	+ self.__t = AnotaCorpus.anota_sentencas([self.__tokens],self.__h,'hunpos')
	23	+
	24	+ return self.listClean(self.__t)
	25	+ #except:
	26	+ # print "Erro ao efetuar a classificação morfologica."
	27	+
	28	+
	29	+ def listClean(self, l):
	30	+ lClean = []
	31	+ for w,t in l[0]:
	32	+ lClean.append([w,t])
	33	+ return lClean
	34	+
	35	+ # faz a impressao (usado apenas pra testes)
	36	+ def imprimeSentencas(self):
	37	+ for w,t in self.t[0]:
	38	+ print "%s_%s " % (w,t),
	39	+
	40	+
	41	+
	42	+
	43	+
	44	+
...	...
1	1	#!/usr/bin/python
2	2	# -- coding: utf-8 --
3	3
4		-#Autor: Erickson Silva
5		-#Email: <erickson.silva@lavid.ufpb.br> <ericksonsilva@live.com>
	4	+#Autor: Erickson Silva <erickson.silva@lavid.ufpb.br> <ericksonsilva@live.com>
6	5
7		-#LAViD - Laboratório de Aplicações de Vídeo Digital
	6	+from StringAux import *
8	7
9	8	class Iterator(object):
10	9
...	...	@@ -36,22 +35,22 @@ class Iterator(object):
36	35	return self.__list[self.count]
37	36
38	37	def getAtualW(self):
39		- return self.getToken(0)[0].upper()
	38	+ return remover_acentos(self.getToken(0)[0].upper().encode('utf-8'))
40	39
41	40	def getAtualT(self):
42		- return self.getToken(0)[1]
	41	+ return self.getToken(0)[1].upper().encode('utf-8')
43	42
44	43	def getProxW(self):
45		- return self.getToken("+")[0].upper()
	44	+ return remover_acentos(self.getToken("+")[0].upper().encode('utf-8'))
46	45
47	46	def getProxT(self):
48		- return self.getToken("+")[1]
	47	+ return self.getToken("+")[1].upper().encode('utf-8')
49	48
50	49	def getAntW(self):
51		- return self.getToken("-")[0].upper()
	50	+ return remover_acentos(self.getToken("-")[0].upper().encode('utf-8'))
52	51
53	52	def getAntT(self):
54		- return self.getToken("-")[1]
	53	+ return self.getToken("-")[1].upper().encode('utf-8')
55	54
56	55	def hasNext(self):
57	56	if(self.count < self.size-1):
...	...
...	...	@@ -1,139 +0,0 @@
1		-#!/usr/bin/python
2		-# -- coding: utf-8 --
3		-
4		-#Autor: Erickson Silva
5		-#Email: <erickson.silva@lavid.ufpb.br> <ericksonsilva@live.com>
6		-
7		-#LAViD - Laboratório de Aplicações de Vídeo Digital
8		-
9		-import os, csv, sys
10		-
11		-class LeitorDicionarios(object):
12		- #_iInstance = None
13		-
14		- #class Singleton:
15		- # def __init__(self):
16		- # self.LeitorDicionarios = None
17		-
18		- #def __init__( self ):
19		- # if LeitorDicionarios._iInstance is None:
20		- # LeitorDicionarios._iInstance = LeitorDicionarios.Singleton()
21		-
22		- # self._EventHandler_instance = LeitorDicionarios._iInstance
23		-
24		- #def __getattr__(self, aAttr):
25		- # return getattr(self._iInstance, aAttr)
26		-
27		- #def __setattr__(self, aAttr, aValue):
28		- # return setattr(self._iInstance, aAttr, aValue)
29		-
30		- # Define e inicializa os atributos
31		- def __init__(self):
32		- self.__path = "/home/erickson/vlibras-translate/data/"
33		- self.__dicInf = {}
34		- self.__dicSin = {}
35		- self.__dicWords = {}
36		- self.__dic2Gen = {}
37		- self.__dicTemVerbs = {}
38		- self.__fileDic = ''
39		- self.carregarVerbosInfinitivos()
40		- self.carregarSinonimos()
41		- self.carregarPalavrasIgnoradas()
42		- self.carregarSubst2Generos()
43		- self.carregarTemposVerbais()
44		-
45		- # Abre o self.__fileDic que contem os verbos no infinitivo e preenche o dicionario com os mesmos
46		- def carregarVerbosInfinitivos(self):
47		- try:
48		- self.__fileDic = csv.reader(open(self.__path+"dicPortGlosa.csv"), delimiter=";")
49		- except IOError, (errno, strerror):
50		- print "I/O error(%s): %s" % (errno, strerror)
51		- print "carregarVerbosInfinitivos"
52		-
53		- for row in self.__fileDic:
54		- if row[1] != "":
55		- try:
56		- self.__dicInf[row[0].decode("utf-8")] = row[1].decode("utf-8")
57		- except UnicodeDecodeError:
58		- self.__dicInf[row[0].decode('iso8859-1').encode('utf-8')] = row[1].decode('iso8859-1').encode('utf-8')
59		-
60		- # Abre o self.__fileDic que contem os sinonimos e preenche o dicionario com os mesmos
61		- def carregarSinonimos(self):
62		- try:
63		- self.__fileDic = csv.reader(open(self.__path+"portuguesGlosa.csv"), delimiter=";")
64		- except IOError, (errno, strerror):
65		- print "I/O error(%s): %s" % (errno, strerror)
66		- print "carregarSinonimos"
67		-
68		- for row in self.__fileDic:
69		- if row[1] != "":
70		- self.__dicSin[row[0].decode("utf-8")] = row[1].decode("utf-8")
71		-
72		-
73		- # Abre o self.__fileDic que contem os tempos verbais
74		- def carregarTemposVerbais(self):
75		- try:
76		- self.__fileDic = csv.reader(open(self.__path+"temposVerbais.csv"), delimiter=";")
77		- except IOError, (errno, strerror):
78		- print "I/O error(%s): %s" % (errno, strerror)
79		- print "carregarTemposVerbais"
80		-
81		- for row in self.__fileDic:
82		- self.__dicTemVerbs[row[0].decode("utf-8")] = row[0].decode("utf-8")
83		-
84		- # Abre o self.__fileDic que contem os artigos e preposicoes de acordo com o modelo de idioma passado na chamada e preenche o dicionario com os mesmos
85		- def carregarPalavrasIgnoradas(self):
86		- try:
87		- self.__fileDic = csv.reader(open(self.__path+"hWordsRemove.csv"), delimiter=";")
88		- except IOError, (errno, strerror):
89		- print "I/O error(%s): %s" % (errno, strerror)
90		- print "carregarPalavrasIgnoradas"
91		-
92		- for row in self.__fileDic:
93		- self.__dicWords[row[0].decode("utf-8")] = row[0].decode("utf-8")
94		-
95		- # Abre o self.__fileDic que contem os substantivos que sao comuns dos 2 generos e preenche o dicionario com os mesmos
96		- def carregarSubst2Generos(self):
97		- try:
98		- self.__fileDic = csv.reader(open(self.__path+"subs2Generos.csv"), delimiter=";")
99		- except IOError, (errno, strerror):
100		- print "I/O error(%s): %s" % (errno, strerror)
101		- print "carregarSubst2Generos"
102		-
103		- for row in self.__fileDic:
104		- self.__dic2Gen[row[0].decode("utf-8")] = row[0].decode("utf-8")
105		-
106		- # Retorna o dicionario dos verbos no infinitivo
107		- def getVerboInfinitivo(self, token):
108		- return self.__dicInf[token]
109		-
110		- # Retorna o dicionario dos sinonimos
111		- def getSinonimo(self, token):
112		- return self.__dicSin[token]
113		-
114		- # Retorna o dicionario dos artigos e preposicoes a serem removidos pelo simplificador
115		- def getPalavraIgnorada(self, token):
116		- return self.__dicWords[token]
117		-
118		- # Retorna o dicionario dos substantivos a serem analisados pelo simplificador
119		- def getSubst2Generos(self, token):
120		- return self.__dic2Gen[token]
121		-
122		- # Retorna o dicionario dos tempos verbais
123		- def getTempoVerbal(self, token):
124		- return self.__dicTemVerbs[token]
125		-
126		- def hasVerboInfinitivo(self, token):
127		- return self.__dicInf.has_key(token)
128		-
129		- def hasSinonimo(self, token):
130		- return self.__dicSin.has_key(token)
131		-
132		- def hasPalavraIgnorada(self, token):
133		- return self.__dicWords.has_key(token)
134		-
135		- def hasSubst2Genero(self, token):
136		- return self.__dic2Gen.has_key(token)
137		-
138		- def hasTempoVerbal(self, token):
139		- return self.__dicTemVerbs.has_key(token)
...	...	@@ -0,0 +1,14 @@
	1	+#!/usr/bin/python
	2	+# -- coding: utf-8 --
	3	+
	4	+from Tradutor import *
	5	+
	6	+tradutor = Tradutor()
	7	+
	8	+def iniciar(x):
	9	+ try:
	10	+ text = x.decode("utf-8")
	11	+ except:
	12	+ text = x.decode("iso-8859-1")
	13	+
	14	+ return tradutor.traduzir(text)
...	...