From 571e562c03d2394ffc8ce44bec76ad17805788f7 Mon Sep 17 00:00:00 2001 From: Erickson Silva Date: Mon, 9 Feb 2015 10:52:12 -0300 Subject: [PATCH] Renomeia as classes 'ConversorExtenso', 'LeitorDicionarios' e 'Tradutor' --- src/new/AplicaRegras.py | 6 +++--- src/new/AplicaSinonimos.py | 2 +- src/new/ConversorExtenso.py | 150 ------------------------------------------------------------------------------------------------------------------------------------------------------ src/new/ConverteExtenso.py | 150 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/new/LeitorDicionarios.py | 146 -------------------------------------------------------------------------------------------------------------------------------------------------- src/new/LerDicionarios.py | 146 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/new/Tradutor.py | 46 ---------------------------------------------- src/new/TraduzSentencas.py | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 8 files changed, 346 insertions(+), 346 deletions(-) delete mode 100644 src/new/ConversorExtenso.py create mode 100644 src/new/ConverteExtenso.py delete mode 100644 src/new/LeitorDicionarios.py create mode 100644 src/new/LerDicionarios.py delete mode 100644 src/new/Tradutor.py create mode 100644 src/new/TraduzSentencas.py diff --git a/src/new/AplicaRegras.py b/src/new/AplicaRegras.py index 6e3c5be..bd80bd0 100644 --- a/src/new/AplicaRegras.py +++ b/src/new/AplicaRegras.py @@ -10,10 +10,10 @@ from collections import deque import xml.etree.ElementTree as ET from os.path import expanduser import platform -from LeitorDicionarios import * +from LerDicionarios import * from Iterator import * from StringAux import * -from ConversorExtenso import * +from ConverteExtenso import * class AplicaRegras(object): @@ -31,7 +31,7 @@ class AplicaRegras(object): else: return ET.parse(expanduser("~")+'/vlibras-translate/data/regras.xml').getroot() - def aplicarRegrasMorpho(self, lista): + def aplicarRegrasMorfo(self, lista): self.__especificos = {"advt" : self.verificarAdvTempo, "v" : self.verificarVbInfinitivo, "x" : self.verificarPrepos, "c" : self.verificarSubs2Generos, "a" : self.verificarArtigo, "l" : self.verificarVbLigacao, "i": self.verificarAdvIntensidade, "vbi":"zero", "n":"zero", "abmn":"zero", "adji":"zero", "adjn":"zero", "advi":"zero"} self.pularIteracoes = 0 diff --git a/src/new/AplicaSinonimos.py b/src/new/AplicaSinonimos.py index e809efa..d7157cd 100644 --- a/src/new/AplicaSinonimos.py +++ b/src/new/AplicaSinonimos.py @@ -8,7 +8,7 @@ import os, csv, sys from nltk.tree import * -from LeitorDicionarios import * +from LerDicionarios import * class AplicaSinonimos(object): diff --git a/src/new/ConversorExtenso.py b/src/new/ConversorExtenso.py deleted file mode 100644 index 13f82b0..0000000 --- a/src/new/ConversorExtenso.py +++ /dev/null @@ -1,150 +0,0 @@ -#!/usr/bin/python -# -*- coding: utf-8 -*- - -#Autor: Erickson Silva -#Email: - -#LAViD - Laboratório de Aplicações de Vídeo Digital - -import sys -from unicodedata import normalize -from Iterator import * - -num = {"zero":0, "um":1, "dois":2, "tres":3, "quatro":4, "cinco":5, "seis":6, -"sete":7, "oito":8, "nove":9} - -und = {"mil":1000, "milhao":1000000, "bilhao":1000000000, "trilhao":1000000000000} - -ext = [{"um":"1", "dois":"2", "tres":"3", "quatro":"4", "cinco":"5", "seis":"6", -"sete":"7", "oito":"8", "nove":"9", "dez":"10", "onze":"11", "doze":"12", -"treze":"13", "quatorze":"14", "quinze":"15", "dezesseis":"16", -"dezessete":"17", "dezoito":"18", "dezenove":"19"}, {"vinte":"2", "trinta":"3", -"quarenta":"4", "cinquenta":"5", "sessenta":"6", "setenta":"7", "oitenta":"8", -"noventa":"9"}, {"cento":"1", "cem":"1", "duzentos":"2", "trezentos":"3", -"quatrocentos":"4", "quinhentos":"5", "seissentos":"6", "setessentos":"7", -"oitocentos":"8", "novecentos":"9"}] - -unds = {"mil":"000", "milhao":"000000","milhoes":"000000", "bilhao":"000000000","bilhoes":"000000000", "trilhao":"000000000000", "trilhoes":"000000000000"} - - - -def oneDigit(x): - return ext[0][x] - -def twoDigit(x): - try: - return ext[1][x[0]]+ext[0][x[1]] - except: - return ext[1][x[0]]+"0" - -def threeDigit(x): - return ext[2][x[0]]+ext[1][x[1]]+ext[0][x[2]] - -def extenso2(n): - sn = n.split(",") - size = len(sn) - firstWord = sn[0] - endWord = "" - numExt = "" - - if(unds.has_key(sn[size-1])): - size -= 1 - endWord = sn[size] - del sn[size] - - if(ext[0].has_key(firstWord)): - numExt = oneDigit(firstWord) - - elif (ext[1].has_key(firstWord)): - numExt = twoDigit(sn) - - elif (ext[2].has_key(firstWord)): - if(size == 1): - numExt = ext[2][firstWord]+"00" - elif (size == 2): - if(sn[1] == "dez"): - numExt = ext[2][firstWord]+oneDigit(sn[1]) - try: - numExt = ext[2][firstWord]+"0"+oneDigit(sn[1]) - except: - numExt = ext[2][firstWord]+twoDigit([sn[1]]) - else: - numExt = threeDigit(sn) - - if(endWord != ""): - numExt = numExt+unds[endWord] - - return numExt - -def extenso(extenso): - global newToken, auxToken - extensoQuebrado = extenso.lower().split(" ") - nums = [] - it = Iterator() - it.load(extensoQuebrado) - while(it.hasNext()): - token = simplifica(it.getToken()) - tokenAnterior = simplifica(it.getToken(-1)) - if (und.has_key(token)): - #print "cond1" - if(it.getCount() == 0): - #print "cond2" - nums.append(und[token]) - else: - #print "cond3" - newToken = und[token] * int(nums[-1]) - nums[-1] = newToken - else: - #print "cond4" - if (num.has_key(token)): - #print "cond5" - auxToken = num[token] - elif (not und.has_key(token)): - #print "cond6" - auxToken = extenso2(token) - - if((not und.has_key(tokenAnterior)) and it.getCount() > 0): - #print "cond7" - newToken = int(auxToken) + int(nums[-1]) - nums[-1] = newToken - else: - #print "cond8" - nums.append(auxToken) - - return soma(nums) - -def soma(lista): - soma = 0 - for i in lista: - soma += int(i) - return soma - -def simplifica(txt): - - newToken = "" - try: - newToken = normalize('NFKD', txt.decode('utf-8')).encode('ASCII','ignore') - except: - newToken = normalize('NFKD', txt.decode('iso-8859-1')).encode('ASCII','ignore') - - if(newToken[-3:] == "oes"): return newToken[:-3] + "ao" - return newToken - -''' -if __name__ == '__main__': - n = sys.argv[1] - return extenso(n) - arquivoExts = open('exts', 'r') - listaExts = arquivoExts.readlines() - arquivoNums = open('nums', 'r') - listaNums = arquivoNums.readlines() - for i in range(0,500): - n = listaNums[i].replace("\n","") - e = listaExts[i].replace("\n","") - numNew = extenso(e) - if (str(numNew) != n): - print n + " != " + str(numNew) - #else: - # print "OK: " + n + " == " + str(numNew) -''' - diff --git a/src/new/ConverteExtenso.py b/src/new/ConverteExtenso.py new file mode 100644 index 0000000..13f82b0 --- /dev/null +++ b/src/new/ConverteExtenso.py @@ -0,0 +1,150 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- + +#Autor: Erickson Silva +#Email: + +#LAViD - Laboratório de Aplicações de Vídeo Digital + +import sys +from unicodedata import normalize +from Iterator import * + +num = {"zero":0, "um":1, "dois":2, "tres":3, "quatro":4, "cinco":5, "seis":6, +"sete":7, "oito":8, "nove":9} + +und = {"mil":1000, "milhao":1000000, "bilhao":1000000000, "trilhao":1000000000000} + +ext = [{"um":"1", "dois":"2", "tres":"3", "quatro":"4", "cinco":"5", "seis":"6", +"sete":"7", "oito":"8", "nove":"9", "dez":"10", "onze":"11", "doze":"12", +"treze":"13", "quatorze":"14", "quinze":"15", "dezesseis":"16", +"dezessete":"17", "dezoito":"18", "dezenove":"19"}, {"vinte":"2", "trinta":"3", +"quarenta":"4", "cinquenta":"5", "sessenta":"6", "setenta":"7", "oitenta":"8", +"noventa":"9"}, {"cento":"1", "cem":"1", "duzentos":"2", "trezentos":"3", +"quatrocentos":"4", "quinhentos":"5", "seissentos":"6", "setessentos":"7", +"oitocentos":"8", "novecentos":"9"}] + +unds = {"mil":"000", "milhao":"000000","milhoes":"000000", "bilhao":"000000000","bilhoes":"000000000", "trilhao":"000000000000", "trilhoes":"000000000000"} + + + +def oneDigit(x): + return ext[0][x] + +def twoDigit(x): + try: + return ext[1][x[0]]+ext[0][x[1]] + except: + return ext[1][x[0]]+"0" + +def threeDigit(x): + return ext[2][x[0]]+ext[1][x[1]]+ext[0][x[2]] + +def extenso2(n): + sn = n.split(",") + size = len(sn) + firstWord = sn[0] + endWord = "" + numExt = "" + + if(unds.has_key(sn[size-1])): + size -= 1 + endWord = sn[size] + del sn[size] + + if(ext[0].has_key(firstWord)): + numExt = oneDigit(firstWord) + + elif (ext[1].has_key(firstWord)): + numExt = twoDigit(sn) + + elif (ext[2].has_key(firstWord)): + if(size == 1): + numExt = ext[2][firstWord]+"00" + elif (size == 2): + if(sn[1] == "dez"): + numExt = ext[2][firstWord]+oneDigit(sn[1]) + try: + numExt = ext[2][firstWord]+"0"+oneDigit(sn[1]) + except: + numExt = ext[2][firstWord]+twoDigit([sn[1]]) + else: + numExt = threeDigit(sn) + + if(endWord != ""): + numExt = numExt+unds[endWord] + + return numExt + +def extenso(extenso): + global newToken, auxToken + extensoQuebrado = extenso.lower().split(" ") + nums = [] + it = Iterator() + it.load(extensoQuebrado) + while(it.hasNext()): + token = simplifica(it.getToken()) + tokenAnterior = simplifica(it.getToken(-1)) + if (und.has_key(token)): + #print "cond1" + if(it.getCount() == 0): + #print "cond2" + nums.append(und[token]) + else: + #print "cond3" + newToken = und[token] * int(nums[-1]) + nums[-1] = newToken + else: + #print "cond4" + if (num.has_key(token)): + #print "cond5" + auxToken = num[token] + elif (not und.has_key(token)): + #print "cond6" + auxToken = extenso2(token) + + if((not und.has_key(tokenAnterior)) and it.getCount() > 0): + #print "cond7" + newToken = int(auxToken) + int(nums[-1]) + nums[-1] = newToken + else: + #print "cond8" + nums.append(auxToken) + + return soma(nums) + +def soma(lista): + soma = 0 + for i in lista: + soma += int(i) + return soma + +def simplifica(txt): + + newToken = "" + try: + newToken = normalize('NFKD', txt.decode('utf-8')).encode('ASCII','ignore') + except: + newToken = normalize('NFKD', txt.decode('iso-8859-1')).encode('ASCII','ignore') + + if(newToken[-3:] == "oes"): return newToken[:-3] + "ao" + return newToken + +''' +if __name__ == '__main__': + n = sys.argv[1] + return extenso(n) + arquivoExts = open('exts', 'r') + listaExts = arquivoExts.readlines() + arquivoNums = open('nums', 'r') + listaNums = arquivoNums.readlines() + for i in range(0,500): + n = listaNums[i].replace("\n","") + e = listaExts[i].replace("\n","") + numNew = extenso(e) + if (str(numNew) != n): + print n + " != " + str(numNew) + #else: + # print "OK: " + n + " == " + str(numNew) +''' + diff --git a/src/new/LeitorDicionarios.py b/src/new/LeitorDicionarios.py deleted file mode 100644 index 6d75f01..0000000 --- a/src/new/LeitorDicionarios.py +++ /dev/null @@ -1,146 +0,0 @@ -#!/usr/bin/python -# -*- coding: utf-8 -*- - -#Autor: Erickson Silva -#Email: - -#LAViD - Laboratório de Aplicações de Vídeo Digital - -from os.path import expanduser -import csv, platform - -class LeitorDicionarios(object): - #_iInstance = None - - #class Singleton: - # def __init__(self): - # self.LeitorDicionarios = None - - #def __init__( self ): - # if LeitorDicionarios._iInstance is None: - # LeitorDicionarios._iInstance = LeitorDicionarios.Singleton() - - # self._EventHandler_instance = LeitorDicionarios._iInstance - - #def __getattr__(self, aAttr): - # return getattr(self._iInstance, aAttr) - - #def __setattr__(self, aAttr, aValue): - # return setattr(self._iInstance, aAttr, aValue) - - # Define e inicializa os atributos - def __init__(self): - - so = platform.system() - if so == 'Windows': - self.__path = expanduser("~") + "\\vlibras-translate\data\\" - else: - self.__path = expanduser("~") + "/vlibras-translate/data/" - - self.__dicInf = {} - self.__dicSin = {} - self.__dicWords = {} - self.__dic2Gen = {} - self.__dicTemVerbs = {} - self.__fileDic = '' - self.carregarVerbosInfinitivos() - self.carregarSinonimos() - self.carregarPalavrasIgnoradas() - self.carregarSubst2Generos() - self.carregarTemposVerbais() - - # Abre o self.__fileDic que contem os verbos no infinitivo e preenche o dicionario com os mesmos - def carregarVerbosInfinitivos(self): - try: - self.__fileDic = csv.reader(open(self.__path+"dicPortGlosa.csv"), delimiter=";") - except IOError, (errno, strerror): - print "I/O error(%s): %s" % (errno, strerror) - print "carregarVerbosInfinitivos" - - for row in self.__fileDic: - if row[1] != "": - try: - self.__dicInf[row[0].decode("utf-8")] = row[1].decode("utf-8") - except UnicodeDecodeError: - self.__dicInf[row[0].decode('iso8859-1').encode('utf-8')] = row[1].decode('iso8859-1').encode('utf-8') - - # Abre o self.__fileDic que contem os sinonimos e preenche o dicionario com os mesmos - def carregarSinonimos(self): - try: - self.__fileDic = csv.reader(open(self.__path+"portuguesGlosa.csv"), delimiter=";") - except IOError, (errno, strerror): - print "I/O error(%s): %s" % (errno, strerror) - print "carregarSinonimos" - - for row in self.__fileDic: - if row[1] != "": - self.__dicSin[row[0].decode("utf-8")] = row[1].decode("utf-8") - - - # Abre o self.__fileDic que contem os tempos verbais - def carregarTemposVerbais(self): - try: - self.__fileDic = csv.reader(open(self.__path+"temposVerbais.csv"), delimiter=";") - except IOError, (errno, strerror): - print "I/O error(%s): %s" % (errno, strerror) - print "carregarTemposVerbais" - - for row in self.__fileDic: - self.__dicTemVerbs[row[0].decode("utf-8")] = row[0].decode("utf-8") - - # Abre o self.__fileDic que contem os artigos e preposicoes de acordo com o modelo de idioma passado na chamada e preenche o dicionario com os mesmos - def carregarPalavrasIgnoradas(self): - try: - self.__fileDic = csv.reader(open(self.__path+"hWordsRemove.csv"), delimiter=";") - except IOError, (errno, strerror): - print "I/O error(%s): %s" % (errno, strerror) - print "carregarPalavrasIgnoradas" - - for row in self.__fileDic: - self.__dicWords[row[0].decode("utf-8")] = row[0].decode("utf-8") - - # Abre o self.__fileDic que contem os substantivos que sao comuns dos 2 generos e preenche o dicionario com os mesmos - def carregarSubst2Generos(self): - try: - self.__fileDic = csv.reader(open(self.__path+"subs2Generos.csv"), delimiter=";") - except IOError, (errno, strerror): - print "I/O error(%s): %s" % (errno, strerror) - print "carregarSubst2Generos" - - for row in self.__fileDic: - self.__dic2Gen[row[0].decode("utf-8")] = row[0].decode("utf-8") - - # Retorna o dicionario dos verbos no infinitivo - def getVerboInfinitivo(self, token): - return self.__dicInf[token] - - # Retorna o dicionario dos sinonimos - def getSinonimo(self, token): - return self.__dicSin[token] - - # Retorna o dicionario dos artigos e preposicoes a serem removidos pelo simplificador - def getPalavraIgnorada(self, token): - return self.__dicWords[token] - - # Retorna o dicionario dos substantivos a serem analisados pelo simplificador - def getSubst2Generos(self, token): - return self.__dic2Gen[token] - - # Retorna o dicionario dos tempos verbais - def getTempoVerbal(self, token): - return self.__dicTemVerbs[token] - - def hasVerboInfinitivo(self, token): - return self.__dicInf.has_key(token) - - def hasSinonimo(self, token): - return self.__dicSin.has_key(token) - - def hasPalavraIgnorada(self, token): - return self.__dicWords.has_key(token) - - def hasSubst2Genero(self, token): - return self.__dic2Gen.has_key(token) - - def hasTempoVerbal(self, token): - return self.__dicTemVerbs.has_key(token) diff --git a/src/new/LerDicionarios.py b/src/new/LerDicionarios.py new file mode 100644 index 0000000..6d75f01 --- /dev/null +++ b/src/new/LerDicionarios.py @@ -0,0 +1,146 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- + +#Autor: Erickson Silva +#Email: + +#LAViD - Laboratório de Aplicações de Vídeo Digital + +from os.path import expanduser +import csv, platform + +class LeitorDicionarios(object): + #_iInstance = None + + #class Singleton: + # def __init__(self): + # self.LeitorDicionarios = None + + #def __init__( self ): + # if LeitorDicionarios._iInstance is None: + # LeitorDicionarios._iInstance = LeitorDicionarios.Singleton() + + # self._EventHandler_instance = LeitorDicionarios._iInstance + + #def __getattr__(self, aAttr): + # return getattr(self._iInstance, aAttr) + + #def __setattr__(self, aAttr, aValue): + # return setattr(self._iInstance, aAttr, aValue) + + # Define e inicializa os atributos + def __init__(self): + + so = platform.system() + if so == 'Windows': + self.__path = expanduser("~") + "\\vlibras-translate\data\\" + else: + self.__path = expanduser("~") + "/vlibras-translate/data/" + + self.__dicInf = {} + self.__dicSin = {} + self.__dicWords = {} + self.__dic2Gen = {} + self.__dicTemVerbs = {} + self.__fileDic = '' + self.carregarVerbosInfinitivos() + self.carregarSinonimos() + self.carregarPalavrasIgnoradas() + self.carregarSubst2Generos() + self.carregarTemposVerbais() + + # Abre o self.__fileDic que contem os verbos no infinitivo e preenche o dicionario com os mesmos + def carregarVerbosInfinitivos(self): + try: + self.__fileDic = csv.reader(open(self.__path+"dicPortGlosa.csv"), delimiter=";") + except IOError, (errno, strerror): + print "I/O error(%s): %s" % (errno, strerror) + print "carregarVerbosInfinitivos" + + for row in self.__fileDic: + if row[1] != "": + try: + self.__dicInf[row[0].decode("utf-8")] = row[1].decode("utf-8") + except UnicodeDecodeError: + self.__dicInf[row[0].decode('iso8859-1').encode('utf-8')] = row[1].decode('iso8859-1').encode('utf-8') + + # Abre o self.__fileDic que contem os sinonimos e preenche o dicionario com os mesmos + def carregarSinonimos(self): + try: + self.__fileDic = csv.reader(open(self.__path+"portuguesGlosa.csv"), delimiter=";") + except IOError, (errno, strerror): + print "I/O error(%s): %s" % (errno, strerror) + print "carregarSinonimos" + + for row in self.__fileDic: + if row[1] != "": + self.__dicSin[row[0].decode("utf-8")] = row[1].decode("utf-8") + + + # Abre o self.__fileDic que contem os tempos verbais + def carregarTemposVerbais(self): + try: + self.__fileDic = csv.reader(open(self.__path+"temposVerbais.csv"), delimiter=";") + except IOError, (errno, strerror): + print "I/O error(%s): %s" % (errno, strerror) + print "carregarTemposVerbais" + + for row in self.__fileDic: + self.__dicTemVerbs[row[0].decode("utf-8")] = row[0].decode("utf-8") + + # Abre o self.__fileDic que contem os artigos e preposicoes de acordo com o modelo de idioma passado na chamada e preenche o dicionario com os mesmos + def carregarPalavrasIgnoradas(self): + try: + self.__fileDic = csv.reader(open(self.__path+"hWordsRemove.csv"), delimiter=";") + except IOError, (errno, strerror): + print "I/O error(%s): %s" % (errno, strerror) + print "carregarPalavrasIgnoradas" + + for row in self.__fileDic: + self.__dicWords[row[0].decode("utf-8")] = row[0].decode("utf-8") + + # Abre o self.__fileDic que contem os substantivos que sao comuns dos 2 generos e preenche o dicionario com os mesmos + def carregarSubst2Generos(self): + try: + self.__fileDic = csv.reader(open(self.__path+"subs2Generos.csv"), delimiter=";") + except IOError, (errno, strerror): + print "I/O error(%s): %s" % (errno, strerror) + print "carregarSubst2Generos" + + for row in self.__fileDic: + self.__dic2Gen[row[0].decode("utf-8")] = row[0].decode("utf-8") + + # Retorna o dicionario dos verbos no infinitivo + def getVerboInfinitivo(self, token): + return self.__dicInf[token] + + # Retorna o dicionario dos sinonimos + def getSinonimo(self, token): + return self.__dicSin[token] + + # Retorna o dicionario dos artigos e preposicoes a serem removidos pelo simplificador + def getPalavraIgnorada(self, token): + return self.__dicWords[token] + + # Retorna o dicionario dos substantivos a serem analisados pelo simplificador + def getSubst2Generos(self, token): + return self.__dic2Gen[token] + + # Retorna o dicionario dos tempos verbais + def getTempoVerbal(self, token): + return self.__dicTemVerbs[token] + + def hasVerboInfinitivo(self, token): + return self.__dicInf.has_key(token) + + def hasSinonimo(self, token): + return self.__dicSin.has_key(token) + + def hasPalavraIgnorada(self, token): + return self.__dicWords.has_key(token) + + def hasSubst2Genero(self, token): + return self.__dic2Gen.has_key(token) + + def hasTempoVerbal(self, token): + return self.__dicTemVerbs.has_key(token) diff --git a/src/new/Tradutor.py b/src/new/Tradutor.py deleted file mode 100644 index 91982ea..0000000 --- a/src/new/Tradutor.py +++ /dev/null @@ -1,46 +0,0 @@ -#!/usr/bin/python -# -*- coding: utf-8 -*- - -#Autor: Erickson Silva -#Email: - -#LAViD - Laboratório de Aplicações de Vídeo Digital - -import alexp -from AplicaSinonimos import * -from AplicaRegras import * - -aplicaSinonimos = AplicaSinonimos() -aplicaRegras = AplicaRegras() - - -def iniciarTraducao(texto): - textoDividido = texto.split(".") - for w in textoDividido: - if len(w) > 0 and w != " ": - return gerarAnalise(w) - - -def gerarAnalise(sentenca): - '''tokens = alexp.toqueniza(sentenca) - etiquetadas = alexp.etiquetaSentenca(tokens) - analiseMorf = analiseMorfologica(etiquetadas) - print analiseMorf''' - - sintatica = alexp.run(sentenca) - morfologica = alexp.getAnaliseMorfologica() - - if (isinstance(sintatica,type(None))): - return analiseMorfologica(morfologica) - else: - print analiseSintatica(sintatica, morfologica) - - -def analiseMorfologica(listaMorfologica): - proc = aplicaRegras.aplicarRegrasMorfo(listaMorfologica) - return aplicaSinonimos.sinonimosMorfologico(proc) - - -def analiseSintatica(arvoreSintatica, listaMorfologica): - proc = aplicaRegras.aplicarRegrasSint(arvoreSintatica, listaMorfologica) - return aplicaSinonimos.sinonimosSintatico(proc) \ No newline at end of file diff --git a/src/new/TraduzSentencas.py b/src/new/TraduzSentencas.py new file mode 100644 index 0000000..91982ea --- /dev/null +++ b/src/new/TraduzSentencas.py @@ -0,0 +1,46 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- + +#Autor: Erickson Silva +#Email: + +#LAViD - Laboratório de Aplicações de Vídeo Digital + +import alexp +from AplicaSinonimos import * +from AplicaRegras import * + +aplicaSinonimos = AplicaSinonimos() +aplicaRegras = AplicaRegras() + + +def iniciarTraducao(texto): + textoDividido = texto.split(".") + for w in textoDividido: + if len(w) > 0 and w != " ": + return gerarAnalise(w) + + +def gerarAnalise(sentenca): + '''tokens = alexp.toqueniza(sentenca) + etiquetadas = alexp.etiquetaSentenca(tokens) + analiseMorf = analiseMorfologica(etiquetadas) + print analiseMorf''' + + sintatica = alexp.run(sentenca) + morfologica = alexp.getAnaliseMorfologica() + + if (isinstance(sintatica,type(None))): + return analiseMorfologica(morfologica) + else: + print analiseSintatica(sintatica, morfologica) + + +def analiseMorfologica(listaMorfologica): + proc = aplicaRegras.aplicarRegrasMorfo(listaMorfologica) + return aplicaSinonimos.sinonimosMorfologico(proc) + + +def analiseSintatica(arvoreSintatica, listaMorfologica): + proc = aplicaRegras.aplicarRegrasSint(arvoreSintatica, listaMorfologica) + return aplicaSinonimos.sinonimosSintatico(proc) \ No newline at end of file -- libgit2 0.21.2