# LAViD - Digital Video Applications Laboratory
#
# Dictionary reader for vlibras-translate: loads the CSV word lists shipped in
# the user's ``vlibras-translate/data`` directory and answers membership and
# lookup queries on them.

from os.path import expanduser, join
import csv
import platform


class LerDicionarios(object):
    """In-memory view of the vlibras-translate CSV dictionaries.

    Single-column files (adverbs, articles, prepositions, two-gender nouns,
    linking verbs) are stored as sets of words.  Two-column, ``;``-delimited
    files (synonyms, infinitive verbs) are stored as ``{word: replacement}``
    dicts.  Loading is best effort: a file that cannot be opened is reported
    on stdout and its dictionary is simply left empty.
    """

    def __init__(self, path=None):
        """Load every dictionary.

        path -- directory holding the CSV files.  When omitted, the
                platform-specific default returned by ``get_path()`` is used.
        """
        self.path = self.get_path() if path is None else path
        self.dic_adv_intensidade = set()
        self.dic_adv_tempo = set()
        self.dic_art = set()
        self.dic_prep = set()
        self.dic_sin = {}
        self.dic_sb_2_gen = set()
        self.dic_vb_infinitivo = {}
        self.dic_vb_ligacao = set()
        # Legacy attribute kept so existing code that touches it does not
        # break; the loaders no longer park an open CSV reader here.
        self.file = ''
        self.carregar_dicionarios()

    def get_path(self):
        """Return the default data directory under the user's home."""
        if platform.system() == 'Windows':
            return expanduser("~") + "\\vlibras-translate\\data\\"
        return expanduser("~") + "/vlibras-translate/data/"

    def carregar_dicionarios(self):
        """Run every loader, one per CSV file."""
        self.carregar_adverbios_intensidade()
        self.carregar_adverbios_tempo()
        self.carregar_artigos()
        self.carregar_preposicoes()
        self.carregar_sinonimos()
        self.carregar_subs_2_generos()
        self.carregar_verbos_infinitivo()
        self.carregar_verbos_ligacao()

    # ------------------------------------------------------------------
    # Private helpers shared by all loaders
    # ------------------------------------------------------------------

    @staticmethod
    def _texto(valor):
        """Decode UTF-8 byte strings; pass text through untouched.

        On Python 2 the csv module yields byte strings, which are decoded
        here.  Values that are already text are returned as they are.
        """
        if isinstance(valor, bytes):
            return valor.decode("utf-8")
        return valor

    def _ler_linhas(self, nome_arquivo, origem, delimiter=","):
        """Return the non-empty rows of ``nome_arquivo`` as lists of fields.

        The file is always closed before returning.  If it cannot be opened
        or read, the I/O error and ``origem`` (the name of the calling
        loader) are printed and an empty list is returned.
        """
        try:
            with open(join(self.path, nome_arquivo)) as arquivo:
                leitor = csv.reader(arquivo, delimiter=delimiter)
                # Blank lines come back as [] and carry no word: drop them.
                return [linha for linha in leitor if linha]
        except IOError as err:
            print("I/O error(%s): %s" % (err.errno, err.strerror))
            print(origem)
            return []

    def _carregar_lista(self, nome_arquivo, origem):
        """Return the set of first-column words of a single-column CSV."""
        linhas = self._ler_linhas(nome_arquivo, origem)
        return set(self._texto(linha[0]) for linha in linhas)

    def _carregar_pares(self, nome_arquivo, origem):
        """Return ``{column 0: column 1}`` for a ``;``-delimited CSV.

        Rows with a missing or empty second column are skipped.
        """
        pares = {}
        for linha in self._ler_linhas(nome_arquivo, origem, delimiter=";"):
            if len(linha) > 1 and linha[1] != "":
                pares[self._texto(linha[0])] = self._texto(linha[1])
        return pares

    # ------------------------------------------------------------------
    # Loaders
    # ------------------------------------------------------------------

    def carregar_adverbios_intensidade(self):
        """Load adverbiosIntensidade.csv into ``dic_adv_intensidade``."""
        self.dic_adv_intensidade = self._carregar_lista(
            "adverbiosIntensidade.csv", "carregar_adverbios_intensidade")

    def carregar_adverbios_tempo(self):
        """Load adverbiosTempo.csv into ``dic_adv_tempo``."""
        self.dic_adv_tempo = self._carregar_lista(
            "adverbiosTempo.csv", "carregar_adverbios_tempo")

    def carregar_artigos(self):
        """Load artigos.csv into ``dic_art``."""
        self.dic_art = self._carregar_lista(
            "artigos.csv", "carregar_artigos")

    def carregar_preposicoes(self):
        """Load preposicoes.csv into ``dic_prep``."""
        self.dic_prep = self._carregar_lista(
            "preposicoes.csv", "carregar_preposicoes")

    def carregar_sinonimos(self):
        """Merge the pairs of sinonimos.csv into ``dic_sin``."""
        self.dic_sin.update(
            self._carregar_pares("sinonimos.csv", "carregar_sinonimos"))

    def carregar_subs_2_generos(self):
        """Load subs2Generos.csv into ``dic_sb_2_gen``."""
        self.dic_sb_2_gen = self._carregar_lista(
            "subs2Generos.csv", "carregar_subs_2_generos")

    def carregar_verbos_infinitivo(self):
        """Merge the pairs of verbosInfinitivo.csv into ``dic_vb_infinitivo``."""
        self.dic_vb_infinitivo.update(
            self._carregar_pares("verbosInfinitivo.csv",
                                 "carregar_verbos_infinitivo"))

    def carregar_verbos_ligacao(self):
        """Load verbosLigacao.csv into ``dic_vb_ligacao``."""
        # The file name used to carry a stray leading space
        # (" verbosLigacao.csv"), so this list could never be loaded.
        self.dic_vb_ligacao = self._carregar_lista(
            "verbosLigacao.csv", "carregar_verbos_ligacao")

    # ------------------------------------------------------------------
    # Queries
    # ------------------------------------------------------------------

    def has_adverbio_intensidade(self, token):
        """Return True if ``token`` is in the intensity-adverb list."""
        return token in self.dic_adv_intensidade

    def has_adverbio_tempo(self, token):
        """Return True if ``token`` is in the time-adverb list."""
        return token in self.dic_adv_tempo

    def has_artigo(self, token):
        """Return True if ``token`` is in the article list."""
        return token in self.dic_art

    def has_preposicao(self, token):
        """Return True if ``token`` is in the preposition list."""
        return token in self.dic_prep

    def has_sinonimo(self, token):
        """Return True if ``token`` has an entry in the synonym dictionary."""
        return token in self.dic_sin

    def has_subst_2_generos(self, token):
        """Return True if ``token`` is in the two-gender noun list."""
        return token in self.dic_sb_2_gen

    def has_verbo_infinitivo(self, token):
        """Return True if ``token`` has an entry in the infinitive dictionary."""
        return token in self.dic_vb_infinitivo

    def has_verbo_ligacao(self, token):
        """Return True if ``token`` is in the linking-verb list."""
        return token in self.dic_vb_ligacao

    def get_sinonimo(self, token):
        """Return the synonym stored for ``token``; raise KeyError if absent."""
        return self.dic_sin[token]

    def get_verbo_infinitivo(self, token):
        """Return the infinitive stored for ``token``; raise KeyError if absent."""
        return self.dic_vb_infinitivo[token]