Commit 408ce64883cb450ab0acbf9694e90e75d3952b46

Authored by Erickson Silva
1 parent a5237893
Exists in master and in 1 other branch devel

Remove caractere especial 'marcador' ao classificar sentença.

Showing 1 changed file with 2 additions and 1 deletion   Show diff stats
src/ClassificaSentencas.py
@@ -50,6 +50,7 @@ class ClassificaSentencas(object): @@ -50,6 +50,7 @@ class ClassificaSentencas(object):
50 decodificada = regex2.sub('',regex.sub('"',s.replace("–", "-").replace("—", "-"))).decode("utf-8") 50 decodificada = regex2.sub('',regex.sub('"',s.replace("–", "-").replace("—", "-"))).decode("utf-8")
51 except: 51 except:
52 decodificada = s.decode("utf-8") 52 decodificada = s.decode("utf-8")
  53 +
53 return Toqueniza.TOK_PORT.tokenize(decodificada) 54 return Toqueniza.TOK_PORT.tokenize(decodificada)
54 55
55 def obter_classificacao_morfologica(self): 56 def obter_classificacao_morfologica(self):
@@ -63,7 +64,7 @@ class ClassificaSentencas(object): @@ -63,7 +64,7 @@ class ClassificaSentencas(object):
63 while (anotada[0][1] is None): 64 while (anotada[0][1] is None):
64 time.sleep(random.choice(sleep_times)) 65 time.sleep(random.choice(sleep_times))
65 anotada = AnotaCorpus.anota_sentencas([s],etiquetador,"hunpos")[0] 66 anotada = AnotaCorpus.anota_sentencas([s],etiquetador,"hunpos")[0]
66 - regex = re.compile('[%s]' % re.escape('!"#&()*+,-./:;<=>?@[\]^_`{|}~')) 67 + regex = re.compile('[%s]' % re.escape(u'\u2022''!"#&()*+,-./:;<=>?@[\]^_`{|}~'))
67 tag_punctuation = [".",",","QT","("] 68 tag_punctuation = [".",",","QT","("]
68 anotada_corrigida = [] 69 anotada_corrigida = []
69 for x in anotada: 70 for x in anotada: