Commit 548a8ac1ff799d65fff40e18b6cdc001ca3e8e1a

Authored by Erickson Silva
1 parent 20777b90
Exists in master and in 1 other branch devel

Mantém vírgula quando é um float

Showing 1 changed file with 10 additions and 5 deletions   Show diff stats
src/alexp.py
... ... @@ -31,7 +31,7 @@ import re,nltk, time, random
31 31 from os.path import expanduser
32 32 from os import environ, path
33 33 from Aelius.Extras import carrega
34   -from Aelius import AnotaCorpus
  34 +from Aelius import AnotaCorpus, Toqueniza
35 35 from unicodedata import normalize
36 36  
37 37 sentenca_anotada=""
... ... @@ -42,7 +42,7 @@ def toqueniza(s):
42 42 """
43 43 regex = re.compile('[%s]' % re.escape('“”'))
44 44 decodificada=regex.sub('"',s.replace("–", "-").replace("—", "-")).decode("utf-8")
45   - return AnotaCorpus.TOK_PORT.tokenize(decodificada)
  45 + return Toqueniza.TOK_PORT.tokenize(decodificada)
46 46  
47 47 def getAnaliseMorfologica():
48 48 return sentenca_anotada
... ... @@ -60,9 +60,14 @@ def etiquetaSentenca(s):
60 60 anotada_corrigida = []
61 61 for x in anotada:
62 62 if x[1] not in tag_punctuation:
63   - if x[1] == "NUM" and x[1].isdigit():
64   - anotada_corrigida.append(x)
65   - continue
  63 + if x[1] == "NUM":
  64 + try:
  65 + float(x[0].replace(',', '.'))
  66 + anotada_corrigida.append(x)
  67 + continue
  68 + except:
  69 + pass
  70 +
66 71 tupla = [regex.sub('',x[0]).lower(),x[1]]
67 72 if tupla[0] != "": anotada_corrigida.append(tupla)
68 73 else:
... ...