Mantém vírgula quando é um float

Erickson Silva
1 parent 20777b90
Showing 1 changed file with 10 additions and 5 deletions Show diff stats
src/alexp.py
@@ -31,7 +31,7 @@ import re,nltk, time, random
 from os.path import expanduser
 from os import environ, path
 from Aelius.Extras import carrega
-from Aelius import AnotaCorpus
+from Aelius import AnotaCorpus, Toqueniza
 from unicodedata import normalize
 sentenca_anotada=""
@@ -42,7 +42,7 @@ def toqueniza(s):
 	"""
 	regex = re.compile('[%s]' % re.escape('“”'))
 	decodificada=regex.sub('"',s.replace("–", "-").replace("—", "-")).decode("utf-8")
-	return AnotaCorpus.TOK_PORT.tokenize(decodificada)
+	return Toqueniza.TOK_PORT.tokenize(decodificada)
 def getAnaliseMorfologica():
 	return sentenca_anotada
@@ -60,9 +60,14 @@ def etiquetaSentenca(s):
 	anotada_corrigida = []
 	for x in anotada:
 		if x[1] not in tag_punctuation:
-			if x[1] == "NUM" and x[1].isdigit():
-				anotada_corrigida.append(x)
-				continue
+			if x[1] == "NUM":
+				try:
+					float(x[0].replace(',', '.'))
+					anotada_corrigida.append(x)
+					continue
+				except:
+					pass
+				
 			tupla = [regex.sub('',x[0]).lower(),x[1]]
 			if tupla[0] != "": anotada_corrigida.append(tupla)
 		else: