# -*- coding: utf-8 -*-
# Author: Erickson Silva
#
# Provenance: recovered from git patch
# 2e84c19e106cbf855b1ed153080cb6638fd2a666
# (Erickson Silva, Wed, 21 Jan 2015 16:51:34 -0300,
# "[PATCH] Adiciona conversor de numeros por extenso"), which created
# src/new/ConversorExtenso.py (144 insertions, generated by libgit2 0.21.2).
"""Convert numbers written out in Portuguese words into integers.

The entry point is :func:`extenso`.  Its input is a whitespace-separated
sequence of tokens; each token is either a scale word (a key of ``und``),
a single digit word (a key of ``num``) or a comma-joined group of words
that :func:`extenso2` turns into a string of decimal digits.

All lookups are done on accent-free ASCII words; :func:`simplifica`
performs that normalisation.  The module runs on Python 2 and Python 3.
"""

import sys
from unicodedata import normalize

# Single digit words -> integer value.
num = {
    "zero": 0, "um": 1, "dois": 2, "tres": 3, "quatro": 4, "cinco": 5,
    "seis": 6, "sete": 7, "oito": 8, "nove": 9,
}

# Scale words (singular, accent-free) -> integer multiplier.
und = {
    "mil": 1000,
    "milhao": 1000000,
    "bilhao": 1000000000,
    "trilhao": 1000000000000,
}

# Digit strings by position: ext[0] units and teens (teens are two
# characters long), ext[1] tens, ext[2] hundreds.
ext = [
    {
        "um": "1", "dois": "2", "tres": "3", "quatro": "4", "cinco": "5",
        "seis": "6", "sete": "7", "oito": "8", "nove": "9", "dez": "10",
        "onze": "11", "doze": "12", "treze": "13", "quatorze": "14",
        "quinze": "15", "dezesseis": "16", "dezessete": "17",
        "dezoito": "18", "dezenove": "19",
    },
    {
        "vinte": "2", "trinta": "3", "quarenta": "4", "cinquenta": "5",
        "sessenta": "6", "setenta": "7", "oitenta": "8", "noventa": "9",
    },
    {
        "cento": "1", "cem": "1", "duzentos": "2", "trezentos": "3",
        "quatrocentos": "4", "quinhentos": "5",
        "seiscentos": "6", "setecentos": "7",
        # Misspelled keys kept so input that already relies on them
        # continues to convert.
        "seissentos": "6", "setessentos": "7",
        "oitocentos": "8", "novecentos": "9",
    },
]

# Scale words (singular and plural) -> the zeros they append to a group.
unds = {
    "mil": "000",
    "milhao": "000000", "milhoes": "000000",
    "bilhao": "000000000", "bilhoes": "000000000",
    "trilhao": "000000000000", "trilhoes": "000000000000",
}


def oneDigit(x):
    """Return the digit string of a unit or teen word.

    Units give one character (``"tres"`` -> ``"3"``), teens give two
    (``"onze"`` -> ``"11"``).  Raises ``KeyError`` for any other word.
    """
    return ext[0][x]


def twoDigit(x):
    """Return the two-digit string for a tens word and an optional unit.

    ``x`` is a sequence whose first item is a tens word and whose optional
    second item is a unit word.  A missing or unknown second word counts
    as zero (``["vinte"]`` -> ``"20"``).  Raises ``KeyError`` when the
    first word is not a tens word and ``IndexError`` when ``x`` is empty.
    """
    tens = ext[1][x[0]]
    try:
        return tens + ext[0][x[1]]
    except (IndexError, KeyError):
        return tens + "0"


def threeDigit(x):
    """Return the three-digit string for ``[hundreds, tens, unit]`` words."""
    return ext[2][x[0]] + ext[1][x[1]] + ext[0][x[2]]


def extenso2(n):
    """Convert one comma-joined group of number words to a digit string.

    ``n`` holds up to three words (hundreds, tens, unit/teen) joined by
    commas, optionally followed by a scale word from ``unds`` whose zeros
    are appended to the result:

    >>> extenso2("cento,vinte,tres")
    '123'
    >>> extenso2("cento,onze")
    '111'
    >>> extenso2("vinte,tres,mil")
    '23000'

    A group whose first word is not a known number word contributes no
    digits, so only the zeros of a trailing scale word (if any) are
    returned.  Raises ``KeyError`` when a later word is unknown.
    """
    sn = n.split(",")
    firstWord = sn[0]
    endWord = ""

    # Peel off a trailing scale word; its zeros are appended at the end.
    if sn[-1] in unds:
        endWord = sn.pop()
    size = len(sn)

    numExt = ""
    if firstWord in ext[0]:
        numExt = oneDigit(firstWord)
    elif firstWord in ext[1]:
        numExt = twoDigit(sn)
    elif firstWord in ext[2]:
        hundreds = ext[2][firstWord]
        if size == 1:
            numExt = hundreds + "00"
        elif size == 2:
            second = sn[1]
            if second in ext[0]:
                # Units are one character and teens two, so pad to two
                # digits: "cento,cinco" -> "105", "cento,onze" -> "111".
                numExt = hundreds + oneDigit(second).zfill(2)
            else:
                numExt = hundreds + twoDigit([second])
        else:
            numExt = threeDigit(sn)

    if endWord:
        numExt += unds[endWord]
    return numExt


def extenso(extenso):
    """Return the integer value of a number written out in words.

    ``extenso`` is split on whitespace.  A scale word (key of ``und``)
    multiplies the running group, or stands alone when it is the first
    token.  Any other token is converted through ``num`` or
    :func:`extenso2` and is added to the running group, unless it follows
    a scale word, in which case it opens a new group.  The result is the
    sum of all groups.  Input without any token yields ``0``.

    Raises ``ValueError`` or ``KeyError`` when a token cannot be converted.
    """
    tokens = extenso.split()
    if not tokens:
        return 0

    # Project-local helper, imported here so that the pure lookup helpers
    # of this module stay importable without it.
    from Iterator import Iterator

    # NOTE(review): assumes hasNext() advances the cursor, getToken()
    # returns the current token, getToken('-') the previous one and
    # getCount() the zero-based index of the current token -- confirm
    # against Iterator.py.
    it = Iterator()
    it.load(tokens)

    nums = []
    while it.hasNext():
        token = simplifica(it.getToken())
        tokenAnterior = simplifica(it.getToken('-'))
        isFirst = it.getCount() == 0

        if token in und:
            if isFirst:
                nums.append(und[token])
            else:
                nums[-1] = und[token] * int(nums[-1])
            continue

        value = num[token] if token in num else extenso2(token)
        if isFirst or tokenAnterior in und:
            # First token, or first token after a scale word: new group.
            nums.append(value)
        else:
            nums[-1] = int(value) + int(nums[-1])

    return soma(nums)


def soma(lista):
    """Return the sum of ``lista`` after converting every item with int()."""
    return sum(int(i) for i in lista)


def simplifica(txt):
    """Return ``txt`` without accents, with a plural "oes" ending as "ao".

    Byte input is decoded as UTF-8, falling back to ISO-8859-1; text input
    is used as is.  Characters that have no ASCII form after NFKD
    decomposition are dropped.  The result is always the native ``str``
    type, so it can be looked up directly in the tables of this module.

    >>> simplifica("mil")
    'mil'
    """
    if isinstance(txt, bytes):
        try:
            txt = txt.decode('utf-8')
        except UnicodeDecodeError:
            txt = txt.decode('iso-8859-1')

    asciiBytes = normalize('NFKD', txt).encode('ASCII', 'ignore')
    # On Python 2 the native str type is bytes; on Python 3 it is text.
    newToken = asciiBytes if str is bytes else asciiBytes.decode('ascii')

    if newToken.endswith("oes"):
        return newToken[:-3] + "ao"
    return newToken


if __name__ == '__main__':
    if len(sys.argv) < 2:
        sys.stderr.write("usage: ConversorExtenso.py <number in words>\n")
        sys.exit(1)
    print(extenso(sys.argv[1]))