# ConversorExtenso.py
# -*- coding: utf-8 -*-
# Autor: Erickson Silva

import sys
from unicodedata import normalize
from Iterator import *

# Lookup tables for Portuguese number words. All keys are lower-case and
# accent-free, which is the form produced by simplifica().

# Single-digit words -> integer value.
num = {
    "zero": 0, "um": 1, "dois": 2, "tres": 3, "quatro": 4,
    "cinco": 5, "seis": 6, "sete": 7, "oito": 8, "nove": 9,
}

# Magnitude words (singular form) -> integer multiplier.
und = {
    "mil": 1000,
    "milhao": 1000000,
    "bilhao": 1000000000,
    "trilhao": 1000000000000,
}

# Word -> digit-string tables, indexed by position:
#   ext[0]: words for 1..19 -> their full decimal string ("1".."19")
#   ext[1]: tens words      -> leading digit only ("vinte" -> "2")
#   ext[2]: hundreds words  -> leading digit only ("duzentos" -> "2")
ext = [
    {
        "um": "1", "dois": "2", "tres": "3", "quatro": "4", "cinco": "5",
        "seis": "6", "sete": "7", "oito": "8", "nove": "9", "dez": "10",
        "onze": "11", "doze": "12", "treze": "13", "quatorze": "14",
        "quinze": "15", "dezesseis": "16", "dezessete": "17",
        "dezoito": "18", "dezenove": "19",
    },
    {
        "vinte": "2", "trinta": "3", "quarenta": "4", "cinquenta": "5",
        "sessenta": "6", "setenta": "7", "oitenta": "8", "noventa": "9",
    },
    {
        "cento": "1", "cem": "1", "duzentos": "2", "trezentos": "3",
        "quatrocentos": "4", "quinhentos": "5",
        # Correct spellings; without them 600 and 700 are never recognised.
        "seiscentos": "6", "setecentos": "7",
        # Misspelled keys from the original table, kept so existing inputs
        # that rely on them keep working.
        "seissentos": "6", "setessentos": "7",
        "oitocentos": "8", "novecentos": "9",
    },
]

# Magnitude words (singular and plural) -> string of zeros appended to a
# digit string by extenso2().
unds = {
    "mil": "000",
    "milhao": "000000", "milhoes": "000000",
    "bilhao": "000000000", "bilhoes": "000000000",
    "trilhao": "000000000000", "trilhoes": "000000000000",
}



def oneDigit(x):
    """Return the digit string for a word naming a value from 1 to 19.

    ``x`` is looked up in the first ``ext`` table; a KeyError is raised
    when the word is not present there.
    """
    low_words = ext[0]
    return low_words[x]

def twoDigit(x):
    """Return the two-digit string for a tens word and an optional unit word.

    ``x`` is a sequence of words: ``x[0]`` must be a key of ``ext[1]``
    (e.g. "vinte") and ``x[1]``, when present, a key of ``ext[0]``.
    If the second word is missing or not in ``ext[0]``, the units digit
    defaults to "0" (["vinte"] -> "20").

    Raises KeyError if ``x[0]`` is not a tens word and IndexError if ``x``
    is empty.
    """
    tens = ext[1][x[0]]
    try:
        return tens + ext[0][x[1]]
    except (IndexError, KeyError):
        # Only a missing or unrecognised second word falls back to a round
        # ten; any other error now propagates instead of being swallowed.
        return tens + "0"

def threeDigit(x):
    """Return the three-digit string for [hundreds, tens, units] words.

    The first three items of ``x`` are looked up in ``ext[2]``, ``ext[1]``
    and ``ext[0]`` respectively and the results are concatenated. A
    KeyError or IndexError is raised when a word is missing or not in the
    table for its position.
    """
    hundreds, tens, units = ext[2][x[0]], ext[1][x[1]], ext[0][x[2]]
    return hundreds + tens + units

def extenso2(n):
    """Convert one comma-separated group of number words to a digit string.

    ``n`` holds accent-free words joined by commas, for example
    "duzentos,trinta,cinco" -> "235". When the last word is a key of
    ``unds`` it is removed from the group and its zeros are appended to
    the result ("dois,mil" -> "2000").

    Returns the digits as a string. The string is empty (or only the
    magnitude zeros) when the first word is in none of the ``ext`` tables.
    Raises KeyError when a later word is not valid for its position.
    """
    sn = n.split(",")
    size = len(sn)
    # Captured before the magnitude word is stripped, as in the original.
    firstWord = sn[0]
    endWord = ""
    numExt = ""

    # Peel off a trailing magnitude word ("mil", "milhoes", ...).
    if sn[-1] in unds:
        endWord = sn.pop()
        size -= 1

    if firstWord in ext[0]:
        numExt = oneDigit(firstWord)

    elif firstWord in ext[1]:
        numExt = twoDigit(sn)

    elif firstWord in ext[2]:
        hundreds = ext[2][firstWord]
        if size == 1:
            numExt = hundreds + "00"
        elif size == 2:
            second = sn[1]
            if second in ext[0]:
                # ext[0] yields "1".."9" or "10".."19"; zfill pads only the
                # single digits. The original always inserted a "0", which
                # turned "cento,quinze" into "1015" instead of "115".
                numExt = hundreds + ext[0][second].zfill(2)
            else:
                # Must be a tens word ("cento,vinte" -> "120"); an unknown
                # word raises KeyError here, as it did before.
                numExt = hundreds + ext[1][second] + "0"
        else:
            numExt = threeDigit(sn)

    if endWord != "":
        numExt = numExt + unds[endWord]

    return numExt

def extenso(extenso):
    """Convert a phrase of number words into an integer.

    The phrase is split on single spaces and every token is normalised
    with simplifica(). A token that is a key of ``und`` multiplies the
    most recent partial value (or starts one when the iterator count is
    0). Any other token is converted through ``num`` or extenso2() and is
    either added to the most recent partial value or appended as a new
    one. The partial values are finally added up by soma().

    NOTE(review): this relies on Iterator.getToken() advancing the cursor
    and on getToken('-') returning the previous token -- confirm against
    the Iterator module, which is not part of this file.
    """
    # The original declared these as module globals; the declaration is
    # kept so the module-level side effect stays the same.
    global newToken, auxToken
    nums = []
    it = Iterator()
    it.load(extenso.split(" "))
    while it.hasNext():
        token = simplifica(it.getToken())
        tokenAnterior = simplifica(it.getToken('-'))
        if token in und:
            if it.getCount() == 0:
                # A leading magnitude word stands on its own ("mil").
                nums.append(und[token])
            else:
                # Scale the value accumulated so far ("dois" "mil").
                newToken = und[token] * int(nums[-1])
                nums[-1] = newToken
        else:
            if token in num:
                auxToken = num[token]
            else:
                auxToken = extenso2(token)

            if tokenAnterior not in und and it.getCount() > 0:
                # The previous token was not a magnitude word, so this
                # value continues the current partial value.
                newToken = int(auxToken) + int(nums[-1])
                nums[-1] = newToken
            else:
                nums.append(auxToken)

    return soma(nums)

def soma(lista):
    """Return the sum of the items of ``lista``, each converted with int().

    Items may be integers or digit strings; an empty sequence gives 0.
    """
    return sum(int(item) for item in lista)

def simplifica(txt):
    """Return ``txt`` with accents removed and a trailing "oes" singularised.

    Byte strings are decoded as UTF-8, falling back to ISO-8859-1 when
    they are not valid UTF-8; text that is already decoded is used as is.
    The text is then NFKD-normalised and every non-ASCII character is
    dropped, so "tr\\u00eas" becomes "tres". Finally, a word ending in
    "oes" has that ending replaced by "ao" ("milhoes" -> "milhao").

    The result is a native ``str``.
    """
    if isinstance(txt, bytes):
        try:
            txt = txt.decode('utf-8')
        except UnicodeDecodeError:
            # Not valid UTF-8: ISO-8859-1 maps every byte, so this
            # fallback cannot fail.
            txt = txt.decode('iso-8859-1')

    newToken = normalize('NFKD', txt).encode('ASCII', 'ignore')
    if not isinstance(newToken, str):
        # On Python 3 encode() returns bytes; convert back to text so the
        # comparisons and dictionary lookups done by callers still match.
        newToken = newToken.decode('ascii')

    if newToken[-3:] == "oes":
        return newToken[:-3] + "ao"
    return newToken


if __name__ == '__main__':
    # Command-line entry point: convert the phrase passed as the first
    # argument and print the resulting integer.
    if len(sys.argv) < 2:
        sys.exit("usage: %s <number-words>" % sys.argv[0])
    n = sys.argv[1]
    # Parenthesised form works as both the Python 2 statement and the
    # Python 3 function.
    print(extenso(n))

    # Disabled batch check, previously stored in an unused string literal.
    # It compares extenso() for the first 500 lines of the file 'exts'
    # with the expected values in the file 'nums'.
    #
    # arquivoExts = open('exts', 'r')
    # listaExts = arquivoExts.readlines()
    # arquivoNums = open('nums', 'r')
    # listaNums = arquivoNums.readlines()
    # for i in range(0, 500):
    #     n = listaNums[i].replace("\n", "")
    #     e = listaExts[i].replace("\n", "")
    #     numNew = extenso(e)
    #     if (str(numNew) != n):
    #         print(n + " != " + str(numNew))
    #     # else:
    #     #     print("OK: " + n + " == " + str(numNew))