Commit 2e84c19e106cbf855b1ed153080cb6638fd2a666

Authored by Erickson Silva
1 parent 840afa0e
Exists in master and in 1 other branch devel

Adiciona conversor de numeros por extenso

Showing 1 changed file with 144 additions and 0 deletions   Show diff stats
src/new/ConversorExtenso.py 0 → 100644
... ... @@ -0,0 +1,144 @@
  1 +# -*- coding: utf-8 -*-
  2 +# Autor: Erickson Silva
  3 +
  4 +import sys
  5 +from unicodedata import normalize
  6 +from Iterator import *
  7 +
# Lookup tables for Portuguese number words. Keys are accent-free because
# extenso() passes every word through simplifica() before looking it up.

# Single-digit words mapped to their integer value.
num = {
    "zero": 0, "um": 1, "dois": 2, "tres": 3, "quatro": 4,
    "cinco": 5, "seis": 6, "sete": 7, "oito": 8, "nove": 9,
}

# Scale words mapped to the integer multiplier used by extenso().
und = {
    "mil": 1000,
    "milhao": 1000000,
    "bilhao": 1000000000,
    "trilhao": 1000000000000,
}

# Digit strings by decimal place:
#   ext[0] -> units and the irregular 10..19 words (values may have two digits)
#   ext[1] -> tens (value is the tens digit)
#   ext[2] -> hundreds (value is the hundreds digit)
ext = [
    {
        "um": "1", "dois": "2", "tres": "3", "quatro": "4", "cinco": "5",
        "seis": "6", "sete": "7", "oito": "8", "nove": "9", "dez": "10",
        "onze": "11", "doze": "12", "treze": "13", "quatorze": "14",
        "quinze": "15", "dezesseis": "16", "dezessete": "17",
        "dezoito": "18", "dezenove": "19",
    },
    {
        "vinte": "2", "trinta": "3", "quarenta": "4", "cinquenta": "5",
        "sessenta": "6", "setenta": "7", "oitenta": "8", "noventa": "9",
    },
    {
        "cento": "1", "cem": "1", "duzentos": "2", "trezentos": "3",
        "quatrocentos": "4", "quinhentos": "5",
        # Correct spellings of 600 and 700.
        "seiscentos": "6", "setecentos": "7",
        # Misspelled keys from the first version, kept so that existing
        # callers relying on them keep working.
        "seissentos": "6", "setessentos": "7",
        "oitocentos": "8", "novecentos": "9",
    },
]

# Scale words (singular and plural) mapped to the string of zeros that
# extenso2() appends to the digits it has built.
unds = {
    "mil": "000",
    "milhao": "000000", "milhoes": "000000",
    "bilhao": "000000000", "bilhoes": "000000000",
    "trilhao": "000000000000", "trilhoes": "000000000000",
}
  23 +
  24 +
  25 +
def oneDigit(x):
    """Return the digit string stored for word `x` in the units table.

    The units table (``ext[0]``) also holds the words for 10 to 19, so the
    result may have two characters despite the function name.

    Raises:
        KeyError: if `x` is not a key of ``ext[0]``.
    """
    units = ext[0]
    return units[x]
  28 +
def twoDigit(x):
    """Build the digit string for a tens word optionally followed by a units word.

    Args:
        x: sequence of words; ``x[0]`` must be a key of the tens table
            (``ext[1]``) and ``x[1]``, when present, a key of ``ext[0]``.

    Returns:
        The tens digit followed by the units digits, or by "0" when there is
        no second word or it is not in the units table.

    Raises:
        KeyError: if ``x[0]`` is not a tens word.
    """
    tens = ext[1][x[0]]
    try:
        return tens + ext[0][x[1]]
    except (IndexError, KeyError):
        # No units word, or one that is not in the table: a round ten.
        return tens + "0"
  34 +
def threeDigit(x):
    """Concatenate the digits for a hundreds word, a tens word and a units word.

    Args:
        x: sequence whose first three items are keys of ``ext[2]``,
            ``ext[1]`` and ``ext[0]`` respectively.

    Raises:
        KeyError / IndexError: if a word is missing from its table or `x`
            has fewer than three items.
    """
    # (table index, position in x) pairs, evaluated hundreds -> tens -> units.
    lookup_order = ((2, 0), (1, 1), (0, 2))
    return "".join(ext[table][x[position]] for table, position in lookup_order)
  37 +
def extenso2(n):
    """Convert a comma-separated group of number words into a digit string.

    The group is up to three words (hundreds, tens, units), optionally
    followed by a scale word from ``unds`` (for example "mil"), whose zeros
    are appended to the result.

    Args:
        n: words joined by "," (for example "cento,vinte,um" or "vinte,mil").

    Returns:
        The digits as a string. An unrecognised first word yields "" (or
        only the scale zeros when a scale word was present).

    Raises:
        KeyError: if a later word is not found in the table it is looked up in.
    """
    sn = n.split(",")
    firstWord = sn[0]
    endWord = ""
    numExt = ""

    # Peel off a trailing scale word so that only the digit words remain.
    if sn[-1] in unds:
        endWord = sn.pop()
    size = len(sn)

    if firstWord in ext[0]:
        numExt = oneDigit(firstWord)
    elif firstWord in ext[1]:
        numExt = twoDigit(sn)
    elif firstWord in ext[2]:
        hundreds = ext[2][firstWord]
        if size == 1:
            numExt = hundreds + "00"
        elif size == 2:
            low = ext[0].get(sn[1])
            if low is not None:
                # Units need a "0" for the tens place ("cento,um" -> "101");
                # "dez".."dezenove" already carry two digits
                # ("cento,dez" -> "110"), so pad to width two instead of
                # always prefixing a zero.
                numExt = hundreds + low.zfill(2)
            else:
                # Not a units word: treat it as a round ten ("cento,vinte").
                numExt = hundreds + twoDigit([sn[1]])
        else:
            numExt = threeDigit(sn)

    if endWord:
        numExt += unds[endWord]

    return numExt
  73 +
def extenso(extenso):
    """Convert a number written out in Portuguese words into an integer.

    The text is split on single spaces and each word is normalised with
    simplifica(). A scale word (key of ``und``) multiplies the most recent
    partial value; any other word is added to the most recent partial value,
    unless the previous word was a scale word or nothing has been read yet,
    in which case it starts a new partial value. The result is the sum of
    all partial values.

    NOTE(review): Iterator is imported from elsewhere. This code presumes
    getToken() advances to the next word, getToken('-') returns the previous
    word and getCount() is 0 for the first word; confirm against the
    Iterator class.

    NOTE(review): words found in neither ``und`` nor ``num`` go to extenso2(),
    which returns "" for unknown words; int("") then raises ValueError.
    Presumably callers strip connectives such as "e" beforehand; verify.

    Args:
        extenso: the spelled-out number, words separated by spaces.

    Returns:
        The integer value computed by soma() over the partial values.
    """
    # Kept as module globals exactly like the original implementation.
    global newToken, auxToken

    partials = []
    it = Iterator()
    it.load(extenso.split(" "))

    while it.hasNext():
        token = simplifica(it.getToken())
        tokenAnterior = simplifica(it.getToken('-'))

        if token in und:
            if it.getCount() == 0:
                # A leading scale word stands on its own ("mil" -> 1000).
                partials.append(und[token])
            else:
                # Scale word multiplies the value accumulated just before it.
                newToken = und[token] * int(partials[-1])
                partials[-1] = newToken
            continue

        # Plain digit words come from `num`; everything else goes to extenso2.
        auxToken = num[token] if token in num else extenso2(token)

        if tokenAnterior not in und and it.getCount() > 0:
            # Same group as the previous word: accumulate.
            newToken = int(auxToken) + int(partials[-1])
            partials[-1] = newToken
        else:
            # First word, or first word after a scale word: new partial value.
            partials.append(auxToken)

    return soma(partials)
  110 +
def soma(lista):
    """Return the sum of the items of `lista`, each converted with int().

    Args:
        lista: iterable of ints or digit strings.

    Returns:
        The integer total; 0 for an empty iterable.

    Raises:
        ValueError: if an item cannot be converted by int().
    """
    return sum(int(item) for item in lista)
  116 +
def simplifica(txt):
    """Reduce a word to accent-free ASCII and singularise an "oes" ending.

    Byte strings are decoded as UTF-8, falling back to ISO-8859-1; text
    strings are used as they are. The text is then NFKD-normalised and
    every non-ASCII character (including the detached accents) is dropped.
    A result ending in "oes" has that ending replaced by "ao", so that
    "milhões" becomes "milhao".

    Args:
        txt: the word, as a byte string or a text string.

    Returns:
        The simplified word as a native string.
    """
    if isinstance(txt, bytes):
        try:
            txt = txt.decode('utf-8')
        except UnicodeDecodeError:
            # Not valid UTF-8: ISO-8859-1 can decode any byte sequence.
            txt = txt.decode('iso-8859-1')

    ascii_only = normalize('NFKD', txt).encode('ASCII', 'ignore')
    # encode() yields the native str on Python 2 but bytes on Python 3;
    # hand back a native string either way so comparisons with literals work.
    if not isinstance(ascii_only, str):
        ascii_only = ascii_only.decode('ascii')

    if ascii_only.endswith("oes"):
        return ascii_only[:-3] + "ao"
    return ascii_only
  127 +
  128 +
if __name__ == '__main__':
    # Command-line use: convert the spelled-out number passed as the first
    # argument and print the resulting integer.
    if len(sys.argv) < 2:
        sys.stderr.write('usage: ConversorExtenso.py "<number in words>"\n')
        sys.exit(2)
    # Parenthesised single-argument form prints the same on Python 2 and 3.
    print(extenso(sys.argv[1]))
    # A disabled regression check used to follow here: it read the files
    # 'exts' and 'nums', ran extenso() on the first 500 lines of 'exts' and
    # reported every line whose result differed from the matching 'nums' line.
  144 +
... ...