Adicionado tratamento de tempo verbal no tradutor.

Erickson Silva
1 parent ea7323f4
Showing 7 changed files with 109 additions and 16 deletions
ModuleTranslate.py
data/dicPortGlosa.csv
data/temposVerbais.csv
servico/src/serviceWindowGeneration.cpp
tradutor/src/py/Simplificador.py
tradutor/src/py/Tradutor.py
tradutor/src/py/WorkCSV.py
@@ -3,7 +3,6 @@
 import sys, os
 sys.path.append(os.getcwd()+"/tradutor/src/py")
-#sys.path.append(os.path.expanduser("~/gtaaas/tradutor/src/py"))
 from Tradutor import *
 tradutor = Tradutor()
@@ -98462,7 +98462,7 @@ DEMOCRATIZASTE;DEMOCRATIZAR;all;;;;;;;;;ver
 SUAVAS;SUAR;all;;;;;;;;;ver
 AMANTE;AMANTE;all;;;;;;;;;adj, sub
 ESTUDO;ESTUDAR;all;;;;;;;;;ver, sub
-FUI;SER;all;IR;ver;;;;;;;ver
+FUI;IR;all;;ver;;;;;;;ver
 CONTRADIGAIS;CONTRADIZER;all;;;;;;;;;ver
 APERTAS;APERTAR;all;;;;;;;;;ver
 AUXILIARIAMOS;AUXILIAR;all;;;;;;;;;ver
@@ -0,0 +1,15 @@
+ONTEM
+ANTIGAMENTE
+PASSADO
+ANTEONTEM
+ANTES
+ATRAS
+HOJE
+AGORA
+IMEDIATAMENTE
+JA
+DIARAMENTE
+AMANHA
+DEPOIS
+FUTURO
+FUTURAMENTE
 \ No newline at end of file
@@ -43,8 +43,7 @@ bool ServiceWindowGeneration::isRunning() {
 /* Quando o sincronizador termina, ele invoca esse método para avisar! */
 void ServiceWindowGeneration::finalizouSincronizacao() {
-    char op = this->getRunningOption();
-    if (op != '2' && serviceType != SERVICE_TYPE_TEXT && serviceType != SERVICE_TYPE_SRT_ONLY) {
+    if (getRunningOption() != '2' && serviceType != SERVICE_TYPE_TEXT && serviceType != SERVICE_TYPE_SRT_ONLY) {
         mixer = new Mixer();
         mixer->initialize(this->path_input, this->path_libras,this->position,this->size,this->transparency);
     }
@@ -17,6 +17,7 @@ class Simplificador(object):
 		self.__dicSin = {}
 		self.__dicWords = {}
 		self.__dic2Gen = {}
+		self.__dicTemVerbs = {}
 		self.executeWorkCSV()	
 	# retira artigos e preposicoes; passa verbos para infinitivo e verificar se há sinonimos
@@ -24,11 +25,21 @@ class Simplificador(object):
 		self.__ts = []
 		self.it.load(texto)
 		self.__b = False
+		self.__verb = False
+		self.__adv = False;
+		self.__countVerb = 0
+		self.__countAdv = 0
 		while(self.it.hasNext()):
 			w = self.auxConvert(self.it.getAtualW())
 			t = self.it.getAtualT()
 			if self.__dicWords.has_key(t) == False: # verifica se nao eh artigo/preposicao
 				wu = w.upper() 						# deixa o token maiusculo
+				if t[:2] == "VB":
+					self.__verb = True
+					self.__countVerb += 1
+				if t[:3] == "ADV":
+					self.__adv = True
+					self.__countAdv += 1
 				if self.__dicInf.has_key(wu):		# verifica se ha um verbo infinitivo desse token
 					sAux = self.__dicInf[wu]		# se sim, adiciona numa string aux
 					if self.__dicSin.has_key(sAux):	# verifica se ha um sinonimo para esse verbo infinitivo
@@ -42,6 +53,7 @@ class Simplificador(object):
 					self.__bSin = True   		
 				if self.__dic2Gen.has_key(wu):
+					del self.__ts[-1]
 					lenTicket = len(self.it.getAntT())
 					if ((self.__dicWords.has_key(self.it.getAntT())) and (self.it.getAntT()[lenTicket-1:] == "F") or (self.it.getAntT()[lenTicket-3:] == "F-P")):
 						self.__ts.append(["MULHER " + wu,t])
@@ -50,21 +62,18 @@ class Simplificador(object):
 					self.__b = True          
 				if self.__b == False:             	# verifica se nao encontrou nem verbo infinito ou sinonimo
 					self.__ts.append([wu,t])
-				self.__b = False
-
 		self.it.reset()
+		if self.__verb == True:
+			return self.verbalAnalysis(self.__ts)
 		return self.__ts
 	# cria e recupera todos os dicionarios (verbos inf., sinonimos e artigos/preposicoes)
 	def executeWorkCSV(self):
-		self.__csv.createDicInf()
-		self.__csv.createDicSin()
-		self.__csv.createDicWords()
-		self.__csv.createDic2Gen()
 		self.__dicInf = self.__csv.getDicInf()
 		self.__dicSin = self.__csv.getDicSin()
 		self.__dicWords = self.__csv.getDicWords()
 		self.__dic2Gen = self.__csv.getDic2Gen()
+		self.__dicTemVerbs = self.__csv.getDicTemVerbs()
 	# converte romano para numero/numero para palavra
 	def auxConvert(self, t):
@@ -74,4 +83,55 @@ class Simplificador(object):
 		except:
 			if t.isdigit():
 				return extenso(t).decode("utf-8")
-			return t
 \ No newline at end of file
+			return t
+
+
+	def verbalAnalysis(self, lista):
+		lv = []
+		self.it.load(lista)
+		hasFut = False
+		hasPas = False
+		count = 0
+		while(self.it.hasNext()):
+			w = self.it.getAtualW().upper()
+			t = self.it.getAtualT()
+
+			if(t[:3] == "ADV"):
+				if (self.__dicTemVerbs.has_key(w)):
+					self.it.reset()
+					#print "ADV: retornou lista original"
+					return lista
+			
+			if(t == "VB-P"):
+				if (self.__countVerb > 1):
+					count += 1
+					#print "VB-P: Incrementou"
+					if(count == self.__countVerb):
+						#print "VB-P Adicionou " + w
+						lv.append([w,t])
+				else:
+					#print "VB-P: retornou lista original"
+					self.it.reset()
+					return lista
+			elif(t == "VB-D"):
+				count += 1
+				hasPas = True
+				#print "VB-D: Incrementou"
+				if(count == self.__countVerb):
+					#print "VB-D Adicionou " + w
+					lv.append([w,t])
+			elif(t == "VB-R"):
+				count += 1
+				hasFut = True
+				#print "VB-R: Incrementou"
+				if(count == self.__countVerb):
+					#print "VB-R Adicionou " + w
+					lv.append([w,t])
+			else:
+				lv.append([w,t])	
+		if (hasFut):
+			lv.append(["FUTURO", "TVB"])
+		elif (hasPas):
+			lv.append(["PASSADO", "TVB"])
+		self.it.reset()
+		return lv
 \ No newline at end of file
@@ -29,7 +29,7 @@ class Tradutor(object):
 		#retira artigos e preposicoes
 		self.__ts = self.__simplificador.simplificar(self.__t)
 		self.__t = None
-
+		
 		#aplica as regras
 		#self.__tr = self.__regras.aplicarRegras(self.__ts)
 		#self.__ts = None
@@ -19,6 +19,13 @@ class WorkCSV(object):
       self.__dicWords = {}
       self.__file2Gen = ''
       self.__dic2Gen = {}      
+      self.__fileTemVerbs = ''
+      self.__dicTemVerbs = {}     
+      self.createDicInf()
+      self.createDicSin()
+      self.createDicWords()
+      self.createDic2Gen()
+      self.createDicTemVerbs() 
    # Abre o arquivo que contem os verbos no infinitivo e preenche o dicionario com os mesmos
    def createDicInf(self):
@@ -46,9 +53,18 @@ class WorkCSV(object):
       for row in self.__fileSin:
          if row[1] != "":
             self.__dicSin[row[0].decode("utf-8")] = row[1].decode("utf-8")
-         
-      #except:
-      #   print "Unexpected error:", sys.exc_info()[0]
+    
+
+    # Abre o arquivo que contem os tempos verbais
+   def createDicTemVerbs(self):
+      try:
+         self.__fileTemVerbs = csv.reader(open(self.__path+"temposVerbais.csv"), delimiter=";")
+      except IOError, (errno, strerror):
+         print "I/O error(%s): %s" % (errno, strerror)
+         print "createDicTemVerbs"
+   
+      for row in self.__fileTemVerbs:
+         self.__dicTemVerbs[row[0].decode("utf-8")] = row[0].decode("utf-8")     
    # Abre o arquivo que contem os artigos e preposicoes de acordo com o modelo de idioma passado na chamada e preenche o dicionario com os mesmos
    def createDicWords(self):
@@ -86,4 +102,8 @@ class WorkCSV(object):
    # Retorna o dicionario dos substantivos a serem analisados pelo simplificador
    def getDic2Gen(self):
-      return self.__dic2Gen
 \ No newline at end of file
+      return self.__dic2Gen
+
+   # Retorna o dicionario dos tempos verbais
+   def getDicTemVerbs(self):
+      return self.__dicTemVerbs   
 \ No newline at end of file
	@@ -3,7 +3,6 @@		@@ -3,7 +3,6 @@
3		3
4	import sys, os	4	import sys, os
5	sys.path.append(os.getcwd()+"/tradutor/src/py")	5	sys.path.append(os.getcwd()+"/tradutor/src/py")
6	-#sys.path.append(os.path.expanduser("~/gtaaas/tradutor/src/py"))
7	from Tradutor import *	6	from Tradutor import *
8		7
9	tradutor = Tradutor()	8	tradutor = Tradutor()
	@@ -98462,7 +98462,7 @@ DEMOCRATIZASTE;DEMOCRATIZAR;all;;;;;;;;;ver		@@ -98462,7 +98462,7 @@ DEMOCRATIZASTE;DEMOCRATIZAR;all;;;;;;;;;ver
98462	SUAVAS;SUAR;all;;;;;;;;;ver	98462	SUAVAS;SUAR;all;;;;;;;;;ver
98463	AMANTE;AMANTE;all;;;;;;;;;adj, sub	98463	AMANTE;AMANTE;all;;;;;;;;;adj, sub
98464	ESTUDO;ESTUDAR;all;;;;;;;;;ver, sub	98464	ESTUDO;ESTUDAR;all;;;;;;;;;ver, sub
98465	-FUI;SER;all;IR;ver;;;;;;;ver	98465	+FUI;IR;all;;ver;;;;;;;ver
98466	CONTRADIGAIS;CONTRADIZER;all;;;;;;;;;ver	98466	CONTRADIGAIS;CONTRADIZER;all;;;;;;;;;ver
98467	APERTAS;APERTAR;all;;;;;;;;;ver	98467	APERTAS;APERTAR;all;;;;;;;;;ver
98468	AUXILIARIAMOS;AUXILIAR;all;;;;;;;;;ver	98468	AUXILIARIAMOS;AUXILIAR;all;;;;;;;;;ver
	@@ -0,0 +1,15 @@		@@ -0,0 +1,15 @@
		1	+ONTEM
		2	+ANTIGAMENTE
		3	+PASSADO
		4	+ANTEONTEM
		5	+ANTES
		6	+ATRAS
		7	+HOJE
		8	+AGORA
		9	+IMEDIATAMENTE
		10	+JA
		11	+DIARAMENTE
		12	+AMANHA
		13	+DEPOIS
		14	+FUTURO
		15	+FUTURAMENTE
0	\ No newline at end of file	16	\ No newline at end of file
	@@ -17,6 +17,7 @@ class Simplificador(object):		@@ -17,6 +17,7 @@ class Simplificador(object):
17	self.__dicSin = {}	17	self.__dicSin = {}
18	self.__dicWords = {}	18	self.__dicWords = {}
19	self.__dic2Gen = {}	19	self.__dic2Gen = {}
		20	+ self.__dicTemVerbs = {}
20	self.executeWorkCSV()	21	self.executeWorkCSV()
21		22
22	# retira artigos e preposicoes; passa verbos para infinitivo e verificar se há sinonimos	23	# retira artigos e preposicoes; passa verbos para infinitivo e verificar se há sinonimos
	@@ -24,11 +25,21 @@ class Simplificador(object):		@@ -24,11 +25,21 @@ class Simplificador(object):
24	self.__ts = []	25	self.__ts = []
25	self.it.load(texto)	26	self.it.load(texto)
26	self.__b = False	27	self.__b = False
		28	+ self.__verb = False
		29	+ self.__adv = False;
		30	+ self.__countVerb = 0
		31	+ self.__countAdv = 0
27	while(self.it.hasNext()):	32	while(self.it.hasNext()):
28	w = self.auxConvert(self.it.getAtualW())	33	w = self.auxConvert(self.it.getAtualW())
29	t = self.it.getAtualT()	34	t = self.it.getAtualT()
30	if self.__dicWords.has_key(t) == False: # verifica se nao eh artigo/preposicao	35	if self.__dicWords.has_key(t) == False: # verifica se nao eh artigo/preposicao
31	wu = w.upper() # deixa o token maiusculo	36	wu = w.upper() # deixa o token maiusculo
		37	+ if t[:2] == "VB":
		38	+ self.__verb = True
		39	+ self.__countVerb += 1
		40	+ if t[:3] == "ADV":
		41	+ self.__adv = True
		42	+ self.__countAdv += 1
32	if self.__dicInf.has_key(wu): # verifica se ha um verbo infinitivo desse token	43	if self.__dicInf.has_key(wu): # verifica se ha um verbo infinitivo desse token
33	sAux = self.__dicInf[wu] # se sim, adiciona numa string aux	44	sAux = self.__dicInf[wu] # se sim, adiciona numa string aux
34	if self.__dicSin.has_key(sAux): # verifica se ha um sinonimo para esse verbo infinitivo	45	if self.__dicSin.has_key(sAux): # verifica se ha um sinonimo para esse verbo infinitivo
	@@ -42,6 +53,7 @@ class Simplificador(object):		@@ -42,6 +53,7 @@ class Simplificador(object):
42	self.__bSin = True	53	self.__bSin = True
43		54
44	if self.__dic2Gen.has_key(wu):	55	if self.__dic2Gen.has_key(wu):
		56	+ del self.__ts[-1]
45	lenTicket = len(self.it.getAntT())	57	lenTicket = len(self.it.getAntT())
46	if ((self.__dicWords.has_key(self.it.getAntT())) and (self.it.getAntT()[lenTicket-1:] == "F") or (self.it.getAntT()[lenTicket-3:] == "F-P")):	58	if ((self.__dicWords.has_key(self.it.getAntT())) and (self.it.getAntT()[lenTicket-1:] == "F") or (self.it.getAntT()[lenTicket-3:] == "F-P")):
47	self.__ts.append(["MULHER " + wu,t])	59	self.__ts.append(["MULHER " + wu,t])
	@@ -50,21 +62,18 @@ class Simplificador(object):		@@ -50,21 +62,18 @@ class Simplificador(object):
50	self.__b = True	62	self.__b = True
51	if self.__b == False: # verifica se nao encontrou nem verbo infinito ou sinonimo	63	if self.__b == False: # verifica se nao encontrou nem verbo infinito ou sinonimo
52	self.__ts.append([wu,t])	64	self.__ts.append([wu,t])
53	- self.__b = False
54	-
55	self.it.reset()	65	self.it.reset()
		66	+ if self.__verb == True:
		67	+ return self.verbalAnalysis(self.__ts)
56	return self.__ts	68	return self.__ts
57		69
58	# cria e recupera todos os dicionarios (verbos inf., sinonimos e artigos/preposicoes)	70	# cria e recupera todos os dicionarios (verbos inf., sinonimos e artigos/preposicoes)
59	def executeWorkCSV(self):	71	def executeWorkCSV(self):
60	- self.__csv.createDicInf()
61	- self.__csv.createDicSin()
62	- self.__csv.createDicWords()
63	- self.__csv.createDic2Gen()
64	self.__dicInf = self.__csv.getDicInf()	72	self.__dicInf = self.__csv.getDicInf()
65	self.__dicSin = self.__csv.getDicSin()	73	self.__dicSin = self.__csv.getDicSin()
66	self.__dicWords = self.__csv.getDicWords()	74	self.__dicWords = self.__csv.getDicWords()
67	self.__dic2Gen = self.__csv.getDic2Gen()	75	self.__dic2Gen = self.__csv.getDic2Gen()
		76	+ self.__dicTemVerbs = self.__csv.getDicTemVerbs()
68		77
69	# converte romano para numero/numero para palavra	78	# converte romano para numero/numero para palavra
70	def auxConvert(self, t):	79	def auxConvert(self, t):
	@@ -74,4 +83,55 @@ class Simplificador(object):		@@ -74,4 +83,55 @@ class Simplificador(object):
74	except:	83	except:
75	if t.isdigit():	84	if t.isdigit():
76	return extenso(t).decode("utf-8")	85	return extenso(t).decode("utf-8")
77	- return t
78	\ No newline at end of file	86	\ No newline at end of file
		87	+ return t
		88	+
		89	+
		90	+ def verbalAnalysis(self, lista):
		91	+ lv = []
		92	+ self.it.load(lista)
		93	+ hasFut = False
		94	+ hasPas = False
		95	+ count = 0
		96	+ while(self.it.hasNext()):
		97	+ w = self.it.getAtualW().upper()
		98	+ t = self.it.getAtualT()
		99	+
		100	+ if(t[:3] == "ADV"):
		101	+ if (self.__dicTemVerbs.has_key(w)):
		102	+ self.it.reset()
		103	+ #print "ADV: retornou lista original"
		104	+ return lista
		105	+
		106	+ if(t == "VB-P"):
		107	+ if (self.__countVerb > 1):
		108	+ count += 1
		109	+ #print "VB-P: Incrementou"
		110	+ if(count == self.__countVerb):
		111	+ #print "VB-P Adicionou " + w
		112	+ lv.append([w,t])
		113	+ else:
		114	+ #print "VB-P: retornou lista original"
		115	+ self.it.reset()
		116	+ return lista
		117	+ elif(t == "VB-D"):
		118	+ count += 1
		119	+ hasPas = True
		120	+ #print "VB-D: Incrementou"
		121	+ if(count == self.__countVerb):
		122	+ #print "VB-D Adicionou " + w
		123	+ lv.append([w,t])
		124	+ elif(t == "VB-R"):
		125	+ count += 1
		126	+ hasFut = True
		127	+ #print "VB-R: Incrementou"
		128	+ if(count == self.__countVerb):
		129	+ #print "VB-R Adicionou " + w
		130	+ lv.append([w,t])
		131	+ else:
		132	+ lv.append([w,t])
		133	+ if (hasFut):
		134	+ lv.append(["FUTURO", "TVB"])
		135	+ elif (hasPas):
		136	+ lv.append(["PASSADO", "TVB"])
		137	+ self.it.reset()
		138	+ return lv
79	\ No newline at end of file	139	\ No newline at end of file
	@@ -29,7 +29,7 @@ class Tradutor(object):		@@ -29,7 +29,7 @@ class Tradutor(object):
29	#retira artigos e preposicoes	29	#retira artigos e preposicoes
30	self.__ts = self.__simplificador.simplificar(self.__t)	30	self.__ts = self.__simplificador.simplificar(self.__t)
31	self.__t = None	31	self.__t = None
32	-	32	+
33	#aplica as regras	33	#aplica as regras
34	#self.__tr = self.__regras.aplicarRegras(self.__ts)	34	#self.__tr = self.__regras.aplicarRegras(self.__ts)
35	#self.__ts = None	35	#self.__ts = None
	@@ -19,6 +19,13 @@ class WorkCSV(object):		@@ -19,6 +19,13 @@ class WorkCSV(object):
19	self.__dicWords = {}	19	self.__dicWords = {}
20	self.__file2Gen = ''	20	self.__file2Gen = ''
21	self.__dic2Gen = {}	21	self.__dic2Gen = {}
		22	+ self.__fileTemVerbs = ''
		23	+ self.__dicTemVerbs = {}
		24	+ self.createDicInf()
		25	+ self.createDicSin()
		26	+ self.createDicWords()
		27	+ self.createDic2Gen()
		28	+ self.createDicTemVerbs()
22		29
23	# Abre o arquivo que contem os verbos no infinitivo e preenche o dicionario com os mesmos	30	# Abre o arquivo que contem os verbos no infinitivo e preenche o dicionario com os mesmos
24	def createDicInf(self):	31	def createDicInf(self):
	@@ -46,9 +53,18 @@ class WorkCSV(object):		@@ -46,9 +53,18 @@ class WorkCSV(object):
46	for row in self.__fileSin:	53	for row in self.__fileSin:
47	if row[1] != "":	54	if row[1] != "":
48	self.__dicSin[row[0].decode("utf-8")] = row[1].decode("utf-8")	55	self.__dicSin[row[0].decode("utf-8")] = row[1].decode("utf-8")
49	-
50	- #except:
51	- # print "Unexpected error:", sys.exc_info()[0]	56	+
		57	+
		58	+ # Abre o arquivo que contem os tempos verbais
		59	+ def createDicTemVerbs(self):
		60	+ try:
		61	+ self.__fileTemVerbs = csv.reader(open(self.__path+"temposVerbais.csv"), delimiter=";")
		62	+ except IOError, (errno, strerror):
		63	+ print "I/O error(%s): %s" % (errno, strerror)
		64	+ print "createDicTemVerbs"
		65	+
		66	+ for row in self.__fileTemVerbs:
		67	+ self.__dicTemVerbs[row[0].decode("utf-8")] = row[0].decode("utf-8")
52		68
53	# Abre o arquivo que contem os artigos e preposicoes de acordo com o modelo de idioma passado na chamada e preenche o dicionario com os mesmos	69	# Abre o arquivo que contem os artigos e preposicoes de acordo com o modelo de idioma passado na chamada e preenche o dicionario com os mesmos
54	def createDicWords(self):	70	def createDicWords(self):
	@@ -86,4 +102,8 @@ class WorkCSV(object):		@@ -86,4 +102,8 @@ class WorkCSV(object):
86		102
87	# Retorna o dicionario dos substantivos a serem analisados pelo simplificador	103	# Retorna o dicionario dos substantivos a serem analisados pelo simplificador
88	def getDic2Gen(self):	104	def getDic2Gen(self):
89	- return self.__dic2Gen
90	\ No newline at end of file	105	\ No newline at end of file
		106	+ return self.__dic2Gen
		107	+
		108	+ # Retorna o dicionario dos tempos verbais
		109	+ def getDicTemVerbs(self):
		110	+ return self.__dicTemVerbs
91	\ No newline at end of file	111	\ No newline at end of file