Adicionado tratamento de tempo verbal no tradutor.

Erickson Silva
1 parent ea7323f4
Showing 7 changed files with 109 additions and 16 deletions Show diff stats
ModuleTranslate.py
data/dicPortGlosa.csv
data/temposVerbais.csv
servico/src/serviceWindowGeneration.cpp
tradutor/src/py/Simplificador.py
tradutor/src/py/Tradutor.py
tradutor/src/py/WorkCSV.py
@@ -3,7 +3,6 @@
  
 import sys, os
 sys.path.append(os.getcwd()+"/tradutor/src/py")
-#sys.path.append(os.path.expanduser("~/gtaaas/tradutor/src/py"))
 from Tradutor import *
  
 tradutor = Tradutor()
@@ -98462,7 +98462,7 @@ DEMOCRATIZASTE;DEMOCRATIZAR;all;;;;;;;;;ver
 SUAVAS;SUAR;all;;;;;;;;;ver
 AMANTE;AMANTE;all;;;;;;;;;adj, sub
 ESTUDO;ESTUDAR;all;;;;;;;;;ver, sub
-FUI;SER;all;IR;ver;;;;;;;ver
+FUI;IR;all;;ver;;;;;;;ver
 CONTRADIGAIS;CONTRADIZER;all;;;;;;;;;ver
 APERTAS;APERTAR;all;;;;;;;;;ver
 AUXILIARIAMOS;AUXILIAR;all;;;;;;;;;ver
@@ -0,0 +1,15 @@
+ONTEM
+ANTIGAMENTE
+PASSADO
+ANTEONTEM
+ANTES
+ATRAS
+HOJE
+AGORA
+IMEDIATAMENTE
+JA
+DIARIAMENTE
+AMANHA
+DEPOIS
+FUTURO
+FUTURAMENTE
 \ No newline at end of file
@@ -43,8 +43,7 @@ bool ServiceWindowGeneration::isRunning() {
  
 /* Quando o sincronizador termina, ele invoca esse método para avisar! */
 void ServiceWindowGeneration::finalizouSincronizacao() {
-    char op = this->getRunningOption();
-    if (op != '2' && serviceType != SERVICE_TYPE_TEXT && serviceType != SERVICE_TYPE_SRT_ONLY) {
+    if (getRunningOption() != '2' && serviceType != SERVICE_TYPE_TEXT && serviceType != SERVICE_TYPE_SRT_ONLY) {
         mixer = new Mixer();
         mixer->initialize(this->path_input, this->path_libras,this->position,this->size,this->transparency);
     }
@@ -17,6 +17,7 @@ class Simplificador(object):
 		self.__dicSin = {}
 		self.__dicWords = {}
 		self.__dic2Gen = {}
+		self.__dicTemVerbs = {}
 		self.executeWorkCSV()	
  
 	# retira artigos e preposicoes; passa verbos para o infinitivo e verifica se há sinonimos
@@ -24,11 +25,21 @@ class Simplificador(object):
 		self.__ts = []
 		self.it.load(texto)
 		self.__b = False
+		self.__verb = False
+		self.__adv = False;
+		self.__countVerb = 0
+		self.__countAdv = 0
 		while(self.it.hasNext()):
 			w = self.auxConvert(self.it.getAtualW())
 			t = self.it.getAtualT()
 			if self.__dicWords.has_key(t) == False: # verifica se nao eh artigo/preposicao
 				wu = w.upper() 						# deixa o token maiusculo
+				if t[:2] == "VB":
+					self.__verb = True
+					self.__countVerb += 1
+				if t[:3] == "ADV":
+					self.__adv = True
+					self.__countAdv += 1
 				if self.__dicInf.has_key(wu):		# verifica se ha um verbo infinitivo desse token
 					sAux = self.__dicInf[wu]		# se sim, adiciona numa string aux
 					if self.__dicSin.has_key(sAux):	# verifica se ha um sinonimo para esse verbo infinitivo
@@ -42,6 +53,7 @@ class Simplificador(object):
 					self.__bSin = True   		
  
 				if self.__dic2Gen.has_key(wu):
+					del self.__ts[-1]
 					lenTicket = len(self.it.getAntT())
 					if ((self.__dicWords.has_key(self.it.getAntT())) and (self.it.getAntT()[lenTicket-1:] == "F") or (self.it.getAntT()[lenTicket-3:] == "F-P")):
 						self.__ts.append(["MULHER " + wu,t])
@@ -50,21 +62,18 @@ class Simplificador(object):
 					self.__b = True          
 				if self.__b == False:             	# verifica se nao encontrou nem verbo infinito ou sinonimo
 					self.__ts.append([wu,t])
-				self.__b = False
-
 		self.it.reset()
+		if self.__verb == True:
+			return self.verbalAnalysis(self.__ts)
 		return self.__ts
  
 	# recupera do WorkCSV todos os dicionarios ja criados (verbos inf., sinonimos, artigos/preposicoes, substantivos e tempos verbais)
 	def executeWorkCSV(self):
-		self.__csv.createDicInf()
-		self.__csv.createDicSin()
-		self.__csv.createDicWords()
-		self.__csv.createDic2Gen()
 		self.__dicInf = self.__csv.getDicInf()
 		self.__dicSin = self.__csv.getDicSin()
 		self.__dicWords = self.__csv.getDicWords()
 		self.__dic2Gen = self.__csv.getDic2Gen()
+		self.__dicTemVerbs = self.__csv.getDicTemVerbs()
  
 	# converte romano para numero/numero para palavra
 	def auxConvert(self, t):
@@ -74,4 +83,55 @@ class Simplificador(object):
 		except:
 			if t.isdigit():
 				return extenso(t).decode("utf-8")
-			return t
 \ No newline at end of file
+			return t
+
+
+	def verbalAnalysis(self, lista):
+		lv = []
+		self.it.load(lista)
+		hasFut = False
+		hasPas = False
+		count = 0
+		while(self.it.hasNext()):
+			w = self.it.getAtualW().upper()
+			t = self.it.getAtualT()
+
+			if(t[:3] == "ADV"):
+				if (self.__dicTemVerbs.has_key(w)):
+					self.it.reset()
+					#print "ADV: retornou lista original"
+					return lista
+			
+			if(t == "VB-P"):
+				if (self.__countVerb > 1):
+					count += 1
+					#print "VB-P: Incrementou"
+					if(count == self.__countVerb):
+						#print "VB-P Adicionou " + w
+						lv.append([w,t])
+				else:
+					#print "VB-P: retornou lista original"
+					self.it.reset()
+					return lista
+			elif(t == "VB-D"):
+				count += 1
+				hasPas = True
+				#print "VB-D: Incrementou"
+				if(count == self.__countVerb):
+					#print "VB-D Adicionou " + w
+					lv.append([w,t])
+			elif(t == "VB-R"):
+				count += 1
+				hasFut = True
+				#print "VB-R: Incrementou"
+				if(count == self.__countVerb):
+					#print "VB-R Adicionou " + w
+					lv.append([w,t])
+			else:
+				lv.append([w,t])	
+		if (hasFut):
+			lv.append(["FUTURO", "TVB"])
+		elif (hasPas):
+			lv.append(["PASSADO", "TVB"])
+		self.it.reset()
+		return lv
 \ No newline at end of file
@@ -29,7 +29,7 @@ class Tradutor(object):
 		#retira artigos e preposicoes
 		self.__ts = self.__simplificador.simplificar(self.__t)
 		self.__t = None
-
+		
 		#aplica as regras
 		#self.__tr = self.__regras.aplicarRegras(self.__ts)
 		#self.__ts = None
@@ -19,6 +19,13 @@ class WorkCSV(object):
       self.__dicWords = {}
       self.__file2Gen = ''
       self.__dic2Gen = {}      
+      self.__fileTemVerbs = ''
+      self.__dicTemVerbs = {}     
+      self.createDicInf()
+      self.createDicSin()
+      self.createDicWords()
+      self.createDic2Gen()
+      self.createDicTemVerbs() 
  
    # Abre o arquivo que contem os verbos no infinitivo e preenche o dicionario com os mesmos
    def createDicInf(self):
@@ -46,9 +53,18 @@ class WorkCSV(object):
       for row in self.__fileSin:
          if row[1] != "":
             self.__dicSin[row[0].decode("utf-8")] = row[1].decode("utf-8")
-         
-      #except:
-      #   print "Unexpected error:", sys.exc_info()[0]
+    
+
+    # Abre o arquivo que contem os tempos verbais
+   def createDicTemVerbs(self):
+      try:
+         self.__fileTemVerbs = csv.reader(open(self.__path+"temposVerbais.csv"), delimiter=";")
+      except IOError, (errno, strerror):
+         print "I/O error(%s): %s" % (errno, strerror)
+         print "createDicTemVerbs"
+   
+      for row in self.__fileTemVerbs:
+         self.__dicTemVerbs[row[0].decode("utf-8")] = row[0].decode("utf-8")     
  
    # Abre o arquivo que contem os artigos e preposicoes de acordo com o modelo de idioma passado na chamada e preenche o dicionario com os mesmos
    def createDicWords(self):
@@ -86,4 +102,8 @@ class WorkCSV(object):
  
    # Retorna o dicionario dos substantivos a serem analisados pelo simplificador
    def getDic2Gen(self):
-      return self.__dic2Gen
 \ No newline at end of file
+      return self.__dic2Gen
+
+   # Retorna o dicionario dos tempos verbais
+   def getDicTemVerbs(self):
+      return self.__dicTemVerbs   
 \ No newline at end of file
...	...	@@ -3,7 +3,6 @@
3	3
4	4	import sys, os
5	5	sys.path.append(os.getcwd()+"/tradutor/src/py")
6		-#sys.path.append(os.path.expanduser("~/gtaaas/tradutor/src/py"))
7	6	from Tradutor import *
8	7
9	8	tradutor = Tradutor()
...	...
...	...	@@ -98462,7 +98462,7 @@ DEMOCRATIZASTE;DEMOCRATIZAR;all;;;;;;;;;ver
98462	98462	SUAVAS;SUAR;all;;;;;;;;;ver
98463	98463	AMANTE;AMANTE;all;;;;;;;;;adj, sub
98464	98464	ESTUDO;ESTUDAR;all;;;;;;;;;ver, sub
98465		-FUI;SER;all;IR;ver;;;;;;;ver
	98465	+FUI;IR;all;;ver;;;;;;;ver
98466	98466	CONTRADIGAIS;CONTRADIZER;all;;;;;;;;;ver
98467	98467	APERTAS;APERTAR;all;;;;;;;;;ver
98468	98468	AUXILIARIAMOS;AUXILIAR;all;;;;;;;;;ver
...	...
...	...	@@ -0,0 +1,15 @@
	1	+ONTEM
	2	+ANTIGAMENTE
	3	+PASSADO
	4	+ANTEONTEM
	5	+ANTES
	6	+ATRAS
	7	+HOJE
	8	+AGORA
	9	+IMEDIATAMENTE
	10	+JA
	11	+DIARAMENTE
	12	+AMANHA
	13	+DEPOIS
	14	+FUTURO
	15	+FUTURAMENTE
0	16	\ No newline at end of file
...	...
...	...	@@ -43,8 +43,7 @@ bool ServiceWindowGeneration::isRunning() {
43	43
44	44	/* Quando o sincronizador termina, ele invoca esse método para avisar! */
45	45	void ServiceWindowGeneration::finalizouSincronizacao() {
46		- char op = this->getRunningOption();
47		- if (op != '2' && serviceType != SERVICE_TYPE_TEXT && serviceType != SERVICE_TYPE_SRT_ONLY) {
	46	+ if (getRunningOption() != '2' && serviceType != SERVICE_TYPE_TEXT && serviceType != SERVICE_TYPE_SRT_ONLY) {
48	47	mixer = new Mixer();
49	48	mixer->initialize(this->path_input, this->path_libras,this->position,this->size,this->transparency);
50	49	}
...	...
...	...	@@ -17,6 +17,7 @@ class Simplificador(object):
17	17	self.__dicSin = {}
18	18	self.__dicWords = {}
19	19	self.__dic2Gen = {}
	20	+ self.__dicTemVerbs = {}
20	21	self.executeWorkCSV()
21	22
22	23	# retira artigos e preposicoes; passa verbos para o infinitivo e verifica se há sinonimos
...	...	@@ -24,11 +25,21 @@ class Simplificador(object):
24	25	self.__ts = []
25	26	self.it.load(texto)
26	27	self.__b = False
	28	+ self.__verb = False
	29	+ self.__adv = False;
	30	+ self.__countVerb = 0
	31	+ self.__countAdv = 0
27	32	while(self.it.hasNext()):
28	33	w = self.auxConvert(self.it.getAtualW())
29	34	t = self.it.getAtualT()
30	35	if self.__dicWords.has_key(t) == False: # verifica se nao eh artigo/preposicao
31	36	wu = w.upper() # deixa o token maiusculo
	37	+ if t[:2] == "VB":
	38	+ self.__verb = True
	39	+ self.__countVerb += 1
	40	+ if t[:3] == "ADV":
	41	+ self.__adv = True
	42	+ self.__countAdv += 1
32	43	if self.__dicInf.has_key(wu): # verifica se ha um verbo infinitivo desse token
33	44	sAux = self.__dicInf[wu] # se sim, adiciona numa string aux
34	45	if self.__dicSin.has_key(sAux): # verifica se ha um sinonimo para esse verbo infinitivo
...	...	@@ -42,6 +53,7 @@ class Simplificador(object):
42	53	self.__bSin = True
43	54
44	55	if self.__dic2Gen.has_key(wu):
	56	+ del self.__ts[-1]
45	57	lenTicket = len(self.it.getAntT())
46	58	if ((self.__dicWords.has_key(self.it.getAntT())) and (self.it.getAntT()[lenTicket-1:] == "F") or (self.it.getAntT()[lenTicket-3:] == "F-P")):
47	59	self.__ts.append(["MULHER " + wu,t])
...	...	@@ -50,21 +62,18 @@ class Simplificador(object):
50	62	self.__b = True
51	63	if self.__b == False: # verifica se nao encontrou nem verbo infinito ou sinonimo
52	64	self.__ts.append([wu,t])
53		- self.__b = False
54		-
55	65	self.it.reset()
	66	+ if self.__verb == True:
	67	+ return self.verbalAnalysis(self.__ts)
56	68	return self.__ts
57	69
58	70	# recupera do WorkCSV todos os dicionarios ja criados (verbos inf., sinonimos, artigos/preposicoes, substantivos e tempos verbais)
59	71	def executeWorkCSV(self):
60		- self.__csv.createDicInf()
61		- self.__csv.createDicSin()
62		- self.__csv.createDicWords()
63		- self.__csv.createDic2Gen()
64	72	self.__dicInf = self.__csv.getDicInf()
65	73	self.__dicSin = self.__csv.getDicSin()
66	74	self.__dicWords = self.__csv.getDicWords()
67	75	self.__dic2Gen = self.__csv.getDic2Gen()
	76	+ self.__dicTemVerbs = self.__csv.getDicTemVerbs()
68	77
69	78	# converte romano para numero/numero para palavra
70	79	def auxConvert(self, t):
...	...	@@ -74,4 +83,55 @@ class Simplificador(object):
74	83	except:
75	84	if t.isdigit():
76	85	return extenso(t).decode("utf-8")
77		- return t
78	86	\ No newline at end of file
	87	+ return t
	88	+
	89	+
	90	+ def verbalAnalysis(self, lista):
	91	+ lv = []
	92	+ self.it.load(lista)
	93	+ hasFut = False
	94	+ hasPas = False
	95	+ count = 0
	96	+ while(self.it.hasNext()):
	97	+ w = self.it.getAtualW().upper()
	98	+ t = self.it.getAtualT()
	99	+
	100	+ if(t[:3] == "ADV"):
	101	+ if (self.__dicTemVerbs.has_key(w)):
	102	+ self.it.reset()
	103	+ #print "ADV: retornou lista original"
	104	+ return lista
	105	+
	106	+ if(t == "VB-P"):
	107	+ if (self.__countVerb > 1):
	108	+ count += 1
	109	+ #print "VB-P: Incrementou"
	110	+ if(count == self.__countVerb):
	111	+ #print "VB-P Adicionou " + w
	112	+ lv.append([w,t])
	113	+ else:
	114	+ #print "VB-P: retornou lista original"
	115	+ self.it.reset()
	116	+ return lista
	117	+ elif(t == "VB-D"):
	118	+ count += 1
	119	+ hasPas = True
	120	+ #print "VB-D: Incrementou"
	121	+ if(count == self.__countVerb):
	122	+ #print "VB-D Adicionou " + w
	123	+ lv.append([w,t])
	124	+ elif(t == "VB-R"):
	125	+ count += 1
	126	+ hasFut = True
	127	+ #print "VB-R: Incrementou"
	128	+ if(count == self.__countVerb):
	129	+ #print "VB-R Adicionou " + w
	130	+ lv.append([w,t])
	131	+ else:
	132	+ lv.append([w,t])
	133	+ if (hasFut):
	134	+ lv.append(["FUTURO", "TVB"])
	135	+ elif (hasPas):
	136	+ lv.append(["PASSADO", "TVB"])
	137	+ self.it.reset()
	138	+ return lv
79	139	\ No newline at end of file
...	...
...	...	@@ -29,7 +29,7 @@ class Tradutor(object):
29	29	#retira artigos e preposicoes
30	30	self.__ts = self.__simplificador.simplificar(self.__t)
31	31	self.__t = None
32		-
	32	+
33	33	#aplica as regras
34	34	#self.__tr = self.__regras.aplicarRegras(self.__ts)
35	35	#self.__ts = None
...	...
...	...	@@ -19,6 +19,13 @@ class WorkCSV(object):
19	19	self.__dicWords = {}
20	20	self.__file2Gen = ''
21	21	self.__dic2Gen = {}
	22	+ self.__fileTemVerbs = ''
	23	+ self.__dicTemVerbs = {}
	24	+ self.createDicInf()
	25	+ self.createDicSin()
	26	+ self.createDicWords()
	27	+ self.createDic2Gen()
	28	+ self.createDicTemVerbs()
22	29
23	30	# Abre o arquivo que contem os verbos no infinitivo e preenche o dicionario com os mesmos
24	31	def createDicInf(self):
...	...	@@ -46,9 +53,18 @@ class WorkCSV(object):
46	53	for row in self.__fileSin:
47	54	if row[1] != "":
48	55	self.__dicSin[row[0].decode("utf-8")] = row[1].decode("utf-8")
49		-
50		- #except:
51		- # print "Unexpected error:", sys.exc_info()[0]
	56	+
	57	+
	58	+ # Abre o arquivo que contem os tempos verbais
	59	+ def createDicTemVerbs(self):
	60	+ try:
	61	+ self.__fileTemVerbs = csv.reader(open(self.__path+"temposVerbais.csv"), delimiter=";")
	62	+ except IOError, (errno, strerror):
	63	+ print "I/O error(%s): %s" % (errno, strerror)
	64	+ print "createDicTemVerbs"
	65	+
	66	+ for row in self.__fileTemVerbs:
	67	+ self.__dicTemVerbs[row[0].decode("utf-8")] = row[0].decode("utf-8")
52	68
53	69	# Abre o arquivo que contem os artigos e preposicoes de acordo com o modelo de idioma passado na chamada e preenche o dicionario com os mesmos
54	70	def createDicWords(self):
...	...	@@ -86,4 +102,8 @@ class WorkCSV(object):
86	102
87	103	# Retorna o dicionario dos substantivos a serem analisados pelo simplificador
88	104	def getDic2Gen(self):
89		- return self.__dic2Gen
90	105	\ No newline at end of file
	106	+ return self.__dic2Gen
	107	+
	108	+ # Retorna o dicionario dos tempos verbais
	109	+ def getDicTemVerbs(self):
	110	+ return self.__dicTemVerbs
91	111	\ No newline at end of file
...	...