Corrige Iterator e Aplicador de Sinônimos

Erickson Silva
1 parent 792af4f2
Showing 2 changed files with 49 additions and 60 deletions Show diff stats
src/new/AplicaSinonimos.py
src/new/Iterator.py
@@ -6,45 +6,32 @@
  
 #LAViD - Laboratório de Aplicações de Vídeo Digital
  
-import os, csv, sys
-from nltk.tree import *
+import os
+import csv
+import sys
+from nltk.tree import Tree
 from LerDicionarios import *
  
 class AplicaSinonimos(object):
+	"""Classe para aplicar sinônimos após a aplicação de regras morfológicas/sintáticas"""
  
-	 # Define e inicializa os atributos
 	def __init__(self):
-		self.__dicionarios = LeitorDicionarios()
-
-	def sinonimosMorfologico(self, texto):
-		lista = texto
-		for i, elem in enumerate(lista):
-			token = self.verificaPalavra(elem[0])
-			listmp = list(elem)
-			listmp[0] = token 
-			lista[i] = listmp
-		return lista
-
-
-	def dicionarioSinonimoFolhas(self, folhas):
-		dic = {}
-		for f in folhas:
-			token = self.verificaPalavra(f)
-			dic[f] = token
-		return dic
-
-
-	def sinonimosSintatico(self, texto):
-		folhas = Tree.leaves(texto)
-		dic = self.dicionarioSinonimoFolhas(folhas)
-		stringTree = str(texto)
-		for t in folhas:
-			stringTree.replace(t, dic[t])
-		tree = Tree.fromstring(stringTree, brackets='()')
-		return tree
-
-
-	def verificaPalavra(self, token):
-		if self.__dicionarios.hasSinonimo(token):
-			return self.__dicionarios.getSinonimo(token)
+		self.dicionarios = LerDicionarios()
+
+	# Itera sobre os tokens obtendo os sinonimos
+	def aplicar_sinonimos(self, analise):
+		lista_anotada = analise
+		lista_corrigida = []
+		if type(analise) is not list:
+			lista_anotada = Tree.leaves(analise)
+
+		for tupla in lista_anotada:
+			sinonimo = self.verificar_sinonimo(tupla[0])
+			lista_corrigida.append(sinonimo)
+		return " ".join(lista_corrigida)
+
+	# Verifica se há sinonimo do token  
+	def verificar_sinonimo(self, token):
+		if self.dicionarios.hasSinonimo(token):  
+			return self.dicionarios.getSinonimo(token)
 		return token
 \ No newline at end of file
@@ -7,52 +7,54 @@
 #LAViD - Laboratório de Aplicações de Vídeo Digital
  
 class Iterator(object):
+	"""Classe para iterar sobre as tuplas (palavra,etiqueta) após análise morfológica"""
  
-	# inicializacao das variaveis
-	def __init__(self):	
+	def init(self):	
 		self.count = -1
  
 	def load(self, lista):
 		self.reset()
-		self.__list = list(lista);
+		self.list = list(lista);
 		self.size = len(lista)
  
 	def reset(self):
 		self.count = -1
  
-	def getSize(self):
+	def get_size(self):
 		return self.size
  
-	def getCount(self):
+	def get_count(self):
 		return self.count
  
-	def getToken(self, i=None):
-		if(i != None): return self.__list[self.count+(i)]
-		return self.__list[self.count]
+	def get_token(self, i=None):
+		if(i != None):
+			return self.list[self.count+(i)]
+		return self.list[self.count]
  
-	def getAtualW(self):
-		return self.getToken(0)[0].upper()
+	def get_word(self):
+		return self.get_token()[0]
  
-	def getAtualT(self):
-		return self.getToken(0)[1]
+	def get_ticket(self):
+		return self.get_token()[1]
  
-	def getProxW(self):
-		return self.getToken("+")[0].upper()
+	def get_next_word(self):
+		return self.get_token(1)[0]
  
-	def getProxT(self):
-		return self.getToken("+")[1]		
+	def get_next_ticket(self):
+		return self.get_token(1)[1]		
  
-	def getAntW(self):
-		return self.getToken("-")[0].upper()
+	def get_prev_word(self):
+		return self.get_token(-1)[0]
  
-	def getAntT(self):
-		return self.getToken("-")[1]
+	def get_prev_ticket(self):
+		return self.get_token(-1)[1]
  
-	def getInterval(self, n):
-		if self.count+n > self.size: raise IndexError
-		return self.__list[self.count:self.count+n]
+	def get_interval(self, n):
+		if self.count+n > self.size:
+			raise IndexError
+		return self.list[self.count:self.count+n]
  
-	def hasNext(self):
+	def has_next(self):
 		if(self.count < self.size-1):
 			self.count += 1
 			return True
...	...	@@ -6,45 +6,32 @@
6	6
7	7	#LAViD - Laboratório de Aplicações de Vídeo Digital
8	8
9		-import os, csv, sys
10		-from nltk.tree import *
	9	+import os
	10	+import csv
	11	+import sys
	12	+from nltk.tree import Tree
11	13	from LerDicionarios import *
12	14
13	15	class AplicaSinonimos(object):
	16	+ """Classe para aplicar sinônimos após a aplicação de regras morfológicas/sintáticas"""
14	17
15		- # Define e inicializa os atributos
16	18	def __init__(self):
17		- self.__dicionarios = LeitorDicionarios()
18		-
19		- def sinonimosMorfologico(self, texto):
20		- lista = texto
21		- for i, elem in enumerate(lista):
22		- token = self.verificaPalavra(elem[0])
23		- listmp = list(elem)
24		- listmp[0] = token
25		- lista[i] = listmp
26		- return lista
27		-
28		-
29		- def dicionarioSinonimoFolhas(self, folhas):
30		- dic = {}
31		- for f in folhas:
32		- token = self.verificaPalavra(f)
33		- dic[f] = token
34		- return dic
35		-
36		-
37		- def sinonimosSintatico(self, texto):
38		- folhas = Tree.leaves(texto)
39		- dic = self.dicionarioSinonimoFolhas(folhas)
40		- stringTree = str(texto)
41		- for t in folhas:
42		- stringTree.replace(t, dic[t])
43		- tree = Tree.fromstring(stringTree, brackets='()')
44		- return tree
45		-
46		-
47		- def verificaPalavra(self, token):
48		- if self.__dicionarios.hasSinonimo(token):
49		- return self.__dicionarios.getSinonimo(token)
	19	+ self.dicionarios = LerDicionarios()
	20	+
	21	+ # Itera sobre os tokens obtendo os sinonimos
	22	+ def aplicar_sinonimos(self, analise):
	23	+ lista_anotada = analise
	24	+ lista_corrigida = []
	25	+ if type(analise) is not list:
	26	+ lista_anotada = Tree.leaves(analise)
	27	+
	28	+ for tupla in lista_anotada:
	29	+ sinonimo = self.verificar_sinonimo(tupla[0])
	30	+ lista_corrigida.append(sinonimo)
	31	+ return " ".join(lista_corrigida)
	32	+
	33	+ # Verifica se há sinonimo do token
	34	+ def verificar_sinonimo(self, token):
	35	+ if self.dicionarios.hasSinonimo(token):
	36	+ return self.dicionarios.getSinonimo(token)
50	37	return token
51	38	\ No newline at end of file
...	...
...	...	@@ -7,52 +7,54 @@
7	7	#LAViD - Laboratório de Aplicações de Vídeo Digital
8	8
9	9	class Iterator(object):
	10	+ """Classe para iterar sobre as tuplas (palavra,etiqueta) após análise morfológica"""
10	11
11		- # inicializacao das variaveis
12		- def __init__(self):
	12	+ def init(self):
13	13	self.count = -1
14	14
15	15	def load(self, lista):
16	16	self.reset()
17		- self.__list = list(lista);
	17	+ self.list = list(lista);
18	18	self.size = len(lista)
19	19
20	20	def reset(self):
21	21	self.count = -1
22	22
23		- def getSize(self):
	23	+ def get_size(self):
24	24	return self.size
25	25
26		- def getCount(self):
	26	+ def get_count(self):
27	27	return self.count
28	28
29		- def getToken(self, i=None):
30		- if(i != None): return self.__list[self.count+(i)]
31		- return self.__list[self.count]
	29	+ def get_token(self, i=None):
	30	+ if(i != None):
	31	+ return self.list[self.count+(i)]
	32	+ return self.list[self.count]
32	33
33		- def getAtualW(self):
34		- return self.getToken(0)[0].upper()
	34	+ def get_word(self):
	35	+ return self.get_token()[0]
35	36
36		- def getAtualT(self):
37		- return self.getToken(0)[1]
	37	+ def get_ticket(self):
	38	+ return self.get_token()[1]
38	39
39		- def getProxW(self):
40		- return self.getToken("+")[0].upper()
	40	+ def get_next_word(self):
	41	+ return self.get_token(1)[0]
41	42
42		- def getProxT(self):
43		- return self.getToken("+")[1]
	43	+ def get_next_ticket(self):
	44	+ return self.get_token(1)[1]
44	45
45		- def getAntW(self):
46		- return self.getToken("-")[0].upper()
	46	+ def get_prev_word(self):
	47	+ return self.get_token(-1)[0]
47	48
48		- def getAntT(self):
49		- return self.getToken("-")[1]
	49	+ def get_prev_ticket(self):
	50	+ return self.get_token(-1)[1]
50	51
51		- def getInterval(self, n):
52		- if self.count+n > self.size: raise IndexError
53		- return self.__list[self.count:self.count+n]
	52	+ def get_interval(self, n):
	53	+ if self.count+n > self.size:
	54	+ raise IndexError
	55	+ return self.list[self.count:self.count+n]
54	56
55		- def hasNext(self):
	57	+ def has_next(self):
56	58	if(self.count < self.size-1):
57	59	self.count += 1
58	60	return True
...	...