diff --git a/src/new/AplicaRegras.py b/src/new/AplicaRegras.py deleted file mode 100644 index bd80bd0..0000000 --- a/src/new/AplicaRegras.py +++ /dev/null @@ -1,336 +0,0 @@ -#!/usr/bin/python -# -*- coding: utf-8 -*- - -#Autor: Erickson Silva -#Email: - -#LAViD - Laboratório de Aplicações de Vídeo Digital - -from collections import deque -import xml.etree.ElementTree as ET -from os.path import expanduser -import platform -from LerDicionarios import * -from Iterator import * -from StringAux import * -from ConverteExtenso import * - -class AplicaRegras(object): - - # inicializa todos as variaveis - def __init__(self): - - self.__root = self.getRoot() - self.__dicionarios = LeitorDicionarios() - - def getRoot(self): - - so = platform.system() - if so == 'Windows': - return ET.parse(expanduser("~")+'\\vlibras-translate\data\\regras.xml').getroot() - else: - return ET.parse(expanduser("~")+'/vlibras-translate/data/regras.xml').getroot() - - def aplicarRegrasMorfo(self, lista): - - self.__especificos = {"advt" : self.verificarAdvTempo, "v" : self.verificarVbInfinitivo, "x" : self.verificarPrepos, "c" : self.verificarSubs2Generos, "a" : self.verificarArtigo, "l" : self.verificarVbLigacao, "i": self.verificarAdvIntensidade, "vbi":"zero", "n":"zero", "abmn":"zero", "adji":"zero", "adjn":"zero", "advi":"zero"} - self.pularIteracoes = 0 - self.__tAux = [] - it = Iterator() - it.load(lista) - - while(it.hasNext()): - if self.pularIteracoes > 0: - self.pularIteracoes-=1 - continue - - for morpho in self.__root.findall('morphological'): - self.hasRule = False - for rule in morpho.findall('rule'): # procura a tag rule - if rule.find('active').text == "true" and rule.get('name').split("_")[0] == it.getAtualT(): - count = int(rule.find('count').text) - self.listaIter = [] - if count == 1: - self.listaIter = [it.getToken()] - else: - try: - self.listaIter = it.getInterval(count) - self.pularIteracoes = count-1 - except: - continue - - - self.nomeRegra = self.gerarNomeDaRegra(self.listaIter) - if rule.get('name') == self.nomeRegra: # verifica se a regra é aplicavel e a mesma esta ativa - print "Achou regra: " + self.nomeRegra - #subIter = Iterator() - #subIter.load(self.listaIter) - #while(subIter.hasNext()): - self.hasRule = True - self.listaTmp = count * [None] - self.countListaIter = -1 - for classe in rule.iter('class'): # for nas tags class - title = classe.find('title') - newpos = classe.find('newpos') - newprop = classe.find('newprop') - newtoken = classe.find('newtoken') - newtokenpos = classe.find('newtokenpos') - self.specific = classe.find('specific') - - self.countListaIter += 1 - token = self.listaIter[self.countListaIter] - - if self.specific is not None: - self.specific = self.__especificos[self.specific.text](token[0]) - if newprop is not None and type(self.specific) != bool: - self.__tAux.append([self.specific,newprop.text]) - - if newpos is not None: - if newpos.text != "-1": - if type(self.specific) == bool: - self.listaTmp[int(newpos.text)] = token - else: - self.__tAux.append([self.specific, title.text]) - - if newtoken is not None: - self.listaTmp[int(newtokenpos.text)] = [newtoken.text, "NEWTOKEN"] - - self.listaTmp = filter(None, self.listaTmp) - for i in self.listaTmp: - self.__tAux.append(i) - - break - - if (self.hasRule == False): self.__tAux.append(it.getToken()) - if self.__tAux: return self.__tAux - return lista # retorna a lista sem alteracoes (nao existe regra) - - - def gerarNomeDaRegra(self, lista): - self.__nomeRegra = [] - for t in lista: - self.__nomeRegra.append(t[1]) - 
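
The morphological pass above selects a rule by joining the POS tags of a token window with "_" and comparing the result against the rule's name attribute; count fixes the window size, each class entry routes its token to a new slot via newpos (with -1 dropping it), and specific dispatches to one of the verificar* checks. A simplified, self-contained Python 3 sketch of that lookup — the sample XML is invented for illustration (the real regras.xml is not part of this diff) and the newprop/newtoken/specific branches are left out:

    import xml.etree.ElementTree as ET

    SAMPLE_RULES = """
    <rules>
      <morphological>
        <rule name="ART_N">
          <active>true</active>
          <count>2</count>
          <class><title>ART</title><newpos>-1</newpos></class>
          <class><title>N</title><newpos>0</newpos></class>
        </rule>
      </morphological>
    </rules>
    """

    def aplicar_regras_morfo(tokens, root):
        """tokens: list of [word, tag]; reorders/drops a window whose joined
        tags match a rule name, as AplicaRegras.aplicarRegrasMorfo does."""
        out, i = [], 0
        rules = root.findall("morphological/rule")
        while i < len(tokens):
            for rule in rules:
                count = int(rule.find("count").text)
                window = tokens[i:i + count]
                nome = "_".join(tag for _, tag in window)
                if (rule.find("active").text == "true"
                        and len(window) == count and rule.get("name") == nome):
                    slots = [None] * count
                    for pos, classe in enumerate(rule.iter("class")):
                        newpos = int(classe.find("newpos").text)
                        if newpos != -1:          # -1 drops the token from the output
                            slots[newpos] = window[pos]
                    out.extend(t for t in slots if t is not None)
                    i += count
                    break
            else:                                 # no rule matched at this position
                out.append(tokens[i])
                i += 1
        return out

    root = ET.fromstring(SAMPLE_RULES)
    print(aplicar_regras_morfo([["o", "ART"], ["menino", "N"]], root))  # [['menino', 'N']]
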
return "_".join(self.__nomeRegra) - - def verificarAdvTempo(self, lista): - for i in lista: - if i[1][:3] == "ADV": - if (self.__dicionarios.hasTempoVerbal(i[0])): - return True - return False - - def verificarVbInfinitivo(self, token): - if self.__dicionarios.hasVerboInfinitivo(token): # verifica se ha um verbo infinitivo desse token - return self.__dicionarios.getVerboInfinitivo(token) - return False - - #TODO - def verificarPrepos(self, token): - return None - - def verificarSubs2Generos(self, token): - return self.__dicionarios.hasSubst2Genero(token) - - #TODO - def verificarArtigo(self, token): - return None - - #TODO - def verificarVbLigacao(self, token): - return None - - #TODO - def verificarAdvIntensidade(self, token): - return None - - # retira artigos e preposicoes; passa verbos para infinitivo e verificar se há sinonimos - def inicializar(self, texto): - it = Iterator() - it.load(texto) - self.__ts = [] - self.__verb = False - self.__adv = False - self.__num = False - self.__plural = False - self.__countVerb = 0 - self.__countAdv = 0 - while(it.hasNext()): - token = it.getAtualW() - tag = it.getAtualT() - self.__b = False - - if self.__dicionarios.hasPalavraIgnorada(tag) == False: # verifica se nao eh artigo/preposicao - - if tag == "NUM": - self.__num = True - - if tag[-2:] == "-P": - self.__plural = True - - #VERIFICA SE É ADVERBIO E CONTA A QUANTIDADE - if tag[:3] == "ADV": - if (self.__dicionarios.hasTempoVerbal(token)): - self.__adv = True - - if tag[:2] == "VB": - - #VERIFICA SE É VERBO NO INFINITIVO - if self.__dicionarios.hasVerboInfinitivo(token): # verifica se ha um verbo infinitivo desse token - verboInfinitivo = self.__dicionarios.getVerboInfinitivo(token) # se sim, adiciona numa string aux - self.__ts.append([verboInfinitivo,tag]) # caso contrario, adiciona so o verbo infinitivo msm - self.__b = True - - #VERIFICA SE É VERBO DE TEMPO E CONTA A QUANTIDADE - if tag == "VB-P" or tag == "VB-D" or tag == "VB-R": - self.__verb = True - self.__countVerb += 1 - - #VERIFICA SE É SUBTANTIVO COMUM DOS 2 GENEROS - if self.__dicionarios.hasSubst2Genero(token): - #del self.__ts[-1] - lenTicket = len(it.getAntT()) - if ((self.__dicionarios.hasPalavraIgnorada(it.getAntT())) and (it.getAntT()[lenTicket-1:] == "F") or (it.getAntT()[lenTicket-3:] == "F-P")): - self.__ts.append(["MULHER ", "2GEN"]) - self.__ts.append([token,tag]) - else: - self.__ts.append(["HOMEM ", "2GEN"]) - self.__ts.append([token,tag]) - self.__b = True - - #SE NÃO HOUVE NENHUM ALTERAÇÃO, OU SEJA, NÃO APLICOU NENHUMA REGRA, ADICIONA O TOKEN ORIGINAL - if self.__b == False: # verifica se nao encontrou nem verbo infinito ou sinonimo - self.__ts.append([token,tag]) - - #SE ENCONTROU VERBO, ENTÃO ANALISA a SENTENCA NOVAMENTE (again?) 
- if self.__verb == True and self.__adv == False: - self.__ts = self.verbalAnalysis(self.__ts) - - #VERIFICA SE É PLURAL - if self.__plural: - self.__ts = self.hasPlural(self.__ts) - - #CONVERTE EXTENSO PARA NUMERO - if self.__num: return self.converteExtenso(self.__ts) - - return self.__ts - - - # converte romano para numero - def auxConvert(self, tag): - try: - return roman_to_int(tag) - except: - return tag - - def verbalAnalysis(self, lista): - lv = [] - it = Iterator() - it.load(lista) - hasFut = False - hasPas = False - count = 0 - while(it.hasNext()): - token = it.getAtualW().upper() - tag = it.getAtualT() - - if(tag == "VB-P"): - if (self.__countVerb > 1): - count += 1 - #print "VB-P: Incrementou" - if(count == self.__countVerb): - #print "VB-P Adicionou " + token - lv.append([token,tag]) - else: - #print "VB-P: retornou lista original" - it.reset() - return lista - elif(tag == "VB-D"): - count += 1 - hasPas = True - #print "VB-D: Incrementou" - if(count == self.__countVerb): - #print "VB-D Adicionou " + token - lv.append([token,tag]) - elif(tag == "VB-R"): - count += 1 - hasFut = True - #print "VB-R: Incrementou" - if(count == self.__countVerb): - #print "VB-R Adicionou " + token - lv.append([token,tag]) - else: - lv.append([token,tag]) - if (hasFut): - lv.append(["FUTURO", "T-VB"]) - elif (hasPas): - lv.append(["PASSADO", "T-VB"]) - it.reset() - return lv - - - def hasPlural(self, lista): - - tmp = lista - for e in tmp: - if e[1][-2:] == "-P": - e[0] = self.analisarPlural(e[0]) - - return tmp - - - def analisarPlural(self, word): - - if(word[-3:] == "OES" or word[-2:] == "AES" or word[-2:] == "AOS"): - return word[0:-3]+"AO" - elif(word[-3:] == "RES" or word[-2:] == "ZES" or word[-2:] == "NES"): - return word[0:-2] - elif(word[-3:] == "SES"): - #TODO: Algumas palavras possuem marcações gráficas na raiz singular. Ex: Gás – Gases - return word[0:-2] - elif(word[-2:] == "NS"): - return word[0:-2]+"M" - elif(word[-3:] == "EIS"): - return word[0:-3]+"IL" - elif(word[-2:] == "IS"): - if(word[-3] == "A" or word[-3] == "E" or word[-3] == "O" or word[-3] == "U"): - return word[0:-2]+"L" - else: - return word - elif(word[-1] == "S"): - #TODO: Palavras paroxítonas ou proparoxítonas terminadas em S. 
Ex: lápis, vírus, tagênis, ônibus, etc - return word[0:-1] - else: - return word - - - def converteExtenso(self, lista): - - listAux = [] - indexDel = [] - count = 0 - isRunning = False - - for i in range(0, len(lista)): - token = lista[i][0] - tag = lista[i][1] - if (tag == "NUM"): - if (isRunning == False and len(listAux) == count): - listAux.append([i,[token]]) - isRunning = True - else: - listAux[count][1].append(token) - indexDel.append(i) - elif (isRunning == True): - if ((lista[i-1][1] == "NUM") and (lista[i+1][1] == "NUM") and (tag == "CONJ")): - indexDel.append(i) - else: - isRunning = False - count += 1 - - for i in listAux: - ext = extenso(' '.join(i[1])) - lista[i[0]] = [ext, "NUM"] - - deque((list.pop(lista, i) for i in sorted(indexDel, reverse=True)), maxlen=0) - - return lista diff --git a/src/new/TraduzSentencas.py b/src/new/TraduzSentencas.py index 089cfda..d22c3e9 100644 --- a/src/new/TraduzSentencas.py +++ b/src/new/TraduzSentencas.py @@ -23,20 +23,20 @@ def iniciar_traducao(texto): return "" def gerar_analise(sentenca): - sinonimos = AplicaSinonimos() - regras = AplicaRegras() - analise = None + aplic_sinonimos = AplicaSinonimos() + aplic_regras = AplicaRegras() + analise = None try: analise = alexp.run(sentenca) except ValueError: # TODO: Permitir acentos na sentença analise = None + morfologica = alexp.getAnaliseMorfologica() if (isinstance(analise,type(None))): - morfologica = alexp.getAnaliseMorfologica() - analise = regras.aplicar_regras_morfo(morfologica) + analise = aplic_regras.aplicar_regras_morfo(morfologica) else: - analise = regras.aplicar_regras_sint(arvoreSintatica) - - return sinonimos.aplicar_sinonimos(analise) \ No newline at end of file + analise = aplic_regras.aplicar_regras_sint(morfologica, analise) + analise = aplic_regras.simplificar_sentenca(analise) + return aplic_sinonimos.aplicar_sinonimos(analise) \ No newline at end of file diff --git a/src/new/tgrep.py b/src/new/tgrep.py deleted file mode 100644 index c374bf6..0000000 --- a/src/new/tgrep.py +++ /dev/null @@ -1,597 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# -# Permission is hereby granted, free of charge, to any person -# obtaining a copy of this software and associated documentation files -# (the "Software"), to deal in the Software without restriction, -# including without limitation the rights to use, copy, modify, merge, -# publish, distribute, sublicense, and/or sell copies of the Software, -# and to permit persons to whom the Software is furnished to do so, -# subject to the following conditions: -# -# The above copyright notice and this permission notice shall be -# included in all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN -# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. - -''' -TGrep search implementation for NTLK trees. - -(c) 16 March, 2013 Will Roberts . - -This module supports TGrep2 syntax for matching parts of NLTK Trees. -Note that many tgrep operators require the tree passed to be a -ParentedTree. 
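
analisarPlural above recovers a singular citation form by rewriting plural suffixes. (Two quirks worth noting: the AES/AOS tests compare three-character endings against word[-2:], so they can never match, and the TODOs acknowledge that accented roots such as GÁS/GASES and words like LÁPIS are not handled.) The intended rules as a standalone Python 3 function, uppercase as in the rest of the pipeline:

    def analisar_plural(word):
        """Plural -> singular suffix rules from analisarPlural (uppercase input)."""
        if word.endswith(("OES", "AES", "AOS")):
            return word[:-3] + "AO"            # LIMOES -> LIMAO, MAOS -> MAO
        if word.endswith(("RES", "ZES", "NES", "SES")):
            return word[:-2]                   # FLORES -> FLOR, GASES -> GAS
        if word.endswith("NS"):
            return word[:-2] + "M"             # JARDINS -> JARDIM
        if word.endswith("EIS"):
            return word[:-3] + "IL"            # FACEIS -> FACIL
        if word.endswith("IS") and word[-3] in "AEOU":
            return word[:-2] + "L"             # ANIMAIS -> ANIMAL
        if word.endswith("S"):
            return word[:-1]                   # fallback; LAPIS/VIRUS-type words degrade
        return word

    assert analisar_plural("ANIMAIS") == "ANIMAL"
    assert analisar_plural("JARDINS") == "JARDIM"
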
- -Tgrep tutorial: -http://www.stanford.edu/dept/linguistics/corpora/cas-tut-tgrep.html -Tgrep2 manual: -http://tedlab.mit.edu/~dr/Tgrep2/tgrep2.pdf -Tgrep2 source: -http://tedlab.mit.edu/~dr/Tgrep2/ -''' - -from builtins import bytes, range, str -import nltk.tree -import pyparsing -import re - -def ancestors(node): - ''' - Returns the list of all nodes dominating the given tree node. - This method will not work with leaf nodes, since there is no way - to recover the parent. - ''' - results = [] - try: - current = node.parent() - except AttributeError: - # if node is a leaf, we cannot retrieve its parent - return results - while current: - results.append(current) - current = current.parent() - return results - -def unique_ancestors(node): - ''' - Returns the list of all nodes dominating the given node, where - there is only a single path of descent. - ''' - results = [] - try: - current = node.parent() - except AttributeError: - # if node is a leaf, we cannot retrieve its parent - return results - while current and len(current) == 1: - results.append(current) - current = current.parent() - return results - -def _descendants(node): - ''' - Returns the list of all nodes which are descended from the given - tree node in some way. - ''' - try: - treepos = node.treepositions() - except AttributeError: - return [] - return [node[x] for x in treepos[1:]] - -def _leftmost_descendants(node): - ''' - Returns the set of all nodes descended in some way through - left branches from this node. - ''' - try: - treepos = node.treepositions() - except AttributeError: - return [] - return [node[x] for x in treepos[1:] if all(y == 0 for y in x)] - -def _rightmost_descendants(node): - ''' - Returns the set of all nodes descended in some way through - right branches from this node. - ''' - try: - rightmost_leaf = max(node.treepositions()) - except AttributeError: - return [] - return [node[rightmost_leaf[:i]] for i in range(1, len(rightmost_leaf) + 1)] - -def _istree(obj): - '''Predicate to check whether `obj` is a nltk.tree.Tree.''' - return isinstance(obj, nltk.tree.Tree) - -def _unique_descendants(node): - ''' - Returns the list of all nodes descended from the given node, where - there is only a single path of descent. - ''' - results = [] - current = node - while current and _istree(current) and len(current) == 1: - current = current[0] - results.append(current) - return results - -def _before(node): - ''' - Returns the set of all nodes that are before the given node. - ''' - try: - pos = node.treeposition() - tree = node.root() - except AttributeError: - return [] - return [tree[x] for x in tree.treepositions() - if x[:len(pos)] < pos[:len(x)]] - -def _immediately_before(node): - ''' - Returns the set of all nodes that are immediately before the given - node. - - Tree node A immediately precedes node B if the last terminal - symbol (word) produced by A immediately precedes the first - terminal symbol produced by B. - ''' - try: - pos = node.treeposition() - tree = node.root() - except AttributeError: - return [] - # go "upwards" from pos until there is a place we can go to the left - idx = len(pos) - 1 - while 0 <= idx and pos[idx] == 0: - idx -= 1 - if idx < 0: - return [] - pos = list(pos[:idx + 1]) - pos[-1] -= 1 - before = tree[pos] - return [before] + _rightmost_descendants(before) - -def _after(node): - ''' - Returns the set of all nodes that are after the given node. 
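
All of these helpers (ancestors, _descendants, _leftmost_descendants, _rightmost_descendants, ...) work off NLTK tree positions: a position is the tuple of child indices leading from the root, so prefix and ordering relations between positions encode dominance and linear precedence. A quick Python 3 illustration on a toy ParentedTree (the sentence is arbitrary):

    from nltk.tree import ParentedTree

    t = ParentedTree.fromstring("(S (NP (D the) (N dog)) (VP (V barked)))")

    np = t[0]
    print(np.treeposition())                                   # (0,)
    print([t[p] for p in t.treepositions() if len(p) == 1])    # direct children: NP and VP

    # _leftmost_descendants(): every node reached through index-0 branches only
    print([t[p] for p in t.treepositions()[1:] if all(i == 0 for i in p)])
    # the NP subtree, the (D the) subtree and the leaf 'the'
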
- ''' - try: - pos = node.treeposition() - tree = node.root() - except AttributeError: - return [] - return [tree[x] for x in tree.treepositions() - if x[:len(pos)] > pos[:len(x)]] - -def _immediately_after(node): - ''' - Returns the set of all nodes that are immediately after the given - node. - - Tree node A immediately follows node B if the first terminal - symbol (word) produced by A immediately follows the last - terminal symbol produced by B. - ''' - try: - pos = node.treeposition() - tree = node.root() - current = node.parent() - except AttributeError: - return [] - # go "upwards" from pos until there is a place we can go to the - # right - idx = len(pos) - 1 - while 0 <= idx and pos[idx] == len(current) - 1: - idx -= 1 - current = current.parent() - if idx < 0: - return [] - pos = list(pos[:idx + 1]) - pos[-1] += 1 - after = tree[pos] - return [after] + _leftmost_descendants(after) - -def _tgrep_node_literal_value(node): - ''' - Gets the string value of a given parse tree node, for comparison - using the tgrep node literal predicates. - ''' - return (node.label() if _istree(node) else str(node)) - -def _tgrep_node_action(_s, _l, tokens): - ''' - Builds a lambda function representing a predicate on a tree node - depending on the name of its node. - ''' - # print 'node tokens: ', tokens - if tokens[0] == u"'": - # strip initial apostrophe (tgrep2 print command) - tokens = tokens[1:] - if len(tokens) > 1: - # disjunctive definition of a node name - assert list(set(tokens[1::2])) == [u'|'] - # recursively call self to interpret each node name definition - tokens = [_tgrep_node_action(None, None, [node]) - for node in tokens[::2]] - # capture tokens and return the disjunction - return (lambda t: lambda n: any(f(n) for f in t))(tokens) - else: - if hasattr(tokens[0], u'__call__'): - # this is a previously interpreted parenthetical node - # definition (lambda function) - return tokens[0] - elif tokens[0] == u'*' or tokens[0] == u'__': - return lambda n: True - elif tokens[0].startswith(u'"'): - return (lambda s: lambda n: _tgrep_node_literal_value(n) == s)(tokens[0].strip(u'"')) - elif tokens[0].startswith(u'/'): - return (lambda r: lambda n: - r.match(_tgrep_node_literal_value(n)))(re.compile(tokens[0].strip(u'/'))) - elif tokens[0].startswith(u'i@'): - return (lambda s: lambda n: - _tgrep_node_literal_value(n).lower() == s)(tokens[0][2:].lower()) - else: - return (lambda s: lambda n: _tgrep_node_literal_value(n) == s)(tokens[0]) - -def _tgrep_parens_action(_s, _l, tokens): - ''' - Builds a lambda function representing a predicate on a tree node - from a parenthetical notation. - ''' - # print 'parenthetical tokens: ', tokens - assert len(tokens) == 3 - assert tokens[0] == u'(' - assert tokens[2] == u')' - return tokens[1] - -def _tgrep_nltk_tree_pos_action(_s, _l, tokens): - ''' - Builds a lambda function representing a predicate on a tree node - which returns true if the node is located at a specific tree - position. - ''' - # recover the tuple from the parsed sting - node_tree_position = tuple(int(x) for x in tokens if x.isdigit()) - # capture the node's tree position - return (lambda i: lambda n: (hasattr(n, u'treeposition') and - n.treeposition() == i))(node_tree_position) - -def _tgrep_relation_action(_s, _l, tokens): - ''' - Builds a lambda function representing a predicate on a tree node - depending on its relation to other nodes in the tree. 
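
Each node pattern is compiled into a predicate over the node's label (or, for a leaf, its string): '*' and '__' accept anything, quoted and bare literals compare exactly, /…/ is a regular-expression match and i@ compares case-insensitively. Hand-rolled Python 3 equivalents of those closures, matching the lambda style used in this file:

    import re
    from nltk.tree import ParentedTree

    value = lambda n: n.label() if hasattr(n, "label") else str(n)   # _tgrep_node_literal_value

    any_node   = lambda n: True                               # '*' or '__'
    literal_np = lambda n: value(n) == "NP"                   # NP
    regex_n    = lambda n: bool(re.match(r"N", value(n)))     # /N/
    icase_vp   = lambda n: value(n).lower() == "vp"           # i@VP

    t = ParentedTree.fromstring("(S (NP (N dog)) (VP (V barked)))")
    print([value(t[p]) for p in t.treepositions() if regex_n(t[p])])   # ['NP', 'N']
    print([value(t[p]) for p in t.treepositions() if icase_vp(t[p])])  # ['VP']
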
- ''' - # print 'relation tokens: ', tokens - # process negation first if needed - negated = False - if tokens[0] == u'!': - negated = True - tokens = tokens[1:] - if tokens[0] == u'[': - # process square-bracketed relation expressions - assert len(tokens) == 3 - assert tokens[2] == u']' - retval = tokens[1] - else: - # process operator-node relation expressions - assert len(tokens) == 2 - operator, predicate = tokens - # A < B A is the parent of (immediately dominates) B. - if operator == u'<': - retval = lambda n: (_istree(n) and - any(predicate(x) for x in n)) - # A > B A is the child of B. - elif operator == u'>': - retval = lambda n: (hasattr(n, u'parent') and - bool(n.parent()) and - predicate(n.parent())) - # A <, B Synonymous with A <1 B. - elif operator == u'<,' or operator == u'<1': - retval = lambda n: (_istree(n) and - bool(list(n)) and - predicate(n[0])) - # A >, B Synonymous with A >1 B. - elif operator == u'>,' or operator == u'>1': - retval = lambda n: (hasattr(n, u'parent') and - bool(n.parent()) and - (n is n.parent()[0]) and - predicate(n.parent())) - # A N B A is the Nth child of B (the first child is >1). - elif operator[0] == u'>' and operator[1:].isdigit(): - idx = int(operator[1:]) - # capture the index parameter - retval = (lambda i: lambda n: (hasattr(n, u'parent') and - bool(n.parent()) and - 0 <= i < len(n.parent()) and - (n is n.parent()[i]) and - predicate(n.parent())))(idx - 1) - # A <' B B is the last child of A (also synonymous with A <-1 B). - # A <- B B is the last child of A (synonymous with A <-1 B). - elif operator == u'<\'' or operator == u'<-' or operator == u'<-1': - retval = lambda n: (_istree(n) and bool(list(n)) - and predicate(n[-1])) - # A >' B A is the last child of B (also synonymous with A >-1 B). - # A >- B A is the last child of B (synonymous with A >-1 B). - elif operator == u'>\'' or operator == u'>-' or operator == u'>-1': - retval = lambda n: (hasattr(n, u'parent') and - bool(n.parent()) and - (n is n.parent()[-1]) and - predicate(n.parent())) - # A <-N B B is the N th-to-last child of A (the last child is <-1). - elif operator[:2] == u'<-' and operator[2:].isdigit(): - idx = -int(operator[2:]) - # capture the index parameter - retval = (lambda i: lambda n: (_istree(n) and - bool(list(n)) and - 0 <= (i + len(n)) < len(n) and - predicate(n[i + len(n)])))(idx) - # A >-N B A is the N th-to-last child of B (the last child is >-1). - elif operator[:2] == u'>-' and operator[2:].isdigit(): - idx = -int(operator[2:]) - # capture the index parameter - retval = (lambda i: lambda n: - (hasattr(n, u'parent') and - bool(n.parent()) and - 0 <= (i + len(n.parent())) < len(n.parent()) and - (n is n.parent()[i + len(n.parent())]) and - predicate(n.parent())))(idx) - # A <: B B is the only child of A - elif operator == u'<:': - retval = lambda n: (_istree(n) and - len(n) == 1 and - predicate(n[0])) - # A >: B A is the only child of B. - elif operator == u'>:': - retval = lambda n: (hasattr(n, u'parent') and - bool(n.parent()) and - len(n.parent()) == 1 and - predicate(n.parent())) - # A << B A dominates B (A is an ancestor of B). - elif operator == u'<<': - retval = lambda n: (_istree(n) and - any(predicate(x) for x in _descendants(n))) - # A >> B A is dominated by B (A is a descendant of B). - elif operator == u'>>': - retval = lambda n: any(predicate(x) for x in ancestors(n)) - # A <<, B B is a left-most descendant of A. 
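
The relation operators all reduce to small closures over the tree: 'A < B' becomes "n is a tree and some direct child satisfies the B-predicate", 'A >> B' becomes "some ancestor of n satisfies it", and so on down this table of cases. A hand-built Python 3 version of those two relations:

    from nltk.tree import Tree, ParentedTree

    def child_matches(pred):                  # the '<' relation (immediately dominates)
        return lambda n: isinstance(n, Tree) and any(pred(c) for c in n)

    def ancestor_matches(pred):               # the '>>' relation (is dominated by)
        def check(n):
            cur = n.parent() if hasattr(n, "parent") else None
            while cur is not None:
                if pred(cur):
                    return True
                cur = cur.parent()
            return False
        return check

    t = ParentedTree.fromstring("(S (NP (D the) (N dog)) (VP (V barked)))")
    is_d = lambda n: isinstance(n, Tree) and n.label() == "D"
    print([st.label() for st in t.subtrees() if child_matches(is_d)(st)])     # ['NP']
    is_s = lambda n: isinstance(n, Tree) and n.label() == "S"
    print([st.label() for st in t.subtrees() if ancestor_matches(is_s)(st)])  # ['NP', 'D', 'N', 'VP', 'V']
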
- elif operator == u'<<,' or operator == u'<<1': - retval = lambda n: (_istree(n) and - any(predicate(x) - for x in _leftmost_descendants(n))) - # A >>, B A is a left-most descendant of B. - elif operator == u'>>,': - retval = lambda n: any((predicate(x) and - n in _leftmost_descendants(x)) - for x in ancestors(n)) - # A <<' B B is a right-most descendant of A. - elif operator == u'<<\'': - retval = lambda n: (_istree(n) and - any(predicate(x) - for x in _rightmost_descendants(n))) - # A >>' B A is a right-most descendant of B. - elif operator == u'>>\'': - retval = lambda n: any((predicate(x) and - n in _rightmost_descendants(x)) - for x in ancestors(n)) - # A <<: B There is a single path of descent from A and B is on it. - elif operator == u'<<:': - retval = lambda n: (_istree(n) and - any(predicate(x) - for x in _unique_descendants(n))) - # A >>: B There is a single path of descent from B and A is on it. - elif operator == u'>>:': - retval = lambda n: any(predicate(x) for x in unique_ancestors(n)) - # A . B A immediately precedes B. - elif operator == u'.': - retval = lambda n: any(predicate(x) - for x in _immediately_after(n)) - # A , B A immediately follows B. - elif operator == u',': - retval = lambda n: any(predicate(x) - for x in _immediately_before(n)) - # A .. B A precedes B. - elif operator == u'..': - retval = lambda n: any(predicate(x) for x in _after(n)) - # A ,, B A follows B. - elif operator == u',,': - retval = lambda n: any(predicate(x) for x in _before(n)) - # A $ B A is a sister of B (and A != B). - elif operator == u'$' or operator == u'%': - retval = lambda n: (hasattr(n, u'parent') and - bool(n.parent()) and - any(predicate(x) - for x in n.parent() if x is not n)) - # A $. B A is a sister of and immediately precedes B. - elif operator == u'$.' or operator == u'%.': - retval = lambda n: (hasattr(n, u'right_sibling') and - bool(n.right_sibling()) and - predicate(n.right_sibling())) - # A $, B A is a sister of and immediately follows B. - elif operator == u'$,' or operator == u'%,': - retval = lambda n: (hasattr(n, u'left_sibling') and - bool(n.left_sibling()) and - predicate(n.left_sibling())) - # A $.. B A is a sister of and precedes B. - elif operator == u'$..' or operator == u'%..': - retval = lambda n: (hasattr(n, u'parent') and - hasattr(n, u'parent_index') and - bool(n.parent()) and - any(predicate(x) for x in - n.parent()[n.parent_index() + 1:])) - # A $,, B A is a sister of and follows B. - elif operator == u'$,,' or operator == u'%,,': - retval = lambda n: (hasattr(n, u'parent') and - hasattr(n, u'parent_index') and - bool(n.parent()) and - any(predicate(x) for x in - n.parent()[:n.parent_index()])) - else: - assert False, u'cannot interpret tgrep operator "{0}"'.format( - operator) - # now return the built function - if negated: - return (lambda r: (lambda n: not r(n)))(retval) - else: - return retval - -def _tgrep_rel_conjunction_action(_s, _l, tokens): - ''' - Builds a lambda function representing a predicate on a tree node - from the conjunction of several other such lambda functions. - ''' - # filter out the ampersand - tokens = [x for x in tokens if x != u'&'] - # print 'relation conjunction tokens: ', tokens - if len(tokens) == 1: - return tokens[0] - elif len(tokens) == 2: - return (lambda a, b: lambda n: a(n) and b(n))(tokens[0], tokens[1]) - -def _tgrep_rel_disjunction_action(_s, _l, tokens): - ''' - Builds a lambda function representing a predicate on a tree node - from the disjunction of several other such lambda functions. 
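
The sister operators ($., $, and the rest) lean on ParentedTree's sibling accessors, and the '&'/'|' parse actions simply wrap two such predicates in and/or (with '!' adding a not). A short Python 3 check of the pieces these closures depend on:

    from nltk.tree import ParentedTree

    t = ParentedTree.fromstring("(S (NP (N dog)) (VP (V barked)))")
    np, vp = t[0], t[1]

    # '$.' / '$,' are built on right_sibling() / left_sibling()
    print(np.right_sibling() is vp)    # True: NP is a sister of, and immediately precedes, VP
    print(vp.left_sibling() is np)     # True

    # conjunction of relation predicates is plain 'and' over the two closures
    has_np_child = lambda n: any(isinstance(c, ParentedTree) and c.label() == "NP" for c in n)
    has_vp_child = lambda n: any(isinstance(c, ParentedTree) and c.label() == "VP" for c in n)
    both = lambda n: has_np_child(n) and has_vp_child(n)
    print(both(t))                     # True: S has both an NP and a VP child
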
- ''' - # filter out the pipe - tokens = [x for x in tokens if x != u'|'] - # print 'relation disjunction tokens: ', tokens - if len(tokens) == 1: - return tokens[0] - elif len(tokens) == 2: - return (lambda a, b: lambda n: a(n) or b(n))(tokens[0], tokens[1]) - -def _build_tgrep_parser(set_parse_actions = True): - ''' - Builds a pyparsing-based parser object for tokenizing and - interpreting tgrep search strings. - ''' - tgrep_op = (pyparsing.Optional(u'!') + - pyparsing.Regex(u'[$%,.<>][%,.<>0-9-\':]*')) - tgrep_qstring = pyparsing.QuotedString(quoteChar=u'"', escChar=u'\\', - unquoteResults=False) - tgrep_node_regex = pyparsing.QuotedString(quoteChar=u'/', escChar=u'\\', - unquoteResults=False) - tgrep_node_literal = pyparsing.Regex(u'[^][ \r\t\n;:.,&|<>()$!@%\'^=]+') - tgrep_expr = pyparsing.Forward() - tgrep_relations = pyparsing.Forward() - tgrep_parens = pyparsing.Literal(u'(') + tgrep_expr + u')' - tgrep_nltk_tree_pos = ( - pyparsing.Literal(u'N(') + - pyparsing.Optional(pyparsing.Word(pyparsing.nums) + u',' + - pyparsing.Optional(pyparsing.delimitedList( - pyparsing.Word(pyparsing.nums), delim=u',') + - pyparsing.Optional(u','))) + u')') - tgrep_node_expr = (tgrep_qstring | - tgrep_node_regex | - u'*' | - tgrep_node_literal) - tgrep_node = (tgrep_parens | - tgrep_nltk_tree_pos | - (pyparsing.Optional(u"'") + - tgrep_node_expr + - pyparsing.ZeroOrMore(u"|" + tgrep_node_expr))) - tgrep_relation = pyparsing.Forward() - tgrep_brackets = pyparsing.Optional(u'!') + u'[' + tgrep_relations + u']' - tgrep_relation = tgrep_brackets | tgrep_op + tgrep_node - tgrep_rel_conjunction = pyparsing.Forward() - tgrep_rel_conjunction << (tgrep_relation + - pyparsing.ZeroOrMore(pyparsing.Optional(u'&') + - tgrep_rel_conjunction)) - tgrep_relations << tgrep_rel_conjunction + pyparsing.ZeroOrMore( - u"|" + tgrep_relations) - tgrep_expr << tgrep_node + pyparsing.Optional(tgrep_relations) - if set_parse_actions: - tgrep_node.setParseAction(_tgrep_node_action) - tgrep_parens.setParseAction(_tgrep_parens_action) - tgrep_nltk_tree_pos.setParseAction(_tgrep_nltk_tree_pos_action) - tgrep_relation.setParseAction(_tgrep_relation_action) - tgrep_rel_conjunction.setParseAction(_tgrep_rel_conjunction_action) - tgrep_relations.setParseAction(_tgrep_rel_disjunction_action) - # the whole expression is also the conjunction of two - # predicates: the first node predicate, and the remaining - # relation predicates - tgrep_expr.setParseAction(_tgrep_rel_conjunction_action) - return tgrep_expr - -def tgrep_tokenize(tgrep_string): - ''' - Tokenizes a TGrep search string into separate tokens. - ''' - parser = _build_tgrep_parser(False) - if isinstance(tgrep_string, bytes): - tgrep_string = tgrep_string.decode() - return list(parser.parseString(tgrep_string)) - -def tgrep_compile(tgrep_string): - ''' - Parses (and tokenizes, if necessary) a TGrep search string into a - lambda function. - ''' - parser = _build_tgrep_parser(True) - if isinstance(tgrep_string, bytes): - tgrep_string = tgrep_string.decode() - return list(parser.parseString(tgrep_string, parseAll=True))[0] - -def treepositions_no_leaves(tree): - ''' - Returns all the tree positions in the given tree which are not - leaf nodes. 
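
_build_tgrep_parser assembles the grammar out of pyparsing Forward() placeholders so that expressions, relations and parenthesised groups can nest recursively, and the parse actions attach the predicate-building functions above. A minimal Python 3 illustration of that Forward pattern (the toy grammar below is far smaller than the real one):

    import pyparsing

    expr = pyparsing.Forward()                      # placeholder, filled in below
    node = pyparsing.Regex(r"[A-Za-z_][A-Za-z0-9_]*")
    relation = pyparsing.Regex(r"[<>.,]+") + expr   # an operator followed by another expression
    expr <<= node + pyparsing.Optional(relation)

    print(list(expr.parseString("NP < D")))         # ['NP', '<', 'D']
    print(list(expr.parseString("NP << N")))        # ['NP', '<<', 'N']
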
-    '''
-    treepositions = tree.treepositions()
-    # leaves are treeposition tuples that are not prefixes of any
-    # other treeposition
-    prefixes = set()
-    for pos in treepositions:
-        for length in range(len(pos)):
-            prefixes.add(pos[:length])
-    return [pos for pos in treepositions if pos in prefixes]
-
-def tgrep_positions(tree, tgrep_string, search_leaves = True):
-    '''
-    Return all tree positions in the given tree which match the given
-    `tgrep_string`.
-
-    If `search_leaves` is False, the method will not return any
-    results in leaf positions.
-    '''
-    try:
-        if search_leaves:
-            search_positions = tree.treepositions()
-        else:
-            search_positions = treepositions_no_leaves(tree)
-    except AttributeError:
-        return []
-    if isinstance(tgrep_string, (bytes, str)):
-        tgrep_string = tgrep_compile(tgrep_string)
-    return [position for position in search_positions
-            if tgrep_string(tree[position])]
-
-def tgrep_nodes(tree, tgrep_string, search_leaves = True):
-    '''
-    Return all tree nodes in the given tree which match the given
-    `tgrep_ string`.
-
-    If `search_leaves` is False, the method will not return any
-    results in leaf positions.
-    '''
-    return [tree[position] for position in tgrep_positions(tree, tgrep_string,
-                                                           search_leaves)]
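
For reference, the public surface of the removed module was tgrep_tokenize, tgrep_compile and the two functions above. A usage sketch, assuming the old file is still importable as tgrep and that nltk, pyparsing and the future backport it imports are installed:

    from nltk.tree import ParentedTree
    import tgrep   # the module deleted in this commit

    tree = ParentedTree.fromstring("(S (NP (D the) (N dog)) (VP (V barked)))")

    print(tgrep.tgrep_tokenize("NP < D"))                            # ['NP', '<', 'D']
    print(tgrep.tgrep_positions(tree, "NP < D"))                     # [(0,)]
    print([n.label() for n in tgrep.tgrep_nodes(tree, "NP < D")])    # ['NP']
    print(tgrep.tgrep_positions(tree, "V >> S"))                     # [(1, 0)]
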