Commit 9ba27f8347a6d6a430526782cb96d8e69e0c58c3

Authored by Erickson Silva
1 parent f806b639
Exists in master and in 1 other branch devel

Adiciona classe (tgrep) para procurar nós na árvore NLTK

Showing 1 changed file with 575 additions and 0 deletions   Show diff stats
src/new/tgrep.py 0 → 100644
... ... @@ -0,0 +1,575 @@
  1 +#!/usr/bin/env python
  2 +# -*- coding: utf-8 -*-
  3 +#
  4 +# Permission is hereby granted, free of charge, to any person
  5 +# obtaining a copy of this software and associated documentation files
  6 +# (the "Software"), to deal in the Software without restriction,
  7 +# including without limitation the rights to use, copy, modify, merge,
  8 +# publish, distribute, sublicense, and/or sell copies of the Software,
  9 +# and to permit persons to whom the Software is furnished to do so,
  10 +# subject to the following conditions:
  11 +#
  12 +# The above copyright notice and this permission notice shall be
  13 +# included in all copies or substantial portions of the Software.
  14 +#
  15 +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  16 +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  17 +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  18 +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  19 +# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  20 +# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  21 +# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22 +# SOFTWARE.
  23 +
  24 +'''
  25 +TGrep search implementation for NLTK trees.
  26 +
  27 +(c) 16 March, 2013 Will Roberts <wildwilhelm@gmail.com>.
  28 +
  29 +This module supports TGrep2 syntax for matching parts of NLTK Trees.
  30 +Note that many tgrep operators require the tree passed to be a
  31 +ParentedTree.
  32 +
  33 +Tgrep tutorial:
  34 +http://www.stanford.edu/dept/linguistics/corpora/cas-tut-tgrep.html
  35 +Tgrep2 manual:
  36 +http://tedlab.mit.edu/~dr/Tgrep2/tgrep2.pdf
  37 +Tgrep2 source:
  38 +http://tedlab.mit.edu/~dr/Tgrep2/
  39 +'''
  40 +
  41 +import nltk.tree
  42 +import pyparsing
  43 +import re
  44 +
def ancestors(node):
    '''
    Collects every node that dominates the given tree node, starting
    with its direct parent and walking upwards from there.

    Leaf nodes have no `parent` method, so nothing can be recovered
    for them and an empty list is returned.
    '''
    lineage = []
    if hasattr(node, 'parent'):
        above = node.parent()
        # keep climbing until a parent lookup yields nothing
        while above:
            lineage.append(above)
            above = above.parent()
    return lineage
  60 +
def unique_ancestors(node):
    '''
    Collects the nodes dominating the given node for as long as each
    of them has exactly one child, i.e. while the path of descent is
    unambiguous.

    Leaf nodes have no `parent` method, so an empty list is returned
    for them.
    '''
    chain = []
    if hasattr(node, 'parent'):
        above = node.parent()
        while above:
            # stop at the first ancestor that branches
            if len(above) != 1:
                break
            chain.append(above)
            above = above.parent()
    return chain
  75 +
  76 +def _descendants(node):
  77 + '''
  78 + Returns the list of all nodes which are descended from the given
  79 + tree node in some way.
  80 + '''
  81 + if not hasattr(node, 'treepositions'):
  82 + return []
  83 + return [node[x] for x in node.treepositions()[1:]]
  84 +
  85 +def _leftmost_descendants(node):
  86 + '''
  87 + Returns the set of all nodes descended in some way through
  88 + left branches from this node.
  89 + '''
  90 + if not hasattr(node, 'treepositions'):
  91 + return []
  92 + return [node[x] for x in node.treepositions()[1:] if all(y == 0 for y in x)]
  93 +
  94 +def _rightmost_descendants(node):
  95 + '''
  96 + Returns the set of all nodes descended in some way through
  97 + right branches from this node.
  98 + '''
  99 + if not hasattr(node, 'treepositions'):
  100 + return []
  101 + rightmost_leaf = max(node.treepositions())
  102 + return [node[rightmost_leaf[:i]] for i in range(1, len(rightmost_leaf) + 1)]
  103 +
def _unique_descendants(node):
    '''
    Follows the chain of only-children below the given node and
    returns the nodes met on the way, i.e. the descendants for which
    there is a single path of descent.
    '''
    chain = []
    cursor = node
    while True:
        # only a non-empty Tree can be descended into
        if not cursor or not isinstance(cursor, nltk.tree.Tree):
            break
        # a branching node ends the unique path
        if len(cursor) != 1:
            break
        cursor = cursor[0]
        chain.append(cursor)
    return chain
  115 +
  116 +def _before(node):
  117 + '''
  118 + Returns the set of all nodes that are before the given node.
  119 + '''
  120 + if not hasattr(node, 'root') or not hasattr(node, 'treeposition'):
  121 + return []
  122 + pos = node.treeposition()
  123 + tree = node.root()
  124 + return [tree[x] for x in tree.treepositions()
  125 + if x[:len(pos)] < pos[:len(x)]]
  126 +
def _immediately_before(node):
    '''
    Lists all nodes that come immediately before the given node.

    Tree node A immediately precedes node B if the last terminal
    symbol (word) produced by A immediately precedes the first
    terminal symbol produced by B.

    Nodes lacking `root` or `treeposition` yield an empty list, as
    do nodes with nothing to their left.
    '''
    if not (hasattr(node, 'root') and hasattr(node, 'treeposition')):
        return []
    path = node.treeposition()
    # discard trailing first-child steps: from those there is no way
    # to move one branch to the left
    depth = len(path)
    while depth > 0 and path[depth - 1] == 0:
        depth -= 1
    if depth == 0:
        return []
    left_path = list(path[:depth])
    left_path[-1] -= 1
    neighbour = node.root()[left_path]
    return [neighbour] + _rightmost_descendants(neighbour)
  149 +
  150 +def _after(node):
  151 + '''
  152 + Returns the set of all nodes that are after the given node.
  153 + '''
  154 + if not hasattr(node, 'root') or not hasattr(node, 'treeposition'):
  155 + return []
  156 + pos = node.treeposition()
  157 + tree = node.root()
  158 + return [tree[x] for x in tree.treepositions()
  159 + if x[:len(pos)] > pos[:len(x)]]
  160 +
def _immediately_after(node):
    '''
    Lists all nodes that come immediately after the given node.

    Tree node A immediately follows node B if the first terminal
    symbol (word) produced by A immediately follows the last
    terminal symbol produced by B.

    Nodes lacking `root`, `treeposition` or `parent` yield an empty
    list, as do nodes with nothing to their right.
    '''
    for needed in ('root', 'treeposition', 'parent'):
        if not hasattr(node, needed):
            return []
    path = node.treeposition()
    # discard trailing last-child steps: from those there is no way
    # to move one branch to the right
    depth = len(path)
    enclosing = node.parent()
    while depth > 0 and path[depth - 1] == len(enclosing) - 1:
        depth -= 1
        enclosing = enclosing.parent()
    if depth == 0:
        return []
    right_path = list(path[:depth])
    right_path[-1] += 1
    neighbour = node.root()[right_path]
    return [neighbour] + _leftmost_descendants(neighbour)
  187 +
def _tgrep_node_literal_value(node):
    '''
    Gets the string value of a given parse tree node, for comparison
    using the tgrep node literal predicates.

    Trees are represented by their label; anything else (a leaf) is
    converted to text.
    '''
    if isinstance(node, nltk.tree.Tree):
        return node.label()
    # `unicode` only exists on Python 2; on Python 3 `str` is already
    # the text type, so fall back to it instead of raising NameError
    try:
        text_type = unicode
    except NameError:
        text_type = str
    return text_type(node)
  194 +
  195 +def _tgrep_node_action(_s, _l, tokens):
  196 + '''
  197 + Builds a lambda function representing a predicate on a tree node
  198 + depending on the name of its node.
  199 + '''
  200 + # print 'node tokens: ', tokens
  201 + if tokens[0] == "'":
  202 + # strip initial apostrophe (tgrep2 print command)
  203 + tokens = tokens[1:]
  204 + if len(tokens) > 1:
  205 + # disjunctive definition of a node name
  206 + assert list(set(tokens[1::2])) == ['|']
  207 + # recursively call self to interpret each node name definition
  208 + tokens = [_tgrep_node_action(None, None, [node])
  209 + for node in tokens[::2]]
  210 + # capture tokens and return the disjunction
  211 + return (lambda t: lambda n: any(f(n) for f in t))(tokens)
  212 + else:
  213 + if hasattr(tokens[0], '__call__'):
  214 + # this is a previously interpreted parenthetical node
  215 + # definition (lambda function)
  216 + return tokens[0]
  217 + elif tokens[0] == '*' or tokens[0] == '__':
  218 + return lambda n: True
  219 + elif tokens[0].startswith('"'):
  220 + return (lambda s: lambda n: _tgrep_node_literal_value(n) == s)(tokens[0].strip('"'))
  221 + elif tokens[0].startswith('/'):
  222 + return (lambda r: lambda n:
  223 + r.match(_tgrep_node_literal_value(n)))(re.compile(tokens[0].strip('/')))
  224 + elif tokens[0].startswith('i@'):
  225 + return (lambda s: lambda n:
  226 + _tgrep_node_literal_value(n).lower() == s)(tokens[0][2:].lower())
  227 + else:
  228 + return (lambda s: lambda n: _tgrep_node_literal_value(n) == s)(tokens[0])
  229 +
  230 +def _tgrep_parens_action(_s, _l, tokens):
  231 + '''
  232 + Builds a lambda function representing a predicate on a tree node
  233 + from a parenthetical notation.
  234 + '''
  235 + # print 'parenthetical tokens: ', tokens
  236 + assert len(tokens) == 3
  237 + assert tokens[0] == '('
  238 + assert tokens[2] == ')'
  239 + return tokens[1]
  240 +
  241 +def _tgrep_nltk_tree_pos_action(_s, _l, tokens):
  242 + '''
  243 + Builds a lambda function representing a predicate on a tree node
  244 + which returns true if the node is located at a specific tree
  245 + position.
  246 + '''
  247 + # recover the tuple from the parsed sting
  248 + node_tree_position = tuple(int(x) for x in tokens if x.isdigit())
  249 + # capture the node's tree position
  250 + return (lambda i: lambda n: (hasattr(n, 'treeposition') and
  251 + n.treeposition() == i))(node_tree_position)
  252 +
  253 +def _tgrep_relation_action(_s, _l, tokens):
  254 + '''
  255 + Builds a lambda function representing a predicate on a tree node
  256 + depending on its relation to other nodes in the tree.
  257 + '''
  258 + # print 'relation tokens: ', tokens
  259 + # process negation first if needed
  260 + negated = False
  261 + if tokens[0] == '!':
  262 + negated = True
  263 + tokens = tokens[1:]
  264 + if tokens[0] == '[':
  265 + # process square-bracketed relation expressions
  266 + assert len(tokens) == 3
  267 + assert tokens[2] == ']'
  268 + retval = tokens[1]
  269 + else:
  270 + # process operator-node relation expressions
  271 + assert len(tokens) == 2
  272 + operator, predicate = tokens
  273 + # A < B A is the parent of (immediately dominates) B.
  274 + if operator == '<':
  275 + retval = lambda n: (isinstance(n, nltk.tree.Tree) and
  276 + any(predicate(x) for x in n))
  277 + # A > B A is the child of B.
  278 + elif operator == '>':
  279 + retval = lambda n: (hasattr(n, 'parent') and
  280 + bool(n.parent()) and
  281 + predicate(n.parent()))
  282 + # A <, B Synonymous with A <1 B.
  283 + elif operator == '<,' or operator == '<1':
  284 + retval = lambda n: (isinstance(n, nltk.tree.Tree) and
  285 + bool(list(n)) and
  286 + predicate(n[0]))
  287 + # A >, B Synonymous with A >1 B.
  288 + elif operator == '>,' or operator == '>1':
  289 + retval = lambda n: (hasattr(n, 'parent') and
  290 + bool(n.parent()) and
  291 + (n is n.parent()[0]) and
  292 + predicate(n.parent()))
  293 + # A <N B B is the Nth child of A (the first child is <1).
  294 + elif operator[0] == '<' and operator[1:].isdigit():
  295 + idx = int(operator[1:])
  296 + # capture the index parameter
  297 + retval = (lambda i: lambda n: (isinstance(n, nltk.tree.Tree) and
  298 + bool(list(n)) and
  299 + 0 <= i < len(n) and
  300 + predicate(n[i])))(idx - 1)
  301 + # A >N B A is the Nth child of B (the first child is >1).
  302 + elif operator[0] == '>' and operator[1:].isdigit():
  303 + idx = int(operator[1:])
  304 + # capture the index parameter
  305 + retval = (lambda i: lambda n: (hasattr(n, 'parent') and
  306 + bool(n.parent()) and
  307 + 0 <= i < len(n.parent()) and
  308 + (n is n.parent()[i]) and
  309 + predicate(n.parent())))(idx - 1)
  310 + # A <' B B is the last child of A (also synonymous with A <-1 B).
  311 + # A <- B B is the last child of A (synonymous with A <-1 B).
  312 + elif operator == '<\'' or operator == '<-' or operator == '<-1':
  313 + retval = lambda n: (isinstance(n, nltk.tree.Tree) and bool(list(n))
  314 + and predicate(n[-1]))
  315 + # A >' B A is the last child of B (also synonymous with A >-1 B).
  316 + # A >- B A is the last child of B (synonymous with A >-1 B).
  317 + elif operator == '>\'' or operator == '>-' or operator == '>-1':
  318 + retval = lambda n: (hasattr(n, 'parent') and
  319 + bool(n.parent()) and
  320 + (n is n.parent()[-1]) and
  321 + predicate(n.parent()))
  322 + # A <-N B B is the N th-to-last child of A (the last child is <-1).
  323 + elif operator[:2] == '<-' and operator[2:].isdigit():
  324 + idx = -int(operator[2:])
  325 + # capture the index parameter
  326 + retval = (lambda i: lambda n: (isinstance(n, nltk.tree.Tree) and
  327 + bool(list(n)) and
  328 + 0 <= (i + len(n)) < len(n) and
  329 + predicate(n[i + len(n)])))(idx)
  330 + # A >-N B A is the N th-to-last child of B (the last child is >-1).
  331 + elif operator[:2] == '>-' and operator[2:].isdigit():
  332 + idx = -int(operator[2:])
  333 + # capture the index parameter
  334 + retval = (lambda i: lambda n:
  335 + (hasattr(n, 'parent') and
  336 + bool(n.parent()) and
  337 + 0 <= (i + len(n.parent())) < len(n.parent()) and
  338 + (n is n.parent()[i + len(n.parent())]) and
  339 + predicate(n.parent())))(idx)
  340 + # A <: B B is the only child of A
  341 + elif operator == '<:':
  342 + retval = lambda n: (isinstance(n, nltk.tree.Tree) and
  343 + len(n) == 1 and
  344 + predicate(n[0]))
  345 + # A >: B A is the only child of B.
  346 + elif operator == '>:':
  347 + retval = lambda n: (hasattr(n, 'parent') and
  348 + bool(n.parent()) and
  349 + len(n.parent()) == 1 and
  350 + predicate(n.parent()))
  351 + # A << B A dominates B (A is an ancestor of B).
  352 + elif operator == '<<':
  353 + retval = lambda n: (isinstance(n, nltk.tree.Tree) and
  354 + any(predicate(x) for x in _descendants(n)))
  355 + # A >> B A is dominated by B (A is a descendant of B).
  356 + elif operator == '>>':
  357 + retval = lambda n: any(predicate(x) for x in ancestors(n))
  358 + # A <<, B B is a left-most descendant of A.
  359 + elif operator == '<<,' or operator == '<<1':
  360 + retval = lambda n: (isinstance(n, nltk.tree.Tree) and
  361 + any(predicate(x)
  362 + for x in _leftmost_descendants(n)))
  363 + # A >>, B A is a left-most descendant of B.
  364 + elif operator == '>>,':
  365 + retval = lambda n: any((predicate(x) and
  366 + n in _leftmost_descendants(x))
  367 + for x in ancestors(n))
  368 + # A <<' B B is a right-most descendant of A.
  369 + elif operator == '<<\'':
  370 + retval = lambda n: (isinstance(n, nltk.tree.Tree) and
  371 + any(predicate(x)
  372 + for x in _rightmost_descendants(n)))
  373 + # A >>' B A is a right-most descendant of B.
  374 + elif operator == '>>\'':
  375 + retval = lambda n: any((predicate(x) and
  376 + n in _rightmost_descendants(x))
  377 + for x in ancestors(n))
  378 + # A <<: B There is a single path of descent from A and B is on it.
  379 + elif operator == '<<:':
  380 + retval = lambda n: (isinstance(n, nltk.tree.Tree) and
  381 + any(predicate(x)
  382 + for x in _unique_descendants(n)))
  383 + # A >>: B There is a single path of descent from B and A is on it.
  384 + elif operator == '>>:':
  385 + retval = lambda n: any(predicate(x) for x in unique_ancestors(n))
  386 + # A . B A immediately precedes B.
  387 + elif operator == '.':
  388 + retval = lambda n: any(predicate(x)
  389 + for x in _immediately_after(n))
  390 + # A , B A immediately follows B.
  391 + elif operator == ',':
  392 + retval = lambda n: any(predicate(x)
  393 + for x in _immediately_before(n))
  394 + # A .. B A precedes B.
  395 + elif operator == '..':
  396 + retval = lambda n: any(predicate(x) for x in _after(n))
  397 + # A ,, B A follows B.
  398 + elif operator == ',,':
  399 + retval = lambda n: any(predicate(x) for x in _before(n))
  400 + # A $ B A is a sister of B (and A != B).
  401 + elif operator == '$' or operator == '%':
  402 + retval = lambda n: (hasattr(n, 'parent') and
  403 + bool(n.parent()) and
  404 + any(predicate(x)
  405 + for x in n.parent() if x is not n))
  406 + # A $. B A is a sister of and immediately precedes B.
  407 + elif operator == '$.' or operator == '%.':
  408 + retval = lambda n: (hasattr(n, 'right_sibling') and
  409 + bool(n.right_sibling()) and
  410 + predicate(n.right_sibling()))
  411 + # A $, B A is a sister of and immediately follows B.
  412 + elif operator == '$,' or operator == '%,':
  413 + retval = lambda n: (hasattr(n, 'left_sibling') and
  414 + bool(n.left_sibling()) and
  415 + predicate(n.left_sibling()))
  416 + # A $.. B A is a sister of and precedes B.
  417 + elif operator == '$..' or operator == '%..':
  418 + retval = lambda n: (hasattr(n, 'parent') and
  419 + hasattr(n, 'parent_index') and
  420 + bool(n.parent()) and
  421 + any(predicate(x) for x in
  422 + n.parent()[n.parent_index() + 1:]))
  423 + # A $,, B A is a sister of and follows B.
  424 + elif operator == '$,,' or operator == '%,,':
  425 + retval = lambda n: (hasattr(n, 'parent') and
  426 + hasattr(n, 'parent_index') and
  427 + bool(n.parent()) and
  428 + any(predicate(x) for x in
  429 + n.parent()[:n.parent_index()]))
  430 + else:
  431 + assert False, 'cannot interpret tgrep operator "{0}"'.format(
  432 + operator)
  433 + # now return the built function
  434 + if negated:
  435 + return (lambda r: (lambda n: not r(n)))(retval)
  436 + else:
  437 + return retval
  438 +
  439 +def _tgrep_rel_conjunction_action(_s, _l, tokens):
  440 + '''
  441 + Builds a lambda function representing a predicate on a tree node
  442 + from the conjunction of several other such lambda functions.
  443 + '''
  444 + # filter out the ampersand
  445 + tokens = [x for x in tokens if x != '&']
  446 + # print 'relation conjunction tokens: ', tokens
  447 + if len(tokens) == 1:
  448 + return tokens[0]
  449 + elif len(tokens) == 2:
  450 + return (lambda a, b: lambda n: a(n) and b(n))(tokens[0], tokens[1])
  451 +
  452 +def _tgrep_rel_disjunction_action(_s, _l, tokens):
  453 + '''
  454 + Builds a lambda function representing a predicate on a tree node
  455 + from the disjunction of several other such lambda functions.
  456 + '''
  457 + # filter out the pipe
  458 + tokens = [x for x in tokens if x != '|']
  459 + # print 'relation disjunction tokens: ', tokens
  460 + if len(tokens) == 1:
  461 + return tokens[0]
  462 + elif len(tokens) == 2:
  463 + return (lambda a, b: lambda n: a(n) or b(n))(tokens[0], tokens[1])
  464 +
def _build_tgrep_parser(set_parse_actions = True):
    '''
    Assembles the pyparsing grammar used to tokenize and interpret
    tgrep search strings and returns its top-level expression.

    When `set_parse_actions` is true, the `_tgrep_*_action`
    functions are attached to the grammar elements; otherwise the
    grammar carries no parse actions.
    '''
    pp = pyparsing
    # relation operators, optionally negated
    tgrep_op = pp.Optional('!') + pp.Regex('[$%,.<>][%,.<>0-9-\':]*')
    # the ways of naming a node: "quoted", /regex/ and bare literal
    quoted_name = pp.QuotedString(quoteChar='"', escChar='\\',
                                  unquoteResults=False)
    regex_name = pp.QuotedString(quoteChar='/', escChar='\\',
                                 unquoteResults=False)
    bare_name = pp.Regex('[^][ \r\t\n;:.,&|<>()$!@%\'^=]+')
    # forward declarations for the recursive parts of the grammar
    tgrep_expr = pp.Forward()
    tgrep_relations = pp.Forward()
    tgrep_parens = pp.Literal('(') + tgrep_expr + ')'
    # explicit tree position, written like a tuple after "N("
    more_indices = pp.Optional(
        pp.delimitedList(pp.Word(pp.nums), delim=',') + pp.Optional(','))
    tgrep_nltk_tree_pos = (
        pp.Literal('N(') +
        pp.Optional(pp.Word(pp.nums) + ',' + more_indices) +
        ')')
    tgrep_node_expr = quoted_name | regex_name | '*' | bare_name
    named_node = (pp.Optional("'") +
                  tgrep_node_expr +
                  pp.ZeroOrMore("|" + tgrep_node_expr))
    tgrep_node = tgrep_parens | tgrep_nltk_tree_pos | named_node
    tgrep_brackets = pp.Optional('!') + '[' + tgrep_relations + ']'
    tgrep_relation = tgrep_brackets | tgrep_op + tgrep_node
    tgrep_rel_conjunction = pp.Forward()
    tgrep_rel_conjunction << (
        tgrep_relation +
        pp.ZeroOrMore(pp.Optional('&') + tgrep_rel_conjunction))
    tgrep_relations << (tgrep_rel_conjunction +
                        pp.ZeroOrMore("|" + tgrep_relations))
    tgrep_expr << (tgrep_node + pp.Optional(tgrep_relations))
    if set_parse_actions:
        # the whole expression is also the conjunction of two
        # predicates (the first node predicate and the remaining
        # relation predicates), hence the conjunction action on it
        bindings = (
            (tgrep_node, _tgrep_node_action),
            (tgrep_parens, _tgrep_parens_action),
            (tgrep_nltk_tree_pos, _tgrep_nltk_tree_pos_action),
            (tgrep_relation, _tgrep_relation_action),
            (tgrep_rel_conjunction, _tgrep_rel_conjunction_action),
            (tgrep_relations, _tgrep_rel_disjunction_action),
            (tgrep_expr, _tgrep_rel_conjunction_action),
        )
        for element, action in bindings:
            element.setParseAction(action)
    return tgrep_expr
  517 +
def tgrep_tokenize(tgrep_string):
    '''
    Splits a TGrep search string into its tokens, using the grammar
    without any parse actions, and returns them as a list.
    '''
    tokenizer = _build_tgrep_parser(set_parse_actions=False)
    parsed = tokenizer.parseString(tgrep_string)
    return list(parsed)
  524 +
def tgrep_compile(tgrep_string):
    '''
    Turns a TGrep search string into a lambda function by parsing
    it with the parse actions enabled.  The entire string has to be
    consumed by the grammar (`parseAll=True`).
    '''
    compiler = _build_tgrep_parser(set_parse_actions=True)
    results = compiler.parseString(tgrep_string, parseAll=True)
    return list(results)[0]
  532 +
def treepositions_no_leaves(tree):
    '''
    Lists the tree positions of the given tree that do not belong to
    leaf nodes, in the order `tree.treepositions()` reports them.
    '''
    positions = tree.treepositions()
    # a position is internal exactly when it is a proper prefix of
    # some other position; leaves are never a prefix of anything
    interior = set(pos[:cut]
                   for pos in positions
                   for cut in range(len(pos)))
    return [pos for pos in positions if pos in interior]
  546 +
def tgrep_positions(tree, tgrep_string, search_leaves = True):
    '''
    Return all tree positions in the given tree which match the given
    `tgrep_string`.

    `tgrep_string` may be a TGrep search string, which is compiled
    first, or an already compiled predicate function.  Objects
    without a `treepositions` method yield an empty list.

    If `search_leaves` is False, the method will not return any
    results in leaf positions.
    '''
    if not hasattr(tree, 'treepositions'):
        return []
    # `basestring` only exists on Python 2; fall back to `str` so the
    # check does not raise NameError on Python 3
    try:
        string_types = basestring
    except NameError:
        string_types = str
    predicate = tgrep_string
    if isinstance(predicate, string_types):
        predicate = tgrep_compile(predicate)
    if search_leaves:
        search_positions = tree.treepositions()
    else:
        search_positions = treepositions_no_leaves(tree)
    return [position for position in search_positions
            if predicate(tree[position])]
  565 +
def tgrep_nodes(tree, tgrep_string, search_leaves = True):
    '''
    Return all tree nodes in the given tree which match the given
    `tgrep_string`.

    If `search_leaves` is False, the method will not return any
    results in leaf positions.
    '''
    matches = tgrep_positions(tree, tgrep_string, search_leaves)
    return [tree[position] for position in matches]
... ...