Commit 3dc55945f34e948398d13b7346524af6a001348d

Authored by Erickson Silva
1 parent 4fd9f50a
Exists in master and in 1 other branch devel

Adiciona nova versao tgrep

Showing 1 changed file with 144 additions and 122 deletions   Show diff stats
src/new/tgrep.py
... ... @@ -38,6 +38,7 @@ Tgrep2 source:
38 38 http://tedlab.mit.edu/~dr/Tgrep2/
39 39 '''
40 40  
  41 +from builtins import bytes, range, str
41 42 import nltk.tree
42 43 import pyparsing
43 44 import re
... ... @@ -48,11 +49,12 @@ def ancestors(node):
48 49 This method will not work with leaf nodes, since there is no way
49 50 to recover the parent.
50 51 '''
51   - # if node is a leaf, we cannot retrieve its parent
52   - if not hasattr(node, 'parent'):
53   - return []
54 52 results = []
55   - current = node.parent()
  53 + try:
  54 + current = node.parent()
  55 + except AttributeError:
  56 + # if node is a leaf, we cannot retrieve its parent
  57 + return results
56 58 while current:
57 59 results.append(current)
58 60 current = current.parent()
... ... @@ -63,11 +65,12 @@ def unique_ancestors(node):
63 65 Returns the list of all nodes dominating the given node, where
64 66 there is only a single path of descent.
65 67 '''
66   - # if node is a leaf, we cannot retrieve its parent
67   - if not hasattr(node, 'parent'):
68   - return []
69 68 results = []
70   - current = node.parent()
  69 + try:
  70 + current = node.parent()
  71 + except AttributeError:
  72 + # if node is a leaf, we cannot retrieve its parent
  73 + return results
71 74 while current and len(current) == 1:
72 75 results.append(current)
73 76 current = current.parent()
... ... @@ -78,29 +81,38 @@ def _descendants(node):
78 81 Returns the list of all nodes which are descended from the given
79 82 tree node in some way.
80 83 '''
81   - if not hasattr(node, 'treepositions'):
  84 + try:
  85 + treepos = node.treepositions()
  86 + except AttributeError:
82 87 return []
83   - return [node[x] for x in node.treepositions()[1:]]
  88 + return [node[x] for x in treepos[1:]]
84 89  
85 90 def _leftmost_descendants(node):
86 91 '''
87 92 Returns the set of all nodes descended in some way through
88 93 left branches from this node.
89 94 '''
90   - if not hasattr(node, 'treepositions'):
  95 + try:
  96 + treepos = node.treepositions()
  97 + except AttributeError:
91 98 return []
92   - return [node[x] for x in node.treepositions()[1:] if all(y == 0 for y in x)]
  99 + return [node[x] for x in treepos[1:] if all(y == 0 for y in x)]
93 100  
94 101 def _rightmost_descendants(node):
95 102 '''
96 103 Returns the set of all nodes descended in some way through
97 104 right branches from this node.
98 105 '''
99   - if not hasattr(node, 'treepositions'):
  106 + try:
  107 + rightmost_leaf = max(node.treepositions())
  108 + except AttributeError:
100 109 return []
101   - rightmost_leaf = max(node.treepositions())
102 110 return [node[rightmost_leaf[:i]] for i in range(1, len(rightmost_leaf) + 1)]
103 111  
  112 +def _istree(obj):
  113 + '''Predicate to check whether `obj` is a nltk.tree.Tree.'''
  114 + return isinstance(obj, nltk.tree.Tree)
  115 +
104 116 def _unique_descendants(node):
105 117 '''
106 118 Returns the list of all nodes descended from the given node, where
... ... @@ -108,7 +120,7 @@ def _unique_descendants(node):
108 120 '''
109 121 results = []
110 122 current = node
111   - while current and isinstance(current, nltk.tree.Tree) and len(current) == 1:
  123 + while current and _istree(current) and len(current) == 1:
112 124 current = current[0]
113 125 results.append(current)
114 126 return results
... ... @@ -117,10 +129,11 @@ def _before(node):
117 129 '''
118 130 Returns the set of all nodes that are before the given node.
119 131 '''
120   - if not hasattr(node, 'root') or not hasattr(node, 'treeposition'):
  132 + try:
  133 + pos = node.treeposition()
  134 + tree = node.root()
  135 + except AttributeError:
121 136 return []
122   - pos = node.treeposition()
123   - tree = node.root()
124 137 return [tree[x] for x in tree.treepositions()
125 138 if x[:len(pos)] < pos[:len(x)]]
126 139  
... ... @@ -133,9 +146,11 @@ def _immediately_before(node):
133 146 symbol (word) produced by A immediately precedes the first
134 147 terminal symbol produced by B.
135 148 '''
136   - if not hasattr(node, 'root') or not hasattr(node, 'treeposition'):
  149 + try:
  150 + pos = node.treeposition()
  151 + tree = node.root()
  152 + except AttributeError:
137 153 return []
138   - pos = node.treeposition()
139 154 # go "upwards" from pos until there is a place we can go to the left
140 155 idx = len(pos) - 1
141 156 while 0 <= idx and pos[idx] == 0:
... ... @@ -144,17 +159,18 @@ def _immediately_before(node):
144 159 return []
145 160 pos = list(pos[:idx + 1])
146 161 pos[-1] -= 1
147   - before = node.root()[pos]
  162 + before = tree[pos]
148 163 return [before] + _rightmost_descendants(before)
149 164  
150 165 def _after(node):
151 166 '''
152 167 Returns the set of all nodes that are after the given node.
153 168 '''
154   - if not hasattr(node, 'root') or not hasattr(node, 'treeposition'):
  169 + try:
  170 + pos = node.treeposition()
  171 + tree = node.root()
  172 + except AttributeError:
155 173 return []
156   - pos = node.treeposition()
157   - tree = node.root()
158 174 return [tree[x] for x in tree.treepositions()
159 175 if x[:len(pos)] > pos[:len(x)]]
160 176  
... ... @@ -167,14 +183,15 @@ def _immediately_after(node):
167 183 symbol (word) produced by A immediately follows the last
168 184 terminal symbol produced by B.
169 185 '''
170   - if (not hasattr(node, 'root') or not hasattr(node, 'treeposition') or
171   - not hasattr(node, 'parent')):
  186 + try:
  187 + pos = node.treeposition()
  188 + tree = node.root()
  189 + current = node.parent()
  190 + except AttributeError:
172 191 return []
173   - pos = node.treeposition()
174 192 # go "upwards" from pos until there is a place we can go to the
175 193 # right
176 194 idx = len(pos) - 1
177   - current = node.parent()
178 195 while 0 <= idx and pos[idx] == len(current) - 1:
179 196 idx -= 1
180 197 current = current.parent()
... ... @@ -182,7 +199,7 @@ def _immediately_after(node):
182 199 return []
183 200 pos = list(pos[:idx + 1])
184 201 pos[-1] += 1
185   - after = node.root()[pos]
  202 + after = tree[pos]
186 203 return [after] + _leftmost_descendants(after)
187 204  
188 205 def _tgrep_node_literal_value(node):
... ... @@ -190,7 +207,7 @@ def _tgrep_node_literal_value(node):
190 207 Gets the string value of a given parse tree node, for comparison
191 208 using the tgrep node literal predicates.
192 209 '''
193   - return (node.label() if isinstance(node, nltk.tree.Tree) else unicode(node))
  210 + return (node.label() if _istree(node) else str(node))
194 211  
195 212 def _tgrep_node_action(_s, _l, tokens):
196 213 '''
... ... @@ -198,30 +215,30 @@ def _tgrep_node_action(_s, _l, tokens):
198 215 depending on the name of its node.
199 216 '''
200 217 # print 'node tokens: ', tokens
201   - if tokens[0] == "'":
  218 + if tokens[0] == u"'":
202 219 # strip initial apostrophe (tgrep2 print command)
203 220 tokens = tokens[1:]
204 221 if len(tokens) > 1:
205 222 # disjunctive definition of a node name
206   - assert list(set(tokens[1::2])) == ['|']
  223 + assert list(set(tokens[1::2])) == [u'|']
207 224 # recursively call self to interpret each node name definition
208 225 tokens = [_tgrep_node_action(None, None, [node])
209 226 for node in tokens[::2]]
210 227 # capture tokens and return the disjunction
211 228 return (lambda t: lambda n: any(f(n) for f in t))(tokens)
212 229 else:
213   - if hasattr(tokens[0], '__call__'):
  230 + if hasattr(tokens[0], u'__call__'):
214 231 # this is a previously interpreted parenthetical node
215 232 # definition (lambda function)
216 233 return tokens[0]
217   - elif tokens[0] == '*' or tokens[0] == '__':
  234 + elif tokens[0] == u'*' or tokens[0] == u'__':
218 235 return lambda n: True
219   - elif tokens[0].startswith('"'):
220   - return (lambda s: lambda n: _tgrep_node_literal_value(n) == s)(tokens[0].strip('"'))
221   - elif tokens[0].startswith('/'):
  236 + elif tokens[0].startswith(u'"'):
  237 + return (lambda s: lambda n: _tgrep_node_literal_value(n) == s)(tokens[0].strip(u'"'))
  238 + elif tokens[0].startswith(u'/'):
222 239 return (lambda r: lambda n:
223   - r.match(_tgrep_node_literal_value(n)))(re.compile(tokens[0].strip('/')))
224   - elif tokens[0].startswith('i@'):
  240 + r.match(_tgrep_node_literal_value(n)))(re.compile(tokens[0].strip(u'/')))
  241 + elif tokens[0].startswith(u'i@'):
225 242 return (lambda s: lambda n:
226 243 _tgrep_node_literal_value(n).lower() == s)(tokens[0][2:].lower())
227 244 else:
... ... @@ -234,8 +251,8 @@ def _tgrep_parens_action(_s, _l, tokens):
234 251 '''
235 252 # print 'parenthetical tokens: ', tokens
236 253 assert len(tokens) == 3
237   - assert tokens[0] == '('
238   - assert tokens[2] == ')'
  254 + assert tokens[0] == u'('
  255 + assert tokens[2] == u')'
239 256 return tokens[1]
240 257  
241 258 def _tgrep_nltk_tree_pos_action(_s, _l, tokens):
... ... @@ -247,7 +264,7 @@ def _tgrep_nltk_tree_pos_action(_s, _l, tokens):
247 264 # recover the tuple from the parsed string
248 265 node_tree_position = tuple(int(x) for x in tokens if x.isdigit())
249 266 # capture the node's tree position
250   - return (lambda i: lambda n: (hasattr(n, 'treeposition') and
  267 + return (lambda i: lambda n: (hasattr(n, u'treeposition') and
251 268 n.treeposition() == i))(node_tree_position)
252 269  
253 270 def _tgrep_relation_action(_s, _l, tokens):
... ... @@ -258,177 +275,177 @@ def _tgrep_relation_action(_s, _l, tokens):
258 275 # print 'relation tokens: ', tokens
259 276 # process negation first if needed
260 277 negated = False
261   - if tokens[0] == '!':
  278 + if tokens[0] == u'!':
262 279 negated = True
263 280 tokens = tokens[1:]
264   - if tokens[0] == '[':
  281 + if tokens[0] == u'[':
265 282 # process square-bracketed relation expressions
266 283 assert len(tokens) == 3
267   - assert tokens[2] == ']'
  284 + assert tokens[2] == u']'
268 285 retval = tokens[1]
269 286 else:
270 287 # process operator-node relation expressions
271 288 assert len(tokens) == 2
272 289 operator, predicate = tokens
273 290 # A < B A is the parent of (immediately dominates) B.
274   - if operator == '<':
275   - retval = lambda n: (isinstance(n, nltk.tree.Tree) and
  291 + if operator == u'<':
  292 + retval = lambda n: (_istree(n) and
276 293 any(predicate(x) for x in n))
277 294 # A > B A is the child of B.
278   - elif operator == '>':
279   - retval = lambda n: (hasattr(n, 'parent') and
  295 + elif operator == u'>':
  296 + retval = lambda n: (hasattr(n, u'parent') and
280 297 bool(n.parent()) and
281 298 predicate(n.parent()))
282 299 # A <, B Synonymous with A <1 B.
283   - elif operator == '<,' or operator == '<1':
284   - retval = lambda n: (isinstance(n, nltk.tree.Tree) and
  300 + elif operator == u'<,' or operator == u'<1':
  301 + retval = lambda n: (_istree(n) and
285 302 bool(list(n)) and
286 303 predicate(n[0]))
287 304 # A >, B Synonymous with A >1 B.
288   - elif operator == '>,' or operator == '>1':
289   - retval = lambda n: (hasattr(n, 'parent') and
  305 + elif operator == u'>,' or operator == u'>1':
  306 + retval = lambda n: (hasattr(n, u'parent') and
290 307 bool(n.parent()) and
291 308 (n is n.parent()[0]) and
292 309 predicate(n.parent()))
293 310 # A <N B B is the Nth child of A (the first child is <1).
294   - elif operator[0] == '<' and operator[1:].isdigit():
  311 + elif operator[0] == u'<' and operator[1:].isdigit():
295 312 idx = int(operator[1:])
296 313 # capture the index parameter
297   - retval = (lambda i: lambda n: (isinstance(n, nltk.tree.Tree) and
  314 + retval = (lambda i: lambda n: (_istree(n) and
298 315 bool(list(n)) and
299 316 0 <= i < len(n) and
300 317 predicate(n[i])))(idx - 1)
301 318 # A >N B A is the Nth child of B (the first child is >1).
302   - elif operator[0] == '>' and operator[1:].isdigit():
  319 + elif operator[0] == u'>' and operator[1:].isdigit():
303 320 idx = int(operator[1:])
304 321 # capture the index parameter
305   - retval = (lambda i: lambda n: (hasattr(n, 'parent') and
  322 + retval = (lambda i: lambda n: (hasattr(n, u'parent') and
306 323 bool(n.parent()) and
307 324 0 <= i < len(n.parent()) and
308 325 (n is n.parent()[i]) and
309 326 predicate(n.parent())))(idx - 1)
310 327 # A <' B B is the last child of A (also synonymous with A <-1 B).
311 328 # A <- B B is the last child of A (synonymous with A <-1 B).
312   - elif operator == '<\'' or operator == '<-' or operator == '<-1':
313   - retval = lambda n: (isinstance(n, nltk.tree.Tree) and bool(list(n))
  329 + elif operator == u'<\'' or operator == u'<-' or operator == u'<-1':
  330 + retval = lambda n: (_istree(n) and bool(list(n))
314 331 and predicate(n[-1]))
315 332 # A >' B A is the last child of B (also synonymous with A >-1 B).
316 333 # A >- B A is the last child of B (synonymous with A >-1 B).
317   - elif operator == '>\'' or operator == '>-' or operator == '>-1':
318   - retval = lambda n: (hasattr(n, 'parent') and
  334 + elif operator == u'>\'' or operator == u'>-' or operator == u'>-1':
  335 + retval = lambda n: (hasattr(n, u'parent') and
319 336 bool(n.parent()) and
320 337 (n is n.parent()[-1]) and
321 338 predicate(n.parent()))
322 339 # A <-N B B is the N th-to-last child of A (the last child is <-1).
323   - elif operator[:2] == '<-' and operator[2:].isdigit():
  340 + elif operator[:2] == u'<-' and operator[2:].isdigit():
324 341 idx = -int(operator[2:])
325 342 # capture the index parameter
326   - retval = (lambda i: lambda n: (isinstance(n, nltk.tree.Tree) and
  343 + retval = (lambda i: lambda n: (_istree(n) and
327 344 bool(list(n)) and
328 345 0 <= (i + len(n)) < len(n) and
329 346 predicate(n[i + len(n)])))(idx)
330 347 # A >-N B A is the N th-to-last child of B (the last child is >-1).
331   - elif operator[:2] == '>-' and operator[2:].isdigit():
  348 + elif operator[:2] == u'>-' and operator[2:].isdigit():
332 349 idx = -int(operator[2:])
333 350 # capture the index parameter
334 351 retval = (lambda i: lambda n:
335   - (hasattr(n, 'parent') and
  352 + (hasattr(n, u'parent') and
336 353 bool(n.parent()) and
337 354 0 <= (i + len(n.parent())) < len(n.parent()) and
338 355 (n is n.parent()[i + len(n.parent())]) and
339 356 predicate(n.parent())))(idx)
340 357 # A <: B B is the only child of A
341   - elif operator == '<:':
342   - retval = lambda n: (isinstance(n, nltk.tree.Tree) and
  358 + elif operator == u'<:':
  359 + retval = lambda n: (_istree(n) and
343 360 len(n) == 1 and
344 361 predicate(n[0]))
345 362 # A >: B A is the only child of B.
346   - elif operator == '>:':
347   - retval = lambda n: (hasattr(n, 'parent') and
  363 + elif operator == u'>:':
  364 + retval = lambda n: (hasattr(n, u'parent') and
348 365 bool(n.parent()) and
349 366 len(n.parent()) == 1 and
350 367 predicate(n.parent()))
351 368 # A << B A dominates B (A is an ancestor of B).
352   - elif operator == '<<':
353   - retval = lambda n: (isinstance(n, nltk.tree.Tree) and
  369 + elif operator == u'<<':
  370 + retval = lambda n: (_istree(n) and
354 371 any(predicate(x) for x in _descendants(n)))
355 372 # A >> B A is dominated by B (A is a descendant of B).
356   - elif operator == '>>':
  373 + elif operator == u'>>':
357 374 retval = lambda n: any(predicate(x) for x in ancestors(n))
358 375 # A <<, B B is a left-most descendant of A.
359   - elif operator == '<<,' or operator == '<<1':
360   - retval = lambda n: (isinstance(n, nltk.tree.Tree) and
  376 + elif operator == u'<<,' or operator == u'<<1':
  377 + retval = lambda n: (_istree(n) and
361 378 any(predicate(x)
362 379 for x in _leftmost_descendants(n)))
363 380 # A >>, B A is a left-most descendant of B.
364   - elif operator == '>>,':
  381 + elif operator == u'>>,':
365 382 retval = lambda n: any((predicate(x) and
366 383 n in _leftmost_descendants(x))
367 384 for x in ancestors(n))
368 385 # A <<' B B is a right-most descendant of A.
369   - elif operator == '<<\'':
370   - retval = lambda n: (isinstance(n, nltk.tree.Tree) and
  386 + elif operator == u'<<\'':
  387 + retval = lambda n: (_istree(n) and
371 388 any(predicate(x)
372 389 for x in _rightmost_descendants(n)))
373 390 # A >>' B A is a right-most descendant of B.
374   - elif operator == '>>':
  391 + elif operator == u'>>':
375 392 retval = lambda n: any((predicate(x) and
376 393 n in _rightmost_descendants(x))
377 394 for x in ancestors(n))
378 395 # A <<: B There is a single path of descent from A and B is on it.
379   - elif operator == '<<:':
380   - retval = lambda n: (isinstance(n, nltk.tree.Tree) and
  396 + elif operator == u'<<:':
  397 + retval = lambda n: (_istree(n) and
381 398 any(predicate(x)
382 399 for x in _unique_descendants(n)))
383 400 # A >>: B There is a single path of descent from B and A is on it.
384   - elif operator == '>>:':
  401 + elif operator == u'>>:':
385 402 retval = lambda n: any(predicate(x) for x in unique_ancestors(n))
386 403 # A . B A immediately precedes B.
387   - elif operator == '.':
  404 + elif operator == u'.':
388 405 retval = lambda n: any(predicate(x)
389 406 for x in _immediately_after(n))
390 407 # A , B A immediately follows B.
391   - elif operator == ',':
  408 + elif operator == u',':
392 409 retval = lambda n: any(predicate(x)
393 410 for x in _immediately_before(n))
394 411 # A .. B A precedes B.
395   - elif operator == '..':
  412 + elif operator == u'..':
396 413 retval = lambda n: any(predicate(x) for x in _after(n))
397 414 # A ,, B A follows B.
398   - elif operator == ',,':
  415 + elif operator == u',,':
399 416 retval = lambda n: any(predicate(x) for x in _before(n))
400 417 # A $ B A is a sister of B (and A != B).
401   - elif operator == '$' or operator == '%':
402   - retval = lambda n: (hasattr(n, 'parent') and
  418 + elif operator == u'$' or operator == u'%':
  419 + retval = lambda n: (hasattr(n, u'parent') and
403 420 bool(n.parent()) and
404 421 any(predicate(x)
405 422 for x in n.parent() if x is not n))
406 423 # A $. B A is a sister of and immediately precedes B.
407   - elif operator == '$.' or operator == '%.':
408   - retval = lambda n: (hasattr(n, 'right_sibling') and
  424 + elif operator == u'$.' or operator == u'%.':
  425 + retval = lambda n: (hasattr(n, u'right_sibling') and
409 426 bool(n.right_sibling()) and
410 427 predicate(n.right_sibling()))
411 428 # A $, B A is a sister of and immediately follows B.
412   - elif operator == '$,' or operator == '%,':
413   - retval = lambda n: (hasattr(n, 'left_sibling') and
  429 + elif operator == u'$,' or operator == u'%,':
  430 + retval = lambda n: (hasattr(n, u'left_sibling') and
414 431 bool(n.left_sibling()) and
415 432 predicate(n.left_sibling()))
416 433 # A $.. B A is a sister of and precedes B.
417   - elif operator == '$..' or operator == '%..':
418   - retval = lambda n: (hasattr(n, 'parent') and
419   - hasattr(n, 'parent_index') and
  434 + elif operator == u'$..' or operator == u'%..':
  435 + retval = lambda n: (hasattr(n, u'parent') and
  436 + hasattr(n, u'parent_index') and
420 437 bool(n.parent()) and
421 438 any(predicate(x) for x in
422 439 n.parent()[n.parent_index() + 1:]))
423 440 # A $,, B A is a sister of and follows B.
424   - elif operator == '$,,' or operator == '%,,':
425   - retval = lambda n: (hasattr(n, 'parent') and
426   - hasattr(n, 'parent_index') and
  441 + elif operator == u'$,,' or operator == u'%,,':
  442 + retval = lambda n: (hasattr(n, u'parent') and
  443 + hasattr(n, u'parent_index') and
427 444 bool(n.parent()) and
428 445 any(predicate(x) for x in
429 446 n.parent()[:n.parent_index()]))
430 447 else:
431   - assert False, 'cannot interpret tgrep operator "{0}"'.format(
  448 + assert False, u'cannot interpret tgrep operator "{0}"'.format(
432 449 operator)
433 450 # now return the built function
434 451 if negated:
... ... @@ -442,7 +459,7 @@ def _tgrep_rel_conjunction_action(_s, _l, tokens):
442 459 from the conjunction of several other such lambda functions.
443 460 '''
444 461 # filter out the ampersand
445   - tokens = [x for x in tokens if x != '&']
  462 + tokens = [x for x in tokens if x != u'&']
446 463 # print 'relation conjunction tokens: ', tokens
447 464 if len(tokens) == 1:
448 465 return tokens[0]
... ... @@ -455,7 +472,7 @@ def _tgrep_rel_disjunction_action(_s, _l, tokens):
455 472 from the disjunction of several other such lambda functions.
456 473 '''
457 474 # filter out the pipe
458   - tokens = [x for x in tokens if x != '|']
  475 + tokens = [x for x in tokens if x != u'|']
459 476 # print 'relation disjunction tokens: ', tokens
460 477 if len(tokens) == 1:
461 478 return tokens[0]
... ... @@ -467,40 +484,40 @@ def _build_tgrep_parser(set_parse_actions = True):
467 484 Builds a pyparsing-based parser object for tokenizing and
468 485 interpreting tgrep search strings.
469 486 '''
470   - tgrep_op = (pyparsing.Optional('!') +
471   - pyparsing.Regex('[$%,.<>][%,.<>0-9-\':]*'))
472   - tgrep_qstring = pyparsing.QuotedString(quoteChar='"', escChar='\\',
  487 + tgrep_op = (pyparsing.Optional(u'!') +
  488 + pyparsing.Regex(u'[$%,.<>][%,.<>0-9-\':]*'))
  489 + tgrep_qstring = pyparsing.QuotedString(quoteChar=u'"', escChar=u'\\',
473 490 unquoteResults=False)
474   - tgrep_node_regex = pyparsing.QuotedString(quoteChar='/', escChar='\\',
  491 + tgrep_node_regex = pyparsing.QuotedString(quoteChar=u'/', escChar=u'\\',
475 492 unquoteResults=False)
476   - tgrep_node_literal = pyparsing.Regex('[^][ \r\t\n;:.,&|<>()$!@%^=]+')
  493 + tgrep_node_literal = pyparsing.Regex(u'[^][ \r\t\n;:.,&|<>()$!@%^=]+')
477 494 tgrep_expr = pyparsing.Forward()
478 495 tgrep_relations = pyparsing.Forward()
479   - tgrep_parens = pyparsing.Literal('(') + tgrep_expr + ')'
  496 + tgrep_parens = pyparsing.Literal(u'(') + tgrep_expr + u')'
480 497 tgrep_nltk_tree_pos = (
481   - pyparsing.Literal('N(') +
482   - pyparsing.Optional(pyparsing.Word(pyparsing.nums) + ',' +
  498 + pyparsing.Literal(u'N(') +
  499 + pyparsing.Optional(pyparsing.Word(pyparsing.nums) + u',' +
483 500 pyparsing.Optional(pyparsing.delimitedList(
484   - pyparsing.Word(pyparsing.nums), delim=',') +
485   - pyparsing.Optional(','))) + ')')
  501 + pyparsing.Word(pyparsing.nums), delim=u',') +
  502 + pyparsing.Optional(u','))) + u')')
486 503 tgrep_node_expr = (tgrep_qstring |
487 504 tgrep_node_regex |
488   - '*' |
  505 + u'*' |
489 506 tgrep_node_literal)
490 507 tgrep_node = (tgrep_parens |
491 508 tgrep_nltk_tree_pos |
492   - (pyparsing.Optional("'") +
  509 + (pyparsing.Optional(u"'") +
493 510 tgrep_node_expr +
494   - pyparsing.ZeroOrMore("|" + tgrep_node_expr)))
  511 + pyparsing.ZeroOrMore(u"|" + tgrep_node_expr)))
495 512 tgrep_relation = pyparsing.Forward()
496   - tgrep_brackets = pyparsing.Optional('!') + '[' + tgrep_relations + ']'
  513 + tgrep_brackets = pyparsing.Optional(u'!') + u'[' + tgrep_relations + u']'
497 514 tgrep_relation = tgrep_brackets | tgrep_op + tgrep_node
498 515 tgrep_rel_conjunction = pyparsing.Forward()
499 516 tgrep_rel_conjunction << (tgrep_relation +
500   - pyparsing.ZeroOrMore(pyparsing.Optional('&') +
  517 + pyparsing.ZeroOrMore(pyparsing.Optional(u'&') +
501 518 tgrep_rel_conjunction))
502 519 tgrep_relations << tgrep_rel_conjunction + pyparsing.ZeroOrMore(
503   - "|" + tgrep_relations)
  520 + u"|" + tgrep_relations)
504 521 tgrep_expr << tgrep_node + pyparsing.Optional(tgrep_relations)
505 522 if set_parse_actions:
506 523 tgrep_node.setParseAction(_tgrep_node_action)
... ... @@ -520,6 +537,8 @@ def tgrep_tokenize(tgrep_string):
520 537 Tokenizes a TGrep search string into separate tokens.
521 538 '''
522 539 parser = _build_tgrep_parser(False)
  540 + if isinstance(tgrep_string, bytes):
  541 + tgrep_string = tgrep_string.decode()
523 542 return list(parser.parseString(tgrep_string))
524 543  
525 544 def tgrep_compile(tgrep_string):
... ... @@ -528,6 +547,8 @@ def tgrep_compile(tgrep_string):
528 547 lambda function.
529 548 '''
530 549 parser = _build_tgrep_parser(True)
  550 + if isinstance(tgrep_string, bytes):
  551 + tgrep_string = tgrep_string.decode()
531 552 return list(parser.parseString(tgrep_string, parseAll=True))[0]
532 553  
533 554 def treepositions_no_leaves(tree):
... ... @@ -552,14 +573,15 @@ def tgrep_positions(tree, tgrep_string, search_leaves = True):
552 573 If `search_leaves` is False, the method will not return any
553 574 results in leaf positions.
554 575 '''
555   - if not hasattr(tree, 'treepositions'):
  576 + try:
  577 + if search_leaves:
  578 + search_positions = tree.treepositions()
  579 + else:
  580 + search_positions = treepositions_no_leaves(tree)
  581 + except AttributeError:
556 582 return []
557   - if isinstance(tgrep_string, basestring):
  583 + if isinstance(tgrep_string, (bytes, str)):
558 584 tgrep_string = tgrep_compile(tgrep_string)
559   - if search_leaves:
560   - search_positions = tree.treepositions()
561   - else:
562   - search_positions = treepositions_no_leaves(tree)
563 585 return [position for position in search_positions
564 586 if tgrep_string(tree[position])]
565 587  
... ...