pyval_repr.py
21.9 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
# epydoc -- Marked-up Representations for Python Values
#
# Copyright (C) 2005 Edward Loper
# Author: Edward Loper <edloper@loper.org>
# URL: <http://epydoc.sf.net>
#
# $Id: apidoc.py 1448 2007-02-11 00:05:34Z dvarrazzo $
"""
Syntax highlighter for Python values. Currently provides special
colorization support for:
- lists, tuples, sets, frozensets, dicts
- numbers
- strings
- compiled regexps
The highlighter also takes care of line-wrapping, and automatically
stops generating repr output as soon as it has exceeded the specified
number of lines (which should make it faster than pprint for large
values). It does I{not} bother to do automatic cycle detection,
because maxlines is typically around 5, so it's really not worth it.
The syntax-highlighted output is encoded using a
L{ParsedEpytextDocstring}, which can then be used to generate output in
a variety of formats.
"""
__docformat__ = 'epytext en'
# Implementation note: we use exact tests for classes (list, etc)
# rather than using isinstance, because subclasses might override
# __repr__.
import types, re
import epydoc.apidoc
from epydoc.util import decode_with_backslashreplace
from epydoc.util import plaintext_to_html, plaintext_to_latex
from epydoc.compat import *
import sre_parse, sre_constants
from epydoc.markup.epytext import Element, ParsedEpytextDocstring
def is_re_pattern(pyval):
    """
    Return true if C{pyval} is a compiled regular expression object.

    The value's type is compared against the type that
    C{re.compile()} actually returns on the running interpreter,
    rather than against a hard-coded class name: the name of that
    type is not the same in every Python version (e.g. C{SRE_Pattern}
    versus C{Pattern}), so a name comparison silently stops matching.

    @param pyval: Any Python value.
    @return: C{True} if C{pyval} is a compiled regexp; C{False}
        otherwise.
    """
    # An exact type test (not isinstance) matches the convention used
    # for the other value types in this module.
    return type(pyval) is type(re.compile(''))
class _ColorizerState:
"""
An object uesd to keep track of the current state of the pyval
colorizer. The L{mark()}/L{restore()} methods can be used to set
a backup point, and restore back to that backup point. This is
used by several colorization methods that first try colorizing
their object on a single line (setting linebreakok=False); and
then fall back on a multi-line output if that fails. The L{score}
variable is used to keep track of a 'score', reflecting how good
we think this repr is. E.g., unhelpful values like '<Foo instance
at 0x12345>' get low scores. If the score is too low, we'll use
the parse-derived repr instead.
"""
def __init__(self):
self.result = []
self.charpos = 0
self.lineno = 1
self.linebreakok = True
#: How good this represention is?
self.score = 0
def mark(self):
return (len(self.result), self.charpos,
self.lineno, self.linebreakok, self.score)
def restore(self, mark):
n, self.charpos, self.lineno, self.linebreakok, self.score = mark
del self.result[n:]
class _Maxlines(Exception):
    """A control-flow exception that is raised when PyvalColorizer
    exceeds the maximum number of allowed lines.  It is raised by
    C{PyvalColorizer._output()} and caught by
    C{PyvalColorizer.colorize()}, which then ends the output with an
    ellipsis marker."""
class _Linebreak(Exception):
    """A control-flow exception that is raised when PyvalColorizer
    generates a string containing a newline, but the state object's
    linebreakok variable is False.  C{PyvalColorizer._multiline()}
    catches it in order to retry with line breaks allowed, and
    C{PyvalColorizer.colorize()} catches it to truncate the output."""
class ColorizedPyvalRepr(ParsedEpytextDocstring):
    """
    A parsed epytext docstring holding the colorized repr of a Python
    value, together with two pieces of information about that repr.

    @ivar score: A score, evaluating how good this repr is.
    @ivar is_complete: True if this colorized repr completely describes
       the object.
    """
    def __init__(self, tree, score, is_complete):
        """
        @param tree: The epytext tree; handed unchanged to
            C{ParsedEpytextDocstring.__init__}.
        @param score: Stored as C{self.score}.
        @param is_complete: Stored as C{self.is_complete}.
        """
        ParsedEpytextDocstring.__init__(self, tree)
        self.is_complete = is_complete
        self.score = score
def colorize_pyval(pyval, parse_repr=None, min_score=None,
                   linelen=75, maxlines=5, linebreakok=True, sort=True):
    """
    Convenience wrapper: build a L{PyvalColorizer} configured with
    C{linelen}, C{maxlines}, C{linebreakok} and C{sort}, and return
    the result of calling its C{colorize()} method with C{pyval},
    C{parse_repr} and C{min_score}.
    """
    colorizer = PyvalColorizer(linelen, maxlines, linebreakok, sort)
    return colorizer.colorize(pyval, parse_repr, min_score)
class PyvalColorizer:
    """
    Syntax highlighter for Python values.

    A colorizer is configured with a maximum line length
    (C{linelen}), a maximum number of output lines (C{maxlines}), a
    flag saying whether line breaks may be generated at all
    (C{linebreakok}), and a flag saying whether the contents of sets,
    frozensets and dicts should be sorted before they are displayed
    (C{sort}).  Use L{colorize()} to build the marked-up repr of a
    value.
    """
    def __init__(self, linelen=75, maxlines=5, linebreakok=True, sort=True):
        self.linelen = linelen
        self.maxlines = maxlines
        self.linebreakok = linebreakok
        self.sort = sort

    #////////////////////////////////////////////////////////////
    # Colorization Tags & other constants
    #////////////////////////////////////////////////////////////

    GROUP_TAG = 'variable-group'     # e.g., "[" and "]"
    COMMA_TAG = 'variable-op'        # The "," that separates elements
    COLON_TAG = 'variable-op'        # The ":" in dictionaries
    CONST_TAG = None                 # None, True, False
    NUMBER_TAG = None                # ints, floats, etc
    QUOTE_TAG = 'variable-quote'     # Quotes around strings.
    STRING_TAG = 'variable-string'   # Body of string literals

    RE_CHAR_TAG = None
    RE_GROUP_TAG = 're-group'
    RE_REF_TAG = 're-ref'
    RE_OP_TAG = 're-op'
    RE_FLAGS_TAG = 're-flags'

    ELLIPSIS = Element('code', u'...', style='variable-ellipsis')
    LINEWRAP = Element('symbol', u'crarr')
    UNKNOWN_REPR = Element('code', u'??', style='variable-unknown')

    GENERIC_OBJECT_RE = re.compile(r'^<.* at 0x[0-9a-f]+>$', re.IGNORECASE)

    ESCAPE_UNICODE = False # should we escape non-ascii unicode chars?

    #////////////////////////////////////////////////////////////
    # Entry Point
    #////////////////////////////////////////////////////////////

    def colorize(self, pyval, parse_repr=None, min_score=None):
        """
        @param pyval: The value to colorize, or
            C{epydoc.apidoc.UNKNOWN} if no value is available.
        @param parse_repr: An alternative, already-formatted repr
            string.  It is used when C{pyval} is unknown, or when the
            repr derived from C{pyval} scores below C{min_score}.
        @param min_score: If not C{None}, the minimum score that the
            repr derived from C{pyval} must reach; otherwise (and if
            C{parse_repr} is available) C{parse_repr} is used instead.
        @return: A L{ColorizedPyvalRepr} describing the given pyval.
        """
        UNKNOWN = epydoc.apidoc.UNKNOWN
        # Create an object to keep track of the colorization.
        state = _ColorizerState()
        state.linebreakok = self.linebreakok
        # Colorize the value.  If we reach maxlines, then add on an
        # ellipsis marker and call it a day.
        try:
            if pyval is not UNKNOWN:
                self._colorize(pyval, state)
            elif parse_repr not in (None, UNKNOWN):
                self._output(parse_repr, None, state)
            else:
                state.result.append(PyvalColorizer.UNKNOWN_REPR)
            is_complete = True
        except (_Maxlines, _Linebreak):
            if self.linebreakok:
                state.result.append('\n')
                state.result.append(self.ELLIPSIS)
            else:
                # Drop a trailing line-wrap marker, then make room
                # for the ellipsis by trimming three characters.  The
                # emptiness check guards the [-1] lookup.
                if state.result and state.result[-1] is self.LINEWRAP:
                    state.result.pop()
                self._trim_result(state.result, 3)
                state.result.append(self.ELLIPSIS)
            is_complete = False
        # If we didn't score high enough, then try again.
        if (pyval is not UNKNOWN and parse_repr not in (None, UNKNOWN)
            and min_score is not None and state.score < min_score):
            return self.colorize(UNKNOWN, parse_repr)
        # Put it all together.
        tree = Element('epytext', *state.result)
        return ColorizedPyvalRepr(tree, state.score, is_complete)

    def _colorize(self, pyval, state):
        """
        Dispatch on the exact type of C{pyval} and add its colorized
        repr to C{state}.  Every value that is colorized adds one to
        C{state.score}; values whose C{repr()} fails or looks like a
        generic '<... at 0x...>' repr lower the score.
        """
        pyval_type = type(pyval)
        state.score += 1

        if pyval is None or pyval is True or pyval is False:
            self._output(unicode(pyval), self.CONST_TAG, state)
        elif pyval_type in (int, float, long, types.ComplexType):
            self._output(unicode(pyval), self.NUMBER_TAG, state)
        elif pyval_type is str:
            self._colorize_str(pyval, state, '', 'string-escape')
        elif pyval_type is unicode:
            if self.ESCAPE_UNICODE:
                self._colorize_str(pyval, state, 'u', 'unicode-escape')
            else:
                self._colorize_str(pyval, state, 'u', None)
        elif pyval_type is list:
            self._multiline(self._colorize_iter, pyval, state, '[', ']')
        elif pyval_type is tuple:
            self._multiline(self._colorize_iter, pyval, state, '(', ')')
        elif pyval_type is set:
            self._multiline(self._colorize_iter, self._sort(pyval),
                            state, 'set([', '])')
        elif pyval_type is frozenset:
            self._multiline(self._colorize_iter, self._sort(pyval),
                            state, 'frozenset([', '])')
        elif pyval_type is dict:
            self._multiline(self._colorize_dict, self._sort(pyval.items()),
                            state, '{', '}')
        elif is_re_pattern(pyval):
            self._colorize_re(pyval, state)
        else:
            # Fall back on repr().  This is deliberately best-effort:
            # a user-defined __repr__ may raise anything, so every
            # exception except KeyboardInterrupt is swallowed.
            try:
                pyval_repr = repr(pyval)
                if not isinstance(pyval_repr, (str, unicode)):
                    pyval_repr = unicode(pyval_repr)
                pyval_repr_ok = True
            except KeyboardInterrupt:
                raise
            except:
                pyval_repr_ok = False
                state.score -= 100

            if pyval_repr_ok:
                if self.GENERIC_OBJECT_RE.match(pyval_repr):
                    state.score -= 5
                self._output(pyval_repr, None, state)
            else:
                state.result.append(self.UNKNOWN_REPR)

    def _sort(self, items):
        """
        @return: C{sorted(items)} if sorting is enabled and succeeds;
            otherwise C{items} itself, unchanged.
        """
        if not self.sort:
            return items
        try:
            return sorted(items)
        except KeyboardInterrupt:
            raise
        except:
            # E.g. elements that cannot be compared with each other.
            return items

    def _trim_result(self, result, num_chars):
        """
        Remove up to C{num_chars} characters from the end of the
        output list C{result}, in place.  Pieces that become empty
        are removed from the list.  Stops early if C{result} runs
        out of pieces.
        """
        while num_chars > 0:
            if not result:
                return
            if isinstance(result[-1], Element):
                assert len(result[-1].children) == 1
                trim = min(num_chars, len(result[-1].children[0]))
                result[-1].children[0] = result[-1].children[0][:-trim]
                if not result[-1].children[0]:
                    result.pop()
                num_chars -= trim
            else:
                trim = min(num_chars, len(result[-1]))
                result[-1] = result[-1][:-trim]
                if not result[-1]:
                    result.pop()
                num_chars -= trim

    #////////////////////////////////////////////////////////////
    # Object Colorization Functions
    #////////////////////////////////////////////////////////////

    def _multiline(self, func, pyval, state, *args):
        """
        Helper for container-type colorizers.  First, try calling
        C{func(pyval, state, *args)} with linebreakok set to false;
        and if that fails, then try again with it set to true.

        If line breaks were already forbidden when this method was
        called, the L{_Linebreak} exception is propagated instead of
        retrying.
        """
        linebreakok = state.linebreakok
        mark = state.mark()

        try:
            state.linebreakok = False
            func(pyval, state, *args)
            state.linebreakok = linebreakok

        except _Linebreak:
            if not linebreakok:
                raise
            # restore() also resets state.linebreakok to the value
            # that was saved in the mark.
            state.restore(mark)
            func(pyval, state, *args)

    def _colorize_iter(self, pyval, state, prefix, suffix):
        """
        Colorize the elements of the iterable C{pyval}, surrounded by
        C{prefix} and C{suffix}.  When line breaks are allowed, each
        element after the first goes on its own line, indented to
        the column just after the prefix; otherwise elements are
        separated by ', '.
        """
        self._output(prefix, self.GROUP_TAG, state)
        indent = state.charpos
        for i, elt in enumerate(pyval):
            if i >= 1:
                if state.linebreakok:
                    self._output(',', self.COMMA_TAG, state)
                    self._output('\n'+' '*indent, None, state)
                else:
                    self._output(', ', self.COMMA_TAG, state)
            self._colorize(elt, state)
        self._output(suffix, self.GROUP_TAG, state)

    def _colorize_dict(self, items, state, prefix, suffix):
        """
        Colorize a sequence of C{(key, value)} pairs as a dictionary
        display, surrounded by C{prefix} and C{suffix}.  Layout rules
        are the same as for L{_colorize_iter}.
        """
        self._output(prefix, self.GROUP_TAG, state)
        indent = state.charpos
        for i, (key, val) in enumerate(items):
            if i >= 1:
                if state.linebreakok:
                    self._output(',', self.COMMA_TAG, state)
                    self._output('\n'+' '*indent, None, state)
                else:
                    self._output(', ', self.COMMA_TAG, state)
            self._colorize(key, state)
            self._output(': ', self.COLON_TAG, state)
            self._colorize(val, state)
        self._output(suffix, self.GROUP_TAG, state)

    def _colorize_str(self, pyval, state, prefix, encoding):
        """
        Colorize the string C{pyval}.

        @param prefix: Text placed before the opening quote (e.g.
            C{'u'}).
        @param encoding: If true, the name of the codec used to
            encode each line before it is output (e.g.
            C{'string-escape'}); if false, lines are output as-is.
        """
        # Decide which quote to use.
        if '\n' in pyval and state.linebreakok:
            quote = "'''"
        else:
            quote = "'"
        # Divide the string into lines.
        if state.linebreakok:
            lines = pyval.split('\n')
        else:
            lines = [pyval]
        # Open quote.
        self._output(prefix+quote, self.QUOTE_TAG, state)
        # Body
        for i, line in enumerate(lines):
            if i > 0:
                self._output('\n', None, state)
            if encoding:
                line = line.encode(encoding)
            self._output(line, self.STRING_TAG, state)
        # Close quote.
        self._output(quote, self.QUOTE_TAG, state)

    def _colorize_re(self, pyval, state):
        """
        Colorize the compiled regexp C{pyval} as
        C{re.compile(r'...')}, by re-parsing its pattern with
        C{sre_parse} and colorizing the resulting parse tree.
        """
        # Extract the flag & pattern from the regexp.
        pat, flags = pyval.pattern, pyval.flags
        # If the pattern is a string, decode it to unicode.
        if isinstance(pat, str):
            pat = decode_with_backslashreplace(pat)
        # Parse the regexp pattern.
        tree = sre_parse.parse(pat, flags)
        # Map group number -> group name.
        groups = dict([(num,name) for (name,num) in
                       tree.pattern.groupdict.items()])
        # Colorize it!
        self._output("re.compile(r'", None, state)
        self._colorize_re_flags(tree.pattern.flags, state)
        self._colorize_re_tree(tree, state, True, groups)
        self._output("')", None, state)

    def _colorize_re_flags(self, flags, state):
        """
        If any flag bits are set in C{flags}, output them as an
        inline C{(?...)} group, using the flag letters from
        C{sre_parse.FLAGS} in sorted order.
        """
        if flags:
            flags = [c for (c,n) in sorted(sre_parse.FLAGS.items())
                     if (n&flags)]
            flags = '(?%s)' % ''.join(flags)
            self._output(flags, self.RE_FLAGS_TAG, state)

    def _colorize_re_tree(self, tree, state, noparen, groups):
        """
        Colorize a regexp parse tree.

        @param tree: A sequence of C{(op, args)} elements, such as
            the tree returned by C{sre_parse.parse()}.
        @param noparen: If false, and the tree contains more than one
            element, then the output is wrapped in parentheses.
        @param groups: A dictionary mapping group numbers to group
            names, used to display named groups.
        """
        assert noparen in (True, False)
        if len(tree) > 1 and not noparen:
            self._output('(', self.RE_GROUP_TAG, state)
        for elt in tree:
            op = elt[0]
            args = elt[1]

            if op == sre_constants.LITERAL:
                c = unichr(args)
                # Add any appropriate escaping.
                if c in '.^$\\*+?{}[]|()\'': c = '\\'+c
                elif c == '\t': c = '\\t'
                elif c == '\r': c = '\\r'
                elif c == '\n': c = '\\n'
                elif c == '\f': c = '\\f'
                elif c == '\v': c = '\\v'
                elif ord(c) > 0xffff: c = r'\U%08x' % ord(c)
                elif ord(c) > 0xff: c = r'\u%04x' % ord(c)
                elif ord(c)<32 or ord(c)>=127: c = r'\x%02x' % ord(c)
                self._output(c, self.RE_CHAR_TAG, state)

            elif op == sre_constants.ANY:
                self._output('.', self.RE_CHAR_TAG, state)

            elif op == sre_constants.BRANCH:
                if args[0] is not None:
                    raise ValueError('Branch expected None arg but got %s'
                                     % args[0])
                for i, item in enumerate(args[1]):
                    if i > 0:
                        self._output('|', self.RE_OP_TAG, state)
                    self._colorize_re_tree(item, state, True, groups)

            elif op == sre_constants.IN:
                if (len(args) == 1 and args[0][0] == sre_constants.CATEGORY):
                    self._colorize_re_tree(args, state, False, groups)
                else:
                    self._output('[', self.RE_GROUP_TAG, state)
                    self._colorize_re_tree(args, state, True, groups)
                    self._output(']', self.RE_GROUP_TAG, state)

            elif op == sre_constants.CATEGORY:
                if args == sre_constants.CATEGORY_DIGIT: val = r'\d'
                elif args == sre_constants.CATEGORY_NOT_DIGIT: val = r'\D'
                elif args == sre_constants.CATEGORY_SPACE: val = r'\s'
                elif args == sre_constants.CATEGORY_NOT_SPACE: val = r'\S'
                elif args == sre_constants.CATEGORY_WORD: val = r'\w'
                elif args == sre_constants.CATEGORY_NOT_WORD: val = r'\W'
                else: raise ValueError('Unknown category %s' % args)
                self._output(val, self.RE_CHAR_TAG, state)

            elif op == sre_constants.AT:
                if args == sre_constants.AT_BEGINNING_STRING: val = r'\A'
                elif args == sre_constants.AT_BEGINNING: val = r'^'
                elif args == sre_constants.AT_END: val = r'$'
                elif args == sre_constants.AT_BOUNDARY: val = r'\b'
                elif args == sre_constants.AT_NON_BOUNDARY: val = r'\B'
                elif args == sre_constants.AT_END_STRING: val = r'\Z'
                else: raise ValueError('Unknown position %s' % args)
                self._output(val, self.RE_CHAR_TAG, state)

            elif op in (sre_constants.MAX_REPEAT, sre_constants.MIN_REPEAT):
                minrpt = args[0]
                maxrpt = args[1]
                if maxrpt == sre_constants.MAXREPEAT:
                    if minrpt == 0:   val = '*'
                    elif minrpt == 1: val = '+'
                    else: val = '{%d,}' % (minrpt)
                elif minrpt == 0:
                    if maxrpt == 1: val = '?'
                    else: val = '{,%d}' % (maxrpt)
                elif minrpt == maxrpt:
                    val = '{%d}' % (maxrpt)
                else:
                    val = '{%d,%d}' % (minrpt, maxrpt)
                # Non-greedy repeats get a trailing '?'.
                if op == sre_constants.MIN_REPEAT:
                    val += '?'

                self._colorize_re_tree(args[2], state, False, groups)
                self._output(val, self.RE_OP_TAG, state)

            elif op == sre_constants.SUBPATTERN:
                if args[0] is None:
                    self._output('(?:', self.RE_GROUP_TAG, state)
                elif args[0] in groups:
                    self._output('(?P<', self.RE_GROUP_TAG, state)
                    self._output(groups[args[0]], self.RE_REF_TAG, state)
                    self._output('>', self.RE_GROUP_TAG, state)
                elif isinstance(args[0], (int, long)):
                    # This is cheating:
                    self._output('(', self.RE_GROUP_TAG, state)
                else:
                    self._output('(?P<', self.RE_GROUP_TAG, state)
                    self._output(args[0], self.RE_REF_TAG, state)
                    self._output('>', self.RE_GROUP_TAG, state)
                self._colorize_re_tree(args[1], state, True, groups)
                self._output(')', self.RE_GROUP_TAG, state)

            elif op == sre_constants.GROUPREF:
                self._output('\\%d' % args, self.RE_REF_TAG, state)

            elif op == sre_constants.RANGE:
                self._colorize_re_tree( ((sre_constants.LITERAL, args[0]),),
                                        state, False, groups )
                self._output('-', self.RE_OP_TAG, state)
                self._colorize_re_tree( ((sre_constants.LITERAL, args[1]),),
                                        state, False, groups )

            elif op == sre_constants.NEGATE:
                self._output('^', self.RE_OP_TAG, state)

            elif op == sre_constants.ASSERT:
                if args[0] > 0:
                    self._output('(?=', self.RE_GROUP_TAG, state)
                else:
                    self._output('(?<=', self.RE_GROUP_TAG, state)
                self._colorize_re_tree(args[1], state, True, groups)
                self._output(')', self.RE_GROUP_TAG, state)

            elif op == sre_constants.ASSERT_NOT:
                if args[0] > 0:
                    self._output('(?!', self.RE_GROUP_TAG, state)
                else:
                    self._output('(?<!', self.RE_GROUP_TAG, state)
                self._colorize_re_tree(args[1], state, True, groups)
                self._output(')', self.RE_GROUP_TAG, state)

            elif op == sre_constants.NOT_LITERAL:
                self._output('[^', self.RE_GROUP_TAG, state)
                self._colorize_re_tree( ((sre_constants.LITERAL, args),),
                                        state, False, groups )
                self._output(']', self.RE_GROUP_TAG, state)
            else:
                # `log` is not imported at the top of this module, so
                # it is imported here, where it is needed.
                # NOTE(review): assumes epydoc provides a `log` module
                # with an `error()` function -- confirm.
                from epydoc import log
                # elt is itself a tuple, so it must be wrapped in a
                # one-element tuple to be formatted as a single %r.
                log.error("Error colorizing regexp: unknown elt %r" % (elt,))
        if len(tree) > 1 and not noparen:
            self._output(')', self.RE_GROUP_TAG, state)

    #////////////////////////////////////////////////////////////
    # Output function
    #////////////////////////////////////////////////////////////

    def _output(self, s, tag, state):
        """
        Add the string C{s} to the result list, tagging its contents
        with tag C{tag}.  Any lines that go beyond C{self.linelen}
        will be line-wrapped.  If the total number of lines exceeds
        C{self.maxlines}, then raise a L{_Maxlines} exception.  If
        C{s} contains a newline while C{state.linebreakok} is false,
        then raise a L{_Linebreak} exception.

        @param tag: The style to mark the text with, or a false value
            to add the text as a plain, untagged string.
        """
        # Make sure the string is unicode.
        if isinstance(s, str):
            s = decode_with_backslashreplace(s)

        # Split the string into segments.  The first segment is the
        # content to add to the current line, and the remaining
        # segments are new lines.
        segments = s.split('\n')

        for i, segment in enumerate(segments):
            # If this isn't the first segment, then add a newline to
            # split it from the previous segment.
            if i > 0:
                if (state.lineno+1) > self.maxlines:
                    raise _Maxlines()
                if not state.linebreakok:
                    raise _Linebreak()
                state.result.append(u'\n')
                state.lineno += 1
                state.charpos = 0

            # If the segment fits on the current line, then just call
            # markup to tag it, and store the result.
            if state.charpos + len(segment) <= self.linelen:
                state.charpos += len(segment)
                if tag:
                    segment = Element('code', segment, style=tag)
                state.result.append(segment)

            # If the segment doesn't fit on the current line, then
            # line-wrap it, and insert the remainder of the line into
            # the segments list that we're iterating over.  (We'll go
            # to the beginning of the next line at the start of the
            # next iteration through the loop.)
            else:
                split = self.linelen-state.charpos
                segments.insert(i+1, segment[split:])
                segment = segment[:split]
                if tag:
                    segment = Element('code', segment, style=tag)
                state.result += [segment, self.LINEWRAP]