Commit f97f4db2002913e1a18d17d7988ff27e00b7b9c1
1 parent
4c12b068
Exists in
master
and in
7 other branches
--no commit message
Showing
12 changed files
with
0 additions
and
3250 deletions
Show diff stats
pacotes/openlayers/tools/BeautifulSoup.py
@@ -1,1767 +0,0 @@
1 | -"""Beautiful Soup | ||
2 | -Elixir and Tonic | ||
3 | -"The Screen-Scraper's Friend" | ||
4 | -http://www.crummy.com/software/BeautifulSoup/ | ||
5 | - | ||
6 | -Beautiful Soup parses a (possibly invalid) XML or HTML document into a | ||
7 | -tree representation. It provides methods and Pythonic idioms that make | ||
8 | -it easy to navigate, search, and modify the tree. | ||
9 | - | ||
10 | -A well-formed XML/HTML document yields a well-formed data | ||
11 | -structure. An ill-formed XML/HTML document yields a correspondingly | ||
12 | -ill-formed data structure. If your document is only locally | ||
13 | -well-formed, you can use this library to find and process the | ||
14 | -well-formed part of it. The BeautifulSoup class | ||
15 | - | ||
16 | -Beautiful Soup works with Python 2.2 and up. It has no external | ||
17 | -dependencies, but you'll have more success at converting data to UTF-8 | ||
18 | -if you also install these three packages: | ||
19 | - | ||
20 | -* chardet, for auto-detecting character encodings | ||
21 | - http://chardet.feedparser.org/ | ||
22 | -* cjkcodecs and iconv_codec, which add more encodings to the ones supported | ||
23 | - by stock Python. | ||
24 | - http://cjkpython.i18n.org/ | ||
25 | - | ||
26 | -Beautiful Soup defines classes for two main parsing strategies: | ||
27 | - | ||
28 | - * BeautifulStoneSoup, for parsing XML, SGML, or your domain-specific | ||
29 | - language that kind of looks like XML. | ||
30 | - | ||
31 | - * BeautifulSoup, for parsing run-of-the-mill HTML code, be it valid | ||
32 | - or invalid. This class has web browser-like heuristics for | ||
33 | - obtaining a sensible parse tree in the face of common HTML errors. | ||
34 | - | ||
35 | -Beautiful Soup also defines a class (UnicodeDammit) for autodetecting | ||
36 | -the encoding of an HTML or XML document, and converting it to | ||
37 | -Unicode. Much of this code is taken from Mark Pilgrim's Universal Feed Parser. | ||
38 | - | ||
39 | -For more than you ever wanted to know about Beautiful Soup, see the | ||
40 | -documentation: | ||
41 | -http://www.crummy.com/software/BeautifulSoup/documentation.html | ||
42 | - | ||
43 | -""" | ||
44 | -from __future__ import generators | ||
45 | - | ||
46 | -__author__ = "Leonard Richardson (leonardr@segfault.org)" | ||
47 | -__version__ = "3.0.4" | ||
48 | -__copyright__ = "Copyright (c) 2004-2007 Leonard Richardson" | ||
49 | -__license__ = "PSF" | ||
50 | - | ||
51 | -from sgmllib import SGMLParser, SGMLParseError | ||
52 | -import codecs | ||
53 | -import types | ||
54 | -import re | ||
55 | -import sgmllib | ||
56 | -try: | ||
57 | - from htmlentitydefs import name2codepoint | ||
58 | -except ImportError: | ||
59 | - name2codepoint = {} | ||
60 | - | ||
61 | -#This hack makes Beautiful Soup able to parse XML with namespaces | ||
62 | -sgmllib.tagfind = re.compile('[a-zA-Z][-_.:a-zA-Z0-9]*') | ||
63 | - | ||
64 | -DEFAULT_OUTPUT_ENCODING = "utf-8" | ||
65 | - | ||
66 | -# First, the classes that represent markup elements. | ||
67 | - | ||
class PageElement:
    """Contains the navigational information for some part of the page
    (either a tag or a piece of text)"""

    def setup(self, parent=None, previous=None):
        """Sets up the initial relations between this element and
        other elements."""
        self.parent = parent
        self.previous = previous
        self.next = None
        self.previousSibling = None
        self.nextSibling = None
        if self.parent and self.parent.contents:
            self.previousSibling = self.parent.contents[-1]
            self.previousSibling.nextSibling = self

    def replaceWith(self, replaceWith):
        """Replaces this element (and its subtree) in the parse tree
        with the given element, at the same position."""
        oldParent = self.parent
        myIndex = self.parent.contents.index(self)
        if hasattr(replaceWith, 'parent') and replaceWith.parent == self.parent:
            # We're replacing this element with one of its siblings.
            index = self.parent.contents.index(replaceWith)
            if index and index < myIndex:
                # Furthermore, it comes before this element. That
                # means that when we extract it, the index of this
                # element will change.
                myIndex = myIndex - 1
        self.extract()
        oldParent.insert(myIndex, replaceWith)

    def extract(self):
        """Destructively rips this element out of the tree."""
        if self.parent:
            try:
                self.parent.contents.remove(self)
            except ValueError:
                pass

        #Find the two elements that would be next to each other if
        #this element (and any children) hadn't been parsed. Connect
        #the two.
        lastChild = self._lastRecursiveChild()
        nextElement = lastChild.next

        if self.previous:
            self.previous.next = nextElement
        if nextElement:
            nextElement.previous = self.previous
        self.previous = None
        lastChild.next = None

        self.parent = None
        if self.previousSibling:
            self.previousSibling.nextSibling = self.nextSibling
        if self.nextSibling:
            self.nextSibling.previousSibling = self.previousSibling
        self.previousSibling = self.nextSibling = None

    def _lastRecursiveChild(self):
        "Finds the last element beneath this object to be parsed."
        lastChild = self
        while hasattr(lastChild, 'contents') and lastChild.contents:
            lastChild = lastChild.contents[-1]
        return lastChild

    def insert(self, position, newChild):
        """Inserts newChild at the given position among this element's
        contents, rewiring the sibling/next/previous links of every
        affected element."""
        if (isinstance(newChild, basestring)
            or isinstance(newChild, unicode)) \
            and not isinstance(newChild, NavigableString):
            newChild = NavigableString(newChild)

        position = min(position, len(self.contents))
        if hasattr(newChild, 'parent') and newChild.parent != None:
            # We're 'inserting' an element that's already one
            # of this object's children.
            if newChild.parent == self:
                # BUG FIX: this used to be 'index = self.find(newChild)',
                # but find() returns a matching element, not a list
                # position; we need the child's index in contents
                # (cf. the identical logic in replaceWith).
                index = self.contents.index(newChild)
                if index and index < position:
                    # Furthermore we're moving it further down the
                    # list of this object's children. That means that
                    # when we extract this element, our target index
                    # will jump down one.
                    position = position - 1
            newChild.extract()

        newChild.parent = self
        previousChild = None
        if position == 0:
            newChild.previousSibling = None
            newChild.previous = self
        else:
            previousChild = self.contents[position-1]
            newChild.previousSibling = previousChild
            newChild.previousSibling.nextSibling = newChild
            newChild.previous = previousChild._lastRecursiveChild()
        if newChild.previous:
            newChild.previous.next = newChild

        newChildsLastElement = newChild._lastRecursiveChild()

        if position >= len(self.contents):
            newChild.nextSibling = None

            parent = self
            parentsNextSibling = None
            while not parentsNextSibling:
                parentsNextSibling = parent.nextSibling
                parent = parent.parent
                if not parent: # This is the last element in the document.
                    break
            if parentsNextSibling:
                newChildsLastElement.next = parentsNextSibling
            else:
                newChildsLastElement.next = None
        else:
            nextChild = self.contents[position]
            newChild.nextSibling = nextChild
            if newChild.nextSibling:
                newChild.nextSibling.previousSibling = newChild
            newChildsLastElement.next = nextChild

        if newChildsLastElement.next:
            newChildsLastElement.next.previous = newChildsLastElement
        self.contents.insert(position, newChild)

    def findNext(self, name=None, attrs={}, text=None, **kwargs):
        """Returns the first item that matches the given criteria and
        appears after this Tag in the document."""
        return self._findOne(self.findAllNext, name, attrs, text, **kwargs)

    def findAllNext(self, name=None, attrs={}, text=None, limit=None,
                    **kwargs):
        """Returns all items that match the given criteria and appear
        after this Tag in the document."""
        # BUG FIX: **kwargs was accepted but never forwarded, so
        # attribute shortcuts like findAllNext(id='x') were ignored.
        return self._findAll(name, attrs, text, limit, self.nextGenerator,
                             **kwargs)

    def findNextSibling(self, name=None, attrs={}, text=None, **kwargs):
        """Returns the closest sibling to this Tag that matches the
        given criteria and appears after this Tag in the document."""
        return self._findOne(self.findNextSiblings, name, attrs, text,
                             **kwargs)

    def findNextSiblings(self, name=None, attrs={}, text=None, limit=None,
                         **kwargs):
        """Returns the siblings of this Tag that match the given
        criteria and appear after this Tag in the document."""
        return self._findAll(name, attrs, text, limit,
                             self.nextSiblingGenerator, **kwargs)
    fetchNextSiblings = findNextSiblings # Compatibility with pre-3.x

    def findPrevious(self, name=None, attrs={}, text=None, **kwargs):
        """Returns the first item that matches the given criteria and
        appears before this Tag in the document."""
        return self._findOne(self.findAllPrevious, name, attrs, text, **kwargs)

    def findAllPrevious(self, name=None, attrs={}, text=None, limit=None,
                        **kwargs):
        """Returns all items that match the given criteria and appear
        before this Tag in the document."""
        return self._findAll(name, attrs, text, limit, self.previousGenerator,
                             **kwargs)
    fetchPrevious = findAllPrevious # Compatibility with pre-3.x

    def findPreviousSibling(self, name=None, attrs={}, text=None, **kwargs):
        """Returns the closest sibling to this Tag that matches the
        given criteria and appears before this Tag in the document."""
        return self._findOne(self.findPreviousSiblings, name, attrs, text,
                             **kwargs)

    def findPreviousSiblings(self, name=None, attrs={}, text=None,
                             limit=None, **kwargs):
        """Returns the siblings of this Tag that match the given
        criteria and appear before this Tag in the document."""
        return self._findAll(name, attrs, text, limit,
                             self.previousSiblingGenerator, **kwargs)
    fetchPreviousSiblings = findPreviousSiblings # Compatibility with pre-3.x

    def findParent(self, name=None, attrs={}, **kwargs):
        """Returns the closest parent of this Tag that matches the given
        criteria."""
        # NOTE: We can't use _findOne because findParents takes a different
        # set of arguments.
        r = None
        l = self.findParents(name, attrs, 1)
        if l:
            r = l[0]
        return r

    def findParents(self, name=None, attrs={}, limit=None, **kwargs):
        """Returns the parents of this Tag that match the given
        criteria."""

        return self._findAll(name, attrs, None, limit, self.parentGenerator,
                             **kwargs)
    fetchParents = findParents # Compatibility with pre-3.x

    #These methods do the real heavy lifting.

    def _findOne(self, method, name, attrs, text, **kwargs):
        # Run the corresponding findAll* method with limit=1 and unwrap
        # the single result (or None).
        r = None
        l = method(name, attrs, text, 1, **kwargs)
        if l:
            r = l[0]
        return r

    def _findAll(self, name, attrs, text, limit, generator, **kwargs):
        "Iterates over a generator looking for things that match."

        if isinstance(name, SoupStrainer):
            strainer = name
        else:
            # Build a SoupStrainer
            strainer = SoupStrainer(name, attrs, text, **kwargs)
        results = ResultSet(strainer)
        g = generator()
        while True:
            try:
                i = g.next()
            except StopIteration:
                break
            if i:
                found = strainer.search(i)
                if found:
                    results.append(found)
                    if limit and len(results) >= limit:
                        break
        return results

    #These Generators can be used to navigate starting from both
    #NavigableStrings and Tags.
    def nextGenerator(self):
        i = self
        while i:
            i = i.next
            yield i

    def nextSiblingGenerator(self):
        i = self
        while i:
            i = i.nextSibling
            yield i

    def previousGenerator(self):
        i = self
        while i:
            i = i.previous
            yield i

    def previousSiblingGenerator(self):
        i = self
        while i:
            i = i.previousSibling
            yield i

    def parentGenerator(self):
        i = self
        while i:
            i = i.parent
            yield i

    # Utility methods
    def substituteEncoding(self, str, encoding=None):
        # Replaces the %SOUP-ENCODING% placeholder with the actual
        # encoding. (Parameter name 'str' kept for interface stability.)
        encoding = encoding or "utf-8"
        return str.replace("%SOUP-ENCODING%", encoding)

    def toEncoding(self, s, encoding=None):
        """Encodes an object to a string in some encoding, or to Unicode.
        ."""
        if isinstance(s, unicode):
            if encoding:
                s = s.encode(encoding)
        elif isinstance(s, str):
            if encoding:
                s = s.encode(encoding)
            else:
                s = unicode(s)
        else:
            if encoding:
                s = self.toEncoding(str(s), encoding)
            else:
                s = unicode(s)
        return s
class NavigableString(unicode, PageElement):
    """A Unicode string that also knows its place in the parse tree."""

    def __getattr__(self, attr):
        """text.string gives you text. This is for backwards
        compatibility for Navigable*String, but for CData* it lets you
        get the string without the CData wrapper."""
        if attr != 'string':
            raise AttributeError("'%s' object has no attribute '%s'"
                                 % (self.__class__.__name__, attr))
        return self

    def __unicode__(self):
        # Unicode rendering is simply the unencoded string.
        return self.__str__(None)

    def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING):
        # Encode when an encoding is given; otherwise hand back the
        # Unicode string unchanged.
        if encoding:
            return self.encode(encoding)
        return self
class CData(NavigableString):
    """A CDATA section, rendered inside its <![CDATA[...]]> wrapper."""

    def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING):
        rendered = NavigableString.__str__(self, encoding)
        return "<![CDATA[%s]]>" % rendered
class ProcessingInstruction(NavigableString):
    """A processing instruction, rendered inside <?...?>."""

    def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING):
        text = self
        # Substitute the encoding placeholder before rendering, if present.
        if "%SOUP-ENCODING%" in text:
            text = self.substituteEncoding(text, encoding)
        return "<?%s?>" % self.toEncoding(text, encoding)
class Comment(NavigableString):
    """An HTML/XML comment, rendered inside <!-- -->."""

    def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING):
        rendered = NavigableString.__str__(self, encoding)
        return "<!--%s-->" % rendered
class Declaration(NavigableString):
    """An SGML declaration (e.g. a DOCTYPE), rendered inside <!...>."""

    def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING):
        rendered = NavigableString.__str__(self, encoding)
        return "<!%s>" % rendered
class Tag(PageElement):

    """Represents a found HTML tag with its attributes and contents."""

    # NOTE(review): "squot" and "quote" are not the standard XML entities
    # (&apos;/&quot; are), but changing them would change rendered output,
    # so they are left as-is.
    XML_SPECIAL_CHARS_TO_ENTITIES = { "'" : "squot",
                                      '"' : "quote",
                                      "&" : "amp",
                                      "<" : "lt",
                                      ">" : "gt" }

    def __init__(self, parser, name, attrs=None, parent=None,
                 previous=None):
        "Basic constructor."

        # We don't actually store the parser object: that lets extracted
        # chunks be garbage-collected
        self.parserClass = parser.__class__
        self.isSelfClosing = parser.isSelfClosingTag(name)
        self.name = name
        if attrs == None:
            attrs = []
        self.attrs = attrs
        self.contents = []
        self.setup(parent, previous)
        self.hidden = False
        self.containsSubstitutions = False

    def get(self, key, default=None):
        """Returns the value of the 'key' attribute for the tag, or
        the value given for 'default' if it doesn't have that
        attribute."""
        return self._getAttrMap().get(key, default)

    def has_key(self, key):
        """Returns true if the tag has an attribute named 'key'."""
        return self._getAttrMap().has_key(key)

    def __getitem__(self, key):
        """tag[key] returns the value of the 'key' attribute for the tag,
        and throws an exception if it's not there."""
        return self._getAttrMap()[key]

    def __iter__(self):
        "Iterating over a tag iterates over its contents."
        return iter(self.contents)

    def __len__(self):
        "The length of a tag is the length of its list of contents."
        return len(self.contents)

    def __contains__(self, x):
        return x in self.contents

    def __nonzero__(self):
        "A tag is non-None even if it has no contents."
        return True

    def __setitem__(self, key, value):
        """Setting tag[key] sets the value of the 'key' attribute for the
        tag."""
        self._getAttrMap()
        self.attrMap[key] = value
        found = False
        # Update every occurrence in the (key, value) list; bad HTML can
        # define the same attribute multiple times.
        for i in range(0, len(self.attrs)):
            if self.attrs[i][0] == key:
                self.attrs[i] = (key, value)
                found = True
        if not found:
            self.attrs.append((key, value))
        self._getAttrMap()[key] = value

    def __delitem__(self, key):
        "Deleting tag[key] deletes all 'key' attributes for the tag."
        for item in self.attrs:
            if item[0] == key:
                self.attrs.remove(item)
                #We don't break because bad HTML can define the same
                #attribute multiple times.
            self._getAttrMap()
            if self.attrMap.has_key(key):
                del self.attrMap[key]

    def __call__(self, *args, **kwargs):
        """Calling a tag like a function is the same as calling its
        findAll() method. Eg. tag('a') returns a list of all the A tags
        found within this tag."""
        # BUG FIX: was apply(self.findAll, args, kwargs); apply() is
        # deprecated and removed in Python 3 -- the extended call syntax
        # is exactly equivalent.
        return self.findAll(*args, **kwargs)

    def __getattr__(self, tag):
        """tag.fooTag (or just tag.foo) is shorthand for tag.find('foo')."""
        #print "Getattr %s.%s" % (self.__class__, tag)
        if len(tag) > 3 and tag.rfind('Tag') == len(tag)-3:
            return self.find(tag[:-3])
        elif tag.find('__') != 0:
            return self.find(tag)

    def __eq__(self, other):
        """Returns true iff this tag has the same name, the same attributes,
        and the same contents (recursively) as the given tag.

        NOTE: right now this will return false if two tags have the
        same attributes in a different order. Should this be fixed?"""
        if not hasattr(other, 'name') or not hasattr(other, 'attrs') or not hasattr(other, 'contents') or self.name != other.name or self.attrs != other.attrs or len(self) != len(other):
            return False
        for i in range(0, len(self.contents)):
            if self.contents[i] != other.contents[i]:
                return False
        return True

    def __ne__(self, other):
        """Returns true iff this tag is not identical to the other tag,
        as defined in __eq__."""
        return not self == other

    def __repr__(self, encoding=DEFAULT_OUTPUT_ENCODING):
        """Renders this tag as a string."""
        return self.__str__(encoding)

    def __unicode__(self):
        return self.__str__(None)

    def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING,
                prettyPrint=False, indentLevel=0):
        """Returns a string or Unicode representation of this tag and
        its contents. To get Unicode, pass None for encoding.

        NOTE: since Python's HTML parser consumes whitespace, this
        method is not certain to reproduce the whitespace present in
        the original string."""

        encodedName = self.toEncoding(self.name, encoding)

        attrs = []
        if self.attrs:
            for key, val in self.attrs:
                fmt = '%s="%s"'
                if isString(val):
                    if self.containsSubstitutions and '%SOUP-ENCODING%' in val:
                        val = self.substituteEncoding(val, encoding)

                    # The attribute value either:
                    #
                    # * Contains no embedded double quotes or single quotes.
                    #   No problem: we enclose it in double quotes.
                    # * Contains embedded single quotes. No problem:
                    #   double quotes work here too.
                    # * Contains embedded double quotes. No problem:
                    #   we enclose it in single quotes.
                    # * Embeds both single _and_ double quotes. This
                    #   can't happen naturally, but it can happen if
                    #   you modify an attribute value after parsing
                    #   the document. Now we have a bit of a
                    #   problem. We solve it by enclosing the
                    #   attribute in single quotes, and escaping any
                    #   embedded single quotes to XML entities.
                    if '"' in val:
                        fmt = "%s='%s'"
                        # This can't happen naturally, but it can happen
                        # if you modify an attribute value after parsing.
                        if "'" in val:
                            val = val.replace("'", "&squot;")

                    # Now we're okay w/r/t quotes. But the attribute
                    # value might also contain angle brackets, or
                    # ampersands that aren't part of entities. We need
                    # to escape those to XML entities too.
                    val = re.sub("([<>]|&(?![^\s]+;))",
                                 lambda x: "&" + self.XML_SPECIAL_CHARS_TO_ENTITIES[x.group(0)[0]] + ";",
                                 val)

                attrs.append(fmt % (self.toEncoding(key, encoding),
                                    self.toEncoding(val, encoding)))
        close = ''
        closeTag = ''
        if self.isSelfClosing:
            close = ' /'
        else:
            closeTag = '</%s>' % encodedName

        indentTag, indentContents = 0, 0
        if prettyPrint:
            indentTag = indentLevel
            space = (' ' * (indentTag-1))
            indentContents = indentTag + 1
        contents = self.renderContents(encoding, prettyPrint, indentContents)
        if self.hidden:
            s = contents
        else:
            s = []
            attributeString = ''
            if attrs:
                attributeString = ' ' + ' '.join(attrs)
            if prettyPrint:
                s.append(space)
            s.append('<%s%s%s>' % (encodedName, attributeString, close))
            if prettyPrint:
                s.append("\n")
            s.append(contents)
            if prettyPrint and contents and contents[-1] != "\n":
                s.append("\n")
            if prettyPrint and closeTag:
                s.append(space)
            s.append(closeTag)
            if prettyPrint and closeTag and self.nextSibling:
                s.append("\n")
            s = ''.join(s)
        return s

    def prettify(self, encoding=DEFAULT_OUTPUT_ENCODING):
        """Renders this tag with one child per line, indented by depth."""
        return self.__str__(encoding, True)

    def renderContents(self, encoding=DEFAULT_OUTPUT_ENCODING,
                       prettyPrint=False, indentLevel=0):
        """Renders the contents of this tag as a string in the given
        encoding. If encoding is None, returns a Unicode string.."""
        s = []
        for c in self:
            text = None
            if isinstance(c, NavigableString):
                text = c.__str__(encoding)
            elif isinstance(c, Tag):
                s.append(c.__str__(encoding, prettyPrint, indentLevel))
            if text and prettyPrint:
                text = text.strip()
            if text:
                if prettyPrint:
                    s.append(" " * (indentLevel-1))
                s.append(text)
                if prettyPrint:
                    s.append("\n")
        return ''.join(s)

    #Soup methods

    def find(self, name=None, attrs={}, recursive=True, text=None,
             **kwargs):
        """Return only the first child of this Tag matching the given
        criteria."""
        r = None
        l = self.findAll(name, attrs, recursive, text, 1, **kwargs)
        if l:
            r = l[0]
        return r
    findChild = find

    def findAll(self, name=None, attrs={}, recursive=True, text=None,
                limit=None, **kwargs):
        """Extracts a list of Tag objects that match the given
        criteria.  You can specify the name of the Tag and any
        attributes you want the Tag to have.

        The value of a key-value pair in the 'attrs' map can be a
        string, a list of strings, a regular expression object, or a
        callable that takes a string and returns whether or not the
        string matches for some custom definition of 'matches'. The
        same is true of the tag name."""
        generator = self.recursiveChildGenerator
        if not recursive:
            generator = self.childGenerator
        return self._findAll(name, attrs, text, limit, generator, **kwargs)
    findChildren = findAll

    # Pre-3.x compatibility methods
    first = find
    fetch = findAll

    def fetchText(self, text=None, recursive=True, limit=None):
        return self.findAll(text=text, recursive=recursive, limit=limit)

    def firstText(self, text=None, recursive=True):
        return self.find(text=text, recursive=recursive)

    #Utility methods

    def append(self, tag):
        """Appends the given tag to the contents of this tag."""
        self.contents.append(tag)

    #Private methods

    def _getAttrMap(self):
        """Initializes a map representation of this tag's attributes,
        if not already initialized."""
        # BUG FIX: getattr() now has a default. Without it, a missing
        # attrMap fell through to __getattr__, which performed a full
        # subtree search via self.find('attrMap') on every first access.
        if not getattr(self, 'attrMap', None):
            self.attrMap = {}
            for (key, value) in self.attrs:
                self.attrMap[key] = value
        return self.attrMap

    #Generator methods
    def childGenerator(self):
        """Yields each direct child of this tag, in document order."""
        for i in range(0, len(self.contents)):
            yield self.contents[i]
        # (Trailing 'raise StopIteration' removed: it is implicit here,
        # and explicitly raising it breaks under PEP 479 / Python 3.7+.)

    def recursiveChildGenerator(self):
        """Yields every descendant of this tag, depth-first, in
        document order, using an explicit stack of (tag, resume-index)
        pairs instead of recursion."""
        stack = [(self, 0)]
        while stack:
            tag, start = stack.pop()
            if isinstance(tag, Tag):
                for i in range(start, len(tag.contents)):
                    a = tag.contents[i]
                    yield a
                    if isinstance(a, Tag) and tag.contents:
                        if i < len(tag.contents) - 1:
                            # Remember where to resume in this tag before
                            # descending into the child.
                            stack.append((tag, i+1))
                        stack.append((a, 0))
                        break
        # (Trailing 'raise StopIteration' removed; see childGenerator.)
699 | -# Next, a couple classes to represent queries and their results. | ||
700 | -class SoupStrainer: | ||
701 | - """Encapsulates a number of ways of matching a markup element (tag or | ||
702 | - text).""" | ||
703 | - | ||
704 | - def __init__(self, name=None, attrs={}, text=None, **kwargs): | ||
705 | - self.name = name | ||
706 | - if isString(attrs): | ||
707 | - kwargs['class'] = attrs | ||
708 | - attrs = None | ||
709 | - if kwargs: | ||
710 | - if attrs: | ||
711 | - attrs = attrs.copy() | ||
712 | - attrs.update(kwargs) | ||
713 | - else: | ||
714 | - attrs = kwargs | ||
715 | - self.attrs = attrs | ||
716 | - self.text = text | ||
717 | - | ||
718 | - def __str__(self): | ||
719 | - if self.text: | ||
720 | - return self.text | ||
721 | - else: | ||
722 | - return "%s|%s" % (self.name, self.attrs) | ||
723 | - | ||
724 | - def searchTag(self, markupName=None, markupAttrs={}): | ||
725 | - found = None | ||
726 | - markup = None | ||
727 | - if isinstance(markupName, Tag): | ||
728 | - markup = markupName | ||
729 | - markupAttrs = markup | ||
730 | - callFunctionWithTagData = callable(self.name) \ | ||
731 | - and not isinstance(markupName, Tag) | ||
732 | - | ||
733 | - if (not self.name) \ | ||
734 | - or callFunctionWithTagData \ | ||
735 | - or (markup and self._matches(markup, self.name)) \ | ||
736 | - or (not markup and self._matches(markupName, self.name)): | ||
737 | - if callFunctionWithTagData: | ||
738 | - match = self.name(markupName, markupAttrs) | ||
739 | - else: | ||
740 | - match = True | ||
741 | - markupAttrMap = None | ||
742 | - for attr, matchAgainst in self.attrs.items(): | ||
743 | - if not markupAttrMap: | ||
744 | - if hasattr(markupAttrs, 'get'): | ||
745 | - markupAttrMap = markupAttrs | ||
746 | - else: | ||
747 | - markupAttrMap = {} | ||
748 | - for k,v in markupAttrs: | ||
749 | - markupAttrMap[k] = v | ||
750 | - attrValue = markupAttrMap.get(attr) | ||
751 | - if not self._matches(attrValue, matchAgainst): | ||
752 | - match = False | ||
753 | - break | ||
754 | - if match: | ||
755 | - if markup: | ||
756 | - found = markup | ||
757 | - else: | ||
758 | - found = markupName | ||
759 | - return found | ||
760 | - | ||
    def search(self, markup):
        """Match this strainer against an arbitrary piece of markup: a
        Tag, a NavigableString, a plain string, or a list of any of
        those. Returns the matching element, or None."""
        #print 'looking for %s in %s' % (self, markup)
        found = None
        # If given a list of items, scan it for a text element that
        # matches.
        if isList(markup) and not isinstance(markup, Tag):
            for element in markup:
                if isinstance(element, NavigableString) \
                       and self.search(element):
                    found = element
                    break
        # If it's a Tag, make sure its name or attributes match.
        # Don't bother with Tags if we're searching for text.
        elif isinstance(markup, Tag):
            if not self.text:
                found = self.searchTag(markup)
        # If it's text, make sure the text matches.
        elif isinstance(markup, NavigableString) or \
                 isString(markup):
            if self._matches(markup, self.text):
                found = markup
        else:
            # Anything else is a caller error.
            raise Exception, "I don't know how to match against a %s" \
                  % markup.__class__
        return found
786 | - | ||
    def _matches(self, markup, matchAgainst):
        """Core matching primitive: test one value (a Tag, a string, or
        None) against one criterion (True, a callable, a regexp, a
        list, a map, or a string). Returns a truthy value on match."""
        #print "Matching %s against %s" % (markup, matchAgainst)
        result = False
        # The literal True (and only the bool, not 1) means "match
        # anything that exists at all".
        if matchAgainst == True and type(matchAgainst) == types.BooleanType:
            result = markup != None
        elif callable(matchAgainst):
            result = matchAgainst(markup)
        else:
            #Custom match methods take the tag as an argument, but all
            #other ways of matching match the tag name as a string.
            if isinstance(markup, Tag):
                markup = markup.name
            if markup and not isString(markup):
                markup = unicode(markup)
            #Now we know that chunk is either a string, or None.
            if hasattr(matchAgainst, 'match'):
                # It's a regexp object.
                result = markup and matchAgainst.search(markup)
            elif isList(matchAgainst):
                result = markup in matchAgainst
            elif hasattr(matchAgainst, 'items'):
                # NOTE(review): markup is a string or None at this
                # point, and strings have no has_key; this looks like
                # it meant matchAgainst.has_key(markup) -- TODO confirm
                # against upstream before relying on map criteria here.
                result = markup.has_key(matchAgainst)
            elif matchAgainst and isString(markup):
                # Coerce the criterion to the same string type as the
                # markup before the equality fallback below.
                if isinstance(markup, unicode):
                    matchAgainst = unicode(matchAgainst)
                else:
                    matchAgainst = str(matchAgainst)

            if not result:
                result = matchAgainst == markup
        return result
818 | - | ||
class ResultSet(list):
    """A ResultSet is just a list that keeps track of the SoupStrainer
    that created it."""
    def __init__(self, source):
        # Bug fix: the original called list.__init__([]), which
        # initialized a throwaway empty list rather than this instance.
        # Harmless in CPython (the instance starts empty anyway) but
        # clearly not what was meant.
        list.__init__(self)
        # The SoupStrainer that produced this set of results.
        self.source = source
825 | - | ||
826 | -# Now, some helper functions. | ||
827 | - | ||
def isList(l):
    """Convenience method that works with all 2.x versions of Python
    to determine whether or not something is listlike."""
    # Anything exposing the iterator protocol counts; otherwise fall
    # back to an explicit check for the two built-in sequence types.
    if hasattr(l, '__iter__'):
        return True
    return type(l) in (types.ListType, types.TupleType)
833 | - | ||
def isString(s):
    """Convenience method that works with all 2.x versions of Python
    to determine whether or not something is stringlike.

    Accepts both str and unicode (any basestring) where the unicode
    builtin exists; otherwise falls back to a plain str check.
    """
    try:
        # Bug fix: this previously read "isintance(s, basestring)".
        # The resulting NameError happened to be swallowed by the
        # except clause below, so the basestring branch never ran as
        # written and plain-str matching worked only by accident.
        return isinstance(s, unicode) or isinstance(s, basestring)
    except NameError:
        return isinstance(s, str)
841 | - | ||
def buildTagMap(default, *args):
    """Turns a list of maps, lists, or scalars into a single map.
    Used to build the SELF_CLOSING_TAGS, NESTABLE_TAGS, and
    NESTING_RESET_TAGS maps out of lists and partial maps."""
    mapping = {}
    for piece in args:
        if hasattr(piece, 'items'):
            # A partial map: merge its entries through unchanged.
            for key, value in piece.items():
                mapping[key] = value
        elif isList(piece):
            # A list: every member becomes a key with the shared default.
            for key in piece:
                mapping[key] = default
        else:
            # A scalar: a single key mapped to the default.
            mapping[piece] = default
    return mapping
860 | - | ||
861 | -# Now, the parser classes. | ||
862 | - | ||
class BeautifulStoneSoup(Tag, SGMLParser):

    """This class contains the basic parser and search code. It defines
    a parser that knows nothing about tag behavior except for the
    following:

      You can't close a tag without closing all the tags it encloses.
      That is, "<foo><bar></foo>" actually means
      "<foo><bar></bar></foo>".

    [Another possible explanation is "<foo><bar /></foo>", but since
    this class defines no SELF_CLOSING_TAGS, it will never use that
    explanation.]

    This class is useful for parsing XML or made-up markup languages,
    or when BeautifulSoup makes an assumption counter to what you were
    expecting."""

    # Set of entity strings (the values of XML_SPECIAL_CHARS_TO_ENTITIES)
    # that may be converted when convertEntities is XML_ENTITIES.
    XML_ENTITY_LIST = {}
    for i in Tag.XML_SPECIAL_CHARS_TO_ENTITIES.values():
        XML_ENTITY_LIST[i] = True

    # Tag-behavior tables. The base parser assumes nothing; subclasses
    # (e.g. BeautifulSoup) override these with real HTML knowledge.
    SELF_CLOSING_TAGS = {}
    NESTABLE_TAGS = {}
    RESET_NESTING_TAGS = {}
    QUOTE_TAGS = {}

    # (regexp, replacement) pairs applied to the markup before parsing
    # when markupMassage is on; see the __init__ docstring.
    MARKUP_MASSAGE = [(re.compile('(<[^<>]*)/>'),
                       lambda x: x.group(1) + ' />'),
                      (re.compile('<!\s+([^<>]*)>'),
                       lambda x: '<!' + x.group(1) + '>')
                      ]

    # Name given to the synthetic root tag that owns the whole tree.
    ROOT_TAG_NAME = u'[document]'

    # Values for the convertEntities/smartQuotesTo arguments.
    HTML_ENTITIES = "html"
    XML_ENTITIES = "xml"

    def __init__(self, markup="", parseOnlyThese=None, fromEncoding=None,
                 markupMassage=True, smartQuotesTo=XML_ENTITIES,
                 convertEntities=None, selfClosingTags=None):
        """The Soup object is initialized as the 'root tag', and the
        provided markup (which can be a string or a file-like object)
        is fed into the underlying parser.

        sgmllib will process most bad HTML, and the BeautifulSoup
        class has some tricks for dealing with some HTML that kills
        sgmllib, but Beautiful Soup can nonetheless choke or lose data
        if your data uses self-closing tags or declarations
        incorrectly.

        By default, Beautiful Soup uses regexes to sanitize input,
        avoiding the vast majority of these problems. If the problems
        don't apply to you, pass in False for markupMassage, and
        you'll get better performance.

        The default parser massage techniques fix the two most common
        instances of invalid HTML that choke sgmllib:

         <br/> (No space between name of closing tag and tag close)
         <! --Comment--> (Extraneous whitespace in declaration)

        You can pass in a custom list of (RE object, replace method)
        tuples to get Beautiful Soup to scrub your input the way you
        want."""

        self.parseOnlyThese = parseOnlyThese
        self.fromEncoding = fromEncoding
        self.smartQuotesTo = smartQuotesTo
        self.convertEntities = convertEntities
        if self.convertEntities:
            # It doesn't make sense to convert encoded characters to
            # entities even while you're converting entities to Unicode.
            # Just convert it all to Unicode.
            self.smartQuotesTo = None
        self.instanceSelfClosingTags = buildTagMap(None, selfClosingTags)
        SGMLParser.__init__(self)

        if hasattr(markup, 'read'):        # It's a file-type object.
            markup = markup.read()
        self.markup = markup
        self.markupMassage = markupMassage
        try:
            self._feed()
        except StopParsing:
            pass
        self.markup = None                 # The markup can now be GCed

    def _feed(self, inDocumentEncoding=None):
        """Convert self.markup to Unicode, optionally massage it, and
        run it through the SGML parser to build the tree."""
        # Convert the document to Unicode.
        markup = self.markup
        if isinstance(markup, unicode):
            if not hasattr(self, 'originalEncoding'):
                self.originalEncoding = None
        else:
            dammit = UnicodeDammit\
                     (markup, [self.fromEncoding, inDocumentEncoding],
                      smartQuotesTo=self.smartQuotesTo)
            markup = dammit.unicode
            self.originalEncoding = dammit.originalEncoding
        if markup:
            if self.markupMassage:
                # markupMassage=True selects the default fix list; a
                # list value is used as-is.
                if not isList(self.markupMassage):
                    self.markupMassage = self.MARKUP_MASSAGE
                for fix, m in self.markupMassage:
                    markup = fix.sub(m, markup)
        self.reset()

        SGMLParser.feed(self, markup)
        # Close out any unfinished strings and close all the open tags.
        self.endData()
        while self.currentTag.name != self.ROOT_TAG_NAME:
            self.popTag()

    def __getattr__(self, methodName):
        """This method routes method call requests to either the SGMLParser
        superclass or the Tag superclass, depending on the method name."""
        #print "__getattr__ called on %s.%s" % (self.__class__, methodName)

        # sgmllib looks up start_/end_/do_ handlers by name; everything
        # else (tag navigation like soup.body) belongs to Tag.
        if methodName.find('start_') == 0 or methodName.find('end_') == 0 \
               or methodName.find('do_') == 0:
            return SGMLParser.__getattr__(self, methodName)
        elif methodName.find('__') != 0:
            return Tag.__getattr__(self, methodName)
        else:
            raise AttributeError

    def isSelfClosingTag(self, name):
        """Returns true iff the given string is the name of a
        self-closing tag according to this parser."""
        return self.SELF_CLOSING_TAGS.has_key(name) \
               or self.instanceSelfClosingTags.has_key(name)

    def reset(self):
        """Reset all parser state so the soup can be (re)fed markup."""
        # The soup object itself is the root tag of the tree.
        Tag.__init__(self, self, self.ROOT_TAG_NAME)
        self.hidden = 1
        SGMLParser.reset(self)
        self.currentData = []
        self.currentTag = None
        self.tagStack = []
        self.quoteStack = []
        self.pushTag(self)

    def popTag(self):
        """Close the tag on top of the stack; returns the new current tag."""
        tag = self.tagStack.pop()
        # Tags with just one string-owning child get the child as a
        # 'string' property, so that soup.tag.string is shorthand for
        # soup.tag.contents[0]
        if len(self.currentTag.contents) == 1 and \
           isinstance(self.currentTag.contents[0], NavigableString):
            self.currentTag.string = self.currentTag.contents[0]

        #print "Pop", tag.name
        if self.tagStack:
            self.currentTag = self.tagStack[-1]
        return self.currentTag

    def pushTag(self, tag):
        """Open a new tag: append it to the current tag and make it current."""
        #print "Push", tag.name
        if self.currentTag:
            self.currentTag.append(tag)
        self.tagStack.append(tag)
        self.currentTag = self.tagStack[-1]

    def endData(self, containerClass=NavigableString):
        """Flush accumulated character data into the tree as a
        containerClass node (NavigableString by default)."""
        if self.currentData:
            currentData = ''.join(self.currentData)
            # Collapse pure whitespace down to a single '\n' or ' '.
            if not currentData.strip():
                if '\n' in currentData:
                    currentData = '\n'
                else:
                    currentData = ' '
            self.currentData = []
            # When filtering with a SoupStrainer, drop top-level text
            # that the strainer doesn't want.
            if self.parseOnlyThese and len(self.tagStack) <= 1 and \
                   (not self.parseOnlyThese.text or \
                    not self.parseOnlyThese.search(currentData)):
                return
            o = containerClass(currentData)
            o.setup(self.currentTag, self.previous)
            if self.previous:
                self.previous.next = o
            self.previous = o
            self.currentTag.contents.append(o)


    def _popToTag(self, name, inclusivePop=True):
        """Pops the tag stack up to and including the most recent
        instance of the given tag. If inclusivePop is false, pops the tag
        stack up to but *not* including the most recent instance of
        the given tag."""
        #print "Popping to %s" % name
        if name == self.ROOT_TAG_NAME:
            # The root is never popped.
            return

        numPops = 0
        mostRecentTag = None
        # Find the most recent open tag with this name (index 0 is the
        # root and is excluded).
        for i in range(len(self.tagStack)-1, 0, -1):
            if name == self.tagStack[i].name:
                numPops = len(self.tagStack)-i
                break
        if not inclusivePop:
            numPops = numPops - 1

        for i in range(0, numPops):
            mostRecentTag = self.popTag()
        return mostRecentTag

    def _smartPop(self, name):

        """We need to pop up to the previous tag of this type, unless
        one of this tag's nesting reset triggers comes between this
        tag and the previous tag of this type, OR unless this tag is a
        generic nesting trigger and another generic nesting trigger
        comes between this tag and the previous tag of this type.

        Examples:
         <p>Foo<b>Bar<p> should pop to 'p', not 'b'.
         <p>Foo<table>Bar<p> should pop to 'table', not 'p'.
         <p>Foo<table><tr>Bar<p> should pop to 'tr', not 'p'.
         <p>Foo<b>Bar<p> should pop to 'p', not 'b'.

         <li><ul><li> *<li>* should pop to 'ul', not the first 'li'.
         <tr><table><tr> *<tr>* should pop to 'table', not the first 'tr'
         <td><tr><td> *<td>* should pop to 'tr', not the first 'td'
        """

        nestingResetTriggers = self.NESTABLE_TAGS.get(name)
        isNestable = nestingResetTriggers != None
        isResetNesting = self.RESET_NESTING_TAGS.has_key(name)
        popTo = None
        inclusive = True
        # Walk down the open-tag stack looking for either a previous
        # tag of this name or a nesting-reset boundary.
        for i in range(len(self.tagStack)-1, 0, -1):
            p = self.tagStack[i]
            if (not p or p.name == name) and not isNestable:
                #Non-nestable tags get popped to the top or to their
                #last occurance.
                popTo = name
                break
            if (nestingResetTriggers != None
                and p.name in nestingResetTriggers) \
                or (nestingResetTriggers == None and isResetNesting
                    and self.RESET_NESTING_TAGS.has_key(p.name)):

                #If we encounter one of the nesting reset triggers
                #peculiar to this tag, or we encounter another tag
                #that causes nesting to reset, pop up to but not
                #including that tag.
                popTo = p.name
                inclusive = False
                break
            # NOTE(review): p is immediately reassigned by the next
            # loop iteration, so this line appears to have no effect.
            p = p.parent
        if popTo:
            self._popToTag(popTo, inclusive)

    def unknown_starttag(self, name, attrs, selfClosing=0):
        """sgmllib callback for an opening tag: build a Tag node and
        push it (or pop it right away if self-closing)."""
        #print "Start tag %s: %s" % (name, attrs)
        if self.quoteStack:
            #This is not a real tag.
            #print "<%s> is not real!" % name
            # Inside a quote tag (e.g. <script>): re-serialize the tag
            # and treat it as literal character data.
            attrs = ''.join(map(lambda(x, y): ' %s="%s"' % (x, y), attrs))
            self.handle_data('<%s%s>' % (name, attrs))
            return
        self.endData()

        if not self.isSelfClosingTag(name) and not selfClosing:
            self._smartPop(name)

        # When filtering with a SoupStrainer, skip top-level tags the
        # strainer rejects.
        if self.parseOnlyThese and len(self.tagStack) <= 1 \
               and (self.parseOnlyThese.text or not self.parseOnlyThese.searchTag(name, attrs)):
            return

        tag = Tag(self, name, attrs, self.currentTag, self.previous)
        if self.previous:
            self.previous.next = tag
        self.previous = tag
        self.pushTag(tag)
        if selfClosing or self.isSelfClosingTag(name):
            self.popTag()
        if name in self.QUOTE_TAGS:
            #print "Beginning quote (%s)" % name
            self.quoteStack.append(name)
            self.literal = 1
        return tag

    def unknown_endtag(self, name):
        """sgmllib callback for a closing tag: pop up to the matching
        open tag, honoring quote-tag literal mode."""
        #print "End tag %s" % name
        if self.quoteStack and self.quoteStack[-1] != name:
            #This is not a real end tag.
            #print "</%s> is not real!" % name
            self.handle_data('</%s>' % name)
            return
        self.endData()
        self._popToTag(name)
        if self.quoteStack and self.quoteStack[-1] == name:
            self.quoteStack.pop()
            self.literal = (len(self.quoteStack) > 0)

    def handle_data(self, data):
        # Accumulate character data; endData() flushes it into the tree.
        self.currentData.append(data)

    def _toStringSubclass(self, text, subclass):
        """Adds a certain piece of text to the tree as a NavigableString
        subclass."""
        self.endData()
        self.handle_data(text)
        self.endData(subclass)

    def handle_pi(self, text):
        """Handle a processing instruction as a ProcessingInstruction
        object, possibly one with a %SOUP-ENCODING% slot into which an
        encoding will be plugged later."""
        if text[:3] == "xml":
            text = "xml version='1.0' encoding='%SOUP-ENCODING%'"
        self._toStringSubclass(text, ProcessingInstruction)

    def handle_comment(self, text):
        "Handle comments as Comment objects."
        self._toStringSubclass(text, Comment)

    def handle_charref(self, ref):
        "Handle character references as data."
        # Convert numeric references to real characters only when
        # entity conversion was requested.
        if self.convertEntities in [self.HTML_ENTITIES,
                                    self.XML_ENTITIES]:
            data = unichr(int(ref))
        else:
            data = '&#%s;' % ref
        self.handle_data(data)

    def handle_entityref(self, ref):
        """Handle entity references as data, possibly converting known
        HTML entity references to the corresponding Unicode
        characters."""
        data = None
        # XML mode only converts the five predefined XML entities.
        if self.convertEntities == self.HTML_ENTITIES or \
               (self.convertEntities == self.XML_ENTITIES and \
                self.XML_ENTITY_LIST.get(ref)):
            try:
                data = unichr(name2codepoint[ref])
            except KeyError:
                # Unknown entity name: pass it through unchanged.
                pass
        if not data:
            data = '&%s;' % ref
        self.handle_data(data)

    def handle_decl(self, data):
        "Handle DOCTYPEs and the like as Declaration objects."
        self._toStringSubclass(data, Declaration)

    def parse_declaration(self, i):
        """Treat a bogus SGML declaration as raw data. Treat a CDATA
        declaration as a CData object."""
        j = None
        if self.rawdata[i:i+9] == '<![CDATA[':
            k = self.rawdata.find(']]>', i)
            if k == -1:
                # Unterminated CDATA: take everything to the end.
                k = len(self.rawdata)
            data = self.rawdata[i+9:k]
            j = k+3
            self._toStringSubclass(data, CData)
        else:
            try:
                j = SGMLParser.parse_declaration(self, i)
            except SGMLParseError:
                # Malformed declaration: swallow the rest as raw data.
                toHandle = self.rawdata[i:]
                self.handle_data(toHandle)
                j = i + len(toHandle)
        return j
1230 | - | ||
class BeautifulSoup(BeautifulStoneSoup):

    """This parser knows the following facts about HTML:

    * Some tags have no closing tag and should be interpreted as being
      closed as soon as they are encountered.

    * The text inside some tags (ie. 'script') may contain tags which
      are not really part of the document and which should be parsed
      as text, not tags. If you want to parse the text as tags, you can
      always fetch it and parse it explicitly.

    * Tag nesting rules:

      Most tags can't be nested at all. For instance, the occurrence of
      a <p> tag should implicitly close the previous <p> tag.

       <p>Para1<p>Para2
      should be transformed into:
       <p>Para1</p><p>Para2

      Some tags can be nested arbitrarily. For instance, the occurrence
      of a <blockquote> tag should _not_ implicitly close the previous
      <blockquote> tag.

       Alice said: <blockquote>Bob said: <blockquote>Blah
      should NOT be transformed into:
       Alice said: <blockquote>Bob said: </blockquote><blockquote>Blah

      Some tags can be nested, but the nesting is reset by the
      interposition of other tags. For instance, a <tr> tag should
      implicitly close the previous <tr> tag within the same <table>,
      but not close a <tr> tag in another table.

       <table><tr>Blah<tr>Blah
      should be transformed into:
       <table><tr>Blah</tr><tr>Blah
      but,
       <tr>Blah<table><tr>Blah
      should NOT be transformed into
       <tr>Blah<table></tr><tr>Blah

    Differing assumptions about tag nesting rules are a major source
    of problems with the BeautifulSoup class. If BeautifulSoup is not
    treating as nestable a tag your page author treats as nestable,
    try ICantBelieveItsBeautifulSoup, MinimalSoup, or
    BeautifulStoneSoup before writing your own subclass."""

    def __init__(self, *args, **kwargs):
        """Same as BeautifulStoneSoup, but smart quotes default to
        HTML entities rather than XML entities."""
        if not kwargs.has_key('smartQuotesTo'):
            kwargs['smartQuotesTo'] = self.HTML_ENTITIES
        BeautifulStoneSoup.__init__(self, *args, **kwargs)

    SELF_CLOSING_TAGS = buildTagMap(None,
                                    ['br' , 'hr', 'input', 'img', 'meta',
                                     'spacer', 'link', 'frame', 'base'])

    QUOTE_TAGS = {'script': None}

    #According to the HTML standard, each of these inline tags can
    #contain another tag of the same type. Furthermore, it's common
    #to actually use these tags this way.
    NESTABLE_INLINE_TAGS = ['span', 'font', 'q', 'object', 'bdo', 'sub', 'sup',
                            'center']

    #According to the HTML standard, these block tags can contain
    #another tag of the same type. Furthermore, it's common
    #to actually use these tags this way.
    NESTABLE_BLOCK_TAGS = ['blockquote', 'div', 'fieldset', 'ins', 'del']

    #Lists can contain other lists, but there are restrictions.
    NESTABLE_LIST_TAGS = { 'ol' : [],
                           'ul' : [],
                           'li' : ['ul', 'ol'],
                           'dl' : [],
                           'dd' : ['dl'],
                           'dt' : ['dl'] }

    #Tables can contain other tables, but there are restrictions.
    NESTABLE_TABLE_TAGS = {'table' : [],
                           'tr' : ['table', 'tbody', 'tfoot', 'thead'],
                           'td' : ['tr'],
                           'th' : ['tr'],
                           'thead' : ['table'],
                           'tbody' : ['table'],
                           'tfoot' : ['table'],
                           }

    NON_NESTABLE_BLOCK_TAGS = ['address', 'form', 'p', 'pre']

    #If one of these tags is encountered, all tags up to the next tag of
    #this type are popped.
    RESET_NESTING_TAGS = buildTagMap(None, NESTABLE_BLOCK_TAGS, 'noscript',
                                     NON_NESTABLE_BLOCK_TAGS,
                                     NESTABLE_LIST_TAGS,
                                     NESTABLE_TABLE_TAGS)

    NESTABLE_TAGS = buildTagMap([], NESTABLE_INLINE_TAGS, NESTABLE_BLOCK_TAGS,
                                NESTABLE_LIST_TAGS, NESTABLE_TABLE_TAGS)

    # Used to detect the charset in a META tag; see start_meta
    CHARSET_RE = re.compile("((^|;)\s*charset=)([^;]*)")

    def start_meta(self, attrs):
        """Beautiful Soup can detect a charset included in a META tag,
        try to convert the document to that charset, and re-parse the
        document from the beginning."""
        httpEquiv = None
        contentType = None
        contentTypeIndex = None
        tagNeedsEncodingSubstitution = False

        # Pull out the http-equiv and content attributes, remembering
        # where content lives so it can be rewritten in place.
        for i in range(0, len(attrs)):
            key, value = attrs[i]
            key = key.lower()
            if key == 'http-equiv':
                httpEquiv = value
            elif key == 'content':
                contentType = value
                contentTypeIndex = i

        if httpEquiv and contentType: # It's an interesting meta tag.
            match = self.CHARSET_RE.search(contentType)
            if match:
                # NOTE(review): getattr with no default raises
                # AttributeError when declaredHTMLEncoding has never
                # been set (i.e. on the first pass); this looks like it
                # wants a None default -- TODO confirm.
                if getattr(self, 'declaredHTMLEncoding') or \
                       (self.originalEncoding == self.fromEncoding):
                    # This is our second pass through the document, or
                    # else an encoding was specified explicitly and it
                    # worked. Rewrite the meta tag.
                    newAttr = self.CHARSET_RE.sub\
                              (lambda(match):match.group(1) +
                               "%SOUP-ENCODING%", value)
                    attrs[contentTypeIndex] = (attrs[contentTypeIndex][0],
                                               newAttr)
                    tagNeedsEncodingSubstitution = True
                else:
                    # This is our first pass through the document.
                    # Go through it again with the new information.
                    newCharset = match.group(3)
                    if newCharset and newCharset != self.originalEncoding:
                        self.declaredHTMLEncoding = newCharset
                        self._feed(self.declaredHTMLEncoding)
                        raise StopParsing
        tag = self.unknown_starttag("meta", attrs)
        if tag and tagNeedsEncodingSubstitution:
            tag.containsSubstitutions = True
1377 | - | ||
class StopParsing(Exception):
    """Internal signal used to abort the current parse, e.g. when
    start_meta discovers a new encoding and re-feeds the document."""
1380 | - | ||
class ICantBelieveItsBeautifulSoup(BeautifulSoup):

    """The BeautifulSoup class is oriented towards skipping over
    common HTML errors like unclosed tags. However, sometimes it makes
    errors of its own. For instance, consider this fragment:

     <b>Foo<b>Bar</b></b>

    This is perfectly valid (if bizarre) HTML. However, the
    BeautifulSoup class will implicitly close the first b tag when it
    encounters the second 'b'. It will think the author wrote
    "<b>Foo<b>Bar", and didn't close the first 'b' tag, because
    there's no real-world reason to bold something that's already
    bold. When it encounters '</b></b>' it will close two more 'b'
    tags, for a grand total of three tags closed instead of two. This
    can throw off the rest of your document structure. The same is
    true of a number of other tags, listed below.

    It's much more common for someone to forget to close a 'b' tag
    than to actually use nested 'b' tags, and the BeautifulSoup class
    handles the common case. This class handles the not-so-common
    case: where you can't believe someone wrote what they did, but
    it's valid HTML and BeautifulSoup screwed up by assuming it
    wouldn't be."""

    # Inline tags treated as arbitrarily nestable by this parser.
    # ('strong' and 'big' appear twice; harmless, since the list is
    # folded into a map by buildTagMap below.)
    I_CANT_BELIEVE_THEYRE_NESTABLE_INLINE_TAGS = \
     ['em', 'big', 'i', 'small', 'tt', 'abbr', 'acronym', 'strong',
      'cite', 'code', 'dfn', 'kbd', 'samp', 'strong', 'var', 'b',
      'big']

    # Block tags treated as arbitrarily nestable by this parser.
    I_CANT_BELIEVE_THEYRE_NESTABLE_BLOCK_TAGS = ['noscript']

    NESTABLE_TAGS = buildTagMap([], BeautifulSoup.NESTABLE_TAGS,
                                I_CANT_BELIEVE_THEYRE_NESTABLE_BLOCK_TAGS,
                                I_CANT_BELIEVE_THEYRE_NESTABLE_INLINE_TAGS)
1416 | - | ||
class MinimalSoup(BeautifulSoup):
    """The MinimalSoup class is for parsing HTML that contains
    pathologically bad markup. It makes no assumptions about tag
    nesting, but it does know which tags are self-closing, that
    <script> tags contain Javascript and should not be parsed, that
    META tags may contain encoding information, and so on.

    This also makes it better for subclassing than BeautifulStoneSoup
    or BeautifulSoup."""

    # NOTE(review): buildTagMap's signature is (default, *args); with
    # only one argument, 'noscript' becomes the *default* and no tags
    # are passed, so this evaluates to an empty map. It looks like it
    # was meant to be buildTagMap(None, 'noscript') -- TODO confirm
    # against upstream before changing.
    RESET_NESTING_TAGS = buildTagMap('noscript')
    NESTABLE_TAGS = {}
1429 | - | ||
class BeautifulSOAP(BeautifulStoneSoup):
    """This class will push a tag with only a single string child into
    the tag's parent as an attribute. The attribute's name is the tag
    name, and the value is the string child. An example should give
    the flavor of the change:

    <foo><bar>baz</bar></foo>
     =>
    <foo bar="baz"><bar>baz</bar></foo>

    You can then access fooTag['bar'] instead of fooTag.barTag.string.

    This is, of course, useful for scraping structures that tend to
    use subelements instead of attributes, such as SOAP messages. Note
    that it modifies its input, so don't print the modified version
    out.

    I'm not sure how many people really want to use this class; let me
    know if you do. Mainly I like the name."""

    def popTag(self):
        """On tag close, copy a single-string child up into the parent
        as an attribute (unless the parent already has one by that
        name), then defer to the normal popTag."""
        if len(self.tagStack) > 1:
            tag = self.tagStack[-1]
            parent = self.tagStack[-2]
            # Make sure parent.attrMap exists before checking it.
            parent._getAttrMap()
            if (isinstance(tag, Tag) and len(tag.contents) == 1 and
                isinstance(tag.contents[0], NavigableString) and
                not parent.attrMap.has_key(tag.name)):
                parent[tag.name] = tag.contents[0]
        BeautifulStoneSoup.popTag(self)
1460 | - | ||
1461 | -#Enterprise class names! It has come to our attention that some people | ||
1462 | -#think the names of the Beautiful Soup parser classes are too silly | ||
1463 | -#and "unprofessional" for use in enterprise screen-scraping. We feel | ||
1464 | -#your pain! For such-minded folk, the Beautiful Soup Consortium And | ||
1465 | -#All-Night Kosher Bakery recommends renaming this file to | ||
#"RobustParser.py" (or, in cases of extreme enterprisiness,
1467 | -#"RobustParserBeanInterface.class") and using the following | ||
1468 | -#enterprise-friendly class aliases: | ||
class RobustXMLParser(BeautifulStoneSoup):
    # Enterprise-friendly alias for BeautifulStoneSoup.
    pass
class RobustHTMLParser(BeautifulSoup):
    # Enterprise-friendly alias for BeautifulSoup.
    pass
class RobustWackAssHTMLParser(ICantBelieveItsBeautifulSoup):
    # Enterprise-friendly alias for ICantBelieveItsBeautifulSoup.
    pass
class RobustInsanelyWackAssHTMLParser(MinimalSoup):
    # Enterprise-friendly alias for MinimalSoup.
    pass
class SimplifyingSOAPParser(BeautifulSOAP):
    # Enterprise-friendly alias for BeautifulSOAP.
    pass
1479 | - | ||
1480 | -###################################################### | ||
1481 | -# | ||
1482 | -# Bonus library: Unicode, Dammit | ||
1483 | -# | ||
1484 | -# This class forces XML data into a standard format (usually to UTF-8 | ||
1485 | -# or Unicode). It is heavily based on code from Mark Pilgrim's | ||
1486 | -# Universal Feed Parser. It does not rewrite the XML or HTML to | ||
1487 | -# reflect a new encoding: that happens in BeautifulStoneSoup.handle_pi | ||
1488 | -# (XML) and BeautifulSoup.start_meta (HTML). | ||
1489 | - | ||
1490 | -# Autodetects character encodings. | ||
1491 | -# Download from http://chardet.feedparser.org/ | ||
1492 | -try: | ||
1493 | - import chardet | ||
1494 | -# import chardet.constants | ||
1495 | -# chardet.constants._debug = 1 | ||
1496 | -except: | ||
1497 | - chardet = None | ||
1498 | -chardet = None | ||
1499 | - | ||
1500 | -# cjkcodecs and iconv_codec make Python know about more character encodings. | ||
1501 | -# Both are available from http://cjkpython.i18n.org/ | ||
1502 | -# They're built in if you use Python 2.4. | ||
1503 | -try: | ||
1504 | - import cjkcodecs.aliases | ||
1505 | -except: | ||
1506 | - pass | ||
1507 | -try: | ||
1508 | - import iconv_codec | ||
1509 | -except: | ||
1510 | - pass | ||
1511 | - | ||
1512 | -class UnicodeDammit: | ||
1513 | - """A class for detecting the encoding of a *ML document and | ||
1514 | - converting it to a Unicode string. If the source encoding is | ||
1515 | - windows-1252, can replace MS smart quotes with their HTML or XML | ||
1516 | - equivalents.""" | ||
1517 | - | ||
1518 | - # This dictionary maps commonly seen values for "charset" in HTML | ||
1519 | - # meta tags to the corresponding Python codec names. It only covers | ||
1520 | - # values that aren't in Python's aliases and can't be determined | ||
1521 | - # by the heuristics in find_codec. | ||
1522 | - CHARSET_ALIASES = { "macintosh" : "mac-roman", | ||
1523 | - "x-sjis" : "shift-jis" } | ||
1524 | - | ||
1525 | - def __init__(self, markup, overrideEncodings=[], | ||
1526 | - smartQuotesTo='xml'): | ||
1527 | - self.markup, documentEncoding, sniffedEncoding = \ | ||
1528 | - self._detectEncoding(markup) | ||
1529 | - self.smartQuotesTo = smartQuotesTo | ||
1530 | - self.triedEncodings = [] | ||
1531 | - if markup == '' or isinstance(markup, unicode): | ||
1532 | - self.originalEncoding = None | ||
1533 | - self.unicode = unicode(markup) | ||
1534 | - return | ||
1535 | - | ||
1536 | - u = None | ||
1537 | - for proposedEncoding in overrideEncodings: | ||
1538 | - u = self._convertFrom(proposedEncoding) | ||
1539 | - if u: break | ||
1540 | - if not u: | ||
1541 | - for proposedEncoding in (documentEncoding, sniffedEncoding): | ||
1542 | - u = self._convertFrom(proposedEncoding) | ||
1543 | - if u: break | ||
1544 | - | ||
1545 | - # If no luck and we have auto-detection library, try that: | ||
1546 | - if not u and chardet and not isinstance(self.markup, unicode): | ||
1547 | - u = self._convertFrom(chardet.detect(self.markup)['encoding']) | ||
1548 | - | ||
1549 | - # As a last resort, try utf-8 and windows-1252: | ||
1550 | - if not u: | ||
1551 | - for proposed_encoding in ("utf-8", "windows-1252"): | ||
1552 | - u = self._convertFrom(proposed_encoding) | ||
1553 | - if u: break | ||
1554 | - self.unicode = u | ||
1555 | - if not u: self.originalEncoding = None | ||
1556 | - | ||
1557 | - def _subMSChar(self, orig): | ||
1558 | - """Changes a MS smart quote character to an XML or HTML | ||
1559 | - entity.""" | ||
1560 | - sub = self.MS_CHARS.get(orig) | ||
1561 | - if type(sub) == types.TupleType: | ||
1562 | - if self.smartQuotesTo == 'xml': | ||
1563 | - sub = '&#x%s;' % sub[1] | ||
1564 | - else: | ||
1565 | - sub = '&%s;' % sub[0] | ||
1566 | - return sub | ||
1567 | - | ||
1568 | - def _convertFrom(self, proposed): | ||
1569 | - proposed = self.find_codec(proposed) | ||
1570 | - if not proposed or proposed in self.triedEncodings: | ||
1571 | - return None | ||
1572 | - self.triedEncodings.append(proposed) | ||
1573 | - markup = self.markup | ||
1574 | - | ||
1575 | - # Convert smart quotes to HTML if coming from an encoding | ||
1576 | - # that might have them. | ||
1577 | - if self.smartQuotesTo and proposed.lower() in("windows-1252", | ||
1578 | - "iso-8859-1", | ||
1579 | - "iso-8859-2"): | ||
1580 | - markup = re.compile("([\x80-\x9f])").sub \ | ||
1581 | - (lambda(x): self._subMSChar(x.group(1)), | ||
1582 | - markup) | ||
1583 | - | ||
1584 | - try: | ||
1585 | - # print "Trying to convert document to %s" % proposed | ||
1586 | - u = self._toUnicode(markup, proposed) | ||
1587 | - self.markup = u | ||
1588 | - self.originalEncoding = proposed | ||
1589 | - except Exception, e: | ||
1590 | - # print "That didn't work!" | ||
1591 | - # print e | ||
1592 | - return None | ||
1593 | - #print "Correct encoding: %s" % proposed | ||
1594 | - return self.markup | ||
1595 | - | ||
1596 | - def _toUnicode(self, data, encoding): | ||
1597 | - '''Given a string and its encoding, decodes the string into Unicode. | ||
1598 | - %encoding is a string recognized by encodings.aliases''' | ||
1599 | - | ||
1600 | - # strip Byte Order Mark (if present) | ||
1601 | - if (len(data) >= 4) and (data[:2] == '\xfe\xff') \ | ||
1602 | - and (data[2:4] != '\x00\x00'): | ||
1603 | - encoding = 'utf-16be' | ||
1604 | - data = data[2:] | ||
1605 | - elif (len(data) >= 4) and (data[:2] == '\xff\xfe') \ | ||
1606 | - and (data[2:4] != '\x00\x00'): | ||
1607 | - encoding = 'utf-16le' | ||
1608 | - data = data[2:] | ||
1609 | - elif data[:3] == '\xef\xbb\xbf': | ||
1610 | - encoding = 'utf-8' | ||
1611 | - data = data[3:] | ||
1612 | - elif data[:4] == '\x00\x00\xfe\xff': | ||
1613 | - encoding = 'utf-32be' | ||
1614 | - data = data[4:] | ||
1615 | - elif data[:4] == '\xff\xfe\x00\x00': | ||
1616 | - encoding = 'utf-32le' | ||
1617 | - data = data[4:] | ||
1618 | - newdata = unicode(data, encoding) | ||
1619 | - return newdata | ||
1620 | - | ||
1621 | - def _detectEncoding(self, xml_data): | ||
1622 | - """Given a document, tries to detect its XML encoding.""" | ||
1623 | - xml_encoding = sniffed_xml_encoding = None | ||
1624 | - try: | ||
1625 | - if xml_data[:4] == '\x4c\x6f\xa7\x94': | ||
1626 | - # EBCDIC | ||
1627 | - xml_data = self._ebcdic_to_ascii(xml_data) | ||
1628 | - elif xml_data[:4] == '\x00\x3c\x00\x3f': | ||
1629 | - # UTF-16BE | ||
1630 | - sniffed_xml_encoding = 'utf-16be' | ||
1631 | - xml_data = unicode(xml_data, 'utf-16be').encode('utf-8') | ||
1632 | - elif (len(xml_data) >= 4) and (xml_data[:2] == '\xfe\xff') \ | ||
1633 | - and (xml_data[2:4] != '\x00\x00'): | ||
1634 | - # UTF-16BE with BOM | ||
1635 | - sniffed_xml_encoding = 'utf-16be' | ||
1636 | - xml_data = unicode(xml_data[2:], 'utf-16be').encode('utf-8') | ||
1637 | - elif xml_data[:4] == '\x3c\x00\x3f\x00': | ||
1638 | - # UTF-16LE | ||
1639 | - sniffed_xml_encoding = 'utf-16le' | ||
1640 | - xml_data = unicode(xml_data, 'utf-16le').encode('utf-8') | ||
1641 | - elif (len(xml_data) >= 4) and (xml_data[:2] == '\xff\xfe') and \ | ||
1642 | - (xml_data[2:4] != '\x00\x00'): | ||
1643 | - # UTF-16LE with BOM | ||
1644 | - sniffed_xml_encoding = 'utf-16le' | ||
1645 | - xml_data = unicode(xml_data[2:], 'utf-16le').encode('utf-8') | ||
1646 | - elif xml_data[:4] == '\x00\x00\x00\x3c': | ||
1647 | - # UTF-32BE | ||
1648 | - sniffed_xml_encoding = 'utf-32be' | ||
1649 | - xml_data = unicode(xml_data, 'utf-32be').encode('utf-8') | ||
1650 | - elif xml_data[:4] == '\x3c\x00\x00\x00': | ||
1651 | - # UTF-32LE | ||
1652 | - sniffed_xml_encoding = 'utf-32le' | ||
1653 | - xml_data = unicode(xml_data, 'utf-32le').encode('utf-8') | ||
1654 | - elif xml_data[:4] == '\x00\x00\xfe\xff': | ||
1655 | - # UTF-32BE with BOM | ||
1656 | - sniffed_xml_encoding = 'utf-32be' | ||
1657 | - xml_data = unicode(xml_data[4:], 'utf-32be').encode('utf-8') | ||
1658 | - elif xml_data[:4] == '\xff\xfe\x00\x00': | ||
1659 | - # UTF-32LE with BOM | ||
1660 | - sniffed_xml_encoding = 'utf-32le' | ||
1661 | - xml_data = unicode(xml_data[4:], 'utf-32le').encode('utf-8') | ||
1662 | - elif xml_data[:3] == '\xef\xbb\xbf': | ||
1663 | - # UTF-8 with BOM | ||
1664 | - sniffed_xml_encoding = 'utf-8' | ||
1665 | - xml_data = unicode(xml_data[3:], 'utf-8').encode('utf-8') | ||
1666 | - else: | ||
1667 | - sniffed_xml_encoding = 'ascii' | ||
1668 | - pass | ||
1669 | - xml_encoding_match = re.compile \ | ||
1670 | - ('^<\?.*encoding=[\'"](.*?)[\'"].*\?>')\ | ||
1671 | - .match(xml_data) | ||
1672 | - except: | ||
1673 | - xml_encoding_match = None | ||
1674 | - if xml_encoding_match: | ||
1675 | - xml_encoding = xml_encoding_match.groups()[0].lower() | ||
1676 | - if sniffed_xml_encoding and \ | ||
1677 | - (xml_encoding in ('iso-10646-ucs-2', 'ucs-2', 'csunicode', | ||
1678 | - 'iso-10646-ucs-4', 'ucs-4', 'csucs4', | ||
1679 | - 'utf-16', 'utf-32', 'utf_16', 'utf_32', | ||
1680 | - 'utf16', 'u16')): | ||
1681 | - xml_encoding = sniffed_xml_encoding | ||
1682 | - return xml_data, xml_encoding, sniffed_xml_encoding | ||
1683 | - | ||
1684 | - | ||
1685 | - def find_codec(self, charset): | ||
1686 | - return self._codec(self.CHARSET_ALIASES.get(charset, charset)) \ | ||
1687 | - or (charset and self._codec(charset.replace("-", ""))) \ | ||
1688 | - or (charset and self._codec(charset.replace("-", "_"))) \ | ||
1689 | - or charset | ||
1690 | - | ||
1691 | - def _codec(self, charset): | ||
1692 | - if not charset: return charset | ||
1693 | - codec = None | ||
1694 | - try: | ||
1695 | - codecs.lookup(charset) | ||
1696 | - codec = charset | ||
1697 | - except LookupError: | ||
1698 | - pass | ||
1699 | - return codec | ||
1700 | - | ||
1701 | - EBCDIC_TO_ASCII_MAP = None | ||
1702 | - def _ebcdic_to_ascii(self, s): | ||
1703 | - c = self.__class__ | ||
1704 | - if not c.EBCDIC_TO_ASCII_MAP: | ||
1705 | - emap = (0,1,2,3,156,9,134,127,151,141,142,11,12,13,14,15, | ||
1706 | - 16,17,18,19,157,133,8,135,24,25,146,143,28,29,30,31, | ||
1707 | - 128,129,130,131,132,10,23,27,136,137,138,139,140,5,6,7, | ||
1708 | - 144,145,22,147,148,149,150,4,152,153,154,155,20,21,158,26, | ||
1709 | - 32,160,161,162,163,164,165,166,167,168,91,46,60,40,43,33, | ||
1710 | - 38,169,170,171,172,173,174,175,176,177,93,36,42,41,59,94, | ||
1711 | - 45,47,178,179,180,181,182,183,184,185,124,44,37,95,62,63, | ||
1712 | - 186,187,188,189,190,191,192,193,194,96,58,35,64,39,61,34, | ||
1713 | - 195,97,98,99,100,101,102,103,104,105,196,197,198,199,200, | ||
1714 | - 201,202,106,107,108,109,110,111,112,113,114,203,204,205, | ||
1715 | - 206,207,208,209,126,115,116,117,118,119,120,121,122,210, | ||
1716 | - 211,212,213,214,215,216,217,218,219,220,221,222,223,224, | ||
1717 | - 225,226,227,228,229,230,231,123,65,66,67,68,69,70,71,72, | ||
1718 | - 73,232,233,234,235,236,237,125,74,75,76,77,78,79,80,81, | ||
1719 | - 82,238,239,240,241,242,243,92,159,83,84,85,86,87,88,89, | ||
1720 | - 90,244,245,246,247,248,249,48,49,50,51,52,53,54,55,56,57, | ||
1721 | - 250,251,252,253,254,255) | ||
1722 | - import string | ||
1723 | - c.EBCDIC_TO_ASCII_MAP = string.maketrans( \ | ||
1724 | - ''.join(map(chr, range(256))), ''.join(map(chr, emap))) | ||
1725 | - return s.translate(c.EBCDIC_TO_ASCII_MAP) | ||
1726 | - | ||
1727 | - MS_CHARS = { '\x80' : ('euro', '20AC'), | ||
1728 | - '\x81' : ' ', | ||
1729 | - '\x82' : ('sbquo', '201A'), | ||
1730 | - '\x83' : ('fnof', '192'), | ||
1731 | - '\x84' : ('bdquo', '201E'), | ||
1732 | - '\x85' : ('hellip', '2026'), | ||
1733 | - '\x86' : ('dagger', '2020'), | ||
1734 | - '\x87' : ('Dagger', '2021'), | ||
1735 | - '\x88' : ('circ', '2C6'), | ||
1736 | - '\x89' : ('permil', '2030'), | ||
1737 | - '\x8A' : ('Scaron', '160'), | ||
1738 | - '\x8B' : ('lsaquo', '2039'), | ||
1739 | - '\x8C' : ('OElig', '152'), | ||
1740 | - '\x8D' : '?', | ||
1741 | - '\x8E' : ('#x17D', '17D'), | ||
1742 | - '\x8F' : '?', | ||
1743 | - '\x90' : '?', | ||
1744 | - '\x91' : ('lsquo', '2018'), | ||
1745 | - '\x92' : ('rsquo', '2019'), | ||
1746 | - '\x93' : ('ldquo', '201C'), | ||
1747 | - '\x94' : ('rdquo', '201D'), | ||
1748 | - '\x95' : ('bull', '2022'), | ||
1749 | - '\x96' : ('ndash', '2013'), | ||
1750 | - '\x97' : ('mdash', '2014'), | ||
1751 | - '\x98' : ('tilde', '2DC'), | ||
1752 | - '\x99' : ('trade', '2122'), | ||
1753 | - '\x9a' : ('scaron', '161'), | ||
1754 | - '\x9b' : ('rsaquo', '203A'), | ||
1755 | - '\x9c' : ('oelig', '153'), | ||
1756 | - '\x9d' : '?', | ||
1757 | - '\x9e' : ('#x17E', '17E'), | ||
1758 | - '\x9f' : ('Yuml', ''),} | ||
1759 | - | ||
1760 | -####################################################################### | ||
1761 | - | ||
1762 | - | ||
1763 | -#By default, act as an HTML pretty-printer. | ||
1764 | -if __name__ == '__main__': | ||
1765 | - import sys | ||
1766 | - soup = BeautifulSoup(sys.stdin.read()) | ||
1767 | - print soup.prettify() |
pacotes/openlayers/tools/README.txt
@@ -1,14 +0,0 @@ | @@ -1,14 +0,0 @@ | ||
1 | -This directory contains tools used in the packaging or deployment of OpenLayers. | ||
2 | - | ||
3 | -Javascript minimizing tools: | ||
4 | - | ||
5 | - * jsmin.c, jsmin.py: | ||
6 | - jsmin.py is a direct translation of the jsmin.c code into Python. jsmin.py | ||
7 | - will therefore run anyplace Python runs... but at significantly slower speed. | ||
8 | - | ||
9 | - * shrinksafe.py | ||
10 | - shrinksafe.py calls out to a third party javascript shrinking service. This | ||
11 | - creates file sizes about 4% smaller (as of commit 501) of the OpenLayers | ||
12 | - code. However, this also has the side effect of making you dependant on the | ||
13 | - web service -- and since that service sometimes goes dead, it's risky to | ||
14 | - depend on it. |
pacotes/openlayers/tools/exampleparser.py
@@ -1,251 +0,0 @@ | @@ -1,251 +0,0 @@ | ||
1 | -#!/usr/bin/env python | ||
2 | - | ||
3 | -import sys | ||
4 | -import os | ||
5 | -import re | ||
6 | -import urllib2 | ||
7 | -import time | ||
8 | -from xml.dom.minidom import Document | ||
9 | - | ||
10 | -try: | ||
11 | - import xml.etree.ElementTree as ElementTree | ||
12 | -except ImportError: | ||
13 | - try: | ||
14 | - import cElementTree as ElementTree | ||
15 | - except ImportError: | ||
16 | - try: | ||
17 | - import elementtree.ElementTree as ElementTree | ||
18 | - except ImportError: | ||
19 | - import lxml.etree as ElementTree | ||
20 | - | ||
21 | -missing_deps = False | ||
22 | -try: | ||
23 | - import simplejson | ||
24 | - from BeautifulSoup import BeautifulSoup | ||
25 | -except ImportError, E: | ||
26 | - missing_deps = E | ||
27 | - | ||
28 | -feedName = "example-list.xml" | ||
29 | -feedPath = "http://openlayers.org/dev/examples/" | ||
30 | - | ||
31 | -def getListOfOnlineExamples(baseUrl): | ||
32 | - """ | ||
33 | - useful if you want to get a list of examples a url. not used by default. | ||
34 | - """ | ||
35 | - html = urllib2.urlopen(baseUrl) | ||
36 | - soup = BeautifulSoup(html) | ||
37 | - examples = soup.findAll('li') | ||
38 | - examples = [example.find('a').get('href') for example in examples] | ||
39 | - examples = [example for example in examples if example.endswith('.html')] | ||
40 | - examples = [example for example in examples] | ||
41 | - return examples | ||
42 | - | ||
43 | -def getListOfExamples(relPath): | ||
44 | - """ | ||
45 | - returns list of .html filenames within a given path - excludes example-list.html | ||
46 | - """ | ||
47 | - examples = os.listdir(relPath) | ||
48 | - examples = [example for example in examples if example.endswith('.html') and example != "example-list.html"] | ||
49 | - return examples | ||
50 | - | ||
51 | - | ||
52 | -def getExampleHtml(location): | ||
53 | - """ | ||
54 | - returns html of a specific example that is available online or locally | ||
55 | - """ | ||
56 | - print '.', | ||
57 | - if location.startswith('http'): | ||
58 | - return urllib2.urlopen(location).read() | ||
59 | - else: | ||
60 | - f = open(location) | ||
61 | - html = f.read() | ||
62 | - f.close() | ||
63 | - return html | ||
64 | - | ||
65 | - | ||
66 | -def extractById(soup, tagId, value=None): | ||
67 | - """ | ||
68 | - returns full contents of a particular tag id | ||
69 | - """ | ||
70 | - beautifulTag = soup.find(id=tagId) | ||
71 | - if beautifulTag: | ||
72 | - if beautifulTag.contents: | ||
73 | - value = str(beautifulTag.renderContents()).strip() | ||
74 | - value = value.replace('\t','') | ||
75 | - value = value.replace('\n','') | ||
76 | - return value | ||
77 | - | ||
78 | -def getRelatedClasses(html): | ||
79 | - """ | ||
80 | - parses the html, and returns a list of all OpenLayers Classes | ||
81 | - used within (ie what parts of OL the javascript uses). | ||
82 | - """ | ||
83 | - rawstr = r'''(?P<class>OpenLayers\..*?)\(''' | ||
84 | - return re.findall(rawstr, html) | ||
85 | - | ||
86 | -def parseHtml(html,ids): | ||
87 | - """ | ||
88 | - returns dictionary of items of interest | ||
89 | - """ | ||
90 | - soup = BeautifulSoup(html) | ||
91 | - d = {} | ||
92 | - for tagId in ids: | ||
93 | - d[tagId] = extractById(soup,tagId) | ||
94 | - #classes should eventually be parsed from docs - not automatically created. | ||
95 | - classes = getRelatedClasses(html) | ||
96 | - d['classes'] = classes | ||
97 | - return d | ||
98 | - | ||
99 | -def getSvnInfo(path): | ||
100 | - h = os.popen("svn info %s --xml" % path) | ||
101 | - tree = ElementTree.fromstring(h.read()) | ||
102 | - h.close() | ||
103 | - d = { | ||
104 | - 'url': tree.findtext('entry/url'), | ||
105 | - 'author': tree.findtext('entry/commit/author'), | ||
106 | - 'date': tree.findtext('entry/commit/date') | ||
107 | - } | ||
108 | - return d | ||
109 | - | ||
110 | -def createFeed(examples): | ||
111 | - doc = Document() | ||
112 | - atomuri = "http://www.w3.org/2005/Atom" | ||
113 | - feed = doc.createElementNS(atomuri, "feed") | ||
114 | - feed.setAttribute("xmlns", atomuri) | ||
115 | - title = doc.createElementNS(atomuri, "title") | ||
116 | - title.appendChild(doc.createTextNode("OpenLayers Examples")) | ||
117 | - feed.appendChild(title) | ||
118 | - link = doc.createElementNS(atomuri, "link") | ||
119 | - link.setAttribute("rel", "self") | ||
120 | - link.setAttribute("href", feedPath + feedName) | ||
121 | - | ||
122 | - modtime = time.strftime("%Y-%m-%dT%I:%M:%SZ", time.gmtime()) | ||
123 | - id = doc.createElementNS(atomuri, "id") | ||
124 | - id.appendChild(doc.createTextNode("%s%s#%s" % (feedPath, feedName, modtime))) | ||
125 | - feed.appendChild(id) | ||
126 | - | ||
127 | - updated = doc.createElementNS(atomuri, "updated") | ||
128 | - updated.appendChild(doc.createTextNode(modtime)) | ||
129 | - feed.appendChild(updated) | ||
130 | - | ||
131 | - examples.sort(key=lambda x:x["modified"]) | ||
132 | - for example in sorted(examples, key=lambda x:x["modified"], reverse=True): | ||
133 | - entry = doc.createElementNS(atomuri, "entry") | ||
134 | - | ||
135 | - title = doc.createElementNS(atomuri, "title") | ||
136 | - title.appendChild(doc.createTextNode(example["title"] or example["example"])) | ||
137 | - entry.appendChild(title) | ||
138 | - | ||
139 | - link = doc.createElementNS(atomuri, "link") | ||
140 | - link.setAttribute("href", "%s%s" % (feedPath, example["example"])) | ||
141 | - entry.appendChild(link) | ||
142 | - | ||
143 | - summary = doc.createElementNS(atomuri, "summary") | ||
144 | - summary.appendChild(doc.createTextNode(example["shortdesc"] or example["example"])) | ||
145 | - entry.appendChild(summary) | ||
146 | - | ||
147 | - updated = doc.createElementNS(atomuri, "updated") | ||
148 | - updated.appendChild(doc.createTextNode(example["modified"])) | ||
149 | - entry.appendChild(updated) | ||
150 | - | ||
151 | - author = doc.createElementNS(atomuri, "author") | ||
152 | - name = doc.createElementNS(atomuri, "name") | ||
153 | - name.appendChild(doc.createTextNode(example["author"])) | ||
154 | - author.appendChild(name) | ||
155 | - entry.appendChild(author) | ||
156 | - | ||
157 | - id = doc.createElementNS(atomuri, "id") | ||
158 | - id.appendChild(doc.createTextNode("%s%s#%s" % (feedPath, example["example"], example["modified"]))) | ||
159 | - entry.appendChild(id) | ||
160 | - | ||
161 | - feed.appendChild(entry) | ||
162 | - | ||
163 | - doc.appendChild(feed) | ||
164 | - return doc | ||
165 | - | ||
166 | -def wordIndex(examples): | ||
167 | - """ | ||
168 | - Create an inverted index based on words in title and shortdesc. Keys are | ||
169 | - lower cased words. Values are dictionaries with example index keys and | ||
170 | - count values. | ||
171 | - """ | ||
172 | - index = {} | ||
173 | - unword = re.compile("\\W+") | ||
174 | - keys = ["shortdesc", "title"] | ||
175 | - for i in range(len(examples)): | ||
176 | - for key in keys: | ||
177 | - text = examples[i][key] | ||
178 | - if text: | ||
179 | - words = unword.split(text) | ||
180 | - for word in words: | ||
181 | - if word: | ||
182 | - word = word.lower() | ||
183 | - if index.has_key(word): | ||
184 | - if index[word].has_key(i): | ||
185 | - index[word][i] += 1 | ||
186 | - else: | ||
187 | - index[word][i] = 1 | ||
188 | - else: | ||
189 | - index[word] = {i: 1} | ||
190 | - return index | ||
191 | - | ||
192 | -if __name__ == "__main__": | ||
193 | - | ||
194 | - if missing_deps: | ||
195 | - print "This script requires simplejson and BeautifulSoup. You don't have them. \n(%s)" % E | ||
196 | - sys.exit() | ||
197 | - | ||
198 | - if len(sys.argv) > 1: | ||
199 | - outFile = open(sys.argv[1],'w') | ||
200 | - else: | ||
201 | - outFile = open('../examples/example-list.js','w') | ||
202 | - | ||
203 | - examplesLocation = '../examples' | ||
204 | - print 'Reading examples from %s and writing out to %s' % (examplesLocation, outFile.name) | ||
205 | - | ||
206 | - exampleList = [] | ||
207 | - docIds = ['title','shortdesc'] | ||
208 | - | ||
209 | - #comment out option to create docs from online resource | ||
210 | - #examplesLocation = 'http://svn.openlayers.org/sandbox/docs/examples/' | ||
211 | - #examples = getListOfOnlineExamples(examplesLocation) | ||
212 | - | ||
213 | - examples = getListOfExamples(examplesLocation) | ||
214 | - | ||
215 | - modtime = time.strftime("%Y-%m-%dT%I:%M:%SZ", time.gmtime()) | ||
216 | - | ||
217 | - for example in examples: | ||
218 | - url = os.path.join(examplesLocation,example) | ||
219 | - html = getExampleHtml(url) | ||
220 | - tagvalues = parseHtml(html,docIds) | ||
221 | - tagvalues['example'] = example | ||
222 | - # add in svn info | ||
223 | - d = getSvnInfo(url) | ||
224 | - tagvalues["modified"] = d["date"] or modtime | ||
225 | - tagvalues["author"] = d["author"] or "anonymous" | ||
226 | - tagvalues['link'] = example | ||
227 | - | ||
228 | - exampleList.append(tagvalues) | ||
229 | - | ||
230 | |||
231 | - | ||
232 | - exampleList.sort(key=lambda x:x['example'].lower()) | ||
233 | - | ||
234 | - index = wordIndex(exampleList) | ||
235 | - | ||
236 | - json = simplejson.dumps({"examples": exampleList, "index": index}) | ||
237 | - #give the json a global variable we can use in our js. This should be replaced or made optional. | ||
238 | - json = 'var info=' + json | ||
239 | - outFile.write(json) | ||
240 | - outFile.close() | ||
241 | - | ||
242 | - print "writing feed to ../examples/%s " % feedName | ||
243 | - atom = open('../examples/%s' % feedName, 'w') | ||
244 | - doc = createFeed(exampleList) | ||
245 | - atom.write(doc.toxml()) | ||
246 | - atom.close() | ||
247 | - | ||
248 | - | ||
249 | - print 'complete' | ||
250 | - | ||
251 | - |
pacotes/openlayers/tools/jsmin.c
@@ -1,272 +0,0 @@ | @@ -1,272 +0,0 @@ | ||
1 | -/* jsmin.c | ||
2 | - 2006-05-04 | ||
3 | - | ||
4 | -Copyright (c) 2002 Douglas Crockford (www.crockford.com) | ||
5 | - | ||
6 | -Permission is hereby granted, free of charge, to any person obtaining a copy of | ||
7 | -this software and associated documentation files (the "Software"), to deal in | ||
8 | -the Software without restriction, including without limitation the rights to | ||
9 | -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies | ||
10 | -of the Software, and to permit persons to whom the Software is furnished to do | ||
11 | -so, subject to the following conditions: | ||
12 | - | ||
13 | -The above copyright notice and this permission notice shall be included in all | ||
14 | -copies or substantial portions of the Software. | ||
15 | - | ||
16 | -The Software shall be used for Good, not Evil. | ||
17 | - | ||
18 | -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
19 | -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
20 | -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
21 | -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
22 | -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
23 | -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
24 | -SOFTWARE. | ||
25 | -*/ | ||
26 | - | ||
27 | -#include <stdlib.h> | ||
28 | -#include <stdio.h> | ||
29 | - | ||
30 | -static int theA; | ||
31 | -static int theB; | ||
32 | -static int theLookahead = EOF; | ||
33 | - | ||
34 | - | ||
35 | -/* isAlphanum -- return true if the character is a letter, digit, underscore, | ||
36 | - dollar sign, or non-ASCII character. | ||
37 | -*/ | ||
38 | - | ||
39 | -static int | ||
40 | -isAlphanum(int c) | ||
41 | -{ | ||
42 | - return ((c >= 'a' && c <= 'z') || (c >= '0' && c <= '9') || | ||
43 | - (c >= 'A' && c <= 'Z') || c == '_' || c == '$' || c == '\\' || | ||
44 | - c > 126); | ||
45 | -} | ||
46 | - | ||
47 | - | ||
48 | -/* get -- return the next character from stdin. Watch out for lookahead. If | ||
49 | - the character is a control character, translate it to a space or | ||
50 | - linefeed. | ||
51 | -*/ | ||
52 | - | ||
53 | -static int | ||
54 | -get() | ||
55 | -{ | ||
56 | - int c = theLookahead; | ||
57 | - theLookahead = EOF; | ||
58 | - if (c == EOF) { | ||
59 | - c = getc(stdin); | ||
60 | - } | ||
61 | - if (c >= ' ' || c == '\n' || c == EOF) { | ||
62 | - return c; | ||
63 | - } | ||
64 | - if (c == '\r') { | ||
65 | - return '\n'; | ||
66 | - } | ||
67 | - return ' '; | ||
68 | -} | ||
69 | - | ||
70 | - | ||
71 | -/* peek -- get the next character without getting it. | ||
72 | -*/ | ||
73 | - | ||
74 | -static int | ||
75 | -peek() | ||
76 | -{ | ||
77 | - theLookahead = get(); | ||
78 | - return theLookahead; | ||
79 | -} | ||
80 | - | ||
81 | - | ||
82 | -/* next -- get the next character, excluding comments. peek() is used to see | ||
83 | - if a '/' is followed by a '/' or '*'. | ||
84 | -*/ | ||
85 | - | ||
86 | -static int | ||
87 | -next() | ||
88 | -{ | ||
89 | - int c = get(); | ||
90 | - if (c == '/') { | ||
91 | - switch (peek()) { | ||
92 | - case '/': | ||
93 | - for (;;) { | ||
94 | - c = get(); | ||
95 | - if (c <= '\n') { | ||
96 | - return c; | ||
97 | - } | ||
98 | - } | ||
99 | - case '*': | ||
100 | - get(); | ||
101 | - for (;;) { | ||
102 | - switch (get()) { | ||
103 | - case '*': | ||
104 | - if (peek() == '/') { | ||
105 | - get(); | ||
106 | - return ' '; | ||
107 | - } | ||
108 | - break; | ||
109 | - case EOF: | ||
110 | - fprintf(stderr, "Error: JSMIN Unterminated comment.\n"); | ||
111 | - exit(1); | ||
112 | - } | ||
113 | - } | ||
114 | - default: | ||
115 | - return c; | ||
116 | - } | ||
117 | - } | ||
118 | - return c; | ||
119 | -} | ||
120 | - | ||
121 | - | ||
122 | -/* action -- do something! What you do is determined by the argument: | ||
123 | - 1 Output A. Copy B to A. Get the next B. | ||
124 | - 2 Copy B to A. Get the next B. (Delete A). | ||
125 | - 3 Get the next B. (Delete B). | ||
126 | - action treats a string as a single character. Wow! | ||
127 | - action recognizes a regular expression if it is preceded by ( or , or =. | ||
128 | -*/ | ||
129 | - | ||
130 | -static void | ||
131 | -action(int d) | ||
132 | -{ | ||
133 | - switch (d) { | ||
134 | - case 1: | ||
135 | - putc(theA, stdout); | ||
136 | - case 2: | ||
137 | - theA = theB; | ||
138 | - if (theA == '\'' || theA == '"') { | ||
139 | - for (;;) { | ||
140 | - putc(theA, stdout); | ||
141 | - theA = get(); | ||
142 | - if (theA == theB) { | ||
143 | - break; | ||
144 | - } | ||
145 | - if (theA <= '\n') { | ||
146 | - fprintf(stderr, | ||
147 | -"Error: JSMIN unterminated string literal: %c\n", theA); | ||
148 | - exit(1); | ||
149 | - } | ||
150 | - if (theA == '\\') { | ||
151 | - putc(theA, stdout); | ||
152 | - theA = get(); | ||
153 | - } | ||
154 | - } | ||
155 | - } | ||
156 | - case 3: | ||
157 | - theB = next(); | ||
158 | - if (theB == '/' && (theA == '(' || theA == ',' || theA == '=' || | ||
159 | - theA == ':' || theA == '[' || theA == '!' || theA == '&' || | ||
160 | - theA == '|')) { | ||
161 | - putc(theA, stdout); | ||
162 | - putc(theB, stdout); | ||
163 | - for (;;) { | ||
164 | - theA = get(); | ||
165 | - if (theA == '/') { | ||
166 | - break; | ||
167 | - } else if (theA =='\\') { | ||
168 | - putc(theA, stdout); | ||
169 | - theA = get(); | ||
170 | - } else if (theA <= '\n') { | ||
171 | - fprintf(stderr, | ||
172 | -"Error: JSMIN unterminated Regular Expression literal.\n", theA); | ||
173 | - exit(1); | ||
174 | - } | ||
175 | - putc(theA, stdout); | ||
176 | - } | ||
177 | - theB = next(); | ||
178 | - } | ||
179 | - } | ||
180 | -} | ||
181 | - | ||
182 | - | ||
183 | -/* jsmin -- Copy the input to the output, deleting the characters which are | ||
184 | - insignificant to JavaScript. Comments will be removed. Tabs will be | ||
185 | - replaced with spaces. Carriage returns will be replaced with linefeeds. | ||
186 | - Most spaces and linefeeds will be removed. | ||
187 | -*/ | ||
188 | - | ||
189 | -static void | ||
190 | -jsmin() | ||
191 | -{ | ||
192 | - theA = '\n'; | ||
193 | - action(3); | ||
194 | - while (theA != EOF) { | ||
195 | - switch (theA) { | ||
196 | - case ' ': | ||
197 | - if (isAlphanum(theB)) { | ||
198 | - action(1); | ||
199 | - } else { | ||
200 | - action(2); | ||
201 | - } | ||
202 | - break; | ||
203 | - case '\n': | ||
204 | - switch (theB) { | ||
205 | - case '{': | ||
206 | - case '[': | ||
207 | - case '(': | ||
208 | - case '+': | ||
209 | - case '-': | ||
210 | - action(1); | ||
211 | - break; | ||
212 | - case ' ': | ||
213 | - action(3); | ||
214 | - break; | ||
215 | - default: | ||
216 | - if (isAlphanum(theB)) { | ||
217 | - action(1); | ||
218 | - } else { | ||
219 | - action(2); | ||
220 | - } | ||
221 | - } | ||
222 | - break; | ||
223 | - default: | ||
224 | - switch (theB) { | ||
225 | - case ' ': | ||
226 | - if (isAlphanum(theA)) { | ||
227 | - action(1); | ||
228 | - break; | ||
229 | - } | ||
230 | - action(3); | ||
231 | - break; | ||
232 | - case '\n': | ||
233 | - switch (theA) { | ||
234 | - case '}': | ||
235 | - case ']': | ||
236 | - case ')': | ||
237 | - case '+': | ||
238 | - case '-': | ||
239 | - case '"': | ||
240 | - case '\'': | ||
241 | - action(1); | ||
242 | - break; | ||
243 | - default: | ||
244 | - if (isAlphanum(theA)) { | ||
245 | - action(1); | ||
246 | - } else { | ||
247 | - action(3); | ||
248 | - } | ||
249 | - } | ||
250 | - break; | ||
251 | - default: | ||
252 | - action(1); | ||
253 | - break; | ||
254 | - } | ||
255 | - } | ||
256 | - } | ||
257 | -} | ||
258 | - | ||
259 | - | ||
260 | -/* main -- Output any command line arguments as comments | ||
261 | - and then minify the input. | ||
262 | -*/ | ||
263 | -extern int | ||
264 | -main(int argc, char* argv[]) | ||
265 | -{ | ||
266 | - int i; | ||
267 | - for (i = 1; i < argc; i += 1) { | ||
268 | - fprintf(stdout, "// %s\n", argv[i]); | ||
269 | - } | ||
270 | - jsmin(); | ||
271 | - return 0; | ||
272 | -} |
pacotes/openlayers/tools/jsmin.py
@@ -1,216 +0,0 @@ | @@ -1,216 +0,0 @@ | ||
1 | -#!/usr/bin/python | ||
2 | - | ||
3 | -# This code is original from jsmin by Douglas Crockford, it was translated to | ||
4 | -# Python by Baruch Even. The original code had the following copyright and | ||
5 | -# license. | ||
6 | -# | ||
7 | -# /* jsmin.c | ||
8 | -# 2007-01-08 | ||
9 | -# | ||
10 | -# Copyright (c) 2002 Douglas Crockford (www.crockford.com) | ||
11 | -# | ||
12 | -# Permission is hereby granted, free of charge, to any person obtaining a copy of | ||
13 | -# this software and associated documentation files (the "Software"), to deal in | ||
14 | -# the Software without restriction, including without limitation the rights to | ||
15 | -# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies | ||
16 | -# of the Software, and to permit persons to whom the Software is furnished to do | ||
17 | -# so, subject to the following conditions: | ||
18 | -# | ||
19 | -# The above copyright notice and this permission notice shall be included in all | ||
20 | -# copies or substantial portions of the Software. | ||
21 | -# | ||
22 | -# The Software shall be used for Good, not Evil. | ||
23 | -# | ||
24 | -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
25 | -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
26 | -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
27 | -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
28 | -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
29 | -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
30 | -# SOFTWARE. | ||
31 | -# */ | ||
32 | - | ||
33 | -from StringIO import StringIO | ||
34 | - | ||
def jsmin(js):
    """Minify the JavaScript source string *js* and return the result.

    Convenience wrapper: feeds *js* through JavascriptMinify via in-memory
    streams and strips the leading newline the minifier tends to emit.
    """
    ins = StringIO(js)
    outs = StringIO()
    JavascriptMinify().minify(ins, outs)
    # Renamed from "str", which shadowed the builtin of the same name.
    result = outs.getvalue()
    if len(result) > 0 and result[0] == '\n':
        result = result[1:]
    return result
43 | - | ||
def isAlphanum(c):
    """Return true if *c* is a letter, digit, underscore, dollar sign,
    backslash, or non-ASCII character.
    """
    if c is None:
        return False
    if 'a' <= c <= 'z' or 'A' <= c <= 'Z' or '0' <= c <= '9':
        return True
    return c in '_$\\' or ord(c) > 126
50 | - | ||
class UnterminatedComment(Exception):
    """Raised when a /* ... */ comment is never closed before EOF."""
53 | - | ||
class UnterminatedStringLiteral(Exception):
    """Raised when a quoted string literal is never closed."""
56 | - | ||
class UnterminatedRegularExpression(Exception):
    """Raised when a regular expression literal is never closed."""
59 | - | ||
class JavascriptMinify(object):
    """Stream JavaScript minifier (a port of Douglas Crockford's jsmin.c).

    Call minify(instream, outstream) to copy JavaScript from *instream* to
    *outstream* with comments removed and insignificant whitespace dropped.
    The algorithm keeps a two-character window: theA is the current
    character, theB the lookahead.
    """

    def _outA(self):
        # Emit the current character.
        self.outstream.write(self.theA)
    def _outB(self):
        # Emit the lookahead character.
        self.outstream.write(self.theB)

    def _get(self):
        """return the next character from stdin. Watch out for lookahead. If
        the character is a control character, translate it to a space or
        linefeed.
        """
        c = self.theLookahead
        self.theLookahead = None
        if c is None:  # idiom fix: was "c == None"
            c = self.instream.read(1)
        if c >= ' ' or c == '\n':
            return c
        if c == '': # EOF is reported as NUL, the internal sentinel.
            return '\000'
        if c == '\r':
            return '\n'
        return ' '

    def _peek(self):
        # Look at the next character without consuming it.
        self.theLookahead = self._get()
        return self.theLookahead

    def _next(self):
        """get the next character, excluding comments. peek() is used to see
        if a '/' is followed by a '/' or '*'.
        """
        c = self._get()
        if c == '/':
            p = self._peek()
            if p == '/':
                # Line comment: skip to end of line.
                c = self._get()
                while c > '\n':
                    c = self._get()
                return c
            if p == '*':
                # Block comment: skip to closing */ and collapse to a space.
                c = self._get()
                while 1:
                    c = self._get()
                    if c == '*':
                        if self._peek() == '/':
                            self._get()
                            return ' '
                    if c == '\000':
                        raise UnterminatedComment()

        return c

    def _action(self, action):
        """do something! What you do is determined by the argument:
           1   Output A. Copy B to A. Get the next B.
           2   Copy B to A. Get the next B. (Delete A).
           3   Get the next B. (Delete B).
           action treats a string as a single character. Wow!
           action recognizes a regular expression if it is preceded by ( or , or =.
        """
        if action <= 1:
            self._outA()

        if action <= 2:
            self.theA = self.theB
            if self.theA == "'" or self.theA == '"':
                # String literal: copy verbatim, honouring backslash escapes.
                while 1:
                    self._outA()
                    self.theA = self._get()
                    if self.theA == self.theB:
                        break
                    if self.theA <= '\n':
                        raise UnterminatedStringLiteral()
                    if self.theA == '\\':
                        self._outA()
                        self.theA = self._get()


        if action <= 3:
            self.theB = self._next()
            if self.theB == '/' and (self.theA == '(' or self.theA == ',' or
                                     self.theA == '=' or self.theA == ':' or
                                     self.theA == '[' or self.theA == '?' or
                                     self.theA == '!' or self.theA == '&' or
                                     self.theA == '|'):
                # After these characters a '/' starts a regular-expression
                # literal, not a division operator; copy it verbatim.
                self._outA()
                self._outB()
                while 1:
                    self.theA = self._get()
                    if self.theA == '/':
                        break
                    elif self.theA == '\\':
                        self._outA()
                        self.theA = self._get()
                    elif self.theA <= '\n':
                        raise UnterminatedRegularExpression()
                    self._outA()
                self.theB = self._next()


    def _jsmin(self):
        """Copy the input to the output, deleting the characters which are
           insignificant to JavaScript. Comments will be removed. Tabs will be
           replaced with spaces. Carriage returns will be replaced with linefeeds.
           Most spaces and linefeeds will be removed.
        """
        self.theA = '\n'
        self._action(3)

        while self.theA != '\000':
            if self.theA == ' ':
                if isAlphanum(self.theB):
                    self._action(1)
                else:
                    self._action(2)
            elif self.theA == '\n':
                if self.theB in ['{', '[', '(', '+', '-']:
                    self._action(1)
                elif self.theB == ' ':
                    self._action(3)
                else:
                    if isAlphanum(self.theB):
                        self._action(1)
                    else:
                        self._action(2)
            else:
                if self.theB == ' ':
                    if isAlphanum(self.theA):
                        self._action(1)
                    else:
                        self._action(3)
                elif self.theB == '\n':
                    if self.theA in ['}', ']', ')', '+', '-', '"', '\'']:
                        self._action(1)
                    else:
                        if isAlphanum(self.theA):
                            self._action(1)
                        else:
                            self._action(3)
                else:
                    self._action(1)

    def minify(self, instream, outstream):
        """Minify *instream* into *outstream*; closes the input stream."""
        self.instream = instream
        self.outstream = outstream
        self.theA = None
        # BUG FIX: was "self.thaB" (typo), which left theB uninitialised
        # and created a stray attribute.
        self.theB = None
        self.theLookahead = None

        self._jsmin()
        self.instream.close()
212 | - | ||
if __name__ == '__main__':
    # Filter: minify stdin to stdout when run as a script.
    import sys
    JavascriptMinify().minify(sys.stdin, sys.stdout)
pacotes/openlayers/tools/mergejs.py
@@ -1,252 +0,0 @@ | @@ -1,252 +0,0 @@ | ||
1 | -#!/usr/bin/env python | ||
2 | -# | ||
3 | -# Merge multiple JavaScript source code files into one. | ||
4 | -# | ||
5 | -# Usage: | ||
6 | -# This script requires source files to have dependencies specified in them. | ||
7 | -# | ||
8 | -# Dependencies are specified with a comment of the form: | ||
9 | -# | ||
10 | -# // @requires <file path> | ||
11 | -# | ||
12 | -# e.g. | ||
13 | -# | ||
14 | -# // @requires Geo/DataSource.js | ||
15 | -# | ||
16 | -# This script should be executed like so: | ||
17 | -# | ||
18 | -# mergejs.py <output.js> <directory> [...] | ||
19 | -# | ||
20 | -# e.g. | ||
21 | -# | ||
22 | -# mergejs.py openlayers.js Geo/ CrossBrowser/ | ||
23 | -# | ||
24 | -# This example will cause the script to walk the `Geo` and | ||
25 | -# `CrossBrowser` directories--and subdirectories thereof--and import | ||
26 | -# all `*.js` files encountered. The dependency declarations will be extracted | ||
27 | -# and then the source code from imported files will be output to | ||
28 | -# a file named `openlayers.js` in an order which fulfils the dependencies | ||
29 | -# specified. | ||
30 | -# | ||
31 | -# | ||
32 | -# Note: This is a very rough initial version of this code. | ||
33 | -# | ||
34 | -# -- Copyright 2005-2008 MetaCarta, Inc. / OpenLayers project -- | ||
35 | -# | ||
36 | - | ||
37 | -# TODO: Allow files to be excluded. e.g. `Crossbrowser/DebugMode.js`? | ||
38 | -# TODO: Report error when dependency can not be found rather than KeyError. | ||
39 | - | ||
40 | -import re | ||
41 | -import os | ||
42 | -import sys | ||
43 | - | ||
# Only files with this suffix are treated as JavaScript sources.
SUFFIX_JAVASCRIPT = ".js"

# Captures the target path of an "@requires <path>" declaration (group 1).
RE_REQUIRE = "@requires:? (.*)\n" # TODO: Ensure in comment?
class SourceFile:
    """
    Represents a Javascript source code file.
    """

    def __init__(self, filepath, source):
        """Remember the file's path and full source text."""
        self.filepath = filepath
        self.source = source
        # Filled in externally during the merge pass.
        self.requiredBy = []

    @property
    def requires(self):
        """List of file paths this file declares via @requires comments."""
        # TODO: Cache?
        return re.findall(RE_REQUIRE, self.source)
70 | - | ||
71 | - | ||
72 | - | ||
def usage(filename):
    """
    Displays a usage message.
    """
    # *filename* is argv[0]; message goes to stdout (Python 2 print statement).
    print "%s [-c <config file>] <output.js> <directory> [...]" % filename
78 | - | ||
79 | - | ||
class Config:
    """
    Represents a parsed configuration file.

    A configuration file should be of the following form:

        [first]
        3rd/prototype.js
        core/application.js
        core/params.js
        # A comment

        [last]
        core/api.js # Another comment

        [include]
        core/extras.js

        [exclude]
        3rd/logger.js

    All four headings -- [first], [last], [include], [exclude] -- are
    required, in that order.  (The previous docstring omitted [include],
    but the parser below raises ValueError without it.)

    The files listed in the `first` section will be forced to load
    *before* all other files (in the order listed). The files in `last`
    section will be forced to load *after* all the other files (in the
    order listed).

    If the `include` section is non-empty, only listed files (plus the
    `first` files) are imported; files in the `exclude` section are
    never imported.

    Any text appearing after a # symbol indicates a comment.
    """

    def __init__(self, filename):
        """
        Parses the content of the named file and stores the values.

        Raises ValueError if any of the four section headings is missing.
        """
        lines = [re.sub("#.*?$", "", line).strip() # Assumes end-of-line character is present
                 for line in open(filename)
                 if line.strip() and not line.strip().startswith("#")] # Skip blank lines and comments

        self.forceFirst = lines[lines.index("[first]") + 1:lines.index("[last]")]

        self.forceLast = lines[lines.index("[last]") + 1:lines.index("[include]")]
        self.include = lines[lines.index("[include]") + 1:lines.index("[exclude]")]
        self.exclude = lines[lines.index("[exclude]") + 1:]
124 | - | ||
def run (sourceDirectory, outputFilename = None, configFile = None):
    """Merge every JavaScript file under *sourceDirectory* into one string.

    "@requires" declarations are resolved with a topological sort; the
    optional *configFile* (see Config) forces ordering, inclusion and
    exclusion.  If *outputFilename* is given the merged source is also
    written there.  Returns the merged source as a string.
    """
    cfg = None
    if configFile:
        cfg = Config(configFile)

    allFiles = []

    ## Find all the Javascript source files
    for root, dirs, files in os.walk(sourceDirectory):
        for filename in files:
            if filename.endswith(SUFFIX_JAVASCRIPT) and not filename.startswith("."):
                # Path relative to sourceDirectory, normalised to forward slashes.
                filepath = os.path.join(root, filename)[len(sourceDirectory)+1:]
                filepath = filepath.replace("\\", "/")
                if cfg and cfg.include:
                    if filepath in cfg.include or filepath in cfg.forceFirst:
                        allFiles.append(filepath)
                elif (not cfg) or (filepath not in cfg.exclude):
                    allFiles.append(filepath)

    ## Header inserted at the start of each file in the output
    HEADER = "/* " + "=" * 70 + "\n %s\n" + " " + "=" * 70 + " */\n\n"

    files = {}

    order = [] # List of filepaths to output, in a dependency satisfying order

    ## Import file source code
    ## TODO: Do import when we walk the directories above?
    for filepath in allFiles:
        print "Importing: %s" % filepath
        fullpath = os.path.join(sourceDirectory, filepath).strip()
        content = open(fullpath, "U").read() # TODO: Ensure end of line @ EOF?
        files[filepath] = SourceFile(filepath, content) # TODO: Chop path?

    from toposort import toposort

    complete = False
    resolution_pass = 1

    # Fixpoint loop: sorting may pull in files (via @requires) that were not
    # found by the directory walk, so repeat until no dependency is missing.
    while not complete:
        order = [] # List of filepaths to output, in a dependency satisfying order
        nodes = []
        routes = []
        ## Resolve the dependencies
        print "Resolution pass %s... " % resolution_pass
        resolution_pass += 1

        for filepath, info in files.items():
            nodes.append(filepath)
            for neededFilePath in info.requires:
                routes.append((neededFilePath, filepath))

        for dependencyLevel in toposort(nodes, routes):
            for filepath in dependencyLevel:
                order.append(filepath)
                # NOTE: dict.has_key is Python-2-only.
                if not files.has_key(filepath):
                    print "Importing: %s" % filepath
                    fullpath = os.path.join(sourceDirectory, filepath).strip()
                    content = open(fullpath, "U").read() # TODO: Ensure end of line @ EOF?
                    files[filepath] = SourceFile(filepath, content) # TODO: Chop path?

        # Double check all dependencies have been met
        complete = True
        try:
            for fp in order:
                if max([order.index(rfp) for rfp in files[fp].requires] +
                       [order.index(fp)]) != order.index(fp):
                    complete = False
        except:
            # Any lookup failure (e.g. a file that could not be imported)
            # forces another resolution pass.
            complete = False

    ## Move forced first and last files to the required position
    if cfg:
        print "Re-ordering files..."
        order = cfg.forceFirst + [item
                     for item in order
                     if ((item not in cfg.forceFirst) and
                         (item not in cfg.forceLast))] + cfg.forceLast

    ## Output the files in the determined order
    result = []

    for fp in order:
        f = files[fp]
        print "Exporting: ", f.filepath
        result.append(HEADER % f.filepath)
        source = f.source
        result.append(source)
        if not source.endswith("\n"):
            result.append("\n")

    print "\nTotal files merged: %d " % len(files)

    if outputFilename:
        print "\nGenerating: %s" % (outputFilename)
        open(outputFilename, "w").write("".join(result))
    return "".join(result)
231 | -if __name__ == "__main__": | ||
232 | - import getopt | ||
233 | - | ||
234 | - options, args = getopt.getopt(sys.argv[1:], "-c:") | ||
235 | - | ||
236 | - try: | ||
237 | - outputFilename = args[0] | ||
238 | - except IndexError: | ||
239 | - usage(sys.argv[0]) | ||
240 | - raise SystemExit | ||
241 | - else: | ||
242 | - sourceDirectory = args[1] | ||
243 | - if not sourceDirectory: | ||
244 | - usage(sys.argv[0]) | ||
245 | - raise SystemExit | ||
246 | - | ||
247 | - configFile = None | ||
248 | - if options and options[0][0] == "-c": | ||
249 | - configFile = options[0][1] | ||
250 | - print "Parsing configuration file: %s" % filename | ||
251 | - | ||
252 | - run( sourceDirectory, outputFilename, configFile ) |
pacotes/openlayers/tools/minimize.py
@@ -1,47 +0,0 @@ | @@ -1,47 +0,0 @@ | ||
1 | -# Minimal Python Minimizer | ||
2 | -# Copyright 2008, Christopher Schmidt | ||
3 | -# Released under the MIT License | ||
4 | -# | ||
5 | -# Taken from: http://svn.crschmidt.net/personal/python/minimize.py | ||
6 | -# $Id: minimize.py 6 2008-01-03 06:33:35Z crschmidt $ | ||
7 | -# | ||
8 | -# Permission is hereby granted, free of charge, to any person obtaining a copy | ||
9 | -# of this software and associated documentation files (the "Software"), to deal | ||
10 | -# in the Software without restriction, including without limitation the rights | ||
11 | -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | ||
12 | -# copies of the Software, and to permit persons to whom the Software is | ||
13 | -# furnished to do so, subject to the following conditions: | ||
14 | -# | ||
15 | -# The above copyright notice and this permission notice shall be included in | ||
16 | -# all copies or substantial portions of the Software. | ||
17 | -# | ||
18 | -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
19 | -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
20 | -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
21 | -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
22 | -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
23 | -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | ||
24 | -# THE SOFTWARE. | ||
25 | - | ||
26 | -import re | ||
27 | - | ||
def strip_comments_helper(data):
    """Remove every /* ... */ comment together with its surrounding whitespace."""
    comment_pattern = re.compile(r'[\s]*/\*.*?\*/[\s]*', re.DOTALL)
    return comment_pattern.sub('', data)
32 | - | ||
def minimize(data, exclude=None):
    """Central function call. This will call all other compression
    functions. To add further compression algorithms, simply add
    functions whose names end in _helper which take a string as input
    and return a more compressed string as output.

    *exclude* is an optional collection of helper base names (the name
    minus the "_helper" suffix) to skip.
    """
    # .items() instead of the Python-2-only .iteritems(): identical
    # semantics on Python 2, and also works on Python 3.
    for key, item in globals().items():
        if key.endswith("_helper"):
            func_key = key[:-7]
            if not exclude or func_key not in exclude:
                data = item(data)
    return data
43 | - return data | ||
44 | - | ||
if __name__ == "__main__":
    import sys
    # Minify the file named on the command line and write the result to stdout.
    print minimize(open(sys.argv[1]).read())
pacotes/openlayers/tools/oldot.py
@@ -1,43 +0,0 @@ | @@ -1,43 +0,0 @@ | ||
1 | -import re | ||
2 | -import os | ||
def run(sourceDirectory="../lib/OpenLayers"):
    """Scan *sourceDirectory* for OpenLayers class files.

    Returns a list of [className, [parentClassNames]] pairs, one per
    JavaScript file found under the directory.  The directory argument
    was previously hard-coded; the default preserves the old behaviour.
    """
    allFiles = []
    SUFFIX_JAVASCRIPT = ".js"
    ## Find all the Javascript source files
    for root, dirs, files in os.walk(sourceDirectory):
        for filename in files:
            if filename.endswith(SUFFIX_JAVASCRIPT) and not filename.startswith("."):
                filepath = os.path.join(root, filename)[len(sourceDirectory)+1:]
                filepath = filepath.replace("\\", "/")
                data = open(os.path.join(sourceDirectory, filepath)).read()
                # First argument list of OpenLayers.Class(...) names the parents.
                parents = re.search("OpenLayers.Class\((.*?){", data,
                                    re.DOTALL)
                if parents:
                    parents = [x.strip() for x in parents.group(1).strip().strip(",").split(",")]
                else:
                    parents = []
                # BUG FIX: str.strip(".js") strips a *character set* from both
                # ends, mangling names like "Styles.js" -> "Style"; slice the
                # suffix off instead.
                cls = "OpenLayers.%s" % filepath[:-len(SUFFIX_JAVASCRIPT)].replace("/", ".")
                allFiles.append([cls, parents])
    return allFiles
23 | -print """ | ||
24 | -digraph name { | ||
25 | - fontname = "Helvetica" | ||
26 | - fontsize = 8 | ||
27 | - K = 0.6 | ||
28 | - | ||
29 | - node [ | ||
30 | - fontname = "Helvetica" | ||
31 | - fontsize = 8 | ||
32 | - shape = "plaintext" | ||
33 | - ] | ||
34 | -""" | ||
35 | - | ||
36 | -for i in run(): | ||
37 | - print i[0].replace(".", "_") | ||
38 | - for item in i[1]: | ||
39 | - if not item: continue | ||
40 | - print "%s -> %s" % (i[0].replace(".","_"), item.replace(".", "_")) | ||
41 | - print "; " | ||
42 | - | ||
43 | -print """}""" |
pacotes/openlayers/tools/release.sh
@@ -1,29 +0,0 @@ | @@ -1,29 +0,0 @@ | ||
#!/bin/sh

# Build and publish an OpenLayers release for the version given as $1.
# Usage: release.sh <version>
VERSION=$1

# Export a clean copy of the tagged release and build the single-file library.
svn export http://svn.openlayers.org/tags/openlayers/release-$VERSION OpenLayers-$VERSION
cd OpenLayers-$VERSION/build
./build.py full
cp OpenLayers.js ..

cd ..

# Prepare documentation output directories and drop compiled Python files.
mkdir doc/devdocs
mkdir doc/apidocs
rm tools/*.pyc

# Publish the build plus static resources under /api/<version>.
mkdir /www/openlayers/htdocs/api/$VERSION
cp OpenLayers.js /www/openlayers/htdocs/api/$VERSION
cp -a img/ /www/openlayers/htdocs/api/$VERSION
cp -a theme/ /www/openlayers/htdocs/api/$VERSION

cd ..

# Generate developer and API documentation with NaturalDocs.
~/nd/NaturalDocs -i OpenLayers-$VERSION/lib -o HTML OpenLayers-$VERSION/doc/devdocs -p OpenLayers-$VERSION/doc_config -s Small OL
~/nd/NaturalDocs -i OpenLayers-$VERSION/lib -o HTML OpenLayers-$VERSION/doc/apidocs -p OpenLayers-$VERSION/apidoc_config -s Small OL

# Package the release archives and publish them for download.
tar cvfz OpenLayers-$VERSION.tar.gz OpenLayers-$VERSION/
zip -9r OpenLayers-$VERSION.zip OpenLayers-$VERSION/

cp OpenLayers-$VERSION.* /www/openlayers/htdocs/download
pacotes/openlayers/tools/shrinksafe.py
@@ -1,54 +0,0 @@ | @@ -1,54 +0,0 @@ | ||
1 | -#!/usr/bin/env python | ||
2 | -# | ||
3 | -# Script to provide a wrapper around the ShrinkSafe "web service" | ||
4 | -# <http://shrinksafe.dojotoolkit.org/> | ||
5 | -# | ||
6 | - | ||
7 | -# | ||
8 | -# We use this script for two reasons: | ||
9 | -# | ||
10 | -# * This avoids having to install and configure Java and the standalone | ||
11 | -# ShrinkSafe utility. | ||
12 | -# | ||
13 | -# * The current ShrinkSafe standalone utility was broken when we last | ||
14 | -# used it. | ||
15 | -# | ||
16 | - | ||
17 | -import sys | ||
18 | - | ||
19 | -import urllib | ||
20 | -import urllib2 | ||
21 | - | ||
22 | -URL_SHRINK_SAFE = "http://shrinksafe.dojotoolkit.org/shrinksafe.php" | ||
23 | - | ||
24 | -# This would normally be dynamically generated: | ||
25 | -BOUNDARY_MARKER = "---------------------------72288400411964641492083565382" | ||
26 | - | ||
27 | -if __name__ == "__main__": | ||
28 | - ## Grab the source code | ||
29 | - try: | ||
30 | - sourceFilename = sys.argv[1] | ||
31 | - except: | ||
32 | - print "Usage: %s (<source filename>|-)" % sys.argv[0] | ||
33 | - raise SystemExit | ||
34 | - | ||
35 | - if sourceFilename == "-": | ||
36 | - sourceCode = sys.stdin.read() | ||
37 | - sourceFilename = "stdin.js" | ||
38 | - else: | ||
39 | - sourceCode = open(sourceFilename).read() | ||
40 | - | ||
41 | - ## Create the request replicating posting of the form from the web page | ||
42 | - request = urllib2.Request(url=URL_SHRINK_SAFE) | ||
43 | - request.add_header("Content-Type", | ||
44 | - "multipart/form-data; boundary=%s" % BOUNDARY_MARKER) | ||
45 | - request.add_data(""" | ||
46 | ---%s | ||
47 | -Content-Disposition: form-data; name="shrinkfile[]"; filename="%s" | ||
48 | -Content-Type: application/x-javascript | ||
49 | - | ||
50 | -%s | ||
51 | -""" % (BOUNDARY_MARKER, sourceFilename, sourceCode)) | ||
52 | - | ||
53 | - ## Deliver the result | ||
54 | - print urllib2.urlopen(request).read(), |
pacotes/openlayers/tools/toposort.py
@@ -1,260 +0,0 @@ | @@ -1,260 +0,0 @@ | ||
1 | -# | ||
2 | -# According to <http://www.vrplumber.com/programming/> this file | ||
3 | -# is licensed under a BSD-style license. We only use the section | ||
4 | -# originally by Tim Peters. | ||
5 | -# | ||
6 | -# TODO: The use of this code needs to be okayed by someone. | ||
7 | -# | ||
8 | - | ||
# NOTE(review): this name shadows Python 3's builtin RecursionError; the file
# predates it, and renaming would break every caller that catches it.
class RecursionError( OverflowError, ValueError ):
    '''Unable to calculate result because of recursive structure'''
11 | - | ||
12 | - | ||
13 | -def sort(nodes, routes, noRecursion=1): | ||
14 | - '''Passed a list of node IDs and a list of source,dest ID routes | ||
15 | - attempt to create a list of stages where each sub list | ||
16 | - is one stage in a process. | ||
17 | - ''' | ||
18 | - children, parents = _buildChildrenLists(routes) | ||
19 | - # first stage is those nodes | ||
20 | - # having no incoming routes... | ||
21 | - stage = [] | ||
22 | - stages = [stage] | ||
23 | - taken = [] | ||
24 | - for node in nodes: | ||
25 | - if (not parents.get(node)): | ||
26 | - stage.append (node) | ||
27 | - if nodes and not stage: | ||
28 | - # there is no element which does not depend on | ||
29 | - # some other element!!! | ||
30 | - stage.append( nodes[0]) | ||
31 | - taken.extend( stage ) | ||
32 | - nodes = filter ( lambda x, l=stage: x not in l, nodes ) | ||
33 | - while nodes: | ||
34 | - previousStageChildren = [] | ||
35 | - nodelen = len(nodes) | ||
36 | - # second stage are those nodes | ||
37 | - # which are direct children of the first stage | ||
38 | - for node in stage: | ||
39 | - for child in children.get (node, []): | ||
40 | - if child not in previousStageChildren and child not in taken: | ||
41 | - previousStageChildren.append(child) | ||
42 | - elif child in taken and noRecursion: | ||
43 | - raise RecursionError( (child, node) ) | ||
44 | - # unless they are children of other direct children... | ||
45 | - # TODO, actually do that... | ||
46 | - stage = previousStageChildren | ||
47 | - removes = [] | ||
48 | - for current in stage: | ||
49 | - currentParents = parents.get( current, [] ) | ||
50 | - for parent in currentParents: | ||
51 | - if parent in stage and parent != current: | ||
52 | - # might wind up removing current... | ||
53 | - if not current in parents.get(parent, []): | ||
54 | - # is not mutually dependent... | ||
55 | - removes.append( current ) | ||
56 | - for remove in removes: | ||
57 | - while remove in stage: | ||
58 | - stage.remove( remove ) | ||
59 | - stages.append( stage) | ||
60 | - taken.extend( stage ) | ||
61 | - nodes = filter ( lambda x, l=stage: x not in l, nodes ) | ||
62 | - if nodelen == len(nodes): | ||
63 | - if noRecursion: | ||
64 | - raise RecursionError( nodes ) | ||
65 | - else: | ||
66 | - stages.append( nodes[:] ) | ||
67 | - nodes = [] | ||
68 | - return stages | ||
69 | - | ||
70 | -def _buildChildrenLists (routes): | ||
71 | - childrenTable = {} | ||
72 | - parentTable = {} | ||
73 | - for sourceID,destinationID in routes: | ||
74 | - currentChildren = childrenTable.get( sourceID, []) | ||
75 | - currentParents = parentTable.get( destinationID, []) | ||
76 | - if not destinationID in currentChildren: | ||
77 | - currentChildren.append ( destinationID) | ||
78 | - if not sourceID in currentParents: | ||
79 | - currentParents.append ( sourceID) | ||
80 | - childrenTable[sourceID] = currentChildren | ||
81 | - parentTable[destinationID] = currentParents | ||
82 | - return childrenTable, parentTable | ||
83 | - | ||
84 | - | ||
def toposort (nodes, routes, noRecursion=1):
    '''Topological sort from Tim Peters, fairly efficient
    in comparison (it seems).

    Returns a list of "generations": each sub-list contains nodes whose
    dependencies are all satisfied by earlier generations.  Routes are
    (depended-on, dependent) pairs.  Raises RecursionError on a cycle
    unless noRecursion is false.
    '''
    #first calculate the recursion depth

    dependencies = {}
    inversedependencies = {}
    if not nodes:
        return []
    if not routes:
        return [nodes]
    for node in nodes:
        dependencies[ node ] = (0, node)
        inversedependencies[ node ] = []


    for depended, depends in routes:
        # is it a null rule
        try:
            # "obj" instead of "object", which shadowed the builtin.
            newdependencylevel, obj = dependencies.get ( depends, (0, depends))
        except TypeError:
            # print() is valid on Python 2 and 3 for a single argument.
            print(depends)
            raise
        dependencies[ depends ] = (newdependencylevel + 1, depends)
        # "dependency (existence) of depended-on"
        newdependencylevel, obj = dependencies.get ( depended, (0, depended) )
        dependencies[ depended ] = (newdependencylevel, depended)
        # Inverse dependency set up
        dependencieslist = inversedependencies.get ( depended, [])
        dependencieslist.append (depends)
        inversedependencies[depended] = dependencieslist
    ### Now we do the actual sorting
    # The first task is to create the sortable
    # list of dependency-levels
    # BUG FIX: dict.values() is a view on Python 3 and has no .sort();
    # list() is a no-op copy on Python 2 and required on Python 3.
    sortinglist = list(dependencies.values())
    sortinglist.sort ()
    output = []
    while sortinglist:
        deletelist = []
        generation = []
        output.append( generation)
        while sortinglist and sortinglist[0][0] == 0:
            number, obj = sortinglist[0]
            generation.append ( obj )
            deletelist.append( obj )
            for inverse in inversedependencies.get(obj, () ):
                try:
                    oldcount, inverse = dependencies [ inverse]
                    if oldcount > 0:
                        # will be dealt with on later pass
                        dependencies [ inverse] = (oldcount-1, inverse)
                    else:
                        # will be dealt with on this pass,
                        # so needs not to be in the sorting list next time
                        deletelist.append( inverse )
                        # just in case a loop comes through
                        inversedependencies[obj] = []
                except KeyError:
                    # dealing with a recursion-breaking run...
                    pass
            del sortinglist [0]
        # if no elements could be deleted, then
        # there is something which depends upon itself
        if not deletelist:
            if noRecursion:
                raise RecursionError( sortinglist )
            else:
                # hack so that something gets deleted...
                dependencies[sortinglist[0][1]] = (0,sortinglist[0][1])
        # delete the items that were dealt with
        for item in deletelist:
            try:
                del dependencies [ item ]
            except KeyError:
                pass
        # need to recreate the sortinglist
        sortinglist = list(dependencies.values())
        if not generation:
            output.remove( generation )
        sortinglist.sort ()
    return output
168 | - | ||
169 | - | ||
170 | - | ||
171 | - | ||
172 | - | ||
if __name__ == "__main__":
    # Ad-hoc demonstration: print the topological generations of a small graph.

    nodes = ['a', 'b', 'c', 'd', 'e', 'f']
    route = [('a', 'b'), ('b', 'c'), ('b', 'd'), ('e','f')]

    for x in toposort( nodes, route):
        for a in x:
            print a

    raise SystemExit

    # NOTE(review): everything below is dead code (after SystemExit), kept
    # as a reference test harness exercising cyclic and self-referential
    # graphs against both sort() and toposort().

    import pprint, traceback
    nodes= [ 0,1,2,3,4,5 ]
    testingValues = [
        [ (0,1),(1,2),(2,3),(3,4),(4,5)],
        [ (0,1),(0,2),(1,2),(3,4),(4,5)],
        [
        (0,1),
        (0,2),
        (0,2),
        (2,4),
        (2,5),
        (3,2),
        (0,3)],
        [
        (0,1), # 3-element cycle test, no orphan nodes
        (1,2),
        (2,0),
        (2,4),
        (2,5),
        (3,2),
        (0,3)],
        [
        (0,1),
        (1,1),
        (1,1),
        (1,4),
        (1,5),
        (1,2),
        (3,1),
        (2,1),
        (2,0)],
        [
        (0,1),
        (1,0),
        (0,2),
        (0,3),
        ],
        [
        (0,1),
        (1,0),
        (0,2),
        (3,1),
        ],
    ]
    # The bare excepts below are intentional: cyclic inputs are expected
    # to raise, and the harness only reports that an exception occurred.
    print 'sort, no recursion allowed'
    for index in range(len(testingValues)):
##        print '  %s -- %s'%( index, testingValues[index])
        try:
            print '  ', sort( nodes, testingValues[index] )
        except:
            print 'exception raised'
    print 'toposort, no recursion allowed'
    for index in range(len(testingValues)):
##        print '  %s -- %s'%( index, testingValues[index])
        try:
            print '  ', toposort( nodes, testingValues[index] )
        except:
            print 'exception raised'
    print 'sort, recursion allowed'
    for index in range(len(testingValues)):
##        print '  %s -- %s'%( index, testingValues[index])
        try:
            print '  ', sort( nodes, testingValues[index],0 )
        except:
            print 'exception raised'
    print 'toposort, recursion allowed'
    for index in range(len(testingValues)):
##        print '  %s -- %s'%( index, testingValues[index])
        try:
            print '  ', toposort( nodes, testingValues[index],0 )
        except:
            print 'exception raised'
259 | - | ||
260 | - |
pacotes/openlayers/tools/update_dev_dir.sh
@@ -1,45 +0,0 @@ | @@ -1,45 +0,0 @@ | ||
#!/bin/sh

# Used to update http://openlayers.org/dev/

svn up /www/openlayers/docs/dev;

# Get current 'Last Changed Rev'
REV=`svn info /www/openlayers/docs/dev/ | grep 'Last Changed Rev' | awk '{print $4}'`

# Get the last svn rev
touch /tmp/ol_svn_rev
OLD_REV="o`cat /tmp/ol_svn_rev`"

# If they're not equal, do some work.
# (Both sides carry an "o" prefix so the test is safe even when the cache
# file is empty.)
if [ ! o$REV = $OLD_REV ]; then

    cd /www/openlayers/docs/dev/tools/
    python exampleparser.py
    cd /www/openlayers/docs/dev/build
    ./build.py

    cp OpenLayers.js ..
    cd ..

    # Point the examples at the single-file build, then regenerate docs.
    sed -i -e 's!../lib/OpenLayers.js!../OpenLayers.js!' examples/*.html
    perl /home/crschmidt/NaturalDocs -i /www/openlayers/docs/dev/lib -o HTML /www/openlayers/dev/apidocs -p /www/openlayers/docs/dev/apidoc_config -s Default OL >/dev/null
    perl /home/crschmidt/NaturalDocs -i /www/openlayers/docs/dev/lib -o HTML /www/openlayers/dev/docs -p /www/openlayers/docs/dev/doc_config -s Default OL >/dev/null

    # Record the revision
    echo -n $REV > /tmp/ol_svn_rev
fi

# Same revision-cache dance for the Sphinx documentation checkout.
svn up /www/openlayers/documentation-checkout
REV=`svn info /www/openlayers/documentation-checkout | grep 'Last Changed Rev' | awk '{print $4}'`
# Get the last svn rev
touch /tmp/ol_doc_rev
OLD_REV="o`cat /tmp/ol_doc_rev`"
# If they're not equal, do some work.
if [ ! o$REV = $OLD_REV ]; then
    cd /www/openlayers/documentation-checkout
    make html > /dev/null
    cp -r _build/html/* /www/openlayers/documentation

    echo -n $REV > /tmp/ol_doc_rev
fi