Commit f97f4db2002913e1a18d17d7988ff27e00b7b9c1

Authored by Edmar Moretti
1 parent 4c12b068

--no commit message

pacotes/openlayers/tools/BeautifulSoup.py
@@ -1,1767 +0,0 @@
1 -"""Beautiful Soup  
2 -Elixir and Tonic  
3 -"The Screen-Scraper's Friend"  
4 -http://www.crummy.com/software/BeautifulSoup/  
5 -  
6 -Beautiful Soup parses a (possibly invalid) XML or HTML document into a  
7 -tree representation. It provides methods and Pythonic idioms that make  
8 -it easy to navigate, search, and modify the tree.  
9 -  
10 -A well-formed XML/HTML document yields a well-formed data  
11 -structure. An ill-formed XML/HTML document yields a correspondingly  
12 -ill-formed data structure. If your document is only locally  
13 -well-formed, you can use this library to find and process the  
14 -well-formed part of it.
15 -  
16 -Beautiful Soup works with Python 2.2 and up. It has no external  
17 -dependencies, but you'll have more success at converting data to UTF-8  
18 -if you also install these three packages:  
19 -  
20 -* chardet, for auto-detecting character encodings  
21 - http://chardet.feedparser.org/  
22 -* cjkcodecs and iconv_codec, which add more encodings to the ones supported  
23 - by stock Python.  
24 - http://cjkpython.i18n.org/  
25 -  
26 -Beautiful Soup defines classes for two main parsing strategies:  
27 -  
28 - * BeautifulStoneSoup, for parsing XML, SGML, or your domain-specific  
29 - language that kind of looks like XML.  
30 -  
31 - * BeautifulSoup, for parsing run-of-the-mill HTML code, be it valid  
32 - or invalid. This class has web browser-like heuristics for  
33 - obtaining a sensible parse tree in the face of common HTML errors.  
34 -  
35 -Beautiful Soup also defines a class (UnicodeDammit) for autodetecting  
36 -the encoding of an HTML or XML document, and converting it to  
37 -Unicode. Much of this code is taken from Mark Pilgrim's Universal Feed Parser.  
38 -  
39 -For more than you ever wanted to know about Beautiful Soup, see the  
40 -documentation:  
41 -http://www.crummy.com/software/BeautifulSoup/documentation.html  
42 -  
43 -"""  
44 -from __future__ import generators  
45 -  
46 -__author__ = "Leonard Richardson (leonardr@segfault.org)"  
47 -__version__ = "3.0.4"  
48 -__copyright__ = "Copyright (c) 2004-2007 Leonard Richardson"  
49 -__license__ = "PSF"  
50 -  
51 -from sgmllib import SGMLParser, SGMLParseError  
52 -import codecs  
53 -import types  
54 -import re  
55 -import sgmllib  
56 -try:  
57 - from htmlentitydefs import name2codepoint  
58 -except ImportError:  
59 - name2codepoint = {}  
60 -  
61 -#This hack makes Beautiful Soup able to parse XML with namespaces  
62 -sgmllib.tagfind = re.compile('[a-zA-Z][-_.:a-zA-Z0-9]*')  
63 -  
64 -DEFAULT_OUTPUT_ENCODING = "utf-8"  
65 -  
66 -# First, the classes that represent markup elements.  
67 -  
68 -class PageElement:  
69 - """Contains the navigational information for some part of the page  
70 - (either a tag or a piece of text)"""  
71 -  
72 - def setup(self, parent=None, previous=None):  
73 - """Sets up the initial relations between this element and  
74 - other elements."""  
75 - self.parent = parent  
76 - self.previous = previous  
77 - self.next = None  
78 - self.previousSibling = None  
79 - self.nextSibling = None  
80 - if self.parent and self.parent.contents:  
81 - self.previousSibling = self.parent.contents[-1]  
82 - self.previousSibling.nextSibling = self  
83 -  
84 - def replaceWith(self, replaceWith):  
85 - oldParent = self.parent  
86 - myIndex = self.parent.contents.index(self)  
87 - if hasattr(replaceWith, 'parent') and replaceWith.parent == self.parent:  
88 - # We're replacing this element with one of its siblings.  
89 - index = self.parent.contents.index(replaceWith)  
90 - if index and index < myIndex:  
91 - # Furthermore, it comes before this element. That  
92 - # means that when we extract it, the index of this  
93 - # element will change.  
94 - myIndex = myIndex - 1  
95 - self.extract()  
96 - oldParent.insert(myIndex, replaceWith)  
97 -  
98 - def extract(self):  
99 - """Destructively rips this element out of the tree."""  
100 - if self.parent:  
101 - try:  
102 - self.parent.contents.remove(self)  
103 - except ValueError:  
104 - pass  
105 -  
106 - #Find the two elements that would be next to each other if  
107 - #this element (and any children) hadn't been parsed. Connect  
108 - #the two.  
109 - lastChild = self._lastRecursiveChild()  
110 - nextElement = lastChild.next  
111 -  
112 - if self.previous:  
113 - self.previous.next = nextElement  
114 - if nextElement:  
115 - nextElement.previous = self.previous  
116 - self.previous = None  
117 - lastChild.next = None  
118 -  
119 - self.parent = None  
120 - if self.previousSibling:  
121 - self.previousSibling.nextSibling = self.nextSibling  
122 - if self.nextSibling:  
123 - self.nextSibling.previousSibling = self.previousSibling  
124 - self.previousSibling = self.nextSibling = None  
125 -  
126 - def _lastRecursiveChild(self):  
127 - "Finds the last element beneath this object to be parsed."  
128 - lastChild = self  
129 - while hasattr(lastChild, 'contents') and lastChild.contents:  
130 - lastChild = lastChild.contents[-1]  
131 - return lastChild  
132 -  
133 - def insert(self, position, newChild):  
134 - if (isinstance(newChild, basestring)  
135 - or isinstance(newChild, unicode)) \  
136 - and not isinstance(newChild, NavigableString):  
137 - newChild = NavigableString(newChild)  
138 -  
139 - position = min(position, len(self.contents))  
140 - if hasattr(newChild, 'parent') and newChild.parent != None:  
141 - # We're 'inserting' an element that's already one  
142 - # of this object's children.  
143 - if newChild.parent == self:  
144 - index = self.contents.index(newChild)
145 - if index and index < position:  
146 - # Furthermore we're moving it further down the  
147 - # list of this object's children. That means that  
148 - # when we extract this element, our target index  
149 - # will jump down one.  
150 - position = position - 1  
151 - newChild.extract()  
152 -  
153 - newChild.parent = self  
154 - previousChild = None  
155 - if position == 0:  
156 - newChild.previousSibling = None  
157 - newChild.previous = self  
158 - else:  
159 - previousChild = self.contents[position-1]  
160 - newChild.previousSibling = previousChild  
161 - newChild.previousSibling.nextSibling = newChild  
162 - newChild.previous = previousChild._lastRecursiveChild()  
163 - if newChild.previous:  
164 - newChild.previous.next = newChild  
165 -  
166 - newChildsLastElement = newChild._lastRecursiveChild()  
167 -  
168 - if position >= len(self.contents):  
169 - newChild.nextSibling = None  
170 -  
171 - parent = self  
172 - parentsNextSibling = None  
173 - while not parentsNextSibling:  
174 - parentsNextSibling = parent.nextSibling  
175 - parent = parent.parent  
176 - if not parent: # This is the last element in the document.  
177 - break  
178 - if parentsNextSibling:  
179 - newChildsLastElement.next = parentsNextSibling  
180 - else:  
181 - newChildsLastElement.next = None  
182 - else:  
183 - nextChild = self.contents[position]  
184 - newChild.nextSibling = nextChild  
185 - if newChild.nextSibling:  
186 - newChild.nextSibling.previousSibling = newChild  
187 - newChildsLastElement.next = nextChild  
188 -  
189 - if newChildsLastElement.next:  
190 - newChildsLastElement.next.previous = newChildsLastElement  
191 - self.contents.insert(position, newChild)  
192 -  
193 - def findNext(self, name=None, attrs={}, text=None, **kwargs):  
194 - """Returns the first item that matches the given criteria and  
195 - appears after this Tag in the document."""  
196 - return self._findOne(self.findAllNext, name, attrs, text, **kwargs)  
197 -  
198 - def findAllNext(self, name=None, attrs={}, text=None, limit=None,  
199 - **kwargs):  
200 - """Returns all items that match the given criteria and appear  
201 - after this Tag in the document."""
202 - return self._findAll(name, attrs, text, limit, self.nextGenerator, **kwargs)
203 -  
204 - def findNextSibling(self, name=None, attrs={}, text=None, **kwargs):  
205 - """Returns the closest sibling to this Tag that matches the  
206 - given criteria and appears after this Tag in the document."""  
207 - return self._findOne(self.findNextSiblings, name, attrs, text,  
208 - **kwargs)  
209 -  
210 - def findNextSiblings(self, name=None, attrs={}, text=None, limit=None,  
211 - **kwargs):  
212 - """Returns the siblings of this Tag that match the given  
213 - criteria and appear after this Tag in the document."""  
214 - return self._findAll(name, attrs, text, limit,  
215 - self.nextSiblingGenerator, **kwargs)  
216 - fetchNextSiblings = findNextSiblings # Compatibility with pre-3.x  
217 -  
218 - def findPrevious(self, name=None, attrs={}, text=None, **kwargs):  
219 - """Returns the first item that matches the given criteria and  
220 - appears before this Tag in the document."""  
221 - return self._findOne(self.findAllPrevious, name, attrs, text, **kwargs)  
222 -  
223 - def findAllPrevious(self, name=None, attrs={}, text=None, limit=None,  
224 - **kwargs):  
225 - """Returns all items that match the given criteria and appear  
226 - before this Tag in the document."""  
227 - return self._findAll(name, attrs, text, limit, self.previousGenerator,  
228 - **kwargs)  
229 - fetchPrevious = findAllPrevious # Compatibility with pre-3.x  
230 -  
231 - def findPreviousSibling(self, name=None, attrs={}, text=None, **kwargs):  
232 - """Returns the closest sibling to this Tag that matches the  
233 - given criteria and appears before this Tag in the document."""  
234 - return self._findOne(self.findPreviousSiblings, name, attrs, text,  
235 - **kwargs)  
236 -  
237 - def findPreviousSiblings(self, name=None, attrs={}, text=None,  
238 - limit=None, **kwargs):  
239 - """Returns the siblings of this Tag that match the given  
240 - criteria and appear before this Tag in the document."""  
241 - return self._findAll(name, attrs, text, limit,  
242 - self.previousSiblingGenerator, **kwargs)  
243 - fetchPreviousSiblings = findPreviousSiblings # Compatibility with pre-3.x  
244 -  
245 - def findParent(self, name=None, attrs={}, **kwargs):  
246 - """Returns the closest parent of this Tag that matches the given  
247 - criteria."""  
248 - # NOTE: We can't use _findOne because findParents takes a different  
249 - # set of arguments.  
250 - r = None  
251 - l = self.findParents(name, attrs, 1)  
252 - if l:  
253 - r = l[0]  
254 - return r  
255 -  
256 - def findParents(self, name=None, attrs={}, limit=None, **kwargs):  
257 - """Returns the parents of this Tag that match the given  
258 - criteria."""  
259 -  
260 - return self._findAll(name, attrs, None, limit, self.parentGenerator,  
261 - **kwargs)  
262 - fetchParents = findParents # Compatibility with pre-3.x  
263 -  
264 - #These methods do the real heavy lifting.  
265 -  
266 - def _findOne(self, method, name, attrs, text, **kwargs):  
267 - r = None  
268 - l = method(name, attrs, text, 1, **kwargs)  
269 - if l:  
270 - r = l[0]  
271 - return r  
272 -  
273 - def _findAll(self, name, attrs, text, limit, generator, **kwargs):  
274 - "Iterates over a generator looking for things that match."  
275 -  
276 - if isinstance(name, SoupStrainer):  
277 - strainer = name  
278 - else:  
279 - # Build a SoupStrainer  
280 - strainer = SoupStrainer(name, attrs, text, **kwargs)  
281 - results = ResultSet(strainer)  
282 - g = generator()  
283 - while True:  
284 - try:  
285 - i = g.next()  
286 - except StopIteration:  
287 - break  
288 - if i:  
289 - found = strainer.search(i)  
290 - if found:  
291 - results.append(found)  
292 - if limit and len(results) >= limit:  
293 - break  
294 - return results  
295 -  
296 - #These Generators can be used to navigate starting from both  
297 - #NavigableStrings and Tags.  
298 - def nextGenerator(self):  
299 - i = self  
300 - while i:  
301 - i = i.next  
302 - yield i  
303 -  
304 - def nextSiblingGenerator(self):  
305 - i = self  
306 - while i:  
307 - i = i.nextSibling  
308 - yield i  
309 -  
310 - def previousGenerator(self):  
311 - i = self  
312 - while i:  
313 - i = i.previous  
314 - yield i  
315 -  
316 - def previousSiblingGenerator(self):  
317 - i = self  
318 - while i:  
319 - i = i.previousSibling  
320 - yield i  
321 -  
322 - def parentGenerator(self):  
323 - i = self  
324 - while i:  
325 - i = i.parent  
326 - yield i  
327 -  
328 - # Utility methods  
329 - def substituteEncoding(self, str, encoding=None):  
330 - encoding = encoding or "utf-8"  
331 - return str.replace("%SOUP-ENCODING%", encoding)  
332 -  
333 - def toEncoding(self, s, encoding=None):  
334 - """Encodes an object to a string in some encoding, or to Unicode.  
335 - ."""  
336 - if isinstance(s, unicode):  
337 - if encoding:  
338 - s = s.encode(encoding)  
339 - elif isinstance(s, str):  
340 - if encoding:  
341 - s = s.encode(encoding)  
342 - else:  
343 - s = unicode(s)  
344 - else:  
345 - if encoding:  
346 - s = self.toEncoding(str(s), encoding)  
347 - else:  
348 - s = unicode(s)  
349 - return s  
350 -  
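A short sketch of the navigation methods PageElement defines above, on hypothetical table markup:

    from BeautifulSoup import BeautifulSoup

    soup = BeautifulSoup('<table><tr><td>one</td><td>two</td></tr></table>')
    cell = soup.find('td')
    print cell.findNextSibling('td')      # <td>two</td>
    print cell.findParent('table').name   # u'table'
    print cell.findNext(text=True)        # u'one' -- first text after this tag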
351 -class NavigableString(unicode, PageElement):  
352 -  
353 - def __getattr__(self, attr):  
354 - """text.string gives you text. This is for backwards  
355 - compatibility for Navigable*String, but for CData* it lets you  
356 - get the string without the CData wrapper."""  
357 - if attr == 'string':  
358 - return self  
359 - else:  
360 - raise AttributeError, "'%s' object has no attribute '%s'" % (self.__class__.__name__, attr)  
361 -  
362 - def __unicode__(self):  
363 - return self.__str__(None)  
364 -  
365 - def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING):  
366 - if encoding:  
367 - return self.encode(encoding)  
368 - else:  
369 - return self  
370 -  
371 -class CData(NavigableString):  
372 -  
373 - def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING):  
374 - return "<![CDATA[%s]]>" % NavigableString.__str__(self, encoding)  
375 -  
376 -class ProcessingInstruction(NavigableString):  
377 - def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING):  
378 - output = self  
379 - if "%SOUP-ENCODING%" in output:  
380 - output = self.substituteEncoding(output, encoding)  
381 - return "<?%s?>" % self.toEncoding(output, encoding)  
382 -  
383 -class Comment(NavigableString):  
384 - def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING):  
385 - return "<!--%s-->" % NavigableString.__str__(self, encoding)  
386 -  
387 -class Declaration(NavigableString):  
388 - def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING):  
389 - return "<!%s>" % NavigableString.__str__(self, encoding)  
390 -  
391 -class Tag(PageElement):  
392 -  
393 - """Represents a found HTML tag with its attributes and contents."""  
394 -  
395 - XML_SPECIAL_CHARS_TO_ENTITIES = { "'" : "apos",
396 - '"' : "quot",
397 - "&" : "amp",  
398 - "<" : "lt",  
399 - ">" : "gt" }  
400 -  
401 - def __init__(self, parser, name, attrs=None, parent=None,  
402 - previous=None):  
403 - "Basic constructor."  
404 -  
405 - # We don't actually store the parser object: that lets extracted  
406 - # chunks be garbage-collected  
407 - self.parserClass = parser.__class__  
408 - self.isSelfClosing = parser.isSelfClosingTag(name)  
409 - self.name = name  
410 - if attrs == None:  
411 - attrs = []  
412 - self.attrs = attrs  
413 - self.contents = []  
414 - self.setup(parent, previous)  
415 - self.hidden = False  
416 - self.containsSubstitutions = False  
417 -  
418 - def get(self, key, default=None):  
419 - """Returns the value of the 'key' attribute for the tag, or  
420 - the value given for 'default' if it doesn't have that  
421 - attribute."""  
422 - return self._getAttrMap().get(key, default)  
423 -  
424 - def has_key(self, key):  
425 - return self._getAttrMap().has_key(key)  
426 -  
427 - def __getitem__(self, key):  
428 - """tag[key] returns the value of the 'key' attribute for the tag,  
429 - and throws an exception if it's not there."""  
430 - return self._getAttrMap()[key]  
431 -  
432 - def __iter__(self):  
433 - "Iterating over a tag iterates over its contents."  
434 - return iter(self.contents)  
435 -  
436 - def __len__(self):  
437 - "The length of a tag is the length of its list of contents."  
438 - return len(self.contents)  
439 -  
440 - def __contains__(self, x):  
441 - return x in self.contents  
442 -  
443 - def __nonzero__(self):  
444 - "A tag is non-None even if it has no contents."  
445 - return True  
446 -  
447 - def __setitem__(self, key, value):  
448 - """Setting tag[key] sets the value of the 'key' attribute for the  
449 - tag."""  
450 - self._getAttrMap()  
451 - self.attrMap[key] = value  
452 - found = False  
453 - for i in range(0, len(self.attrs)):  
454 - if self.attrs[i][0] == key:  
455 - self.attrs[i] = (key, value)  
456 - found = True  
457 - if not found:  
458 - self.attrs.append((key, value))  
459 - self._getAttrMap()[key] = value  
460 -  
461 - def __delitem__(self, key):  
462 - "Deleting tag[key] deletes all 'key' attributes for the tag."  
463 - for item in self.attrs[:]:
464 - if item[0] == key:  
465 - self.attrs.remove(item)  
466 - #We don't break because bad HTML can define the same  
467 - #attribute multiple times.  
468 - self._getAttrMap()  
469 - if self.attrMap.has_key(key):  
470 - del self.attrMap[key]  
471 -  
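The methods above give Tag a dictionary-style interface over its attributes; a small sketch with made-up markup:

    from BeautifulSoup import BeautifulSoup

    soup = BeautifulSoup('<a href="http://example.com/" class="ext">go</a>')
    a = soup.a
    print a['href']               # u'http://example.com/'
    print a.get('title', 'n/a')   # 'n/a' -- missing attribute, default used
    a['rel'] = 'nofollow'         # adds a new attribute
    del a['class']                # deletes every 'class' attribute
    print a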
472 - def __call__(self, *args, **kwargs):  
473 - """Calling a tag like a function is the same as calling its  
474 - findAll() method. Eg. tag('a') returns a list of all the A tags  
475 - found within this tag."""  
476 - return apply(self.findAll, args, kwargs)  
477 -  
478 - def __getattr__(self, tag):  
479 - #print "Getattr %s.%s" % (self.__class__, tag)  
480 - if len(tag) > 3 and tag.rfind('Tag') == len(tag)-3:  
481 - return self.find(tag[:-3])  
482 - elif tag.find('__') != 0:  
483 - return self.find(tag)  
484 -  
485 - def __eq__(self, other):  
486 - """Returns true iff this tag has the same name, the same attributes,  
487 - and the same contents (recursively) as the given tag.  
488 -  
489 - NOTE: right now this will return false if two tags have the  
490 - same attributes in a different order. Should this be fixed?"""  
491 - if not hasattr(other, 'name') or not hasattr(other, 'attrs') or not hasattr(other, 'contents') or self.name != other.name or self.attrs != other.attrs or len(self) != len(other):  
492 - return False  
493 - for i in range(0, len(self.contents)):  
494 - if self.contents[i] != other.contents[i]:  
495 - return False  
496 - return True  
497 -  
498 - def __ne__(self, other):  
499 - """Returns true iff this tag is not identical to the other tag,  
500 - as defined in __eq__."""  
501 - return not self == other  
502 -  
503 - def __repr__(self, encoding=DEFAULT_OUTPUT_ENCODING):  
504 - """Renders this tag as a string."""  
505 - return self.__str__(encoding)  
506 -  
507 - def __unicode__(self):  
508 - return self.__str__(None)  
509 -  
510 - def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING,  
511 - prettyPrint=False, indentLevel=0):  
512 - """Returns a string or Unicode representation of this tag and  
513 - its contents. To get Unicode, pass None for encoding.  
514 -  
515 - NOTE: since Python's HTML parser consumes whitespace, this  
516 - method is not certain to reproduce the whitespace present in  
517 - the original string."""  
518 -  
519 - encodedName = self.toEncoding(self.name, encoding)  
520 -  
521 - attrs = []  
522 - if self.attrs:  
523 - for key, val in self.attrs:  
524 - fmt = '%s="%s"'  
525 - if isString(val):  
526 - if self.containsSubstitutions and '%SOUP-ENCODING%' in val:  
527 - val = self.substituteEncoding(val, encoding)  
528 -  
529 - # The attribute value either:  
530 - #  
531 - # * Contains no embedded double quotes or single quotes.  
532 - # No problem: we enclose it in double quotes.  
533 - # * Contains embedded single quotes. No problem:  
534 - # double quotes work here too.  
535 - # * Contains embedded double quotes. No problem:  
536 - # we enclose it in single quotes.  
537 - # * Embeds both single _and_ double quotes. This  
538 - # can't happen naturally, but it can happen if  
539 - # you modify an attribute value after parsing  
540 - # the document. Now we have a bit of a  
541 - # problem. We solve it by enclosing the  
542 - # attribute in single quotes, and escaping any  
543 - # embedded single quotes to XML entities.  
544 - if '"' in val:  
545 - fmt = "%s='%s'"  
546 - # This can't happen naturally, but it can happen  
547 - # if you modify an attribute value after parsing.  
548 - if "'" in val:  
549 - val = val.replace("'", "&apos;")
550 -  
551 - # Now we're okay w/r/t quotes. But the attribute  
552 - # value might also contain angle brackets, or  
553 - # ampersands that aren't part of entities. We need  
554 - # to escape those to XML entities too.  
555 - val = re.sub("([<>]|&(?![^\s]+;))",  
556 - lambda x: "&" + self.XML_SPECIAL_CHARS_TO_ENTITIES[x.group(0)[0]] + ";",  
557 - val)  
558 -  
559 - attrs.append(fmt % (self.toEncoding(key, encoding),  
560 - self.toEncoding(val, encoding)))  
561 - close = ''  
562 - closeTag = ''  
563 - if self.isSelfClosing:  
564 - close = ' /'  
565 - else:  
566 - closeTag = '</%s>' % encodedName  
567 -  
568 - indentTag, indentContents = 0, 0  
569 - if prettyPrint:  
570 - indentTag = indentLevel  
571 - space = (' ' * (indentTag-1))  
572 - indentContents = indentTag + 1  
573 - contents = self.renderContents(encoding, prettyPrint, indentContents)  
574 - if self.hidden:  
575 - s = contents  
576 - else:  
577 - s = []  
578 - attributeString = ''  
579 - if attrs:  
580 - attributeString = ' ' + ' '.join(attrs)  
581 - if prettyPrint:  
582 - s.append(space)  
583 - s.append('<%s%s%s>' % (encodedName, attributeString, close))  
584 - if prettyPrint:  
585 - s.append("\n")  
586 - s.append(contents)  
587 - if prettyPrint and contents and contents[-1] != "\n":  
588 - s.append("\n")  
589 - if prettyPrint and closeTag:  
590 - s.append(space)  
591 - s.append(closeTag)  
592 - if prettyPrint and closeTag and self.nextSibling:  
593 - s.append("\n")  
594 - s = ''.join(s)  
595 - return s  
596 -  
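A sketch of the quoting behavior the comments above describe: a value that acquires embedded double quotes after parsing is re-serialized in single quotes (sample markup is hypothetical):

    from BeautifulSoup import BeautifulSoup

    soup = BeautifulSoup('<p title="plain">text</p>')
    p = soup.p
    p['title'] = 'say "cheese"'   # value now embeds double quotes
    print p                       # <p title='say "cheese"'>text</p>
    print soup.prettify()         # same tree, indented, one tag per line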
597 - def prettify(self, encoding=DEFAULT_OUTPUT_ENCODING):  
598 - return self.__str__(encoding, True)  
599 -  
600 - def renderContents(self, encoding=DEFAULT_OUTPUT_ENCODING,  
601 - prettyPrint=False, indentLevel=0):  
602 - """Renders the contents of this tag as a string in the given  
603 - encoding. If encoding is None, returns a Unicode string."""
604 - s=[]  
605 - for c in self:  
606 - text = None  
607 - if isinstance(c, NavigableString):  
608 - text = c.__str__(encoding)  
609 - elif isinstance(c, Tag):  
610 - s.append(c.__str__(encoding, prettyPrint, indentLevel))  
611 - if text and prettyPrint:  
612 - text = text.strip()  
613 - if text:  
614 - if prettyPrint:  
615 - s.append(" " * (indentLevel-1))  
616 - s.append(text)  
617 - if prettyPrint:  
618 - s.append("\n")  
619 - return ''.join(s)  
620 -  
621 - #Soup methods  
622 -  
623 - def find(self, name=None, attrs={}, recursive=True, text=None,  
624 - **kwargs):  
625 - """Return only the first child of this Tag matching the given  
626 - criteria."""  
627 - r = None  
628 - l = self.findAll(name, attrs, recursive, text, 1, **kwargs)  
629 - if l:  
630 - r = l[0]  
631 - return r  
632 - findChild = find  
633 -  
634 - def findAll(self, name=None, attrs={}, recursive=True, text=None,  
635 - limit=None, **kwargs):  
636 - """Extracts a list of Tag objects that match the given  
637 - criteria. You can specify the name of the Tag and any  
638 - attributes you want the Tag to have.  
639 -  
640 - The value of a key-value pair in the 'attrs' map can be a  
641 - string, a list of strings, a regular expression object, or a  
642 - callable that takes a string and returns whether or not the  
643 - string matches for some custom definition of 'matches'. The  
644 - same is true of the tag name."""  
645 - generator = self.recursiveChildGenerator  
646 - if not recursive:  
647 - generator = self.childGenerator  
648 - return self._findAll(name, attrs, text, limit, generator, **kwargs)  
649 - findChildren = findAll  
650 -  
651 - # Pre-3.x compatibility methods  
652 - first = find  
653 - fetch = findAll  
654 -  
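A sketch of the matching criteria the findAll docstring lists (string, list, regular expression, callable, and attribute map), on made-up markup:

    import re
    from BeautifulSoup import BeautifulSoup

    soup = BeautifulSoup('<h1>A</h1><h2 id="x">B</h2><pre>C</pre>')
    print soup.findAll(['h1', 'h2'])                      # list of names
    print soup.findAll(re.compile('^h[0-9]$'))            # regexp on the name
    print soup.findAll(lambda tag: len(tag.attrs) == 1)   # callable on the Tag
    print soup.findAll('h2', {'id': 'x'})                 # attribute criteria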
655 - def fetchText(self, text=None, recursive=True, limit=None):  
656 - return self.findAll(text=text, recursive=recursive, limit=limit)  
657 -  
658 - def firstText(self, text=None, recursive=True):  
659 - return self.find(text=text, recursive=recursive)  
660 -  
661 - #Utility methods  
662 -  
663 - def append(self, tag):  
664 - """Appends the given tag to the contents of this tag."""  
665 - self.contents.append(tag)  
666 -  
667 - #Private methods  
668 -  
669 - def _getAttrMap(self):  
670 - """Initializes a map representation of this tag's attributes,  
671 - if not already initialized."""  
672 - if not getattr(self, 'attrMap', None):
673 - self.attrMap = {}  
674 - for (key, value) in self.attrs:  
675 - self.attrMap[key] = value  
676 - return self.attrMap  
677 -  
678 - #Generator methods  
679 - def childGenerator(self):  
680 - for i in range(0, len(self.contents)):  
681 - yield self.contents[i]  
682 - raise StopIteration  
683 -  
684 - def recursiveChildGenerator(self):  
685 - stack = [(self, 0)]  
686 - while stack:  
687 - tag, start = stack.pop()  
688 - if isinstance(tag, Tag):  
689 - for i in range(start, len(tag.contents)):  
690 - a = tag.contents[i]  
691 - yield a  
692 - if isinstance(a, Tag) and tag.contents:  
693 - if i < len(tag.contents) - 1:  
694 - stack.append((tag, i+1))  
695 - stack.append((a, 0))  
696 - break  
697 - raise StopIteration  
698 -  
699 -# Next, a couple classes to represent queries and their results.  
700 -class SoupStrainer:  
701 - """Encapsulates a number of ways of matching a markup element (tag or  
702 - text)."""  
703 -  
704 - def __init__(self, name=None, attrs={}, text=None, **kwargs):  
705 - self.name = name  
706 - if isString(attrs):  
707 - kwargs['class'] = attrs  
708 - attrs = None  
709 - if kwargs:  
710 - if attrs:  
711 - attrs = attrs.copy()  
712 - attrs.update(kwargs)  
713 - else:  
714 - attrs = kwargs  
715 - self.attrs = attrs  
716 - self.text = text  
717 -  
718 - def __str__(self):  
719 - if self.text:  
720 - return self.text  
721 - else:  
722 - return "%s|%s" % (self.name, self.attrs)  
723 -  
724 - def searchTag(self, markupName=None, markupAttrs={}):  
725 - found = None  
726 - markup = None  
727 - if isinstance(markupName, Tag):  
728 - markup = markupName  
729 - markupAttrs = markup  
730 - callFunctionWithTagData = callable(self.name) \  
731 - and not isinstance(markupName, Tag)  
732 -  
733 - if (not self.name) \  
734 - or callFunctionWithTagData \  
735 - or (markup and self._matches(markup, self.name)) \  
736 - or (not markup and self._matches(markupName, self.name)):  
737 - if callFunctionWithTagData:  
738 - match = self.name(markupName, markupAttrs)  
739 - else:  
740 - match = True  
741 - markupAttrMap = None  
742 - for attr, matchAgainst in self.attrs.items():  
743 - if not markupAttrMap:  
744 - if hasattr(markupAttrs, 'get'):  
745 - markupAttrMap = markupAttrs  
746 - else:  
747 - markupAttrMap = {}  
748 - for k,v in markupAttrs:  
749 - markupAttrMap[k] = v  
750 - attrValue = markupAttrMap.get(attr)  
751 - if not self._matches(attrValue, matchAgainst):  
752 - match = False  
753 - break  
754 - if match:  
755 - if markup:  
756 - found = markup  
757 - else:  
758 - found = markupName  
759 - return found  
760 -  
761 - def search(self, markup):  
762 - #print 'looking for %s in %s' % (self, markup)  
763 - found = None  
764 - # If given a list of items, scan it for a text element that  
765 - # matches.  
766 - if isList(markup) and not isinstance(markup, Tag):  
767 - for element in markup:  
768 - if isinstance(element, NavigableString) \  
769 - and self.search(element):  
770 - found = element  
771 - break  
772 - # If it's a Tag, make sure its name or attributes match.  
773 - # Don't bother with Tags if we're searching for text.  
774 - elif isinstance(markup, Tag):  
775 - if not self.text:  
776 - found = self.searchTag(markup)  
777 - # If it's text, make sure the text matches.  
778 - elif isinstance(markup, NavigableString) or \  
779 - isString(markup):  
780 - if self._matches(markup, self.text):  
781 - found = markup  
782 - else:  
783 - raise Exception, "I don't know how to match against a %s" \  
784 - % markup.__class__  
785 - return found  
786 -  
787 - def _matches(self, markup, matchAgainst):  
788 - #print "Matching %s against %s" % (markup, matchAgainst)  
789 - result = False  
790 - if matchAgainst == True and type(matchAgainst) == types.BooleanType:  
791 - result = markup != None  
792 - elif callable(matchAgainst):  
793 - result = matchAgainst(markup)  
794 - else:  
795 - #Custom match methods take the tag as an argument, but all  
796 - #other ways of matching match the tag name as a string.  
797 - if isinstance(markup, Tag):  
798 - markup = markup.name  
799 - if markup and not isString(markup):  
800 - markup = unicode(markup)  
801 - #Now we know that chunk is either a string, or None.  
802 - if hasattr(matchAgainst, 'match'):  
803 - # It's a regexp object.  
804 - result = markup and matchAgainst.search(markup)  
805 - elif isList(matchAgainst):  
806 - result = markup in matchAgainst  
807 - elif hasattr(matchAgainst, 'items'):  
808 - result = matchAgainst.has_key(markup)
809 - elif matchAgainst and isString(markup):  
810 - if isinstance(markup, unicode):  
811 - matchAgainst = unicode(matchAgainst)  
812 - else:  
813 - matchAgainst = str(matchAgainst)  
814 -  
815 - if not result:  
816 - result = matchAgainst == markup  
817 - return result  
818 -  
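A sketch of a SoupStrainer in action: passed as parseOnlyThese, it filters at parse time, so non-matching tags never enter the tree (markup and pattern are illustrative):

    import re
    from BeautifulSoup import BeautifulSoup, SoupStrainer

    links = SoupStrainer('a', href=re.compile('^http://'))
    soup = BeautifulSoup('<p><a href="http://x/">x</a><a href="/y">y</a></p>',
                         parseOnlyThese=links)
    print soup.findAll('a')   # only the absolute link was kept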
819 -class ResultSet(list):  
820 - """A ResultSet is just a list that keeps track of the SoupStrainer  
821 - that created it."""  
822 - def __init__(self, source):  
823 - list.__init__(self)
824 - self.source = source  
825 -  
826 -# Now, some helper functions.  
827 -  
828 -def isList(l):  
829 - """Convenience method that works with all 2.x versions of Python  
830 - to determine whether or not something is listlike."""  
831 - return hasattr(l, '__iter__') \  
832 - or (type(l) in (types.ListType, types.TupleType))  
833 -  
834 -def isString(s):  
835 - """Convenience method that works with all 2.x versions of Python  
836 - to determine whether or not something is stringlike."""  
837 - try:  
838 - return isinstance(s, unicode) or isinstance(s, basestring)
839 - except NameError:  
840 - return isinstance(s, str)  
841 -  
842 -def buildTagMap(default, *args):  
843 - """Turns a list of maps, lists, or scalars into a single map.  
844 - Used to build the SELF_CLOSING_TAGS, NESTABLE_TAGS, and  
845 - NESTING_RESET_TAGS maps out of lists and partial maps."""  
846 - built = {}  
847 - for portion in args:  
848 - if hasattr(portion, 'items'):  
849 - #It's a map. Merge it.  
850 - for k,v in portion.items():  
851 - built[k] = v  
852 - elif isList(portion):  
853 - #It's a list. Map each item to the default.  
854 - for k in portion:  
855 - built[k] = default  
856 - else:  
857 - #It's a scalar. Map it to the default.  
858 - built[portion] = default  
859 - return built  
860 -  
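For illustration, a sketch of what buildTagMap produces from mixed arguments (dictionary ordering may vary):

    from BeautifulSoup import buildTagMap

    print buildTagMap([], {'li': ['ul', 'ol']}, ['td', 'th'], 'tr')
    # {'li': ['ul', 'ol'], 'td': [], 'th': [], 'tr': []}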
861 -# Now, the parser classes.  
862 -  
863 -class BeautifulStoneSoup(Tag, SGMLParser):  
864 -  
865 - """This class contains the basic parser and search code. It defines  
866 - a parser that knows nothing about tag behavior except for the  
867 - following:  
868 -  
869 - You can't close a tag without closing all the tags it encloses.  
870 - That is, "<foo><bar></foo>" actually means  
871 - "<foo><bar></bar></foo>".  
872 -  
873 - [Another possible explanation is "<foo><bar /></foo>", but since  
874 - this class defines no SELF_CLOSING_TAGS, it will never use that  
875 - explanation.]  
876 -  
877 - This class is useful for parsing XML or made-up markup languages,  
878 - or when BeautifulSoup makes an assumption counter to what you were  
879 - expecting."""  
880 -  
881 - XML_ENTITY_LIST = {}  
882 - for i in Tag.XML_SPECIAL_CHARS_TO_ENTITIES.values():  
883 - XML_ENTITY_LIST[i] = True  
884 -  
885 - SELF_CLOSING_TAGS = {}  
886 - NESTABLE_TAGS = {}  
887 - RESET_NESTING_TAGS = {}  
888 - QUOTE_TAGS = {}  
889 -  
890 - MARKUP_MASSAGE = [(re.compile('(<[^<>]*)/>'),  
891 - lambda x: x.group(1) + ' />'),  
892 - (re.compile('<!\s+([^<>]*)>'),  
893 - lambda x: '<!' + x.group(1) + '>')  
894 - ]  
895 -  
896 - ROOT_TAG_NAME = u'[document]'  
897 -  
898 - HTML_ENTITIES = "html"  
899 - XML_ENTITIES = "xml"  
900 -  
901 - def __init__(self, markup="", parseOnlyThese=None, fromEncoding=None,  
902 - markupMassage=True, smartQuotesTo=XML_ENTITIES,  
903 - convertEntities=None, selfClosingTags=None):  
904 - """The Soup object is initialized as the 'root tag', and the  
905 - provided markup (which can be a string or a file-like object)  
906 - is fed into the underlying parser.  
907 -  
908 - sgmllib will process most bad HTML, and the BeautifulSoup  
909 - class has some tricks for dealing with some HTML that kills  
910 - sgmllib, but Beautiful Soup can nonetheless choke or lose data  
911 - if your data uses self-closing tags or declarations  
912 - incorrectly.  
913 -  
914 - By default, Beautiful Soup uses regexes to sanitize input,  
915 - avoiding the vast majority of these problems. If the problems  
916 - don't apply to you, pass in False for markupMassage, and  
917 - you'll get better performance.  
918 -  
919 - The default parser massage techniques fix the two most common  
920 - instances of invalid HTML that choke sgmllib:  
921 -  
922 - <br/> (No space between name of closing tag and tag close)  
923 - <! --Comment--> (Extraneous whitespace in declaration)  
924 -  
925 - You can pass in a custom list of (RE object, replace method)  
926 - tuples to get Beautiful Soup to scrub your input the way you  
927 - want."""  
928 -  
929 - self.parseOnlyThese = parseOnlyThese  
930 - self.fromEncoding = fromEncoding  
931 - self.smartQuotesTo = smartQuotesTo  
932 - self.convertEntities = convertEntities  
933 - if self.convertEntities:  
934 - # It doesn't make sense to convert encoded characters to  
935 - # entities even while you're converting entities to Unicode.  
936 - # Just convert it all to Unicode.  
937 - self.smartQuotesTo = None  
938 - self.instanceSelfClosingTags = buildTagMap(None, selfClosingTags)  
939 - SGMLParser.__init__(self)  
940 -  
941 - if hasattr(markup, 'read'): # It's a file-type object.  
942 - markup = markup.read()  
943 - self.markup = markup  
944 - self.markupMassage = markupMassage  
945 - try:  
946 - self._feed()  
947 - except StopParsing:  
948 - pass  
949 - self.markup = None # The markup can now be GCed  
950 -  
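A sketch of the custom markupMassage hook the docstring above mentions: extend the default (regexp, replacement) pairs with one that strips NUL bytes before sgmllib sees them (purely illustrative):

    import re
    from BeautifulSoup import BeautifulStoneSoup

    myMassage = BeautifulStoneSoup.MARKUP_MASSAGE + [
        (re.compile('\x00'), lambda match: '')]
    soup = BeautifulStoneSoup('<doc>a\x00b</doc>', markupMassage=myMassage)
    print soup.doc.string    # u'ab'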
951 - def _feed(self, inDocumentEncoding=None):  
952 - # Convert the document to Unicode.  
953 - markup = self.markup  
954 - if isinstance(markup, unicode):  
955 - if not hasattr(self, 'originalEncoding'):  
956 - self.originalEncoding = None  
957 - else:  
958 - dammit = UnicodeDammit\  
959 - (markup, [self.fromEncoding, inDocumentEncoding],  
960 - smartQuotesTo=self.smartQuotesTo)  
961 - markup = dammit.unicode  
962 - self.originalEncoding = dammit.originalEncoding  
963 - if markup:  
964 - if self.markupMassage:  
965 - if not isList(self.markupMassage):  
966 - self.markupMassage = self.MARKUP_MASSAGE  
967 - for fix, m in self.markupMassage:  
968 - markup = fix.sub(m, markup)  
969 - self.reset()  
970 -  
971 - SGMLParser.feed(self, markup)  
972 - # Close out any unfinished strings and close all the open tags.  
973 - self.endData()  
974 - while self.currentTag.name != self.ROOT_TAG_NAME:  
975 - self.popTag()  
976 -  
977 - def __getattr__(self, methodName):  
978 - """This method routes method call requests to either the SGMLParser  
979 - superclass or the Tag superclass, depending on the method name."""  
980 - #print "__getattr__ called on %s.%s" % (self.__class__, methodName)  
981 -  
982 - if methodName.find('start_') == 0 or methodName.find('end_') == 0 \  
983 - or methodName.find('do_') == 0:  
984 - return SGMLParser.__getattr__(self, methodName)  
985 - elif methodName.find('__') != 0:  
986 - return Tag.__getattr__(self, methodName)  
987 - else:  
988 - raise AttributeError  
989 -  
990 - def isSelfClosingTag(self, name):  
991 - """Returns true iff the given string is the name of a  
992 - self-closing tag according to this parser."""  
993 - return self.SELF_CLOSING_TAGS.has_key(name) \  
994 - or self.instanceSelfClosingTags.has_key(name)  
995 -  
996 - def reset(self):  
997 - Tag.__init__(self, self, self.ROOT_TAG_NAME)  
998 - self.hidden = 1  
999 - SGMLParser.reset(self)  
1000 - self.currentData = []  
1001 - self.currentTag = None  
1002 - self.tagStack = []  
1003 - self.quoteStack = []  
1004 - self.pushTag(self)  
1005 -  
1006 - def popTag(self):  
1007 - tag = self.tagStack.pop()  
1008 - # Tags with just one string-owning child get the child as a  
1009 - # 'string' property, so that soup.tag.string is shorthand for  
1010 - # soup.tag.contents[0]  
1011 - if len(self.currentTag.contents) == 1 and \  
1012 - isinstance(self.currentTag.contents[0], NavigableString):  
1013 - self.currentTag.string = self.currentTag.contents[0]  
1014 -  
1015 - #print "Pop", tag.name  
1016 - if self.tagStack:  
1017 - self.currentTag = self.tagStack[-1]  
1018 - return self.currentTag  
1019 -  
1020 - def pushTag(self, tag):  
1021 - #print "Push", tag.name  
1022 - if self.currentTag:  
1023 - self.currentTag.append(tag)  
1024 - self.tagStack.append(tag)  
1025 - self.currentTag = self.tagStack[-1]  
1026 -  
1027 - def endData(self, containerClass=NavigableString):  
1028 - if self.currentData:  
1029 - currentData = ''.join(self.currentData)  
1030 - if not currentData.strip():  
1031 - if '\n' in currentData:  
1032 - currentData = '\n'  
1033 - else:  
1034 - currentData = ' '  
1035 - self.currentData = []  
1036 - if self.parseOnlyThese and len(self.tagStack) <= 1 and \  
1037 - (not self.parseOnlyThese.text or \  
1038 - not self.parseOnlyThese.search(currentData)):  
1039 - return  
1040 - o = containerClass(currentData)  
1041 - o.setup(self.currentTag, self.previous)  
1042 - if self.previous:  
1043 - self.previous.next = o  
1044 - self.previous = o  
1045 - self.currentTag.contents.append(o)  
1046 -  
1047 -  
1048 - def _popToTag(self, name, inclusivePop=True):  
1049 - """Pops the tag stack up to and including the most recent  
1050 - instance of the given tag. If inclusivePop is false, pops the tag  
1051 - stack up to but *not* including the most recent instance of
1052 - the given tag."""  
1053 - #print "Popping to %s" % name  
1054 - if name == self.ROOT_TAG_NAME:  
1055 - return  
1056 -  
1057 - numPops = 0  
1058 - mostRecentTag = None  
1059 - for i in range(len(self.tagStack)-1, 0, -1):  
1060 - if name == self.tagStack[i].name:  
1061 - numPops = len(self.tagStack)-i  
1062 - break  
1063 - if not inclusivePop:  
1064 - numPops = numPops - 1  
1065 -  
1066 - for i in range(0, numPops):  
1067 - mostRecentTag = self.popTag()  
1068 - return mostRecentTag  
1069 -  
1070 - def _smartPop(self, name):  
1071 -  
1072 - """We need to pop up to the previous tag of this type, unless  
1073 - one of this tag's nesting reset triggers comes between this  
1074 - tag and the previous tag of this type, OR unless this tag is a  
1075 - generic nesting trigger and another generic nesting trigger  
1076 - comes between this tag and the previous tag of this type.  
1077 -  
1078 - Examples:  
1079 - <p>Foo<b>Bar<p> should pop to 'p', not 'b'.  
1080 - <p>Foo<table>Bar<p> should pop to 'table', not 'p'.  
1081 - <p>Foo<table><tr>Bar<p> should pop to 'tr', not 'p'.  
1082 - <p>Foo<b>Bar<p> should pop to 'p', not 'b'.  
1083 -  
1084 - <li><ul><li> *<li>* should pop to 'ul', not the first 'li'.  
1085 - <tr><table><tr> *<tr>* should pop to 'table', not the first 'tr'  
1086 - <td><tr><td> *<td>* should pop to 'tr', not the first 'td'  
1087 - """  
1088 -  
1089 - nestingResetTriggers = self.NESTABLE_TAGS.get(name)  
1090 - isNestable = nestingResetTriggers != None  
1091 - isResetNesting = self.RESET_NESTING_TAGS.has_key(name)  
1092 - popTo = None  
1093 - inclusive = True  
1094 - for i in range(len(self.tagStack)-1, 0, -1):  
1095 - p = self.tagStack[i]  
1096 - if (not p or p.name == name) and not isNestable:  
1097 - #Non-nestable tags get popped to the top or to their  
1098 - #last occurrence.
1099 - popTo = name  
1100 - break  
1101 - if (nestingResetTriggers != None  
1102 - and p.name in nestingResetTriggers) \  
1103 - or (nestingResetTriggers == None and isResetNesting  
1104 - and self.RESET_NESTING_TAGS.has_key(p.name)):  
1105 -  
1106 - #If we encounter one of the nesting reset triggers  
1107 - #peculiar to this tag, or we encounter another tag  
1108 - #that causes nesting to reset, pop up to but not  
1109 - #including that tag.  
1110 - popTo = p.name  
1111 - inclusive = False  
1112 - break  
1113 - p = p.parent  
1114 - if popTo:  
1115 - self._popToTag(popTo, inclusive)  
1116 -  
1117 - def unknown_starttag(self, name, attrs, selfClosing=0):  
1118 - #print "Start tag %s: %s" % (name, attrs)  
1119 - if self.quoteStack:  
1120 - #This is not a real tag.  
1121 - #print "<%s> is not real!" % name  
1122 - attrs = ''.join(map(lambda(x, y): ' %s="%s"' % (x, y), attrs))  
1123 - self.handle_data('<%s%s>' % (name, attrs))  
1124 - return  
1125 - self.endData()  
1126 -  
1127 - if not self.isSelfClosingTag(name) and not selfClosing:  
1128 - self._smartPop(name)  
1129 -  
1130 - if self.parseOnlyThese and len(self.tagStack) <= 1 \  
1131 - and (self.parseOnlyThese.text or not self.parseOnlyThese.searchTag(name, attrs)):  
1132 - return  
1133 -  
1134 - tag = Tag(self, name, attrs, self.currentTag, self.previous)  
1135 - if self.previous:  
1136 - self.previous.next = tag  
1137 - self.previous = tag  
1138 - self.pushTag(tag)  
1139 - if selfClosing or self.isSelfClosingTag(name):  
1140 - self.popTag()  
1141 - if name in self.QUOTE_TAGS:  
1142 - #print "Beginning quote (%s)" % name  
1143 - self.quoteStack.append(name)  
1144 - self.literal = 1  
1145 - return tag  
1146 -  
1147 - def unknown_endtag(self, name):  
1148 - #print "End tag %s" % name  
1149 - if self.quoteStack and self.quoteStack[-1] != name:  
1150 - #This is not a real end tag.  
1151 - #print "</%s> is not real!" % name  
1152 - self.handle_data('</%s>' % name)  
1153 - return  
1154 - self.endData()  
1155 - self._popToTag(name)  
1156 - if self.quoteStack and self.quoteStack[-1] == name:  
1157 - self.quoteStack.pop()  
1158 - self.literal = (len(self.quoteStack) > 0)  
1159 -  
1160 - def handle_data(self, data):  
1161 - self.currentData.append(data)  
1162 -  
1163 - def _toStringSubclass(self, text, subclass):  
1164 - """Adds a certain piece of text to the tree as a NavigableString  
1165 - subclass."""  
1166 - self.endData()  
1167 - self.handle_data(text)  
1168 - self.endData(subclass)  
1169 -  
1170 - def handle_pi(self, text):  
1171 - """Handle a processing instruction as a ProcessingInstruction  
1172 - object, possibly one with a %SOUP-ENCODING% slot into which an  
1173 - encoding will be plugged later."""  
1174 - if text[:3] == "xml":  
1175 - text = "xml version='1.0' encoding='%SOUP-ENCODING%'"  
1176 - self._toStringSubclass(text, ProcessingInstruction)  
1177 -  
1178 - def handle_comment(self, text):  
1179 - "Handle comments as Comment objects."  
1180 - self._toStringSubclass(text, Comment)  
1181 -  
1182 - def handle_charref(self, ref):  
1183 - "Handle character references as data."  
1184 - if self.convertEntities in [self.HTML_ENTITIES,  
1185 - self.XML_ENTITIES]:  
1186 - data = unichr(int(ref))  
1187 - else:  
1188 - data = '&#%s;' % ref  
1189 - self.handle_data(data)  
1190 -  
1191 - def handle_entityref(self, ref):  
1192 - """Handle entity references as data, possibly converting known  
1193 - HTML entity references to the corresponding Unicode  
1194 - characters."""  
1195 - data = None  
1196 - if self.convertEntities == self.HTML_ENTITIES or \  
1197 - (self.convertEntities == self.XML_ENTITIES and \  
1198 - self.XML_ENTITY_LIST.get(ref)):  
1199 - try:  
1200 - data = unichr(name2codepoint[ref])  
1201 - except KeyError:  
1202 - pass  
1203 - if not data:  
1204 - data = '&%s;' % ref  
1205 - self.handle_data(data)  
1206 -  
1207 - def handle_decl(self, data):  
1208 - "Handle DOCTYPEs and the like as Declaration objects."  
1209 - self._toStringSubclass(data, Declaration)  
1210 -  
1211 - def parse_declaration(self, i):  
1212 - """Treat a bogus SGML declaration as raw data. Treat a CDATA  
1213 - declaration as a CData object."""  
1214 - j = None  
1215 - if self.rawdata[i:i+9] == '<![CDATA[':  
1216 - k = self.rawdata.find(']]>', i)  
1217 - if k == -1:  
1218 - k = len(self.rawdata)  
1219 - data = self.rawdata[i+9:k]  
1220 - j = k+3  
1221 - self._toStringSubclass(data, CData)  
1222 - else:  
1223 - try:  
1224 - j = SGMLParser.parse_declaration(self, i)  
1225 - except SGMLParseError:  
1226 - toHandle = self.rawdata[i:]  
1227 - self.handle_data(toHandle)  
1228 - j = i + len(toHandle)  
1229 - return j  
1230 -  
1231 -class BeautifulSoup(BeautifulStoneSoup):  
1232 -  
1233 - """This parser knows the following facts about HTML:  
1234 -  
1235 - * Some tags have no closing tag and should be interpreted as being  
1236 - closed as soon as they are encountered.  
1237 -  
1238 - * The text inside some tags (ie. 'script') may contain tags which  
1239 - are not really part of the document and which should be parsed  
1240 - as text, not tags. If you want to parse the text as tags, you can  
1241 - always fetch it and parse it explicitly.  
1242 -  
1243 - * Tag nesting rules:  
1244 -  
1245 - Most tags can't be nested at all. For instance, the occurrence of
1246 - a <p> tag should implicitly close the previous <p> tag.  
1247 -  
1248 - <p>Para1<p>Para2  
1249 - should be transformed into:  
1250 - <p>Para1</p><p>Para2  
1251 -  
1252 - Some tags can be nested arbitrarily. For instance, the occurrence
1253 - of a <blockquote> tag should _not_ implicitly close the previous  
1254 - <blockquote> tag.  
1255 -  
1256 - Alice said: <blockquote>Bob said: <blockquote>Blah  
1257 - should NOT be transformed into:  
1258 - Alice said: <blockquote>Bob said: </blockquote><blockquote>Blah  
1259 -  
1260 - Some tags can be nested, but the nesting is reset by the  
1261 - interposition of other tags. For instance, a <tr> tag should  
1262 - implicitly close the previous <tr> tag within the same <table>,  
1263 - but not close a <tr> tag in another table.  
1264 -  
1265 - <table><tr>Blah<tr>Blah  
1266 - should be transformed into:  
1267 - <table><tr>Blah</tr><tr>Blah  
1268 - but,  
1269 - <tr>Blah<table><tr>Blah  
1270 - should NOT be transformed into  
1271 - <tr>Blah<table></tr><tr>Blah  
1272 -  
1273 - Differing assumptions about tag nesting rules are a major source  
1274 - of problems with the BeautifulSoup class. If BeautifulSoup is not  
1275 - treating as nestable a tag your page author treats as nestable,  
1276 - try ICantBelieveItsBeautifulSoup, MinimalSoup, or  
1277 - BeautifulStoneSoup before writing your own subclass."""  
1278 -  
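A sketch of the three nesting behaviors the docstring above describes, on tiny fragments:

    from BeautifulSoup import BeautifulSoup

    # Non-nestable: the second <p> implicitly closes the first.
    print BeautifulSoup('<p>Para1<p>Para2')
    # <p>Para1</p><p>Para2</p>

    # Nestable: the second <blockquote> stays inside the first.
    print BeautifulSoup('<blockquote>a<blockquote>b')
    # <blockquote>a<blockquote>b</blockquote></blockquote>

    # Reset by an intervening <table>: the inner <tr> does not
    # close the outer one.
    print BeautifulSoup('<table><tr>Blah<tr>Blah</table>')
    # <table><tr>Blah</tr><tr>Blah</tr></table>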
1279 - def __init__(self, *args, **kwargs):  
1280 - if not kwargs.has_key('smartQuotesTo'):  
1281 - kwargs['smartQuotesTo'] = self.HTML_ENTITIES  
1282 - BeautifulStoneSoup.__init__(self, *args, **kwargs)  
1283 -  
1284 - SELF_CLOSING_TAGS = buildTagMap(None,  
1285 - ['br' , 'hr', 'input', 'img', 'meta',  
1286 - 'spacer', 'link', 'frame', 'base'])  
1287 -  
1288 - QUOTE_TAGS = {'script': None}  
1289 -  
1290 - #According to the HTML standard, each of these inline tags can  
1291 - #contain another tag of the same type. Furthermore, it's common  
1292 - #to actually use these tags this way.  
1293 - NESTABLE_INLINE_TAGS = ['span', 'font', 'q', 'object', 'bdo', 'sub', 'sup',  
1294 - 'center']  
1295 -  
1296 - #According to the HTML standard, these block tags can contain  
1297 - #another tag of the same type. Furthermore, it's common  
1298 - #to actually use these tags this way.  
1299 - NESTABLE_BLOCK_TAGS = ['blockquote', 'div', 'fieldset', 'ins', 'del']  
1300 -  
1301 - #Lists can contain other lists, but there are restrictions.  
1302 - NESTABLE_LIST_TAGS = { 'ol' : [],  
1303 - 'ul' : [],  
1304 - 'li' : ['ul', 'ol'],  
1305 - 'dl' : [],  
1306 - 'dd' : ['dl'],  
1307 - 'dt' : ['dl'] }  
1308 -  
1309 - #Tables can contain other tables, but there are restrictions.  
1310 - NESTABLE_TABLE_TAGS = {'table' : [],  
1311 - 'tr' : ['table', 'tbody', 'tfoot', 'thead'],  
1312 - 'td' : ['tr'],  
1313 - 'th' : ['tr'],  
1314 - 'thead' : ['table'],  
1315 - 'tbody' : ['table'],  
1316 - 'tfoot' : ['table'],  
1317 - }  
1318 -  
1319 - NON_NESTABLE_BLOCK_TAGS = ['address', 'form', 'p', 'pre']  
1320 -  
1321 - #If one of these tags is encountered, all tags up to the next tag of  
1322 - #this type are popped.  
1323 - RESET_NESTING_TAGS = buildTagMap(None, NESTABLE_BLOCK_TAGS, 'noscript',  
1324 - NON_NESTABLE_BLOCK_TAGS,  
1325 - NESTABLE_LIST_TAGS,  
1326 - NESTABLE_TABLE_TAGS)  
1327 -  
1328 - NESTABLE_TAGS = buildTagMap([], NESTABLE_INLINE_TAGS, NESTABLE_BLOCK_TAGS,  
1329 - NESTABLE_LIST_TAGS, NESTABLE_TABLE_TAGS)  
1330 -  
1331 - # Used to detect the charset in a META tag; see start_meta  
1332 - CHARSET_RE = re.compile("((^|;)\s*charset=)([^;]*)")  
1333 -  
1334 - def start_meta(self, attrs):  
1335 - """Beautiful Soup can detect a charset included in a META tag,  
1336 - try to convert the document to that charset, and re-parse the  
1337 - document from the beginning."""  
1338 - httpEquiv = None  
1339 - contentType = None  
1340 - contentTypeIndex = None  
1341 - tagNeedsEncodingSubstitution = False  
1342 -  
1343 - for i in range(0, len(attrs)):  
1344 - key, value = attrs[i]  
1345 - key = key.lower()  
1346 - if key == 'http-equiv':  
1347 - httpEquiv = value  
1348 - elif key == 'content':  
1349 - contentType = value  
1350 - contentTypeIndex = i  
1351 -  
1352 - if httpEquiv and contentType: # It's an interesting meta tag.  
1353 - match = self.CHARSET_RE.search(contentType)  
1354 - if match:  
1355 - if getattr(self, 'declaredHTMLEncoding', None) or \
1356 - (self.originalEncoding == self.fromEncoding):  
1357 - # This is our second pass through the document, or  
1358 - # else an encoding was specified explicitly and it  
1359 - # worked. Rewrite the meta tag.  
1360 - newAttr = self.CHARSET_RE.sub\  
1361 - (lambda(match):match.group(1) +  
1362 - "%SOUP-ENCODING%", value)  
1363 - attrs[contentTypeIndex] = (attrs[contentTypeIndex][0],  
1364 - newAttr)  
1365 - tagNeedsEncodingSubstitution = True  
1366 - else:  
1367 - # This is our first pass through the document.  
1368 - # Go through it again with the new information.  
1369 - newCharset = match.group(3)  
1370 - if newCharset and newCharset != self.originalEncoding:  
1371 - self.declaredHTMLEncoding = newCharset  
1372 - self._feed(self.declaredHTMLEncoding)  
1373 - raise StopParsing  
1374 - tag = self.unknown_starttag("meta", attrs)  
1375 - if tag and tagNeedsEncodingSubstitution:  
1376 - tag.containsSubstitutions = True  
1377 -  
1378 -class StopParsing(Exception):  
1379 - pass  
1380 -  
1381 -class ICantBelieveItsBeautifulSoup(BeautifulSoup):  
1382 -  
1383 - """The BeautifulSoup class is oriented towards skipping over  
1384 - common HTML errors like unclosed tags. However, sometimes it makes  
1385 - errors of its own. For instance, consider this fragment:  
1386 -  
1387 - <b>Foo<b>Bar</b></b>  
1388 -  
1389 - This is perfectly valid (if bizarre) HTML. However, the  
1390 - BeautifulSoup class will implicitly close the first b tag when it  
1391 - encounters the second 'b'. It will think the author wrote  
1392 - "<b>Foo<b>Bar", and didn't close the first 'b' tag, because  
1393 - there's no real-world reason to bold something that's already  
1394 - bold. When it encounters '</b></b>' it will close two more 'b'  
1395 - tags, for a grand total of three tags closed instead of two. This  
1396 - can throw off the rest of your document structure. The same is  
1397 - true of a number of other tags, listed below.  
1398 -  
1399 - It's much more common for someone to forget to close a 'b' tag  
1400 - than to actually use nested 'b' tags, and the BeautifulSoup class  
1401 - handles the common case. This class handles the not-so-common
1402 - case: where you can't believe someone wrote what they did, but  
1403 - it's valid HTML and BeautifulSoup screwed up by assuming it  
1404 - wouldn't be."""  
1405 -  
1406 - I_CANT_BELIEVE_THEYRE_NESTABLE_INLINE_TAGS = \  
1407 - ['em', 'big', 'i', 'small', 'tt', 'abbr', 'acronym',
1408 - 'strong', 'cite', 'code', 'dfn', 'kbd', 'samp', 'var',
1409 - 'b']
1410 -  
1411 - I_CANT_BELIEVE_THEYRE_NESTABLE_BLOCK_TAGS = ['noscript']  
1412 -  
1413 - NESTABLE_TAGS = buildTagMap([], BeautifulSoup.NESTABLE_TAGS,  
1414 - I_CANT_BELIEVE_THEYRE_NESTABLE_BLOCK_TAGS,  
1415 - I_CANT_BELIEVE_THEYRE_NESTABLE_INLINE_TAGS)  
1416 -  
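A sketch of the difference, reusing the fragment from the docstring:

    from BeautifulSoup import BeautifulSoup, ICantBelieveItsBeautifulSoup

    markup = '<b>Foo<b>Bar</b></b>'
    print BeautifulSoup(markup)                 # first <b> closed too early
    print ICantBelieveItsBeautifulSoup(markup)  # nested <b> kept as written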
1417 -class MinimalSoup(BeautifulSoup):  
1418 - """The MinimalSoup class is for parsing HTML that contains  
1419 - pathologically bad markup. It makes no assumptions about tag  
1420 - nesting, but it does know which tags are self-closing, that  
1421 - <script> tags contain Javascript and should not be parsed, that  
1422 - META tags may contain encoding information, and so on.  
1423 -  
1424 - This also makes it better for subclassing than BeautifulStoneSoup  
1425 - or BeautifulSoup."""  
1426 -  
1427 - RESET_NESTING_TAGS = buildTagMap(None, 'noscript')
1428 - NESTABLE_TAGS = {}  
1429 -  
1430 -class BeautifulSOAP(BeautifulStoneSoup):  
1431 - """This class will push a tag with only a single string child into  
1432 - the tag's parent as an attribute. The attribute's name is the tag  
1433 - name, and the value is the string child. An example should give  
1434 - the flavor of the change:  
1435 -  
1436 - <foo><bar>baz</bar></foo>  
1437 - =>  
1438 - <foo bar="baz"><bar>baz</bar></foo>  
1439 -  
1440 - You can then access fooTag['bar'] instead of fooTag.barTag.string.  
1441 -  
1442 - This is, of course, useful for scraping structures that tend to  
1443 - use subelements instead of attributes, such as SOAP messages. Note  
1444 - that it modifies its input, so don't print the modified version  
1445 - out.  
1446 -  
1447 - I'm not sure how many people really want to use this class; let me  
1448 - know if you do. Mainly I like the name."""  
1449 -  
1450 - def popTag(self):  
1451 - if len(self.tagStack) > 1:  
1452 - tag = self.tagStack[-1]  
1453 - parent = self.tagStack[-2]  
1454 - parent._getAttrMap()  
1455 - if (isinstance(tag, Tag) and len(tag.contents) == 1 and  
1456 - isinstance(tag.contents[0], NavigableString) and  
1457 - not parent.attrMap.has_key(tag.name)):  
1458 - parent[tag.name] = tag.contents[0]  
1459 - BeautifulStoneSoup.popTag(self)  
1460 -  
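A sketch of the promotion described above, reusing the docstring's example:

    from BeautifulSoup import BeautifulSOAP

    soup = BeautifulSOAP('<foo><bar>baz</bar></foo>')
    print soup.foo['bar']   # u'baz' -- promoted from the single string child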
1461 -#Enterprise class names! It has come to our attention that some people  
1462 -#think the names of the Beautiful Soup parser classes are too silly  
1463 -#and "unprofessional" for use in enterprise screen-scraping. We feel  
1464 -#your pain! For such-minded folk, the Beautiful Soup Consortium And  
1465 -#All-Night Kosher Bakery recommends renaming this file to  
1466 -#"RobustParser.py" (or, in cases of extreme enterprisness,  
1467 -#"RobustParserBeanInterface.class") and using the following  
1468 -#enterprise-friendly class aliases:  
1469 -class RobustXMLParser(BeautifulStoneSoup):  
1470 - pass  
1471 -class RobustHTMLParser(BeautifulSoup):  
1472 - pass  
1473 -class RobustWackAssHTMLParser(ICantBelieveItsBeautifulSoup):  
1474 - pass  
1475 -class RobustInsanelyWackAssHTMLParser(MinimalSoup):  
1476 - pass  
1477 -class SimplifyingSOAPParser(BeautifulSOAP):  
1478 - pass  
1479 -  
1480 -######################################################  
1481 -#  
1482 -# Bonus library: Unicode, Dammit  
1483 -#  
1484 -# This class forces XML data into a standard format (usually to UTF-8  
1485 -# or Unicode). It is heavily based on code from Mark Pilgrim's  
1486 -# Universal Feed Parser. It does not rewrite the XML or HTML to  
1487 -# reflect a new encoding: that happens in BeautifulStoneSoup.handle_pi  
1488 -# (XML) and BeautifulSoup.start_meta (HTML).  
1489 -  
1490 -# Autodetects character encodings.  
1491 -# Download from http://chardet.feedparser.org/  
1492 -try:  
1493 - import chardet  
1494 -# import chardet.constants  
1495 -# chardet.constants._debug = 1  
1496 -except ImportError:  
1497 -    chardet = None  
1499 -  
1500 -# cjkcodecs and iconv_codec make Python know about more character encodings.  
1501 -# Both are available from http://cjkpython.i18n.org/  
1502 -# They're built in if you use Python 2.4.  
1503 -try:  
1504 -    import cjkcodecs.aliases  
1505 -except ImportError:  
1506 -    pass  
1507 -try:  
1508 -    import iconv_codec  
1509 -except ImportError:  
1510 -    pass  
1511 -  
1512 -class UnicodeDammit:  
1513 - """A class for detecting the encoding of a *ML document and  
1514 - converting it to a Unicode string. If the source encoding is  
1515 - windows-1252, can replace MS smart quotes with their HTML or XML  
1516 - equivalents."""  
1517 -  
1518 - # This dictionary maps commonly seen values for "charset" in HTML  
1519 - # meta tags to the corresponding Python codec names. It only covers  
1520 - # values that aren't in Python's aliases and can't be determined  
1521 - # by the heuristics in find_codec.  
1522 - CHARSET_ALIASES = { "macintosh" : "mac-roman",  
1523 - "x-sjis" : "shift-jis" }  
1524 -  
1525 - def __init__(self, markup, overrideEncodings=[],  
1526 - smartQuotesTo='xml'):  
1527 - self.markup, documentEncoding, sniffedEncoding = \  
1528 - self._detectEncoding(markup)  
1529 - self.smartQuotesTo = smartQuotesTo  
1530 - self.triedEncodings = []  
1531 - if markup == '' or isinstance(markup, unicode):  
1532 - self.originalEncoding = None  
1533 - self.unicode = unicode(markup)  
1534 - return  
1535 -  
1536 - u = None  
1537 - for proposedEncoding in overrideEncodings:  
1538 - u = self._convertFrom(proposedEncoding)  
1539 - if u: break  
1540 - if not u:  
1541 - for proposedEncoding in (documentEncoding, sniffedEncoding):  
1542 - u = self._convertFrom(proposedEncoding)  
1543 - if u: break  
1544 -  
1545 -        # If no luck and we have an auto-detection library, try that:  
1546 - if not u and chardet and not isinstance(self.markup, unicode):  
1547 - u = self._convertFrom(chardet.detect(self.markup)['encoding'])  
1548 -  
1549 - # As a last resort, try utf-8 and windows-1252:  
1550 - if not u:  
1551 -            for proposedEncoding in ("utf-8", "windows-1252"):  
1552 -                u = self._convertFrom(proposedEncoding)  
1553 - if u: break  
1554 - self.unicode = u  
1555 - if not u: self.originalEncoding = None  
1556 -  
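# [Editorial sketch, not part of the original file] Typical use of the
# constructor above; the byte string and encoding are illustrative:
from BeautifulSoup import UnicodeDammit
dammit = UnicodeDammit("Sacr\xe9 bleu!", overrideEncodings=["latin-1"])
print repr(dammit.unicode)      # u'Sacr\xe9 bleu!'
print dammit.originalEncoding   # latin-1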
1557 - def _subMSChar(self, orig):  
1558 - """Changes a MS smart quote character to an XML or HTML  
1559 - entity."""  
1560 - sub = self.MS_CHARS.get(orig)  
1561 - if type(sub) == types.TupleType:  
1562 - if self.smartQuotesTo == 'xml':  
1563 - sub = '&#x%s;' % sub[1]  
1564 - else:  
1565 - sub = '&%s;' % sub[0]  
1566 - return sub  
1567 -  
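# [Editorial sketch, not part of the original file] What the substitution
# above does to hypothetical windows-1252 smart quotes:
from BeautifulSoup import UnicodeDammit
html = UnicodeDammit('\x93quoted\x94', ['windows-1252'], smartQuotesTo='html')
print html.unicode   # &ldquo;quoted&rdquo;
xml = UnicodeDammit('\x93quoted\x94', ['windows-1252'])
print xml.unicode    # &#x201C;quoted&#x201D; (smartQuotesTo defaults to 'xml')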
1568 - def _convertFrom(self, proposed):  
1569 - proposed = self.find_codec(proposed)  
1570 - if not proposed or proposed in self.triedEncodings:  
1571 - return None  
1572 - self.triedEncodings.append(proposed)  
1573 - markup = self.markup  
1574 -  
1575 - # Convert smart quotes to HTML if coming from an encoding  
1576 - # that might have them.  
1577 -        if self.smartQuotesTo and proposed.lower() in ("windows-1252",  
1578 -                                                        "iso-8859-1",  
1579 -                                                        "iso-8859-2"):  
1580 -            markup = re.compile("([\x80-\x9f])").sub \  
1581 -                     (lambda x: self._subMSChar(x.group(1)),  
1582 -                      markup)  
1583 -  
1584 - try:  
1585 - # print "Trying to convert document to %s" % proposed  
1586 - u = self._toUnicode(markup, proposed)  
1587 - self.markup = u  
1588 - self.originalEncoding = proposed  
1589 - except Exception, e:  
1590 - # print "That didn't work!"  
1591 - # print e  
1592 - return None  
1593 - #print "Correct encoding: %s" % proposed  
1594 - return self.markup  
1595 -  
1596 - def _toUnicode(self, data, encoding):  
1597 -        '''Given a string and its encoding, decodes the string into Unicode.  
1598 -        'encoding' is a string recognized by encodings.aliases'''  
1599 -  
1600 - # strip Byte Order Mark (if present)  
1601 - if (len(data) >= 4) and (data[:2] == '\xfe\xff') \  
1602 - and (data[2:4] != '\x00\x00'):  
1603 - encoding = 'utf-16be'  
1604 - data = data[2:]  
1605 - elif (len(data) >= 4) and (data[:2] == '\xff\xfe') \  
1606 - and (data[2:4] != '\x00\x00'):  
1607 - encoding = 'utf-16le'  
1608 - data = data[2:]  
1609 - elif data[:3] == '\xef\xbb\xbf':  
1610 - encoding = 'utf-8'  
1611 - data = data[3:]  
1612 - elif data[:4] == '\x00\x00\xfe\xff':  
1613 - encoding = 'utf-32be'  
1614 - data = data[4:]  
1615 - elif data[:4] == '\xff\xfe\x00\x00':  
1616 - encoding = 'utf-32le'  
1617 - data = data[4:]  
1618 - newdata = unicode(data, encoding)  
1619 - return newdata  
1620 -  
1621 - def _detectEncoding(self, xml_data):  
1622 - """Given a document, tries to detect its XML encoding."""  
1623 - xml_encoding = sniffed_xml_encoding = None  
1624 - try:  
1625 - if xml_data[:4] == '\x4c\x6f\xa7\x94':  
1626 - # EBCDIC  
1627 - xml_data = self._ebcdic_to_ascii(xml_data)  
1628 - elif xml_data[:4] == '\x00\x3c\x00\x3f':  
1629 - # UTF-16BE  
1630 - sniffed_xml_encoding = 'utf-16be'  
1631 - xml_data = unicode(xml_data, 'utf-16be').encode('utf-8')  
1632 - elif (len(xml_data) >= 4) and (xml_data[:2] == '\xfe\xff') \  
1633 - and (xml_data[2:4] != '\x00\x00'):  
1634 - # UTF-16BE with BOM  
1635 - sniffed_xml_encoding = 'utf-16be'  
1636 - xml_data = unicode(xml_data[2:], 'utf-16be').encode('utf-8')  
1637 - elif xml_data[:4] == '\x3c\x00\x3f\x00':  
1638 - # UTF-16LE  
1639 - sniffed_xml_encoding = 'utf-16le'  
1640 - xml_data = unicode(xml_data, 'utf-16le').encode('utf-8')  
1641 - elif (len(xml_data) >= 4) and (xml_data[:2] == '\xff\xfe') and \  
1642 - (xml_data[2:4] != '\x00\x00'):  
1643 - # UTF-16LE with BOM  
1644 - sniffed_xml_encoding = 'utf-16le'  
1645 - xml_data = unicode(xml_data[2:], 'utf-16le').encode('utf-8')  
1646 - elif xml_data[:4] == '\x00\x00\x00\x3c':  
1647 - # UTF-32BE  
1648 - sniffed_xml_encoding = 'utf-32be'  
1649 - xml_data = unicode(xml_data, 'utf-32be').encode('utf-8')  
1650 - elif xml_data[:4] == '\x3c\x00\x00\x00':  
1651 - # UTF-32LE  
1652 - sniffed_xml_encoding = 'utf-32le'  
1653 - xml_data = unicode(xml_data, 'utf-32le').encode('utf-8')  
1654 - elif xml_data[:4] == '\x00\x00\xfe\xff':  
1655 - # UTF-32BE with BOM  
1656 - sniffed_xml_encoding = 'utf-32be'  
1657 - xml_data = unicode(xml_data[4:], 'utf-32be').encode('utf-8')  
1658 - elif xml_data[:4] == '\xff\xfe\x00\x00':  
1659 - # UTF-32LE with BOM  
1660 - sniffed_xml_encoding = 'utf-32le'  
1661 - xml_data = unicode(xml_data[4:], 'utf-32le').encode('utf-8')  
1662 - elif xml_data[:3] == '\xef\xbb\xbf':  
1663 - # UTF-8 with BOM  
1664 - sniffed_xml_encoding = 'utf-8'  
1665 - xml_data = unicode(xml_data[3:], 'utf-8').encode('utf-8')  
1666 - else:  
1667 - sniffed_xml_encoding = 'ascii'  
1669 - xml_encoding_match = re.compile \  
1670 - ('^<\?.*encoding=[\'"](.*?)[\'"].*\?>')\  
1671 - .match(xml_data)  
1672 - except:  
1673 - xml_encoding_match = None  
1674 - if xml_encoding_match:  
1675 - xml_encoding = xml_encoding_match.groups()[0].lower()  
1676 - if sniffed_xml_encoding and \  
1677 - (xml_encoding in ('iso-10646-ucs-2', 'ucs-2', 'csunicode',  
1678 - 'iso-10646-ucs-4', 'ucs-4', 'csucs4',  
1679 - 'utf-16', 'utf-32', 'utf_16', 'utf_32',  
1680 - 'utf16', 'u16')):  
1681 - xml_encoding = sniffed_xml_encoding  
1682 - return xml_data, xml_encoding, sniffed_xml_encoding  
1683 -  
1684 -  
1685 - def find_codec(self, charset):  
1686 - return self._codec(self.CHARSET_ALIASES.get(charset, charset)) \  
1687 - or (charset and self._codec(charset.replace("-", ""))) \  
1688 - or (charset and self._codec(charset.replace("-", "_"))) \  
1689 - or charset  
1690 -  
1691 - def _codec(self, charset):  
1692 - if not charset: return charset  
1693 - codec = None  
1694 - try:  
1695 - codecs.lookup(charset)  
1696 - codec = charset  
1697 - except LookupError:  
1698 - pass  
1699 - return codec  
1700 -  
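# [Editorial sketch, not part of the original file] The fallback chain in
# find_codec above, on a few illustrative charset names:
from BeautifulSoup import UnicodeDammit
d = UnicodeDammit("")
print d.find_codec("macintosh")      # mac-roman, via CHARSET_ALIASES
print d.find_codec("utf-8")          # utf-8, via a direct codec lookup
print d.find_codec("bogus-charset")  # bogus-charset, returned unchanged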
1701 - EBCDIC_TO_ASCII_MAP = None  
1702 - def _ebcdic_to_ascii(self, s):  
1703 - c = self.__class__  
1704 - if not c.EBCDIC_TO_ASCII_MAP:  
1705 - emap = (0,1,2,3,156,9,134,127,151,141,142,11,12,13,14,15,  
1706 - 16,17,18,19,157,133,8,135,24,25,146,143,28,29,30,31,  
1707 - 128,129,130,131,132,10,23,27,136,137,138,139,140,5,6,7,  
1708 - 144,145,22,147,148,149,150,4,152,153,154,155,20,21,158,26,  
1709 - 32,160,161,162,163,164,165,166,167,168,91,46,60,40,43,33,  
1710 - 38,169,170,171,172,173,174,175,176,177,93,36,42,41,59,94,  
1711 - 45,47,178,179,180,181,182,183,184,185,124,44,37,95,62,63,  
1712 - 186,187,188,189,190,191,192,193,194,96,58,35,64,39,61,34,  
1713 - 195,97,98,99,100,101,102,103,104,105,196,197,198,199,200,  
1714 - 201,202,106,107,108,109,110,111,112,113,114,203,204,205,  
1715 - 206,207,208,209,126,115,116,117,118,119,120,121,122,210,  
1716 - 211,212,213,214,215,216,217,218,219,220,221,222,223,224,  
1717 - 225,226,227,228,229,230,231,123,65,66,67,68,69,70,71,72,  
1718 - 73,232,233,234,235,236,237,125,74,75,76,77,78,79,80,81,  
1719 - 82,238,239,240,241,242,243,92,159,83,84,85,86,87,88,89,  
1720 - 90,244,245,246,247,248,249,48,49,50,51,52,53,54,55,56,57,  
1721 - 250,251,252,253,254,255)  
1722 - import string  
1723 - c.EBCDIC_TO_ASCII_MAP = string.maketrans( \  
1724 - ''.join(map(chr, range(256))), ''.join(map(chr, emap)))  
1725 - return s.translate(c.EBCDIC_TO_ASCII_MAP)  
1726 -  
1727 - MS_CHARS = { '\x80' : ('euro', '20AC'),  
1728 - '\x81' : ' ',  
1729 - '\x82' : ('sbquo', '201A'),  
1730 - '\x83' : ('fnof', '192'),  
1731 - '\x84' : ('bdquo', '201E'),  
1732 - '\x85' : ('hellip', '2026'),  
1733 - '\x86' : ('dagger', '2020'),  
1734 - '\x87' : ('Dagger', '2021'),  
1735 - '\x88' : ('circ', '2C6'),  
1736 - '\x89' : ('permil', '2030'),  
1737 - '\x8A' : ('Scaron', '160'),  
1738 - '\x8B' : ('lsaquo', '2039'),  
1739 - '\x8C' : ('OElig', '152'),  
1740 - '\x8D' : '?',  
1741 - '\x8E' : ('#x17D', '17D'),  
1742 - '\x8F' : '?',  
1743 - '\x90' : '?',  
1744 - '\x91' : ('lsquo', '2018'),  
1745 - '\x92' : ('rsquo', '2019'),  
1746 - '\x93' : ('ldquo', '201C'),  
1747 - '\x94' : ('rdquo', '201D'),  
1748 - '\x95' : ('bull', '2022'),  
1749 - '\x96' : ('ndash', '2013'),  
1750 - '\x97' : ('mdash', '2014'),  
1751 - '\x98' : ('tilde', '2DC'),  
1752 - '\x99' : ('trade', '2122'),  
1753 - '\x9a' : ('scaron', '161'),  
1754 - '\x9b' : ('rsaquo', '203A'),  
1755 - '\x9c' : ('oelig', '153'),  
1756 - '\x9d' : '?',  
1757 - '\x9e' : ('#x17E', '17E'),  
1758 -                 '\x9f' : ('Yuml', '178'),}  
1759 -  
1760 -#######################################################################  
1761 -  
1762 -  
1763 -#By default, act as an HTML pretty-printer.  
1764 -if __name__ == '__main__':  
1765 - import sys  
1766 - soup = BeautifulSoup(sys.stdin.read())  
1767 - print soup.prettify()  
pacotes/openlayers/tools/README.txt
@@ -1,14 +0,0 @@
1 -This directory contains tools used in the packaging or deployment of OpenLayers.  
2 -  
3 -Javascript minimizing tools:  
4 -  
5 - * jsmin.c, jsmin.py:  
6 -   jsmin.py is a direct translation of the jsmin.c code into Python. jsmin.py  
7 -   will therefore run anywhere Python runs... but at a significantly slower speed.  
8 -  
9 - * shrinksafe.py  
10 -   shrinksafe.py calls out to a third-party JavaScript shrinking service. This  
11 -   produces files about 4% smaller (as of commit 501) for the OpenLayers  
12 -   code. However, it also has the side effect of making you dependent on the  
13 -   web service -- and since that service sometimes goes dead, it's risky to  
14 -   depend on it.  
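
As a quick illustration (editorial addition; file names are hypothetical),
jsmin.py can be used either as a library or as a stdin/stdout filter:

    from jsmin import jsmin
    minified = jsmin(open("OpenLayers.js").read())

    # equivalent, from the shell:
    #   python jsmin.py < OpenLayers.js > OpenLayers.min.js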
pacotes/openlayers/tools/exampleparser.py
@@ -1,251 +0,0 @@
1 -#!/usr/bin/env python  
2 -  
3 -import sys  
4 -import os  
5 -import re  
6 -import urllib2  
7 -import time  
8 -from xml.dom.minidom import Document  
9 -  
10 -try:  
11 - import xml.etree.ElementTree as ElementTree  
12 -except ImportError:  
13 - try:  
14 - import cElementTree as ElementTree  
15 - except ImportError:  
16 - try:  
17 - import elementtree.ElementTree as ElementTree  
18 - except ImportError:  
19 - import lxml.etree as ElementTree  
20 -  
21 -missing_deps = False  
22 -try:  
23 - import simplejson  
24 - from BeautifulSoup import BeautifulSoup  
25 -except ImportError, E:  
26 - missing_deps = E  
27 -  
28 -feedName = "example-list.xml"  
29 -feedPath = "http://openlayers.org/dev/examples/"  
30 -  
31 -def getListOfOnlineExamples(baseUrl):  
32 - """  
33 -    Useful if you want to get a list of examples from a URL. Not used by default.  
34 - """  
35 - html = urllib2.urlopen(baseUrl)  
36 - soup = BeautifulSoup(html)  
37 - examples = soup.findAll('li')  
38 - examples = [example.find('a').get('href') for example in examples]  
39 - examples = [example for example in examples if example.endswith('.html')]  
41 - return examples  
42 -  
43 -def getListOfExamples(relPath):  
44 - """  
45 - returns list of .html filenames within a given path - excludes example-list.html  
46 - """  
47 - examples = os.listdir(relPath)  
48 - examples = [example for example in examples if example.endswith('.html') and example != "example-list.html"]  
49 - return examples  
50 -  
51 -  
52 -def getExampleHtml(location):  
53 - """  
54 - returns html of a specific example that is available online or locally  
55 - """  
56 - print '.',  
57 - if location.startswith('http'):  
58 - return urllib2.urlopen(location).read()  
59 - else:  
60 - f = open(location)  
61 - html = f.read()  
62 - f.close()  
63 - return html  
64 -  
65 -  
66 -def extractById(soup, tagId, value=None):  
67 - """  
68 - returns full contents of a particular tag id  
69 - """  
70 - beautifulTag = soup.find(id=tagId)  
71 - if beautifulTag:  
72 - if beautifulTag.contents:  
73 - value = str(beautifulTag.renderContents()).strip()  
74 - value = value.replace('\t','')  
75 - value = value.replace('\n','')  
76 - return value  
77 -  
78 -def getRelatedClasses(html):  
79 - """  
80 - parses the html, and returns a list of all OpenLayers Classes  
81 - used within (ie what parts of OL the javascript uses).  
82 - """  
83 - rawstr = r'''(?P<class>OpenLayers\..*?)\('''  
84 - return re.findall(rawstr, html)  
85 -  
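# [Editorial sketch, not part of the original file] What the pattern above
# extracts from a hypothetical snippet of example JavaScript:
html = 'var map = new OpenLayers.Map("map"); new OpenLayers.Layer.WMS("ol");'
print getRelatedClasses(html)   # ['OpenLayers.Map', 'OpenLayers.Layer.WMS']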
86 -def parseHtml(html,ids):  
87 - """  
88 - returns dictionary of items of interest  
89 - """  
90 - soup = BeautifulSoup(html)  
91 - d = {}  
92 - for tagId in ids:  
93 - d[tagId] = extractById(soup,tagId)  
94 - #classes should eventually be parsed from docs - not automatically created.  
95 - classes = getRelatedClasses(html)  
96 - d['classes'] = classes  
97 - return d  
98 -  
99 -def getSvnInfo(path):  
100 - h = os.popen("svn info %s --xml" % path)  
101 - tree = ElementTree.fromstring(h.read())  
102 - h.close()  
103 - d = {  
104 - 'url': tree.findtext('entry/url'),  
105 - 'author': tree.findtext('entry/commit/author'),  
106 - 'date': tree.findtext('entry/commit/date')  
107 - }  
108 - return d  
109 -  
110 -def createFeed(examples):  
111 - doc = Document()  
112 - atomuri = "http://www.w3.org/2005/Atom"  
113 - feed = doc.createElementNS(atomuri, "feed")  
114 - feed.setAttribute("xmlns", atomuri)  
115 - title = doc.createElementNS(atomuri, "title")  
116 - title.appendChild(doc.createTextNode("OpenLayers Examples"))  
117 - feed.appendChild(title)  
118 - link = doc.createElementNS(atomuri, "link")  
119 - link.setAttribute("rel", "self")  
120 - link.setAttribute("href", feedPath + feedName)  
121 -  
122 -    modtime = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())  
123 - id = doc.createElementNS(atomuri, "id")  
124 - id.appendChild(doc.createTextNode("%s%s#%s" % (feedPath, feedName, modtime)))  
125 - feed.appendChild(id)  
126 -  
127 - updated = doc.createElementNS(atomuri, "updated")  
128 - updated.appendChild(doc.createTextNode(modtime))  
129 - feed.appendChild(updated)  
130 -  
132 -    for example in sorted(examples, key=lambda x:x["modified"], reverse=True):  
133 - entry = doc.createElementNS(atomuri, "entry")  
134 -  
135 - title = doc.createElementNS(atomuri, "title")  
136 - title.appendChild(doc.createTextNode(example["title"] or example["example"]))  
137 - entry.appendChild(title)  
138 -  
139 - link = doc.createElementNS(atomuri, "link")  
140 - link.setAttribute("href", "%s%s" % (feedPath, example["example"]))  
141 - entry.appendChild(link)  
142 -  
143 - summary = doc.createElementNS(atomuri, "summary")  
144 - summary.appendChild(doc.createTextNode(example["shortdesc"] or example["example"]))  
145 - entry.appendChild(summary)  
146 -  
147 - updated = doc.createElementNS(atomuri, "updated")  
148 - updated.appendChild(doc.createTextNode(example["modified"]))  
149 - entry.appendChild(updated)  
150 -  
151 - author = doc.createElementNS(atomuri, "author")  
152 - name = doc.createElementNS(atomuri, "name")  
153 - name.appendChild(doc.createTextNode(example["author"]))  
154 - author.appendChild(name)  
155 - entry.appendChild(author)  
156 -  
157 - id = doc.createElementNS(atomuri, "id")  
158 - id.appendChild(doc.createTextNode("%s%s#%s" % (feedPath, example["example"], example["modified"])))  
159 - entry.appendChild(id)  
160 -  
161 - feed.appendChild(entry)  
162 -  
163 - doc.appendChild(feed)  
164 - return doc  
165 -  
166 -def wordIndex(examples):  
167 - """  
168 - Create an inverted index based on words in title and shortdesc. Keys are  
169 - lower cased words. Values are dictionaries with example index keys and  
170 - count values.  
171 - """  
172 - index = {}  
173 - unword = re.compile("\\W+")  
174 - keys = ["shortdesc", "title"]  
175 - for i in range(len(examples)):  
176 - for key in keys:  
177 - text = examples[i][key]  
178 - if text:  
179 - words = unword.split(text)  
180 - for word in words:  
181 - if word:  
182 - word = word.lower()  
183 - if index.has_key(word):  
184 - if index[word].has_key(i):  
185 - index[word][i] += 1  
186 - else:  
187 - index[word][i] = 1  
188 - else:  
189 - index[word] = {i: 1}  
190 - return index  
191 -  
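# [Editorial sketch, not part of the original file] The shape of the inverted
# index for two hypothetical examples (dictionary ordering is arbitrary):
docs = [{"title": "WMS basics", "shortdesc": "A WMS layer"},
        {"title": "Vector", "shortdesc": "Vector layer demo"}]
print wordIndex(docs)
# {'a': {0: 1}, 'wms': {0: 2}, 'basics': {0: 1},
#  'layer': {0: 1, 1: 1}, 'vector': {1: 2}, 'demo': {1: 1}}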
192 -if __name__ == "__main__":  
193 -  
194 - if missing_deps:  
195 -        print "This script requires simplejson and BeautifulSoup. You don't have them. \n(%s)" % missing_deps  
196 - sys.exit()  
197 -  
198 - if len(sys.argv) > 1:  
199 - outFile = open(sys.argv[1],'w')  
200 - else:  
201 - outFile = open('../examples/example-list.js','w')  
202 -  
203 - examplesLocation = '../examples'  
204 - print 'Reading examples from %s and writing out to %s' % (examplesLocation, outFile.name)  
205 -  
206 - exampleList = []  
207 - docIds = ['title','shortdesc']  
208 -  
209 -    #commented-out option to create docs from an online resource  
210 - #examplesLocation = 'http://svn.openlayers.org/sandbox/docs/examples/'  
211 - #examples = getListOfOnlineExamples(examplesLocation)  
212 -  
213 - examples = getListOfExamples(examplesLocation)  
214 -  
215 -    modtime = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())  
216 -  
217 - for example in examples:  
218 - url = os.path.join(examplesLocation,example)  
219 - html = getExampleHtml(url)  
220 - tagvalues = parseHtml(html,docIds)  
221 - tagvalues['example'] = example  
222 - # add in svn info  
223 - d = getSvnInfo(url)  
224 - tagvalues["modified"] = d["date"] or modtime  
225 - tagvalues["author"] = d["author"] or "anonymous"  
226 - tagvalues['link'] = example  
227 -  
228 - exampleList.append(tagvalues)  
229 -  
230 - print  
231 -  
232 - exampleList.sort(key=lambda x:x['example'].lower())  
233 -  
234 - index = wordIndex(exampleList)  
235 -  
236 - json = simplejson.dumps({"examples": exampleList, "index": index})  
237 - #give the json a global variable we can use in our js. This should be replaced or made optional.  
238 - json = 'var info=' + json  
239 - outFile.write(json)  
240 - outFile.close()  
241 -  
242 - print "writing feed to ../examples/%s " % feedName  
243 - atom = open('../examples/%s' % feedName, 'w')  
244 - doc = createFeed(exampleList)  
245 - atom.write(doc.toxml())  
246 - atom.close()  
247 -  
248 -  
249 - print 'complete'  
250 -  
251 -  
pacotes/openlayers/tools/jsmin.c
@@ -1,272 +0,0 @@
1 -/* jsmin.c  
2 - 2006-05-04  
3 -  
4 -Copyright (c) 2002 Douglas Crockford (www.crockford.com)  
5 -  
6 -Permission is hereby granted, free of charge, to any person obtaining a copy of  
7 -this software and associated documentation files (the "Software"), to deal in  
8 -the Software without restriction, including without limitation the rights to  
9 -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies  
10 -of the Software, and to permit persons to whom the Software is furnished to do  
11 -so, subject to the following conditions:  
12 -  
13 -The above copyright notice and this permission notice shall be included in all  
14 -copies or substantial portions of the Software.  
15 -  
16 -The Software shall be used for Good, not Evil.  
17 -  
18 -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR  
19 -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,  
20 -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE  
21 -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER  
22 -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,  
23 -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE  
24 -SOFTWARE.  
25 -*/  
26 -  
27 -#include <stdlib.h>  
28 -#include <stdio.h>  
29 -  
30 -static int theA;  
31 -static int theB;  
32 -static int theLookahead = EOF;  
33 -  
34 -  
35 -/* isAlphanum -- return true if the character is a letter, digit, underscore,  
36 - dollar sign, or non-ASCII character.  
37 -*/  
38 -  
39 -static int  
40 -isAlphanum(int c)  
41 -{  
42 - return ((c >= 'a' && c <= 'z') || (c >= '0' && c <= '9') ||  
43 - (c >= 'A' && c <= 'Z') || c == '_' || c == '$' || c == '\\' ||  
44 - c > 126);  
45 -}  
46 -  
47 -  
48 -/* get -- return the next character from stdin. Watch out for lookahead. If  
49 - the character is a control character, translate it to a space or  
50 - linefeed.  
51 -*/  
52 -  
53 -static int  
54 -get()  
55 -{  
56 - int c = theLookahead;  
57 - theLookahead = EOF;  
58 - if (c == EOF) {  
59 - c = getc(stdin);  
60 - }  
61 - if (c >= ' ' || c == '\n' || c == EOF) {  
62 - return c;  
63 - }  
64 - if (c == '\r') {  
65 - return '\n';  
66 - }  
67 - return ' ';  
68 -}  
69 -  
70 -  
71 -/* peek -- get the next character without getting it.  
72 -*/  
73 -  
74 -static int  
75 -peek()  
76 -{  
77 - theLookahead = get();  
78 - return theLookahead;  
79 -}  
80 -  
81 -  
82 -/* next -- get the next character, excluding comments. peek() is used to see  
83 - if a '/' is followed by a '/' or '*'.  
84 -*/  
85 -  
86 -static int  
87 -next()  
88 -{  
89 - int c = get();  
90 - if (c == '/') {  
91 - switch (peek()) {  
92 - case '/':  
93 - for (;;) {  
94 - c = get();  
95 - if (c <= '\n') {  
96 - return c;  
97 - }  
98 - }  
99 - case '*':  
100 - get();  
101 - for (;;) {  
102 - switch (get()) {  
103 - case '*':  
104 - if (peek() == '/') {  
105 - get();  
106 - return ' ';  
107 - }  
108 - break;  
109 - case EOF:  
110 - fprintf(stderr, "Error: JSMIN Unterminated comment.\n");  
111 - exit(1);  
112 - }  
113 - }  
114 - default:  
115 - return c;  
116 - }  
117 - }  
118 - return c;  
119 -}  
120 -  
121 -  
122 -/* action -- do something! What you do is determined by the argument:  
123 - 1 Output A. Copy B to A. Get the next B.  
124 - 2 Copy B to A. Get the next B. (Delete A).  
125 - 3 Get the next B. (Delete B).  
126 - action treats a string as a single character. Wow!  
127 - action recognizes a regular expression if it is preceded by ( or , or =.  
128 -*/  
129 -  
130 -static void  
131 -action(int d)  
132 -{  
133 - switch (d) {  
134 - case 1:  
135 - putc(theA, stdout);  
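        /* no break: case 1 deliberately falls through to case 2 */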
136 - case 2:  
137 - theA = theB;  
138 - if (theA == '\'' || theA == '"') {  
139 - for (;;) {  
140 - putc(theA, stdout);  
141 - theA = get();  
142 - if (theA == theB) {  
143 - break;  
144 - }  
145 - if (theA <= '\n') {  
146 - fprintf(stderr,  
147 -"Error: JSMIN unterminated string literal: %c\n", theA);  
148 - exit(1);  
149 - }  
150 - if (theA == '\\') {  
151 - putc(theA, stdout);  
152 - theA = get();  
153 - }  
154 - }  
155 - }  
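        /* no break: case 2 deliberately falls through to case 3 */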
156 - case 3:  
157 - theB = next();  
158 - if (theB == '/' && (theA == '(' || theA == ',' || theA == '=' ||  
159 - theA == ':' || theA == '[' || theA == '!' || theA == '&' ||  
160 - theA == '|')) {  
161 - putc(theA, stdout);  
162 - putc(theB, stdout);  
163 - for (;;) {  
164 - theA = get();  
165 - if (theA == '/') {  
166 - break;  
167 - } else if (theA =='\\') {  
168 - putc(theA, stdout);  
169 - theA = get();  
170 - } else if (theA <= '\n') {  
171 -                    fprintf(stderr,  
172 -"Error: JSMIN unterminated Regular Expression literal.\n");  
173 - exit(1);  
174 - }  
175 - putc(theA, stdout);  
176 - }  
177 - theB = next();  
178 - }  
179 - }  
180 -}  
181 -  
182 -  
183 -/* jsmin -- Copy the input to the output, deleting the characters which are  
184 - insignificant to JavaScript. Comments will be removed. Tabs will be  
185 - replaced with spaces. Carriage returns will be replaced with linefeeds.  
186 - Most spaces and linefeeds will be removed.  
187 -*/  
188 -  
189 -static void  
190 -jsmin()  
191 -{  
192 - theA = '\n';  
193 - action(3);  
194 - while (theA != EOF) {  
195 - switch (theA) {  
196 - case ' ':  
197 - if (isAlphanum(theB)) {  
198 - action(1);  
199 - } else {  
200 - action(2);  
201 - }  
202 - break;  
203 - case '\n':  
204 - switch (theB) {  
205 - case '{':  
206 - case '[':  
207 - case '(':  
208 - case '+':  
209 - case '-':  
210 - action(1);  
211 - break;  
212 - case ' ':  
213 - action(3);  
214 - break;  
215 - default:  
216 - if (isAlphanum(theB)) {  
217 - action(1);  
218 - } else {  
219 - action(2);  
220 - }  
221 - }  
222 - break;  
223 - default:  
224 - switch (theB) {  
225 - case ' ':  
226 - if (isAlphanum(theA)) {  
227 - action(1);  
228 - break;  
229 - }  
230 - action(3);  
231 - break;  
232 - case '\n':  
233 - switch (theA) {  
234 - case '}':  
235 - case ']':  
236 - case ')':  
237 - case '+':  
238 - case '-':  
239 - case '"':  
240 - case '\'':  
241 - action(1);  
242 - break;  
243 - default:  
244 - if (isAlphanum(theA)) {  
245 - action(1);  
246 - } else {  
247 - action(3);  
248 - }  
249 - }  
250 - break;  
251 - default:  
252 - action(1);  
253 - break;  
254 - }  
255 - }  
256 - }  
257 -}  
258 -  
259 -  
260 -/* main -- Output any command line arguments as comments  
261 - and then minify the input.  
262 -*/  
263 -extern int  
264 -main(int argc, char* argv[])  
265 -{  
266 - int i;  
267 - for (i = 1; i < argc; i += 1) {  
268 - fprintf(stdout, "// %s\n", argv[i]);  
269 - }  
270 - jsmin();  
271 - return 0;  
272 -}  
pacotes/openlayers/tools/jsmin.py
@@ -1,216 +0,0 @@
1 -#!/usr/bin/python  
2 -  
3 -# This code is original from jsmin by Douglas Crockford, it was translated to  
4 -# Python by Baruch Even. The original code had the following copyright and  
5 -# license.  
6 -#  
7 -# /* jsmin.c  
8 -# 2007-01-08  
9 -#  
10 -# Copyright (c) 2002 Douglas Crockford (www.crockford.com)  
11 -#  
12 -# Permission is hereby granted, free of charge, to any person obtaining a copy of  
13 -# this software and associated documentation files (the "Software"), to deal in  
14 -# the Software without restriction, including without limitation the rights to  
15 -# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies  
16 -# of the Software, and to permit persons to whom the Software is furnished to do  
17 -# so, subject to the following conditions:  
18 -#  
19 -# The above copyright notice and this permission notice shall be included in all  
20 -# copies or substantial portions of the Software.  
21 -#  
22 -# The Software shall be used for Good, not Evil.  
23 -#  
24 -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR  
25 -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,  
26 -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE  
27 -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER  
28 -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,  
29 -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE  
30 -# SOFTWARE.  
31 -# */  
32 -  
33 -from StringIO import StringIO  
34 -  
35 -def jsmin(js):  
36 - ins = StringIO(js)  
37 - outs = StringIO()  
38 - JavascriptMinify().minify(ins, outs)  
39 -    result = outs.getvalue()  
40 -    if len(result) > 0 and result[0] == '\n':  
41 -        result = result[1:]  
42 -    return result  
43 -  
44 -def isAlphanum(c):  
45 - """return true if the character is a letter, digit, underscore,  
46 - dollar sign, or non-ASCII character.  
47 - """  
48 -    return ((c >= 'a' and c <= 'z') or (c >= '0' and c <= '9') or  
49 -            (c >= 'A' and c <= 'Z') or c == '_' or c == '$' or c == '\\' or (c is not None and ord(c) > 126))  
50 -  
51 -class UnterminatedComment(Exception):  
52 - pass  
53 -  
54 -class UnterminatedStringLiteral(Exception):  
55 - pass  
56 -  
57 -class UnterminatedRegularExpression(Exception):  
58 - pass  
59 -  
60 -class JavascriptMinify(object):  
61 -  
62 - def _outA(self):  
63 - self.outstream.write(self.theA)  
64 - def _outB(self):  
65 - self.outstream.write(self.theB)  
66 -  
67 - def _get(self):  
68 - """return the next character from stdin. Watch out for lookahead. If  
69 - the character is a control character, translate it to a space or  
70 - linefeed.  
71 - """  
72 - c = self.theLookahead  
73 - self.theLookahead = None  
74 -        if c is None:  
75 - c = self.instream.read(1)  
76 - if c >= ' ' or c == '\n':  
77 - return c  
78 - if c == '': # EOF  
79 - return '\000'  
80 - if c == '\r':  
81 - return '\n'  
82 - return ' '  
83 -  
84 - def _peek(self):  
85 - self.theLookahead = self._get()  
86 - return self.theLookahead  
87 -  
88 - def _next(self):  
89 - """get the next character, excluding comments. peek() is used to see  
90 - if a '/' is followed by a '/' or '*'.  
91 - """  
92 - c = self._get()  
93 - if c == '/':  
94 - p = self._peek()  
95 - if p == '/':  
96 - c = self._get()  
97 - while c > '\n':  
98 - c = self._get()  
99 - return c  
100 - if p == '*':  
101 - c = self._get()  
102 - while 1:  
103 - c = self._get()  
104 - if c == '*':  
105 - if self._peek() == '/':  
106 - self._get()  
107 - return ' '  
108 - if c == '\000':  
109 - raise UnterminatedComment()  
110 -  
111 - return c  
112 -  
113 - def _action(self, action):  
114 - """do something! What you do is determined by the argument:  
115 - 1 Output A. Copy B to A. Get the next B.  
116 - 2 Copy B to A. Get the next B. (Delete A).  
117 - 3 Get the next B. (Delete B).  
118 - action treats a string as a single character. Wow!  
119 - action recognizes a regular expression if it is preceded by ( or , or =.  
120 - """  
121 - if action <= 1:  
122 - self._outA()  
123 -  
124 - if action <= 2:  
125 - self.theA = self.theB  
126 - if self.theA == "'" or self.theA == '"':  
127 - while 1:  
128 - self._outA()  
129 - self.theA = self._get()  
130 - if self.theA == self.theB:  
131 - break  
132 - if self.theA <= '\n':  
133 - raise UnterminatedStringLiteral()  
134 - if self.theA == '\\':  
135 - self._outA()  
136 - self.theA = self._get()  
137 -  
138 -  
139 - if action <= 3:  
140 - self.theB = self._next()  
141 - if self.theB == '/' and (self.theA == '(' or self.theA == ',' or  
142 - self.theA == '=' or self.theA == ':' or  
143 - self.theA == '[' or self.theA == '?' or  
144 - self.theA == '!' or self.theA == '&' or  
145 - self.theA == '|'):  
146 - self._outA()  
147 - self._outB()  
148 - while 1:  
149 - self.theA = self._get()  
150 - if self.theA == '/':  
151 - break  
152 - elif self.theA == '\\':  
153 - self._outA()  
154 - self.theA = self._get()  
155 - elif self.theA <= '\n':  
156 - raise UnterminatedRegularExpression()  
157 - self._outA()  
158 - self.theB = self._next()  
159 -  
160 -  
161 - def _jsmin(self):  
162 - """Copy the input to the output, deleting the characters which are  
163 - insignificant to JavaScript. Comments will be removed. Tabs will be  
164 - replaced with spaces. Carriage returns will be replaced with linefeeds.  
165 - Most spaces and linefeeds will be removed.  
166 - """  
167 - self.theA = '\n'  
168 - self._action(3)  
169 -  
170 - while self.theA != '\000':  
171 - if self.theA == ' ':  
172 - if isAlphanum(self.theB):  
173 - self._action(1)  
174 - else:  
175 - self._action(2)  
176 - elif self.theA == '\n':  
177 - if self.theB in ['{', '[', '(', '+', '-']:  
178 - self._action(1)  
179 - elif self.theB == ' ':  
180 - self._action(3)  
181 - else:  
182 - if isAlphanum(self.theB):  
183 - self._action(1)  
184 - else:  
185 - self._action(2)  
186 - else:  
187 - if self.theB == ' ':  
188 - if isAlphanum(self.theA):  
189 - self._action(1)  
190 - else:  
191 - self._action(3)  
192 - elif self.theB == '\n':  
193 - if self.theA in ['}', ']', ')', '+', '-', '"', '\'']:  
194 - self._action(1)  
195 - else:  
196 - if isAlphanum(self.theA):  
197 - self._action(1)  
198 - else:  
199 - self._action(3)  
200 - else:  
201 - self._action(1)  
202 -  
203 - def minify(self, instream, outstream):  
204 - self.instream = instream  
205 - self.outstream = outstream  
206 - self.theA = None  
207 -        self.theB = None  
208 - self.theLookahead = None  
209 -  
210 - self._jsmin()  
211 - self.instream.close()  
212 -  
213 -if __name__ == '__main__':  
214 - import sys  
215 - jsm = JavascriptMinify()  
216 - jsm.minify(sys.stdin, sys.stdout)  
pacotes/openlayers/tools/mergejs.py
@@ -1,252 +0,0 @@
1 -#!/usr/bin/env python  
2 -#  
3 -# Merge multiple JavaScript source code files into one.  
4 -#  
5 -# Usage:  
6 -# This script requires source files to have dependencies specified in them.  
7 -#  
8 -# Dependencies are specified with a comment of the form:  
9 -#  
10 -# // @requires <file path>  
11 -#  
12 -# e.g.  
13 -#  
14 -# // @requires Geo/DataSource.js  
15 -#  
16 -# This script should be executed like so:  
17 -#  
18 -# mergejs.py <output.js> <directory> [...]  
19 -#  
20 -# e.g.  
21 -#  
22 -# mergejs.py openlayers.js Geo/ CrossBrowser/  
23 -#  
24 -# This example will cause the script to walk the `Geo` and  
25 -# `CrossBrowser` directories--and subdirectories thereof--and import  
26 -# all `*.js` files encountered. The dependency declarations will be extracted  
27 -# and then the source code from imported files will be output to  
28 -# a file named `openlayers.js` in an order which fulfils the dependencies  
29 -# specified.  
30 -#  
31 -#  
32 -# Note: This is a very rough initial version of this code.  
33 -#  
34 -# -- Copyright 2005-2008 MetaCarta, Inc. / OpenLayers project --  
35 -#  
36 -  
37 -# TODO: Allow files to be excluded. e.g. `Crossbrowser/DebugMode.js`?  
38 -# TODO: Report error when dependency can not be found rather than KeyError.  
39 -  
40 -import re  
41 -import os  
42 -import sys  
43 -  
44 -SUFFIX_JAVASCRIPT = ".js"  
45 -  
46 -RE_REQUIRE = "@requires:? (.*)\n" # TODO: Ensure in comment?  
47 -class SourceFile:  
48 - """  
49 - Represents a Javascript source code file.  
50 - """  
51 -  
52 - def __init__(self, filepath, source):  
53 -        """Stores the file path and source code of a Javascript file."""  
55 - self.filepath = filepath  
56 - self.source = source  
57 -  
58 - self.requiredBy = []  
59 -  
60 -  
61 - def _getRequirements(self):  
62 - """  
63 - Extracts the dependencies specified in the source code and returns  
64 - a list of them.  
65 - """  
66 - # TODO: Cache?  
67 - return re.findall(RE_REQUIRE, self.source)  
68 -  
69 - requires = property(fget=_getRequirements, doc="")  
70 -  
71 -  
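# [Editorial sketch, not part of the original file] The requires property
# simply re-scans the source for @requires lines (contents hypothetical):
sf = SourceFile("Geo/Map.js",
                "// @requires Geo/DataSource.js\n// @requires Geo/Util.js\n")
print sf.requires   # ['Geo/DataSource.js', 'Geo/Util.js']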
72 -  
73 -def usage(filename):  
74 - """  
75 - Displays a usage message.  
76 - """  
77 - print "%s [-c <config file>] <output.js> <directory> [...]" % filename  
78 -  
79 -  
80 -class Config:  
81 - """  
82 - Represents a parsed configuration file.  
83 -  
84 - A configuration file should be of the following form:  
85 -  
86 - [first]  
87 - 3rd/prototype.js  
88 - core/application.js  
89 - core/params.js  
90 - # A comment  
91 -  
92 - [last]  
93 - core/api.js # Another comment  
94 -  
95 -    [include]  
96 -  
97 -    [exclude]  
98 -    3rd/logger.js  
99 -  
100 -    All four headings are required; `include` may be left empty (as shown  
101 -    above), but the heading itself must be present.  
99 -  
100 - The files listed in the `first` section will be forced to load  
101 -    *before* all other files (in the order listed). The files in the  
102 -    `last` section will be forced to load *after* all the other files (in the  
103 - order listed).  
104 -  
105 -    The files listed in the `exclude` section will not be imported; if the `include` section lists any files, only those files (plus the `first` files) are imported.  
106 -  
107 - Any text appearing after a # symbol indicates a comment.  
108 -  
109 - """  
110 -  
111 - def __init__(self, filename):  
112 - """  
113 - Parses the content of the named file and stores the values.  
114 - """  
115 - lines = [re.sub("#.*?$", "", line).strip() # Assumes end-of-line character is present  
116 - for line in open(filename)  
117 - if line.strip() and not line.strip().startswith("#")] # Skip blank lines and comments  
118 -  
119 - self.forceFirst = lines[lines.index("[first]") + 1:lines.index("[last]")]  
120 -  
121 - self.forceLast = lines[lines.index("[last]") + 1:lines.index("[include]")]  
122 - self.include = lines[lines.index("[include]") + 1:lines.index("[exclude]")]  
123 - self.exclude = lines[lines.index("[exclude]") + 1:]  
124 -  
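# [Editorial sketch, not part of the original file] Parsing a config file of
# the documented form (file name hypothetical) populates four lists:
cfg = Config("full.cfg")
print cfg.forceFirst   # files under [first], forced to load before the rest
print cfg.forceLast    # files under [last], forced to load after the rest
print cfg.include      # files under [include]; if non-empty, acts as a whitelist
print cfg.exclude      # files under [exclude], never imported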
125 -def run(sourceDirectory, outputFilename=None, configFile=None):  
126 - cfg = None  
127 - if configFile:  
128 - cfg = Config(configFile)  
129 -  
130 - allFiles = []  
131 -  
132 - ## Find all the Javascript source files  
133 - for root, dirs, files in os.walk(sourceDirectory):  
134 - for filename in files:  
135 - if filename.endswith(SUFFIX_JAVASCRIPT) and not filename.startswith("."):  
136 - filepath = os.path.join(root, filename)[len(sourceDirectory)+1:]  
137 - filepath = filepath.replace("\\", "/")  
138 - if cfg and cfg.include:  
139 - if filepath in cfg.include or filepath in cfg.forceFirst:  
140 - allFiles.append(filepath)  
141 - elif (not cfg) or (filepath not in cfg.exclude):  
142 - allFiles.append(filepath)  
143 -  
144 - ## Header inserted at the start of each file in the output  
145 - HEADER = "/* " + "=" * 70 + "\n %s\n" + " " + "=" * 70 + " */\n\n"  
146 -  
147 - files = {}  
148 -  
149 - order = [] # List of filepaths to output, in a dependency satisfying order  
150 -  
151 - ## Import file source code  
152 - ## TODO: Do import when we walk the directories above?  
153 - for filepath in allFiles:  
154 - print "Importing: %s" % filepath  
155 - fullpath = os.path.join(sourceDirectory, filepath).strip()  
156 - content = open(fullpath, "U").read() # TODO: Ensure end of line @ EOF?  
157 - files[filepath] = SourceFile(filepath, content) # TODO: Chop path?  
158 -  
159 - print  
160 -  
161 - from toposort import toposort  
162 -  
163 - complete = False  
164 - resolution_pass = 1  
165 -  
166 - while not complete:  
167 - order = [] # List of filepaths to output, in a dependency satisfying order  
168 - nodes = []  
169 - routes = []  
170 - ## Resolve the dependencies  
171 - print "Resolution pass %s... " % resolution_pass  
172 - resolution_pass += 1  
173 -  
174 - for filepath, info in files.items():  
175 - nodes.append(filepath)  
176 - for neededFilePath in info.requires:  
177 - routes.append((neededFilePath, filepath))  
178 -  
179 - for dependencyLevel in toposort(nodes, routes):  
180 - for filepath in dependencyLevel:  
181 - order.append(filepath)  
182 - if not files.has_key(filepath):  
183 - print "Importing: %s" % filepath  
184 - fullpath = os.path.join(sourceDirectory, filepath).strip()  
185 - content = open(fullpath, "U").read() # TODO: Ensure end of line @ EOF?  
186 - files[filepath] = SourceFile(filepath, content) # TODO: Chop path?  
187 -  
188 -  
189 -  
190 - # Double check all dependencies have been met  
191 - complete = True  
192 - try:  
193 - for fp in order:  
194 - if max([order.index(rfp) for rfp in files[fp].requires] +  
195 - [order.index(fp)]) != order.index(fp):  
196 - complete = False  
197 -        except (ValueError, KeyError):  
198 - complete = False  
199 -  
200 - print  
201 -  
202 -  
203 - ## Move forced first and last files to the required position  
204 - if cfg:  
205 - print "Re-ordering files..."  
206 - order = cfg.forceFirst + [item  
207 - for item in order  
208 - if ((item not in cfg.forceFirst) and  
209 - (item not in cfg.forceLast))] + cfg.forceLast  
210 -  
211 - print  
212 - ## Output the files in the determined order  
213 - result = []  
214 -  
215 - for fp in order:  
216 - f = files[fp]  
217 - print "Exporting: ", f.filepath  
218 - result.append(HEADER % f.filepath)  
219 - source = f.source  
220 - result.append(source)  
221 - if not source.endswith("\n"):  
222 - result.append("\n")  
223 -  
224 - print "\nTotal files merged: %d " % len(files)  
225 -  
226 - if outputFilename:  
227 - print "\nGenerating: %s" % (outputFilename)  
228 - open(outputFilename, "w").write("".join(result))  
229 - return "".join(result)  
230 -  
231 -if __name__ == "__main__":  
232 - import getopt  
233 -  
234 -    options, args = getopt.getopt(sys.argv[1:], "c:")  
235 -  
236 -    try:  
237 -        outputFilename = args[0]  
238 -        sourceDirectory = args[1]  
239 -    except IndexError:  
240 -        usage(sys.argv[0])  
241 -        raise SystemExit  
242 -    if not sourceDirectory:  
243 -        usage(sys.argv[0])  
244 -        raise SystemExit  
246 -  
247 - configFile = None  
248 - if options and options[0][0] == "-c":  
249 - configFile = options[0][1]  
250 -        print "Parsing configuration file: %s" % configFile  
251 -  
252 - run( sourceDirectory, outputFilename, configFile )  
pacotes/openlayers/tools/minimize.py
@@ -1,47 +0,0 @@
1 -# Minimal Python Minimizer  
2 -# Copyright 2008, Christopher Schmidt  
3 -# Released under the MIT License  
4 -#  
5 -# Taken from: http://svn.crschmidt.net/personal/python/minimize.py  
6 -# $Id: minimize.py 6 2008-01-03 06:33:35Z crschmidt $  
7 -#  
8 -# Permission is hereby granted, free of charge, to any person obtaining a copy  
9 -# of this software and associated documentation files (the "Software"), to deal  
10 -# in the Software without restriction, including without limitation the rights  
11 -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell  
12 -# copies of the Software, and to permit persons to whom the Software is  
13 -# furnished to do so, subject to the following conditions:  
14 -#  
15 -# The above copyright notice and this permission notice shall be included in  
16 -# all copies or substantial portions of the Software.  
17 -#  
18 -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR  
19 -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,  
20 -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE  
21 -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER  
22 -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,  
23 -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN  
24 -# THE SOFTWARE.  
25 -  
26 -import re  
27 -  
28 -def strip_comments_helper(data):  
29 - """remove all /* */ format comments and surrounding whitespace."""  
30 - p = re.compile(r'[\s]*/\*.*?\*/[\s]*', re.DOTALL)  
31 - return p.sub('',data)  
32 -  
33 -def minimize(data, exclude=None):  
34 - """Central function call. This will call all other compression  
35 - functions. To add further compression algorithms, simply add  
36 - functions whose names end in _helper which take a string as input  
37 - and return a more compressed string as output."""  
38 - for key, item in globals().iteritems():  
39 - if key.endswith("_helper"):  
40 - func_key = key[:-7]  
41 -            if not exclude or func_key not in exclude:  
42 - data = item(data)  
43 - return data  
44 -  
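# [Editorial sketch, not part of the original file] Because minimize() picks
# up every global whose name ends in "_helper", adding a pass takes nothing
# but a conforming function; this hypothetical helper squeezes runs of three
# or more newlines down to two:
def collapse_blank_lines_helper(data):
    return re.sub(r'\n{3,}', '\n\n', data)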
45 -if __name__ == "__main__":  
46 - import sys  
47 - print minimize(open(sys.argv[1]).read())  
pacotes/openlayers/tools/oldot.py
@@ -1,43 +0,0 @@
1 -import re  
2 -import os  
3 -def run():  
4 - sourceDirectory = "../lib/OpenLayers"  
5 - allFiles = []  
6 - SUFFIX_JAVASCRIPT = ".js"  
7 - ## Find all the Javascript source files  
8 - for root, dirs, files in os.walk(sourceDirectory):  
9 - for filename in files:  
10 - if filename.endswith(SUFFIX_JAVASCRIPT) and not filename.startswith("."):  
11 - filepath = os.path.join(root, filename)[len(sourceDirectory)+1:]  
12 - filepath = filepath.replace("\\", "/")  
13 - data = open(os.path.join(sourceDirectory, filepath)).read()  
14 - parents = re.search("OpenLayers.Class\((.*?){", data,  
15 - re.DOTALL)  
16 - if parents:  
17 - parents = [x.strip() for x in parents.group(1).strip().strip(",").split(",")]  
18 - else:  
19 - parents = []  
20 -                cls = "OpenLayers.%s" % filepath[:-len(SUFFIX_JAVASCRIPT)].replace("/", ".")  # slice off ".js"; str.strip would eat characters, not the suffix  
21 - allFiles.append([cls, parents])  
22 - return allFiles  
23 -print """  
24 -digraph name {  
25 - fontname = "Helvetica"  
26 - fontsize = 8  
27 - K = 0.6  
28 -  
29 - node [  
30 - fontname = "Helvetica"  
31 - fontsize = 8  
32 - shape = "plaintext"  
33 - ]  
34 -"""  
35 -  
36 -for i in run():  
37 - print i[0].replace(".", "_")  
38 - for item in i[1]:  
39 - if not item: continue  
40 - print "%s -> %s" % (i[0].replace(".","_"), item.replace(".", "_"))  
41 - print "; "  
42 -  
43 -print """}"""  
pacotes/openlayers/tools/release.sh
@@ -1,29 +0,0 @@
1 -#!/bin/sh  
2 -  
3 -VERSION=$1  
4 -  
5 -svn export http://svn.openlayers.org/tags/openlayers/release-$VERSION OpenLayers-$VERSION  
6 -cd OpenLayers-$VERSION/build  
7 -./build.py full  
8 -cp OpenLayers.js ..  
9 -  
10 -cd ..  
11 -  
12 -mkdir doc/devdocs  
13 -mkdir doc/apidocs  
14 -rm tools/*.pyc  
15 -  
16 -mkdir /www/openlayers/htdocs/api/$VERSION  
17 -cp OpenLayers.js /www/openlayers/htdocs/api/$VERSION  
18 -cp -a img/ /www/openlayers/htdocs/api/$VERSION  
19 -cp -a theme/ /www/openlayers/htdocs/api/$VERSION  
20 -  
21 -cd ..  
22 -  
23 -~/nd/NaturalDocs -i OpenLayers-$VERSION/lib -o HTML OpenLayers-$VERSION/doc/devdocs -p OpenLayers-$VERSION/doc_config -s Small OL  
24 -~/nd/NaturalDocs -i OpenLayers-$VERSION/lib -o HTML OpenLayers-$VERSION/doc/apidocs -p OpenLayers-$VERSION/apidoc_config -s Small OL  
25 -  
26 -tar cvfz OpenLayers-$VERSION.tar.gz OpenLayers-$VERSION/  
27 -zip -9r OpenLayers-$VERSION.zip OpenLayers-$VERSION/  
28 -  
29 -cp OpenLayers-$VERSION.* /www/openlayers/htdocs/download  
pacotes/openlayers/tools/shrinksafe.py
@@ -1,54 +0,0 @@
1 -#!/usr/bin/env python  
2 -#  
3 -# Script to provide a wrapper around the ShrinkSafe "web service"  
4 -# <http://shrinksafe.dojotoolkit.org/>  
5 -#  
6 -  
7 -#  
8 -# We use this script for two reasons:  
9 -#  
10 -# * This avoids having to install and configure Java and the standalone  
11 -# ShrinkSafe utility.  
12 -#  
13 -# * The current ShrinkSafe standalone utility was broken when we last  
14 -# used it.  
15 -#  
16 -  
17 -import sys  
18 -  
19 -import urllib  
20 -import urllib2  
21 -  
22 -URL_SHRINK_SAFE = "http://shrinksafe.dojotoolkit.org/shrinksafe.php"  
23 -  
24 -# This would normally be dynamically generated:  
25 -BOUNDARY_MARKER = "---------------------------72288400411964641492083565382"  
26 -  
27 -if __name__ == "__main__":  
28 - ## Grab the source code  
29 - try:  
30 - sourceFilename = sys.argv[1]  
31 -    except IndexError:  
32 - print "Usage: %s (<source filename>|-)" % sys.argv[0]  
33 - raise SystemExit  
34 -  
35 - if sourceFilename == "-":  
36 - sourceCode = sys.stdin.read()  
37 - sourceFilename = "stdin.js"  
38 - else:  
39 - sourceCode = open(sourceFilename).read()  
40 -  
41 - ## Create the request replicating posting of the form from the web page  
42 - request = urllib2.Request(url=URL_SHRINK_SAFE)  
43 - request.add_header("Content-Type",  
44 - "multipart/form-data; boundary=%s" % BOUNDARY_MARKER)  
45 - request.add_data("""  
46 ---%s  
47 -Content-Disposition: form-data; name="shrinkfile[]"; filename="%s"  
48 -Content-Type: application/x-javascript  
49 -  
50 -%s  
51 -""" % (BOUNDARY_MARKER, sourceFilename, sourceCode))  
52 -  
53 - ## Deliver the result  
54 - print urllib2.urlopen(request).read(),  
pacotes/openlayers/tools/toposort.py
@@ -1,260 +0,0 @@
1 -#  
2 -# According to <http://www.vrplumber.com/programming/> this file  
3 -# is licensed under a BSD-style license. We only use the section  
4 -# originally by Tim Peters.  
5 -#  
6 -# TODO: The use of this code needs to be okayed by someone.  
7 -#  
8 -  
9 -class RecursionError( OverflowError, ValueError ):  
10 - '''Unable to calculate result because of recursive structure'''  
11 -  
12 -  
13 -def sort(nodes, routes, noRecursion=1):  
14 -    '''Passed a list of node IDs and a list of (source, dest) ID routes,  
15 -    attempts to create a list of stages, where each sub-list  
16 -    is one stage in a process.  
17 - '''  
18 - children, parents = _buildChildrenLists(routes)  
19 - # first stage is those nodes  
20 - # having no incoming routes...  
21 - stage = []  
22 - stages = [stage]  
23 - taken = []  
24 - for node in nodes:  
25 - if (not parents.get(node)):  
26 - stage.append (node)  
27 - if nodes and not stage:  
28 - # there is no element which does not depend on  
29 - # some other element!!!  
30 - stage.append( nodes[0])  
31 - taken.extend( stage )  
32 - nodes = filter ( lambda x, l=stage: x not in l, nodes )  
33 - while nodes:  
34 - previousStageChildren = []  
35 - nodelen = len(nodes)  
36 - # second stage are those nodes  
37 - # which are direct children of the first stage  
38 - for node in stage:  
39 - for child in children.get (node, []):  
40 - if child not in previousStageChildren and child not in taken:  
41 - previousStageChildren.append(child)  
42 - elif child in taken and noRecursion:  
43 - raise RecursionError( (child, node) )  
44 - # unless they are children of other direct children...  
45 - # TODO, actually do that...  
46 - stage = previousStageChildren  
47 - removes = []  
48 - for current in stage:  
49 - currentParents = parents.get( current, [] )  
50 - for parent in currentParents:  
51 - if parent in stage and parent != current:  
52 - # might wind up removing current...  
53 - if not current in parents.get(parent, []):  
54 - # is not mutually dependent...  
55 - removes.append( current )  
56 - for remove in removes:  
57 - while remove in stage:  
58 - stage.remove( remove )  
59 - stages.append( stage)  
60 - taken.extend( stage )  
61 - nodes = filter ( lambda x, l=stage: x not in l, nodes )  
62 - if nodelen == len(nodes):  
63 - if noRecursion:  
64 - raise RecursionError( nodes )  
65 - else:  
66 - stages.append( nodes[:] )  
67 - nodes = []  
68 - return stages  
69 -  
70 -def _buildChildrenLists (routes):  
71 - childrenTable = {}  
72 - parentTable = {}  
73 - for sourceID,destinationID in routes:  
74 - currentChildren = childrenTable.get( sourceID, [])  
75 - currentParents = parentTable.get( destinationID, [])  
76 - if not destinationID in currentChildren:  
77 - currentChildren.append ( destinationID)  
78 - if not sourceID in currentParents:  
79 - currentParents.append ( sourceID)  
80 - childrenTable[sourceID] = currentChildren  
81 - parentTable[destinationID] = currentParents  
82 - return childrenTable, parentTable  
83 -  
84 -  
85 -def toposort (nodes, routes, noRecursion=1):  
86 - '''Topological sort from Tim Peters, fairly efficient  
87 - in comparison (it seems).'''  
88 - #first calculate the recursion depth  
89 -  
90 - dependencies = {}  
91 - inversedependencies = {}  
92 - if not nodes:  
93 - return []  
94 - if not routes:  
95 - return [nodes]  
96 - for node in nodes:  
97 - dependencies[ node ] = (0, node)  
98 - inversedependencies[ node ] = []  
99 -  
100 -  
101 - for depended, depends in routes:  
102 - # is it a null rule  
103 - try:  
104 - newdependencylevel, object = dependencies.get ( depends, (0, depends))  
105 - except TypeError:  
106 - print depends  
107 - raise  
108 - dependencies[ depends ] = (newdependencylevel + 1, depends)  
109 - # "dependency (existence) of depended-on"  
110 - newdependencylevel,object = dependencies.get ( depended, (0, depended) )  
111 - dependencies[ depended ] = (newdependencylevel, depended)  
112 - # Inverse dependency set up  
113 - dependencieslist = inversedependencies.get ( depended, [])  
114 - dependencieslist.append (depends)  
115 - inversedependencies[depended] = dependencieslist  
116 - ### Now we do the actual sorting  
117 - # The first task is to create the sortable  
118 - # list of dependency-levels  
119 - sortinglist = dependencies.values()  
120 - sortinglist.sort ()  
121 - output = []  
122 - while sortinglist:  
123 - deletelist = []  
124 - generation = []  
125 - output.append( generation)  
126 - while sortinglist and sortinglist[0][0] == 0:  
127 - number, object = sortinglist[0]  
128 - generation.append ( object )  
129 - deletelist.append( object )  
130 - for inverse in inversedependencies.get(object, () ):  
131 - try:  
132 - oldcount, inverse = dependencies [ inverse]  
133 - if oldcount > 0:  
134 - # will be dealt with on later pass  
135 - dependencies [ inverse] = (oldcount-1, inverse)  
136 - else:  
137 - # will be dealt with on this pass,  
138 - # so needs not to be in the sorting list next time  
139 - deletelist.append( inverse )  
140 - # just in case a loop comes through  
141 - inversedependencies[object] = []  
142 - except KeyError:  
143 - # dealing with a recursion-breaking run...  
144 - pass  
145 - del sortinglist [0]  
146 - # if no elements could be deleted, then  
147 - # there is something which depends upon itself  
148 - if not deletelist:  
149 - if noRecursion:  
150 - raise RecursionError( sortinglist )  
151 - else:  
152 - # hack so that something gets deleted...  
153 -## import pdb  
154 -## pdb.set_trace()  
155 - dependencies[sortinglist[0][1]] = (0,sortinglist[0][1])  
156 - # delete the items that were dealt with  
157 - for item in deletelist:  
158 - try:  
159 - del dependencies [ item ]  
160 - except KeyError:  
161 - pass  
162 - # need to recreate the sortinglist  
163 - sortinglist = dependencies.values()  
164 - if not generation:  
165 - output.remove( generation )  
166 - sortinglist.sort ()  
167 - return output  
168 -  
169 -  
170 -  
171 -  
172 -  
173 -if __name__ == "__main__":  
174 -  
175 - nodes = ['a', 'b', 'c', 'd', 'e', 'f']  
176 - route = [('a', 'b'), ('b', 'c'), ('b', 'd'), ('e','f')]  
177 -  
178 - for x in toposort( nodes, route):  
179 - for a in x:  
180 - print a  
181 -  
182 - raise SystemExit  
183 -  
184 -  
185 -  
186 - import pprint, traceback  
187 - nodes= [ 0,1,2,3,4,5 ]  
188 - testingValues = [  
189 - [ (0,1),(1,2),(2,3),(3,4),(4,5)],  
190 - [ (0,1),(0,2),(1,2),(3,4),(4,5)],  
191 - [  
192 - (0,1),  
193 - (0,2),  
194 - (0,2),  
195 - (2,4),  
196 - (2,5),  
197 - (3,2),  
198 - (0,3)],  
199 - [  
200 - (0,1), # 3-element cycle test, no orphan nodes  
201 - (1,2),  
202 - (2,0),  
203 - (2,4),  
204 - (2,5),  
205 - (3,2),  
206 - (0,3)],  
207 - [  
208 - (0,1),  
209 - (1,1),  
210 - (1,1),  
211 - (1,4),  
212 - (1,5),  
213 - (1,2),  
214 - (3,1),  
215 - (2,1),  
216 - (2,0)],  
217 - [  
218 - (0,1),  
219 - (1,0),  
220 - (0,2),  
221 - (0,3),  
222 - ],  
223 - [  
224 - (0,1),  
225 - (1,0),  
226 - (0,2),  
227 - (3,1),  
228 - ],  
229 - ]  
230 - print 'sort, no recursion allowed'  
231 - for index in range(len(testingValues)):  
232 -## print ' %s -- %s'%( index, testingValues[index])  
233 - try:  
234 - print ' ', sort( nodes, testingValues[index] )  
235 - except:  
236 - print 'exception raised'  
237 - print 'toposort, no recursion allowed'  
238 - for index in range(len(testingValues)):  
239 -## print ' %s -- %s'%( index, testingValues[index])  
240 - try:  
241 - print ' ', toposort( nodes, testingValues[index] )  
242 - except:  
243 - print 'exception raised'  
244 - print 'sort, recursion allowed'  
245 - for index in range(len(testingValues)):  
246 -## print ' %s -- %s'%( index, testingValues[index])  
247 - try:  
248 - print ' ', sort( nodes, testingValues[index],0 )  
249 - except:  
250 - print 'exception raised'  
251 - print 'toposort, recursion allowed'  
252 - for index in range(len(testingValues)):  
253 -## print ' %s -- %s'%( index, testingValues[index])  
254 - try:  
255 - print ' ', toposort( nodes, testingValues[index],0 )  
256 - except:  
257 - print 'exception raised'  
258 -  
259 -  
260 -  
pacotes/openlayers/tools/update_dev_dir.sh
@@ -1,45 +0,0 @@
1 -#!/bin/sh  
2 -  
3 -# Used to update http://openlayers.org/dev/  
4 -  
5 -svn up /www/openlayers/docs/dev;  
6 -  
7 -# Get current 'Last Changed Rev'  
8 -REV=`svn info /www/openlayers/docs/dev/ | grep 'Last Changed Rev' | awk '{print $4}'`  
9 -  
10 -# Get the last svn rev  
11 -touch /tmp/ol_svn_rev  
12 -OLD_REV="o`cat /tmp/ol_svn_rev`"  
13 -  
14 -# If they're not equal, do some work.  
15 -if [ "o$REV" != "$OLD_REV" ]; then  
16 -  
17 - cd /www/openlayers/docs/dev/tools/  
18 - python exampleparser.py  
19 - cd /www/openlayers/docs/dev/build  
20 - ./build.py  
21 -  
22 - cp OpenLayers.js ..  
23 - cd ..  
24 -  
25 - sed -i -e 's!../lib/OpenLayers.js!../OpenLayers.js!' examples/*.html  
26 - perl /home/crschmidt/NaturalDocs -i /www/openlayers/docs/dev/lib -o HTML /www/openlayers/dev/apidocs -p /www/openlayers/docs/dev/apidoc_config -s Default OL >/dev/null  
27 - perl /home/crschmidt/NaturalDocs -i /www/openlayers/docs/dev/lib -o HTML /www/openlayers/dev/docs -p /www/openlayers/docs/dev/doc_config -s Default OL >/dev/null  
28 -  
29 - # Record the revision  
30 - echo -n $REV > /tmp/ol_svn_rev  
31 -fi  
32 -  
33 -svn up /www/openlayers/documentation-checkout  
34 -REV=`svn info /www/openlayers/documentation-checkout | grep 'Last Changed Rev' | awk '{print $4}'`  
35 -# Get the last svn rev  
36 -touch /tmp/ol_doc_rev  
37 -OLD_REV="o`cat /tmp/ol_doc_rev`"  
38 -# If they're not equal, do some work.  
39 -if [ "o$REV" != "$OLD_REV" ]; then  
40 - cd /www/openlayers/documentation-checkout  
41 - make html > /dev/null  
42 - cp -r _build/html/* /www/openlayers/documentation  
43 -  
44 - echo -n $REV > /tmp/ol_doc_rev  
45 -fi