diff --git a/plugins/Debian/BeautifulSoup.py b/plugins/Debian/BeautifulSoup.py deleted file mode 100644 index 2541dcc63..000000000 --- a/plugins/Debian/BeautifulSoup.py +++ /dev/null @@ -1,1080 +0,0 @@ -"""Beautiful Soup -Elixir and Tonic -"The Screen-Scraper's Friend" -v2.1.1 -http://www.crummy.com/software/BeautifulSoup/ - -Beautiful Soup parses arbitrarily invalid XML- or HTML-like substance -into a tree representation. It provides methods and Pythonic idioms -that make it easy to search and modify the tree. - -A well-formed XML/HTML document will yield a well-formed data -structure. An ill-formed XML/HTML document will yield a -correspondingly ill-formed data structure. If your document is only -locally well-formed, you can use this library to find and process the -well-formed part of it. The BeautifulSoup class has heuristics for -obtaining a sensible parse tree in the face of common HTML errors. - -Beautiful Soup has no external dependencies. It works with Python 2.2 -and up. - -Beautiful Soup defines classes for four different parsing strategies: - - * BeautifulStoneSoup, for parsing XML, SGML, or your domain-specific - language that kind of looks like XML. - - * BeautifulSoup, for parsing run-of-the-mill HTML code, be it valid - or invalid. - - * ICantBelieveItsBeautifulSoup, for parsing valid but bizarre HTML - that trips up BeautifulSoup. - - * BeautifulSOAP, for making it easier to parse XML documents that use - lots of subelements containing a single string, where you'd prefer - they put that string into an attribute (such as SOAP messages). - -You can subclass BeautifulStoneSoup or BeautifulSoup to create a -parsing strategy specific to an XML schema or a particular bizarre -HTML document. Typically your subclass would just override -SELF_CLOSING_TAGS and/or NESTABLE_TAGS. -""" -from __future__ import generators - -__author__ = "Leonard Richardson (leonardr@segfault.org)" -__version__ = "2.1.1" -__date__ = "$Date: 2004/10/18 00:14:20 $" -__copyright__ = "Copyright (c) 2004-2005 Leonard Richardson" -__license__ = "PSF" - -from sgmllib import SGMLParser, SGMLParseError -import types -import re -import sgmllib - -#This code makes Beautiful Soup able to parse XML with namespaces -sgmllib.tagfind = re.compile('[a-zA-Z][-_.:a-zA-Z0-9]*') - -class NullType(object): - - """Similar to NoneType with a corresponding singleton instance - 'Null' that, unlike None, accepts any message and returns itself. - - Examples: - >>> Null("send", "a", "message")("and one more", - ... "and what you get still") is Null - True - """ - - def __new__(cls): return Null - def __call__(self, *args, **kwargs): return Null -## def __getstate__(self, *args): return Null - def __getattr__(self, attr): return Null - def __getitem__(self, item): return Null - def __setattr__(self, attr, value): pass - def __setitem__(self, item, value): pass - def __len__(self): return 0 - # FIXME: is this a python bug? otherwise ``for x in Null: pass`` - # never terminates... - def __iter__(self): return iter([]) - def __contains__(self, item): return False - def __repr__(self): return "Null" -Null = object.__new__(NullType) - -class PageElement: - """Contains the navigational information for some part of the page - (either a tag or a piece of text)""" - - def setup(self, parent=Null, previous=Null): - """Sets up the initial relations between this element and - other elements.""" - self.parent = parent - self.previous = previous - self.next = Null - self.previousSibling = Null - self.nextSibling = Null - if self.parent and self.parent.contents: - self.previousSibling = self.parent.contents[-1] - self.previousSibling.nextSibling = self - - def findNext(self, name=None, attrs={}, text=None): - """Returns the first item that matches the given criteria and - appears after this Tag in the document.""" - return self._first(self.fetchNext, name, attrs, text) - firstNext = findNext - - def fetchNext(self, name=None, attrs={}, text=None, limit=None): - """Returns all items that match the given criteria and appear - before after Tag in the document.""" - return self._fetch(name, attrs, text, limit, self.nextGenerator) - - def findNextSibling(self, name=None, attrs={}, text=None): - """Returns the closest sibling to this Tag that matches the - given criteria and appears after this Tag in the document.""" - return self._first(self.fetchNextSiblings, name, attrs, text) - firstNextSibling = findNextSibling - - def fetchNextSiblings(self, name=None, attrs={}, text=None, limit=None): - """Returns the siblings of this Tag that match the given - criteria and appear after this Tag in the document.""" - return self._fetch(name, attrs, text, limit, self.nextSiblingGenerator) - - def findPrevious(self, name=None, attrs={}, text=None): - """Returns the first item that matches the given criteria and - appears before this Tag in the document.""" - return self._first(self.fetchPrevious, name, attrs, text) - - def fetchPrevious(self, name=None, attrs={}, text=None, limit=None): - """Returns all items that match the given criteria and appear - before this Tag in the document.""" - return self._fetch(name, attrs, text, limit, self.previousGenerator) - firstPrevious = findPrevious - - def findPreviousSibling(self, name=None, attrs={}, text=None): - """Returns the closest sibling to this Tag that matches the - given criteria and appears before this Tag in the document.""" - return self._first(self.fetchPreviousSiblings, name, attrs, text) - firstPreviousSibling = findPreviousSibling - - def fetchPreviousSiblings(self, name=None, attrs={}, text=None, - limit=None): - """Returns the siblings of this Tag that match the given - criteria and appear before this Tag in the document.""" - return self._fetch(name, attrs, text, limit, - self.previousSiblingGenerator) - - def findParent(self, name=None, attrs={}): - """Returns the closest parent of this Tag that matches the given - criteria.""" - r = Null - l = self.fetchParents(name, attrs, 1) - if l: - r = l[0] - return r - firstParent = findParent - - def fetchParents(self, name=None, attrs={}, limit=None): - """Returns the parents of this Tag that match the given - criteria.""" - return self._fetch(name, attrs, None, limit, self.parentGenerator) - - #These methods do the real heavy lifting. - - def _first(self, method, name, attrs, text): - r = Null - l = method(name, attrs, text, 1) - if l: - r = l[0] - return r - - def _fetch(self, name, attrs, text, limit, generator): - "Iterates over a generator looking for things that match." - if not hasattr(attrs, 'items'): - attrs = {'class' : attrs} - - results = [] - g = generator() - while True: - try: - i = g.next() - except StopIteration: - break - found = None - if isinstance(i, Tag): - if not text: - if not name or self._matches(i, name): - match = True - for attr, matchAgainst in attrs.items(): - check = i.get(attr) - if not self._matches(check, matchAgainst): - match = False - break - if match: - found = i - elif text: - if self._matches(i, text): - found = i - if found: - results.append(found) - if limit and len(results) >= limit: - break - return results - - #Generators that can be used to navigate starting from both - #NavigableTexts and Tags. - def nextGenerator(self): - i = self - while i: - i = i.next - yield i - - def nextSiblingGenerator(self): - i = self - while i: - i = i.nextSibling - yield i - - def previousGenerator(self): - i = self - while i: - i = i.previous - yield i - - def previousSiblingGenerator(self): - i = self - while i: - i = i.previousSibling - yield i - - def parentGenerator(self): - i = self - while i: - i = i.parent - yield i - - def _matches(self, chunk, howToMatch): - #print 'looking for %s in %s' % (howToMatch, chunk) - # - # If given a list of items, return true if the list contains a - # text element that matches. - if isList(chunk) and not isinstance(chunk, Tag): - for tag in chunk: - if isinstance(tag, NavigableText) and self._matches(tag, howToMatch): - return True - return False - if callable(howToMatch): - return howToMatch(chunk) - if isinstance(chunk, Tag): - #Custom match methods take the tag as an argument, but all other - #ways of matching match the tag name as a string - chunk = chunk.name - #Now we know that chunk is a string - if not isinstance(chunk, basestring): - chunk = str(chunk) - if hasattr(howToMatch, 'match'): - # It's a regexp object. - return howToMatch.search(chunk) - if isList(howToMatch): - return chunk in howToMatch - if hasattr(howToMatch, 'items'): - return howToMatch.has_key(chunk) - #It's just a string - return str(howToMatch) == chunk - -class NavigableText(PageElement): - - def __getattr__(self, attr): - "For backwards compatibility, text.string gives you text" - if attr == 'string': - return self - else: - raise AttributeError, "'%s' object has no attribute '%s'" % (self.__class__.__name__, attr) - -class NavigableString(str, NavigableText): - pass - -class NavigableUnicodeString(unicode, NavigableText): - pass - -class Tag(PageElement): - - """Represents a found HTML tag with its attributes and contents.""" - - def __init__(self, name, attrs=None, parent=Null, previous=Null): - "Basic constructor." - self.name = name - if attrs == None: - attrs = [] - self.attrs = attrs - self.contents = [] - self.setup(parent, previous) - self.hidden = False - - def get(self, key, default=None): - """Returns the value of the 'key' attribute for the tag, or - the value given for 'default' if it doesn't have that - attribute.""" - return self._getAttrMap().get(key, default) - - def __getitem__(self, key): - """tag[key] returns the value of the 'key' attribute for the tag, - and throws an exception if it's not there.""" - return self._getAttrMap()[key] - - def __iter__(self): - "Iterating over a tag iterates over its contents." - return iter(self.contents) - - def __len__(self): - "The length of a tag is the length of its list of contents." - return len(self.contents) - - def __contains__(self, x): - return x in self.contents - - def __nonzero__(self): - "A tag is non-None even if it has no contents." - return True - - def __setitem__(self, key, value): - """Setting tag[key] sets the value of the 'key' attribute for the - tag.""" - self._getAttrMap() - self.attrMap[key] = value - found = False - for i in range(0, len(self.attrs)): - if self.attrs[i][0] == key: - self.attrs[i] = (key, value) - found = True - if not found: - self.attrs.append((key, value)) - self._getAttrMap()[key] = value - - def __delitem__(self, key): - "Deleting tag[key] deletes all 'key' attributes for the tag." - for item in self.attrs: - if item[0] == key: - self.attrs.remove(item) - #We don't break because bad HTML can define the same - #attribute multiple times. - self._getAttrMap() - if self.attrMap.has_key(key): - del self.attrMap[key] - - def __call__(self, *args, **kwargs): - """Calling a tag like a function is the same as calling its - fetch() method. Eg. tag('a') returns a list of all the A tags - found within this tag.""" - return apply(self.fetch, args, kwargs) - - def __getattr__(self, tag): - if len(tag) > 3 and tag.rfind('Tag') == len(tag)-3: - return self.first(tag[:-3]) - elif tag.find('__') != 0: - return self.first(tag) - - def __eq__(self, other): - """Returns true iff this tag has the same name, the same attributes, - and the same contents (recursively) as the given tag. - - NOTE: right now this will return false if two tags have the - same attributes in a different order. Should this be fixed?""" - if not hasattr(other, 'name') or not hasattr(other, 'attrs') or not hasattr(other, 'contents') or self.name != other.name or self.attrs != other.attrs or len(self) != len(other): - return False - for i in range(0, len(self.contents)): - if self.contents[i] != other.contents[i]: - return False - return True - - def __ne__(self, other): - """Returns true iff this tag is not identical to the other tag, - as defined in __eq__.""" - return not self == other - - def __repr__(self): - """Renders this tag as a string.""" - return str(self) - - def __unicode__(self): - return self.__str__(1) - - def __str__(self, needUnicode=None, showStructureIndent=None): - """Returns a string or Unicode representation of this tag and - its contents. - - NOTE: since Python's HTML parser consumes whitespace, this - method is not certain to reproduce the whitespace present in - the original string.""" - - attrs = [] - if self.attrs: - for key, val in self.attrs: - attrs.append('%s="%s"' % (key, val)) - close = '' - closeTag = '' - if self.isSelfClosing(): - close = ' /' - else: - closeTag = '' % self.name - indentIncrement = None - if showStructureIndent != None: - indentIncrement = showStructureIndent - if not self.hidden: - indentIncrement += 1 - contents = self.renderContents(indentIncrement, needUnicode=needUnicode) - if showStructureIndent: - space = '\n%s' % (' ' * showStructureIndent) - if self.hidden: - s = contents - else: - s = [] - attributeString = '' - if attrs: - attributeString = ' ' + ' '.join(attrs) - if showStructureIndent: - s.append(space) - s.append('<%s%s%s>' % (self.name, attributeString, close)) - s.append(contents) - if closeTag and showStructureIndent != None: - s.append(space) - s.append(closeTag) - s = ''.join(s) - isUnicode = type(s) == types.UnicodeType - if needUnicode and not isUnicode: - s = unicode(s) - elif isUnicode and needUnicode==False: - s = str(s) - return s - - def prettify(self, needUnicode=None): - return self.__str__(needUnicode, showStructureIndent=True) - - def renderContents(self, showStructureIndent=None, needUnicode=None): - """Renders the contents of this tag as a (possibly Unicode) - string.""" - s=[] - for c in self: - text = None - if isinstance(c, NavigableUnicodeString) or type(c) == types.UnicodeType: - text = unicode(c) - elif isinstance(c, Tag): - s.append(c.__str__(needUnicode, showStructureIndent)) - elif needUnicode: - text = unicode(c) - else: - text = str(c) - if text: - if showStructureIndent != None: - if text[-1] == '\n': - text = text[:-1] - s.append(text) - return ''.join(s) - - #Soup methods - - def firstText(self, text, recursive=True): - """Convenience method to retrieve the first piece of text matching the - given criteria. 'text' can be a string, a regular expression object, - a callable that takes a string and returns whether or not the - string 'matches', etc.""" - return self.first(recursive=recursive, text=text) - - def fetchText(self, text, recursive=True, limit=None): - """Convenience method to retrieve all pieces of text matching the - given criteria. 'text' can be a string, a regular expression object, - a callable that takes a string and returns whether or not the - string 'matches', etc.""" - return self.fetch(recursive=recursive, text=text, limit=limit) - - def first(self, name=None, attrs={}, recursive=True, text=None): - """Return only the first child of this - Tag matching the given criteria.""" - r = Null - l = self.fetch(name, attrs, recursive, text, 1) - if l: - r = l[0] - return r - findChild = first - - def fetch(self, name=None, attrs={}, recursive=True, text=None, - limit=None): - """Extracts a list of Tag objects that match the given - criteria. You can specify the name of the Tag and any - attributes you want the Tag to have. - - The value of a key-value pair in the 'attrs' map can be a - string, a list of strings, a regular expression object, or a - callable that takes a string and returns whether or not the - string matches for some custom definition of 'matches'. The - same is true of the tag name.""" - generator = self.recursiveChildGenerator - if not recursive: - generator = self.childGenerator - return self._fetch(name, attrs, text, limit, generator) - fetchChildren = fetch - - #Utility methods - - def isSelfClosing(self): - """Returns true iff this is a self-closing tag as defined in the HTML - standard. - - TODO: This is specific to BeautifulSoup and its subclasses, but it's - used by __str__""" - return self.name in BeautifulSoup.SELF_CLOSING_TAGS - - def append(self, tag): - """Appends the given tag to the contents of this tag.""" - self.contents.append(tag) - - #Private methods - - def _getAttrMap(self): - """Initializes a map representation of this tag's attributes, - if not already initialized.""" - if not getattr(self, 'attrMap'): - self.attrMap = {} - for (key, value) in self.attrs: - self.attrMap[key] = value - return self.attrMap - - #Generator methods - def childGenerator(self): - for i in range(0, len(self.contents)): - yield self.contents[i] - raise StopIteration - - def recursiveChildGenerator(self): - stack = [(self, 0)] - while stack: - tag, start = stack.pop() - if isinstance(tag, Tag): - for i in range(start, len(tag.contents)): - a = tag.contents[i] - yield a - if isinstance(a, Tag) and tag.contents: - if i < len(tag.contents) - 1: - stack.append((tag, i+1)) - stack.append((a, 0)) - break - raise StopIteration - - -def isList(l): - """Convenience method that works with all 2.x versions of Python - to determine whether or not something is listlike.""" - return hasattr(l, '__iter__') \ - or (type(l) in (types.ListType, types.TupleType)) - -def buildTagMap(default, *args): - """Turns a list of maps, lists, or scalars into a single map. - Used to build the SELF_CLOSING_TAGS and NESTABLE_TAGS maps out - of lists and partial maps.""" - built = {} - for portion in args: - if hasattr(portion, 'items'): - #It's a map. Merge it. - for k,v in portion.items(): - built[k] = v - elif isList(portion): - #It's a list. Map each item to the default. - for k in portion: - built[k] = default - else: - #It's a scalar. Map it to the default. - built[portion] = default - return built - -class BeautifulStoneSoup(Tag, SGMLParser): - - """This class contains the basic parser and fetch code. It defines - a parser that knows nothing about tag behavior except for the - following: - - You can't close a tag without closing all the tags it encloses. - That is, "" actually means - "". - - [Another possible explanation is "", but since - this class defines no SELF_CLOSING_TAGS, it will never use that - explanation.] - - This class is useful for parsing XML or made-up markup languages, - or when BeautifulSoup makes an assumption counter to what you were - expecting.""" - - SELF_CLOSING_TAGS = {} - NESTABLE_TAGS = {} - RESET_NESTING_TAGS = {} - QUOTE_TAGS = {} - - #As a public service we will by default silently replace MS smart quotes - #and similar characters with their HTML or ASCII equivalents. - MS_CHARS = { '\x80' : '€', - '\x81' : ' ', - '\x82' : '‚', - '\x83' : 'ƒ', - '\x84' : '„', - '\x85' : '…', - '\x86' : '†', - '\x87' : '‡', - '\x88' : '⁁', - '\x89' : '%', - '\x8A' : 'Š', - '\x8B' : '<', - '\x8C' : 'Œ', - '\x8D' : '?', - '\x8E' : 'Z', - '\x8F' : '?', - '\x90' : '?', - '\x91' : '‘', - '\x92' : '’', - '\x93' : '“', - '\x94' : '”', - '\x95' : '•', - '\x96' : '–', - '\x97' : '—', - '\x98' : '˜', - '\x99' : '™', - '\x9a' : 'š', - '\x9b' : '>', - '\x9c' : 'œ', - '\x9d' : '?', - '\x9e' : 'z', - '\x9f' : 'Ÿ',} - - PARSER_MASSAGE = [(re.compile('(<[^<>]*)/>'), - lambda(x):x.group(1) + ' />'), - (re.compile(']*)>'), - lambda(x):''), - (re.compile("([\x80-\x9f])"), - lambda(x): BeautifulStoneSoup.MS_CHARS.get(x.group(1))) - ] - - ROOT_TAG_NAME = '[document]' - - def __init__(self, text=None, avoidParserProblems=True, - initialTextIsEverything=True): - """Initialize this as the 'root tag' and feed in any text to - the parser. - - NOTE about avoidParserProblems: sgmllib will process most bad - HTML, and BeautifulSoup has tricks for dealing with some HTML - that kills sgmllib, but Beautiful Soup can nonetheless choke - or lose data if your data uses self-closing tags or - declarations incorrectly. By default, Beautiful Soup sanitizes - its input to avoid the vast majority of these problems. The - problems are relatively rare, even in bad HTML, so feel free - to pass in False to avoidParserProblems if they don't apply to - you, and you'll get better performance. The only reason I have - this turned on by default is so I don't get so many tech - support questions. - - The two most common instances of invalid HTML that will choke - sgmllib are fixed by the default parser massage techniques: - -
(No space between name of closing tag and tag close) - (Extraneous whitespace in declaration) - - You can pass in a custom list of (RE object, replace method) - tuples to get Beautiful Soup to scrub your input the way you - want.""" - Tag.__init__(self, self.ROOT_TAG_NAME) - if avoidParserProblems \ - and not isList(avoidParserProblems): - avoidParserProblems = self.PARSER_MASSAGE - self.avoidParserProblems = avoidParserProblems - SGMLParser.__init__(self) - self.quoteStack = [] - self.hidden = 1 - self.reset() - if hasattr(text, 'read'): - #It's a file-type object. - text = text.read() - if text: - self.feed(text) - if initialTextIsEverything: - self.done() - - def __getattr__(self, methodName): - """This method routes method call requests to either the SGMLParser - superclass or the Tag superclass, depending on the method name.""" - if methodName.find('start_') == 0 or methodName.find('end_') == 0 \ - or methodName.find('do_') == 0: - return SGMLParser.__getattr__(self, methodName) - elif methodName.find('__') != 0: - return Tag.__getattr__(self, methodName) - else: - raise AttributeError - - def feed(self, text): - if self.avoidParserProblems: - for fix, m in self.avoidParserProblems: - text = fix.sub(m, text) - SGMLParser.feed(self, text) - - def done(self): - """Called when you're done parsing, so that the unclosed tags can be - correctly processed.""" - self.endData() #NEW - while self.currentTag.name != self.ROOT_TAG_NAME: - self.popTag() - - def reset(self): - SGMLParser.reset(self) - self.currentData = [] - self.currentTag = None - self.tagStack = [] - self.pushTag(self) - - def popTag(self): - tag = self.tagStack.pop() - # Tags with just one string-owning child get the child as a - # 'string' property, so that soup.tag.string is shorthand for - # soup.tag.contents[0] - if len(self.currentTag.contents) == 1 and \ - isinstance(self.currentTag.contents[0], NavigableText): - self.currentTag.string = self.currentTag.contents[0] - - #print "Pop", tag.name - if self.tagStack: - self.currentTag = self.tagStack[-1] - return self.currentTag - - def pushTag(self, tag): - #print "Push", tag.name - if self.currentTag: - self.currentTag.append(tag) - self.tagStack.append(tag) - self.currentTag = self.tagStack[-1] - - def endData(self): - currentData = ''.join(self.currentData) - if currentData: - if not currentData.strip(): - if '\n' in currentData: - currentData = '\n' - else: - currentData = ' ' - c = NavigableString - if type(currentData) == types.UnicodeType: - c = NavigableUnicodeString - o = c(currentData) - o.setup(self.currentTag, self.previous) - if self.previous: - self.previous.next = o - self.previous = o - self.currentTag.contents.append(o) - self.currentData = [] - - def _popToTag(self, name, inclusivePop=True): - """Pops the tag stack up to and including the most recent - instance of the given tag. If inclusivePop is false, pops the tag - stack up to but *not* including the most recent instqance of - the given tag.""" - if name == self.ROOT_TAG_NAME: - return - - numPops = 0 - mostRecentTag = None - for i in range(len(self.tagStack)-1, 0, -1): - if name == self.tagStack[i].name: - numPops = len(self.tagStack)-i - break - if not inclusivePop: - numPops = numPops - 1 - - for i in range(0, numPops): - mostRecentTag = self.popTag() - return mostRecentTag - - def _smartPop(self, name): - - """We need to pop up to the previous tag of this type, unless - one of this tag's nesting reset triggers comes between this - tag and the previous tag of this type, OR unless this tag is a - generic nesting trigger and another generic nesting trigger - comes between this tag and the previous tag of this type. - - Examples: -

FooBar

should pop to 'p', not 'b'. -

FooBar

should pop to 'table', not 'p'. -

Foo

Bar

should pop to 'tr', not 'p'. -

FooBar

should pop to 'p', not 'b'. - -

    • *
    • * should pop to 'ul', not the first 'li'. -
  • ** should pop to 'table', not the first 'tr' - tag should - implicitly close the previous tag within the same
    ** should pop to 'tr', not the first 'td' - """ - - nestingResetTriggers = self.NESTABLE_TAGS.get(name) - isNestable = nestingResetTriggers != None - isResetNesting = self.RESET_NESTING_TAGS.has_key(name) - popTo = None - inclusive = True - for i in range(len(self.tagStack)-1, 0, -1): - p = self.tagStack[i] - if (not p or p.name == name) and not isNestable: - #Non-nestable tags get popped to the top or to their - #last occurance. - popTo = name - break - if (nestingResetTriggers != None - and p.name in nestingResetTriggers) \ - or (nestingResetTriggers == None and isResetNesting - and self.RESET_NESTING_TAGS.has_key(p.name)): - - #If we encounter one of the nesting reset triggers - #peculiar to this tag, or we encounter another tag - #that causes nesting to reset, pop up to but not - #including that tag. - - popTo = p.name - inclusive = False - break - p = p.parent - if popTo: - self._popToTag(popTo, inclusive) - - def unknown_starttag(self, name, attrs, selfClosing=0): - #print "Start tag %s" % name - if self.quoteStack: - #This is not a real tag. - #print "<%s> is not real!" % name - attrs = ''.join(map(lambda(x, y): ' %s="%s"' % (x, y), attrs)) - self.handle_data('<%s%s>' % (name, attrs)) - return - self.endData() - if not name in self.SELF_CLOSING_TAGS and not selfClosing: - self._smartPop(name) - tag = Tag(name, attrs, self.currentTag, self.previous) - if self.previous: - self.previous.next = tag - self.previous = tag - self.pushTag(tag) - if selfClosing or name in self.SELF_CLOSING_TAGS: - self.popTag() - if name in self.QUOTE_TAGS: - #print "Beginning quote (%s)" % name - self.quoteStack.append(name) - self.literal = 1 - - def unknown_endtag(self, name): - if self.quoteStack and self.quoteStack[-1] != name: - #This is not a real end tag. - #print " is not real!" % name - self.handle_data('' % name) - return - self.endData() - self._popToTag(name) - if self.quoteStack and self.quoteStack[-1] == name: - self.quoteStack.pop() - self.literal = (len(self.quoteStack) > 0) - - def handle_data(self, data): - self.currentData.append(data) - - def handle_pi(self, text): - "Propagate processing instructions right through." - self.handle_data("" % text) - - def handle_comment(self, text): - "Propagate comments right through." - self.handle_data("" % text) - - def handle_charref(self, ref): - "Propagate char refs right through." - self.handle_data('&#%s;' % ref) - - def handle_entityref(self, ref): - "Propagate entity refs right through." - self.handle_data('&%s;' % ref) - - def handle_decl(self, data): - "Propagate DOCTYPEs and the like right through." - self.handle_data('' % data) - - def parse_declaration(self, i): - """Treat a bogus SGML declaration as raw data. Treat a CDATA - declaration as regular data.""" - j = None - if self.rawdata[i:i+9] == '', i) - if k == -1: - k = len(self.rawdata) - self.handle_data(self.rawdata[i+9:k]) - j = k+3 - else: - try: - j = SGMLParser.parse_declaration(self, i) - except SGMLParseError: - toHandle = self.rawdata[i:] - self.handle_data(toHandle) - j = i + len(toHandle) - return j - -class BeautifulSoup(BeautifulStoneSoup): - - """This parser knows the following facts about HTML: - - * Some tags have no closing tag and should be interpreted as being - closed as soon as they are encountered. - - * The text inside some tags (ie. 'script') may contain tags which - are not really part of the document and which should be parsed - as text, not tags. If you want to parse the text as tags, you can - always fetch it and parse it explicitly. - - * Tag nesting rules: - - Most tags can't be nested at all. For instance, the occurance of - a

    tag should implicitly close the previous

    tag. - -

    Para1

    Para2 - should be transformed into: -

    Para1

    Para2 - - Some tags can be nested arbitrarily. For instance, the occurance - of a

    tag should _not_ implicitly close the previous -
    tag. - - Alice said:
    Bob said:
    Blah - should NOT be transformed into: - Alice said:
    Bob said:
    Blah - - Some tags can be nested, but the nesting is reset by the - interposition of other tags. For instance, a
    , - but not close a tag in another table. - -
    BlahBlah - should be transformed into: -
    BlahBlah - but, - Blah
    Blah - should NOT be transformed into - Blah
    Blah - - Differing assumptions about tag nesting rules are a major source - of problems with the BeautifulSoup class. If BeautifulSoup is not - treating as nestable a tag your page author treats as nestable, - try ICantBelieveItsBeautifulSoup before writing your own - subclass.""" - - SELF_CLOSING_TAGS = buildTagMap(None, ['br' , 'hr', 'input', 'img', 'meta', - 'spacer', 'link', 'frame', 'base']) - - QUOTE_TAGS = {'script': None} - - #According to the HTML standard, each of these inline tags can - #contain another tag of the same type. Furthermore, it's common - #to actually use these tags this way. - NESTABLE_INLINE_TAGS = ['span', 'font', 'q', 'object', 'bdo', 'sub', 'sup', - 'center'] - - #According to the HTML standard, these block tags can contain - #another tag of the same type. Furthermore, it's common - #to actually use these tags this way. - NESTABLE_BLOCK_TAGS = ['blockquote', 'div', 'fieldset', 'ins', 'del'] - - #Lists can contain other lists, but there are restrictions. - NESTABLE_LIST_TAGS = { 'ol' : [], - 'ul' : [], - 'li' : ['ul', 'ol'], - 'dl' : [], - 'dd' : ['dl'], - 'dt' : ['dl'] } - - #Tables can contain other tables, but there are restrictions. - NESTABLE_TABLE_TAGS = {'table' : [], - 'tr' : ['table', 'tbody', 'tfoot', 'thead'], - 'td' : ['tr'], - 'th' : ['tr'], - } - - NON_NESTABLE_BLOCK_TAGS = ['address', 'form', 'p', 'pre'] - - #If one of these tags is encountered, all tags up to the next tag of - #this type are popped. - RESET_NESTING_TAGS = buildTagMap(None, NESTABLE_BLOCK_TAGS, 'noscript', - NON_NESTABLE_BLOCK_TAGS, - NESTABLE_LIST_TAGS, - NESTABLE_TABLE_TAGS) - - NESTABLE_TAGS = buildTagMap([], NESTABLE_INLINE_TAGS, NESTABLE_BLOCK_TAGS, - NESTABLE_LIST_TAGS, NESTABLE_TABLE_TAGS) - -class ICantBelieveItsBeautifulSoup(BeautifulSoup): - - """The BeautifulSoup class is oriented towards skipping over - common HTML errors like unclosed tags. However, sometimes it makes - errors of its own. For instance, consider this fragment: - - FooBar - - This is perfectly valid (if bizarre) HTML. However, the - BeautifulSoup class will implicitly close the first b tag when it - encounters the second 'b'. It will think the author wrote - "FooBar", and didn't close the first 'b' tag, because - there's no real-world reason to bold something that's already - bold. When it encounters '' it will close two more 'b' - tags, for a grand total of three tags closed instead of two. This - can throw off the rest of your document structure. The same is - true of a number of other tags, listed below. - - It's much more common for someone to forget to close (eg.) a 'b' - tag than to actually use nested 'b' tags, and the BeautifulSoup - class handles the common case. This class handles the - not-co-common case: where you can't believe someone wrote what - they did, but it's valid HTML and BeautifulSoup screwed up by - assuming it wouldn't be. - - If this doesn't do what you need, try subclassing this class or - BeautifulSoup, and providing your own list of NESTABLE_TAGS.""" - - I_CANT_BELIEVE_THEYRE_NESTABLE_INLINE_TAGS = \ - ['em', 'big', 'i', 'small', 'tt', 'abbr', 'acronym', 'strong', - 'cite', 'code', 'dfn', 'kbd', 'samp', 'strong', 'var', 'b', - 'big'] - - I_CANT_BELIEVE_THEYRE_NESTABLE_BLOCK_TAGS = ['noscript'] - - NESTABLE_TAGS = buildTagMap([], BeautifulSoup.NESTABLE_TAGS, - I_CANT_BELIEVE_THEYRE_NESTABLE_BLOCK_TAGS, - I_CANT_BELIEVE_THEYRE_NESTABLE_INLINE_TAGS) - -class BeautifulSOAP(BeautifulStoneSoup): - """This class will push a tag with only a single string child into - the tag's parent as an attribute. The attribute's name is the tag - name, and the value is the string child. An example should give - the flavor of the change: - - baz - => - baz - - You can then access fooTag['bar'] instead of fooTag.barTag.string. - - This is, of course, useful for scraping structures that tend to - use subelements instead of attributes, such as SOAP messages. Note - that it modifies its input, so don't print the modified version - out. - - I'm not sure how many people really want to use this class; let me - know if you do. Mainly I like the name.""" - - def popTag(self): - if len(self.tagStack) > 1: - tag = self.tagStack[-1] - parent = self.tagStack[-2] - parent._getAttrMap() - if (isinstance(tag, Tag) and len(tag.contents) == 1 and - isinstance(tag.contents[0], NavigableText) and - not parent.attrMap.has_key(tag.name)): - parent[tag.name] = tag.contents[0] - BeautifulStoneSoup.popTag(self) - -#Enterprise class names! It has come to our attention that some people -#think the names of the Beautiful Soup parser classes are too silly -#and "unprofessional" for use in enterprise screen-scraping. We feel -#your pain! For such-minded folk, the Beautiful Soup Consortium And -#All-Night Kosher Bakery recommends renaming this file to -#"RobustParser.py" (or, in cases of extreme enterprisitude, -#"RobustParserBeanInterface.class") and using the following -#enterprise-friendly class aliases: -class RobustXMLParser(BeautifulStoneSoup): - pass -class RobustHTMLParser(BeautifulSoup): - pass -class RobustWackAssHTMLParser(ICantBelieveItsBeautifulSoup): - pass -class SimplifyingSOAPParser(BeautifulSOAP): - pass - -### - - -#By default, act as an HTML pretty-printer. -if __name__ == '__main__': - import sys - soup = BeautifulStoneSoup(sys.stdin.read()) - print soup.prettify() diff --git a/plugins/Debian/__init__.py b/plugins/Debian/__init__.py deleted file mode 100644 index 535f2ff85..000000000 --- a/plugins/Debian/__init__.py +++ /dev/null @@ -1,62 +0,0 @@ -### -# Copyright (c) 2003-2005, James Vega -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# * Redistributions of source code must retain the above copyright notice, -# this list of conditions, and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions, and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of the author of this software nor the name of -# contributors to this software may be used to endorse or promote products -# derived from this software without specific prior written consent. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -# POSSIBILITY OF SUCH DAMAGE. -### - -""" -This is a module to contain Debian-specific commands. -""" - -import supybot -import supybot.world as world - -# Use this for the version of this plugin. You may wish to put a CVS keyword -# in here if you're keeping the plugin in CVS or some similar system. -__version__ = "0.1" - -__author__ = supybot.authors.jamessan - -# This is a dictionary mapping supybot.Author instances to lists of -# contributions. -__contributors__ = {} - -import config -import plugin -reload(plugin) # In case we're being reloaded. -# Add more reloads here if you add third-party modules and want them to be -# reloaded when this plugin is reloaded. Don't forget to import them as well! -import BeautifulSoup -reload(BeautifulSoup) - -if world.testing: - import test - -Class = plugin.Class -configure = config.configure - - -# vim:set shiftwidth=4 softtabstop=4 expandtab textwidth=79: diff --git a/plugins/Debian/config.py b/plugins/Debian/config.py deleted file mode 100644 index efd8a8c51..000000000 --- a/plugins/Debian/config.py +++ /dev/null @@ -1,75 +0,0 @@ -### -# Copyright (c) 2003-2005, James Vega -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# * Redistributions of source code must retain the above copyright notice, -# this list of conditions, and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions, and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of the author of this software nor the name of -# contributors to this software may be used to endorse or promote products -# derived from this software without specific prior written consent. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -# POSSIBILITY OF SUCH DAMAGE. -### - -import supybot.conf as conf -import supybot.registry as registry - -def configure(advanced): - # This will be called by supybot to configure this module. advanced is - # a bool that specifies whether the user identified himself as an advanced - # user or not. You should effect your configuration by manipulating the - # registry as appropriate. - from supybot.questions import output, expect, anything, something, yn - conf.registerPlugin('Debian', True) - if not utils.findBinaryInPath('zgrep'): - if not advanced: - output("""I can't find zgrep in your path. This is necessary - to run the file command. I'll disable this command - now. When you get zgrep in your path, use the command - 'enable Debian.file' to re-enable the command.""") - capabilities = conf.supybot.capabilities() - capabilities.add('-Debian.file') - conf.supybot.capabilities.set(capabilities) - else: - output("""I can't find zgrep in your path. If you want to run - the file command with any sort of expediency, you'll - need it. You can use a python equivalent, but it's - about two orders of magnitude slower. THIS MEANS IT - WILL TAKE AGES TO RUN THIS COMMAND. Don't do this.""") - if yn('Do you want to use a Python equivalent of zgrep?'): - conf.supybot.plugins.Debian.pythonZgrep.setValue(True) - else: - output('I\'ll disable file now.') - capabilities = conf.supybot.capabilities() - capabilities.add('-Debian.file') - conf.supybot.capabilities.set(capabilities) - - -Debian = conf.registerPlugin('Debian') -conf.registerGlobalValue(Debian, 'pythonZgrep', - registry.Boolean(False, """An advanced option, mostly just for testing; - uses a Python-coded zgrep rather than the actual zgrep executable, - generally resulting in a 50x slowdown. What would take 2 seconds will - take 100 with this enabled. Don't enable this.""")) -conf.registerChannelValue(Debian, 'bold', - registry.Boolean(True, """Determines whether the plugin will use bold in - the responses to some of its commands.""")) - - -# vim:set shiftwidth=4 softtabstop=4 expandtab textwidth=79: diff --git a/plugins/Debian/plugin.py b/plugins/Debian/plugin.py deleted file mode 100644 index 2bc089ab3..000000000 --- a/plugins/Debian/plugin.py +++ /dev/null @@ -1,492 +0,0 @@ -### -# Copyright (c) 2003-2005, James Vega -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# * Redistributions of source code must retain the above copyright notice, -# this list of conditions, and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions, and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of the author of this software nor the name of -# contributors to this software may be used to endorse or promote products -# derived from this software without specific prior written consent. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -# POSSIBILITY OF SUCH DAMAGE. -### - -import os -import re -import gzip -import time -import popen2 -import fnmatch -import threading - -import BeautifulSoup - -import supybot.conf as conf -import supybot.utils as utils -import supybot.world as world -from supybot.commands import * -import supybot.plugins as plugins -import supybot.ircutils as ircutils -import supybot.callbacks as callbacks -from supybot.utils.iter import all, imap, ifilter - -class PeriodicFileDownloader(object): - """A class to periodically download a file/files. - - A class-level dictionary 'periodicFiles' maps names of files to - three-tuples of - (url, seconds between downloads, function to run with downloaded file). - - 'url' should be in some form that urllib2.urlopen can handle (do note that - urllib2.urlopen handles file:// links perfectly well.) - - 'seconds between downloads' is the number of seconds between downloads, - obviously. An important point to remember, however, is that it is only - engaged when a command is run. I.e., if you say you want the file - downloaded every day, but no commands that use it are run in a week, the - next time such a command is run, it'll be using a week-old file. If you - don't want such behavior, you'll have to give an error mess age to the user - and tell him to call you back in the morning. - - 'function to run with downloaded file' is a function that will be passed - a string *filename* of the downloaded file. This will be some random - filename probably generated via some mktemp-type-thing. You can do what - you want with this; you may want to build a database, take some stats, - or simply rename the file. You can pass None as your function and the - file with automatically be renamed to match the filename you have it listed - under. It'll be in conf.supybot.directories.data, of course. - - Aside from that dictionary, simply use self.getFile(filename) in any method - that makes use of a periodically downloaded file, and you'll be set. - """ - periodicFiles = None - def __init__(self, *args, **kwargs): - if self.periodicFiles is None: - raise ValueError, 'You must provide files to download' - self.lastDownloaded = {} - self.downloadedCounter = {} - for filename in self.periodicFiles: - if self.periodicFiles[filename][-1] is None: - fullname = os.path.join(conf.supybot.directories.data(), - filename) - if os.path.exists(fullname): - self.lastDownloaded[filename] = os.stat(fullname).st_ctime - else: - self.lastDownloaded[filename] = 0 - else: - self.lastDownloaded[filename] = 0 - self.currentlyDownloading = set() - self.downloadedCounter[filename] = 0 - self.getFile(filename) - super(PeriodicFileDownloader, self).__init__(*args, **kwargs) - - def _downloadFile(self, filename, url, f): - self.currentlyDownloading.add(filename) - try: - try: - infd = utils.web.getUrlFd(url) - except IOError, e: - self.log.warning('Error downloading %s: %s', url, e) - return - except utils.web.Error, e: - self.log.warning('Error downloading %s: %s', url, e) - return - confDir = conf.supybot.directories.data() - newFilename = os.path.join(confDir, utils.file.mktemp()) - outfd = file(newFilename, 'wb') - start = time.time() - s = infd.read(4096) - while s: - outfd.write(s) - s = infd.read(4096) - infd.close() - outfd.close() - self.log.info('Downloaded %s in %s seconds', - filename, time.time()-start) - self.downloadedCounter[filename] += 1 - self.lastDownloaded[filename] = time.time() - if f is None: - toFilename = os.path.join(confDir, filename) - if os.name == 'nt': - # Windows, grrr... - if os.path.exists(toFilename): - os.remove(toFilename) - os.rename(newFilename, toFilename) - else: - start = time.time() - f(newFilename) - total = time.time() - start - self.log.info('Function ran on %s in %s seconds', - filename, total) - finally: - self.currentlyDownloading.remove(filename) - - def getFile(self, filename): - if world.documenting: - return - (url, timeLimit, f) = self.periodicFiles[filename] - if time.time() - self.lastDownloaded[filename] > timeLimit and \ - filename not in self.currentlyDownloading: - self.log.info('Beginning download of %s', url) - args = (filename, url, f) - name = '%s #%s' % (filename, self.downloadedCounter[filename]) - t = threading.Thread(target=self._downloadFile, name=name, - args=(filename, url, f)) - t.setDaemon(True) - t.start() - world.threadsSpawned += 1 - - -class Debian(callbacks.Plugin, PeriodicFileDownloader): - threaded = True - periodicFiles = { - # This file is only updated once a week, so there's no sense in - # downloading a new one every day. - 'Contents-i386.gz': ('ftp://ftp.us.debian.org/' - 'debian/dists/unstable/Contents-i386.gz', - 604800, None) - } - contents = conf.supybot.directories.data.dirize('Contents-i386.gz') - def file(self, irc, msg, args, optlist, glob): - """[--{regexp,exact} ] [] - - Returns packages in Debian that includes files matching . If - --regexp is given, returns packages that include files matching the - given regexp. If --exact is given, returns packages that include files - matching exactly the string given. - """ - self.getFile('Contents-i386.gz') - # Make sure it's anchored, make sure it doesn't have a leading slash - # (the filenames don't have leading slashes, and people may not know - # that). - if not optlist and not glob: - raise callbacks.ArgumentError - if optlist and glob: - irc.error('You must specify either a glob or a regexp/exact ' - 'search, but not both.', Raise=True) - for (option, arg) in optlist: - if option == 'exact': - regexp = arg.lstrip('/') - elif option == 'regexp': - regexp = arg - if glob: - regexp = fnmatch.translate(glob.lstrip('/')) - regexp = regexp.rstrip('$') - regexp = ".*%s.* " % regexp - try: - re_obj = re.compile(regexp, re.I) - except re.error, e: - irc.error(format('Error in regexp: %s', e), Raise=True) - if self.registryValue('pythonZgrep'): - fd = gzip.open(self.contents) - r = imap(lambda tup: tup[0], - ifilter(lambda tup: tup[0], - imap(lambda line:(re_obj.search(line), line),fd))) - else: - try: - (r, w) = popen2.popen4(['zgrep', '-ie', regexp, self.contents]) - w.close() - except TypeError: - # We're on Windows. - irc.error('This command won\'t work on this platform. ' - 'If you think it should (i.e., you know that you ' - 'have a zgrep binary somewhere) then file a bug ' - 'about it at http://supybot.sf.net/ .', Raise=True) - packages = set() # Make packages unique - try: - for line in r: - if len(packages) > 100: - irc.error('More than 100 packages matched, ' - 'please narrow your search.', Raise=True) - try: - if hasattr(line, 'group'): # we're actually using - line = line.group(0) # pythonZgrep :( - (filename, pkg_list) = line.split() - if filename == 'FILE': - # This is the last line before the actual files. - continue - except ValueError: # Unpack list of wrong size. - continue # We've not gotten to the files yet. - packages.update(pkg_list.split(',')) - finally: - if hasattr(r, 'close'): - r.close() - if len(packages) == 0: - irc.reply('I found no packages with that file.') - else: - irc.reply(format('%L', sorted(packages))) - file = wrap(file, [getopts({'regexp':'regexpMatcher','exact':'something'}), - additional('glob')]) - - _debreflags = re.DOTALL | re.IGNORECASE - _deblistre = re.compile(r'

    Package ([^<]+)

    (.*?)', _debreflags) - def version(self, irc, msg, args, optlist, branch, package): - """[--exact] [{stable,testing,unstable,experimental}] - - Returns the current version(s) of a Debian package in the given branch - (if any, otherwise all available ones are displayed). If --exact is - specified, only packages whose name exactly matches - will be reported. - """ - url = 'http://packages.debian.org/cgi-bin/search_packages.pl?keywords'\ - '=%s&searchon=names&version=%s&release=all&subword=1' - for (option, _) in optlist: - if option == 'exact': - url = url.replace('&subword=1','') - responses = [] - if '*' in package: - irc.error('Wildcard characters can not be specified.', Raise=True) - package = utils.web.urlquote(package) - url %= (package, branch) - try: - html = utils.web.getUrl(url) - except utils.web.Error, e: - irc.error(format('I couldn\'t reach the search page (%s).', e), - Raise=True) - if 'is down at the moment' in html: - irc.error('Packages.debian.org is down at the moment. ' - 'Please try again later.', Raise=True) - pkgs = self._deblistre.findall(html) - if not pkgs: - irc.reply(format('No package found for %s (%s)', - utils.web.urlunquote(package), branch)) - else: - for pkg in pkgs: - pkgMatch = pkg[0] - soup = BeautifulSoup.BeautifulSoup() - soup.feed(pkg[1]) - liBranches = soup.fetch('li') - branches = [] - versions = [] - def branchVers(br): - vers = [b.next.string.strip() for b in br] - return [utils.str.rsplit(v, ':', 1)[0] for v in vers] - for li in liBranches: - branches.append(li.a.string) - versions.append(branchVers(li.fetch('br'))) - if branches and versions: - for pairs in zip(branches, versions): - branch = pairs[0] - ver = ', '.join(pairs[1]) - s = format('%s (%s)', pkgMatch, - ': '.join([branch, ver])) - responses.append(s) - resp = format('%i matches found: %s', - len(responses), '; '.join(responses)) - irc.reply(resp) - version = wrap(version, [getopts({'exact':''}), - optional(('literal', ('stable', 'testing', - 'unstable', 'experimental')), 'all'), - 'text']) - - _incomingRe = re.compile(r'', re.I) - def incoming(self, irc, msg, args, optlist, globs): - """[--{regexp,arch} ] [ ...] - - Checks debian incoming for a matching package name. The arch - parameter defaults to i386; --regexp returns only those package names - that match a given regexp, and normal matches use standard *nix - globbing. - """ - predicates = [] - archPredicate = lambda s: ('_i386.' in s) - for (option, arg) in optlist: - if option == 'regexp': - predicates.append(r.search) - elif option == 'arch': - arg = '_%s.' % arg - archPredicate = lambda s, arg=arg: (arg in s) - predicates.append(archPredicate) - for glob in globs: - glob = fnmatch.translate(glob) - predicates.append(re.compile(glob).search) - packages = [] - try: - fd = utils.web.getUrlFd('http://incoming.debian.org/') - except utils.web.Error, e: - irc.error(str(e), Raise=True) - for line in fd: - m = self._incomingRe.search(line) - if m: - name = m.group(1) - if all(None, imap(lambda p: p(name), predicates)): - realname = utils.str.rsplit(name, '_', 1)[0] - packages.append(realname) - if len(packages) == 0: - irc.error('No packages matched that search.') - else: - irc.reply(format('%L', packages)) - incoming = thread(wrap(incoming, - [getopts({'regexp': 'regexpMatcher', - 'arch': 'something'}), - any('glob')])) - - def bold(self, s): - if self.registryValue('bold', dynamic.channel): - return ircutils.bold(s) - return s - - _update = re.compile(r' : ([^<]+) - - Reports various statistics (from http://packages.qa.debian.org/) about - . - """ - pkg = pkg.lower() - text = utils.web.getUrl('http://packages.qa.debian.org/%s/%s.html' % - (pkg[0], pkg)) - if "Error 404" in text: - irc.errorInvalid('source package name') - updated = None - m = self._update.search(text) - if m: - updated = m.group(1) - soup = BeautifulSoup.BeautifulSoup() - soup.feed(text) - pairs = zip(soup.fetch('td', {'class': 'labelcell'}), - soup.fetch('td', {'class': 'contentcell'})) - for (label, content) in pairs: - if label.string == 'Last version': - version = '%s: %s' % (self.bold(label.string), content.string) - elif label.string == 'Maintainer': - name = content.a.string - email = content.fetch('a')[1]['href'][7:] - maintainer = format('%s: %s %u', self.bold('Maintainer'), - name, utils.web.mungeEmail(email)) - elif label.string == 'All bugs': - bugsAll = format('%i Total', content.first('a').string) - elif label.string == 'Release Critical': - bugsRC = format('%i RC', content.first('a').string) - elif label.string == 'Important and Normal': - bugs = format('%i Important/Normal', - content.first('a').string) - elif label.string == 'Minor and Wishlist': - bugsMinor = format('%i Minor/Wishlist', - content.first('a').string) - elif label.string == 'Fixed and Pending': - bugsFixed = format('%i Fixed/Pending', - content.first('a').string) - elif label.string == 'Subscribers count': - subscribers = format('%s: %i', - self.bold('Subscribers'), content.string) - bugL = (bugsAll, bugsRC, bugs, bugsMinor, bugsFixed) - s = '. '.join((version, maintainer, subscribers, - '%s: %s' % (self.bold('Bugs'), '; '.join(bugL)))) - if updated: - s = 'As of %s, %s' % (updated, s) - irc.reply(s) - stats = wrap(stats, ['somethingWithoutSpaces']) - - _newpkgre = re.compile(r'
  • ]+>([^<]+)') - def new(self, irc, msg, args, section, glob): - """[{main,contrib,non-free}] [] - - Checks for packages that have been added to Debian's unstable branch - in the past week. If no glob is specified, returns a list of all - packages. If no section is specified, defaults to main. - """ - try: - fd = utils.web.getUrlFd( - 'http://packages.debian.org/unstable/newpkg_%s' % section) - except utils.web.Error, e: - irc.error(str(e), Raise=True) - packages = [] - for line in fd: - m = self._newpkgre.search(line) - if m: - m = m.group(1) - if fnmatch.fnmatch(m, glob): - packages.append(m) - fd.close() - if packages: - irc.reply(format('%L', packages)) - else: - irc.error('No packages matched that search.') - new = wrap(new, [optional(('literal', ('main', 'contrib', 'non-free')), - 'main'), - additional('glob', '*')]) - - _severity = re.compile(r'.*(?:severity set to `([^\']+)\'|' - r'severity:\s+([^<]+))', re.I) - _package = re.compile(r'Package: <[^>]+>([^<]+)<', re.I | re.S) - _reporter = re.compile(r'Reported by: <[^>]+>([^<]+)<', re.I | re.S) - _subject = re.compile(r'
    ([^<]+)', re.I | re.S) - _date = re.compile(r'Date: ([^;]+);', re.I | re.S) - _tags = re.compile(r'Tags: ([^<]+)', re.I) - _searches = (_package, _subject, _reporter, _date) - def bug(self, irc, msg, args, bug): - """ - - Returns a description of the bug with bug id . - """ - url = 'http://bugs.debian.org/%s' % bug - try: - text = utils.web.getUrl(url) - except utils.web.Error, e: - irc.error(str(e), Raise=True) - if "There is no record of Bug" in text: - irc.error('I could not find a bug report matching that number.', - Raise=True) - searches = map(lambda p: p.search(text), self._searches) - sev = self._severity.search(text) - tags = self._tags.search(text) - # This section should be cleaned up to ease future modifications - if all(None, searches): - L = map(self.bold, ('Package', 'Subject', 'Reported')) - resp = format('%s: %%s; %s: %%s; %s: by %%s on %%s', *L) - L = map(utils.web.htmlToText, map(lambda p: p.group(1), searches)) - resp = format(resp, *L) - if sev: - sev = filter(None, sev.groups()) - if sev: - sev = utils.web.htmlToText(sev[0]) - resp += format('; %s: %s', self.bold('Severity'), sev) - if tags: - resp += format('; %s: %s', self.bold('Tags'), tags.group(1)) - resp += format('; %u', url) - irc.reply(resp) - else: - irc.reply('I was unable to properly parse the BTS page.') - bug = wrap(bug, [('id', 'bug')]) - - _dpnRe = re.compile(r'"\+2">([^<]+) - - Turns into a 'debian package name' using - http://www.pigdog.com/features/dpn.html. - """ - url = r'http://www.pigdog.org/cgi_bin/dpn.phtml?name=%s' - try: - text = utils.web.getUrl(url % '+'.join(words)) - except utils.web.Error, e: - irc.error(str(e), Raise=True) - m = self._dpnRe.search(text) - if m is not None: - irc.reply(m.group(1)) - else: - irc.errorPossibleBug('Unable to parse webpage.') - debianize = wrap(debianize, [many('something')]) - - -Class = Debian - - -# vim:set shiftwidth=4 softtabstop=4 expandtab textwidth=79: diff --git a/plugins/Debian/test.py b/plugins/Debian/test.py deleted file mode 100644 index 8fc0b5c08..000000000 --- a/plugins/Debian/test.py +++ /dev/null @@ -1,92 +0,0 @@ -### -# Copyright (c) 2003-2005, James Vega -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# * Redistributions of source code must retain the above copyright notice, -# this list of conditions, and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions, and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of the author of this software nor the name of -# contributors to this software may be used to endorse or promote products -# derived from this software without specific prior written consent. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -# POSSIBILITY OF SUCH DAMAGE. -### - -import os -import time - -from supybot.test import * - -class DebianTestCase(PluginTestCase): - plugins = ('Debian',) - timeout = 100 - cleanDataDir = False - fileDownloaded = False - if network: - def setUp(self, nick='test'): - PluginTestCase.setUp(self) - try: - datadir = conf.supybot.directories.data - if os.path.exists(datadir.dirize('Contents-i386.gz')): - pass - else: - print - print "Downloading files, this may take awhile." - filename = datadir.dirize('Contents-i386.gz') - while not os.path.exists(filename): - time.sleep(1) - print "Download complete." - print "Starting test ..." - self.fileDownloaded = True - except KeyboardInterrupt: - pass - - def testDebBugNoHtml(self): - self.assertNotRegexp('debian bug 287792', r'\') - - def testDebversion(self): - self.assertHelp('debian version') - self.assertRegexp('debian version lakjdfad', - r'^No package.*\(all\)') - self.assertRegexp('debian version unstable alkdjfad', - r'^No package.*\(unstable\)') - self.assertRegexp('debian version gaim', - r'\d+ matches found:.*gaim.*\(stable') - self.assertRegexp('debian version linux-wlan', - r'\d+ matches found:.*linux-wlan.*') - self.assertRegexp('debian version --exact linux-wlan', - r'^No package.*\(all\)') - self.assertError('debian version unstable') - - def testDebfile(self): - self.assertHelp('file') - if not self.fileDownloaded: - pass - self.assertRegexp('file --exact bin/gaim', r'net/gaim') - - def testDebincoming(self): - self.assertNotError('incoming') - - def testDebianize(self): - self.assertNotError('debianize supybot') - - def testDebstats(self): - self.assertNotError('stats supybot') - - -# vim:set shiftwidth=4 softtabstop=4 expandtab textwidth=79: