[MediaWiki-commits] [Gerrit] remove patch-BeautifulSoup since not needed and causes issue... - change (pywikibot/compat)

DrTrigon (Code Review) Fri, 02 Aug 2013 07:48:37 -0700

DrTrigon has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/77325



Change subject: remove patch-BeautifulSoup since not needed and causes issues with setup of core components
......................................................................

remove patch-BeautifulSoup since not needed and causes issues with setup of core components

Change-Id: I4b5462737fbf54c7521786bf03e71a90421926fc
---
M externals/__init__.py
D externals/patch-BeautifulSoup
2 files changed, 1 insertion(+), 845 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/pywikibot/compat refs/changes/25/77325/1

diff --git a/externals/__init__.py b/externals/__init__.py
index 13ea4c4..83045f5 100644
--- a/externals/__init__.py
+++ b/externals/__init__.py
@@ -57,9 +57,7 @@
    'BeautifulSoup.py': ({'linux-fedora': ['python-BeautifulSoup'],
                          'linux-ubuntu': ['']},
                         {  'url': 
'https://pypi.python.org/packages/source/B/BeautifulSoup/BeautifulSoup-3.2.0.tar.gz',
-                          'path': 'BeautifulSoup-3.2.0/BeautifulSoup.py',
-                         #$ diff -Nau TEST_BeautifulSoup.py BeautifulSoup.py > 
patch-BeautifulSoup
-                         'patch': 'patch-BeautifulSoup'},
+                          'path': 'BeautifulSoup-3.2.0/BeautifulSoup.py'},
                         {}),                                               # OK
              'irclib': ({'linux-fedora': ['python-irclib'],
                          'linux-ubuntu': ['']},
diff --git a/externals/patch-BeautifulSoup b/externals/patch-BeautifulSoup
deleted file mode 100644
index 271c061..0000000
--- a/externals/patch-BeautifulSoup
+++ /dev/null
@@ -1,842 +0,0 @@
---- TEST_BeautifulSoup.py      2010-11-21 14:35:28.000000000 +0100
-+++ BeautifulSoup.py   2013-03-02 14:56:38.000000000 +0100
-@@ -76,7 +76,6 @@
- SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE, DAMMIT.
- 
- """
--from __future__ import generators
- 
- __author__ = "Leonard Richardson ([email protected])"
- __version__ = "3.2.0"
-@@ -90,9 +89,9 @@
- import re
- import sgmllib
- try:
--  from htmlentitydefs import name2codepoint
-+    from htmlentitydefs import name2codepoint
- except ImportError:
--  name2codepoint = {}
-+    name2codepoint = {}
- try:
-     set
- except NameError:
-@@ -104,12 +103,13 @@
- 
- DEFAULT_OUTPUT_ENCODING = "utf-8"
- 
-+
- def _match_css_class(str):
-     """Build a RE to match the given CSS class."""
-     return re.compile(r"(^|.*\s)%s($|\s)" % str)
- 
--# First, the classes that represent markup elements.
- 
-+# First, the classes that represent markup elements.
- class PageElement(object):
-     """Contains the navigational information for some part of the page
-     (either a tag or a piece of text)"""
-@@ -129,8 +129,8 @@
-     def replaceWith(self, replaceWith):
-         oldParent = self.parent
-         myIndex = self.parent.index(self)
--        if hasattr(replaceWith, "parent")\
--                  and replaceWith.parent is self.parent:
-+        if hasattr(replaceWith, "parent") and \
-+           replaceWith.parent is self.parent:
-             # We're replacing this element with one of its siblings.
-             index = replaceWith.parent.index(replaceWith)
-             if index and index < myIndex:
-@@ -187,11 +187,11 @@
-         return lastChild
- 
-     def insert(self, position, newChild):
--        if isinstance(newChild, basestring) \
--            and not isinstance(newChild, NavigableString):
-+        if isinstance(newChild, basestring) and not \
-+           isinstance(newChild, NavigableString):
-             newChild = NavigableString(newChild)
- 
--        position =  min(position, len(self.contents))
-+        position = min(position, len(self.contents))
-         if hasattr(newChild, 'parent') and newChild.parent is not None:
-             # We're 'inserting' an element that's already one
-             # of this object's children.
-@@ -228,7 +228,7 @@
-             while not parentsNextSibling:
-                 parentsNextSibling = parent.nextSibling
-                 parent = parent.parent
--                if not parent: # This is the last element in the document.
-+                if not parent:  # This is the last element in the document.
-                     break
-             if parentsNextSibling:
-                 newChildsLastElement.next = parentsNextSibling
-@@ -273,7 +273,8 @@
-         criteria and appear after this Tag in the document."""
-         return self._findAll(name, attrs, text, limit,
-                              self.nextSiblingGenerator, **kwargs)
--    fetchNextSiblings = findNextSiblings # Compatibility with pre-3.x
-+
-+    fetchNextSiblings = findNextSiblings  # Compatibility with pre-3.x
- 
-     def findPrevious(self, name=None, attrs={}, text=None, **kwargs):
-         """Returns the first item that matches the given criteria and
-@@ -285,8 +286,8 @@
-         """Returns all items that match the given criteria and appear
-         before this Tag in the document."""
-         return self._findAll(name, attrs, text, limit, self.previousGenerator,
--                           **kwargs)
--    fetchPrevious = findAllPrevious # Compatibility with pre-3.x
-+                             **kwargs)
-+    fetchPrevious = findAllPrevious  # Compatibility with pre-3.x
- 
-     def findPreviousSibling(self, name=None, attrs={}, text=None, **kwargs):
-         """Returns the closest sibling to this Tag that matches the
-@@ -300,7 +301,7 @@
-         criteria and appear before this Tag in the document."""
-         return self._findAll(name, attrs, text, limit,
-                              self.previousSiblingGenerator, **kwargs)
--    fetchPreviousSiblings = findPreviousSiblings # Compatibility with pre-3.x
-+    fetchPreviousSiblings = findPreviousSiblings  # Compatibility with pre-3.x
- 
-     def findParent(self, name=None, attrs={}, **kwargs):
-         """Returns the closest parent of this Tag that matches the given
-@@ -319,7 +320,8 @@
- 
-         return self._findAll(name, attrs, None, limit, self.parentGenerator,
-                              **kwargs)
--    fetchParents = findParents # Compatibility with pre-3.x
-+
-+    fetchParents = findParents  # Compatibility with pre-3.x
- 
-     #These methods do the real heavy lifting.
- 
-@@ -416,11 +418,12 @@
-                 s = unicode(s)
-         else:
-             if encoding:
--                s  = self.toEncoding(str(s), encoding)
-+                s = self.toEncoding(str(s), encoding)
-             else:
-                 s = unicode(s)
-         return s
- 
-+
- class NavigableString(unicode, PageElement):
- 
-     def __new__(cls, value):
-@@ -445,7 +448,8 @@
-         if attr == 'string':
-             return self
-         else:
--            raise AttributeError, "'%s' object has no attribute '%s'" % 
(self.__class__.__name__, attr)
-+            raise AttributeError("'%s' object has no attribute '%s'"
-+                                 % (self.__class__.__name__, attr))
- 
-     def __unicode__(self):
-         return str(self).decode(DEFAULT_OUTPUT_ENCODING)
-@@ -456,11 +460,13 @@
-         else:
-             return self
- 
-+
- class CData(NavigableString):
- 
-     def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING):
-         return "<![CDATA[%s]]>" % NavigableString.__str__(self, encoding)
- 
-+
- class ProcessingInstruction(NavigableString):
-     def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING):
-         output = self
-@@ -468,14 +474,17 @@
-             output = self.substituteEncoding(output, encoding)
-         return "<?%s?>" % self.toEncoding(output, encoding)
- 
-+
- class Comment(NavigableString):
-     def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING):
-         return "<!--%s-->" % NavigableString.__str__(self, encoding)
- 
-+
- class Declaration(NavigableString):
-     def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING):
-         return "<!%s>" % NavigableString.__str__(self, encoding)
- 
-+
- class Tag(PageElement):
- 
-     """Represents a found HTML tag with its attributes and contents."""
-@@ -483,15 +492,15 @@
-     def _invert(h):
-         "Cheap function to invert a hash."
-         i = {}
--        for k,v in h.items():
-+        for k, v in h.items():
-             i[v] = k
-         return i
- 
--    XML_ENTITIES_TO_SPECIAL_CHARS = { "apos" : "'",
--                                      "quot" : '"',
--                                      "amp" : "&",
--                                      "lt" : "<",
--                                      "gt" : ">" }
-+    XML_ENTITIES_TO_SPECIAL_CHARS = {"apos": "'",
-+                                     "quot": '"',
-+                                     "amp": "&",
-+                                     "lt": "<",
-+                                     "gt": ">"}
- 
-     XML_SPECIAL_CHARS_TO_ENTITIES = _invert(XML_ENTITIES_TO_SPECIAL_CHARS)
- 
-@@ -550,8 +559,8 @@
-         self.attrs = map(convert, self.attrs)
- 
-     def getString(self):
--        if (len(self.contents) == 1
--            and isinstance(self.contents[0], NavigableString)):
-+        if (len(self.contents) == 1 and isinstance(self.contents[0],
-+                                                   NavigableString)):
-             return self.contents[0]
- 
-     def setString(self, string):
-@@ -593,7 +602,7 @@
-         raise ValueError("Tag.index: element not in tag")
- 
-     def has_key(self, key):
--        return self._getAttrMap().has_key(key)
-+        return key in self._getAttrMap()
- 
-     def __getitem__(self, key):
-         """tag[key] returns the value of the 'key' attribute for the tag,
-@@ -637,7 +646,7 @@
-                 #We don't break because bad HTML can define the same
-                 #attribute multiple times.
-             self._getAttrMap()
--            if self.attrMap.has_key(key):
-+            if key in self.attrMap:
-                 del self.attrMap[key]
- 
-     def __call__(self, *args, **kwargs):
-@@ -652,7 +661,8 @@
-             return self.find(tag[:-3])
-         elif tag.find('__') != 0:
-             return self.find(tag)
--        raise AttributeError, "'%s' object has no attribute '%s'" % 
(self.__class__, tag)
-+        raise AttributeError("'%s' object has no attribute '%s'"
-+                             % (self.__class__, tag))
- 
-     def __eq__(self, other):
-         """Returns true iff this tag has the same name, the same attributes,
-@@ -662,7 +672,9 @@
-         same attributes in a different order. Should this be fixed?"""
-         if other is self:
-             return True
--        if not hasattr(other, 'name') or not hasattr(other, 'attrs') or not 
hasattr(other, 'contents') or self.name != other.name or self.attrs != 
other.attrs or len(self) != len(other):
-+        if not hasattr(other, 'name') or not hasattr(other, 'attrs') or \
-+           not hasattr(other, 'contents') or self.name != other.name or \
-+           self.attrs != other.attrs or len(self) != len(other):
-             return False
-         for i in range(0, len(self.contents)):
-             if self.contents[i] != other.contents[i]:
-@@ -735,7 +747,8 @@
-                     # value might also contain angle brackets, or
-                     # ampersands that aren't part of entities. We need
-                     # to escape those to XML entities too.
--                    val = 
self.BARE_AMPERSAND_OR_BRACKET.sub(self._sub_entity, val)
-+                    val = self.BARE_AMPERSAND_OR_BRACKET.sub(self._sub_entity,
-+                                                             val)
- 
-                 attrs.append(fmt % (self.toEncoding(key, encoding),
-                                     self.toEncoding(val, encoding)))
-@@ -799,7 +812,7 @@
-                        prettyPrint=False, indentLevel=0):
-         """Renders the contents of this tag as a string in the given
-         encoding. If encoding is None, returns a Unicode string.."""
--        s=[]
-+        s = []
-         for c in self:
-             text = None
-             if isinstance(c, NavigableString):
-@@ -913,13 +926,13 @@
-         if isinstance(markupName, Tag):
-             markup = markupName
-             markupAttrs = markup
--        callFunctionWithTagData = callable(self.name) \
--                                and not isinstance(markupName, Tag)
-+        callFunctionWithTagData = callable(self.name) and \
-+                                  not isinstance(markupName, Tag)
- 
-         if (not self.name) \
--               or callFunctionWithTagData \
--               or (markup and self._matches(markup, self.name)) \
--               or (not markup and self._matches(markupName, self.name)):
-+           or callFunctionWithTagData \
-+           or (markup and self._matches(markup, self.name)) \
-+           or (not markup and self._matches(markupName, self.name)):
-             if callFunctionWithTagData:
-                 match = self.name(markupName, markupAttrs)
-             else:
-@@ -927,11 +940,11 @@
-                 markupAttrMap = None
-                 for attr, matchAgainst in self.attrs.items():
-                     if not markupAttrMap:
--                         if hasattr(markupAttrs, 'get'):
-+                        if hasattr(markupAttrs, 'get'):
-                             markupAttrMap = markupAttrs
--                         else:
-+                        else:
-                             markupAttrMap = {}
--                            for k,v in markupAttrs:
-+                            for k, v in markupAttrs:
-                                 markupAttrMap[k] = v
-                     attrValue = markupAttrMap.get(attr)
-                     if not self._matches(attrValue, matchAgainst):
-@@ -949,11 +962,10 @@
-         found = None
-         # If given a list of items, scan it for a text element that
-         # matches.
--        if hasattr(markup, "__iter__") \
--                and not isinstance(markup, Tag):
-+        if hasattr(markup, "__iter__") and not isinstance(markup, Tag):
-             for element in markup:
--                if isinstance(element, NavigableString) \
--                       and self.search(element):
-+                if isinstance(element, NavigableString) and \
-+                   self.search(element):
-                     found = element
-                     break
-         # If it's a Tag, make sure its name or attributes match.
-@@ -962,13 +974,13 @@
-             if not self.text:
-                 found = self.searchTag(markup)
-         # If it's text, make sure the text matches.
--        elif isinstance(markup, NavigableString) or \
--                 isinstance(markup, basestring):
-+        elif isinstance(markup, NavigableString) or isinstance(markup,
-+                                                               basestring):
-             if self._matches(markup, self.text):
-                 found = markup
-         else:
--            raise Exception, "I don't know how to match against a %s" \
--                  % markup.__class__
-+            raise Exception("I don't know how to match against a %s"
-+                            % markup.__class__)
-         return found
- 
-     def _matches(self, markup, matchAgainst):
-@@ -989,10 +1001,10 @@
-             if hasattr(matchAgainst, 'match'):
-                 # It's a regexp object.
-                 result = markup and matchAgainst.search(markup)
--            elif hasattr(matchAgainst, '__iter__'): # list-like
-+            elif hasattr(matchAgainst, '__iter__'):  # list-like
-                 result = markup in matchAgainst
-             elif hasattr(matchAgainst, 'items'):
--                result = markup.has_key(matchAgainst)
-+                result = matchAgainst in markup
-             elif matchAgainst and isinstance(markup, basestring):
-                 if isinstance(markup, unicode):
-                     matchAgainst = unicode(matchAgainst)
-@@ -1003,6 +1015,7 @@
-                 result = matchAgainst == markup
-         return result
- 
-+
- class ResultSet(list):
-     """A ResultSet is just a list that keeps track of the SoupStrainer
-     that created it."""
-@@ -1010,6 +1023,7 @@
-         list.__init__([])
-         self.source = source
- 
-+
- # Now, some helper functions.
- 
- def buildTagMap(default, *args):
-@@ -1020,9 +1034,9 @@
-     for portion in args:
-         if hasattr(portion, 'items'):
-             #It's a map. Merge it.
--            for k,v in portion.items():
-+            for k, v in portion.items():
-                 built[k] = v
--        elif hasattr(portion, '__iter__'): # is a list
-+        elif hasattr(portion, '__iter__'):  # is a list
-             #It's a list. Map each item to the default.
-             for k in portion:
-                 built[k] = default
-@@ -1031,6 +1045,7 @@
-             built[portion] = default
-     return built
- 
-+
- # Now, the parser classes.
- 
- class BeautifulStoneSoup(Tag, SGMLParser):
-@@ -1075,7 +1090,7 @@
-     # can be replaced with a single space. A text node that contains
-     # fancy Unicode spaces (usually non-breaking) should be left
-     # alone.
--    STRIP_ASCII_SPACES = { 9: None, 10: None, 12: None, 13: None, 32: None, }
-+    STRIP_ASCII_SPACES = {9: None, 10: None, 12: None, 13: None, 32: None, }
- 
-     def __init__(self, markup="", parseOnlyThese=None, fromEncoding=None,
-                  markupMassage=True, smartQuotesTo=XML_ENTITIES,
-@@ -1152,7 +1167,7 @@
-             n = int(name)
-         except ValueError:
-             return
--        if not 0 <= n <= 127 : # ASCII ends at 127, not 255
-+        if not 0 <= n <= 127:  # ASCII ends at 127, not 255
-             return
-         return self.convert_codepoint(n)
- 
-@@ -1163,9 +1178,10 @@
-             if not hasattr(self, 'originalEncoding'):
-                 self.originalEncoding = None
-         else:
--            dammit = UnicodeDammit\
--                     (markup, [self.fromEncoding, inDocumentEncoding],
--                      smartQuotesTo=self.smartQuotesTo, isHTML=isHTML)
-+            dammit = UnicodeDammit(markup,
-+                                   [self.fromEncoding, inDocumentEncoding],
-+                                   smartQuotesTo=self.smartQuotesTo,
-+                                   isHTML=isHTML)
-             markup = dammit.unicode
-             self.originalEncoding = dammit.originalEncoding
-             self.declaredHTMLEncoding = dammit.declaredHTMLEncoding
-@@ -1195,7 +1211,7 @@
-         #print "__getattr__ called on %s.%s" % (self.__class__, methodName)
- 
-         if methodName.startswith('start_') or methodName.startswith('end_') \
--               or methodName.startswith('do_'):
-+           or methodName.startswith('do_'):
-             return SGMLParser.__getattr__(self, methodName)
-         elif not methodName.startswith('__'):
-             return Tag.__getattr__(self, methodName)
-@@ -1205,8 +1221,8 @@
-     def isSelfClosingTag(self, name):
-         """Returns true iff the given string is the name of a
-         self-closing tag according to this parser."""
--        return self.SELF_CLOSING_TAGS.has_key(name) \
--               or self.instanceSelfClosingTags.has_key(name)
-+        return name in self.SELF_CLOSING_TAGS or \
-+               name in self.instanceSelfClosingTags
- 
-     def reset(self):
-         Tag.__init__(self, self, self.ROOT_TAG_NAME)
-@@ -1245,8 +1261,8 @@
-                     currentData = ' '
-             self.currentData = []
-             if self.parseOnlyThese and len(self.tagStack) <= 1 and \
--                   (not self.parseOnlyThese.text or \
--                    not self.parseOnlyThese.search(currentData)):
-+               (not self.parseOnlyThese.text or not
-+                    self.parseOnlyThese.search(currentData)):
-                 return
-             o = containerClass(currentData)
-             o.setup(self.currentTag, self.previous)
-@@ -1255,7 +1271,6 @@
-             self.previous = o
-             self.currentTag.contents.append(o)
- 
--
-     def _popToTag(self, name, inclusivePop=True):
-         """Pops the tag stack up to and including the most recent
-         instance of the given tag. If inclusivePop is false, pops the tag
-@@ -1297,8 +1312,8 @@
-         """
- 
-         nestingResetTriggers = self.NESTABLE_TAGS.get(name)
--        isNestable = nestingResetTriggers != None
--        isResetNesting = self.RESET_NESTING_TAGS.has_key(name)
-+        isNestable = nestingResetTriggers is not None
-+        isResetNesting = name in self.RESET_NESTING_TAGS
-         popTo = None
-         inclusive = True
-         for i in range(len(self.tagStack)-1, 0, -1):
-@@ -1311,7 +1326,7 @@
-             if (nestingResetTriggers is not None
-                 and p.name in nestingResetTriggers) \
-                 or (nestingResetTriggers is None and isResetNesting
--                    and self.RESET_NESTING_TAGS.has_key(p.name)):
-+                    and p.name in self.RESET_NESTING_TAGS):
- 
-                 #If we encounter one of the nesting reset triggers
-                 #peculiar to this tag, or we encounter another tag
-@@ -1338,7 +1353,8 @@
-             self._smartPop(name)
- 
-         if self.parseOnlyThese and len(self.tagStack) <= 1 \
--               and (self.parseOnlyThese.text or not 
self.parseOnlyThese.searchTag(name, attrs)):
-+           and (self.parseOnlyThese.text or
-+                not self.parseOnlyThese.searchTag(name, attrs)):
-             return
- 
-         tag = Tag(self, name, attrs, self.currentTag, self.previous)
-@@ -1412,7 +1428,7 @@
-                 data = self.XML_ENTITIES_TO_SPECIAL_CHARS.get(ref)
- 
-         if not data and self.convertHTMLEntities and \
--            not self.XML_ENTITIES_TO_SPECIAL_CHARS.get(ref):
-+           not self.XML_ENTITIES_TO_SPECIAL_CHARS.get(ref):
-                 # TODO: We've got a problem here. We're told this is
-                 # an entity reference, but it's not an XML entity
-                 # reference or an HTML entity reference. Nonetheless,
-@@ -1449,12 +1465,12 @@
-         declaration as a CData object."""
-         j = None
-         if self.rawdata[i:i+9] == '<![CDATA[':
--             k = self.rawdata.find(']]>', i)
--             if k == -1:
--                 k = len(self.rawdata)
--             data = self.rawdata[i+9:k]
--             j = k+3
--             self._toStringSubclass(data, CData)
-+            k = self.rawdata.find(']]>', i)
-+            if k == -1:
-+                k = len(self.rawdata)
-+            data = self.rawdata[i+9:k]
-+            j = k + 3
-+            self._toStringSubclass(data, CData)
-         else:
-             try:
-                 j = SGMLParser.parse_declaration(self, i)
-@@ -1464,6 +1480,7 @@
-                 j = i + len(toHandle)
-         return j
- 
-+
- class BeautifulSoup(BeautifulStoneSoup):
- 
-     """This parser knows the following facts about HTML:
-@@ -1513,18 +1530,18 @@
-     BeautifulStoneSoup before writing your own subclass."""
- 
-     def __init__(self, *args, **kwargs):
--        if not kwargs.has_key('smartQuotesTo'):
-+        if not 'smartQuotesTo' in kwargs:
-             kwargs['smartQuotesTo'] = self.HTML_ENTITIES
-         kwargs['isHTML'] = True
-         BeautifulStoneSoup.__init__(self, *args, **kwargs)
- 
-     SELF_CLOSING_TAGS = buildTagMap(None,
--                                    ('br' , 'hr', 'input', 'img', 'meta',
-+                                    ('br', 'hr', 'input', 'img', 'meta',
-                                     'spacer', 'link', 'frame', 'base', 'col'))
- 
-     PRESERVE_WHITESPACE_TAGS = set(['pre', 'textarea'])
- 
--    QUOTE_TAGS = {'script' : None, 'textarea' : None}
-+    QUOTE_TAGS = {'script': None, 'textarea': None}
- 
-     #According to the HTML standard, each of these inline tags can
-     #contain another tag of the same type. Furthermore, it's common
-@@ -1538,21 +1555,21 @@
-     NESTABLE_BLOCK_TAGS = ('blockquote', 'div', 'fieldset', 'ins', 'del')
- 
-     #Lists can contain other lists, but there are restrictions.
--    NESTABLE_LIST_TAGS = { 'ol' : [],
--                           'ul' : [],
--                           'li' : ['ul', 'ol'],
--                           'dl' : [],
--                           'dd' : ['dl'],
--                           'dt' : ['dl'] }
-+    NESTABLE_LIST_TAGS = {'ol': [],
-+                          'ul': [],
-+                          'li': ['ul', 'ol'],
-+                          'dl': [],
-+                          'dd': ['dl'],
-+                          'dt': ['dl']}
- 
-     #Tables can contain other tables, but there are restrictions.
--    NESTABLE_TABLE_TAGS = {'table' : [],
--                           'tr' : ['table', 'tbody', 'tfoot', 'thead'],
--                           'td' : ['tr'],
--                           'th' : ['tr'],
--                           'thead' : ['table'],
--                           'tbody' : ['table'],
--                           'tfoot' : ['table'],
-+    NESTABLE_TABLE_TAGS = {'table': [],
-+                           'tr': ['table', 'tbody', 'tfoot', 'thead'],
-+                           'td': ['tr'],
-+                           'th': ['tr'],
-+                           'thead': ['table'],
-+                           'tbody': ['table'],
-+                           'tfoot': ['table'],
-                            }
- 
-     NON_NESTABLE_BLOCK_TAGS = ('address', 'form', 'p', 'pre')
-@@ -1588,11 +1605,11 @@
-                 contentType = value
-                 contentTypeIndex = i
- 
--        if httpEquiv and contentType: # It's an interesting meta tag.
-+        if httpEquiv and contentType:  # It's an interesting meta tag.
-             match = self.CHARSET_RE.search(contentType)
-             if match:
-                 if (self.declaredHTMLEncoding is not None or
--                    self.originalEncoding == self.fromEncoding):
-+                        self.originalEncoding == self.fromEncoding):
-                     # An HTML encoding was sniffed while converting
-                     # the document to Unicode, or an HTML encoding was
-                     # sniffed during a previous pass through the
-@@ -1617,9 +1634,11 @@
-         if tag and tagNeedsEncodingSubstitution:
-             tag.containsSubstitutions = True
- 
-+
- class StopParsing(Exception):
-     pass
- 
-+
- class ICantBelieveItsBeautifulSoup(BeautifulSoup):
- 
-     """The BeautifulSoup class is oriented towards skipping over
-@@ -1645,10 +1664,10 @@
-     it's valid HTML and BeautifulSoup screwed up by assuming it
-     wouldn't be."""
- 
--    I_CANT_BELIEVE_THEYRE_NESTABLE_INLINE_TAGS = \
--     ('em', 'big', 'i', 'small', 'tt', 'abbr', 'acronym', 'strong',
--      'cite', 'code', 'dfn', 'kbd', 'samp', 'strong', 'var', 'b',
--      'big')
-+    I_CANT_BELIEVE_THEYRE_NESTABLE_INLINE_TAGS = (
-+        'em', 'big', 'i', 'small', 'tt', 'abbr', 'acronym', 'strong', 'cite',
-+        'code', 'dfn', 'kbd', 'samp', 'strong', 'var', 'b', 'big'
-+    )
- 
-     I_CANT_BELIEVE_THEYRE_NESTABLE_BLOCK_TAGS = ('noscript',)
- 
-@@ -1656,6 +1675,7 @@
-                                 I_CANT_BELIEVE_THEYRE_NESTABLE_BLOCK_TAGS,
-                                 I_CANT_BELIEVE_THEYRE_NESTABLE_INLINE_TAGS)
- 
-+
- class MinimalSoup(BeautifulSoup):
-     """The MinimalSoup class is for parsing HTML that contains
-     pathologically bad markup. It makes no assumptions about tag
-@@ -1669,6 +1689,7 @@
-     RESET_NESTING_TAGS = buildTagMap('noscript')
-     NESTABLE_TAGS = {}
- 
-+
- class BeautifulSOAP(BeautifulStoneSoup):
-     """This class will push a tag with only a single string child into
-     the tag's parent as an attribute. The attribute's name is the tag
-@@ -1696,10 +1717,11 @@
-             parent._getAttrMap()
-             if (isinstance(tag, Tag) and len(tag.contents) == 1 and
-                 isinstance(tag.contents[0], NavigableString) and
--                not parent.attrMap.has_key(tag.name)):
-+                    not tag.name in parent.attrMap):
-                 parent[tag.name] = tag.contents[0]
-         BeautifulStoneSoup.popTag(self)
- 
-+
- #Enterprise class names! It has come to our attention that some people
- #think the names of the Beautiful Soup parser classes are too silly
- #and "unprofessional" for use in enterprise screen-scraping. We feel
-@@ -1750,6 +1772,7 @@
- except ImportError:
-     pass
- 
-+
- class UnicodeDammit:
-     """A class for detecting the encoding of a *ML document and
-     converting it to a Unicode string. If the source encoding is
-@@ -1760,14 +1783,14 @@
-     # meta tags to the corresponding Python codec names. It only covers
-     # values that aren't in Python's aliases and can't be determined
-     # by the heuristics in find_codec.
--    CHARSET_ALIASES = { "macintosh" : "mac-roman",
--                        "x-sjis" : "shift-jis" }
-+    CHARSET_ALIASES = {"macintosh": "mac-roman",
-+                       "x-sjis": "shift-jis"}
- 
-     def __init__(self, markup, overrideEncodings=[],
-                  smartQuotesTo='xml', isHTML=False):
-         self.declaredHTMLEncoding = None
-         self.markup, documentEncoding, sniffedEncoding = \
--                     self._detectEncoding(markup, isHTML)
-+            self._detectEncoding(markup, isHTML)
-         self.smartQuotesTo = smartQuotesTo
-         self.triedEncodings = []
-         if markup == '' or isinstance(markup, unicode):
-@@ -1820,9 +1843,8 @@
-         if self.smartQuotesTo and proposed.lower() in("windows-1252",
-                                                       "iso-8859-1",
-                                                       "iso-8859-2"):
--            markup = re.compile("([\x80-\x9f])").sub \
--                     (lambda(x): self._subMSChar(x.group(1)),
--                      markup)
-+            markup = re.compile("([\x80-\x9f])").sub(
-+                lambda(x): self._subMSChar(x.group(1)), markup)
- 
-         try:
-             # print "Trying to convert document to %s" % proposed
-@@ -1842,11 +1864,11 @@
- 
-         # strip Byte Order Mark (if present)
-         if (len(data) >= 4) and (data[:2] == '\xfe\xff') \
--               and (data[2:4] != '\x00\x00'):
-+           and (data[2:4] != '\x00\x00'):
-             encoding = 'utf-16be'
-             data = data[2:]
--        elif (len(data) >= 4) and (data[:2] == '\xff\xfe') \
--                 and (data[2:4] != '\x00\x00'):
-+        elif (len(data) >= 4) and \
-+             (data[:2] == '\xff\xfe') and (data[2:4] != '\x00\x00'):
-             encoding = 'utf-16le'
-             data = data[2:]
-         elif data[:3] == '\xef\xbb\xbf':
-@@ -1872,8 +1894,8 @@
-                 # UTF-16BE
-                 sniffed_xml_encoding = 'utf-16be'
-                 xml_data = unicode(xml_data, 'utf-16be').encode('utf-8')
--            elif (len(xml_data) >= 4) and (xml_data[:2] == '\xfe\xff') \
--                     and (xml_data[2:4] != '\x00\x00'):
-+            elif (len(xml_data) >= 4) and \
-+                 (xml_data[:2] == '\xfe\xff') and (xml_data[2:4] != 
'\x00\x00'):
-                 # UTF-16BE with BOM
-                 sniffed_xml_encoding = 'utf-16be'
-                 xml_data = unicode(xml_data[2:], 'utf-16be').encode('utf-8')
-@@ -1882,7 +1904,7 @@
-                 sniffed_xml_encoding = 'utf-16le'
-                 xml_data = unicode(xml_data, 'utf-16le').encode('utf-8')
-             elif (len(xml_data) >= 4) and (xml_data[:2] == '\xff\xfe') and \
--                     (xml_data[2:4] != '\x00\x00'):
-+                 (xml_data[2:4] != '\x00\x00'):
-                 # UTF-16LE with BOM
-                 sniffed_xml_encoding = 'utf-16le'
-                 xml_data = unicode(xml_data[2:], 'utf-16le').encode('utf-8')
-@@ -1928,7 +1950,6 @@
-                 xml_encoding = sniffed_xml_encoding
-         return xml_data, xml_encoding, sniffed_xml_encoding
- 
--
-     def find_codec(self, charset):
-         return self._codec(self.CHARSET_ALIASES.get(charset, charset)) \
-                or (charset and self._codec(charset.replace("-", ""))) \
-@@ -1946,63 +1967,70 @@
-         return codec
- 
-     EBCDIC_TO_ASCII_MAP = None
-+
-     def _ebcdic_to_ascii(self, s):
-         c = self.__class__
-         if not c.EBCDIC_TO_ASCII_MAP:
--            emap = (0,1,2,3,156,9,134,127,151,141,142,11,12,13,14,15,
--                    16,17,18,19,157,133,8,135,24,25,146,143,28,29,30,31,
--                    128,129,130,131,132,10,23,27,136,137,138,139,140,5,6,7,
--                    144,145,22,147,148,149,150,4,152,153,154,155,20,21,158,26,
--                    32,160,161,162,163,164,165,166,167,168,91,46,60,40,43,33,
--                    38,169,170,171,172,173,174,175,176,177,93,36,42,41,59,94,
--                    45,47,178,179,180,181,182,183,184,185,124,44,37,95,62,63,
--                    186,187,188,189,190,191,192,193,194,96,58,35,64,39,61,34,
--                    195,97,98,99,100,101,102,103,104,105,196,197,198,199,200,
--                    201,202,106,107,108,109,110,111,112,113,114,203,204,205,
--                    206,207,208,209,126,115,116,117,118,119,120,121,122,210,
--                    211,212,213,214,215,216,217,218,219,220,221,222,223,224,
--                    225,226,227,228,229,230,231,123,65,66,67,68,69,70,71,72,
--                    73,232,233,234,235,236,237,125,74,75,76,77,78,79,80,81,
--                    82,238,239,240,241,242,243,92,159,83,84,85,86,87,88,89,
--                    90,244,245,246,247,248,249,48,49,50,51,52,53,54,55,56,57,
--                    250,251,252,253,254,255)
-+            emap = (0, 1, 2, 3, 156, 9, 134, 127, 151, 141, 142, 11, 12, 13, 14,
-+                    15, 16, 17, 18, 19, 157, 133, 8, 135, 24, 25, 146, 143, 28,
-+                    29, 30, 31, 128, 129, 130, 131, 132, 10, 23, 27, 136, 137,
-+                    138, 139, 140, 5, 6, 7, 144, 145, 22, 147, 148, 149, 150, 4,
-+                    152, 153, 154, 155, 20, 21, 158, 26, 32, 160, 161, 162, 163,
-+                    164, 165, 166, 167, 168, 91, 46, 60, 40, 43, 33, 38, 169,
-+                    170, 171, 172, 173, 174, 175, 176, 177, 93, 36, 42, 41, 59,
-+                    94, 45, 47, 178, 179, 180, 181, 182, 183, 184, 185, 124, 44,
-+                    37, 95, 62, 63, 186, 187, 188, 189, 190, 191, 192, 193, 194,
-+                    96, 58, 35, 64, 39, 61, 34, 195, 97, 98, 99, 100, 101, 102,
-+                    103, 104, 105, 196, 197, 198, 199, 200, 201, 202, 106, 107,
-+                    108, 109, 110, 111, 112, 113, 114, 203, 204, 205, 206, 207,
-+                    208, 209, 126, 115, 116, 117, 118, 119, 120, 121, 122, 210,
-+                    211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222,
-+                    223, 224, 225, 226, 227, 228, 229, 230, 231, 123, 65, 66,
-+                    67, 68, 69, 70, 71, 72, 73, 232, 233, 234, 235, 236, 237,
-+                    125, 74, 75, 76, 77, 78, 79, 80, 81, 82, 238, 239, 240, 241,
-+                    242, 243, 92, 159, 83, 84, 85, 86, 87, 88, 89, 90, 244, 245,
-+                    246, 247, 248, 249, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57,
-+                    250, 251, 252, 253, 254, 255)
-             import string
--            c.EBCDIC_TO_ASCII_MAP = string.maketrans( \
--            ''.join(map(chr, range(256))), ''.join(map(chr, emap)))
-+            c.EBCDIC_TO_ASCII_MAP = string.maketrans(''.join(map(chr,
-+                                                                 range(256))),
-+                                                     ''.join(map(chr, emap)))
-         return s.translate(c.EBCDIC_TO_ASCII_MAP)
- 
--    MS_CHARS = { '\x80' : ('euro', '20AC'),
--                 '\x81' : ' ',
--                 '\x82' : ('sbquo', '201A'),
--                 '\x83' : ('fnof', '192'),
--                 '\x84' : ('bdquo', '201E'),
--                 '\x85' : ('hellip', '2026'),
--                 '\x86' : ('dagger', '2020'),
--                 '\x87' : ('Dagger', '2021'),
--                 '\x88' : ('circ', '2C6'),
--                 '\x89' : ('permil', '2030'),
--                 '\x8A' : ('Scaron', '160'),
--                 '\x8B' : ('lsaquo', '2039'),
--                 '\x8C' : ('OElig', '152'),
--                 '\x8D' : '?',
--                 '\x8E' : ('#x17D', '17D'),
--                 '\x8F' : '?',
--                 '\x90' : '?',
--                 '\x91' : ('lsquo', '2018'),
--                 '\x92' : ('rsquo', '2019'),
--                 '\x93' : ('ldquo', '201C'),
--                 '\x94' : ('rdquo', '201D'),
--                 '\x95' : ('bull', '2022'),
--                 '\x96' : ('ndash', '2013'),
--                 '\x97' : ('mdash', '2014'),
--                 '\x98' : ('tilde', '2DC'),
--                 '\x99' : ('trade', '2122'),
--                 '\x9a' : ('scaron', '161'),
--                 '\x9b' : ('rsaquo', '203A'),
--                 '\x9c' : ('oelig', '153'),
--                 '\x9d' : '?',
--                 '\x9e' : ('#x17E', '17E'),
--                 '\x9f' : ('Yuml', ''),}
-+    MS_CHARS = {
-+        '\x80': ('euro', '20AC'),
-+        '\x81': ' ',
-+        '\x82': ('sbquo', '201A'),
-+        '\x83': ('fnof', '192'),
-+        '\x84': ('bdquo', '201E'),
-+        '\x85': ('hellip', '2026'),
-+        '\x86': ('dagger', '2020'),
-+        '\x87': ('Dagger', '2021'),
-+        '\x88': ('circ', '2C6'),
-+        '\x89': ('permil', '2030'),
-+        '\x8A': ('Scaron', '160'),
-+        '\x8B': ('lsaquo', '2039'),
-+        '\x8C': ('OElig', '152'),
-+        '\x8D': '?',
-+        '\x8E': ('#x17D', '17D'),
-+        '\x8F': '?',
-+        '\x90': '?',
-+        '\x91': ('lsquo', '2018'),
-+        '\x92': ('rsquo', '2019'),
-+        '\x93': ('ldquo', '201C'),
-+        '\x94': ('rdquo', '201D'),
-+        '\x95': ('bull', '2022'),
-+        '\x96': ('ndash', '2013'),
-+        '\x97': ('mdash', '2014'),
-+        '\x98': ('tilde', '2DC'),
-+        '\x99': ('trade', '2122'),
-+        '\x9a': ('scaron', '161'),
-+        '\x9b': ('rsaquo', '203A'),
-+        '\x9c': ('oelig', '153'),
-+        '\x9d': '?',
-+        '\x9e': ('#x17E', '17E'),
-+        '\x9f': ('Yuml', ''),
-+    }
- 
- #######################################################################
- 

-- 
To view, visit https://gerrit.wikimedia.org/r/77325
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I4b5462737fbf54c7521786bf03e71a90421926fc
Gerrit-PatchSet: 1
Gerrit-Project: pywikibot/compat
Gerrit-Branch: master
Gerrit-Owner: DrTrigon <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

[MediaWiki-commits] [Gerrit] remove patch-BeautifulSoup since not needed and causes issue... - change (pywikibot/compat)

Reply via email to