Hello community,
here is the log from the commit of package python-beautifulsoup for
openSUSE:Factory checked in at 2012-03-08 19:45:02
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Comparing /work/SRC/openSUSE:Factory/python-beautifulsoup (Old)
and /work/SRC/openSUSE:Factory/.python-beautifulsoup.new (New)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "python-beautifulsoup", Maintainer is ""
Changes:
--------
---
/work/SRC/openSUSE:Factory/python-beautifulsoup/python-beautifulsoup.changes
2012-02-27 18:35:54.000000000 +0100
+++
/work/SRC/openSUSE:Factory/.python-beautifulsoup.new/python-beautifulsoup.changes
2012-03-08 19:45:04.000000000 +0100
@@ -1,0 +2,9 @@
+Tue Feb 21 19:47:47 UTC 2012 - [email protected]
+
+- Update to 3.2.1
+ * Substitute XML entities for angle brackets and bare ampersands within
+ strings, not just within attribute values. This prevents a possible
+ cross-site scripting attack when Beautiful Soup is used to sanitize HTML.
+ (https://bugs.launchpad.net/beautifulsoup/+bug/868921)
+
+-------------------------------------------------------------------
Old:
----
BeautifulSoup-3.2.0.tar.gz
New:
----
BeautifulSoup-3.2.1.tar.gz
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Other differences:
------------------
++++++ python-beautifulsoup.spec ++++++
--- /var/tmp/diff_new_pack.4u2uqZ/_old 2012-03-08 19:45:07.000000000 +0100
+++ /var/tmp/diff_new_pack.4u2uqZ/_new 2012-03-08 19:45:07.000000000 +0100
@@ -1,7 +1,7 @@
#
# spec file for package python-beautifulsoup
#
-# Copyright (c) 2011 SUSE LINUX Products GmbH, Nuernberg, Germany.
+# Copyright (c) 2012 SUSE LINUX Products GmbH, Nuernberg, Germany.
#
# All modifications and additions to the file contributed by third parties
# remain the property of their copyright owners, unless otherwise agreed
@@ -19,7 +19,7 @@
%define modname BeautifulSoup
Name: python-beautifulsoup
-Version: 3.2.0
+Version: 3.2.1
Release: 0
Summary: HTML/XML Parser for Quick-Turnaround Applications Like Screen-Scraping
License: BSD-3-Clause
++++++ BeautifulSoup-3.2.0.tar.gz -> BeautifulSoup-3.2.1.tar.gz ++++++
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn'
'--exclude=.svnignore' old/BeautifulSoup-3.2.0/BeautifulSoup.py
new/BeautifulSoup-3.2.1/BeautifulSoup.py
--- old/BeautifulSoup-3.2.0/BeautifulSoup.py 2010-11-21 14:35:28.000000000
+0100
+++ new/BeautifulSoup-3.2.1/BeautifulSoup.py 2012-02-16 14:34:48.000000000
+0100
@@ -79,8 +79,8 @@
from __future__ import generators
__author__ = "Leonard Richardson ([email protected])"
-__version__ = "3.2.0"
-__copyright__ = "Copyright (c) 2004-2010 Leonard Richardson"
+__version__ = "3.2.1"
+__copyright__ = "Copyright (c) 2004-2012 Leonard Richardson"
__license__ = "New-style BSD"
from sgmllib import SGMLParser, SGMLParseError
@@ -114,6 +114,21 @@
"""Contains the navigational information for some part of the page
(either a tag or a piece of text)"""
+ def _invert(h):
+ "Cheap function to invert a hash."
+ i = {}
+ for k,v in h.items():
+ i[v] = k
+ return i
+
+ XML_ENTITIES_TO_SPECIAL_CHARS = { "apos" : "'",
+ "quot" : '"',
+ "amp" : "&",
+ "lt" : "<",
+ "gt" : ">" }
+
+ XML_SPECIAL_CHARS_TO_ENTITIES = _invert(XML_ENTITIES_TO_SPECIAL_CHARS)
+
def setup(self, parent=None, previous=None):
"""Sets up the initial relations between this element and
other elements."""
@@ -421,6 +436,16 @@
s = unicode(s)
return s
+ BARE_AMPERSAND_OR_BRACKET = re.compile("([<>]|"
+ + "&(?!#\d+;|#x[0-9a-fA-F]+;|\w+;)"
+ + ")")
+
+ def _sub_entity(self, x):
+ """Used with a regular expression to substitute the
+ appropriate XML entity for an XML special character."""
+ return "&" + self.XML_SPECIAL_CHARS_TO_ENTITIES[x.group(0)[0]] + ";"
+
+
class NavigableString(unicode, PageElement):
def __new__(cls, value):
@@ -451,10 +476,12 @@
return str(self).decode(DEFAULT_OUTPUT_ENCODING)
def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING):
+ # Substitute outgoing XML entities.
+ data = self.BARE_AMPERSAND_OR_BRACKET.sub(self._sub_entity, self)
if encoding:
- return self.encode(encoding)
+ return data.encode(encoding)
else:
- return self
+ return data
class CData(NavigableString):
@@ -480,21 +507,6 @@
"""Represents a found HTML tag with its attributes and contents."""
- def _invert(h):
- "Cheap function to invert a hash."
- i = {}
- for k,v in h.items():
- i[v] = k
- return i
-
- XML_ENTITIES_TO_SPECIAL_CHARS = { "apos" : "'",
- "quot" : '"',
- "amp" : "&",
- "lt" : "<",
- "gt" : ">" }
-
- XML_SPECIAL_CHARS_TO_ENTITIES = _invert(XML_ENTITIES_TO_SPECIAL_CHARS)
-
def _convertEntities(self, match):
"""Used in a call to re.sub to replace HTML, XML, and numeric
entities with the appropriate Unicode characters. If HTML
@@ -681,15 +693,6 @@
def __unicode__(self):
return self.__str__(None)
- BARE_AMPERSAND_OR_BRACKET = re.compile("([<>]|"
- + "&(?!#\d+;|#x[0-9a-fA-F]+;|\w+;)"
- + ")")
-
- def _sub_entity(self, x):
- """Used with a regular expression to substitute the
- appropriate XML entity for an XML special character."""
- return "&" + self.XML_SPECIAL_CHARS_TO_ENTITIES[x.group(0)[0]] + ";"
-
def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING,
prettyPrint=False, indentLevel=0):
"""Returns a string or Unicode representation of this tag and
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn'
'--exclude=.svnignore' old/BeautifulSoup-3.2.0/BeautifulSoupTests.py
new/BeautifulSoup-3.2.1/BeautifulSoupTests.py
--- old/BeautifulSoup-3.2.0/BeautifulSoupTests.py 2010-11-21
14:23:59.000000000 +0100
+++ new/BeautifulSoup-3.2.1/BeautifulSoupTests.py 2012-02-16
14:07:28.000000000 +0100
@@ -512,7 +512,7 @@
# SGMLParser generates bogus parse events when attribute values
# contain embedded brackets, but at least Beautiful Soup fixes
# it up a little.
- self.assertSoupEquals('<a b="<a>">', '<a b="&lt;a&gt;"></a><a>"></a>')
+ self.assertSoupEquals('<a b="<a>">', '<a b="&lt;a&gt;"></a><a>"&gt;</a>')
self.assertSoupEquals('<a href="http://foo.com/<a> and blah and blah',
"""<a href='"http://foo.com/'></a><a> and blah
and blah</a>""")
@@ -649,30 +649,34 @@
xhtmlEnt = BeautifulStoneSoup.XHTML_ENTITIES
soup = BeautifulStoneSoup(text, convertEntities=xmlEnt)
- self.assertEquals(str(soup), "<<sacr&eacute; bleu!>>")
-
- soup = BeautifulStoneSoup(text, convertEntities=xmlEnt)
- self.assertEquals(str(soup), "<<sacr&eacute; bleu!>>")
+ self.assertEquals(str(soup), "&lt;&lt;sacr&eacute; bleu!&gt;&gt;")
soup = BeautifulStoneSoup(text, convertEntities=htmlEnt)
- self.assertEquals(unicode(soup), u"<<sacr\xe9 bleu!>>")
+ self.assertEquals(unicode(soup), u"&lt;&lt;sacr\xe9 bleu!&gt;&gt;")
# Make sure the "XML", "HTML", and "XHTML" settings work.
text = "&lt;&trade;&apos;"
soup = BeautifulStoneSoup(text, convertEntities=xmlEnt)
- self.assertEquals(unicode(soup), u"<&trade;'")
+ self.assertEquals(unicode(soup), u"&lt;&trade;'")
soup = BeautifulStoneSoup(text, convertEntities=htmlEnt)
- self.assertEquals(unicode(soup), u"<\u2122&apos;")
+ self.assertEquals(unicode(soup), u"&lt;\u2122&apos;")
soup = BeautifulStoneSoup(text, convertEntities=xhtmlEnt)
- self.assertEquals(unicode(soup), u"<\u2122'")
+ self.assertEquals(unicode(soup), u"&lt;\u2122'")
invalidEntity = "foo&#bar;baz"
soup = BeautifulStoneSoup\
(invalidEntity,
convertEntities=htmlEnt)
- self.assertEquals(str(soup), invalidEntity)
+ self.assertEquals(str(soup), "foo&amp;#bar;baz")
+
+ nonexistentEntity = "foo&bar;baz"
+ soup = BeautifulStoneSoup\
+ (nonexistentEntity,
+ convertEntities="xml")
+ self.assertEquals(str(soup), nonexistentEntity)
+
def testNonBreakingSpaces(self):
soup = BeautifulSoup("<a> </a>",
@@ -683,10 +687,10 @@
self.assertSoupEquals('<! DOCTYPE>', '<!DOCTYPE>')
def testJunkInDeclaration(self):
- self.assertSoupEquals('<! Foo = -8>a', '<!Foo = -8>a')
+ self.assertSoupEquals('<! Foo = -8>a', '&lt;!Foo = -8&gt;a')
def testIncompleteDeclaration(self):
- self.assertSoupEquals('a<!b <p>c')
+ self.assertSoupEquals('a<!b <p>c', 'a&lt;!b &lt;p&gt;c')
def testEntityReplacement(self):
self.assertSoupEquals('<b>hello there</b>')
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn'
'--exclude=.svnignore' old/BeautifulSoup-3.2.0/PKG-INFO
new/BeautifulSoup-3.2.1/PKG-INFO
--- old/BeautifulSoup-3.2.0/PKG-INFO 2010-11-21 14:42:49.000000000 +0100
+++ new/BeautifulSoup-3.2.1/PKG-INFO 2012-02-16 14:36:59.000000000 +0100
@@ -1,6 +1,6 @@
Metadata-Version: 1.0
Name: BeautifulSoup
-Version: 3.2.0
+Version: 3.2.1
Summary: HTML/XML parser for quick-turnaround applications like screen-scraping.
Home-page: http://www.crummy.com/software/BeautifulSoup/
Author: Leonard Richardson
--
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]