Hello community, here is the log from the commit of package python-w3lib for openSUSE:Factory checked in at 2018-11-18 23:32:31 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Factory/python-w3lib (Old) and /work/SRC/openSUSE:Factory/.python-w3lib.new (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "python-w3lib" Sun Nov 18 23:32:31 2018 rev:2 rq:649920 version:1.19.0 Changes: -------- --- /work/SRC/openSUSE:Factory/python-w3lib/python-w3lib.changes 2018-01-10 23:32:42.734937532 +0100 +++ /work/SRC/openSUSE:Factory/.python-w3lib.new/python-w3lib.changes 2018-11-18 23:32:51.337433396 +0100 @@ -1,0 +2,12 @@ +Fri Nov 16 18:49:26 UTC 2018 - Todd R <toddrme2...@gmail.com> + +- Update to version 1.19.0 + * Add a workaround for CPython segfault (https://bugs.python.org/issue32583) + which affects w3lib.encoding functions. This is technically **backwards + incompatible** because it changes the way non-decodable bytes are replaced + (in some cases instead of two ``\ufffd`` chars you can get one). + As a side effect, the fix speeds up decoding in Python 3.4+. + * Add 'encoding' parameter for w3lib.http.basic_auth_header. + * Fix pypy testing setup, add pypy3 to CI. + +------------------------------------------------------------------- Old: ---- w3lib-1.18.0.tar.gz New: ---- w3lib-1.19.0.tar.gz ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ python-w3lib.spec ++++++ --- /var/tmp/diff_new_pack.KjrxNl/_old 2018-11-18 23:32:53.017431377 +0100 +++ /var/tmp/diff_new_pack.KjrxNl/_new 2018-11-18 23:32:53.017431377 +0100 @@ -1,7 +1,7 @@ # # spec file for package python-w3lib # -# Copyright (c) 2017 SUSE LINUX GmbH, Nuernberg, Germany. +# Copyright (c) 2018 SUSE LINUX GmbH, Nuernberg, Germany. 
# # All modifications and additions to the file contributed by third parties # remain the property of their copyright owners, unless otherwise agreed @@ -18,7 +18,7 @@ %{?!python_module:%define python_module() python-%{**} python3-%{**}} Name: python-w3lib -Version: 1.18.0 +Version: 1.19.0 Release: 0 Summary: Library of Web-Related Functions License: BSD-3-Clause @@ -31,6 +31,7 @@ BuildRequires: fdupes BuildRequires: python-rpm-macros BuildArch: noarch + %python_subpackages %description @@ -70,7 +71,8 @@ %python_exec setup.py test %files %{python_files} -%doc README.rst LICENSE +%doc README.rst +%license LICENSE %{python_sitelib}/* %changelog ++++++ w3lib-1.18.0.tar.gz -> w3lib-1.19.0.tar.gz ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/w3lib-1.18.0/PKG-INFO new/w3lib-1.19.0/PKG-INFO --- old/w3lib-1.18.0/PKG-INFO 2017-08-03 15:25:28.000000000 +0200 +++ new/w3lib-1.19.0/PKG-INFO 2018-01-25 01:58:11.000000000 +0100 @@ -1,11 +1,12 @@ Metadata-Version: 1.1 Name: w3lib -Version: 1.18.0 +Version: 1.19.0 Summary: Library of web-related functions Home-page: https://github.com/scrapy/w3lib Author: Scrapy project Author-email: i...@scrapy.org License: BSD +Description-Content-Type: UNKNOWN Description: UNKNOWN Platform: Any Classifier: Development Status :: 5 - Production/Stable diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/w3lib-1.18.0/docs/conf.py new/w3lib-1.19.0/docs/conf.py --- old/w3lib-1.18.0/docs/conf.py 2017-08-03 15:24:36.000000000 +0200 +++ new/w3lib-1.19.0/docs/conf.py 2018-01-25 01:57:32.000000000 +0100 @@ -53,7 +53,7 @@ # built documents. # # The full version, including alpha/beta/rc tags. -release = '1.18.0' +release = '1.19.0' # The short X.Y version. 
version = '.'.join(release.split('.')[:2]) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/w3lib-1.18.0/setup.py new/w3lib-1.19.0/setup.py --- old/w3lib-1.18.0/setup.py 2017-08-03 15:24:36.000000000 +0200 +++ new/w3lib-1.19.0/setup.py 2018-01-25 01:57:32.000000000 +0100 @@ -3,7 +3,7 @@ setup( name='w3lib', - version='1.18.0', + version='1.19.0', license='BSD', description='Library of web-related functions', author='Scrapy project', diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/w3lib-1.18.0/tests/test_encoding.py new/w3lib-1.19.0/tests/test_encoding.py --- old/w3lib-1.18.0/tests/test_encoding.py 2017-08-03 15:24:36.000000000 +0200 +++ new/w3lib-1.19.0/tests/test_encoding.py 2018-01-25 01:57:32.000000000 +0100 @@ -144,9 +144,9 @@ def test_invalid_utf8_encoded_body_with_valid_utf8_BOM(self): # unlike scrapy, the BOM is stripped self._assert_encoding('utf-8', b"\xef\xbb\xbfWORD\xe3\xabWORD2", - 'utf-8', u'WORD\ufffd\ufffdWORD2') + 'utf-8', u'WORD\ufffdWORD2') self._assert_encoding(None, b"\xef\xbb\xbfWORD\xe3\xabWORD2", - 'utf-8', u'WORD\ufffd\ufffdWORD2') + 'utf-8', u'WORD\ufffdWORD2') def test_utf8_unexpected_end_of_data_with_valid_utf8_BOM(self): # Python implementations handle unexpected end of UTF8 data @@ -220,6 +220,18 @@ self._assert_encoding('utf-16', u"hi".encode('utf-16-be'), 'utf-16-be', u"hi") self._assert_encoding('utf-32', u"hi".encode('utf-32-be'), 'utf-32-be', u"hi") + def test_python_crash(self): + import random + from io import BytesIO + random.seed(42) + buf = BytesIO() + for i in range(150000): + buf.write(bytes([random.randint(0, 255)])) + to_unicode(buf.getvalue(), 'utf-16-le') + to_unicode(buf.getvalue(), 'utf-16-be') + to_unicode(buf.getvalue(), 'utf-32-le') + to_unicode(buf.getvalue(), 'utf-32-be') + def test_html_encoding(self): # extracting the encoding from raw html is tested elsewhere body = b"""blah blah < meta http-equiv="Content-Type" diff -urN 
'--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/w3lib-1.18.0/tests/test_http.py new/w3lib-1.19.0/tests/test_http.py --- old/w3lib-1.18.0/tests/test_http.py 2017-08-03 15:24:36.000000000 +0200 +++ new/w3lib-1.19.0/tests/test_http.py 2018-01-25 01:57:32.000000000 +0100 @@ -1,3 +1,5 @@ +# -*- coding: utf-8 -*- + import unittest from collections import OrderedDict from w3lib.http import (basic_auth_header, @@ -14,6 +16,13 @@ self.assertEqual(b'Basic c29tZXVzZXI6QDx5dTk-Jm8_UQ==', basic_auth_header('someuser', '@<yu9>&o?Q')) + def test_basic_auth_header_encoding(self): + self.assertEqual(b'Basic c29tw6Z1c8Oocjpzw7htZXDDpHNz', + basic_auth_header(u'somæusèr', u'sømepäss', encoding='utf8')) + # default encoding (ISO-8859-1) + self.assertEqual(b'Basic c29t5nVz6HI6c_htZXDkc3M=', + basic_auth_header(u'somæusèr', u'sømepäss')) + def test_headers_raw_dict_none(self): self.assertIsNone(headers_raw_to_dict(None)) self.assertIsNone(headers_dict_to_raw(None)) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/w3lib-1.18.0/tox.ini new/w3lib-1.19.0/tox.ini --- old/w3lib-1.18.0/tox.ini 2017-08-03 15:24:36.000000000 +0200 +++ new/w3lib-1.19.0/tox.ini 2018-01-25 01:57:32.000000000 +0100 @@ -4,7 +4,7 @@ # and then run "tox" from this directory. 
[tox] -envlist = py27, pypy, py33, py34, py35, py36 +envlist = py27, pypy, py33, py34, py35, py36, pypy3 [testenv] deps = diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/w3lib-1.18.0/w3lib/__init__.py new/w3lib-1.19.0/w3lib/__init__.py --- old/w3lib-1.18.0/w3lib/__init__.py 2017-08-03 15:24:36.000000000 +0200 +++ new/w3lib-1.19.0/w3lib/__init__.py 2018-01-25 01:57:32.000000000 +0100 @@ -1,3 +1,3 @@ -__version__ = "1.18.0" +__version__ = "1.19.0" version_info = tuple(int(v) if v.isdigit() else v for v in __version__.split('.')) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/w3lib-1.18.0/w3lib/encoding.py new/w3lib-1.19.0/w3lib/encoding.py --- old/w3lib-1.18.0/w3lib/encoding.py 2017-08-03 15:24:36.000000000 +0200 +++ new/w3lib-1.19.0/w3lib/encoding.py 2018-01-25 01:57:32.000000000 +0100 @@ -3,6 +3,7 @@ Functions for handling encoding of web pages """ import re, codecs, encodings +from sys import version_info _HEADER_ENCODING_RE = re.compile(r'charset=([\w-]+)', re.I) @@ -173,7 +174,7 @@ # Python decoder doesn't follow unicode standard when handling # bad utf-8 encoded strings. see http://bugs.python.org/issue8271 -codecs.register_error('w3lib_replace', lambda exc: (u'\ufffd', exc.start+1)) +codecs.register_error('w3lib_replace', lambda exc: (u'\ufffd', exc.end)) def to_unicode(data_str, encoding): """Convert a str object to unicode using the encoding given @@ -181,7 +182,7 @@ Characters that cannot be converted will be converted to ``\\ufffd`` (the unicode replacement character). 
""" - return data_str.decode(encoding, 'w3lib_replace') + return data_str.decode(encoding, 'replace' if version_info[0:2] >= (3, 3) else 'w3lib_replace') def html_to_unicode(content_type_header, html_body_str, default_encoding='utf8', auto_detect_fun=None): diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/w3lib-1.18.0/w3lib/http.py new/w3lib-1.19.0/w3lib/http.py --- old/w3lib-1.18.0/w3lib/http.py 2017-08-03 15:24:36.000000000 +0200 +++ new/w3lib-1.19.0/w3lib/http.py 2018-01-25 01:57:32.000000000 +0100 @@ -78,7 +78,7 @@ return b'\r\n'.join(raw_lines) -def basic_auth_header(username, password): +def basic_auth_header(username, password, encoding='ISO-8859-1'): """ Return an `Authorization` header field value for `HTTP Basic Access Authentication (RFC 2617)`_ @@ -95,5 +95,5 @@ # XXX: RFC 2617 doesn't define encoding, but ISO-8859-1 # seems to be the most widely used encoding here. See also: # http://greenbytes.de/tech/webdav/draft-ietf-httpauth-basicauth-enc-latest.html - auth = auth.encode('ISO-8859-1') + auth = auth.encode(encoding) return b'Basic ' + urlsafe_b64encode(auth) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/w3lib-1.18.0/w3lib.egg-info/PKG-INFO new/w3lib-1.19.0/w3lib.egg-info/PKG-INFO --- old/w3lib-1.18.0/w3lib.egg-info/PKG-INFO 2017-08-03 15:25:28.000000000 +0200 +++ new/w3lib-1.19.0/w3lib.egg-info/PKG-INFO 2018-01-25 01:58:11.000000000 +0100 @@ -1,11 +1,12 @@ Metadata-Version: 1.1 Name: w3lib -Version: 1.18.0 +Version: 1.19.0 Summary: Library of web-related functions Home-page: https://github.com/scrapy/w3lib Author: Scrapy project Author-email: i...@scrapy.org License: BSD +Description-Content-Type: UNKNOWN Description: UNKNOWN Platform: Any Classifier: Development Status :: 5 - Production/Stable