Hello community, here is the log from the commit of package python-w3lib for openSUSE:Factory checked in at 2018-11-18 23:32:31 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Factory/python-w3lib (Old) and /work/SRC/openSUSE:Factory/.python-w3lib.new (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "python-w3lib" Sun Nov 18 23:32:31 2018 rev:2 rq:649920 version:1.19.0 Changes: -------- --- /work/SRC/openSUSE:Factory/python-w3lib/python-w3lib.changes 2018-01-10 23:32:42.734937532 +0100 +++ /work/SRC/openSUSE:Factory/.python-w3lib.new/python-w3lib.changes 2018-11-18 23:32:51.337433396 +0100 @@ -1,0 +2,12 @@ +Fri Nov 16 18:49:26 UTC 2018 - Todd R <toddrme2...@gmail.com> + +- Update to version 1.19.0 + * Add a workaround for CPython segfault (https://bugs.python.org/issue32583) + which affects w3lib.encoding functions. This is technically **backwards + incompatible** because it changes the way non-decodable bytes are replaced + (in some cases instead of two ``\ufffd`` chars you can get one). + As a side effect, the fix speeds up decoding in Python 3.4+. + * Add 'encoding' parameter for w3lib.http.basic_auth_header. + * Fix pypy testing setup, add pypy3 to CI. + +------------------------------------------------------------------- Old: ---- w3lib-1.18.0.tar.gz New: ---- w3lib-1.19.0.tar.gz ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ python-w3lib.spec ++++++ --- /var/tmp/diff_new_pack.KjrxNl/_old 2018-11-18 23:32:53.017431377 +0100 +++ /var/tmp/diff_new_pack.KjrxNl/_new 2018-11-18 23:32:53.017431377 +0100 @@ -1,7 +1,7 @@ # # spec file for package python-w3lib # -# Copyright (c) 2017 SUSE LINUX GmbH, Nuernberg, Germany. +# Copyright (c) 2018 SUSE LINUX GmbH, Nuernberg, Germany. 
# # All modifications and additions to the file contributed by third parties # remain the property of their copyright owners, unless otherwise agreed @@ -18,7 +18,7 @@ %{?!python_module:%define python_module() python-%{**} python3-%{**}} Name: python-w3lib -Version: 1.18.0 +Version: 1.19.0 Release: 0 Summary: Library of Web-Related Functions License: BSD-3-Clause @@ -31,6 +31,7 @@ BuildRequires: fdupes BuildRequires: python-rpm-macros BuildArch: noarch + %python_subpackages %description @@ -70,7 +71,8 @@ %python_exec setup.py test %files %{python_files} -%doc README.rst LICENSE +%doc README.rst +%license LICENSE %{python_sitelib}/* %changelog ++++++ w3lib-1.18.0.tar.gz -> w3lib-1.19.0.tar.gz ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/w3lib-1.18.0/PKG-INFO new/w3lib-1.19.0/PKG-INFO --- old/w3lib-1.18.0/PKG-INFO 2017-08-03 15:25:28.000000000 +0200 +++ new/w3lib-1.19.0/PKG-INFO 2018-01-25 01:58:11.000000000 +0100 @@ -1,11 +1,12 @@ Metadata-Version: 1.1 Name: w3lib -Version: 1.18.0 +Version: 1.19.0 Summary: Library of web-related functions Home-page: https://github.com/scrapy/w3lib Author: Scrapy project Author-email: i...@scrapy.org License: BSD +Description-Content-Type: UNKNOWN Description: UNKNOWN Platform: Any Classifier: Development Status :: 5 - Production/Stable diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/w3lib-1.18.0/docs/conf.py new/w3lib-1.19.0/docs/conf.py --- old/w3lib-1.18.0/docs/conf.py 2017-08-03 15:24:36.000000000 +0200 +++ new/w3lib-1.19.0/docs/conf.py 2018-01-25 01:57:32.000000000 +0100 @@ -53,7 +53,7 @@ # built documents. # # The full version, including alpha/beta/rc tags. -release = '1.18.0' +release = '1.19.0' # The short X.Y version. 
version = '.'.join(release.split('.')[:2]) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/w3lib-1.18.0/setup.py new/w3lib-1.19.0/setup.py --- old/w3lib-1.18.0/setup.py 2017-08-03 15:24:36.000000000 +0200 +++ new/w3lib-1.19.0/setup.py 2018-01-25 01:57:32.000000000 +0100 @@ -3,7 +3,7 @@ setup( name='w3lib', - version='1.18.0', + version='1.19.0', license='BSD', description='Library of web-related functions', author='Scrapy project', diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/w3lib-1.18.0/tests/test_encoding.py new/w3lib-1.19.0/tests/test_encoding.py --- old/w3lib-1.18.0/tests/test_encoding.py 2017-08-03 15:24:36.000000000 +0200 +++ new/w3lib-1.19.0/tests/test_encoding.py 2018-01-25 01:57:32.000000000 +0100 @@ -144,9 +144,9 @@ def test_invalid_utf8_encoded_body_with_valid_utf8_BOM(self): # unlike scrapy, the BOM is stripped self._assert_encoding('utf-8', b"\xef\xbb\xbfWORD\xe3\xabWORD2", - 'utf-8', u'WORD\ufffd\ufffdWORD2') + 'utf-8', u'WORD\ufffdWORD2') self._assert_encoding(None, b"\xef\xbb\xbfWORD\xe3\xabWORD2", - 'utf-8', u'WORD\ufffd\ufffdWORD2') + 'utf-8', u'WORD\ufffdWORD2') def test_utf8_unexpected_end_of_data_with_valid_utf8_BOM(self): # Python implementations handle unexpected end of UTF8 data @@ -220,6 +220,18 @@ self._assert_encoding('utf-16', u"hi".encode('utf-16-be'), 'utf-16-be', u"hi") self._assert_encoding('utf-32', u"hi".encode('utf-32-be'), 'utf-32-be', u"hi") + def test_python_crash(self): + import random + from io import BytesIO + random.seed(42) + buf = BytesIO() + for i in range(150000): + buf.write(bytes([random.randint(0, 255)])) + to_unicode(buf.getvalue(), 'utf-16-le') + to_unicode(buf.getvalue(), 'utf-16-be') + to_unicode(buf.getvalue(), 'utf-32-le') + to_unicode(buf.getvalue(), 'utf-32-be') + def test_html_encoding(self): # extracting the encoding from raw html is tested elsewhere body = b"""blah blah < meta http-equiv="Content-Type" diff -urN 
'--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/w3lib-1.18.0/tests/test_http.py new/w3lib-1.19.0/tests/test_http.py --- old/w3lib-1.18.0/tests/test_http.py 2017-08-03 15:24:36.000000000 +0200 +++ new/w3lib-1.19.0/tests/test_http.py 2018-01-25 01:57:32.000000000 +0100 @@ -1,3 +1,5 @@ +# -*- coding: utf-8 -*- + import unittest from collections import OrderedDict from w3lib.http import (basic_auth_header, @@ -14,6 +16,13 @@ self.assertEqual(b'Basic c29tZXVzZXI6QDx5dTk-Jm8_UQ==', basic_auth_header('someuser', '@<yu9>&o?Q')) + def test_basic_auth_header_encoding(self): + self.assertEqual(b'Basic c29tw6Z1c8Oocjpzw7htZXDDpHNz', + basic_auth_header(u'somæusèr', u'sømepäss', encoding='utf8')) + # default encoding (ISO-8859-1) + self.assertEqual(b'Basic c29t5nVz6HI6c_htZXDkc3M=', + basic_auth_header(u'somæusèr', u'sømepäss')) + def test_headers_raw_dict_none(self): self.assertIsNone(headers_raw_to_dict(None)) self.assertIsNone(headers_dict_to_raw(None)) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/w3lib-1.18.0/tox.ini new/w3lib-1.19.0/tox.ini --- old/w3lib-1.18.0/tox.ini 2017-08-03 15:24:36.000000000 +0200 +++ new/w3lib-1.19.0/tox.ini 2018-01-25 01:57:32.000000000 +0100 @@ -4,7 +4,7 @@ # and then run "tox" from this directory. 
[tox] -envlist = py27, pypy, py33, py34, py35, py36 +envlist = py27, pypy, py33, py34, py35, py36, pypy3 [testenv] deps = diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/w3lib-1.18.0/w3lib/__init__.py new/w3lib-1.19.0/w3lib/__init__.py --- old/w3lib-1.18.0/w3lib/__init__.py 2017-08-03 15:24:36.000000000 +0200 +++ new/w3lib-1.19.0/w3lib/__init__.py 2018-01-25 01:57:32.000000000 +0100 @@ -1,3 +1,3 @@ -__version__ = "1.18.0" +__version__ = "1.19.0" version_info = tuple(int(v) if v.isdigit() else v for v in __version__.split('.')) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/w3lib-1.18.0/w3lib/encoding.py new/w3lib-1.19.0/w3lib/encoding.py --- old/w3lib-1.18.0/w3lib/encoding.py 2017-08-03 15:24:36.000000000 +0200 +++ new/w3lib-1.19.0/w3lib/encoding.py 2018-01-25 01:57:32.000000000 +0100 @@ -3,6 +3,7 @@ Functions for handling encoding of web pages """ import re, codecs, encodings +from sys import version_info _HEADER_ENCODING_RE = re.compile(r'charset=([\w-]+)', re.I) @@ -173,7 +174,7 @@ # Python decoder doesn't follow unicode standard when handling # bad utf-8 encoded strings. see http://bugs.python.org/issue8271 -codecs.register_error('w3lib_replace', lambda exc: (u'\ufffd', exc.start+1)) +codecs.register_error('w3lib_replace', lambda exc: (u'\ufffd', exc.end)) def to_unicode(data_str, encoding): """Convert a str object to unicode using the encoding given @@ -181,7 +182,7 @@ Characters that cannot be converted will be converted to ``\\ufffd`` (the unicode replacement character). 
""" - return data_str.decode(encoding, 'w3lib_replace') + return data_str.decode(encoding, 'replace' if version_info[0:2] >= (3, 3) else 'w3lib_replace') def html_to_unicode(content_type_header, html_body_str, default_encoding='utf8', auto_detect_fun=None): diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/w3lib-1.18.0/w3lib/http.py new/w3lib-1.19.0/w3lib/http.py --- old/w3lib-1.18.0/w3lib/http.py 2017-08-03 15:24:36.000000000 +0200 +++ new/w3lib-1.19.0/w3lib/http.py 2018-01-25 01:57:32.000000000 +0100 @@ -78,7 +78,7 @@ return b'\r\n'.join(raw_lines) -def basic_auth_header(username, password): +def basic_auth_header(username, password, encoding='ISO-8859-1'): """ Return an `Authorization` header field value for `HTTP Basic Access Authentication (RFC 2617)`_ @@ -95,5 +95,5 @@ # XXX: RFC 2617 doesn't define encoding, but ISO-8859-1 # seems to be the most widely used encoding here. See also: # http://greenbytes.de/tech/webdav/draft-ietf-httpauth-basicauth-enc-latest.html - auth = auth.encode('ISO-8859-1') + auth = auth.encode(encoding) return b'Basic ' + urlsafe_b64encode(auth) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/w3lib-1.18.0/w3lib.egg-info/PKG-INFO new/w3lib-1.19.0/w3lib.egg-info/PKG-INFO --- old/w3lib-1.18.0/w3lib.egg-info/PKG-INFO 2017-08-03 15:25:28.000000000 +0200 +++ new/w3lib-1.19.0/w3lib.egg-info/PKG-INFO 2018-01-25 01:58:11.000000000 +0100 @@ -1,11 +1,12 @@ Metadata-Version: 1.1 Name: w3lib -Version: 1.18.0 +Version: 1.19.0 Summary: Library of web-related functions Home-page: https://github.com/scrapy/w3lib Author: Scrapy project Author-email: i...@scrapy.org License: BSD +Description-Content-Type: UNKNOWN Description: UNKNOWN Platform: Any Classifier: Development Status :: 5 - Production/Stable