Hello community,

here is the log from the commit of package python-parsel for openSUSE:Factory checked in at 2019-09-13 14:57:47
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Comparing /work/SRC/openSUSE:Factory/python-parsel (Old)
 and /work/SRC/openSUSE:Factory/.python-parsel.new.7948 (New)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "python-parsel" Fri Sep 13 14:57:47 2019 rev:5 rq:730059 version:1.5.2 Changes: -------- --- /work/SRC/openSUSE:Factory/python-parsel/python-parsel.changes 2018-12-24 11:40:49.765482068 +0100 +++ /work/SRC/openSUSE:Factory/.python-parsel.new.7948/python-parsel.changes 2019-09-13 14:57:54.589276197 +0200 @@ -1,0 +2,9 @@ +Wed Sep 11 08:27:22 UTC 2019 - Tomáš Chvátal <tchva...@suse.com> + +- Update to 1.5.2: + * ``Selector.remove_namespaces`` received a significant performance improvement + * The value of ``data`` within the printable representation of a selector + (``repr(selector)``) now ends in ``...`` when truncated, to make the + truncation obvious. + +------------------------------------------------------------------- Old: ---- parsel-1.5.1.tar.gz New: ---- parsel-1.5.2.tar.gz ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ python-parsel.spec ++++++ --- /var/tmp/diff_new_pack.Gks42h/_old 2019-09-13 14:57:55.057276217 +0200 +++ /var/tmp/diff_new_pack.Gks42h/_new 2019-09-13 14:57:55.061276217 +0200 @@ -1,7 +1,7 @@ # # spec file for package python-parsel # -# Copyright (c) 2018 SUSE LINUX GmbH, Nuernberg, Germany. +# Copyright (c) 2019 SUSE LINUX GmbH, Nuernberg, Germany. 
# # All modifications and additions to the file contributed by third parties # remain the property of their copyright owners, unless otherwise agreed @@ -18,7 +18,7 @@ %{?!python_module:%define python_module() python-%{**} python3-%{**}} Name: python-parsel -Version: 1.5.1 +Version: 1.5.2 Release: 0 Summary: Library to extract data from HTML and XML using XPath and CSS selectors License: BSD-3-Clause @@ -36,16 +36,15 @@ Requires: python-lxml Requires: python-six >= 1.5.2 Requires: python-w3lib >= 1.8.0 +BuildArch: noarch %ifpython2 Requires: python-functools32 %endif -BuildArch: noarch # SECTION test requirements BuildRequires: %{python_module pytest-runner} BuildRequires: %{python_module pytest} BuildRequires: python-functools32 # /SECTION - %python_subpackages %description ++++++ parsel-1.5.1.tar.gz -> parsel-1.5.2.tar.gz ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/parsel-1.5.1/NEWS new/parsel-1.5.2/NEWS --- old/parsel-1.5.1/NEWS 2018-10-25 22:21:21.000000000 +0200 +++ new/parsel-1.5.2/NEWS 2019-08-09 13:26:40.000000000 +0200 @@ -3,6 +3,16 @@ History ------- +1.5.2 (2019-08-09) +~~~~~~~~~~~~~~~~~~ + +* ``Selector.remove_namespaces`` received a significant performance improvement +* The value of ``data`` within the printable representation of a selector + (``repr(selector)``) now ends in ``...`` when truncated, to make the + truncation obvious. +* Minor documentation improvements. + + 1.5.1 (2018-10-25) ~~~~~~~~~~~~~~~~~~ @@ -12,6 +22,7 @@ * documentation improvements; * Python 3.7 tests are run on CI; other test improvements. 
+ 1.5.0 (2018-07-04) ~~~~~~~~~~~~~~~~~~ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/parsel-1.5.1/PKG-INFO new/parsel-1.5.2/PKG-INFO --- old/parsel-1.5.1/PKG-INFO 2018-10-25 22:21:57.000000000 +0200 +++ new/parsel-1.5.2/PKG-INFO 2019-08-09 13:27:13.000000000 +0200 @@ -1,6 +1,6 @@ Metadata-Version: 1.1 Name: parsel -Version: 1.5.1 +Version: 1.5.2 Summary: Parsel is a library to extract data from HTML and XML using XPath and CSS selectors Home-page: https://github.com/scrapy/parsel Author: Scrapy project @@ -43,7 +43,7 @@ <ul> <li><a href="http://example.com">Link 1</a></li> <li><a href="http://scrapy.org">Link 2</a></li> - </ul + </ul> </body> </html>""") >>> @@ -64,6 +64,16 @@ History ------- + 1.5.2 (2019-08-09) + ~~~~~~~~~~~~~~~~~~ + + * ``Selector.remove_namespaces`` received a significant performance improvement + * The value of ``data`` within the printable representation of a selector + (``repr(selector)``) now ends in ``...`` when truncated, to make the + truncation obvious. + * Minor documentation improvements. + + 1.5.1 (2018-10-25) ~~~~~~~~~~~~~~~~~~ @@ -73,6 +83,7 @@ * documentation improvements; * Python 3.7 tests are run on CI; other test improvements. 
+ 1.5.0 (2018-07-04) ~~~~~~~~~~~~~~~~~~ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/parsel-1.5.1/README.rst new/parsel-1.5.2/README.rst --- old/parsel-1.5.1/README.rst 2018-10-25 22:21:21.000000000 +0200 +++ new/parsel-1.5.2/README.rst 2019-08-09 13:26:40.000000000 +0200 @@ -35,7 +35,7 @@ <ul> <li><a href="http://example.com">Link 1</a></li> <li><a href="http://scrapy.org">Link 2</a></li> - </ul + </ul> </body> </html>""") >>> diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/parsel-1.5.1/docs/usage.rst new/parsel-1.5.2/docs/usage.rst --- old/parsel-1.5.1/docs/usage.rst 2018-10-25 22:21:21.000000000 +0200 +++ new/parsel-1.5.2/docs/usage.rst 2019-08-09 13:26:40.000000000 +0200 @@ -530,6 +530,8 @@ .. _regular expressions: http://exslt.org/regexp/index.html .. _set manipulation: http://exslt.org/set/index.html +.. _topics-xpath-other-extensions: + Other XPath extensions ---------------------- @@ -912,6 +914,7 @@ '//lh3.googleusercontent.com/-7xisiK0EArc/AAAAAAAAAAI/AAAAAAAAAuM/-r6o6A8RKCM/s512-c/photo.jpg', ... +.. 
_topics-xpath-variables: Variables in XPath expressions ------------------------------ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/parsel-1.5.1/parsel/__init__.py new/parsel-1.5.2/parsel/__init__.py --- old/parsel-1.5.1/parsel/__init__.py 2018-10-25 22:21:21.000000000 +0200 +++ new/parsel-1.5.2/parsel/__init__.py 2019-08-09 13:26:40.000000000 +0200 @@ -5,7 +5,7 @@ __author__ = 'Scrapy project' __email__ = 'i...@scrapy.org' -__version__ = '1.5.1' +__version__ = '1.5.2' from parsel.selector import Selector, SelectorList # NOQA from parsel.csstranslator import css2xpath # NOQA diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/parsel-1.5.1/parsel/selector.py new/parsel-1.5.2/parsel/selector.py --- old/parsel-1.5.1/parsel/selector.py 2018-10-25 22:21:21.000000000 +0200 +++ new/parsel-1.5.2/parsel/selector.py 2019-08-09 13:26:40.000000000 +0200 @@ -7,7 +7,7 @@ import six from lxml import etree, html -from .utils import flatten, iflatten, extract_regex +from .utils import flatten, iflatten, extract_regex, shorten from .csstranslator import HTMLTranslator, GenericTranslator @@ -258,6 +258,8 @@ In the background, CSS queries are translated into XPath queries using `cssselect`_ library and run ``.xpath()`` method. + + .. 
_cssselect: https://pypi.python.org/pypi/cssselect/ """ return self.xpath(self._css2xpath(query)) @@ -337,8 +339,8 @@ for an in el.attrib.keys(): if an.startswith('{'): el.attrib[an.split('}', 1)[1]] = el.attrib.pop(an) - # remove namespace declarations - etree.cleanup_namespaces(self.root) + # remove namespace declarations + etree.cleanup_namespaces(self.root) @property def attrib(self): @@ -356,6 +358,6 @@ __nonzero__ = __bool__ def __str__(self): - data = repr(self.get()[:40]) + data = repr(shorten(self.get(), width=40)) return "<%s xpath=%r data=%s>" % (type(self).__name__, self._expr, data) __repr__ = __str__ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/parsel-1.5.1/parsel/utils.py new/parsel-1.5.2/parsel/utils.py --- old/parsel-1.5.1/parsel/utils.py 2018-10-25 22:21:21.000000000 +0200 +++ new/parsel-1.5.2/parsel/utils.py 2019-08-09 13:26:40.000000000 +0200 @@ -80,4 +80,15 @@ strings = flatten(strings) if not replace_entities: return strings - return [w3lib_replace_entities(s, keep=['lt', 'amp']) for s in strings] \ No newline at end of file + return [w3lib_replace_entities(s, keep=['lt', 'amp']) for s in strings] + + +def shorten(text, width, suffix='...'): + """Truncate the given text to fit in the given width.""" + if len(text) <= width: + return text + if width > len(suffix): + return text[:width-len(suffix)] + suffix + if width >= 0: + return suffix[len(suffix)-width:] + raise ValueError('width must be equal or greater than 0') diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/parsel-1.5.1/parsel.egg-info/PKG-INFO new/parsel-1.5.2/parsel.egg-info/PKG-INFO --- old/parsel-1.5.1/parsel.egg-info/PKG-INFO 2018-10-25 22:21:57.000000000 +0200 +++ new/parsel-1.5.2/parsel.egg-info/PKG-INFO 2019-08-09 13:27:13.000000000 +0200 @@ -1,6 +1,6 @@ Metadata-Version: 1.1 Name: parsel -Version: 1.5.1 +Version: 1.5.2 Summary: Parsel is a library to extract data from HTML and XML using 
XPath and CSS selectors Home-page: https://github.com/scrapy/parsel Author: Scrapy project @@ -43,7 +43,7 @@ <ul> <li><a href="http://example.com">Link 1</a></li> <li><a href="http://scrapy.org">Link 2</a></li> - </ul + </ul> </body> </html>""") >>> @@ -64,6 +64,16 @@ History ------- + 1.5.2 (2019-08-09) + ~~~~~~~~~~~~~~~~~~ + + * ``Selector.remove_namespaces`` received a significant performance improvement + * The value of ``data`` within the printable representation of a selector + (``repr(selector)``) now ends in ``...`` when truncated, to make the + truncation obvious. + * Minor documentation improvements. + + 1.5.1 (2018-10-25) ~~~~~~~~~~~~~~~~~~ @@ -73,6 +83,7 @@ * documentation improvements; * Python 3.7 tests are run on CI; other test improvements. + 1.5.0 (2018-07-04) ~~~~~~~~~~~~~~~~~~ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/parsel-1.5.1/parsel.egg-info/SOURCES.txt new/parsel-1.5.2/parsel.egg-info/SOURCES.txt --- old/parsel-1.5.1/parsel.egg-info/SOURCES.txt 2018-10-25 22:21:57.000000000 +0200 +++ new/parsel-1.5.2/parsel.egg-info/SOURCES.txt 2019-08-09 13:27:13.000000000 +0200 @@ -28,4 +28,5 @@ tests/requirements.txt tests/test_selector.py tests/test_selector_csstranslator.py +tests/test_utils.py tests/test_xpathfuncs.py \ No newline at end of file diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/parsel-1.5.1/parsel.egg-info/requires.txt new/parsel-1.5.2/parsel.egg-info/requires.txt --- old/parsel-1.5.1/parsel.egg-info/requires.txt 2018-10-25 22:21:57.000000000 +0200 +++ new/parsel-1.5.2/parsel.egg-info/requires.txt 2019-08-09 13:27:13.000000000 +0200 @@ -1,7 +1,12 @@ w3lib>=1.19.0 -lxml>=2.3 six>=1.5.2 cssselect>=0.9 +[:python_version != "3.4"] +lxml + +[:python_version == "3.4"] +lxml<=4.3.5 + [:python_version<'3.0'] functools32 diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/parsel-1.5.1/setup.py 
new/parsel-1.5.2/setup.py --- old/parsel-1.5.1/setup.py 2018-10-25 22:21:21.000000000 +0200 +++ new/parsel-1.5.2/setup.py 2019-08-09 13:26:40.000000000 +0200 @@ -27,7 +27,8 @@ install_requires = [ 'w3lib>=1.19.0', - 'lxml>=2.3', + 'lxml;python_version!="3.4"', + 'lxml<=4.3.5;python_version=="3.4"', 'six>=1.5.2', 'cssselect>=0.9' ] @@ -41,7 +42,7 @@ setup( name='parsel', - version='1.5.1', + version='1.5.2', description="Parsel is a library to extract data from HTML and XML using XPath and CSS selectors", long_description=readme + '\n\n' + history, author="Scrapy project", diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/parsel-1.5.1/tests/test_selector.py new/parsel-1.5.2/tests/test_selector.py --- old/parsel-1.5.1/tests/test_selector.py 2018-10-25 22:21:21.000000000 +0200 +++ new/parsel-1.5.2/tests/test_selector.py 2019-08-09 13:26:40.000000000 +0200 @@ -133,9 +133,9 @@ body = u"<p><input name='{}' value='\xa9'/></p>".format(50 * 'b') sel = self.sscls(text=body) - representation = "<Selector xpath='//input/@name' data='{}'>".format(40 * 'b') + representation = "<Selector xpath='//input/@name' data='{}...'>".format(37 * 'b') if six.PY2: - representation = "<Selector xpath='//input/@name' data=u'{}'>".format(40 * 'b') + representation = "<Selector xpath='//input/@name' data=u'{}...'>".format(37 * 'b') self.assertEqual( [repr(it) for it in sel.xpath('//input/@name')], @@ -625,28 +625,61 @@ def test_remove_namespaces(self): xml = u"""<?xml version="1.0" encoding="UTF-8"?> <feed xmlns="http://www.w3.org/2005/Atom" xml:lang="en-US" xmlns:media="http://search.yahoo.com/mrss/"> - <link type="text/html"> - <link type="application/atom+xml"> + <link type="text/html"/> + <entry> + <link type="text/html"/> + </entry> + <link type="application/atom+xml"/> </feed> """ sel = self.sscls(text=xml, type='xml') self.assertEqual(len(sel.xpath("//link")), 0) self.assertEqual(len(sel.xpath("./namespace::*")), 3) sel.remove_namespaces() + 
self.assertEqual(len(sel.xpath("//link")), 3) + self.assertEqual(len(sel.xpath("./namespace::*")), 1) + + def test_remove_namespaces_embedded(self): + xml = u""" + <feed xmlns="http://www.w3.org/2005/Atom"> + <link type="text/html"/> + <entry> + <link type="text/html"/> + </entry> + <svg xmlns="http://www.w3.org/2000/svg" version="1.1" viewBox="0 0 100 100"> + <linearGradient id="gradient"> + <stop class="begin" offset="0%" style="stop-color:yellow;"/> + <stop class="end" offset="80%" style="stop-color:green;"/> + </linearGradient> + <circle cx="50" cy="50" r="30" style="fill:url(#gradient)" /> + </svg> + </feed> + """ + sel = self.sscls(text=xml, type='xml') + self.assertEqual(len(sel.xpath("//link")), 0) + self.assertEqual(len(sel.xpath("//stop")), 0) + self.assertEqual(len(sel.xpath("./namespace::*")), 2) + self.assertEqual(len(sel.xpath("//f:link", namespaces={'f': 'http://www.w3.org/2005/Atom'})), 2) + self.assertEqual(len(sel.xpath("//s:stop", namespaces={'s': 'http://www.w3.org/2000/svg'})), 2) + sel.remove_namespaces() self.assertEqual(len(sel.xpath("//link")), 2) + self.assertEqual(len(sel.xpath("//stop")), 2) self.assertEqual(len(sel.xpath("./namespace::*")), 1) def test_remove_attributes_namespaces(self): xml = u"""<?xml version="1.0" encoding="UTF-8"?> <feed xmlns:atom="http://www.w3.org/2005/Atom" xml:lang="en-US" xmlns:media="http://search.yahoo.com/mrss/"> - <link atom:type="text/html"> - <link atom:type="application/atom+xml"> + <link atom:type="text/html"/> + <entry> + <link atom:type="text/html"/> + </entry> + <link atom:type="application/atom+xml"/> </feed> """ sel = self.sscls(text=xml, type='xml') self.assertEqual(len(sel.xpath("//link/@type")), 0) sel.remove_namespaces() - self.assertEqual(len(sel.xpath("//link/@type")), 2) + self.assertEqual(len(sel.xpath("//link/@type")), 3) def test_smart_strings(self): """Lxml smart strings return values""" diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' 
old/parsel-1.5.1/tests/test_utils.py new/parsel-1.5.2/tests/test_utils.py --- old/parsel-1.5.1/tests/test_utils.py 1970-01-01 01:00:00.000000000 +0100 +++ new/parsel-1.5.2/tests/test_utils.py 2019-08-09 13:26:40.000000000 +0200 @@ -0,0 +1,26 @@ +from parsel.utils import shorten + +from pytest import mark, raises +import six + + +@mark.parametrize( + 'width,expected', + ( + (-1, ValueError), + (0, u''), + (1, u'.'), + (2, u'..'), + (3, u'...'), + (4, u'f...'), + (5, u'fo...'), + (6, u'foobar'), + (7, u'foobar'), + ) +) +def test_shorten(width, expected): + if isinstance(expected, six.string_types): + assert shorten(u'foobar', width) == expected + else: + with raises(expected): + shorten(u'foobar', width)