Control: tag -1 + patch

Looks like upstream has mostly done this already, and just needs to cut a new release...

https://github.com/clips/pattern/commit/25e88a3ab29cae04efed3205bd7f6ddcdf8b0ddc
https://github.com/clips/pattern/commit/1dffe92fd8606fdce7126e0b71947911af0c4feb

Patch attached.

SR

--
Stefano Rivera
  http://tumbleweed.org.za/
  +1 415 683 3272
diff -Nru python-pattern-2.6+git20150109/debian/control python-pattern-2.6+git20150109/debian/control --- python-pattern-2.6+git20150109/debian/control 2016-05-12 21:26:36.000000000 +0100 +++ python-pattern-2.6+git20150109/debian/control 2018-08-27 14:30:22.000000000 +0100 @@ -4,7 +4,7 @@ Maintainer: Miriam Ruiz <[email protected]> Build-Depends: debhelper (>= 9), quilt, python-all, python-setuptools, dh-python, - python-liblinear, python-libsvm, python-feedparser, python-beautifulsoup, + python-liblinear, python-libsvm, python-feedparser, python-bs4, python-simplejson, python-pdfminer, python-numpy, wordnet-base Standards-Version: 3.9.8 X-Python-Version: >= 2.6 @@ -15,7 +15,7 @@ Package: python-pattern Architecture: all Depends: ${misc:Depends}, ${python:Depends}, ${shlibs:Depends}, - python-liblinear, python-libsvm, python-feedparser, python-beautifulsoup, + python-liblinear, python-libsvm, python-feedparser, python-bs4, python-simplejson, python-pdfminer, python-numpy, wordnet-base Description: web mining module for Python Pattern is a web mining module for the Python programming language. It has diff -Nru python-pattern-2.6+git20150109/debian/patches/bs4.patch python-pattern-2.6+git20150109/debian/patches/bs4.patch --- python-pattern-2.6+git20150109/debian/patches/bs4.patch 1970-01-01 01:00:00.000000000 +0100 +++ python-pattern-2.6+git20150109/debian/patches/bs4.patch 2018-08-27 14:30:11.000000000 +0100 @@ -0,0 +1,71 @@ +Description: Port to beautifulsoup4 + +Author: Markus Beuckelmann <[email protected]> +Origin: upstream, https://github.com/clips/pattern/commit/25e88a3ab29cae04efed3205bd7f6ddcdf8b0ddc https://github.com/clips/pattern/commit/1dffe92fd8606fdce7126e0b71947911af0c4feb +Bug-Debian: https://bugs.debian.org/891099 + +--- a/pattern/web/__init__.py ++++ b/pattern/web/__init__.py +@@ -36,7 +36,7 @@ + import locale + + import feedparser +-import BeautifulSoup ++import bs4 as BeautifulSoup + + try: + # Import persistent Cache. 
+--- a/test/test_web.py ++++ b/test/test_web.py +@@ -308,7 +308,9 @@ + ( "<p>text</p>", "text\n\n"), + ( "<li>text</li>", "* text\n"), + ( "<td>text</td>", "text\t"), +- ( "<br /><br/><br>", "\n\n\n")): ++ ( "<br>", "\n"), ++ ( "<br/>", "\n\n"), ++ ( "<br /><br/><br>", "\n\n\n\n\n")): + self.assertEqual(web.strip_tags(html), plain) + # Assert exclude tags and attributes + v = web.strip_tags("<a href=\"\" onclick=\"\">text</a>", exclude={"a": ["href"]}) +@@ -749,17 +751,17 @@ + # Assert Node properties. + v1 = web.Document(self.html) + self.assertEqual(v1.type, web.DOCUMENT) +- self.assertEqual(v1.source[:10], "<!doctype ") # Note: BeautifulSoup strips whitespace. ++ self.assertEqual(v1.source[:10], "<!DOCTYPE ") # Note: BeautifulSoup strips whitespace. + self.assertEqual(v1.parent, None) + # Assert Node traversal. + v2 = v1.children[0].next +- self.assertEqual(v2.type, web.TEXT) ++ self.assertEqual(v2.type, web.ELEMENT) + self.assertEqual(v2.previous, v1.children[0]) + # Assert Document properties. + v3 = v1.declaration + self.assertEqual(v3, v1.children[0]) + self.assertEqual(v3.parent, v1) +- self.assertEqual(v3.source, "<!doctype html>") ++ self.assertEqual(v3.source, "html") + self.assertEqual(v1.head.type, web.ELEMENT) + self.assertEqual(v1.body.type, web.ELEMENT) + self.assertTrue(v1.head.source.startswith("<head")) +@@ -783,7 +785,7 @@ + v = web.DOM(self.html).body + self.assertEqual(v.tag, "body") + self.assertEqual(v.attributes["id"], "front") +- self.assertEqual(v.attributes["class"], "comments") ++ self.assertEqual(v.attributes["class"], ["comments"]) + self.assertTrue(v.content.startswith("\n<script")) + # Assert Element.getElementsByTagname() (test navigation links). + a = v.by_tag("a") +@@ -794,8 +796,8 @@ + # Assert Element.getElementsByClassname() (test <p class="comment">). 
+ a = v.by_class("comment") + self.assertEqual(a[0].tag, "p") +- self.assertEqual(a[0].by_tag("span")[0].attributes["class"], "date") +- self.assertEqual(a[0].by_tag("span")[1].attributes["class"], "author") ++ self.assertEqual(a[0].by_tag("span")[0].attributes["class"], ["date"]) ++ self.assertEqual(a[0].by_tag("span")[1].attributes["class"], ["author"]) + for selector in (".comment", "p.comment", "*.comment"): + self.assertEqual(v.by_tag(selector)[0], a[0]) + # Assert Element.getElementById() (test <div id="content">). diff -Nru python-pattern-2.6+git20150109/debian/patches/series python-pattern-2.6+git20150109/debian/patches/series --- python-pattern-2.6+git20150109/debian/patches/series 2016-05-12 21:18:38.000000000 +0100 +++ python-pattern-2.6+git20150109/debian/patches/series 2018-08-27 14:27:44.000000000 +0100 @@ -5,3 +5,4 @@ remove-paypal.patch fix-tests.patch fix-examples.patch +bs4.patch

