Control: tag -1 + patch
There's an upstream PR migrating to bs4
https://github.com/google/gumbo-parser/pull/368
Patch attached.
SR
--
Stefano Rivera
http://tumbleweed.org.za/
+1 415 683 3272
diff -Nru gumbo-parser-0.10.1+dfsg/debian/control gumbo-parser-0.10.1+dfsg/debian/control
--- gumbo-parser-0.10.1+dfsg/debian/control 2015-12-30 18:44:19.0 +0000
+++ gumbo-parser-0.10.1+dfsg/debian/control 2018-08-27 15:48:16.0 +0100
@@ -45,7 +45,7 @@
Architecture: all
Depends: ${misc:Depends}, ${shlibs:Depends}, ${python:Depends},
libgumbo1 (>= ${source:Version})
-Recommends: python-beautifulsoup, python-html5lib
+Recommends: python-bs4, python-html5lib
Description: pure-C HTML5 parser Python bindings
Gumbo is an implementation of the [HTML5 parsing algorithm implemented
as a pure C99 library with no outside dependencies. It's designed to serve
@@ -59,7 +59,7 @@
Architecture: all
Depends: ${misc:Depends}, ${shlibs:Depends}, ${python3:Depends},
libgumbo1 (>= ${source:Version})
-Recommends: python3-html5lib
+Recommends: python3-bs4, python3-html5lib
Description: pure-C HTML5 parser Python 3 bindings
Gumbo is an implementation of the [HTML5 parsing algorithm implemented
as a pure C99 library with no outside dependencies. It's designed to serve
diff -Nru gumbo-parser-0.10.1+dfsg/debian/patches/03-bs4.patch
gumbo-parser-0.10.1+dfsg/debian/patches/03-bs4.patch
--- gumbo-parser-0.10.1+dfsg/debian/patches/03-bs4.patch 1970-01-01 01:00:00.0 +0100
+++ gumbo-parser-0.10.1+dfsg/debian/patches/03-bs4.patch 2018-08-27 15:48:16.0 +0100
@@ -0,0 +1,178 @@
+From 29e1abb337af2a15ac4b38fb1c28d1b55ed08d54 Mon Sep 17 00:00:00 2001
+From: Roman Miroshnychenko
+Date: Tue, 19 Jul 2016 18:25:52 +0300
+Subject: [PATCH] Updates soup_adapter to use BeautifulSoup 4
+
+Also fixes the indentation according to PEP-8
+---
+ python/gumbo/soup_adapter.py | 123 +--
+ 1 file changed, 61 insertions(+), 62 deletions(-)
+
+diff --git a/python/gumbo/soup_adapter.py b/python/gumbo/soup_adapter.py
+index b18748f..6a247dd 100644
+--- a/python/gumbo/soup_adapter.py
++++ b/python/gumbo/soup_adapter.py
+@@ -13,66 +13,65 @@
+ # limitations under the License.
+ #
+
+-"""Adapter between Gumbo and BeautifulSoup.
++"""Adapter between Gumbo and BeautifulSoup 4.
+
+-This parses an HTML document and gives back a BeautifulSoup object, which you
+-can then manipulate like a normal BeautifulSoup parse tree.
++This parses an HTML document and gives back a BeautifulSoup 4 object, which you
++can then manipulate like a normal BeautifulSoup 4 parse tree.
+ """
+
+ __author__ = 'jdt...@google.com (Jonathan Tang)'
+
+-import BeautifulSoup
+-
++import bs4 as BeautifulSoup
+ import gumboc
+
+
+ def _utf8(text):
+- return text.decode('utf-8', 'replace')
++return text.decode('utf-8', 'replace')
+
+
+ def _add_source_info(obj, original_text, start_pos, end_pos):
+- obj.original = str(original_text)
+- obj.line = start_pos.line
+- obj.col = start_pos.column
+- obj.offset = start_pos.offset
+- if end_pos:
+-obj.end_line = end_pos.line
+-obj.end_col = end_pos.column
+-obj.end_offset = end_pos.offset
++obj.original = str(original_text)
++obj.line = start_pos.line
++obj.col = start_pos.column
++obj.offset = start_pos.offset
++if end_pos:
++obj.end_line = end_pos.line
++obj.end_col = end_pos.column
++obj.end_offset = end_pos.offset
+
+
+ def _convert_attrs(attrs):
+- # TODO(jdtang): Ideally attributes would pass along their positions as well,
+- # but I can't extend the built in str objects with new attributes. Maybe work
+- # around this with a subclass in some way...
+- return [(_utf8(attr.name), _utf8(attr.value)) for attr in attrs]
++# TODO(jdtang): Ideally attributes would pass along their positions as well,
++# but I can't extend the built in str objects with new attributes. Maybe work
++# around this with a subclass in some way...
++return [(_utf8(attr.name), _utf8(attr.value)) for attr in attrs]
+
+
+ def _add_document(soup, element):
+- # Currently ignored, since there's no real place for this in the BeautifulSoup
+- # API.
+- pass
++# Currently ignored, since there's no real place for this in the BeautifulSoup
++# API.
++pass
+
+
+ def _add_element(soup, element):
+- # TODO(jdtang): Expose next/previous in gumbo so they can be passed along to
+- # BeautifulSoup.
+- tag = BeautifulSoup.Tag(
+- soup, _utf8(element.tag_name), _convert_attrs(element.attributes))
+- for child in element.children:
+-tag.append(_add_node(soup, child))
+- _add_source_info(
+- tag, element.original_tag, element.start_pos, element.end_pos)
+- tag.original_end_tag = str(element.original_end_tag)
+- return tag
++# TODO(jdtang): Expose next/previous in gumbo so they can be passed along to
++# BeautifulSoup.
++tag = BeautifulSoup.Tag(
++