Hello community,
here is the log from the commit of package python-html5-parser for
openSUSE:Factory checked in at 2019-06-01 09:55:21
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Comparing /work/SRC/openSUSE:Factory/python-html5-parser (Old)
and /work/SRC/openSUSE:Factory/.python-html5-parser.new.5148 (New)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "python-html5-parser"
Sat Jun 1 09:55:21 2019 rev:6 rq:706354 version:0.4.6
Changes:
--------
--- /work/SRC/openSUSE:Factory/python-html5-parser/python-html5-parser.changes 2019-02-06 14:07:50.406644623 +0100
+++ /work/SRC/openSUSE:Factory/.python-html5-parser.new.5148/python-html5-parser.changes 2019-06-01 09:55:22.499203938 +0200
@@ -1,0 +2,8 @@
+Wed May 29 14:38:09 UTC 2019 - [email protected]
+
+- update to 0.4.6
+ No changelog from upstream.
+ See instead here:
+ https://github.com/kovidgoyal/html5-parser/compare/v0.4.5...v0.4.6?diff=unified&name=v0.4.6
+
+-------------------------------------------------------------------
Old:
----
v0.4.5.tar.gz
New:
----
v0.4.6.tar.gz
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Other differences:
------------------
++++++ python-html5-parser.spec ++++++
--- /var/tmp/diff_new_pack.qHSr8T/_old 2019-06-01 09:55:23.007203765 +0200
+++ /var/tmp/diff_new_pack.qHSr8T/_new 2019-06-01 09:55:23.011203763 +0200
@@ -19,7 +19,7 @@
%{?!python_module:%define python_module() python-%{**} python3-%{**}}
Name: python-html5-parser
-Version: 0.4.5
+Version: 0.4.6
Release: 0
Summary: C based HTML 5 parsing for Python
License: Apache-2.0
++++++ v0.4.5.tar.gz -> v0.4.6.tar.gz ++++++
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn'
'--exclude=.svnignore' old/html5-parser-0.4.5/build.py
new/html5-parser-0.4.6/build.py
--- old/html5-parser-0.4.5/build.py 2018-04-22 17:07:13.000000000 +0200
+++ new/html5-parser-0.4.6/build.py 2019-05-13 09:03:47.000000000 +0200
@@ -72,7 +72,12 @@
def pkg_config(pkg, *args):
- val = subprocess.check_output([PKGCONFIG, pkg] + list(args)).decode('utf-8')
+ try:
+ val = subprocess.check_output([PKGCONFIG, pkg] + list(args)).decode('utf-8')
+ except EnvironmentError as err:
+ if err.errno == errno.ENOENT:
+ raise SystemExit('pkg-config is required to build html5-parser')
+ raise
return list(filter(None, map(str, shlex.split(val))))
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn'
'--exclude=.svnignore' old/html5-parser-0.4.5/gumbo/parser.c
new/html5-parser-0.4.6/gumbo/parser.c
--- old/html5-parser-0.4.5/gumbo/parser.c 2018-04-22 17:07:13.000000000
+0200
+++ new/html5-parser-0.4.6/gumbo/parser.c 2019-05-13 09:03:47.000000000
+0200
@@ -4643,7 +4643,14 @@
TAG(HR), TAG(IMAGE), TAG(IMG), TAG(INPUT), TAG(ISINDEX),
TAG(KEYGEN), TAG(LINK), TAG(MENUITEM), TAG(META),
TAG(PARAM), TAG(SOURCE), TAG(SPACER), TAG(TRACK),
- TAG(WBR)})) {
+ TAG(WBR),
+ // we exclude the <html> tag as it causes crashes in the as-lxml
+ // module, see https://github.com/kovidgoyal/html5-parser/issues/17
+ // I dont have the time to track down the root cause, probably something
+ // related to resuing the same string segments for the tag name and the
+ // special cloning/modification that happens to HTML tags. Since HTML tags
+ // are treated specially anyway, there is no harm in excluding them.
+ TAG(HTML)})) {
inject_end = true;
// since self closing tag, end tag should share same
// position and original text information as start tag
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn'
'--exclude=.svnignore' old/html5-parser-0.4.5/src/as-libxml.c
new/html5-parser-0.4.6/src/as-libxml.c
--- old/html5-parser-0.4.5/src/as-libxml.c 2018-04-22 17:07:13.000000000
+0200
+++ new/html5-parser-0.4.6/src/as-libxml.c 2019-05-13 09:03:47.000000000
+0200
@@ -101,7 +101,7 @@
if (UNLIKELY(!xmlNewNsPropEatName(node, NULL,
(xmlChar*)pd->lang_attribute, BAD_CAST attr->value))) return false;
}
continue;
- }
+ }
break;
case GUMBO_ATTR_NAMESPACE_XMLNS:
if (strncmp(aname, "xlink", 5) == 0) {
@@ -115,8 +115,8 @@
continue;
} else if (strncmp(aname, "xmlns", 5) == 0) {
// discard since we dont support changing the default
- // namespace, namespace are decided by tag names alone.
- continue;
+ // namespace, namespace are decided by tag names alone.
+ continue;
}
break;
default:
@@ -128,14 +128,14 @@
continue;
} else if (UNLIKELY(strncmp("xmlns", aname, 5) == 0)) {
size_t len = strlen(aname);
- if (len == 5) continue; // ignore xmlns
+ if (len == 5) continue; // ignore xmlns
if (aname[5] == ':') {
if (len == 6) continue; //ignore xmlns:
if (pd->maybe_xhtml) {
xmlNewNs(node, BAD_CAST attr->value, BAD_CAST
aname + 6);
- // We ignore failure to create the namespace as
the most likely
+ // We ignore failure to create the namespace as
the most likely
// cause is the prefix already exists in this
context and xmlNewNs
- // does not allow replacing prefixes. We could in
theory find the
+ // does not allow replacing prefixes. We could in
theory find the
// existing namespace, but I dont care enough
continue;
} else {
@@ -265,15 +265,15 @@
}
#undef ABORT
end:
- if (UNLIKELY(!ok)) {
- if(result) xmlFreeNode(result);
- result = NULL;
+ if (UNLIKELY(!ok)) {
+ if(result) xmlFreeNode(result);
+ result = NULL;
}
return result;
}
-static inline xmlNodePtr
+static inline xmlNodePtr
convert_node(xmlDocPtr doc, xmlNodePtr xml_parent, GumboNode* node,
GumboElement **elem, Options *opts) {
xmlNodePtr ans = NULL;
ParseData *pd = (ParseData*)doc->_private;
@@ -320,7 +320,7 @@
}
opts->line_number_attr = xmlDictLookup(doc->dict, BAD_CAST
opts->line_number_attr, -1);
}
- doc->encoding = xmlStrdup(BAD_CAST "UTF-8");
+ doc->encoding = xmlStrdup(BAD_CAST "UTF-8");
}
return doc;
}
@@ -336,9 +336,9 @@
xmlNodePtr comment = xmlNewComment(BAD_CAST
root_node->v.text.text);
if (UNLIKELY(!comment)) { pd->errmsg = ERRMSG("Out of memory
allocating comment"); return false; }
if (UNLIKELY(!(before_root ? xmlAddPrevSibling(pd->root, comment)
: xmlAddSibling(pd->root, comment)))) {
- pd->errmsg = ERRMSG("Failed to add sibling to root node");
+ pd->errmsg = ERRMSG("Failed to add sibling to root node");
xmlFreeNode(comment);
- return false;
+ return false;
}
}
}
@@ -377,7 +377,7 @@
if (UNLIKELY(!child)) ABORT;
if (LIKELY(parent)) {
if (UNLIKELY(!xmlAddChild(parent, child))) ABORT;
- } else parse_data.root = child;
+ } else parse_data.root = child;
if (elem != NULL) {
if (!push_children(child, elem, stack)) ABORT;
}
@@ -405,10 +405,10 @@
return doc;
}
-libxml_doc*
+libxml_doc*
copy_libxml_doc(libxml_doc* doc) { return xmlCopyDoc(doc, 1); }
-libxml_doc
+libxml_doc
free_libxml_doc(libxml_doc* doc) { xmlFreeDoc(doc); }
int
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn'
'--exclude=.svnignore' old/html5-parser-0.4.5/src/html5_parser/soup.py
new/html5-parser-0.4.6/src/html5_parser/soup.py
--- old/html5-parser-0.4.5/src/html5_parser/soup.py 2018-04-22
17:07:13.000000000 +0200
+++ new/html5-parser-0.4.6/src/html5_parser/soup.py 2019-05-13
09:03:47.000000000 +0200
@@ -63,9 +63,11 @@
def bs4_new_tag(Tag, soup):
+ builder = soup.builder
+
def new_tag(name, attrs):
attrs = {k: map_list_attributes(name, k, v) for k, v in attrs.items()}
- return Tag(soup, name=name, attrs=attrs)
+ return Tag(soup, name=name, attrs=attrs, builder=builder)
return new_tag
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn'
'--exclude=.svnignore' old/html5-parser-0.4.5/src/python-wrapper.c
new/html5-parser-0.4.6/src/python-wrapper.c
--- old/html5-parser-0.4.5/src/python-wrapper.c 2018-04-22 17:07:13.000000000
+0200
+++ new/html5-parser-0.4.6/src/python-wrapper.c 2019-05-13 09:03:47.000000000
+0200
@@ -15,7 +15,7 @@
#define MAJOR 0
#define MINOR 4
-#define PATCH 5
+#define PATCH 6
static char *NAME = "libxml2:xmlDoc";
static char *DESTRUCTOR = "destructor:xmlFreeDoc";
@@ -134,7 +134,7 @@
static PyMethodDef
methods[] = {
- {"parse", (PyCFunction)parse, METH_VARARGS | METH_KEYWORDS,
+ {"parse", (PyCFunction)(void(*)(void))(PyCFunctionWithKeywords)(parse),
METH_VARARGS | METH_KEYWORDS,
"parse()\n\nParse specified bytestring which must be in the UTF-8
encoding."
},
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn'
'--exclude=.svnignore' old/html5-parser-0.4.5/test/malformed.py
new/html5-parser-0.4.6/test/malformed.py
--- old/html5-parser-0.4.5/test/malformed.py 2018-04-22 17:07:13.000000000
+0200
+++ new/html5-parser-0.4.6/test/malformed.py 2019-05-13 09:03:47.000000000
+0200
@@ -15,3 +15,9 @@
p = root[1][0]
self.ae(p.attrib, {'bad_attr': 'x'})
self.ae(p[0].tag, 'bad_name')
+
+ def test_multiple_roots(self):
+ root = parse("<html><html />", maybe_xhtml=True)
+ from lxml import etree
+ self.ae(etree.tostring(root, encoding='unicode'),
+ '<html xmlns="http://www.w3.org/1999/xhtml"><head/><body/></html>')
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn'
'--exclude=.svnignore' old/html5-parser-0.4.5/test/soup.py
new/html5-parser-0.4.6/test/soup.py
--- old/html5-parser-0.4.5/test/soup.py 2018-04-22 17:07:13.000000000 +0200
+++ new/html5-parser-0.4.6/test/soup.py 2019-05-13 09:03:47.000000000 +0200
@@ -23,11 +23,13 @@
self.ae(
type('')(root),
'<html><head></head><body><p>\n<a>y</a>z<x:x>1</x:x></p></body></html>')
root = parse('<svg><image>')
- self.ae(type('')(root),
'<html><head></head><body><svg><image></image></svg></body></html>')
+ self.ae(type('')(root),
'<html><head></head><body><svg><image/></svg></body></html>')
root = parse('<p><!-- ---->')
self.ae(type('')(root), '<html><head></head><body><p><!--
----></p></body></html>')
root = parse('<p><i><b>')
self.ae(type('')(root),
'<html><head></head><body><p><i><b></b></i></p></body></html>')
+ root = parse('<p>a<br>b')
+ self.ae(type('')(root),
'<html><head></head><body><p>a<br/>b</p></body></html>')
def test_attr_soup(self):
root = parse('<p a=1 b=2 ID=3><a a=a>')
@@ -38,7 +40,7 @@
self.ae(
type('')(root),
'<html><head></head><body>'
- '<p a="1"><svg><image xlink:href="h"></image></svg></p>'
+ '<p a="1"><svg><image xlink:href="h"/></svg></p>'
'</body></html>'
)
root = parse('<html xml:lang="en" lang="fr"><p>')