commit python-html5-parser for openSUSE:Factory

root Sat, 01 Jun 2019 00:56:06 -0700

Hello community,

here is the log from the commit of package python-html5-parser for 
openSUSE:Factory checked in at 2019-06-01 09:55:21
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Comparing /work/SRC/openSUSE:Factory/python-html5-parser (Old)
 and      /work/SRC/openSUSE:Factory/.python-html5-parser.new.5148 (New)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++


Package is "python-html5-parser"

Sat Jun  1 09:55:21 2019 rev:6 rq:706354 version:0.4.6

Changes:
--------
--- /work/SRC/openSUSE:Factory/python-html5-parser/python-html5-parser.changes  2019-02-06 14:07:50.406644623 +0100
+++ /work/SRC/openSUSE:Factory/.python-html5-parser.new.5148/python-html5-parser.changes        2019-06-01 09:55:22.499203938 +0200
@@ -1,0 +2,8 @@
+Wed May 29 14:38:09 UTC 2019 - [email protected]
+
+- update to 0.4.6
+  No changelog from upstream.
+  See instead here:
+  https://github.com/kovidgoyal/html5-parser/compare/v0.4.5...v0.4.6?diff=unified&name=v0.4.6
+
+-------------------------------------------------------------------

Old:
----
  v0.4.5.tar.gz

New:
----
  v0.4.6.tar.gz

++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

Other differences:
------------------
++++++ python-html5-parser.spec ++++++
--- /var/tmp/diff_new_pack.qHSr8T/_old  2019-06-01 09:55:23.007203765 +0200
+++ /var/tmp/diff_new_pack.qHSr8T/_new  2019-06-01 09:55:23.011203763 +0200
@@ -19,7 +19,7 @@
 %{?!python_module:%define python_module() python-%{**} python3-%{**}}
 
 Name:           python-html5-parser
-Version:        0.4.5
+Version:        0.4.6
 Release:        0
 Summary:        C based HTML 5 parsing for Python
 License:        Apache-2.0

++++++ v0.4.5.tar.gz -> v0.4.6.tar.gz ++++++
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html5-parser-0.4.5/build.py new/html5-parser-0.4.6/build.py
--- old/html5-parser-0.4.5/build.py     2018-04-22 17:07:13.000000000 +0200
+++ new/html5-parser-0.4.6/build.py     2019-05-13 09:03:47.000000000 +0200
@@ -72,7 +72,12 @@
 
 
 def pkg_config(pkg, *args):
-    val = subprocess.check_output([PKGCONFIG, pkg] + list(args)).decode('utf-8')
+    try:
+        val = subprocess.check_output([PKGCONFIG, pkg] + list(args)).decode('utf-8')
+    except EnvironmentError as err:
+        if err.errno == errno.ENOENT:
+            raise SystemExit('pkg-config is required to build html5-parser')
+        raise
     return list(filter(None, map(str, shlex.split(val))))
 
 
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html5-parser-0.4.5/gumbo/parser.c new/html5-parser-0.4.6/gumbo/parser.c
--- old/html5-parser-0.4.5/gumbo/parser.c       2018-04-22 17:07:13.000000000 +0200
+++ new/html5-parser-0.4.6/gumbo/parser.c       2019-05-13 09:03:47.000000000 +0200
@@ -4643,7 +4643,14 @@
                     TAG(HR), TAG(IMAGE), TAG(IMG), TAG(INPUT), TAG(ISINDEX),
                     TAG(KEYGEN), TAG(LINK), TAG(MENUITEM), TAG(META),
                     TAG(PARAM), TAG(SOURCE), TAG(SPACER), TAG(TRACK),
-                    TAG(WBR)})) {
+                    TAG(WBR),
+                    // we exclude the <html> tag as it causes crashes in the as-lxml
+                    // module, see https://github.com/kovidgoyal/html5-parser/issues/17
+                    // I dont have the time to track down the root cause, probably something
+                    // related to resuing the same string segments for the tag name and the
+                    // special cloning/modification that happens to HTML tags. Since HTML tags
+                    // are treated specially anyway, there is no harm in excluding them.
+                    TAG(HTML)})) {
           inject_end = true;
           // since self closing tag,  end tag should share same
           // position and original text information as start tag
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/html5-parser-0.4.5/src/as-libxml.c 
new/html5-parser-0.4.6/src/as-libxml.c
--- old/html5-parser-0.4.5/src/as-libxml.c      2018-04-22 17:07:13.000000000 
+0200
+++ new/html5-parser-0.4.6/src/as-libxml.c      2019-05-13 09:03:47.000000000 
+0200
@@ -101,7 +101,7 @@
                         if (UNLIKELY(!xmlNewNsPropEatName(node, NULL, 
(xmlChar*)pd->lang_attribute, BAD_CAST attr->value))) return false;
                     }
                     continue;
-                } 
+                }
                 break;
             case GUMBO_ATTR_NAMESPACE_XMLNS:
                 if (strncmp(aname, "xlink", 5) == 0) {
@@ -115,8 +115,8 @@
                     continue;
                 } else if (strncmp(aname, "xmlns", 5) == 0) {
                     // discard since we dont support changing the default
-                    // namespace, namespace are decided by tag names alone. 
-                    continue; 
+                    // namespace, namespace are decided by tag names alone.
+                    continue;
                 }
                 break;
             default:
@@ -128,14 +128,14 @@
                     continue;
                 } else if (UNLIKELY(strncmp("xmlns", aname, 5) == 0)) {
                     size_t len = strlen(aname);
-                    if (len == 5) continue;  // ignore xmlns 
+                    if (len == 5) continue;  // ignore xmlns
                     if (aname[5] == ':') {
                         if (len == 6) continue; //ignore xmlns:
                         if (pd->maybe_xhtml) {
                             xmlNewNs(node, BAD_CAST attr->value, BAD_CAST 
aname + 6);
-                            // We ignore failure to create the namespace as 
the most likely 
+                            // We ignore failure to create the namespace as 
the most likely
                             // cause is the prefix already exists in this 
context and xmlNewNs
-                            // does not allow replacing prefixes. We could in 
theory find the 
+                            // does not allow replacing prefixes. We could in 
theory find the
                             // existing namespace, but I dont care enough
                             continue;
                         } else {
@@ -265,15 +265,15 @@
     }
 #undef ABORT
 end:
-    if (UNLIKELY(!ok)) { 
-        if(result) xmlFreeNode(result); 
-        result = NULL; 
+    if (UNLIKELY(!ok)) {
+        if(result) xmlFreeNode(result);
+        result = NULL;
     }
     return result;
 }
 
 
-static inline xmlNodePtr 
+static inline xmlNodePtr
 convert_node(xmlDocPtr doc, xmlNodePtr xml_parent, GumboNode* node, 
GumboElement **elem, Options *opts) {
     xmlNodePtr ans = NULL;
     ParseData *pd = (ParseData*)doc->_private;
@@ -320,7 +320,7 @@
             }
             opts->line_number_attr = xmlDictLookup(doc->dict, BAD_CAST 
opts->line_number_attr, -1);
         }
-       doc->encoding = xmlStrdup(BAD_CAST "UTF-8");
+        doc->encoding = xmlStrdup(BAD_CAST "UTF-8");
     }
     return doc;
 }
@@ -336,9 +336,9 @@
             xmlNodePtr comment = xmlNewComment(BAD_CAST 
root_node->v.text.text);
             if (UNLIKELY(!comment)) { pd->errmsg = ERRMSG("Out of memory 
allocating comment");  return false; }
             if (UNLIKELY(!(before_root ? xmlAddPrevSibling(pd->root, comment) 
: xmlAddSibling(pd->root, comment)))) {
-                pd->errmsg = ERRMSG("Failed to add sibling to root node"); 
+                pd->errmsg = ERRMSG("Failed to add sibling to root node");
                 xmlFreeNode(comment);
-                return false; 
+                return false;
             }
         }
     }
@@ -377,7 +377,7 @@
         if (UNLIKELY(!child)) ABORT;
         if (LIKELY(parent)) {
             if (UNLIKELY(!xmlAddChild(parent, child))) ABORT;
-        } else parse_data.root = child; 
+        } else parse_data.root = child;
         if (elem != NULL) {
             if (!push_children(child, elem, stack)) ABORT;
         }
@@ -405,10 +405,10 @@
     return doc;
 }
 
-libxml_doc* 
+libxml_doc*
 copy_libxml_doc(libxml_doc* doc) { return xmlCopyDoc(doc, 1); }
 
-libxml_doc 
+libxml_doc
 free_libxml_doc(libxml_doc* doc) { xmlFreeDoc(doc); }
 
 int
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/html5-parser-0.4.5/src/html5_parser/soup.py 
new/html5-parser-0.4.6/src/html5_parser/soup.py
--- old/html5-parser-0.4.5/src/html5_parser/soup.py     2018-04-22 
17:07:13.000000000 +0200
+++ new/html5-parser-0.4.6/src/html5_parser/soup.py     2019-05-13 
09:03:47.000000000 +0200
@@ -63,9 +63,11 @@
 
 def bs4_new_tag(Tag, soup):
 
+    builder = soup.builder
+
     def new_tag(name, attrs):
         attrs = {k: map_list_attributes(name, k, v) for k, v in attrs.items()}
-        return Tag(soup, name=name, attrs=attrs)
+        return Tag(soup, name=name, attrs=attrs, builder=builder)
 
     return new_tag
 
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/html5-parser-0.4.5/src/python-wrapper.c 
new/html5-parser-0.4.6/src/python-wrapper.c
--- old/html5-parser-0.4.5/src/python-wrapper.c 2018-04-22 17:07:13.000000000 
+0200
+++ new/html5-parser-0.4.6/src/python-wrapper.c 2019-05-13 09:03:47.000000000 
+0200
@@ -15,7 +15,7 @@
 
 #define MAJOR 0
 #define MINOR 4
-#define PATCH 5
+#define PATCH 6
 
 static char *NAME =  "libxml2:xmlDoc";
 static char *DESTRUCTOR = "destructor:xmlFreeDoc";
@@ -134,7 +134,7 @@
 
 static PyMethodDef
 methods[] = {
-    {"parse", (PyCFunction)parse, METH_VARARGS | METH_KEYWORDS,
+    {"parse", (PyCFunction)(void(*)(void))(PyCFunctionWithKeywords)(parse), METH_VARARGS | METH_KEYWORDS,
         "parse()\n\nParse specified bytestring which must be in the UTF-8 encoding."
     },
 
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/html5-parser-0.4.5/test/malformed.py 
new/html5-parser-0.4.6/test/malformed.py
--- old/html5-parser-0.4.5/test/malformed.py    2018-04-22 17:07:13.000000000 
+0200
+++ new/html5-parser-0.4.6/test/malformed.py    2019-05-13 09:03:47.000000000 
+0200
@@ -15,3 +15,9 @@
         p = root[1][0]
         self.ae(p.attrib, {'bad_attr': 'x'})
         self.ae(p[0].tag, 'bad_name')
+
+    def test_multiple_roots(self):
+        root = parse("<html><html />", maybe_xhtml=True)
+        from lxml import etree
+        self.ae(etree.tostring(root, encoding='unicode'),
+                '<html xmlns="http://www.w3.org/1999/xhtml"><head/><body/></html>')
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/html5-parser-0.4.5/test/soup.py 
new/html5-parser-0.4.6/test/soup.py
--- old/html5-parser-0.4.5/test/soup.py 2018-04-22 17:07:13.000000000 +0200
+++ new/html5-parser-0.4.6/test/soup.py 2019-05-13 09:03:47.000000000 +0200
@@ -23,11 +23,13 @@
         self.ae(
             type('')(root), 
'<html><head></head><body><p>\n<a>y</a>z<x:x>1</x:x></p></body></html>')
         root = parse('<svg><image>')
-        self.ae(type('')(root), 
'<html><head></head><body><svg><image></image></svg></body></html>')
+        self.ae(type('')(root), 
'<html><head></head><body><svg><image/></svg></body></html>')
         root = parse('<p><!-- ---->')
         self.ae(type('')(root), '<html><head></head><body><p><!-- 
----></p></body></html>')
         root = parse('<p><i><b>')
         self.ae(type('')(root), 
'<html><head></head><body><p><i><b></b></i></p></body></html>')
+        root = parse('<p>a<br>b')
+        self.ae(type('')(root), 
'<html><head></head><body><p>a<br/>b</p></body></html>')
 
     def test_attr_soup(self):
         root = parse('<p a=1 b=2 ID=3><a a=a>')
@@ -38,7 +40,7 @@
         self.ae(
             type('')(root),
             '<html><head></head><body>'
-            '<p a="1"><svg><image xlink:href="h"></image></svg></p>'
+            '<p a="1"><svg><image xlink:href="h"/></svg></p>'
             '</body></html>'
         )
         root = parse('<html xml:lang="en" lang="fr"><p>')

commit python-html5-parser for openSUSE:Factory

Reply via email to