Hello community,

here is the log from the commit of package python-html5-parser for 
openSUSE:Factory checked in at 2018-06-28 15:14:24
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Comparing /work/SRC/openSUSE:Factory/python-html5-parser (Old)
 and      /work/SRC/openSUSE:Factory/.python-html5-parser.new (New)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

Package is "python-html5-parser"

Thu Jun 28 15:14:24 2018 rev:4 rq:619429 version:0.4.5

Changes:
--------
--- /work/SRC/openSUSE:Factory/python-html5-parser/python-html5-parser.changes 2017-10-02 16:54:21.853039967 +0200
+++ /work/SRC/openSUSE:Factory/.python-html5-parser.new/python-html5-parser.changes 2018-06-28 15:14:26.967479594 +0200
@@ -1,0 +2,8 @@
+Wed Jun 27 17:15:29 UTC 2018 - [email protected]
+
+- update to 0.4.5
+  No changelog from upstream.
+  See instead here:
+  https://github.com/kovidgoyal/html5-parser/compare/v0.4.4...v0.4.5?diff=unified&name=v0.4.5
+
+-------------------------------------------------------------------

Old:
----
  v0.4.4.tar.gz

New:
----
  v0.4.5.tar.gz

++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

Other differences:
------------------
++++++ python-html5-parser.spec ++++++
--- /var/tmp/diff_new_pack.FVDYL3/_old  2018-06-28 15:14:27.623478393 +0200
+++ /var/tmp/diff_new_pack.FVDYL3/_new  2018-06-28 15:14:27.627478386 +0200
@@ -1,7 +1,7 @@
 #
 # spec file for package python-html5-parser
 #
-# Copyright (c) 2017 SUSE LINUX GmbH, Nuernberg, Germany.
+# Copyright (c) 2018 SUSE LINUX GmbH, Nuernberg, Germany.
 #
 # All modifications and additions to the file contributed by third parties
 # remain the property of their copyright owners, unless otherwise agreed
@@ -19,7 +19,7 @@
 %{?!python_module:%define python_module() python-%{**} python3-%{**}}
 
 Name:           python-html5-parser
-Version:        0.4.4
+Version:        0.4.5
 Release:        0
 Summary:        C based HTML 5 parsing for Python
 License:        Apache-2.0

++++++ v0.4.4.tar.gz -> v0.4.5.tar.gz ++++++
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html5-parser-0.4.4/.travis.yml new/html5-parser-0.4.5/.travis.yml
--- old/html5-parser-0.4.4/.travis.yml  2017-08-01 07:18:10.000000000 +0200
+++ new/html5-parser-0.4.5/.travis.yml  2018-04-22 17:07:13.000000000 +0200
@@ -1,11 +1,3 @@
-cache: pip
-dist: trusty
-sudo: false
-addons:
-    apt:
-        packages:
-            - libxml2-dev
-
 env:
     global:
         - PYTHONHASHSEED=random
@@ -15,31 +7,69 @@
         - os: linux
           language: python
           python: 2.7
-          env: BUILDER=build.py CC=gcc
+          env: BUILDER=build.py CC=gcc PYTHON=python
+          group: beta
+          dist: trusty
+          sudo: false
+          addons:
+              apt:
+                  packages:
+                      - libxml2-dev
         - os: linux
           language: python
           python: 2.7
-          env: BUILDER=build.py CC=clang
+          env: BUILDER=build.py CC=clang PYTHON=python LSAN_OPTIONS=verbosity=1:log_threads=1
+          group: beta
+          dist: trusty
+          # See https://github.com/travis-ci/travis-ci/issues/9033
+          sudo: required
+          addons:
+              apt:
+                  packages:
+                      - libxml2-dev
         - os: linux
           language: python
           python: 2.7
-          env: BUILDER=setup.py
+          env: BUILDER=setup.py PYTHON=python
+          group: beta
+          dist: trusty
+          sudo: false
+          addons:
+              apt:
+                  packages:
+                      - libxml2-dev
         - os: linux
           language: python
-          python: 3.4
-          env: BUILDER=setup.py
-
+          python: 3.6
+          env: BUILDER=setup.py PYTHON=python
+          group: beta
+          dist: trusty
+          sudo: false
+          addons:
+              apt:
+                  packages:
+                      - libxml2-dev
         - os: osx
-          python:
           language: generic
-          env: BUILDER=setup.py
+          env: BUILDER=setup.py PYTHON=python3
 
-install:
-    - pip install --no-binary lxml chardet lxml beautifulsoup4
-    - if [[ $TRAVIS_PYTHON_VERSION == 2.* ]]; then pip install BeautifulSoup; fi
-    - python -c "from lxml import etree; print(etree)"
-    - git clone --depth 1 "https://github.com/html5lib/html5lib-tests.git" test/html5lib-tests
+install: |
+    set -e
+    if [[ "$TRAVIS_OS_NAME" == 'osx' ]]; then
+        brew update;
+        brew upgrade python;
+        python3 --version
+        pip3 install --no-binary lxml chardet lxml beautifulsoup4
+    else
+        PLIB=$(ldd `which python` | grep libpython | cut -d ' ' -f 3)
+        export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:`dirname $PLIB`
+        pip install --no-binary lxml chardet lxml beautifulsoup4
+        if [[ $TRAVIS_PYTHON_VERSION == 2.* ]]; then pip install BeautifulSoup; fi
+    fi
+    $PYTHON -c "from lxml import etree; print(etree)"
+    git clone --depth 1 "https://github.com/html5lib/html5lib-tests.git" test/html5lib-tests
+    set +e
 
 script:
-    - python $BUILDER test
-    - if [[ $BUILDER == "build.py" ]]; then python $BUILDER leak; fi
+    - $PYTHON $BUILDER test
+    - if [[ $BUILDER == "build.py" ]]; then $PYTHON $BUILDER leak; fi
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html5-parser-0.4.4/build.py new/html5-parser-0.4.5/build.py
--- old/html5-parser-0.4.4/build.py     2017-08-01 07:18:10.000000000 +0200
+++ new/html5-parser-0.4.5/build.py     2018-04-22 17:07:13.000000000 +0200
@@ -180,7 +180,7 @@
 
 def find_c_files(src_dir):
     ans, headers = [], []
-    for x in os.listdir(src_dir):
+    for x in sorted(os.listdir(src_dir)):
         ext = os.path.splitext(x)[1]
         if ext == '.c' and not x.endswith('-check.c'):
             ans.append(os.path.join(src_dir, x))
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html5-parser-0.4.4/gumbo/gumbo.h new/html5-parser-0.4.5/gumbo/gumbo.h
--- old/html5-parser-0.4.4/gumbo/gumbo.h        2017-08-01 07:18:10.000000000 +0200
+++ new/html5-parser-0.4.5/gumbo/gumbo.h        2018-04-22 17:07:13.000000000 +0200
@@ -189,7 +189,7 @@
 
 /**
  * Fixes the case of SVG elements that are not all lowercase.
- * http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#parsing-main-inforeign
+ * https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-inforeign
  * This is not done at parse time because there's no place to store a mutated
  * tag name.  tag_name is an enum (which will be TAG_UNKNOWN for most SVG tags
  * without special handling), while original_tag_name is a pointer into the
@@ -199,7 +199,7 @@
  * no normalization is called for.  The return value is static data and owned by
  * the library.
  */
-const char* gumbo_normalize_svg_tagname(const GumboStringPiece* tagname, uint8_t *sz);
+const char* gumbo_normalize_svg_tagname(const GumboStringPiece* tagname);
 
 /**
  * Converts a tag name string (which may be in upper or mixed case) to a tag
@@ -230,7 +230,7 @@
    * The namespace for the attribute.  This will usually be
    * GUMBO_ATTR_NAMESPACE_NONE, but some XLink/XMLNS/XML attributes take special
    * values, per:
-   * http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#adjust-foreign-attributes
+   * https://html.spec.whatwg.org/multipage/parsing.html#adjust-foreign-attributes
    */
   GumboAttributeNamespaceEnum attr_namespace;
 
@@ -319,7 +319,7 @@
  */
 typedef struct GumboInternalNode GumboNode;
 
-/** http://www.whatwg.org/specs/web-apps/current-work/complete/dom.html#quirks-mode */
+/** https://dom.spec.whatwg.org/#concept-document-quirks */
 typedef enum {
   GUMBO_DOCTYPE_NO_QUIRKS,
   GUMBO_DOCTYPE_QUIRKS,
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html5-parser-0.4.4/gumbo/parser.c new/html5-parser-0.4.5/gumbo/parser.c
--- old/html5-parser-0.4.4/gumbo/parser.c       2017-08-01 07:18:10.000000000 +0200
+++ new/html5-parser-0.4.5/gumbo/parser.c       2018-04-22 17:07:13.000000000 +0200
@@ -30,6 +30,7 @@
 #include "utf8.h"
 #include "util.h"
 #include "vector.h"
+#include "replacement.h"
 
 #define AVOID_UNUSED_VARIABLE_WARNING(i) (void) (i)
 
@@ -44,7 +45,7 @@
 #define TAG_MATHML(tag) [GUMBO_TAG_##tag] = (1 << GUMBO_NAMESPACE_MATHML)
 
 #define TAGSET_INCLUDES(tagset, namespace, tag) \
-  (tag < GUMBO_TAG_LAST && tagset[(int) tag] == (1 << (int) namespace))
+  (tag < GUMBO_TAG_LAST && tagset[(int) tag] & (1 << (int) namespace))
 
 // selected forward declarations as it is getting hard to find
 // an appropriate order
@@ -165,121 +166,11 @@
 static const char* kLegalXmlns[] = {"http://www.w3.org/1999/xhtml",
     "http://www.w3.org/2000/svg", "http://www.w3.org/1998/Math/MathML"};
 
-typedef struct _ReplacementEntry {
+typedef struct {
   const GumboStringPiece from;
   const GumboStringPiece to;
 } ReplacementEntry;
 
-#define REPLACEMENT_ENTRY(from, to) \
-  { GUMBO_STRING(from), GUMBO_STRING(to) }
-
-// Static data for SVG attribute replacements.
-// https://html.spec.whatwg.org/multipage/syntax.html#creating-and-inserting-nodes
-static const ReplacementEntry kSvgAttributeReplacements[] = {
-    REPLACEMENT_ENTRY("attributename", "attributeName"),
-    REPLACEMENT_ENTRY("attributetype", "attributeType"),
-    REPLACEMENT_ENTRY("basefrequency", "baseFrequency"),
-    REPLACEMENT_ENTRY("baseprofile", "baseProfile"),
-    REPLACEMENT_ENTRY("calcmode", "calcMode"),
-    REPLACEMENT_ENTRY("clippathunits", "clipPathUnits"),
-    // REPLACEMENT_ENTRY("contentscripttype", "contentScriptType"),
-    // REPLACEMENT_ENTRY("contentstyletype", "contentStyleType"),
-    REPLACEMENT_ENTRY("diffuseconstant", "diffuseConstant"),
-    REPLACEMENT_ENTRY("edgemode", "edgeMode"),
-    // REPLACEMENT_ENTRY("externalresourcesrequired",
-    // "externalResourcesRequired"),
-    // REPLACEMENT_ENTRY("filterres", "filterRes"),
-    REPLACEMENT_ENTRY("filterunits", "filterUnits"),
-    REPLACEMENT_ENTRY("glyphref", "glyphRef"),
-    REPLACEMENT_ENTRY("gradienttransform", "gradientTransform"),
-    REPLACEMENT_ENTRY("gradientunits", "gradientUnits"),
-    REPLACEMENT_ENTRY("kernelmatrix", "kernelMatrix"),
-    REPLACEMENT_ENTRY("kernelunitlength", "kernelUnitLength"),
-    REPLACEMENT_ENTRY("keypoints", "keyPoints"),
-    REPLACEMENT_ENTRY("keysplines", "keySplines"),
-    REPLACEMENT_ENTRY("keytimes", "keyTimes"),
-    REPLACEMENT_ENTRY("lengthadjust", "lengthAdjust"),
-    REPLACEMENT_ENTRY("limitingconeangle", "limitingConeAngle"),
-    REPLACEMENT_ENTRY("markerheight", "markerHeight"),
-    REPLACEMENT_ENTRY("markerunits", "markerUnits"),
-    REPLACEMENT_ENTRY("markerwidth", "markerWidth"),
-    REPLACEMENT_ENTRY("maskcontentunits", "maskContentUnits"),
-    REPLACEMENT_ENTRY("maskunits", "maskUnits"),
-    REPLACEMENT_ENTRY("numoctaves", "numOctaves"),
-    REPLACEMENT_ENTRY("pathlength", "pathLength"),
-    REPLACEMENT_ENTRY("patterncontentunits", "patternContentUnits"),
-    REPLACEMENT_ENTRY("patterntransform", "patternTransform"),
-    REPLACEMENT_ENTRY("patternunits", "patternUnits"),
-    REPLACEMENT_ENTRY("pointsatx", "pointsAtX"),
-    REPLACEMENT_ENTRY("pointsaty", "pointsAtY"),
-    REPLACEMENT_ENTRY("pointsatz", "pointsAtZ"),
-    REPLACEMENT_ENTRY("preservealpha", "preserveAlpha"),
-    REPLACEMENT_ENTRY("preserveaspectratio", "preserveAspectRatio"),
-    REPLACEMENT_ENTRY("primitiveunits", "primitiveUnits"),
-    REPLACEMENT_ENTRY("refx", "refX"), REPLACEMENT_ENTRY("refy", "refY"),
-    REPLACEMENT_ENTRY("repeatcount", "repeatCount"),
-    REPLACEMENT_ENTRY("repeatdur", "repeatDur"),
-    REPLACEMENT_ENTRY("requiredextensions", "requiredExtensions"),
-    REPLACEMENT_ENTRY("requiredfeatures", "requiredFeatures"),
-    REPLACEMENT_ENTRY("specularconstant", "specularConstant"),
-    REPLACEMENT_ENTRY("specularexponent", "specularExponent"),
-    REPLACEMENT_ENTRY("spreadmethod", "spreadMethod"),
-    REPLACEMENT_ENTRY("startoffset", "startOffset"),
-    REPLACEMENT_ENTRY("stddeviation", "stdDeviation"),
-    REPLACEMENT_ENTRY("stitchtiles", "stitchTiles"),
-    REPLACEMENT_ENTRY("surfacescale", "surfaceScale"),
-    REPLACEMENT_ENTRY("systemlanguage", "systemLanguage"),
-    REPLACEMENT_ENTRY("tablevalues", "tableValues"),
-    REPLACEMENT_ENTRY("targetx", "targetX"),
-    REPLACEMENT_ENTRY("targety", "targetY"),
-    REPLACEMENT_ENTRY("textlength", "textLength"),
-    REPLACEMENT_ENTRY("viewbox", "viewBox"),
-    REPLACEMENT_ENTRY("viewtarget", "viewTarget"),
-    REPLACEMENT_ENTRY("xchannelselector", "xChannelSelector"),
-    REPLACEMENT_ENTRY("ychannelselector", "yChannelSelector"),
-    REPLACEMENT_ENTRY("zoomandpan", "zoomAndPan"),
-};
-
-static const ReplacementEntry kSvgTagReplacements[] = {
-    REPLACEMENT_ENTRY("altglyph", "altGlyph"),
-    REPLACEMENT_ENTRY("altglyphdef", "altGlyphDef"),
-    REPLACEMENT_ENTRY("altglyphitem", "altGlyphItem"),
-    REPLACEMENT_ENTRY("animatecolor", "animateColor"),
-    REPLACEMENT_ENTRY("animatemotion", "animateMotion"),
-    REPLACEMENT_ENTRY("animatetransform", "animateTransform"),
-    REPLACEMENT_ENTRY("clippath", "clipPath"),
-    REPLACEMENT_ENTRY("feblend", "feBlend"),
-    REPLACEMENT_ENTRY("fecolormatrix", "feColorMatrix"),
-    REPLACEMENT_ENTRY("fecomponenttransfer", "feComponentTransfer"),
-    REPLACEMENT_ENTRY("fecomposite", "feComposite"),
-    REPLACEMENT_ENTRY("feconvolvematrix", "feConvolveMatrix"),
-    REPLACEMENT_ENTRY("fediffuselighting", "feDiffuseLighting"),
-    REPLACEMENT_ENTRY("fedisplacementmap", "feDisplacementMap"),
-    REPLACEMENT_ENTRY("fedistantlight", "feDistantLight"),
-    REPLACEMENT_ENTRY("feflood", "feFlood"),
-    REPLACEMENT_ENTRY("fefunca", "feFuncA"),
-    REPLACEMENT_ENTRY("fefuncb", "feFuncB"),
-    REPLACEMENT_ENTRY("fefuncg", "feFuncG"),
-    REPLACEMENT_ENTRY("fefuncr", "feFuncR"),
-    REPLACEMENT_ENTRY("fegaussianblur", "feGaussianBlur"),
-    REPLACEMENT_ENTRY("feimage", "feImage"),
-    REPLACEMENT_ENTRY("femerge", "feMerge"),
-    REPLACEMENT_ENTRY("femergenode", "feMergeNode"),
-    REPLACEMENT_ENTRY("femorphology", "feMorphology"),
-    REPLACEMENT_ENTRY("feoffset", "feOffset"),
-    REPLACEMENT_ENTRY("fepointlight", "fePointLight"),
-    REPLACEMENT_ENTRY("fespecularlighting", "feSpecularLighting"),
-    REPLACEMENT_ENTRY("fespotlight", "feSpotLight"),
-    REPLACEMENT_ENTRY("fetile", "feTile"),
-    REPLACEMENT_ENTRY("feturbulence", "feTurbulence"),
-    REPLACEMENT_ENTRY("foreignobject", "foreignObject"),
-    REPLACEMENT_ENTRY("glyphref", "glyphRef"),
-    REPLACEMENT_ENTRY("lineargradient", "linearGradient"),
-    REPLACEMENT_ENTRY("radialgradient", "radialGradient"),
-    REPLACEMENT_ENTRY("solidcolor", "solidcolor"),
-    REPLACEMENT_ENTRY("textpath", "textPath"),
-};
-
 typedef struct _NamespacedAttributeReplacement {
   const char* from;
   const char* local_name;
@@ -1577,12 +1468,20 @@
           TAG(PARAM), TAG(PLAINTEXT), TAG(PRE), TAG(SCRIPT), TAG(SECTION),
           TAG(SELECT), TAG(STYLE), TAG(SUMMARY), TAG(TABLE), TAG(TBODY),
           TAG(TD), TAG(TEMPLATE), TAG(TEXTAREA), TAG(TFOOT), TAG(TH),
-          TAG(THEAD), TAG(TITLE), TAG(TR), TAG(UL), TAG(WBR), TAG(XMP),
+          TAG(THEAD), TAG(TR), TAG(UL), TAG(WBR), TAG(XMP),
 
           TAG_MATHML(MI), TAG_MATHML(MO), TAG_MATHML(MN), TAG_MATHML(MS),
           TAG_MATHML(MTEXT), TAG_MATHML(ANNOTATION_XML),
 
-          TAG_SVG(FOREIGNOBJECT), TAG_SVG(DESC)});
+          TAG_SVG(FOREIGNOBJECT), TAG_SVG(DESC),
+
+          // This TagSet needs to include the "title" element in both the HTML and
+          // SVG namespaces. Using both TAG(TITLE) and TAG_SVG(TITLE) won't work, due
+          // to the simplistic way in which the TAG macros are implemented, so we do
+          // it like this instead:
+          [GUMBO_TAG_TITLE] = (1 << GUMBO_NAMESPACE_HTML) | (1 << GUMBO_NAMESPACE_SVG)
+      }
+  );
 }
 
 // Implicitly closes currently open elements until it reaches an element with
@@ -1674,16 +1573,9 @@
 }
 
 const char* gumbo_normalize_svg_tagname(
-    const GumboStringPiece* tag, uint8_t* sz) {
-  for (unsigned int i = 0;
-       i < sizeof(kSvgTagReplacements) / sizeof(ReplacementEntry); ++i) {
-    const ReplacementEntry* entry = &kSvgTagReplacements[i];
-    if (gumbo_string_equals_ignore_case(tag, &entry->from)) {
-      *sz = entry->to.length;
-      return entry->to.data;
-    }
-  }
-  return NULL;
+    const GumboStringPiece* tag) {
+    const StringReplacement *replacement = gumbo_get_svg_tag_replacement(tag->data, tag->length);
+    return replacement ? replacement->to : NULL;
 }
 
 // http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#adjust-foreign-attributes
@@ -1713,16 +1605,15 @@
 static void adjust_svg_attributes(GumboToken* token) {
   assert(token->type == GUMBO_TOKEN_START_TAG);
   const GumboVector* attributes = &token->v.start_tag.attributes;
-  for (unsigned int i = 0;
-       i < sizeof(kSvgAttributeReplacements) / sizeof(ReplacementEntry); ++i) {
-    const ReplacementEntry* entry = &kSvgAttributeReplacements[i];
-    GumboAttribute* attr = gumbo_get_attribute(attributes, entry->from.data);
-    if (!attr) {
+  for (unsigned int i = 0, n = attributes->length; i < n; i++) {
+    GumboAttribute* attr = (GumboAttribute*) attributes->data[i];
+    const StringReplacement* replacement = gumbo_get_svg_attr_replacement(attr->name, attr->original_name.length);
+    if (!replacement) {
       continue;
     }
     /* TODO:vmg refactor to use attribute helpers */
     gumbo_free((void*) attr->name);
-    attr->name = gumbo_strdup(entry->to.data);
+    attr->name = gumbo_strdup(replacement->to);
   }
 }
 
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html5-parser-0.4.4/gumbo/replacement.h new/html5-parser-0.4.5/gumbo/replacement.h
--- old/html5-parser-0.4.4/gumbo/replacement.h  1970-01-01 01:00:00.000000000 +0100
+++ new/html5-parser-0.4.5/gumbo/replacement.h  2018-04-22 17:07:13.000000000 +0200
@@ -0,0 +1,18 @@
+#pragma once
+
+#include <stddef.h>
+
+typedef struct {
+  const char *const from;
+  const char *const to;
+} StringReplacement;
+
+const StringReplacement *gumbo_get_svg_tag_replacement(
+  const char* str,
+  size_t len
+);
+
+const StringReplacement *gumbo_get_svg_attr_replacement(
+  const char* str,
+  size_t len
+);
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html5-parser-0.4.4/gumbo/svg_attrs.c new/html5-parser-0.4.5/gumbo/svg_attrs.c
--- old/html5-parser-0.4.4/gumbo/svg_attrs.c    1970-01-01 01:00:00.000000000 +0100
+++ new/html5-parser-0.4.5/gumbo/svg_attrs.c    2018-04-22 17:07:13.000000000 +0200
@@ -0,0 +1,306 @@
+/* ANSI-C code produced by gperf version 3.1 */
+/* Command-line: gperf -m100 svg_attrs.gperf  */
+/* Computed positions: -k'1,10,$' */
+
+#if !((' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) \
+      && ('%' == 37) && ('&' == 38) && ('\'' == 39) && ('(' == 40) \
+      && (')' == 41) && ('*' == 42) && ('+' == 43) && (',' == 44) \
+      && ('-' == 45) && ('.' == 46) && ('/' == 47) && ('0' == 48) \
+      && ('1' == 49) && ('2' == 50) && ('3' == 51) && ('4' == 52) \
+      && ('5' == 53) && ('6' == 54) && ('7' == 55) && ('8' == 56) \
+      && ('9' == 57) && (':' == 58) && (';' == 59) && ('<' == 60) \
+      && ('=' == 61) && ('>' == 62) && ('?' == 63) && ('A' == 65) \
+      && ('B' == 66) && ('C' == 67) && ('D' == 68) && ('E' == 69) \
+      && ('F' == 70) && ('G' == 71) && ('H' == 72) && ('I' == 73) \
+      && ('J' == 74) && ('K' == 75) && ('L' == 76) && ('M' == 77) \
+      && ('N' == 78) && ('O' == 79) && ('P' == 80) && ('Q' == 81) \
+      && ('R' == 82) && ('S' == 83) && ('T' == 84) && ('U' == 85) \
+      && ('V' == 86) && ('W' == 87) && ('X' == 88) && ('Y' == 89) \
+      && ('Z' == 90) && ('[' == 91) && ('\\' == 92) && (']' == 93) \
+      && ('^' == 94) && ('_' == 95) && ('a' == 97) && ('b' == 98) \
+      && ('c' == 99) && ('d' == 100) && ('e' == 101) && ('f' == 102) \
+      && ('g' == 103) && ('h' == 104) && ('i' == 105) && ('j' == 106) \
+      && ('k' == 107) && ('l' == 108) && ('m' == 109) && ('n' == 110) \
+      && ('o' == 111) && ('p' == 112) && ('q' == 113) && ('r' == 114) \
+      && ('s' == 115) && ('t' == 116) && ('u' == 117) && ('v' == 118) \
+      && ('w' == 119) && ('x' == 120) && ('y' == 121) && ('z' == 122) \
+      && ('{' == 123) && ('|' == 124) && ('}' == 125) && ('~' == 126))
+/* The character set is not based on ISO-646.  */
+#error "gperf generated tables don't work with this execution character set. Please report a bug to <bug-gperf@gnu.org>."
+#endif
+
+#line 2 "svg_attrs.gperf"
+
+#include "replacement.h"
+#include <string.h>
+
+#define TOTAL_KEYWORDS 58
+#define MIN_WORD_LENGTH 4
+#define MAX_WORD_LENGTH 19
+#define MIN_HASH_VALUE 5
+#define MAX_HASH_VALUE 77
+/* maximum key range = 73, duplicates = 0 */
+
+#ifndef GPERF_DOWNCASE
+#define GPERF_DOWNCASE 1
+static unsigned char gperf_downcase[256] =
+  {
+      0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,
+     15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,  27,  28,  29,
+     30,  31,  32,  33,  34,  35,  36,  37,  38,  39,  40,  41,  42,  43,  44,
+     45,  46,  47,  48,  49,  50,  51,  52,  53,  54,  55,  56,  57,  58,  59,
+     60,  61,  62,  63,  64,  97,  98,  99, 100, 101, 102, 103, 104, 105, 106,
+    107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121,
+    122,  91,  92,  93,  94,  95,  96,  97,  98,  99, 100, 101, 102, 103, 104,
+    105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
+    120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134,
+    135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149,
+    150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164,
+    165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179,
+    180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194,
+    195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209,
+    210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224,
+    225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
+    240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254,
+    255
+  };
+#endif
+
+#ifndef GPERF_CASE_MEMCMP
+#define GPERF_CASE_MEMCMP 1
+static int
+gperf_case_memcmp (register const char *s1, register const char *s2, register size_t n)
+{
+  for (; n > 0;)
+    {
+      unsigned char c1 = gperf_downcase[(unsigned char)*s1++];
+      unsigned char c2 = gperf_downcase[(unsigned char)*s2++];
+      if (c1 == c2)
+        {
+          n--;
+          continue;
+        }
+      return (int)c1 - (int)c2;
+    }
+  return 0;
+}
+#endif
+
+#ifdef __GNUC__
+__inline
+#else
+#ifdef __cplusplus
+inline
+#endif
+#endif
+static unsigned int
+hash (register const char *str, register size_t len)
+{
+  static const unsigned char asso_values[] =
+    {
+      78, 78, 78, 78, 78, 78, 78, 78, 78, 78,
+      78, 78, 78, 78, 78, 78, 78, 78, 78, 78,
+      78, 78, 78, 78, 78, 78, 78, 78, 78, 78,
+      78, 78, 78, 78, 78, 78, 78, 78, 78, 78,
+      78, 78, 78, 78, 78, 78, 78, 78, 78, 78,
+      78, 78, 78, 78, 78, 78, 78, 78, 78, 78,
+      78, 78, 78, 78, 78,  5, 78, 39, 14,  1,
+      31, 31, 13, 13, 78, 78, 22, 25, 10,  2,
+       7, 78, 22,  0,  1,  3,  1, 78,  0, 36,
+      14, 17, 20, 78, 78, 78, 78,  5, 78, 39,
+      14,  1, 31, 31, 13, 13, 78, 78, 22, 25,
+      10,  2,  7, 78, 22,  0,  1,  3,  1, 78,
+       0, 36, 14, 17, 20, 78, 78, 78, 78, 78,
+      78, 78, 78, 78, 78, 78, 78, 78, 78, 78,
+      78, 78, 78, 78, 78, 78, 78, 78, 78, 78,
+      78, 78, 78, 78, 78, 78, 78, 78, 78, 78,
+      78, 78, 78, 78, 78, 78, 78, 78, 78, 78,
+      78, 78, 78, 78, 78, 78, 78, 78, 78, 78,
+      78, 78, 78, 78, 78, 78, 78, 78, 78, 78,
+      78, 78, 78, 78, 78, 78, 78, 78, 78, 78,
+      78, 78, 78, 78, 78, 78, 78, 78, 78, 78,
+      78, 78, 78, 78, 78, 78, 78, 78, 78, 78,
+      78, 78, 78, 78, 78, 78, 78, 78, 78, 78,
+      78, 78, 78, 78, 78, 78, 78, 78, 78, 78,
+      78, 78, 78, 78, 78, 78, 78, 78, 78, 78,
+      78, 78, 78, 78, 78, 78, 78, 78
+    };
+  register unsigned int hval = len;
+
+  switch (hval)
+    {
+      default:
+        hval += asso_values[(unsigned char)str[9]];
+      /*FALLTHROUGH*/
+      case 9:
+      case 8:
+      case 7:
+      case 6:
+      case 5:
+      case 4:
+      case 3:
+      case 2:
+      case 1:
+        hval += asso_values[(unsigned char)str[0]+2];
+        break;
+    }
+  return hval + asso_values[(unsigned char)str[len - 1]];
+}
+
+static const unsigned char lengthtable[] =
+  {
+     0,  0,  0,  0,  0,  4,  0,  7,  7,  0,  8,  9, 10, 11,
+    11, 11, 11, 10, 16, 18, 16, 12, 16, 11, 13, 11, 12, 11,
+    16,  0, 17,  9,  9,  8,  9, 10, 13, 10, 12, 14,  8,  4,
+    12, 19,  7,  9, 12, 12, 11, 14, 10, 19,  8, 16, 13, 16,
+    16, 15, 10, 12,  0,  0, 13, 13, 13,  0,  0,  9, 16,  0,
+     0,  0,  0,  0,  0,  0,  0, 17
+  };
+
+static const StringReplacement wordlist[] =
+  {
+    {(char*)0}, {(char*)0}, {(char*)0}, {(char*)0},
+    {(char*)0},
+#line 58 "svg_attrs.gperf"
+    {"refx", "refX"},
+    {(char*)0},
+#line 76 "svg_attrs.gperf"
+    {"viewbox", "viewBox"},
+#line 73 "svg_attrs.gperf"
+    {"targetx", "targetX"},
+    {(char*)0},
+#line 27 "svg_attrs.gperf"
+    {"calcmode", "calcMode"},
+#line 46 "svg_attrs.gperf"
+    {"maskunits", "maskUnits"},
+#line 77 "svg_attrs.gperf"
+    {"viewtarget", "viewTarget"},
+#line 72 "svg_attrs.gperf"
+    {"tablevalues", "tableValues"},
+#line 43 "svg_attrs.gperf"
+    {"markerunits", "markerUnits"},
+#line 69 "svg_attrs.gperf"
+    {"stitchtiles", "stitchTiles"},
+#line 67 "svg_attrs.gperf"
+    {"startoffset", "startOffset"},
+#line 47 "svg_attrs.gperf"
+    {"numoctaves", "numOctaves"},
+#line 63 "svg_attrs.gperf"
+    {"requiredfeatures", "requiredFeatures"},
+#line 62 "svg_attrs.gperf"
+    {"requiredextensions", "requiredExtensions"},
+#line 65 "svg_attrs.gperf"
+    {"specularexponent", "specularExponent"},
+#line 70 "svg_attrs.gperf"
+    {"surfacescale", "surfaceScale"},
+#line 64 "svg_attrs.gperf"
+    {"specularconstant", "specularConstant"},
+#line 60 "svg_attrs.gperf"
+    {"repeatcount", "repeatCount"},
+#line 28 "svg_attrs.gperf"
+    {"clippathunits", "clipPathUnits"},
+#line 31 "svg_attrs.gperf"
+    {"filterunits", "filterUnits"},
+#line 40 "svg_attrs.gperf"
+    {"lengthadjust", "lengthAdjust"},
+#line 44 "svg_attrs.gperf"
+    {"markerwidth", "markerWidth"},
+#line 45 "svg_attrs.gperf"
+    {"maskcontentunits", "maskContentUnits"},
+    {(char*)0},
+#line 41 "svg_attrs.gperf"
+    {"limitingconeangle", "limitingConeAngle"},
+#line 52 "svg_attrs.gperf"
+    {"pointsatx", "pointsAtX"},
+#line 61 "svg_attrs.gperf"
+    {"repeatdur", "repeatDur"},
+#line 39 "svg_attrs.gperf"
+    {"keytimes", "keyTimes"},
+#line 37 "svg_attrs.gperf"
+    {"keypoints", "keyPoints"},
+#line 38 "svg_attrs.gperf"
+    {"keysplines", "keySplines"},
+#line 34 "svg_attrs.gperf"
+    {"gradientunits", "gradientUnits"},
+#line 75 "svg_attrs.gperf"
+    {"textlength", "textLength"},
+#line 68 "svg_attrs.gperf"
+    {"stddeviation", "stdDeviation"},
+#line 57 "svg_attrs.gperf"
+    {"primitiveunits", "primitiveUnits"},
+#line 30 "svg_attrs.gperf"
+    {"edgemode", "edgeMode"},
+#line 59 "svg_attrs.gperf"
+    {"refy", "refY"},
+#line 66 "svg_attrs.gperf"
+    {"spreadmethod", "spreadMethod"},
+#line 56 "svg_attrs.gperf"
+    {"preserveaspectratio", "preserveAspectRatio"},
+#line 74 "svg_attrs.gperf"
+    {"targety", "targetY"},
+#line 54 "svg_attrs.gperf"
+    {"pointsatz", "pointsAtZ"},
+#line 42 "svg_attrs.gperf"
+    {"markerheight", "markerHeight"},
+#line 51 "svg_attrs.gperf"
+    {"patternunits", "patternUnits"},
+#line 26 "svg_attrs.gperf"
+    {"baseprofile", "baseProfile"},
+#line 71 "svg_attrs.gperf"
+    {"systemlanguage", "systemLanguage"},
+#line 80 "svg_attrs.gperf"
+    {"zoomandpan", "zoomAndPan"},
+#line 49 "svg_attrs.gperf"
+    {"patterncontentunits", "patternContentUnits"},
+#line 32 "svg_attrs.gperf"
+    {"glyphref", "glyphRef"},
+#line 78 "svg_attrs.gperf"
+    {"xchannelselector", "xChannelSelector"},
+#line 24 "svg_attrs.gperf"
+    {"attributetype", "attributeType"},
+#line 36 "svg_attrs.gperf"
+    {"kernelunitlength", "kernelUnitLength"},
+#line 79 "svg_attrs.gperf"
+    {"ychannelselector", "yChannelSelector"},
+#line 29 "svg_attrs.gperf"
+    {"diffuseconstant", "diffuseConstant"},
+#line 48 "svg_attrs.gperf"
+    {"pathlength", "pathLength"},
+#line 35 "svg_attrs.gperf"
+    {"kernelmatrix", "kernelMatrix"},
+    {(char*)0}, {(char*)0},
+#line 55 "svg_attrs.gperf"
+    {"preservealpha", "preserveAlpha"},
+#line 23 "svg_attrs.gperf"
+    {"attributename", "attributeName"},
+#line 25 "svg_attrs.gperf"
+    {"basefrequency", "baseFrequency"},
+    {(char*)0}, {(char*)0},
+#line 53 "svg_attrs.gperf"
+    {"pointsaty", "pointsAtY"},
+#line 50 "svg_attrs.gperf"
+    {"patterntransform", "patternTransform"},
+    {(char*)0}, {(char*)0}, {(char*)0}, {(char*)0},
+    {(char*)0}, {(char*)0}, {(char*)0}, {(char*)0},
+#line 33 "svg_attrs.gperf"
+    {"gradienttransform", "gradientTransform"}
+  };
+
+const StringReplacement *
+gumbo_get_svg_attr_replacement (register const char *str, register size_t len)
+{
+  if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
+    {
+      register unsigned int key = hash (str, len);
+
+      if (key <= MAX_HASH_VALUE)
+        if (len == lengthtable[key])
+          {
+            register const char *s = wordlist[key].from;
+
+            if (s && (((unsigned char)*str ^ (unsigned char)*s) & ~32) == 0 && !gperf_case_memcmp (str, s, len))
+              return &wordlist[key];
+          }
+    }
+  return 0;
+}
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html5-parser-0.4.4/gumbo/svg_attrs.gperf new/html5-parser-0.4.5/gumbo/svg_attrs.gperf
--- old/html5-parser-0.4.4/gumbo/svg_attrs.gperf        1970-01-01 01:00:00.000000000 +0100
+++ new/html5-parser-0.4.5/gumbo/svg_attrs.gperf        2018-04-22 17:07:13.000000000 +0200
@@ -0,0 +1,80 @@
+// See https://html.spec.whatwg.org/multipage/syntax.html#creating-and-inserting-nodes
+%{
+#include "replacement.h"
+%}
+
+%ignore-case
+%struct-type
+%omit-struct-type
+%compare-lengths
+%global-table
+%readonly-tables
+%null-strings
+%includes
+%define lookup-function-name gumbo_get_svg_attr_replacement
+%define slot-name from
+StringReplacement;
+
+// "contentscripttype", "contentScriptType"
+// "contentstyletype", "contentStyleType"
+// "externalresourcesrequired", "externalResourcesRequired"
+// "filterres", "filterRes"
+%%
+"attributename", "attributeName"
+"attributetype", "attributeType"
+"basefrequency", "baseFrequency"
+"baseprofile", "baseProfile"
+"calcmode", "calcMode"
+"clippathunits", "clipPathUnits"
+"diffuseconstant", "diffuseConstant"
+"edgemode", "edgeMode"
+"filterunits", "filterUnits"
+"glyphref", "glyphRef"
+"gradienttransform", "gradientTransform"
+"gradientunits", "gradientUnits"
+"kernelmatrix", "kernelMatrix"
+"kernelunitlength", "kernelUnitLength"
+"keypoints", "keyPoints"
+"keysplines", "keySplines"
+"keytimes", "keyTimes"
+"lengthadjust", "lengthAdjust"
+"limitingconeangle", "limitingConeAngle"
+"markerheight", "markerHeight"
+"markerunits", "markerUnits"
+"markerwidth", "markerWidth"
+"maskcontentunits", "maskContentUnits"
+"maskunits", "maskUnits"
+"numoctaves", "numOctaves"
+"pathlength", "pathLength"
+"patterncontentunits", "patternContentUnits"
+"patterntransform", "patternTransform"
+"patternunits", "patternUnits"
+"pointsatx", "pointsAtX"
+"pointsaty", "pointsAtY"
+"pointsatz", "pointsAtZ"
+"preservealpha", "preserveAlpha"
+"preserveaspectratio", "preserveAspectRatio"
+"primitiveunits", "primitiveUnits"
+"refx", "refX"
+"refy", "refY"
+"repeatcount", "repeatCount"
+"repeatdur", "repeatDur"
+"requiredextensions", "requiredExtensions"
+"requiredfeatures", "requiredFeatures"
+"specularconstant", "specularConstant"
+"specularexponent", "specularExponent"
+"spreadmethod", "spreadMethod"
+"startoffset", "startOffset"
+"stddeviation", "stdDeviation"
+"stitchtiles", "stitchTiles"
+"surfacescale", "surfaceScale"
+"systemlanguage", "systemLanguage"
+"tablevalues", "tableValues"
+"targetx", "targetX"
+"targety", "targetY"
+"textlength", "textLength"
+"viewbox", "viewBox"
+"viewtarget", "viewTarget"
+"xchannelselector", "xChannelSelector"
+"ychannelselector", "yChannelSelector"
+"zoomandpan", "zoomAndPan"
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html5-parser-0.4.4/gumbo/svg_tags.c new/html5-parser-0.4.5/gumbo/svg_tags.c
--- old/html5-parser-0.4.4/gumbo/svg_tags.c     1970-01-01 01:00:00.000000000 +0100
+++ new/html5-parser-0.4.5/gumbo/svg_tags.c     2018-04-22 17:07:13.000000000 +0200
@@ -0,0 +1,248 @@
+/* ANSI-C code produced by gperf version 3.1 */
+/* Command-line: gperf -m100 svg_tags.gperf  */
+/* Computed positions: -k'3,7' */
+
+#if !((' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) \
+      && ('%' == 37) && ('&' == 38) && ('\'' == 39) && ('(' == 40) \
+      && (')' == 41) && ('*' == 42) && ('+' == 43) && (',' == 44) \
+      && ('-' == 45) && ('.' == 46) && ('/' == 47) && ('0' == 48) \
+      && ('1' == 49) && ('2' == 50) && ('3' == 51) && ('4' == 52) \
+      && ('5' == 53) && ('6' == 54) && ('7' == 55) && ('8' == 56) \
+      && ('9' == 57) && (':' == 58) && (';' == 59) && ('<' == 60) \
+      && ('=' == 61) && ('>' == 62) && ('?' == 63) && ('A' == 65) \
+      && ('B' == 66) && ('C' == 67) && ('D' == 68) && ('E' == 69) \
+      && ('F' == 70) && ('G' == 71) && ('H' == 72) && ('I' == 73) \
+      && ('J' == 74) && ('K' == 75) && ('L' == 76) && ('M' == 77) \
+      && ('N' == 78) && ('O' == 79) && ('P' == 80) && ('Q' == 81) \
+      && ('R' == 82) && ('S' == 83) && ('T' == 84) && ('U' == 85) \
+      && ('V' == 86) && ('W' == 87) && ('X' == 88) && ('Y' == 89) \
+      && ('Z' == 90) && ('[' == 91) && ('\\' == 92) && (']' == 93) \
+      && ('^' == 94) && ('_' == 95) && ('a' == 97) && ('b' == 98) \
+      && ('c' == 99) && ('d' == 100) && ('e' == 101) && ('f' == 102) \
+      && ('g' == 103) && ('h' == 104) && ('i' == 105) && ('j' == 106) \
+      && ('k' == 107) && ('l' == 108) && ('m' == 109) && ('n' == 110) \
+      && ('o' == 111) && ('p' == 112) && ('q' == 113) && ('r' == 114) \
+      && ('s' == 115) && ('t' == 116) && ('u' == 117) && ('v' == 118) \
+      && ('w' == 119) && ('x' == 120) && ('y' == 121) && ('z' == 122) \
+      && ('{' == 123) && ('|' == 124) && ('}' == 125) && ('~' == 126))
+/* The character set is not based on ISO-646.  */
+#error "gperf generated tables don't work with this execution character set. Please report a bug to <[email protected]>."
+#endif
+
+#line 1 "svg_tags.gperf"
+
+#include "replacement.h"
+#include <string.h>
+
+#define TOTAL_KEYWORDS 36
+#define MIN_WORD_LENGTH 6
+#define MAX_WORD_LENGTH 19
+#define MIN_HASH_VALUE 6
+#define MAX_HASH_VALUE 42
+/* maximum key range = 37, duplicates = 0 */
+
+#ifndef GPERF_DOWNCASE
+#define GPERF_DOWNCASE 1
+static unsigned char gperf_downcase[256] =
+  {
+      0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,
+     15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,  27,  28,  29,
+     30,  31,  32,  33,  34,  35,  36,  37,  38,  39,  40,  41,  42,  43,  44,
+     45,  46,  47,  48,  49,  50,  51,  52,  53,  54,  55,  56,  57,  58,  59,
+     60,  61,  62,  63,  64,  97,  98,  99, 100, 101, 102, 103, 104, 105, 106,
+    107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121,
+    122,  91,  92,  93,  94,  95,  96,  97,  98,  99, 100, 101, 102, 103, 104,
+    105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
+    120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134,
+    135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149,
+    150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164,
+    165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179,
+    180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194,
+    195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209,
+    210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224,
+    225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
+    240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254,
+    255
+  };
+#endif
+
+#ifndef GPERF_CASE_MEMCMP
+#define GPERF_CASE_MEMCMP 1
+static int
+gperf_case_memcmp (register const char *s1, register const char *s2, register size_t n)
+{
+  for (; n > 0;)
+    {
+      unsigned char c1 = gperf_downcase[(unsigned char)*s1++];
+      unsigned char c2 = gperf_downcase[(unsigned char)*s2++];
+      if (c1 == c2)
+        {
+          n--;
+          continue;
+        }
+      return (int)c1 - (int)c2;
+    }
+  return 0;
+}
+#endif
+
+#ifdef __GNUC__
+__inline
+#else
+#ifdef __cplusplus
+inline
+#endif
+#endif
+static unsigned int
+hash (register const char *str, register size_t len)
+{
+  static const unsigned char asso_values[] =
+    {
+      43, 43, 43, 43, 43, 43, 43, 43, 43, 43,
+      43, 43, 43, 43, 43, 43, 43, 43, 43, 43,
+      43, 43, 43, 43, 43, 43, 43, 43, 43, 43,
+      43, 43, 43, 43, 43, 43, 43, 43, 43, 43,
+      43, 43, 43, 43, 43, 43, 43, 43, 43, 43,
+      43, 43, 43, 43, 43, 43, 43, 43, 43, 43,
+      43, 43, 43, 43, 43, 43, 12,  2, 10, 22,
+       1, 28, 15,  1, 43, 43, 43,  0,  9, 26,
+       3, 17,  1, 11,  0, 22,  5, 43,  3,  2,
+      43, 43, 43, 43, 43, 43, 43, 43, 12,  2,
+      10, 22,  1, 28, 15,  1, 43, 43, 43,  0,
+       9, 26,  3, 17,  1, 11,  0, 22,  5, 43,
+       3,  2, 43, 43, 43, 43, 43, 43, 43, 43,
+      43, 43, 43, 43, 43, 43, 43, 43, 43, 43,
+      43, 43, 43, 43, 43, 43, 43, 43, 43, 43,
+      43, 43, 43, 43, 43, 43, 43, 43, 43, 43,
+      43, 43, 43, 43, 43, 43, 43, 43, 43, 43,
+      43, 43, 43, 43, 43, 43, 43, 43, 43, 43,
+      43, 43, 43, 43, 43, 43, 43, 43, 43, 43,
+      43, 43, 43, 43, 43, 43, 43, 43, 43, 43,
+      43, 43, 43, 43, 43, 43, 43, 43, 43, 43,
+      43, 43, 43, 43, 43, 43, 43, 43, 43, 43,
+      43, 43, 43, 43, 43, 43, 43, 43, 43, 43,
+      43, 43, 43, 43, 43, 43, 43, 43, 43, 43,
+      43, 43, 43, 43, 43, 43, 43, 43, 43, 43,
+      43, 43, 43, 43, 43, 43, 43
+    };
+  register unsigned int hval = len;
+
+  switch (hval)
+    {
+      default:
+        hval += asso_values[(unsigned char)str[6]+1];
+      /*FALLTHROUGH*/
+      case 6:
+      case 5:
+      case 4:
+      case 3:
+        hval += asso_values[(unsigned char)str[2]];
+        break;
+    }
+  return hval;
+}
+
+static const unsigned char lengthtable[] =
+  {
+     0,  0,  0,  0,  0,  0,  6,  0,  7,  7,  7,  8, 11, 12,
+    12, 13, 11, 12, 16,  7,  7, 16, 11,  7, 19,  8, 13, 17,
+    11, 12,  7,  8, 17,  8, 18,  8, 14, 12, 14, 14, 13,  7,
+    14
+  };
+
+static const StringReplacement wordlist[] =
+  {
+    {""}, {""}, {""}, {""}, {""}, {""},
+#line 46 "svg_tags.gperf"
+    {"fetile", "feTile"},
+    {""},
+#line 39 "svg_tags.gperf"
+    {"femerge", "feMerge"},
+#line 38 "svg_tags.gperf"
+    {"feimage", "feImage"},
+#line 34 "svg_tags.gperf"
+    {"fefuncb", "feFuncB"},
+#line 49 "svg_tags.gperf"
+    {"glyphref", "glyphRef"},
+#line 40 "svg_tags.gperf"
+    {"femergenode", "feMergeNode"},
+#line 41 "svg_tags.gperf"
+    {"femorphology", "feMorphology"},
+#line 20 "svg_tags.gperf"
+    {"animatecolor", "animateColor"},
+#line 21 "svg_tags.gperf"
+    {"animatemotion", "animateMotion"},
+#line 27 "svg_tags.gperf"
+    {"fecomposite", "feComposite"},
+#line 47 "svg_tags.gperf"
+    {"feturbulence", "feTurbulence"},
+#line 22 "svg_tags.gperf"
+    {"animatetransform", "animateTransform"},
+#line 36 "svg_tags.gperf"
+    {"fefuncr", "feFuncR"},
+#line 33 "svg_tags.gperf"
+    {"fefunca", "feFuncA"},
+#line 28 "svg_tags.gperf"
+    {"feconvolvematrix", "feConvolveMatrix"},
+#line 45 "svg_tags.gperf"
+    {"fespotlight", "feSpotLight"},
+#line 35 "svg_tags.gperf"
+    {"fefuncg", "feFuncG"},
+#line 26 "svg_tags.gperf"
+    {"fecomponenttransfer", "feComponentTransfer"},
+#line 17 "svg_tags.gperf"
+    {"altglyph", "altGlyph"},
+#line 25 "svg_tags.gperf"
+    {"fecolormatrix", "feColorMatrix"},
+#line 30 "svg_tags.gperf"
+    {"fedisplacementmap", "feDisplacementMap"},
+#line 18 "svg_tags.gperf"
+    {"altglyphdef", "altGlyphDef"},
+#line 19 "svg_tags.gperf"
+    {"altglyphitem", "altGlyphItem"},
+#line 32 "svg_tags.gperf"
+    {"feflood", "feFlood"},
+#line 23 "svg_tags.gperf"
+    {"clippath", "clipPath"},
+#line 29 "svg_tags.gperf"
+    {"fediffuselighting", "feDiffuseLighting"},
+#line 52 "svg_tags.gperf"
+    {"textpath", "textPath"},
+#line 44 "svg_tags.gperf"
+    {"fespecularlighting", "feSpecularLighting"},
+#line 42 "svg_tags.gperf"
+    {"feoffset", "feOffset"},
+#line 31 "svg_tags.gperf"
+    {"fedistantlight", "feDistantLight"},
+#line 43 "svg_tags.gperf"
+    {"fepointlight", "fePointLight"},
+#line 50 "svg_tags.gperf"
+    {"lineargradient", "linearGradient"},
+#line 51 "svg_tags.gperf"
+    {"radialgradient", "radialGradient"},
+#line 48 "svg_tags.gperf"
+    {"foreignobject", "foreignObject"},
+#line 24 "svg_tags.gperf"
+    {"feblend", "feBlend"},
+#line 37 "svg_tags.gperf"
+    {"fegaussianblur", "feGaussianBlur"}
+  };
+
+const StringReplacement *
+gumbo_get_svg_tag_replacement (register const char *str, register size_t len)
+{
+  if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
+    {
+      register unsigned int key = hash (str, len);
+
+      if (key <= MAX_HASH_VALUE)
+        if (len == lengthtable[key])
+          {
+            register const char *s = wordlist[key].from;
+
+            if ((((unsigned char)*str ^ (unsigned char)*s) & ~32) == 0 && !gperf_case_memcmp (str, s, len))
+              return &wordlist[key];
+          }
+    }
+  return 0;
+}
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/html5-parser-0.4.4/gumbo/svg_tags.gperf 
new/html5-parser-0.4.5/gumbo/svg_tags.gperf
--- old/html5-parser-0.4.4/gumbo/svg_tags.gperf 1970-01-01 01:00:00.000000000 
+0100
+++ new/html5-parser-0.4.5/gumbo/svg_tags.gperf 2018-04-22 17:07:13.000000000 
+0200
@@ -0,0 +1,53 @@
+%{
+#include "replacement.h"
+%}
+
+%ignore-case
+%struct-type
+%omit-struct-type
+%compare-lengths
+%readonly-tables
+%null-strings
+%includes
+%global-table
+%define lookup-function-name gumbo_get_svg_tag_replacement
+%define slot-name from
+StringReplacement;
+
+%%
+"altglyph", "altGlyph"
+"altglyphdef", "altGlyphDef"
+"altglyphitem", "altGlyphItem"
+"animatecolor", "animateColor"
+"animatemotion", "animateMotion"
+"animatetransform", "animateTransform"
+"clippath", "clipPath"
+"feblend", "feBlend"
+"fecolormatrix", "feColorMatrix"
+"fecomponenttransfer", "feComponentTransfer"
+"fecomposite", "feComposite"
+"feconvolvematrix", "feConvolveMatrix"
+"fediffuselighting", "feDiffuseLighting"
+"fedisplacementmap", "feDisplacementMap"
+"fedistantlight", "feDistantLight"
+"feflood", "feFlood"
+"fefunca", "feFuncA"
+"fefuncb", "feFuncB"
+"fefuncg", "feFuncG"
+"fefuncr", "feFuncR"
+"fegaussianblur", "feGaussianBlur"
+"feimage", "feImage"
+"femerge", "feMerge"
+"femergenode", "feMergeNode"
+"femorphology", "feMorphology"
+"feoffset", "feOffset"
+"fepointlight", "fePointLight"
+"fespecularlighting", "feSpecularLighting"
+"fespotlight", "feSpotLight"
+"fetile", "feTile"
+"feturbulence", "feTurbulence"
+"foreignobject", "foreignObject"
+"glyphref", "glyphRef"
+"lineargradient", "linearGradient"
+"radialgradient", "radialGradient"
+"textpath", "textPath"
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/html5-parser-0.4.4/gumbo/utf8.c 
new/html5-parser-0.4.5/gumbo/utf8.c
--- old/html5-parser-0.4.4/gumbo/utf8.c 2017-08-01 07:18:10.000000000 +0200
+++ new/html5-parser-0.4.5/gumbo/utf8.c 2018-04-22 17:07:13.000000000 +0200
@@ -32,7 +32,7 @@
 // Wikipedia: http://en.wikipedia.org/wiki/UTF-8#Description
 // RFC 3629: http://tools.ietf.org/html/rfc3629
 // HTML5 Unicode handling:
-// http://www.whatwg.org/specs/web-apps/current-work/multipage/syntax.html#preprocessing-the-input-stream
+// https://html.spec.whatwg.org/multipage/parsing.html#preprocessing-the-input-stream
 //
 // This implementation is based on a DFA-based decoder by Bjoern Hoehrmann
 // <[email protected]>.  We wrap the inner table-based decoder routine in our
@@ -141,7 +141,7 @@
       // the HTML5 spec.  Since we're looking for particular 7-bit literal
       // characters, we operate in terms of chars and only need a check for 
iter
       // overrun, instead of having to read in a full next code point.
-      // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#preprocessing-the-input-stream
+      // https://html.spec.whatwg.org/multipage/parsing.html#preprocessing-the-input-stream
       if (code_point == '\r') {
         assert(iter->_width == 1);
         const char* next = c + 1;
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/html5-parser-0.4.4/gumbo/utf8.h 
new/html5-parser-0.4.5/gumbo/utf8.h
--- old/html5-parser-0.4.4/gumbo/utf8.h 2017-08-01 07:18:10.000000000 +0200
+++ new/html5-parser-0.4.5/gumbo/utf8.h 2018-04-22 17:07:13.000000000 +0200
@@ -20,7 +20,7 @@
 // 1. Decoding errors are parse errors.
 // 2. Certain other codepoints (eg. control characters) are parse errors.
 // 3. Carriage returns and CR/LF groups are converted to line feeds.
-// http://www.whatwg.org/specs/web-apps/current-work/multipage/infrastructure.html#decoded-as-utf-8,-with-error-handling
+// https://encoding.spec.whatwg.org/#utf-8-decode
 //
 // Also, we want to keep track of source positions for error handling.  As a
 // result, we fold all that functionality into this decoder, and can't use an
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/html5-parser-0.4.4/src/as-libxml.c 
new/html5-parser-0.4.5/src/as-libxml.c
--- old/html5-parser-0.4.4/src/as-libxml.c      2017-08-01 07:18:10.000000000 
+0200
+++ new/html5-parser-0.4.5/src/as-libxml.c      2018-04-22 17:07:13.000000000 
+0200
@@ -205,7 +205,6 @@
     bool ok = true;
     const xmlChar *tag_name = NULL;
     const char *tag;
-    uint8_t tag_sz;
     char buf[MAX_TAG_NAME_SZ] = {0};
     char *nsprefix = NULL;
     xmlNsPtr namespace = NULL;
@@ -214,7 +213,7 @@
 
     if (UNLIKELY(elem->tag >= GUMBO_TAG_UNKNOWN)) {
         gumbo_tag_from_original_text(&(elem->original_tag));
-        tag_sz = MIN(sizeof(buf) - 1, elem->original_tag.length);
+        uint8_t tag_sz = MIN(sizeof(buf) - 1, elem->original_tag.length);
         memcpy(buf, elem->original_tag.data, tag_sz);
         tag = buf;
         if (pd->maybe_xhtml) {
@@ -226,9 +225,9 @@
         tag_name = xmlDictLookup(doc->dict, BAD_CAST tag, tag_sz);
     } else if (UNLIKELY(elem->tag_namespace == GUMBO_NAMESPACE_SVG)) {
         gumbo_tag_from_original_text(&(elem->original_tag));
-        tag = gumbo_normalize_svg_tagname(&(elem->original_tag), &tag_sz);
+        tag = gumbo_normalize_svg_tagname(&(elem->original_tag));
         if (tag == NULL) tag_name = lookup_standard_tag(doc, pd, elem->tag);
-        else tag_name = xmlDictLookup(doc->dict, BAD_CAST tag, tag_sz);
+        else tag_name = xmlDictLookup(doc->dict, BAD_CAST tag, elem->original_tag.length);
     } else tag_name = lookup_standard_tag(doc, pd, elem->tag);
 
     if (UNLIKELY(!tag_name)) ABORT;
@@ -321,6 +320,7 @@
             }
             opts->line_number_attr = xmlDictLookup(doc->dict, BAD_CAST 
opts->line_number_attr, -1);
         }
+       doc->encoding = xmlStrdup(BAD_CAST "UTF-8");
     }
     return doc;
 }
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/html5-parser-0.4.4/src/as-python-tree.c 
new/html5-parser-0.4.5/src/as-python-tree.c
--- old/html5-parser-0.4.4/src/as-python-tree.c 2017-08-01 07:18:10.000000000 
+0200
+++ new/html5-parser-0.4.5/src/as-python-tree.c 2018-04-22 17:07:13.000000000 
+0200
@@ -126,7 +126,6 @@
 static inline PyObject*
 create_element(GumboElement *elem, PyObject *new_tag) {
     PyObject *tag_name = NULL, *tag_obj = NULL, *attributes = NULL;
-    uint8_t tag_sz;
     const char *tag;
 
     if (UNLIKELY(elem->tag >= GUMBO_TAG_UNKNOWN)) {
@@ -134,9 +133,9 @@
         tag_name = PyUnicode_FromStringAndSize(elem->original_tag.data, 
elem->original_tag.length);
     } else if (UNLIKELY(elem->tag_namespace == GUMBO_NAMESPACE_SVG)) {
         gumbo_tag_from_original_text(&(elem->original_tag));
-        tag = gumbo_normalize_svg_tagname(&(elem->original_tag), &tag_sz);
+        tag = gumbo_normalize_svg_tagname(&(elem->original_tag));
         if (tag) {
-            tag_name = PyUnicode_FromStringAndSize(tag, tag_sz);
+            tag_name = PyUnicode_FromStringAndSize(tag, elem->original_tag.length);
         } else {
             tag_name = PyTuple_GET_ITEM(KNOWN_TAG_NAMES, elem->tag);
             Py_INCREF(tag_name);
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/html5-parser-0.4.4/src/html5_parser/soup.py 
new/html5-parser-0.4.5/src/html5_parser/soup.py
--- old/html5-parser-0.4.4/src/html5_parser/soup.py     2017-08-01 
07:18:10.000000000 +0200
+++ new/html5-parser-0.4.5/src/html5_parser/soup.py     2018-04-22 
17:07:13.000000000 +0200
@@ -6,6 +6,27 @@
 
 unicode = type('')
 
+cdata_list_attributes = None
+universal_cdata_list_attributes = None
+empty = ()
+
+
+def init_bs4_cdata_list_attributes():
+    global cdata_list_attributes, universal_cdata_list_attributes
+    from bs4.builder import HTMLTreeBuilder
+    cdata_list_attributes = {
+        k: frozenset(v) for k, v in HTMLTreeBuilder.cdata_list_attributes.items()
+    }
+    universal_cdata_list_attributes = cdata_list_attributes['*']
+
+
+def map_list_attributes(tag_name, name, val):
+    if name in universal_cdata_list_attributes:
+        return val.split()
+    if name in cdata_list_attributes.get(tag_name, empty):
+        return val.split()
+    return val
+
 
 def soup_module():
     if soup_module.ans is None:
@@ -43,6 +64,7 @@
 def bs4_new_tag(Tag, soup):
 
     def new_tag(name, attrs):
+        attrs = {k: map_list_attributes(name, k, v) for k, v in attrs.items()}
         return Tag(soup, name=name, attrs=attrs)
 
     return new_tag
@@ -79,9 +101,13 @@
     'area base br col embed hr img input keygen link menuitem meta param 
source track wbr'.split())
 
 
+def is_bs3():
+    return soup_module().__version__.startswith('3.')
+
+
 def init_soup():
     bs = soup_module()
-    if bs.__version__.startswith('3.'):
+    if is_bs3():
         soup = bs.BeautifulSoup()
         new_tag = bs3_new_tag(bs.Tag, soup)
         append = bs3_fast_append
@@ -90,6 +116,8 @@
         soup = bs.BeautifulSoup('', 'lxml')
         new_tag = bs4_new_tag(bs.Tag, soup)
         append = bs4_fast_append
+        if universal_cdata_list_attributes is None:
+            init_bs4_cdata_list_attributes()
     return bs, soup, new_tag, bs.Comment, append, bs.NavigableString
 
 
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/html5-parser-0.4.4/src/python-wrapper.c 
new/html5-parser-0.4.5/src/python-wrapper.c
--- old/html5-parser-0.4.4/src/python-wrapper.c 2017-08-01 07:18:10.000000000 
+0200
+++ new/html5-parser-0.4.5/src/python-wrapper.c 2018-04-22 17:07:13.000000000 
+0200
@@ -15,7 +15,7 @@
 
 #define MAJOR 0
 #define MINOR 4
-#define PATCH 4
+#define PATCH 5
 
 static char *NAME =  "libxml2:xmlDoc";
 static char *DESTRUCTOR = "destructor:xmlFreeDoc";
@@ -35,14 +35,14 @@
     return doc;
 }
 
-static inline libxml_doc* 
+static inline libxml_doc*
 parse_with_options(const char* buffer, size_t buffer_length, Options *opts) {
     GumboOutput *output = NULL;
     libxml_doc* doc = NULL;
     Py_BEGIN_ALLOW_THREADS;
     output = gumbo_parse_with_options(&(opts->gumbo_opts), buffer, 
buffer_length);
     Py_END_ALLOW_THREADS;
-    if (output == NULL) PyErr_NoMemory(); 
+    if (output == NULL) PyErr_NoMemory();
     else {
         doc = convert_tree(output, opts);
         gumbo_destroy_output(output);
@@ -50,7 +50,7 @@
     return doc;
 }
 
-static void 
+static void
 free_encapsulated_doc(PyObject *capsule) {
     libxml_doc *doc = (libxml_doc*)PyCapsule_GetPointer(capsule, NAME);
     if (doc != NULL) {
@@ -108,7 +108,7 @@
     Py_BEGIN_ALLOW_THREADS;
     output = gumbo_parse_with_options(&(opts.gumbo_opts), buffer, (size_t)sz);
     Py_END_ALLOW_THREADS;
-    if (output == NULL) PyErr_NoMemory(); 
+    if (output == NULL) PyErr_NoMemory();
     GumboDocument* document = &(output->document->v.document);
 
     if (new_doctype != Py_None && document->has_doctype) {
@@ -132,7 +132,7 @@
     return encapsulate(doc);
 }
 
-static PyMethodDef 
+static PyMethodDef
 methods[] = {
     {"parse", (PyCFunction)parse, METH_VARARGS | METH_KEYWORDS,
         "parse()\n\nParse specified bytestring which must be in the UTF-8 
encoding."
@@ -154,7 +154,7 @@
 
 #if PY_MAJOR_VERSION >= 3
 
-static struct PyModuleDef 
+static struct PyModuleDef
 moduledef = {
         PyModuleDef_HEAD_INIT,
         MODULE_NAME,
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/html5-parser-0.4.4/test/adapt.py 
new/html5-parser-0.4.5/test/adapt.py
--- old/html5-parser-0.4.4/test/adapt.py        2017-08-01 07:18:10.000000000 
+0200
+++ new/html5-parser-0.4.5/test/adapt.py        2018-04-22 17:07:13.000000000 
+0200
@@ -86,6 +86,7 @@
         for soup_name, soup in soups:
             set_soup_module(soup)
             self.do_soup_test(soup_name)
+        set_soup_module(None)
 
     def do_soup_test(self, soup_name):
         root = parse(HTML, treebuilder='soup')
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/html5-parser-0.4.4/test/soup.py 
new/html5-parser-0.4.5/test/soup.py
--- old/html5-parser-0.4.4/test/soup.py 2017-08-01 07:18:10.000000000 +0200
+++ new/html5-parser-0.4.5/test/soup.py 2018-04-22 17:07:13.000000000 +0200
@@ -6,7 +6,7 @@
 
 import gc
 
-from html5_parser.soup import parse
+from html5_parser.soup import parse, is_bs3
 
 from . import TestCase
 
@@ -46,6 +46,12 @@
         root = parse('<p><x xmlns:a="b">')
         self.ae(type('')(root), '<html><head></head><body><p><x 
xmlns:a="b"></x></p></body></html>')
 
+    def test_soup_list_attrs(self):
+        if is_bs3():
+            self.skipTest('No bs4 module found')
+        root = parse('<a class="a b" rel="x y">')
+        self.ae(root.body.a.attrs, {'class': 'a b'.split(), 'rel': 'x y'.split()})
+
     def test_soup_leak(self):
         HTML = '<p a=1>\n<a b=2 id=3>y</a>z<x:x class=4>1</x:x>'
         parse(HTML)  # So that BS and html_parser set up any internal objects


Reply via email to