Hello community,

here is the log from the commit of package html-xml-utils for openSUSE:Factory 
checked in at 2019-10-27 13:41:51
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Comparing /work/SRC/openSUSE:Factory/html-xml-utils (Old)
 and      /work/SRC/openSUSE:Factory/.html-xml-utils.new.2990 (New)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

Package is "html-xml-utils"

Sun Oct 27 13:41:51 2019 rev:5 rq:743106 version:7.8

Changes:
--------
--- /work/SRC/openSUSE:Factory/html-xml-utils/html-xml-utils.changes    2018-07-17 09:41:28.537493202 +0200
+++ /work/SRC/openSUSE:Factory/.html-xml-utils.new.2990/html-xml-utils.changes  2019-10-27 13:41:53.689330719 +0100
@@ -1,0 +2,20 @@
+Thu Oct 24 20:21:24 UTC 2019 - Sebastian Wagner <[email protected]>
+
+- update to version 7.8:
+ * textwrap.c, langinfo.c, hxnormalize.c: Added knowledge about
+   languages that do not use spaces between words. In such languages,
+   a newline should not be converted to a space in outc() in
+   textwrap.c, but only to a break opportunity.
+ * hxtoc.c: The element to group headings in HTML5 is called
+   HGROUP, not HEADER. The heading of a section (SECTION, ARTICLE,
+   etc.) need not be the first element, there may be non-header
+   elements before it.
+ * hxwls.c: Print "longdesc", "classid" or "codebase" in the second
+   column for the corresponding attribute. Also recognize srcset
+   (somewhat).
+ * hxnormalize.c: Added option -X to indicate the input is XML
+   instead of HTML. Handle conversion of CDATA elements to XML by
+   escaping < and & instead of adding <![CDATA[. Added corresponding
+   test normalize13.sh.
+
+-------------------------------------------------------------------

Old:
----
  html-xml-utils-7.7.tar.gz

New:
----
  html-xml-utils-7.8.tar.gz

++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

Other differences:
------------------
++++++ html-xml-utils.spec ++++++
--- /var/tmp/diff_new_pack.WAVIiw/_old  2019-10-27 13:41:54.297331452 +0100
+++ /var/tmp/diff_new_pack.WAVIiw/_new  2019-10-27 13:41:54.297331452 +0100
@@ -1,7 +1,7 @@
 #
 # spec file for package html-xml-utils
 #
-# Copyright (c) 2018 SUSE LINUX GmbH, Nuernberg, Germany.
+# Copyright (c) 2019 SUSE LINUX GmbH, Nuernberg, Germany.
 #
 # All modifications and additions to the file contributed by third parties
 # remain the property of their copyright owners, unless otherwise agreed
@@ -12,12 +12,12 @@
 # license that conforms to the Open Source Definition (Version 1.9)
 # published by the Open Source Initiative.
 
-# Please submit bugfixes or comments via http://bugs.opensuse.org/
+# Please submit bugfixes or comments via https://bugs.opensuse.org/
 #
 
 
 Name:           html-xml-utils
-Version:        7.7
+Version:        7.8
 Release:        0
 Summary:        A number of utilities for manipulating HTML and XML files
 License:        W3C

++++++ html-xml-utils-7.7.tar.gz -> html-xml-utils-7.8.tar.gz ++++++
++++ 2941 lines of diff (skipped)
++++    retrying with extended exclude list
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' --exclude Makefile.in --exclude configure --exclude 
config.guess --exclude '*.pot' --exclude mkinstalldirs --exclude aclocal.m4 
--exclude config.sub --exclude depcomp --exclude install-sh --exclude ltmain.sh 
old/html-xml-utils-7.7/ChangeLog new/html-xml-utils-7.8/ChangeLog
--- old/html-xml-utils-7.7/ChangeLog    2018-04-29 18:34:08.000000000 +0200
+++ new/html-xml-utils-7.8/ChangeLog    2019-10-06 01:27:29.000000000 +0200
@@ -1,5 +1,34 @@
+2019-10-05  Bert Bos  <[email protected]>
+
+       * Published version 7.8.
+
+       * textwrap.c, langinfo.c, hxnormalize.c: Added knowledge about
+       languages that do not use spaces between words. In such languages,
+       a newline should not be converted to a space in outc() in
+       textwrap.c, but only to a break opportunity.
+
+2019-08-28  Bert Bos  <[email protected]>
+
+       * hxtoc.c: The element to group headings in HTML5 is called
+       HGROUP, not HEADER. The heading of a section (SECTION, ARTICLE,
+       etc.) need not be the first element, there may be non-header
+       elements before it.
+
+       * hxwls.c: Print "longdesc", "classid" or "codebase" in the second
+       column for the corresponding attribute. Also recognize srcset
+       (somewhat).
+
+2018-08-03  Bert Bos  <[email protected]>
+
+       * hxnormalize.c: Added option -X to indicate the input is XML
+       instead of HTML. Handle conversion of CDATA elements to XML by
+       escaping < and & instead of adding <![CDATA[. Added corresponding
+       test normalize13.sh.
+
 2018-04-29  Bert Bos  <[email protected]>
 
+       * Released version 7.7.
+
        * dtd.hash: Don't include the arguments in the forward declaration
        of lookup_element(), because those arguments differ slightly
        depending on which version of gperf is used to generate dtd.c:
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' --exclude Makefile.in --exclude configure --exclude 
config.guess --exclude '*.pot' --exclude mkinstalldirs --exclude aclocal.m4 
--exclude config.sub --exclude depcomp --exclude install-sh --exclude ltmain.sh 
old/html-xml-utils-7.7/Makefile.am new/html-xml-utils-7.8/Makefile.am
--- old/html-xml-utils-7.7/Makefile.am  2018-04-29 17:24:29.000000000 +0200
+++ new/html-xml-utils-7.8/Makefile.am  2019-10-05 19:46:53.000000000 +0200
@@ -1,4 +1,4 @@
-# Copyright © 2000-2017 World Wide Web Consortium
+# Copyright © 2000-2018 World Wide Web Consortium
 # See http://www.w3.org/Consortium/Legal/copyright-software
 #
 # Author: Bert Bos <[email protected]>
@@ -41,7 +41,7 @@
 EXPORTS =              dict.e heap.e types.e headers.e connectsock.e\
                        dtd.e errexit.e tree.e genid.e html.e url.e\
                        openurl.e scan.e textwrap.e unent.e class.e\
-                       selector.e hash.e selmatch.e
+                       selector.e hash.e selmatch.e langinfo.e
 
 BUILT_SOURCES =                $(EXPORTS) scan.c html.c html.h dtd.c unent.c
 
@@ -75,7 +75,7 @@
                        dict.c types.c fopencookie.h fopencookie.c
 hxnormalize_SOURCES =  hxnormalize.c html.y scan.l openurl.c url.c\
                        tree.c connectsock.c heap.c dtd.c types.c\
-                       fopencookie.h\
+                       fopencookie.h langinfo.c\
                        textwrap.c errexit.c headers.c dict.c fopencookie.c
 hxnum_SOURCES =                hxnum.c html.y scan.l openurl.c url.c errexit.c\
                        heap.c connectsock.c headers.c dict.c types.c class.c\
@@ -180,8 +180,10 @@
        tests/normalize1.sh tests/normalize2.sh\
        tests/normalize3.sh tests/normalize4.sh tests/normalize5.sh\
        tests/normalize6.sh tests/normalize7.sh tests/normalize8.sh\
-       tests/normalize9.sh tests/normalize10.sh  tests/normalize11.sh\
-       tests/normalize12.sh\
+       tests/normalize9.sh tests/normalize10.sh tests/normalize11.sh\
+       tests/normalize12.sh tests/normalize13.sh tests/normalize14.sh\
+       tests/num1.sh tests/num2.sh tests/num3.sh tests/num4.sh\
+       tests/num5.sh tests/num6.sh\
        tests/pipe1.sh tests/pipe2.sh tests/pipe3.sh tests/pipe4.sh\
        tests/printlinks1.sh tests/printlinks2.sh tests/printlinks3.sh\
        tests/printlinks4.sh tests/ref1.sh\
@@ -200,6 +202,7 @@
        tests/tabletrans1.sh tests/tabletrans2.sh tests/tabletrans3.sh\
        tests/tabletrans4.sh tests/tabletrans5.sh\
        tests/toc1.sh tests/toc2.sh tests/toc3.sh tests/toc4.sh\
+       tests/toc5.sh tests/toc6.sh\
        tests/uncdata1.sh\
        tests/unent1.sh tests/unent2.sh tests/unent3.sh tests/unent4.sh\
        tests/unent5.sh tests/unent6.sh\
@@ -207,7 +210,7 @@
        tests/unpipe2.sh tests/unpipe3.sh tests/unpipe4.sh tests/unpipe5.sh\
        tests/unpipe6.sh\
        tests/wls1.sh tests/wls2.sh\
-       tests/wls3.sh tests/wls4.sh tests/wls5.sh\
+       tests/wls3.sh tests/wls4.sh tests/wls5.sh tests/wls6.sh\
        tests/xmlasc1.sh tests/xmlasc2.sh tests/xmlasc3.sh\
        tests/xmlasc4.sh tests/xmlasc5.sh tests/xmlasc6.sh\
        tests/xmlasc7.sh tests/xmlns1.sh tests/xref1.sh tests/xref2.sh\
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' --exclude Makefile.in --exclude configure --exclude 
config.guess --exclude '*.pot' --exclude mkinstalldirs --exclude aclocal.m4 
--exclude config.sub --exclude depcomp --exclude install-sh --exclude ltmain.sh 
old/html-xml-utils-7.7/TODO new/html-xml-utils-7.8/TODO
--- old/html-xml-utils-7.7/TODO 2018-02-23 15:29:48.000000000 +0100
+++ new/html-xml-utils-7.8/TODO 2019-02-07 13:23:49.000000000 +0100
@@ -15,4 +15,9 @@
 
 Add ruby, rb, rp, rt and rtc to dtd.hash.
 
-Add MathML elements to dtd.hash.
\ No newline at end of file
+Add MathML elements to dtd.hash.
+
+An option to add <![CDATA[...]]> to CDATA sections or to escape <, >
+and & in such sections (only when output is XML).
+
+Properly parse the srcset attribute.
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' --exclude Makefile.in --exclude configure --exclude 
config.guess --exclude '*.pot' --exclude mkinstalldirs --exclude aclocal.m4 
--exclude config.sub --exclude depcomp --exclude install-sh --exclude ltmain.sh 
old/html-xml-utils-7.7/configure.ac new/html-xml-utils-7.8/configure.ac
--- old/html-xml-utils-7.7/configure.ac 2018-04-29 18:39:29.000000000 +0200
+++ new/html-xml-utils-7.8/configure.ac 2019-10-06 00:41:03.000000000 +0200
@@ -1,7 +1,7 @@
 #                                               -*- Autoconf -*-
 # Process this file with autoconf to produce a configure script.
 AC_PREREQ([2.69])
-AC_INIT([html-xml-utils],[7.7])
+AC_INIT([html-xml-utils],[7.8])
 dnl print all automake warnings with -Wall
 dnl http://sources.redhat.com/automake/automake.html#Options
 AM_INIT_AUTOMAKE([-Wall])
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' --exclude Makefile.in --exclude configure --exclude 
config.guess --exclude '*.pot' --exclude mkinstalldirs --exclude aclocal.m4 
--exclude config.sub --exclude depcomp --exclude install-sh --exclude ltmain.sh 
old/html-xml-utils-7.7/hxnormalize.1 new/html-xml-utils-7.8/hxnormalize.1
--- old/html-xml-utils-7.7/hxnormalize.1        2018-02-23 15:29:48.000000000 +0100
+++ new/html-xml-utils-7.8/hxnormalize.1        2019-10-06 01:35:09.000000000 +0200
@@ -4,6 +4,7 @@
 .SH SYNOPSIS
 .B hxnormalize
 .RB "[\| " \-x " \|]"
+.RB "[\| " \-X " \|]"
 .RB "[\| " \-e " \|]"
 .RB "[\| " \-d " \|]"
 .RB "[\| " \-s " \|]"
@@ -19,22 +20,35 @@
 .LP
 The
 .B hxnormalize
-command pretty-prints an HTML file, and also tries to fix small
-errors. The output is the same HTML, but with a maximum line length
+command pretty-prints an HTML or XML file, and also tries to fix small
+HTML errors. The output is the same file, but with a maximum line length
 and with optional indentation to indicate the nesting level of each
 line.
 .SH OPTIONS
 The following options are supported:
 .TP 10
 .B \-x
-Use XML conventions: empty elements are written with a slash at the
-end: <IMG\ />. Implies
+Applies XML conventions: empty elements are written with a slash at
+the end (e.g., <IMG\ />) and, if the input is HTML, any \(oq<\(cq and
+\(oq&\(cq inside <style> and <script> elements are escaped as
+\(oq&lt;\(cq and \(oq&amp;\(cq. (The input is assumed to be HTML
+unless the
+.B \-X
+option is present.) Implies
 .BR \-e .
 .TP
 .B \-e
-Always insert endtags, even if HTML does not require them (for
+Always inserts endtags, even if HTML does not require them (for
 example: </p> and </li>).
 .TP
+.B \-X
+Makes
+.B hxnormalize
+assume the input is well-formed XML. It does not try to infer omitted
+HTML tags, does not assume elements such as <img> and <br> are empty,
+and does not treat \(oq<\(cq and \(oq&\(cq inside <style> and <script>
+as normal characters.
+.TP
 .B \-d
 Omit the DOCTYPE from the output.
 .TP
@@ -50,7 +64,7 @@
 will wrap lines so that all lines are as long as possible, but no
 longer than this length. Default is 72. Words that are longer than the
 line length will not be broken, and will extend past this length. A
-\(lqword\(rq is a sequence of characters delimited by white space.) The
+\(oqword\(cq is a sequence of characters delimited by white space.) The
 content of the STYLE, SCRIPT and PRE elements will not be
 line-wrapped.
 .TP
@@ -58,8 +72,9 @@
 Omit <span> tags that don't have any attributes.
 .TP
 .B \-L
-Remove redundant "lang" and "xml:lang" attributes. (I.e., those whose
-value is the same as the language inherited from the parent element.)
+Remove redundant \(oqlang\(cq and \(oqxml:lang\(cq attributes. (I.e.,
+those whose value is the same as the language inherited from the
+parent element.)
 .TP
 .BI \-c " commentmagic"
 Comments are normally placed right after the preceding text. That is
@@ -69,7 +84,7 @@
 is a string and when that string occurs inside a comment,
 .B hxnormalize
 will output an empty line before that comment. E.g. \fB\-c "===="\fR
-can be used to put all comments that contain "====" on a separate
+can be used to put all comments that contain \(oq====\(cq on a separate
 line, preceded by an empty line. By default, no comments are treated
 that way.
 .SH OPERANDS
@@ -102,13 +117,44 @@
 .B hxnormalize
 will not omit an endtag if the white space after it could possibly be
 significant. E.g., it will not remove the first </p> from
-"<div><p>text</p> <p>text</p></div>".
+\(oq<div><p>text</p> <p>text</p></div>\(cq.
 .LP
 .B hxnormalize
 can currently only retrieve remote files over HTTP. It doesn't handle
 password-protected files, nor files whose content depends on HTTP
-"cookies."
+\(oqcookies.\(cq
+.LP
+When converting from XML to HTML (option
+.B \-X
+without option
+.BR \-x ),
+any pairs of \(oq<![CDATA[\(cq and \(oq]]>\(cq are removed and
+character entities &lt; &gt; &quot; &apos; and &amp; are expanded (to
+\(oq<\(cq, \(oq>\(cq, \(oq"\(cq, \(oq'\(cq and \(oq&\(cq,
+respectively), but any other character entities are not expanded. To
+expand other character entities, pipe the input through
+.BR hxunent (1)
+first.
+.LP
+To limit lines to a given number of characters,
+.B hxnormalize
+breaks lines at spaces (or inside tags). Some writing systems do not
+use spaces between words and thus
+.B hxnormalize
+may not be able to break lines, except at already existing line
+breaks.
+.LP
+To make short lines longer,
+.B hxnormalize
+will combine lines and replace a line break by a space, except in
+writing systems that do not put spaces between words, where the line
+break is replaced by nothing.
+.B hxnormalize
+currently only does the latter for Japanese, Chinese, Korean,
+Khmer and Thai. (The text must be correctly marked up with
+\(oqlang\(cq or \(oqxml:lang\(cq.)
 .SH "SEE ALSO"
 .BR asc2xml (1),
 .BR xml2asc (1),
+.BR hxunent (1),
 .BR UTF-8 " (RFC 2279)"
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' --exclude Makefile.in --exclude configure --exclude 
config.guess --exclude '*.pot' --exclude mkinstalldirs --exclude aclocal.m4 
--exclude config.sub --exclude depcomp --exclude install-sh --exclude ltmain.sh 
old/html-xml-utils-7.7/hxnormalize.c new/html-xml-utils-7.8/hxnormalize.c
--- old/html-xml-utils-7.7/hxnormalize.c        2018-02-23 15:29:48.000000000 +0100
+++ new/html-xml-utils-7.8/hxnormalize.c        2019-10-05 19:46:53.000000000 +0200
@@ -6,7 +6,7 @@
  *
  * Created 9 May 1998
  * Bert Bos <[email protected]>
- * $Id: hxnormalize.c,v 1.22 2017/11/24 09:50:25 bbos Exp $
+ * $Id: hxnormalize.c,v 1.25 2019/10/05 17:11:31 bbos Exp $
  */
 #include "config.h"
 #include <stdio.h>
@@ -31,6 +31,7 @@
 #include "dict.e"
 #include "openurl.e"
 #include "errexit.e"
+#include "langinfo.e"
 
 static Tree tree;
 static bool do_xml = false;
@@ -40,6 +41,7 @@
 static bool clean_span = false;
 static string long_comment = NULL;
 static bool do_lang = false;
+static bool input_is_xml = false;
 
 
 /* handle_error -- called when a parse error occurred */
@@ -68,10 +70,16 @@
   tree = append_comment(tree, commenttext);
 }
 
+/* handle_html_text -- called after a text chunk is parsed */
+void handle_html_text(void *clientdata, string text)
+{
+  tree = append_text(tree, text);
+}
+
 /* handle_text -- called after a text chunk is parsed */
 void handle_text(void *clientdata, string text)
 {
-  tree = append_text(tree, text);
+  tree = tree_append_text(tree, text);
 }
 
 /* handle_decl -- called after a declaration is parsed */
@@ -87,22 +95,46 @@
   tree = append_procins(tree, pi_text);
 }
 
+/* handle_html_starttag -- called after a start tag is parsed */
+void handle_html_starttag(void *clientdata, string name, pairlist attribs)
+{
+  tree = html_push(tree, name, attribs);
+  free(name);
+}
+
 /* handle_starttag -- called after a start tag is parsed */
 void handle_starttag(void *clientdata, string name, pairlist attribs)
 {
+  tree = tree_push(tree, name, attribs);
+  free(name);
+}
+
+/* handle_html_emptytag -- called after an empty tag is parsed */
+void handle_html_emptytag(void *clientdata, string name, pairlist attribs)
+{
   tree = html_push(tree, name, attribs);
+  free(name);
 }
 
 /* handle_emptytag -- called after an empty tag is parsed */
 void handle_emptytag(void *clientdata, string name, pairlist attribs)
 {
-  tree = html_push(tree, name, attribs);
+  tree = tree_push(tree, name, attribs);
+  tree = tree_pop(tree, name);
+  free(name);
+}
+
+/* handle_html_endtag -- called after an endtag is parsed (name may be "") */
+void handle_html_endtag(void *clientdata, string name)
+{
+  tree = html_pop(tree, name);
+  free(name);
 }
 
 /* handle_endtag -- called after an endtag is parsed (name may be "") */
 void handle_endtag(void *clientdata, string name)
 {
-  tree = html_pop(tree, name);
+  tree = tree_pop(tree, name);
   free(name);
 }
 
@@ -162,57 +194,58 @@
 static void pp(Tree n, bool preformatted, bool allow_text,
               conststring lang)
 {
-  bool pre, mixed;
+  bool pre, mixed, with_space;
   conststring lang2;
   string s;
   pairlist h;
   size_t i, j;
   Tree l;
 
+  with_space = with_spaces(lang);      /* Language with spaces between words? */
+
   switch (n->tp) {
     case Text:
       if (!allow_text) {
        assert(only_space(n->text));
       } else {
        s = n->text;
-       i = strlen(s);
-       outn(s, i, preformatted);
+       out(s, preformatted, with_space);
       }
       break;
     case Comment:
       if (long_comment && strstr(n->text, long_comment) && !preformatted) {
        /* Found a comment that should have an empty line before it */
        outbreak();
-       outln(NULL, true);
+       outln(NULL, true, with_space);
       }
-      out("<!--", true); out(n->text, true);
-      if (allow_text || preformatted) out("-->", true);
-      else outln("-->", preformatted);
+      out("<!--", true, true); out(n->text, true, with_space);
+      if (allow_text || preformatted) out("-->", true, true);
+      else outln("-->", preformatted, true);
       break;
     case Declaration:
       if (do_doctype) {
-       out("<!DOCTYPE ", false);
-       out(n->name, false);
+       out("<!DOCTYPE ", false, true);
+       out(n->name, false, true);
        if (n->text) {
-         out(" PUBLIC \"", false);
-         out(n->text, false);
-         out("\"", false);
+         out(" PUBLIC \"", false, true);
+         out(n->text, false, true);
+         out("\"", false, true);
        }
        if (n->url) {
-         if (!n->text) out(" SYSTEM", false);
-         out(" \"", false);
-         out(n->url, false);
-         out("\"", false);
+         if (!n->text) out(" SYSTEM", false, true);
+         out(" \"", false, true);
+         out(n->url, false, true);
+         out("\"", false, true);
        } else if (n->text && do_xml) { /* XML cannot omit the system literal */
-         out(" \"\"", false);
+         out(" \"\"", false, true);
        }
-       outln(">", false);
+       outln(">", false, true);
       }
       break;
     case Procins:
-      out("<?", false); out(n->text, true);
-      if (allow_text || preformatted) out(">", false);
-      else outln(">", false);
+      out("<?", false, true); out(n->text, true, true);
+      if (allow_text || preformatted) out(">", false, true);
+      else outln(">", false, true);
       break;
     case Element:
       if (clean_span && eq(n->name, "span") && ! n->attribs) {
@@ -221,100 +254,114 @@
          pp(l, preformatted, true, lang);
        break;
       }
-      /* Determine language, remove redundant language attribute */
-      if (do_lang) {
-       if ((lang2 = pairlist_get(n->attribs, "lang")) ||
-           (lang2 = pairlist_get(n->attribs, "xml:lang"))) {
-         if (lang && eq(lang, lang2)) {
-           pairlist_unset(&n->attribs, "lang");
-           pairlist_unset(&n->attribs, "xml:lang");
-         }
-         lang = lang2;
-       }
+      /* Check for language attribute. */
+      lang2 = pairlist_get(n->attribs, "lang");
+      if (!lang2) lang2 = pairlist_get(n->attribs, "xml:lang");
+
+      /* Remove redundant language attribute */
+      if (do_lang && lang && lang2 && eq(lang, lang2)) {
+       pairlist_unset(&n->attribs, "lang");
+       pairlist_unset(&n->attribs, "xml:lang");
       }
-      if (!preformatted && break_before(n->name)) outln(NULL, false);
-      out("<", preformatted); out(n->name, preformatted);
+
+      /* Update inherited language. */
+      if (lang2) lang = lang2;
+
+      if (!preformatted && break_before(n->name)) outln(NULL, false, true);
+      out("<", preformatted, true); out(n->name, preformatted, true);
       if (break_before(n->name)) inc_indent();
       n->attribs = sort_list(n->attribs);
       for (h = n->attribs; h != NULL; h = h->next) {
-       out(" ", false); out(h->name, false);
+       out(" ", false, true); out(h->name, false, true);
        if (do_xml) {
-         out("=\"", false);
-         out(h->value ? h->value : h->name, true);
-         outc('"', false);
+         out("=\"", false, true);
+         out(h->value ? h->value : h->name, true, true);
+         outc('"', false, true);
        } else if (h->value == NULL) {
          /* The h->name *is* the value (and the attribute name is implicit) */
        } else if (!needs_quotes(h->value)) {
-         out("=", false); /* Omit the quotes */
-         out(h->value, true);
+         out("=", false, true); /* Omit the quotes */
+         out(h->value, true, true);
        } else {
-         out("=\"", false);
-         out(h->value, true);
-         outc('"', false);
+         out("=\"", false, true);
+         out(h->value, true, true);
+         outc('"', false, true);
        }
       }
       if (is_empty(n->name)) {
        assert(n->children == NULL);
        outbreakpoint();
-       out(do_xml ? " />" : ">", true);
+       out(do_xml ? " />" : ">", true, true);
        if (break_before(n->name)) dec_indent();
-       if (!preformatted && break_after(n->name)) outln(NULL, false);
+       if (!preformatted && break_after(n->name)) outln(NULL, false, true);
 
-      } else if (do_xml && is_cdata_elt(n->name)) {
-       /* Insert <![CDATA[...]]>, but only if input was HTML, not XML */
+      } else if (do_xml && !input_is_xml && is_cdata_elt(n->name)) {
+       /* Escape '<' and '&', but only if input was HTML, not XML */
        if (!n->children) {
-         out(" />", true);
+         out(" />", true, true);
          if (break_before(n->name)) dec_indent();
        } else {
-         out(">", preformatted);
-         /* TODO: Strictly speaking, if the input is HTML (not XML),
-            then the string "<![CDATA[" in <style> or <script> is to
-            be taken as literal text. In practice, the string
-            "<![CDATA[" is nearly always preceeded by "<!--" or "//"
-            and so this simplistic check will usually work... */
-         assert(n->children->tp == Text);
-         if (!hasprefix(n->children->text, "<![CDATA[")) out("<![CDATA[",true);
+         outbreakpoint();
+         out(">", preformatted, true);
          for (l = n->children; l; l = l->sister) {
-           assert(n->children->tp == Text);
-           out(l->text, true);
+           assert(l->tp == Text);
+           for (s = l->text; *s; s++)
+             if (*s == '<') out("&lt;", true, true);
+             else if (*s == '&') out("&amp;", true, true);
+             else outc(*s, true, with_space);
          }
-         if (!hasprefix(n->children->text, "<![CDATA[")) out("]]>", true);
          if (break_before(n->name)) dec_indent();
-         out("</", preformatted);
-         out(n->name, preformatted);
+         out("</", true, true);
+         out(n->name, true, true);
          outbreakpoint();
-         out(">", preformatted);
+         out(">", preformatted, true);
        }
        if (!preformatted && break_after(n->name)) outbreak();
 
-      } else if (!do_xml && is_cdata_elt(n->name) && n->children &&
-                n->children->tp == Text && !n->children->sister &&
-                hasprefix(n->children->text, "<![CDATA[")) {
-       /* Remove <![CDATA[...]]>, but only if input was XML, not HTML */
-       assert(hasaffix(n->children->text, "]]>"));
-       out(">", preformatted);
-       s = n->children->text + 9; /* Skip "<![CDATA[" */
-       i = strlen(s) - 3;         /* Omit "]]>" */
-       for (j = 0; j < i; j++) outc(s[j], true);
+      } else if (!do_xml && input_is_xml && is_cdata_elt(n->name) &&
+                n->children) {
+       /* Remove "<![CDATA[" and "]]>", or unescape &lt; and &amp;,
+          but only if input was XML, not HTML */
+       outbreakpoint();
+       out(">", preformatted, true);
+       for (l = n->children; l != NULL; l = l->sister) {
+         if (l->tp != Text) {
+           errexit("Cannot convert <%s> to HTML because it has children\n",
+                   n->name);
+         } else if (hasprefix(l->text, "<![CDATA[")) {
+           assert(hasaffix(l->text, "]]>"));
+           s = l->text + 9;    /* Skip "<![CDATA[" */
+           i = strlen(s) - 3;  /* Omit "]]>" */
+           for (j = 0; j < i; j++) outc(s[j], true, with_space);
+         } else {              /* Unescape &lt; and &amp; */
+           for (s = l->text; *s; s++)
+             if (hasprefix(s, "&amp;")) {outc('&', true, true); s += 4;}
+             else if (hasprefix(s, "&lt;")) {outc('<', true, true); s += 3;}
+             else if (hasprefix(s, "&gt;")) {outc('>', true, true); s += 3;}
+             else if (hasprefix(s, "&quot;")) {outc('"', true, true); s += 5;}
+             else if (hasprefix(s, "&apos;")) {outc('\'', true, true); s += 5;}
+             else outc(*s, true, with_space);
+         }
+       }
        if (break_before(n->name)) dec_indent();
-       out("</", preformatted);
-       out(n->name, preformatted);
+       out("</", preformatted, true);
+       out(n->name, preformatted, true);
        outbreakpoint();
-       out(">", preformatted);
+       out(">", preformatted, true);
        if (!preformatted && break_after(n->name)) outbreak();
 
       } else {
        outbreakpoint();
-       out(">", preformatted);
+       out(">", preformatted, true);
        pre = preformatted || is_pre(n->name);
        mixed = is_mixed(n->name);
        for (l = n->children; l != NULL; l = l->sister)
          pp(l, pre, mixed, lang);
        if (break_before(n->name)) dec_indent();
        if (do_xml || do_endtag || need_etag(n->name) || next_ambiguous(n)) {
-         out("</", pre); out(n->name, pre);
+         out("</", pre, true); out(n->name, pre, true);
          outbreakpoint();
-         out(">", preformatted);
+         out(">", preformatted, true);
        }
        if (!preformatted && break_after(n->name)) outbreak();
       }
@@ -347,22 +394,11 @@
 {
   int c, status = 200;
 
-  /* Bind the parser callback routines to our handlers */
-  set_error_handler(handle_error);
-  set_start_handler(start);
-  set_end_handler(end);
-  set_comment_handler(handle_comment);
-  set_text_handler(handle_text);
-  set_decl_handler(handle_decl);
-  set_pi_handler(handle_pi);
-  set_starttag_handler(handle_starttag);
-  set_emptytag_handler(handle_emptytag);
-  set_endtag_handler(handle_endtag);
-
-  while ((c = getopt(argc, argv, "edxi:l:sc:L")) != -1)
+  while ((c = getopt(argc, argv, "edxXi:l:sc:L")) != -1)
     switch (c) {
     case 'e': do_endtag = true; break;
     case 'x': do_xml = true; break;
+    case 'X': input_is_xml = true; break;
     case 'd': do_doctype = false; break;
     case 'i': set_indent(atoi(optarg)); break;
     case 'l': set_linelen(atoi(optarg)); break;
@@ -376,6 +412,32 @@
   else usage(argv[0]);
   if (yyin == NULL) {perror(argv[optind]); exit(2);}
   if (status != 200) errexit("%s : %s\n", argv[optind], http_strerror(status));
+
+  /* Bind the parser callback routines to our handlers */
+  if (input_is_xml) {
+    set_error_handler(handle_error);
+    set_start_handler(start);
+    set_end_handler(end);
+    set_comment_handler(handle_comment);
+    set_text_handler(handle_text);
+    set_decl_handler(handle_decl);
+    set_pi_handler(handle_pi);
+    set_starttag_handler(handle_starttag);
+    set_emptytag_handler(handle_emptytag);
+    set_endtag_handler(handle_endtag);
+  } else {                     /* Input is HTML */
+    set_error_handler(handle_error);
+    set_start_handler(start);
+    set_end_handler(end);
+    set_comment_handler(handle_comment);
+    set_text_handler(handle_html_text);
+    set_decl_handler(handle_decl);
+    set_pi_handler(handle_pi);
+    set_starttag_handler(handle_html_starttag);
+    set_emptytag_handler(handle_html_emptytag);
+    set_endtag_handler(handle_html_endtag);
+  }
+
   if (yyparse() != 0) {exit(3);}
   tree = get_root(tree);
   prettyprint(tree);
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' --exclude Makefile.in --exclude configure --exclude 
config.guess --exclude '*.pot' --exclude mkinstalldirs --exclude aclocal.m4 
--exclude config.sub --exclude depcomp --exclude install-sh --exclude ltmain.sh 
old/html-xml-utils-7.7/hxnum.c new/html-xml-utils-7.8/hxnum.c
--- old/html-xml-utils-7.7/hxnum.c      2018-02-23 15:29:48.000000000 +0100
+++ new/html-xml-utils-7.8/hxnum.c      2018-08-02 03:21:37.000000000 +0200
@@ -8,7 +8,7 @@
  *
  * Bert Bos
  * Created Sep 1997
- * $Id: hxnum.c,v 1.11 2017/11/24 09:50:25 bbos Exp $
+ * $Id: hxnum.c,v 1.12 2018/08/02 01:20:14 bbos Exp $
  */
 #include "config.h"
 #include <stdio.h>
@@ -46,8 +46,8 @@
 static int low = 1;                            /* First counter to use */
 static int high = 6;                           /* Last counter to use */
 static string format[7] = {                    /* Format for each counter */
-  NULL, "%d.", "%d.%d.", "%d.%d.%d.", "%d.%d.%d.%d.",
-  "%d.%d.%d.%d.%d.", "%d.%d.%d.%d.%d.%d."};
+  NULL, "%d. ", "%d.%d. ", "%d.%d.%d. ", "%d.%d.%d.%d. ",
+  "%d.%d.%d.%d.%d. ", "%d.%d.%d.%d.%d.%d. "};
 static int skipping = 0;                       /* >0 to suppress output */
 
 
@@ -179,7 +179,7 @@
        putchar(*s);
       }
     }
-    printf(" </span>");
+    printf("</span>");
   }
 }
 
@@ -224,12 +224,12 @@
   printf("  -l low     lowest header level to number (1-6) [default 1]\n");
   printf("  -h high    highest header level to number (1-6) [default 6]\n");
   printf("  -n start   number of first heading [default: 1]\n");
-  printf("  -1 format  format for level 1 [default \"%%d.\"]\n");
-  printf("  -2 format  format for level 2 [default \"%%d.%%d.\"]\n");
-  printf("  -3 format  format for level 3 [default \"%%d.%%d.%%d.\"]\n");
-  printf("  -4 format  format for level 4 [default \"%%d.%%d.%%d.%%d.\"]\n");
-  printf("  -5 format  format for level 5 [default \"%%d.%%d.%%d.%%d.%%d.\"]\n");
-  printf("  -6 format  format for level 6 [default \"%%d.%%d.%%d.%%d.%%d.%%d.\"]\n");
+  printf("  -1 format  format for level 1 [default \"%%d. \"]\n");
+  printf("  -2 format  format for level 2 [default \"%%d.%%d. \"]\n");
+  printf("  -3 format  format for level 3 [default \"%%d.%%d.%%d. \"]\n");
+  printf("  -4 format  format for level 4 [default \"%%d.%%d.%%d.%%d. \"]\n");
+  printf("  -5 format  format for level 5 [default \"%%d.%%d.%%d.%%d.%%d. \"]\n");
+  printf("  -6 format  format for level 6 [default \"%%d.%%d.%%d.%%d.%%d.%%d. \"]\n");
   printf("  -?         this help\n");
   printf("The format strings may contain:\n");
   printf("  %%d  replaced by decimal number\n");
@@ -272,9 +272,8 @@
 
 #ifdef HAVE_GETOPT_OPTRESET
   optreset = 1;
-#else
-  optind = 1;
 #endif
+  optind = 1;
 
   /* If -l and/or -h have been set, the default formats are different */
   if (low != 1 || high != 6) {
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' --exclude Makefile.in --exclude configure --exclude 
config.guess --exclude '*.pot' --exclude mkinstalldirs --exclude aclocal.m4 
--exclude config.sub --exclude depcomp --exclude install-sh --exclude ltmain.sh 
old/html-xml-utils-7.7/hxtoc.1 new/html-xml-utils-7.8/hxtoc.1
--- old/html-xml-utils-7.7/hxtoc.1      2018-02-23 15:29:48.000000000 +0100
+++ new/html-xml-utils-7.8/hxtoc.1      2019-08-28 21:04:29.000000000 +0200
@@ -35,11 +35,11 @@
 .B \-h
 (unless the option
 .B \-d
-is in effect, see below) and also inserts A elements with NAME
-attributes, so old browsers will recognize the H1 to H6 headers as
-target anchors as well (unless the option
+is in effect, see below). Unless the option
 .B \-t
-is in effect). The output is written to stdout.
+is given, it also inserts A elements with NAME attributes, because old
+browsers do not recognize ID attributes as target anchors. The output
+is written to stdout.
 .LP
 If there is a comment of the form
 .d
@@ -78,7 +78,7 @@
 .TP
 .BI \-h " high"
 Sets the highest numbered header to appear in the table of
-content. Default is 6 (i.e., H6).
+content. Default is unlimited.
 .TP
 .B \-t
 Normally,
@@ -89,20 +89,20 @@
 generated.
 .TP
 .BI \-c " class"
-The generated UL elements in the table of contents will have a CLASS attribute with the value
+The generated UL elements in the table of contents will have a CLASS
+attribute with the value
 .I class.
 The default is "toc".
 .TP
 .B \-d
 Tries to use sectioning elements as targets in the table of contents
 instead of H1 to H6. A sectioning elements is a DIV, SECTION, ARTICLE,
-ASIDE or NAV element whose first child is a heading element (H1 to H6)
-or an HGROUP. The sectioning element will be given an ID if it doesn't
-have one yet. With this option, the level of any H1 to H6 that is the
-first child of a sectioning element (or of an HGROUP that is itself
-the first child of a sectioning element) is not determined by its
+ASIDE or NAV element that contains at least one heading element (H1 to
+H6) or HGROUP. The sectioning element will be given an ID if it
+doesn't have one yet. With this option, the level of any H1 to H6 that
+is the first heading of a sectioning element is not determined by its
 name, but by the nesting depth of the sectioning elements. (Any H1 to
-H6 that are not the first child of a sectioning element still have
+H6 that are not the first heading of a sectioning element still have
 their level implied by their name.)
 .TP
 .B \-f
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' --exclude Makefile.in --exclude configure --exclude 
config.guess --exclude '*.pot' --exclude mkinstalldirs --exclude aclocal.m4 
--exclude config.sub --exclude depcomp --exclude install-sh --exclude ltmain.sh 
old/html-xml-utils-7.7/hxtoc.c new/html-xml-utils-7.8/hxtoc.c
--- old/html-xml-utils-7.7/hxtoc.c      2018-02-23 15:29:48.000000000 +0100
+++ new/html-xml-utils-7.8/hxtoc.c      2019-08-28 21:14:07.000000000 +0200
@@ -23,7 +23,7 @@
  *
  * Author: Bert Bos <[email protected]>
  * Created Sep 1997
- * Version: $Id: hxtoc.c,v 1.12 2017/11/24 09:50:25 bbos Exp $
+ * Version: $Id: hxtoc.c,v 1.13 2019/08/28 19:14:07 bbos Exp $
  *
  **/
 #include "config.h"
@@ -34,6 +34,7 @@
 #include <time.h>
 #include <stdbool.h>
 #include <getopt.h>
+#include <limits.h>
 #if STDC_HEADERS
 # include <string.h>
 #else
@@ -83,7 +84,7 @@
 #define INDENT " "                             /* Amount to indent ToC per level */
 
 static Tree tree;
-static int toc_low = 1, toc_high = 6;          /* Which headers to include */
+static int toc_low = 1, toc_high = INT_MAX;    /* Which headers to include */
 static bool xml = false;                       /* Use <empty /> convention */
 static bool bctarget = true;                   /* Generate <a name=> after IDs */
 static string toc_class = "toc";               /* <ul class="..."> */
@@ -103,7 +104,7 @@
   tree = create();
   return NULL;
 }
-  
+
 /* end -- called after the last event is reported */
 static void end(void *clientdata)
 {
@@ -189,35 +190,38 @@
   return 0;
 }
 
-/* div_parent -- if t is the first child of a section elt, return that elt */
+/* div_parent -- if t is the first heading in a section elt, return that elt */
 static Tree div_parent(Tree t)
 {
   Tree h, result = NULL;
 
   assert(t->tp == Element);
   assert(t->parent);
+  assert(eq(t->name, "hgroup") || heading_level(t) > 0);
   if (t->parent->tp != Element) return NULL;
   if (has_class(t->parent->attribs, NO_TOC)) return NULL;
   if (is_div(t->parent)) result = t->parent;
-  else if (!eq(t->parent->name, "header")) return NULL;
+  else if (!eq(t->parent->name, "hgroup")) return NULL;
   else if (!(result = div_parent(t->parent))) return NULL;
-  for (h = t->parent->children; h != t; h = h->sister) {
-    if (h->tp == Element) return NULL;
-    if (h->tp == Text && !only_space(h->text)) return NULL;
-  }
-  return result;
+
+  /* Check if t is the first heading in its parent. */
+  for (h = t->parent->children; h != t; h = h->sister)
+    if (h->tp == Element && (eq(h->name, "hgroup") || heading_level(h) > 0))
+      return NULL;             /* No, it's not */
+  return result;               /* Yes, it is */
 }
 
-/* first_child_is_heading -- true if first child is a Hn or HEADER */
-static bool first_child_is_heading(Tree t)
+/* has_heading -- true if the element has at least one Hn or HGROUP as child */
+static bool has_heading(Tree t)
 {
   Tree h;
 
   assert(t->tp == Element);
   for (h = t->children; h; h = h->sister) {
+#if 0
     switch (h->tp) {
     case Element:
-      return eq(h->name, "header") || heading_level(h) > 0;
+      return eq(h->name, "hgroup") || heading_level(h) > 0;
     case Text:
       if (!only_space(h->text))
        return false;
@@ -225,6 +229,10 @@
     default:
       break;
     }
+#else
+    if (h->tp == Element &&
+       (eq(h->name, "hgroup") || heading_level(h) > 0)) return true;
+#endif
   }
   return false;
 }
@@ -246,8 +254,8 @@
     case Declaration: break;
     case Procins: break;
     case Element:
-      if (use_div && is_div(t) && first_child_is_heading(t)) {
-       /* It's a section element with a heading as first child */
+      if (use_div && is_div(t) && has_heading(t)) {
+       /* It's a section element with a heading */
        div_depth++;
        level = 0;
       } else {
@@ -358,7 +366,7 @@
        if (*write) printf("<?%s>", h->text);
        break;
       case Element:
-       if (use_div && is_div(h) && first_child_is_heading(h)) {
+       if (use_div && is_div(h) && has_heading(h)) {
          /* It's a section element with a heading as first child */
          div_depth++;
          level = div_depth;
@@ -457,7 +465,6 @@
     }
   }
   if (toc_low < 1) toc_low = 1;
-  if (toc_high > 6) toc_high = 6;
 
   if (argc > optind + 1) {
     usage(argv[0]);
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' --exclude Makefile.in --exclude configure --exclude 
config.guess --exclude '*.pot' --exclude mkinstalldirs --exclude aclocal.m4 
--exclude config.sub --exclude depcomp --exclude install-sh --exclude ltmain.sh 
old/html-xml-utils-7.7/hxwls.c new/html-xml-utils-7.8/hxwls.c
--- old/html-xml-utils-7.7/hxwls.c      2018-02-23 15:29:48.000000000 +0100
+++ new/html-xml-utils-7.8/hxwls.c      2019-08-28 21:14:34.000000000 +0200
@@ -6,7 +6,7 @@
  *
  * Bert Bos <[email protected]>
  * Created 31 July 1999
- * $Id: hxwls.c,v 1.12 2017/12/07 02:05:23 bbos Exp $
+ * $Id: hxwls.c,v 1.13 2019/08/28 19:14:34 bbos Exp $
  */
 #include "config.h"
 #include <assert.h>
@@ -172,7 +172,7 @@
   }
   return NULL;
 }
-  
+
 /* end -- called after the last event is reported */
 void end(void *clientdata)
 {
@@ -228,13 +228,14 @@
     output("a", pairlist_get(attribs, "rel"), pairlist_get(attribs, "href"));
   } else if (strcasecmp(name, "img") == 0) {
     output("img", NULL, pairlist_get(attribs, "src"));
-    output("longdesc", NULL, pairlist_get(attribs, "longdesc"));
+    output("img", "longdesc", pairlist_get(attribs, "longdesc"));
+    output("img", "srcset", pairlist_get(attribs, "srcset"));
   } else if (strcasecmp(name, "input") == 0) {
-    output("input", NULL, pairlist_get(attribs, "src"));
+    output("input", "src", pairlist_get(attribs, "src"));
   } else if (strcasecmp(name, "object") == 0) {
     output("object", NULL,  pairlist_get(attribs, "data"));
-    output("object", NULL,  pairlist_get(attribs, "classid"));
-    output("object", NULL,  pairlist_get(attribs, "codebase"));
+    output("object", "classid",  pairlist_get(attribs, "classid"));
+    output("object", "codebase",  pairlist_get(attribs, "codebase"));
   } else if (strcasecmp(name, "area") == 0) {
     output("area", pairlist_get(attribs, "rel"), pairlist_get(attribs, "href"));
   } else if (strcasecmp(name, "ins") == 0) {
@@ -262,8 +263,8 @@
   } else if (strcasecmp(name, "audio") == 0) {
     output("audio", NULL, pairlist_get(attribs, "src"));
   } else if (strcasecmp(name, "source") == 0) {
-    output("source", NULL, pairlist_get(attribs, "srcset"));
-    output("source", NULL, pairlist_get(attribs, "src"));
+    output("source", "srcset", pairlist_get(attribs, "srcset"));
+    output("source", "src", pairlist_get(attribs, "src"));
   }
 
   /* Free memory */
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' --exclude Makefile.in --exclude configure --exclude 
config.guess --exclude '*.pot' --exclude mkinstalldirs --exclude aclocal.m4 
--exclude config.sub --exclude depcomp --exclude install-sh --exclude ltmain.sh 
old/html-xml-utils-7.7/langinfo.c new/html-xml-utils-7.8/langinfo.c
--- old/html-xml-utils-7.7/langinfo.c   1970-01-01 01:00:00.000000000 +0100
+++ new/html-xml-utils-7.8/langinfo.c   2019-10-06 01:27:29.000000000 +0200
@@ -0,0 +1,38 @@
+/*
+ * Information about natural languages, in particular if the language
+ * has spaces between words.
+ *
+ * Copyright © 1994-2012 World Wide Web Consortium
+ * See http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231
+ *
+ * Created 9 May 1998
+ * Bert Bos <[email protected]>
+ * $Id: langinfo.c,v 1.2 2019/10/05 23:25:46 bbos Exp $
+ */
+#include "config.h"
+#ifdef HAVE_STRING_H
+#  include <string.h>
+#elif HAVE_STRINGS_H
+#  include <strings.h>
+#endif
+#include <assert.h>
+#include <stdbool.h>
+#include "export.h"
+#include "types.e"
+
+
+/* with_spaces -- return true if the language has spaces between words */
+EXPORT bool with_spaces(const conststring lang)
+{
+  if (!lang) return true;      /* Default is with spaces */
+  if (eq(lang, "ja") || hasprefix(lang, "ja-")) return false; /* Japanese */
+  if (eq(lang, "zh") || hasprefix(lang, "zh-")) return false; /* Chinese */
+  if (eq(lang, "ko") || hasprefix(lang, "ko-")) return false; /* Korean */
+  if (eq(lang, "km") || hasprefix(lang, "km-")) return false; /* Khmer */
+  if (eq(lang, "th") || hasprefix(lang, "th-")) return false; /* Thai */
+#if 0
+  if (eq(lang, "lo") || hasprefix(lang, "lo-")) return false; /* Lao */
+  if (eq(lang, "my") || hasprefix(lang, "my-")) return false; /* Myanmar */
+#endif
+  return true;                 /* Other languages are with spaces */
+}
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' --exclude Makefile.in --exclude configure --exclude 
config.guess --exclude '*.pot' --exclude mkinstalldirs --exclude aclocal.m4 
--exclude config.sub --exclude depcomp --exclude install-sh --exclude ltmain.sh 
old/html-xml-utils-7.7/langinfo.e new/html-xml-utils-7.8/langinfo.e
--- old/html-xml-utils-7.7/langinfo.e   1970-01-01 01:00:00.000000000 +0100
+++ new/html-xml-utils-7.8/langinfo.e   2019-10-06 01:27:35.000000000 +0200
@@ -0,0 +1,2 @@
+extern _Bool 
+           with_spaces(const conststring lang);
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' --exclude Makefile.in --exclude configure --exclude 
config.guess --exclude '*.pot' --exclude mkinstalldirs --exclude aclocal.m4 
--exclude config.sub --exclude depcomp --exclude install-sh --exclude ltmain.sh 
old/html-xml-utils-7.7/tests/normalize13.sh 
new/html-xml-utils-7.8/tests/normalize13.sh
--- old/html-xml-utils-7.7/tests/normalize13.sh 1970-01-01 01:00:00.000000000 +0100
+++ new/html-xml-utils-7.8/tests/normalize13.sh 2018-08-03 02:02:02.000000000 +0200
@@ -0,0 +1,35 @@
+:
+trap 'rm $TMP1 $TMP2 $TMP3' 0
+TMP1=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1
+TMP2=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1
+TMP3=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1
+
+cat >$TMP1 <<-EOF
+       <!DOCTYPE html>
+
+       <html>
+       <head>
+       <style>
+       <![CDATA[123]]>
+       &lt;foo>
+       &amp;foo;&quot;&apos;&gt;
+       </style>
+       </head>
+       <body>
+       </body>
+EOF
+cat >$TMP2 <<-EOF
+       <!DOCTYPE html>
+
+       <html>
+       <head>
+       <style>
+       123
+       <foo>
+       &foo;"'>
+       </style>
+
+       <body>
+EOF
+./hxnormalize -X -i 0 -L $TMP1 >$TMP3
+diff -u $TMP2 $TMP3
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' --exclude Makefile.in --exclude configure --exclude 
config.guess --exclude '*.pot' --exclude mkinstalldirs --exclude aclocal.m4 
--exclude config.sub --exclude depcomp --exclude install-sh --exclude ltmain.sh 
old/html-xml-utils-7.7/tests/normalize14.sh 
new/html-xml-utils-7.8/tests/normalize14.sh
--- old/html-xml-utils-7.7/tests/normalize14.sh 1970-01-01 01:00:00.000000000 +0100
+++ new/html-xml-utils-7.8/tests/normalize14.sh 2019-10-05 19:12:14.000000000 +0200
@@ -0,0 +1,29 @@
+:
+trap 'rm $TMP1 $TMP2 $TMP3' 0
+TMP1=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1
+TMP2=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1
+TMP3=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1
+
+cat >$TMP1 <<EOF
+<!DOCTYPE html>
+<html lang=ja-JP>
+
+<p>PやH2内の各行をその
+  マージンの間に中央寄せして描画します。
+  すると、このようになります:
+
+<p lang=en>Here there
+  are spaces.
+EOF
+cat >$TMP2 <<-EOF
+<!DOCTYPE html>
+
+<html lang=ja-JP>
+  <body>
+    <p>PやH2内の各行をそのマージンの間に中央寄せして描画します。すると、このようになります:
+
+    <p lang=en>Here there are spaces.
+EOF
+./hxnormalize -i 2 $TMP1 >$TMP3
+
+diff -u $TMP2 $TMP3
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' --exclude Makefile.in --exclude configure --exclude 
config.guess --exclude '*.pot' --exclude mkinstalldirs --exclude aclocal.m4 
--exclude config.sub --exclude depcomp --exclude install-sh --exclude ltmain.sh 
old/html-xml-utils-7.7/tests/normalize5.sh 
new/html-xml-utils-7.8/tests/normalize5.sh
--- old/html-xml-utils-7.7/tests/normalize5.sh  2018-02-23 15:29:48.000000000 +0100
+++ new/html-xml-utils-7.8/tests/normalize5.sh  2018-08-03 02:03:16.000000000 +0200
@@ -17,10 +17,10 @@
 
        <html lang="en">
        <head>
-       <style><![CDATA[
-       /* no <elements> or <![CDATA[ mark-up here */
-       ]]></style></head>
+       <style>
+       /* no &lt;elements> or &lt;![CDATA[ mark-up here */
+       </style></head>
        </html>
 EOF
-./hxnormalize -i 0 -x $TMP1 | ./hxnormalize -i 0 -x >$TMP3
-cmp -s $TMP2 $TMP3
+./hxnormalize -i 0 -x $TMP1 | ./hxnormalize -i 0 -X -x >$TMP3
+diff -u $TMP2 $TMP3
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' --exclude Makefile.in --exclude configure --exclude 
config.guess --exclude '*.pot' --exclude mkinstalldirs --exclude aclocal.m4 
--exclude config.sub --exclude depcomp --exclude install-sh --exclude ltmain.sh 
old/html-xml-utils-7.7/tests/normalize6.sh 
new/html-xml-utils-7.8/tests/normalize6.sh
--- old/html-xml-utils-7.7/tests/normalize6.sh  2018-02-23 15:29:48.000000000 +0100
+++ new/html-xml-utils-7.8/tests/normalize6.sh  2018-08-03 02:03:16.000000000 +0200
@@ -28,5 +28,5 @@
        <body>
        <p>...
 EOF
-./hxnormalize -x $TMP1 | ./hxnormalize -i 0 >$TMP3
-cmp -s $TMP2 $TMP3
+./hxnormalize -x $TMP1 | ./hxnormalize -i 0 -X >$TMP3
+diff -u $TMP2 $TMP3
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' --exclude Makefile.in --exclude configure --exclude 
config.guess --exclude '*.pot' --exclude mkinstalldirs --exclude aclocal.m4 
--exclude config.sub --exclude depcomp --exclude install-sh --exclude ltmain.sh 
old/html-xml-utils-7.7/tests/num1.sh new/html-xml-utils-7.8/tests/num1.sh
--- old/html-xml-utils-7.7/tests/num1.sh        1970-01-01 01:00:00.000000000 +0100
+++ new/html-xml-utils-7.8/tests/num1.sh        2018-08-02 03:21:37.000000000 +0200
@@ -0,0 +1,22 @@
+:
+trap 'rm $TMP1 $TMP2 $TMP3' 0
+TMP1=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1
+TMP2=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1
+TMP3=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1
+
+cat >$TMP1 <<-EOF
+       <!DOCTYPE html>
+
+       <html lang=en>
+       <body>
+       <h2>Heading</h2>
+EOF
+cat >$TMP2 <<-EOF
+       <!DOCTYPE html>
+
+       <html lang="en">
+       <body>
+       <h2><span class="secno">A) </span>Heading</h2>
+EOF
+./hxnum -2 '%n%A) ' $TMP1 >$TMP3
+diff -u $TMP2 $TMP3
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' --exclude Makefile.in --exclude configure --exclude 
config.guess --exclude '*.pot' --exclude mkinstalldirs --exclude aclocal.m4 
--exclude config.sub --exclude depcomp --exclude install-sh --exclude ltmain.sh 
old/html-xml-utils-7.7/tests/num2.sh new/html-xml-utils-7.8/tests/num2.sh
--- old/html-xml-utils-7.7/tests/num2.sh        1970-01-01 01:00:00.000000000 +0100
+++ new/html-xml-utils-7.8/tests/num2.sh        2018-08-02 03:21:37.000000000 +0200
@@ -0,0 +1,22 @@
+:
+trap 'rm $TMP1 $TMP2 $TMP3' 0
+TMP1=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1
+TMP2=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1
+TMP3=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1
+
+cat >$TMP1 <<-EOF
+       <!DOCTYPE html>
+
+       <html lang=en>
+       <body>
+       <h2>Heading</h2>
+EOF
+cat >$TMP2 <<-EOF
+       <!DOCTYPE html>
+
+       <html lang="en">
+       <body>
+       <h2><span class="secno">a) </span>Heading</h2>
+EOF
+./hxnum -l 2 -2 '%a) ' $TMP1 >$TMP3
+diff -u $TMP2 $TMP3
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' --exclude Makefile.in --exclude configure --exclude 
config.guess --exclude '*.pot' --exclude mkinstalldirs --exclude aclocal.m4 
--exclude config.sub --exclude depcomp --exclude install-sh --exclude ltmain.sh 
old/html-xml-utils-7.7/tests/num3.sh new/html-xml-utils-7.8/tests/num3.sh
--- old/html-xml-utils-7.7/tests/num3.sh        1970-01-01 01:00:00.000000000 +0100
+++ new/html-xml-utils-7.8/tests/num3.sh        2018-07-26 00:57:00.000000000 +0200
@@ -0,0 +1,22 @@
+:
+trap 'rm $TMP1 $TMP2 $TMP3' 0
+TMP1=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1
+TMP2=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1
+TMP3=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1
+
+cat >$TMP1 <<-EOF
+       <!DOCTYPE html>
+
+       <html lang=en>
+       <body>
+       <h2>Heading</h2>
+EOF
+cat >$TMP2 <<-EOF
+       <!DOCTYPE html>
+
+       <html lang="en">
+       <body>
+       <h2>Heading</h2>
+EOF
+./hxnum -l 3 -2 '%a) ' $TMP1 >$TMP3
+diff -u $TMP2 $TMP3
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' --exclude Makefile.in --exclude configure --exclude 
config.guess --exclude '*.pot' --exclude mkinstalldirs --exclude aclocal.m4 
--exclude config.sub --exclude depcomp --exclude install-sh --exclude ltmain.sh 
old/html-xml-utils-7.7/tests/num4.sh new/html-xml-utils-7.8/tests/num4.sh
--- old/html-xml-utils-7.7/tests/num4.sh        1970-01-01 01:00:00.000000000 +0100
+++ new/html-xml-utils-7.8/tests/num4.sh        2018-07-26 00:57:00.000000000 +0200
@@ -0,0 +1,22 @@
+:
+trap 'rm $TMP1 $TMP2 $TMP3' 0
+TMP1=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1
+TMP2=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1
+TMP3=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1
+
+cat >$TMP1 <<-EOF
+       <!DOCTYPE html>
+
+       <html lang=en>
+       <body>
+       <h2>Heading</h2>
+EOF
+cat >$TMP2 <<-EOF
+       <!DOCTYPE html>
+
+       <html lang="en">
+       <body>
+       <h2><span class="secno">1. </span>Heading</h2>
+EOF
+./hxnum -l 2 $TMP1 >$TMP3
+diff -u $TMP2 $TMP3
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' --exclude Makefile.in --exclude configure --exclude 
config.guess --exclude '*.pot' --exclude mkinstalldirs --exclude aclocal.m4 
--exclude config.sub --exclude depcomp --exclude install-sh --exclude ltmain.sh 
old/html-xml-utils-7.7/tests/num5.sh new/html-xml-utils-7.8/tests/num5.sh
--- old/html-xml-utils-7.7/tests/num5.sh        1970-01-01 01:00:00.000000000 +0100
+++ new/html-xml-utils-7.8/tests/num5.sh        2018-08-02 03:21:07.000000000 +0200
@@ -0,0 +1,28 @@
+:
+trap 'rm $TMP1 $TMP2 $TMP3' 0
+TMP1=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1
+TMP2=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1
+TMP3=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1
+
+cat >$TMP1 <<-EOF
+       <!DOCTYPE html>
+
+       <html lang=en>
+       <body>
+       <h1>Heading</h1>
+       <h2>Heading</h2>
+       <h1>Heading</h1>
+       <h2>Heading</h2>
+EOF
+cat >$TMP2 <<-EOF
+       <!DOCTYPE html>
+
+       <html lang="en">
+       <body>
+       <h1><span class="secno">1. </span>Heading</h1>
+       <h2><span class="secno">1. </span>Heading</h2>
+       <h1><span class="secno">2. </span>Heading</h1>
+       <h2><span class="secno">1. </span>Heading</h2>
+EOF
+./hxnum -2 '%n%d. ' $TMP1 >$TMP3
+diff -u $TMP2 $TMP3
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' --exclude Makefile.in --exclude configure --exclude 
config.guess --exclude '*.pot' --exclude mkinstalldirs --exclude aclocal.m4 
--exclude config.sub --exclude depcomp --exclude install-sh --exclude ltmain.sh 
old/html-xml-utils-7.7/tests/num6.sh new/html-xml-utils-7.8/tests/num6.sh
--- old/html-xml-utils-7.7/tests/num6.sh        1970-01-01 01:00:00.000000000 +0100
+++ new/html-xml-utils-7.8/tests/num6.sh        2018-08-02 03:21:07.000000000 +0200
@@ -0,0 +1,28 @@
+:
+trap 'rm $TMP1 $TMP2 $TMP3' 0
+TMP1=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1
+TMP2=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1
+TMP3=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1
+
+cat >$TMP1 <<-EOF
+       <!DOCTYPE html>
+
+       <html lang=en>
+       <body>
+       <h1>Heading</h1>
+       <h2>Heading</h2>
+       <h1>Heading</h1>
+       <h2>Heading</h2>
+EOF
+cat >$TMP2 <<-EOF
+       <!DOCTYPE html>
+
+       <html lang="en">
+       <body>
+       <h1>Heading</h1>
+       <h2><span class="secno">1. </span>Heading</h2>
+       <h1>Heading</h1>
+       <h2><span class="secno">2. </span>Heading</h2>
+EOF
+./hxnum -l 2 -2 '%d. ' $TMP1 >$TMP3
+diff -u $TMP2 $TMP3
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' --exclude Makefile.in --exclude configure --exclude 
config.guess --exclude '*.pot' --exclude mkinstalldirs --exclude aclocal.m4 
--exclude config.sub --exclude depcomp --exclude install-sh --exclude ltmain.sh 
old/html-xml-utils-7.7/tests/toc4.sh new/html-xml-utils-7.8/tests/toc4.sh
--- old/html-xml-utils-7.7/tests/toc4.sh        2018-02-23 15:29:48.000000000 +0100
+++ new/html-xml-utils-7.8/tests/toc4.sh        2019-08-28 19:28:56.000000000 +0200
@@ -1,5 +1,5 @@
 :
-# Test if hxtoc treatc some HTML5 elements correctly
+# Test if hxtoc treats some HTML5 elements correctly
 
 trap 'rm $TMP1 $TMP2 $TMP3' 0
 TMP1=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' --exclude Makefile.in --exclude configure --exclude 
config.guess --exclude '*.pot' --exclude mkinstalldirs --exclude aclocal.m4 
--exclude config.sub --exclude depcomp --exclude install-sh --exclude ltmain.sh 
old/html-xml-utils-7.7/tests/toc5.sh new/html-xml-utils-7.8/tests/toc5.sh
--- old/html-xml-utils-7.7/tests/toc5.sh        1970-01-01 01:00:00.000000000 +0100
+++ new/html-xml-utils-7.8/tests/toc5.sh        2019-08-28 20:50:44.000000000 +0200
@@ -0,0 +1,42 @@
+:
+trap 'rm $TMP1 $TMP2' 0
+TMP1=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1
+TMP2=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1
+
+./hxtoc -d -t >$TMP1 <<-EOF
+       <title>Test</title>
+       <h1>Document heading</h1>
+       <!--toc-->
+       <section id=sec1>
+       <p>Multiple headings in this section.
+       <h1>Second-level heading</h1>
+       <h3>Third-level heading</h3>
+       <section id=sec2>
+       <hgroup><h6>Another third-level heading</h6></hgroup>
+       </section>
+       </section>
+EOF
+echo >>$TMP1                   # Add newline
+
+cat >$TMP2 <<-EOF
+<html><head><title>Test</title></head><body><h1 id="document-heading">Document heading</h1><!--begin-toc-->
+<ul class="toc">
+<li><a href="#document-heading">Document heading</a>
+ <ul class="toc">
+ <li><a href="#sec1">Second-level heading</a>
+  <ul class="toc">
+  <li><a href="#third-level-heading">Third-level heading</a>
+  <li><a href="#sec2">Another third-level heading</a>
+  </ul>
+ </ul></ul>
+<!--end-toc--><section id="sec1">
+<p>Multiple headings in this section.
+</p><h1>Second-level heading</h1>
+<h3 id="third-level-heading">Third-level heading</h3>
+<section id="sec2">
+<hgroup><h6>Another third-level heading</h6></hgroup>
+</section>
+</section></body></html>
+EOF
+
+diff -u $TMP2 $TMP1
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' --exclude Makefile.in --exclude configure --exclude 
config.guess --exclude '*.pot' --exclude mkinstalldirs --exclude aclocal.m4 
--exclude config.sub --exclude depcomp --exclude install-sh --exclude ltmain.sh 
old/html-xml-utils-7.7/tests/toc6.sh new/html-xml-utils-7.8/tests/toc6.sh
--- old/html-xml-utils-7.7/tests/toc6.sh        1970-01-01 01:00:00.000000000 +0100
+++ new/html-xml-utils-7.8/tests/toc6.sh        2019-08-28 21:01:32.000000000 +0200
@@ -0,0 +1,88 @@
+:
+trap 'rm $TMP1 $TMP2' 0
+TMP1=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1
+TMP2=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1
+
+./hxtoc -d -t -h 8 >$TMP1 <<-EOF
+       <title>Test</title>
+       <h1>Document heading</h1>
+       <!--toc-->
+       <section id=sec2>
+       <h1>Second-level heading</h1>
+       <section id=sec3>
+       <h1>Third-level heading</h1>
+       <section id=sec4>
+       <h1>Fourth-level heading</h1>
+       <section id=sec5>
+       <h1>Fifth-level heading</h1>
+       <section id=sec6>
+       <h1>Sixth-level heading</h1>
+       <section id=sec7>
+       <h1>Seventh-level heading</h1>
+       <section id=sec8>
+       <h1>Eight-level heading</h1>
+       <section id=sec9>
+       <h1>Ninth-level heading</h1>
+       </section>
+       </section>
+       </section>
+       </section>
+       </section>
+       </section>
+       </section>
+       </section>
+EOF
+echo >>$TMP1                   # Add newline
+
+cat >$TMP2 <<-EOF
+<html><head><title>Test</title></head><body><h1 id="document-heading">Document heading</h1><!--begin-toc-->
+<ul class="toc">
+<li><a href="#document-heading">Document heading</a>
+ <ul class="toc">
+ <li><a href="#sec2">Second-level heading</a>
+  <ul class="toc">
+  <li><a href="#sec3">Third-level heading</a>
+   <ul class="toc">
+   <li><a href="#sec4">Fourth-level heading</a>
+    <ul class="toc">
+    <li><a href="#sec5">Fifth-level heading</a>
+     <ul class="toc">
+     <li><a href="#sec6">Sixth-level heading</a>
+      <ul class="toc">
+      <li><a href="#sec7">Seventh-level heading</a>
+       <ul class="toc">
+       <li><a href="#sec8">Eight-level heading</a>
+       </ul>
+      </ul>
+     </ul>
+    </ul>
+   </ul>
+  </ul>
+ </ul></ul>
+<!--end-toc--><section id="sec2">
+<h1>Second-level heading</h1>
+<section id="sec3">
+<h1>Third-level heading</h1>
+<section id="sec4">
+<h1>Fourth-level heading</h1>
+<section id="sec5">
+<h1>Fifth-level heading</h1>
+<section id="sec6">
+<h1>Sixth-level heading</h1>
+<section id="sec7">
+<h1>Seventh-level heading</h1>
+<section id="sec8">
+<h1>Eight-level heading</h1>
+<section id="sec9">
+<h1>Ninth-level heading</h1>
+</section>
+</section>
+</section>
+</section>
+</section>
+</section>
+</section>
+</section></body></html>
+EOF
+
+diff -u $TMP2 $TMP1
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' --exclude Makefile.in --exclude configure --exclude 
config.guess --exclude '*.pot' --exclude mkinstalldirs --exclude aclocal.m4 
--exclude config.sub --exclude depcomp --exclude install-sh --exclude ltmain.sh 
old/html-xml-utils-7.7/tests/wls6.sh new/html-xml-utils-7.8/tests/wls6.sh
--- old/html-xml-utils-7.7/tests/wls6.sh        1970-01-01 01:00:00.000000000 +0100
+++ new/html-xml-utils-7.8/tests/wls6.sh        2019-07-08 14:35:30.000000000 +0200
@@ -0,0 +1,23 @@
+:
+trap 'rm $TMP1 $TMP2 $TMP3' 0
+TMP1=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1
+TMP2=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1
+TMP3=`mktemp /tmp/tmp.XXXXXXXXXX` || exit 1
+
+cat >$TMP1 <<-EOF
+       <link rel=stylesheet href="http://example.org/style.css">
+       <a href="foo.html">.</a>
+       <img src="bar/foo.png">
+       <img src="../bar/foo.png">
+       <img src="../bar/foo.png" srcset="../bar/foo.svg">
+EOF
+cat >$TMP2 <<-EOF
+       link    stylesheet      http://example.org/style.css
+       a               http://example.org/othersub/foo.html
+       img             http://example.org/othersub/bar/foo.png
+       img             http://example.org/bar/foo.png
+       img             http://example.org/bar/foo.png
+       img     srcset  http://example.org/bar/foo.svg
+EOF
+./hxwls -b http://example.org/othersub/base -l $TMP1 >$TMP3
+diff -u $TMP2 $TMP3
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' --exclude Makefile.in --exclude configure --exclude 
config.guess --exclude '*.pot' --exclude mkinstalldirs --exclude aclocal.m4 
--exclude config.sub --exclude depcomp --exclude install-sh --exclude ltmain.sh 
old/html-xml-utils-7.7/textwrap.c new/html-xml-utils-7.8/textwrap.c
--- old/html-xml-utils-7.7/textwrap.c   2018-02-23 15:29:48.000000000 +0100
+++ new/html-xml-utils-7.8/textwrap.c   2019-10-05 19:49:50.000000000 +0200
@@ -8,7 +8,7 @@
  *
  * Bert Bos
  * Created 10 May 1998
- * $Id: textwrap.c,v 1.32 2017/11/24 10:14:49 bbos Exp $
+ * $Id: textwrap.c,v 1.34 2019/10/05 17:49:44 bbos Exp $
  */
 #include "config.h"
 #include <stdio.h>
@@ -63,7 +63,7 @@
       else if ((buf[j] & 0xC0) != 0x80) k++; /* Start of a UTF-8 sequence */
     if (i < 0) break;                          /* No breakpoint */
     assert(i >= 0);                            /* Found a breakpoint at i */
-    assert(buf[i] == ' ' || buf[i]==BREAKOP);
+    assert(buf[i] == ' ' || buf[i] == BREAKOP);
     /* Print up to breakpoint (removing non-break-space markers) */
     for (j = 0; j < i; j++)
       if (buf[j] != BREAKOP) {
@@ -98,17 +98,24 @@
 }
 
 /* outc -- add one character to output buffer */
-EXPORT void outc(char c, bool preformatted)
+EXPORT void outc(char c, bool preformatted, bool with_space)
 {
-  if (c == '\n' && !preformatted) c = ' ';     /* Newline is just a space */
-  if (c == '\r' && !preformatted) c = ' ';     /* CR is just a space */
-  if (c == '\t' && !preformatted) c = ' ';     /* Tab is just a space */
-  if (c == '\f' && !preformatted) c = ' ';     /* Formfeed is just a space */
-  if (c == ' ' && preformatted) c = NBSP;      /* Non-break-space marker */
-  if (c == ' ' && prev == ' ') return;         /* Don't add another space */
+  if (c == '\n' || c == '\r' || c == '\f') {
+    if (preformatted) ;                  /* Keep unchanged */
+    else if (with_space) c = ' '; /* Treated as space */
+    else c = BREAKOP;            /* Treated as a break opportunity */
+  } else if (c == '\t') {
+    if (preformatted) ;                  /* Keep unchanged */
+    else c = ' ';                /* Tab is just a space */
+  }
+  if (c == ' ') {
+    if (preformatted) c = NBSP;          /* Non-break-space marker */
+    else if (prev == ' ') return; /* Don't add another space */
+    else if (prev == BREAKOP) return; /* Don't add a space after \n or similar */
+  }
   if ((c == ' ' || c == BREAKOP) && linelen + bufchars >= maxlinelen) flush();
   if (c == '\n' || c == '\r' || c == '\f') flush(); /* Empty the buf */
-  if (c == ' ' && linelen + len == 0) return;  /* No ins at BOL */
+  if (c == ' ' && linelen + len == 0) return;  /* No insert at BOL */
   while (level * indent >= buflen) {buflen += 1024; renewarray(buf, buflen);}
   if (linelen + len == 0 && !preformatted)
     while (len < level * indent) {buf[len++] = NBSP; bufchars++;}
@@ -120,22 +127,22 @@
 }
 
 /* out -- add text to current output line, print line if getting too long */
-EXPORT void out(string s, bool preformatted)
+EXPORT void out(string s, bool preformatted, bool with_space)
 {
-  if (s) for (; *s; s++) outc(*s, preformatted);
+  if (s) for (; *s; s++) outc(*s, preformatted, with_space);
 }
 
 /* outn -- add n chars to current output, print line if getting too long */
-EXPORT void outn(string s, size_t n, bool preformatted)
+EXPORT void outn(string s, size_t n, bool preformatted, bool with_space)
 {
   size_t i;
-  for (i = 0; i < n; i++) outc(s[i], preformatted);
+  for (i = 0; i < n; i++) outc(s[i], preformatted, with_space);
 }
 
 /* outln -- add string to output buffer, followed by '\n' */
-EXPORT void outln(char *s, bool preformatted)
+EXPORT void outln(char *s, bool preformatted, bool with_space)
 {
-  out(s, preformatted);
+  out(s, preformatted, with_space);
   flush();
   assert(len == 0);
   assert(bufchars == 0);
@@ -158,7 +165,7 @@
 /* outbreakpoint -- mark a possible line break point */
 EXPORT void outbreakpoint(void)
 {
-  outc(BREAKOP, false);
+  outc(BREAKOP, false, true);
 }
 
 
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' --exclude Makefile.in --exclude configure --exclude config.guess --exclude '*.pot' --exclude mkinstalldirs --exclude aclocal.m4 --exclude config.sub --exclude depcomp --exclude install-sh --exclude ltmain.sh old/html-xml-utils-7.7/textwrap.e new/html-xml-utils-7.8/textwrap.e
--- old/html-xml-utils-7.7/textwrap.e   2018-02-23 15:29:48.000000000 +0100
+++ new/html-xml-utils-7.8/textwrap.e   2019-10-05 19:51:09.000000000 +0200
@@ -3,16 +3,24 @@
 extern void flush();
 extern void outc(char c, 
                         _Bool 
-                             preformatted);
+                             preformatted, 
+                                           _Bool 
+                                                with_space);
 extern void out(string s, 
                          _Bool 
-                              preformatted);
+                              preformatted, 
+                                            _Bool 
+                                                 with_space);
 extern void outn(string s, size_t n, 
                                     _Bool 
-                                         preformatted);
+                                         preformatted, 
+                                                       _Bool 
+                                                            with_space);
 extern void outln(char *s, 
                           _Bool 
-                               preformatted);
+                               preformatted, 
+                                             _Bool 
+                                                  with_space);
 extern void outbreak(void);
 extern void outbreakpoint(void);
 extern void inc_indent(void);
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' --exclude Makefile.in --exclude configure --exclude config.guess --exclude '*.pot' --exclude mkinstalldirs --exclude aclocal.m4 --exclude config.sub --exclude depcomp --exclude install-sh --exclude ltmain.sh old/html-xml-utils-7.7/types.e new/html-xml-utils-7.8/types.e
--- old/html-xml-utils-7.7/types.e      2018-02-23 15:29:48.000000000 +0100
+++ new/html-xml-utils-7.8/types.e      2019-08-28 19:33:59.000000000 +0200
@@ -24,12 +24,16 @@
 extern conststring pairlist_get(pairlist p, const conststring name);
 extern void pairlist_set(pairlist *p, const conststring name,
     const conststring val);
-extern _Bool pairlist_unset(pairlist *p, const conststring name);
+extern _Bool 
+           pairlist_unset(pairlist *p, const conststring name);
 extern string strapp(string *s,...);
 extern void chomp(string s);
 extern int min(int a, int b);
 extern int max(int a, int b);
 extern string down(const string s);
-extern _Bool hasprefix(conststring s, conststring prefix);
-extern _Bool hasaffix(conststring s, conststring affix);
-extern _Bool only_space(conststring s);
+extern _Bool 
+           hasprefix(conststring s, conststring prefix);
+extern _Bool 
+           hasaffix(conststring s, conststring affix);
+extern _Bool 
+           only_space(conststring s);


Reply via email to