Update libcmark source

Update to commit 8796258400fc07a653172ba9d808374fdcca7936 from
Dec 9 13:01:14 2014 -0800, mainly to fix -Wextra warnings.


Project: http://git-wip-us.apache.org/repos/asf/lucy-clownfish/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucy-clownfish/commit/3d7c5ed3
Tree: http://git-wip-us.apache.org/repos/asf/lucy-clownfish/tree/3d7c5ed3
Diff: http://git-wip-us.apache.org/repos/asf/lucy-clownfish/diff/3d7c5ed3

Branch: refs/heads/markdown_v2
Commit: 3d7c5ed360815d66f2c42a596cf2657b411ca6ce
Parents: 8776f81
Author: Nick Wellnhofer <[email protected]>
Authored: Wed Dec 10 17:47:23 2014 +0100
Committer: Nick Wellnhofer <[email protected]>
Committed: Wed Dec 10 17:47:23 2014 +0100

----------------------------------------------------------------------
 compiler/modules/CommonMark/README.md           |    39 +-
 compiler/modules/CommonMark/src/bench.h         |    27 +
 compiler/modules/CommonMark/src/blocks.c        |     9 +-
 compiler/modules/CommonMark/src/cmark.c         |     2 +-
 compiler/modules/CommonMark/src/cmark.h         |     3 +
 compiler/modules/CommonMark/src/houdini.h       |    52 +
 .../modules/CommonMark/src/houdini_href_e.c     |   107 +
 .../modules/CommonMark/src/houdini_html_e.c     |    81 +
 .../modules/CommonMark/src/houdini_html_u.c     |   112 +
 compiler/modules/CommonMark/src/html.c          |   357 +
 compiler/modules/CommonMark/src/html/houdini.h  |    52 -
 .../CommonMark/src/html/houdini_href_e.c        |   107 -
 .../CommonMark/src/html/houdini_html_e.c        |    81 -
 .../CommonMark/src/html/houdini_html_u.c        |   112 -
 compiler/modules/CommonMark/src/html/html.c     |   357 -
 .../CommonMark/src/html/html_unescape.gperf     |  2131 ----
 .../modules/CommonMark/src/html/html_unescape.h |  9736 ---------------
 .../modules/CommonMark/src/html_unescape.gperf  |  2131 ++++
 compiler/modules/CommonMark/src/html_unescape.h | 10782 +++++++++++++++++
 compiler/modules/CommonMark/src/inlines.c       |     2 +-
 compiler/modules/CommonMark/src/libcmark.pc.in  |    10 +
 compiler/modules/CommonMark/src/node.c          |   134 +-
 compiler/modules/CommonMark/src/scanners.c      |    10 +-
 compiler/modules/CommonMark/src/scanners.h      |     1 +
 24 files changed, 13818 insertions(+), 12617 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/3d7c5ed3/compiler/modules/CommonMark/README.md
----------------------------------------------------------------------
diff --git a/compiler/modules/CommonMark/README.md b/compiler/modules/CommonMark/README.md
index f56dc3e..8db4c1f 100644
--- a/compiler/modules/CommonMark/README.md
+++ b/compiler/modules/CommonMark/README.md
@@ -15,8 +15,9 @@ standalone program `cmark` that converts CommonMark to HTML.  It is
 written in standard C99 and has no library dependencies.  The parser is
 very fast (see [benchmarks](benchmarks.md)).
 
-It is easy to use `libcmark` in python or ruby code:  see `wrapper.py`
-and `wrapper.rb` in the repository for simple examples.
+It is easy to use `libcmark` in python, lua, ruby, and other dynamic
+languages: see `wrapper.py`, `wrapper.lua`, and `wrapper.rb` in the
+repository for simple examples.
 
 The JavaScript implementation is a single JavaScript file, with
 no dependencies, that can be linked to in an HTML page.  Here
@@ -43,15 +44,15 @@ Installing (C)
 --------------
 
 Building the C program (`cmark`) and shared library (`libcmark`)
-requires [cmake] and [re2c], which is used to generate `scanners.c` from
-`scanners.re`.  (Note that [re2c] is only a build dependency for
-developers, since `scanners.c` can be provided in a released source
-tarball.)
+requires [cmake].  If you modify `scanners.re`, then you will also
+need [re2c], which is used to generate `scanners.c` from
+`scanners.re`.  We have included a pre-generated `scanners.c` in
+the repository to reduce build dependencies.
 
 If you have GNU make, you can simply `make`, `make test`, and `make
 install`.  This calls [cmake] to create a `Makefile` in the `build`
 directory, then uses that `Makefile` to create the executable and
-library.
+library.  The binaries can be found in `build/src`.
 
 For a more portable method, you can use [cmake] manually. [cmake] knows
 how to create build environments for many build systems.  For example,
@@ -73,17 +74,6 @@ Or, to create Xcode project files on OSX:
     make test
     make install
 
-Tests can also be run manually on any executable `$PROG` using:
-
-    python test/spec_tests.py --program $PROG
-
-If you want to extract the raw test data from the spec without
-actually running the tests, you can do:
-
-    python test/spec_tests.py --dump-tests
-
-and you'll get all the tests in JSON format.
-
 The GNU Makefile also provides a few other targets for developers.
 To run a "fuzz test" against ten long randomly generated inputs:
 
@@ -141,9 +131,16 @@ The spec
 --------
 
 [The spec] contains over 500 embedded examples which serve as conformance
-tests.  To run the tests for `cmark`, do `make test`.  To run them for
-another Markdown program, say `myprog`, do `make test PROG=myprog`.  To
-run the tests for `commonmark.js`, do `make testjs`.
+tests. To run the tests using an executable `$PROG`:
+
+    python test/spec_tests.py --program $PROG
+
+If you want to extract the raw test data from the spec without
+actually running the tests, you can do:
+
+    python test/spec_tests.py --dump-tests
+
+and you'll get all the tests in JSON format.
 
 [The spec]:  http://jgm.github.io/CommonMark/spec.html
 

http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/3d7c5ed3/compiler/modules/CommonMark/src/bench.h
----------------------------------------------------------------------
diff --git a/compiler/modules/CommonMark/src/bench.h b/compiler/modules/CommonMark/src/bench.h
new file mode 100644
index 0000000..bbea2c6
--- /dev/null
+++ b/compiler/modules/CommonMark/src/bench.h
@@ -0,0 +1,27 @@
+#ifndef CMARK_BENCH_H
+#define CMARK_BENCH_H
+
+#include <stdio.h>
+#include <time.h>
+
+#ifdef TIMER
+float _cmark_start_time;
+float _cmark_end_time;
+float _cmark_save_time;
+
+#define start_timer() \
+       _cmark_save_time = _cmark_start_time; \
+       _cmark_start_time = (float)clock()/CLOCKS_PER_SEC
+
+#define end_timer(M) \
+       _cmark_end_time = (float)clock()/CLOCKS_PER_SEC; \
+       fprintf(stderr, "[TIME] (%s:%d) %4.f ns " M "\n", __FILE__, \
+               __LINE__, (_cmark_end_time - _cmark_start_time) * 1000000); \
+       _cmark_start_time = _cmark_save_time;
+
+#else
+#define start_timer()
+#define end_timer(M)
+#endif
+
+#endif

http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/3d7c5ed3/compiler/modules/CommonMark/src/blocks.c
----------------------------------------------------------------------
diff --git a/compiler/modules/CommonMark/src/blocks.c b/compiler/modules/CommonMark/src/blocks.c
index 5d11710..ebef88b 100644
--- a/compiler/modules/CommonMark/src/blocks.c
+++ b/compiler/modules/CommonMark/src/blocks.c
@@ -11,7 +11,7 @@
 #include "utf8.h"
 #include "scanners.h"
 #include "inlines.h"
-#include "html/houdini.h"
+#include "houdini.h"
 #include "buffer.h"
 #include "debug.h"
 
@@ -435,6 +435,9 @@ cmark_node *cmark_parse_file(FILE *f)
        while ((bytes = fread(buffer, 1, sizeof(buffer), f)) > 0) {
                bool eof = bytes < sizeof(buffer);
                S_parser_feed(parser, buffer, bytes, eof);
+               if (eof) {
+                       break;
+               }
        }
 
        document = cmark_parser_finish(parser);
@@ -786,8 +789,8 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes)
        container->last_line_blank = (blank &&
                        container->type != NODE_BLOCK_QUOTE &&
                        container->type != NODE_HEADER &&
-                       (container->type != NODE_CODE_BLOCK &&
-                        container->as.code.fenced) &&
+                       !(container->type == NODE_CODE_BLOCK &&
+                               container->as.code.fenced) &&
                        !(container->type == NODE_LIST_ITEM &&
                                container->first_child == NULL &&
                                container->start_line == parser->line_number));

http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/3d7c5ed3/compiler/modules/CommonMark/src/cmark.c
----------------------------------------------------------------------
diff --git a/compiler/modules/CommonMark/src/cmark.c b/compiler/modules/CommonMark/src/cmark.c
index 140a14c..16817b9 100644
--- a/compiler/modules/CommonMark/src/cmark.c
+++ b/compiler/modules/CommonMark/src/cmark.c
@@ -2,7 +2,7 @@
 #include <assert.h>
 #include <stdio.h>
 #include "node.h"
-#include "html/houdini.h"
+#include "houdini.h"
 #include "cmark.h"
 #include "buffer.h"
 

http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/3d7c5ed3/compiler/modules/CommonMark/src/cmark.h
----------------------------------------------------------------------
diff --git a/compiler/modules/CommonMark/src/cmark.h b/compiler/modules/CommonMark/src/cmark.h
index cae4426..f96cea9 100644
--- a/compiler/modules/CommonMark/src/cmark.h
+++ b/compiler/modules/CommonMark/src/cmark.h
@@ -34,6 +34,9 @@ char *cmark_markdown_to_html(const char *text, int len);
 /**
  */
 typedef enum {
+       /* Error status */
+       CMARK_NODE_NONE,
+
        /* Block */
        CMARK_NODE_DOCUMENT,
        CMARK_NODE_BLOCK_QUOTE,

http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/3d7c5ed3/compiler/modules/CommonMark/src/houdini.h
----------------------------------------------------------------------
diff --git a/compiler/modules/CommonMark/src/houdini.h b/compiler/modules/CommonMark/src/houdini.h
new file mode 100644
index 0000000..9e1200e
--- /dev/null
+++ b/compiler/modules/CommonMark/src/houdini.h
@@ -0,0 +1,52 @@
+#ifndef CMARK_HOUDINI_H
+#define CMARK_HOUDINI_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+#include "config.h"
+#include "buffer.h"
+
+#ifdef HAVE___BUILTIN_EXPECT
+#      define likely(x)        __builtin_expect((x),1)
+#      define unlikely(x)      __builtin_expect((x),0)
+#else
+#      define likely(x)        (x)
+#      define unlikely(x)      (x)
+#endif
+
+#ifdef HOUDINI_USE_LOCALE
+#      define _isxdigit(c) isxdigit(c)
+#      define _isdigit(c) isdigit(c)
+#else
+/*
+ * Helper _isdigit methods -- do not trust the current locale
+ * */
+#      define _isxdigit(c) (strchr("0123456789ABCDEFabcdef", (c)) != NULL)
+#      define _isdigit(c) ((c) >= '0' && (c) <= '9')
+#endif
+
+#define HOUDINI_ESCAPED_SIZE(x) (((x) * 12) / 10)
+#define HOUDINI_UNESCAPED_SIZE(x) (x)
+
+extern size_t houdini_unescape_ent(strbuf *ob, const uint8_t *src, size_t size);
+extern int houdini_escape_html(strbuf *ob, const uint8_t *src, size_t size);
+extern int houdini_escape_html0(strbuf *ob, const uint8_t *src, size_t size, int secure);
+extern int houdini_unescape_html(strbuf *ob, const uint8_t *src, size_t size);
+extern void houdini_unescape_html_f(strbuf *ob, const uint8_t *src, size_t size);
+extern int houdini_escape_xml(strbuf *ob, const uint8_t *src, size_t size);
+extern int houdini_escape_uri(strbuf *ob, const uint8_t *src, size_t size);
+extern int houdini_escape_url(strbuf *ob, const uint8_t *src, size_t size);
+extern int houdini_escape_href(strbuf *ob, const uint8_t *src, size_t size);
+extern int houdini_unescape_uri(strbuf *ob, const uint8_t *src, size_t size);
+extern int houdini_unescape_url(strbuf *ob, const uint8_t *src, size_t size);
+extern int houdini_escape_js(strbuf *ob, const uint8_t *src, size_t size);
+extern int houdini_unescape_js(strbuf *ob, const uint8_t *src, size_t size);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif

http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/3d7c5ed3/compiler/modules/CommonMark/src/houdini_href_e.c
----------------------------------------------------------------------
diff --git a/compiler/modules/CommonMark/src/houdini_href_e.c b/compiler/modules/CommonMark/src/houdini_href_e.c
new file mode 100644
index 0000000..1c99432
--- /dev/null
+++ b/compiler/modules/CommonMark/src/houdini_href_e.c
@@ -0,0 +1,107 @@
+#include <assert.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "houdini.h"
+
+/*
+ * The following characters will not be escaped:
+ *
+ *             -_.+!*'(),%#@?=;:/,+&$ alphanum
+ *
+ * Note that this character set is the addition of:
+ *
+ *     - The characters which are safe to be in an URL
+ *     - The characters which are *not* safe to be in
+ *     an URL because they are RESERVED characters.
+ *
+ * We assume (lazily) that any RESERVED char that
+ * appears inside an URL is actually meant to
+ * have its native function (i.e. as an URL
+ * component/separator) and hence needs no escaping.
+ *
+ * There are two exceptions: the characters & (amp)
+ * and ' (single quote) do not appear in the table.
+ * They are meant to appear in the URL as components,
+ * yet they require special HTML-entity escaping
+ * to generate valid HTML markup.
+ *
+ * All other characters will be escaped to %XX.
+ *
+ */
+static const char HREF_SAFE[] = {
+       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+       0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
+       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1,
+       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
+       0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
+       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+};
+
+int
+houdini_escape_href(strbuf *ob, const uint8_t *src, size_t size)
+{
+       static const uint8_t hex_chars[] = "0123456789ABCDEF";
+       size_t  i = 0, org;
+       uint8_t hex_str[3];
+
+       hex_str[0] = '%';
+
+       while (i < size) {
+               org = i;
+               while (i < size && HREF_SAFE[src[i]] != 0)
+                       i++;
+
+               if (likely(i > org))
+                       strbuf_put(ob, src + org, i - org);
+
+               /* escaping */
+               if (i >= size)
+                       break;
+
+               switch (src[i]) {
+               /* amp appears all the time in URLs, but needs
+                * HTML-entity escaping to be inside an href */
+               case '&':
+                       strbuf_puts(ob, "&amp;");
+                       break;
+
+               /* the single quote is a valid URL character
+                * according to the standard; it needs HTML
+                * entity escaping too */
+               case '\'':
+                       strbuf_puts(ob, "&#x27;");
+                       break;
+
+               /* the space can be escaped to %20 or a plus
+                * sign. we're going with the generic escape
+                * for now. the plus thing is more commonly seen
+                * when building GET strings */
+#if 0
+               case ' ':
+                       strbuf_putc(ob, '+');
+                       break;
+#endif
+
+               /* every other character goes with a %XX escaping */
+               default:
+                       hex_str[1] = hex_chars[(src[i] >> 4) & 0xF];
+                       hex_str[2] = hex_chars[src[i] & 0xF];
+                       strbuf_put(ob, hex_str, 3);
+               }
+
+               i++;
+       }
+
+       return 1;
+}

http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/3d7c5ed3/compiler/modules/CommonMark/src/houdini_html_e.c
----------------------------------------------------------------------
diff --git a/compiler/modules/CommonMark/src/houdini_html_e.c b/compiler/modules/CommonMark/src/houdini_html_e.c
new file mode 100644
index 0000000..db5034b
--- /dev/null
+++ b/compiler/modules/CommonMark/src/houdini_html_e.c
@@ -0,0 +1,81 @@
+#include <assert.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "houdini.h"
+
+/**
+ * According to the OWASP rules:
+ *
+ * & --> &amp;
+ * < --> &lt;
+ * > --> &gt;
+ * " --> &quot;
+ * ' --> &#x27;     &apos; is not recommended
+ * / --> &#x2F;     forward slash is included as it helps end an HTML entity
+ *
+ */
+static const char HTML_ESCAPE_TABLE[] = {
+       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+       0, 0, 1, 0, 0, 0, 2, 3, 0, 0, 0, 0, 0, 0, 0, 4,
+       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 6, 0,
+       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+};
+
+static const char *HTML_ESCAPES[] = {
+        "",
+        "&quot;",
+        "&amp;",
+        "&#39;",
+        "&#47;",
+        "&lt;",
+        "&gt;"
+};
+
+int
+houdini_escape_html0(strbuf *ob, const uint8_t *src, size_t size, int secure)
+{
+       size_t  i = 0, org, esc = 0;
+
+       while (i < size) {
+               org = i;
+               while (i < size && (esc = HTML_ESCAPE_TABLE[src[i]]) == 0)
+                       i++;
+
+               if (i > org)
+                       strbuf_put(ob, src + org, i - org);
+
+               /* escaping */
+               if (unlikely(i >= size))
+                       break;
+
+               /* The forward slash is only escaped in secure mode */
+               if ((src[i] == '/' || src[i] == '\'') && !secure) {
+                       strbuf_putc(ob, src[i]);
+               } else {
+                       strbuf_puts(ob, HTML_ESCAPES[esc]);
+               }
+
+               i++;
+       }
+
+       return 1;
+}
+
+int
+houdini_escape_html(strbuf *ob, const uint8_t *src, size_t size)
+{
+       return houdini_escape_html0(ob, src, size, 1);
+}

http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/3d7c5ed3/compiler/modules/CommonMark/src/houdini_html_u.c
----------------------------------------------------------------------
diff --git a/compiler/modules/CommonMark/src/houdini_html_u.c b/compiler/modules/CommonMark/src/houdini_html_u.c
new file mode 100644
index 0000000..b88b9d1
--- /dev/null
+++ b/compiler/modules/CommonMark/src/houdini_html_u.c
@@ -0,0 +1,112 @@
+#include <assert.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "buffer.h"
+#include "houdini.h"
+#include "utf8.h"
+#include "html_unescape.h"
+
+size_t
+houdini_unescape_ent(strbuf *ob, const uint8_t *src, size_t size)
+{
+       size_t i = 0;
+
+       if (size > 3 && src[0] == '#') {
+               int codepoint = 0;
+
+               if (_isdigit(src[1])) {
+                       for (i = 1; i < size && _isdigit(src[i]); ++i) {
+                               int cp = (codepoint * 10) + (src[i] - '0');
+
+                               if (cp < codepoint)
+                                       return 0;
+
+                               codepoint = cp;
+                       }
+               }
+
+               else if (src[1] == 'x' || src[1] == 'X') {
+                       for (i = 2; i < size && _isxdigit(src[i]); ++i) {
+                               int cp = (codepoint * 16) + ((src[i] | 32) % 39 - 9);
+
+                               if (cp < codepoint)
+                                       return 0;
+
+                               codepoint = cp;
+                       }
+               }
+
+               if (i < size && src[i] == ';' && codepoint) {
+                       utf8proc_encode_char(codepoint, ob);
+                       return i + 1;
+               }
+       }
+
+       else {
+               if (size > MAX_WORD_LENGTH)
+                       size = MAX_WORD_LENGTH;
+
+               for (i = MIN_WORD_LENGTH; i < size; ++i) {
+                       if (src[i] == ' ')
+                               break;
+
+                       if (src[i] == ';') {
+                               const struct html_ent *entity = find_entity((char *)src, i);
+
+                               if (entity != NULL) {
+                                       strbuf_put(ob, entity->utf8, entity->utf8_len);
+                                       return i + 1;
+                               }
+
+                               break;
+                       }
+               }
+       }
+
+       return 0;
+}
+
+int
+houdini_unescape_html(strbuf *ob, const uint8_t *src, size_t size)
+{
+       size_t  i = 0, org, ent;
+
+       while (i < size) {
+               org = i;
+               while (i < size && src[i] != '&')
+                       i++;
+
+               if (likely(i > org)) {
+                       if (unlikely(org == 0)) {
+                               if (i >= size)
+                                       return 0;
+
+                               strbuf_grow(ob, HOUDINI_UNESCAPED_SIZE(size));
+                       }
+
+                       strbuf_put(ob, src + org, i - org);
+               }
+
+               /* escaping */
+               if (i >= size)
+                       break;
+
+               i++;
+
+               ent = houdini_unescape_ent(ob, src + i, size - i);
+               i += ent;
+
+               /* not really an entity */
+               if (ent == 0)
+                       strbuf_putc(ob, '&');
+       }
+
+       return 1;
+}
+
+void houdini_unescape_html_f(strbuf *ob, const uint8_t *src, size_t size)
+{
+       if (!houdini_unescape_html(ob, src, size))
+               strbuf_put(ob, src, size);
+}

http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/3d7c5ed3/compiler/modules/CommonMark/src/html.c
----------------------------------------------------------------------
diff --git a/compiler/modules/CommonMark/src/html.c b/compiler/modules/CommonMark/src/html.c
new file mode 100644
index 0000000..60229cc
--- /dev/null
+++ b/compiler/modules/CommonMark/src/html.c
@@ -0,0 +1,357 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+
+#include "config.h"
+#include "cmark.h"
+#include "node.h"
+#include "buffer.h"
+#include "houdini.h"
+
+// Functions to convert cmark_nodes to HTML strings.
+
+static bool
+finish_node(strbuf *html, cmark_node *node, bool tight);
+
+static void escape_html(strbuf *dest, const unsigned char *source, int length)
+{
+       if (length < 0)
+               length = strlen((char *)source);
+
+       houdini_escape_html0(dest, source, (size_t)length, 0);
+}
+
+static void escape_href(strbuf *dest, const unsigned char *source, int length)
+{
+       if (length < 0)
+               length = strlen((char *)source);
+
+       houdini_escape_href(dest, source, (size_t)length);
+}
+
+static inline void cr(strbuf *html)
+{
+       if (html->size && html->ptr[html->size - 1] != '\n')
+               strbuf_putc(html, '\n');
+}
+
+// Convert the inline children of a node to a plain string.
+static void inlines_to_plain_html(strbuf *html, cmark_node* node)
+{
+       cmark_node* cur = node->first_child;
+
+       if (cur == NULL) {
+               return;
+       }
+
+       while (true) {
+               switch(cur->type) {
+               case NODE_TEXT:
+               case NODE_INLINE_CODE:
+               case NODE_INLINE_HTML:
+                       escape_html(html, cur->as.literal.data, cur->as.literal.len);
+                       break;
+
+               case NODE_LINEBREAK:
+               case NODE_SOFTBREAK:
+                       strbuf_putc(html, ' ');
+                       break;
+
+               default:
+                       break;
+               }
+
+               if (cur->first_child) {
+                       cur = cur->first_child;
+                       continue;
+               }
+
+       next_sibling:
+               if (cur->next) {
+                       cur = cur->next;
+                       continue;
+               }
+               cur = cur->parent;
+               if (cur == node) {
+                       break;
+               }
+               goto next_sibling;
+       }
+}
+
+
+// Convert a cmark_node to HTML.
+static void node_to_html(strbuf *html, cmark_node *node)
+{
+       cmark_node *cur;
+       char start_header[] = "<h0>";
+       bool tight = false;
+       bool visit_children;
+       strbuf *info;
+
+       if (node == NULL) {
+               return;
+       }
+
+       cur = node;
+       while (true) {
+               // Only NODE_IMAGE wants to skip its children.
+               visit_children = true;
+
+               switch(cur->type) {
+               case NODE_DOCUMENT:
+                       break;
+
+               case NODE_PARAGRAPH:
+                       if (!tight) {
+                               cr(html);
+                               strbuf_puts(html, "<p>");
+                       }
+                       break;
+
+               case NODE_BLOCK_QUOTE:
+                       cr(html);
+                       strbuf_puts(html, "<blockquote>\n");
+                       // BLOCK_QUOTE doesn't use any of the 'as' structs,
+                       // so the 'list' member can be used to store the
+                       // current value of 'tight'.
+                       cur->as.list.tight = tight;
+                       tight = false;
+                       break;
+
+               case NODE_LIST_ITEM:
+                       cr(html);
+                       strbuf_puts(html, "<li>");
+                       break;
+
+               case NODE_LIST: {
+                       cmark_list *list = &cur->as.list;
+                       bool tmp;
+
+                       // make sure a list starts at the beginning of the line:
+                       cr(html);
+
+                       if (list->list_type == CMARK_BULLET_LIST) {
+                               strbuf_puts(html, "<ul>\n");
+                       }
+                       else if (list->start == 1) {
+                               strbuf_puts(html, "<ol>\n");
+                       }
+                       else {
+                               strbuf_printf(html, "<ol start=\"%d\">\n",
+                                             list->start);
+                       }
+
+                       // Store the current value of 'tight' by swapping.
+                       tmp = list->tight;
+                       list->tight = tight;
+                       tight = tmp;
+                       break;
+               }
+
+               case NODE_HEADER:
+                       cr(html);
+                       start_header[2] = '0' + cur->as.header.level;
+                       strbuf_puts(html, start_header);
+                       break;
+
+               case NODE_CODE_BLOCK:
+                       info = &cur->as.code.info;
+                       cr(html);
+
+                       if (&cur->as.code.fence_length == 0
+                           || strbuf_len(info) == 0) {
+                               strbuf_puts(html, "<pre><code>");
+                       }
+                       else {
+                               int first_tag = strbuf_strchr(info, ' ', 0);
+                               if (first_tag < 0)
+                                       first_tag = strbuf_len(info);
+
+                               strbuf_puts(html,
+                                           "<pre><code class=\"language-");
+                               escape_html(html, info->ptr, first_tag);
+                               strbuf_puts(html, "\">");
+                       }
+
+                       escape_html(html, cur->string_content.ptr, cur->string_content.size);
+                       break;
+
+               case NODE_HTML:
+                       cr(html);
+                       strbuf_put(html, cur->string_content.ptr, cur->string_content.size);
+                       break;
+
+               case NODE_HRULE:
+                       cr(html);
+                       strbuf_puts(html, "<hr />\n");
+                       break;
+
+               case NODE_REFERENCE_DEF:
+                       break;
+
+               case NODE_TEXT:
+                       escape_html(html, cur->as.literal.data, cur->as.literal.len);
+                       break;
+
+               case NODE_LINEBREAK:
+                       strbuf_puts(html, "<br />\n");
+                       break;
+
+               case NODE_SOFTBREAK:
+                       strbuf_putc(html, '\n');
+                       break;
+
+               case NODE_INLINE_CODE:
+                       strbuf_puts(html, "<code>");
+                       escape_html(html, cur->as.literal.data, cur->as.literal.len);
+                       break;
+
+               case NODE_INLINE_HTML:
+                       strbuf_put(html,
+                                  cur->as.literal.data,
+                                  cur->as.literal.len);
+                       break;
+
+               case NODE_LINK:
+                       strbuf_puts(html, "<a href=\"");
+                       if (cur->as.link.url)
+                               escape_href(html, cur->as.link.url, -1);
+
+                       if (cur->as.link.title) {
+                               strbuf_puts(html, "\" title=\"");
+                               escape_html(html, cur->as.link.title, -1);
+                       }
+
+                       strbuf_puts(html, "\">");
+                       break;
+
+               case NODE_IMAGE:
+                       strbuf_puts(html, "<img src=\"");
+                       if (cur->as.link.url)
+                               escape_href(html, cur->as.link.url, -1);
+
+                       strbuf_puts(html, "\" alt=\"");
+                       inlines_to_plain_html(html, cur);
+
+                       if (cur->as.link.title) {
+                               strbuf_puts(html, "\" title=\"");
+                               escape_html(html, cur->as.link.title, -1);
+                       }
+
+                       strbuf_puts(html, "\" />");
+                       visit_children = false;
+                       break;
+
+               case NODE_STRONG:
+                       strbuf_puts(html, "<strong>");
+                       break;
+
+               case NODE_EMPH:
+                       strbuf_puts(html, "<em>");
+                       break;
+
+               default:
+                       assert(false);
+               }
+
+               if (visit_children && cur->first_child) {
+                       cur = cur->first_child;
+                       continue;
+               }
+
+       next_sibling:
+               tight = finish_node(html, cur, tight);
+               if (cur == node) {
+                       break;
+               }
+               if (cur->next) {
+                       cur = cur->next;
+                       continue;
+               }
+               cur = cur->parent;
+               goto next_sibling;
+       }
+}
+
+// Returns the restored value of 'tight'.
+static bool
+finish_node(strbuf *html, cmark_node *node, bool tight)
+{
+       char end_header[] = "</h0>\n";
+
+       switch (node->type) {
+       case NODE_PARAGRAPH:
+               if (!tight) {
+                       strbuf_puts(html, "</p>\n");
+               }
+               break;
+
+       case NODE_BLOCK_QUOTE: {
+               cmark_list *list = &node->as.list;
+               strbuf_puts(html, "</blockquote>\n");
+               // Restore old 'tight' value.
+               tight = list->tight;
+               list->tight = false;
+               break;
+       }
+
+       case NODE_LIST_ITEM:
+               strbuf_puts(html, "</li>\n");
+               break;
+
+       case NODE_LIST: {
+               cmark_list *list = &node->as.list;
+               bool tmp;
+               strbuf_puts(html,
+                           list->list_type == CMARK_BULLET_LIST ?
+                           "</ul>\n" : "</ol>\n");
+               // Restore old 'tight' value.
+               tmp = tight;
+               tight = list->tight;
+               list->tight = tmp;
+               break;
+       }
+
+       case NODE_HEADER:
+               end_header[3] = '0' + node->as.header.level;
+               strbuf_puts(html, end_header);
+               break;
+
+       case NODE_CODE_BLOCK:
+               strbuf_puts(html, "</code></pre>\n");
+               break;
+
+       case NODE_INLINE_CODE:
+               strbuf_puts(html, "</code>");
+               break;
+
+       case NODE_LINK:
+               strbuf_puts(html, "</a>");
+               break;
+
+       case NODE_STRONG:
+               strbuf_puts(html, "</strong>");
+               break;
+
+       case NODE_EMPH:
+               strbuf_puts(html, "</em>");
+               break;
+
+       default:
+               break;
+       }
+
+       return tight;
+}
+
+char *cmark_render_html(cmark_node *root)
+{
+       char *result;
+       strbuf html = GH_BUF_INIT;
+       node_to_html(&html, root);
+       result = (char *)strbuf_detach(&html);
+       strbuf_free(&html);
+       return result;
+}

http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/3d7c5ed3/compiler/modules/CommonMark/src/html/houdini.h
----------------------------------------------------------------------
diff --git a/compiler/modules/CommonMark/src/html/houdini.h b/compiler/modules/CommonMark/src/html/houdini.h
deleted file mode 100644
index 9e1200e..0000000
--- a/compiler/modules/CommonMark/src/html/houdini.h
+++ /dev/null
@@ -1,52 +0,0 @@
-#ifndef CMARK_HOUDINI_H
-#define CMARK_HOUDINI_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include <stdint.h>
-#include "config.h"
-#include "buffer.h"
-
-#ifdef HAVE___BUILTIN_EXPECT
-#      define likely(x)        __builtin_expect((x),1)
-#      define unlikely(x)      __builtin_expect((x),0)
-#else
-#      define likely(x)        (x)
-#      define unlikely(x)      (x)
-#endif
-
-#ifdef HOUDINI_USE_LOCALE
-#      define _isxdigit(c) isxdigit(c)
-#      define _isdigit(c) isdigit(c)
-#else
-/*
- * Helper _isdigit methods -- do not trust the current locale
- * */
-#      define _isxdigit(c) (strchr("0123456789ABCDEFabcdef", (c)) != NULL)
-#      define _isdigit(c) ((c) >= '0' && (c) <= '9')
-#endif
-
-#define HOUDINI_ESCAPED_SIZE(x) (((x) * 12) / 10)
-#define HOUDINI_UNESCAPED_SIZE(x) (x)
-
-extern size_t houdini_unescape_ent(strbuf *ob, const uint8_t *src, size_t size);
-extern int houdini_escape_html(strbuf *ob, const uint8_t *src, size_t size);
-extern int houdini_escape_html0(strbuf *ob, const uint8_t *src, size_t size, int secure);
-extern int houdini_unescape_html(strbuf *ob, const uint8_t *src, size_t size);
-extern void houdini_unescape_html_f(strbuf *ob, const uint8_t *src, size_t size);
-extern int houdini_escape_xml(strbuf *ob, const uint8_t *src, size_t size);
-extern int houdini_escape_uri(strbuf *ob, const uint8_t *src, size_t size);
-extern int houdini_escape_url(strbuf *ob, const uint8_t *src, size_t size);
-extern int houdini_escape_href(strbuf *ob, const uint8_t *src, size_t size);
-extern int houdini_unescape_uri(strbuf *ob, const uint8_t *src, size_t size);
-extern int houdini_unescape_url(strbuf *ob, const uint8_t *src, size_t size);
-extern int houdini_escape_js(strbuf *ob, const uint8_t *src, size_t size);
-extern int houdini_unescape_js(strbuf *ob, const uint8_t *src, size_t size);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif

http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/3d7c5ed3/compiler/modules/CommonMark/src/html/houdini_href_e.c
----------------------------------------------------------------------
diff --git a/compiler/modules/CommonMark/src/html/houdini_href_e.c b/compiler/modules/CommonMark/src/html/houdini_href_e.c
deleted file mode 100644
index 12456ce..0000000
--- a/compiler/modules/CommonMark/src/html/houdini_href_e.c
+++ /dev/null
@@ -1,107 +0,0 @@
-#include <assert.h>
-#include <stdio.h>
-#include <string.h>
-
-#include "html/houdini.h"
-
-/*
- * The following characters will not be escaped:
- *
- *             -_.+!*'(),%#@?=;:/,+&$ alphanum
- *
- * Note that this character set is the addition of:
- *
- *     - The characters which are safe to be in an URL
- *     - The characters which are *not* safe to be in
- *     an URL because they are RESERVED characters.
- *
- * We asume (lazily) that any RESERVED char that
- * appears inside an URL is actually meant to
- * have its native function (i.e. as an URL
- * component/separator) and hence needs no escaping.
- *
- * There are two exceptions: the chacters & (amp)
- * and ' (single quote) do not appear in the table.
- * They are meant to appear in the URL as components,
- * yet they require special HTML-entity escaping
- * to generate valid HTML markup.
- *
- * All other characters will be escaped to %XX.
- *
- */
-static const char HREF_SAFE[] = {
-       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-       0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
-       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1,
-       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
-       0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
-       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-};
-
-int
-houdini_escape_href(strbuf *ob, const uint8_t *src, size_t size)
-{
-       static const uint8_t hex_chars[] = "0123456789ABCDEF";
-       size_t  i = 0, org;
-       uint8_t hex_str[3];
-
-       hex_str[0] = '%';
-
-       while (i < size) {
-               org = i;
-               while (i < size && HREF_SAFE[src[i]] != 0)
-                       i++;
-
-               if (likely(i > org))
-                       strbuf_put(ob, src + org, i - org);
-
-               /* escaping */
-               if (i >= size)
-                       break;
-
-               switch (src[i]) {
-               /* amp appears all the time in URLs, but needs
-                * HTML-entity escaping to be inside an href */
-               case '&':
-                       strbuf_puts(ob, "&amp;");
-                       break;
-
-               /* the single quote is a valid URL character
-                * according to the standard; it needs HTML
-                * entity escaping too */
-               case '\'':
-                       strbuf_puts(ob, "&#x27;");
-                       break;
-
-               /* the space can be escaped to %20 or a plus
-                * sign. we're going with the generic escape
-                * for now. the plus thing is more commonly seen
-                * when building GET strings */
-#if 0
-               case ' ':
-                       strbuf_putc(ob, '+');
-                       break;
-#endif
-
-               /* every other character goes with a %XX escaping */
-               default:
-                       hex_str[1] = hex_chars[(src[i] >> 4) & 0xF];
-                       hex_str[2] = hex_chars[src[i] & 0xF];
-                       strbuf_put(ob, hex_str, 3);
-               }
-
-               i++;
-       }
-
-       return 1;
-}

http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/3d7c5ed3/compiler/modules/CommonMark/src/html/houdini_html_e.c
----------------------------------------------------------------------
diff --git a/compiler/modules/CommonMark/src/html/houdini_html_e.c b/compiler/modules/CommonMark/src/html/houdini_html_e.c
deleted file mode 100644
index f2e86fe..0000000
--- a/compiler/modules/CommonMark/src/html/houdini_html_e.c
+++ /dev/null
@@ -1,81 +0,0 @@
-#include <assert.h>
-#include <stdio.h>
-#include <string.h>
-
-#include "html/houdini.h"
-
-/**
- * According to the OWASP rules:
- *
- * & --> &amp;
- * < --> &lt;
- * > --> &gt;
- * " --> &quot;
- * ' --> &#x27;     &apos; is not recommended
- * / --> &#x2F;     forward slash is included as it helps end an HTML entity
- *
- */
-static const char HTML_ESCAPE_TABLE[] = {
-       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-       0, 0, 1, 0, 0, 0, 2, 3, 0, 0, 0, 0, 0, 0, 0, 4,
-       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 6, 0,
-       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-};
-
-static const char *HTML_ESCAPES[] = {
-        "",
-        "&quot;",
-        "&amp;",
-        "&#39;",
-        "&#47;",
-        "&lt;",
-        "&gt;"
-};
-
-int
-houdini_escape_html0(strbuf *ob, const uint8_t *src, size_t size, int secure)
-{
-       size_t  i = 0, org, esc = 0;
-
-       while (i < size) {
-               org = i;
-               while (i < size && (esc = HTML_ESCAPE_TABLE[src[i]]) == 0)
-                       i++;
-
-               if (i > org)
-                       strbuf_put(ob, src + org, i - org);
-
-               /* escaping */
-               if (unlikely(i >= size))
-                       break;
-
-               /* The forward slash is only escaped in secure mode */
-               if ((src[i] == '/' || src[i] == '\'') && !secure) {
-                       strbuf_putc(ob, src[i]);
-               } else {
-                       strbuf_puts(ob, HTML_ESCAPES[esc]);
-               }
-
-               i++;
-       }
-
-       return 1;
-}
-
-int
-houdini_escape_html(strbuf *ob, const uint8_t *src, size_t size)
-{
-       return houdini_escape_html0(ob, src, size, 1);
-}

http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/3d7c5ed3/compiler/modules/CommonMark/src/html/houdini_html_u.c
----------------------------------------------------------------------
diff --git a/compiler/modules/CommonMark/src/html/houdini_html_u.c b/compiler/modules/CommonMark/src/html/houdini_html_u.c
deleted file mode 100644
index b88b9d1..0000000
--- a/compiler/modules/CommonMark/src/html/houdini_html_u.c
+++ /dev/null
@@ -1,112 +0,0 @@
-#include <assert.h>
-#include <stdio.h>
-#include <string.h>
-
-#include "buffer.h"
-#include "houdini.h"
-#include "utf8.h"
-#include "html_unescape.h"
-
-size_t
-houdini_unescape_ent(strbuf *ob, const uint8_t *src, size_t size)
-{
-       size_t i = 0;
-
-       if (size > 3 && src[0] == '#') {
-               int codepoint = 0;
-
-               if (_isdigit(src[1])) {
-                       for (i = 1; i < size && _isdigit(src[i]); ++i) {
-                               int cp = (codepoint * 10) + (src[i] - '0');
-
-                               if (cp < codepoint)
-                                       return 0;
-
-                               codepoint = cp;
-                       }
-               }
-
-               else if (src[1] == 'x' || src[1] == 'X') {
-                       for (i = 2; i < size && _isxdigit(src[i]); ++i) {
-                               int cp = (codepoint * 16) + ((src[i] | 32) % 39 - 9);
-
-                               if (cp < codepoint)
-                                       return 0;
-
-                               codepoint = cp;
-                       }
-               }
-
-               if (i < size && src[i] == ';' && codepoint) {
-                       utf8proc_encode_char(codepoint, ob);
-                       return i + 1;
-               }
-       }
-
-       else {
-               if (size > MAX_WORD_LENGTH)
-                       size = MAX_WORD_LENGTH;
-
-               for (i = MIN_WORD_LENGTH; i < size; ++i) {
-                       if (src[i] == ' ')
-                               break;
-
-                       if (src[i] == ';') {
-                               const struct html_ent *entity = find_entity((char *)src, i);
-
-                               if (entity != NULL) {
-                                       strbuf_put(ob, entity->utf8, entity->utf8_len);
-                                       return i + 1;
-                               }
-
-                               break;
-                       }
-               }
-       }
-
-       return 0;
-}
-
-int
-houdini_unescape_html(strbuf *ob, const uint8_t *src, size_t size)
-{
-       size_t  i = 0, org, ent;
-
-       while (i < size) {
-               org = i;
-               while (i < size && src[i] != '&')
-                       i++;
-
-               if (likely(i > org)) {
-                       if (unlikely(org == 0)) {
-                               if (i >= size)
-                                       return 0;
-
-                               strbuf_grow(ob, HOUDINI_UNESCAPED_SIZE(size));
-                       }
-
-                       strbuf_put(ob, src + org, i - org);
-               }
-
-               /* escaping */
-               if (i >= size)
-                       break;
-
-               i++;
-
-               ent = houdini_unescape_ent(ob, src + i, size - i);
-               i += ent;
-
-               /* not really an entity */
-               if (ent == 0)
-                       strbuf_putc(ob, '&');
-       }
-
-       return 1;
-}
-
-void houdini_unescape_html_f(strbuf *ob, const uint8_t *src, size_t size)
-{
-       if (!houdini_unescape_html(ob, src, size))
-               strbuf_put(ob, src, size);
-}

http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/3d7c5ed3/compiler/modules/CommonMark/src/html/html.c
----------------------------------------------------------------------
diff --git a/compiler/modules/CommonMark/src/html/html.c b/compiler/modules/CommonMark/src/html/html.c
deleted file mode 100644
index 0e3dd15..0000000
--- a/compiler/modules/CommonMark/src/html/html.c
+++ /dev/null
@@ -1,357 +0,0 @@
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <assert.h>
-
-#include "config.h"
-#include "cmark.h"
-#include "node.h"
-#include "buffer.h"
-#include "html/houdini.h"
-
-// Functions to convert cmark_nodes to HTML strings.
-
-static bool
-finish_node(strbuf *html, cmark_node *node, bool tight);
-
-static void escape_html(strbuf *dest, const unsigned char *source, int length)
-{
-       if (length < 0)
-               length = strlen((char *)source);
-
-       houdini_escape_html0(dest, source, (size_t)length, 0);
-}
-
-static void escape_href(strbuf *dest, const unsigned char *source, int length)
-{
-       if (length < 0)
-               length = strlen((char *)source);
-
-       houdini_escape_href(dest, source, (size_t)length);
-}
-
-static inline void cr(strbuf *html)
-{
-       if (html->size && html->ptr[html->size - 1] != '\n')
-               strbuf_putc(html, '\n');
-}
-
-// Convert the inline children of a node to a plain string.
-static void inlines_to_plain_html(strbuf *html, cmark_node* node)
-{
-       cmark_node* cur = node->first_child;
-
-       if (cur == NULL) {
-               return;
-       }
-
-       while (true) {
-               switch(cur->type) {
-               case NODE_TEXT:
-               case NODE_INLINE_CODE:
-               case NODE_INLINE_HTML:
-                       escape_html(html, cur->as.literal.data, cur->as.literal.len);
-                       break;
-
-               case NODE_LINEBREAK:
-               case NODE_SOFTBREAK:
-                       strbuf_putc(html, ' ');
-                       break;
-
-               default:
-                       break;
-               }
-
-               if (cur->first_child) {
-                       cur = cur->first_child;
-                       continue;
-               }
-
-       next_sibling:
-               if (cur->next) {
-                       cur = cur->next;
-                       continue;
-               }
-               cur = cur->parent;
-               if (cur == node) {
-                       break;
-               }
-               goto next_sibling;
-       }
-}
-
-
-// Convert a cmark_node to HTML.
-static void node_to_html(strbuf *html, cmark_node *node)
-{
-       cmark_node *cur;
-       char start_header[] = "<h0>";
-       bool tight = false;
-       bool visit_children;
-       strbuf *info;
-
-       if (node == NULL) {
-               return;
-       }
-
-       cur = node;
-       while (true) {
-               // Only NODE_IMAGE wants to skip its children.
-               visit_children = true;
-
-               switch(cur->type) {
-               case NODE_DOCUMENT:
-                       break;
-
-               case NODE_PARAGRAPH:
-                       if (!tight) {
-                               cr(html);
-                               strbuf_puts(html, "<p>");
-                       }
-                       break;
-
-               case NODE_BLOCK_QUOTE:
-                       cr(html);
-                       strbuf_puts(html, "<blockquote>\n");
-                       // BLOCK_QUOTE doesn't use any of the 'as' structs,
-                       // so the 'list' member can be used to store the
-                       // current value of 'tight'.
-                       cur->as.list.tight = tight;
-                       tight = false;
-                       break;
-
-               case NODE_LIST_ITEM:
-                       cr(html);
-                       strbuf_puts(html, "<li>");
-                       break;
-
-               case NODE_LIST: {
-                       cmark_list *list = &cur->as.list;
-                       bool tmp;
-
-                       // make sure a list starts at the beginning of the line:
-                       cr(html);
-
-                       if (list->list_type == CMARK_BULLET_LIST) {
-                               strbuf_puts(html, "<ul>\n");
-                       }
-                       else if (list->start == 1) {
-                               strbuf_puts(html, "<ol>\n");
-                       }
-                       else {
-                               strbuf_printf(html, "<ol start=\"%d\">\n",
-                                             list->start);
-                       }
-
-                       // Store the current value of 'tight' by swapping.
-                       tmp = list->tight;
-                       list->tight = tight;
-                       tight = tmp;
-                       break;
-               }
-
-               case NODE_HEADER:
-                       cr(html);
-                       start_header[2] = '0' + cur->as.header.level;
-                       strbuf_puts(html, start_header);
-                       break;
-
-               case NODE_CODE_BLOCK:
-                       info = &cur->as.code.info;
-                       cr(html);
-
-                       if (&cur->as.code.fence_length == 0
-                           || strbuf_len(info) == 0) {
-                               strbuf_puts(html, "<pre><code>");
-                       }
-                       else {
-                               int first_tag = strbuf_strchr(info, ' ', 0);
-                               if (first_tag < 0)
-                                       first_tag = strbuf_len(info);
-
-                               strbuf_puts(html,
-                                           "<pre><code class=\"language-");
-                               escape_html(html, info->ptr, first_tag);
-                               strbuf_puts(html, "\">");
-                       }
-
-                       escape_html(html, cur->string_content.ptr, cur->string_content.size);
-                       break;
-
-               case NODE_HTML:
-                       cr(html);
-                       strbuf_put(html, cur->string_content.ptr, cur->string_content.size);
-                       break;
-
-               case NODE_HRULE:
-                       cr(html);
-                       strbuf_puts(html, "<hr />\n");
-                       break;
-
-               case NODE_REFERENCE_DEF:
-                       break;
-
-               case NODE_TEXT:
-                       escape_html(html, cur->as.literal.data, cur->as.literal.len);
-                       break;
-
-               case NODE_LINEBREAK:
-                       strbuf_puts(html, "<br />\n");
-                       break;
-
-               case NODE_SOFTBREAK:
-                       strbuf_putc(html, '\n');
-                       break;
-
-               case NODE_INLINE_CODE:
-                       strbuf_puts(html, "<code>");
-                       escape_html(html, cur->as.literal.data, cur->as.literal.len);
-                       break;
-
-               case NODE_INLINE_HTML:
-                       strbuf_put(html,
-                                  cur->as.literal.data,
-                                  cur->as.literal.len);
-                       break;
-
-               case NODE_LINK:
-                       strbuf_puts(html, "<a href=\"");
-                       if (cur->as.link.url)
-                               escape_href(html, cur->as.link.url, -1);
-
-                       if (cur->as.link.title) {
-                               strbuf_puts(html, "\" title=\"");
-                               escape_html(html, cur->as.link.title, -1);
-                       }
-
-                       strbuf_puts(html, "\">");
-                       break;
-
-               case NODE_IMAGE:
-                       strbuf_puts(html, "<img src=\"");
-                       if (cur->as.link.url)
-                               escape_href(html, cur->as.link.url, -1);
-
-                       strbuf_puts(html, "\" alt=\"");
-                       inlines_to_plain_html(html, cur);
-
-                       if (cur->as.link.title) {
-                               strbuf_puts(html, "\" title=\"");
-                               escape_html(html, cur->as.link.title, -1);
-                       }
-
-                       strbuf_puts(html, "\" />");
-                       visit_children = false;
-                       break;
-
-               case NODE_STRONG:
-                       strbuf_puts(html, "<strong>");
-                       break;
-
-               case NODE_EMPH:
-                       strbuf_puts(html, "<em>");
-                       break;
-
-               default:
-                       assert(false);
-               }
-
-               if (visit_children && cur->first_child) {
-                       cur = cur->first_child;
-                       continue;
-               }
-
-       next_sibling:
-               tight = finish_node(html, cur, tight);
-               if (cur == node) {
-                       break;
-               }
-               if (cur->next) {
-                       cur = cur->next;
-                       continue;
-               }
-               cur = cur->parent;
-               goto next_sibling;
-       }
-}
-
-// Returns the restored value of 'tight'.
-static bool
-finish_node(strbuf *html, cmark_node *node, bool tight)
-{
-       char end_header[] = "</h0>\n";
-
-       switch (node->type) {
-       case NODE_PARAGRAPH:
-               if (!tight) {
-                       strbuf_puts(html, "</p>\n");
-               }
-               break;
-
-       case NODE_BLOCK_QUOTE: {
-               cmark_list *list = &node->as.list;
-               strbuf_puts(html, "</blockquote>\n");
-               // Restore old 'tight' value.
-               tight = list->tight;
-               list->tight = false;
-               break;
-       }
-
-       case NODE_LIST_ITEM:
-               strbuf_puts(html, "</li>\n");
-               break;
-
-       case NODE_LIST: {
-               cmark_list *list = &node->as.list;
-               bool tmp;
-               strbuf_puts(html,
-                           list->list_type == CMARK_BULLET_LIST ?
-                           "</ul>\n" : "</ol>\n");
-               // Restore old 'tight' value.
-               tmp = tight;
-               tight = list->tight;
-               list->tight = tmp;
-               break;
-       }
-
-       case NODE_HEADER:
-               end_header[3] = '0' + node->as.header.level;
-               strbuf_puts(html, end_header);
-               break;
-
-       case NODE_CODE_BLOCK:
-               strbuf_puts(html, "</code></pre>\n");
-               break;
-
-       case NODE_INLINE_CODE:
-               strbuf_puts(html, "</code>");
-               break;
-
-       case NODE_LINK:
-               strbuf_puts(html, "</a>");
-               break;
-
-       case NODE_STRONG:
-               strbuf_puts(html, "</strong>");
-               break;
-
-       case NODE_EMPH:
-               strbuf_puts(html, "</em>");
-               break;
-
-       default:
-               break;
-       }
-
-       return tight;
-}
-
-char *cmark_render_html(cmark_node *root)
-{
-       char *result;
-       strbuf html = GH_BUF_INIT;
-       node_to_html(&html, root);
-       result = (char *)strbuf_detach(&html);
-       strbuf_free(&html);
-       return result;
-}

Reply via email to