Update libcmark source. Update to commit 8796258400fc07a653172ba9d808374fdcca7936 from Dec 9 13:01:14 2014 -0800, mainly to fix -Wextra warnings.
Project: http://git-wip-us.apache.org/repos/asf/lucy-clownfish/repo Commit: http://git-wip-us.apache.org/repos/asf/lucy-clownfish/commit/3d7c5ed3 Tree: http://git-wip-us.apache.org/repos/asf/lucy-clownfish/tree/3d7c5ed3 Diff: http://git-wip-us.apache.org/repos/asf/lucy-clownfish/diff/3d7c5ed3 Branch: refs/heads/markdown_v2 Commit: 3d7c5ed360815d66f2c42a596cf2657b411ca6ce Parents: 8776f81 Author: Nick Wellnhofer <[email protected]> Authored: Wed Dec 10 17:47:23 2014 +0100 Committer: Nick Wellnhofer <[email protected]> Committed: Wed Dec 10 17:47:23 2014 +0100 ---------------------------------------------------------------------- compiler/modules/CommonMark/README.md | 39 +- compiler/modules/CommonMark/src/bench.h | 27 + compiler/modules/CommonMark/src/blocks.c | 9 +- compiler/modules/CommonMark/src/cmark.c | 2 +- compiler/modules/CommonMark/src/cmark.h | 3 + compiler/modules/CommonMark/src/houdini.h | 52 + .../modules/CommonMark/src/houdini_href_e.c | 107 + .../modules/CommonMark/src/houdini_html_e.c | 81 + .../modules/CommonMark/src/houdini_html_u.c | 112 + compiler/modules/CommonMark/src/html.c | 357 + compiler/modules/CommonMark/src/html/houdini.h | 52 - .../CommonMark/src/html/houdini_href_e.c | 107 - .../CommonMark/src/html/houdini_html_e.c | 81 - .../CommonMark/src/html/houdini_html_u.c | 112 - compiler/modules/CommonMark/src/html/html.c | 357 - .../CommonMark/src/html/html_unescape.gperf | 2131 ---- .../modules/CommonMark/src/html/html_unescape.h | 9736 --------------- .../modules/CommonMark/src/html_unescape.gperf | 2131 ++++ compiler/modules/CommonMark/src/html_unescape.h | 10782 +++++++++++++++++ compiler/modules/CommonMark/src/inlines.c | 2 +- compiler/modules/CommonMark/src/libcmark.pc.in | 10 + compiler/modules/CommonMark/src/node.c | 134 +- compiler/modules/CommonMark/src/scanners.c | 10 +- compiler/modules/CommonMark/src/scanners.h | 1 + 24 files changed, 13818 insertions(+), 12617 deletions(-) 
---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/3d7c5ed3/compiler/modules/CommonMark/README.md ---------------------------------------------------------------------- diff --git a/compiler/modules/CommonMark/README.md b/compiler/modules/CommonMark/README.md index f56dc3e..8db4c1f 100644 --- a/compiler/modules/CommonMark/README.md +++ b/compiler/modules/CommonMark/README.md @@ -15,8 +15,9 @@ standalone program `cmark` that converts CommonMark to HTML. It is written in standard C99 and has no library dependencies. The parser is very fast (see [benchmarks](benchmarks.md)). -It is easy to use `libcmark` in python or ruby code: see `wrapper.py` -and `wrapper.rb` in the repository for simple examples. +It is easy to use `libcmark` in python, lua, ruby, and other dynamic +languages: see `wrapper.py`, `wrapper.lua`, and `wrapper.rb` in the +repository for simple examples. The JavaScript implementation is a single JavaScript file, with no dependencies, that can be linked to in an HTML page. Here @@ -43,15 +44,15 @@ Installing (C) -------------- Building the C program (`cmark`) and shared library (`libcmark`) -requires [cmake] and [re2c], which is used to generate `scanners.c` from -`scanners.re`. (Note that [re2c] is only a build dependency for -developers, since `scanners.c` can be provided in a released source -tarball.) +requires [cmake]. If you modify `scanners.re`, then you will also +need [re2c], which is used to generate `scanners.c` from +`scanners.re`. We have included a pre-generated `scanners.c` in +the repository to reduce build dependencies. If you have GNU make, you can simply `make`, `make test`, and `make install`. This calls [cmake] to create a `Makefile` in the `build` directory, then uses that `Makefile` to create the executable and -library. +library. The binaries can be found in `build/src`. For a more portable method, you can use [cmake] manually. 
[cmake] knows how to create build environments for many build systems. For example, @@ -73,17 +74,6 @@ Or, to create Xcode project files on OSX: make test make install -Tests can also be run manually on any executable `$PROG` using: - - python test/spec_tests.py --program $PROG - -If you want to extract the raw test data from the spec without -actually running the tests, you can do: - - python test/spec_tests.py --dump-tests - -and you'll get all the tests in JSON format. - The GNU Makefile also provides a few other targets for developers. To run a "fuzz test" against ten long randomly generated inputs: @@ -141,9 +131,16 @@ The spec -------- [The spec] contains over 500 embedded examples which serve as conformance -tests. To run the tests for `cmark`, do `make test`. To run them for -another Markdown program, say `myprog`, do `make test PROG=myprog`. To -run the tests for `commonmark.js`, do `make testjs`. +tests. To run the tests using an executable `$PROG`: + + python test/spec_tests.py --program $PROG + +If you want to extract the raw test data from the spec without +actually running the tests, you can do: + + python test/spec_tests.py --dump-tests + +and you'll get all the tests in JSON format. 
[The spec]: http://jgm.github.io/CommonMark/spec.html http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/3d7c5ed3/compiler/modules/CommonMark/src/bench.h ---------------------------------------------------------------------- diff --git a/compiler/modules/CommonMark/src/bench.h b/compiler/modules/CommonMark/src/bench.h new file mode 100644 index 0000000..bbea2c6 --- /dev/null +++ b/compiler/modules/CommonMark/src/bench.h @@ -0,0 +1,27 @@ +#ifndef CMARK_BENCH_H +#define CMARK_BENCH_H + +#include <stdio.h> +#include <time.h> + +#ifdef TIMER +float _cmark_start_time; +float _cmark_end_time; +float _cmark_save_time; + +#define start_timer() \ + _cmark_save_time = _cmark_start_time; \ + _cmark_start_time = (float)clock()/CLOCKS_PER_SEC + +#define end_timer(M) \ + _cmark_end_time = (float)clock()/CLOCKS_PER_SEC; \ + fprintf(stderr, "[TIME] (%s:%d) %4.f ns " M "\n", __FILE__, \ + __LINE__, (_cmark_end_time - _cmark_start_time) * 1000000); \ + _cmark_start_time = _cmark_save_time; + +#else +#define start_timer() +#define end_timer(M) +#endif + +#endif http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/3d7c5ed3/compiler/modules/CommonMark/src/blocks.c ---------------------------------------------------------------------- diff --git a/compiler/modules/CommonMark/src/blocks.c b/compiler/modules/CommonMark/src/blocks.c index 5d11710..ebef88b 100644 --- a/compiler/modules/CommonMark/src/blocks.c +++ b/compiler/modules/CommonMark/src/blocks.c @@ -11,7 +11,7 @@ #include "utf8.h" #include "scanners.h" #include "inlines.h" -#include "html/houdini.h" +#include "houdini.h" #include "buffer.h" #include "debug.h" @@ -435,6 +435,9 @@ cmark_node *cmark_parse_file(FILE *f) while ((bytes = fread(buffer, 1, sizeof(buffer), f)) > 0) { bool eof = bytes < sizeof(buffer); S_parser_feed(parser, buffer, bytes, eof); + if (eof) { + break; + } } document = cmark_parser_finish(parser); @@ -786,8 +789,8 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes) 
container->last_line_blank = (blank && container->type != NODE_BLOCK_QUOTE && container->type != NODE_HEADER && - (container->type != NODE_CODE_BLOCK && - container->as.code.fenced) && + !(container->type == NODE_CODE_BLOCK && + container->as.code.fenced) && !(container->type == NODE_LIST_ITEM && container->first_child == NULL && container->start_line == parser->line_number)); http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/3d7c5ed3/compiler/modules/CommonMark/src/cmark.c ---------------------------------------------------------------------- diff --git a/compiler/modules/CommonMark/src/cmark.c b/compiler/modules/CommonMark/src/cmark.c index 140a14c..16817b9 100644 --- a/compiler/modules/CommonMark/src/cmark.c +++ b/compiler/modules/CommonMark/src/cmark.c @@ -2,7 +2,7 @@ #include <assert.h> #include <stdio.h> #include "node.h" -#include "html/houdini.h" +#include "houdini.h" #include "cmark.h" #include "buffer.h" http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/3d7c5ed3/compiler/modules/CommonMark/src/cmark.h ---------------------------------------------------------------------- diff --git a/compiler/modules/CommonMark/src/cmark.h b/compiler/modules/CommonMark/src/cmark.h index cae4426..f96cea9 100644 --- a/compiler/modules/CommonMark/src/cmark.h +++ b/compiler/modules/CommonMark/src/cmark.h @@ -34,6 +34,9 @@ char *cmark_markdown_to_html(const char *text, int len); /** */ typedef enum { + /* Error status */ + CMARK_NODE_NONE, + /* Block */ CMARK_NODE_DOCUMENT, CMARK_NODE_BLOCK_QUOTE, http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/3d7c5ed3/compiler/modules/CommonMark/src/houdini.h ---------------------------------------------------------------------- diff --git a/compiler/modules/CommonMark/src/houdini.h b/compiler/modules/CommonMark/src/houdini.h new file mode 100644 index 0000000..9e1200e --- /dev/null +++ b/compiler/modules/CommonMark/src/houdini.h @@ -0,0 +1,52 @@ +#ifndef CMARK_HOUDINI_H +#define CMARK_HOUDINI_H + +#ifdef 
__cplusplus +extern "C" { +#endif + +#include <stdint.h> +#include "config.h" +#include "buffer.h" + +#ifdef HAVE___BUILTIN_EXPECT +# define likely(x) __builtin_expect((x),1) +# define unlikely(x) __builtin_expect((x),0) +#else +# define likely(x) (x) +# define unlikely(x) (x) +#endif + +#ifdef HOUDINI_USE_LOCALE +# define _isxdigit(c) isxdigit(c) +# define _isdigit(c) isdigit(c) +#else +/* + * Helper _isdigit methods -- do not trust the current locale + * */ +# define _isxdigit(c) (strchr("0123456789ABCDEFabcdef", (c)) != NULL) +# define _isdigit(c) ((c) >= '0' && (c) <= '9') +#endif + +#define HOUDINI_ESCAPED_SIZE(x) (((x) * 12) / 10) +#define HOUDINI_UNESCAPED_SIZE(x) (x) + +extern size_t houdini_unescape_ent(strbuf *ob, const uint8_t *src, size_t size); +extern int houdini_escape_html(strbuf *ob, const uint8_t *src, size_t size); +extern int houdini_escape_html0(strbuf *ob, const uint8_t *src, size_t size, int secure); +extern int houdini_unescape_html(strbuf *ob, const uint8_t *src, size_t size); +extern void houdini_unescape_html_f(strbuf *ob, const uint8_t *src, size_t size); +extern int houdini_escape_xml(strbuf *ob, const uint8_t *src, size_t size); +extern int houdini_escape_uri(strbuf *ob, const uint8_t *src, size_t size); +extern int houdini_escape_url(strbuf *ob, const uint8_t *src, size_t size); +extern int houdini_escape_href(strbuf *ob, const uint8_t *src, size_t size); +extern int houdini_unescape_uri(strbuf *ob, const uint8_t *src, size_t size); +extern int houdini_unescape_url(strbuf *ob, const uint8_t *src, size_t size); +extern int houdini_escape_js(strbuf *ob, const uint8_t *src, size_t size); +extern int houdini_unescape_js(strbuf *ob, const uint8_t *src, size_t size); + +#ifdef __cplusplus +} +#endif + +#endif http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/3d7c5ed3/compiler/modules/CommonMark/src/houdini_href_e.c ---------------------------------------------------------------------- diff --git 
a/compiler/modules/CommonMark/src/houdini_href_e.c b/compiler/modules/CommonMark/src/houdini_href_e.c new file mode 100644 index 0000000..1c99432 --- /dev/null +++ b/compiler/modules/CommonMark/src/houdini_href_e.c @@ -0,0 +1,107 @@ +#include <assert.h> +#include <stdio.h> +#include <string.h> + +#include "houdini.h" + +/* + * The following characters will not be escaped: + * + * -_.+!*'(),%#@?=;:/,+&$ alphanum + * + * Note that this character set is the addition of: + * + * - The characters which are safe to be in an URL + * - The characters which are *not* safe to be in + * an URL because they are RESERVED characters. + * + * We asume (lazily) that any RESERVED char that + * appears inside an URL is actually meant to + * have its native function (i.e. as an URL + * component/separator) and hence needs no escaping. + * + * There are two exceptions: the chacters & (amp) + * and ' (single quote) do not appear in the table. + * They are meant to appear in the URL as components, + * yet they require special HTML-entity escaping + * to generate valid HTML markup. + * + * All other characters will be escaped to %XX. 
+ * + */ +static const char HREF_SAFE[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +int +houdini_escape_href(strbuf *ob, const uint8_t *src, size_t size) +{ + static const uint8_t hex_chars[] = "0123456789ABCDEF"; + size_t i = 0, org; + uint8_t hex_str[3]; + + hex_str[0] = '%'; + + while (i < size) { + org = i; + while (i < size && HREF_SAFE[src[i]] != 0) + i++; + + if (likely(i > org)) + strbuf_put(ob, src + org, i - org); + + /* escaping */ + if (i >= size) + break; + + switch (src[i]) { + /* amp appears all the time in URLs, but needs + * HTML-entity escaping to be inside an href */ + case '&': + strbuf_puts(ob, "&amp;"); + break; + + /* the single quote is a valid URL character + * according to the standard; it needs HTML + * entity escaping too */ + case '\'': + strbuf_puts(ob, "&#x27;"); + break; + + /* the space can be escaped to %20 or a plus + * sign. we're going with the generic escape + * for now. 
the plus thing is more commonly seen + * when building GET strings */ +#if 0 + case ' ': + strbuf_putc(ob, '+'); + break; +#endif + + /* every other character goes with a %XX escaping */ + default: + hex_str[1] = hex_chars[(src[i] >> 4) & 0xF]; + hex_str[2] = hex_chars[src[i] & 0xF]; + strbuf_put(ob, hex_str, 3); + } + + i++; + } + + return 1; +} http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/3d7c5ed3/compiler/modules/CommonMark/src/houdini_html_e.c ---------------------------------------------------------------------- diff --git a/compiler/modules/CommonMark/src/houdini_html_e.c b/compiler/modules/CommonMark/src/houdini_html_e.c new file mode 100644 index 0000000..db5034b --- /dev/null +++ b/compiler/modules/CommonMark/src/houdini_html_e.c @@ -0,0 +1,81 @@ +#include <assert.h> +#include <stdio.h> +#include <string.h> + +#include "houdini.h" + +/** + * According to the OWASP rules: + * + * & --> &amp; + * < --> &lt; + * > --> &gt; + * " --> &quot; + * ' --> &#x27; &apos; is not recommended + * / --> &#x2F; forward slash is included as it helps end an HTML entity + * + */ +static const char HTML_ESCAPE_TABLE[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 0, 2, 3, 0, 0, 0, 0, 0, 0, 0, 4, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 6, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +static const char *HTML_ESCAPES[] = { + "", + "&quot;", + "&amp;", + "&#39;", + "&#47;", + "&lt;", 
+ "&gt;" +}; + +int +houdini_escape_html0(strbuf *ob, const uint8_t *src, size_t size, int secure) +{ + size_t i = 0, org, esc = 0; + + while (i < size) { + org = i; + while (i < size && (esc = HTML_ESCAPE_TABLE[src[i]]) == 0) + i++; + + if (i > org) + strbuf_put(ob, src + org, i - org); + + /* escaping */ + if (unlikely(i >= size)) + break; + + /* The forward slash is only escaped in secure mode */ + if ((src[i] == '/' || src[i] == '\'') && !secure) { + strbuf_putc(ob, src[i]); + } else { + strbuf_puts(ob, HTML_ESCAPES[esc]); + } + + i++; + } + + return 1; +} + +int +houdini_escape_html(strbuf *ob, const uint8_t *src, size_t size) +{ + return houdini_escape_html0(ob, src, size, 1); +} http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/3d7c5ed3/compiler/modules/CommonMark/src/houdini_html_u.c ---------------------------------------------------------------------- diff --git a/compiler/modules/CommonMark/src/houdini_html_u.c b/compiler/modules/CommonMark/src/houdini_html_u.c new file mode 100644 index 0000000..b88b9d1 --- /dev/null +++ b/compiler/modules/CommonMark/src/houdini_html_u.c @@ -0,0 +1,112 @@ +#include <assert.h> +#include <stdio.h> +#include <string.h> + +#include "buffer.h" +#include "houdini.h" +#include "utf8.h" +#include "html_unescape.h" + +size_t +houdini_unescape_ent(strbuf *ob, const uint8_t *src, size_t size) +{ + size_t i = 0; + + if (size > 3 && src[0] == '#') { + int codepoint = 0; + + if (_isdigit(src[1])) { + for (i = 1; i < size && _isdigit(src[i]); ++i) { + int cp = (codepoint * 10) + (src[i] - '0'); + + if (cp < codepoint) + return 0; + + codepoint = cp; + } + } + + else if (src[1] == 'x' || src[1] == 'X') { + for (i = 2; i < size && _isxdigit(src[i]); ++i) { + int cp = (codepoint * 16) + ((src[i] | 32) % 39 - 9); + + if (cp < codepoint) + return 0; + + codepoint = cp; + } + } + + if (i < size && src[i] == ';' && codepoint) { + utf8proc_encode_char(codepoint, ob); + return i + 1; + } + } + + else { + if (size > MAX_WORD_LENGTH) + 
size = MAX_WORD_LENGTH; + + for (i = MIN_WORD_LENGTH; i < size; ++i) { + if (src[i] == ' ') + break; + + if (src[i] == ';') { + const struct html_ent *entity = find_entity((char *)src, i); + + if (entity != NULL) { + strbuf_put(ob, entity->utf8, entity->utf8_len); + return i + 1; + } + + break; + } + } + } + + return 0; +} + +int +houdini_unescape_html(strbuf *ob, const uint8_t *src, size_t size) +{ + size_t i = 0, org, ent; + + while (i < size) { + org = i; + while (i < size && src[i] != '&') + i++; + + if (likely(i > org)) { + if (unlikely(org == 0)) { + if (i >= size) + return 0; + + strbuf_grow(ob, HOUDINI_UNESCAPED_SIZE(size)); + } + + strbuf_put(ob, src + org, i - org); + } + + /* escaping */ + if (i >= size) + break; + + i++; + + ent = houdini_unescape_ent(ob, src + i, size - i); + i += ent; + + /* not really an entity */ + if (ent == 0) + strbuf_putc(ob, '&'); + } + + return 1; +} + +void houdini_unescape_html_f(strbuf *ob, const uint8_t *src, size_t size) +{ + if (!houdini_unescape_html(ob, src, size)) + strbuf_put(ob, src, size); +} http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/3d7c5ed3/compiler/modules/CommonMark/src/html.c ---------------------------------------------------------------------- diff --git a/compiler/modules/CommonMark/src/html.c b/compiler/modules/CommonMark/src/html.c new file mode 100644 index 0000000..60229cc --- /dev/null +++ b/compiler/modules/CommonMark/src/html.c @@ -0,0 +1,357 @@ +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <assert.h> + +#include "config.h" +#include "cmark.h" +#include "node.h" +#include "buffer.h" +#include "houdini.h" + +// Functions to convert cmark_nodes to HTML strings. 
+ +static bool +finish_node(strbuf *html, cmark_node *node, bool tight); + +static void escape_html(strbuf *dest, const unsigned char *source, int length) +{ + if (length < 0) + length = strlen((char *)source); + + houdini_escape_html0(dest, source, (size_t)length, 0); +} + +static void escape_href(strbuf *dest, const unsigned char *source, int length) +{ + if (length < 0) + length = strlen((char *)source); + + houdini_escape_href(dest, source, (size_t)length); +} + +static inline void cr(strbuf *html) +{ + if (html->size && html->ptr[html->size - 1] != '\n') + strbuf_putc(html, '\n'); +} + +// Convert the inline children of a node to a plain string. +static void inlines_to_plain_html(strbuf *html, cmark_node* node) +{ + cmark_node* cur = node->first_child; + + if (cur == NULL) { + return; + } + + while (true) { + switch(cur->type) { + case NODE_TEXT: + case NODE_INLINE_CODE: + case NODE_INLINE_HTML: + escape_html(html, cur->as.literal.data, cur->as.literal.len); + break; + + case NODE_LINEBREAK: + case NODE_SOFTBREAK: + strbuf_putc(html, ' '); + break; + + default: + break; + } + + if (cur->first_child) { + cur = cur->first_child; + continue; + } + + next_sibling: + if (cur->next) { + cur = cur->next; + continue; + } + cur = cur->parent; + if (cur == node) { + break; + } + goto next_sibling; + } +} + + +// Convert a cmark_node to HTML. +static void node_to_html(strbuf *html, cmark_node *node) +{ + cmark_node *cur; + char start_header[] = "<h0>"; + bool tight = false; + bool visit_children; + strbuf *info; + + if (node == NULL) { + return; + } + + cur = node; + while (true) { + // Only NODE_IMAGE wants to skip its children. 
+ visit_children = true; + + switch(cur->type) { + case NODE_DOCUMENT: + break; + + case NODE_PARAGRAPH: + if (!tight) { + cr(html); + strbuf_puts(html, "<p>"); + } + break; + + case NODE_BLOCK_QUOTE: + cr(html); + strbuf_puts(html, "<blockquote>\n"); + // BLOCK_QUOTE doesn't use any of the 'as' structs, + // so the 'list' member can be used to store the + // current value of 'tight'. + cur->as.list.tight = tight; + tight = false; + break; + + case NODE_LIST_ITEM: + cr(html); + strbuf_puts(html, "<li>"); + break; + + case NODE_LIST: { + cmark_list *list = &cur->as.list; + bool tmp; + + // make sure a list starts at the beginning of the line: + cr(html); + + if (list->list_type == CMARK_BULLET_LIST) { + strbuf_puts(html, "<ul>\n"); + } + else if (list->start == 1) { + strbuf_puts(html, "<ol>\n"); + } + else { + strbuf_printf(html, "<ol start=\"%d\">\n", + list->start); + } + + // Store the current value of 'tight' by swapping. + tmp = list->tight; + list->tight = tight; + tight = tmp; + break; + } + + case NODE_HEADER: + cr(html); + start_header[2] = '0' + cur->as.header.level; + strbuf_puts(html, start_header); + break; + + case NODE_CODE_BLOCK: + info = &cur->as.code.info; + cr(html); + + if (&cur->as.code.fence_length == 0 + || strbuf_len(info) == 0) { + strbuf_puts(html, "<pre><code>"); + } + else { + int first_tag = strbuf_strchr(info, ' ', 0); + if (first_tag < 0) + first_tag = strbuf_len(info); + + strbuf_puts(html, + "<pre><code class=\"language-"); + escape_html(html, info->ptr, first_tag); + strbuf_puts(html, "\">"); + } + + escape_html(html, cur->string_content.ptr, cur->string_content.size); + break; + + case NODE_HTML: + cr(html); + strbuf_put(html, cur->string_content.ptr, cur->string_content.size); + break; + + case NODE_HRULE: + cr(html); + strbuf_puts(html, "<hr />\n"); + break; + + case NODE_REFERENCE_DEF: + break; + + case NODE_TEXT: + escape_html(html, cur->as.literal.data, cur->as.literal.len); + break; + + case NODE_LINEBREAK: + 
strbuf_puts(html, "<br />\n"); + break; + + case NODE_SOFTBREAK: + strbuf_putc(html, '\n'); + break; + + case NODE_INLINE_CODE: + strbuf_puts(html, "<code>"); + escape_html(html, cur->as.literal.data, cur->as.literal.len); + break; + + case NODE_INLINE_HTML: + strbuf_put(html, + cur->as.literal.data, + cur->as.literal.len); + break; + + case NODE_LINK: + strbuf_puts(html, "<a href=\""); + if (cur->as.link.url) + escape_href(html, cur->as.link.url, -1); + + if (cur->as.link.title) { + strbuf_puts(html, "\" title=\""); + escape_html(html, cur->as.link.title, -1); + } + + strbuf_puts(html, "\">"); + break; + + case NODE_IMAGE: + strbuf_puts(html, "<img src=\""); + if (cur->as.link.url) + escape_href(html, cur->as.link.url, -1); + + strbuf_puts(html, "\" alt=\""); + inlines_to_plain_html(html, cur); + + if (cur->as.link.title) { + strbuf_puts(html, "\" title=\""); + escape_html(html, cur->as.link.title, -1); + } + + strbuf_puts(html, "\" />"); + visit_children = false; + break; + + case NODE_STRONG: + strbuf_puts(html, "<strong>"); + break; + + case NODE_EMPH: + strbuf_puts(html, "<em>"); + break; + + default: + assert(false); + } + + if (visit_children && cur->first_child) { + cur = cur->first_child; + continue; + } + + next_sibling: + tight = finish_node(html, cur, tight); + if (cur == node) { + break; + } + if (cur->next) { + cur = cur->next; + continue; + } + cur = cur->parent; + goto next_sibling; + } +} + +// Returns the restored value of 'tight'. +static bool +finish_node(strbuf *html, cmark_node *node, bool tight) +{ + char end_header[] = "</h0>\n"; + + switch (node->type) { + case NODE_PARAGRAPH: + if (!tight) { + strbuf_puts(html, "</p>\n"); + } + break; + + case NODE_BLOCK_QUOTE: { + cmark_list *list = &node->as.list; + strbuf_puts(html, "</blockquote>\n"); + // Restore old 'tight' value. 
+ tight = list->tight; + list->tight = false; + break; + } + + case NODE_LIST_ITEM: + strbuf_puts(html, "</li>\n"); + break; + + case NODE_LIST: { + cmark_list *list = &node->as.list; + bool tmp; + strbuf_puts(html, + list->list_type == CMARK_BULLET_LIST ? + "</ul>\n" : "</ol>\n"); + // Restore old 'tight' value. + tmp = tight; + tight = list->tight; + list->tight = tmp; + break; + } + + case NODE_HEADER: + end_header[3] = '0' + node->as.header.level; + strbuf_puts(html, end_header); + break; + + case NODE_CODE_BLOCK: + strbuf_puts(html, "</code></pre>\n"); + break; + + case NODE_INLINE_CODE: + strbuf_puts(html, "</code>"); + break; + + case NODE_LINK: + strbuf_puts(html, "</a>"); + break; + + case NODE_STRONG: + strbuf_puts(html, "</strong>"); + break; + + case NODE_EMPH: + strbuf_puts(html, "</em>"); + break; + + default: + break; + } + + return tight; +} + +char *cmark_render_html(cmark_node *root) +{ + char *result; + strbuf html = GH_BUF_INIT; + node_to_html(&html, root); + result = (char *)strbuf_detach(&html); + strbuf_free(&html); + return result; +} http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/3d7c5ed3/compiler/modules/CommonMark/src/html/houdini.h ---------------------------------------------------------------------- diff --git a/compiler/modules/CommonMark/src/html/houdini.h b/compiler/modules/CommonMark/src/html/houdini.h deleted file mode 100644 index 9e1200e..0000000 --- a/compiler/modules/CommonMark/src/html/houdini.h +++ /dev/null @@ -1,52 +0,0 @@ -#ifndef CMARK_HOUDINI_H -#define CMARK_HOUDINI_H - -#ifdef __cplusplus -extern "C" { -#endif - -#include <stdint.h> -#include "config.h" -#include "buffer.h" - -#ifdef HAVE___BUILTIN_EXPECT -# define likely(x) __builtin_expect((x),1) -# define unlikely(x) __builtin_expect((x),0) -#else -# define likely(x) (x) -# define unlikely(x) (x) -#endif - -#ifdef HOUDINI_USE_LOCALE -# define _isxdigit(c) isxdigit(c) -# define _isdigit(c) isdigit(c) -#else -/* - * Helper _isdigit methods -- do not trust 
the current locale - * */ -# define _isxdigit(c) (strchr("0123456789ABCDEFabcdef", (c)) != NULL) -# define _isdigit(c) ((c) >= '0' && (c) <= '9') -#endif - -#define HOUDINI_ESCAPED_SIZE(x) (((x) * 12) / 10) -#define HOUDINI_UNESCAPED_SIZE(x) (x) - -extern size_t houdini_unescape_ent(strbuf *ob, const uint8_t *src, size_t size); -extern int houdini_escape_html(strbuf *ob, const uint8_t *src, size_t size); -extern int houdini_escape_html0(strbuf *ob, const uint8_t *src, size_t size, int secure); -extern int houdini_unescape_html(strbuf *ob, const uint8_t *src, size_t size); -extern void houdini_unescape_html_f(strbuf *ob, const uint8_t *src, size_t size); -extern int houdini_escape_xml(strbuf *ob, const uint8_t *src, size_t size); -extern int houdini_escape_uri(strbuf *ob, const uint8_t *src, size_t size); -extern int houdini_escape_url(strbuf *ob, const uint8_t *src, size_t size); -extern int houdini_escape_href(strbuf *ob, const uint8_t *src, size_t size); -extern int houdini_unescape_uri(strbuf *ob, const uint8_t *src, size_t size); -extern int houdini_unescape_url(strbuf *ob, const uint8_t *src, size_t size); -extern int houdini_escape_js(strbuf *ob, const uint8_t *src, size_t size); -extern int houdini_unescape_js(strbuf *ob, const uint8_t *src, size_t size); - -#ifdef __cplusplus -} -#endif - -#endif http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/3d7c5ed3/compiler/modules/CommonMark/src/html/houdini_href_e.c ---------------------------------------------------------------------- diff --git a/compiler/modules/CommonMark/src/html/houdini_href_e.c b/compiler/modules/CommonMark/src/html/houdini_href_e.c deleted file mode 100644 index 12456ce..0000000 --- a/compiler/modules/CommonMark/src/html/houdini_href_e.c +++ /dev/null @@ -1,107 +0,0 @@ -#include <assert.h> -#include <stdio.h> -#include <string.h> - -#include "html/houdini.h" - -/* - * The following characters will not be escaped: - * - * -_.+!*'(),%#@?=;:/,+&$ alphanum - * - * Note that this 
character set is the addition of: - * - * - The characters which are safe to be in an URL - * - The characters which are *not* safe to be in - * an URL because they are RESERVED characters. - * - * We asume (lazily) that any RESERVED char that - * appears inside an URL is actually meant to - * have its native function (i.e. as an URL - * component/separator) and hence needs no escaping. - * - * There are two exceptions: the chacters & (amp) - * and ' (single quote) do not appear in the table. - * They are meant to appear in the URL as components, - * yet they require special HTML-entity escaping - * to generate valid HTML markup. - * - * All other characters will be escaped to %XX. - * - */ -static const char HREF_SAFE[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -}; - -int -houdini_escape_href(strbuf *ob, const uint8_t *src, size_t size) -{ - static const uint8_t hex_chars[] = "0123456789ABCDEF"; - size_t i = 0, org; - uint8_t hex_str[3]; - - hex_str[0] = '%'; - - while (i < size) { - org = i; - while (i < size && HREF_SAFE[src[i]] != 0) - i++; - - if (likely(i > org)) - strbuf_put(ob, src + org, i - org); - - /* escaping */ - if (i >= size) - break; - - switch (src[i]) { - /* amp appears all the time in URLs, but 
needs - * HTML-entity escaping to be inside an href */ - case '&': - strbuf_puts(ob, "&"); - break; - - /* the single quote is a valid URL character - * according to the standard; it needs HTML - * entity escaping too */ - case '\'': - strbuf_puts(ob, "'"); - break; - - /* the space can be escaped to %20 or a plus - * sign. we're going with the generic escape - * for now. the plus thing is more commonly seen - * when building GET strings */ -#if 0 - case ' ': - strbuf_putc(ob, '+'); - break; -#endif - - /* every other character goes with a %XX escaping */ - default: - hex_str[1] = hex_chars[(src[i] >> 4) & 0xF]; - hex_str[2] = hex_chars[src[i] & 0xF]; - strbuf_put(ob, hex_str, 3); - } - - i++; - } - - return 1; -} http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/3d7c5ed3/compiler/modules/CommonMark/src/html/houdini_html_e.c ---------------------------------------------------------------------- diff --git a/compiler/modules/CommonMark/src/html/houdini_html_e.c b/compiler/modules/CommonMark/src/html/houdini_html_e.c deleted file mode 100644 index f2e86fe..0000000 --- a/compiler/modules/CommonMark/src/html/houdini_html_e.c +++ /dev/null @@ -1,81 +0,0 @@ -#include <assert.h> -#include <stdio.h> -#include <string.h> - -#include "html/houdini.h" - -/** - * According to the OWASP rules: - * - * & --> & - * < --> < - * > --> > - * " --> " - * ' --> ' ' is not recommended - * / --> / forward slash is included as it helps end an HTML entity - * - */ -static const char HTML_ESCAPE_TABLE[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 1, 0, 0, 0, 2, 3, 0, 0, 0, 0, 0, 0, 0, 4, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 6, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -}; - -static const char *HTML_ESCAPES[] = { - "", - "&quot;", - "&amp;", - "&#39;", - "&#47;", - "&lt;", - "&gt;" -}; - -int -houdini_escape_html0(strbuf *ob, const uint8_t *src, size_t size, int secure) -{ - size_t i = 0, org, esc = 0; - - while (i < size) { - org = i; - while (i < size && (esc = HTML_ESCAPE_TABLE[src[i]]) == 0) - i++; - - if (i > org) - strbuf_put(ob, src + org, i - org); - - /* escaping */ - if (unlikely(i >= size)) - break; - - /* The forward slash is only escaped in secure mode */ - if ((src[i] == '/' || src[i] == '\'') && !secure) { - strbuf_putc(ob, src[i]); - } else { - strbuf_puts(ob, HTML_ESCAPES[esc]); - } - - i++; - } - - return 1; -} - -int -houdini_escape_html(strbuf *ob, const uint8_t *src, size_t size) -{ - return houdini_escape_html0(ob, src, size, 1); -} http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/3d7c5ed3/compiler/modules/CommonMark/src/html/houdini_html_u.c ---------------------------------------------------------------------- diff --git a/compiler/modules/CommonMark/src/html/houdini_html_u.c b/compiler/modules/CommonMark/src/html/houdini_html_u.c deleted file mode 100644 index b88b9d1..0000000 --- a/compiler/modules/CommonMark/src/html/houdini_html_u.c +++ /dev/null @@ -1,112 +0,0 @@ -#include <assert.h> -#include <stdio.h> -#include <string.h> - -#include "buffer.h" -#include "houdini.h" -#include "utf8.h" -#include "html_unescape.h" - -size_t -houdini_unescape_ent(strbuf *ob, const uint8_t *src, size_t size) -{ - size_t i = 0; - - if (size > 3 && src[0] == '#') { - int codepoint = 0; - - if (_isdigit(src[1])) { - for (i = 1; i < size && _isdigit(src[i]); ++i) { - int cp = (codepoint * 10) + (src[i] - '0'); - - if (cp < 
codepoint) - return 0; - - codepoint = cp; - } - } - - else if (src[1] == 'x' || src[1] == 'X') { - for (i = 2; i < size && _isxdigit(src[i]); ++i) { - int cp = (codepoint * 16) + ((src[i] | 32) % 39 - 9); - - if (cp < codepoint) - return 0; - - codepoint = cp; - } - } - - if (i < size && src[i] == ';' && codepoint) { - utf8proc_encode_char(codepoint, ob); - return i + 1; - } - } - - else { - if (size > MAX_WORD_LENGTH) - size = MAX_WORD_LENGTH; - - for (i = MIN_WORD_LENGTH; i < size; ++i) { - if (src[i] == ' ') - break; - - if (src[i] == ';') { - const struct html_ent *entity = find_entity((char *)src, i); - - if (entity != NULL) { - strbuf_put(ob, entity->utf8, entity->utf8_len); - return i + 1; - } - - break; - } - } - } - - return 0; -} - -int -houdini_unescape_html(strbuf *ob, const uint8_t *src, size_t size) -{ - size_t i = 0, org, ent; - - while (i < size) { - org = i; - while (i < size && src[i] != '&') - i++; - - if (likely(i > org)) { - if (unlikely(org == 0)) { - if (i >= size) - return 0; - - strbuf_grow(ob, HOUDINI_UNESCAPED_SIZE(size)); - } - - strbuf_put(ob, src + org, i - org); - } - - /* escaping */ - if (i >= size) - break; - - i++; - - ent = houdini_unescape_ent(ob, src + i, size - i); - i += ent; - - /* not really an entity */ - if (ent == 0) - strbuf_putc(ob, '&'); - } - - return 1; -} - -void houdini_unescape_html_f(strbuf *ob, const uint8_t *src, size_t size) -{ - if (!houdini_unescape_html(ob, src, size)) - strbuf_put(ob, src, size); -} http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/3d7c5ed3/compiler/modules/CommonMark/src/html/html.c ---------------------------------------------------------------------- diff --git a/compiler/modules/CommonMark/src/html/html.c b/compiler/modules/CommonMark/src/html/html.c deleted file mode 100644 index 0e3dd15..0000000 --- a/compiler/modules/CommonMark/src/html/html.c +++ /dev/null @@ -1,357 +0,0 @@ -#include <stdlib.h> -#include <stdio.h> -#include <string.h> -#include <assert.h> - -#include 
"config.h" -#include "cmark.h" -#include "node.h" -#include "buffer.h" -#include "html/houdini.h" - -// Functions to convert cmark_nodes to HTML strings. - -static bool -finish_node(strbuf *html, cmark_node *node, bool tight); - -static void escape_html(strbuf *dest, const unsigned char *source, int length) -{ - if (length < 0) - length = strlen((char *)source); - - houdini_escape_html0(dest, source, (size_t)length, 0); -} - -static void escape_href(strbuf *dest, const unsigned char *source, int length) -{ - if (length < 0) - length = strlen((char *)source); - - houdini_escape_href(dest, source, (size_t)length); -} - -static inline void cr(strbuf *html) -{ - if (html->size && html->ptr[html->size - 1] != '\n') - strbuf_putc(html, '\n'); -} - -// Convert the inline children of a node to a plain string. -static void inlines_to_plain_html(strbuf *html, cmark_node* node) -{ - cmark_node* cur = node->first_child; - - if (cur == NULL) { - return; - } - - while (true) { - switch(cur->type) { - case NODE_TEXT: - case NODE_INLINE_CODE: - case NODE_INLINE_HTML: - escape_html(html, cur->as.literal.data, cur->as.literal.len); - break; - - case NODE_LINEBREAK: - case NODE_SOFTBREAK: - strbuf_putc(html, ' '); - break; - - default: - break; - } - - if (cur->first_child) { - cur = cur->first_child; - continue; - } - - next_sibling: - if (cur->next) { - cur = cur->next; - continue; - } - cur = cur->parent; - if (cur == node) { - break; - } - goto next_sibling; - } -} - - -// Convert a cmark_node to HTML. -static void node_to_html(strbuf *html, cmark_node *node) -{ - cmark_node *cur; - char start_header[] = "<h0>"; - bool tight = false; - bool visit_children; - strbuf *info; - - if (node == NULL) { - return; - } - - cur = node; - while (true) { - // Only NODE_IMAGE wants to skip its children. 
- visit_children = true; - - switch(cur->type) { - case NODE_DOCUMENT: - break; - - case NODE_PARAGRAPH: - if (!tight) { - cr(html); - strbuf_puts(html, "<p>"); - } - break; - - case NODE_BLOCK_QUOTE: - cr(html); - strbuf_puts(html, "<blockquote>\n"); - // BLOCK_QUOTE doesn't use any of the 'as' structs, - // so the 'list' member can be used to store the - // current value of 'tight'. - cur->as.list.tight = tight; - tight = false; - break; - - case NODE_LIST_ITEM: - cr(html); - strbuf_puts(html, "<li>"); - break; - - case NODE_LIST: { - cmark_list *list = &cur->as.list; - bool tmp; - - // make sure a list starts at the beginning of the line: - cr(html); - - if (list->list_type == CMARK_BULLET_LIST) { - strbuf_puts(html, "<ul>\n"); - } - else if (list->start == 1) { - strbuf_puts(html, "<ol>\n"); - } - else { - strbuf_printf(html, "<ol start=\"%d\">\n", - list->start); - } - - // Store the current value of 'tight' by swapping. - tmp = list->tight; - list->tight = tight; - tight = tmp; - break; - } - - case NODE_HEADER: - cr(html); - start_header[2] = '0' + cur->as.header.level; - strbuf_puts(html, start_header); - break; - - case NODE_CODE_BLOCK: - info = &cur->as.code.info; - cr(html); - - if (&cur->as.code.fence_length == 0 - || strbuf_len(info) == 0) { - strbuf_puts(html, "<pre><code>"); - } - else { - int first_tag = strbuf_strchr(info, ' ', 0); - if (first_tag < 0) - first_tag = strbuf_len(info); - - strbuf_puts(html, - "<pre><code class=\"language-"); - escape_html(html, info->ptr, first_tag); - strbuf_puts(html, "\">"); - } - - escape_html(html, cur->string_content.ptr, cur->string_content.size); - break; - - case NODE_HTML: - cr(html); - strbuf_put(html, cur->string_content.ptr, cur->string_content.size); - break; - - case NODE_HRULE: - cr(html); - strbuf_puts(html, "<hr />\n"); - break; - - case NODE_REFERENCE_DEF: - break; - - case NODE_TEXT: - escape_html(html, cur->as.literal.data, cur->as.literal.len); - break; - - case NODE_LINEBREAK: - 
strbuf_puts(html, "<br />\n"); - break; - - case NODE_SOFTBREAK: - strbuf_putc(html, '\n'); - break; - - case NODE_INLINE_CODE: - strbuf_puts(html, "<code>"); - escape_html(html, cur->as.literal.data, cur->as.literal.len); - break; - - case NODE_INLINE_HTML: - strbuf_put(html, - cur->as.literal.data, - cur->as.literal.len); - break; - - case NODE_LINK: - strbuf_puts(html, "<a href=\""); - if (cur->as.link.url) - escape_href(html, cur->as.link.url, -1); - - if (cur->as.link.title) { - strbuf_puts(html, "\" title=\""); - escape_html(html, cur->as.link.title, -1); - } - - strbuf_puts(html, "\">"); - break; - - case NODE_IMAGE: - strbuf_puts(html, "<img src=\""); - if (cur->as.link.url) - escape_href(html, cur->as.link.url, -1); - - strbuf_puts(html, "\" alt=\""); - inlines_to_plain_html(html, cur); - - if (cur->as.link.title) { - strbuf_puts(html, "\" title=\""); - escape_html(html, cur->as.link.title, -1); - } - - strbuf_puts(html, "\" />"); - visit_children = false; - break; - - case NODE_STRONG: - strbuf_puts(html, "<strong>"); - break; - - case NODE_EMPH: - strbuf_puts(html, "<em>"); - break; - - default: - assert(false); - } - - if (visit_children && cur->first_child) { - cur = cur->first_child; - continue; - } - - next_sibling: - tight = finish_node(html, cur, tight); - if (cur == node) { - break; - } - if (cur->next) { - cur = cur->next; - continue; - } - cur = cur->parent; - goto next_sibling; - } -} - -// Returns the restored value of 'tight'. -static bool -finish_node(strbuf *html, cmark_node *node, bool tight) -{ - char end_header[] = "</h0>\n"; - - switch (node->type) { - case NODE_PARAGRAPH: - if (!tight) { - strbuf_puts(html, "</p>\n"); - } - break; - - case NODE_BLOCK_QUOTE: { - cmark_list *list = &node->as.list; - strbuf_puts(html, "</blockquote>\n"); - // Restore old 'tight' value. 
- tight = list->tight; - list->tight = false; - break; - } - - case NODE_LIST_ITEM: - strbuf_puts(html, "</li>\n"); - break; - - case NODE_LIST: { - cmark_list *list = &node->as.list; - bool tmp; - strbuf_puts(html, - list->list_type == CMARK_BULLET_LIST ? - "</ul>\n" : "</ol>\n"); - // Restore old 'tight' value. - tmp = tight; - tight = list->tight; - list->tight = tmp; - break; - } - - case NODE_HEADER: - end_header[3] = '0' + node->as.header.level; - strbuf_puts(html, end_header); - break; - - case NODE_CODE_BLOCK: - strbuf_puts(html, "</code></pre>\n"); - break; - - case NODE_INLINE_CODE: - strbuf_puts(html, "</code>"); - break; - - case NODE_LINK: - strbuf_puts(html, "</a>"); - break; - - case NODE_STRONG: - strbuf_puts(html, "</strong>"); - break; - - case NODE_EMPH: - strbuf_puts(html, "</em>"); - break; - - default: - break; - } - - return tight; -} - -char *cmark_render_html(cmark_node *root) -{ - char *result; - strbuf html = GH_BUF_INIT; - node_to_html(&html, root); - result = (char *)strbuf_detach(&html); - strbuf_free(&html); - return result; -}
