Update CommonMark source code Update CommonMark source code to commit b34e19c from Sat Jan 17 22:00:19 2015 -0800.
Project: http://git-wip-us.apache.org/repos/asf/lucy-clownfish/repo Commit: http://git-wip-us.apache.org/repos/asf/lucy-clownfish/commit/5672da15 Tree: http://git-wip-us.apache.org/repos/asf/lucy-clownfish/tree/5672da15 Diff: http://git-wip-us.apache.org/repos/asf/lucy-clownfish/diff/5672da15 Branch: refs/heads/master Commit: 5672da15fe6837fc7c1081d74a283d7005a8da22 Parents: dd37ce3 Author: Nick Wellnhofer <[email protected]> Authored: Sat Jan 10 18:54:08 2015 +0100 Committer: Nick Wellnhofer <[email protected]> Committed: Sun Jan 18 19:37:05 2015 +0100 ---------------------------------------------------------------------- compiler/modules/CommonMark/COPYING | 173 + compiler/modules/CommonMark/LICENSE | 70 - compiler/modules/CommonMark/README.md | 87 +- compiler/modules/CommonMark/src/blocks.c | 508 +- compiler/modules/CommonMark/src/buffer.c | 84 +- compiler/modules/CommonMark/src/buffer.h | 82 +- compiler/modules/CommonMark/src/chunk.h | 18 +- compiler/modules/CommonMark/src/cmark.c | 2 +- compiler/modules/CommonMark/src/cmark.h | 354 +- compiler/modules/CommonMark/src/cmark_ctype.c | 53 + compiler/modules/CommonMark/src/cmark_ctype.h | 24 + compiler/modules/CommonMark/src/cmark_export.h | 32 +- compiler/modules/CommonMark/src/config.h | 4 + compiler/modules/CommonMark/src/config.h.in | 6 + compiler/modules/CommonMark/src/houdini.h | 26 +- .../modules/CommonMark/src/houdini_href_e.c | 20 +- .../modules/CommonMark/src/houdini_html_e.c | 24 +- .../modules/CommonMark/src/houdini_html_u.c | 17 +- compiler/modules/CommonMark/src/html.c | 489 +- .../modules/CommonMark/src/html_unescape.gperf | 4253 ++-- compiler/modules/CommonMark/src/html_unescape.h | 21275 +++++++++-------- compiler/modules/CommonMark/src/inlines.c | 328 +- compiler/modules/CommonMark/src/iterator.c | 140 + compiler/modules/CommonMark/src/iterator.h | 25 + compiler/modules/CommonMark/src/man.c | 249 + compiler/modules/CommonMark/src/node.c | 250 +- compiler/modules/CommonMark/src/node.h | 5 +- 
compiler/modules/CommonMark/src/parser.h | 1 + compiler/modules/CommonMark/src/print.c | 169 - compiler/modules/CommonMark/src/references.c | 16 +- compiler/modules/CommonMark/src/scanners.c | 4008 ++-- compiler/modules/CommonMark/src/scanners.h | 2 +- compiler/modules/CommonMark/src/scanners.re | 7 +- compiler/modules/CommonMark/src/utf8.c | 427 +- compiler/modules/CommonMark/src/xml.c | 175 + 35 files changed, 18401 insertions(+), 15002 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/5672da15/compiler/modules/CommonMark/COPYING ---------------------------------------------------------------------- diff --git a/compiler/modules/CommonMark/COPYING b/compiler/modules/CommonMark/COPYING new file mode 100644 index 0000000..0bb3445 --- /dev/null +++ b/compiler/modules/CommonMark/COPYING @@ -0,0 +1,173 @@ +Copyright (c) 2014, John MacFarlane + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + + * Neither the name of John MacFarlane nor the names of other + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +----- + +houdini.h, houdini_href_e.c, houdini_html_e.c, houdini_html_u.c, +html_unescape.gperf, html_unescape.h + +derive from https://github.com/vmg/houdini (with some modifications) + +Copyright (C) 2012 Vicent Martí + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + +----- + +buffer.h, buffer.c, chunk.h + +are derived from code (C) 2012 Github, Inc. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + +----- + +utf8.c and utf8.c + +are derived from utf8proc +(<http://www.public-software-group.org/utf8proc>), +(C) 2009 Public Software Group e. V., Berlin, Germany. + +Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the "Software"), +to deal in the Software without restriction, including without limitation +the rights to use, copy, modify, merge, publish, distribute, sublicense, +and/or sell copies of the Software, and to permit persons to whom the +Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. + +----- + +normalize-reference.js is a slightly modified version of +https://github.com/dmoscrop/fold-case: + +Copyright Mathias Bynens <https://mathiasbynens.be/> + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ +----- + +The polyfill for String.fromCodePoint included in commonmark.js is +Copyright Mathias Bynens <http://mathiasbynens.be/> + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +----- + +The normalization code in runtests.py was derived from the +markdowntest project, Copyright 2013 Karl Dubost: + +The MIT License (MIT) + +Copyright (c) 2013 Karl Dubost + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/5672da15/compiler/modules/CommonMark/LICENSE ---------------------------------------------------------------------- diff --git a/compiler/modules/CommonMark/LICENSE b/compiler/modules/CommonMark/LICENSE deleted file mode 100644 index c8377be..0000000 --- a/compiler/modules/CommonMark/LICENSE +++ /dev/null @@ -1,70 +0,0 @@ -Copyright (c) 2014, John MacFarlane - -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following - disclaimer in the documentation and/or other materials provided - with the distribution. - - * Neither the name of John MacFarlane nor the names of other - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - ------ - -The polyfill for String.fromCodePoint included in commonmark.js is -Copyright Mathias Bynens <http://mathiasbynens.be/> - -Permission is hereby granted, free of charge, to any person obtaining -a copy of this software and associated documentation files (the -"Software"), to deal in the Software without restriction, including -without limitation the rights to use, copy, modify, merge, publish, -distribute, sublicense, and/or sell copies of the Software, and to -permit persons to whom the Software is furnished to do so, subject to -the following conditions: - -The above copyright notice and this permission notice shall be -included in all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE -LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- ------ - -The normalization code in runtests.py was derived from the -markdowntest project, Copyright 2013 Karl Dubost: - -The MIT License (MIT) - -Copyright (c) 2013 Karl Dubost - -Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/5672da15/compiler/modules/CommonMark/README.md ---------------------------------------------------------------------- diff --git a/compiler/modules/CommonMark/README.md b/compiler/modules/CommonMark/README.md index 8db4c1f..4bbac88 100644 --- a/compiler/modules/CommonMark/README.md +++ b/compiler/modules/CommonMark/README.md @@ -16,22 +16,12 @@ written in standard C99 and has no library dependencies. The parser is very fast (see [benchmarks](benchmarks.md)). It is easy to use `libcmark` in python, lua, ruby, and other dynamic -languages: see `wrapper.py`, `wrapper.lua`, and `wrapper.rb` in the -repository for simple examples. +languages: see the `wrappers/` subdirectory for some simple examples. 
-The JavaScript implementation is a single JavaScript file, with -no dependencies, that can be linked to in an HTML page. Here -is a simple usage example: - -``` javascript -var reader = new commonmark.DocParser(); -var writer = new commonmark.HtmlRenderer(); -var parsed = reader.parse("Hello *world*"); -var result = writer.render(parsed); -``` - -A node package is also available; it includes a command-line tool called -`commonmark`. +The JavaScript implementation provides both an NPM package and a +single JavaScript file, with no dependencies, that can be linked into +an HTML page. For further information, see the +[README in the js directory](js/README.md). **A note on security:** Neither implementation attempts to sanitize link attributes or @@ -61,7 +51,7 @@ on FreeBSD: mkdir build cd build cmake .. # optionally: -DCMAKE_INSTALL_PREFIX=path - make # executable will be create as build/src/cmake + make # executable will be created as build/src/cmark make test make install @@ -75,21 +65,26 @@ Or, to create Xcode project files on OSX: make install The GNU Makefile also provides a few other targets for developers. +To run a benchmark: + + make bench + To run a "fuzz test" against ten long randomly generated inputs: make fuzztest -To run a test for memory leaks using valgrind: +To run a test for memory leaks using `valgrind`: make leakcheck +To reformat source code using `astyle`: + + make astyle + To make a release tarball and zip archive: make archive -To test the archives: - - make testarchive Compiling for Windows --------------------- @@ -112,20 +107,14 @@ The JavaScript library can be installed through `npm`: npm install commonmark -To build the JavaScript library as a single standalone file: - - browserify --standalone commonmark js/lib/index.js -o js/commonmark.js - -Or fetch a pre-built copy from -<http://spec.commonmark.org/js/commonmark.js>`. 
- -To run tests for the JavaScript library: - - make testjs +This includes a command-line converter called `commonmark`. -or +If you want to use it in a client application, you can fetch +a pre-built copy of `commonmark.js` from +<http://spec.commonmark.org/js/commonmark.js>. - node js/test.js +For further information, see the +[README in the js directory](js/README.md). The spec -------- @@ -133,16 +122,16 @@ The spec [The spec] contains over 500 embedded examples which serve as conformance tests. To run the tests using an executable `$PROG`: - python test/spec_tests.py --program $PROG + python3 test/spec_tests.py --program $PROG If you want to extract the raw test data from the spec without actually running the tests, you can do: - python test/spec_tests.py --dump-tests + python3 test/spec_tests.py --dump-tests and you'll get all the tests in JSON format. -[The spec]: http://jgm.github.io/CommonMark/spec.html +[The spec]: http://spec.commonmark.org/0.13/ The source of [the spec] is `spec.txt`. This is basically a Markdown file, with code examples written in a shorthand form: @@ -154,8 +143,9 @@ file, with code examples written in a shorthand form: . To build an HTML version of the spec, do `make spec.html`. To build a -PDF version, do `make spec.pdf`. Both these commands require that -[pandoc] is installed, and creating a PDF requires a latex installation. +PDF version, do `make spec.pdf`. (Creating a PDF requires [pandoc] +and a LaTeX installation. Creating the HTML version requires only +`libcmark` and `python3`.) The spec is written from the point of view of the human writer, not the computer reader. 
It is not an algorithm---an English translation of @@ -191,15 +181,13 @@ Differences from original Markdown There are only a few places where this spec says things that contradict the canonical syntax description: -- It [allows all punctuation symbols to be - backslash-escaped](http://jgm.github.io/CommonMark/spec.html#backslash-escapes), +- It allows all punctuation symbols to be backslash-escaped, not just the symbols with special meanings in Markdown. We found that it was just too hard to remember which symbols could be escaped. -- It introduces an [alternative syntax for hard line - breaks](http://jgm.github.io/CommonMark/spec.html#hard-line-breaks), a - backslash at the end of the line, supplementing the +- It introduces an alternative syntax for hard line + breaks, a backslash at the end of the line, supplementing the two-spaces-at-the-end-of-line rule. This is motivated by persistent complaints about the “invisible” nature of the two-space rule. @@ -207,13 +195,11 @@ the canonical syntax description: backwards-compatible way). For example, `Markdown.pl` allows single quotes around a title in inline links, but not in reference links. This kind of difference is really hard for users to remember, so the - spec [allows single quotes in both - contexts](http://jgm.github.io/CommonMark/spec.html#links). + spec allows single quotes in both contexts. - The rule for HTML blocks differs, though in most real cases it - shouldn't make a difference. (See - [here](http://jgm.github.io/CommonMark/spec.html#html-blocks) for - details.) The spec's proposal makes it easy to include Markdown + shouldn't make a difference. (See the section on HTML Blocks + for details.) The spec's proposal makes it easy to include Markdown inside HTML block-level tags, if you want to, but also allows you to exclude this. It is also makes parsing much easier, avoiding expensive backtracking. 
@@ -232,7 +218,7 @@ the canonical syntax description: - Rules for content in lists differ in a few respects, though (as with HTML blocks), most lists in existing documents should render as intended. There is some discussion of the choice points and - differences [here](http://jgm.github.io/CommonMark/spec.html#motivation). + differences in the subsection of List Items entitled Motivation. We think that the spec's proposal does better than any existing implementation in rendering lists the way a human writer or reader would intuitively understand them. (We could give numerous examples @@ -254,7 +240,7 @@ the canonical syntax description: - The start number of an ordered list is significant. -- [Fenced code blocks](http://jgm.github.io/CommonMark/spec.html#fenced-code-blocks) are supported, delimited by either +- Fenced code blocks are supported, delimited by either backticks (```` ``` ```` or tildes (` ~~~ `). Contributing @@ -290,7 +276,8 @@ optimized the C implementation for performance, increasing its speed tenfold. Kārlis Gaņģis helped work out a better parsing algorithm for links and emphasis, eliminating several worst-case performance issues. Nick Wellnhofer contributed many improvements, including -most of the C library's API and its test harness. +most of the C library's API and its test harness. Vitaly Puzrin +has offered much good advice about the JavaScript implementation. 
[cmake]: http://www.cmake.org/download/ [pandoc]: http://johnmacfarlane.net/pandoc/ http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/5672da15/compiler/modules/CommonMark/src/blocks.c ---------------------------------------------------------------------- diff --git a/compiler/modules/CommonMark/src/blocks.c b/compiler/modules/CommonMark/src/blocks.c index ebef88b..dafbb9b 100644 --- a/compiler/modules/CommonMark/src/blocks.c +++ b/compiler/modules/CommonMark/src/blocks.c @@ -1,8 +1,8 @@ #include <stdlib.h> #include <assert.h> #include <stdio.h> -#include <ctype.h> +#include "cmark_ctype.h" #include "config.h" #include "parser.h" #include "cmark.h" @@ -20,11 +20,11 @@ static void S_parser_feed(cmark_parser *parser, const unsigned char *buffer, size_t len, - bool eof); + bool eof); static void S_process_line(cmark_parser *parser, const unsigned char *buffer, - size_t bytes); + size_t bytes); static cmark_node* make_block(cmark_node_type tag, int start_line, int start_column) { @@ -37,7 +37,7 @@ static cmark_node* make_block(cmark_node_type tag, int start_line, int start_col e->start_line = start_line; e->start_column = start_column; e->end_line = start_line; - strbuf_init(&e->string_content, 32); + cmark_strbuf_init(&e->string_content, 32); } return e; @@ -54,8 +54,8 @@ cmark_parser *cmark_parser_new() { cmark_parser *parser = (cmark_parser*)malloc(sizeof(cmark_parser)); cmark_node *document = make_document(); - strbuf *line = (strbuf*)malloc(sizeof(strbuf)); - strbuf *buf = (strbuf*)malloc(sizeof(strbuf)); + cmark_strbuf *line = (cmark_strbuf*)malloc(sizeof(cmark_strbuf)); + cmark_strbuf *buf = (cmark_strbuf*)malloc(sizeof(cmark_strbuf)); cmark_strbuf_init(line, 256); cmark_strbuf_init(buf, 0); @@ -64,6 +64,7 @@ cmark_parser *cmark_parser_new() parser->current = document; parser->line_number = 0; parser->curline = line; + parser->last_line_length = 0; parser->linebuf = buf; return parser; @@ -79,20 +80,21 @@ void cmark_parser_free(cmark_parser *parser) 
free(parser); } -static void finalize(cmark_parser *parser, cmark_node* b, int line_number); +static cmark_node* +finalize(cmark_parser *parser, cmark_node* b); // Returns true if line has only space characters, else false. -static bool is_blank(strbuf *s, int offset) +static bool is_blank(cmark_strbuf *s, int offset) { while (offset < s->size) { switch (s->ptr[offset]) { - case '\n': - return true; - case ' ': - offset++; - break; - default: - return false; + case '\n': + return true; + case ' ': + offset++; + break; + default: + return false; } } @@ -102,25 +104,25 @@ static bool is_blank(strbuf *s, int offset) static inline bool can_contain(cmark_node_type parent_type, cmark_node_type child_type) { return ( parent_type == NODE_DOCUMENT || - parent_type == NODE_BLOCK_QUOTE || - parent_type == NODE_LIST_ITEM || - (parent_type == NODE_LIST && child_type == NODE_LIST_ITEM) ); + parent_type == NODE_BLOCK_QUOTE || + parent_type == NODE_ITEM || + (parent_type == NODE_LIST && child_type == NODE_ITEM) ); } static inline bool accepts_lines(cmark_node_type block_type) { return (block_type == NODE_PARAGRAPH || - block_type == NODE_HEADER || - block_type == NODE_CODE_BLOCK); + block_type == NODE_HEADER || + block_type == NODE_CODE_BLOCK); } -static void add_line(cmark_node* cmark_node, chunk *ch, int offset) +static void add_line(cmark_node* node, cmark_chunk *ch, int offset) { - assert(cmark_node->open); - strbuf_put(&cmark_node->string_content, ch->data + offset, ch->len - offset); + assert(node->open); + cmark_strbuf_put(&node->string_content, ch->data + offset, ch->len - offset); } -static void remove_trailing_blank_lines(strbuf *ln) +static void remove_trailing_blank_lines(cmark_strbuf *ln) { int i; @@ -132,31 +134,35 @@ static void remove_trailing_blank_lines(strbuf *ln) } if (i < 0) { - strbuf_clear(ln); + cmark_strbuf_clear(ln); return; } - i = strbuf_strchr(ln, '\n', i); + i = cmark_strbuf_strchr(ln, '\n', i); if (i >= 0) - strbuf_truncate(ln, i); + 
cmark_strbuf_truncate(ln, i); } // Check to see if a cmark_node ends with a blank line, descending // if needed into lists and sublists. -static bool ends_with_blank_line(cmark_node* cmark_node) +static bool ends_with_blank_line(cmark_node* node) { - if (cmark_node->last_line_blank) { - return true; - } - if ((cmark_node->type == NODE_LIST || cmark_node->type == NODE_LIST_ITEM) && cmark_node->last_child) { - return ends_with_blank_line(cmark_node->last_child); - } else { - return false; + cmark_node *cur = node; + while (cur != NULL) { + if (cur->last_line_blank) { + return true; + } + if (cur->type == NODE_LIST || cur->type == NODE_ITEM) { + cur = cur->last_child; + } else { + cur = NULL; + } } + return false; } // Break out of all containing lists -static int break_out_of_lists(cmark_parser *parser, cmark_node ** bptr, int line_number) +static int break_out_of_lists(cmark_parser *parser, cmark_node ** bptr) { cmark_node *container = *bptr; cmark_node *b = parser->root; @@ -166,116 +172,137 @@ static int break_out_of_lists(cmark_parser *parser, cmark_node ** bptr, int line } if (b) { while (container && container != b) { - finalize(parser, container, line_number); - container = container->parent; + container = finalize(parser, container); } - finalize(parser, b, line_number); + finalize(parser, b); *bptr = b->parent; } return 0; } -static void finalize(cmark_parser *parser, cmark_node* b, int line_number) +static cmark_node* +finalize(cmark_parser *parser, cmark_node* b) { int firstlinelen; int pos; cmark_node* item; cmark_node* subitem; + cmark_node* parent; + + parent = b->parent; + // don't do anything if the cmark_node is already closed if (!b->open) - return; // don't do anything if the cmark_node is already closed + return parent; b->open = false; - if (line_number > b->start_line) { - b->end_line = line_number - 1; + + if (parser->curline->size == 0) { + // end of input - line number has not been incremented + b->end_line = parser->line_number; + 
b->end_column = parser->last_line_length; + } else if (b->type == NODE_DOCUMENT || + (b->type == NODE_CODE_BLOCK && b->as.code.fenced) || + (b->type == NODE_HEADER && b->as.header.setext)) { + b->end_line = parser->line_number; + b->end_column = parser->curline->size - + (parser->curline->ptr[parser->curline->size - 1] == '\n' ? + 1 : 0); } else { - b->end_line = line_number; + b->end_line = parser->line_number - 1; + b->end_column = parser->last_line_length; } switch (b->type) { - case NODE_PARAGRAPH: - while (strbuf_at(&b->string_content, 0) == '[' && - (pos = cmark_parse_reference_inline(&b->string_content, parser->refmap))) { + case NODE_PARAGRAPH: + while (cmark_strbuf_at(&b->string_content, 0) == '[' && + (pos = cmark_parse_reference_inline(&b->string_content, parser->refmap))) { - strbuf_drop(&b->string_content, pos); - } - if (is_blank(&b->string_content, 0)) { - b->type = NODE_REFERENCE_DEF; - } - break; + cmark_strbuf_drop(&b->string_content, pos); + } + if (is_blank(&b->string_content, 0)) { + // remove blank node (former reference def) + cmark_node_free(b); + } + break; - case NODE_CODE_BLOCK: - if (!b->as.code.fenced) { // indented code - remove_trailing_blank_lines(&b->string_content); - strbuf_putc(&b->string_content, '\n'); - break; - } else { + case NODE_CODE_BLOCK: + if (!b->as.code.fenced) { // indented code + remove_trailing_blank_lines(&b->string_content); + cmark_strbuf_putc(&b->string_content, '\n'); + } else { - // first line of contents becomes info - firstlinelen = strbuf_strchr(&b->string_content, '\n', 0); + // first line of contents becomes info + firstlinelen = cmark_strbuf_strchr(&b->string_content, '\n', 0); - houdini_unescape_html_f( - &b->as.code.info, - b->string_content.ptr, - firstlinelen - ); + cmark_strbuf tmp = GH_BUF_INIT; + houdini_unescape_html_f( + &tmp, + b->string_content.ptr, + firstlinelen + ); + cmark_strbuf_trim(&tmp); + cmark_strbuf_unescape(&tmp); + b->as.code.info = cmark_chunk_buf_detach(&tmp); - 
strbuf_drop(&b->string_content, firstlinelen + 1); + cmark_strbuf_drop(&b->string_content, firstlinelen + 1); + } + b->as.code.literal = cmark_chunk_buf_detach(&b->string_content); + break; - strbuf_trim(&b->as.code.info); - strbuf_unescape(&b->as.code.info); - break; - } + case NODE_HTML: + b->as.literal = cmark_chunk_buf_detach(&b->string_content); + break; - case NODE_LIST: // determine tight/loose status - b->as.list.tight = true; // tight by default - item = b->first_child; + case NODE_LIST: // determine tight/loose status + b->as.list.tight = true; // tight by default + item = b->first_child; - while (item) { - // check for non-final non-empty list item ending with blank line: - if (item->last_line_blank && item->next) { + while (item) { + // check for non-final non-empty list item ending with blank line: + if (item->last_line_blank && item->next) { + b->as.list.tight = false; + break; + } + // recurse into children of list item, to see if there are + // spaces between them: + subitem = item->first_child; + while (subitem) { + if (ends_with_blank_line(subitem) && + (item->next || subitem->next)) { b->as.list.tight = false; break; } - // recurse into children of list item, to see if there are - // spaces between them: - subitem = item->first_child; - while (subitem) { - if (ends_with_blank_line(subitem) && - (item->next || subitem->next)) { - b->as.list.tight = false; - break; - } - subitem = subitem->next; - } - if (!(b->as.list.tight)) { - break; - } - item = item->next; + subitem = subitem->next; + } + if (!(b->as.list.tight)) { + break; } + item = item->next; + } - break; + break; - default: - break; + default: + break; } + return parent; } // Add a cmark_node as child of another. Return pointer to child. 
static cmark_node* add_child(cmark_parser *parser, cmark_node* parent, - cmark_node_type block_type, int start_line, int start_column) + cmark_node_type block_type, int start_column) { assert(parent); // if 'parent' isn't the kind of cmark_node that can accept this child, // then back up til we hit a cmark_node that can. while (!can_contain(parent->type, block_type)) { - finalize(parser, parent, start_line); - parent = parent->parent; + parent = finalize(parser, parent); } - cmark_node* child = make_block(block_type, start_line, start_column); + cmark_node* child = make_block(block_type, parser->line_number, start_column); child->parent = parent; if (parent->last_child) { @@ -290,58 +317,31 @@ static cmark_node* add_child(cmark_parser *parser, cmark_node* parent, } -typedef struct BlockStack { - struct BlockStack *previous; - cmark_node *next_sibling; -} block_stack; - // Walk through cmark_node and all children, recursively, parsing // string content into inline content where appropriate. 
-static void process_inlines(cmark_node* cur, cmark_reference_map *refmap) +static void process_inlines(cmark_node* root, cmark_reference_map *refmap) { - block_stack* stack = NULL; - block_stack* newstack = NULL; - - while (cur != NULL) { - switch (cur->type) { - case NODE_PARAGRAPH: - case NODE_HEADER: + cmark_iter *iter = cmark_iter_new(root); + cmark_node *cur; + cmark_event_type ev_type; + + while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) { + cur = cmark_iter_get_node(iter); + if (ev_type == CMARK_EVENT_ENTER) { + if (cur->type == NODE_PARAGRAPH || + cur->type == NODE_HEADER) { cmark_parse_inlines(cur, refmap); - break; - - default: - break; - } - - if (cur->first_child) { - newstack = (block_stack*)malloc(sizeof(block_stack)); - if (newstack == NULL) break; - newstack->previous = stack; - stack = newstack; - stack->next_sibling = cur->next; - cur = cur->first_child; - } else { - cur = cur->next; - } - - while (cur == NULL && stack != NULL) { - cur = stack->next_sibling; - newstack = stack->previous; - free(stack); - stack = newstack; + } } } - while (stack != NULL) { - newstack = stack->previous; - free(stack); - stack = newstack; - } + + cmark_iter_free(iter); } // Attempts to parse a list item marker (bullet or enumerated). // On success, returns length of the marker, and populates // data with the details. On failure, returns 0. 
-static int parse_list_marker(chunk *input, int pos, cmark_list **dataptr) +static int parse_list_marker(cmark_chunk *input, int pos, cmark_list **dataptr) { unsigned char c; int startpos; @@ -352,7 +352,7 @@ static int parse_list_marker(chunk *input, int pos, cmark_list **dataptr) if ((c == '*' || c == '-' || c == '+') && !scan_hrule(input, pos)) { pos++; - if (!isspace(peek_at(input, pos))) { + if (!cmark_isspace(peek_at(input, pos))) { return 0; } data = (cmark_list *)calloc(1, sizeof(*data)); @@ -366,18 +366,18 @@ static int parse_list_marker(chunk *input, int pos, cmark_list **dataptr) data->delimiter = CMARK_PERIOD_DELIM; data->tight = false; } - } else if (isdigit(c)) { + } else if (cmark_isdigit(c)) { int start = 0; do { start = (10 * start) + (peek_at(input, pos) - '0'); pos++; - } while (isdigit(peek_at(input, pos))); + } while (cmark_isdigit(peek_at(input, pos))); c = peek_at(input, pos); if (c == '.' || c == ')') { pos++; - if (!isspace(peek_at(input, pos))) { + if (!cmark_isspace(peek_at(input, pos))) { return 0; } data = (cmark_list *)calloc(1, sizeof(*data)); @@ -407,19 +407,18 @@ static int parse_list_marker(chunk *input, int pos, cmark_list **dataptr) static int lists_match(cmark_list *list_data, cmark_list *item_data) { return (list_data->list_type == item_data->list_type && - list_data->delimiter == item_data->delimiter && - // list_data->marker_offset == item_data.marker_offset && - list_data->bullet_char == item_data->bullet_char); + list_data->delimiter == item_data->delimiter && + // list_data->marker_offset == item_data.marker_offset && + list_data->bullet_char == item_data->bullet_char); } static cmark_node *finalize_document(cmark_parser *parser) { while (parser->current != parser->root) { - finalize(parser, parser->current, parser->line_number); - parser->current = parser->current->parent; + parser->current = finalize(parser, parser->current); } - finalize(parser, parser->root, parser->line_number); + finalize(parser, parser->root); 
process_inlines(parser->root, parser->refmap); return parser->root; @@ -465,34 +464,31 @@ cmark_parser_feed(cmark_parser *parser, const char *buffer, size_t len) static void S_parser_feed(cmark_parser *parser, const unsigned char *buffer, size_t len, - bool eof) + bool eof) { const unsigned char *end = buffer + len; while (buffer < end) { const unsigned char *eol - = (const unsigned char *)memchr(buffer, '\n', - end - buffer); + = (const unsigned char *)memchr(buffer, '\n', + end - buffer); size_t line_len; if (eol) { line_len = eol + 1 - buffer; - } - else if (eof) { + } else if (eof) { line_len = end - buffer; - } - else { - strbuf_put(parser->linebuf, buffer, end - buffer); + } else { + cmark_strbuf_put(parser->linebuf, buffer, end - buffer); break; } if (parser->linebuf->size > 0) { - strbuf_put(parser->linebuf, buffer, line_len); + cmark_strbuf_put(parser->linebuf, buffer, line_len); S_process_line(parser, parser->linebuf->ptr, - parser->linebuf->size); - strbuf_clear(parser->linebuf); - } - else { + parser->linebuf->size); + cmark_strbuf_clear(parser->linebuf); + } else { S_process_line(parser, buffer, line_len); } @@ -500,11 +496,11 @@ S_parser_feed(cmark_parser *parser, const unsigned char *buffer, size_t len, } } -static void chop_trailing_hashtags(chunk *ch) +static void chop_trailing_hashtags(cmark_chunk *ch) { int n, orig_n; - chunk_rtrim(ch); + cmark_chunk_rtrim(ch); orig_n = n = ch->len - 1; // if string ends in space followed by #s, remove these: @@ -514,7 +510,7 @@ static void chop_trailing_hashtags(chunk *ch) // Check for a be a space before the final #s: if (n != orig_n && n >= 0 && peek_at(ch, n) == ' ') { ch->len = n; - chunk_rtrim(ch); + cmark_chunk_rtrim(ch); } } @@ -533,14 +529,14 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes) bool blank = false; int first_nonspace; int indent; - chunk input; + cmark_chunk input; utf8proc_detab(parser->curline, buffer, bytes); // Add a newline to the end if not present: // 
TODO this breaks abstraction: if (parser->curline->ptr[parser->curline->size - 1] != '\n') { - strbuf_putc(parser->curline, '\n'); + cmark_strbuf_putc(parser->curline, '\n'); } input.data = parser->curline->ptr; input.len = parser->curline->size; @@ -574,12 +570,12 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes) all_matched = false; } - } else if (container->type == NODE_LIST_ITEM) { + } else if (container->type == NODE_ITEM) { if (indent >= container->as.list.marker_offset + - container->as.list.padding) { + container->as.list.padding) { offset += container->as.list.marker_offset + - container->as.list.padding; + container->as.list.padding; } else if (blank) { offset = first_nonspace; } else { @@ -596,34 +592,45 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes) } else { all_matched = false; } - } else { - // skip optional spaces of fence offset - i = container->as.code.fence_offset; - while (i > 0 && peek_at(&input, offset) == ' ') { - offset++; - i--; + } else { // fenced + matched = 0; + if (indent <= 3 && + (peek_at(&input, first_nonspace) == + container->as.code.fence_char)) { + matched = scan_close_code_fence(&input, + first_nonspace); + } + if (matched >= container->as.code.fence_length) { + // closing fence - and since we're at + // the end of a line, we can return: + all_matched = false; + offset += matched; + finalize(parser, container); + goto finished; + } else { + // skip opt. 
spaces of fence offset + i = container->as.code.fence_offset; + while (i > 0 && + peek_at(&input, offset) == ' ') { + offset++; + i--; + } } } - } else if (container->type == NODE_HEADER) { // a header can never contain more than one line all_matched = false; - if (blank) { - container->last_line_blank = true; - } } else if (container->type == NODE_HTML) { if (blank) { - container->last_line_blank = true; all_matched = false; } } else if (container->type == NODE_PARAGRAPH) { if (blank) { - container->last_line_blank = true; all_matched = false; } @@ -639,12 +646,12 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes) // check to see if we've hit 2nd blank line, break out of list: if (blank && container->last_line_blank) { - break_out_of_lists(parser, &container, parser->line_number); + break_out_of_lists(parser, &container); } // unless last matched container is code cmark_node, try new container starts: while (container->type != NODE_CODE_BLOCK && - container->type != NODE_HTML) { + container->type != NODE_HTML) { first_nonspace = offset; while (peek_at(&input, first_nonspace) == ' ') @@ -656,12 +663,12 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes) if (indent >= CODE_INDENT) { if (cur->type != NODE_PARAGRAPH && !blank) { offset += CODE_INDENT; - container = add_child(parser, container, NODE_CODE_BLOCK, parser->line_number, offset + 1); + container = add_child(parser, container, NODE_CODE_BLOCK, offset + 1); container->as.code.fenced = false; container->as.code.fence_char = 0; container->as.code.fence_length = 0; container->as.code.fence_offset = 0; - strbuf_init(&container->as.code.info, 0); + container->as.code.info = cmark_chunk_literal(""); } else { // indent > 4 in lazy line break; } @@ -672,14 +679,14 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes) // optional following character if (peek_at(&input, offset) == ' ') offset++; - container = add_child(parser, 
container, NODE_BLOCK_QUOTE, parser->line_number, offset + 1); + container = add_child(parser, container, NODE_BLOCK_QUOTE, offset + 1); } else if ((matched = scan_atx_header_start(&input, first_nonspace))) { offset = first_nonspace + matched; - container = add_child(parser, container, NODE_HEADER, parser->line_number, offset + 1); + container = add_child(parser, container, NODE_HEADER, offset + 1); - int hashpos = chunk_strchr(&input, '#', first_nonspace); + int hashpos = cmark_chunk_strchr(&input, '#', first_nonspace); int level = 0; while (peek_at(&input, hashpos) == '#') { @@ -691,24 +698,24 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes) } else if ((matched = scan_open_code_fence(&input, first_nonspace))) { - container = add_child(parser, container, NODE_CODE_BLOCK, parser->line_number, first_nonspace + 1); + container = add_child(parser, container, NODE_CODE_BLOCK, first_nonspace + 1); container->as.code.fenced = true; container->as.code.fence_char = peek_at(&input, first_nonspace); container->as.code.fence_length = matched; container->as.code.fence_offset = first_nonspace - offset; - strbuf_init(&container->as.code.info, 0); + container->as.code.info = cmark_chunk_literal(""); offset = first_nonspace + matched; } else if ((matched = scan_html_block_tag(&input, first_nonspace))) { - container = add_child(parser, container, NODE_HTML, parser->line_number, first_nonspace + 1); + container = add_child(parser, container, NODE_HTML, first_nonspace + 1); // note, we don't adjust offset because the tag is part of the text } else if (container->type == NODE_PARAGRAPH && - (lev = scan_setext_header_line(&input, first_nonspace)) && - // check that there is only one line in the paragraph: - strbuf_strrchr(&container->string_content, '\n', - strbuf_len(&container->string_content) - 2) < 0) { + (lev = scan_setext_header_line(&input, first_nonspace)) && + // check that there is only one line in the paragraph: + 
cmark_strbuf_strrchr(&container->string_content, '\n', + cmark_strbuf_len(&container->string_content) - 2) < 0) { container->type = NODE_HEADER; container->as.header.level = lev; @@ -716,12 +723,11 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes) offset = input.len - 1; } else if (!(container->type == NODE_PARAGRAPH && !all_matched) && - (matched = scan_hrule(&input, first_nonspace))) { + (matched = scan_hrule(&input, first_nonspace))) { // it's only now that we know the line is not part of a setext header: - container = add_child(parser, container, NODE_HRULE, parser->line_number, first_nonspace + 1); - finalize(parser, container, parser->line_number); - container = container->parent; + container = add_child(parser, container, NODE_HRULE, first_nonspace + 1); + container = finalize(parser, container); offset = input.len - 1; } else if ((matched = parse_list_marker(&input, first_nonspace, &data))) { @@ -749,16 +755,16 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes) data->marker_offset = indent; if (container->type != NODE_LIST || - !lists_match(&container->as.list, data)) { - container = add_child(parser, container, NODE_LIST, parser->line_number, - first_nonspace + 1); + !lists_match(&container->as.list, data)) { + container = add_child(parser, container, NODE_LIST, + first_nonspace + 1); memcpy(&container->as.list, data, sizeof(*data)); } // add the list item - container = add_child(parser, container, NODE_LIST_ITEM, parser->line_number, - first_nonspace + 1); + container = add_child(parser, container, NODE_ITEM, + first_nonspace + 1); /* TODO: static */ memcpy(&container->as.list, data, sizeof(*data)); free(data); @@ -782,18 +788,22 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes) indent = first_nonspace - offset; blank = peek_at(&input, first_nonspace) == '\n'; - // cmark_node quote lines are never blank as they start with > + if (blank && 
container->last_child) { + container->last_child->last_line_blank = true; + } + + // block quote lines are never blank as they start with > // and we don't count blanks in fenced code for purposes of tight/loose // lists or breaking out of lists. we also don't set last_line_blank // on an empty list item. container->last_line_blank = (blank && - container->type != NODE_BLOCK_QUOTE && - container->type != NODE_HEADER && - !(container->type == NODE_CODE_BLOCK && - container->as.code.fenced) && - !(container->type == NODE_LIST_ITEM && - container->first_child == NULL && - container->start_line == parser->line_number)); + container->type != NODE_BLOCK_QUOTE && + container->type != NODE_HEADER && + !(container->type == NODE_CODE_BLOCK && + container->as.code.fenced) && + !(container->type == NODE_ITEM && + container->first_child == NULL && + container->start_line == parser->line_number)); cmark_node *cont = container; while (cont->parent) { @@ -802,10 +812,10 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes) } if (cur != last_matched_container && - container == last_matched_container && - !blank && - cur->type == NODE_PARAGRAPH && - strbuf_len(&cur->string_content) > 0) { + container == last_matched_container && + !blank && + cur->type == NODE_PARAGRAPH && + cmark_strbuf_len(&cur->string_content) > 0) { add_line(cur, &input, offset); @@ -813,36 +823,12 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes) // finalize any blocks that were not matched and set cur to container: while (cur != last_matched_container) { - finalize(parser, cur, parser->line_number); - cur = cur->parent; + cur = finalize(parser, cur); assert(cur != NULL); } - if (container->type == NODE_CODE_BLOCK && - !container->as.code.fenced) { - - add_line(container, &input, offset); - - } else if (container->type == NODE_CODE_BLOCK && - container->as.code.fenced) { - matched = 0; - - if (indent <= 3 && - peek_at(&input, first_nonspace) == 
container->as.code.fence_char) { - int fence_len = scan_close_code_fence(&input, first_nonspace); - if (fence_len > container->as.code.fence_length) - matched = 1; - } - - if (matched) { - // if closing fence, don't add line to container; instead, close it: - finalize(parser, container, parser->line_number); - container = container->parent; // back up to parent - } else { - add_line(container, &input, offset); - } - - } else if (container->type == NODE_HTML) { + if (container->type == NODE_CODE_BLOCK || + container->type == NODE_HTML) { add_line(container, &input, offset); @@ -850,31 +836,29 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes) // ??? do nothing - } else if (container->type == NODE_HEADER) { - - chop_trailing_hashtags(&input); - add_line(container, &input, first_nonspace); - finalize(parser, container, parser->line_number); - container = container->parent; - } else if (accepts_lines(container->type)) { + if (container->type == NODE_HEADER && + container->as.header.setext == false) { + chop_trailing_hashtags(&input); + } add_line(container, &input, first_nonspace); - } else if (container->type != NODE_HRULE && - container->type != NODE_HEADER) { - + } else { // create paragraph container for line - container = add_child(parser, container, NODE_PARAGRAPH, parser->line_number, first_nonspace + 1); + container = add_child(parser, container, NODE_PARAGRAPH, first_nonspace + 1); add_line(container, &input, first_nonspace); - } else { - assert(false); } parser->current = container; } - strbuf_clear(parser->curline); +finished: + parser->last_line_length = parser->curline->size - + (parser->curline->ptr[parser->curline->size - 1] == '\n' ? 
+ 1 : 0); + ; + cmark_strbuf_clear(parser->curline); } @@ -882,12 +866,12 @@ cmark_node *cmark_parser_finish(cmark_parser *parser) { if (parser->linebuf->size) { S_process_line(parser, parser->linebuf->ptr, - parser->linebuf->size); - strbuf_clear(parser->linebuf); + parser->linebuf->size); + cmark_strbuf_clear(parser->linebuf); } finalize_document(parser); - strbuf_free(parser->curline); + cmark_strbuf_free(parser->curline); #if CMARK_DEBUG_NODES if (cmark_node_check(parser->root, stderr)) { abort(); http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/5672da15/compiler/modules/CommonMark/src/buffer.c ---------------------------------------------------------------------- diff --git a/compiler/modules/CommonMark/src/buffer.c b/compiler/modules/CommonMark/src/buffer.c index 45b6984..0df6561 100644 --- a/compiler/modules/CommonMark/src/buffer.c +++ b/compiler/modules/CommonMark/src/buffer.c @@ -1,28 +1,29 @@ #include <stdarg.h> -#include <ctype.h> #include <string.h> #include <assert.h> #include <string.h> #include <stdio.h> #include <stdlib.h> +#include "config.h" +#include "cmark_ctype.h" #include "buffer.h" -/* Used as default value for strbuf->ptr so that people can always - * assume ptr is non-NULL and zero terminated even for new strbufs. +/* Used as default value for cmark_strbuf->ptr so that people can always + * assume ptr is non-NULL and zero terminated even for new cmark_strbufs. */ unsigned char cmark_strbuf__initbuf[1]; unsigned char cmark_strbuf__oom[1]; #define ENSURE_SIZE(b, d) \ - if ((d) > buf->asize && strbuf_grow(b, (d)) < 0) \ + if ((d) > buf->asize && cmark_strbuf_grow(b, (d)) < 0) \ return -1; #ifndef MIN #define MIN(x,y) ((x<y) ? 
x : y) #endif -void cmark_strbuf_init(strbuf *buf, int initial_size) +void cmark_strbuf_init(cmark_strbuf *buf, int initial_size) { buf->asize = 0; buf->size = 0; @@ -32,7 +33,7 @@ void cmark_strbuf_init(strbuf *buf, int initial_size) cmark_strbuf_grow(buf, initial_size); } -int cmark_strbuf_try_grow(strbuf *buf, int target_size, bool mark_oom) +int cmark_strbuf_try_grow(cmark_strbuf *buf, int target_size, bool mark_oom) { unsigned char *new_ptr; int new_size; @@ -93,7 +94,7 @@ size_t cmark_strbuf_len(const cmark_strbuf *buf) return buf->size; } -void cmark_strbuf_free(strbuf *buf) +void cmark_strbuf_free(cmark_strbuf *buf) { if (!buf) return; @@ -103,7 +104,7 @@ void cmark_strbuf_free(strbuf *buf) cmark_strbuf_init(buf, 0); } -void cmark_strbuf_clear(strbuf *buf) +void cmark_strbuf_clear(cmark_strbuf *buf) { buf->size = 0; @@ -111,7 +112,7 @@ void cmark_strbuf_clear(strbuf *buf) buf->ptr[0] = '\0'; } -int cmark_strbuf_set(strbuf *buf, const unsigned char *data, int len) +int cmark_strbuf_set(cmark_strbuf *buf, const unsigned char *data, int len) { if (len <= 0 || data == NULL) { cmark_strbuf_clear(buf); @@ -126,14 +127,14 @@ int cmark_strbuf_set(strbuf *buf, const unsigned char *data, int len) return 0; } -int cmark_strbuf_sets(strbuf *buf, const char *string) +int cmark_strbuf_sets(cmark_strbuf *buf, const char *string) { return cmark_strbuf_set(buf, - (const unsigned char *)string, - string ? strlen(string) : 0); + (const unsigned char *)string, + string ? 
strlen(string) : 0); } -int cmark_strbuf_putc(strbuf *buf, int c) +int cmark_strbuf_putc(cmark_strbuf *buf, int c) { ENSURE_SIZE(buf, buf->size + 2); buf->ptr[buf->size++] = c; @@ -141,7 +142,7 @@ int cmark_strbuf_putc(strbuf *buf, int c) return 0; } -int cmark_strbuf_put(strbuf *buf, const unsigned char *data, int len) +int cmark_strbuf_put(cmark_strbuf *buf, const unsigned char *data, int len) { if (len <= 0) return 0; @@ -153,12 +154,12 @@ int cmark_strbuf_put(strbuf *buf, const unsigned char *data, int len) return 0; } -int cmark_strbuf_puts(strbuf *buf, const char *string) +int cmark_strbuf_puts(cmark_strbuf *buf, const char *string) { return cmark_strbuf_put(buf, (const unsigned char *)string, strlen(string)); } -int cmark_strbuf_vprintf(strbuf *buf, const char *format, va_list ap) +int cmark_strbuf_vprintf(cmark_strbuf *buf, const char *format, va_list ap) { const int expected_size = buf->size + (strlen(format) * 2); int len; @@ -166,11 +167,16 @@ int cmark_strbuf_vprintf(strbuf *buf, const char *format, va_list ap) ENSURE_SIZE(buf, expected_size); while (1) { + va_list args; + va_copy(args, ap); + len = vsnprintf( - (char *)buf->ptr + buf->size, - buf->asize - buf->size, - format, ap - ); + (char *)buf->ptr + buf->size, + buf->asize - buf->size, + format, args + ); + + va_end(args); if (len < 0) { free(buf->ptr); @@ -189,7 +195,7 @@ int cmark_strbuf_vprintf(strbuf *buf, const char *format, va_list ap) return 0; } -int cmark_strbuf_printf(strbuf *buf, const char *format, ...) +int cmark_strbuf_printf(cmark_strbuf *buf, const char *format, ...) { int r; va_list ap; @@ -201,7 +207,7 @@ int cmark_strbuf_printf(strbuf *buf, const char *format, ...) 
return r; } -void cmark_strbuf_copy_cstr(char *data, int datasize, const strbuf *buf) +void cmark_strbuf_copy_cstr(char *data, int datasize, const cmark_strbuf *buf) { int copylen; @@ -219,14 +225,14 @@ void cmark_strbuf_copy_cstr(char *data, int datasize, const strbuf *buf) data[copylen] = '\0'; } -void cmark_strbuf_swap(strbuf *buf_a, strbuf *buf_b) +void cmark_strbuf_swap(cmark_strbuf *buf_a, cmark_strbuf *buf_b) { - strbuf t = *buf_a; + cmark_strbuf t = *buf_a; *buf_a = *buf_b; *buf_b = t; } -unsigned char *cmark_strbuf_detach(strbuf *buf) +unsigned char *cmark_strbuf_detach(cmark_strbuf *buf) { unsigned char *data = buf->ptr; @@ -239,7 +245,7 @@ unsigned char *cmark_strbuf_detach(strbuf *buf) return data; } -void cmark_strbuf_attach(strbuf *buf, unsigned char *ptr, int asize) +void cmark_strbuf_attach(cmark_strbuf *buf, unsigned char *ptr, int asize) { cmark_strbuf_free(buf); @@ -255,14 +261,14 @@ void cmark_strbuf_attach(strbuf *buf, unsigned char *ptr, int asize) } } -int cmark_strbuf_cmp(const strbuf *a, const strbuf *b) +int cmark_strbuf_cmp(const cmark_strbuf *a, const cmark_strbuf *b) { int result = memcmp(a->ptr, b->ptr, MIN(a->size, b->size)); return (result != 0) ? result : - (a->size < b->size) ? -1 : (a->size > b->size) ? 1 : 0; + (a->size < b->size) ? -1 : (a->size > b->size) ? 
1 : 0; } -int cmark_strbuf_strchr(const strbuf *buf, int c, int pos) +int cmark_strbuf_strchr(const cmark_strbuf *buf, int c, int pos) { const unsigned char *p = (unsigned char *)memchr(buf->ptr + pos, c, buf->size - pos); if (!p) @@ -271,7 +277,7 @@ int cmark_strbuf_strchr(const strbuf *buf, int c, int pos) return (int)(p - (const unsigned char *)buf->ptr); } -int cmark_strbuf_strrchr(const strbuf *buf, int c, int pos) +int cmark_strbuf_strrchr(const cmark_strbuf *buf, int c, int pos) { int i; @@ -283,7 +289,7 @@ int cmark_strbuf_strrchr(const strbuf *buf, int c, int pos) return -1; } -void cmark_strbuf_truncate(strbuf *buf, int len) +void cmark_strbuf_truncate(cmark_strbuf *buf, int len) { if (len < buf->size) { buf->size = len; @@ -291,7 +297,7 @@ void cmark_strbuf_truncate(strbuf *buf, int len) } } -void cmark_strbuf_drop(strbuf *buf, int n) +void cmark_strbuf_drop(cmark_strbuf *buf, int n) { if (n > 0) { buf->size = buf->size - n; @@ -302,13 +308,13 @@ void cmark_strbuf_drop(strbuf *buf, int n) } } -void cmark_strbuf_rtrim(strbuf *buf) +void cmark_strbuf_rtrim(cmark_strbuf *buf) { if (!buf->size) return; while (buf->size > 0) { - if (!isspace(buf->ptr[buf->size - 1])) + if (!cmark_isspace(buf->ptr[buf->size - 1])) break; buf->size--; @@ -317,14 +323,14 @@ void cmark_strbuf_rtrim(strbuf *buf) buf->ptr[buf->size] = '\0'; } -void cmark_strbuf_trim(strbuf *buf) +void cmark_strbuf_trim(cmark_strbuf *buf) { int i = 0; if (!buf->size) return; - while (i < buf->size && isspace(buf->ptr[i])) + while (i < buf->size && cmark_isspace(buf->ptr[i])) i++; cmark_strbuf_drop(buf, i); @@ -334,7 +340,7 @@ void cmark_strbuf_trim(strbuf *buf) // Destructively modify string, collapsing consecutive // space and newline characters into a single space. 
-void cmark_strbuf_normalize_whitespace(strbuf *s) +void cmark_strbuf_normalize_whitespace(cmark_strbuf *s) { bool last_char_was_space = false; int r, w; @@ -360,12 +366,12 @@ void cmark_strbuf_normalize_whitespace(strbuf *s) } // Destructively unescape a string: remove backslashes before punctuation chars. -extern void cmark_strbuf_unescape(strbuf *buf) +extern void cmark_strbuf_unescape(cmark_strbuf *buf) { int r, w; for (r = 0, w = 0; r < buf->size; ++r) { - if (buf->ptr[r] == '\\' && ispunct(buf->ptr[r + 1])) + if (buf->ptr[r] == '\\' && cmark_ispunct(buf->ptr[r + 1])) continue; buf->ptr[w++] = buf->ptr[r]; http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/5672da15/compiler/modules/CommonMark/src/buffer.h ---------------------------------------------------------------------- diff --git a/compiler/modules/CommonMark/src/buffer.h b/compiler/modules/CommonMark/src/buffer.h index 7401b22..fb9f910 100644 --- a/compiler/modules/CommonMark/src/buffer.h +++ b/compiler/modules/CommonMark/src/buffer.h @@ -4,7 +4,6 @@ #include <stddef.h> #include <stdarg.h> #include "config.h" -#include "cmark_export.h" #ifdef __cplusplus extern "C" { @@ -15,21 +14,18 @@ typedef struct { int asize, size; } cmark_strbuf; -CMARK_EXPORT extern unsigned char cmark_strbuf__initbuf[]; -CMARK_EXPORT extern unsigned char cmark_strbuf__oom[]; -#define CMARK_GH_BUF_INIT { cmark_strbuf__initbuf, 0, 0 } +#define GH_BUF_INIT { cmark_strbuf__initbuf, 0, 0 } /** - * Initialize a strbuf structure. + * Initialize a cmark_strbuf structure. * * For the cases where GH_BUF_INIT cannot be used to do static * initialization. */ -CMARK_EXPORT void cmark_strbuf_init(cmark_strbuf *buf, int initial_size); /** @@ -40,7 +36,6 @@ void cmark_strbuf_init(cmark_strbuf *buf, int initial_size); * existing buffer content will be preserved, but calling code must handle * that buffer was not expanded. 
*/ -CMARK_EXPORT int cmark_strbuf_try_grow(cmark_strbuf *buf, int target_size, bool mark_oom); /** @@ -51,39 +46,30 @@ int cmark_strbuf_try_grow(cmark_strbuf *buf, int target_size, bool mark_oom); * * @return 0 on success or -1 on failure */ -CMARK_EXPORT int cmark_strbuf_grow(cmark_strbuf *buf, int target_size); -CMARK_EXPORT void cmark_strbuf_free(cmark_strbuf *buf); -CMARK_EXPORT void cmark_strbuf_swap(cmark_strbuf *buf_a, cmark_strbuf *buf_b); /** - * Test if there have been any reallocation failures with this strbuf. + * Test if there have been any reallocation failures with this cmark_strbuf. * - * Any function that writes to a strbuf can fail due to memory allocation - * issues. If one fails, the strbuf will be marked with an OOM error and - * further calls to modify the buffer will fail. Check strbuf_oom() at the + * Any function that writes to a cmark_strbuf can fail due to memory allocation + * issues. If one fails, the cmark_strbuf will be marked with an OOM error and + * further calls to modify the buffer will fail. Check cmark_strbuf_oom() at the * end of your sequence and it will be true if you ran out of memory at any * point with that buffer. * * @return false if no error, true if allocation error */ -CMARK_EXPORT bool cmark_strbuf_oom(const cmark_strbuf *buf); -CMARK_EXPORT size_t cmark_strbuf_len(const cmark_strbuf *buf); -CMARK_EXPORT int cmark_strbuf_cmp(const cmark_strbuf *a, const cmark_strbuf *b); -CMARK_EXPORT void cmark_strbuf_attach(cmark_strbuf *buf, unsigned char *ptr, int asize); -CMARK_EXPORT unsigned char *cmark_strbuf_detach(cmark_strbuf *buf); -CMARK_EXPORT void cmark_strbuf_copy_cstr(char *data, int datasize, const cmark_strbuf *buf); static inline const char *cmark_strbuf_cstr(const cmark_strbuf *buf) @@ -96,80 +82,30 @@ static inline const char *cmark_strbuf_cstr(const cmark_strbuf *buf) /* * Functions below that return int value error codes will return 0 on * success or -1 on failure (which generally means an allocation failed). 
- * Using a strbuf where the allocation has failed with result in -1 from + * Using a cmark_strbuf where the allocation has failed will result in -1 from * all further calls using that buffer. As a result, you can ignore the * return code of these functions and call them in a series then just call - * strbuf_oom at the end. + * cmark_strbuf_oom at the end. */ -CMARK_EXPORT int cmark_strbuf_set(cmark_strbuf *buf, const unsigned char *data, int len); -CMARK_EXPORT int cmark_strbuf_sets(cmark_strbuf *buf, const char *string); -CMARK_EXPORT int cmark_strbuf_putc(cmark_strbuf *buf, int c); -CMARK_EXPORT int cmark_strbuf_put(cmark_strbuf *buf, const unsigned char *data, int len); -CMARK_EXPORT int cmark_strbuf_puts(cmark_strbuf *buf, const char *string); -CMARK_EXPORT int cmark_strbuf_printf(cmark_strbuf *buf, const char *format, ...) - CMARK_ATTRIBUTE((format (printf, 2, 3))); -CMARK_EXPORT +CMARK_ATTRIBUTE((format (printf, 2, 3))); int cmark_strbuf_vprintf(cmark_strbuf *buf, const char *format, va_list ap); -CMARK_EXPORT void cmark_strbuf_clear(cmark_strbuf *buf); -CMARK_EXPORT int cmark_strbuf_strchr(const cmark_strbuf *buf, int c, int pos); -CMARK_EXPORT int cmark_strbuf_strrchr(const cmark_strbuf *buf, int c, int pos); -CMARK_EXPORT void cmark_strbuf_drop(cmark_strbuf *buf, int n); -CMARK_EXPORT void cmark_strbuf_truncate(cmark_strbuf *buf, int len); -CMARK_EXPORT void cmark_strbuf_rtrim(cmark_strbuf *buf); -CMARK_EXPORT void cmark_strbuf_trim(cmark_strbuf *buf); -CMARK_EXPORT void cmark_strbuf_normalize_whitespace(cmark_strbuf *s); -CMARK_EXPORT void cmark_strbuf_unescape(cmark_strbuf *s); -// Convenience macros -#define strbuf cmark_strbuf -#define strbuf__initbuf cmark_strbuf__initbuf -#define strbuf__oom cmark_strbuf__oom -#define GH_BUF_INIT CMARK_GH_BUF_INIT -#define strbuf_init cmark_strbuf_init -#define strbuf_try_grow cmark_strbuf_try_grow -#define strbuf_grow cmark_strbuf_grow -#define strbuf_free cmark_strbuf_free -#define strbuf_swap cmark_strbuf_swap 
-#define strbuf_oom cmark_strbuf_oom -#define strbuf_len cmark_strbuf_len -#define strbuf_cmp cmark_strbuf_cmp -#define strbuf_attach cmark_strbuf_attach -#define strbuf_detach cmark_strbuf_detach -#define strbuf_copy_cstr cmark_strbuf_copy_cstr -#define strbuf_at cmark_strbuf_at -#define strbuf_set cmark_strbuf_set -#define strbuf_sets cmark_strbuf_sets -#define strbuf_putc cmark_strbuf_putc -#define strbuf_put cmark_strbuf_put -#define strbuf_puts cmark_strbuf_puts -#define strbuf_printf cmark_strbuf_printf -#define strbuf_vprintf cmark_strbuf_vprintf -#define strbuf_clear cmark_strbuf_clear -#define strbuf_strchr cmark_strbuf_strchr -#define strbuf_strrchr cmark_strbuf_strrchr -#define strbuf_drop cmark_strbuf_drop -#define strbuf_truncate cmark_strbuf_truncate -#define strbuf_rtrim cmark_strbuf_rtrim -#define strbuf_trim cmark_strbuf_trim -#define strbuf_normalize_whitespace cmark_strbuf_normalize_whitespace -#define strbuf_unescape cmark_strbuf_unescape - #ifdef __cplusplus } #endif http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/5672da15/compiler/modules/CommonMark/src/chunk.h ---------------------------------------------------------------------- diff --git a/compiler/modules/CommonMark/src/chunk.h b/compiler/modules/CommonMark/src/chunk.h index 22594b1..54c4b16 100644 --- a/compiler/modules/CommonMark/src/chunk.h +++ b/compiler/modules/CommonMark/src/chunk.h @@ -2,9 +2,9 @@ #define CMARK_CHUNK_H #include <string.h> -#include <ctype.h> #include <stdlib.h> #include <assert.h> +#include "cmark_ctype.h" #include "buffer.h" typedef struct { @@ -27,7 +27,7 @@ static inline void cmark_chunk_ltrim(cmark_chunk *c) { assert(!c->alloc); - while (c->len && isspace(c->data[0])) { + while (c->len && cmark_isspace(c->data[0])) { c->data++; c->len--; } @@ -36,7 +36,7 @@ static inline void cmark_chunk_ltrim(cmark_chunk *c) static inline void cmark_chunk_rtrim(cmark_chunk *c) { while (c->len > 0) { - if (!isspace(c->data[c->len - 1])) + if 
(!cmark_isspace(c->data[c->len - 1])) break; c->len--; @@ -107,16 +107,4 @@ static inline cmark_chunk cmark_chunk_buf_detach(cmark_strbuf *buf) return c; } -// Convenience macros -#define chunk cmark_chunk -#define chunk_free cmark_chunk_free -#define chunk_ltrim cmark_chunk_ltrim -#define chunk_rtrim cmark_chunk_rtrim -#define chunk_trim cmark_chunk_trim -#define chunk_strchr cmark_chunk_strchr -#define chunk_to_cstr cmark_chunk_to_cstr -#define chunk_literal cmark_chunk_literal -#define chunk_dup cmark_chunk_dup -#define chunk_buf_detach cmark_chunk_buf_detach - #endif http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/5672da15/compiler/modules/CommonMark/src/cmark.c ---------------------------------------------------------------------- diff --git a/compiler/modules/CommonMark/src/cmark.c b/compiler/modules/CommonMark/src/cmark.c index 16817b9..1d7a500 100644 --- a/compiler/modules/CommonMark/src/cmark.c +++ b/compiler/modules/CommonMark/src/cmark.c @@ -13,7 +13,7 @@ char *cmark_markdown_to_html(const char *text, int len) doc = cmark_parse_document(text, len); - result = cmark_render_html(doc); + result = cmark_render_html(doc, CMARK_OPT_DEFAULT); cmark_node_free(doc); return result; http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/5672da15/compiler/modules/CommonMark/src/cmark.h ---------------------------------------------------------------------- diff --git a/compiler/modules/CommonMark/src/cmark.h b/compiler/modules/CommonMark/src/cmark.h index f96cea9..04ca6d7 100644 --- a/compiler/modules/CommonMark/src/cmark.h +++ b/compiler/modules/CommonMark/src/cmark.h @@ -8,13 +8,14 @@ extern "C" { #endif -/** .SH NAME +/** # NAME * - * .B cmark - * \- CommonMark parsing, manipulating, and rendering + * **cmark** - CommonMark parsing, manipulating, and rendering */ -/** .SH SIMPLE INTERFACE +/** # DESCRIPTION + * + * ## Simple Interface */ /** Current version of library. 
@@ -28,11 +29,9 @@ extern "C" { CMARK_EXPORT char *cmark_markdown_to_html(const char *text, int len); -/** .SH NODE STRUCTURE +/** ## Node Structure */ -/** - */ typedef enum { /* Error status */ CMARK_NODE_NONE, @@ -41,22 +40,21 @@ typedef enum { CMARK_NODE_DOCUMENT, CMARK_NODE_BLOCK_QUOTE, CMARK_NODE_LIST, - CMARK_NODE_LIST_ITEM, + CMARK_NODE_ITEM, CMARK_NODE_CODE_BLOCK, CMARK_NODE_HTML, CMARK_NODE_PARAGRAPH, CMARK_NODE_HEADER, CMARK_NODE_HRULE, - CMARK_NODE_REFERENCE_DEF, CMARK_NODE_FIRST_BLOCK = CMARK_NODE_DOCUMENT, - CMARK_NODE_LAST_BLOCK = CMARK_NODE_REFERENCE_DEF, + CMARK_NODE_LAST_BLOCK = CMARK_NODE_HRULE, /* Inline */ CMARK_NODE_TEXT, CMARK_NODE_SOFTBREAK, CMARK_NODE_LINEBREAK, - CMARK_NODE_INLINE_CODE, + CMARK_NODE_CODE, CMARK_NODE_INLINE_HTML, CMARK_NODE_EMPH, CMARK_NODE_STRONG, @@ -68,275 +66,451 @@ typedef enum { } cmark_node_type; -/** - */ typedef enum { CMARK_NO_LIST, CMARK_BULLET_LIST, CMARK_ORDERED_LIST } cmark_list_type; -/** - */ typedef enum { + CMARK_NO_DELIM, CMARK_PERIOD_DELIM, CMARK_PAREN_DELIM } cmark_delim_type; typedef struct cmark_node cmark_node; typedef struct cmark_parser cmark_parser; +typedef struct cmark_iter cmark_iter; + +typedef enum { + CMARK_EVENT_NONE, + CMARK_EVENT_DONE, + CMARK_EVENT_ENTER, + CMARK_EVENT_EXIT +} cmark_event_type; /** - * .SH CREATING AND DESTROYING NODES + * ## Creating and Destroying Nodes */ -/** +/** Creates a new node of type 'type'. Note that the node may have + * other required properties, which it is the caller's responsibility + * to assign. */ CMARK_EXPORT cmark_node* cmark_node_new(cmark_node_type type); -/** +/** Frees the memory allocated for a node. */ CMARK_EXPORT void cmark_node_free(cmark_node *node); /** - * .SH TREE TRAVERSAL + * ## Tree Traversal + */ + +/** Returns the next node in the sequence after 'node', or NULL if + * there is none. 
*/ CMARK_EXPORT cmark_node* cmark_node_next(cmark_node *node); -/** +/** Returns the previous node in the sequence after 'node', or NULL if + * there is none. */ CMARK_EXPORT cmark_node* cmark_node_previous(cmark_node *node); -/** +/** Returns the parent of 'node', or NULL if there is none. */ CMARK_EXPORT cmark_node* cmark_node_parent(cmark_node *node); -/** +/** Returns the first child of 'node', or NULL if 'node' has no children. */ CMARK_EXPORT cmark_node* cmark_node_first_child(cmark_node *node); -/** +/** Returns the last child of 'node', or NULL if 'node' has no children. */ CMARK_EXPORT cmark_node* cmark_node_last_child(cmark_node *node); /** - * .SH ACCESSORS + * ## Iterator + * + * An iterator will walk through a tree of nodes, starting from a root + * node, returning one node at a time, together with information about + * whether the node is being entered or exited. The iterator will + * first descend to a child node, if there is one. When there is no + * child, the iterator will go to the next sibling. When there is no + * next sibling, the iterator will return to the parent (but with + * a 'cmark_event_type' of `CMARK_EVENT_EXIT`). The iterator will + * return `CMARK_EVENT_DONE` when it reaches the root node again. + * One natural application is an HTML renderer, where an `ENTER` event + * outputs an open tag and an `EXIT` event outputs a close tag. + * An iterator might also be used to transform an AST in some systematic + * way, for example, turning all level-3 headers into regular paragraphs. 
+ * + * void + * usage_example(cmark_node *root) { + * cmark_event_type ev_type; + * cmark_iter *iter = cmark_iter_new(root); + * + * while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) { + * cmark_node *cur = cmark_iter_get_node(iter); + * // Do something with `cur` and `ev_type` + * } + * + * cmark_iter_free(iter); + * } + * + * Iterators will never return `EXIT` events for leaf nodes, which are nodes + * of type: + * + * * CMARK_NODE_HTML + * * CMARK_NODE_HRULE + * * CMARK_NODE_CODE_BLOCK + * * CMARK_NODE_TEXT + * * CMARK_NODE_SOFTBREAK + * * CMARK_NODE_LINEBREAK + * * CMARK_NODE_CODE + * * CMARK_NODE_INLINE_HTML + * + * Nodes must only be modified after an `EXIT` event, or an `ENTER` event for + * leaf nodes. */ +/** Creates a new iterator starting at 'root'. The current node and event + * type are undefined until `cmark_iter_next` is called for the first time. + */ +CMARK_EXPORT +cmark_iter* +cmark_iter_new(cmark_node *root); + +/** Frees the memory allocated for an iterator. + */ +CMARK_EXPORT +void +cmark_iter_free(cmark_iter *iter); + +/** Advances to the next node and returns the event type (`CMARK_EVENT_ENTER`, + * `CMARK_EVENT_EXIT` or `CMARK_EVENT_DONE`). + */ +CMARK_EXPORT +cmark_event_type +cmark_iter_next(cmark_iter *iter); + +/** Returns the current node. + */ +CMARK_EXPORT +cmark_node* +cmark_iter_get_node(cmark_iter *iter); + +/** Returns the current event type. + */ +CMARK_EXPORT +cmark_event_type +cmark_iter_get_event_type(cmark_iter *iter); + +/** Resets the iterator so that the current node is 'current' and + * the event type is 'event_type'. The new current node must be a + * descendant of the root node or the root node itself. + */ +CMARK_EXPORT +void +cmark_iter_reset(cmark_iter *iter, cmark_node *current, + cmark_event_type event_type); + /** + * ## Accessors + */ + +/** Returns the type of 'node', or `CMARK_NODE_NONE` on error. 
*/ CMARK_EXPORT cmark_node_type cmark_node_get_type(cmark_node *node); -/** +/** Like 'cmark_node_get_type', but returns a string representation + of the type, or `"<unknown>"`. + */ +CMARK_EXPORT +const char* +cmark_node_get_type_string(cmark_node *node); + +/** Returns the string contents of 'node', or NULL if none. */ CMARK_EXPORT const char* -cmark_node_get_string_content(cmark_node *node); +cmark_node_get_literal(cmark_node *node); -/** +/** Sets the string contents of 'node'. Returns 1 on success, + * 0 on failure. */ CMARK_EXPORT int -cmark_node_set_string_content(cmark_node *node, const char *content); +cmark_node_set_literal(cmark_node *node, const char *content); -/** +/** Returns the header level of 'node', or 0 if 'node' is not a header. */ CMARK_EXPORT int cmark_node_get_header_level(cmark_node *node); -/** +/** Sets the header level of 'node', returning 1 on success and 0 on error. */ CMARK_EXPORT int cmark_node_set_header_level(cmark_node *node, int level); -/** +/** Returns the list type of 'node', or `CMARK_NO_LIST` if 'node' + * is not a list. */ CMARK_EXPORT cmark_list_type cmark_node_get_list_type(cmark_node *node); -/** +/** Sets the list type of 'node', returning 1 on success and 0 on error. */ CMARK_EXPORT int cmark_node_set_list_type(cmark_node *node, cmark_list_type type); -/** +/** Returns the list delimiter type of 'node', or `CMARK_NO_DELIM` if 'node' + * is not a list. + */ +CMARK_EXPORT cmark_delim_type +cmark_node_get_list_delim(cmark_node *node); + +/** Sets the list delimiter type of 'node', returning 1 on success and 0 + * on error. + */ +CMARK_EXPORT int +cmark_node_set_list_delim(cmark_node *node, cmark_delim_type delim); + +/** Returns starting number of 'node', if it is an ordered list, otherwise 0. */ CMARK_EXPORT int cmark_node_get_list_start(cmark_node *node); -/** +/** Sets starting number of 'node', if it is an ordered list. Returns 1 + * on success, 0 on failure. 
*/ CMARK_EXPORT int cmark_node_set_list_start(cmark_node *node, int start); -/** +/** Returns 1 if 'node' is a tight list, 0 otherwise. */ CMARK_EXPORT int cmark_node_get_list_tight(cmark_node *node); -/** +/** Sets the "tightness" of a list. Returns 1 on success, 0 on failure. */ CMARK_EXPORT int cmark_node_set_list_tight(cmark_node *node, int tight); -/** +/** Returns the info string from a fenced code block, or NULL if none. */ CMARK_EXPORT const char* cmark_node_get_fence_info(cmark_node *node); -/** +/** Sets the info string in a fenced code block, returning 1 on + * success and 0 on failure. */ CMARK_EXPORT int cmark_node_set_fence_info(cmark_node *node, const char *info); -/** +/** Gets the URL of a link or image 'node', or NULL if none. */ CMARK_EXPORT const char* cmark_node_get_url(cmark_node *node); -/** +/** Sets the URL of a link or image 'node'. Returns 1 on success, + * 0 on failure. */ CMARK_EXPORT int cmark_node_set_url(cmark_node *node, const char *url); -/** +/** Gets the title of a link or image 'node', or NULL if none. */ CMARK_EXPORT const char* cmark_node_get_title(cmark_node *node); -/** +/** Sets the title of a link or image 'node'. Returns 1 on success, + * 0 on failure. */ CMARK_EXPORT int cmark_node_set_title(cmark_node *node, const char *title); -/** +/** Returns the line on which 'node' begins. */ CMARK_EXPORT int cmark_node_get_start_line(cmark_node *node); -/** +/** Returns the column at which 'node' begins. */ CMARK_EXPORT int cmark_node_get_start_column(cmark_node *node); -/** +/** Returns the line on which 'node' ends. */ CMARK_EXPORT int cmark_node_get_end_line(cmark_node *node); -/** - * .SH TREE MANIPULATION +/** Returns the column at which 'node' ends. */ +CMARK_EXPORT int +cmark_node_get_end_column(cmark_node *node); /** + * ## Tree Manipulation + */ + +/** Unlinks a 'node', removing it from the tree, but not freeing its + * memory. (Use 'cmark_node_free' for that.) 
*/ CMARK_EXPORT void cmark_node_unlink(cmark_node *node); -/** +/** Inserts 'sibling' before 'node'. Returns 1 on success, 0 on failure. */ CMARK_EXPORT int cmark_node_insert_before(cmark_node *node, cmark_node *sibling); -/** +/** Inserts 'sibling' after 'node'. Returns 1 on success, 0 on failure. */ CMARK_EXPORT int cmark_node_insert_after(cmark_node *node, cmark_node *sibling); -/** +/** Adds 'child' to the beginning of the children of 'node'. + * Returns 1 on success, 0 on failure. */ CMARK_EXPORT int cmark_node_prepend_child(cmark_node *node, cmark_node *child); -/** +/** Adds 'child' to the end of the children of 'node'. + * Returns 1 on success, 0 on failure. */ CMARK_EXPORT int cmark_node_append_child(cmark_node *node, cmark_node *child); -/** - * .SH PARSING +/** Consolidates adjacent text nodes. */ +CMARK_EXPORT void +cmark_consolidate_text_nodes(cmark_node *root); /** + * ## Parsing + * + * Simple interface: + * + * cmark_node *document = cmark_parse_document("Hello *world*", 12); + * + * Streaming interface: + * + * cmark_parser *parser = cmark_parser_new(); + * FILE *fp = fopen("myfile.md", "r"); + * while ((bytes = fread(buffer, 1, sizeof(buffer), fp)) > 0) { + * cmark_parser_feed(parser, buffer, bytes); + * if (bytes < sizeof(buffer)) { + * break; + * } + * } + * document = cmark_parser_finish(parser); + * cmark_parser_free(parser); + */ + +/** Creates a new parser object. */ CMARK_EXPORT cmark_parser *cmark_parser_new(); -/** +/** Frees memory allocated for a parser object. */ CMARK_EXPORT void cmark_parser_free(cmark_parser *parser); -/** +/** Feeds a string of length 'len' to 'parser'. */ CMARK_EXPORT -cmark_node *cmark_parser_finish(cmark_parser *parser); +void cmark_parser_feed(cmark_parser *parser, const char *buffer, size_t len); -/** +/** Finish parsing and return a pointer to a tree of nodes. 
*/ CMARK_EXPORT -void cmark_parser_feed(cmark_parser *parser, const char *buffer, size_t len); +cmark_node *cmark_parser_finish(cmark_parser *parser); -/** +/** Parse a CommonMark document in 'buffer' of length 'len'. + * Returns a pointer to a tree of nodes. */ CMARK_EXPORT cmark_node *cmark_parse_document(const char *buffer, size_t len); -/** +/** Parse a CommonMark document in file 'f', returning a pointer to + * a tree of nodes. */ CMARK_EXPORT cmark_node *cmark_parse_file(FILE *f); /** - * .SH RENDERING + * ## Rendering */ -/** +/** Render a 'node' tree as XML. */ CMARK_EXPORT -char *cmark_render_ast(cmark_node *root); +char *cmark_render_xml(cmark_node *root, long options); -/** +/** Render a 'node' tree as an HTML fragment. It is up to the user + * to add an appropriate header and footer. */ CMARK_EXPORT -char *cmark_render_html(cmark_node *root); +char *cmark_render_html(cmark_node *root, long options); + +/** Render a 'node' tree as a groff man page, without the header. + */ +CMARK_EXPORT +char *cmark_render_man(cmark_node *root, long options); + +/** Default writer options. + */ +#define CMARK_OPT_DEFAULT 0 + +/** Include a `data-sourcepos` attribute on all block elements. + */ +#define CMARK_OPT_SOURCEPOS 1 + +/** Render `softbreak` elements as hard line breaks. + */ +#define CMARK_OPT_HARDBREAKS 2 + +/** Normalize tree by consolidating adjacent text nodes. + */ +#define CMARK_OPT_NORMALIZE 4 -/** .SH AUTHORS +/** # AUTHORS * * John MacFarlane, Vicent Marti, Kārlis Gaņģis, Nick Wellnhofer.
*/ #ifndef CMARK_NO_SHORT_NAMES - #define NODE_DOCUMENT CMARK_NODE_DOCUMENT - #define NODE_BLOCK_QUOTE CMARK_NODE_BLOCK_QUOTE - #define NODE_LIST CMARK_NODE_LIST - #define NODE_LIST_ITEM CMARK_NODE_LIST_ITEM - #define NODE_CODE_BLOCK CMARK_NODE_CODE_BLOCK - #define NODE_HTML CMARK_NODE_HTML - #define NODE_PARAGRAPH CMARK_NODE_PARAGRAPH - #define NODE_HEADER CMARK_NODE_HEADER - #define NODE_HRULE CMARK_NODE_HRULE - #define NODE_REFERENCE_DEF CMARK_NODE_REFERENCE_DEF - #define NODE_TEXT CMARK_NODE_TEXT - #define NODE_SOFTBREAK CMARK_NODE_SOFTBREAK - #define NODE_LINEBREAK CMARK_NODE_LINEBREAK - #define NODE_INLINE_CODE CMARK_NODE_INLINE_CODE - #define NODE_INLINE_HTML CMARK_NODE_INLINE_HTML - #define NODE_EMPH CMARK_NODE_EMPH - #define NODE_STRONG CMARK_NODE_STRONG - #define NODE_LINK CMARK_NODE_LINK - #define NODE_IMAGE CMARK_NODE_IMAGE - #define NODE_LINK_LABEL CMARK_NODE_LINK_LABEL - #define BULLET_LIST CMARK_BULLET_LIST - #define ORDERED_LIST CMARK_ORDERED_LIST - #define PERIOD_DELIM CMARK_PERIOD_DELIM - #define PAREN_DELIM CMARK_PAREN_DELIM +#define NODE_DOCUMENT CMARK_NODE_DOCUMENT +#define NODE_BLOCK_QUOTE CMARK_NODE_BLOCK_QUOTE +#define NODE_LIST CMARK_NODE_LIST +#define NODE_ITEM CMARK_NODE_ITEM +#define NODE_CODE_BLOCK CMARK_NODE_CODE_BLOCK +#define NODE_HTML CMARK_NODE_HTML +#define NODE_PARAGRAPH CMARK_NODE_PARAGRAPH +#define NODE_HEADER CMARK_NODE_HEADER +#define NODE_HRULE CMARK_NODE_HRULE +#define NODE_TEXT CMARK_NODE_TEXT +#define NODE_SOFTBREAK CMARK_NODE_SOFTBREAK +#define NODE_LINEBREAK CMARK_NODE_LINEBREAK +#define NODE_CODE CMARK_NODE_CODE +#define NODE_INLINE_HTML CMARK_NODE_INLINE_HTML +#define NODE_EMPH CMARK_NODE_EMPH +#define NODE_STRONG CMARK_NODE_STRONG +#define NODE_LINK CMARK_NODE_LINK +#define NODE_IMAGE CMARK_NODE_IMAGE +#define NODE_LINK_LABEL CMARK_NODE_LINK_LABEL +#define BULLET_LIST CMARK_BULLET_LIST +#define ORDERED_LIST CMARK_ORDERED_LIST +#define PERIOD_DELIM CMARK_PERIOD_DELIM +#define PAREN_DELIM CMARK_PAREN_DELIM #endif 
#ifdef __cplusplus
