http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/5672da15/compiler/modules/CommonMark/src/inlines.c ---------------------------------------------------------------------- diff --git a/compiler/modules/CommonMark/src/inlines.c b/compiler/modules/CommonMark/src/inlines.c index 08a934b..2487f63 100644 --- a/compiler/modules/CommonMark/src/inlines.c +++ b/compiler/modules/CommonMark/src/inlines.c @@ -1,8 +1,8 @@ #include <stdlib.h> #include <string.h> #include <stdio.h> -#include <ctype.h> +#include "cmark_ctype.h" #include "config.h" #include "node.h" #include "parser.h" @@ -16,7 +16,7 @@ // Macros for creating various kinds of simple. #define make_str(s) make_literal(CMARK_NODE_TEXT, s) -#define make_code(s) make_literal(CMARK_NODE_INLINE_CODE, s) +#define make_code(s) make_literal(CMARK_NODE_CODE, s) #define make_raw_html(s) make_literal(CMARK_NODE_INLINE_HTML, s) #define make_linebreak() make_simple(CMARK_NODE_LINEBREAK) #define make_softbreak() make_simple(CMARK_NODE_SOFTBREAK) @@ -31,10 +31,11 @@ typedef struct delimiter { int position; bool can_open; bool can_close; + bool active; } delimiter; typedef struct { - chunk input; + cmark_chunk input; int pos; cmark_reference_map *refmap; delimiter *last_delim; @@ -45,24 +46,24 @@ S_insert_emph(subject *subj, delimiter *opener, delimiter *closer); static int parse_inline(subject* subj, cmark_node * parent); -static void subject_from_buf(subject *e, strbuf *buffer, - cmark_reference_map *refmap); +static void subject_from_buf(subject *e, cmark_strbuf *buffer, + cmark_reference_map *refmap); static int subject_find_special_char(subject *subj); -static unsigned char *cmark_clean_autolink(chunk *url, int is_email) +static unsigned char *cmark_clean_autolink(cmark_chunk *url, int is_email) { - strbuf buf = GH_BUF_INIT; + cmark_strbuf buf = GH_BUF_INIT; - chunk_trim(url); + cmark_chunk_trim(url); if (url->len == 0) return NULL; if (is_email) - strbuf_puts(&buf, "mailto:"); + cmark_strbuf_puts(&buf, "mailto:"); houdini_unescape_html_f(&buf, url->data, url->len); - return strbuf_detach(&buf); + return cmark_strbuf_detach(&buf); } static inline cmark_node *make_link(cmark_node *label, unsigned char *url, unsigned char *title) @@ -71,11 +72,11 @@ static inline cmark_node *make_link(cmark_node *label, unsigned char *url, unsig if(e != NULL) { e->type = CMARK_NODE_LINK; e->first_child = label; - e->last_child = label; + e->last_child = label; e->as.link.url = url; e->as.link.title = title; e->next = NULL; - label->parent = e; + label->parent = e; } return e; } @@ -93,14 +94,14 @@ static inline cmark_node* make_literal(cmark_node_type t, cmark_chunk s) e->type = t; e->as.literal = s; e->next = NULL; - e->prev = NULL; - e->parent = NULL; - e->first_child = NULL; - e->last_child = NULL; - // These fields aren't used for inlines: - e->start_line = 0; - e->start_column = 0; - e->end_line = 0; + e->prev = NULL; + e->parent = NULL; + e->first_child = NULL; + e->last_child = NULL; + // These fields aren't used for inlines: + e->start_line = 0; + e->start_column = 0; + e->end_line = 0; } return e; } @@ -112,14 +113,14 @@ static inline cmark_node* make_simple(cmark_node_type t) if(e != NULL) { e->type = t; e->next = NULL; - e->prev = NULL; - e->parent = NULL; - e->first_child = NULL; - e->last_child = NULL; - // These fields aren't used for inlines: - e->start_line = 0; - e->start_column = 0; - e->end_line = 0; + e->prev = NULL; + e->parent = NULL; + e->first_child = NULL; + e->last_child = NULL; + // These fields aren't used for inlines: + e->start_line = 0; + e->start_column = 0; + e->end_line = 0; } return e; } @@ -139,8 +140,8 @@ static unsigned char *bufdup(const unsigned char *buf) return new_buf; } -static void subject_from_buf(subject *e, strbuf *buffer, - cmark_reference_map *refmap) +static void subject_from_buf(subject *e, cmark_strbuf *buffer, + cmark_reference_map *refmap) { e->input.data = buffer->ptr; e->input.len = buffer->size; @@ -149,7 +150,7 @@ static void subject_from_buf(subject *e, strbuf *buffer, e->refmap = refmap; e->last_delim = NULL; - chunk_rtrim(&e->input); + cmark_chunk_rtrim(&e->input); } static inline int isbacktick(int c) @@ -177,7 +178,7 @@ static inline int is_eof(subject* subj) #define advance(subj) (subj)->pos += 1 // Take characters while a predicate holds, and return a string. -static inline chunk take_while(subject* subj, int (*f)(int)) +static inline cmark_chunk take_while(subject* subj, int (*f)(int)) { unsigned char c; int startpos = subj->pos; @@ -188,7 +189,7 @@ static inline chunk take_while(subject* subj, int (*f)(int)) len++; } - return chunk_dup(&subj->input, startpos, len); + return cmark_chunk_dup(&subj->input, startpos, len); } // Try to process a backtick code span that began with a @@ -211,7 +212,7 @@ static int scan_to_closing_backticks(subject* subj, int openticklength) advance(subj); numticks++; } - if (numticks != openticklength){ + if (numticks != openticklength) { return(scan_to_closing_backticks(subj, openticklength)); } return (subj->pos); @@ -221,7 +222,7 @@ static int scan_to_closing_backticks(subject* subj, int openticklength) // Assumes that the subject has a backtick at the current position. static cmark_node* handle_backticks(subject *subj) { - chunk openticks = take_while(subj, isbacktick); + cmark_chunk openticks = take_while(subj, isbacktick); int startpos = subj->pos; int endpos = scan_to_closing_backticks(subj, openticks.len); @@ -229,34 +230,66 @@ static cmark_node* handle_backticks(subject *subj) subj->pos = startpos; // rewind return make_str(openticks); } else { - strbuf buf = GH_BUF_INIT; + cmark_strbuf buf = GH_BUF_INIT; - strbuf_set(&buf, subj->input.data + startpos, endpos - startpos - openticks.len); - strbuf_trim(&buf); - strbuf_normalize_whitespace(&buf); + cmark_strbuf_set(&buf, subj->input.data + startpos, endpos - startpos - openticks.len); + cmark_strbuf_trim(&buf); + cmark_strbuf_normalize_whitespace(&buf); - return make_code(chunk_buf_detach(&buf)); + return make_code(cmark_chunk_buf_detach(&buf)); } } // Scan ***, **, or * and return number scanned, or 0. // Advances position. -static int scan_delims(subject* subj, unsigned char c, bool * can_open, bool * can_close) +static int +scan_delims(subject* subj, unsigned char c, bool * can_open, bool * can_close) { int numdelims = 0; - unsigned char char_before, char_after; + int before_char_pos; + int32_t after_char = 0; + int32_t before_char = 0; + int len; + + if (subj->pos == 0) { + before_char = 10; + } else { + before_char_pos = subj->pos - 1; + // walk back to the beginning of the UTF_8 sequence: + while (peek_at(subj, before_char_pos) >> 6 == 2 && + before_char_pos > 0) { + before_char_pos -= 1; + } + len = utf8proc_iterate(subj->input.data + before_char_pos, + subj->pos - before_char_pos, &before_char); + if (len == -1) { + before_char = 10; + } + } - char_before = subj->pos == 0 ? '\n' : peek_at(subj, subj->pos - 1); while (peek_char(subj) == c) { numdelims++; advance(subj); } - char_after = peek_char(subj); - *can_open = numdelims > 0 && !isspace(char_after); - *can_close = numdelims > 0 && !isspace(char_before); + + len = utf8proc_iterate(subj->input.data + subj->pos, + subj->input.len - subj->pos, &after_char); + if (len == -1) { + after_char = 10; + } + *can_open = numdelims > 0 && !utf8proc_is_space(after_char) && + !(utf8proc_is_punctuation(after_char) && + !utf8proc_is_space(before_char) && + !utf8proc_is_punctuation(before_char)); + *can_close = numdelims > 0 && !utf8proc_is_space(before_char) && + !(utf8proc_is_punctuation(before_char) && + !utf8proc_is_space(after_char) && + !utf8proc_is_punctuation(after_char)); if (c == '_') { - *can_open = *can_open && !isalnum(char_before); - *can_close = *can_close && !isalnum(char_after); + *can_open = *can_open && !(before_char < 128 && + cmark_isalnum((char)before_char)); + *can_close = *can_close && !(before_char < 128 && + cmark_isalnum((char)after_char)); } return numdelims; } @@ -293,10 +326,10 @@ static void remove_delimiter(subject *subj, delimiter *delim) } static void push_delimiter(subject *subj, unsigned char c, bool can_open, - bool can_close, cmark_node *inl_text) + bool can_close, cmark_node *inl_text) { delimiter *delim = - (delimiter*)malloc(sizeof(delimiter)); + (delimiter*)malloc(sizeof(delimiter)); if (delim == NULL) { return; } @@ -310,6 +343,7 @@ static void push_delimiter(subject *subj, unsigned char c, bool can_open, delim->previous->next = delim; } delim->position = subj->pos; + delim->active = true; subj->last_delim = delim; } @@ -323,7 +357,7 @@ static cmark_node* handle_strong_emph(subject* subj, unsigned char c) numdelims = scan_delims(subj, c, &can_open, &can_close); - inl_text = make_str(chunk_dup(&subj->input, subj->pos - numdelims, numdelims)); + inl_text = make_str(cmark_chunk_dup(&subj->input, subj->pos - numdelims, numdelims)); if (can_open || can_close) { push_delimiter(subj, c, can_open, can_close, inl_text); @@ -384,7 +418,7 @@ S_insert_emph(subject *subj, delimiter *opener, delimiter *closer) // calculate the actual number of characters used from this closer if (closer_num_chars < 3 || opener_num_chars < 3) { use_delims = closer_num_chars <= opener_num_chars ? - closer_num_chars : opener_num_chars; + closer_num_chars : opener_num_chars; } else { // closer and opener both have >= 3 characters use_delims = closer_num_chars % 2 == 0 ? 2 : 1; } @@ -409,13 +443,12 @@ S_insert_emph(subject *subj, delimiter *opener, delimiter *closer) // if opener has 0 characters, remove it and its associated inline if (opener_num_chars == 0) { // replace empty opener inline with emph - chunk_free(&(opener_inl->as.literal)); + cmark_chunk_free(&(opener_inl->as.literal)); emph = opener_inl; emph->type = use_delims == 1 ? NODE_EMPH : NODE_STRONG; // remove opener from list remove_delimiter(subj, opener); - } - else { + } else { // create new emph or strong, and splice it in to our inlines // between the opener and closer emph = use_delims == 1 ? make_emph() : make_strong(); @@ -455,14 +488,14 @@ static cmark_node* handle_backslash(subject *subj) { advance(subj); unsigned char nextchar = peek_char(subj); - if (ispunct(nextchar)) { // only ascii symbols and newline can be escaped + if (cmark_ispunct(nextchar)) { // only ascii symbols and newline can be escaped advance(subj); - return make_str(chunk_dup(&subj->input, subj->pos - 1, 1)); + return make_str(cmark_chunk_dup(&subj->input, subj->pos - 1, 1)); } else if (nextchar == '\n') { advance(subj); return make_linebreak(); } else { - return make_str(chunk_literal("\\")); + return make_str(cmark_chunk_literal("\\")); } } @@ -470,31 +503,31 @@ static cmark_node* handle_backslash(subject *subj) // Assumes the subject has an '&' character at the current position. static cmark_node* handle_entity(subject* subj) { - strbuf ent = GH_BUF_INIT; + cmark_strbuf ent = GH_BUF_INIT; size_t len; advance(subj); len = houdini_unescape_ent(&ent, - subj->input.data + subj->pos, - subj->input.len - subj->pos - ); + subj->input.data + subj->pos, + subj->input.len - subj->pos + ); if (len == 0) - return make_str(chunk_literal("&")); + return make_str(cmark_chunk_literal("&")); subj->pos += len; - return make_str(chunk_buf_detach(&ent)); + return make_str(cmark_chunk_buf_detach(&ent)); } // Like make_str, but parses entities. // Returns an inline sequence consisting of str and entity elements. -static cmark_node *make_str_with_entities(chunk *content) +static cmark_node *make_str_with_entities(cmark_chunk *content) { - strbuf unescaped = GH_BUF_INIT; + cmark_strbuf unescaped = GH_BUF_INIT; if (houdini_unescape_html(&unescaped, content->data, (size_t)content->len)) { - return make_str(chunk_buf_detach(&unescaped)); + return make_str(cmark_chunk_buf_detach(&unescaped)); } else { return make_str(*content); } @@ -502,11 +535,11 @@ static cmark_node *make_str_with_entities(chunk *content) // Clean a URL: remove surrounding whitespace and surrounding <>, // and remove \ that escape punctuation. -unsigned char *cmark_clean_url(chunk *url) +unsigned char *cmark_clean_url(cmark_chunk *url) { - strbuf buf = GH_BUF_INIT; + cmark_strbuf buf = GH_BUF_INIT; - chunk_trim(url); + cmark_chunk_trim(url); if (url->len == 0) return NULL; @@ -517,32 +550,32 @@ unsigned char *cmark_clean_url(chunk *url) houdini_unescape_html_f(&buf, url->data, url->len); } - strbuf_unescape(&buf); - return strbuf_detach(&buf); + cmark_strbuf_unescape(&buf); + return cmark_strbuf_detach(&buf); } -unsigned char *cmark_clean_title(chunk *title) +unsigned char *cmark_clean_title(cmark_chunk *title) { - strbuf buf = GH_BUF_INIT; - unsigned char first, last; + cmark_strbuf buf = GH_BUF_INIT; + unsigned char first, last; - if (title->len == 0) - return NULL; + if (title->len == 0) + return NULL; - first = title->data[0]; - last = title->data[title->len - 1]; + first = title->data[0]; + last = title->data[title->len - 1]; - // remove surrounding quotes if any: - if ((first == '\'' && last == '\'') || - (first == '(' && last == ')') || - (first == '"' && last == '"')) { - houdini_unescape_html_f(&buf, title->data + 1, title->len - 2); - } else { - houdini_unescape_html_f(&buf, title->data, title->len); - } + // remove surrounding quotes if any: + if ((first == '\'' && last == '\'') || + (first == '(' && last == ')') || + (first == '"' && last == '"')) { + houdini_unescape_html_f(&buf, title->data + 1, title->len - 2); + } else { + houdini_unescape_html_f(&buf, title->data, title->len); + } - strbuf_unescape(&buf); - return strbuf_detach(&buf); + cmark_strbuf_unescape(&buf); + return cmark_strbuf_detach(&buf); } // Parse an autolink or HTML tag. @@ -550,51 +583,51 @@ unsigned char *cmark_clean_title(chunk *title) static cmark_node* handle_pointy_brace(subject* subj) { int matchlen = 0; - chunk contents; + cmark_chunk contents; advance(subj); // advance past first < // first try to match a URL autolink matchlen = scan_autolink_uri(&subj->input, subj->pos); if (matchlen > 0) { - contents = chunk_dup(&subj->input, subj->pos, matchlen - 1); + contents = cmark_chunk_dup(&subj->input, subj->pos, matchlen - 1); subj->pos += matchlen; return make_autolink( - make_str_with_entities(&contents), - contents, 0 - ); + make_str_with_entities(&contents), + contents, 0 + ); } // next try to match an email autolink matchlen = scan_autolink_email(&subj->input, subj->pos); if (matchlen > 0) { - contents = chunk_dup(&subj->input, subj->pos, matchlen - 1); + contents = cmark_chunk_dup(&subj->input, subj->pos, matchlen - 1); subj->pos += matchlen; return make_autolink( - make_str_with_entities(&contents), - contents, 1 - ); + make_str_with_entities(&contents), + contents, 1 + ); } // finally, try to match an html tag matchlen = scan_html_tag(&subj->input, subj->pos); if (matchlen > 0) { - contents = chunk_dup(&subj->input, subj->pos - 1, matchlen + 1); + contents = cmark_chunk_dup(&subj->input, subj->pos - 1, matchlen + 1); subj->pos += matchlen; return make_raw_html(contents); } // if nothing matches, just return the opening <: - return make_str(chunk_literal("<")); + return make_str(cmark_chunk_literal("<")); } // Parse a link label. Returns 1 if successful. // Note: unescaped brackets are not allowed in labels. // The label begins with `[` and ends with the first `]` character // encountered. Backticks in labels do not start code spans. -static int link_label(subject* subj, chunk *raw_label) +static int link_label(subject* subj, cmark_chunk *raw_label) { int startpos = subj->pos; int length = 0; @@ -611,7 +644,7 @@ static int link_label(subject* subj, chunk *raw_label) if (c == '\\') { advance(subj); length++; - if (ispunct(peek_char(subj))) { + if (cmark_ispunct(peek_char(subj))) { advance(subj); length++; } @@ -625,12 +658,12 @@ static int link_label(subject* subj, chunk *raw_label) } if (c == ']') { // match found - *raw_label = chunk_dup(&subj->input, startpos + 1, subj->pos - (startpos + 1)); + *raw_label = cmark_chunk_dup(&subj->input, startpos + 1, subj->pos - (startpos + 1)); advance(subj); // advance past ] return 1; } - noMatch: +noMatch: subj->pos = startpos; // rewind return 0; @@ -645,13 +678,12 @@ static cmark_node* handle_close_bracket(subject* subj, cmark_node *parent) int sps; cmark_reference *ref; bool is_image = false; - chunk urlchunk, titlechunk; + cmark_chunk url_chunk, title_chunk; unsigned char *url, *title; delimiter *opener; - delimiter *tmp_delim; cmark_node *link_text; cmark_node *inl; - chunk raw_label; + cmark_chunk raw_label; int found_label; advance(subj); // advance past ] @@ -667,7 +699,13 @@ static cmark_node* handle_close_bracket(subject* subj, cmark_node *parent) } if (opener == NULL) { - return make_str(chunk_literal("]")); + return make_str(cmark_chunk_literal("]")); + } + + if (!opener->active) { + // take delimiter off stack + remove_delimiter(subj, opener); + return make_str(cmark_chunk_literal("]")); } // If we got here, we matched a potential link/image text. @@ -688,19 +726,19 @@ static cmark_node* handle_close_bracket(subject* subj, cmark_node *parent) // ensure there are spaces btw url and title endtitle = (starttitle == endurl) ? starttitle : - starttitle + scan_link_title(&subj->input, starttitle); + starttitle + scan_link_title(&subj->input, starttitle); endall = endtitle + scan_spacechars(&subj->input, endtitle); if (peek_at(subj, endall) == ')') { subj->pos = endall + 1; - urlchunk = chunk_dup(&subj->input, starturl, endurl - starturl); - titlechunk = chunk_dup(&subj->input, starttitle, endtitle - starttitle); - url = cmark_clean_url(&urlchunk); - title = cmark_clean_title(&titlechunk); - chunk_free(&urlchunk); - chunk_free(&titlechunk); + url_chunk = cmark_chunk_dup(&subj->input, starturl, endurl - starturl); + title_chunk = cmark_chunk_dup(&subj->input, starttitle, endtitle - starttitle); + url = cmark_clean_url(&url_chunk); + title = cmark_clean_title(&title_chunk); + cmark_chunk_free(&url_chunk); + cmark_chunk_free(&title_chunk); goto match; } else { @@ -711,12 +749,12 @@ static cmark_node* handle_close_bracket(subject* subj, cmark_node *parent) // Next, look for a following [link label] that matches in refmap. // skip spaces subj->pos = subj->pos + scan_spacechars(&subj->input, subj->pos); - raw_label = chunk_literal(""); + raw_label = cmark_chunk_literal(""); found_label = link_label(subj, &raw_label); if (!found_label || raw_label.len == 0) { - chunk_free(&raw_label); - raw_label = chunk_dup(&subj->input, opener->position, - initial_pos - opener->position - 1); + cmark_chunk_free(&raw_label); + raw_label = cmark_chunk_dup(&subj->input, opener->position, + initial_pos - opener->position - 1); } if (!found_label) { @@ -726,7 +764,7 @@ static cmark_node* handle_close_bracket(subject* subj, cmark_node *parent) } ref = cmark_reference_lookup(subj->refmap, &raw_label); - chunk_free(&raw_label); + cmark_chunk_free(&raw_label); if (ref != NULL) { // found url = bufdup(ref->url); @@ -740,12 +778,12 @@ noMatch: // If we fall through to here, it means we didn't match a link: remove_delimiter(subj, opener); // remove this opener from delimiter list subj->pos = initial_pos; - return make_str(chunk_literal("]")); + return make_str(cmark_chunk_literal("]")); match: inl = opener->inl_text; inl->type = is_image ? NODE_IMAGE : NODE_LINK; - chunk_free(&inl->as.literal); + cmark_chunk_free(&inl->as.literal); inl->first_child = link_text; process_emphasis(subj, opener->previous); inl->as.link.url = url; @@ -763,17 +801,20 @@ match: parent->last_child = inl; // process_emphasis will remove this delimiter and all later ones. - // Now, if we have a link, we also want to remove earlier link - // delimiters. (This code can be removed if we decide to allow links + // Now, if we have a link, we also want to deactivate earlier link + // delimiters. (This code can be removed if we decide to allow links // inside links.) if (!is_image) { opener = subj->last_delim; while (opener != NULL) { - tmp_delim = opener->previous; if (opener->delim_char == '[') { - remove_delimiter(subj, opener); + if (!opener->active) { + break; + } else { + opener->active = false; + } } - opener = tmp_delim; + opener = opener->previous; } } @@ -819,7 +860,8 @@ static int subject_find_special_char(subject *subj) 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + }; int n = subj->pos + 1; @@ -837,14 +879,14 @@ static int subject_find_special_char(subject *subj) static int parse_inline(subject* subj, cmark_node * parent) { cmark_node* new_inl = NULL; - chunk contents; + cmark_chunk contents; unsigned char c; int endpos; c = peek_char(subj); if (c == 0) { return 0; } - switch(c){ + switch(c) { case '\n': new_inl = handle_newline(subj); break; @@ -866,7 +908,7 @@ static int parse_inline(subject* subj, cmark_node * parent) break; case '[': advance(subj); - new_inl = make_str(chunk_literal("[")); + new_inl = make_str(cmark_chunk_literal("[")); push_delimiter(subj, '[', true, false, new_inl); break; case ']': @@ -876,20 +918,20 @@ static int parse_inline(subject* subj, cmark_node * parent) advance(subj); if (peek_char(subj) == '[') { advance(subj); - new_inl = make_str(chunk_literal("![")); + new_inl = make_str(cmark_chunk_literal("![")); push_delimiter(subj, '!', false, true, new_inl); } else { - new_inl = make_str(chunk_literal("!")); + new_inl = make_str(cmark_chunk_literal("!")); } break; default: endpos = subject_find_special_char(subj); - contents = chunk_dup(&subj->input, subj->pos, endpos - subj->pos); + contents = cmark_chunk_dup(&subj->input, subj->pos, endpos - subj->pos); subj->pos = endpos; // if we're at a newline, strip trailing spaces. if (peek_char(subj) == '\n') { - chunk_rtrim(&contents); + cmark_chunk_rtrim(&contents); } new_inl = make_str(contents); @@ -918,7 +960,7 @@ static void spnl(subject* subj) bool seen_newline = false; while (peek_char(subj) == ' ' || (!seen_newline && - (seen_newline = peek_char(subj) == '\n'))) { + (seen_newline = peek_char(subj) == '\n'))) { advance(subj); } } @@ -927,13 +969,13 @@ static void spnl(subject* subj) // Modify refmap if a reference is encountered. // Return 0 if no reference found, otherwise position of subject // after reference is parsed. -int cmark_parse_reference_inline(strbuf *input, cmark_reference_map *refmap) +int cmark_parse_reference_inline(cmark_strbuf *input, cmark_reference_map *refmap) { subject subj; - chunk lab; - chunk url; - chunk title; + cmark_chunk lab; + cmark_chunk url; + cmark_chunk title; int matchlen = 0; int beforetitle; @@ -955,7 +997,7 @@ int cmark_parse_reference_inline(strbuf *input, cmark_reference_map *refmap) spnl(&subj); matchlen = scan_link_url(&subj.input, subj.pos); if (matchlen) { - url = chunk_dup(&subj.input, subj.pos, matchlen); + url = cmark_chunk_dup(&subj.input, subj.pos, matchlen); subj.pos += matchlen; } else { return 0; @@ -966,11 +1008,11 @@ int cmark_parse_reference_inline(strbuf *input, cmark_reference_map *refmap) spnl(&subj); matchlen = scan_link_title(&subj.input, subj.pos); if (matchlen) { - title = chunk_dup(&subj.input, subj.pos, matchlen); + title = cmark_chunk_dup(&subj.input, subj.pos, matchlen); subj.pos += matchlen; } else { subj.pos = beforetitle; - title = chunk_literal(""); + title = cmark_chunk_literal(""); } // parse final spaces and newline: while (peek_char(&subj) == ' ') {
http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/5672da15/compiler/modules/CommonMark/src/iterator.c ---------------------------------------------------------------------- diff --git a/compiler/modules/CommonMark/src/iterator.c b/compiler/modules/CommonMark/src/iterator.c new file mode 100644 index 0000000..4daec2d --- /dev/null +++ b/compiler/modules/CommonMark/src/iterator.c @@ -0,0 +1,140 @@ +#include <assert.h> +#include <stdlib.h> + +#include "config.h" +#include "node.h" +#include "cmark.h" +#include "iterator.h" + +static const int S_leaf_mask = + (1 << CMARK_NODE_HTML) | + (1 << CMARK_NODE_HRULE) | + (1 << CMARK_NODE_CODE_BLOCK) | + (1 << CMARK_NODE_TEXT) | + (1 << CMARK_NODE_SOFTBREAK) | + (1 << CMARK_NODE_LINEBREAK) | + (1 << CMARK_NODE_CODE) | + (1 << CMARK_NODE_INLINE_HTML); + +cmark_iter* +cmark_iter_new(cmark_node *root) +{ + if (root == NULL) { + return NULL; + } + cmark_iter *iter = (cmark_iter*)malloc(sizeof(cmark_iter)); + if (iter == NULL) { + return NULL; + } + iter->root = root; + iter->cur.ev_type = CMARK_EVENT_NONE; + iter->cur.node = NULL; + iter->next.ev_type = CMARK_EVENT_ENTER; + iter->next.node = root; + return iter; +} + +void +cmark_iter_free(cmark_iter *iter) +{ + free(iter); +} + +static bool +S_is_leaf(cmark_node *node) +{ + return (1 << node->type) & S_leaf_mask; +} + +cmark_event_type +cmark_iter_next(cmark_iter *iter) +{ + cmark_event_type ev_type = iter->next.ev_type; + cmark_node *node = iter->next.node; + + iter->cur.ev_type = ev_type; + iter->cur.node = node; + + if (ev_type == CMARK_EVENT_DONE) { + return ev_type; + } + + /* roll forward to next item, setting both fields */ + if (ev_type == CMARK_EVENT_ENTER && !S_is_leaf(node)) { + if (node->first_child == NULL) { + /* stay on this node but exit */ + iter->next.ev_type = CMARK_EVENT_EXIT; + } else { + iter->next.ev_type = CMARK_EVENT_ENTER; + iter->next.node = node->first_child; + } + } else if (node == iter->root) { + /* don't move past root */ + iter->next.ev_type = CMARK_EVENT_DONE; + iter->next.node = NULL; + } else if (node->next) { + iter->next.ev_type = CMARK_EVENT_ENTER; + iter->next.node = node->next; + } else if (node->parent) { + iter->next.ev_type = CMARK_EVENT_EXIT; + iter->next.node = node->parent; + } else { + assert(false); + iter->next.ev_type = CMARK_EVENT_DONE; + iter->next.node = NULL; + } + + return ev_type; +} + +void +cmark_iter_reset(cmark_iter *iter, cmark_node *current, + cmark_event_type event_type) +{ + iter->next.ev_type = event_type; + iter->next.node = current; + cmark_iter_next(iter); +} + +cmark_node* +cmark_iter_get_node(cmark_iter *iter) +{ + return iter->cur.node; +} + +cmark_event_type +cmark_iter_get_event_type(cmark_iter *iter) +{ + return iter->cur.ev_type; +} + + +void cmark_consolidate_text_nodes(cmark_node *root) +{ + cmark_iter *iter = cmark_iter_new(root); + cmark_strbuf buf = GH_BUF_INIT; + cmark_event_type ev_type; + cmark_node *cur, *tmp, *next; + + while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) { + cur = cmark_iter_get_node(iter); + if (ev_type == CMARK_EVENT_ENTER && + cur->type == CMARK_NODE_TEXT && + cur->next && + cur->next->type == CMARK_NODE_TEXT) { + cmark_strbuf_clear(&buf); + cmark_strbuf_puts(&buf, cmark_node_get_literal(cur)); + tmp = cur->next; + while (tmp && tmp->type == CMARK_NODE_TEXT) { + cmark_iter_get_node(iter); // advance pointer + cmark_strbuf_puts(&buf, cmark_node_get_literal(tmp)); + next = tmp->next; + cmark_node_free(tmp); + tmp = next; + } + cmark_node_set_literal(cur, (char *)cmark_strbuf_detach(&buf)); + } + } + + cmark_iter_free(iter); +} http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/5672da15/compiler/modules/CommonMark/src/iterator.h ---------------------------------------------------------------------- diff --git a/compiler/modules/CommonMark/src/iterator.h b/compiler/modules/CommonMark/src/iterator.h new file mode 100644 index 0000000..027b10b --- /dev/null +++ b/compiler/modules/CommonMark/src/iterator.h @@ -0,0 +1,25 @@ +#ifndef CMARK_ITERATOR_H +#define CMARK_ITERATOR_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include "cmark.h" + +typedef struct { + cmark_event_type ev_type; + cmark_node *node; +} cmark_iter_state; + +struct cmark_iter { + cmark_node *root; + cmark_iter_state cur; + cmark_iter_state next; +}; + +#ifdef __cplusplus +} +#endif + +#endif http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/5672da15/compiler/modules/CommonMark/src/man.c ---------------------------------------------------------------------- diff --git a/compiler/modules/CommonMark/src/man.c b/compiler/modules/CommonMark/src/man.c new file mode 100644 index 0000000..2c8a3a5 --- /dev/null +++ b/compiler/modules/CommonMark/src/man.c @@ -0,0 +1,249 @@ +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <assert.h> + +#include "config.h" +#include "cmark.h" +#include "node.h" +#include "buffer.h" + +// Functions to convert cmark_nodes to groff man strings. + +static void escape_man(cmark_strbuf *dest, const unsigned char *source, int length) +{ + int i; + unsigned char c; + + for (i = 0; i < length; i++) { + c = source[i]; + if (c == '.' && i == 0) { + cmark_strbuf_puts(dest, "\\&."); + } else if (c == '\'' && i == 0) { + cmark_strbuf_puts(dest, "\\&'"); + } else if (c == '-') { + cmark_strbuf_puts(dest, "\\-"); + } else if (c == '\\') { + cmark_strbuf_puts(dest, "\\e"); + } else { + cmark_strbuf_putc(dest, source[i]); + } + } +} + +static inline void cr(cmark_strbuf *man) +{ + if (man->size && man->ptr[man->size - 1] != '\n') + cmark_strbuf_putc(man, '\n'); +} + +struct render_state { + cmark_strbuf* man; + cmark_node *plain; +}; + +static int +S_render_node(cmark_node *node, cmark_event_type ev_type, + struct render_state *state) +{ + cmark_node *tmp; + cmark_strbuf *man = state->man; + int list_number; + bool entering = (ev_type == CMARK_EVENT_ENTER); + + if (state->plain == node) { // back at original node + state->plain = NULL; + } + + if (state->plain != NULL) { + switch(node->type) { + case CMARK_NODE_TEXT: + case CMARK_NODE_CODE: + escape_man(man, node->as.literal.data, + node->as.literal.len); + break; + + case CMARK_NODE_LINEBREAK: + case CMARK_NODE_SOFTBREAK: + cmark_strbuf_putc(man, ' '); + break; + + default: + break; + } + return 1; + } + + switch (node->type) { + case CMARK_NODE_DOCUMENT: + break; + + case CMARK_NODE_BLOCK_QUOTE: + if (entering) { + cr(man); + cmark_strbuf_puts(man, ".RS"); + cr(man); + } else { + cr(man); + cmark_strbuf_puts(man, ".RE"); + cr(man); + } + break; + + case CMARK_NODE_LIST: + break; + + case CMARK_NODE_ITEM: + if (entering) { + cr(man); + cmark_strbuf_puts(man, ".IP "); + if (cmark_node_get_list_type(node->parent) == + CMARK_BULLET_LIST) { + cmark_strbuf_puts(man, "\\[bu] 2"); + } else { + list_number = cmark_node_get_list_start(node->parent); + tmp = node; + while (tmp->prev) { + tmp = tmp->prev; + list_number += 1; + } + cmark_strbuf_printf(man, "\"%d.\" 4", list_number); + } + cr(man); + } else { + cr(man); + } + break; + + case CMARK_NODE_HEADER: + if (entering) { + cr(man); + cmark_strbuf_puts(man, + cmark_node_get_header_level(node) == 1 ? + ".SH" : ".SS"); + cr(man); + } else { + cr(man); + } + break; + + case CMARK_NODE_CODE_BLOCK: + cr(man); + cmark_strbuf_puts(man, ".IP\n.nf\n\\f[C]\n"); + escape_man(man, node->as.code.literal.data, + node->as.code.literal.len); + cr(man); + cmark_strbuf_puts(man, "\\f[]\n.fi"); + cr(man); + break; + + case CMARK_NODE_HTML: + break; + + case CMARK_NODE_HRULE: + cr(man); + cmark_strbuf_puts(man, ".PP\n * * * * *"); + cr(man); + break; + + case CMARK_NODE_PARAGRAPH: + if (entering) { + // no blank line if first paragraph in list: + if (node->parent && + node->parent->type == CMARK_NODE_ITEM && + node->prev == NULL) { + // no blank line or .PP + } else { + cr(man); + cmark_strbuf_puts(man, ".PP\n"); + } + } else { + cr(man); + } + break; + + case CMARK_NODE_TEXT: + escape_man(man, node->as.literal.data, + node->as.literal.len); + break; + + case CMARK_NODE_LINEBREAK: + cmark_strbuf_puts(man, ".PD 0\n.P\n.PD"); + cr(man); + break; + + case CMARK_NODE_SOFTBREAK: + cmark_strbuf_putc(man, '\n'); + break; + + case CMARK_NODE_CODE: + cmark_strbuf_puts(man, "\\f[C]"); + escape_man(man, node->as.literal.data, node->as.literal.len); + cmark_strbuf_puts(man, "\\f[]"); + break; + + case CMARK_NODE_INLINE_HTML: + break; + + case CMARK_NODE_STRONG: + if (entering) { + cmark_strbuf_puts(man, "\\f[B]"); + } else { + cmark_strbuf_puts(man, "\\f[]"); + } + break; + + case CMARK_NODE_EMPH: + if (entering) { + cmark_strbuf_puts(man, "\\f[I]"); + } else { + cmark_strbuf_puts(man, "\\f[]"); + } + break; + + case CMARK_NODE_LINK: + if (!entering) { + cmark_strbuf_printf(man, " (%s)", + cmark_node_get_url(node)); + } + break; + + case CMARK_NODE_IMAGE: + if (entering) { + cmark_strbuf_puts(man, "[IMAGE: "); + state->plain = node; + } else { + cmark_strbuf_puts(man, "]"); + } + break; + + default: + assert(false); + break; + } + + // cmark_strbuf_putc(man, 'x'); + return 1; +} + +char *cmark_render_man(cmark_node *root, long options) +{ + char *result; + cmark_strbuf man = GH_BUF_INIT; + struct render_state state = { &man, NULL }; + cmark_node *cur; + cmark_event_type ev_type; + cmark_iter *iter = cmark_iter_new(root); + + if (options == 0) options = 0; // avoid warning about unused parameters + + while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) { + cur = cmark_iter_get_node(iter); + S_render_node(cur, ev_type, &state); + } + result = (char *)cmark_strbuf_detach(&man); + + cmark_iter_free(iter); + cmark_strbuf_free(&man); + return result; +} http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/5672da15/compiler/modules/CommonMark/src/node.c ---------------------------------------------------------------------- diff --git a/compiler/modules/CommonMark/src/node.c b/compiler/modules/CommonMark/src/node.c index 243c3e6..3785a27 100644 --- a/compiler/modules/CommonMark/src/node.c +++ b/compiler/modules/CommonMark/src/node.c @@ -8,7 +8,8 @@ static void S_node_unlink(cmark_node *node); cmark_node* -cmark_node_new(cmark_node_type type) { +cmark_node_new(cmark_node_type type) +{ cmark_node *node = (cmark_node *)calloc(1, sizeof(*node)); node->type = type; @@ -38,14 +39,16 @@ void S_free_nodes(cmark_node *e) { cmark_node *next; while (e != NULL) { - strbuf_free(&e->string_content); - switch (e->type){ + cmark_strbuf_free(&e->string_content); + switch (e->type) { case NODE_CODE_BLOCK: - strbuf_free(&e->as.code.info); + cmark_chunk_free(&e->as.code.info); + cmark_chunk_free(&e->as.code.literal); break; case NODE_TEXT: case NODE_INLINE_HTML: - case NODE_INLINE_CODE: + case NODE_CODE: + case NODE_HTML: cmark_chunk_free(&e->as.literal); break; case NODE_LINK: @@ -68,7 +71,8 @@ void S_free_nodes(cmark_node *e) } void -cmark_node_free(cmark_node *node) { +cmark_node_free(cmark_node *node) +{ S_node_unlink(node); node->next = NULL; S_free_nodes(node); @@ -84,34 +88,52 @@ cmark_node_get_type(cmark_node *node) } } -static const char* -S_type_string(cmark_node *node) +const char* +cmark_node_get_type_string(cmark_node *node) { if (node == NULL) { return "NONE"; } switch (node->type) { - case CMARK_NODE_NONE: return "NONE"; - case CMARK_NODE_DOCUMENT: return "DOCUMENT"; - case CMARK_NODE_BLOCK_QUOTE: return "BLOCK_QUOTE"; - case CMARK_NODE_LIST: return "LIST"; - case CMARK_NODE_LIST_ITEM: return "LIST_ITEM"; - case CMARK_NODE_CODE_BLOCK: return "CODE_BLOCK"; - case CMARK_NODE_HTML: return "HTML"; - case CMARK_NODE_PARAGRAPH: return "PARAGRAPH"; - case CMARK_NODE_HEADER: return "HEADER"; - case CMARK_NODE_HRULE: return "HRULE"; - case CMARK_NODE_REFERENCE_DEF: return "REFERENCE_DEF"; - case CMARK_NODE_TEXT: return "TEXT"; - case CMARK_NODE_SOFTBREAK: return "SOFTBREAK"; - case CMARK_NODE_LINEBREAK: return "LINEBREAK"; - case CMARK_NODE_INLINE_CODE: return "INLINE_CODE"; - case CMARK_NODE_INLINE_HTML: return "INLINE_HTML"; - case CMARK_NODE_EMPH: return "EMPH"; - case CMARK_NODE_STRONG: return "STRONG"; - case CMARK_NODE_LINK: return "LINK"; - case CMARK_NODE_IMAGE: return "IMAGE"; + case CMARK_NODE_NONE: + return "none"; + case CMARK_NODE_DOCUMENT: + return "document"; + case CMARK_NODE_BLOCK_QUOTE: + return "block_quote"; + case CMARK_NODE_LIST: + return "list"; + case CMARK_NODE_ITEM: + return "item"; + case CMARK_NODE_CODE_BLOCK: + return "code_block"; + case CMARK_NODE_HTML: + return "html"; + case CMARK_NODE_PARAGRAPH: + return "paragraph"; + case CMARK_NODE_HEADER: + return "header"; + case CMARK_NODE_HRULE: + return "hrule"; + case CMARK_NODE_TEXT: + return "text"; + case CMARK_NODE_SOFTBREAK: + return "softbreak"; + case CMARK_NODE_LINEBREAK: + return "linebreak"; + case CMARK_NODE_CODE: + return "code"; + case CMARK_NODE_INLINE_HTML: + return "inline_html"; + case CMARK_NODE_EMPH: + return "emph"; + case CMARK_NODE_STRONG: + return "strong"; + case CMARK_NODE_LINK: + return "link"; + case CMARK_NODE_IMAGE: + return "image"; } return "<unknown>"; @@ -168,7 +190,8 @@ cmark_node_last_child(cmark_node *node) } static char* -S_strdup(const char *str) { +S_strdup(const char *str) +{ size_t size = strlen(str) + 1; char *dup = (char *)malloc(size); memcpy(dup, str, size); @@ -176,21 +199,22 @@ S_strdup(const char *str) { } const char* -cmark_node_get_string_content(cmark_node *node) { +cmark_node_get_literal(cmark_node *node) +{ if (node == NULL) { return NULL; } switch (node->type) { - case NODE_CODE_BLOCK: case NODE_HTML: - return cmark_strbuf_cstr(&node->string_content); - case NODE_TEXT: case NODE_INLINE_HTML: - case NODE_INLINE_CODE: + case NODE_CODE: return cmark_chunk_to_cstr(&node->as.literal); + case NODE_CODE_BLOCK: + return cmark_chunk_to_cstr(&node->as.code.literal); + default: break; } @@ -199,23 +223,24 @@ cmark_node_get_string_content(cmark_node *node) { } int -cmark_node_set_string_content(cmark_node *node, const char *content) { +cmark_node_set_literal(cmark_node *node, const char *content) +{ if (node == NULL) { return 0; } switch (node->type) { - case NODE_CODE_BLOCK: case NODE_HTML: - cmark_strbuf_sets(&node->string_content, content); - return 1; - case NODE_TEXT: case NODE_INLINE_HTML: - case NODE_INLINE_CODE: + case NODE_CODE: cmark_chunk_set_cstr(&node->as.literal, content); return 1; + case NODE_CODE_BLOCK: + cmark_chunk_set_cstr(&node->as.code.literal, content); + return 1; + default: break; } @@ -224,7 +249,8 @@ cmark_node_set_string_content(cmark_node *node, const char *content) { } int -cmark_node_get_header_level(cmark_node *node) { +cmark_node_get_header_level(cmark_node *node) +{ if (node == NULL) { return 0; } @@ -241,7 +267,8 @@ cmark_node_get_header_level(cmark_node *node) { } int -cmark_node_set_header_level(cmark_node *node, int level) { +cmark_node_set_header_level(cmark_node *node, int level) +{ if (node == NULL || level < 1 || level > 6) { return 0; } @@ -259,21 +286,22 @@ cmark_node_set_header_level(cmark_node *node, int level) { } cmark_list_type -cmark_node_get_list_type(cmark_node *node) { +cmark_node_get_list_type(cmark_node *node) +{ if (node == NULL) { return CMARK_NO_LIST; } if (node->type == CMARK_NODE_LIST) { return node->as.list.list_type; - } - else { + } else { return CMARK_NO_LIST; } } int -cmark_node_set_list_type(cmark_node *node, cmark_list_type type) { +cmark_node_set_list_type(cmark_node *node, cmark_list_type type) +{ if (!(type == CMARK_BULLET_LIST || type == CMARK_ORDERED_LIST)) { return 0; } @@ -285,28 +313,61 @@ cmark_node_set_list_type(cmark_node *node, cmark_list_type type) { if (node->type == CMARK_NODE_LIST) { node->as.list.list_type = type; return 1; + } else { + return 0; + } +} + +cmark_delim_type +cmark_node_get_list_delim(cmark_node *node) +{ + if (node == NULL) { + return CMARK_NO_DELIM; + } + + if (node->type == CMARK_NODE_LIST) { + return node->as.list.delimiter; + } else { + return CMARK_NO_DELIM; } - else { +} + +int +cmark_node_set_list_delim(cmark_node *node, cmark_delim_type delim) +{ + if (!(delim == CMARK_PERIOD_DELIM || delim == CMARK_PAREN_DELIM)) { + return 0; + } + + if (node == NULL) { + return 0; + } + + if (node->type == CMARK_NODE_LIST) { + node->as.list.delimiter = delim; + return 1; + } else { return 0; } } int -cmark_node_get_list_start(cmark_node *node) { +cmark_node_get_list_start(cmark_node *node) +{ if (node == NULL) { return 0; } if (node->type == CMARK_NODE_LIST) { return node->as.list.start; - } - else { + } else { return 0; } } int -cmark_node_set_list_start(cmark_node *node, int start) { +cmark_node_set_list_start(cmark_node *node, int start) +{ if (node == NULL || start < 0) { return 0; } @@ -314,28 +375,28 @@ cmark_node_set_list_start(cmark_node *node, int start) { if (node->type == CMARK_NODE_LIST) { node->as.list.start = start; return 1; - } - else { + } else { return 0; } } int -cmark_node_get_list_tight(cmark_node *node) { +cmark_node_get_list_tight(cmark_node *node) +{ if (node == NULL) { return 0; } if (node->type == CMARK_NODE_LIST) { return node->as.list.tight; - } - else { + } else { return 0; } } int -cmark_node_set_list_tight(cmark_node *node, int tight) { +cmark_node_set_list_tight(cmark_node *node, int tight) +{ if (node == NULL) { return 0; } @@ -343,43 +404,43 @@ cmark_node_set_list_tight(cmark_node *node, int tight) { if (node->type == CMARK_NODE_LIST) { node->as.list.tight = tight; return 1; - } - else { + } else { return 0; } } const char* -cmark_node_get_fence_info(cmark_node *node) { +cmark_node_get_fence_info(cmark_node *node) +{ if (node == NULL) { return NULL; } if (node->type == NODE_CODE_BLOCK) { - return cmark_strbuf_cstr(&node->as.code.info); - } - else { + return cmark_chunk_to_cstr(&node->as.code.info); + } else { return NULL; } } int -cmark_node_set_fence_info(cmark_node *node, const char *info) { +cmark_node_set_fence_info(cmark_node *node, const char *info) +{ if (node == NULL) { return 0; } if (node->type == NODE_CODE_BLOCK) { - cmark_strbuf_sets(&node->as.code.info, info); + cmark_chunk_set_cstr(&node->as.code.info, info); return 1; - } - else { + } else { return 0; } } const char* -cmark_node_get_url(cmark_node *node) { +cmark_node_get_url(cmark_node *node) +{ if (node == NULL) { return NULL; } @@ -396,7 +457,8 @@ cmark_node_get_url(cmark_node *node) { } int -cmark_node_set_url(cmark_node *node, const char *url) { +cmark_node_set_url(cmark_node *node, const char *url) +{ if (node == NULL) { return 0; } @@ -415,7 +477,8 @@ cmark_node_set_url(cmark_node *node, const char *url) { } const char* -cmark_node_get_title(cmark_node *node) { +cmark_node_get_title(cmark_node *node) +{ if (node == NULL) { return NULL; } @@ -432,7 +495,8 @@ cmark_node_get_title(cmark_node *node) { } int -cmark_node_set_title(cmark_node *node, const char *title) { +cmark_node_set_title(cmark_node *node, const char *title) +{ if (node == NULL) { return 0; } @@ -451,7 +515,8 @@ cmark_node_set_title(cmark_node *node, const char *title) { } int -cmark_node_get_start_line(cmark_node *node) { +cmark_node_get_start_line(cmark_node *node) +{ if (node == NULL) { return 0; } @@ -459,7 +524,8 @@ cmark_node_get_start_line(cmark_node *node) { } int -cmark_node_get_start_column(cmark_node *node) { +cmark_node_get_start_column(cmark_node *node) +{ if (node == NULL) { return 0; } @@ -467,15 +533,26 @@ cmark_node_get_start_column(cmark_node *node) { } int -cmark_node_get_end_line(cmark_node *node) { +cmark_node_get_end_line(cmark_node *node) +{ if (node == NULL) { return 0; } return node->end_line; } +int +cmark_node_get_end_column(cmark_node *node) +{ + if (node == NULL) { + return 0; + } + return node->end_column; +} + static inline bool -S_is_block(cmark_node *node) { +S_is_block(cmark_node *node) +{ if (node == NULL) { return false; } @@ -484,7 +561,8 @@ S_is_block(cmark_node *node) { } static inline bool -S_is_inline(cmark_node *node) { +S_is_inline(cmark_node *node) +{ if (node == NULL) { return false; } @@ -517,12 +595,12 @@ S_can_contain(cmark_node *node, cmark_node *child) switch (node->type) { case CMARK_NODE_DOCUMENT: case CMARK_NODE_BLOCK_QUOTE: - case CMARK_NODE_LIST_ITEM: + case CMARK_NODE_ITEM: return S_is_block(child) - && child->type != CMARK_NODE_LIST_ITEM; + && child->type != CMARK_NODE_ITEM; case CMARK_NODE_LIST: - return child->type == CMARK_NODE_LIST_ITEM; + return child->type == CMARK_NODE_ITEM; case CMARK_NODE_PARAGRAPH: case CMARK_NODE_HEADER: @@ -567,7 +645,8 @@ S_node_unlink(cmark_node *node) } void -cmark_node_unlink(cmark_node *node) { +cmark_node_unlink(cmark_node *node) +{ S_node_unlink(node); node->next = NULL; @@ -664,8 +743,7 @@ cmark_node_prepend_child(cmark_node *node, cmark_node *child) if (old_first_child) { old_first_child->prev = child; - } - else { + } else { // Also set last_child if node previously had no children. node->last_child = child; } @@ -691,8 +769,7 @@ cmark_node_append_child(cmark_node *node, cmark_node *child) if (old_last_child) { old_last_child->next = child; - } - else { + } else { // Also set first_child if node previously had no children. node->first_child = child; } @@ -707,7 +784,8 @@ S_print_error(FILE *out, cmark_node *node, const char *elem) return; } fprintf(out, "Invalid '%s' in node type %s at %d:%d\n", elem, - S_type_string(node), node->start_line, node->start_column); + cmark_node_get_type_string(node), node->start_line, + node->start_column); } int @@ -737,7 +815,7 @@ cmark_node_check(cmark_node *node, FILE *out) continue; } - next_sibling: +next_sibling: if (cur == node) { break; } http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/5672da15/compiler/modules/CommonMark/src/node.h ---------------------------------------------------------------------- diff --git a/compiler/modules/CommonMark/src/node.h b/compiler/modules/CommonMark/src/node.h index b842ed8..c0c43d3 100644 --- a/compiler/modules/CommonMark/src/node.h +++ b/compiler/modules/CommonMark/src/node.h @@ -26,7 +26,8 @@ typedef struct { int fence_length; int fence_offset; unsigned char fence_char; - cmark_strbuf info; + cmark_chunk info; + cmark_chunk literal; } cmark_code; typedef struct { @@ -51,6 +52,7 @@ struct cmark_node { int start_line; int start_column; int end_line; + int end_column; bool open; bool last_line_blank; @@ -73,4 +75,3 @@ cmark_node_check(cmark_node *node, FILE *out); #endif #endif - http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/5672da15/compiler/modules/CommonMark/src/parser.h ---------------------------------------------------------------------- diff --git a/compiler/modules/CommonMark/src/parser.h b/compiler/modules/CommonMark/src/parser.h index 9d65b67..3c8def9 100644 --- a/compiler/modules/CommonMark/src/parser.h +++ b/compiler/modules/CommonMark/src/parser.h @@ -17,6 +17,7 @@ struct cmark_parser { struct cmark_node* current; int line_number; cmark_strbuf *curline; + int last_line_length; cmark_strbuf *linebuf; }; http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/5672da15/compiler/modules/CommonMark/src/print.c ---------------------------------------------------------------------- diff --git a/compiler/modules/CommonMark/src/print.c b/compiler/modules/CommonMark/src/print.c deleted file mode 100644 index d2dfe8c..0000000 --- a/compiler/modules/CommonMark/src/print.c +++ /dev/null @@ -1,169 +0,0 @@ -#include <stdlib.h> -#include <stdio.h> -#include <string.h> -#include "cmark.h" -#include "buffer.h" -#include "node.h" - -#define INDENT 2 - -static void print_str(strbuf* buffer, const unsigned char *s, int len) -{ - int i; - - if (len < 0) - len = strlen((char *)s); - - strbuf_putc(buffer, '"'); - for (i = 0; i < len; ++i) { - unsigned char c = s[i]; - - switch (c) { - case '\n': - strbuf_printf(buffer, "\\n"); - break; - case '"': - strbuf_printf(buffer, "\\\""); - break; - case '\\': - strbuf_printf(buffer, "\\\\"); - break; - default: - strbuf_putc(buffer, (int)c); - } - } - strbuf_putc(buffer, '"'); -} - -// Prettyprint an inline list, for debugging. -static void render_nodes(strbuf* buffer, cmark_node* node, int indent) -{ - int i; - cmark_list *data; - - while(node != NULL) { - for (i=0; i < indent; i++) { - strbuf_putc(buffer, ' '); - } - switch(node->type) { - case NODE_DOCUMENT: - break; - case NODE_BLOCK_QUOTE: - strbuf_printf(buffer, "block_quote\n"); - break; - case NODE_LIST_ITEM: - strbuf_printf(buffer, "list_item\n"); - break; - case NODE_LIST: - data = &(node->as.list); - if (data->list_type == CMARK_ORDERED_LIST) { - strbuf_printf(buffer, "list (type=ordered tight=%s start=%d delim=%s)\n", - (data->tight ? "true" : "false"), - data->start, - (data->delimiter == CMARK_PAREN_DELIM ? "parens" : "period")); - } else { - strbuf_printf(buffer, "list (type=bullet tight=%s bullet_char=%c)\n", - (data->tight ? "true" : "false"), - data->bullet_char); - } - break; - case NODE_HEADER: - strbuf_printf(buffer, "header (level=%d)\n", node->as.header.level); - break; - case NODE_PARAGRAPH: - strbuf_printf(buffer, "paragraph\n"); - break; - case NODE_HRULE: - strbuf_printf(buffer, "hrule\n"); - break; - case NODE_CODE_BLOCK: - strbuf_printf(buffer, "code_block info="); - print_str(buffer, node->as.code.info.ptr, -1); - strbuf_putc(buffer, ' '); - print_str(buffer, node->string_content.ptr, -1); - strbuf_putc(buffer, '\n'); - break; - case NODE_HTML: - strbuf_printf(buffer, "html "); - print_str(buffer, node->string_content.ptr, -1); - strbuf_putc(buffer, '\n'); - break; - case NODE_REFERENCE_DEF: - // skip - // strbuf_printf(buffer, "reference_def\n"); - break; - case NODE_TEXT: - strbuf_printf(buffer, "text "); - print_str(buffer, node->as.literal.data, node->as.literal.len); - strbuf_putc(buffer, '\n'); - break; - case NODE_LINEBREAK: - strbuf_printf(buffer, "linebreak\n"); - break; - case NODE_SOFTBREAK: - strbuf_printf(buffer, "softbreak\n"); - break; - case NODE_INLINE_CODE: - strbuf_printf(buffer, "code "); - print_str(buffer, node->as.literal.data, node->as.literal.len); - strbuf_putc(buffer, '\n'); - break; - case NODE_INLINE_HTML: - strbuf_printf(buffer, "inline_html "); - print_str(buffer, node->as.literal.data, node->as.literal.len); - strbuf_putc(buffer, '\n'); - break; - case NODE_LINK: - case NODE_IMAGE: - strbuf_printf(buffer, "%s url=", node->type == NODE_LINK ? "link" : "image"); - - if (node->as.link.url) - print_str(buffer, node->as.link.url, -1); - - if (node->as.link.title) { - strbuf_printf(buffer, " title="); - print_str(buffer, node->as.link.title, -1); - } - strbuf_putc(buffer, '\n'); - break; - case NODE_STRONG: - strbuf_printf(buffer, "strong\n"); - break; - case NODE_EMPH: - strbuf_printf(buffer, "emph\n"); - break; - default: - break; - } - if (node->first_child) { // render children if any - indent += INDENT; - node = node->first_child; - } else if (node->next) { // otherwise render next sibling - node = node->next; - } else { - node = node->parent; // back up to parent - while (node) { - indent -= INDENT; - if (node->next) { - node = node->next; - break; - } else { - node = node->parent; - } - if (!node) { - break; - } - } - } - } -} - -char *cmark_render_ast(cmark_node *root) -{ - char* result; - strbuf buffer = GH_BUF_INIT; - render_nodes(&buffer, root, -2); - result = (char *)strbuf_detach(&buffer); - strbuf_free(&buffer); - return result; -} http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/5672da15/compiler/modules/CommonMark/src/references.c ---------------------------------------------------------------------- diff --git a/compiler/modules/CommonMark/src/references.c b/compiler/modules/CommonMark/src/references.c index 2b1d0a7..37bf4cb 100644 --- a/compiler/modules/CommonMark/src/references.c +++ b/compiler/modules/CommonMark/src/references.c @@ -30,9 +30,9 @@ static void reference_free(cmark_reference *ref) // remove leading/trailing whitespace, case fold // Return NULL if the reference name is actually empty (i.e. composed // solely from whitespace) -static unsigned char *normalize_reference(chunk *ref) +static unsigned char *normalize_reference(cmark_chunk *ref) { - strbuf normalized = GH_BUF_INIT; + cmark_strbuf normalized = GH_BUF_INIT; unsigned char *result; if(ref == NULL) @@ -42,10 +42,10 @@ static unsigned char *normalize_reference(chunk *ref) return NULL; utf8proc_case_fold(&normalized, ref->data, ref->len); - strbuf_trim(&normalized); - strbuf_normalize_whitespace(&normalized); + cmark_strbuf_trim(&normalized); + cmark_strbuf_normalize_whitespace(&normalized); - result = strbuf_detach(&normalized); + result = cmark_strbuf_detach(&normalized); assert(result); if (result[0] == '\0') { @@ -73,8 +73,8 @@ static void add_reference(cmark_reference_map *map, cmark_reference* ref) map->table[ref->hash % REFMAP_SIZE] = ref; } -void cmark_reference_create(cmark_reference_map *map, chunk *label, chunk *url, - chunk *title) +void cmark_reference_create(cmark_reference_map *map, cmark_chunk *label, cmark_chunk *url, + cmark_chunk *title) { cmark_reference *ref; unsigned char *reflabel = normalize_reference(label); @@ -97,7 +97,7 @@ void cmark_reference_create(cmark_reference_map *map, chunk *label, chunk *url, // Returns reference if refmap contains a reference with matching // label, otherwise NULL. -cmark_reference* cmark_reference_lookup(cmark_reference_map *map, chunk *label) +cmark_reference* cmark_reference_lookup(cmark_reference_map *map, cmark_chunk *label) { cmark_reference *ref = NULL; unsigned char *norm;
