http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/23472b18/compiler/modules/CommonMark/src/inlines.c ---------------------------------------------------------------------- diff --git a/compiler/modules/CommonMark/src/inlines.c b/compiler/modules/CommonMark/src/inlines.c new file mode 100644 index 0000000..643837b --- /dev/null +++ b/compiler/modules/CommonMark/src/inlines.c @@ -0,0 +1,987 @@ +#include <stdlib.h> +#include <string.h> +#include <stdio.h> +#include <ctype.h> + +#include "config.h" +#include "node.h" +#include "parser.h" +#include "references.h" +#include "cmark.h" +#include "html/houdini.h" +#include "utf8.h" +#include "scanners.h" +#include "inlines.h" + + +// Macros for creating various kinds of simple. +#define make_str(s) make_literal(CMARK_NODE_TEXT, s) +#define make_code(s) make_literal(CMARK_NODE_INLINE_CODE, s) +#define make_raw_html(s) make_literal(CMARK_NODE_INLINE_HTML, s) +#define make_linebreak() make_simple(CMARK_NODE_LINEBREAK) +#define make_softbreak() make_simple(CMARK_NODE_SOFTBREAK) +#define make_emph() make_simple(CMARK_NODE_EMPH) +#define make_strong() make_simple(CMARK_NODE_STRONG) + +typedef struct delimiter { + struct delimiter *previous; + struct delimiter *next; + cmark_node *inl_text; + unsigned char delim_char; + int position; + bool can_open; + bool can_close; +} delimiter; + +typedef struct { + chunk input; + int pos; + cmark_reference_map *refmap; + delimiter *last_delim; +} subject; + +static delimiter* +S_insert_emph(subject *subj, delimiter *opener, delimiter *closer); + +static int parse_inline(subject* subj, cmark_node * parent); + +static void subject_from_buf(subject *e, strbuf *buffer, + cmark_reference_map *refmap); +static int subject_find_special_char(subject *subj); + +static unsigned char *cmark_clean_autolink(chunk *url, int is_email) +{ + strbuf buf = GH_BUF_INIT; + + chunk_trim(url); + + if (url->len == 0) + return NULL; + + if (is_email) + strbuf_puts(&buf, "mailto:"); + + houdini_unescape_html_f(&buf, url->data, url->len); + return strbuf_detach(&buf); +} + +static inline cmark_node *make_link(cmark_node *label, unsigned char *url, unsigned char *title) +{ + cmark_node* e = (cmark_node *)calloc(1, sizeof(*e)); + if(e != NULL) { + e->type = CMARK_NODE_LINK; + e->first_child = label; + e->last_child = label; + e->as.link.url = url; + e->as.link.title = title; + e->next = NULL; + label->parent = e; + } + return e; +} + +static inline cmark_node* make_autolink(cmark_node* label, cmark_chunk url, int is_email) +{ + return make_link(label, cmark_clean_autolink(&url, is_email), NULL); +} + +// Create an inline with a literal string value. +static inline cmark_node* make_literal(cmark_node_type t, cmark_chunk s) +{ + cmark_node * e = (cmark_node *)calloc(1, sizeof(*e)); + if(e != NULL) { + e->type = t; + e->as.literal = s; + e->next = NULL; + e->prev = NULL; + e->parent = NULL; + e->first_child = NULL; + e->last_child = NULL; + // These fields aren't used for inlines: + e->start_line = 0; + e->start_column = 0; + e->end_line = 0; + } + return e; +} + +// Create an inline with no value. +static inline cmark_node* make_simple(cmark_node_type t) +{ + cmark_node* e = (cmark_node *)calloc(1, sizeof(*e)); + if(e != NULL) { + e->type = t; + e->next = NULL; + e->prev = NULL; + e->parent = NULL; + e->first_child = NULL; + e->last_child = NULL; + // These fields aren't used for inlines: + e->start_line = 0; + e->start_column = 0; + e->end_line = 0; + } + return e; +} + +static unsigned char *bufdup(const unsigned char *buf) +{ + unsigned char *new_buf = NULL; + + if (buf) { + int len = strlen((char *)buf); + new_buf = (unsigned char *)calloc(len + 1, sizeof(*new_buf)); + if(new_buf != NULL) { + memcpy(new_buf, buf, len + 1); + } + } + + return new_buf; +} + +static void subject_from_buf(subject *e, strbuf *buffer, + cmark_reference_map *refmap) +{ + e->input.data = buffer->ptr; + e->input.len = buffer->size; + e->input.alloc = 0; + e->pos = 0; + e->refmap = refmap; + e->last_delim = NULL; + + chunk_rtrim(&e->input); +} + +static inline int isbacktick(int c) +{ + return (c == '`'); +} + +static inline unsigned char peek_char(subject *subj) +{ + return (subj->pos < subj->input.len) ? subj->input.data[subj->pos] : 0; +} + +static inline unsigned char peek_at(subject *subj, int pos) +{ + return subj->input.data[pos]; +} + +// Return true if there are more characters in the subject. +static inline int is_eof(subject* subj) +{ + return (subj->pos >= subj->input.len); +} + +// Advance the subject. Doesn't check for eof. +#define advance(subj) (subj)->pos += 1 + +// Take characters while a predicate holds, and return a string. +static inline chunk take_while(subject* subj, int (*f)(int)) +{ + unsigned char c; + int startpos = subj->pos; + int len = 0; + + while ((c = peek_char(subj)) && (*f)(c)) { + advance(subj); + len++; + } + + return chunk_dup(&subj->input, startpos, len); +} + +// Try to process a backtick code span that began with a +// span of ticks of length openticklength length (already +// parsed). Return 0 if you don't find matching closing +// backticks, otherwise return the position in the subject +// after the closing backticks. +static int scan_to_closing_backticks(subject* subj, int openticklength) +{ + // read non backticks + unsigned char c; + while ((c = peek_char(subj)) && c != '`') { + advance(subj); + } + if (is_eof(subj)) { + return 0; // did not find closing ticks, return 0 + } + int numticks = 0; + while (peek_char(subj) == '`') { + advance(subj); + numticks++; + } + if (numticks != openticklength){ + return(scan_to_closing_backticks(subj, openticklength)); + } + return (subj->pos); +} + +// Parse backtick code section or raw backticks, return an inline. +// Assumes that the subject has a backtick at the current position. +static cmark_node* handle_backticks(subject *subj) +{ + chunk openticks = take_while(subj, isbacktick); + int startpos = subj->pos; + int endpos = scan_to_closing_backticks(subj, openticks.len); + + if (endpos == 0) { // not found + subj->pos = startpos; // rewind + return make_str(openticks); + } else { + strbuf buf = GH_BUF_INIT; + + strbuf_set(&buf, subj->input.data + startpos, endpos - startpos - openticks.len); + strbuf_trim(&buf); + strbuf_normalize_whitespace(&buf); + + return make_code(chunk_buf_detach(&buf)); + } +} + +// Scan ***, **, or * and return number scanned, or 0. +// Advances position. +static int scan_delims(subject* subj, unsigned char c, bool * can_open, bool * can_close) +{ + int numdelims = 0; + unsigned char char_before, char_after; + + char_before = subj->pos == 0 ? '\n' : peek_at(subj, subj->pos - 1); + while (peek_char(subj) == c) { + numdelims++; + advance(subj); + } + char_after = peek_char(subj); + *can_open = numdelims > 0 && !isspace(char_after); + *can_close = numdelims > 0 && !isspace(char_before); + if (c == '_') { + *can_open = *can_open && !isalnum(char_before); + *can_close = *can_close && !isalnum(char_after); + } + return numdelims; +} + +/* +static void print_delimiters(subject *subj) +{ + delimiter *delim; + delim = subj->last_delim; + while (delim != NULL) { + printf("Item at %p: %d %d %d next(%p) prev(%p)\n", + delim, delim->delim_char, + delim->can_open, delim->can_close, + delim->next, delim->previous); + delim = delim->previous; + } +} +*/ + +static void remove_delimiter(subject *subj, delimiter *delim) +{ + if (delim == NULL) return; + if (delim->next == NULL) { + // end of list: + assert(delim == subj->last_delim); + subj->last_delim = delim->previous; + } else { + delim->next->previous = delim->previous; + } + if (delim->previous != NULL) { + delim->previous->next = delim->next; + } + free(delim); +} + +static void push_delimiter(subject *subj, unsigned char c, bool can_open, + bool can_close, cmark_node *inl_text) +{ + delimiter *delim = + (delimiter*)malloc(sizeof(delimiter)); + if (delim == NULL) { + return; + } + delim->delim_char = c; + delim->can_open = can_open; + delim->can_close = can_close; + delim->inl_text = inl_text; + delim->previous = subj->last_delim; + delim->next = NULL; + if (delim->previous != NULL) { + delim->previous->next = delim; + } + delim->position = subj->pos; + subj->last_delim = delim; +} + +// Parse strong/emph or a fallback. +// Assumes the subject has '_' or '*' at the current position. +static cmark_node* handle_strong_emph(subject* subj, unsigned char c) +{ + int numdelims; + cmark_node * inl_text; + bool can_open, can_close; + + numdelims = scan_delims(subj, c, &can_open, &can_close); + + inl_text = make_str(chunk_dup(&subj->input, subj->pos - numdelims, numdelims)); + + if (can_open || can_close) { + push_delimiter(subj, c, can_open, can_close, inl_text); + } + + return inl_text; +} + +static void process_emphasis(subject *subj, delimiter *start_delim) +{ + delimiter *closer = subj->last_delim; + delimiter *opener; + + // move back to first relevant delim. + while (closer != NULL && closer->previous != start_delim) { + closer = closer->previous; + } + + // now move forward, looking for closers, and handling each + while (closer != NULL) { + if (closer->can_close && + (closer->delim_char == '*' || closer->delim_char == '_')) { + // Now look backwards for first matching opener: + opener = closer->previous; + while (opener != NULL && opener != start_delim) { + if (opener->delim_char == closer->delim_char && + opener->can_open) { + break; + } + opener = opener->previous; + } + if (opener != NULL && opener != start_delim) { + closer = S_insert_emph(subj, opener, closer); + } else { + closer = closer->next; + } + } else { + closer = closer->next; + } + } + // free all delimiters in list until start_delim: + while (subj->last_delim != start_delim) { + remove_delimiter(subj, subj->last_delim); + } +} + +static delimiter* +S_insert_emph(subject *subj, delimiter *opener, delimiter *closer) +{ + delimiter *delim, *tmp_delim; + int use_delims; + cmark_node *opener_inl = opener->inl_text; + cmark_node *closer_inl = closer->inl_text; + int opener_num_chars = opener_inl->as.literal.len; + int closer_num_chars = closer_inl->as.literal.len; + cmark_node *tmp, *emph, *first_child, *last_child; + + // calculate the actual number of characters used from this closer + if (closer_num_chars < 3 || opener_num_chars < 3) { + use_delims = closer_num_chars <= opener_num_chars ? + closer_num_chars : opener_num_chars; + } else { // closer and opener both have >= 3 characters + use_delims = closer_num_chars % 2 == 0 ? 2 : 1; + } + + // remove used characters from associated inlines. + opener_num_chars -= use_delims; + closer_num_chars -= use_delims; + opener_inl->as.literal.len = opener_num_chars; + closer_inl->as.literal.len = closer_num_chars; + + // free delimiters between opener and closer + delim = closer->previous; + while (delim != NULL && delim != opener) { + tmp_delim = delim->previous; + remove_delimiter(subj, delim); + delim = tmp_delim; + } + + first_child = opener_inl->next; + last_child = closer_inl->prev; + + // if opener has 0 characters, remove it and its associated inline + if (opener_num_chars == 0) { + // replace empty opener inline with emph + chunk_free(&(opener_inl->as.literal)); + emph = opener_inl; + emph->type = use_delims == 1 ? NODE_EMPH : NODE_STRONG; + // remove opener from list + remove_delimiter(subj, opener); + } + else { + // create new emph or strong, and splice it in to our inlines + // between the opener and closer + emph = use_delims == 1 ? make_emph() : make_strong(); + emph->parent = opener_inl->parent; + emph->prev = opener_inl; + opener_inl->next = emph; + } + + // push children below emph + emph->next = closer_inl; + closer_inl->prev = emph; + emph->first_child = first_child; + emph->last_child = last_child; + + // fix children pointers + first_child->prev = NULL; + last_child->next = NULL; + for (tmp = first_child; tmp != NULL; tmp = tmp->next) { + tmp->parent = emph; + } + + // if closer has 0 characters, remove it and its associated inline + if (closer_num_chars == 0) { + // remove empty closer inline + cmark_node_free(closer_inl); + // remove closer from list + tmp_delim = closer->next; + remove_delimiter(subj, closer); + closer = tmp_delim; + } + + return closer; +} + +// Parse backslash-escape or just a backslash, returning an inline. +static cmark_node* handle_backslash(subject *subj) +{ + advance(subj); + unsigned char nextchar = peek_char(subj); + if (ispunct(nextchar)) { // only ascii symbols and newline can be escaped + advance(subj); + return make_str(chunk_dup(&subj->input, subj->pos - 1, 1)); + } else if (nextchar == '\n') { + advance(subj); + return make_linebreak(); + } else { + return make_str(chunk_literal("\\")); + } +} + +// Parse an entity or a regular "&" string. +// Assumes the subject has an '&' character at the current position. +static cmark_node* handle_entity(subject* subj) +{ + strbuf ent = GH_BUF_INIT; + size_t len; + + advance(subj); + + len = houdini_unescape_ent(&ent, + subj->input.data + subj->pos, + subj->input.len - subj->pos + ); + + if (len == 0) + return make_str(chunk_literal("&")); + + subj->pos += len; + return make_str(chunk_buf_detach(&ent)); +} + +// Like make_str, but parses entities. +// Returns an inline sequence consisting of str and entity elements. +static cmark_node *make_str_with_entities(chunk *content) +{ + strbuf unescaped = GH_BUF_INIT; + + if (houdini_unescape_html(&unescaped, content->data, (size_t)content->len)) { + return make_str(chunk_buf_detach(&unescaped)); + } else { + return make_str(*content); + } +} + +// Clean a URL: remove surrounding whitespace and surrounding <>, +// and remove \ that escape punctuation. +unsigned char *cmark_clean_url(chunk *url) +{ + strbuf buf = GH_BUF_INIT; + + chunk_trim(url); + + if (url->len == 0) + return NULL; + + if (url->data[0] == '<' && url->data[url->len - 1] == '>') { + houdini_unescape_html_f(&buf, url->data + 1, url->len - 2); + } else { + houdini_unescape_html_f(&buf, url->data, url->len); + } + + strbuf_unescape(&buf); + return strbuf_detach(&buf); +} + +unsigned char *cmark_clean_title(chunk *title) +{ + strbuf buf = GH_BUF_INIT; + unsigned char first, last; + + if (title->len == 0) + return NULL; + + first = title->data[0]; + last = title->data[title->len - 1]; + + // remove surrounding quotes if any: + if ((first == '\'' && last == '\'') || + (first == '(' && last == ')') || + (first == '"' && last == '"')) { + houdini_unescape_html_f(&buf, title->data + 1, title->len - 2); + } else { + houdini_unescape_html_f(&buf, title->data, title->len); + } + + strbuf_unescape(&buf); + return strbuf_detach(&buf); +} + +// Parse an autolink or HTML tag. +// Assumes the subject has a '<' character at the current position. +static cmark_node* handle_pointy_brace(subject* subj) +{ + int matchlen = 0; + chunk contents; + + advance(subj); // advance past first < + + // first try to match a URL autolink + matchlen = scan_autolink_uri(&subj->input, subj->pos); + if (matchlen > 0) { + contents = chunk_dup(&subj->input, subj->pos, matchlen - 1); + subj->pos += matchlen; + + return make_autolink( + make_str_with_entities(&contents), + contents, 0 + ); + } + + // next try to match an email autolink + matchlen = scan_autolink_email(&subj->input, subj->pos); + if (matchlen > 0) { + contents = chunk_dup(&subj->input, subj->pos, matchlen - 1); + subj->pos += matchlen; + + return make_autolink( + make_str_with_entities(&contents), + contents, 1 + ); + } + + // finally, try to match an html tag + matchlen = scan_html_tag(&subj->input, subj->pos); + if (matchlen > 0) { + contents = chunk_dup(&subj->input, subj->pos - 1, matchlen + 1); + subj->pos += matchlen; + return make_raw_html(contents); + } + + // if nothing matches, just return the opening <: + return make_str(chunk_literal("<")); +} + +// Parse a link label. Returns 1 if successful. +// Note: unescaped brackets are not allowed in labels. +// The label begins with `[` and ends with the first `]` character +// encountered. Backticks in labels do not start code spans. +static int link_label(subject* subj, chunk *raw_label) +{ + int startpos = subj->pos; + int length = 0; + unsigned char c; + + // advance past [ + if (peek_char(subj) == '[') { + advance(subj); + } else { + return 0; + } + + while ((c = peek_char(subj)) && c != '[' && c != ']') { + if (c == '\\') { + advance(subj); + length++; + if (ispunct(peek_char(subj))) { + advance(subj); + length++; + } + } else { + advance(subj); + length++; + } + if (length > MAX_LINK_LABEL_LENGTH) { + goto noMatch; + } + } + + if (c == ']') { // match found + *raw_label = chunk_dup(&subj->input, startpos + 1, subj->pos - (startpos + 1)); + advance(subj); // advance past ] + return 1; + } + + noMatch: + subj->pos = startpos; // rewind + return 0; + +} + +// Return a link, an image, or a literal close bracket. +static cmark_node* handle_close_bracket(subject* subj, cmark_node *parent) +{ + int initial_pos; + int starturl, endurl, starttitle, endtitle, endall; + int n; + int sps; + cmark_reference *ref; + bool is_image = false; + chunk urlchunk, titlechunk; + unsigned char *url, *title; + delimiter *opener; + delimiter *tmp_delim; + cmark_node *link_text; + cmark_node *inl; + chunk raw_label; + int found_label; + + advance(subj); // advance past ] + initial_pos = subj->pos; + + // look through list of delimiters for a [ or ! + opener = subj->last_delim; + while (opener) { + if (opener->delim_char == '[' || opener->delim_char == '!') { + break; + } + opener = opener->previous; + } + + if (opener == NULL) { + return make_str(chunk_literal("]")); + } + + // If we got here, we matched a potential link/image text. + is_image = opener->delim_char == '!'; + link_text = opener->inl_text->next; + + // Now we check to see if it's a link/image. + + // First, look for an inline link. + if (peek_char(subj) == '(' && + ((sps = scan_spacechars(&subj->input, subj->pos + 1)) > -1) && + ((n = scan_link_url(&subj->input, subj->pos + 1 + sps)) > -1)) { + + // try to parse an explicit link: + starturl = subj->pos + 1 + sps; // after ( + endurl = starturl + n; + starttitle = endurl + scan_spacechars(&subj->input, endurl); + + // ensure there are spaces btw url and title + endtitle = (starttitle == endurl) ? starttitle : + starttitle + scan_link_title(&subj->input, starttitle); + + endall = endtitle + scan_spacechars(&subj->input, endtitle); + + if (peek_at(subj, endall) == ')') { + subj->pos = endall + 1; + + urlchunk = chunk_dup(&subj->input, starturl, endurl - starturl); + titlechunk = chunk_dup(&subj->input, starttitle, endtitle - starttitle); + url = cmark_clean_url(&urlchunk); + title = cmark_clean_title(&titlechunk); + chunk_free(&urlchunk); + chunk_free(&titlechunk); + goto match; + + } else { + goto noMatch; + } + } + + // Next, look for a following [link label] that matches in refmap. + // skip spaces + subj->pos = subj->pos + scan_spacechars(&subj->input, subj->pos); + raw_label = chunk_literal(""); + found_label = link_label(subj, &raw_label); + if (!found_label || raw_label.len == 0) { + chunk_free(&raw_label); + raw_label = chunk_dup(&subj->input, opener->position, + initial_pos - opener->position - 1); + } + + if (!found_label) { + // If we have a shortcut reference link, back up + // to before the spacse we skipped. + subj->pos = initial_pos; + } + + ref = cmark_reference_lookup(subj->refmap, &raw_label); + chunk_free(&raw_label); + + if (ref != NULL) { // found + url = bufdup(ref->url); + title = bufdup(ref->title); + goto match; + } else { + goto noMatch; + } + +noMatch: + // If we fall through to here, it means we didn't match a link: + remove_delimiter(subj, opener); // remove this opener from delimiter list + subj->pos = initial_pos; + return make_str(chunk_literal("]")); + +match: + inl = opener->inl_text; + inl->type = is_image ? NODE_IMAGE : NODE_LINK; + chunk_free(&inl->as.literal); + inl->first_child = link_text; + process_emphasis(subj, opener->previous); + inl->as.link.url = url; + inl->as.link.title = title; + inl->next = NULL; + if (link_text) { + cmark_node *tmp; + link_text->prev = NULL; + for (tmp = link_text; tmp->next != NULL; tmp = tmp->next) { + tmp->parent = inl; + } + tmp->parent = inl; + inl->last_child = tmp; + } + parent->last_child = inl; + + // process_emphasis will remove this delimiter and all later ones. + // Now, if we have a link, we also want to remove earlier link + // delimiters. (This code can be removed if we decide to allow links + // inside links.) + if (!is_image) { + opener = subj->last_delim; + while (opener != NULL) { + tmp_delim = opener->previous; + if (opener->delim_char == '[') { + remove_delimiter(subj, opener); + } + opener = tmp_delim; + } + } + + return NULL; +} + +// Parse a hard or soft linebreak, returning an inline. +// Assumes the subject has a newline at the current position. +static cmark_node* handle_newline(subject *subj) +{ + int nlpos = subj->pos; + // skip over newline + advance(subj); + // skip spaces at beginning of line + while (peek_char(subj) == ' ') { + advance(subj); + } + if (nlpos > 1 && + peek_at(subj, nlpos - 1) == ' ' && + peek_at(subj, nlpos - 2) == ' ') { + return make_linebreak(); + } else { + return make_softbreak(); + } +} + +static int subject_find_special_char(subject *subj) +{ + // "\n\\`&_*[]<!" + static const int8_t SPECIAL_CHARS[256] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, + 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + + int n = subj->pos + 1; + + while (n < subj->input.len) { + if (SPECIAL_CHARS[subj->input.data[n]]) + return n; + n++; + } + + return subj->input.len; +} + +// Parse an inline, advancing subject, and add it as a child of parent. +// Return 0 if no inline can be parsed, 1 otherwise. +static int parse_inline(subject* subj, cmark_node * parent) +{ + cmark_node* new_inl = NULL; + chunk contents; + unsigned char c; + int endpos; + c = peek_char(subj); + if (c == 0) { + return 0; + } + switch(c){ + case '\n': + new_inl = handle_newline(subj); + break; + case '`': + new_inl = handle_backticks(subj); + break; + case '\\': + new_inl = handle_backslash(subj); + break; + case '&': + new_inl = handle_entity(subj); + break; + case '<': + new_inl = handle_pointy_brace(subj); + break; + case '*': + case '_': + new_inl = handle_strong_emph(subj, c); + break; + case '[': + advance(subj); + new_inl = make_str(chunk_literal("[")); + push_delimiter(subj, '[', true, false, new_inl); + break; + case ']': + new_inl = handle_close_bracket(subj, parent); + break; + case '!': + advance(subj); + if (peek_char(subj) == '[') { + advance(subj); + new_inl = make_str(chunk_literal("![")); + push_delimiter(subj, '!', false, true, new_inl); + } else { + new_inl = make_str(chunk_literal("!")); + } + break; + default: + endpos = subject_find_special_char(subj); + contents = chunk_dup(&subj->input, subj->pos, endpos - subj->pos); + subj->pos = endpos; + + // if we're at a newline, strip trailing spaces. + if (peek_char(subj) == '\n') { + chunk_rtrim(&contents); + } + + new_inl = make_str(contents); + } + if (new_inl != NULL) { + cmark_node_append_child(parent, new_inl); + } + + return 1; +} + +// Parse inlines from parent's string_content, adding as children of parent. +extern void cmark_parse_inlines(cmark_node* parent, cmark_reference_map *refmap) +{ + subject subj; + subject_from_buf(&subj, &parent->string_content, refmap); + + while (!is_eof(&subj) && parse_inline(&subj, parent)) ; + + process_emphasis(&subj, NULL); +} + +// Parse zero or more space characters, including at most one newline. +static void spnl(subject* subj) +{ + bool seen_newline = false; + while (peek_char(subj) == ' ' || + (!seen_newline && + (seen_newline = peek_char(subj) == '\n'))) { + advance(subj); + } +} + +// Parse reference. Assumes string begins with '[' character. +// Modify refmap if a reference is encountered. +// Return 0 if no reference found, otherwise position of subject +// after reference is parsed. +int cmark_parse_reference_inline(strbuf *input, cmark_reference_map *refmap) +{ + subject subj; + + chunk lab; + chunk url; + chunk title; + + int matchlen = 0; + int beforetitle; + + subject_from_buf(&subj, input, NULL); + + // parse label: + if (!link_label(&subj, &lab)) + return 0; + + // colon: + if (peek_char(&subj) == ':') { + advance(&subj); + } else { + return 0; + } + + // parse link url: + spnl(&subj); + matchlen = scan_link_url(&subj.input, subj.pos); + if (matchlen) { + url = chunk_dup(&subj.input, subj.pos, matchlen); + subj.pos += matchlen; + } else { + return 0; + } + + // parse optional link_title + beforetitle = subj.pos; + spnl(&subj); + matchlen = scan_link_title(&subj.input, subj.pos); + if (matchlen) { + title = chunk_dup(&subj.input, subj.pos, matchlen); + subj.pos += matchlen; + } else { + subj.pos = beforetitle; + title = chunk_literal(""); + } + // parse final spaces and newline: + while (peek_char(&subj) == ' ') { + advance(&subj); + } + if (peek_char(&subj) == '\n') { + advance(&subj); + } else if (peek_char(&subj) != 0) { + return 0; + } + // insert reference into refmap + cmark_reference_create(refmap, &lab, &url, &title); + return subj.pos; +}
http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/23472b18/compiler/modules/CommonMark/src/inlines.h ---------------------------------------------------------------------- diff --git a/compiler/modules/CommonMark/src/inlines.h b/compiler/modules/CommonMark/src/inlines.h new file mode 100644 index 0000000..d2ccfb4 --- /dev/null +++ b/compiler/modules/CommonMark/src/inlines.h @@ -0,0 +1,19 @@ +#ifndef CMARK_INLINES_H +#define CMARK_INLINES_H + +#ifdef __cplusplus +extern "C" { +#endif + +unsigned char *cmark_clean_url(cmark_chunk *url); +unsigned char *cmark_clean_title(cmark_chunk *title); + +void cmark_parse_inlines(cmark_node* parent, cmark_reference_map *refmap); + +int cmark_parse_reference_inline(cmark_strbuf *input, cmark_reference_map *refmap); + +#ifdef __cplusplus +} +#endif + +#endif http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/23472b18/compiler/modules/CommonMark/src/node.c ---------------------------------------------------------------------- diff --git a/compiler/modules/CommonMark/src/node.c b/compiler/modules/CommonMark/src/node.c new file mode 100644 index 0000000..fd92fdc --- /dev/null +++ b/compiler/modules/CommonMark/src/node.c @@ -0,0 +1,655 @@ +#include <stdlib.h> +#include <string.h> + +#include "config.h" +#include "node.h" + +static void +S_node_unlink(cmark_node *node); + +cmark_node* +cmark_node_new(cmark_node_type type) { + cmark_node *node = (cmark_node *)calloc(1, sizeof(*node)); + node->type = type; + + switch (node->type) { + case CMARK_NODE_HEADER: + node->as.header.level = 1; + break; + + case CMARK_NODE_LIST: { + cmark_list *list = &node->as.list; + list->list_type = CMARK_BULLET_LIST; + list->start = 1; + list->tight = false; + break; + } + + default: + break; + } + + return node; +} + +// Free a cmark_node list and any children. +static +void S_free_nodes(cmark_node *e) +{ + cmark_node *next; + while (e != NULL) { + strbuf_free(&e->string_content); + switch (e->type){ + case NODE_CODE_BLOCK: + strbuf_free(&e->as.code.info); + break; + case NODE_TEXT: + case NODE_INLINE_HTML: + case NODE_INLINE_CODE: + cmark_chunk_free(&e->as.literal); + break; + case NODE_LINK: + case NODE_IMAGE: + free(e->as.link.url); + free(e->as.link.title); + break; + default: + break; + } + if (e->last_child) { + // Splice children into list + e->last_child->next = e->next; + e->next = e->first_child; + } + next = e->next; + free(e); + e = next; + } +} + +void +cmark_node_free(cmark_node *node) { + S_node_unlink(node); + node->next = NULL; + S_free_nodes(node); +} + +cmark_node_type +cmark_node_get_type(cmark_node *node) +{ + return node->type; +} + +static const char* +S_type_string(cmark_node *node) +{ + switch (node->type) { + case CMARK_NODE_DOCUMENT: return "DOCUMENT"; + case CMARK_NODE_BLOCK_QUOTE: return "BLOCK_QUOTE"; + case CMARK_NODE_LIST: return "LIST"; + case CMARK_NODE_LIST_ITEM: return "LIST_ITEM"; + case CMARK_NODE_CODE_BLOCK: return "CODE_BLOCK"; + case CMARK_NODE_HTML: return "HTML"; + case CMARK_NODE_PARAGRAPH: return "PARAGRAPH"; + case CMARK_NODE_HEADER: return "HEADER"; + case CMARK_NODE_HRULE: return "HRULE"; + case CMARK_NODE_REFERENCE_DEF: return "REFERENCE_DEF"; + case CMARK_NODE_TEXT: return "TEXT"; + case CMARK_NODE_SOFTBREAK: return "SOFTBREAK"; + case CMARK_NODE_LINEBREAK: return "LINEBREAK"; + case CMARK_NODE_INLINE_CODE: return "INLINE_CODE"; + case CMARK_NODE_INLINE_HTML: return "INLINE_HTML"; + case CMARK_NODE_EMPH: return "EMPH"; + case CMARK_NODE_STRONG: return "STRONG"; + case CMARK_NODE_LINK: return "LINK"; + case CMARK_NODE_IMAGE: return "IMAGE"; + } + + return "<unknown>"; +} + +cmark_node* +cmark_node_next(cmark_node *node) +{ + return node->next; +} + +cmark_node* +cmark_node_previous(cmark_node *node) +{ + return node->prev; +} + +cmark_node* +cmark_node_parent(cmark_node *node) +{ + return node->parent; +} + +cmark_node* +cmark_node_first_child(cmark_node *node) +{ + return node->first_child; +} + +cmark_node* +cmark_node_last_child(cmark_node *node) +{ + return node->last_child; +} + +static char* +S_strdup(const char *str) { + size_t size = strlen(str) + 1; + char *dup = (char *)malloc(size); + memcpy(dup, str, size); + return dup; +} + +const char* +cmark_node_get_string_content(cmark_node *node) { + switch (node->type) { + case NODE_CODE_BLOCK: + case NODE_HTML: + return cmark_strbuf_cstr(&node->string_content); + + case NODE_TEXT: + case NODE_INLINE_HTML: + case NODE_INLINE_CODE: + return cmark_chunk_to_cstr(&node->as.literal); + + default: + break; + } + + return NULL; +} + +int +cmark_node_set_string_content(cmark_node *node, const char *content) { + switch (node->type) { + case NODE_CODE_BLOCK: + case NODE_HTML: + cmark_strbuf_sets(&node->string_content, content); + return 1; + + case NODE_TEXT: + case NODE_INLINE_HTML: + case NODE_INLINE_CODE: + cmark_chunk_set_cstr(&node->as.literal, content); + return 1; + + default: + break; + } + + return 0; +} + +int +cmark_node_get_header_level(cmark_node *node) { + switch (node->type) { + case CMARK_NODE_HEADER: + return node->as.header.level; + + default: + break; + } + + return 0; +} + +int +cmark_node_set_header_level(cmark_node *node, int level) { + if (level < 1 || level > 6) { + return 0; + } + + switch (node->type) { + case CMARK_NODE_HEADER: + node->as.header.level = level; + return 1; + + default: + break; + } + + return 0; +} + +cmark_list_type +cmark_node_get_list_type(cmark_node *node) { + if (node->type == CMARK_NODE_LIST) { + return node->as.list.list_type; + } + else { + return CMARK_NO_LIST; + } +} + +int +cmark_node_set_list_type(cmark_node *node, cmark_list_type type) { + if (!(type == CMARK_BULLET_LIST || type == CMARK_ORDERED_LIST)) { + return 0; + } + + if (node->type == CMARK_NODE_LIST) { + node->as.list.list_type = type; + return 1; + } + else { + return 0; + } +} + +int +cmark_node_get_list_start(cmark_node *node) { + if (node->type == CMARK_NODE_LIST) { + return node->as.list.start; + } + else { + return 0; + } +} + +int +cmark_node_set_list_start(cmark_node *node, int start) { + if (start < 0) { + return 0; + } + + if (node->type == CMARK_NODE_LIST) { + node->as.list.start = start; + return 1; + } + else { + return 0; + } +} + +int +cmark_node_get_list_tight(cmark_node *node) { + if (node->type == CMARK_NODE_LIST) { + return node->as.list.tight; + } + else { + return 0; + } +} + +int +cmark_node_set_list_tight(cmark_node *node, int tight) { + if (node->type == CMARK_NODE_LIST) { + node->as.list.tight = tight; + return 1; + } + else { + return 0; + } +} + +const char* +cmark_node_get_fence_info(cmark_node *node) { + if (node->type == NODE_CODE_BLOCK) { + return cmark_strbuf_cstr(&node->as.code.info); + } + else { + return NULL; + } +} + +int +cmark_node_set_fence_info(cmark_node *node, const char *info) { + if (node->type == NODE_CODE_BLOCK) { + cmark_strbuf_sets(&node->as.code.info, info); + return 1; + } + else { + return 0; + } +} + +const char* +cmark_node_get_url(cmark_node *node) { + switch (node->type) { + case NODE_LINK: + case NODE_IMAGE: + return (char *)node->as.link.url; + default: + break; + } + + return NULL; +} + +int +cmark_node_set_url(cmark_node *node, const char *url) { + switch (node->type) { + case NODE_LINK: + case NODE_IMAGE: + free(node->as.link.url); + node->as.link.url = (unsigned char *)S_strdup(url); + return 1; + default: + break; + } + + return 0; +} + +const char* +cmark_node_get_title(cmark_node *node) { + switch (node->type) { + case NODE_LINK: + case NODE_IMAGE: + return (char *)node->as.link.title; + default: + break; + } + + return NULL; +} + +int +cmark_node_set_title(cmark_node *node, const char *title) { + switch (node->type) { + case NODE_LINK: + case NODE_IMAGE: + free(node->as.link.title); + node->as.link.title = (unsigned char *)S_strdup(title); + return 1; + default: + break; + } + + return 0; +} + +int +cmark_node_get_start_line(cmark_node *node) { + return node->start_line; +} + +int +cmark_node_get_start_column(cmark_node *node) { + return node->start_column; +} + +int +cmark_node_get_end_line(cmark_node *node) { + return node->end_line; +} + +static inline bool +S_is_block(cmark_node *node) { + return node->type >= CMARK_NODE_FIRST_BLOCK + && node->type <= CMARK_NODE_LAST_BLOCK; +} + +static inline bool +S_is_inline(cmark_node *node) { + return node->type >= CMARK_NODE_FIRST_INLINE + && node->type <= CMARK_NODE_LAST_INLINE; +} + +static bool +S_can_contain(cmark_node *node, cmark_node *child) +{ + cmark_node *cur; + + // Verify that child is not an ancestor of node or equal to node. + cur = node; + do { + if (cur == child) { + return false; + } + cur = cur->parent; + } while (cur != NULL); + + if (child->type == CMARK_NODE_DOCUMENT) { + return false; + } + + switch (node->type) { + case CMARK_NODE_DOCUMENT: + case CMARK_NODE_BLOCK_QUOTE: + case CMARK_NODE_LIST_ITEM: + return S_is_block(child) + && child->type != CMARK_NODE_LIST_ITEM; + + case CMARK_NODE_LIST: + return child->type == CMARK_NODE_LIST_ITEM; + + case CMARK_NODE_PARAGRAPH: + case CMARK_NODE_HEADER: + case CMARK_NODE_EMPH: + case CMARK_NODE_STRONG: + case CMARK_NODE_LINK: + case CMARK_NODE_IMAGE: + return S_is_inline(child); + + default: + break; + } + + return false; +} + +// Unlink a node without adjusting its next, prev, and parent pointers. +static void +S_node_unlink(cmark_node *node) +{ + if (node->prev) { + node->prev->next = node->next; + } + if (node->next) { + node->next->prev = node->prev; + } + + // Adjust first_child and last_child of parent. + cmark_node *parent = node->parent; + if (parent) { + if (parent->first_child == node) { + parent->first_child = node->next; + } + if (parent->last_child == node) { + parent->last_child = node->prev; + } + } +} + +void +cmark_node_unlink(cmark_node *node) { + S_node_unlink(node); + + node->next = NULL; + node->prev = NULL; + node->parent = NULL; + +} + +int +cmark_node_insert_before(cmark_node *node, cmark_node *sibling) +{ + if (!node->parent || !S_can_contain(node->parent, sibling)) { + return 0; + } + + S_node_unlink(sibling); + + cmark_node *old_prev = node->prev; + + // Insert 'sibling' between 'old_prev' and 'node'. + if (old_prev) { + old_prev->next = sibling; + } + sibling->prev = old_prev; + sibling->next = node; + node->prev = sibling; + + // Set new parent. + cmark_node *parent = node->parent; + sibling->parent = parent; + + // Adjust first_child of parent if inserted as first child. + if (parent && !old_prev) { + parent->first_child = sibling; + } + + return 1; +} + +int +cmark_node_insert_after(cmark_node *node, cmark_node *sibling) +{ + if (!node->parent || !S_can_contain(node->parent, sibling)) { + return 0; + } + + S_node_unlink(sibling); + + cmark_node *old_next = node->next; + + // Insert 'sibling' between 'node' and 'old_next'. + if (old_next) { + old_next->prev = sibling; + } + sibling->next = old_next; + sibling->prev = node; + node->next = sibling; + + // Set new parent. + cmark_node *parent = node->parent; + sibling->parent = parent; + + // Adjust last_child of parent if inserted as last child. + if (parent && !old_next) { + parent->last_child = sibling; + } + + return 1; +} + +int +cmark_node_prepend_child(cmark_node *node, cmark_node *child) +{ + if (!S_can_contain(node, child)) { + return 0; + } + + S_node_unlink(child); + + cmark_node *old_first_child = node->first_child; + + child->next = old_first_child; + child->prev = NULL; + child->parent = node; + node->first_child = child; + + if (old_first_child) { + old_first_child->prev = child; + } + else { + // Also set last_child if node previously had no children. + node->last_child = child; + } + + return 1; +} + +int +cmark_node_append_child(cmark_node *node, cmark_node *child) +{ + if (!S_can_contain(node, child)) { + return 0; + } + + S_node_unlink(child); + + cmark_node *old_last_child = node->last_child; + + child->next = NULL; + child->prev = old_last_child; + child->parent = node; + node->last_child = child; + + if (old_last_child) { + old_last_child->next = child; + } + else { + // Also set first_child if node previously had no children. + node->first_child = child; + } + + return 1; +} + +static void +S_print_error(FILE *out, cmark_node *node, const char *elem) +{ + if (out == NULL) { + return; + } + fprintf(out, "Invalid '%s' in node type %s at %d:%d\n", elem, + S_type_string(node), node->start_line, node->start_column); +} + +int +cmark_node_check(cmark_node *node, FILE *out) +{ + cmark_node *cur; + int errors = 0; + + if (!node) { + return 0; + } + + cur = node; + while (true) { + if (cur->first_child) { + if (cur->first_child->prev != NULL) { + S_print_error(out, cur->first_child, "prev"); + cur->first_child->prev = NULL; + ++errors; + } + if (cur->first_child->parent != cur) { + S_print_error(out, cur->first_child, "parent"); + cur->first_child->parent = cur; + ++errors; + } + cur = cur->first_child; + continue; + } + + next_sibling: + if (cur == node) { + break; + } + if (cur->next) { + if (cur->next->prev != cur) { + S_print_error(out, cur->next, "prev"); + cur->next->prev = cur; + ++errors; + } + if (cur->next->parent != cur->parent) { + S_print_error(out, cur->next, "parent"); + cur->next->parent = cur->parent; + ++errors; + } + cur = cur->next; + continue; + } + + if (cur->parent->last_child != cur) { + S_print_error(out, cur->parent, "last_child"); + cur->parent->last_child = cur; + ++errors; + } + cur = cur->parent; + goto next_sibling; + } + + return errors; +} + + http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/23472b18/compiler/modules/CommonMark/src/node.h ---------------------------------------------------------------------- diff --git a/compiler/modules/CommonMark/src/node.h b/compiler/modules/CommonMark/src/node.h new file mode 100644 index 0000000..b842ed8 --- /dev/null +++ b/compiler/modules/CommonMark/src/node.h @@ -0,0 +1,76 @@ +#ifndef CMARK_NODE_H +#define CMARK_NODE_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include <stdio.h> + +#include "cmark.h" +#include "buffer.h" +#include "chunk.h" + +typedef struct { + cmark_list_type list_type; + int marker_offset; + int padding; + int start; + cmark_delim_type delimiter; + unsigned char bullet_char; + bool tight; +} cmark_list; + +typedef struct { + bool fenced; + int fence_length; + int fence_offset; + unsigned char fence_char; + cmark_strbuf info; +} cmark_code; + +typedef struct { + int level; + bool setext; +} cmark_header; + +typedef struct { + unsigned char *url; + unsigned char *title; +} cmark_link; + +struct cmark_node { + cmark_node_type type; + + struct cmark_node *next; + struct cmark_node *prev; + struct cmark_node *parent; + struct cmark_node *first_child; + struct cmark_node *last_child; + + int start_line; + int start_column; + int end_line; + bool open; + bool last_line_blank; + + cmark_strbuf string_content; + + union { + cmark_chunk literal; + cmark_list list; + cmark_code code; + cmark_header header; + cmark_link link; + } as; +}; + +CMARK_EXPORT int +cmark_node_check(cmark_node *node, FILE *out); + +#ifdef __cplusplus +} +#endif + +#endif + http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/23472b18/compiler/modules/CommonMark/src/parser.h ---------------------------------------------------------------------- diff --git a/compiler/modules/CommonMark/src/parser.h b/compiler/modules/CommonMark/src/parser.h new file mode 100644 index 0000000..9d65b67 --- /dev/null +++ b/compiler/modules/CommonMark/src/parser.h @@ -0,0 +1,27 @@ +#ifndef CMARK_AST_H +#define CMARK_AST_H + +#include <stdio.h> +#include "node.h" +#include "buffer.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define MAX_LINK_LABEL_LENGTH 1000 + +struct cmark_parser { + struct cmark_reference_map *refmap; + struct cmark_node* root; + struct cmark_node* current; + int line_number; + cmark_strbuf *curline; + cmark_strbuf *linebuf; +}; + +#ifdef __cplusplus +} +#endif + +#endif http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/23472b18/compiler/modules/CommonMark/src/print.c ---------------------------------------------------------------------- diff --git a/compiler/modules/CommonMark/src/print.c b/compiler/modules/CommonMark/src/print.c new file mode 100644 index 0000000..d2dfe8c --- /dev/null +++ b/compiler/modules/CommonMark/src/print.c @@ -0,0 +1,169 @@ +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include "cmark.h" +#include "buffer.h" +#include "node.h" + +#define INDENT 2 + +static void print_str(strbuf* buffer, const unsigned char *s, int len) +{ + int i; + + if (len < 0) + len = strlen((char *)s); + + strbuf_putc(buffer, '"'); + for (i = 0; i < len; ++i) { + unsigned char c = s[i]; + + switch (c) { + case '\n': + strbuf_printf(buffer, "\\n"); + break; + case '"': + strbuf_printf(buffer, "\\\""); + break; + case '\\': + strbuf_printf(buffer, "\\\\"); + break; + default: + strbuf_putc(buffer, (int)c); + } + } + strbuf_putc(buffer, '"'); +} + +// Prettyprint an inline list, for debugging. +static void render_nodes(strbuf* buffer, cmark_node* node, int indent) +{ + int i; + cmark_list *data; + + while(node != NULL) { + for (i=0; i < indent; i++) { + strbuf_putc(buffer, ' '); + } + switch(node->type) { + case NODE_DOCUMENT: + break; + case NODE_BLOCK_QUOTE: + strbuf_printf(buffer, "block_quote\n"); + break; + case NODE_LIST_ITEM: + strbuf_printf(buffer, "list_item\n"); + break; + case NODE_LIST: + data = &(node->as.list); + if (data->list_type == CMARK_ORDERED_LIST) { + strbuf_printf(buffer, "list (type=ordered tight=%s start=%d delim=%s)\n", + (data->tight ? "true" : "false"), + data->start, + (data->delimiter == CMARK_PAREN_DELIM ? "parens" : "period")); + } else { + strbuf_printf(buffer, "list (type=bullet tight=%s bullet_char=%c)\n", + (data->tight ? "true" : "false"), + data->bullet_char); + } + break; + case NODE_HEADER: + strbuf_printf(buffer, "header (level=%d)\n", node->as.header.level); + break; + case NODE_PARAGRAPH: + strbuf_printf(buffer, "paragraph\n"); + break; + case NODE_HRULE: + strbuf_printf(buffer, "hrule\n"); + break; + case NODE_CODE_BLOCK: + strbuf_printf(buffer, "code_block info="); + print_str(buffer, node->as.code.info.ptr, -1); + strbuf_putc(buffer, ' '); + print_str(buffer, node->string_content.ptr, -1); + strbuf_putc(buffer, '\n'); + break; + case NODE_HTML: + strbuf_printf(buffer, "html "); + print_str(buffer, node->string_content.ptr, -1); + strbuf_putc(buffer, '\n'); + break; + case NODE_REFERENCE_DEF: + // skip + // strbuf_printf(buffer, "reference_def\n"); + break; + case NODE_TEXT: + strbuf_printf(buffer, "text "); + print_str(buffer, node->as.literal.data, node->as.literal.len); + strbuf_putc(buffer, '\n'); + break; + case NODE_LINEBREAK: + strbuf_printf(buffer, "linebreak\n"); + break; + case NODE_SOFTBREAK: + strbuf_printf(buffer, "softbreak\n"); + break; + case NODE_INLINE_CODE: + strbuf_printf(buffer, "code "); + print_str(buffer, node->as.literal.data, node->as.literal.len); + strbuf_putc(buffer, '\n'); + break; + case NODE_INLINE_HTML: + strbuf_printf(buffer, "inline_html "); + print_str(buffer, node->as.literal.data, node->as.literal.len); + strbuf_putc(buffer, '\n'); + break; + case NODE_LINK: + case NODE_IMAGE: + strbuf_printf(buffer, "%s url=", node->type == NODE_LINK ? "link" : "image"); + + if (node->as.link.url) + print_str(buffer, node->as.link.url, -1); + + if (node->as.link.title) { + strbuf_printf(buffer, " title="); + print_str(buffer, node->as.link.title, -1); + } + strbuf_putc(buffer, '\n'); + break; + case NODE_STRONG: + strbuf_printf(buffer, "strong\n"); + break; + case NODE_EMPH: + strbuf_printf(buffer, "emph\n"); + break; + default: + break; + } + if (node->first_child) { // render children if any + indent += INDENT; + node = node->first_child; + } else if (node->next) { // otherwise render next sibling + node = node->next; + } else { + node = node->parent; // back up to parent + while (node) { + indent -= INDENT; + if (node->next) { + node = node->next; + break; + } else { + node = node->parent; + } + if (!node) { + break; + } + } + } + } +} + +char *cmark_render_ast(cmark_node *root) +{ + char* result; + strbuf buffer = GH_BUF_INIT; + render_nodes(&buffer, root, -2); + result = (char *)strbuf_detach(&buffer); + strbuf_free(&buffer); + return result; +} http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/23472b18/compiler/modules/CommonMark/src/references.c ---------------------------------------------------------------------- diff --git a/compiler/modules/CommonMark/src/references.c b/compiler/modules/CommonMark/src/references.c new file mode 100644 index 0000000..2b1d0a7 --- /dev/null +++ b/compiler/modules/CommonMark/src/references.c @@ -0,0 +1,154 @@ +#include "cmark.h" +#include "utf8.h" +#include "parser.h" +#include "references.h" +#include "inlines.h" +#include "chunk.h" + +static unsigned int +refhash(const unsigned char *link_ref) +{ + unsigned int hash = 0; + + while (*link_ref) + hash = (*link_ref++) + (hash << 6) + (hash << 16) - hash; + + return hash; +} + +static void reference_free(cmark_reference *ref) +{ + if(ref != NULL) { + free(ref->label); + free(ref->url); + free(ref->title); + free(ref); + } +} + +// normalize reference: collapse internal whitespace to single space, +// remove leading/trailing whitespace, case fold +// Return NULL if the reference name is actually empty (i.e. composed +// solely from whitespace) +static unsigned char *normalize_reference(chunk *ref) +{ + strbuf normalized = GH_BUF_INIT; + unsigned char *result; + + if(ref == NULL) + return NULL; + + if (ref->len == 0) + return NULL; + + utf8proc_case_fold(&normalized, ref->data, ref->len); + strbuf_trim(&normalized); + strbuf_normalize_whitespace(&normalized); + + result = strbuf_detach(&normalized); + assert(result); + + if (result[0] == '\0') { + free(result); + return NULL; + } + + return result; +} + +static void add_reference(cmark_reference_map *map, cmark_reference* ref) +{ + cmark_reference *t = ref->next = map->table[ref->hash % REFMAP_SIZE]; + + while (t) { + if (t->hash == ref->hash && + !strcmp((char *)t->label, (char *)ref->label)) { + reference_free(ref); + return; + } + + t = t->next; + } + + map->table[ref->hash % REFMAP_SIZE] = ref; +} + +void cmark_reference_create(cmark_reference_map *map, chunk *label, chunk *url, + chunk *title) +{ + cmark_reference *ref; + unsigned char *reflabel = normalize_reference(label); + + /* empty reference name, or composed from only whitespace */ + if (reflabel == NULL) + return; + + ref = (cmark_reference *)calloc(1, sizeof(*ref)); + if(ref != NULL) { + ref->label = reflabel; + ref->hash = refhash(ref->label); + ref->url = cmark_clean_url(url); + ref->title = cmark_clean_title(title); + ref->next = NULL; + + add_reference(map, ref); + } +} + +// Returns reference if refmap contains a reference with matching +// label, otherwise NULL. +cmark_reference* cmark_reference_lookup(cmark_reference_map *map, chunk *label) +{ + cmark_reference *ref = NULL; + unsigned char *norm; + unsigned int hash; + + if (label->len > MAX_LINK_LABEL_LENGTH) + return NULL; + + if (map == NULL) + return NULL; + + norm = normalize_reference(label); + if (norm == NULL) + return NULL; + + hash = refhash(norm); + ref = map->table[hash % REFMAP_SIZE]; + + while (ref) { + if (ref->hash == hash && + !strcmp((char *)ref->label, (char *)norm)) + break; + ref = ref->next; + } + + free(norm); + return ref; +} + +void cmark_reference_map_free(cmark_reference_map *map) +{ + unsigned int i; + + if(map == NULL) + return; + + for (i = 0; i < REFMAP_SIZE; ++i) { + cmark_reference *ref = map->table[i]; + cmark_reference *next; + + while (ref) { + next = ref->next; + reference_free(ref); + ref = next; + } + } + + free(map); +} + +cmark_reference_map *cmark_reference_map_new(void) +{ + return (cmark_reference_map *)calloc(1, sizeof(cmark_reference_map)); +} http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/23472b18/compiler/modules/CommonMark/src/references.h ---------------------------------------------------------------------- diff --git a/compiler/modules/CommonMark/src/references.h b/compiler/modules/CommonMark/src/references.h new file mode 100644 index 0000000..69325bb --- /dev/null +++ b/compiler/modules/CommonMark/src/references.h @@ -0,0 +1,37 @@ +#ifndef CMARK_REFERENCES_H +#define CMARK_REFERENCES_H + +#include "chunk.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define REFMAP_SIZE 16 + +struct cmark_reference { + struct cmark_reference *next; + unsigned char *label; + unsigned char *url; + unsigned char *title; + unsigned int hash; +}; + +typedef struct cmark_reference cmark_reference; + +struct cmark_reference_map { + cmark_reference *table[REFMAP_SIZE]; +}; + +typedef struct cmark_reference_map cmark_reference_map; + +cmark_reference_map *cmark_reference_map_new(void); +void cmark_reference_map_free(cmark_reference_map *map); +cmark_reference* cmark_reference_lookup(cmark_reference_map *map, cmark_chunk *label); +extern void cmark_reference_create(cmark_reference_map *map, cmark_chunk *label, cmark_chunk *url, cmark_chunk *title); + +#ifdef __cplusplus +} +#endif + +#endif
