http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/89c7b809/compiler/modules/CommonMark/src/inlines.c
----------------------------------------------------------------------
diff --git a/compiler/modules/CommonMark/src/inlines.c b/compiler/modules/CommonMark/src/inlines.c
index 2487f63..7ea308d 100644
--- a/compiler/modules/CommonMark/src/inlines.c
+++ b/compiler/modules/CommonMark/src/inlines.c
@@ -14,6 +14,15 @@
 #include "inlines.h"
 
 
+static const char *EMDASH = "\xE2\x80\x94";
+static const char *ENDASH = "\xE2\x80\x93";
+static const char *ELLIPSES = "\xE2\x80\xA6";
+static const char *LEFTDOUBLEQUOTE = "\xE2\x80\x9C";
+static const char *RIGHTDOUBLEQUOTE = "\xE2\x80\x9D";
+static const char *LEFTSINGLEQUOTE = "\xE2\x80\x98";
+static const char *RIGHTSINGLEQUOTE = "\xE2\x80\x99";
+
+
 // Macros for creating various kinds of simple.
 #define make_str(s) make_literal(CMARK_NODE_TEXT, s)
 #define make_code(s) make_literal(CMARK_NODE_CODE, s)
@@ -27,8 +36,8 @@ typedef struct delimiter {
        struct delimiter *previous;
        struct delimiter *next;
        cmark_node *inl_text;
+       bufsize_t position;
        unsigned char delim_char;
-       int position;
        bool can_open;
        bool can_close;
        bool active;
@@ -36,45 +45,53 @@ typedef struct delimiter {
 
 typedef struct {
        cmark_chunk input;
-       int pos;
+       bufsize_t pos;
        cmark_reference_map *refmap;
        delimiter *last_delim;
 } subject;
 
+static inline bool
+S_is_line_end_char(char c)
+{
+       return (c == '\n' || c == '\r');
+}
+
 static delimiter*
 S_insert_emph(subject *subj, delimiter *opener, delimiter *closer);
 
-static int parse_inline(subject* subj, cmark_node * parent);
+static int parse_inline(subject* subj, cmark_node * parent, int options);
 
 static void subject_from_buf(subject *e, cmark_strbuf *buffer,
                              cmark_reference_map *refmap);
-static int subject_find_special_char(subject *subj);
+static bufsize_t subject_find_special_char(subject *subj, int options);
 
-static unsigned char *cmark_clean_autolink(cmark_chunk *url, int is_email)
+static cmark_chunk cmark_clean_autolink(cmark_chunk *url, int is_email)
 {
        cmark_strbuf buf = GH_BUF_INIT;
 
        cmark_chunk_trim(url);
 
-       if (url->len == 0)
-               return NULL;
+       if (url->len == 0) {
+               cmark_chunk result = CMARK_CHUNK_EMPTY;
+               return result;
+       }
 
        if (is_email)
                cmark_strbuf_puts(&buf, "mailto:");
 
        houdini_unescape_html_f(&buf, url->data, url->len);
-       return cmark_strbuf_detach(&buf);
+       return cmark_chunk_buf_detach(&buf);
 }
 
-static inline cmark_node *make_link(cmark_node *label, unsigned char *url, unsigned char *title)
+static inline cmark_node *make_link(cmark_node *label, cmark_chunk *url, cmark_chunk *title)
 {
        cmark_node* e = (cmark_node *)calloc(1, sizeof(*e));
        if(e != NULL) {
                e->type = CMARK_NODE_LINK;
                e->first_child   = label;
                e->last_child    = label;
-               e->as.link.url   = url;
-               e->as.link.title = title;
+               e->as.link.url   = *url;
+               e->as.link.title = *title;
                e->next = NULL;
                label->parent = e;
        }
@@ -83,7 +100,9 @@ static inline cmark_node *make_link(cmark_node *label, unsigned char *url, unsig
 
 static inline cmark_node* make_autolink(cmark_node* label, cmark_chunk url, int is_email)
 {
-       return make_link(label, cmark_clean_autolink(&url, is_email), NULL);
+       cmark_chunk clean_url = cmark_clean_autolink(&url, is_email);
+       cmark_chunk title = CMARK_CHUNK_EMPTY;
+       return make_link(label, &clean_url, &title);
 }
 
 // Create an inline with a literal string value.
@@ -125,19 +144,20 @@ static inline cmark_node* make_simple(cmark_node_type t)
        return e;
 }
 
-static unsigned char *bufdup(const unsigned char *buf)
+// Duplicate a chunk by creating a copy of the buffer not by reusing the
+// buffer like cmark_chunk_dup does.
+static cmark_chunk chunk_clone(cmark_chunk *src)
 {
-       unsigned char *new_buf = NULL;
+       cmark_chunk c;
+       bufsize_t len = src->len;
 
-       if (buf) {
-               int len = strlen((char *)buf);
-               new_buf = (unsigned char *)calloc(len + 1, sizeof(*new_buf));
-               if(new_buf != NULL) {
-                       memcpy(new_buf, buf, len + 1);
-               }
-       }
+       c.len   = len;
+       c.data  = (unsigned char *)malloc(len + 1);
+       c.alloc = 1;
+       memcpy(c.data, src->data, len);
+       c.data[len] = '\0';
 
-       return new_buf;
+       return c;
 }
 
 static void subject_from_buf(subject *e, cmark_strbuf *buffer,
@@ -149,8 +169,6 @@ static void subject_from_buf(subject *e, cmark_strbuf *buffer,
        e->pos = 0;
        e->refmap = refmap;
        e->last_delim = NULL;
-
-       cmark_chunk_rtrim(&e->input);
 }
 
 static inline int isbacktick(int c)
@@ -160,10 +178,13 @@ static inline int isbacktick(int c)
 
 static inline unsigned char peek_char(subject *subj)
 {
+       // NULL bytes should have been stripped out by now.  If they're
+       // present, it's a programming error:
+       assert(!(subj->pos < subj->input.len && subj->input.data[subj->pos] == 0));
        return (subj->pos < subj->input.len) ? subj->input.data[subj->pos] : 0;
 }
 
-static inline unsigned char peek_at(subject *subj, int pos)
+static inline unsigned char peek_at(subject *subj, bufsize_t pos)
 {
        return subj->input.data[pos];
 }
@@ -177,12 +198,38 @@ static inline int is_eof(subject* subj)
 // Advance the subject.  Doesn't check for eof.
 #define advance(subj) (subj)->pos += 1
 
+static inline bool
+skip_spaces(subject *subj)
+{
+       bool skipped = false;
+       while (peek_char(subj) == ' ' || peek_char(subj) == '\t') {
+               advance(subj);
+               skipped = true;
+       }
+       return skipped;
+}
+
+static inline bool
+skip_line_end(subject *subj)
+{
+       bool seen_line_end_char = false;
+       if (peek_char(subj) == '\r') {
+               advance(subj);
+               seen_line_end_char = true;
+       }
+       if (peek_char(subj) == '\n') {
+               advance(subj);
+               seen_line_end_char = true;
+       }
+       return seen_line_end_char || is_eof(subj);
+}
+
 // Take characters while a predicate holds, and return a string.
 static inline cmark_chunk take_while(subject* subj, int (*f)(int))
 {
        unsigned char c;
-       int startpos = subj->pos;
-       int len = 0;
+       bufsize_t startpos = subj->pos;
+       bufsize_t len = 0;
 
        while ((c = peek_char(subj)) && (*f)(c)) {
                advance(subj);
@@ -197,7 +244,7 @@ static inline cmark_chunk take_while(subject* subj, int (*f)(int))
 // parsed).  Return 0 if you don't find matching closing
 // backticks, otherwise return the position in the subject
 // after the closing backticks.
-static int scan_to_closing_backticks(subject* subj, int openticklength)
+static bufsize_t scan_to_closing_backticks(subject* subj, bufsize_t openticklength)
 {
        // read non backticks
        unsigned char c;
@@ -207,7 +254,7 @@ static int scan_to_closing_backticks(subject* subj, int openticklength)
        if (is_eof(subj)) {
                return 0;  // did not find closing ticks, return 0
        }
-       int numticks = 0;
+       bufsize_t numticks = 0;
        while (peek_char(subj) == '`') {
                advance(subj);
                numticks++;
@@ -223,8 +270,8 @@ static int scan_to_closing_backticks(subject* subj, int openticklength)
 static cmark_node* handle_backticks(subject *subj)
 {
        cmark_chunk openticks = take_while(subj, isbacktick);
-       int startpos = subj->pos;
-       int endpos = scan_to_closing_backticks(subj, openticks.len);
+       bufsize_t startpos = subj->pos;
+       bufsize_t endpos = scan_to_closing_backticks(subj, openticks.len);
 
        if (endpos == 0) { // not found
                subj->pos = startpos; // rewind
@@ -246,10 +293,11 @@ static int
 scan_delims(subject* subj, unsigned char c, bool * can_open, bool * can_close)
 {
        int numdelims = 0;
-       int before_char_pos;
+       bufsize_t before_char_pos;
        int32_t after_char = 0;
        int32_t before_char = 0;
        int len;
+       bool left_flanking, right_flanking;
 
        if (subj->pos == 0) {
                before_char = 10;
@@ -267,9 +315,14 @@ scan_delims(subject* subj, unsigned char c, bool * can_open, bool * can_close)
                }
        }
 
-       while (peek_char(subj) == c) {
+       if (c == '\'' || c == '"') {
                numdelims++;
-               advance(subj);
+               advance(subj);  // limit to 1 delim for quotes
+       } else {
+               while (peek_char(subj) == c) {
+                       numdelims++;
+                       advance(subj);
+               }
        }
 
        len = utf8proc_iterate(subj->input.data + subj->pos,
@@ -277,19 +330,25 @@ scan_delims(subject* subj, unsigned char c, bool * can_open, bool * can_close)
        if (len == -1) {
                after_char = 10;
        }
-       *can_open = numdelims > 0 && !utf8proc_is_space(after_char) &&
-                   !(utf8proc_is_punctuation(after_char) &&
-                     !utf8proc_is_space(before_char) &&
-                     !utf8proc_is_punctuation(before_char));
-       *can_close = numdelims > 0 && !utf8proc_is_space(before_char) &&
-                    !(utf8proc_is_punctuation(before_char) &&
-                      !utf8proc_is_space(after_char) &&
-                      !utf8proc_is_punctuation(after_char));
+       left_flanking = numdelims > 0 && !utf8proc_is_space(after_char) &&
+                       !(utf8proc_is_punctuation(after_char) &&
+                         !utf8proc_is_space(before_char) &&
+                         !utf8proc_is_punctuation(before_char));
+       right_flanking = numdelims > 0 && !utf8proc_is_space(before_char) &&
+                        !(utf8proc_is_punctuation(before_char) &&
+                          !utf8proc_is_space(after_char) &&
+                          !utf8proc_is_punctuation(after_char));
        if (c == '_') {
-               *can_open = *can_open && !(before_char < 128 &&
-                                          cmark_isalnum((char)before_char));
-               *can_close = *can_close && !(before_char < 128 &&
-                                            cmark_isalnum((char)after_char));
+               *can_open = left_flanking &&
+                           (!right_flanking || utf8proc_is_punctuation(before_char));
+               *can_close = right_flanking &&
+                            (!left_flanking || utf8proc_is_punctuation(after_char));
+       } else if (c == '\'' || c == '"') {
+               *can_open = left_flanking && !right_flanking;
+               *can_close = right_flanking;
+       } else {
+               *can_open = left_flanking;
+               *can_close = right_flanking;
        }
        return numdelims;
 }
@@ -300,10 +359,10 @@ static void print_delimiters(subject *subj)
        delimiter *delim;
        delim = subj->last_delim;
        while (delim != NULL) {
-               printf("Item at %p: %d %d %d next(%p) prev(%p)\n",
-                      delim, delim->delim_char,
+               printf("Item at stack pos %p, text pos %d: %d %d %d next(%p) prev(%p)\n",
+                      (void*)delim, delim->position, delim->delim_char,
                       delim->can_open, delim->can_close,
-                      delim->next, delim->previous);
+                      (void*)delim->next, (void*)delim->previous);
                delim = delim->previous;
        }
 }
@@ -347,59 +406,175 @@ static void push_delimiter(subject *subj, unsigned char c, bool can_open,
        subj->last_delim = delim;
 }
 
-// Parse strong/emph or a fallback.
-// Assumes the subject has '_' or '*' at the current position.
-static cmark_node* handle_strong_emph(subject* subj, unsigned char c)
+// Assumes the subject has a c at the current position.
+static cmark_node* handle_delim(subject* subj, unsigned char c, bool smart)
 {
-       int numdelims;
+       bufsize_t numdelims;
        cmark_node * inl_text;
        bool can_open, can_close;
+       cmark_chunk contents;
 
        numdelims = scan_delims(subj, c, &can_open, &can_close);
 
-       inl_text = make_str(cmark_chunk_dup(&subj->input, subj->pos - numdelims, numdelims));
+       if (c == '\'' && smart) {
+               contents = cmark_chunk_literal(RIGHTSINGLEQUOTE);
+       } else if (c == '"' && smart) {
+               contents = cmark_chunk_literal(can_close ? RIGHTDOUBLEQUOTE : LEFTDOUBLEQUOTE);
+       } else {
+               contents = cmark_chunk_dup(&subj->input, subj->pos - numdelims, numdelims);
+       }
 
-       if (can_open || can_close) {
+       inl_text = make_str(contents);
+
+       if ((can_open || can_close) &&
+           (!(c == '\'' || c == '"') || smart)) {
                push_delimiter(subj, c, can_open, can_close, inl_text);
        }
 
        return inl_text;
 }
 
-static void process_emphasis(subject *subj, delimiter *start_delim)
+// Assumes we have a hyphen at the current position.
+static cmark_node* handle_hyphen(subject* subj, bool smart)
+{
+       int startpos = subj->pos;
+
+       advance(subj);
+
+       if (!smart || peek_char(subj) != '-') {
+               return make_str(cmark_chunk_literal("-"));
+       }
+
+       while (smart && peek_char(subj) == '-') {
+               advance(subj);
+       }
+
+       int numhyphens = subj->pos - startpos;
+       int en_count = 0;
+       int em_count = 0;
+       int i;
+       cmark_strbuf buf = GH_BUF_INIT;
+
+       if (numhyphens % 3 == 0) { // if divisible by 3, use all em dashes
+               em_count = numhyphens / 3;
+       } else if (numhyphens % 2 == 0) { // if divisible by 2, use all en dashes
+               en_count = numhyphens / 2;
+       } else if (numhyphens % 3 == 2) { // use one en dash at end
+               en_count = 1;
+               em_count = (numhyphens - 2) / 3;
+       } else { // use two en dashes at the end
+               en_count = 2;
+               em_count = (numhyphens - 4) / 3;
+       }
+
+       for (i = em_count; i > 0; i--) {
+               cmark_strbuf_puts(&buf, EMDASH);
+       }
+
+       for (i = en_count; i > 0; i--) {
+               cmark_strbuf_puts(&buf, ENDASH);
+       }
+
+       return make_str(cmark_chunk_buf_detach(&buf));
+}
+
+// Assumes we have a period at the current position.
+static cmark_node* handle_period(subject* subj, bool smart)
+{
+       advance(subj);
+       if (smart && peek_char(subj) == '.') {
+               advance(subj);
+               if (peek_char(subj) == '.') {
+                       advance(subj);
+                       return make_str(cmark_chunk_literal(ELLIPSES));
+               } else {
+                       return make_str(cmark_chunk_literal(".."));
+               }
+       } else {
+               return make_str(cmark_chunk_literal("."));
+       }
+}
+
+static void process_emphasis(subject *subj, delimiter *stack_bottom)
 {
        delimiter *closer = subj->last_delim;
        delimiter *opener;
+       delimiter *old_closer;
+       bool opener_found;
+       delimiter *openers_bottom[128];
+
+       // initialize openers_bottom:
+       openers_bottom['*'] = stack_bottom;
+       openers_bottom['_'] = stack_bottom;
+       openers_bottom['\''] = stack_bottom;
+       openers_bottom['"'] = stack_bottom;
 
        // move back to first relevant delim.
-       while (closer != NULL && closer->previous != start_delim) {
+       while (closer != NULL && closer->previous != stack_bottom) {
                closer = closer->previous;
        }
 
        // now move forward, looking for closers, and handling each
        while (closer != NULL) {
                if (closer->can_close &&
-                   (closer->delim_char == '*' || closer->delim_char == '_')) {
+                   (closer->delim_char == '*' || closer->delim_char == '_' ||
+                    closer->delim_char == '"' || closer->delim_char == '\'')) {
                        // Now look backwards for first matching opener:
                        opener = closer->previous;
-                       while (opener != NULL && opener != start_delim) {
+                       opener_found = false;
+                       while (opener != NULL && opener != stack_bottom &&
+                              opener != openers_bottom[closer->delim_char]) {
                                if (opener->delim_char == closer->delim_char &&
                                    opener->can_open) {
+                                       opener_found = true;
                                        break;
                                }
                                opener = opener->previous;
                        }
-                       if (opener != NULL && opener != start_delim) {
-                               closer = S_insert_emph(subj, opener, closer);
-                       } else {
+                       old_closer = closer;
+                       if (closer->delim_char == '*' || closer->delim_char == '_') {
+                               if (opener_found) {
+                                       closer = S_insert_emph(subj, opener, closer);
+                               } else {
+                                       closer = closer->next;
+                               }
+                       } else if (closer->delim_char == '\'') {
+                               cmark_chunk_free(&closer->inl_text->as.literal);
+                               closer->inl_text->as.literal =
+                                   cmark_chunk_literal(RIGHTSINGLEQUOTE);
+                               if (opener_found) {
+                                       cmark_chunk_free(&opener->inl_text->as.literal);
+                                       opener->inl_text->as.literal =
+                                           cmark_chunk_literal(LEFTSINGLEQUOTE);
+                               }
                                closer = closer->next;
+                       } else if (closer->delim_char == '"') {
+                               cmark_chunk_free(&closer->inl_text->as.literal);
+                               closer->inl_text->as.literal =
+                                   cmark_chunk_literal(RIGHTDOUBLEQUOTE);
+                               if (opener_found) {
+                                       cmark_chunk_free(&opener->inl_text->as.literal);
+                                       opener->inl_text->as.literal =
+                                           cmark_chunk_literal(LEFTDOUBLEQUOTE);
+                               }
+                               closer = closer->next;
+                       }
+                       if (!opener_found) {
+                               // set lower bound for future searches for openers:
+                               openers_bottom[old_closer->delim_char] = old_closer->previous;
+                               if (!old_closer->can_open) {
+                                       // we can remove a closer that can't be an
+                                       // opener, once we've seen there's no
+                                       // matching opener:
+                                       remove_delimiter(subj, old_closer);
+                               }
                        }
                } else {
                        closer = closer->next;
                }
        }
-       // free all delimiters in list until start_delim:
-       while (subj->last_delim != start_delim) {
+       // free all delimiters in list until stack_bottom:
+       while (subj->last_delim != stack_bottom) {
                remove_delimiter(subj, subj->last_delim);
        }
 }
@@ -408,11 +583,11 @@ static delimiter*
 S_insert_emph(subject *subj, delimiter *opener, delimiter *closer)
 {
        delimiter *delim, *tmp_delim;
-       int use_delims;
+       bufsize_t use_delims;
        cmark_node *opener_inl = opener->inl_text;
        cmark_node *closer_inl = closer->inl_text;
-       int opener_num_chars = opener_inl->as.literal.len;
-       int closer_num_chars = closer_inl->as.literal.len;
+       bufsize_t opener_num_chars = opener_inl->as.literal.len;
+       bufsize_t closer_num_chars = closer_inl->as.literal.len;
        cmark_node *tmp, *emph, *first_child, *last_child;
 
        // calculate the actual number of characters used from this closer
@@ -491,8 +666,7 @@ static cmark_node* handle_backslash(subject *subj)
        if (cmark_ispunct(nextchar)) {  // only ascii symbols and newline can be escaped
                advance(subj);
                return make_str(cmark_chunk_dup(&subj->input, subj->pos - 1, 1));
-       } else if (nextchar == '\n') {
-               advance(subj);
+       } else if (!is_eof(subj) && skip_line_end(subj)) {
                return make_linebreak();
        } else {
                return make_str(cmark_chunk_literal("\\"));
@@ -504,7 +678,7 @@ static cmark_node* handle_backslash(subject *subj)
 static cmark_node* handle_entity(subject* subj)
 {
        cmark_strbuf ent = GH_BUF_INIT;
-       size_t len;
+       bufsize_t len;
 
        advance(subj);
 
@@ -526,7 +700,7 @@ static cmark_node *make_str_with_entities(cmark_chunk *content)
 {
        cmark_strbuf unescaped = GH_BUF_INIT;
 
-       if (houdini_unescape_html(&unescaped, content->data, (size_t)content->len)) {
+       if (houdini_unescape_html(&unescaped, content->data, content->len)) {
                return make_str(cmark_chunk_buf_detach(&unescaped));
        } else {
                return make_str(*content);
@@ -535,14 +709,16 @@ static cmark_node *make_str_with_entities(cmark_chunk *content)
 
 // Clean a URL: remove surrounding whitespace and surrounding <>,
 // and remove \ that escape punctuation.
-unsigned char *cmark_clean_url(cmark_chunk *url)
+cmark_chunk cmark_clean_url(cmark_chunk *url)
 {
        cmark_strbuf buf = GH_BUF_INIT;
 
        cmark_chunk_trim(url);
 
-       if (url->len == 0)
-               return NULL;
+       if (url->len == 0) {
+               cmark_chunk result = CMARK_CHUNK_EMPTY;
+               return result;
+       }
 
        if (url->data[0] == '<' && url->data[url->len - 1] == '>') {
                houdini_unescape_html_f(&buf, url->data + 1, url->len - 2);
@@ -551,16 +727,18 @@ unsigned char *cmark_clean_url(cmark_chunk *url)
        }
 
        cmark_strbuf_unescape(&buf);
-       return cmark_strbuf_detach(&buf);
+       return cmark_chunk_buf_detach(&buf);
 }
 
-unsigned char *cmark_clean_title(cmark_chunk *title)
+cmark_chunk cmark_clean_title(cmark_chunk *title)
 {
        cmark_strbuf buf = GH_BUF_INIT;
        unsigned char first, last;
 
-       if (title->len == 0)
-               return NULL;
+       if (title->len == 0) {
+               cmark_chunk result = CMARK_CHUNK_EMPTY;
+               return result;
+       }
 
        first = title->data[0];
        last = title->data[title->len - 1];
@@ -575,14 +753,14 @@ unsigned char *cmark_clean_title(cmark_chunk *title)
        }
 
        cmark_strbuf_unescape(&buf);
-       return cmark_strbuf_detach(&buf);
+       return cmark_chunk_buf_detach(&buf);
 }
 
 // Parse an autolink or HTML tag.
 // Assumes the subject has a '<' character at the current position.
 static cmark_node* handle_pointy_brace(subject* subj)
 {
-       int matchlen = 0;
+       bufsize_t matchlen = 0;
        cmark_chunk contents;
 
        advance(subj);  // advance past first <
@@ -629,7 +807,7 @@ static cmark_node* handle_pointy_brace(subject* subj)
 // encountered.  Backticks in labels do not start code spans.
 static int link_label(subject* subj, cmark_chunk *raw_label)
 {
-       int startpos = subj->pos;
+       bufsize_t startpos = subj->pos;
        int length = 0;
        unsigned char c;
 
@@ -659,6 +837,7 @@ static int link_label(subject* subj, cmark_chunk *raw_label)
 
        if (c == ']') { // match found
                *raw_label = cmark_chunk_dup(&subj->input, startpos + 1, subj->pos - (startpos + 1));
+               cmark_chunk_trim(raw_label);
                advance(subj);  // advance past ]
                return 1;
        }
@@ -672,14 +851,14 @@ noMatch:
 // Return a link, an image, or a literal close bracket.
 static cmark_node* handle_close_bracket(subject* subj, cmark_node *parent)
 {
-       int initial_pos;
-       int starturl, endurl, starttitle, endtitle, endall;
-       int n;
-       int sps;
+       bufsize_t initial_pos;
+       bufsize_t starturl, endurl, starttitle, endtitle, endall;
+       bufsize_t n;
+       bufsize_t sps;
        cmark_reference *ref;
        bool is_image = false;
        cmark_chunk url_chunk, title_chunk;
-       unsigned char *url, *title;
+       cmark_chunk url, title;
        delimiter *opener;
        cmark_node *link_text;
        cmark_node *inl;
@@ -767,8 +946,8 @@ static cmark_node* handle_close_bracket(subject* subj, cmark_node *parent)
        cmark_chunk_free(&raw_label);
 
        if (ref != NULL) { // found
-               url = bufdup(ref->url);
-               title = bufdup(ref->title);
+               url   = chunk_clone(&ref->url);
+               title = chunk_clone(&ref->title);
                goto match;
        } else {
                goto noMatch;
@@ -785,7 +964,7 @@ match:
        inl->type = is_image ? NODE_IMAGE : NODE_LINK;
        cmark_chunk_free(&inl->as.literal);
        inl->first_child = link_text;
-       process_emphasis(subj, opener->previous);
+       process_emphasis(subj, opener);
        inl->as.link.url   = url;
        inl->as.link.title = title;
        inl->next = NULL;
@@ -800,10 +979,10 @@ match:
        }
        parent->last_child = inl;
 
-       // process_emphasis will remove this delimiter and all later ones.
        // Now, if we have a link, we also want to deactivate earlier link
        // delimiters. (This code can be removed if we decide to allow links
        // inside links.)
+       remove_delimiter(subj, opener);
        if (!is_image) {
                opener = subj->last_delim;
                while (opener != NULL) {
@@ -825,13 +1004,11 @@ match:
 // Assumes the subject has a newline at the current position.
 static cmark_node* handle_newline(subject *subj)
 {
-       int nlpos = subj->pos;
+       bufsize_t nlpos = subj->pos;
        // skip over newline
        advance(subj);
        // skip spaces at beginning of line
-       while (peek_char(subj) == ' ') {
-               advance(subj);
-       }
+       skip_spaces(subj);
        if (nlpos > 1 &&
            peek_at(subj, nlpos - 1) == ' ' &&
            peek_at(subj, nlpos - 2) == ' ') {
@@ -841,11 +1018,11 @@ static cmark_node* handle_newline(subject *subj)
        }
 }
 
-static int subject_find_special_char(subject *subj)
+static bufsize_t subject_find_special_char(subject *subj, int options)
 {
-       // "\n\\`&_*[]<!"
+       // "\r\n\\`&_*[]<!"
        static const int8_t SPECIAL_CHARS[256] = {
-               0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
+               0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0,
                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0,
                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
@@ -863,11 +1040,34 @@ static int subject_find_special_char(subject *subj)
                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
        };
 
-       int n = subj->pos + 1;
+       // " ' . -
+       static const char SMART_PUNCT_CHARS[] = {
+               0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+               0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+               0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0,
+               0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+               0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+               0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+               0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+               0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+               0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+               0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+               0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+               0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+               0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+               0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+               0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+               0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+       };
+
+       bufsize_t n = subj->pos + 1;
 
        while (n < subj->input.len) {
                if (SPECIAL_CHARS[subj->input.data[n]])
                        return n;
+               if (options & CMARK_OPT_SMART &&
+                   SMART_PUNCT_CHARS[subj->input.data[n]])
+                       return n;
                n++;
        }
 
@@ -876,17 +1076,18 @@ static int subject_find_special_char(subject *subj)
 
 // Parse an inline, advancing subject, and add it as a child of parent.
 // Return 0 if no inline can be parsed, 1 otherwise.
-static int parse_inline(subject* subj, cmark_node * parent)
+static int parse_inline(subject* subj, cmark_node * parent, int options)
 {
        cmark_node* new_inl = NULL;
        cmark_chunk contents;
        unsigned char c;
-       int endpos;
+       bufsize_t endpos;
        c = peek_char(subj);
        if (c == 0) {
                return 0;
        }
        switch(c) {
+       case '\r':
        case '\n':
                new_inl = handle_newline(subj);
                break;
@@ -904,7 +1105,15 @@ static int parse_inline(subject* subj, cmark_node * parent)
                break;
        case '*':
        case '_':
-               new_inl = handle_strong_emph(subj, c);
+       case '\'':
+       case '"':
+               new_inl = handle_delim(subj, c, options & CMARK_OPT_SMART);
+               break;
+       case '-':
+               new_inl = handle_hyphen(subj, options & CMARK_OPT_SMART);
+               break;
+       case '.':
+               new_inl = handle_period(subj, options & CMARK_OPT_SMART);
                break;
        case '[':
                advance(subj);
@@ -925,12 +1134,12 @@ static int parse_inline(subject* subj, cmark_node * parent)
                }
                break;
        default:
-               endpos = subject_find_special_char(subj);
+               endpos = subject_find_special_char(subj, options);
                contents = cmark_chunk_dup(&subj->input, subj->pos, endpos - subj->pos);
                subj->pos = endpos;
 
                // if we're at a newline, strip trailing spaces.
-               if (peek_char(subj) == '\n') {
+               if (S_is_line_end_char(peek_char(subj))) {
                        cmark_chunk_rtrim(&contents);
                }
 
@@ -944,12 +1153,13 @@ static int parse_inline(subject* subj, cmark_node * parent)
 }
 
 // Parse inlines from parent's string_content, adding as children of parent.
-extern void cmark_parse_inlines(cmark_node* parent, cmark_reference_map *refmap)
+extern void cmark_parse_inlines(cmark_node* parent, cmark_reference_map *refmap, int options)
 {
        subject subj;
        subject_from_buf(&subj, &parent->string_content, refmap);
+       cmark_chunk_rtrim(&subj.input);
 
-       while (!is_eof(&subj) && parse_inline(&subj, parent)) ;
+       while (!is_eof(&subj) && parse_inline(&subj, parent, options)) ;
 
        process_emphasis(&subj, NULL);
 }
@@ -957,11 +1167,9 @@ extern void cmark_parse_inlines(cmark_node* parent, 
cmark_reference_map *refmap)
 // Parse zero or more space characters, including at most one newline.
 static void spnl(subject* subj)
 {
-       bool seen_newline = false;
-       while (peek_char(subj) == ' ' ||
-              (!seen_newline &&
-               (seen_newline = peek_char(subj) == '\n'))) {
-               advance(subj);
+       skip_spaces(subj);
+       if (skip_line_end(subj)) {
+               skip_spaces(subj);
        }
 }
 
@@ -969,7 +1177,7 @@ static void spnl(subject* subj)
 // Modify refmap if a reference is encountered.
 // Return 0 if no reference found, otherwise position of subject
 // after reference is parsed.
-int cmark_parse_reference_inline(cmark_strbuf *input, cmark_reference_map 
*refmap)
+bufsize_t cmark_parse_reference_inline(cmark_strbuf *input, 
cmark_reference_map *refmap)
 {
        subject subj;
 
@@ -977,13 +1185,13 @@ int cmark_parse_reference_inline(cmark_strbuf *input, 
cmark_reference_map *refma
        cmark_chunk url;
        cmark_chunk title;
 
-       int matchlen = 0;
-       int beforetitle;
+       bufsize_t matchlen = 0;
+       bufsize_t beforetitle;
 
        subject_from_buf(&subj, input, NULL);
 
        // parse label:
-       if (!link_label(&subj, &lab))
+       if (!link_label(&subj, &lab) || lab.len == 0)
                return 0;
 
        // colon:
@@ -1014,14 +1222,19 @@ int cmark_parse_reference_inline(cmark_strbuf *input, 
cmark_reference_map *refma
                subj.pos = beforetitle;
                title = cmark_chunk_literal("");
        }
+
        // parse final spaces and newline:
-       while (peek_char(&subj) == ' ') {
-               advance(&subj);
-       }
-       if (peek_char(&subj) == '\n') {
-               advance(&subj);
-       } else if (peek_char(&subj) != 0) {
-               return 0;
+       skip_spaces(&subj);
+       if (!skip_line_end(&subj)) {
+               if (matchlen) { // try rewinding before title
+                       subj.pos = beforetitle;
+                       skip_spaces(&subj);
+                       if (!skip_line_end(&subj)) {
+                               return 0;
+                       }
+               } else {
+                       return 0;
+               }
        }
        // insert reference into refmap
        cmark_reference_create(refmap, &lab, &url, &title);

http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/89c7b809/compiler/modules/CommonMark/src/inlines.h
----------------------------------------------------------------------
diff --git a/compiler/modules/CommonMark/src/inlines.h 
b/compiler/modules/CommonMark/src/inlines.h
index d2ccfb4..f8847fc 100644
--- a/compiler/modules/CommonMark/src/inlines.h
+++ b/compiler/modules/CommonMark/src/inlines.h
@@ -5,12 +5,12 @@
 extern "C" {
 #endif
 
-unsigned char *cmark_clean_url(cmark_chunk *url);
-unsigned char *cmark_clean_title(cmark_chunk *title);
+cmark_chunk cmark_clean_url(cmark_chunk *url);
+cmark_chunk cmark_clean_title(cmark_chunk *title);
 
-void cmark_parse_inlines(cmark_node* parent, cmark_reference_map *refmap);
+void cmark_parse_inlines(cmark_node* parent, cmark_reference_map *refmap, int 
options);
 
-int cmark_parse_reference_inline(cmark_strbuf *input, cmark_reference_map 
*refmap);
+bufsize_t cmark_parse_reference_inline(cmark_strbuf *input, 
cmark_reference_map *refmap);
 
 #ifdef __cplusplus
 }

http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/89c7b809/compiler/modules/CommonMark/src/iterator.c
----------------------------------------------------------------------
diff --git a/compiler/modules/CommonMark/src/iterator.c 
b/compiler/modules/CommonMark/src/iterator.c
index 4daec2d..f18e3bf 100644
--- a/compiler/modules/CommonMark/src/iterator.c
+++ b/compiler/modules/CommonMark/src/iterator.c
@@ -108,6 +108,12 @@ cmark_iter_get_event_type(cmark_iter *iter)
        return iter->cur.ev_type;
 }
 
+// Return the root node stored in the iterator (iter->root).
+// iter is dereferenced unconditionally, so it must not be NULL.
+cmark_node*
+cmark_iter_get_root(cmark_iter *iter)
+{
+       return iter->root;
+}
+
 
 void cmark_consolidate_text_nodes(cmark_node *root)
 {
@@ -123,18 +129,20 @@ void cmark_consolidate_text_nodes(cmark_node *root)
                    cur->next &&
                    cur->next->type == CMARK_NODE_TEXT) {
                        cmark_strbuf_clear(&buf);
-                       cmark_strbuf_puts(&buf, cmark_node_get_literal(cur));
+                       cmark_strbuf_put(&buf, cur->as.literal.data, 
cur->as.literal.len);
                        tmp = cur->next;
                        while (tmp && tmp->type == CMARK_NODE_TEXT) {
-                               cmark_iter_get_node(iter); // advance pointer
-                               cmark_strbuf_puts(&buf, 
cmark_node_get_literal(tmp));
+                               cmark_iter_next(iter); // advance pointer
+                               cmark_strbuf_put(&buf, tmp->as.literal.data, 
tmp->as.literal.len);
                                next = tmp->next;
                                cmark_node_free(tmp);
                                tmp = next;
                        }
-                       cmark_node_set_literal(cur, (char 
*)cmark_strbuf_detach(&buf));
+                       cmark_chunk_free(&cur->as.literal);
+                       cur->as.literal = cmark_chunk_buf_detach(&buf);
                }
        }
 
+       cmark_strbuf_free(&buf);
        cmark_iter_free(iter);
 }

http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/89c7b809/compiler/modules/CommonMark/src/latex.c
----------------------------------------------------------------------
diff --git a/compiler/modules/CommonMark/src/latex.c 
b/compiler/modules/CommonMark/src/latex.c
new file mode 100644
index 0000000..782b0c0
--- /dev/null
+++ b/compiler/modules/CommonMark/src/latex.c
@@ -0,0 +1,430 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <ctype.h>
+
+#include "config.h"
+#include "cmark.h"
+#include "node.h"
+#include "buffer.h"
+#include "utf8.h"
+#include "scanners.h"
+#include "render.h"
+
+#define safe_strlen(s) cmark_strbuf_safe_strlen(s)
+#define OUT(s, wrap, escaping) renderer->out(renderer, s, wrap, escaping)
+#define LIT(s) renderer->out(renderer, s, false, LITERAL)
+#define CR() renderer->cr(renderer)
+#define BLANKLINE() renderer->blankline(renderer)
+
+// Emit one code point to the LaTeX output, escaping it as needed.
+//
+//   renderer  output sink; written through cmark_render_ascii() and
+//             cmark_render_code_point()
+//   escape    escaping mode; LITERAL bypasses every rule below
+//   c         code point to emit
+//   nextc     byte that follows c in the input, used only to detect "--"
+//
+// Code points without a dedicated case are passed through unchanged.
+static inline void outc(cmark_renderer *renderer,
+                        cmark_escaping escape,
+                        int32_t c,
+                        unsigned char nextc)
+{
+       if (escape == LITERAL) {
+               cmark_render_code_point(renderer, c);
+               return;
+       }
+
+       switch(c) {
+       case 123: // '{'
+       case 125: // '}'
+       case 35: // '#'
+       case 37: // '%'
+       case 38: // '&'
+               // these only need a leading backslash
+               cmark_render_ascii(renderer, "\\");
+               cmark_render_code_point(renderer, c);
+               break;
+       case 36: // '$'
+       case 95: // '_'
+               // backslash-escaped in NORMAL mode only
+               if (escape == NORMAL) {
+                       cmark_render_ascii(renderer, "\\");
+               }
+               cmark_render_code_point(renderer, c);
+               break;
+       case 45: // '-'
+               // An empty group after the hyphen stops "--" / "---" from
+               // forming a dash ligature while still printing every hyphen.
+               // ("\-" must not be used here: it is LaTeX's discretionary
+               // hyphen and prints nothing in the middle of a line.)
+               // URLs are left alone so the link target is not altered.
+               if (nextc == 45 && escape != URL) {
+                       cmark_render_ascii(renderer, "-{}");
+               } else {
+                       cmark_render_ascii(renderer, "-");
+               }
+               break;
+       case 126: // '~'
+               if (escape == NORMAL) {
+                       cmark_render_ascii(renderer, "\\textasciitilde{}");
+               } else {
+                       cmark_render_code_point(renderer, c);
+               }
+               break;
+       case 94: // '^'
+               cmark_render_ascii(renderer, "\\^{}");
+               break;
+       case 92: // '\\'
+               if (escape == URL) {
+                       // / acts as path sep even on windows:
+                       cmark_render_ascii(renderer, "/");
+               } else {
+                       cmark_render_ascii(renderer, "\\textbackslash{}");
+               }
+               break;
+       case 124: // '|'
+               cmark_render_ascii(renderer, "\\textbar{}");
+               break;
+       case 60: // '<'
+               cmark_render_ascii(renderer, "\\textless{}");
+               break;
+       case 62: // '>'
+               cmark_render_ascii(renderer, "\\textgreater{}");
+               break;
+       case 91: // '['
+       case 93: // ']'
+               // wrapped in a group: {[} or {]}
+               cmark_render_ascii(renderer, "{");
+               cmark_render_code_point(renderer, c);
+               cmark_render_ascii(renderer, "}");
+               break;
+       case 34: // '"'
+               cmark_render_ascii(renderer, "\\textquotedbl{}");
+               // requires \usepackage[T1]{fontenc}
+               break;
+       case 39: // '\''
+               cmark_render_ascii(renderer, "\\textquotesingle{}");
+               // requires \usepackage{textcomp}
+               break;
+       case 160: // nbsp
+               cmark_render_ascii(renderer, "~");
+               break;
+       case 8230: // hellip
+               cmark_render_ascii(renderer, "\\ldots{}");
+               break;
+       // The typographic quotes and dashes below are replaced by their
+       // ASCII LaTeX spellings in NORMAL mode and passed through otherwise.
+       case 8216: // lsquo
+               if (escape == NORMAL) {
+                       cmark_render_ascii(renderer, "`");
+               } else {
+                       cmark_render_code_point(renderer, c);
+               }
+               break;
+       case 8217: // rsquo
+               if (escape == NORMAL) {
+                       cmark_render_ascii(renderer, "\'");
+               } else {
+                       cmark_render_code_point(renderer, c);
+               }
+               break;
+       case 8220: // ldquo
+               if (escape == NORMAL) {
+                       cmark_render_ascii(renderer, "``");
+               } else {
+                       cmark_render_code_point(renderer, c);
+               }
+               break;
+       case 8221: // rdquo
+               if (escape == NORMAL) {
+                       cmark_render_ascii(renderer, "''");
+               } else {
+                       cmark_render_code_point(renderer, c);
+               }
+               break;
+       case 8212: // emdash
+               if (escape == NORMAL) {
+                       cmark_render_ascii(renderer, "---");
+               } else {
+                       cmark_render_code_point(renderer, c);
+               }
+               break;
+       case 8211: // endash
+               if (escape == NORMAL) {
+                       cmark_render_ascii(renderer, "--");
+               } else {
+                       cmark_render_code_point(renderer, c);
+               }
+               break;
+       default:
+               cmark_render_code_point(renderer, c);
+       }
+}
+
+// Classification produced by get_link_type() and switched on when a
+// CMARK_NODE_LINK is rendered.
+typedef enum  {
+       NO_LINK,        // not a link node, URL empty, or scan_scheme() found no match
+       URL_AUTOLINK,   // link text is identical to the URL
+       EMAIL_AUTOLINK, // link text is identical to the URL minus its "mailto:" prefix
+       NORMAL_LINK     // every other link (including any link with a title)
+} link_type;
+
+// Classify a node for LaTeX link rendering.
+//
+// Returns NO_LINK when node is not a CMARK_NODE_LINK, when its URL is
+// empty, or when scan_scheme() reports no match at offset 0.
+// Returns NORMAL_LINK when the link has a title, has no children, or its
+// first child is not a text node whose literal equals the URL.
+// Otherwise returns EMAIL_AUTOLINK if the URL starts with "mailto:" and
+// the text equals the remainder, or URL_AUTOLINK if the text equals the
+// whole URL.
+//
+// Side effect: calls cmark_consolidate_text_nodes() on the first child.
+static link_type
+get_link_type(cmark_node *node)
+{
+       size_t title_len, url_len;
+       cmark_node *link_text;
+       const char *realurl;
+       int realurllen;
+       bool isemail = false;
+
+       if (node->type != CMARK_NODE_LINK) {
+               return NO_LINK;
+       }
+
+       const char* url = cmark_node_get_url(node);
+       cmark_chunk url_chunk = cmark_chunk_literal(url);
+
+       url_len = safe_strlen(url);
+       if (url_len == 0 || scan_scheme(&url_chunk, 0) == 0) {
+               return NO_LINK;
+       }
+
+       const char* title = cmark_node_get_title(node);
+       title_len = safe_strlen(title);
+       // if it has a title, we can't treat it as an autolink:
+       if (title_len > 0) {
+               return NORMAL_LINK;
+       }
+
+       link_text = node->first_child;
+       // a link with no children has no text to compare against the URL
+       if (link_text == NULL) {
+               return NORMAL_LINK;
+       }
+       cmark_consolidate_text_nodes(link_text);
+       // as.literal is only compared for a text child; anything else
+       // (emphasis, code, image, ...) cannot be an autolink
+       if (link_text->type != CMARK_NODE_TEXT) {
+               return NORMAL_LINK;
+       }
+
+       realurl = url;
+       realurllen = (int)url_len;
+       if (strncmp(realurl, "mailto:", 7) == 0) {
+               // compare the link text against the bare address
+               realurl += 7;
+               realurllen -= 7;
+               isemail = true;
+       }
+       if (realurllen == link_text->as.literal.len &&
+           strncmp(realurl,
+                   (char*)link_text->as.literal.data,
+                   link_text->as.literal.len) == 0) {
+               return isemail ? EMAIL_AUTOLINK : URL_AUTOLINK;
+       }
+       return NORMAL_LINK;
+}
+
+// Count the ordered lists on the path from node up to the root, node
+// itself included.  The result is used as an index that selects the
+// LaTeX counter suffix ("i", "ii", ...) for the list being rendered.
+static int
+S_get_enumlevel(cmark_node *node)
+{
+       int enumlevel = 0;
+       cmark_node *tmp;
+
+       for (tmp = node; tmp; tmp = tmp->parent) {
+               // Inspect the ancestor being visited (tmp), not the starting
+               // node: enclosing bullet lists must not raise the level.
+               if (tmp->type == CMARK_NODE_LIST &&
+                   cmark_node_get_list_type(tmp) == CMARK_ORDERED_LIST) {
+                       enumlevel++;
+               }
+       }
+       return enumlevel;
+}
+
+// Render one iterator event for node as LaTeX.
+//
+//   renderer  output sink, driven through the OUT/LIT/CR/BLANKLINE macros
+//   node      node being entered or exited
+//   ev_type   CMARK_EVENT_ENTER on the way in; any other value is treated
+//             as leaving the node
+//   options   unused
+//
+// Returns 1 normally.  Returns 0 after rendering an image on entry (the
+// image case writes its own closing brace before doing so).
+// Block and inline HTML nodes produce no output.
+static int
+S_render_node(cmark_renderer *renderer,
+              cmark_node *node,
+              cmark_event_type ev_type,
+              int options)
+{
+       int list_number;
+       char list_number_string[20];
+       bool entering = (ev_type == CMARK_EVENT_ENTER);
+       cmark_list_type list_type;
+       // counter suffixes: \setcounter{enumi}, \setcounter{enumii}, ...
+       static const char *const roman_numerals[] = {
+               "", "i", "ii", "iii", "iv", "v",
+               "vi", "vii", "viii", "ix", "x"
+       };
+       const int max_enumlevel =
+               (int)(sizeof(roman_numerals) / sizeof(roman_numerals[0])) - 1;
+
+       // avoid warning about unused parameter:
+       (void)(options);
+
+       switch (node->type) {
+       case CMARK_NODE_DOCUMENT:
+               break;
+
+       case CMARK_NODE_BLOCK_QUOTE:
+               if (entering) {
+                       LIT("\\begin{quote}");
+                       CR();
+               } else {
+                       LIT("\\end{quote}");
+                       BLANKLINE();
+               }
+               break;
+
+       case CMARK_NODE_LIST:
+               list_type = cmark_node_get_list_type(node);
+               if (entering) {
+                       LIT("\\begin{");
+                       LIT(list_type == CMARK_ORDERED_LIST ?
+                           "enumerate" : "itemize");
+                       LIT("}");
+                       CR();
+                       list_number = cmark_node_get_list_start(node);
+                       if (list_number > 1) {
+                               // Clamp so that very deep nesting cannot
+                               // index past the end of roman_numerals.
+                               int enumlevel = S_get_enumlevel(node);
+                               if (enumlevel > max_enumlevel) {
+                                       enumlevel = max_enumlevel;
+                               }
+                               snprintf(list_number_string,
+                                        sizeof(list_number_string),
+                                        "%d", list_number);
+                               LIT("\\setcounter{enum");
+                               LIT(roman_numerals[enumlevel]);
+                               LIT("}{");
+                               OUT(list_number_string, false, NORMAL);
+                               LIT("}");
+                               CR();
+                       }
+               } else {
+                       LIT("\\end{");
+                       LIT(list_type == CMARK_ORDERED_LIST ?
+                           "enumerate" : "itemize");
+                       LIT("}");
+                       BLANKLINE();
+               }
+               break;
+
+       case CMARK_NODE_ITEM:
+               if (entering) {
+                       LIT("\\item ");
+               } else {
+                       CR();
+               }
+               break;
+
+       case CMARK_NODE_HEADER:
+               if (entering) {
+                       // levels without a case below get only the brace group
+                       switch (cmark_node_get_header_level(node)) {
+                       case 1:
+                               LIT("\\section");
+                               break;
+                       case 2:
+                               LIT("\\subsection");
+                               break;
+                       case 3:
+                               LIT("\\subsubsection");
+                               break;
+                       case 4:
+                               LIT("\\paragraph");
+                               break;
+                       case 5:
+                               LIT("\\subparagraph");
+                               break;
+                       }
+                       LIT("{");
+               } else {
+                       LIT("}");
+                       BLANKLINE();
+               }
+               break;
+
+       case CMARK_NODE_CODE_BLOCK:
+               CR();
+               LIT("\\begin{verbatim}");
+               CR();
+               OUT(cmark_node_get_literal(node), false, LITERAL);
+               CR();
+               LIT("\\end{verbatim}");
+               BLANKLINE();
+               break;
+
+       case CMARK_NODE_HTML:
+               break;
+
+       case CMARK_NODE_HRULE:
+               BLANKLINE();
+               LIT("\\begin{center}\\rule{0.5\\linewidth}{\\linethickness}\\end{center}");
+               BLANKLINE();
+               break;
+
+       case CMARK_NODE_PARAGRAPH:
+               if (!entering) {
+                       BLANKLINE();
+               }
+               break;
+
+       case CMARK_NODE_TEXT:
+               OUT(cmark_node_get_literal(node), true, NORMAL);
+               break;
+
+       case CMARK_NODE_LINEBREAK:
+               LIT("\\\\");
+               CR();
+               break;
+
+       case CMARK_NODE_SOFTBREAK:
+               // width 0: keep the source line break; otherwise emit a space
+               if (renderer->width == 0) {
+                       CR();
+               } else {
+                       OUT(" ", true, NORMAL);
+               }
+               break;
+
+       case CMARK_NODE_CODE:
+               LIT("\\texttt{");
+               OUT(cmark_node_get_literal(node), false, NORMAL);
+               LIT("}");
+               break;
+
+       case CMARK_NODE_INLINE_HTML:
+               break;
+
+       case CMARK_NODE_STRONG:
+               if (entering) {
+                       LIT("\\textbf{");
+               } else {
+                       LIT("}");
+               }
+               break;
+
+       case CMARK_NODE_EMPH:
+               if (entering) {
+                       LIT("\\emph{");
+               } else {
+                       LIT("}");
+               }
+               break;
+
+       case CMARK_NODE_LINK:
+               if (entering) {
+                       const char* url = cmark_node_get_url(node);
+                       // requires \usepackage{hyperref}
+                       switch(get_link_type(node)) {
+                       case URL_AUTOLINK:
+                               // NOTE(review): the brace stays open and 1 is
+                               // returned, so the child text (equal to the
+                               // URL) presumably gets rendered inside \url{}
+                               // as well -- confirm against cmark_render().
+                               LIT("\\url{");
+                               OUT(url, false, URL);
+                               break;
+                       case EMAIL_AUTOLINK:
+                               LIT("\\href{");
+                               OUT(url, false, URL);
+                               LIT("}\\nolinkurl{");
+                               break;
+                       case NORMAL_LINK:
+                               LIT("\\href{");
+                               OUT(url, false, URL);
+                               LIT("}{");
+                               break;
+                       case NO_LINK:
+                               LIT("{");  // error?
+                               break;
+                       }
+               } else {
+                       // closes whichever group the enter branch opened
+                       LIT("}");
+               }
+
+               break;
+
+       case CMARK_NODE_IMAGE:
+               if (entering) {
+                       LIT("\\protect\\includegraphics{");
+                       // requires \usepackage{graphicx}
+                       OUT(cmark_node_get_url(node), false, URL);
+                       LIT("}");
+                       return 0;
+               }
+               break;
+
+       default:
+               assert(false);
+               break;
+       }
+
+       return 1;
+}
+
+// Entry point of the LaTeX renderer: forwards root, options and width to
+// cmark_render() together with this file's outc and S_render_node
+// callbacks, and returns whatever cmark_render() returns.
+char *cmark_render_latex(cmark_node *root, int options, int width)
+{
+       return cmark_render(root, options, width, outc, S_render_node);
+}

http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/89c7b809/compiler/modules/CommonMark/src/libcmark.pc.in
----------------------------------------------------------------------
diff --git a/compiler/modules/CommonMark/src/libcmark.pc.in 
b/compiler/modules/CommonMark/src/libcmark.pc.in
deleted file mode 100644
index 9c3a9a9..0000000
--- a/compiler/modules/CommonMark/src/libcmark.pc.in
+++ /dev/null
@@ -1,10 +0,0 @@
-prefix=@CMAKE_INSTALL_PREFIX@
-exec_prefix=@CMAKE_INSTALL_PREFIX@
-libdir=@CMAKE_INSTALL_PREFIX@/lib
-includedir=@CMAKE_INSTALL_PREFIX@/include
-
-Name: libcmark
-Description: CommonMark parsing, rendering, and manipulation
-Version: @PROJECT_VERSION@
-Libs: -L${libdir} -lcmark
-Cflags: -I${includedir}

http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/89c7b809/compiler/modules/CommonMark/src/man.c
----------------------------------------------------------------------
diff --git a/compiler/modules/CommonMark/src/man.c 
b/compiler/modules/CommonMark/src/man.c
index 2c8a3a5..6ff33f5 100644
--- a/compiler/modules/CommonMark/src/man.c
+++ b/compiler/modules/CommonMark/src/man.c
@@ -7,72 +7,84 @@
 #include "cmark.h"
 #include "node.h"
 #include "buffer.h"
+#include "utf8.h"
+#include "render.h"
 
-// Functions to convert cmark_nodes to groff man strings.
+#define OUT(s, wrap, escaping) renderer->out(renderer, s, wrap, escaping)
+#define LIT(s) renderer->out(renderer, s, false, LITERAL)
+#define CR() renderer->cr(renderer)
+#define BLANKLINE() renderer->blankline(renderer)
 
-static void escape_man(cmark_strbuf *dest, const unsigned char *source, int 
length)
+// Functions to convert cmark_nodes to groff man strings.
+static
+void S_outc(cmark_renderer *renderer,
+            cmark_escaping escape,
+            int32_t c,
+            unsigned char nextc)
 {
-       int i;
-       unsigned char c;
-
-       for (i = 0; i < length; i++) {
-               c = source[i];
-               if (c == '.' && i == 0) {
-                       cmark_strbuf_puts(dest, "\\&.");
-               } else if (c == '\'' && i == 0) {
-                       cmark_strbuf_puts(dest, "\\&'");
-               } else if (c == '-') {
-                       cmark_strbuf_puts(dest, "\\-");
-               } else if (c == '\\') {
-                       cmark_strbuf_puts(dest, "\\e");
+       (void)(nextc);
+
+       if (escape == LITERAL) {
+               cmark_render_code_point(renderer, c);
+               return;
+       }
+
+       switch(c) {
+       case 46:
+               if (renderer->begin_line) {
+                       cmark_render_ascii(renderer, "\\&.");
+               } else {
+                       cmark_render_code_point(renderer, c);
+               }
+               break;
+       case 39:
+               if (renderer->begin_line) {
+                       cmark_render_ascii(renderer, "\\&'");
                } else {
-                       cmark_strbuf_putc(dest, source[i]);
+                       cmark_render_code_point(renderer, c);
                }
+               break;
+       case 45:
+               cmark_render_ascii(renderer, "\\-");
+               break;
+       case 92:
+               cmark_render_ascii(renderer, "\\e");
+               break;
+       case 8216: // left single quote
+               cmark_render_ascii(renderer, "\\[oq]");
+               break;
+       case 8217: // right single quote
+               cmark_render_ascii(renderer, "\\[cq]");
+               break;
+       case 8220: // left double quote
+               cmark_render_ascii(renderer, "\\[lq]");
+               break;
+       case 8221: // right double quote
+               cmark_render_ascii(renderer, "\\[rq]");
+               break;
+       case 8212: // em dash
+               cmark_render_ascii(renderer, "\\[em]");
+               break;
+       case 8211: // en dash
+               cmark_render_ascii(renderer, "\\[en]");
+               break;
+       default:
+               cmark_render_code_point(renderer, c);
        }
 }
 
-static inline void cr(cmark_strbuf *man)
-{
-       if (man->size && man->ptr[man->size - 1] != '\n')
-               cmark_strbuf_putc(man, '\n');
-}
-
-struct render_state {
-       cmark_strbuf* man;
-       cmark_node *plain;
-};
-
 static int
-S_render_node(cmark_node *node, cmark_event_type ev_type,
-              struct render_state *state)
+S_render_node(cmark_renderer *renderer,
+              cmark_node *node,
+              cmark_event_type ev_type,
+              int options)
 {
        cmark_node *tmp;
-       cmark_strbuf *man = state->man;
        int list_number;
        bool entering = (ev_type == CMARK_EVENT_ENTER);
 
-       if (state->plain == node) { // back at original node
-               state->plain = NULL;
-       }
-
-       if (state->plain != NULL) {
-               switch(node->type) {
-               case CMARK_NODE_TEXT:
-               case CMARK_NODE_CODE:
-                       escape_man(man, node->as.literal.data,
-                                  node->as.literal.len);
-                       break;
-
-               case CMARK_NODE_LINEBREAK:
-               case CMARK_NODE_SOFTBREAK:
-                       cmark_strbuf_putc(man, ' ');
-                       break;
-
-               default:
-                       break;
-               }
-               return 1;
-       }
+       // avoid unused parameter error:
+       (void)(options);
 
        switch (node->type) {
        case CMARK_NODE_DOCUMENT:
@@ -80,13 +92,13 @@ S_render_node(cmark_node *node, cmark_event_type ev_type,
 
        case CMARK_NODE_BLOCK_QUOTE:
                if (entering) {
-                       cr(man);
-                       cmark_strbuf_puts(man, ".RS");
-                       cr(man);
+                       CR();
+                       LIT(".RS");
+                       CR();
                } else {
-                       cr(man);
-                       cmark_strbuf_puts(man, ".RE");
-                       cr(man);
+                       CR();
+                       LIT(".RE");
+                       CR();
                }
                break;
 
@@ -95,11 +107,11 @@ S_render_node(cmark_node *node, cmark_event_type ev_type,
 
        case CMARK_NODE_ITEM:
                if (entering) {
-                       cr(man);
-                       cmark_strbuf_puts(man, ".IP ");
+                       CR();
+                       LIT(".IP ");
                        if (cmark_node_get_list_type(node->parent) ==
                            CMARK_BULLET_LIST) {
-                               cmark_strbuf_puts(man, "\\[bu] 2");
+                               LIT("\\[bu] 2");
                        } else {
                                list_number = 
cmark_node_get_list_start(node->parent);
                                tmp = node;
@@ -107,43 +119,45 @@ S_render_node(cmark_node *node, cmark_event_type ev_type,
                                        tmp = tmp->prev;
                                        list_number += 1;
                                }
-                               cmark_strbuf_printf(man, "\"%d.\" 4", 
list_number);
+                               char list_number_s[20];
+                               sprintf(list_number_s, "\"%d.\" 4", 
list_number);
+                               LIT(list_number_s);
                        }
-                       cr(man);
+                       CR();
                } else {
-                       cr(man);
+                       CR();
                }
                break;
 
        case CMARK_NODE_HEADER:
                if (entering) {
-                       cr(man);
-                       cmark_strbuf_puts(man,
-                                         cmark_node_get_header_level(node) == 
1 ?
-                                         ".SH" : ".SS");
-                       cr(man);
+                       CR();
+                       LIT(cmark_node_get_header_level(node) == 1 ?
+                           ".SH" : ".SS");
+                       CR();
                } else {
-                       cr(man);
+                       CR();
                }
                break;
 
        case CMARK_NODE_CODE_BLOCK:
-               cr(man);
-               cmark_strbuf_puts(man, ".IP\n.nf\n\\f[C]\n");
-               escape_man(man, node->as.code.literal.data,
-                          node->as.code.literal.len);
-               cr(man);
-               cmark_strbuf_puts(man, "\\f[]\n.fi");
-               cr(man);
+               CR();
+               LIT(".IP\n.nf\n\\f[C]\n");
+               OUT(cmark_node_get_literal(node),
+                   false,
+                   NORMAL);
+               CR();
+               LIT("\\f[]\n.fi");
+               CR();
                break;
 
        case CMARK_NODE_HTML:
                break;
 
        case CMARK_NODE_HRULE:
-               cr(man);
-               cmark_strbuf_puts(man, ".PP\n  *  *  *  *  *");
-               cr(man);
+               CR();
+               LIT(".PP\n  *  *  *  *  *");
+               CR();
                break;
 
        case CMARK_NODE_PARAGRAPH:
@@ -154,32 +168,36 @@ S_render_node(cmark_node *node, cmark_event_type ev_type,
                            node->prev == NULL) {
                                // no blank line or .PP
                        } else {
-                               cr(man);
-                               cmark_strbuf_puts(man, ".PP\n");
+                               CR();
+                               LIT(".PP");
+                               CR();
                        }
                } else {
-                       cr(man);
+                       CR();
                }
                break;
 
        case CMARK_NODE_TEXT:
-               escape_man(man, node->as.literal.data,
-                          node->as.literal.len);
+               OUT(cmark_node_get_literal(node), true, NORMAL);
                break;
 
        case CMARK_NODE_LINEBREAK:
-               cmark_strbuf_puts(man, ".PD 0\n.P\n.PD");
-               cr(man);
+               LIT(".PD 0\n.P\n.PD");
+               CR();
                break;
 
        case CMARK_NODE_SOFTBREAK:
-               cmark_strbuf_putc(man, '\n');
+               if (renderer->width == 0) {
+                       CR();
+               } else {
+                       OUT(" ", true, LITERAL);
+               }
                break;
 
        case CMARK_NODE_CODE:
-               cmark_strbuf_puts(man, "\\f[C]");
-               escape_man(man, node->as.literal.data, node->as.literal.len);
-               cmark_strbuf_puts(man, "\\f[]");
+               LIT("\\f[C]");
+               OUT(cmark_node_get_literal(node), true, NORMAL);
+               LIT("\\f[]");
                break;
 
        case CMARK_NODE_INLINE_HTML:
@@ -187,33 +205,33 @@ S_render_node(cmark_node *node, cmark_event_type ev_type,
 
        case CMARK_NODE_STRONG:
                if (entering) {
-                       cmark_strbuf_puts(man, "\\f[B]");
+                       LIT("\\f[B]");
                } else {
-                       cmark_strbuf_puts(man, "\\f[]");
+                       LIT("\\f[]");
                }
                break;
 
        case CMARK_NODE_EMPH:
                if (entering) {
-                       cmark_strbuf_puts(man, "\\f[I]");
+                       LIT("\\f[I]");
                } else {
-                       cmark_strbuf_puts(man, "\\f[]");
+                       LIT("\\f[]");
                }
                break;
 
        case CMARK_NODE_LINK:
                if (!entering) {
-                       cmark_strbuf_printf(man, " (%s)",
-                                           cmark_node_get_url(node));
+                       LIT(" (");
+                       OUT(cmark_node_get_url(node), true, URL);
+                       LIT(")");
                }
                break;
 
        case CMARK_NODE_IMAGE:
                if (entering) {
-                       cmark_strbuf_puts(man, "[IMAGE: ");
-                       state->plain = node;
+                       LIT("[IMAGE: ");
                } else {
-                       cmark_strbuf_puts(man, "]");
+                       LIT("]");
                }
                break;
 
@@ -222,28 +240,10 @@ S_render_node(cmark_node *node, cmark_event_type ev_type,
                break;
        }
 
-       // cmark_strbuf_putc(man, 'x');
        return 1;
 }
 
-char *cmark_render_man(cmark_node *root, long options)
+char *cmark_render_man(cmark_node *root, int options, int width)
 {
-       char *result;
-       cmark_strbuf man = GH_BUF_INIT;
-       struct render_state state = { &man, NULL };
-       cmark_node *cur;
-       cmark_event_type ev_type;
-       cmark_iter *iter = cmark_iter_new(root);
-
-       if (options == 0) options = 0; // avoid warning about unused parameters
-
-       while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) {
-               cur = cmark_iter_get_node(iter);
-               S_render_node(cur, ev_type, &state);
-       }
-       result = (char *)cmark_strbuf_detach(&man);
-
-       cmark_iter_free(iter);
-       cmark_strbuf_free(&man);
-       return result;
+       return cmark_render(root, options, width, S_outc, S_render_node);
 }

http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/89c7b809/compiler/modules/CommonMark/src/node.c
----------------------------------------------------------------------
diff --git a/compiler/modules/CommonMark/src/node.c b/compiler/modules/CommonMark/src/node.c
index 3785a27..7b1bb10 100644
--- a/compiler/modules/CommonMark/src/node.c
+++ b/compiler/modules/CommonMark/src/node.c
@@ -7,6 +7,73 @@
 static void
 S_node_unlink(cmark_node *node);
 
+static inline bool
+S_is_block(cmark_node *node)
+{
+       if (node == NULL) {
+               return false;
+       }
+       return node->type >= CMARK_NODE_FIRST_BLOCK
+              && node->type <= CMARK_NODE_LAST_BLOCK;
+}
+
+static inline bool
+S_is_inline(cmark_node *node)
+{
+       if (node == NULL) {
+               return false;
+       }
+       return node->type >= CMARK_NODE_FIRST_INLINE
+              && node->type <= CMARK_NODE_LAST_INLINE;
+}
+
+static bool
+S_can_contain(cmark_node *node, cmark_node *child)
+{
+       cmark_node *cur;
+
+       if (node == NULL || child == NULL) {
+               return false;
+       }
+
+       // Verify that child is not an ancestor of node or equal to node.
+       cur = node;
+       do {
+               if (cur == child) {
+                       return false;
+               }
+               cur = cur->parent;
+       } while (cur != NULL);
+
+       if (child->type == CMARK_NODE_DOCUMENT) {
+               return false;
+       }
+
+       switch (node->type) {
+       case CMARK_NODE_DOCUMENT:
+       case CMARK_NODE_BLOCK_QUOTE:
+       case CMARK_NODE_ITEM:
+               return S_is_block(child)
+                      && child->type != CMARK_NODE_ITEM;
+
+       case CMARK_NODE_LIST:
+               return child->type == CMARK_NODE_ITEM;
+
+       case CMARK_NODE_PARAGRAPH:
+       case CMARK_NODE_HEADER:
+       case CMARK_NODE_EMPH:
+       case CMARK_NODE_STRONG:
+       case CMARK_NODE_LINK:
+       case CMARK_NODE_IMAGE:
+               return S_is_inline(child);
+
+       default:
+               break;
+       }
+
+       return false;
+}
+
 cmark_node*
 cmark_node_new(cmark_node_type type)
 {
@@ -39,7 +106,9 @@ void S_free_nodes(cmark_node *e)
 {
        cmark_node *next;
        while (e != NULL) {
-               cmark_strbuf_free(&e->string_content);
+               if (S_is_block(e)) {
+                       cmark_strbuf_free(&e->string_content);
+               }
                switch (e->type) {
                case NODE_CODE_BLOCK:
                        cmark_chunk_free(&e->as.code.info);
@@ -53,8 +122,8 @@ void S_free_nodes(cmark_node *e)
                        break;
                case NODE_LINK:
                case NODE_IMAGE:
-                       free(e->as.link.url);
-                       free(e->as.link.title);
+                       cmark_chunk_free(&e->as.link.url);
+                       cmark_chunk_free(&e->as.link.title);
                        break;
                default:
                        break;
@@ -189,13 +258,24 @@ cmark_node_last_child(cmark_node *node)
        }
 }
 
-static char*
-S_strdup(const char *str)
+void*
+cmark_node_get_user_data(cmark_node *node)
 {
-       size_t size = strlen(str) + 1;
-       char *dup = (char *)malloc(size);
-       memcpy(dup, str, size);
-       return dup;
+       if (node == NULL) {
+               return NULL;
+       } else {
+               return node->user_data;
+       }
+}
+
+int
+cmark_node_set_user_data(cmark_node *node, void *user_data)
+{
+       if (node == NULL) {
+               return 0;
+       }
+       node->user_data = user_data;
+       return 1;
 }
 
 const char*
@@ -448,7 +528,7 @@ cmark_node_get_url(cmark_node *node)
        switch (node->type) {
        case NODE_LINK:
        case NODE_IMAGE:
-               return (char *)node->as.link.url;
+               return cmark_chunk_to_cstr(&node->as.link.url);
        default:
                break;
        }
@@ -466,8 +546,7 @@ cmark_node_set_url(cmark_node *node, const char *url)
        switch (node->type) {
        case NODE_LINK:
        case NODE_IMAGE:
-               free(node->as.link.url);
-               node->as.link.url = (unsigned char *)S_strdup(url);
+               cmark_chunk_set_cstr(&node->as.link.url, url);
                return 1;
        default:
                break;
@@ -486,7 +565,7 @@ cmark_node_get_title(cmark_node *node)
        switch (node->type) {
        case NODE_LINK:
        case NODE_IMAGE:
-               return (char *)node->as.link.title;
+               return cmark_chunk_to_cstr(&node->as.link.title);
        default:
                break;
        }
@@ -504,8 +583,7 @@ cmark_node_set_title(cmark_node *node, const char *title)
        switch (node->type) {
        case NODE_LINK:
        case NODE_IMAGE:
-               free(node->as.link.title);
-               node->as.link.title = (unsigned char *)S_strdup(title);
+               cmark_chunk_set_cstr(&node->as.link.title, title);
                return 1;
        default:
                break;
@@ -550,73 +628,6 @@ cmark_node_get_end_column(cmark_node *node)
        return node->end_column;
 }
 
-static inline bool
-S_is_block(cmark_node *node)
-{
-       if (node == NULL) {
-               return false;
-       }
-       return node->type >= CMARK_NODE_FIRST_BLOCK
-              && node->type <= CMARK_NODE_LAST_BLOCK;
-}
-
-static inline bool
-S_is_inline(cmark_node *node)
-{
-       if (node == NULL) {
-               return false;
-       }
-       return node->type >= CMARK_NODE_FIRST_INLINE
-              && node->type <= CMARK_NODE_LAST_INLINE;
-}
-
-static bool
-S_can_contain(cmark_node *node, cmark_node *child)
-{
-       cmark_node *cur;
-
-       if (node == NULL || child == NULL) {
-               return false;
-       }
-
-       // Verify that child is not an ancestor of node or equal to node.
-       cur = node;
-       do {
-               if (cur == child) {
-                       return false;
-               }
-               cur = cur->parent;
-       } while (cur != NULL);
-
-       if (child->type == CMARK_NODE_DOCUMENT) {
-               return false;
-       }
-
-       switch (node->type) {
-       case CMARK_NODE_DOCUMENT:
-       case CMARK_NODE_BLOCK_QUOTE:
-       case CMARK_NODE_ITEM:
-               return S_is_block(child)
-                      && child->type != CMARK_NODE_ITEM;
-
-       case CMARK_NODE_LIST:
-               return child->type == CMARK_NODE_ITEM;
-
-       case CMARK_NODE_PARAGRAPH:
-       case CMARK_NODE_HEADER:
-       case CMARK_NODE_EMPH:
-       case CMARK_NODE_STRONG:
-       case CMARK_NODE_LINK:
-       case CMARK_NODE_IMAGE:
-               return S_is_inline(child);
-
-       default:
-               break;
-       }
-
-       return false;
-}
-
 // Unlink a node without adjusting its next, prev, and parent pointers.
 static void
 S_node_unlink(cmark_node *node)

http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/89c7b809/compiler/modules/CommonMark/src/node.h
----------------------------------------------------------------------
diff --git a/compiler/modules/CommonMark/src/node.h b/compiler/modules/CommonMark/src/node.h
index c0c43d3..b579408 100644
--- a/compiler/modules/CommonMark/src/node.h
+++ b/compiler/modules/CommonMark/src/node.h
@@ -6,6 +6,7 @@ extern "C" {
 #endif
 
 #include <stdio.h>
+#include <stdint.h>
 
 #include "cmark.h"
 #include "buffer.h"
@@ -22,12 +23,13 @@ typedef struct {
 } cmark_list;
 
 typedef struct {
-       bool              fenced;
-       int               fence_length;
-       int               fence_offset;
-       unsigned char     fence_char;
        cmark_chunk       info;
        cmark_chunk       literal;
+       int               fence_length;
+       /* fence_offset must be 0-3, so we can use int8_t */
+       int8_t            fence_offset;
+       unsigned char     fence_char;
+       bool              fenced;
 } cmark_code;
 
 typedef struct {
@@ -36,23 +38,26 @@ typedef struct {
 } cmark_header;
 
 typedef struct {
-       unsigned char *url;
-       unsigned char *title;
+       cmark_chunk url;
+       cmark_chunk title;
 } cmark_link;
 
 struct cmark_node {
-       cmark_node_type type;
-
        struct cmark_node *next;
        struct cmark_node *prev;
        struct cmark_node *parent;
        struct cmark_node *first_child;
        struct cmark_node *last_child;
 
+       void *user_data;
+
        int start_line;
        int start_column;
        int end_line;
        int end_column;
+
+       cmark_node_type type;
+
        bool open;
        bool last_line_blank;
 
@@ -64,6 +69,7 @@ struct cmark_node {
                cmark_code        code;
                cmark_header      header;
                cmark_link        link;
+               int               html_block_type;
        } as;
 };
 

http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/89c7b809/compiler/modules/CommonMark/src/parser.h
----------------------------------------------------------------------
diff --git a/compiler/modules/CommonMark/src/parser.h b/compiler/modules/CommonMark/src/parser.h
index 3c8def9..01a7aeb 100644
--- a/compiler/modules/CommonMark/src/parser.h
+++ b/compiler/modules/CommonMark/src/parser.h
@@ -16,9 +16,16 @@ struct cmark_parser {
        struct cmark_node* root;
        struct cmark_node* current;
        int line_number;
+       bufsize_t offset;
+       bufsize_t column;
+       bufsize_t first_nonspace;
+       bufsize_t first_nonspace_column;
+       int indent;
+       bool blank;
        cmark_strbuf *curline;
-       int last_line_length;
+       bufsize_t last_line_length;
        cmark_strbuf *linebuf;
+       int options;
 };
 
 #ifdef __cplusplus

http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/89c7b809/compiler/modules/CommonMark/src/references.c
----------------------------------------------------------------------
diff --git a/compiler/modules/CommonMark/src/references.c b/compiler/modules/CommonMark/src/references.c
index 37bf4cb..1d3d56d 100644
--- a/compiler/modules/CommonMark/src/references.c
+++ b/compiler/modules/CommonMark/src/references.c
@@ -20,8 +20,8 @@ static void reference_free(cmark_reference *ref)
 {
        if(ref != NULL) {
                free(ref->label);
-               free(ref->url);
-               free(ref->title);
+               cmark_chunk_free(&ref->url);
+               cmark_chunk_free(&ref->title);
                free(ref);
        }
 }

http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/89c7b809/compiler/modules/CommonMark/src/references.h
----------------------------------------------------------------------
diff --git a/compiler/modules/CommonMark/src/references.h b/compiler/modules/CommonMark/src/references.h
index 69325bb..a360cd5 100644
--- a/compiler/modules/CommonMark/src/references.h
+++ b/compiler/modules/CommonMark/src/references.h
@@ -12,8 +12,8 @@ extern "C" {
 struct cmark_reference {
        struct cmark_reference *next;
        unsigned char *label;
-       unsigned char *url;
-       unsigned char *title;
+       cmark_chunk url;
+       cmark_chunk title;
        unsigned int hash;
 };
 

http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/89c7b809/compiler/modules/CommonMark/src/render.c
----------------------------------------------------------------------
diff --git a/compiler/modules/CommonMark/src/render.c b/compiler/modules/CommonMark/src/render.c
new file mode 100644
index 0000000..2f1faac
--- /dev/null
+++ b/compiler/modules/CommonMark/src/render.c
@@ -0,0 +1,186 @@
+#include <stdlib.h>
+#include "buffer.h"
+#include "chunk.h"
+#include "cmark.h"
+#include "utf8.h"
+#include "render.h"
+
+static inline
+void S_cr(cmark_renderer *renderer)
+{
+       if (renderer->need_cr < 1) {
+               renderer->need_cr = 1;
+       }
+}
+
+static inline
+void S_blankline(cmark_renderer *renderer)
+{
+       if (renderer->need_cr < 2) {
+               renderer->need_cr = 2;
+       }
+}
+
+static
+void S_out(cmark_renderer *renderer,
+           const char *source,
+           bool wrap,
+           cmark_escaping escape)
+{
+       int length = cmark_strbuf_safe_strlen(source);
+       unsigned char nextc;
+       int32_t c;
+       int i = 0;
+       int len;
+       cmark_chunk remainder = cmark_chunk_literal("");
+       int k = renderer->buffer->size - 1;
+
+       wrap = wrap && !renderer->no_wrap;
+
+       if (renderer->in_tight_list_item && renderer->need_cr > 1) {
+               renderer->need_cr = 1;
+       }
+       while (renderer->need_cr) {
+               if (k < 0 || renderer->buffer->ptr[k] == '\n') {
+                       k -= 1;
+               } else {
+                       cmark_strbuf_putc(renderer->buffer, '\n');
+                       if (renderer->need_cr > 1) {
+                               cmark_strbuf_put(renderer->buffer, renderer->prefix->ptr,
+                                                renderer->prefix->size);
+                       }
+               }
+               renderer->column = 0;
+               renderer->begin_line = true;
+               renderer->need_cr -= 1;
+       }
+
+       while (i < length) {
+               if (renderer->begin_line) {
+                       cmark_strbuf_put(renderer->buffer, renderer->prefix->ptr,
+                                        renderer->prefix->size);
+                       // note: this assumes prefix is ascii:
+                       renderer->column = renderer->prefix->size;
+               }
+
+               len = utf8proc_iterate((const uint8_t *)source + i, length - i, &c);
+               if (len == -1) { // error condition
+                       return;  // return without rendering rest of string
+               }
+               nextc = source[i + len];
+               if (c == 32 && wrap) {
+                       if (!renderer->begin_line) {
+                               cmark_strbuf_putc(renderer->buffer, ' ');
+                               renderer->column += 1;
+                               renderer->begin_line = false;
+                               renderer->last_breakable = renderer->buffer->size -
+                                                          1;
+                               // skip following spaces
+                               while (source[i + 1] == ' ') {
+                                       i++;
+                               }
+                       }
+
+               } else if (c == 10) {
+                       cmark_strbuf_putc(renderer->buffer, '\n');
+                       renderer->column = 0;
+                       renderer->begin_line = true;
+                       renderer->last_breakable = 0;
+               } else if (escape == LITERAL) {
+                       cmark_render_code_point(renderer, c);
+                       renderer->begin_line = false;
+               } else {
+                       (renderer->outc)(renderer, escape, c, nextc);
+                       renderer->begin_line = false;
+               }
+
+               // If adding the character went beyond width, look for an
+               // earlier place where the line could be broken:
+               if (renderer->width > 0 &&
+                   renderer->column > renderer->width &&
+                   !renderer->begin_line &&
+                   renderer->last_breakable > 0) {
+
+                       // copy from last_breakable to remainder
+                       cmark_chunk_set_cstr(&remainder, (char *) renderer->buffer->ptr + renderer->last_breakable + 1);
+                       // truncate at last_breakable
+                       cmark_strbuf_truncate(renderer->buffer, renderer->last_breakable);
+                       // add newline, prefix, and remainder
+                       cmark_strbuf_putc(renderer->buffer, '\n');
+                       cmark_strbuf_put(renderer->buffer, renderer->prefix->ptr,
+                                        renderer->prefix->size);
+                       cmark_strbuf_put(renderer->buffer, remainder.data, remainder.len);
+                       renderer->column = renderer->prefix->size + remainder.len;
+                       cmark_chunk_free(&remainder);
+                       renderer->last_breakable = 0;
+                       renderer->begin_line = false;
+               }
+
+               i += len;
+       }
+}
+
+// Assumes no newlines, assumes ascii content:
+void
+cmark_render_ascii(cmark_renderer* renderer, const char* s)
+{
+       int origsize = renderer->buffer->size;
+       cmark_strbuf_puts(renderer->buffer, s);
+       renderer->column += renderer->buffer->size - origsize;
+}
+
+void
+cmark_render_code_point(cmark_renderer *renderer, uint32_t c)
+{
+       utf8proc_encode_char(c, renderer->buffer);
+       renderer->column += 1;
+}
+
+char*
+cmark_render(cmark_node *root,
+             int options,
+             int width,
+             void (*outc)(cmark_renderer*,
+                          cmark_escaping,
+                          int32_t,
+                          unsigned char),
+             int (*render_node)(cmark_renderer *renderer,
+                                cmark_node *node,
+                                cmark_event_type ev_type,
+                                int options))
+{
+       cmark_strbuf pref = GH_BUF_INIT;
+       cmark_strbuf buf = GH_BUF_INIT;
+       cmark_node *cur;
+       cmark_event_type ev_type;
+       char *result;
+       cmark_iter *iter = cmark_iter_new(root);
+
+       cmark_renderer renderer = { &buf, &pref, 0, width,
+                                   0, 0, true, false, false,
+                                   outc, S_cr, S_blankline, S_out
+                                 };
+
+       while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) {
+               cur = cmark_iter_get_node(iter);
+               if (!render_node(&renderer, cur, ev_type, options)) {
+                       // a false value causes us to skip processing
+                       // the node's contents.  this is used for
+                       // autolinks.
+                       cmark_iter_reset(iter, cur, CMARK_EVENT_EXIT);
+               }
+       }
+
+       // ensure final newline
+       if (renderer.buffer->ptr[renderer.buffer->size - 1] != '\n') {
+               cmark_strbuf_putc(renderer.buffer, '\n');
+       }
+
+       result = (char *)cmark_strbuf_detach(renderer.buffer);
+
+       cmark_iter_free(iter);
+       cmark_strbuf_free(renderer.prefix);
+       cmark_strbuf_free(renderer.buffer);
+
+       return result;
+}

http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/89c7b809/compiler/modules/CommonMark/src/render.h
----------------------------------------------------------------------
diff --git a/compiler/modules/CommonMark/src/render.h b/compiler/modules/CommonMark/src/render.h
new file mode 100644
index 0000000..ca541bc
--- /dev/null
+++ b/compiler/modules/CommonMark/src/render.h
@@ -0,0 +1,66 @@
+#ifndef CMARK_RENDER_H
+#define CMARK_RENDER_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdlib.h>
+#include "buffer.h"
+#include "chunk.h"
+
+typedef enum  {
+       LITERAL,
+       NORMAL,
+       TITLE,
+       URL
+} cmark_escaping;
+
+struct cmark_renderer {
+       cmark_strbuf* buffer;
+       cmark_strbuf* prefix;
+       int column;
+       int width;
+       int need_cr;
+       bufsize_t last_breakable;
+       bool begin_line;
+       bool no_wrap;
+       bool in_tight_list_item;
+       void (*outc)(struct cmark_renderer*,
+                    cmark_escaping,
+                    int32_t,
+                    unsigned char);
+       void (*cr)(struct cmark_renderer*);
+       void (*blankline)(struct cmark_renderer*);
+       void (*out)(struct cmark_renderer*,
+                   const char *,
+                   bool,
+                   cmark_escaping);
+};
+
+typedef struct cmark_renderer cmark_renderer;
+
+void
+cmark_render_ascii(cmark_renderer *renderer, const char* s);
+
+void
+cmark_render_code_point(cmark_renderer *renderer, uint32_t c);
+
+char*
+cmark_render(cmark_node *root,
+            int options,
+            int width,
+            void (*outc)(cmark_renderer*,
+                         cmark_escaping,
+                         int32_t,
+                         unsigned char),
+            int (*render_node)(cmark_renderer *renderer,
+                               cmark_node *node,
+                               cmark_event_type ev_type,
+                               int options));
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif

Reply via email to