Switch man page creator to Markdown
Project: http://git-wip-us.apache.org/repos/asf/lucy-clownfish/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucy-clownfish/commit/3561512c
Tree: http://git-wip-us.apache.org/repos/asf/lucy-clownfish/tree/3561512c
Diff: http://git-wip-us.apache.org/repos/asf/lucy-clownfish/diff/3561512c

Branch: refs/heads/markdown_v2
Commit: 3561512c465fc44831682469df3af053a1797796
Parents: c53bd88
Author: Nick Wellnhofer <[email protected]>
Authored: Sun Nov 9 16:38:23 2014 +0100
Committer: Nick Wellnhofer <[email protected]>
Committed: Tue Dec 2 18:31:14 2014 +0100

----------------------------------------------------------------------
 compiler/src/CFCCMan.c | 360 +++++++++++++++++++++++++++++++++-----------
 1 file changed, 269 insertions(+), 91 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/lucy-clownfish/blob/3561512c/compiler/src/CFCCMan.c
----------------------------------------------------------------------
diff --git a/compiler/src/CFCCMan.c b/compiler/src/CFCCMan.c
index 17ad340..650eab5 100644
--- a/compiler/src/CFCCMan.c
+++ b/compiler/src/CFCCMan.c
@@ -16,6 +16,8 @@
 
 #include <string.h>
 
+#include <cmark.h>
+
 #include "charmony.h"
 #include "CFCCMan.h"
 #include "CFCClass.h"
@@ -25,6 +27,7 @@
 #include "CFCParamList.h"
 #include "CFCSymbol.h"
 #include "CFCType.h"
+#include "CFCUri.h"
 #include "CFCUtil.h"
 #include "CFCVariable.h"
 
@@ -33,12 +36,6 @@
 #define false 0
 #endif
 
-typedef struct CFCPodLink {
-    size_t total_size;
-    const char *text;
-    size_t text_size;
-} CFCPodLink;
-
 static char*
 S_man_create_name(CFCClass *klass);
 
@@ -68,10 +65,13 @@ static char*
 S_man_create_inheritance(CFCClass *klass);
 
 static char*
-S_man_escape_content(const char *content);
+S_md_to_man(const char *md, int needs_indent);
+
+static char*
+S_nodes_to_man(cmark_node *node, int needs_indent);
 
-static void
-S_parse_pod_link(const char *content, CFCPodLink *pod_link);
+static char*
+S_man_escape(const char *content);
 
 char*
 CFCCMan_create_man_page(CFCClass *klass) {
@@ -125,17 +125,19 @@ S_man_create_name(CFCClass *klass) {
     char *result = CFCUtil_strdup(".SH NAME\n");
     result = CFCUtil_cat(result, CFCClass_get_class_name(klass), NULL);
 
+    const char *raw_brief = NULL;
     CFCDocuComment *docucom = CFCClass_get_docucomment(klass);
     if (docucom) {
-        const char *raw_brief = CFCDocuComment_get_brief(docucom);
-        if (raw_brief && raw_brief[0] != '\0') {
-            char *brief = S_man_escape_content(raw_brief);
-            result = CFCUtil_cat(result, " \\- ", brief, NULL);
-            FREEMEM(brief);
-        }
+        raw_brief = CFCDocuComment_get_brief(docucom);
+    }
+    if (raw_brief && raw_brief[0] != '\0') {
+        char *brief = S_md_to_man(raw_brief, false);
+        result = CFCUtil_cat(result, " \\- ", brief, NULL);
+        FREEMEM(brief);
+    }
+    else {
+        result = CFCUtil_cat(result, "\n", NULL);
     }
-
-    result = CFCUtil_cat(result, "\n", NULL);
 
     return result;
 }
@@ -156,8 +158,8 @@ S_man_create_description(CFCClass *klass) {
     const char *raw_description = CFCDocuComment_get_long(docucom);
     if (!raw_description || raw_description[0] == '\0') { return result; }
 
-    char *description = S_man_escape_content(raw_description);
-    result = CFCUtil_cat(result, ".SH DESCRIPTION\n", description, "\n", NULL);
+    char *description = S_md_to_man(raw_description, false);
+    result = CFCUtil_cat(result, ".SH DESCRIPTION\n", description, NULL);
     FREEMEM(description);
 
     return result;
@@ -298,11 +300,12 @@ S_man_create_func(CFCClass *klass, CFCFunction *func, const char *short_sym,
     const char *pattern =
         ".TP\n"
         ".B %s\n"
-        ".na\n"
+        ".nf\n"
+        ".fam C\n"
         "%s%s\n"
-        ".br\n"
         "%s"
-        ".ad\n";
+        ".fam\n"
+        ".fi\n";
 
     char *result = CFCUtil_sprintf(pattern, short_sym, return_type_c,
                                    incremented, param_list);
@@ -325,8 +328,8 @@ S_man_create_func(CFCClass *klass, CFCFunction *func, const char *short_sym,
     if (docucomment) {
         // Description
         const char *raw_desc = CFCDocuComment_get_description(docucomment);
-        char *desc = S_man_escape_content(raw_desc);
-        result = CFCUtil_cat(result, ".IP\n", desc, "\n", NULL);
+        char *desc = S_md_to_man(raw_desc, true);
+        result = CFCUtil_cat(result, ".IP\n", desc, NULL);
         FREEMEM(desc);
 
         // Params
@@ -337,9 +340,9 @@ S_man_create_func(CFCClass *klass, CFCFunction *func, const char *short_sym,
         if (param_names[0]) {
             result = CFCUtil_cat(result, ".RS\n", NULL);
             for (size_t i = 0; param_names[i] != NULL; i++) {
-                char *doc = S_man_escape_content(param_docs[i]);
+                char *doc = S_md_to_man(param_docs[i], true);
                 result = CFCUtil_cat(result, ".TP\n.I ", param_names[i],
-                                     "\n", doc, "\n", NULL);
+                                     "\n", doc, NULL);
                 FREEMEM(doc);
             }
             result = CFCUtil_cat(result, ".RE\n", NULL);
@@ -348,9 +351,8 @@ S_man_create_func(CFCClass *klass, CFCFunction *func, const char *short_sym,
         // Return value
         const char *retval_doc = CFCDocuComment_get_retval(docucomment);
         if (retval_doc && strlen(retval_doc)) {
-            char *doc = S_man_escape_content(retval_doc);
-            result = CFCUtil_cat(result, ".IP\n.B Returns:\n", doc, "\n",
-                                 NULL);
+            char *doc = S_md_to_man(retval_doc, true);
+            result = CFCUtil_cat(result, ".IP\n.B Returns:\n", doc, NULL);
             FREEMEM(doc);
         }
     }
@@ -375,7 +377,7 @@ S_man_create_param_list(CFCFunction *func, const char *full_sym) {
         const char *type_c = CFCType_to_c(type);
         const char *name = CFCVariable_micro_sym(variable);
 
-        result = CFCUtil_cat(result, "\n.br\n.RB \" ", type_c, " \" ", name,
+        result = CFCUtil_cat(result, "\n.RB \" ", type_c, " \" ", name,
                              NULL);
 
         if (variables[i+1] || CFCType_decremented(type)) {
@@ -383,14 +385,14 @@ S_man_create_param_list(CFCFunction *func, const char *full_sym) {
             if (variables[i+1]) {
                 result = CFCUtil_cat(result, ",", NULL);
             }
-            else {
+            if (CFCType_decremented(type)) {
                 result = CFCUtil_cat(result, " // decremented", NULL);
             }
             result = CFCUtil_cat(result, "\"", NULL);
         }
     }
 
-    result = CFCUtil_cat(result, "\n.br\n);\n.br\n", NULL);
+    result = CFCUtil_cat(result, "\n);\n", NULL);
 
     return result;
 }
@@ -415,12 +417,235 @@ S_man_create_inheritance(CFCClass *klass) {
 }
 
 static char*
-S_man_escape_content(const char *content) {
+S_md_to_man(const char *md, int needs_indent) {
+    cmark_node *doc = cmark_parse_document(md, strlen(md));
+    char *result = S_nodes_to_man(doc, needs_indent);
+    cmark_node_free(doc);
+
+    return result;
+}
+
+static char*
+S_nodes_to_man(cmark_node *node, int needs_indent) {
+    char *result = CFCUtil_strdup("");
+    int has_indent = needs_indent;
+    int has_vspace = true;
+
+    while (node) {
+        cmark_node_type type = cmark_node_get_type(node);
+
+        switch (type) {
+            case NODE_DOCUMENT: {
+                cmark_node *child = cmark_node_first_child(node);
+                char *children_man = S_nodes_to_man(child, needs_indent);
+                result = CFCUtil_cat(result, children_man, NULL);
+                FREEMEM(children_man);
+                break;
+            }
+
+            case NODE_PARAGRAPH: {
+                if (needs_indent && !has_indent) {
+                    result = CFCUtil_cat(result, ".IP\n", NULL);
+                    has_indent = true;
+                }
+                else if (!needs_indent && has_indent) {
+                    result = CFCUtil_cat(result, ".P\n", NULL);
+                    has_indent = false;
+                }
+                else if (!has_vspace) {
+                    result = CFCUtil_cat(result, "\n", NULL);
+                }
+
+                cmark_node *child = cmark_node_first_child(node);
+                char *children_man = S_nodes_to_man(child, needs_indent);
+                result = CFCUtil_cat(result, children_man, "\n", NULL);
+                FREEMEM(children_man);
+
+                has_vspace = false;
+
+                break;
+            }
+
+            case NODE_BLOCK_QUOTE: {
+                if (needs_indent) {
+                    result = CFCUtil_cat(result, ".RS\n", NULL);
+                }
+
+                cmark_node *child = cmark_node_first_child(node);
+                char *children_man = S_nodes_to_man(child, true);
+                result = CFCUtil_cat(result, ".IP\n", children_man, NULL);
+                FREEMEM(children_man);
+
+                if (needs_indent) {
+                    result = CFCUtil_cat(result, ".RE\n", NULL);
+                    has_indent = false;
+                }
+                else {
+                    has_indent = true;
+                }
+
+                break;
+            }
+
+            case NODE_LIST_ITEM: {
+                cmark_node *child = cmark_node_first_child(node);
+                char *children_man = S_nodes_to_man(child, true);
+                result = CFCUtil_cat(result, ".IP \\(bu\n", children_man,
+                                     NULL);
+                FREEMEM(children_man);
+                break;
+            }
+
+            case NODE_LIST: {
+                if (needs_indent) {
+                    result = CFCUtil_cat(result, ".RS\n", NULL);
+                }
+
+                cmark_node *child = cmark_node_first_child(node);
+                char *children_man = S_nodes_to_man(child, needs_indent);
+                result = CFCUtil_cat(result, children_man, NULL);
+                FREEMEM(children_man);
+
+                if (needs_indent) {
+                    result = CFCUtil_cat(result, ".RE\n", NULL);
+                    has_indent = false;
+                }
+                else {
+                    has_indent = true;
+                }
+
+                break;
+            }
+
+            case NODE_HEADER: {
+                cmark_node *child = cmark_node_first_child(node);
+                char *children_man = S_nodes_to_man(child, needs_indent);
+                result = CFCUtil_cat(result, ".SS\n", children_man, "\n", NULL);
+                FREEMEM(children_man);
+                has_indent = false;
+                has_vspace = true;
+                break;
+            }
+
+            case NODE_CODE_BLOCK: {
+                if (needs_indent) {
+                    result = CFCUtil_cat(result, ".RS\n", NULL);
+                }
+
+                const char *content = cmark_node_get_string_content(node);
+                char *escaped = S_man_escape(content);
+                result = CFCUtil_cat(result, ".IP\n.nf\n.fam C\n", escaped,
+                                     ".fam\n.fi\n", NULL);
+                FREEMEM(escaped);
+
+                if (needs_indent) {
+                    result = CFCUtil_cat(result, ".RE\n", NULL);
+                    has_indent = false;
+                }
+                else {
+                    has_indent = true;
+                }
+
+                break;
+            }
+
+            case NODE_HTML:
+                CFCUtil_warn("HTML not supported in man pages");
+                break;
+
+            case NODE_HRULE:
+                break;
+
+            case NODE_REFERENCE_DEF:
+                break;
+
+            case NODE_TEXT: {
+                const char *content = cmark_node_get_string_content(node);
+                char *escaped = S_man_escape(content);
+                result = CFCUtil_cat(result, escaped, NULL);
+                FREEMEM(escaped);
+                break;
+            }
+
+            case NODE_LINEBREAK:
+                result = CFCUtil_cat(result, "\n.br\n", NULL);
+                break;
+
+            case NODE_SOFTBREAK:
+                result = CFCUtil_cat(result, "\n", NULL);
+                break;
+
+            case NODE_INLINE_CODE: {
+                const char *content = cmark_node_get_string_content(node);
+                char *escaped = S_man_escape(content);
+                result = CFCUtil_cat(result, "\\FC", escaped, "\\F[]", NULL);
+                FREEMEM(escaped);
+                break;
+            }
+
+            case NODE_INLINE_HTML: {
+                const char *html = cmark_node_get_string_content(node);
+                CFCUtil_warn("HTML not supported in man pages: %s", html);
+                break;
+            }
+
+            case NODE_LINK: {
+                cmark_node *child = cmark_node_first_child(node);
+                char *children_man = S_nodes_to_man(child, needs_indent);
+                const char *url = cmark_node_get_url(node);
+                if (CFCUri_is_clownfish_uri(url)) {
+                    result = CFCUtil_cat(result, children_man, NULL);
+                }
+                else {
+                    result = CFCUtil_cat(result, "\n.UR ", url, "\n",
+                                         children_man, "\n.UE\n",
+                                         NULL);
+                }
+                FREEMEM(children_man);
+                break;
+            }
+
+            case NODE_IMAGE:
+                CFCUtil_warn("Images not supported in man pages");
+                break;
+
+            case NODE_STRONG: {
+                cmark_node *child = cmark_node_first_child(node);
+                char *children_man = S_nodes_to_man(child, needs_indent);
+                result = CFCUtil_cat(result, "\\fB", children_man, "\\f[]",
+                                     NULL);
+                FREEMEM(children_man);
+                break;
+            }
+
+            case NODE_EMPH: {
+                cmark_node *child = cmark_node_first_child(node);
+                char *children_man = S_nodes_to_man(child, needs_indent);
+                result = CFCUtil_cat(result, "\\fI", children_man, "\\f[]",
+                                     NULL);
+                FREEMEM(children_man);
+                break;
+            }
+
+            default:
+                CFCUtil_die("Invalid cmark node type: %d", type);
+                break;
+        }
+
+        node = cmark_node_next(node);
+    }
+
+    return result;
+}
+
+static char*
+S_man_escape(const char *content) {
+    size_t len = strlen(content);
     size_t result_len = 0;
-    size_t result_cap = strlen(content) + 256;
+    size_t result_cap = len + 256;
     char *result = (char*)MALLOCATE(result_cap + 1);
 
-    for (size_t i = 0; content[i]; i++) {
+    for (size_t i = 0; i < len; i++) {
         const char *subst = content + i;
         size_t subst_size = 1;
 
@@ -435,36 +660,18 @@ S_man_escape_content(const char *content) {
                 subst = "\\-";
                 subst_size = 2;
                 break;
-            case '\n':
-                // Escape dot after newline.
-                if (content[i+1] == '.') {
-                    subst = "\n\\";
-                    subst_size = 2;
-                }
-                break;
-            case '<':
-                // <code> markup.
-                if (strncmp(content + i + 1, "code>", 5) == 0) {
-                    subst = "\\fI";
+            case '.':
+                // Escape dot at start of line.
+                if (i == 0 || content[i-1] == '\n') {
+                    subst = "\\&.";
                     subst_size = 3;
-                    i += 5;
-                }
-                else if (strncmp(content + i + 1, "/code>", 6) == 0) {
-                    subst = "\\fP";
-                    subst_size = 3;
-                    i += 6;
                 }
                 break;
-            case 'L':
-                if (content[i+1] == '<') {
-                    // POD-style link.
-                    struct CFCPodLink pod_link;
-                    S_parse_pod_link(content + i + 2, &pod_link);
-                    if (pod_link.total_size) {
-                        subst = pod_link.text;
-                        subst_size = pod_link.text_size;
-                        i += pod_link.total_size + 1;
-                    }
+            case '\'':
+                // Escape single quote at start of line.
+                if (i == 0 || content[i-1] == '\n') {
+                    subst = "\\&'";
+                    subst_size = 3;
                 }
                 break;
             default:
@@ -485,32 +692,3 @@ S_man_escape_content(const char *content) {
     return result;
 }
 
-// Quick and dirty parsing of POD links. The syntax isn't fully supported
-// and the result isn't man-escaped. But it should be good enough for now
-// since at some point we'll switch to another format anyway.
-static void
-S_parse_pod_link(const char *content, CFCPodLink *pod_link) {
-    int in_text = true;
-
-    for (size_t i = 0; i < 256 && content[i]; ++i) {
-        if (content[i] == '|') {
-            if (in_text) {
-                pod_link->text_size = i;
-                in_text = false;
-            }
-        }
-        else if (content[i] == '>') {
-            pod_link->total_size = i + 1;
-            pod_link->text = content;
-            if (in_text) {
-                pod_link->text_size = i;
-            }
-            return;
-        }
-    }
-
-    pod_link->total_size = 0;
-    pod_link->text = NULL;
-    pod_link->text_size = 0;
-}
-
