q66 pushed a commit to branch master.

http://git.enlightenment.org/core/efl.git/commit/?id=d2105f99d4a4b562bef1641ecc5f371d0e5ae7d7

commit d2105f99d4a4b562bef1641ecc5f371d0e5ae7d7
Author: Daniel Kolesa <d.kol...@osg.samsung.com>
Date:   Thu Dec 1 16:37:01 2016 +0100

    eolian: add documentation tokenizer
    
    This provides an API to tokenize Eolian docstrings. It does not
    yet switch the rest of the infra to it, but it does have tests.
    It doesn't verify correctness of references, as that's Eolian's
    job. Therefore it's also your job to provide it with strings that
    do not contain invalid references. Lua bindings are to come and
    a complete switch will be done later.
    
    @feature
---
 src/lib/eolian/Eolian.h           |  85 ++++++++++++++
 src/lib/eolian/eolian_database.c  | 241 ++++++++++++++++++++++++++++++++++++++
 src/tests/eolian/eolian_parsing.c |  82 +++++++++++++
 3 files changed, 408 insertions(+)

diff --git a/src/lib/eolian/Eolian.h b/src/lib/eolian/Eolian.h
index ef17102..57c5669 100644
--- a/src/lib/eolian/Eolian.h
+++ b/src/lib/eolian/Eolian.h
@@ -336,6 +336,25 @@ typedef enum
    EOLIAN_DECL_VAR
 } Eolian_Declaration_Type;
 
+/* Kinds of tokens produced by eolian_documentation_tokenize(). */
+typedef enum
+{
+   /* empty state; also set once the input is NULL or exhausted */
+   EOLIAN_DOC_TOKEN_UNKNOWN = -1,
+   /* plain text between other tokens */
+   EOLIAN_DOC_TOKEN_TEXT,
+   /* reference introduced by '@' */
+   EOLIAN_DOC_TOKEN_REF,
+   /* event reference of the form @[...] */
+   EOLIAN_DOC_TOKEN_REF_EVENT,
+   /* leading "Note: " mark */
+   EOLIAN_DOC_TOKEN_MARK_NOTE,
+   /* leading "Warning: " mark */
+   EOLIAN_DOC_TOKEN_MARK_WARNING,
+   /* leading "Remark: " mark */
+   EOLIAN_DOC_TOKEN_MARK_REMARK,
+   /* leading "TODO: " mark */
+   EOLIAN_DOC_TOKEN_MARK_TODO,
+   /* monospace markup of the form $word */
+   EOLIAN_DOC_TOKEN_MARKUP_MONOSPACE
+} Eolian_Doc_Token_Type;
+
+/* A single documentation token: its type plus the [text, text_end) span it
+ * covers inside the string being tokenized (the span is not a copy).
+ */
+typedef struct _Eolian_Doc_Token
+{
+   Eolian_Doc_Token_Type type;
+   /* first character of the token and one past its last character */
+   const char *text, *text_end;
+} Eolian_Doc_Token;
+
 /*
  * @brief Parse the given .eo or .eot file and fill the database.
  *
@@ -2293,6 +2312,72 @@ EAPI Eina_Stringshare 
*eolian_documentation_description_get(const Eolian_Documen
  */
 EAPI Eina_Stringshare *eolian_documentation_since_get(const 
Eolian_Documentation *doc);
 
+/*
+ * @brief Split a documentation string into individual paragraphs.
+ *
+ * The items of the resulting list are strings that must be freed with free().
+ *
+ * @param[in] doc the documentation string
+ * @return a list of allocated strings containing paragraphs
+ *
+ * @ingroup Eolian
+ */
+EAPI Eina_List *eolian_documentation_string_split(const char *doc);
+
+/*
+ * @brief Tokenize a documentation paragraph.
+ *
+ * This gradually splits the string into pieces (text, references, paragraph
+ * separators etc.) so that it can be more easily turned into a representation
+ * you want. On failure, token is initialized with EOLIAN_DOC_TOKEN_UNKNOWN.
+ *
+ * The function never allocates any memory and doesn't hold any state, instead
+ * the returned continuation has to be passed as first param on next iteration
+ * and you have to make sure the input data stays valid until you're completely
+ * done.
+ *
+ * The input string is assumed to be a single paragraph with all unnecessary
+ * whitespace already trimmed.
+ *
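+ * The token must not be NULL, as it is used to keep state between calls;
+ * initialize it with eolian_doc_token_init() before the first call. If the
+ * given token is NULL, NULL is returned without tokenizing anything.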
+ *
+ * @param[in] doc the documentation string
+ * @param[out] ret the token
+ * @return a continuation of the input string
+ *
+ * @ingroup Eolian
+ */
+EAPI const char *eolian_documentation_tokenize(const char *doc, 
Eolian_Doc_Token *ret);
+
+/*
+ * @brief Initialize a documentation token into an empty state.
+ *
+ * @param[out] tok the token to initialize (a NULL token is ignored)
+ */
+EAPI void eolian_doc_token_init(Eolian_Doc_Token *tok);
+
+/*
+ * @brief Get the type of a documentation token.
+ *
+ * @param[in] tok the token
+ * @return the token type
+ */
+EAPI Eolian_Doc_Token_Type eolian_doc_token_type_get(const Eolian_Doc_Token 
*tok);
+
+/*
+ * @brief Get the text of a documentation token.
+ *
+ * Works on every token type, but for unknown tokens it returns NULL.
+ * You need to free the text once you're done using normal free().
+ * This makes sure all escapes in the original doc comments are properly
+ * removed so you can use the string as-is.
+ *
+ * @param[in] tok the token
+ * @return the token text
+ */
+EAPI char *eolian_doc_token_text_get(const Eolian_Doc_Token *tok);
+
 #endif
 
 /**
diff --git a/src/lib/eolian/eolian_database.c b/src/lib/eolian/eolian_database.c
index 5153653..a8293aa 100644
--- a/src/lib/eolian/eolian_database.c
+++ b/src/lib/eolian/eolian_database.c
@@ -2,6 +2,7 @@
 # include "config.h"
 #endif
 
+#include <ctype.h>
 #include <libgen.h>
 #include <Eina.h>
 #include "eo_parser.h"
@@ -211,6 +212,246 @@ eolian_documentation_since_get(const Eolian_Documentation 
*doc)
    return doc->since;
 }
 
+/* Split doc into paragraphs separated by blank lines ("\n\n").
+ *
+ * Each paragraph is trimmed; paragraphs that end up empty are dropped.
+ * The returned list holds heap-allocated strings owned by the caller.
+ * Returns NULL when doc is NULL or contains no non-empty paragraph.
+ */
+EAPI Eina_List *
+eolian_documentation_string_split(const char *doc)
+{
+   EINA_SAFETY_ON_NULL_RETURN_VAL(doc, NULL);
+
+   Eina_List *paragraphs = NULL;
+   const char *start = doc;
+
+   for (;;)
+     {
+        /* end of the current paragraph, NULL for the last one */
+        const char *brk = strstr(start, "\n\n");
+        Eina_Strbuf *par = eina_strbuf_new();
+
+        if (!brk)
+          eina_strbuf_append(par, start);
+        else
+          eina_strbuf_append_length(par, start, brk - start);
+
+        eina_strbuf_trim(par);
+        if (eina_strbuf_length_get(par))
+          paragraphs = eina_list_append(paragraphs,
+                                        eina_strbuf_string_steal(par));
+        /* the buffer object itself still has to go, even after a steal */
+        eina_strbuf_free(par);
+
+        if (!brk)
+          break;
+
+        /* continue right after the two newlines */
+        start = brk + 2;
+     }
+
+   return paragraphs;
+}
+
+/* Advance *doc past one reference word: an identifier that starts with a
+ * letter or '_' and continues with letters, digits or '_'.
+ *
+ * Returns EINA_TRUE with *doc moved to the first character after the word,
+ * or EINA_FALSE with *doc left untouched when no word starts there.
+ */
+static Eina_Bool
+_skip_ref_word(const char **doc)
+{
+   const char *p = *doc;
+
+   /* <ctype.h> functions take an int that must be representable as
+    * unsigned char; passing a plain char holding a non-ASCII byte is
+    * undefined where char is signed, hence the casts
+    */
+   if ((p[0] != '_') && !isalpha((unsigned char)p[0]))
+     return EINA_FALSE;
+
+   while ((p[0] == '_') || isalnum((unsigned char)p[0]))
+     ++p;
+
+   *doc = p;
+   return EINA_TRUE;
+}
+
+/* Check whether doc starts with a well-formed reference.
+ *
+ * This makes sure the format is correct at least; it cannot verify the
+ * correctness of the reference itself (but Eolian will do it in its
+ * lexer, so there is nothing to worry about; all references are guaranteed
+ * to be right).
+ *
+ * Accepted forms are '@' followed by dot-separated words, optionally with
+ * a leading dot, and '@[' ... ']' for events, where the dot-separated words
+ * may additionally be followed by comma-separated ones. At least one word
+ * is required, so a lone '@' or '@[]' is not a reference.
+ *
+ * On success the reference type is returned and, if doc_end is not NULL,
+ * *doc_end is set to the first character after the reference. Otherwise
+ * EOLIAN_DOC_TOKEN_UNKNOWN is returned and *doc_end is left untouched.
+ */
+static Eolian_Doc_Token_Type
+_get_ref_token(const char *doc, const char **doc_end)
+{
+   /* not a ref at all, for convenience */
+   if (doc[0] != '@')
+     return EOLIAN_DOC_TOKEN_UNKNOWN;
+
+   ++doc;
+
+   Eina_Bool is_event = (doc[0] == '[');
+   if (is_event)
+     ++doc;
+
+   /* optional leading dot; the word check below rejects a dot that is
+    * not followed by the start of a word
+    */
+   if (doc[0] == '.')
+     ++doc;
+
+   /* a reference without any name in it is just text */
+   if (!_skip_ref_word(&doc))
+     return EOLIAN_DOC_TOKEN_UNKNOWN;
+
+   while (doc[0] == '.')
+     {
+        ++doc;
+        if (!_skip_ref_word(&doc))
+          {
+             /* the dot is not part of the reference (e.g. end of sentence) */
+             --doc;
+             break;
+          }
+     }
+
+   if (is_event)
+     {
+        while (doc[0] == ',')
+          {
+             ++doc;
+             if (!_skip_ref_word(&doc))
+               {
+                  /* the comma does not belong to the event name */
+                  --doc;
+                  break;
+               }
+          }
+
+        /* events have to be properly terminated */
+        if (doc[0] != ']')
+          return EOLIAN_DOC_TOKEN_UNKNOWN;
+        ++doc;
+     }
+
+   if (doc_end)
+     *doc_end = doc;
+
+   /* got a reference */
+   return is_event ? EOLIAN_DOC_TOKEN_REF_EVENT : EOLIAN_DOC_TOKEN_REF;
+}
+
+/* Produce the next token of a documentation paragraph.
+ *
+ * ret doubles as the tokenizer state: while its type is
+ * EOLIAN_DOC_TOKEN_UNKNOWN the call is treated as the first one for the
+ * paragraph, and only then is a leading "Note: ", "Warning: ", "Remark: "
+ * or "TODO: " mark recognized.
+ *
+ * The token's text/text_end point into doc, nothing is copied. Mark tokens
+ * include the trailing ": "; monospace and reference tokens exclude the
+ * leading '$' or '@'. '@' and '$' preceded by a backslash stay part of the
+ * surrounding text token.
+ *
+ * Returns the position the next call has to continue from. Once doc is
+ * NULL or empty, the token is reset to the unknown state and NULL is
+ * returned. A NULL ret makes the function return NULL right away.
+ */
+EAPI const char *
+eolian_documentation_tokenize(const char *doc, Eolian_Doc_Token *ret)
+{
+   /* token is used for statekeeping, so force it */
+   EINA_SAFETY_ON_NULL_RETURN_VAL(ret, NULL);
+
+   /* we've reached the end or invalid input */
+   if (!doc || !doc[0])
+     {
+        ret->text = ret->text_end = NULL;
+        ret->type = EOLIAN_DOC_TOKEN_UNKNOWN;
+        return NULL;
+     }
+
+   Eina_Bool cont = (ret->type != EOLIAN_DOC_TOKEN_UNKNOWN);
+
+   /* we can only check notes etc at beginning of parsing */
+   if (cont)
+     goto mloop;
+
+/* sizeof(note) counts the terminating NUL, so sizeof(note) + 1 is exactly
+ * the length of note followed by ": "
+ */
+#define CMP_MARK_NOTE(doc, note) !strncmp(doc, note ": ", sizeof(note) + 1)
+
+   /* different types of notes; sizeof("X:") equals the length of "X: " */
+   if (CMP_MARK_NOTE(doc, "Note"))
+     {
+        ret->text = doc;
+        ret->text_end = doc + sizeof("Note:");
+        ret->type = EOLIAN_DOC_TOKEN_MARK_NOTE;
+        return ret->text_end;
+     }
+   else if (CMP_MARK_NOTE(doc, "Warning"))
+     {
+        ret->text = doc;
+        ret->text_end = doc + sizeof("Warning:");
+        ret->type = EOLIAN_DOC_TOKEN_MARK_WARNING;
+        return ret->text_end;
+     }
+   else if (CMP_MARK_NOTE(doc, "Remark"))
+     {
+        ret->text = doc;
+        ret->text_end = doc + sizeof("Remark:");
+        ret->type = EOLIAN_DOC_TOKEN_MARK_REMARK;
+        return ret->text_end;
+     }
+   else if (CMP_MARK_NOTE(doc, "TODO"))
+     {
+        ret->text = doc;
+        ret->text_end = doc + sizeof("TODO:");
+        ret->type = EOLIAN_DOC_TOKEN_MARK_TODO;
+        return ret->text_end;
+     }
+
+#undef CMP_MARK_NOTE
+
+mloop:
+
+   /* monospace markup ($foo); the casts keep the <ctype.h> calls defined
+    * for non-ASCII bytes where plain char is signed
+    */
+   if ((doc[0] == '$') &&
+       ((doc[1] == '_') || isalpha((unsigned char)doc[1])))
+     {
+        ret->text = ++doc;
+        ret->text_end = ret->text;
+        while ((ret->text_end[0] == '_') ||
+               isalnum((unsigned char)ret->text_end[0]))
+          ++ret->text_end;
+        ret->type = EOLIAN_DOC_TOKEN_MARKUP_MONOSPACE;
+        return ret->text_end;
+     }
+
+   /* references */
+   Eolian_Doc_Token_Type rtp = _get_ref_token(doc, &ret->text_end);
+   if (rtp != EOLIAN_DOC_TOKEN_UNKNOWN)
+     {
+        ret->text = doc + 1;
+        ret->type = rtp;
+        return ret->text_end;
+     }
+
+   const char *schr = doc, *pschr = NULL;
+   /* keep finding potential tokens until a suitable one is found
+    * terminate text token there (it also means next token can directly
+    * be tested for event/monospace)
+    */
+   while ((schr = strpbrk(schr, "@$")))
+     {
+        /* escape sequences */
+        if ((schr != doc) && (schr[-1] == '\\'))
+          {
+             schr += 1;
+             continue;
+          }
+        /* monospace markup */
+        if ((schr[0] == '$') &&
+            ((schr[1] == '_') || isalpha((unsigned char)schr[1])))
+          {
+             pschr = schr;
+             break;
+          }
+        /* references */
+        if (_get_ref_token(schr, NULL) != EOLIAN_DOC_TOKEN_UNKNOWN)
+          {
+             pschr = schr;
+             break;
+          }
+        /* nothing, keep matching text from next char on */
+        schr += 1;
+     }
+
+   /* figure out where we actually end */
+   ret->text = doc;
+   ret->text_end = pschr ? pschr : (doc + strlen(doc));
+   ret->type = EOLIAN_DOC_TOKEN_TEXT;
+   return ret->text_end;
+}
+
+/* Put tok into the empty state: unknown type and no text span.
+ * A NULL tok is silently ignored.
+ */
+EAPI void eolian_doc_token_init(Eolian_Doc_Token *tok)
+{
+   if (tok)
+     {
+        tok->text = NULL;
+        tok->text_end = NULL;
+        tok->type = EOLIAN_DOC_TOKEN_UNKNOWN;
+     }
+}
+
+/* Return the type stored in tok, or EOLIAN_DOC_TOKEN_UNKNOWN when tok is
+ * NULL.
+ */
+EAPI Eolian_Doc_Token_Type
+eolian_doc_token_type_get(const Eolian_Doc_Token *tok)
+{
+   EINA_SAFETY_ON_NULL_RETURN_VAL(tok, EOLIAN_DOC_TOKEN_UNKNOWN);
+
+   const Eolian_Doc_Token_Type kind = tok->type;
+   return kind;
+}
+
+/* Return a newly allocated copy of the token text with backslash escapes
+ * removed (the character following a backslash is copied verbatim).
+ *
+ * Returns NULL when tok is NULL, when the token type is unknown or when
+ * the buffer cannot be allocated. The caller releases the result with
+ * free().
+ */
+EAPI char *
+eolian_doc_token_text_get(const Eolian_Doc_Token *tok)
+{
+   EINA_SAFETY_ON_NULL_RETURN_VAL(tok, NULL);
+   if (tok->type == EOLIAN_DOC_TOKEN_UNKNOWN)
+     return NULL;
+
+   Eina_Strbuf *buf = eina_strbuf_new();
+   if (!buf)
+     return NULL;
+
+   for (const char *p = tok->text; p != tok->text_end; ++p)
+     {
+        if (*p == '\\')
+          {
+             /* a backslash at the very end of the token escapes nothing;
+              * stop here so p can never step past text_end
+              */
+             if (++p == tok->text_end)
+               break;
+          }
+        eina_strbuf_append_char(buf, *p);
+     }
+
+   /* stealing only detaches the string, the buffer object itself still
+    * has to be freed or it leaks on every call
+    */
+   char *text = eina_strbuf_string_steal(buf);
+   eina_strbuf_free(buf);
+   return text;
+}
+
 #define EO_SUFFIX ".eo"
 #define EOT_SUFFIX ".eot"
 
diff --git a/src/tests/eolian/eolian_parsing.c 
b/src/tests/eolian/eolian_parsing.c
index 1c61e9f..e4b320e 100644
--- a/src/tests/eolian/eolian_parsing.c
+++ b/src/tests/eolian/eolian_parsing.c
@@ -1188,6 +1188,88 @@ START_TEST(eolian_docs)
    fail_if(strcmp(eolian_documentation_since_get(doc),
                   "1.66"));
 
+   const char *sdesc = eolian_documentation_description_get(doc);
+   Eina_List *sdoc = eolian_documentation_string_split(sdesc);
+
+   char *dpar = eina_list_data_get(sdoc);
+   fail_if(strcmp(dpar, "Note: This is a note."));
+   sdoc = eina_list_remove_list(sdoc, sdoc);
+   dpar = eina_list_data_get(sdoc);
+   fail_if(strcmp(dpar, "This is a longer description for struct Foo."));
+   EINA_LIST_FREE(sdoc, dpar)
+     free(dpar);
+
+   const char *tdoc = "Note: This is $something, see @Blah, @.bleh, "
+                      "@Foo.Bar.baz, \\@ref foo and @[Things.Stuffs.foo,bar].";
+
+   Eolian_Doc_Token tok;
+   eolian_doc_token_init(&tok);
+   fail_if(eolian_doc_token_type_get(&tok) != EOLIAN_DOC_TOKEN_UNKNOWN);
+   tdoc = eolian_documentation_tokenize(tdoc, &tok);
+   fail_if(eolian_doc_token_type_get(&tok) != EOLIAN_DOC_TOKEN_MARK_NOTE);
+   char *txt = eolian_doc_token_text_get(&tok);
+   fail_if(strcmp(txt, "Note: "));
+   free(txt);
+   tdoc = eolian_documentation_tokenize(tdoc, &tok);
+   fail_if(eolian_doc_token_type_get(&tok) != EOLIAN_DOC_TOKEN_TEXT);
+   txt = eolian_doc_token_text_get(&tok);
+   fail_if(strcmp(txt, "This is "));
+   free(txt);
+   tdoc = eolian_documentation_tokenize(tdoc, &tok);
+   fail_if(eolian_doc_token_type_get(&tok) != 
EOLIAN_DOC_TOKEN_MARKUP_MONOSPACE);
+   txt = eolian_doc_token_text_get(&tok);
+   fail_if(strcmp(txt, "something"));
+   free(txt);
+   tdoc = eolian_documentation_tokenize(tdoc, &tok);
+   fail_if(eolian_doc_token_type_get(&tok) != EOLIAN_DOC_TOKEN_TEXT);
+   txt = eolian_doc_token_text_get(&tok);
+   fail_if(strcmp(txt, ", see "));
+   free(txt);
+   tdoc = eolian_documentation_tokenize(tdoc, &tok);
+   fail_if(eolian_doc_token_type_get(&tok) != EOLIAN_DOC_TOKEN_REF);
+   txt = eolian_doc_token_text_get(&tok);
+   fail_if(strcmp(txt, "Blah"));
+   free(txt);
+   tdoc = eolian_documentation_tokenize(tdoc, &tok);
+   fail_if(eolian_doc_token_type_get(&tok) != EOLIAN_DOC_TOKEN_TEXT);
+   txt = eolian_doc_token_text_get(&tok);
+   fail_if(strcmp(txt, ", "));
+   free(txt);
+   tdoc = eolian_documentation_tokenize(tdoc, &tok);
+   fail_if(eolian_doc_token_type_get(&tok) != EOLIAN_DOC_TOKEN_REF);
+   txt = eolian_doc_token_text_get(&tok);
+   fail_if(strcmp(txt, ".bleh"));
+   free(txt);
+   tdoc = eolian_documentation_tokenize(tdoc, &tok);
+   fail_if(eolian_doc_token_type_get(&tok) != EOLIAN_DOC_TOKEN_TEXT);
+   txt = eolian_doc_token_text_get(&tok);
+   fail_if(strcmp(txt, ", "));
+   free(txt);
+   tdoc = eolian_documentation_tokenize(tdoc, &tok);
+   fail_if(eolian_doc_token_type_get(&tok) != EOLIAN_DOC_TOKEN_REF);
+   txt = eolian_doc_token_text_get(&tok);
+   fail_if(strcmp(txt, "Foo.Bar.baz"));
+   free(txt);
+   tdoc = eolian_documentation_tokenize(tdoc, &tok);
+   fail_if(eolian_doc_token_type_get(&tok) != EOLIAN_DOC_TOKEN_TEXT);
+   txt = eolian_doc_token_text_get(&tok);
+   fail_if(strcmp(txt, ", @ref foo and "));
+   free(txt);
+   tdoc = eolian_documentation_tokenize(tdoc, &tok);
+   fail_if(eolian_doc_token_type_get(&tok) != EOLIAN_DOC_TOKEN_REF_EVENT);
+   txt = eolian_doc_token_text_get(&tok);
+   fail_if(strcmp(txt, "[Things.Stuffs.foo,bar]"));
+   free(txt);
+   tdoc = eolian_documentation_tokenize(tdoc, &tok);
+   fail_if(tdoc[0] != '\0');
+   fail_if(eolian_doc_token_type_get(&tok) != EOLIAN_DOC_TOKEN_TEXT);
+   txt = eolian_doc_token_text_get(&tok);
+   fail_if(strcmp(txt, "."));
+   free(txt);
+   tdoc = eolian_documentation_tokenize(tdoc, &tok);
+   fail_if(tdoc != NULL);
+   fail_if(eolian_doc_token_type_get(&tok) != EOLIAN_DOC_TOKEN_UNKNOWN);
+
    fail_if(!(sfl = eolian_typedecl_struct_field_get(tdl, "field1")));
    fail_if(!(doc = eolian_typedecl_struct_field_documentation_get(sfl)));
    fail_if(strcmp(eolian_documentation_summary_get(doc),

-- 


Reply via email to