q66 pushed a commit to branch master. http://git.enlightenment.org/core/efl.git/commit/?id=5c3cc82bddfda8ec7889b75c8dc1e6c55e3452a3
commit 5c3cc82bddfda8ec7889b75c8dc1e6c55e3452a3 Author: Daniel Kolesa <d.kol...@samsung.com> Date: Tue Jul 15 10:09:50 2014 +0100 eolian: comment eo_lexer.h --- src/lib/eolian/eo_lexer.h | 53 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/src/lib/eolian/eo_lexer.h b/src/lib/eolian/eo_lexer.h index 277df41..56077cb 100644 --- a/src/lib/eolian/eo_lexer.h +++ b/src/lib/eolian/eo_lexer.h @@ -7,6 +7,8 @@ #include <Eolian.h> #include "eo_definitions.h" +/* a token is an int, custom tokens start at this - single-char tokens are + * simply represented by their ascii */ #define START_CUSTOM 257 enum Tokens @@ -14,6 +16,8 @@ enum Tokens TOK_COMMENT = START_CUSTOM, TOK_EOF, TOK_VALUE }; +/* all keywords in eolian, they can still be used as names (they're TOK_VALUE) + * they just fill in the "kw" field of the token */ #define KEYWORDS KW(class), KW(const), KW(private), KW(protected), \ KW(return), KW(signed), KW(struct), KW(unsigned), KW(virtual), \ \ @@ -42,6 +46,7 @@ enum Tokens \ KW(true), KW(false), KW(null) +/* "regular" keyword and @ prefixed keyword */ #define KW(x) KW_##x #define KWAT(x) KW_at_##x @@ -54,6 +59,9 @@ enum Keywords #undef KW #undef KWAT +/* a token - "token" is the actual token id, "value" is the value of a token + * if needed - NULL otherwise - for example the value of a TOK_VALUE, "kw" + * is the keyword id if this is a keyword, it's 0 when not a keyword */ typedef struct _Eo_Token { int token; @@ -68,6 +76,9 @@ enum Nodes NODE_STRUCT }; +/* represents a node, aka a result of parsing - currently class, typedef + * or struct, they're all stored in a list in lexer state and their type + * is determined by enum Nodes above */ typedef struct _Eo_Node { unsigned char type; @@ -79,21 +90,49 @@ typedef struct _Eo_Node }; } Eo_Node; +/* keeps all lexer state */ typedef struct _Eo_Lexer { + /* current character being tested */ int current; + /* column is token aware column number, for example when lexing a keyword + * it points to the beginning of it after the lexing is done, icolumn is + * token unaware, always pointing to current column */ int column, icolumn; + /* the current line number */ int line_number; + /* t: "normal" - token to lex into, "lookahead" - a lookahead token, used + * to look one token past "t", when we need to check for a token after the + * current one and use it in a conditional without consuming the current + * token - used in pretty few cases - because we have one extra lookahead + * token, that makes our grammar LL(2) - two tokens in total */ Eo_Token t, lookahead; + /* a string buffer used to keep contents of token currently being read, + * if needed at all */ Eina_Strbuf *buff; + /* a handle pointing to a memory mapped file representing the file we're + * currently lexing */ Eina_File *handle; + /* the source file name */ const char *source; + /* points to the current character in our mmapped file being lexed, just + * incremented until the end */ const char *stream; + /* end pointer - required to check if we've reached past the file, as + * mmapped data will give us no EOF */ const char *stream_end; + /* points to the current line being lexed, used by error messages to + * display the current line with a caret at the respective column */ const char *stream_line; + /* this is jumped to when an error happens */ jmp_buf err_jmp; + /* represents the results of parsing */ Eina_List *nodes; + /* represents the temporaries, every object that is allocated by the + * parser is temporarily put here so the resources can be reclaimed in + * case of error - and it's nulled when it's written into a more permanent + * position (e.g. as part of another struct, or into nodes */ Eo_Lexer_Temps tmp; } Eo_Lexer; @@ -101,17 +140,31 @@ int eo_lexer_init (void); int eo_lexer_shutdown (void); Eo_Lexer *eo_lexer_new (const char *source); void eo_lexer_free (Eo_Lexer *ls); +/* gets a TOK_VALUE balanced token, aka keeps lexing everything until the + * "end" character, but keeps it balanced, so if it hits "beg" during lexing, + * it won't end at the next "end", useful for lexing between () or [] */ int eo_lexer_get_balanced (Eo_Lexer *ls, char beg, char end); +/* gets a TOK_VALUE until the "end" character */ int eo_lexer_get_until (Eo_Lexer *ls, char end); +/* gets a regular token, singlechar or one of TOK_something */ int eo_lexer_get (Eo_Lexer *ls); +/* like above, but allows you to specify a list of custom characters that can + * be used as part of identifiers */ int eo_lexer_get_ident (Eo_Lexer *ls, const char *chars); +/* lookahead token - see Eo_Lexer */ int eo_lexer_lookahead (Eo_Lexer *ls); int eo_lexer_lookahead_ident(Eo_Lexer *ls, const char *chars); +/* "throws" an error, with a custom message and custom token */ void eo_lexer_lex_error (Eo_Lexer *ls, const char *msg, int token); +/* like above, but uses the lexstate->t.token, aka current token */ void eo_lexer_syntax_error (Eo_Lexer *ls, const char *msg); +/* turns the token into a string, writes into the given buffer */ void eo_lexer_token_to_str (int token, char *buf); +/* returns the string representation of a keyword */ const char *eo_lexer_keyword_str_get(int kw); +/* checks if the given keyword is a builtin type */ Eina_Bool eo_lexer_is_type_keyword(int kw); +/* gets the C type name for a builtin type name - e.g. uchar -> unsigned char */ const char *eo_lexer_get_c_type (int kw); extern int _eo_lexer_log_dom; --