q66 pushed a commit to branch master. http://git.enlightenment.org/core/efl.git/commit/?id=6c268d98e5d721bf29953e892eec2fdd36e0a6c4
commit 6c268d98e5d721bf29953e892eec2fdd36e0a6c4 Author: Daniel Kolesa <[email protected]> Date: Thu Jul 17 14:17:19 2014 +0100 eolian: lexing of string literals (including support for escape sequences etc.) --- src/lib/eolian/eo_lexer.c | 133 ++++++++++++++++++++++++++++++++++++++++++--- src/lib/eolian/eo_lexer.h | 2 +- src/lib/eolian/eo_parser.c | 49 +++++++++-------- 3 files changed, 152 insertions(+), 32 deletions(-) diff --git a/src/lib/eolian/eo_lexer.c b/src/lib/eolian/eo_lexer.c index e6af35f..46a2078 100644 --- a/src/lib/eolian/eo_lexer.c +++ b/src/lib/eolian/eo_lexer.c @@ -44,7 +44,7 @@ next_char(Eo_Lexer *ls) static const char * const tokens[] = { - "<comment>", "<number>", "<value>", + "<comment>", "<string>", "<number>", "<value>", KEYWORDS }; @@ -104,9 +104,9 @@ init_hash(void) unsigned int i; if (keyword_map) return; keyword_map = eina_hash_string_superfast_new(NULL); - for (i = 3; i < (sizeof(tokens) / sizeof(const char*)); ++i) + for (i = 4; i < (sizeof(tokens) / sizeof(const char*)); ++i) { - eina_hash_add(keyword_map, tokens[i], (void*)(size_t)(i - 2)); + eina_hash_add(keyword_map, tokens[i], (void*)(size_t)(i - 3)); } } @@ -193,7 +193,117 @@ read_long_comment(Eo_Lexer *ls, Eo_Token *tok) } } eina_strbuf_trim(ls->buff); - if (tok) tok->value = eina_strbuf_string_get(ls->buff); + if (tok) tok->value = eina_stringshare_add(eina_strbuf_string_get(ls->buff)); +} + +static void +esc_error(Eo_Lexer *ls, int *c, int n, const char *msg) +{ + int i; + eina_strbuf_reset(ls->buff); + eina_strbuf_append_char(ls->buff, '\\'); + for (i = 0; i < n && c[i]; ++i) + eina_strbuf_append_char(ls->buff, c[i]); + eo_lexer_lex_error(ls, msg, TOK_STRING); +} + +static int +hex_val(int c) +{ + if (c >= 'a') return c - 'a' + 10; + if (c >= 'A') return c - 'A' + 10; + return c - '0'; +} + +static int +read_hex_esc(Eo_Lexer *ls) +{ + int c[3] = { 'x' }; + int i, r = 0; + for (i = 1; i < 3; ++i) + { + next_char(ls); + c[i] = ls->current; + if (!isxdigit(c[i])) + esc_error(ls, c, i + 1, "hexadecimal digit expected"); + r = (r << 4) + hex_val(c[i]); + } + return r; +} + +static int +read_dec_esc(Eo_Lexer *ls) +{ + int c[3]; + int i, r = 0; + for (i = 0; i < 3 && isdigit(ls->current); ++i) + { + c[i] = ls->current; + r = r * 10 + (c[i] - '0'); + next_char(ls); + } + if (r > UCHAR_MAX) + esc_error(ls, c, i, "decimal escape too large"); + return r; +} + +static void +read_string(Eo_Lexer *ls, Eo_Token *tok) +{ + int del = ls->current; + eina_strbuf_reset(ls->buff); + eina_strbuf_append_char(ls->buff, del); + while (ls->current != del) switch (ls->current) + { + case '\0': + eo_lexer_lex_error(ls, "unfinished string", -1); + break; + case '\n': case '\r': + eo_lexer_lex_error(ls, "unfinished string", TOK_STRING); + break; + case '\\': + { + next_char(ls); + switch (ls->current) + { + case 'a': eina_strbuf_append_char(ls->buff, '\a'); goto next; + case 'b': eina_strbuf_append_char(ls->buff, '\b'); goto next; + case 'f': eina_strbuf_append_char(ls->buff, '\f'); goto next; + case 'n': eina_strbuf_append_char(ls->buff, '\n'); goto next; + case 'r': eina_strbuf_append_char(ls->buff, '\r'); goto next; + case 't': eina_strbuf_append_char(ls->buff, '\t'); goto next; + case 'v': eina_strbuf_append_char(ls->buff, '\v'); goto next; + case 'x': + eina_strbuf_append_char(ls->buff, read_hex_esc(ls)); + goto next; + case '\n': case '\r': + next_line(ls); + eina_strbuf_append_char(ls->buff, '\n'); + goto skip; + case '\\': case '"': case '\'': + eina_strbuf_append_char(ls->buff, ls->current); + goto skip; + case '\0': + goto skip; + default: + if (!isdigit(ls->current)) + esc_error(ls, &ls->current, 1, "invalid escape sequence"); + eina_strbuf_append_char(ls->buff, read_dec_esc(ls)); + goto skip; + } +next: + next_char(ls); +skip: + break; + } + default: + eina_strbuf_append_char(ls->buff, ls->current); + next_char(ls); + } + eina_strbuf_append_char(ls->buff, ls->current); + next_char(ls); + tok->value = eina_stringshare_add_length(eina_strbuf_string_get(ls->buff) + 1, + eina_strbuf_length_get(ls->buff) - 2); } static int @@ -345,6 +455,7 @@ static int lex(Eo_Lexer *ls, Eo_Token *tok) { eina_strbuf_reset(ls->buff); + tok->value = NULL; for (;;) switch (ls->current) { case '\n': @@ -371,6 +482,9 @@ lex(Eo_Lexer *ls, Eo_Token *tok) continue; case '\0': return -1; + case '"': case '\'': + read_string(ls, tok); + return TOK_STRING; case '.': next_char(ls); if (!isdigit(ls->current)) return '.'; @@ -412,7 +526,7 @@ lex(Eo_Lexer *ls, Eo_Token *tok) ls->column = col + 1; if (at_kw && tok->kw == 0) eo_lexer_syntax_error(ls, "invalid keyword"); - tok->value = str; + tok->value = eina_stringshare_add(str); return TOK_VALUE; } else @@ -450,7 +564,7 @@ lex_balanced(Eo_Lexer *ls, Eo_Token *tok, char beg, char end) eina_strbuf_trim(ls->buff); str = eina_strbuf_string_get(ls->buff); tok->kw = (int)(uintptr_t)eina_hash_find(keyword_map, str); - tok->value = str; + tok->value = eina_stringshare_add(str); ls->column = col + 1; return TOK_VALUE; } @@ -532,6 +646,11 @@ eo_lexer_get_balanced(Eo_Lexer *ls, char beg, char end) int eo_lexer_get(Eo_Lexer *ls) { + if (ls->t.token >= START_CUSTOM && ls->t.token != TOK_NUMBER) + { + eina_stringshare_del(ls->t.value); + ls->t.value = NULL; + } if (ls->lookahead.token >= 0) { ls->t = ls->lookahead; @@ -594,7 +713,7 @@ eo_lexer_token_to_str(int token, char *buf) const char * eo_lexer_keyword_str_get(int kw) { - return tokens[kw + 2]; + return tokens[kw + 3]; } Eina_Bool diff --git a/src/lib/eolian/eo_lexer.h b/src/lib/eolian/eo_lexer.h index 801d9bd..260d5a2 100644 --- a/src/lib/eolian/eo_lexer.h +++ b/src/lib/eolian/eo_lexer.h @@ -13,7 +13,7 @@ enum Tokens { - TOK_COMMENT = START_CUSTOM, TOK_NUMBER, TOK_VALUE + TOK_COMMENT = START_CUSTOM, TOK_STRING, TOK_NUMBER, TOK_VALUE }; /* all keywords in eolian, they can still be used as names (they're TOK_VALUE) diff --git a/src/lib/eolian/eo_parser.c b/src/lib/eolian/eo_parser.c index 24a45bb..9ddcf25 100644 --- a/src/lib/eolian/eo_parser.c +++ b/src/lib/eolian/eo_parser.c @@ -240,7 +240,7 @@ parse_struct(Eo_Lexer *ls, const char *name, Eina_Bool is_extern) check_next(ls, '{'); if (ls->t.token == TOK_COMMENT) { - def->comment = eina_stringshare_add(ls->t.value); + def->comment = eina_stringshare_ref(ls->t.value); eo_lexer_get(ls); } while (ls->t.token != '}') @@ -251,7 +251,7 @@ parse_struct(Eo_Lexer *ls, const char *name, Eina_Bool is_extern) check(ls, TOK_VALUE); if (eina_hash_find(def->fields, ls->t.value)) eo_lexer_syntax_error(ls, "double field definition"); - fname = eina_stringshare_add(ls->t.value); + fname = eina_stringshare_ref(ls->t.value); eo_lexer_get(ls); check_next(ls, ':'); tp = parse_type_struct_nonvoid(ls, EINA_TRUE, EINA_FALSE); @@ -263,7 +263,7 @@ parse_struct(Eo_Lexer *ls, const char *name, Eina_Bool is_extern) check_next(ls, ';'); if (ls->t.token == TOK_COMMENT) { - fdef->comment = eina_stringshare_add(ls->t.value); + fdef->comment = eina_stringshare_ref(ls->t.value); eo_lexer_get(ls); } } @@ -329,7 +329,7 @@ parse_type_struct(Eo_Lexer *ls, Eina_Bool allow_struct, Eina_Bool allow_anon) check(ls, TOK_VALUE); if (eo_lexer_get_c_type(ls->t.kw)) eo_lexer_syntax_error(ls, "invalid struct name"); - sname = eina_stringshare_add(ls->t.value); + sname = eina_stringshare_ref(ls->t.value); eo_lexer_get(ls); if (ls->t.token == '{') return parse_struct(ls, sname, is_extern); @@ -339,7 +339,7 @@ parse_type_struct(Eo_Lexer *ls, Eina_Bool allow_struct, Eina_Bool allow_anon) check(ls, TOK_VALUE); if (eo_lexer_get_c_type(ls->t.kw)) eo_lexer_syntax_error(ls, "invalid struct name"); - sname = eina_stringshare_add(ls->t.value); + sname = eina_stringshare_ref(ls->t.value); eo_lexer_get(ls); } def = push_type(ls); @@ -359,7 +359,8 @@ parse_type_struct(Eo_Lexer *ls, Eina_Bool allow_struct, Eina_Bool allow_anon) def->type = EOLIAN_TYPE_REGULAR; check(ls, TOK_VALUE); ctype = eo_lexer_get_c_type(ls->t.kw); - def->name = eina_stringshare_add(ctype ? ctype : ls->t.value); + def->name = ctype ? eina_stringshare_add(ctype) + : eina_stringshare_ref(ls->t.value); } eo_lexer_get(ls); parse_ptr: @@ -406,7 +407,7 @@ parse_typedef(Eo_Lexer *ls) eo_lexer_get(ls); } check(ls, TOK_VALUE); - ls->tmp.typedef_def->alias = eina_stringshare_add(ls->t.value); + ls->tmp.typedef_def->alias = eina_stringshare_ref(ls->t.value); eo_lexer_get(ls); (void)!!test_next(ls, ':'); ls->tmp.typedef_def->type = parse_type_struct_nonvoid(ls, EINA_TRUE, @@ -430,7 +431,7 @@ parse_return(Eo_Lexer *ls, Eina_Bool allow_void) { int line = ls->line_number, col = ls->column; eo_lexer_get_balanced(ls, '(', ')'); - ret->default_ret_val = eina_stringshare_add(ls->t.value); + ret->default_ret_val = eina_stringshare_ref(ls->t.value); eo_lexer_get(ls); check_match(ls, ')', '(', line, col); } @@ -442,7 +443,7 @@ parse_return(Eo_Lexer *ls, Eina_Bool allow_void) check_next(ls, ';'); if (ls->t.token == TOK_COMMENT) { - ret->comment = eina_stringshare_add(ls->t.value); + ret->comment = eina_stringshare_ref(ls->t.value); eo_lexer_get(ls); } } @@ -478,7 +479,7 @@ parse_param(Eo_Lexer *ls, Eina_Bool allow_inout) par->type = parse_type(ls); pop_type(ls); check(ls, TOK_VALUE); - par->name = eina_stringshare_add(ls->t.value); + par->name = eina_stringshare_ref(ls->t.value); eo_lexer_get(ls); if (ls->t.kw == KW_at_nonull) { @@ -488,7 +489,7 @@ parse_param(Eo_Lexer *ls, Eina_Bool allow_inout) check_next(ls, ';'); if (ls->t.token == TOK_COMMENT) { - par->comment = eina_stringshare_add(ls->t.value); + par->comment = eina_stringshare_ref(ls->t.value); eo_lexer_get(ls); } } @@ -498,7 +499,7 @@ parse_legacy(Eo_Lexer *ls) { eo_lexer_get(ls); check(ls, TOK_VALUE); - ls->tmp.legacy_def = eina_stringshare_add(ls->t.value); + ls->tmp.legacy_def = eina_stringshare_ref(ls->t.value); eo_lexer_get(ls); check_next(ls, ';'); } @@ -510,7 +511,7 @@ parse_attrs(Eo_Lexer *ls) Eina_Bool has_const = EINA_FALSE; acc = calloc(1, sizeof(Eo_Accessor_Param)); ls->tmp.accessor_param = acc; - acc->name = eina_stringshare_add(ls->t.value); + acc->name = eina_stringshare_ref(ls->t.value); eo_lexer_get(ls); check_next(ls, ':'); check(ls, TOK_VALUE); @@ -545,7 +546,7 @@ parse_accessor(Eo_Lexer *ls) check_next(ls, '{'); if (ls->t.token == TOK_COMMENT) { - acc->comment = eina_stringshare_add(ls->t.value); + acc->comment = eina_stringshare_ref(ls->t.value); eo_lexer_get(ls); } for (;;) switch (ls->t.kw) @@ -611,7 +612,7 @@ parse_property(Eo_Lexer *ls) prop->scope = EOLIAN_SCOPE_PROTECTED; eo_lexer_get(ls); } - prop->name = eina_stringshare_add(ls->t.value); + prop->name = eina_stringshare_ref(ls->t.value); eo_lexer_get(ls); line = ls->line_number; col = ls->column; @@ -664,7 +665,7 @@ parse_method(Eo_Lexer *ls, Eina_Bool ctor) { if (ls->t.token != TOK_VALUE) eo_lexer_syntax_error(ls, "expected method name"); - meth->name = eina_stringshare_add(ls->t.value); + meth->name = eina_stringshare_ref(ls->t.value); eo_lexer_get(ls); } else @@ -675,7 +676,7 @@ parse_method(Eo_Lexer *ls, Eina_Bool ctor) eo_lexer_get(ls); } check(ls, TOK_VALUE); - meth->name = eina_stringshare_add(ls->t.value); + meth->name = eina_stringshare_ref(ls->t.value); eo_lexer_get(ls); } line = ls->line_number; @@ -683,7 +684,7 @@ parse_method(Eo_Lexer *ls, Eina_Bool ctor) check_next(ls, '{'); if (ls->t.token == TOK_COMMENT) { - meth->comment = eina_stringshare_add(ls->t.value); + meth->comment = eina_stringshare_ref(ls->t.value); eo_lexer_get(ls); } for (;;) switch (ls->t.kw) @@ -846,7 +847,7 @@ parse_event(Eo_Lexer *ls) eo_lexer_get(ls); if (ls->t.token == TOK_COMMENT) { - ev->comment = eina_stringshare_add(ls->t.value); + ev->comment = eina_stringshare_ref(ls->t.value); eo_lexer_get(ls); } } @@ -935,7 +936,7 @@ parse_class_body(Eo_Lexer *ls, Eina_Bool allow_ctors, Eolian_Class_Type type) has_events = EINA_FALSE; if (ls->t.token == TOK_COMMENT) { - ls->tmp.kls->comment = eina_stringshare_add(ls->t.value); + ls->tmp.kls->comment = eina_stringshare_ref(ls->t.value); eo_lexer_get(ls); } for (;;) switch (ls->t.kw) @@ -945,7 +946,7 @@ parse_class_body(Eo_Lexer *ls, Eina_Bool allow_ctors, Eolian_Class_Type type) eo_lexer_get(ls); check_next(ls, ':'); check(ls, TOK_VALUE); - ls->tmp.kls->legacy_prefix = eina_stringshare_add(ls->t.value); + ls->tmp.kls->legacy_prefix = eina_stringshare_ref(ls->t.value); eo_lexer_get(ls); check_next(ls, ';'); break; @@ -954,7 +955,7 @@ parse_class_body(Eo_Lexer *ls, Eina_Bool allow_ctors, Eolian_Class_Type type) eo_lexer_get(ls); check_next(ls, ':'); check(ls, TOK_VALUE); - ls->tmp.kls->eo_prefix = eina_stringshare_add(ls->t.value); + ls->tmp.kls->eo_prefix = eina_stringshare_ref(ls->t.value); eo_lexer_get(ls); check_next(ls, ';'); break; @@ -963,7 +964,7 @@ parse_class_body(Eo_Lexer *ls, Eina_Bool allow_ctors, Eolian_Class_Type type) eo_lexer_get(ls); check_next(ls, ':'); check(ls, TOK_VALUE); - ls->tmp.kls->data_type = eina_stringshare_add(ls->t.value); + ls->tmp.kls->data_type = eina_stringshare_ref(ls->t.value); eo_lexer_get(ls); check_next(ls, ';'); break; @@ -1065,7 +1066,7 @@ parse_unit(Eo_Lexer *ls, Eina_Bool eot) check(ls, TOK_VALUE); if (eo_lexer_get_c_type(ls->t.kw)) eo_lexer_syntax_error(ls, "invalid struct name"); - name = eina_stringshare_add(ls->t.value); + name = eina_stringshare_ref(ls->t.value); eo_lexer_get(ls); parse_struct(ls, name, is_extern); pop_type(ls); --
