Revision: 17524
Author: [email protected]
Date: Wed Nov 6 15:45:04 2013 UTC
Log: Experimental parser: easier to read rules and default rule
[email protected]
BUG=
Review URL: https://codereview.chromium.org/59403010
http://code.google.com/p/v8/source/detail?r=17524
Modified:
/branches/experimental/parser/src/lexer/lexer_py.re
/branches/experimental/parser/tools/lexer_generator/dfa.py
/branches/experimental/parser/tools/lexer_generator/generator.py
/branches/experimental/parser/tools/lexer_generator/nfa.py
/branches/experimental/parser/tools/lexer_generator/rule_lexer.py
/branches/experimental/parser/tools/lexer_generator/rule_parser.py
=======================================
--- /branches/experimental/parser/src/lexer/lexer_py.re Wed Nov 6 08:50:55 2013 UTC
+++ /branches/experimental/parser/src/lexer/lexer_py.re Wed Nov 6 15:45:04 2013 UTC
@@ -27,170 +27,185 @@
whitespace_char = [ \t\v\f\r:ws:\240];
whitespace = whitespace_char+;
-identifier_start = [$_a-zA-Z:lit:];
-identifier_char = [$_a-zA-Z0-9:lit:];
-not_identifier_char = [^:identifier_char:];
-line_terminator = [\n\r]+;
+identifier_start = [$_a-zA-Z:lit:]; # TODO add relevant latin1 char codes
+identifier_char = [0-9:identifier_start:];
+line_terminator = [\n\r];
digit = [0-9];
hex_digit = [0-9a-fA-F];
maybe_exponent = ("e" [\-+]? digit+)?;
number = ("0x" hex_digit+) | (("." digit+ maybe_exponent) | (digit+ ("."
digit*)? maybe_exponent));
-<Normal> "break" not_identifier_char {
PUSH_TOKEN_LOOKAHEAD(Token::BREAK); }
-<Normal> "case" not_identifier_char {
PUSH_TOKEN_LOOKAHEAD(Token::CASE); }
-<Normal> "catch" not_identifier_char {
PUSH_TOKEN_LOOKAHEAD(Token::CATCH); }
-<Normal> "class" not_identifier_char {
PUSH_TOKEN_LOOKAHEAD(Token::FUTURE_RESERVED_WORD); }
-<Normal> "const" not_identifier_char {
PUSH_TOKEN_LOOKAHEAD(Token::CONST); }
-<Normal> "continue" not_identifier_char {
PUSH_TOKEN_LOOKAHEAD(Token::CONTINUE); }
-<Normal> "debugger" not_identifier_char {
PUSH_TOKEN_LOOKAHEAD(Token::DEBUGGER); }
-<Normal> "default" not_identifier_char {
PUSH_TOKEN_LOOKAHEAD(Token::DEFAULT); }
-<Normal> "delete" not_identifier_char {
PUSH_TOKEN_LOOKAHEAD(Token::DELETE); }
-<Normal> "do" not_identifier_char {
PUSH_TOKEN_LOOKAHEAD(Token::DO); }
-<Normal> "else" not_identifier_char {
PUSH_TOKEN_LOOKAHEAD(Token::ELSE); }
-<Normal> "enum" not_identifier_char {
PUSH_TOKEN_LOOKAHEAD(Token::FUTURE_RESERVED_WORD); }
-<Normal> "export" not_identifier_char {
PUSH_TOKEN_LOOKAHEAD(Token::FUTURE_RESERVED_WORD); }
-<Normal> "extends" not_identifier_char {
PUSH_TOKEN_LOOKAHEAD(Token::FUTURE_RESERVED_WORD); }
-<Normal> "false" not_identifier_char {
PUSH_TOKEN_LOOKAHEAD(Token::FALSE_LITERAL); }
-<Normal> "finally" not_identifier_char {
PUSH_TOKEN_LOOKAHEAD(Token::FINALLY); }
-<Normal> "for" not_identifier_char {
PUSH_TOKEN_LOOKAHEAD(Token::FOR); }
-<Normal> "function" not_identifier_char {
PUSH_TOKEN_LOOKAHEAD(Token::FUNCTION); }
-<Normal> "if" not_identifier_char {
PUSH_TOKEN_LOOKAHEAD(Token::IF); }
-<Normal> "implements" not_identifier_char {
PUSH_TOKEN_LOOKAHEAD(Token::FUTURE_STRICT_RESERVED_WORD); }
-<Normal> "import" not_identifier_char {
PUSH_TOKEN_LOOKAHEAD(Token::FUTURE_RESERVED_WORD); }
-<Normal> "in" not_identifier_char {
PUSH_TOKEN_LOOKAHEAD(Token::IN); }
-<Normal> "instanceof" not_identifier_char {
PUSH_TOKEN_LOOKAHEAD(Token::INSTANCEOF); }
-<Normal> "interface" not_identifier_char {
PUSH_TOKEN_LOOKAHEAD(Token::FUTURE_STRICT_RESERVED_WORD); }
-<Normal> "let" not_identifier_char {
PUSH_TOKEN_LOOKAHEAD(Token::FUTURE_STRICT_RESERVED_WORD); }
-<Normal> "new" not_identifier_char {
PUSH_TOKEN_LOOKAHEAD(Token::NEW); }
-<Normal> "null" not_identifier_char {
PUSH_TOKEN_LOOKAHEAD(Token::NULL_LITERAL); }
-<Normal> "package" not_identifier_char {
PUSH_TOKEN_LOOKAHEAD(Token::FUTURE_STRICT_RESERVED_WORD); }
-<Normal> "private" not_identifier_char {
PUSH_TOKEN_LOOKAHEAD(Token::FUTURE_STRICT_RESERVED_WORD); }
-<Normal> "protected" not_identifier_char {
PUSH_TOKEN_LOOKAHEAD(Token::FUTURE_STRICT_RESERVED_WORD); }
-<Normal> "public" not_identifier_char {
PUSH_TOKEN_LOOKAHEAD(Token::FUTURE_STRICT_RESERVED_WORD); }
-<Normal> "return" not_identifier_char {
PUSH_TOKEN_LOOKAHEAD(Token::RETURN); }
-<Normal> "static" not_identifier_char {
PUSH_TOKEN_LOOKAHEAD(Token::FUTURE_STRICT_RESERVED_WORD); }
-<Normal> "super" not_identifier_char {
PUSH_TOKEN_LOOKAHEAD(Token::FUTURE_RESERVED_WORD); }
-<Normal> "switch" not_identifier_char {
PUSH_TOKEN_LOOKAHEAD(Token::SWITCH); }
-<Normal> "this" not_identifier_char {
PUSH_TOKEN_LOOKAHEAD(Token::THIS); }
-<Normal> "throw" not_identifier_char {
PUSH_TOKEN_LOOKAHEAD(Token::THROW); }
-<Normal> "true" not_identifier_char {
PUSH_TOKEN_LOOKAHEAD(Token::TRUE_LITERAL); }
-<Normal> "try" not_identifier_char {
PUSH_TOKEN_LOOKAHEAD(Token::TRY); }
-<Normal> "typeof" not_identifier_char {
PUSH_TOKEN_LOOKAHEAD(Token::TYPEOF); }
-<Normal> "var" not_identifier_char {
PUSH_TOKEN_LOOKAHEAD(Token::VAR); }
-<Normal> "void" not_identifier_char {
PUSH_TOKEN_LOOKAHEAD(Token::VOID); }
-<Normal> "while" not_identifier_char {
PUSH_TOKEN_LOOKAHEAD(Token::WHILE); }
-<Normal> "with" not_identifier_char {
PUSH_TOKEN_LOOKAHEAD(Token::WITH); }
-<Normal> "yield" not_identifier_char {
PUSH_TOKEN_LOOKAHEAD(Token::YIELD); }
+<default>
+"|=" { PUSH_TOKEN(ASSIGN_BIT_OR); }
+"^=" { PUSH_TOKEN(ASSIGN_BIT_XOR); }
+"&=" { PUSH_TOKEN(ASSIGN_BIT_AND); }
+"+=" { PUSH_TOKEN(ASSIGN_ADD); }
+"-=" { PUSH_TOKEN(ASSIGN_SUB); }
+"*=" { PUSH_TOKEN(ASSIGN_MUL); }
+"/=" { PUSH_TOKEN(ASSIGN_DIV); }
+"%=" { PUSH_TOKEN(ASSIGN_MOD); }
-<Normal> "|=" { PUSH_TOKEN(Token::ASSIGN_BIT_OR); }
-<Normal> "^=" { PUSH_TOKEN(Token::ASSIGN_BIT_XOR); }
-<Normal> "&=" { PUSH_TOKEN(Token::ASSIGN_BIT_AND); }
-<Normal> "+=" { PUSH_TOKEN(Token::ASSIGN_ADD); }
-<Normal> "-=" { PUSH_TOKEN(Token::ASSIGN_SUB); }
-<Normal> "*=" { PUSH_TOKEN(Token::ASSIGN_MUL); }
-<Normal> "/=" { PUSH_TOKEN(Token::ASSIGN_DIV); }
-<Normal> "%=" { PUSH_TOKEN(Token::ASSIGN_MOD); }
+"===" { PUSH_TOKEN(EQ_STRICT); }
+"==" { PUSH_TOKEN(EQ); }
+"=" { PUSH_TOKEN(ASSIGN); }
+"!==" { PUSH_TOKEN(NE_STRICT); }
+"!=" { PUSH_TOKEN(NE); }
+"!" { PUSH_TOKEN(NOT); }
-<Normal> "===" { PUSH_TOKEN(Token::EQ_STRICT); }
-<Normal> "==" { PUSH_TOKEN(Token::EQ); }
-<Normal> "=" { PUSH_TOKEN(Token::ASSIGN); }
-<Normal> "!==" { PUSH_TOKEN(Token::NE_STRICT); }
-<Normal> "!=" { PUSH_TOKEN(Token::NE); }
-<Normal> "!" { PUSH_TOKEN(Token::NOT); }
+"//" <<SingleLineComment>> # TODO save offset?
+"/*" <<MultiLineComment>>
+"<!--" <<HtmlComment>>
+#whitespace* "-->" { if (just_seen_line_terminator_) {
YYSETCONDITION(kConditionSingleLineComment); goto yyc_SingleLineComment; }
else { --cursor_; send(Token::DEC); start_ = cursor_; goto yyc_Normal; } }
-<Normal> "//" :=> SingleLineComment
-<Normal> whitespace* "-->" { if (just_seen_line_terminator_) {
YYSETCONDITION(kConditionSingleLineComment); goto yyc_SingleLineComment; }
else { --cursor_; send(Token::DEC); start_ = cursor_; goto yyc_Normal; } }
-<Normal> "/*" :=> MultiLineComment
-<Normal> "<!--" :=> HtmlComment
+">>>=" { PUSH_TOKEN(ASSIGN_SHR); }
+">>>" { PUSH_TOKEN(SHR); }
+"<<=" { PUSH_TOKEN(ASSIGN_SHL); }
+">>=" { PUSH_TOKEN(ASSIGN_SAR); }
+"<=" { PUSH_TOKEN(LTE); }
+">=" { PUSH_TOKEN(GTE); }
+"<<" { PUSH_TOKEN(SHL); }
+">>" { PUSH_TOKEN(SAR); }
+"<" { PUSH_TOKEN(LT); }
+">" { PUSH_TOKEN(GT); }
-<Normal> ">>>=" { PUSH_TOKEN(Token::ASSIGN_SHR); }
-<Normal> ">>>" { PUSH_TOKEN(Token::SHR); }
-<Normal> "<<=" { PUSH_TOKEN(Token::ASSIGN_SHL); }
-<Normal> ">>=" { PUSH_TOKEN(Token::ASSIGN_SAR); }
-<Normal> "<=" { PUSH_TOKEN(Token::LTE); }
-<Normal> ">=" { PUSH_TOKEN(Token::GTE); }
-<Normal> "<<" { PUSH_TOKEN(Token::SHL); }
-<Normal> ">>" { PUSH_TOKEN(Token::SAR); }
-<Normal> "<" { PUSH_TOKEN(Token::LT); }
-<Normal> ">" { PUSH_TOKEN(Token::GT); }
+number { PUSH_TOKEN(NUMBER); }
+# number identifier_char { PUSH_TOKEN(ILLEGAL); }
-<Normal> number not_identifier_char { PUSH_TOKEN_LOOKAHEAD(Token::NUMBER);
}
-<Normal> number identifier_char { PUSH_TOKEN_LOOKAHEAD(Token::ILLEGAL); }
+"(" { PUSH_TOKEN(LPAREN); }
+")" { PUSH_TOKEN(RPAREN); }
+"[" { PUSH_TOKEN(LBRACK); }
+"]" { PUSH_TOKEN(RBRACK); }
+"{" { PUSH_TOKEN(LBRACE); }
+"}" { PUSH_TOKEN(RBRACE); }
+":" { PUSH_TOKEN(COLON); }
+";" { PUSH_TOKEN(SEMICOLON); }
+"." { PUSH_TOKEN(PERIOD); }
+"?" { PUSH_TOKEN(CONDITIONAL); }
+"++" { PUSH_TOKEN(INC); }
+"--" { PUSH_TOKEN(DEC); }
-<Normal> "(" { PUSH_TOKEN(Token::LPAREN); }
-<Normal> ")" { PUSH_TOKEN(Token::RPAREN); }
-<Normal> "[" { PUSH_TOKEN(Token::LBRACK); }
-<Normal> "]" { PUSH_TOKEN(Token::RBRACK); }
-<Normal> "{" { PUSH_TOKEN(Token::LBRACE); }
-<Normal> "}" { PUSH_TOKEN(Token::RBRACE); }
-<Normal> ":" { PUSH_TOKEN(Token::COLON); }
-<Normal> ";" { PUSH_TOKEN(Token::SEMICOLON); }
-<Normal> "." { PUSH_TOKEN(Token::PERIOD); }
-<Normal> "?" { PUSH_TOKEN(Token::CONDITIONAL); }
-<Normal> "++" { PUSH_TOKEN(Token::INC); }
-<Normal> "--" { PUSH_TOKEN(Token::DEC); }
+"||" { PUSH_TOKEN(OR); }
+"&&" { PUSH_TOKEN(AND); }
-<Normal> "||" { PUSH_TOKEN(Token::OR); }
-<Normal> "&&" { PUSH_TOKEN(Token::AND); }
+"|" { PUSH_TOKEN(BIT_OR); }
+"^" { PUSH_TOKEN(BIT_XOR); }
+"&" { PUSH_TOKEN(BIT_AND); }
+"+" { PUSH_TOKEN(ADD); }
+"-" { PUSH_TOKEN(SUB); }
+"*" { PUSH_TOKEN(MUL); }
+"/" { PUSH_TOKEN(DIV); }
+"%" { PUSH_TOKEN(MOD); }
+"~" { PUSH_TOKEN(BIT_NOT); }
+"," { PUSH_TOKEN(COMMA); }
-<Normal> "|" { PUSH_TOKEN(Token::BIT_OR); }
-<Normal> "^" { PUSH_TOKEN(Token::BIT_XOR); }
-<Normal> "&" { PUSH_TOKEN(Token::BIT_AND); }
-<Normal> "+" { PUSH_TOKEN(Token::ADD); }
-<Normal> "-" { PUSH_TOKEN(Token::SUB); }
-<Normal> "*" { PUSH_TOKEN(Token::MUL); }
-<Normal> "/" { PUSH_TOKEN(Token::DIV); }
-<Normal> "%" { PUSH_TOKEN(Token::MOD); }
-<Normal> "~" { PUSH_TOKEN(Token::BIT_NOT); }
-<Normal> "," { PUSH_TOKEN(Token::COMMA); }
+line_terminator+ { PUSH_LINE_TERMINATOR(); }
+whitespace <<continue>>
-<Normal> line_terminator { PUSH_LINE_TERMINATOR(); }
-<Normal> whitespace { SKIP(); }
+"\"" <<DoubleQuoteString>> # TODO mark these transitions as
ignoring this character
+"'" <<SingleQuoteString>>
-<Normal> "\"" :=> DoubleQuoteString
-<Normal> "'" :=> SingleQuoteString
+identifier_start <<Identifier>> # TODO merge identifier dfa...
+/\\u[0-9a-fA-F]{4}/ {
+ if (V8_LIKELY(ValidIdentifierStart())) {
+ JUMP(Identifier);
+ }
+ PUSH_TOKEN(ILLEGAL);
+}
-<Normal> identifier_start :=> Identifier
-<Normal> /\\u[0-9a-fA-F]{4}/ { if (ValidIdentifierStart()) {
YYSETCONDITION(kConditionIdentifier); goto yyc_Identifier; }
send(Token::ILLEGAL); start_ = cursor_; goto yyc_Normal; }
-<Normal> "\\" { PUSH_TOKEN(Token::ILLEGAL); }
+eof <<terminate>>
+default { PUSH_TOKEN(ILLEGAL); }
-<Normal> eof { PUSH_EOF_AND_RETURN();}
-<Normal> any { PUSH_TOKEN(Token::ILLEGAL); }
+<DoubleQuoteString>
+"\\" <<continue>>
+"\\\"" <<continue>>
+"\"" { PUSH_TOKEN(STRING); } <<break>>
+/\\\n\r?/ <<continue>>
+/\\\r\n?/ <<continue>>
+/\n\r/ { PUSH_TOKEN(ILLEGAL); } <<break>>
+eof <<terminate_illegal>>
+default <<continue>>
-<DoubleQuoteString> "\\\\" { goto yyc_DoubleQuoteString; }
-<DoubleQuoteString> "\\\"" { goto yyc_DoubleQuoteString; }
-<DoubleQuoteString> "\"" { PUSH_TOKEN(Token::STRING);}
-<DoubleQuoteString> /\\\n\r?/ { goto yyc_DoubleQuoteString; }
-<DoubleQuoteString> /\\\r\n?/ { goto yyc_DoubleQuoteString; }
-<DoubleQuoteString> /\n/ => Normal {
PUSH_TOKEN_LOOKAHEAD(Token::ILLEGAL); }
-<DoubleQuoteString> /\r/ => Normal {
PUSH_TOKEN_LOOKAHEAD(Token::ILLEGAL); }
-<DoubleQuoteString> eof { TERMINATE_ILLEGAL(); }
-<DoubleQuoteString> any { goto yyc_DoubleQuoteString; }
+<SingleQuoteString>
+"\\" <<continue>>
+"\\'" <<continue>>
+"'" { PUSH_TOKEN(STRING); } <<break>>
+/\\\n\r?/ <<continue>>
+/\\\r\n?/ <<continue>>
+/\n\r/ { PUSH_TOKEN(ILLEGAL); } <<break>>
+eof <<terminate_illegal>>
+default <<continue>>
-<SingleQuoteString> "\\" { goto yyc_SingleQuoteString; }
-<SingleQuoteString> "\\'" { goto yyc_SingleQuoteString; }
-<SingleQuoteString> "'" { PUSH_TOKEN(Token::STRING); }
-<SingleQuoteString> /\\\n\r?/ { goto yyc_SingleQuoteString; }
-<SingleQuoteString> /\\\r\n?/ { goto yyc_SingleQuoteString; }
-<SingleQuoteString> /\n/ => Normal {
PUSH_TOKEN_LOOKAHEAD(Token::ILLEGAL); }
-<SingleQuoteString> /\r/ => Normal {
PUSH_TOKEN_LOOKAHEAD(Token::ILLEGAL); }
-<SingleQuoteString> eof { TERMINATE_ILLEGAL(); }
-<SingleQuoteString> any { goto yyc_SingleQuoteString; }
+<Identifier>
+identifier_char+ <<continue>>
+/\\u[0-9a-fA-F]{4}/ {
+ if (V8_UNLIKELY(!ValidIdentifierStart())) {
+ PUSH_TOKEN(ILLEGAL);
+ JUMP(Normal);
+ }
+}
+default { PUSH_TOKEN(IDENTIFIER); } <<break>>
-<Identifier> identifier_char+ { goto yyc_Identifier; }
-<Identifier> /\\u[0-9a-fA-F]{4}/ { if (ValidIdentifierPart()) { goto
yyc_Identifier; } YYSETCONDITION(kConditionNormal); send(Token::ILLEGAL);
start_ = cursor_; goto yyc_Normal; }
-<Identifier> "\\" { PUSH_TOKEN(Token::ILLEGAL); }
-<Identifier> any { PUSH_TOKEN_LOOKAHEAD(Token::IDENTIFIER); }
+<SingleLineComment>
+line_terminator { PUSH_LINE_TERMINATOR(); } <<break>>
+eof <<terminate>>
+default <<continue>>
-<SingleLineComment> line_terminator { PUSH_LINE_TERMINATOR();}
-<SingleLineComment> eof { start_ = cursor_ - 1;
PUSH_TOKEN(Token::EOS); }
-<SingleLineComment> any { goto yyc_SingleLineComment; }
+<MultiLineComment>
+"*/" <<break>>
+line_terminator+ { PUSH_LINE_TERMINATOR(); }
+eof <<terminate>>
+default <<continue>>
-<MultiLineComment> "*/" { PUSH_LINE_TERMINATOR();}
-<MultiLineComment> eof { start_ = cursor_ - 1;
PUSH_TOKEN(Token::EOS); }
-<MultiLineComment> any { goto yyc_MultiLineComment; }
+<HtmlComment>
+"-->" <<break>>
+line_terminator+ { PUSH_LINE_TERMINATOR(); }
+eof <<terminate>>
+default <<continue>>
-<HtmlComment> "-->" { PUSH_LINE_TERMINATOR();}
-<HtmlComment> line_terminator+ { PUSH_LINE_TERMINATOR();}
-<HtmlComment> eof { start_ = cursor_ - 1; PUSH_TOKEN(Token::EOS); }
-<HtmlComment> any { goto yyc_HtmlComment; }
+<default>
+# all keywords
+"break" { PUSH_TOKEN(BREAK); } <<break>>
+"case" { PUSH_TOKEN(CASE); } <<break>>
+"catch" { PUSH_TOKEN(CATCH); } <<break>>
+"class" { PUSH_TOKEN(FUTURE_RESERVED_WORD); } <<break>>
+"const" { PUSH_TOKEN(CONST); } <<break>>
+"continue" { PUSH_TOKEN(CONTINUE); } <<break>>
+"debugger" { PUSH_TOKEN(DEBUGGER); } <<break>>
+"default" { PUSH_TOKEN(DEFAULT); } <<break>>
+"delete" { PUSH_TOKEN(DELETE); } <<break>>
+"do" { PUSH_TOKEN(DO); } <<break>>
+"else" { PUSH_TOKEN(ELSE); } <<break>>
+"enum" { PUSH_TOKEN(FUTURE_RESERVED_WORD); } <<break>>
+"export" { PUSH_TOKEN(FUTURE_RESERVED_WORD); } <<break>>
+"extends" { PUSH_TOKEN(FUTURE_RESERVED_WORD); } <<break>>
+"false" { PUSH_TOKEN(FALSE_LITERAL); } <<break>>
+"finally" { PUSH_TOKEN(FINALLY); } <<break>>
+"for" { PUSH_TOKEN(FOR); } <<break>>
+"function" { PUSH_TOKEN(FUNCTION); } <<break>>
+"if" { PUSH_TOKEN(IF); } <<break>>
+"implements" { PUSH_TOKEN(FUTURE_STRICT_RESERVED_WORD); } <<break>>
+"import" { PUSH_TOKEN(FUTURE_RESERVED_WORD); } <<break>>
+"in" { PUSH_TOKEN(IN); } <<break>>
+"instanceof" { PUSH_TOKEN(INSTANCEOF); } <<break>>
+"interface" { PUSH_TOKEN(FUTURE_STRICT_RESERVED_WORD); } <<break>>
+"let" { PUSH_TOKEN(FUTURE_STRICT_RESERVED_WORD); } <<break>>
+"new" { PUSH_TOKEN(NEW); } <<break>>
+"null" { PUSH_TOKEN(NULL_LITERAL); } <<break>>
+"package" { PUSH_TOKEN(FUTURE_STRICT_RESERVED_WORD); } <<break>>
+"private" { PUSH_TOKEN(FUTURE_STRICT_RESERVED_WORD); } <<break>>
+"protected" { PUSH_TOKEN(FUTURE_STRICT_RESERVED_WORD); } <<break>>
+"public" { PUSH_TOKEN(FUTURE_STRICT_RESERVED_WORD); } <<break>>
+"return" { PUSH_TOKEN(RETURN); } <<break>>
+"static" { PUSH_TOKEN(FUTURE_STRICT_RESERVED_WORD); } <<break>>
+"super" { PUSH_TOKEN(FUTURE_RESERVED_WORD); } <<break>>
+"switch" { PUSH_TOKEN(SWITCH); } <<break>>
+"this" { PUSH_TOKEN(THIS); } <<break>>
+"throw" { PUSH_TOKEN(THROW); } <<break>>
+"true" { PUSH_TOKEN(TRUE_LITERAL); } <<break>>
+"try" { PUSH_TOKEN(TRY); } <<break>>
+"typeof" { PUSH_TOKEN(TYPEOF); } <<break>>
+"var" { PUSH_TOKEN(VAR); } <<break>>
+"void" { PUSH_TOKEN(VOID); } <<break>>
+"while" { PUSH_TOKEN(WHILE); } <<break>>
+"with" { PUSH_TOKEN(WITH); } <<break>>
+"yield" { PUSH_TOKEN(YIELD); } <<break>>
=======================================
--- /branches/experimental/parser/tools/lexer_generator/dfa.py Wed Nov 6 10:15:06 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/dfa.py Wed Nov 6 15:45:04 2013 UTC
@@ -117,6 +117,7 @@
def f(node, node_content):
for key, (state, action) in node.transitions().items():
+ key = str(key).replace('\\', '\\\\')
if action:
node_content.append(
" S_%s -> S_%s [ label = \"%s {%s} -> %s\" ];" %
=======================================
--- /branches/experimental/parser/tools/lexer_generator/generator.py Wed Nov 6 10:23:13 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/generator.py Wed Nov 6 15:45:04 2013 UTC
@@ -76,7 +76,7 @@
builder.set_character_classes(parser_state.character_classes)
for k, v in parser_state.rules.items():
graphs = []
- for (rule_type, graph, identifier, action) in v:
+ for (graph, code, action) in v['regex']:
# graphs.append(NfaBuilder.add_action(graph, (action, identifier)))
graphs.append(graph)
rule_map[k] = builder.nfa(NfaBuilder.or_graphs(graphs))
=======================================
--- /branches/experimental/parser/tools/lexer_generator/nfa.py Wed Nov 6 10:15:06 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/nfa.py Wed Nov 6 15:45:04 2013 UTC
@@ -357,6 +357,7 @@
for key, values in node.transitions().items():
if key == TransitionKey.epsilon():
key = "ε"
+ key = str(key).replace('\\', '\\\\')
for value in values:
if value[1]:
node_content.append(
=======================================
--- /branches/experimental/parser/tools/lexer_generator/rule_lexer.py Tue Nov 5 12:37:55 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/rule_lexer.py Wed Nov 6 15:45:04 2013 UTC
@@ -30,12 +30,11 @@
class RuleLexer:
tokens = (
+ 'DEFAULT',
'IDENTIFIER',
'STRING',
'REGEX',
'CHARACTER_CLASS_REGEX',
- 'TRANSITION',
- 'TRANSITION_WITH_CODE',
'PLUS',
'QUESTION_MARK',
@@ -47,6 +46,8 @@
'LESS_THAN',
'GREATER_THAN',
'SEMICOLON',
+ 'ACTION_OPEN',
+ 'ACTION_CLOSE',
'LEFT_BRACKET',
'RIGHT_BRACKET',
@@ -65,23 +66,28 @@
r'\#.*[\n\r]+'
pass
- t_IDENTIFIER = r'[a-zA-Z0-9_]+'
+ def t_IDENTIFIER(self, t):
+ r'[a-zA-Z][a-zA-Z0-9_]*'
+ if t.value == 'default':
+ t.type = 'DEFAULT'
+ return t
+
t_STRING = r'"((\\("|\w|\\))|[^\\"])+"'
t_REGEX = r'/[^\/]+/'
t_CHARACTER_CLASS_REGEX = r'\[([^\]]|\\\])+\]'
- t_TRANSITION = r':=>'
- t_TRANSITION_WITH_CODE = r'=>'
t_PLUS = r'\+'
t_QUESTION_MARK = r'\?'
t_STAR = r'\*'
t_OR = r'\|'
- t_EQUALS = r'='
+ t_EQUALS = '='
t_LEFT_PARENTHESIS = r'\('
t_RIGHT_PARENTHESIS = r'\)'
- t_LESS_THAN = r'<'
- t_GREATER_THAN = r'>'
- t_SEMICOLON = r';'
+ t_LESS_THAN = '<'
+ t_GREATER_THAN = '>'
+ t_SEMICOLON = ';'
+ t_ACTION_OPEN = '<<'
+ t_ACTION_CLOSE = '>>'
def t_LEFT_BRACKET(self, t):
r'{'
=======================================
--- /branches/experimental/parser/tools/lexer_generator/rule_parser.py Wed Nov 6 09:28:18 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/rule_parser.py Wed Nov 6 15:45:04 2013 UTC
@@ -36,10 +36,9 @@
def __init__(self):
self.aliases = {
'eof' : RegexParser.parse("[\\0]"),
- 'any' : RegexParser.parse("."),
}
self.character_classes = {}
- self.current_transition = None
+ self.current_state = None
self.rules = {}
def parse(self, string):
@@ -53,15 +52,11 @@
self.__state = None
def p_statements(self, p):
- 'statements : statement maybe_statements'
+ 'statements : aliases rules'
- def p_maybe_statement(self, p):
- '''maybe_statements : statements
- | empty'''
-
- def p_statement(self, p):
- '''statement : alias_rule
- | transition_rule'''
+ def p_aliases(self, p):
+ '''aliases : alias_rule aliases
+ | empty'''
def p_alias_rule(self, p):
'alias_rule : IDENTIFIER EQUALS composite_regex SEMICOLON'
@@ -74,32 +69,47 @@
assert not p[1] in classes
classes[p[1]] = TransitionKey.character_class(graph, classes)
+ def p_rules(self, p):
+ '''rules : state_change transition_rules rules
+ | empty'''
+
+ def p_state_change(self, p):
+ '''state_change : LESS_THAN IDENTIFIER GREATER_THAN
+ | LESS_THAN DEFAULT GREATER_THAN'''
+ state = self.__state
+ state.current_state = p[2]
+ assert state.current_state
+ if not state.current_state in state.rules:
+ state.rules[state.current_state] = {
+ 'default': None,
+ 'regex' : []
+ }
+ p[0] = state.current_state
+
+ def p_transition_rules(self, p):
+ '''transition_rules : transition_rule transition_rules
+ | empty'''
+
def p_transition_rule(self, p):
- '''transition_rule : transition composite_regex code
- | transition composite_regex TRANSITION IDENTIFIER
- | transition composite_regex TRANSITION_WITH_CODE IDENTIFIER code'''
- transition = p[0]
- regex = p[2]
- rules = self.__state.rules[self.__state.current_transition]
- if len(p) == 4:
- rules.append(('simple', regex, None, p[3]))
- elif len(p) == 5:
- rules.append(('transition', regex, p[4], None))
- elif len(p) == 6:
- rules.append(('transition_with_code', regex, p[4], p[5]))
+ '''transition_rule : composite_regex_or_default code action
+ | composite_regex_or_default empty action
+ | composite_regex_or_default code empty'''
+ rules = self.__state.rules[self.__state.current_state]
+ rule = (p[1], p[2], p[3])
+ if p[1] == 'default':
+ assert not rules['default']
+ rules['default'] = rule
else:
- raise Exception()
+ rules['regex'].append(rule)
+
+ def p_action(self, p):
+ 'action : ACTION_OPEN IDENTIFIER ACTION_CLOSE'
+ p[0] = p[2]
- def p_transition(self, p):
- '''transition : LESS_THAN IDENTIFIER GREATER_THAN'''
- # | empty''' TODO skipping transition without sr conflict
- state = self.__state
- if p[1]:
- state.current_transition = p[2]
- assert state.current_transition
- if not state.current_transition in state.rules:
- state.rules[state.current_transition] = []
- p[0] = state.current_transition
+ def p_composite_regex_or_default(self, p):
+ '''composite_regex_or_default : DEFAULT
+ | composite_regex'''
+ p[0] = p[1]
def p_composite_regex(self, p):
'''composite_regex : regex_parts OR regex_parts
--
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
---
You received this message because you are subscribed to the Google Groups "v8-dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email
to [email protected].
For more options, visit https://groups.google.com/groups/opt_out.