Revision: 17524
Author:   [email protected]
Date:     Wed Nov  6 15:45:04 2013 UTC
Log:      Experimental parser: easier to read rules and default rule

[email protected]

BUG=

Review URL: https://codereview.chromium.org/59403010
http://code.google.com/p/v8/source/detail?r=17524

Modified:
 /branches/experimental/parser/src/lexer/lexer_py.re
 /branches/experimental/parser/tools/lexer_generator/dfa.py
 /branches/experimental/parser/tools/lexer_generator/generator.py
 /branches/experimental/parser/tools/lexer_generator/nfa.py
 /branches/experimental/parser/tools/lexer_generator/rule_lexer.py
 /branches/experimental/parser/tools/lexer_generator/rule_parser.py

=======================================
--- /branches/experimental/parser/src/lexer/lexer_py.re Wed Nov 6 08:50:55 2013 UTC
+++ /branches/experimental/parser/src/lexer/lexer_py.re Wed Nov 6 15:45:04 2013 UTC
@@ -27,170 +27,185 @@

 whitespace_char = [ \t\v\f\r:ws:\240];
 whitespace = whitespace_char+;
-identifier_start = [$_a-zA-Z:lit:];
-identifier_char = [$_a-zA-Z0-9:lit:];
-not_identifier_char = [^:identifier_char:];
-line_terminator = [\n\r]+;
+identifier_start = [$_a-zA-Z:lit:];   # TODO add relevant latin1 char codes
+identifier_char = [0-9:identifier_start:];
+line_terminator = [\n\r];
 digit = [0-9];
 hex_digit = [0-9a-fA-F];
 maybe_exponent = ("e" [\-+]? digit+)?;
 number = ("0x" hex_digit+) | (("." digit+ maybe_exponent) | (digit+ ("." digit*)? maybe_exponent));

-<Normal> "break" not_identifier_char { PUSH_TOKEN_LOOKAHEAD(Token::BREAK); }
-<Normal> "case" not_identifier_char { PUSH_TOKEN_LOOKAHEAD(Token::CASE); }
-<Normal> "catch" not_identifier_char { PUSH_TOKEN_LOOKAHEAD(Token::CATCH); }
-<Normal> "class" not_identifier_char { PUSH_TOKEN_LOOKAHEAD(Token::FUTURE_RESERVED_WORD); }
-<Normal> "const" not_identifier_char { PUSH_TOKEN_LOOKAHEAD(Token::CONST); }
-<Normal> "continue" not_identifier_char { PUSH_TOKEN_LOOKAHEAD(Token::CONTINUE); }
-<Normal> "debugger" not_identifier_char { PUSH_TOKEN_LOOKAHEAD(Token::DEBUGGER); }
-<Normal> "default" not_identifier_char { PUSH_TOKEN_LOOKAHEAD(Token::DEFAULT); }
-<Normal> "delete" not_identifier_char { PUSH_TOKEN_LOOKAHEAD(Token::DELETE); }
-<Normal> "do" not_identifier_char { PUSH_TOKEN_LOOKAHEAD(Token::DO); }
-<Normal> "else" not_identifier_char { PUSH_TOKEN_LOOKAHEAD(Token::ELSE); }
-<Normal> "enum" not_identifier_char { PUSH_TOKEN_LOOKAHEAD(Token::FUTURE_RESERVED_WORD); }
-<Normal> "export" not_identifier_char { PUSH_TOKEN_LOOKAHEAD(Token::FUTURE_RESERVED_WORD); }
-<Normal> "extends" not_identifier_char { PUSH_TOKEN_LOOKAHEAD(Token::FUTURE_RESERVED_WORD); }
-<Normal> "false" not_identifier_char { PUSH_TOKEN_LOOKAHEAD(Token::FALSE_LITERAL); }
-<Normal> "finally" not_identifier_char { PUSH_TOKEN_LOOKAHEAD(Token::FINALLY); }
-<Normal> "for" not_identifier_char { PUSH_TOKEN_LOOKAHEAD(Token::FOR); }
-<Normal> "function" not_identifier_char { PUSH_TOKEN_LOOKAHEAD(Token::FUNCTION); }
-<Normal> "if" not_identifier_char { PUSH_TOKEN_LOOKAHEAD(Token::IF); }
-<Normal> "implements" not_identifier_char { PUSH_TOKEN_LOOKAHEAD(Token::FUTURE_STRICT_RESERVED_WORD); }
-<Normal> "import" not_identifier_char { PUSH_TOKEN_LOOKAHEAD(Token::FUTURE_RESERVED_WORD); }
-<Normal> "in" not_identifier_char { PUSH_TOKEN_LOOKAHEAD(Token::IN); }
-<Normal> "instanceof" not_identifier_char { PUSH_TOKEN_LOOKAHEAD(Token::INSTANCEOF); }
-<Normal> "interface" not_identifier_char { PUSH_TOKEN_LOOKAHEAD(Token::FUTURE_STRICT_RESERVED_WORD); }
-<Normal> "let" not_identifier_char { PUSH_TOKEN_LOOKAHEAD(Token::FUTURE_STRICT_RESERVED_WORD); }
-<Normal> "new" not_identifier_char { PUSH_TOKEN_LOOKAHEAD(Token::NEW); }
-<Normal> "null" not_identifier_char { PUSH_TOKEN_LOOKAHEAD(Token::NULL_LITERAL); }
-<Normal> "package" not_identifier_char { PUSH_TOKEN_LOOKAHEAD(Token::FUTURE_STRICT_RESERVED_WORD); }
-<Normal> "private" not_identifier_char { PUSH_TOKEN_LOOKAHEAD(Token::FUTURE_STRICT_RESERVED_WORD); }
-<Normal> "protected" not_identifier_char { PUSH_TOKEN_LOOKAHEAD(Token::FUTURE_STRICT_RESERVED_WORD); }
-<Normal> "public" not_identifier_char { PUSH_TOKEN_LOOKAHEAD(Token::FUTURE_STRICT_RESERVED_WORD); }
-<Normal> "return" not_identifier_char { PUSH_TOKEN_LOOKAHEAD(Token::RETURN); }
-<Normal> "static" not_identifier_char { PUSH_TOKEN_LOOKAHEAD(Token::FUTURE_STRICT_RESERVED_WORD); }
-<Normal> "super" not_identifier_char { PUSH_TOKEN_LOOKAHEAD(Token::FUTURE_RESERVED_WORD); }
-<Normal> "switch" not_identifier_char { PUSH_TOKEN_LOOKAHEAD(Token::SWITCH); }
-<Normal> "this" not_identifier_char { PUSH_TOKEN_LOOKAHEAD(Token::THIS); }
-<Normal> "throw" not_identifier_char { PUSH_TOKEN_LOOKAHEAD(Token::THROW); }
-<Normal> "true" not_identifier_char { PUSH_TOKEN_LOOKAHEAD(Token::TRUE_LITERAL); }
-<Normal> "try" not_identifier_char { PUSH_TOKEN_LOOKAHEAD(Token::TRY); }
-<Normal> "typeof" not_identifier_char { PUSH_TOKEN_LOOKAHEAD(Token::TYPEOF); }
-<Normal> "var" not_identifier_char { PUSH_TOKEN_LOOKAHEAD(Token::VAR); }
-<Normal> "void" not_identifier_char { PUSH_TOKEN_LOOKAHEAD(Token::VOID); }
-<Normal> "while" not_identifier_char { PUSH_TOKEN_LOOKAHEAD(Token::WHILE); }
-<Normal> "with" not_identifier_char { PUSH_TOKEN_LOOKAHEAD(Token::WITH); }
-<Normal> "yield" not_identifier_char { PUSH_TOKEN_LOOKAHEAD(Token::YIELD); }
+<default>
+"|="          { PUSH_TOKEN(ASSIGN_BIT_OR); }
+"^="          { PUSH_TOKEN(ASSIGN_BIT_XOR); }
+"&="          { PUSH_TOKEN(ASSIGN_BIT_AND); }
+"+="          { PUSH_TOKEN(ASSIGN_ADD); }
+"-="          { PUSH_TOKEN(ASSIGN_SUB); }
+"*="          { PUSH_TOKEN(ASSIGN_MUL); }
+"/="          { PUSH_TOKEN(ASSIGN_DIV); }
+"%="          { PUSH_TOKEN(ASSIGN_MOD); }

-<Normal> "|="          { PUSH_TOKEN(Token::ASSIGN_BIT_OR); }
-<Normal> "^="          { PUSH_TOKEN(Token::ASSIGN_BIT_XOR); }
-<Normal> "&="          { PUSH_TOKEN(Token::ASSIGN_BIT_AND); }
-<Normal> "+="          { PUSH_TOKEN(Token::ASSIGN_ADD); }
-<Normal> "-="          { PUSH_TOKEN(Token::ASSIGN_SUB); }
-<Normal> "*="          { PUSH_TOKEN(Token::ASSIGN_MUL); }
-<Normal> "/="          { PUSH_TOKEN(Token::ASSIGN_DIV); }
-<Normal> "%="          { PUSH_TOKEN(Token::ASSIGN_MOD); }
+"==="         { PUSH_TOKEN(EQ_STRICT); }
+"=="          { PUSH_TOKEN(EQ); }
+"="           { PUSH_TOKEN(ASSIGN); }
+"!=="         { PUSH_TOKEN(NE_STRICT); }
+"!="          { PUSH_TOKEN(NE); }
+"!"           { PUSH_TOKEN(NOT); }

-<Normal> "==="         { PUSH_TOKEN(Token::EQ_STRICT); }
-<Normal> "=="          { PUSH_TOKEN(Token::EQ); }
-<Normal> "="           { PUSH_TOKEN(Token::ASSIGN); }
-<Normal> "!=="         { PUSH_TOKEN(Token::NE_STRICT); }
-<Normal> "!="          { PUSH_TOKEN(Token::NE); }
-<Normal> "!"           { PUSH_TOKEN(Token::NOT); }
+"//"          <<SingleLineComment>>   # TODO save offset?
+"/*"          <<MultiLineComment>>
+"<!--"        <<HtmlComment>>
+#whitespace* "-->" { if (just_seen_line_terminator_) { YYSETCONDITION(kConditionSingleLineComment); goto yyc_SingleLineComment; } else { --cursor_; send(Token::DEC); start_ = cursor_; goto yyc_Normal; } }

-<Normal> "//"          :=> SingleLineComment
-<Normal> whitespace* "-->" { if (just_seen_line_terminator_) { YYSETCONDITION(kConditionSingleLineComment); goto yyc_SingleLineComment; } else { --cursor_; send(Token::DEC); start_ = cursor_; goto yyc_Normal; } }
-<Normal> "/*"          :=> MultiLineComment
-<Normal> "<!--"        :=> HtmlComment
+">>>="        { PUSH_TOKEN(ASSIGN_SHR); }
+">>>"         { PUSH_TOKEN(SHR); }
+"<<="         { PUSH_TOKEN(ASSIGN_SHL); }
+">>="         { PUSH_TOKEN(ASSIGN_SAR); }
+"<="          { PUSH_TOKEN(LTE); }
+">="          { PUSH_TOKEN(GTE); }
+"<<"          { PUSH_TOKEN(SHL); }
+">>"          { PUSH_TOKEN(SAR); }
+"<"           { PUSH_TOKEN(LT); }
+">"           { PUSH_TOKEN(GT); }

-<Normal> ">>>="        { PUSH_TOKEN(Token::ASSIGN_SHR); }
-<Normal> ">>>"         { PUSH_TOKEN(Token::SHR); }
-<Normal> "<<="         { PUSH_TOKEN(Token::ASSIGN_SHL); }
-<Normal> ">>="         { PUSH_TOKEN(Token::ASSIGN_SAR); }
-<Normal> "<="          { PUSH_TOKEN(Token::LTE); }
-<Normal> ">="          { PUSH_TOKEN(Token::GTE); }
-<Normal> "<<"          { PUSH_TOKEN(Token::SHL); }
-<Normal> ">>"          { PUSH_TOKEN(Token::SAR); }
-<Normal> "<"           { PUSH_TOKEN(Token::LT); }
-<Normal> ">"           { PUSH_TOKEN(Token::GT); }
+number        { PUSH_TOKEN(NUMBER); }
+# number identifier_char   { PUSH_TOKEN(ILLEGAL); }

-<Normal> number not_identifier_char { PUSH_TOKEN_LOOKAHEAD(Token::NUMBER); }
-<Normal> number identifier_char   { PUSH_TOKEN_LOOKAHEAD(Token::ILLEGAL); }
+"("           { PUSH_TOKEN(LPAREN); }
+")"           { PUSH_TOKEN(RPAREN); }
+"["           { PUSH_TOKEN(LBRACK); }
+"]"           { PUSH_TOKEN(RBRACK); }
+"{"           { PUSH_TOKEN(LBRACE); }
+"}"           { PUSH_TOKEN(RBRACE); }
+":"           { PUSH_TOKEN(COLON); }
+";"           { PUSH_TOKEN(SEMICOLON); }
+"."           { PUSH_TOKEN(PERIOD); }
+"?"           { PUSH_TOKEN(CONDITIONAL); }
+"++"          { PUSH_TOKEN(INC); }
+"--"          { PUSH_TOKEN(DEC); }

-<Normal> "("           { PUSH_TOKEN(Token::LPAREN); }
-<Normal> ")"           { PUSH_TOKEN(Token::RPAREN); }
-<Normal> "["           { PUSH_TOKEN(Token::LBRACK); }
-<Normal> "]"           { PUSH_TOKEN(Token::RBRACK); }
-<Normal> "{"           { PUSH_TOKEN(Token::LBRACE); }
-<Normal> "}"           { PUSH_TOKEN(Token::RBRACE); }
-<Normal> ":"           { PUSH_TOKEN(Token::COLON); }
-<Normal> ";"           { PUSH_TOKEN(Token::SEMICOLON); }
-<Normal> "."           { PUSH_TOKEN(Token::PERIOD); }
-<Normal> "?"           { PUSH_TOKEN(Token::CONDITIONAL); }
-<Normal> "++"          { PUSH_TOKEN(Token::INC); }
-<Normal> "--"          { PUSH_TOKEN(Token::DEC); }
+"||"          { PUSH_TOKEN(OR); }
+"&&"          { PUSH_TOKEN(AND); }

-<Normal> "||"          { PUSH_TOKEN(Token::OR); }
-<Normal> "&&"          { PUSH_TOKEN(Token::AND); }
+"|"           { PUSH_TOKEN(BIT_OR); }
+"^"           { PUSH_TOKEN(BIT_XOR); }
+"&"           { PUSH_TOKEN(BIT_AND); }
+"+"           { PUSH_TOKEN(ADD); }
+"-"           { PUSH_TOKEN(SUB); }
+"*"           { PUSH_TOKEN(MUL); }
+"/"           { PUSH_TOKEN(DIV); }
+"%"           { PUSH_TOKEN(MOD); }
+"~"           { PUSH_TOKEN(BIT_NOT); }
+","           { PUSH_TOKEN(COMMA); }

-<Normal> "|"           { PUSH_TOKEN(Token::BIT_OR); }
-<Normal> "^"           { PUSH_TOKEN(Token::BIT_XOR); }
-<Normal> "&"           { PUSH_TOKEN(Token::BIT_AND); }
-<Normal> "+"           { PUSH_TOKEN(Token::ADD); }
-<Normal> "-"           { PUSH_TOKEN(Token::SUB); }
-<Normal> "*"           { PUSH_TOKEN(Token::MUL); }
-<Normal> "/"           { PUSH_TOKEN(Token::DIV); }
-<Normal> "%"           { PUSH_TOKEN(Token::MOD); }
-<Normal> "~"           { PUSH_TOKEN(Token::BIT_NOT); }
-<Normal> ","           { PUSH_TOKEN(Token::COMMA); }
+line_terminator+  { PUSH_LINE_TERMINATOR(); }
+whitespace     <<continue>>

-<Normal> line_terminator  { PUSH_LINE_TERMINATOR(); }
-<Normal> whitespace       { SKIP(); }
+"\"" <<DoubleQuoteString>> # TODO mark these transitions as ignoring this character
+"'"            <<SingleQuoteString>>

-<Normal> "\""           :=> DoubleQuoteString
-<Normal> "'"           :=> SingleQuoteString
+identifier_start     <<Identifier>>  # TODO merge identifier dfa...
+/\\u[0-9a-fA-F]{4}/ {
+  if (V8_LIKELY(ValidIdentifierStart())) {
+    JUMP(Identifier);
+  }
+  PUSH_TOKEN(ILLEGAL);
+}

-<Normal> identifier_start     :=> Identifier
-<Normal> /\\u[0-9a-fA-F]{4}/ { if (ValidIdentifierStart()) { YYSETCONDITION(kConditionIdentifier); goto yyc_Identifier; } send(Token::ILLEGAL); start_ = cursor_; goto yyc_Normal; }
-<Normal> "\\"                 { PUSH_TOKEN(Token::ILLEGAL); }
+eof           <<terminate>>
+default       { PUSH_TOKEN(ILLEGAL); }

-<Normal> eof           { PUSH_EOF_AND_RETURN();}
-<Normal> any           { PUSH_TOKEN(Token::ILLEGAL); }
+<DoubleQuoteString>
+"\\"      <<continue>>
+"\\\""    <<continue>>
+"\""      { PUSH_TOKEN(STRING); } <<break>>
+/\\\n\r?/ <<continue>>
+/\\\r\n?/ <<continue>>
+/\n\r/    { PUSH_TOKEN(ILLEGAL); } <<break>>
+eof       <<terminate_illegal>>
+default   <<continue>>

-<DoubleQuoteString> "\\\\"  { goto yyc_DoubleQuoteString; }
-<DoubleQuoteString> "\\\""  { goto yyc_DoubleQuoteString; }
-<DoubleQuoteString> "\""     { PUSH_TOKEN(Token::STRING);}
-<DoubleQuoteString> /\\\n\r?/ { goto yyc_DoubleQuoteString; }
-<DoubleQuoteString> /\\\r\n?/ { goto yyc_DoubleQuoteString; }
-<DoubleQuoteString> /\n/ => Normal { PUSH_TOKEN_LOOKAHEAD(Token::ILLEGAL); }
-<DoubleQuoteString> /\r/ => Normal { PUSH_TOKEN_LOOKAHEAD(Token::ILLEGAL); }
-<DoubleQuoteString> eof     { TERMINATE_ILLEGAL(); }
-<DoubleQuoteString> any     { goto yyc_DoubleQuoteString; }
+<SingleQuoteString>
+"\\"      <<continue>>
+"\\'"     <<continue>>
+"'"       { PUSH_TOKEN(STRING); } <<break>>
+/\\\n\r?/ <<continue>>
+/\\\r\n?/ <<continue>>
+/\n\r/    { PUSH_TOKEN(ILLEGAL); } <<break>>
+eof       <<terminate_illegal>>
+default   <<continue>>

-<SingleQuoteString> "\\"  { goto yyc_SingleQuoteString; }
-<SingleQuoteString> "\\'"   { goto yyc_SingleQuoteString; }
-<SingleQuoteString> "'"     { PUSH_TOKEN(Token::STRING); }
-<SingleQuoteString> /\\\n\r?/ { goto yyc_SingleQuoteString; }
-<SingleQuoteString> /\\\r\n?/ { goto yyc_SingleQuoteString; }
-<SingleQuoteString> /\n/ => Normal { PUSH_TOKEN_LOOKAHEAD(Token::ILLEGAL); }
-<SingleQuoteString> /\r/ => Normal { PUSH_TOKEN_LOOKAHEAD(Token::ILLEGAL); }
-<SingleQuoteString> eof     { TERMINATE_ILLEGAL(); }
-<SingleQuoteString> any     { goto yyc_SingleQuoteString; }
+<Identifier>
+identifier_char+    <<continue>>
+/\\u[0-9a-fA-F]{4}/ {
+  if (V8_UNLIKELY(!ValidIdentifierStart())) {
+    PUSH_TOKEN(ILLEGAL);
+    JUMP(Normal);
+  }
+}
+default             { PUSH_TOKEN(IDENTIFIER); }  <<break>>

-<Identifier> identifier_char+  { goto yyc_Identifier; }
-<Identifier> /\\u[0-9a-fA-F]{4}/ { if (ValidIdentifierPart()) { goto yyc_Identifier; } YYSETCONDITION(kConditionNormal); send(Token::ILLEGAL); start_ = cursor_; goto yyc_Normal; }
-<Identifier> "\\"              { PUSH_TOKEN(Token::ILLEGAL); }
-<Identifier> any               { PUSH_TOKEN_LOOKAHEAD(Token::IDENTIFIER); }
+<SingleLineComment>
+line_terminator  { PUSH_LINE_TERMINATOR(); } <<break>>
+eof              <<terminate>>
+default          <<continue>>

-<SingleLineComment> line_terminator { PUSH_LINE_TERMINATOR();}
-<SingleLineComment> eof { start_ = cursor_ - 1; PUSH_TOKEN(Token::EOS); }
-<SingleLineComment> any             { goto yyc_SingleLineComment; }
+<MultiLineComment>
+"*/"             <<break>>
+line_terminator+ { PUSH_LINE_TERMINATOR(); }
+eof              <<terminate>>
+default          <<continue>>

-<MultiLineComment> "*/"  { PUSH_LINE_TERMINATOR();}
-<MultiLineComment> eof { start_ = cursor_ - 1; PUSH_TOKEN(Token::EOS); }
-<MultiLineComment> any      { goto yyc_MultiLineComment; }
+<HtmlComment>
+"-->"            <<break>>
+line_terminator+ { PUSH_LINE_TERMINATOR(); }
+eof              <<terminate>>
+default          <<continue>>

-<HtmlComment> "-->"      { PUSH_LINE_TERMINATOR();}
-<HtmlComment> line_terminator+ { PUSH_LINE_TERMINATOR();}
-<HtmlComment> eof        { start_ = cursor_ - 1; PUSH_TOKEN(Token::EOS); }
-<HtmlComment> any        { goto yyc_HtmlComment; }
+<default>
+# all keywords
+"break"       { PUSH_TOKEN(BREAK); } <<break>>
+"case"        { PUSH_TOKEN(CASE); } <<break>>
+"catch"       { PUSH_TOKEN(CATCH); } <<break>>
+"class"       { PUSH_TOKEN(FUTURE_RESERVED_WORD); } <<break>>
+"const"       { PUSH_TOKEN(CONST); } <<break>>
+"continue"    { PUSH_TOKEN(CONTINUE); } <<break>>
+"debugger"    { PUSH_TOKEN(DEBUGGER); } <<break>>
+"default"     { PUSH_TOKEN(DEFAULT); } <<break>>
+"delete"      { PUSH_TOKEN(DELETE); } <<break>>
+"do"          { PUSH_TOKEN(DO); } <<break>>
+"else"        { PUSH_TOKEN(ELSE); } <<break>>
+"enum"        { PUSH_TOKEN(FUTURE_RESERVED_WORD); } <<break>>
+"export"      { PUSH_TOKEN(FUTURE_RESERVED_WORD); } <<break>>
+"extends"     { PUSH_TOKEN(FUTURE_RESERVED_WORD); } <<break>>
+"false"       { PUSH_TOKEN(FALSE_LITERAL); } <<break>>
+"finally"     { PUSH_TOKEN(FINALLY); } <<break>>
+"for"         { PUSH_TOKEN(FOR); } <<break>>
+"function"    { PUSH_TOKEN(FUNCTION); } <<break>>
+"if"          { PUSH_TOKEN(IF); } <<break>>
+"implements"  { PUSH_TOKEN(FUTURE_STRICT_RESERVED_WORD); } <<break>>
+"import"      { PUSH_TOKEN(FUTURE_RESERVED_WORD); } <<break>>
+"in"          { PUSH_TOKEN(IN); } <<break>>
+"instanceof"  { PUSH_TOKEN(INSTANCEOF); } <<break>>
+"interface"   { PUSH_TOKEN(FUTURE_STRICT_RESERVED_WORD); } <<break>>
+"let"         { PUSH_TOKEN(FUTURE_STRICT_RESERVED_WORD); } <<break>>
+"new"         { PUSH_TOKEN(NEW); } <<break>>
+"null"        { PUSH_TOKEN(NULL_LITERAL); } <<break>>
+"package"     { PUSH_TOKEN(FUTURE_STRICT_RESERVED_WORD); } <<break>>
+"private"     { PUSH_TOKEN(FUTURE_STRICT_RESERVED_WORD); } <<break>>
+"protected"   { PUSH_TOKEN(FUTURE_STRICT_RESERVED_WORD); } <<break>>
+"public"      { PUSH_TOKEN(FUTURE_STRICT_RESERVED_WORD); } <<break>>
+"return"      { PUSH_TOKEN(RETURN); } <<break>>
+"static"      { PUSH_TOKEN(FUTURE_STRICT_RESERVED_WORD); } <<break>>
+"super"       { PUSH_TOKEN(FUTURE_RESERVED_WORD); } <<break>>
+"switch"      { PUSH_TOKEN(SWITCH); } <<break>>
+"this"        { PUSH_TOKEN(THIS); } <<break>>
+"throw"       { PUSH_TOKEN(THROW); } <<break>>
+"true"        { PUSH_TOKEN(TRUE_LITERAL); } <<break>>
+"try"         { PUSH_TOKEN(TRY); } <<break>>
+"typeof"      { PUSH_TOKEN(TYPEOF); } <<break>>
+"var"         { PUSH_TOKEN(VAR); } <<break>>
+"void"        { PUSH_TOKEN(VOID); } <<break>>
+"while"       { PUSH_TOKEN(WHILE); } <<break>>
+"with"        { PUSH_TOKEN(WITH); } <<break>>
+"yield"       { PUSH_TOKEN(YIELD); } <<break>>
=======================================
--- /branches/experimental/parser/tools/lexer_generator/dfa.py Wed Nov 6 10:15:06 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/dfa.py Wed Nov 6 15:45:04 2013 UTC
@@ -117,6 +117,7 @@

     def f(node, node_content):
       for key, (state, action) in node.transitions().items():
+        key = str(key).replace('\\', '\\\\')
         if action:
           node_content.append(
               "  S_%s -> S_%s [ label = \"%s {%s} -> %s\" ];" %
=======================================
--- /branches/experimental/parser/tools/lexer_generator/generator.py Wed Nov 6 10:23:13 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/generator.py Wed Nov 6 15:45:04 2013 UTC
@@ -76,7 +76,7 @@
   builder.set_character_classes(parser_state.character_classes)
   for k, v in parser_state.rules.items():
     graphs = []
-    for (rule_type, graph, identifier, action) in v:
+    for (graph, code, action) in v['regex']:
       # graphs.append(NfaBuilder.add_action(graph, (action, identifier)))
       graphs.append(graph)
     rule_map[k] = builder.nfa(NfaBuilder.or_graphs(graphs))
=======================================
--- /branches/experimental/parser/tools/lexer_generator/nfa.py Wed Nov 6 10:15:06 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/nfa.py Wed Nov 6 15:45:04 2013 UTC
@@ -357,6 +357,7 @@
       for key, values in node.transitions().items():
         if key == TransitionKey.epsilon():
           key = "&epsilon;"
+        key = str(key).replace('\\', '\\\\')
         for value in values:
           if value[1]:
             node_content.append(
=======================================
--- /branches/experimental/parser/tools/lexer_generator/rule_lexer.py Tue Nov 5 12:37:55 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/rule_lexer.py Wed Nov 6 15:45:04 2013 UTC
@@ -30,12 +30,11 @@
 class RuleLexer:

   tokens = (
+    'DEFAULT',
     'IDENTIFIER',
     'STRING',
     'REGEX',
     'CHARACTER_CLASS_REGEX',
-    'TRANSITION',
-    'TRANSITION_WITH_CODE',

     'PLUS',
     'QUESTION_MARK',
@@ -47,6 +46,8 @@
     'LESS_THAN',
     'GREATER_THAN',
     'SEMICOLON',
+    'ACTION_OPEN',
+    'ACTION_CLOSE',

     'LEFT_BRACKET',
     'RIGHT_BRACKET',
@@ -65,23 +66,28 @@
     r'\#.*[\n\r]+'
     pass

-  t_IDENTIFIER = r'[a-zA-Z0-9_]+'
+  def t_IDENTIFIER(self, t):
+    r'[a-zA-Z][a-zA-Z0-9_]*'
+    if t.value == 'default':
+      t.type = 'DEFAULT'
+    return t
+
   t_STRING = r'"((\\("|\w|\\))|[^\\"])+"'
   t_REGEX = r'/[^\/]+/'
   t_CHARACTER_CLASS_REGEX = r'\[([^\]]|\\\])+\]'
-  t_TRANSITION = r':=>'
-  t_TRANSITION_WITH_CODE = r'=>'

   t_PLUS = r'\+'
   t_QUESTION_MARK = r'\?'
   t_STAR = r'\*'
   t_OR = r'\|'
-  t_EQUALS = r'='
+  t_EQUALS = '='
   t_LEFT_PARENTHESIS = r'\('
   t_RIGHT_PARENTHESIS = r'\)'
-  t_LESS_THAN = r'<'
-  t_GREATER_THAN = r'>'
-  t_SEMICOLON = r';'
+  t_LESS_THAN = '<'
+  t_GREATER_THAN = '>'
+  t_SEMICOLON = ';'
+  t_ACTION_OPEN = '<<'
+  t_ACTION_CLOSE = '>>'

   def t_LEFT_BRACKET(self, t):
     r'{'
=======================================
--- /branches/experimental/parser/tools/lexer_generator/rule_parser.py Wed Nov 6 09:28:18 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/rule_parser.py Wed Nov 6 15:45:04 2013 UTC
@@ -36,10 +36,9 @@
   def __init__(self):
     self.aliases = {
       'eof' : RegexParser.parse("[\\0]"),
-      'any' : RegexParser.parse("."),
     }
     self.character_classes = {}
-    self.current_transition = None
+    self.current_state = None
     self.rules = {}

   def parse(self, string):
@@ -53,15 +52,11 @@
     self.__state = None

   def p_statements(self, p):
-    'statements : statement maybe_statements'
+    'statements : aliases rules'

-  def p_maybe_statement(self, p):
-    '''maybe_statements : statements
-                        | empty'''
-
-  def p_statement(self, p):
-    '''statement : alias_rule
-                 | transition_rule'''
+  def p_aliases(self, p):
+    '''aliases : alias_rule aliases
+               | empty'''

   def p_alias_rule(self, p):
     'alias_rule : IDENTIFIER EQUALS composite_regex SEMICOLON'
@@ -74,32 +69,47 @@
       assert not p[1] in classes
       classes[p[1]] = TransitionKey.character_class(graph, classes)

+  def p_rules(self, p):
+    '''rules : state_change transition_rules rules
+             | empty'''
+
+  def p_state_change(self, p):
+    '''state_change : LESS_THAN IDENTIFIER GREATER_THAN
+                    | LESS_THAN DEFAULT GREATER_THAN'''
+    state = self.__state
+    state.current_state = p[2]
+    assert state.current_state
+    if not state.current_state in state.rules:
+      state.rules[state.current_state] = {
+        'default': None,
+        'regex' : []
+      }
+    p[0] = state.current_state
+
+  def p_transition_rules(self, p):
+    '''transition_rules : transition_rule transition_rules
+                        | empty'''
+
   def p_transition_rule(self, p):
-    '''transition_rule : transition composite_regex code
-         | transition composite_regex TRANSITION IDENTIFIER
- | transition composite_regex TRANSITION_WITH_CODE IDENTIFIER code'''
-    transition = p[0]
-    regex = p[2]
-    rules = self.__state.rules[self.__state.current_transition]
-    if len(p) == 4:
-      rules.append(('simple', regex, None, p[3]))
-    elif len(p) == 5:
-      rules.append(('transition', regex, p[4], None))
-    elif len(p) == 6:
-      rules.append(('transition_with_code', regex, p[4], p[5]))
+    '''transition_rule : composite_regex_or_default code action
+                       | composite_regex_or_default empty action
+                       | composite_regex_or_default code empty'''
+    rules = self.__state.rules[self.__state.current_state]
+    rule = (p[1], p[2], p[3])
+    if p[1] == 'default':
+      assert not rules['default']
+      rules['default'] = rule
     else:
-      raise Exception()
+      rules['regex'].append(rule)
+
+  def p_action(self, p):
+    'action : ACTION_OPEN IDENTIFIER ACTION_CLOSE'
+    p[0] = p[2]

-  def p_transition(self, p):
-    '''transition : LESS_THAN IDENTIFIER GREATER_THAN'''
-                  # | empty''' TODO skipping transition without sr conflict
-    state = self.__state
-    if p[1]:
-      state.current_transition = p[2]
-    assert state.current_transition
-    if not state.current_transition in state.rules:
-      state.rules[state.current_transition] = []
-    p[0] = state.current_transition
+  def p_composite_regex_or_default(self, p):
+    '''composite_regex_or_default : DEFAULT
+                                  | composite_regex'''
+    p[0] = p[1]

   def p_composite_regex(self, p):
     '''composite_regex : regex_parts OR regex_parts

--
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
---
You received this message because you are subscribed to the Google Groups "v8-dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email to [email protected].
For more options, visit https://groups.google.com/groups/opt_out.

Reply via email to