Revision: 17761
Author:   [email protected]
Date:     Thu Nov 14 20:25:22 2013 UTC
Log:      Experimental parser: rule grammar refactor

[email protected]

BUG=

Review URL: https://codereview.chromium.org/62103017
http://code.google.com/p/v8/source/detail?r=17761

Modified:
 /branches/experimental/parser/src/lexer/lexer_py.re
 /branches/experimental/parser/tools/lexer_generator/action_test.py
 /branches/experimental/parser/tools/lexer_generator/automaton.py
 /branches/experimental/parser/tools/lexer_generator/dfa.py
 /branches/experimental/parser/tools/lexer_generator/lexer_test.py
 /branches/experimental/parser/tools/lexer_generator/rule_lexer.py
 /branches/experimental/parser/tools/lexer_generator/rule_parser.py
 /branches/experimental/parser/tools/lexer_generator/rule_parser_test.py

=======================================
--- /branches/experimental/parser/src/lexer/lexer_py.re Thu Nov 14 17:30:55 2013 UTC
+++ /branches/experimental/parser/src/lexer/lexer_py.re Thu Nov 14 20:25:22 2013 UTC
@@ -27,180 +27,194 @@

 whitespace_char = [ \t\v\f\r:ws:\240];
 whitespace = whitespace_char+;
-identifier_start = [$_a-zA-Z:lit:];   # TODO add relevant latin1 char codes
+identifier_start = [$_a-zA-Z:lit:];
 identifier_char = [0-9:identifier_start:];
 line_terminator = [\n\r];
 digit = [0-9];
 hex_digit = [0-9a-fA-F];
-maybe_exponent = ([eE] [\-+]? digit+)?;
-number = ("0x" hex_digit+) | (("." digit+ maybe_exponent) | (digit+ ("." digit*)? maybe_exponent));
+maybe_exponent = /([eE][\-+]?[:digit:]+)?/;
+number =
+  /0x[:hex_digit:]+/ | (
+  /\.[:digit:]+/ maybe_exponent |
+  /[:digit:]+(\.[:digit:]*)?/ maybe_exponent );

-<default>
-"|="          push_token(ASSIGN_BIT_OR)
-"^="          push_token(ASSIGN_BIT_XOR)
-"&="          push_token(ASSIGN_BIT_AND)
-"+="          push_token(ASSIGN_ADD)
-"-="          push_token(ASSIGN_SUB)
-"*="          push_token(ASSIGN_MUL)
-"/="          push_token(ASSIGN_DIV)
-"%="          push_token(ASSIGN_MOD)
+# grammar is
+#   regex <action_on_state_entry|action_on_match|transition>
+#
+# actions can be c code enclosed in {} or identifiers to be passed to codegen
+# transition must be in continue or the name of a subgraph

-"==="         push_token(EQ_STRICT)
-"=="          push_token(EQ)
-"="           push_token(ASSIGN)
-"!=="         push_token(NE_STRICT)
-"!="          push_token(NE)
-"!"           push_token(NOT)
+<<default>>
+"|="          <|push_token(ASSIGN_BIT_OR)|>
+"^="          <|push_token(ASSIGN_BIT_XOR)|>
+"&="          <|push_token(ASSIGN_BIT_AND)|>
+"+="          <|push_token(ASSIGN_ADD)|>
+"-="          <|push_token(ASSIGN_SUB)|>
+"*="          <|push_token(ASSIGN_MUL)|>
+"/="          <|push_token(ASSIGN_DIV)|>
+"%="          <|push_token(ASSIGN_MOD)|>

-"//"          <<SingleLineComment>>
-"/*"          <<MultiLineComment>>
-"<!--"        <<HtmlComment>>
+"==="         <|push_token(EQ_STRICT)|>
+"=="          <|push_token(EQ)|>
+"="           <|push_token(ASSIGN)|>
+"!=="         <|push_token(NE_STRICT)|>
+"!="          <|push_token(NE)|>
+"!"           <|push_token(NOT)|>
+
+"//"          <||SingleLineComment>
+"/*"          <||MultiLineComment>
+"<!--"        <||HtmlComment>

 #whitespace* "-->" { if (just_seen_line_terminator_) { YYSETCONDITION(kConditionSingleLineComment); goto yyc_SingleLineComment; } else { --cursor_; send(Token::DEC); start_ = cursor_; goto yyc_Normal; } }

-">>>="        push_token(ASSIGN_SHR)
-">>>"         push_token(SHR)
-"<<="         push_token(ASSIGN_SHL)
-">>="         push_token(ASSIGN_SAR)
-"<="          push_token(LTE)
-">="          push_token(GTE)
-"<<"          push_token(SHL)
-">>"          push_token(SAR)
-"<"           push_token(LT)
-">"           push_token(GT)
+">>>="        <|push_token(ASSIGN_SHR)|>
+">>>"         <|push_token(SHR)|>
+"<<="         <|push_token(ASSIGN_SHL)|>
+">>="         <|push_token(ASSIGN_SAR)|>
+"<="          <|push_token(LTE)|>
+">="          <|push_token(GTE)|>
+"<<"          <|push_token(SHL)|>
+">>"          <|push_token(SAR)|>
+"<"           <|push_token(LT)|>
+">"           <|push_token(GT)|>

-number        push_token(NUMBER)
-# number identifier_char   push_token(ILLEGAL)
+number        <|push_token(NUMBER)|>
+# is this necessary?
+number identifier_char   <|push_token(ILLEGAL)|>

-"("           push_token(LPAREN)
-")"           push_token(RPAREN)
-"["           push_token(LBRACK)
-"]"           push_token(RBRACK)
-"{"           push_token(LBRACE)
-"}"           push_token(RBRACE)
-":"           push_token(COLON)
-";"           push_token(SEMICOLON)
-"."           push_token(PERIOD)
-"?"           push_token(CONDITIONAL)
-"++"          push_token(INC)
-"--"          push_token(DEC)
+"("           <|push_token(LPAREN)|>
+")"           <|push_token(RPAREN)|>
+"["           <|push_token(LBRACK)|>
+"]"           <|push_token(RBRACK)|>
+"{"           <|push_token(LBRACE)|>
+"}"           <|push_token(RBRACE)|>
+":"           <|push_token(COLON)|>
+";"           <|push_token(SEMICOLON)|>
+"."           <|push_token(PERIOD)|>
+"?"           <|push_token(CONDITIONAL)|>
+"++"          <|push_token(INC)|>
+"--"          <|push_token(DEC)|>

-"||"          push_token(OR)
-"&&"          push_token(AND)
+"||"          <|push_token(OR)|>
+"&&"          <|push_token(AND)|>

-"|"           push_token(BIT_OR)
-"^"           push_token(BIT_XOR)
-"&"           push_token(BIT_AND)
-"+"           push_token(ADD)
-"-"           push_token(SUB)
-"*"           push_token(MUL)
-"/"           push_token(DIV)
-"%"           push_token(MOD)
-"~"           push_token(BIT_NOT)
-","           push_token(COMMA)
+"|"           <|push_token(BIT_OR)|>
+"^"           <|push_token(BIT_XOR)|>
+"&"           <|push_token(BIT_AND)|>
+"+"           <|push_token(ADD)|>
+"-"           <|push_token(SUB)|>
+"*"           <|push_token(MUL)|>
+"/"           <|push_token(DIV)|>
+"%"           <|push_token(MOD)|>
+"~"           <|push_token(BIT_NOT)|>
+","           <|push_token(COMMA)|>

-line_terminator+  { PUSH_LINE_TERMINATOR(); }
-whitespace     <<skip>>
+line_terminator+  <|push_line_terminator|>
+whitespace        <|skip|>

-"\""           <<DoubleQuoteString>>
-"'"            <<SingleQuoteString>>
+"\""           <||DoubleQuoteString>
+"'"            <||SingleQuoteString>

 # all keywords
-"break"       push_token(BREAK)
-"case"        push_token(CASE)
-"catch"       push_token(CATCH)
-"class"       push_token(FUTURE_RESERVED_WORD)
-"const"       push_token(CONST)
-"continue"    push_token(CONTINUE)
-"debugger"    push_token(DEBUGGER)
-"default"     push_token(DEFAULT)
-"delete"      push_token(DELETE)
-"do"          push_token(DO)
-"else"        push_token(ELSE)
-"enum"        push_token(FUTURE_RESERVED_WORD)
-"export"      push_token(FUTURE_RESERVED_WORD)
-"extends"     push_token(FUTURE_RESERVED_WORD)
-"false"       push_token(FALSE_LITERAL)
-"finally"     push_token(FINALLY)
-"for"         push_token(FOR)
-"function"    push_token(FUNCTION)
-"if"          push_token(IF)
-"implements"  push_token(FUTURE_STRICT_RESERVED_WORD)
-"import"      push_token(FUTURE_RESERVED_WORD)
-"in"          push_token(IN)
-"instanceof"  push_token(INSTANCEOF)
-"interface"   push_token(FUTURE_STRICT_RESERVED_WORD)
-"let"         push_token(FUTURE_STRICT_RESERVED_WORD)
-"new"         push_token(NEW)
-"null"        push_token(NULL_LITERAL)
-"package"     push_token(FUTURE_STRICT_RESERVED_WORD)
-"private"     push_token(FUTURE_STRICT_RESERVED_WORD)
-"protected"   push_token(FUTURE_STRICT_RESERVED_WORD)
-"public"      push_token(FUTURE_STRICT_RESERVED_WORD)
-"return"      push_token(RETURN)
-"static"      push_token(FUTURE_STRICT_RESERVED_WORD)
-"super"       push_token(FUTURE_RESERVED_WORD)
-"switch"      push_token(SWITCH)
-"this"        push_token(THIS)
-"throw"       push_token(THROW)
-"true"        push_token(TRUE_LITERAL)
-"try"         push_token(TRY)
-"typeof"      push_token(TYPEOF)
-"var"         push_token(VAR)
-"void"        push_token(VOID)
-"while"       push_token(WHILE)
-"with"        push_token(WITH)
-"yield"       push_token(YIELD)
+"break"       <|push_token(BREAK)|>
+"case"        <|push_token(CASE)|>
+"catch"       <|push_token(CATCH)|>
+"class"       <|push_token(FUTURE_RESERVED_WORD)|>
+"const"       <|push_token(CONST)|>
+"continue"    <|push_token(CONTINUE)|>
+"debugger"    <|push_token(DEBUGGER)|>
+"default"     <|push_token(DEFAULT)|>
+"delete"      <|push_token(DELETE)|>
+"do"          <|push_token(DO)|>
+"else"        <|push_token(ELSE)|>
+"enum"        <|push_token(FUTURE_RESERVED_WORD)|>
+"export"      <|push_token(FUTURE_RESERVED_WORD)|>
+"extends"     <|push_token(FUTURE_RESERVED_WORD)|>
+"false"       <|push_token(FALSE_LITERAL)|>
+"finally"     <|push_token(FINALLY)|>
+"for"         <|push_token(FOR)|>
+"function"    <|push_token(FUNCTION)|>
+"if"          <|push_token(IF)|>
+"implements"  <|push_token(FUTURE_STRICT_RESERVED_WORD)|>
+"import"      <|push_token(FUTURE_RESERVED_WORD)|>
+"in"          <|push_token(IN)|>
+"instanceof"  <|push_token(INSTANCEOF)|>
+"interface"   <|push_token(FUTURE_STRICT_RESERVED_WORD)|>
+"let"         <|push_token(FUTURE_STRICT_RESERVED_WORD)|>
+"new"         <|push_token(NEW)|>
+"null"        <|push_token(NULL_LITERAL)|>
+"package"     <|push_token(FUTURE_STRICT_RESERVED_WORD)|>
+"private"     <|push_token(FUTURE_STRICT_RESERVED_WORD)|>
+"protected"   <|push_token(FUTURE_STRICT_RESERVED_WORD)|>
+"public"      <|push_token(FUTURE_STRICT_RESERVED_WORD)|>
+"return"      <|push_token(RETURN)|>
+"static"      <|push_token(FUTURE_STRICT_RESERVED_WORD)|>
+"super"       <|push_token(FUTURE_RESERVED_WORD)|>
+"switch"      <|push_token(SWITCH)|>
+"this"        <|push_token(THIS)|>
+"throw"       <|push_token(THROW)|>
+"true"        <|push_token(TRUE_LITERAL)|>
+"try"         <|push_token(TRY)|>
+"typeof"      <|push_token(TYPEOF)|>
+"var"         <|push_token(VAR)|>
+"void"        <|push_token(VOID)|>
+"while"       <|push_token(WHILE)|>
+"with"        <|push_token(WITH)|>
+"yield"       <|push_token(YIELD)|>

-identifier_start push_token(IDENTIFIER) <<Identifier>>
-/\\u[0-9a-fA-F]{4}/ {
+identifier_start <|push_token(IDENTIFIER)|Identifier>
+/\\u[0-9a-fA-F]{4}/ <{
   if (V8_UNLIKELY(!ValidIdentifierStart())) {
     PUSH_TOKEN(Token::ILLEGAL);
+    // need to goto something here
   }
-} <<Identifier>>
+}|push_token(IDENTIFIER)|Identifier>

-eof             <<terminate>>
-default_action  push_token(ILLEGAL)
+eof             <|terminate|>
+default_action  <push_token(ILLEGAL)>

-<DoubleQuoteString>
-/\\\n\r?/ <<continue>>
-/\\\r\n?/ <<continue>>
-/\\./     <<continue>>
-/\n|\r/   push_token(ILLEGAL)
-"\""      push_token(STRING)
-eof       <<terminate_illegal>>
-catch_all <<continue>>
+<<DoubleQuoteString>>
+/\\\n\r?/ <||continue>
+/\\\r\n?/ <||continue>
+/\\./     <||continue>
+/\n|\r/   <|push_token(ILLEGAL)|>
+"\""      <|push_token(STRING)|>
+eof       <|terminate_illegal|>
+catch_all <||continue>

-<SingleQuoteString>
-/\\\n\r?/ <<continue>>
-/\\\r\n?/ <<continue>>
-/\\./     <<continue>>
-/\n|\r/   push_token(ILLEGAL)
-"'"       push_token(STRING)
-eof       <<terminate_illegal>>
-catch_all <<continue>>
+<<SingleQuoteString>>
+/\\\n\r?/ <||continue>
+/\\\r\n?/ <||continue>
+/\\./     <||continue>
+/\n|\r/   <|push_token(ILLEGAL)|>
+"'"       <|push_token(STRING)|>
+eof       <|terminate_illegal|>
+catch_all <||continue>

-<Identifier>
-identifier_char push_token(IDENTIFIER) <<continue>>
-/\\u[0-9a-fA-F]{4}/ {
+<<Identifier>>
+identifier_char <|push_token(IDENTIFIER)|continue>
+/\\u[0-9a-fA-F]{4}/ <{
   if (V8_UNLIKELY(!ValidIdentifierStart())) {
     PUSH_TOKEN(Token::ILLEGAL);
+    // need to goto something here
   }
-} <<continue>>
+}|push_token(IDENTIFIER)|continue>

-<SingleLineComment>
-line_terminator  { PUSH_LINE_TERMINATOR(); }
-catch_all <<continue>>
+<<SingleLineComment>>
+line_terminator  <|push_line_terminator|>
+catch_all <||continue>

-<MultiLineComment>
-"*/"             <<skip>>
-/\*[^\/]/      <<continue>>
-line_terminator { PUSH_LINE_TERMINATOR(); } <<continue>>
-catch_all <<continue>>
+<<MultiLineComment>>
+"*/"             <|skip|>
+# TODO find a way to generate the below rule
+/\*[^\/]/        <||continue>
+line_terminator  <|push_line_terminator|continue>
+catch_all        <||continue>

-<HtmlComment>
-"-->"            <<skip>>
-/--./            <<continue>>
-/-./             <<continue>>
-line_terminator { PUSH_LINE_TERMINATOR(); } <<continue>>
-catch_all <<continue>>
+<<HtmlComment>>
+"-->"            <|skip|>
+# TODO find a way to generate the below rules
+/--./            <||continue>
+/-./             <||continue>
+line_terminator  <|push_line_terminator|continue>
+catch_all <||continue>
=======================================
--- /branches/experimental/parser/tools/lexer_generator/action_test.py Thu Nov 14 07:25:37 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/action_test.py Thu Nov 14 20:25:22 2013 UTC
@@ -40,15 +40,16 @@
 class ActionTestCase(unittest.TestCase):

     def __verify_last_action(self, automata, string, expected_code):
+      expected_code = (expected_code, None)
       for automaton in [automata.dfa(), automata.minimal_dfa()]:
         actions = list(automaton.collect_actions(string))
         self.assertEqual(actions[-1], Action('TERMINATE'))
-        self.assertEqual(actions[-2].data(), expected_code)
+        self.assertEqual(actions[-2].match_action(), expected_code)

     def test_action_precedence(self):
-      rules = '''<default>
-                 "key" { KEYWORD } <<break>>
-                 /[a-z]+/ { ID } <<break>>'''
+      rules = '''<<default>>
+                 "key" <|KEYWORD|>
+                 /[a-z]+/ <|ID|>'''
       automata_for_conditions = process_rules(rules)
       self.assertEqual(len(automata_for_conditions), 1)
       self.assertTrue('default' in automata_for_conditions)
@@ -61,9 +62,9 @@
       self.__verify_last_action(automata, 'keys', 'ID')

     def test_wrong_action_precedence(self):
-      rules = '''<default>
-                 /[a-z]+/ { ID } <<break>>
-                 "key" { KEYWORD } <<break>>'''
+      rules = '''<<default>>
+                 /[a-z]+/ <|ID|>
+                 "key" <|KEYWORD|>'''
       automata_for_conditions = process_rules(rules)
       self.assertEqual(len(automata_for_conditions), 1)
       self.assertTrue('default' in automata_for_conditions)
=======================================
--- /branches/experimental/parser/tools/lexer_generator/automaton.py Thu Nov 14 10:58:08 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/automaton.py Thu Nov 14 20:25:22 2013 UTC
@@ -31,33 +31,31 @@

 class Action(object):

-  def __init__(self, type, data = None, precedence = -1):
+  def __init__(self, entry_action, match_action = None, precedence = -1):
     assert type
-    self.__type = type
-    self.__data = data
+    self.__entry_action = entry_action
+    self.__match_action = match_action
     self.__precedence = precedence

-  def type(self):
-    return self.__type
+  def entry_action(self):
+    return self.__entry_action

-  def data(self):
-    return self.__data
+  def match_action(self):
+    return self.__match_action

   def precedence(self):
     return self.__precedence

   def __hash__(self):
-    return hash((self.__type, self.__data))
+    return hash((self.__entry_action, self.__match_action))

   def __eq__(self, other):
     return (isinstance(other, self.__class__) and
-            self.__type == other.__type and
-            self.__data == other.__data)
+            self.__entry_action == other.__entry_action and
+            self.__match_action == other.__match_action)

   def __str__(self):
-    if not self.__data:
-      return "action<%s>" % self.__type
-    return "action<%s, %s>" % (self.__type, self.__data)
+    return "action<%s, %s>" % (self.__entry_action, self.__match_action)

 class AutomatonState(object):

@@ -135,14 +133,7 @@

     def f(node, (node_content, edge_content)):
       if node.action():
-        action = node.action()
-        if action.type() == 'code':
-          action_text = action.data()
-        elif action.type() == 'push_token':
-          action_text = "token(" + action.data() + ")"
-        else:
-          action_text = action.type()
-        action_text = escape(action_text)
+        action_text = escape(node.action())
         node_content.append('  S_l%s[shape = box, label="%s"];' %
                             (node.node_number(), action_text))
         node_content.append('  S_%s -> S_l%s [arrowhead = none];' %
=======================================
--- /branches/experimental/parser/tools/lexer_generator/dfa.py Thu Nov 14 10:16:32 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/dfa.py Thu Nov 14 20:25:22 2013 UTC
@@ -133,7 +133,7 @@

   def matches(self, string):
     actions = list(self.collect_actions(string))
-    return actions and actions[-1].type() == 'TERMINATE'
+    return actions and actions[-1].entry_action() == 'TERMINATE'

   def lex(self, string):
     state = self.__start
=======================================
--- /branches/experimental/parser/tools/lexer_generator/lexer_test.py Thu Nov 14 07:25:37 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/lexer_test.py Thu Nov 14 20:25:22 2013 UTC
@@ -32,8 +32,8 @@
 class LexerTestCase(unittest.TestCase):

   def __verify_action_stream(self, rules, string, expected):
-    expected = map(lambda (action, s) : (Action('code', action), s), expected)
-    expected.append((Action('terminate'), '\0'))
+    expected = map(lambda (action, s) : (Action(None, (action, None)), s), expected)
+    expected.append((Action(None, ('terminate', None)), '\0'))
     automata = RuleProcessor.parse(rules).default_automata()
     for automaton in [automata.dfa(), automata.minimal_dfa()]:
         for i, (action, start, stop) in enumerate(automaton.lex(string)):
@@ -42,12 +42,12 @@

   def test_simple(self):
     rules = '''
-    <default>
-    "("           { LBRACE }
-    ")"           { RBRACE }
+    <<default>>
+    "("           <|LBRACE|>
+    ")"           <|RBRACE|>

-    "foo"         { FOO }
-    eof           <<terminate>>'''
+    "foo"         <|FOO|>
+    eof           <|terminate|>'''

     string = 'foo()\0'
     self.__verify_action_stream(rules, string,
@@ -55,11 +55,11 @@

   def test_maximal_matching(self):
     rules = '''
-    <default>
-    "<"           { LT }
-    "<<"          { SHL }
-    " "           { SPACE }
-    eof           <<terminate>>'''
+    <<default>>
+    "<"           <|LT|>
+    "<<"          <|SHL|>
+    " "           <|SPACE|>
+    eof           <|terminate|>'''

     string = '<< <\0'
     self.__verify_action_stream(rules, string,
@@ -69,9 +69,9 @@
     rules = '''
     digit = [0-9];
     number = (digit+ ("." digit+)?);
-    <default>
-    number        { NUMBER }
-    eof           <<terminate>>'''
+    <<default>>
+    number        <|NUMBER|>
+    eof           <|terminate|>'''

     string = '555\0'
     self.__verify_action_stream(rules, string, [('NUMBER', '555')])
=======================================
--- /branches/experimental/parser/tools/lexer_generator/rule_lexer.py Thu Nov 14 17:30:55 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/rule_lexer.py Thu Nov 14 20:25:22 2013 UTC
@@ -30,10 +30,8 @@
 class RuleLexer:

   tokens = (
-    'DEFAULT',
     'DEFAULT_ACTION',
     'CATCH_ALL',
-    'PUSH_TOKEN',

     'IDENTIFIER',
     'STRING',
@@ -47,8 +45,8 @@
     'STAR',
     'LEFT_PARENTHESIS',
     'RIGHT_PARENTHESIS',
-    'LESS_THAN',
-    'GREATER_THAN',
+    'GRAPH_OPEN',
+    'GRAPH_CLOSE',
     'SEMICOLON',
     'ACTION_OPEN',
     'ACTION_CLOSE',
@@ -71,7 +69,7 @@
     pass

   __special_identifiers = set(map(lambda s: s.lower(),
-    ['DEFAULT', 'DEFAULT_ACTION', 'CATCH_ALL', 'PUSH_TOKEN']))
+    ['DEFAULT_ACTION', 'CATCH_ALL']))

   def t_IDENTIFIER(self, t):
     r'[a-zA-Z][a-zA-Z0-9_]*'
@@ -90,11 +88,11 @@
   t_EQUALS = '='
   t_LEFT_PARENTHESIS = r'\('
   t_RIGHT_PARENTHESIS = r'\)'
-  t_LESS_THAN = '<'
-  t_GREATER_THAN = '>'
+  t_GRAPH_OPEN = '<<'
+  t_GRAPH_CLOSE = '>>'
   t_SEMICOLON = ';'
-  t_ACTION_OPEN = '<<'
-  t_ACTION_CLOSE = '>>'
+  t_ACTION_OPEN = '<'
+  t_ACTION_CLOSE = '>'

   def t_LEFT_BRACKET(self, t):
     r'{'
=======================================
--- /branches/experimental/parser/tools/lexer_generator/rule_parser.py Thu Nov 14 17:30:55 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/rule_parser.py Thu Nov 14 20:25:22 2013 UTC
@@ -51,8 +51,7 @@

   tokens = RuleLexer.tokens
   __rule_precedence_counter = 0
-  __keyword_transitions = set([
-      'continue', 'break', 'terminate', 'terminate_illegal', 'skip'])
+  __keyword_transitions = set(['continue'])

   def __init__(self):
     self.__state = None
@@ -80,8 +79,7 @@
              | empty'''

   def p_state_change(self, p):
-    '''state_change : LESS_THAN IDENTIFIER GREATER_THAN
-                    | LESS_THAN DEFAULT GREATER_THAN'''
+    'state_change : GRAPH_OPEN IDENTIFIER GRAPH_CLOSE'
     state = self.__state
     state.current_state = p[2]
     assert state.current_state
@@ -98,41 +96,61 @@
                         | empty'''

   def p_transition_rule(self, p):
-    '''transition_rule : composite_regex code_or_token action
-                       | composite_regex empty action
-                       | composite_regex code_or_token empty
-                       | DEFAULT_ACTION code_or_token empty
-                       | CATCH_ALL empty action'''
-    transition = p[3]
+    '''transition_rule : composite_regex action
+                       | DEFAULT_ACTION default_action
+                       | CATCH_ALL action'''
+    precedence = RuleParser.__rule_precedence_counter
+    RuleParser.__rule_precedence_counter += 1
+    action = p[2]
+    (entry_action, match_action, transition) = action
     if transition and not transition in self.__keyword_transitions:
-      assert not transition == 'default'
+      assert not transition == 'default', "can't append default graph"
       self.__state.transitions.add(transition)
-    RuleParser.__rule_precedence_counter += 1
     rules = self.__state.rules[self.__state.current_state]
-    code = p[2]
     if p[1] == 'default_action':
       assert self.__state.current_state == 'default'
       assert not rules['default_action']
-      rules['default_action'] = code
+      rules['default_action'] = action
     elif p[1] == 'catch_all':
       assert not rules['catch_all']
-      rules['catch_all'] = transition
+      rules['catch_all'] = (precedence, action)
     else:
-      rule = (p[1], RuleParser.__rule_precedence_counter, code, transition)
-      rules['regex'].append(rule)
+      regex = p[1]
+      rules['regex'].append((regex, precedence, action))

-  def p_code_or_token(self, p):
-    '''code_or_token : code
-                     | push_token'''
+  def p_action(self, p):
+    '''action : ACTION_OPEN maybe_action_part OR maybe_action_part OR maybe_transition ACTION_CLOSE'''
+    p[0] = (p[2], p[4], p[6])
+
+  def p_default_action(self, p):
+    'default_action : ACTION_OPEN action_part ACTION_CLOSE'
+    p[0] = (None, p[2], None)
+
+  def p_maybe_action_part(self, p):
+    '''maybe_action_part : action_part
+                         | empty'''
     p[0] = p[1]

-  def p_push_token(self, p):
-    'push_token : PUSH_TOKEN LEFT_PARENTHESIS IDENTIFIER RIGHT_PARENTHESIS'
-    p[0] = (p[1], p[3])
+  def p_action_part(self, p):
+    '''action_part : code
+                         | identifier_action'''
+    p[0] = p[1]
+
+  def p_maybe_transition(self, p):
+    '''maybe_transition : IDENTIFIER
+                        | empty'''
+    p[0] = p[1]

-  def p_action(self, p):
-    'action : ACTION_OPEN IDENTIFIER ACTION_CLOSE'
-    p[0] = p[2]
+  def p_identifier_action(self, p):
+    '''identifier_action : IDENTIFIER
+                         | IDENTIFIER LEFT_PARENTHESIS IDENTIFIER RIGHT_PARENTHESIS'''
+    assert p[1] != 'code'
+    if len(p) == 2:
+      p[0] = (p[1], None)
+    elif len(p) == 5:
+      p[0] = (p[1], p[2])
+    else:
+      raise Exception()

   def p_composite_regex(self, p):
     '''composite_regex : regex_parts OR regex_parts
@@ -274,36 +292,31 @@
     builder = NfaBuilder()
     builder.set_character_classes(parser_state.character_classes)
     assert 'default' in parser_state.rules
-    def process(k, v):
+    def process(subgraph, v):
       graphs = []
       continues = 0
-      for (graph, precedence, code, transition) in v['regex']:
-        default_code = v['default_action']
-        if code or default_code:
-          (code_type, code_value) = code if code else default_code
-          action = Action(code_type, code_value, precedence)
+      for graph, precedence, action in v['regex']:
+        (entry_action, match_action, transition) = action
+        if entry_action or match_action:
+          action = Action(entry_action, match_action, precedence)
           graph = NfaBuilder.add_action(graph, action)
-        if not transition or transition == 'break':
+        if not transition:
           pass
         elif transition == 'continue':
-          assert not k == 'default'
+          assert not subgraph == 'default'
           continues += 1
           graph = NfaBuilder.add_continue(graph)
-        elif (transition == 'terminate' or
-              transition == 'terminate_illegal' or
-              transition == 'skip'):
-          assert not code
-          graph = NfaBuilder.add_action(graph, Action(transition, None, -1))
         else:
-          assert k == 'default'
-          subgraph_modifier = '*' if code else None
+          assert subgraph == 'default'
+          subgraph_modifier = None
           graph = NfaBuilder.join_subgraph(
             graph, transition, rule_map[transition], subgraph_modifier)
         graphs.append(graph)
       if continues == len(graphs):
         graphs.append(NfaBuilder.epsilon())
       if v['catch_all']:
-        assert v['catch_all'] == 'continue'
+        (precedence, catch_all) = v['catch_all']
+        assert catch_all == (None, None, 'continue'), "unimplemented"
         graphs.append(NfaBuilder.add_continue(NfaBuilder.catch_all()))
       graph = NfaBuilder.or_graphs(graphs)
       rule_map[k] = graph
@@ -315,6 +328,6 @@
     # build the automata
     for rule_name, graph in rule_map.items():
       self.__automata[rule_name] = RuleProcessor.Automata(builder, graph)
-
+    # process default_action
     default_action = parser_state.rules['default']['default_action']
-    self.default_action = Action(default_action[0], default_action[1]) if default_action else None
+    self.default_action = Action(None, default_action[1]) if default_action else None
=======================================
--- /branches/experimental/parser/tools/lexer_generator/rule_parser_test.py Tue Nov 12 07:12:31 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/rule_parser_test.py Thu Nov 14 20:25:22 2013 UTC
@@ -40,12 +40,12 @@
    def test_basic(self):
      self.parse('''
 alias = /regex/;
-<cond1> /regex/ <<cond2>>
-<cond1> alias <<cond2>>
-<cond2> /regex/ {body}
-<cond2> alias {body}
-<cond3> /regex/ {body} <<cond1>>
-<cond3> alias {body} <<cond1>>''')
+<<cond1>> /regex/ <||cond2>
+<<cond1>> alias <||cond2>
+<<cond2>> /regex/ <|{body}|>
+<<cond2>> alias <|{body}|>
+<<cond3>> /regex/ <{body}||>
+<<cond3>> alias <{body}||>''')

      self.assertTrue(len(self.state.aliases), 1)
      self.assertTrue('alias' in self.state.aliases)
@@ -73,8 +73,8 @@
    def test_more_complicated(self):
      self.parse('''
 alias = "regex;with;semicolon";
-<cond1> "regex3}with}braces}" {body {with} braces }
-<cond1> "regex4{with{braces}" {body {with} braces }''')
+<<cond1>> "regex3}with}braces}" <|{body {with} braces }|>
+<<cond1>> "regex4{with{braces}" <{body {with} braces }||>''')

      self.assertEquals(self.state.aliases['alias'],
                        RegexParser.parse("regex;with;semicolon"))
@@ -86,13 +86,13 @@
      #     ('body', 'body {with} braces }'))

    def test_body_with_if(self):
-     self.parse('<cond> "regex" { if (foo) { bar } }')
+     self.parse('<<cond>> "regex" <|{ if (foo) { bar } }|>')
      # self.assertEquals(
      #     self.parse['cond']['regex'],
      #     ('body', 'if (foo) { bar }'))

    def test_regexp_with_count(self):
-     self.parse('<cond> /regex{1,3}/ { if (foo) { bar } }')
+     self.parse('<<cond>> /regex{1,3}/ <|{ if (foo) { bar } }|>')
      # self.assertEquals(
      #     self.parse['cond']['regex{1,3}'],
      #     ('body', 'if (foo) { bar }'))

--
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
---
You received this message because you are subscribed to the Google Groups "v8-dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
For more options, visit https://groups.google.com/groups/opt_out.

Reply via email to