Revision: 19064
Author:   [email protected]
Date:     Tue Feb  4 11:58:53 2014 UTC
Log:      Experimental parser: support subgraph inlining

[email protected]

BUG=

Review URL: https://codereview.chromium.org/138973007
http://code.google.com/p/v8/source/detail?r=19064

Deleted:
 /branches/experimental/parser/tools/lexer_generator/regex_lexer.py
 /branches/experimental/parser/tools/lexer_generator/rule_lexer.py
Modified:
 /branches/experimental/parser/src/lexer/lexer_py.re
 /branches/experimental/parser/tools/gyp/v8.gyp
 /branches/experimental/parser/tools/lexer_generator/nfa_builder.py
 /branches/experimental/parser/tools/lexer_generator/regex_parser.py
 /branches/experimental/parser/tools/lexer_generator/rule_parser.py

=======================================
--- /branches/experimental/parser/tools/lexer_generator/regex_lexer.py Mon Nov 18 10:01:13 2013 UTC
+++ /dev/null
@@ -1,139 +0,0 @@
-# Copyright 2013 the V8 project authors. All rights reserved.
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are
-# met:
-#
-#     * Redistributions of source code must retain the above copyright
-#       notice, this list of conditions and the following disclaimer.
-#     * Redistributions in binary form must reproduce the above
-#       copyright notice, this list of conditions and the following
-#       disclaimer in the documentation and/or other materials provided
-#       with the distribution.
-#     * Neither the name of Google Inc. nor the names of its
-#       contributors may be used to endorse or promote products derived
-#       from this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-import ply.lex as lex
-
-def build_escape_map(chars):
-  def add_escape(d, char):
-    d['\\' + char] = char
-    return d
-  return reduce(add_escape, chars,
-    {'\\t' : '\t', '\\r' : '\r', '\\n' : '\n', '\\v' : '\v', '\\f' : '\f'})
-
-class RegexLexer:
-
-  tokens = (
-
-    'GROUP_BEGIN',
-    'GROUP_END',
-
-    'CLASS_BEGIN',
-    'CLASS_END',
-
-    'OR',
-    'ONE_OR_MORE',
-    'ZERO_OR_MORE',
-    'ZERO_OR_ONE',
-    'ANY',
-
-    'REPEAT_BEGIN',
-    'REPEAT_END',
-
-    'NUMBER',
-    'COMMA',
-    'LITERAL',
-
-    'RANGE',
-    'NOT',
-    'CLASS_LITERAL',
-    'CLASS_LITERAL_AS_OCTAL',
-    'CHARACTER_CLASS',
-  )
-
-  states = (
-    ('class','exclusive'),
-    ('repeat','exclusive'),
-  )
-
-  __escaped_literals = build_escape_map("(){}[]?+.*|'\"\\")
-
-  def t_ESCAPED_LITERAL(self, t):
-    r'\\.'
-    t.type = 'LITERAL'
-    t.value = RegexLexer.__escaped_literals[t.value]
-    return t
-
-  t_GROUP_BEGIN = r'\('
-  t_GROUP_END = r'\)'
-
-  t_OR = r'\|'
-  t_ONE_OR_MORE = r'\+'
-  t_ZERO_OR_MORE = r'\*'
-  t_ZERO_OR_ONE = r'\?'
-
-  t_ANY = r'\.'
-
-  t_LITERAL = r'.'
-
-  def t_CLASS_BEGIN(self, t):
-    r'\['
-    self.lexer.push_state('class')
-    return t
-
-  def t_class_CLASS_END(self, t):
-    r'\]'
-    self.lexer.pop_state()
-    return t
-
-  t_class_RANGE = '-'
-  t_class_NOT = '\^'
-  t_class_CHARACTER_CLASS = r':\w+:'
-
-  def t_class_CLASS_LITERAL_AS_OCTAL(self, t):
-    r'\\\d+'
-    return t
-
-  __escaped_class_literals = build_escape_map("^[]-:\\")
-
-  def t_class_ESCAPED_CLASS_LITERAL(self, t):
-    r'\\.'
-    t.type = 'CLASS_LITERAL'
-    t.value = RegexLexer.__escaped_class_literals[t.value]
-    return t
-
-  t_class_CLASS_LITERAL = r'[\w *$_+\'\"/]'
-
-  def t_REPEAT_BEGIN(self, t):
-    r'\{'
-    self.lexer.push_state('repeat')
-    return t
-
-  def t_repeat_REPEAT_END(self, t):
-    r'\}'
-    self.lexer.pop_state()
-    return t
-
-  t_repeat_NUMBER = r'[0-9]+'
-  t_repeat_COMMA = r','
-
-  t_ANY_ignore  = '\n'
-
-  def t_ANY_error(self, t):
-    raise Exception("Illegal character '%s'" % t.value[0])
-
-  def build(self, **kwargs):
-    self.lexer = lex.lex(module=self, **kwargs)
=======================================
--- /branches/experimental/parser/tools/lexer_generator/rule_lexer.py Thu Jan 23 11:39:04 2014 UTC
+++ /dev/null
@@ -1,101 +0,0 @@
-# Copyright 2013 the V8 project authors. All rights reserved.
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are
-# met:
-#
-#     * Redistributions of source code must retain the above copyright
-#       notice, this list of conditions and the following disclaimer.
-#     * Redistributions in binary form must reproduce the above
-#       copyright notice, this list of conditions and the following
-#       disclaimer in the documentation and/or other materials provided
-#       with the distribution.
-#     * Neither the name of Google Inc. nor the names of its
-#       contributors may be used to endorse or promote products derived
-#       from this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-import ply.lex as lex
-
-class RuleLexer:
-
-  tokens = (
-    'DEFAULT_ACTION',
-    'EOS',
-    'CATCH_ALL',
-
-    'IDENTIFIER',
-    'STRING',
-    'REGEX',
-    'CHARACTER_CLASS_REGEX',
-
-    'PLUS',
-    'QUESTION_MARK',
-    'EQUALS',
-    'OR',
-    'STAR',
-    'LEFT_PARENTHESIS',
-    'RIGHT_PARENTHESIS',
-    'GRAPH_OPEN',
-    'GRAPH_CLOSE',
-    'SEMICOLON',
-    'ACTION_OPEN',
-    'ACTION_CLOSE',
-
-    'COMMA',
-  )
-
-  t_ignore = " \t\n\r"
-
-  def t_COMMENT(self, t):
-    r'\#.*[\n\r]+'
-    pass
-
-  __special_identifiers = set(map(lambda s: s.lower(),
-    ['DEFAULT_ACTION', 'CATCH_ALL', 'EOS']))
-
-  def t_IDENTIFIER(self, t):
-    r'[a-zA-Z0-9_]+'
-    if t.value in self.__special_identifiers:
-      t.type = t.value.upper()
-    return t
-
-  t_STRING = r'"((\\("|\w|\\))|[^\\"])+"'
-  t_REGEX = r'/(\\/|[^/])+/'
-  t_CHARACTER_CLASS_REGEX = r'\[([^\]]|\\\])+\]'
-
-  t_PLUS = r'\+'
-  t_QUESTION_MARK = r'\?'
-  t_STAR = r'\*'
-  t_OR = r'\|'
-  t_EQUALS = '='
-  t_LEFT_PARENTHESIS = r'\('
-  t_RIGHT_PARENTHESIS = r'\)'
-  t_GRAPH_OPEN = '<<'
-  t_GRAPH_CLOSE = '>>'
-  t_SEMICOLON = ';'
-  t_ACTION_OPEN = '<'
-  t_ACTION_CLOSE = '>'
-  t_COMMA = ','
-
-  def t_LEFT_BRACKET(self, t):
-    r'{'
-    self.lexer.push_state('code')
-    self.nesting = 1
-    return t
-
-  def t_ANY_error(self, t):
-    raise Exception("Illegal character '%s'" % t.value[0])
-
-  def build(self, **kwargs):
-    self.lexer = lex.lex(module=self, **kwargs)
=======================================
--- /branches/experimental/parser/src/lexer/lexer_py.re Thu Jan 23 11:39:04 2014 UTC
+++ /branches/experimental/parser/src/lexer/lexer_py.re Tue Feb 4 11:58:53 2014 UTC
@@ -32,6 +32,7 @@
 identifier_char = [:identifier_start::identifier_part_not_letter:];
 digit = [0-9];
 hex_digit = [0-9a-fA-F];
+unicode_escape = /\\u[:hex_digit:]{4}/;
 single_escape_char = ['"\\bfnrtv];
 maybe_exponent = /([eE][\-+]?[:digit:]+)?/;
 octal_number = /0[0-7]+/;
@@ -194,52 +195,46 @@
 "yield"       <|token(YIELD)|>

 identifier_start <|token(IDENTIFIER)|Identifier>
-/\\u[:hex_digit:]{4}/ <check_escaped_identifier_start|token(IDENTIFIER)| Identifier>
+unicode_escape <check_escaped_identifier_start|token(IDENTIFIER)| Identifier>

 eos             <terminate>
 default_action  <do_token_and_go_forward(ILLEGAL)>

 <<DoubleQuoteString>>
-"\\" line_terminator_sequence <set_has_escapes||continue>
-/\\[x][:hex_digit:]{2}/       <set_has_escapes||continue>
-/\\[u][:hex_digit:]{4}/       <set_has_escapes||continue>
-/\\[1-7]/                     <octal_inside_string||continue>
-/\\[0-7][0-7]+/               <octal_inside_string||continue>
-"\\0"                         <set_has_escapes||continue>
-/\\[^xu0-7:line_terminator:]/ <set_has_escapes||continue>
-"\\"                          <|token(ILLEGAL)|>
-line_terminator               <|token(ILLEGAL)|>
+epsilon                       <StringSubgraph>
 "\""                          <|token(STRING)|>
 catch_all                     <||continue>
 eos                           <terminate_illegal>

 <<SingleQuoteString>>
-# TODO subgraph for '\'
-"\\" line_terminator_sequence <set_has_escapes||continue>
-/\\[x][:hex_digit:]{2}/       <set_has_escapes||continue>
-/\\[u][:hex_digit:]{4}/       <set_has_escapes||continue>
-/\\[1-7]/                     <octal_inside_string||continue>
-/\\[0-7][0-7]+/               <octal_inside_string||continue>
-"\\0"                         <set_has_escapes||continue>
-/\\[^xu0-7:line_terminator:]/ <set_has_escapes||continue>
-"\\"                          <|token(ILLEGAL)|>
-line_terminator               <|token(ILLEGAL)|>
+epsilon                       <StringSubgraph>
 "'"                           <|token(STRING)|>
 catch_all                     <||continue>
 eos                           <terminate_illegal>

+<<StringSubgraph>>
+"\\" line_terminator_sequence <set_has_escapes||continue(1)>
+/\\[x][:hex_digit:]{2}/       <set_has_escapes||continue(1)>
+unicode_escape                <set_has_escapes||continue(1)>
+/\\[1-7]/                     <octal_inside_string||continue(1)>
+/\\[0-7][0-7]+/               <octal_inside_string||continue(1)>
+"\\0"                         <set_has_escapes||continue(1)>
+/\\[^xu0-7:line_terminator:]/ <set_has_escapes||continue(1)>
+"\\"                          <|token(ILLEGAL)|>
+line_terminator               <|token(ILLEGAL)|>
+
 <<Identifier>>
 identifier_char <|token(IDENTIFIER)|continue>
/\\u[:hex_digit:]{4}/ <check_escaped_identifier_part|token(IDENTIFIER)| continue>

 <<SingleLineComment>>
 line_terminator  <|line_terminator|>
+catch_all        <||continue>
 eos              <skip_and_terminate>
-catch_all        <||continue>

 <<MultiLineComment>>
 /\*+\//          <|skip|>
-# TODO find a way to generate the below rule
+# TODO(dcarney): find a way to generate the below rule
 /\*+[^\/*]/      <||continue>
 line_terminator  <line_terminator_in_multiline_comment||continue>
 catch_all        <||continue>
=======================================
--- /branches/experimental/parser/tools/gyp/v8.gyp Mon Jan 20 12:27:20 2014 UTC
+++ /branches/experimental/parser/tools/gyp/v8.gyp Tue Feb 4 11:58:53 2014 UTC
@@ -248,9 +248,7 @@
           '../../tools/lexer_generator/__init__.py',
           '../../tools/lexer_generator/nfa_builder.py',
           '../../tools/lexer_generator/nfa.py',
-          '../../tools/lexer_generator/regex_lexer.py',
           '../../tools/lexer_generator/regex_parser.py',
-          '../../tools/lexer_generator/rule_lexer.py',
           '../../tools/lexer_generator/rule_parser.py',
           '../../tools/lexer_generator/transition_keys.py',
         ],
=======================================
--- /branches/experimental/parser/tools/lexer_generator/nfa_builder.py Tue Feb 4 10:36:32 2014 UTC
+++ /branches/experimental/parser/tools/lexer_generator/nfa_builder.py Tue Feb 4 11:58:53 2014 UTC
@@ -147,10 +147,11 @@
       end.add_epsilon_transition(global_end)
     return (start, [end])

-  def __continue(self, subtree):
+  def __continue(self, subtree, depth):
     'add an epsilon transitions to the start node of the current subtree'
     (start, ends) = self.__process(subtree)
-    state = self.__peek_state()
+    index = -1 - min(int(depth), len(self.__states) - 1)
+    state = self.__states[index]
     if not state['start_node']:
       state['start_node'] = self.__new_state()
     self.__patch_ends(ends, state['start_node'])
@@ -161,8 +162,11 @@

   def __join(self, tree, name):
     (subtree_start, subtree_end, nodes_in_subtree) = self.__nfa(name)
-    (start, ends) = self.__process(tree)
-    self.__patch_ends(ends, subtree_start)
+    if tree:
+      (start, ends) = self.__process(tree)
+      self.__patch_ends(ends, subtree_start)
+    else:
+      start = subtree_start
     if subtree_end:
       return (start, [subtree_end])
     else:
@@ -196,9 +200,6 @@
   def __pop_state(self):
     return self.__states.pop()

-  def __peek_state(self):
-    return self.__states[-1]
-
   def __nfa(self, name):
     tree = self.__subtree_map[name]
     start_node_number = self.__node_number
@@ -271,8 +272,8 @@
     return Term('ACTION', term, action.to_term())

   @staticmethod
-  def add_continue(term):
-    return Term('CONTINUE', term)
+  def add_continue(tree, depth):
+    return Term('CONTINUE', tree, depth)

   @staticmethod
   def unique_key(name):
=======================================
--- /branches/experimental/parser/tools/lexer_generator/regex_parser.py Mon Feb 3 21:28:33 2014 UTC
+++ /branches/experimental/parser/tools/lexer_generator/regex_parser.py Tue Feb 4 11:58:53 2014 UTC
@@ -25,11 +25,123 @@
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

+import ply.lex as lex
 import ply.yacc as yacc
 from types import ListType, TupleType
 from regex_lexer import RegexLexer
 from action import Term

+def build_escape_map(chars):
+  def add_escape(d, char):
+    d['\\' + char] = char
+    return d
+  return reduce(add_escape, chars,
+    {'\\t' : '\t', '\\r' : '\r', '\\n' : '\n', '\\v' : '\v', '\\f' : '\f'})
+
+class RegexLexer:
+
+  tokens = (
+
+    'GROUP_BEGIN',
+    'GROUP_END',
+
+    'CLASS_BEGIN',
+    'CLASS_END',
+
+    'OR',
+    'ONE_OR_MORE',
+    'ZERO_OR_MORE',
+    'ZERO_OR_ONE',
+    'ANY',
+
+    'REPEAT_BEGIN',
+    'REPEAT_END',
+
+    'NUMBER',
+    'COMMA',
+    'LITERAL',
+
+    'RANGE',
+    'NOT',
+    'CLASS_LITERAL',
+    'CLASS_LITERAL_AS_OCTAL',
+    'CHARACTER_CLASS',
+  )
+
+  states = (
+    ('class','exclusive'),
+    ('repeat','exclusive'),
+  )
+
+  __escaped_literals = build_escape_map("(){}[]?+.*|'\"\\")
+
+  def t_ESCAPED_LITERAL(self, t):
+    r'\\.'
+    t.type = 'LITERAL'
+    t.value = RegexLexer.__escaped_literals[t.value]
+    return t
+
+  t_GROUP_BEGIN = r'\('
+  t_GROUP_END = r'\)'
+
+  t_OR = r'\|'
+  t_ONE_OR_MORE = r'\+'
+  t_ZERO_OR_MORE = r'\*'
+  t_ZERO_OR_ONE = r'\?'
+
+  t_ANY = r'\.'
+
+  t_LITERAL = r'.'
+
+  def t_CLASS_BEGIN(self, t):
+    r'\['
+    self.lexer.push_state('class')
+    return t
+
+  def t_class_CLASS_END(self, t):
+    r'\]'
+    self.lexer.pop_state()
+    return t
+
+  t_class_RANGE = '-'
+  t_class_NOT = '\^'
+  t_class_CHARACTER_CLASS = r':\w+:'
+
+  def t_class_CLASS_LITERAL_AS_OCTAL(self, t):
+    r'\\\d+'
+    return t
+
+  __escaped_class_literals = build_escape_map("^[]-:\\")
+
+  def t_class_ESCAPED_CLASS_LITERAL(self, t):
+    r'\\.'
+    t.type = 'CLASS_LITERAL'
+    t.value = RegexLexer.__escaped_class_literals[t.value]
+    return t
+
+  t_class_CLASS_LITERAL = r'[\w *$_+\'\"/]'
+
+  def t_REPEAT_BEGIN(self, t):
+    r'\{'
+    self.lexer.push_state('repeat')
+    return t
+
+  def t_repeat_REPEAT_END(self, t):
+    r'\}'
+    self.lexer.pop_state()
+    return t
+
+  t_repeat_NUMBER = r'[0-9]+'
+  t_repeat_COMMA = r','
+
+  t_ANY_ignore  = '\n'
+
+  def t_ANY_error(self, t):
+    raise Exception("Illegal character '%s'" % t.value[0])
+
+  def build(self, **kwargs):
+    self.lexer = lex.lex(module=self, **kwargs)
+
 class RegexParser:

   tokens = RegexLexer.tokens
=======================================
--- /branches/experimental/parser/tools/lexer_generator/rule_parser.py Tue Feb 4 10:36:32 2014 UTC
+++ /branches/experimental/parser/tools/lexer_generator/rule_parser.py Tue Feb 4 11:58:53 2014 UTC
@@ -25,15 +25,89 @@
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

+import ply.lex as lex
 import ply.yacc as yacc
 from action import Term, Action
-from rule_lexer import RuleLexer
 from regex_parser import RegexParser
 from nfa_builder import NfaBuilder
 from dfa import Dfa
 from dfa_optimizer import DfaOptimizer
 from transition_keys import TransitionKey, KeyEncoding

+class RuleLexer:
+
+  tokens = (
+    'DEFAULT_ACTION',
+    'EPSILON',
+    'EOS',
+    'CATCH_ALL',
+
+    'IDENTIFIER',
+    'STRING',
+    'REGEX',
+    'CHARACTER_CLASS_REGEX',
+
+    'PLUS',
+    'QUESTION_MARK',
+    'EQUALS',
+    'OR',
+    'STAR',
+    'LEFT_PARENTHESIS',
+    'RIGHT_PARENTHESIS',
+    'GRAPH_OPEN',
+    'GRAPH_CLOSE',
+    'SEMICOLON',
+    'ACTION_OPEN',
+    'ACTION_CLOSE',
+
+    'COMMA',
+  )
+
+  t_ignore = " \t\n\r"
+
+  def t_COMMENT(self, t):
+    r'\#.*[\n\r]+'
+    pass
+
+  __special_identifiers = set(
+    ['default_action', 'epsilon', 'catch_all', 'eos'])
+
+  def t_IDENTIFIER(self, t):
+    r'[a-zA-Z0-9_]+'
+    if t.value in self.__special_identifiers:
+      t.type = t.value.upper()
+    return t
+
+  t_STRING = r'"((\\("|\w|\\))|[^\\"])+"'
+  t_REGEX = r'/(\\/|[^/])+/'
+  t_CHARACTER_CLASS_REGEX = r'\[([^\]]|\\\])+\]'
+
+  t_PLUS = r'\+'
+  t_QUESTION_MARK = r'\?'
+  t_STAR = r'\*'
+  t_OR = r'\|'
+  t_EQUALS = '='
+  t_LEFT_PARENTHESIS = r'\('
+  t_RIGHT_PARENTHESIS = r'\)'
+  t_GRAPH_OPEN = '<<'
+  t_GRAPH_CLOSE = '>>'
+  t_SEMICOLON = ';'
+  t_ACTION_OPEN = '<'
+  t_ACTION_CLOSE = '>'
+  t_COMMA = ','
+
+  def t_LEFT_BRACKET(self, t):
+    r'{'
+    self.lexer.push_state('code')
+    self.nesting = 1
+    return t
+
+  def t_ANY_error(self, t):
+    raise Exception("Illegal character '%s'" % t.value[0])
+
+  def build(self, **kwargs):
+    self.lexer = lex.lex(module=self, **kwargs)
+
 class RuleParserState:

   def __init__(self, encoding):
@@ -88,7 +162,7 @@
       state.rules[state.current_state] = {
         'default_action': Term.empty_term(),
         'uniques' : {},
-        'regex' : []
+        'trees' : []
       }
     p[0] = state.current_state

@@ -100,27 +174,45 @@
     '''transition_rule : composite_regex action
                        | DEFAULT_ACTION default_action
                        | EOS eos
+                       | EPSILON epsilon
                        | CATCH_ALL action'''
     precedence = RuleParser.__rule_precedence_counter
     RuleParser.__rule_precedence_counter += 1
     action = p[2]
     (entry_action, match_action, transition) = action
-    if transition and not transition in self.__keyword_transitions:
-      assert not transition == 'default', "can't append default graph"
-      self.__state.transitions.add(transition)
+    if transition and not transition.name() in self.__keyword_transitions:
+      assert not transition.name() == 'default', "can't append default graph"
+      self.__state.transitions.add(transition.name())
     rules = self.__state.rules[self.__state.current_state]
+    # process default action
     if p[1] == 'default_action':
       assert self.__state.current_state == 'default'
       assert not rules['default_action']
       assert not entry_action
       rules['default_action'] = match_action
-    elif p[1] == 'eos' or p[1] == 'catch_all':
+      return
+    # process tree
+    if p[1] == 'eos' or p[1] == 'catch_all':
       assert p[1] not in rules['uniques']
       rules['uniques'][p[1]] = True
-      rules['regex'].append((NfaBuilder.unique_key(p[1]), precedence, action))
+      tree = NfaBuilder.unique_key(p[1])
+    elif p[1] == 'epsilon':
+      tree = Term.empty_term()
+    else:
+      tree = p[1]  # regex case
+    # handle actions
+    if entry_action or match_action:
+      tree = NfaBuilder.add_action(
+        tree, Action(entry_action, match_action, precedence))
+    # handle transitions
+    if not transition:
+      pass
+    elif transition.name() == 'continue':
+      tree = NfaBuilder.add_continue(tree, transition.args()[0])
     else:
-      regex = p[1]
-      rules['regex'].append((regex, precedence, action))
+      tree = NfaBuilder.join_subtree(tree, transition.name())
+    # store tree
+    rules['trees'].append(tree)

   def p_action(self, p):
'''action : ACTION_OPEN maybe_identifier_action OR maybe_identifier_action OR maybe_transition ACTION_CLOSE'''
@@ -128,11 +220,16 @@

   def p_default_action(self, p):
     'default_action : ACTION_OPEN identifier_action ACTION_CLOSE'
-    p[0] = (Term.empty_term(), p[2], None)
+    p[0] = (Term.empty_term(), p[2], Term.empty_term())

   def p_eos(self, p):
     'eos : ACTION_OPEN identifier_action ACTION_CLOSE'
-    p[0] = (Term.empty_term(), p[2], None)
+    p[0] = (Term.empty_term(), p[2], Term.empty_term())
+
+  def p_epsilon(self, p):
+    'epsilon : ACTION_OPEN maybe_transition ACTION_CLOSE'
+    assert p[2], 'cannot have an empty epsilon transition'
+    p[0] = (Term.empty_term(), Term.empty_term(), p[2])

   def p_maybe_identifier_action(self, p):
     '''maybe_identifier_action : identifier_action
@@ -141,8 +238,14 @@

   def p_maybe_transition(self, p):
     '''maybe_transition : IDENTIFIER
+                       | IDENTIFIER LEFT_PARENTHESIS IDENTIFIER RIGHT_PARENTHESIS
                         | empty'''
-    p[0] = p[1]
+    if len(p) == 5:
+      assert p[1] == 'continue', 'only continue can take arguments'
+      p[0] = Term(p[1], p[3])
+    else:
+      assert len(p) == 2
+      p[0] = Term(p[1], '0') if p[1] else Term.empty_term()

   def p_identifier_action(self, p):
     '''identifier_action : IDENTIFIER
@@ -312,24 +415,11 @@
   def __process_parser_state(self, parser_state):
     assert 'default' in parser_state.rules
     rule_map = {}
-    def process(tree_name, v):
-      trees = []
-      for tree, precedence, action in v['regex']:
-        (entry_action, match_action, transition) = action
-        if entry_action or match_action:
-          tree = NfaBuilder.add_action(
-            tree, Action(entry_action, match_action, precedence))
-        if not transition:
-          pass
-        elif transition == 'continue':
-          tree = NfaBuilder.add_continue(tree)
-        else:
-          tree = NfaBuilder.join_subtree(tree, transition)
-        trees.append(tree)
-      rule_map[tree_name] = NfaBuilder.or_terms(trees)
     # process all subgraphs
-    for k, v in parser_state.rules.items():
-      process(k, v)
+    for tree_name, v in parser_state.rules.items():
+      if not v['trees']:
+        continue
+      rule_map[tree_name] = NfaBuilder.or_terms(v['trees'])
     # build the automata
     for name, tree in rule_map.items():
       self.__automata[name] = RuleProcessor.Automata(

--
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
--- You received this message because you are subscribed to the Google Groups "v8-dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
For more options, visit https://groups.google.com/groups/opt_out.

Reply via email to