Revision: 19064
Author: [email protected]
Date: Tue Feb 4 11:58:53 2014 UTC
Log: Experimental parser: support subgraph inlining
[email protected]
BUG=
Review URL: https://codereview.chromium.org/138973007
http://code.google.com/p/v8/source/detail?r=19064
Deleted:
/branches/experimental/parser/tools/lexer_generator/regex_lexer.py
/branches/experimental/parser/tools/lexer_generator/rule_lexer.py
Modified:
/branches/experimental/parser/src/lexer/lexer_py.re
/branches/experimental/parser/tools/gyp/v8.gyp
/branches/experimental/parser/tools/lexer_generator/nfa_builder.py
/branches/experimental/parser/tools/lexer_generator/regex_parser.py
/branches/experimental/parser/tools/lexer_generator/rule_parser.py
=======================================
--- /branches/experimental/parser/tools/lexer_generator/regex_lexer.py Mon
Nov 18 10:01:13 2013 UTC
+++ /dev/null
@@ -1,139 +0,0 @@
-# Copyright 2013 the V8 project authors. All rights reserved.
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are
-# met:
-#
-# * Redistributions of source code must retain the above copyright
-# notice, this list of conditions and the following disclaimer.
-# * Redistributions in binary form must reproduce the above
-# copyright notice, this list of conditions and the following
-# disclaimer in the documentation and/or other materials provided
-# with the distribution.
-# * Neither the name of Google Inc. nor the names of its
-# contributors may be used to endorse or promote products derived
-# from this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-import ply.lex as lex
-
-def build_escape_map(chars):
- def add_escape(d, char):
- d['\\' + char] = char
- return d
- return reduce(add_escape, chars,
- {'\\t' : '\t', '\\r' : '\r', '\\n' : '\n', '\\v' : '\v', '\\f' : '\f'})
-
-class RegexLexer:
-
- tokens = (
-
- 'GROUP_BEGIN',
- 'GROUP_END',
-
- 'CLASS_BEGIN',
- 'CLASS_END',
-
- 'OR',
- 'ONE_OR_MORE',
- 'ZERO_OR_MORE',
- 'ZERO_OR_ONE',
- 'ANY',
-
- 'REPEAT_BEGIN',
- 'REPEAT_END',
-
- 'NUMBER',
- 'COMMA',
- 'LITERAL',
-
- 'RANGE',
- 'NOT',
- 'CLASS_LITERAL',
- 'CLASS_LITERAL_AS_OCTAL',
- 'CHARACTER_CLASS',
- )
-
- states = (
- ('class','exclusive'),
- ('repeat','exclusive'),
- )
-
- __escaped_literals = build_escape_map("(){}[]?+.*|'\"\\")
-
- def t_ESCAPED_LITERAL(self, t):
- r'\\.'
- t.type = 'LITERAL'
- t.value = RegexLexer.__escaped_literals[t.value]
- return t
-
- t_GROUP_BEGIN = r'\('
- t_GROUP_END = r'\)'
-
- t_OR = r'\|'
- t_ONE_OR_MORE = r'\+'
- t_ZERO_OR_MORE = r'\*'
- t_ZERO_OR_ONE = r'\?'
-
- t_ANY = r'\.'
-
- t_LITERAL = r'.'
-
- def t_CLASS_BEGIN(self, t):
- r'\['
- self.lexer.push_state('class')
- return t
-
- def t_class_CLASS_END(self, t):
- r'\]'
- self.lexer.pop_state()
- return t
-
- t_class_RANGE = '-'
- t_class_NOT = '\^'
- t_class_CHARACTER_CLASS = r':\w+:'
-
- def t_class_CLASS_LITERAL_AS_OCTAL(self, t):
- r'\\\d+'
- return t
-
- __escaped_class_literals = build_escape_map("^[]-:\\")
-
- def t_class_ESCAPED_CLASS_LITERAL(self, t):
- r'\\.'
- t.type = 'CLASS_LITERAL'
- t.value = RegexLexer.__escaped_class_literals[t.value]
- return t
-
- t_class_CLASS_LITERAL = r'[\w *$_+\'\"/]'
-
- def t_REPEAT_BEGIN(self, t):
- r'\{'
- self.lexer.push_state('repeat')
- return t
-
- def t_repeat_REPEAT_END(self, t):
- r'\}'
- self.lexer.pop_state()
- return t
-
- t_repeat_NUMBER = r'[0-9]+'
- t_repeat_COMMA = r','
-
- t_ANY_ignore = '\n'
-
- def t_ANY_error(self, t):
- raise Exception("Illegal character '%s'" % t.value[0])
-
- def build(self, **kwargs):
- self.lexer = lex.lex(module=self, **kwargs)
=======================================
--- /branches/experimental/parser/tools/lexer_generator/rule_lexer.py Thu
Jan 23 11:39:04 2014 UTC
+++ /dev/null
@@ -1,101 +0,0 @@
-# Copyright 2013 the V8 project authors. All rights reserved.
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are
-# met:
-#
-# * Redistributions of source code must retain the above copyright
-# notice, this list of conditions and the following disclaimer.
-# * Redistributions in binary form must reproduce the above
-# copyright notice, this list of conditions and the following
-# disclaimer in the documentation and/or other materials provided
-# with the distribution.
-# * Neither the name of Google Inc. nor the names of its
-# contributors may be used to endorse or promote products derived
-# from this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-import ply.lex as lex
-
-class RuleLexer:
-
- tokens = (
- 'DEFAULT_ACTION',
- 'EOS',
- 'CATCH_ALL',
-
- 'IDENTIFIER',
- 'STRING',
- 'REGEX',
- 'CHARACTER_CLASS_REGEX',
-
- 'PLUS',
- 'QUESTION_MARK',
- 'EQUALS',
- 'OR',
- 'STAR',
- 'LEFT_PARENTHESIS',
- 'RIGHT_PARENTHESIS',
- 'GRAPH_OPEN',
- 'GRAPH_CLOSE',
- 'SEMICOLON',
- 'ACTION_OPEN',
- 'ACTION_CLOSE',
-
- 'COMMA',
- )
-
- t_ignore = " \t\n\r"
-
- def t_COMMENT(self, t):
- r'\#.*[\n\r]+'
- pass
-
- __special_identifiers = set(map(lambda s: s.lower(),
- ['DEFAULT_ACTION', 'CATCH_ALL', 'EOS']))
-
- def t_IDENTIFIER(self, t):
- r'[a-zA-Z0-9_]+'
- if t.value in self.__special_identifiers:
- t.type = t.value.upper()
- return t
-
- t_STRING = r'"((\\("|\w|\\))|[^\\"])+"'
- t_REGEX = r'/(\\/|[^/])+/'
- t_CHARACTER_CLASS_REGEX = r'\[([^\]]|\\\])+\]'
-
- t_PLUS = r'\+'
- t_QUESTION_MARK = r'\?'
- t_STAR = r'\*'
- t_OR = r'\|'
- t_EQUALS = '='
- t_LEFT_PARENTHESIS = r'\('
- t_RIGHT_PARENTHESIS = r'\)'
- t_GRAPH_OPEN = '<<'
- t_GRAPH_CLOSE = '>>'
- t_SEMICOLON = ';'
- t_ACTION_OPEN = '<'
- t_ACTION_CLOSE = '>'
- t_COMMA = ','
-
- def t_LEFT_BRACKET(self, t):
- r'{'
- self.lexer.push_state('code')
- self.nesting = 1
- return t
-
- def t_ANY_error(self, t):
- raise Exception("Illegal character '%s'" % t.value[0])
-
- def build(self, **kwargs):
- self.lexer = lex.lex(module=self, **kwargs)
=======================================
--- /branches/experimental/parser/src/lexer/lexer_py.re Thu Jan 23 11:39:04
2014 UTC
+++ /branches/experimental/parser/src/lexer/lexer_py.re Tue Feb 4 11:58:53
2014 UTC
@@ -32,6 +32,7 @@
identifier_char = [:identifier_start::identifier_part_not_letter:];
digit = [0-9];
hex_digit = [0-9a-fA-F];
+unicode_escape = /\\u[:hex_digit:]{4}/;
single_escape_char = ['"\\bfnrtv];
maybe_exponent = /([eE][\-+]?[:digit:]+)?/;
octal_number = /0[0-7]+/;
@@ -194,52 +195,46 @@
"yield" <|token(YIELD)|>
identifier_start <|token(IDENTIFIER)|Identifier>
-/\\u[:hex_digit:]{4}/ <check_escaped_identifier_start|token(IDENTIFIER)|
Identifier>
+unicode_escape <check_escaped_identifier_start|token(IDENTIFIER)|
Identifier>
eos <terminate>
default_action <do_token_and_go_forward(ILLEGAL)>
<<DoubleQuoteString>>
-"\\" line_terminator_sequence <set_has_escapes||continue>
-/\\[x][:hex_digit:]{2}/ <set_has_escapes||continue>
-/\\[u][:hex_digit:]{4}/ <set_has_escapes||continue>
-/\\[1-7]/ <octal_inside_string||continue>
-/\\[0-7][0-7]+/ <octal_inside_string||continue>
-"\\0" <set_has_escapes||continue>
-/\\[^xu0-7:line_terminator:]/ <set_has_escapes||continue>
-"\\" <|token(ILLEGAL)|>
-line_terminator <|token(ILLEGAL)|>
+epsilon <StringSubgraph>
"\"" <|token(STRING)|>
catch_all <||continue>
eos <terminate_illegal>
<<SingleQuoteString>>
-# TODO subgraph for '\'
-"\\" line_terminator_sequence <set_has_escapes||continue>
-/\\[x][:hex_digit:]{2}/ <set_has_escapes||continue>
-/\\[u][:hex_digit:]{4}/ <set_has_escapes||continue>
-/\\[1-7]/ <octal_inside_string||continue>
-/\\[0-7][0-7]+/ <octal_inside_string||continue>
-"\\0" <set_has_escapes||continue>
-/\\[^xu0-7:line_terminator:]/ <set_has_escapes||continue>
-"\\" <|token(ILLEGAL)|>
-line_terminator <|token(ILLEGAL)|>
+epsilon <StringSubgraph>
"'" <|token(STRING)|>
catch_all <||continue>
eos <terminate_illegal>
+<<StringSubgraph>>
+"\\" line_terminator_sequence <set_has_escapes||continue(1)>
+/\\[x][:hex_digit:]{2}/ <set_has_escapes||continue(1)>
+unicode_escape <set_has_escapes||continue(1)>
+/\\[1-7]/ <octal_inside_string||continue(1)>
+/\\[0-7][0-7]+/ <octal_inside_string||continue(1)>
+"\\0" <set_has_escapes||continue(1)>
+/\\[^xu0-7:line_terminator:]/ <set_has_escapes||continue(1)>
+"\\" <|token(ILLEGAL)|>
+line_terminator <|token(ILLEGAL)|>
+
<<Identifier>>
identifier_char <|token(IDENTIFIER)|continue>
/\\u[:hex_digit:]{4}/ <check_escaped_identifier_part|token(IDENTIFIER)|
continue>
<<SingleLineComment>>
line_terminator <|line_terminator|>
+catch_all <||continue>
eos <skip_and_terminate>
-catch_all <||continue>
<<MultiLineComment>>
/\*+\// <|skip|>
-# TODO find a way to generate the below rule
+# TODO(dcarney): find a way to generate the below rule
/\*+[^\/*]/ <||continue>
line_terminator <line_terminator_in_multiline_comment||continue>
catch_all <||continue>
=======================================
--- /branches/experimental/parser/tools/gyp/v8.gyp Mon Jan 20 12:27:20 2014
UTC
+++ /branches/experimental/parser/tools/gyp/v8.gyp Tue Feb 4 11:58:53 2014
UTC
@@ -248,9 +248,7 @@
'../../tools/lexer_generator/__init__.py',
'../../tools/lexer_generator/nfa_builder.py',
'../../tools/lexer_generator/nfa.py',
- '../../tools/lexer_generator/regex_lexer.py',
'../../tools/lexer_generator/regex_parser.py',
- '../../tools/lexer_generator/rule_lexer.py',
'../../tools/lexer_generator/rule_parser.py',
'../../tools/lexer_generator/transition_keys.py',
],
=======================================
--- /branches/experimental/parser/tools/lexer_generator/nfa_builder.py Tue
Feb 4 10:36:32 2014 UTC
+++ /branches/experimental/parser/tools/lexer_generator/nfa_builder.py Tue
Feb 4 11:58:53 2014 UTC
@@ -147,10 +147,11 @@
end.add_epsilon_transition(global_end)
return (start, [end])
- def __continue(self, subtree):
+ def __continue(self, subtree, depth):
'add an epsilon transitions to the start node of the current subtree'
(start, ends) = self.__process(subtree)
- state = self.__peek_state()
+ index = -1 - min(int(depth), len(self.__states) - 1)
+ state = self.__states[index]
if not state['start_node']:
state['start_node'] = self.__new_state()
self.__patch_ends(ends, state['start_node'])
@@ -161,8 +162,11 @@
def __join(self, tree, name):
(subtree_start, subtree_end, nodes_in_subtree) = self.__nfa(name)
- (start, ends) = self.__process(tree)
- self.__patch_ends(ends, subtree_start)
+ if tree:
+ (start, ends) = self.__process(tree)
+ self.__patch_ends(ends, subtree_start)
+ else:
+ start = subtree_start
if subtree_end:
return (start, [subtree_end])
else:
@@ -196,9 +200,6 @@
def __pop_state(self):
return self.__states.pop()
- def __peek_state(self):
- return self.__states[-1]
-
def __nfa(self, name):
tree = self.__subtree_map[name]
start_node_number = self.__node_number
@@ -271,8 +272,8 @@
return Term('ACTION', term, action.to_term())
@staticmethod
- def add_continue(term):
- return Term('CONTINUE', term)
+ def add_continue(tree, depth):
+ return Term('CONTINUE', tree, depth)
@staticmethod
def unique_key(name):
=======================================
--- /branches/experimental/parser/tools/lexer_generator/regex_parser.py Mon
Feb 3 21:28:33 2014 UTC
+++ /branches/experimental/parser/tools/lexer_generator/regex_parser.py Tue
Feb 4 11:58:53 2014 UTC
@@ -25,11 +25,123 @@
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+import ply.lex as lex
import ply.yacc as yacc
from types import ListType, TupleType
from regex_lexer import RegexLexer
from action import Term
+def build_escape_map(chars):
+ def add_escape(d, char):
+ d['\\' + char] = char
+ return d
+ return reduce(add_escape, chars,
+ {'\\t' : '\t', '\\r' : '\r', '\\n' : '\n', '\\v' : '\v', '\\f' : '\f'})
+
+class RegexLexer:
+
+ tokens = (
+
+ 'GROUP_BEGIN',
+ 'GROUP_END',
+
+ 'CLASS_BEGIN',
+ 'CLASS_END',
+
+ 'OR',
+ 'ONE_OR_MORE',
+ 'ZERO_OR_MORE',
+ 'ZERO_OR_ONE',
+ 'ANY',
+
+ 'REPEAT_BEGIN',
+ 'REPEAT_END',
+
+ 'NUMBER',
+ 'COMMA',
+ 'LITERAL',
+
+ 'RANGE',
+ 'NOT',
+ 'CLASS_LITERAL',
+ 'CLASS_LITERAL_AS_OCTAL',
+ 'CHARACTER_CLASS',
+ )
+
+ states = (
+ ('class','exclusive'),
+ ('repeat','exclusive'),
+ )
+
+ __escaped_literals = build_escape_map("(){}[]?+.*|'\"\\")
+
+ def t_ESCAPED_LITERAL(self, t):
+ r'\\.'
+ t.type = 'LITERAL'
+ t.value = RegexLexer.__escaped_literals[t.value]
+ return t
+
+ t_GROUP_BEGIN = r'\('
+ t_GROUP_END = r'\)'
+
+ t_OR = r'\|'
+ t_ONE_OR_MORE = r'\+'
+ t_ZERO_OR_MORE = r'\*'
+ t_ZERO_OR_ONE = r'\?'
+
+ t_ANY = r'\.'
+
+ t_LITERAL = r'.'
+
+ def t_CLASS_BEGIN(self, t):
+ r'\['
+ self.lexer.push_state('class')
+ return t
+
+ def t_class_CLASS_END(self, t):
+ r'\]'
+ self.lexer.pop_state()
+ return t
+
+ t_class_RANGE = '-'
+ t_class_NOT = '\^'
+ t_class_CHARACTER_CLASS = r':\w+:'
+
+ def t_class_CLASS_LITERAL_AS_OCTAL(self, t):
+ r'\\\d+'
+ return t
+
+ __escaped_class_literals = build_escape_map("^[]-:\\")
+
+ def t_class_ESCAPED_CLASS_LITERAL(self, t):
+ r'\\.'
+ t.type = 'CLASS_LITERAL'
+ t.value = RegexLexer.__escaped_class_literals[t.value]
+ return t
+
+ t_class_CLASS_LITERAL = r'[\w *$_+\'\"/]'
+
+ def t_REPEAT_BEGIN(self, t):
+ r'\{'
+ self.lexer.push_state('repeat')
+ return t
+
+ def t_repeat_REPEAT_END(self, t):
+ r'\}'
+ self.lexer.pop_state()
+ return t
+
+ t_repeat_NUMBER = r'[0-9]+'
+ t_repeat_COMMA = r','
+
+ t_ANY_ignore = '\n'
+
+ def t_ANY_error(self, t):
+ raise Exception("Illegal character '%s'" % t.value[0])
+
+ def build(self, **kwargs):
+ self.lexer = lex.lex(module=self, **kwargs)
+
class RegexParser:
tokens = RegexLexer.tokens
=======================================
--- /branches/experimental/parser/tools/lexer_generator/rule_parser.py Tue
Feb 4 10:36:32 2014 UTC
+++ /branches/experimental/parser/tools/lexer_generator/rule_parser.py Tue
Feb 4 11:58:53 2014 UTC
@@ -25,15 +25,89 @@
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+import ply.lex as lex
import ply.yacc as yacc
from action import Term, Action
-from rule_lexer import RuleLexer
from regex_parser import RegexParser
from nfa_builder import NfaBuilder
from dfa import Dfa
from dfa_optimizer import DfaOptimizer
from transition_keys import TransitionKey, KeyEncoding
+class RuleLexer:
+
+ tokens = (
+ 'DEFAULT_ACTION',
+ 'EPSILON',
+ 'EOS',
+ 'CATCH_ALL',
+
+ 'IDENTIFIER',
+ 'STRING',
+ 'REGEX',
+ 'CHARACTER_CLASS_REGEX',
+
+ 'PLUS',
+ 'QUESTION_MARK',
+ 'EQUALS',
+ 'OR',
+ 'STAR',
+ 'LEFT_PARENTHESIS',
+ 'RIGHT_PARENTHESIS',
+ 'GRAPH_OPEN',
+ 'GRAPH_CLOSE',
+ 'SEMICOLON',
+ 'ACTION_OPEN',
+ 'ACTION_CLOSE',
+
+ 'COMMA',
+ )
+
+ t_ignore = " \t\n\r"
+
+ def t_COMMENT(self, t):
+ r'\#.*[\n\r]+'
+ pass
+
+ __special_identifiers = set(
+ ['default_action', 'epsilon', 'catch_all', 'eos'])
+
+ def t_IDENTIFIER(self, t):
+ r'[a-zA-Z0-9_]+'
+ if t.value in self.__special_identifiers:
+ t.type = t.value.upper()
+ return t
+
+ t_STRING = r'"((\\("|\w|\\))|[^\\"])+"'
+ t_REGEX = r'/(\\/|[^/])+/'
+ t_CHARACTER_CLASS_REGEX = r'\[([^\]]|\\\])+\]'
+
+ t_PLUS = r'\+'
+ t_QUESTION_MARK = r'\?'
+ t_STAR = r'\*'
+ t_OR = r'\|'
+ t_EQUALS = '='
+ t_LEFT_PARENTHESIS = r'\('
+ t_RIGHT_PARENTHESIS = r'\)'
+ t_GRAPH_OPEN = '<<'
+ t_GRAPH_CLOSE = '>>'
+ t_SEMICOLON = ';'
+ t_ACTION_OPEN = '<'
+ t_ACTION_CLOSE = '>'
+ t_COMMA = ','
+
+ def t_LEFT_BRACKET(self, t):
+ r'{'
+ self.lexer.push_state('code')
+ self.nesting = 1
+ return t
+
+ def t_ANY_error(self, t):
+ raise Exception("Illegal character '%s'" % t.value[0])
+
+ def build(self, **kwargs):
+ self.lexer = lex.lex(module=self, **kwargs)
+
class RuleParserState:
def __init__(self, encoding):
@@ -88,7 +162,7 @@
state.rules[state.current_state] = {
'default_action': Term.empty_term(),
'uniques' : {},
- 'regex' : []
+ 'trees' : []
}
p[0] = state.current_state
@@ -100,27 +174,45 @@
'''transition_rule : composite_regex action
| DEFAULT_ACTION default_action
| EOS eos
+ | EPSILON epsilon
| CATCH_ALL action'''
precedence = RuleParser.__rule_precedence_counter
RuleParser.__rule_precedence_counter += 1
action = p[2]
(entry_action, match_action, transition) = action
- if transition and not transition in self.__keyword_transitions:
- assert not transition == 'default', "can't append default graph"
- self.__state.transitions.add(transition)
+ if transition and not transition.name() in self.__keyword_transitions:
+ assert not transition.name() == 'default', "can't append default
graph"
+ self.__state.transitions.add(transition.name())
rules = self.__state.rules[self.__state.current_state]
+ # process default action
if p[1] == 'default_action':
assert self.__state.current_state == 'default'
assert not rules['default_action']
assert not entry_action
rules['default_action'] = match_action
- elif p[1] == 'eos' or p[1] == 'catch_all':
+ return
+ # process tree
+ if p[1] == 'eos' or p[1] == 'catch_all':
assert p[1] not in rules['uniques']
rules['uniques'][p[1]] = True
- rules['regex'].append((NfaBuilder.unique_key(p[1]), precedence,
action))
+ tree = NfaBuilder.unique_key(p[1])
+ elif p[1] == 'epsilon':
+ tree = Term.empty_term()
+ else:
+ tree = p[1] # regex case
+ # handle actions
+ if entry_action or match_action:
+ tree = NfaBuilder.add_action(
+ tree, Action(entry_action, match_action, precedence))
+ # handle transitions
+ if not transition:
+ pass
+ elif transition.name() == 'continue':
+ tree = NfaBuilder.add_continue(tree, transition.args()[0])
else:
- regex = p[1]
- rules['regex'].append((regex, precedence, action))
+ tree = NfaBuilder.join_subtree(tree, transition.name())
+ # store tree
+ rules['trees'].append(tree)
def p_action(self, p):
'''action : ACTION_OPEN maybe_identifier_action OR
maybe_identifier_action OR maybe_transition ACTION_CLOSE'''
@@ -128,11 +220,16 @@
def p_default_action(self, p):
'default_action : ACTION_OPEN identifier_action ACTION_CLOSE'
- p[0] = (Term.empty_term(), p[2], None)
+ p[0] = (Term.empty_term(), p[2], Term.empty_term())
def p_eos(self, p):
'eos : ACTION_OPEN identifier_action ACTION_CLOSE'
- p[0] = (Term.empty_term(), p[2], None)
+ p[0] = (Term.empty_term(), p[2], Term.empty_term())
+
+ def p_epsilon(self, p):
+ 'epsilon : ACTION_OPEN maybe_transition ACTION_CLOSE'
+ assert p[2], 'cannot have an empty epsilon transition'
+ p[0] = (Term.empty_term(), Term.empty_term(), p[2])
def p_maybe_identifier_action(self, p):
'''maybe_identifier_action : identifier_action
@@ -141,8 +238,14 @@
def p_maybe_transition(self, p):
'''maybe_transition : IDENTIFIER
+ | IDENTIFIER LEFT_PARENTHESIS IDENTIFIER
RIGHT_PARENTHESIS
| empty'''
- p[0] = p[1]
+ if len(p) == 5:
+ assert p[1] == 'continue', 'only continue can take arguments'
+ p[0] = Term(p[1], p[3])
+ else:
+ assert len(p) == 2
+ p[0] = Term(p[1], '0') if p[1] else Term.empty_term()
def p_identifier_action(self, p):
'''identifier_action : IDENTIFIER
@@ -312,24 +415,11 @@
def __process_parser_state(self, parser_state):
assert 'default' in parser_state.rules
rule_map = {}
- def process(tree_name, v):
- trees = []
- for tree, precedence, action in v['regex']:
- (entry_action, match_action, transition) = action
- if entry_action or match_action:
- tree = NfaBuilder.add_action(
- tree, Action(entry_action, match_action, precedence))
- if not transition:
- pass
- elif transition == 'continue':
- tree = NfaBuilder.add_continue(tree)
- else:
- tree = NfaBuilder.join_subtree(tree, transition)
- trees.append(tree)
- rule_map[tree_name] = NfaBuilder.or_terms(trees)
# process all subgraphs
- for k, v in parser_state.rules.items():
- process(k, v)
+ for tree_name, v in parser_state.rules.items():
+ if not v['trees']:
+ continue
+ rule_map[tree_name] = NfaBuilder.or_terms(v['trees'])
# build the automata
for name, tree in rule_map.items():
self.__automata[name] = RuleProcessor.Automata(
--
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
---
You received this message because you are subscribed to the Google Groups "v8-dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email
to [email protected].
For more options, visit https://groups.google.com/groups/opt_out.