Revision: 17761
Author: [email protected]
Date: Thu Nov 14 20:25:22 2013 UTC
Log: Experimental parser: rule grammar refactor
[email protected]
BUG=
Review URL: https://codereview.chromium.org/62103017
http://code.google.com/p/v8/source/detail?r=17761
Modified:
/branches/experimental/parser/src/lexer/lexer_py.re
/branches/experimental/parser/tools/lexer_generator/action_test.py
/branches/experimental/parser/tools/lexer_generator/automaton.py
/branches/experimental/parser/tools/lexer_generator/dfa.py
/branches/experimental/parser/tools/lexer_generator/lexer_test.py
/branches/experimental/parser/tools/lexer_generator/rule_lexer.py
/branches/experimental/parser/tools/lexer_generator/rule_parser.py
/branches/experimental/parser/tools/lexer_generator/rule_parser_test.py
=======================================
--- /branches/experimental/parser/src/lexer/lexer_py.re Thu Nov 14 17:30:55
2013 UTC
+++ /branches/experimental/parser/src/lexer/lexer_py.re Thu Nov 14 20:25:22
2013 UTC
@@ -27,180 +27,194 @@
whitespace_char = [ \t\v\f\r:ws:\240];
whitespace = whitespace_char+;
-identifier_start = [$_a-zA-Z:lit:]; # TODO add relevant latin1 char codes
+identifier_start = [$_a-zA-Z:lit:];
identifier_char = [0-9:identifier_start:];
line_terminator = [\n\r];
digit = [0-9];
hex_digit = [0-9a-fA-F];
-maybe_exponent = ([eE] [\-+]? digit+)?;
-number = ("0x" hex_digit+) | (("." digit+ maybe_exponent) | (digit+ ("."
digit*)? maybe_exponent));
+maybe_exponent = /([eE][\-+]?[:digit:]+)?/;
+number =
+ /0x[:hex_digit:]+/ | (
+ /\.[:digit:]+/ maybe_exponent |
+ /[:digit:]+(\.[:digit:]*)?/ maybe_exponent );
-<default>
-"|=" push_token(ASSIGN_BIT_OR)
-"^=" push_token(ASSIGN_BIT_XOR)
-"&=" push_token(ASSIGN_BIT_AND)
-"+=" push_token(ASSIGN_ADD)
-"-=" push_token(ASSIGN_SUB)
-"*=" push_token(ASSIGN_MUL)
-"/=" push_token(ASSIGN_DIV)
-"%=" push_token(ASSIGN_MOD)
+# grammar is
+# regex <action_on_state_entry|action_on_match|transition>
+#
+# actions can be C code enclosed in {} or identifiers to be passed to codegen
+# transition must be either 'continue' or the name of a subgraph
-"===" push_token(EQ_STRICT)
-"==" push_token(EQ)
-"=" push_token(ASSIGN)
-"!==" push_token(NE_STRICT)
-"!=" push_token(NE)
-"!" push_token(NOT)
+<<default>>
+"|=" <|push_token(ASSIGN_BIT_OR)|>
+"^=" <|push_token(ASSIGN_BIT_XOR)|>
+"&=" <|push_token(ASSIGN_BIT_AND)|>
+"+=" <|push_token(ASSIGN_ADD)|>
+"-=" <|push_token(ASSIGN_SUB)|>
+"*=" <|push_token(ASSIGN_MUL)|>
+"/=" <|push_token(ASSIGN_DIV)|>
+"%=" <|push_token(ASSIGN_MOD)|>
-"//" <<SingleLineComment>>
-"/*" <<MultiLineComment>>
-"<!--" <<HtmlComment>>
+"===" <|push_token(EQ_STRICT)|>
+"==" <|push_token(EQ)|>
+"=" <|push_token(ASSIGN)|>
+"!==" <|push_token(NE_STRICT)|>
+"!=" <|push_token(NE)|>
+"!" <|push_token(NOT)|>
+
+"//" <||SingleLineComment>
+"/*" <||MultiLineComment>
+"<!--" <||HtmlComment>
#whitespace* "-->" { if (just_seen_line_terminator_) {
YYSETCONDITION(kConditionSingleLineComment); goto yyc_SingleLineComment; }
else { --cursor_; send(Token::DEC); start_ = cursor_; goto yyc_Normal; } }
-">>>=" push_token(ASSIGN_SHR)
-">>>" push_token(SHR)
-"<<=" push_token(ASSIGN_SHL)
-">>=" push_token(ASSIGN_SAR)
-"<=" push_token(LTE)
-">=" push_token(GTE)
-"<<" push_token(SHL)
-">>" push_token(SAR)
-"<" push_token(LT)
-">" push_token(GT)
+">>>=" <|push_token(ASSIGN_SHR)|>
+">>>" <|push_token(SHR)|>
+"<<=" <|push_token(ASSIGN_SHL)|>
+">>=" <|push_token(ASSIGN_SAR)|>
+"<=" <|push_token(LTE)|>
+">=" <|push_token(GTE)|>
+"<<" <|push_token(SHL)|>
+">>" <|push_token(SAR)|>
+"<" <|push_token(LT)|>
+">" <|push_token(GT)|>
-number push_token(NUMBER)
-# number identifier_char push_token(ILLEGAL)
+number <|push_token(NUMBER)|>
+# is this necessary?
+number identifier_char <|push_token(ILLEGAL)|>
-"(" push_token(LPAREN)
-")" push_token(RPAREN)
-"[" push_token(LBRACK)
-"]" push_token(RBRACK)
-"{" push_token(LBRACE)
-"}" push_token(RBRACE)
-":" push_token(COLON)
-";" push_token(SEMICOLON)
-"." push_token(PERIOD)
-"?" push_token(CONDITIONAL)
-"++" push_token(INC)
-"--" push_token(DEC)
+"(" <|push_token(LPAREN)|>
+")" <|push_token(RPAREN)|>
+"[" <|push_token(LBRACK)|>
+"]" <|push_token(RBRACK)|>
+"{" <|push_token(LBRACE)|>
+"}" <|push_token(RBRACE)|>
+":" <|push_token(COLON)|>
+";" <|push_token(SEMICOLON)|>
+"." <|push_token(PERIOD)|>
+"?" <|push_token(CONDITIONAL)|>
+"++" <|push_token(INC)|>
+"--" <|push_token(DEC)|>
-"||" push_token(OR)
-"&&" push_token(AND)
+"||" <|push_token(OR)|>
+"&&" <|push_token(AND)|>
-"|" push_token(BIT_OR)
-"^" push_token(BIT_XOR)
-"&" push_token(BIT_AND)
-"+" push_token(ADD)
-"-" push_token(SUB)
-"*" push_token(MUL)
-"/" push_token(DIV)
-"%" push_token(MOD)
-"~" push_token(BIT_NOT)
-"," push_token(COMMA)
+"|" <|push_token(BIT_OR)|>
+"^" <|push_token(BIT_XOR)|>
+"&" <|push_token(BIT_AND)|>
+"+" <|push_token(ADD)|>
+"-" <|push_token(SUB)|>
+"*" <|push_token(MUL)|>
+"/" <|push_token(DIV)|>
+"%" <|push_token(MOD)|>
+"~" <|push_token(BIT_NOT)|>
+"," <|push_token(COMMA)|>
-line_terminator+ { PUSH_LINE_TERMINATOR(); }
-whitespace <<skip>>
+line_terminator+ <|push_line_terminator|>
+whitespace <|skip|>
-"\"" <<DoubleQuoteString>>
-"'" <<SingleQuoteString>>
+"\"" <||DoubleQuoteString>
+"'" <||SingleQuoteString>
# all keywords
-"break" push_token(BREAK)
-"case" push_token(CASE)
-"catch" push_token(CATCH)
-"class" push_token(FUTURE_RESERVED_WORD)
-"const" push_token(CONST)
-"continue" push_token(CONTINUE)
-"debugger" push_token(DEBUGGER)
-"default" push_token(DEFAULT)
-"delete" push_token(DELETE)
-"do" push_token(DO)
-"else" push_token(ELSE)
-"enum" push_token(FUTURE_RESERVED_WORD)
-"export" push_token(FUTURE_RESERVED_WORD)
-"extends" push_token(FUTURE_RESERVED_WORD)
-"false" push_token(FALSE_LITERAL)
-"finally" push_token(FINALLY)
-"for" push_token(FOR)
-"function" push_token(FUNCTION)
-"if" push_token(IF)
-"implements" push_token(FUTURE_STRICT_RESERVED_WORD)
-"import" push_token(FUTURE_RESERVED_WORD)
-"in" push_token(IN)
-"instanceof" push_token(INSTANCEOF)
-"interface" push_token(FUTURE_STRICT_RESERVED_WORD)
-"let" push_token(FUTURE_STRICT_RESERVED_WORD)
-"new" push_token(NEW)
-"null" push_token(NULL_LITERAL)
-"package" push_token(FUTURE_STRICT_RESERVED_WORD)
-"private" push_token(FUTURE_STRICT_RESERVED_WORD)
-"protected" push_token(FUTURE_STRICT_RESERVED_WORD)
-"public" push_token(FUTURE_STRICT_RESERVED_WORD)
-"return" push_token(RETURN)
-"static" push_token(FUTURE_STRICT_RESERVED_WORD)
-"super" push_token(FUTURE_RESERVED_WORD)
-"switch" push_token(SWITCH)
-"this" push_token(THIS)
-"throw" push_token(THROW)
-"true" push_token(TRUE_LITERAL)
-"try" push_token(TRY)
-"typeof" push_token(TYPEOF)
-"var" push_token(VAR)
-"void" push_token(VOID)
-"while" push_token(WHILE)
-"with" push_token(WITH)
-"yield" push_token(YIELD)
+"break" <|push_token(BREAK)|>
+"case" <|push_token(CASE)|>
+"catch" <|push_token(CATCH)|>
+"class" <|push_token(FUTURE_RESERVED_WORD)|>
+"const" <|push_token(CONST)|>
+"continue" <|push_token(CONTINUE)|>
+"debugger" <|push_token(DEBUGGER)|>
+"default" <|push_token(DEFAULT)|>
+"delete" <|push_token(DELETE)|>
+"do" <|push_token(DO)|>
+"else" <|push_token(ELSE)|>
+"enum" <|push_token(FUTURE_RESERVED_WORD)|>
+"export" <|push_token(FUTURE_RESERVED_WORD)|>
+"extends" <|push_token(FUTURE_RESERVED_WORD)|>
+"false" <|push_token(FALSE_LITERAL)|>
+"finally" <|push_token(FINALLY)|>
+"for" <|push_token(FOR)|>
+"function" <|push_token(FUNCTION)|>
+"if" <|push_token(IF)|>
+"implements" <|push_token(FUTURE_STRICT_RESERVED_WORD)|>
+"import" <|push_token(FUTURE_RESERVED_WORD)|>
+"in" <|push_token(IN)|>
+"instanceof" <|push_token(INSTANCEOF)|>
+"interface" <|push_token(FUTURE_STRICT_RESERVED_WORD)|>
+"let" <|push_token(FUTURE_STRICT_RESERVED_WORD)|>
+"new" <|push_token(NEW)|>
+"null" <|push_token(NULL_LITERAL)|>
+"package" <|push_token(FUTURE_STRICT_RESERVED_WORD)|>
+"private" <|push_token(FUTURE_STRICT_RESERVED_WORD)|>
+"protected" <|push_token(FUTURE_STRICT_RESERVED_WORD)|>
+"public" <|push_token(FUTURE_STRICT_RESERVED_WORD)|>
+"return" <|push_token(RETURN)|>
+"static" <|push_token(FUTURE_STRICT_RESERVED_WORD)|>
+"super" <|push_token(FUTURE_RESERVED_WORD)|>
+"switch" <|push_token(SWITCH)|>
+"this" <|push_token(THIS)|>
+"throw" <|push_token(THROW)|>
+"true" <|push_token(TRUE_LITERAL)|>
+"try" <|push_token(TRY)|>
+"typeof" <|push_token(TYPEOF)|>
+"var" <|push_token(VAR)|>
+"void" <|push_token(VOID)|>
+"while" <|push_token(WHILE)|>
+"with" <|push_token(WITH)|>
+"yield" <|push_token(YIELD)|>
-identifier_start push_token(IDENTIFIER) <<Identifier>>
-/\\u[0-9a-fA-F]{4}/ {
+identifier_start <|push_token(IDENTIFIER)|Identifier>
+/\\u[0-9a-fA-F]{4}/ <{
if (V8_UNLIKELY(!ValidIdentifierStart())) {
PUSH_TOKEN(Token::ILLEGAL);
+ // need to goto something here
}
-} <<Identifier>>
+}|push_token(IDENTIFIER)|Identifier>
-eof <<terminate>>
-default_action push_token(ILLEGAL)
+eof <|terminate|>
+default_action <push_token(ILLEGAL)>
-<DoubleQuoteString>
-/\\\n\r?/ <<continue>>
-/\\\r\n?/ <<continue>>
-/\\./ <<continue>>
-/\n|\r/ push_token(ILLEGAL)
-"\"" push_token(STRING)
-eof <<terminate_illegal>>
-catch_all <<continue>>
+<<DoubleQuoteString>>
+/\\\n\r?/ <||continue>
+/\\\r\n?/ <||continue>
+/\\./ <||continue>
+/\n|\r/ <|push_token(ILLEGAL)|>
+"\"" <|push_token(STRING)|>
+eof <|terminate_illegal|>
+catch_all <||continue>
-<SingleQuoteString>
-/\\\n\r?/ <<continue>>
-/\\\r\n?/ <<continue>>
-/\\./ <<continue>>
-/\n|\r/ push_token(ILLEGAL)
-"'" push_token(STRING)
-eof <<terminate_illegal>>
-catch_all <<continue>>
+<<SingleQuoteString>>
+/\\\n\r?/ <||continue>
+/\\\r\n?/ <||continue>
+/\\./ <||continue>
+/\n|\r/ <|push_token(ILLEGAL)|>
+"'" <|push_token(STRING)|>
+eof <|terminate_illegal|>
+catch_all <||continue>
-<Identifier>
-identifier_char push_token(IDENTIFIER) <<continue>>
-/\\u[0-9a-fA-F]{4}/ {
+<<Identifier>>
+identifier_char <|push_token(IDENTIFIER)|continue>
+/\\u[0-9a-fA-F]{4}/ <{
if (V8_UNLIKELY(!ValidIdentifierStart())) {
PUSH_TOKEN(Token::ILLEGAL);
+ // need to goto something here
}
-} <<continue>>
+}|push_token(IDENTIFIER)|continue>
-<SingleLineComment>
-line_terminator { PUSH_LINE_TERMINATOR(); }
-catch_all <<continue>>
+<<SingleLineComment>>
+line_terminator <|push_line_terminator|>
+catch_all <||continue>
-<MultiLineComment>
-"*/" <<skip>>
-/\*[^\/]/ <<continue>>
-line_terminator { PUSH_LINE_TERMINATOR(); } <<continue>>
-catch_all <<continue>>
+<<MultiLineComment>>
+"*/" <|skip|>
+# TODO find a way to generate the below rule
+/\*[^\/]/ <||continue>
+line_terminator <|push_line_terminator|continue>
+catch_all <||continue>
-<HtmlComment>
-"-->" <<skip>>
-/--./ <<continue>>
-/-./ <<continue>>
-line_terminator { PUSH_LINE_TERMINATOR(); } <<continue>>
-catch_all <<continue>>
+<<HtmlComment>>
+"-->" <|skip|>
+# TODO find a way to generate the below rules
+/--./ <||continue>
+/-./ <||continue>
+line_terminator <|push_line_terminator|continue>
+catch_all <||continue>
=======================================
--- /branches/experimental/parser/tools/lexer_generator/action_test.py Thu
Nov 14 07:25:37 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/action_test.py Thu
Nov 14 20:25:22 2013 UTC
@@ -40,15 +40,16 @@
class ActionTestCase(unittest.TestCase):
def __verify_last_action(self, automata, string, expected_code):
+ expected_code = (expected_code, None)
for automaton in [automata.dfa(), automata.minimal_dfa()]:
actions = list(automaton.collect_actions(string))
self.assertEqual(actions[-1], Action('TERMINATE'))
- self.assertEqual(actions[-2].data(), expected_code)
+ self.assertEqual(actions[-2].match_action(), expected_code)
def test_action_precedence(self):
- rules = '''<default>
- "key" { KEYWORD } <<break>>
- /[a-z]+/ { ID } <<break>>'''
+ rules = '''<<default>>
+ "key" <|KEYWORD|>
+ /[a-z]+/ <|ID|>'''
automata_for_conditions = process_rules(rules)
self.assertEqual(len(automata_for_conditions), 1)
self.assertTrue('default' in automata_for_conditions)
@@ -61,9 +62,9 @@
self.__verify_last_action(automata, 'keys', 'ID')
def test_wrong_action_precedence(self):
- rules = '''<default>
- /[a-z]+/ { ID } <<break>>
- "key" { KEYWORD } <<break>>'''
+ rules = '''<<default>>
+ /[a-z]+/ <|ID|>
+ "key" <|KEYWORD|>'''
automata_for_conditions = process_rules(rules)
self.assertEqual(len(automata_for_conditions), 1)
self.assertTrue('default' in automata_for_conditions)
=======================================
--- /branches/experimental/parser/tools/lexer_generator/automaton.py Thu
Nov 14 10:58:08 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/automaton.py Thu
Nov 14 20:25:22 2013 UTC
@@ -31,33 +31,31 @@
class Action(object):
- def __init__(self, type, data = None, precedence = -1):
+ def __init__(self, entry_action, match_action = None, precedence = -1):
assert type
- self.__type = type
- self.__data = data
+ self.__entry_action = entry_action
+ self.__match_action = match_action
self.__precedence = precedence
- def type(self):
- return self.__type
+ def entry_action(self):
+ return self.__entry_action
- def data(self):
- return self.__data
+ def match_action(self):
+ return self.__match_action
def precedence(self):
return self.__precedence
def __hash__(self):
- return hash((self.__type, self.__data))
+ return hash((self.__entry_action, self.__match_action))
def __eq__(self, other):
return (isinstance(other, self.__class__) and
- self.__type == other.__type and
- self.__data == other.__data)
+ self.__entry_action == other.__entry_action and
+ self.__match_action == other.__match_action)
def __str__(self):
- if not self.__data:
- return "action<%s>" % self.__type
- return "action<%s, %s>" % (self.__type, self.__data)
+ return "action<%s, %s>" % (self.__entry_action, self.__match_action)
class AutomatonState(object):
@@ -135,14 +133,7 @@
def f(node, (node_content, edge_content)):
if node.action():
- action = node.action()
- if action.type() == 'code':
- action_text = action.data()
- elif action.type() == 'push_token':
- action_text = "token(" + action.data() + ")"
- else:
- action_text = action.type()
- action_text = escape(action_text)
+ action_text = escape(node.action())
node_content.append(' S_l%s[shape = box, label="%s"];' %
(node.node_number(), action_text))
node_content.append(' S_%s -> S_l%s [arrowhead = none];' %
=======================================
--- /branches/experimental/parser/tools/lexer_generator/dfa.py Thu Nov 14
10:16:32 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/dfa.py Thu Nov 14
20:25:22 2013 UTC
@@ -133,7 +133,7 @@
def matches(self, string):
actions = list(self.collect_actions(string))
- return actions and actions[-1].type() == 'TERMINATE'
+ return actions and actions[-1].entry_action() == 'TERMINATE'
def lex(self, string):
state = self.__start
=======================================
--- /branches/experimental/parser/tools/lexer_generator/lexer_test.py Thu
Nov 14 07:25:37 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/lexer_test.py Thu
Nov 14 20:25:22 2013 UTC
@@ -32,8 +32,8 @@
class LexerTestCase(unittest.TestCase):
def __verify_action_stream(self, rules, string, expected):
- expected = map(lambda (action, s) : (Action('code', action), s),
expected)
- expected.append((Action('terminate'), '\0'))
+ expected = map(lambda (action, s) : (Action(None, (action, None)), s),
expected)
+ expected.append((Action(None, ('terminate', None)), '\0'))
automata = RuleProcessor.parse(rules).default_automata()
for automaton in [automata.dfa(), automata.minimal_dfa()]:
for i, (action, start, stop) in enumerate(automaton.lex(string)):
@@ -42,12 +42,12 @@
def test_simple(self):
rules = '''
- <default>
- "(" { LBRACE }
- ")" { RBRACE }
+ <<default>>
+ "(" <|LBRACE|>
+ ")" <|RBRACE|>
- "foo" { FOO }
- eof <<terminate>>'''
+ "foo" <|FOO|>
+ eof <|terminate|>'''
string = 'foo()\0'
self.__verify_action_stream(rules, string,
@@ -55,11 +55,11 @@
def test_maximal_matching(self):
rules = '''
- <default>
- "<" { LT }
- "<<" { SHL }
- " " { SPACE }
- eof <<terminate>>'''
+ <<default>>
+ "<" <|LT|>
+ "<<" <|SHL|>
+ " " <|SPACE|>
+ eof <|terminate|>'''
string = '<< <\0'
self.__verify_action_stream(rules, string,
@@ -69,9 +69,9 @@
rules = '''
digit = [0-9];
number = (digit+ ("." digit+)?);
- <default>
- number { NUMBER }
- eof <<terminate>>'''
+ <<default>>
+ number <|NUMBER|>
+ eof <|terminate|>'''
string = '555\0'
self.__verify_action_stream(rules, string, [('NUMBER', '555')])
=======================================
--- /branches/experimental/parser/tools/lexer_generator/rule_lexer.py Thu
Nov 14 17:30:55 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/rule_lexer.py Thu
Nov 14 20:25:22 2013 UTC
@@ -30,10 +30,8 @@
class RuleLexer:
tokens = (
- 'DEFAULT',
'DEFAULT_ACTION',
'CATCH_ALL',
- 'PUSH_TOKEN',
'IDENTIFIER',
'STRING',
@@ -47,8 +45,8 @@
'STAR',
'LEFT_PARENTHESIS',
'RIGHT_PARENTHESIS',
- 'LESS_THAN',
- 'GREATER_THAN',
+ 'GRAPH_OPEN',
+ 'GRAPH_CLOSE',
'SEMICOLON',
'ACTION_OPEN',
'ACTION_CLOSE',
@@ -71,7 +69,7 @@
pass
__special_identifiers = set(map(lambda s: s.lower(),
- ['DEFAULT', 'DEFAULT_ACTION', 'CATCH_ALL', 'PUSH_TOKEN']))
+ ['DEFAULT_ACTION', 'CATCH_ALL']))
def t_IDENTIFIER(self, t):
r'[a-zA-Z][a-zA-Z0-9_]*'
@@ -90,11 +88,11 @@
t_EQUALS = '='
t_LEFT_PARENTHESIS = r'\('
t_RIGHT_PARENTHESIS = r'\)'
- t_LESS_THAN = '<'
- t_GREATER_THAN = '>'
+ t_GRAPH_OPEN = '<<'
+ t_GRAPH_CLOSE = '>>'
t_SEMICOLON = ';'
- t_ACTION_OPEN = '<<'
- t_ACTION_CLOSE = '>>'
+ t_ACTION_OPEN = '<'
+ t_ACTION_CLOSE = '>'
def t_LEFT_BRACKET(self, t):
r'{'
=======================================
--- /branches/experimental/parser/tools/lexer_generator/rule_parser.py Thu
Nov 14 17:30:55 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/rule_parser.py Thu
Nov 14 20:25:22 2013 UTC
@@ -51,8 +51,7 @@
tokens = RuleLexer.tokens
__rule_precedence_counter = 0
- __keyword_transitions = set([
- 'continue', 'break', 'terminate', 'terminate_illegal', 'skip'])
+ __keyword_transitions = set(['continue'])
def __init__(self):
self.__state = None
@@ -80,8 +79,7 @@
| empty'''
def p_state_change(self, p):
- '''state_change : LESS_THAN IDENTIFIER GREATER_THAN
- | LESS_THAN DEFAULT GREATER_THAN'''
+ 'state_change : GRAPH_OPEN IDENTIFIER GRAPH_CLOSE'
state = self.__state
state.current_state = p[2]
assert state.current_state
@@ -98,41 +96,61 @@
| empty'''
def p_transition_rule(self, p):
- '''transition_rule : composite_regex code_or_token action
- | composite_regex empty action
- | composite_regex code_or_token empty
- | DEFAULT_ACTION code_or_token empty
- | CATCH_ALL empty action'''
- transition = p[3]
+ '''transition_rule : composite_regex action
+ | DEFAULT_ACTION default_action
+ | CATCH_ALL action'''
+ precedence = RuleParser.__rule_precedence_counter
+ RuleParser.__rule_precedence_counter += 1
+ action = p[2]
+ (entry_action, match_action, transition) = action
if transition and not transition in self.__keyword_transitions:
- assert not transition == 'default'
+ assert not transition == 'default', "can't append default graph"
self.__state.transitions.add(transition)
- RuleParser.__rule_precedence_counter += 1
rules = self.__state.rules[self.__state.current_state]
- code = p[2]
if p[1] == 'default_action':
assert self.__state.current_state == 'default'
assert not rules['default_action']
- rules['default_action'] = code
+ rules['default_action'] = action
elif p[1] == 'catch_all':
assert not rules['catch_all']
- rules['catch_all'] = transition
+ rules['catch_all'] = (precedence, action)
else:
- rule = (p[1], RuleParser.__rule_precedence_counter, code, transition)
- rules['regex'].append(rule)
+ regex = p[1]
+ rules['regex'].append((regex, precedence, action))
- def p_code_or_token(self, p):
- '''code_or_token : code
- | push_token'''
+ def p_action(self, p):
+ '''action : ACTION_OPEN maybe_action_part OR maybe_action_part OR
maybe_transition ACTION_CLOSE'''
+ p[0] = (p[2], p[4], p[6])
+
+ def p_default_action(self, p):
+ 'default_action : ACTION_OPEN action_part ACTION_CLOSE'
+ p[0] = (None, p[2], None)
+
+ def p_maybe_action_part(self, p):
+ '''maybe_action_part : action_part
+ | empty'''
p[0] = p[1]
- def p_push_token(self, p):
- 'push_token : PUSH_TOKEN LEFT_PARENTHESIS IDENTIFIER RIGHT_PARENTHESIS'
- p[0] = (p[1], p[3])
+ def p_action_part(self, p):
+ '''action_part : code
+ | identifier_action'''
+ p[0] = p[1]
+
+ def p_maybe_transition(self, p):
+ '''maybe_transition : IDENTIFIER
+ | empty'''
+ p[0] = p[1]
- def p_action(self, p):
- 'action : ACTION_OPEN IDENTIFIER ACTION_CLOSE'
- p[0] = p[2]
+ def p_identifier_action(self, p):
+ '''identifier_action : IDENTIFIER
+ | IDENTIFIER LEFT_PARENTHESIS IDENTIFIER
RIGHT_PARENTHESIS'''
+ assert p[1] != 'code'
+ if len(p) == 2:
+ p[0] = (p[1], None)
+ elif len(p) == 5:
+ p[0] = (p[1], p[2])
+ else:
+ raise Exception()
def p_composite_regex(self, p):
'''composite_regex : regex_parts OR regex_parts
@@ -274,36 +292,31 @@
builder = NfaBuilder()
builder.set_character_classes(parser_state.character_classes)
assert 'default' in parser_state.rules
- def process(k, v):
+ def process(subgraph, v):
graphs = []
continues = 0
- for (graph, precedence, code, transition) in v['regex']:
- default_code = v['default_action']
- if code or default_code:
- (code_type, code_value) = code if code else default_code
- action = Action(code_type, code_value, precedence)
+ for graph, precedence, action in v['regex']:
+ (entry_action, match_action, transition) = action
+ if entry_action or match_action:
+ action = Action(entry_action, match_action, precedence)
graph = NfaBuilder.add_action(graph, action)
- if not transition or transition == 'break':
+ if not transition:
pass
elif transition == 'continue':
- assert not k == 'default'
+ assert not subgraph == 'default'
continues += 1
graph = NfaBuilder.add_continue(graph)
- elif (transition == 'terminate' or
- transition == 'terminate_illegal' or
- transition == 'skip'):
- assert not code
- graph = NfaBuilder.add_action(graph, Action(transition, None,
-1))
else:
- assert k == 'default'
- subgraph_modifier = '*' if code else None
+ assert subgraph == 'default'
+ subgraph_modifier = None
graph = NfaBuilder.join_subgraph(
graph, transition, rule_map[transition], subgraph_modifier)
graphs.append(graph)
if continues == len(graphs):
graphs.append(NfaBuilder.epsilon())
if v['catch_all']:
- assert v['catch_all'] == 'continue'
+ (precedence, catch_all) = v['catch_all']
+ assert catch_all == (None, None, 'continue'), "unimplemented"
graphs.append(NfaBuilder.add_continue(NfaBuilder.catch_all()))
graph = NfaBuilder.or_graphs(graphs)
rule_map[k] = graph
@@ -315,6 +328,6 @@
# build the automata
for rule_name, graph in rule_map.items():
self.__automata[rule_name] = RuleProcessor.Automata(builder, graph)
-
+ # process default_action
default_action = parser_state.rules['default']['default_action']
- self.default_action = Action(default_action[0], default_action[1]) if
default_action else None
+ self.default_action = Action(None, default_action[1]) if
default_action else None
=======================================
--- /branches/experimental/parser/tools/lexer_generator/rule_parser_test.py
Tue Nov 12 07:12:31 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/rule_parser_test.py
Thu Nov 14 20:25:22 2013 UTC
@@ -40,12 +40,12 @@
def test_basic(self):
self.parse('''
alias = /regex/;
-<cond1> /regex/ <<cond2>>
-<cond1> alias <<cond2>>
-<cond2> /regex/ {body}
-<cond2> alias {body}
-<cond3> /regex/ {body} <<cond1>>
-<cond3> alias {body} <<cond1>>''')
+<<cond1>> /regex/ <||cond2>
+<<cond1>> alias <||cond2>
+<<cond2>> /regex/ <|{body}|>
+<<cond2>> alias <|{body}|>
+<<cond3>> /regex/ <{body}||>
+<<cond3>> alias <{body}||>''')
self.assertTrue(len(self.state.aliases), 1)
self.assertTrue('alias' in self.state.aliases)
@@ -73,8 +73,8 @@
def test_more_complicated(self):
self.parse('''
alias = "regex;with;semicolon";
-<cond1> "regex3}with}braces}" {body {with} braces }
-<cond1> "regex4{with{braces}" {body {with} braces }''')
+<<cond1>> "regex3}with}braces}" <|{body {with} braces }|>
+<<cond1>> "regex4{with{braces}" <{body {with} braces }||>''')
self.assertEquals(self.state.aliases['alias'],
RegexParser.parse("regex;with;semicolon"))
@@ -86,13 +86,13 @@
# ('body', 'body {with} braces }'))
def test_body_with_if(self):
- self.parse('<cond> "regex" { if (foo) { bar } }')
+ self.parse('<<cond>> "regex" <|{ if (foo) { bar } }|>')
# self.assertEquals(
# self.parse['cond']['regex'],
# ('body', 'if (foo) { bar }'))
def test_regexp_with_count(self):
- self.parse('<cond> /regex{1,3}/ { if (foo) { bar } }')
+ self.parse('<<cond>> /regex{1,3}/ <|{ if (foo) { bar } }|>')
# self.assertEquals(
# self.parse['cond']['regex{1,3}'],
# ('body', 'if (foo) { bar }'))
--
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
---
You received this message because you are subscribed to the Google Groups "v8-dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email
to [email protected].
For more options, visit https://groups.google.com/groups/opt_out.