Revision: 17612
Author: [email protected]
Date: Mon Nov 11 07:52:15 2013 UTC
Log: Experimental parser: better actions
[email protected]
BUG=
Review URL: https://codereview.chromium.org/68343004
http://code.google.com/p/v8/source/detail?r=17612
Modified:
/branches/experimental/parser/src/lexer/lexer_py.re
/branches/experimental/parser/tools/lexer_generator/automaton.py
/branches/experimental/parser/tools/lexer_generator/dfa.py
/branches/experimental/parser/tools/lexer_generator/generator.py
/branches/experimental/parser/tools/lexer_generator/lexer_test.py
/branches/experimental/parser/tools/lexer_generator/nfa.py
/branches/experimental/parser/tools/lexer_generator/rule_lexer.py
/branches/experimental/parser/tools/lexer_generator/rule_parser.py
=======================================
--- /branches/experimental/parser/src/lexer/lexer_py.re Fri Nov 8 11:52:04
2013 UTC
+++ /branches/experimental/parser/src/lexer/lexer_py.re Mon Nov 11 07:52:15
2013 UTC
@@ -52,9 +52,10 @@
"!=" { PUSH_TOKEN(NE); }
"!" { PUSH_TOKEN(NOT); }
-"//" <<SingleLineComment>> # TODO save offset?
+"//" <<SingleLineComment>>
"/*" <<MultiLineComment>>
"<!--" <<HtmlComment>>
+
#whitespace* "-->" { if (just_seen_line_terminator_) {
YYSETCONDITION(kConditionSingleLineComment); goto yyc_SingleLineComment; }
else { --cursor_; send(Token::DEC); start_ = cursor_; goto yyc_Normal; } }
">>>=" { PUSH_TOKEN(ASSIGN_SHR); }
@@ -99,112 +100,112 @@
"," { PUSH_TOKEN(COMMA); }
line_terminator+ { PUSH_LINE_TERMINATOR(); }
-whitespace <<continue>>
+whitespace { SKIP } # TODO implement skip
"\"" <<DoubleQuoteString>>
"'" <<SingleQuoteString>>
# all keywords
-"break" { PUSH_TOKEN(BREAK); } <<break>>
-"case" { PUSH_TOKEN(CASE); } <<break>>
-"catch" { PUSH_TOKEN(CATCH); } <<break>>
-"class" { PUSH_TOKEN(FUTURE_RESERVED_WORD); } <<break>>
-"const" { PUSH_TOKEN(CONST); } <<break>>
-"continue" { PUSH_TOKEN(CONTINUE); } <<break>>
-"debugger" { PUSH_TOKEN(DEBUGGER); } <<break>>
-"default" { PUSH_TOKEN(DEFAULT); } <<break>>
-"delete" { PUSH_TOKEN(DELETE); } <<break>>
-"do" { PUSH_TOKEN(DO); } <<break>>
-"else" { PUSH_TOKEN(ELSE); } <<break>>
-"enum" { PUSH_TOKEN(FUTURE_RESERVED_WORD); } <<break>>
-"export" { PUSH_TOKEN(FUTURE_RESERVED_WORD); } <<break>>
-"extends" { PUSH_TOKEN(FUTURE_RESERVED_WORD); } <<break>>
-"false" { PUSH_TOKEN(FALSE_LITERAL); } <<break>>
-"finally" { PUSH_TOKEN(FINALLY); } <<break>>
-"for" { PUSH_TOKEN(FOR); } <<break>>
-"function" { PUSH_TOKEN(FUNCTION); } <<break>>
-"if" { PUSH_TOKEN(IF); } <<break>>
-"implements" { PUSH_TOKEN(FUTURE_STRICT_RESERVED_WORD); } <<break>>
-"import" { PUSH_TOKEN(FUTURE_RESERVED_WORD); } <<break>>
-"in" { PUSH_TOKEN(IN); } <<break>>
-"instanceof" { PUSH_TOKEN(INSTANCEOF); } <<break>>
-"interface" { PUSH_TOKEN(FUTURE_STRICT_RESERVED_WORD); } <<break>>
-"let" { PUSH_TOKEN(FUTURE_STRICT_RESERVED_WORD); } <<break>>
-"new" { PUSH_TOKEN(NEW); } <<break>>
-"null" { PUSH_TOKEN(NULL_LITERAL); } <<break>>
-"package" { PUSH_TOKEN(FUTURE_STRICT_RESERVED_WORD); } <<break>>
-"private" { PUSH_TOKEN(FUTURE_STRICT_RESERVED_WORD); } <<break>>
-"protected" { PUSH_TOKEN(FUTURE_STRICT_RESERVED_WORD); } <<break>>
-"public" { PUSH_TOKEN(FUTURE_STRICT_RESERVED_WORD); } <<break>>
-"return" { PUSH_TOKEN(RETURN); } <<break>>
-"static" { PUSH_TOKEN(FUTURE_STRICT_RESERVED_WORD); } <<break>>
-"super" { PUSH_TOKEN(FUTURE_RESERVED_WORD); } <<break>>
-"switch" { PUSH_TOKEN(SWITCH); } <<break>>
-"this" { PUSH_TOKEN(THIS); } <<break>>
-"throw" { PUSH_TOKEN(THROW); } <<break>>
-"true" { PUSH_TOKEN(TRUE_LITERAL); } <<break>>
-"try" { PUSH_TOKEN(TRY); } <<break>>
-"typeof" { PUSH_TOKEN(TYPEOF); } <<break>>
-"var" { PUSH_TOKEN(VAR); } <<break>>
-"void" { PUSH_TOKEN(VOID); } <<break>>
-"while" { PUSH_TOKEN(WHILE); } <<break>>
-"with" { PUSH_TOKEN(WITH); } <<break>>
-"yield" { PUSH_TOKEN(YIELD); } <<break>>
+"break" { PUSH_TOKEN(BREAK); }
+"case" { PUSH_TOKEN(CASE); }
+"catch" { PUSH_TOKEN(CATCH); }
+"class" { PUSH_TOKEN(FUTURE_RESERVED_WORD); }
+"const" { PUSH_TOKEN(CONST); }
+"continue" { PUSH_TOKEN(CONTINUE); }
+"debugger" { PUSH_TOKEN(DEBUGGER); }
+"default" { PUSH_TOKEN(DEFAULT); }
+"delete" { PUSH_TOKEN(DELETE); }
+"do" { PUSH_TOKEN(DO); }
+"else" { PUSH_TOKEN(ELSE); }
+"enum" { PUSH_TOKEN(FUTURE_RESERVED_WORD); }
+"export" { PUSH_TOKEN(FUTURE_RESERVED_WORD); }
+"extends" { PUSH_TOKEN(FUTURE_RESERVED_WORD); }
+"false" { PUSH_TOKEN(FALSE_LITERAL); }
+"finally" { PUSH_TOKEN(FINALLY); }
+"for" { PUSH_TOKEN(FOR); }
+"function" { PUSH_TOKEN(FUNCTION); }
+"if" { PUSH_TOKEN(IF); }
+"implements" { PUSH_TOKEN(FUTURE_STRICT_RESERVED_WORD); }
+"import" { PUSH_TOKEN(FUTURE_RESERVED_WORD); }
+"in" { PUSH_TOKEN(IN); }
+"instanceof" { PUSH_TOKEN(INSTANCEOF); }
+"interface" { PUSH_TOKEN(FUTURE_STRICT_RESERVED_WORD); }
+"let" { PUSH_TOKEN(FUTURE_STRICT_RESERVED_WORD); }
+"new" { PUSH_TOKEN(NEW); }
+"null" { PUSH_TOKEN(NULL_LITERAL); }
+"package" { PUSH_TOKEN(FUTURE_STRICT_RESERVED_WORD); }
+"private" { PUSH_TOKEN(FUTURE_STRICT_RESERVED_WORD); }
+"protected" { PUSH_TOKEN(FUTURE_STRICT_RESERVED_WORD); }
+"public" { PUSH_TOKEN(FUTURE_STRICT_RESERVED_WORD); }
+"return" { PUSH_TOKEN(RETURN); }
+"static" { PUSH_TOKEN(FUTURE_STRICT_RESERVED_WORD); }
+"super" { PUSH_TOKEN(FUTURE_RESERVED_WORD); }
+"switch" { PUSH_TOKEN(SWITCH); }
+"this" { PUSH_TOKEN(THIS); }
+"throw" { PUSH_TOKEN(THROW); }
+"true" { PUSH_TOKEN(TRUE_LITERAL); }
+"try" { PUSH_TOKEN(TRY); }
+"typeof" { PUSH_TOKEN(TYPEOF); }
+"var" { PUSH_TOKEN(VAR); }
+"void" { PUSH_TOKEN(VOID); }
+"while" { PUSH_TOKEN(WHILE); }
+"with" { PUSH_TOKEN(WITH); }
+"yield" { PUSH_TOKEN(YIELD); }
-identifier_start <<Identifier>>
+identifier_start { PUSH_TOKEN(IDENTIFIER); } <<Identifier>>
/\\u[0-9a-fA-F]{4}/ {
- if (V8_LIKELY(ValidIdentifierStart())) {
- JUMP(Identifier);
+ if (V8_UNLIKELY(!ValidIdentifierStart())) {
+ PUSH_TOKEN(ILLEGAL);
}
- PUSH_TOKEN(ILLEGAL);
} <<Identifier>>
-eof <<terminate>>
-default { PUSH_TOKEN(ILLEGAL); }
+eof <<terminate>>
+default_action { PUSH_TOKEN(ILLEGAL); }
<DoubleQuoteString>
"\\" <<continue>>
"\\\"" <<continue>>
-"\"" { PUSH_TOKEN(STRING); } <<break>>
+"\"" { PUSH_TOKEN(STRING); }
/\\\n\r?/ <<continue>>
/\\\r\n?/ <<continue>>
-/\n|\r/ { PUSH_TOKEN(ILLEGAL); } <<break>>
+/\n|\r/ { PUSH_TOKEN(ILLEGAL); }
eof <<terminate_illegal>>
-default <<continue>>
+/./ <<continue>>
<SingleQuoteString>
"\\" <<continue>>
"\\'" <<continue>>
-"'" { PUSH_TOKEN(STRING); } <<break>>
+"'" { PUSH_TOKEN(STRING); }
/\\\n\r?/ <<continue>>
/\\\r\n?/ <<continue>>
-/\n|\r/ { PUSH_TOKEN(ILLEGAL); } <<break>>
+/\n|\r/ { PUSH_TOKEN(ILLEGAL); }
eof <<terminate_illegal>>
-default <<continue>>
+/./ <<continue>>
<Identifier>
-identifier_char+ <<continue>>
+identifier_char <<continue>>
/\\u[0-9a-fA-F]{4}/ {
if (V8_UNLIKELY(!ValidIdentifierStart())) {
PUSH_TOKEN(ILLEGAL);
- JUMP(Normal);
}
-}
-default { PUSH_TOKEN(IDENTIFIER); } <<break>>
+} <<continue>>
+default_action { PUSH_TOKEN(IDENTIFIER); }
<SingleLineComment>
-line_terminator { PUSH_LINE_TERMINATOR(); } <<break>>
+line_terminator { PUSH_LINE_TERMINATOR(); }
eof <<terminate>>
-default <<continue>>
+/./ <<continue>>
<MultiLineComment>
-"*/" <<break>>
-line_terminator+ { PUSH_LINE_TERMINATOR(); }
+"*/" { SKIP }
+# need to force action
+line_terminator+ { PUSH_LINE_TERMINATOR(); } <<continue>>
eof <<terminate>>
-default <<continue>>
+/./ <<continue>>
<HtmlComment>
-"-->" <<break>>
-line_terminator+ { PUSH_LINE_TERMINATOR(); }
+"-->" { SKIP }
+# need to force action
+line_terminator+ { PUSH_LINE_TERMINATOR(); } <<continue>>
eof <<terminate>>
-default <<continue>>
+/./ <<continue>>
=======================================
--- /branches/experimental/parser/tools/lexer_generator/automaton.py Thu
Nov 7 10:26:29 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/automaton.py Mon
Nov 11 07:52:15 2013 UTC
@@ -39,6 +39,9 @@
def node_number(self):
return self.__node_number
+ def __str__(self):
+ return "%s(%d)" % (type(self), self.node_number())
+
class Automaton(object):
@staticmethod
@@ -54,7 +57,7 @@
return state
@staticmethod
- def generate_dot(start_node, terminal_set, edge_iterator):
+ def generate_dot(start_node, terminal_set, edge_iterator,
state_iterator):
def escape(v):
v = str(v).replace('\r', '\\\\r')
@@ -68,17 +71,15 @@
for key, values in node.transitions().items():
if key == TransitionKey.epsilon():
key = "ε"
- # TODO pass this action as parameter
- if type(values) == TupleType:
- values = [values]
- for (state, action) in values:
+ for state in state_iterator(values):
+ action = state.action()
if action:
- content = " S_%s -> S_%s [ label = \"%s {%s} -> %s\" ];" % (
+ content = " S_%s -> S_%s [ label = \"%s {%s}:%d\" ];" % (
node.node_number(),
state.node_number(),
escape(key),
escape(action[1]),
- escape(action[2]))
+ action[0])
else:
content = " S_%s -> S_%s [ label = \"%s\" ];" % (
node.node_number(), state.node_number(), escape(key))
=======================================
--- /branches/experimental/parser/tools/lexer_generator/dfa.py Fri Nov 8
13:26:55 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/dfa.py Mon Nov 11
07:52:15 2013 UTC
@@ -31,67 +31,68 @@
class DfaState(AutomatonState):
- def __init__(self, name, node_number):
+ def __init__(self, name, node_number, actions):
super(DfaState, self).__init__(node_number)
self.__name = name
self.__transitions = {}
+ self.__actions = actions
def name(self):
return self.__name
- def add_transition(self, key, action, state):
+ def action(self):
+ return self.__actions[0] if self.__actions else None
+
+ def actions(self):
+ return self.__actions
+
+ def add_transition(self, key, state):
assert not self.__transitions.has_key(key)
- self.__transitions[key] = (state, action)
+ self.__transitions[key] = state
def transitions(self):
return self.__transitions
- def __str__(self):
- return str(self.node_number())
-
class Dfa(Automaton):
def __init__(self, start_name, mapping):
super(Dfa, self).__init__()
self.__terminal_set = set()
name_map = {}
- action_map = {}
- for i, name in enumerate(mapping.keys()):
- name_map[name] = DfaState(name, i)
+ for i, (name, node_data) in enumerate(mapping.items()):
+ node = DfaState(name, i, node_data['actions'])
+ name_map[name] = node
+ if node_data['terminal']:
+ self.__terminal_set.add(node)
for name, node_data in mapping.items():
node = name_map[name]
- if node_data['terminal']:
- self.__terminal_set.add(node)
inversion = {}
- for key, (state, action) in node_data['transitions'].items():
+ for key, state in node_data['transitions'].items():
if not state in inversion:
- inversion[state] = {}
- # TODO fix this
- action_key = str(action)
- if not action_key in action_map:
- action_map[action_key] = action
- if not action_key in inversion[state]:
- inversion[state][action_key] = []
- inversion[state][action_key].append(key)
- for state, values in inversion.items():
- for action_key, keys in values.items():
- merged_key = TransitionKey.merged_key(keys)
- action = action_map[action_key]
- node.add_transition(merged_key, action, name_map[state])
+ inversion[state] = []
+ inversion[state].append(key)
+ for state, keys in inversion.items():
+ merged_key = TransitionKey.merged_key(keys)
+ node.add_transition(merged_key, name_map[state])
self.__start = name_map[start_name]
assert self.__terminal_set
+ @staticmethod
+ def __match_char(state, char):
+ match = [s for k, s in state.transitions().items() if
k.matches_char(char)]
+ if not match: return None
+ assert len(match) == 1
+ return match[0]
+
def collect_actions(self, string):
state = self.__start
for c in string:
- next = [s for k, s in state.transitions().items() if
k.matches_char(c)]
- if not next:
+ state = Dfa.__match_char(state, c)
+ if not state:
yield ('MISS',)
return
- assert len(next) == 1
- (state, action) = next[0]
- if action:
- yield action
+ if state.action():
+ yield state.action()
if state in self.__terminal_set:
yield ('TERMINATE', )
else:
@@ -103,43 +104,26 @@
def lex(self, string):
state = self.__start
- stored_action = None
- pos = 0
- while pos < len(string):
- c = string[pos]
- next = [s for k, s in state.transitions().items() if
k.matches_char(c)]
+ last_position = 0
+ for pos, c in enumerate(string):
+ next = Dfa.__match_char(state, c)
if not next:
- # Maybe we have a stored action before. Take it and backtrack to
the
- # position where that action was.
- if stored_action:
- yield stored_action
- return
- # FIXME: Otherwise, use the default rule if this happens at the
start
- # state of the automaton.
-
- assert len(next) == 1
- (state, action) = next[0]
-
- # Special actions: terminate
- if action and action[2] == 'terminate':
- if stored_action:
- yield stored_action
- yield (action, pos)
- return
-
- # Normally we don't know yet whether to take the action - it depends
on
- # what comes next.
- if action:
- stored_action = (action, pos)
-
- pos += 1
+ assert state.action() # must invoke default action here
+ yield (state.action()[1], last_position, pos)
+ last_position = pos
+ # lex next token
+ next = Dfa.__match_char(self.__start, c)
+ assert next
+ state = next
+ assert state.action() # must invoke default action here
+ yield (state.action()[1], last_position, len(string))
def __visit_all_edges(self, visitor, state):
edge = set([self.__start])
- first = lambda v: set([x[0] for x in v])
- next_edge = lambda node: first(node.transitions().values())
+ next_edge = lambda node: set(node.transitions().values())
return self.visit_edges(edge, next_edge, visitor, state)
def to_dot(self):
iterator = lambda visitor, state: self.__visit_all_edges(visitor,
state)
- return self.generate_dot(self.__start, self.__terminal_set, iterator)
+ state_iterator = lambda x : [x]
+ return self.generate_dot(self.__start, self.__terminal_set, iterator,
state_iterator)
=======================================
--- /branches/experimental/parser/tools/lexer_generator/generator.py Fri
Nov 8 12:43:25 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/generator.py Mon
Nov 11 07:52:15 2013 UTC
@@ -90,39 +90,35 @@
builder.set_character_classes(parser_state.character_classes)
assert 'default' in parser_state.rules
def process(k, v):
- assert 'default' in v
graphs = []
- for (graph, action) in v['regex']:
- (precedence, code, transition) = action
- if code:
- graph = NfaBuilder.add_action(graph, (precedence, code, None))
+ for (graph, (precedence, code, transition)) in v['regex']:
+ default_code = v['default_action']
+ action = code if code else default_code
+ if action:
+ graph = NfaBuilder.add_action(graph, (precedence, action))
if transition == 'continue':
- if not v['default'][1][2] == 'continue':
- graph = NfaBuilder.add_continue(graph)
- else:
- pass # TODO null key
- elif (transition == 'break' or
- transition == 'terminate' or
+ assert not k == 'default'
+ graph = NfaBuilder.add_continue(graph)
+ elif transition == 'break':
+ pass
+ elif (transition == 'terminate' or
transition == 'terminate_illegal'):
- graph = NfaBuilder.add_action(graph, (10000, transition, None))
+ assert not code
+ graph = NfaBuilder.add_action(graph, (-1, transition))
else:
assert k == 'default'
- graph = NfaBuilder.join_subgraph(graph, transition,
rule_map[transition])
+ subgraph_modifier = '*' if code else None
+ graph = NfaBuilder.join_subgraph(
+ graph, transition, rule_map[transition], subgraph_modifier)
graphs.append(graph)
graph = NfaBuilder.or_graphs(graphs)
- # merge default action
- (precedence, code, transition) = v['default'][1]
- assert transition == 'continue' or transition == 'break'
- if transition == 'continue':
- assert k != 'default'
- graph = NfaBuilder.add_incoming_action(graph, (10000, k, None))
- if code:
- graph = NfaBuilder.add_incoming_action(graph, (precedence, code,
None))
rule_map[k] = graph
+ # process first the subgraphs, then the default graph
for k, v in parser_state.rules.items():
if k == 'default': continue
process(k, v)
process('default', parser_state.rules['default'])
+ # build the automata
for rule_name, graph in rule_map.items():
nfa = builder.nfa(graph)
(start, dfa_nodes) = nfa.compute_dfa()
@@ -132,22 +128,8 @@
  # Lexes strings with the help of DFAs produced by the grammar. For sanity
# checking the automata.
def lex(self, string):
- (nfa, dfa) = self.__automata['default'] # FIXME
-
- action_stream = []
- terminate_seen = False
- offset = 0
- while not terminate_seen and string:
- result = list(dfa.lex(string))
- last_position = 0
- for (action, position) in result:
- action_stream.append((action[1], action[2], last_position +
offset, position + 1 + offset, string[last_position:(position + 1)]))
- last_position = position
- if action[2] == 'terminate':
- terminate_seen = True
- string = string[(last_position + 1):]
- offset += last_position
- return action_stream
+ (nfa, dfa) = self.__automata['default']
+ return dfa.lex(string)
if __name__ == '__main__':
=======================================
--- /branches/experimental/parser/tools/lexer_generator/lexer_test.py Fri
Nov 8 12:36:06 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/lexer_test.py Mon
Nov 11 07:52:15 2013 UTC
@@ -30,60 +30,44 @@
class LexerTestCase(unittest.TestCase):
- def __verify_action_stream(self, stream, expected_stream):
- for (ix, item) in enumerate(expected_stream):
- self.assertEquals(stream[ix][0], item[0])
- self.assertEquals(stream[ix][4], item[1])
+ def __verify_action_stream(self, rules, string, expected_stream):
+ expected_stream.append(('terminate', '\0'))
+ for i, (action, start, stop) in
enumerate(Generator(rules).lex(string)):
+ self.assertEquals(expected_stream[i][0], action)
+ self.assertEquals(expected_stream[i][1], string[start : stop])
def test_simple(self):
rules = '''
<default>
- "(" { LBRACE } <<continue>>
- ")" { RBRACE } <<continue>>
+ "(" { LBRACE }
+ ")" { RBRACE }
- "foo" { FOO } <<continue>>
- eof <<terminate>>
- default <<break>>'''
-
- generator = Generator(rules)
+ "foo" { FOO }
+ eof <<terminate>>'''
string = 'foo()\0'
- self.__verify_action_stream(
- generator.lex(string),
+ self.__verify_action_stream(rules, string,
[('FOO', 'foo'), ('LBRACE', '('), ('RBRACE', ')')])
def test_maximal_matching(self):
rules = '''
<default>
- "<" { LT } <<continue>>
- "<<" { SHL } <<continue>>
- " " { SPACE } <<continue>>
- eof <<terminate>>
- default <<break>>'''
-
- generator = Generator(rules)
+ "<" { LT }
+ "<<" { SHL }
+ " " { SPACE }
+ eof <<terminate>>'''
string = '<< <\0'
- self.__verify_action_stream(
- generator.lex(string),
+ self.__verify_action_stream(rules, string,
[('SHL', '<<'), ('SPACE', ' '), ('LT', '<')])
-
def test_consecutive_epsilon_transitions(self):
- # This rule set will create a NFA where we first have an epsilon
transition,
- # then following that, an epsilon transition with an action. This will
test
- # that the action is correctly pulled forward when creating the dfa.
rules = '''
digit = [0-9];
number = (digit+ ("." digit+)?);
<default>
- number { NUMBER } <<continue>>
- eof <<terminate>>
- default <<break>>'''
-
- generator = Generator(rules)
+ number { NUMBER }
+ eof <<terminate>>'''
string = '555\0'
- self.__verify_action_stream(
- generator.lex(string),
- [('NUMBER', '555')])
+ self.__verify_action_stream(rules, string, [('NUMBER', '555')])
=======================================
--- /branches/experimental/parser/tools/lexer_generator/nfa.py Fri Nov 8
11:52:04 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/nfa.py Mon Nov 11
07:52:15 2013 UTC
@@ -37,7 +37,7 @@
self.__transitions = {}
self.__unclosed = set()
self.__epsilon_closure = None
- self.__transition_action = None
+ self.__action = None
def epsilon_closure(self):
return self.__epsilon_closure
@@ -47,10 +47,14 @@
assert self.__epsilon_closure == None
self.__epsilon_closure = frozenset(closure)
- def set_transition_action(self, action):
+ def action(self):
+ assert self.is_closed()
+ return self.__action
+
+ def set_action(self, action):
assert not self.is_closed()
- assert self.__transition_action == None
- self.__transition_action = action
+ assert not self.__action
+ self.__action = action
def transitions(self):
assert self.is_closed()
@@ -58,27 +62,25 @@
def next_states(self, key_filter):
assert self.is_closed()
- first = lambda v: [x[0] for x in v]
- f = lambda acc, (k, v) : acc if not key_filter(k) else acc |
set(first(v))
+ f = lambda acc, (k, v) : acc if not key_filter(k) else acc | set(v)
return reduce(f, self.__transitions.items(), set())
- def __add_transition(self, key, action, next_state):
+ def __add_transition(self, key, next_state):
if next_state == None:
- assert not action
assert not self.is_closed(), "already closed"
self.__unclosed.add(key)
return
if not key in self.__transitions:
self.__transitions[key] = set()
- self.__transitions[key].add((next_state, action))
+ self.__transitions[key].add(next_state)
def add_unclosed_transition(self, key):
assert key != TransitionKey.epsilon()
- self.__add_transition(key, None, None)
+ self.__add_transition(key, None)
def add_epsilon_transition(self, state):
assert state != None
- self.__add_transition(TransitionKey.epsilon(), None, state)
+ self.__add_transition(TransitionKey.epsilon(), state)
def is_closed(self):
return self.__unclosed == None
@@ -86,14 +88,13 @@
def close(self, end):
assert not self.is_closed()
unclosed, self.__unclosed = self.__unclosed, None
- action, self.__transition_action = self.__transition_action, None
if end == None:
- assert not unclosed and not action
+ assert not unclosed
return
for key in unclosed:
- self.__add_transition(key, action, end)
+ self.__add_transition(key, end)
if not unclosed:
- self.__add_transition(TransitionKey.epsilon(), action, end)
+ self.__add_transition(TransitionKey.epsilon(), end)
def __matches(self, match_func, value):
f = lambda acc, (k, vs): acc | vs if match_func(k, value) else acc
@@ -105,9 +106,6 @@
def key_matches(self, value):
return self.__matches(lambda k, v : k.matches_key(v), value)
- def __str__(self):
- return "NfaState(" + str(self.node_number()) + ")"
-
@staticmethod
def gather_transition_keys(state_set):
f = lambda acc, state: acc | set(state.__transitions.keys())
@@ -205,21 +203,12 @@
return self.__key_state(TransitionKey.any())
def __action(self, graph):
- incoming = graph[3]
(start, ends) = self.__process(graph[1])
action = graph[2]
- if incoming:
- new_start = self.__new_state()
- new_start.set_transition_action(action)
- new_start.close(start)
- start = new_start
- else:
- new_end = self.__new_state()
- for end in ends:
- end.set_transition_action(action)
- self.__patch_ends(ends, new_end)
- ends = [new_end]
- return (start, ends)
+ end = self.__new_state()
+ self.__patch_ends(ends, end)
+ end.set_action(action)
+ return (start, [end])
def __continue(self, graph):
(start, ends) = self.__process(graph[1])
@@ -233,12 +222,16 @@
return (start, ends)
def __join(self, graph):
- (graph, name, subgraph) = graph[1:]
+ (graph, name, subgraph, modifier) = graph[1:]
subgraphs = self.__peek_state()['subgraphs']
if not name in subgraphs:
subgraphs[name] = self.__nfa(subgraph)
(subgraph_start, subgraph_end, nodes_in_subgraph) = subgraphs[name]
(start, ends) = self.__process(graph)
+ if modifier:
+ assert modifier == 'ZERO_OR_MORE'
+ for end in ends:
+ end.add_epsilon_transition(subgraph_end)
self.__patch_ends(ends, subgraph_start)
end = self.__new_state()
subgraph_end.add_epsilon_transition(end)
@@ -290,19 +283,17 @@
@staticmethod
def add_action(graph, action):
- return ('ACTION', graph, action, False)
-
- @staticmethod
- def add_incoming_action(graph, action):
- return ('ACTION', graph, action, True)
+ return ('ACTION', graph, action)
@staticmethod
def add_continue(graph):
return ('CONTINUE', graph)
@staticmethod
- def join_subgraph(graph, name, subgraph):
- return ('JOIN', graph, name, subgraph)
+ def join_subgraph(graph, name, subgraph, modifier):
+ if modifier:
+ modifier = NfaBuilder.__modifer_map[modifier]
+ return ('JOIN', graph, name, subgraph, modifier)
@staticmethod
def or_graphs(graphs):
@@ -372,7 +363,7 @@
transitions = reduce(f, valid_states, set())
if not transitions:
return False
- valid_states = Nfa.__close(set([x[0] for x in transitions]))
+ valid_states = Nfa.__close(transitions)
return self.__end in valid_states
@staticmethod
@@ -382,38 +373,25 @@
name = ".".join(str(x.node_number()) for x in sorted(nfa_state_set))
if name in dfa_nodes:
return name
+ def gather_actions(states):
+ actions = set()
+ for state in states:
+ if state.action():
+ actions.add(state.action())
+ actions = list(actions)
+ actions.sort()
+ return actions
dfa_nodes[name] = {
'transitions': {},
- 'terminal': end_node in nfa_state_set}
+ 'terminal': end_node in nfa_state_set,
+ 'actions' : gather_actions(nfa_state_set)}
for key in NfaState.gather_transition_keys(nfa_state_set):
+ match_states = set()
f = lambda acc, state: acc | state.key_matches(key)
- transitions = reduce(f, nfa_state_set, set())
- match_states = set()
- actions = []
- for (state, action) in transitions:
+ for state in reduce(f, nfa_state_set, set()):
match_states.add(state)
- if action:
- actions.append(action)
-
- # Pull in actions which can be taken with an epsilon transition
from the
- # match state.
- e = TransitionKey.epsilon()
- if e in state.transitions():
- for e_trans in state.transitions()[e]:
- if e_trans[1]:
- actions.append(e_trans[1])
- for s in state.epsilon_closure():
- if e in s.transitions():
- for e_trans in s.transitions()[e]:
- if e_trans[1]:
- actions.append(e_trans[1])
-
- assert len(match_states) == len(transitions)
-
- actions.sort()
- action = actions[0] if actions else None
transition_state = Nfa.__to_dfa(match_states, dfa_nodes, end_node)
- dfa_nodes[name]['transitions'][key] = (transition_state, action)
+ dfa_nodes[name]['transitions'][key] = transition_state
return name
def compute_dfa(self):
@@ -424,4 +402,5 @@
def to_dot(self):
iterator = lambda visitor, state: self.__visit_all_edges(visitor,
state)
- return self.generate_dot(self.__start, set([self.__end]), iterator)
+ state_iterator = lambda x : x
+ return self.generate_dot(self.__start, set([self.__end]), iterator,
state_iterator)
=======================================
--- /branches/experimental/parser/tools/lexer_generator/rule_lexer.py Wed
Nov 6 15:45:04 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/rule_lexer.py Mon
Nov 11 07:52:15 2013 UTC
@@ -31,6 +31,7 @@
tokens = (
'DEFAULT',
+ 'DEFAULT_ACTION',
'IDENTIFIER',
'STRING',
'REGEX',
@@ -70,6 +71,8 @@
r'[a-zA-Z][a-zA-Z0-9_]*'
if t.value == 'default':
t.type = 'DEFAULT'
+ if t.value == 'default_action':
+ t.type = 'DEFAULT_ACTION'
return t
t_STRING = r'"((\\("|\w|\\))|[^\\"])+"'
=======================================
--- /branches/experimental/parser/tools/lexer_generator/rule_parser.py Fri
Nov 8 10:33:44 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/rule_parser.py Mon
Nov 11 07:52:15 2013 UTC
@@ -85,7 +85,7 @@
assert state.current_state
if not state.current_state in state.rules:
state.rules[state.current_state] = {
- 'default': None,
+ 'default_action': None,
'regex' : []
}
p[0] = state.current_state
@@ -95,33 +95,28 @@
| empty'''
def p_transition_rule(self, p):
- '''transition_rule : composite_regex_or_default code action
- | composite_regex_or_default empty action
- | composite_regex_or_default code empty'''
- transition = p[3] if p[3] else 'continue'
- if transition == 'continue' and self.__state.current_state
== 'default':
- transition = 'break'
+ '''transition_rule : composite_regex code action
+ | composite_regex empty action
+ | composite_regex code empty
+ | DEFAULT_ACTION code empty'''
+ transition = p[3] if p[3] else 'break'
if not transition in self.__keyword_transitions:
assert not transition == 'default'
self.__state.transitions.add(transition)
- rule = (p[1], (RuleParser.__rule_precedence_counter, p[2], transition))
RuleParser.__rule_precedence_counter += 1
rules = self.__state.rules[self.__state.current_state]
- if p[1] == 'default':
- assert not rules['default']
- rules['default'] = rule
+ code = p[2]
+ if p[1] == 'default_action':
+ assert not rules['default_action']
+ rules['default_action'] = code
else:
+ rule = (p[1], (RuleParser.__rule_precedence_counter, code,
transition))
rules['regex'].append(rule)
def p_action(self, p):
'action : ACTION_OPEN IDENTIFIER ACTION_CLOSE'
p[0] = p[2]
- def p_composite_regex_or_default(self, p):
- '''composite_regex_or_default : DEFAULT
- | composite_regex'''
- p[0] = p[1]
-
def p_composite_regex(self, p):
'''composite_regex : regex_parts OR regex_parts
| regex_parts'''
--
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
---
You received this message because you are subscribed to the Google Groups "v8-dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email
to [email protected].
For more options, visit https://groups.google.com/groups/opt_out.