Revision: 19502
Author: [email protected]
Date: Thu Feb 20 08:59:23 2014 UTC
Log: Experimental parser: add dfa path iterator
[email protected]
BUG=
Review URL: https://codereview.chromium.org/172893003
http://code.google.com/p/v8/source/detail?r=19502
Modified:
/branches/experimental/parser/src/lexer/lexer_py.re
/branches/experimental/parser/tools/lexer_generator/backtracking_generator.py
/branches/experimental/parser/tools/lexer_generator/code_generator.jinja
/branches/experimental/parser/tools/lexer_generator/dfa.py
/branches/experimental/parser/tools/lexer_generator/generator.py
=======================================
--- /branches/experimental/parser/src/lexer/lexer_py.re Wed Feb 19 15:52:12
2014 UTC
+++ /branches/experimental/parser/src/lexer/lexer_py.re Thu Feb 20 08:59:23
2014 UTC
@@ -142,8 +142,8 @@
line_terminator+ <|line_terminator|>
/[:whitespace:]+/ <|skip|>
-"\"" <set_marker(1)||DoubleQuoteString>
-"'" <set_marker(1)||SingleQuoteString>
+"\"" <set_marker(1)|token(ILLEGAL)|DoubleQuoteString>
+"'" <set_marker(1)|token(ILLEGAL)|SingleQuoteString>
# all keywords
"break" <|token(BREAK)|>
@@ -192,11 +192,12 @@
"with" <|token(WITH)|>
"yield" <|token(YIELD)|>
-identifier_start <|token(IDENTIFIER)|Identifier>
-unicode_escape <check_escaped_identifier_start|token(IDENTIFIER)|
Identifier>
+identifier_start <|token(IDENTIFIER)|Identifier>
+unicode_escape <check_escaped_identifier_start|token(IDENTIFIER)|
Identifier>
+"\\" <|token(ILLEGAL)|> # ambiguous backtracking otherwise
eos <terminate>
-default_action <do_token_and_go_forward(ILLEGAL)>
+default_action <default>
<<DoubleQuoteString>>
epsilon <StringSubgraph>
@@ -222,8 +223,9 @@
line_terminator <|token(ILLEGAL)|>
<<Identifier>>
-identifier_char <|token(IDENTIFIER)|continue>
-/\\u[:hex_digit:]{4}/ <check_escaped_identifier_part|token(IDENTIFIER)|
continue>
+identifier_char <|token(IDENTIFIER)|continue>
+unicode_escape <check_escaped_identifier_part|token(IDENTIFIER)|continue>
+"\\" <|token(ILLEGAL)|> # ambiguous backtracking otherwise
<<SingleLineComment>>
line_terminator <|line_terminator|>
@@ -232,7 +234,6 @@
<<MultiLineComment>>
/\*+\// <|skip|>
-# TODO(dcarney): find a way to generate the below rule
/\*+[^\/*]/ <||continue>
line_terminator <line_terminator_in_multiline_comment||continue>
catch_all <||continue>
=======================================
---
/branches/experimental/parser/tools/lexer_generator/backtracking_generator.py
Wed Feb 19 15:52:12 2014 UTC
+++
/branches/experimental/parser/tools/lexer_generator/backtracking_generator.py
Thu Feb 20 08:59:23 2014 UTC
@@ -40,6 +40,36 @@
self.__default_action = default_action
self.__log = log
+ @staticmethod
+ def __process_error_states(dfa, error_map, action_state_map):
+ path_map = { k : [] for k in error_map.iterkeys()}
+ error_states = set(error_map.iterkeys())
+ for path, states_in_path, terminal in dfa.path_iter():
+ matching = error_states & states_in_path
+ if not matching:
+ continue
+ last_action = None
+ path_length = 0
+ for (key, state) in path:
+ if state in error_states:
+ assert last_action != None
+ path_map[state].append([k for k, s in path[1:path_length]])
+ elif state in action_state_map:
+ last_action = state
+ path_length += 1
+ print reduce(lambda acc, x : acc + len(x), path_map.itervalues(), 0)
+ for state, key_paths in path_map.iteritems():
+ count = 0
+ action_str = map(str, error_map[state][0])
+ for key_path in key_paths:
+ print "path %d %s %s" % (
+ state.node_number(),
+ action_str,
+ " --> ".join(map(str, key_path)))
+ count += 1
+ if count > 10:
+ break
+
def __generate(self):
dfa = self.__dfa
default_action = self.__default_action
@@ -72,6 +102,7 @@
if (not start_state in incoming_transitions and
not start_state in action_states):
action_states[start_state] = default_action
+ error_map = {}
for state in incoming_transitions.iterkeys():
if state in omega_states or state in action_states:
continue
@@ -98,12 +129,11 @@
next.add(incoming_state)
seen |= unchecked
unchecked = next - seen
- # accumulate unique actions
+ # Accumulate unique actions.
actions = set(map(lambda x : action_states[x].term(), match_edge))
assert actions
if not len(actions) == 1:
- # TODO(dcarney): need to warn here after second pass
- # actions = set([default_action])
+ error_map[state] = actions, match_edge
continue
action = iter(actions).next()
# don't install default actions
@@ -111,6 +141,11 @@
continue
store_states |= match_edge
backtracking_map[state.node_number()] = (action, match_edge)
+ # Bail if error occurred.
+ if error_map:
+ self.__process_error_states(dfa, error_map, action_states)
+ raise Exception('ambiguous backtracking')
+ # Now install backtracking everywhere.
def install_backtracking_action(new_states, node_number):
omega_state_id = str(node_number) + '_bt'
key = TransitionKey.omega()
=======================================
---
/branches/experimental/parser/tools/lexer_generator/code_generator.jinja
Wed Feb 19 15:52:12 2014 UTC
+++
/branches/experimental/parser/tools/lexer_generator/code_generator.jinja
Thu Feb 20 08:59:23 2014 UTC
@@ -139,8 +139,8 @@
{% elif type == 'do_stored_token' %}
DO_TOKEN(stored_token)
return;
- {% elif type == 'do_token_and_go_forward' %}
- DO_TOKEN(Token::{{args[0]}});
+ {% elif type == 'default' %}
+ DO_TOKEN(Token::ILLEGAL);
FORWARD();
RESET_START();
return;
=======================================
--- /branches/experimental/parser/tools/lexer_generator/dfa.py Wed Feb 19
15:52:12 2014 UTC
+++ /branches/experimental/parser/tools/lexer_generator/dfa.py Thu Feb 20
08:59:23 2014 UTC
@@ -168,3 +168,37 @@
incoming_transitions[transition_state].append((key, state))
self.visit_all_states(f)
return incoming_transitions
+
+ def path_iter(self):
+ '''Walk all paths through the dfa, ending at nodes with no transitions
+ or the first reappearance of a node. Yields data structures which
cannot
+ be modified or stored.
+ Yields path: [(key, state)], states_in_path: set, is_terminal : boolean
+ path always starts with (None, state_state)'''
+ start_state = self.start_state()
+ path = [(None, start_state)]
+ in_path = set([start_state])
+ iters = [(start_state.key_state_iter(), False)]
+ while path:
+ assert len(path) == len(in_path)
+ assert len(path) == len(iters)
+ try:
+ (it, called) = iters[-1]
+ (key, state) = it.next()
+ if not called:
+ iters[-1] = (it, True)
+ path.append((key, state))
+ if state in in_path:
+ # Terminate nested iteration.
+ yield path, in_path, False
+ path.pop()
+ continue
+ # Continue nesting.
+ in_path.add(state)
+ iters.append((state.key_state_iter(), False))
+ except StopIteration:
+ # No more transitions in iterator, pop everything.
+ (it, called) = iters.pop()
+ if not called:
+ yield path, in_path, True
+ in_path.remove(path.pop()[1])
=======================================
--- /branches/experimental/parser/tools/lexer_generator/generator.py Wed
Feb 19 10:54:59 2014 UTC
+++ /branches/experimental/parser/tools/lexer_generator/generator.py Thu
Feb 20 08:59:23 2014 UTC
@@ -141,6 +141,7 @@
parser.add_argument('--debug-code', action='store_true')
parser.add_argument('--profile', action='store_true')
parser.add_argument('--rule-html')
+ parser.add_argument('--count-paths', action='store_true')
args = parser.parse_args()
minimize_default = not args.no_minimize_default
@@ -168,6 +169,13 @@
dfa.node_count(), mdfa.node_count())
DfaMinimizer.set_verify(True)
+ if args.count_paths:
+ path_count = 0
+ print 'counting'
+ for path in
rule_processor.default_automata().minimal_dfa().path_iter():
+ path_count += 1
+ print 'done', path_count
+
html_file = args.html
if html_file:
html = generate_html(
--
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
---
You received this message because you are subscribed to the Google Groups "v8-dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email
to [email protected].
For more options, visit https://groups.google.com/groups/opt_out.