Revision: 19477
Author: [email protected]
Date: Wed Feb 19 10:54:59 2014 UTC
Log: Experimental parser: always apply default transitions
[email protected]
BUG=
Review URL: https://codereview.chromium.org/170253007
http://code.google.com/p/v8/source/detail?r=19477
Modified:
/branches/experimental/parser/tools/lexer_generator/automaton.py
/branches/experimental/parser/tools/lexer_generator/dfa.py
/branches/experimental/parser/tools/lexer_generator/dfa_optimizer.py
/branches/experimental/parser/tools/lexer_generator/dot_utilities.py
/branches/experimental/parser/tools/lexer_generator/generator.py
/branches/experimental/parser/tools/lexer_generator/nfa.py
/branches/experimental/parser/tools/lexer_generator/nfa_builder.py
/branches/experimental/parser/tools/lexer_generator/term.py
/branches/experimental/parser/tools/lexer_generator/test/term_test.py
=======================================
--- /branches/experimental/parser/tools/lexer_generator/automaton.py Mon
Feb 17 11:26:21 2014 UTC
+++ /branches/experimental/parser/tools/lexer_generator/automaton.py Wed
Feb 19 10:54:59 2014 UTC
@@ -79,7 +79,7 @@
return isinstance(other, self.__class__) and self.__term ==
other.__term
def __str__(self):
- return "action <%s>" % ('' if not self.__term else str(self.__term))
+ return str(self.__term)
class AutomatonState(object):
'''A base class for dfa and nfa states. Immutable'''
=======================================
--- /branches/experimental/parser/tools/lexer_generator/dfa.py Mon Feb 17
12:33:05 2014 UTC
+++ /branches/experimental/parser/tools/lexer_generator/dfa.py Wed Feb 19
10:54:59 2014 UTC
@@ -38,20 +38,42 @@
self.__action = action
assert isinstance(action, Action)
+ def sort_transitions(self):
+ self.__transitions = sorted(self.__transitions,
+ cmp = lambda (k1, v1), (k2, v2): cmp(k1,
k2))
+
def name(self):
return self.__name
def action(self):
return self.__action
- def transition_count(self):
- return len(self.__transitions)
-
def omega_transition(self):
- if TransitionKey.omega() in self.__transitions:
- return self.__transitions[TransitionKey.omega()]
+ if (self.__transitions and
+ self.__transitions[-1][0] == TransitionKey.omega()):
+ return self.__transitions[-1][1]
return None
+ def match_action(self):
+ '''returns an action if this state's omega transition terminates
+ immediately and has an action'''
+ omega_chain = list(self.omega_chain_iter())
+ if len(omega_chain) == 1 and omega_chain[0][1] == 0:
+ return omega_chain[0][0].action()
+ return Action.empty_action()
+
+ def omega_chain_iter(self):
+ state = self
+ while True:
+ state = state.omega_transition()
+ if not state:
+ return
+      transition_count = len(state.__transitions)
+      yield (state, transition_count)
+      if not (transition_count == 0 or
+          (transition_count == 1 and state.omega_transition())):
+        return
+
def epsilon_closure_iter(self):
return iter([])
@@ -66,7 +88,7 @@
state_filter = lambda x: True,
match_func = lambda x, y: True,
yield_func = lambda x, y: (x, y)):
- for key, state in self.__transitions.items():
+ for (key, state) in self.__transitions:
if key_filter(key) and state_filter(state) and match_func(key,
state):
yield yield_func(key, state)
@@ -74,17 +96,15 @@
@staticmethod
def __add_transition(transitions, key, state):
- assert key != None
- assert not key == TransitionKey.epsilon()
- assert not transitions.has_key(key)
- transitions[key] = state
+ assert key != None and key != TransitionKey.epsilon()
+ transitions.append((key, state))
def __init__(self, encoding, start_name, mapping):
super(Dfa, self).__init__(encoding)
self.__terminal_set = set()
name_map = {}
for name, node_data in mapping.items():
- transitions = {}
+ transitions = []
node = DfaState(name, node_data['action'], transitions)
name_map[name] = (node, transitions)
if node_data['terminal']:
@@ -93,7 +113,10 @@
for name, node_data in mapping.items():
(node, transitions) = name_map[name]
inversion = {}
+ omega_state = None
for key, state in node_data['transitions'].items():
+ if key == TransitionKey.omega():
+ omega_state = name_map[state][0]
if not state in inversion:
inversion[state] = []
inversion[state].append(key)
@@ -101,6 +124,8 @@
all_keys += keys
merged_key = TransitionKey.merged_key(encoding, keys)
self.__add_transition(transitions, merged_key, name_map[state][0])
+ node.sort_transitions()
+ assert node.omega_transition() == omega_state
self.__start = name_map[start_name][0]
self.__node_count = len(mapping)
self.__disjoint_keys = sorted(
=======================================
--- /branches/experimental/parser/tools/lexer_generator/dfa_optimizer.py
Mon Feb 17 11:26:21 2014 UTC
+++ /branches/experimental/parser/tools/lexer_generator/dfa_optimizer.py
Wed Feb 19 10:54:59 2014 UTC
@@ -166,12 +166,6 @@
dfa.visit_all_states(f)
return incoming_transitions
- @staticmethod
- def __match_action(state):
- state = state.omega_transition()
- state = state if state and state.transition_count() == 0 else None
- return Action.empty_action() if not state else state.action()
-
@staticmethod
def __build_replacement_map(dfa):
replacements = {}
@@ -187,14 +181,14 @@
if state.action(): # TODO(dcarney): allow this via action chaining
continue
# We only perform this optimization on 'token' actions
- match_action = DfaOptimizer.__match_action(state)
+ match_action = state.match_action()
if not match_action.name() == 'token':
continue
assert state.omega_transition() in dfa.terminal_set()
# we can drop incoming edges for states with a match action
# of either 'token' or 'harmony_token'
def is_replacement_candidate(state):
- action = DfaOptimizer.__match_action(state)
+ action = state.match_action()
return action.name() == 'token' or action.name() == 'harmony_token'
for (incoming_key, incoming_state) in incoming:
# check to see if incoming edges are matched by an outgoing edge
@@ -236,13 +230,13 @@
@staticmethod
def __split_target_state(state):
old_name = DfaOptimizer.__new_state_name(state)
- old_match_action = DfaOptimizer.__match_action(state)
+ old_match_action = state.match_action()
assert old_match_action.name() == 'token', 'unimplemented'
precedence = old_match_action.precedence()
match_action = Action(Term('do_stored_token'), precedence)
- head_action = Action(
- Term('store_token', old_match_action.term().args()[0]), precedence)
- tail_action = Action(Term('no_op'), precedence)
+ stored_token = old_match_action.term().args()[0]
+ head_action = Action(Term('store_token', stored_token), precedence)
+ tail_action = Action(Term('no_op', stored_token), precedence)
head = DfaOptimizer.__new_state(False, head_action)
tail = DfaOptimizer.__new_state(False, tail_action)
match = DfaOptimizer.__new_state(True, match_action)
@@ -253,9 +247,9 @@
# generate a store action to replace an existing action
@staticmethod
def __replacement_action(state, transition_state):
- old_action = DfaOptimizer.__match_action(state)
+ old_action = state.match_action()
assert old_action
- transition_action =
DfaOptimizer.__match_action(transition_state).term()
+ transition_action = transition_state.match_action().term()
if old_action.term() == transition_action:
# no need to store token
return Action.empty_action()
=======================================
--- /branches/experimental/parser/tools/lexer_generator/dot_utilities.py
Mon Feb 17 11:26:21 2014 UTC
+++ /branches/experimental/parser/tools/lexer_generator/dot_utilities.py
Wed Feb 19 10:54:59 2014 UTC
@@ -29,6 +29,7 @@
from key_encoding import KeyEncoding
from term import Term
from transition_key import TransitionKey
+from automaton import Action
def escape_for_dot(v):
v = str(v)
@@ -70,11 +71,51 @@
process(term)
return 'digraph { %s %s }' % ('\n'.join(nodes), '\n'.join(edges))
-def automaton_to_dot(automaton):
-
- def f(node, (node_content, edge_content)):
- if node.action():
- action_text = escape_for_dot(node.action())
+def automaton_to_dot(automaton, merge = False):
+ terminal_set = automaton.terminal_set()
+ to_skip = set([])
+ # get a replacement action for a state's omega transition
+ def analyse_omega_chain(state):
+ omega_chain = list(state.omega_chain_iter())
+ if len(omega_chain) == 1:
+ omega_state = omega_chain[0][0]
+ # Handle match nodes
+ if omega_chain[0][1] == 0:
+ if omega_state in terminal_set:
+ to_skip.add(omega_state) # don't draw omega transition state
+ terminal_set.add(state)
+ return omega_state.action()
+ elif len(omega_chain) == 2:
+ first_action = omega_chain[0][0].action().name()
+ if ((first_action == 'store_token' or
+ first_action == 'store_harmony_token') and
+ omega_chain[1][0].action().name() == 'no_op'):
+ if (omega_chain[0][0].action().term().args() ==
+ omega_chain[1][0].action().term().args()):
+ return Action(Term('goto', omega_chain[0][0].node_number()), 0)
+ else:
+ to_skip.add(omega_chain[0][0]) # don't draw first transition
+ return Action(Term('chain',
+ omega_chain[0][0].action().term(),
+ Term('goto', omega_chain[1][0].node_number())), 0)
+ return Action.empty_action()
+ # draw state
+ skipped = set([])
+ drawn = set([])
+ def draw(node, (node_content, edge_content)):
+ if node in to_skip: # skip match nodes
+ skipped.add(node)
+ return (node_content, edge_content)
+ drawn.add(node)
+ omega_action = analyse_omega_chain(node) if merge else
Action.empty_action()
+ node_action = node.action()
+ if node_action.name() == 'no_op':
+ node_action = Action.empty_action()
+ if node_action or omega_action:
+ action_text = str(node.action())
+ if omega_action:
+ action_text += ' | ' + str(omega_action)
+ action_text = '< %s >' % escape_for_dot(action_text)
node_content.append(' S_l%s[shape = box, label="%s"];' %
(node.node_number(), action_text))
node_content.append(' S_%s -> S_l%s [arrowhead = none];' %
@@ -83,6 +124,8 @@
if key == TransitionKey.epsilon():
key = "ε"
elif key == TransitionKey.omega():
+ if omega_action: # don't draw omega transition
+ continue
key = "ω"
else:
key = key.to_string(automaton.encoding())
@@ -90,12 +133,15 @@
node.node_number(), state.node_number(), escape_for_dot(key)))
return (node_content, edge_content)
- (node_content, edge_content) = automaton.visit_all_states(f, ([], []))
+ (node_content, edge_content) = automaton.visit_all_states(draw, ([], []))
+
+ assert skipped == to_skip
+ terminal_set -= to_skip
+ assert automaton.node_count() == len(skipped) + len(drawn)
start_set = automaton.start_set()
assert len(start_set) == 1
start_node = iter(start_set).next()
- terminal_set = automaton.terminal_set()
terminals = ["S_%d;" % x.node_number() for x in terminal_set]
start_number = start_node.node_number()
=======================================
--- /branches/experimental/parser/tools/lexer_generator/generator.py Mon
Feb 17 11:26:21 2014 UTC
+++ /branches/experimental/parser/tools/lexer_generator/generator.py Wed
Feb 19 10:54:59 2014 UTC
@@ -66,7 +66,7 @@
%s
</script>'''
-def generate_html(rule_processor, minimize_default):
+def generate_html(rule_processor, minimize_default, merge):
scripts = []
loads = []
for i, (name, automata) in
enumerate(list(rule_processor.automata_iter())):
@@ -77,10 +77,10 @@
(nfa_i, dfa_i, mdfa_i) = ("nfa_%d" % i, "dfa_%d" % i, "mdfa_%d" % i)
scripts.append(script_template % (nfa_i, automaton_to_dot(nfa)))
loads.append(load_template % ("nfa [%s]" % name, nfa_i))
- scripts.append(script_template % (dfa_i, automaton_to_dot(dfa)))
+ scripts.append(script_template % (dfa_i, automaton_to_dot(dfa, merge)))
loads.append(load_template % ("dfa [%s]" % name, dfa_i))
if mdfa and mdfa.node_count() != dfa.node_count():
- scripts.append(script_template % (mdfa_i, automaton_to_dot(mdfa)))
+ scripts.append(script_template % (mdfa_i, automaton_to_dot(mdfa,
merge)))
loads.append(load_template % ("mdfa [%s]" % name, mdfa_i))
body = "\n".join(scripts) + (load_outer_template % "\n".join(loads))
return file_template % body
@@ -128,6 +128,7 @@
parser = argparse.ArgumentParser()
parser.add_argument('--html')
+ parser.add_argument('--no-merge-html', action='store_true')
parser.add_argument('--re', default='src/lexer/lexer_py.re')
parser.add_argument('--input')
parser.add_argument('--code')
@@ -169,7 +170,8 @@
html_file = args.html
if html_file:
- html = generate_html(rule_processor, minimize_default)
+ html = generate_html(
+ rule_processor, minimize_default, not args.no_merge_html)
with open(args.html, 'w') as f:
f.write(html)
if verbose:
=======================================
--- /branches/experimental/parser/tools/lexer_generator/nfa.py Mon Feb 17
11:26:21 2014 UTC
+++ /branches/experimental/parser/tools/lexer_generator/nfa.py Wed Feb 19
10:54:59 2014 UTC
@@ -131,7 +131,11 @@
super(Nfa, self).__init__(encoding)
self.__start = start
self.__end = end
- self.__verify(nodes_created)
+ self.__node_count = nodes_created
+ self.__verify()
+
+ def node_count(self):
+ return self.__node_count
def start_state(self):
return self.__start
@@ -139,12 +143,12 @@
def terminal_set(self):
return set([self.__end])
- def __verify(self, nodes_created):
+ def __verify(self):
def f(node, count):
node.post_creation_verify()
return count + 1
count = self.visit_all_states(f, 0)
- assert count == nodes_created
+ assert count == self.__node_count
@staticmethod
def __gather_transition_keys(encoding, state_set):
=======================================
--- /branches/experimental/parser/tools/lexer_generator/nfa_builder.py Mon
Feb 17 11:26:21 2014 UTC
+++ /branches/experimental/parser/tools/lexer_generator/nfa_builder.py Wed
Feb 19 10:54:59 2014 UTC
@@ -153,15 +153,19 @@
self.__patch_ends(ends, node)
return (start, [node])
- def __match_action(self, action, precedence, subtree):
- (start, ends) = self.__process(subtree)
+ def __build_match_node(self, action):
node = self.__new_state()
- node.set_action(Action(action, precedence))
+ node.set_action(action)
# Force all match actions to be terminal.
node.close(self.__global_end)
# patch via a match edge into existing graph
match_node = self.__new_state()
match_node.add_transition(TransitionKey.omega(), node)
+ return match_node
+
+ def __match_action(self, action, precedence, subtree):
+ (start, ends) = self.__process(subtree)
+ match_node = self.__build_match_node(Action(action, precedence))
self.__patch_ends(ends, match_node)
return (start, [match_node])
@@ -291,14 +295,17 @@
return terms[0] if len(terms) == 1 else Term('CAT', *terms)
@staticmethod
- def nfa(encoding, character_classes, subtree_map, name):
+ def nfa(encoding, character_classes, subtree_map,
+ name, default_action = Action.empty_action()):
self = NfaBuilder(encoding, character_classes, subtree_map)
(start, ends) = self.__subgraph(name)
# close all ends
+ for e in ends:
+ match_node = self.__build_match_node(default_action)
+ e.close(match_node)
+ match_node.close(None)
end = self.__global_end
end.close(None)
- # TODO(dcarney): patch in default action
- self.__patch_ends(ends, end)
# Prepare the nfa
self.__compute_epsilon_closures(start)
f = lambda node, state: self.__replace_catch_all(self.__encoding, node)
=======================================
--- /branches/experimental/parser/tools/lexer_generator/term.py Mon Feb 17
11:26:21 2014 UTC
+++ /branches/experimental/parser/tools/lexer_generator/term.py Wed Feb 19
10:54:59 2014 UTC
@@ -67,5 +67,8 @@
# TODO(dcarney): escape '(', ')' and ',' in strings
def __str__(self):
if self.__str == None:
- self.__str = '(%s)' % ','.join(map(str, self.__tuple))
+ if not self:
+ self.__str = ''
+ else:
+ self.__str = '%s(%s)' % (self.name(), ','.join(map(str,
self.args())))
return self.__str
=======================================
--- /branches/experimental/parser/tools/lexer_generator/test/term_test.py
Mon Feb 17 11:26:21 2014 UTC
+++ /branches/experimental/parser/tools/lexer_generator/test/term_test.py
Wed Feb 19 10:54:59 2014 UTC
@@ -35,17 +35,17 @@
t = Term('a')
self.assertEqual('a', t.name())
self.assertEqual((), t.args())
- self.assertEqual("(a)", str(t))
+ self.assertEqual("a()", str(t))
self.assertEqual(Term('a'), t)
t = Term('a', 'b', 'c')
self.assertEqual('a', t.name())
self.assertEqual(('b', 'c'), t.args())
- self.assertEqual("(a,b,c)", str(t))
+ self.assertEqual("a(b,c)", str(t))
self.assertEqual(Term('a', 'b', 'c'), t)
t = Term('a', Term('b', 'c'))
self.assertEqual('a', t.name())
self.assertEqual((Term('b', 'c'), ), t.args())
- self.assertEqual("(a,(b,c))", str(t))
+ self.assertEqual("a(b(c))", str(t))
self.assertEqual(Term('a', Term('b', 'c')), t)
--
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
---
You received this message because you are subscribed to the Google Groups "v8-dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email
to [email protected].
For more options, visit https://groups.google.com/groups/opt_out.