Revision: 19477
Author:   [email protected]
Date:     Wed Feb 19 10:54:59 2014 UTC
Log:      Experimental parser: always apply default transitions

[email protected]

BUG=

Review URL: https://codereview.chromium.org/170253007
http://code.google.com/p/v8/source/detail?r=19477

Modified:
 /branches/experimental/parser/tools/lexer_generator/automaton.py
 /branches/experimental/parser/tools/lexer_generator/dfa.py
 /branches/experimental/parser/tools/lexer_generator/dfa_optimizer.py
 /branches/experimental/parser/tools/lexer_generator/dot_utilities.py
 /branches/experimental/parser/tools/lexer_generator/generator.py
 /branches/experimental/parser/tools/lexer_generator/nfa.py
 /branches/experimental/parser/tools/lexer_generator/nfa_builder.py
 /branches/experimental/parser/tools/lexer_generator/term.py
 /branches/experimental/parser/tools/lexer_generator/test/term_test.py

=======================================
--- /branches/experimental/parser/tools/lexer_generator/automaton.py Mon Feb 17 11:26:21 2014 UTC +++ /branches/experimental/parser/tools/lexer_generator/automaton.py Wed Feb 19 10:54:59 2014 UTC
@@ -79,7 +79,7 @@
return isinstance(other, self.__class__) and self.__term == other.__term

   def __str__(self):
-    return "action <%s>" % ('' if not self.__term else str(self.__term))
+    return str(self.__term)

 class AutomatonState(object):
   '''A base class for dfa and nfa states.  Immutable'''
=======================================
--- /branches/experimental/parser/tools/lexer_generator/dfa.py Mon Feb 17 12:33:05 2014 UTC +++ /branches/experimental/parser/tools/lexer_generator/dfa.py Wed Feb 19 10:54:59 2014 UTC
@@ -38,20 +38,42 @@
     self.__action = action
     assert isinstance(action, Action)

+  def sort_transitions(self):
+    self.__transitions = sorted(self.__transitions,
+ cmp = lambda (k1, v1), (k2, v2): cmp(k1, k2))
+
   def name(self):
     return self.__name

   def action(self):
     return self.__action

-  def transition_count(self):
-    return len(self.__transitions)
-
   def omega_transition(self):
-    if TransitionKey.omega() in self.__transitions:
-      return self.__transitions[TransitionKey.omega()]
+    if (self.__transitions and
+        self.__transitions[-1][0] == TransitionKey.omega()):
+      return self.__transitions[-1][1]
     return None

+  def match_action(self):
+    '''returns an action if this state's omega transition terminates
+    immediately and has an action'''
+    omega_chain = list(self.omega_chain_iter())
+    if len(omega_chain) == 1 and omega_chain[0][1] == 0:
+      return omega_chain[0][0].action()
+    return Action.empty_action()
+
+  def omega_chain_iter(self):
+    state = self
+    while True:
+      state = state.omega_transition()
+      if not state:
+        return
+      transistion_count = len(state.__transitions)
+      yield (state, transistion_count)
+      if not (transistion_count == 0 or
+              (transistion_count == 1 and state.omega_transition())):
+        return
+
   def epsilon_closure_iter(self):
     return iter([])

@@ -66,7 +88,7 @@
     state_filter = lambda x: True,
     match_func = lambda x, y: True,
     yield_func = lambda x, y: (x, y)):
-    for key, state in self.__transitions.items():
+    for (key, state) in self.__transitions:
if key_filter(key) and state_filter(state) and match_func(key, state):
         yield yield_func(key, state)

@@ -74,17 +96,15 @@

   @staticmethod
   def __add_transition(transitions, key, state):
-    assert key != None
-    assert not key == TransitionKey.epsilon()
-    assert not transitions.has_key(key)
-    transitions[key] = state
+    assert key != None and key != TransitionKey.epsilon()
+    transitions.append((key, state))

   def __init__(self, encoding, start_name, mapping):
     super(Dfa, self).__init__(encoding)
     self.__terminal_set = set()
     name_map = {}
     for name, node_data in mapping.items():
-      transitions = {}
+      transitions = []
       node = DfaState(name, node_data['action'], transitions)
       name_map[name] = (node, transitions)
       if node_data['terminal']:
@@ -93,7 +113,10 @@
     for name, node_data in mapping.items():
       (node, transitions) = name_map[name]
       inversion = {}
+      omega_state = None
       for key, state in node_data['transitions'].items():
+        if key == TransitionKey.omega():
+          omega_state = name_map[state][0]
         if not state in inversion:
           inversion[state] = []
         inversion[state].append(key)
@@ -101,6 +124,8 @@
         all_keys += keys
         merged_key = TransitionKey.merged_key(encoding, keys)
         self.__add_transition(transitions, merged_key, name_map[state][0])
+      node.sort_transitions()
+      assert node.omega_transition() == omega_state
     self.__start = name_map[start_name][0]
     self.__node_count = len(mapping)
     self.__disjoint_keys = sorted(
=======================================
--- /branches/experimental/parser/tools/lexer_generator/dfa_optimizer.py Mon Feb 17 11:26:21 2014 UTC +++ /branches/experimental/parser/tools/lexer_generator/dfa_optimizer.py Wed Feb 19 10:54:59 2014 UTC
@@ -166,12 +166,6 @@
     dfa.visit_all_states(f)
     return incoming_transitions

-  @staticmethod
-  def __match_action(state):
-    state = state.omega_transition()
-    state = state if state and state.transition_count() == 0 else None
-    return Action.empty_action() if not state else state.action()
-
   @staticmethod
   def __build_replacement_map(dfa):
     replacements = {}
@@ -187,14 +181,14 @@
       if state.action():  # TODO(dcarney): allow this via action chaining
         continue
       # We only perform this optimization on 'token' actions
-      match_action = DfaOptimizer.__match_action(state)
+      match_action = state.match_action()
       if not match_action.name() == 'token':
         continue
       assert state.omega_transition() in dfa.terminal_set()
       # we can drop incoming edges for states with a match action
       # of either 'token' or 'harmony_token'
       def is_replacement_candidate(state):
-        action = DfaOptimizer.__match_action(state)
+        action = state.match_action()
         return action.name() == 'token' or action.name() == 'harmony_token'
       for (incoming_key, incoming_state) in incoming:
         # check to see if incoming edges are matched by an outgoing edge
@@ -236,13 +230,13 @@
   @staticmethod
   def __split_target_state(state):
     old_name = DfaOptimizer.__new_state_name(state)
-    old_match_action = DfaOptimizer.__match_action(state)
+    old_match_action = state.match_action()
     assert old_match_action.name() == 'token', 'unimplemented'
     precedence = old_match_action.precedence()
     match_action = Action(Term('do_stored_token'), precedence)
-    head_action = Action(
-      Term('store_token', old_match_action.term().args()[0]), precedence)
-    tail_action = Action(Term('no_op'), precedence)
+    stored_token = old_match_action.term().args()[0]
+    head_action = Action(Term('store_token', stored_token), precedence)
+    tail_action = Action(Term('no_op', stored_token), precedence)
     head = DfaOptimizer.__new_state(False, head_action)
     tail = DfaOptimizer.__new_state(False, tail_action)
     match = DfaOptimizer.__new_state(True, match_action)
@@ -253,9 +247,9 @@
   # generate a store action to replace an existing action
   @staticmethod
   def __replacement_action(state, transition_state):
-    old_action = DfaOptimizer.__match_action(state)
+    old_action = state.match_action()
     assert old_action
- transition_action = DfaOptimizer.__match_action(transition_state).term()
+    transition_action = transition_state.match_action().term()
     if old_action.term() == transition_action:
       # no need to store token
       return Action.empty_action()
=======================================
--- /branches/experimental/parser/tools/lexer_generator/dot_utilities.py Mon Feb 17 11:26:21 2014 UTC +++ /branches/experimental/parser/tools/lexer_generator/dot_utilities.py Wed Feb 19 10:54:59 2014 UTC
@@ -29,6 +29,7 @@
 from key_encoding import KeyEncoding
 from term import Term
 from transition_key import TransitionKey
+from automaton import Action

 def escape_for_dot(v):
   v = str(v)
@@ -70,11 +71,51 @@
   process(term)
   return 'digraph { %s %s }' % ('\n'.join(nodes), '\n'.join(edges))

-def automaton_to_dot(automaton):
-
-  def f(node, (node_content, edge_content)):
-    if node.action():
-      action_text = escape_for_dot(node.action())
+def automaton_to_dot(automaton, merge = False):
+  terminal_set = automaton.terminal_set()
+  to_skip = set([])
+  # get a replacement action for a state's omega transition
+  def analyse_omega_chain(state):
+    omega_chain = list(state.omega_chain_iter())
+    if len(omega_chain) == 1:
+      omega_state = omega_chain[0][0]
+      # Handle match nodes
+      if omega_chain[0][1] == 0:
+        if omega_state in terminal_set:
+          to_skip.add(omega_state)  # don't draw omega transition state
+          terminal_set.add(state)
+          return omega_state.action()
+    elif len(omega_chain) == 2:
+      first_action = omega_chain[0][0].action().name()
+      if ((first_action == 'store_token' or
+           first_action == 'store_harmony_token') and
+          omega_chain[1][0].action().name() == 'no_op'):
+        if (omega_chain[0][0].action().term().args() ==
+            omega_chain[1][0].action().term().args()):
+          return Action(Term('goto', omega_chain[0][0].node_number()), 0)
+        else:
+          to_skip.add(omega_chain[0][0])  # don't draw first transition
+          return Action(Term('chain',
+                        omega_chain[0][0].action().term(),
+                        Term('goto', omega_chain[1][0].node_number())), 0)
+    return Action.empty_action()
+  # draw state
+  skipped = set([])
+  drawn = set([])
+  def draw(node, (node_content, edge_content)):
+    if node in to_skip:  # skip match nodes
+      skipped.add(node)
+      return (node_content, edge_content)
+    drawn.add(node)
+ omega_action = analyse_omega_chain(node) if merge else Action.empty_action()
+    node_action = node.action()
+    if node_action.name() == 'no_op':
+      node_action = Action.empty_action()
+    if node_action or omega_action:
+      action_text = str(node.action())
+      if omega_action:
+        action_text += ' | ' + str(omega_action)
+      action_text = '< %s >' % escape_for_dot(action_text)
       node_content.append('  S_l%s[shape = box, label="%s"];' %
                           (node.node_number(), action_text))
       node_content.append('  S_%s -> S_l%s [arrowhead = none];' %
@@ -83,6 +124,8 @@
       if key == TransitionKey.epsilon():
         key = "&epsilon;"
       elif key == TransitionKey.omega():
+        if omega_action:  # don't draw omega transition
+          continue
         key = "&omega;"
       else:
         key = key.to_string(automaton.encoding())
@@ -90,12 +133,15 @@
           node.node_number(), state.node_number(), escape_for_dot(key)))
     return (node_content, edge_content)

-  (node_content, edge_content) = automaton.visit_all_states(f, ([], []))
+  (node_content, edge_content) = automaton.visit_all_states(draw, ([], []))
+
+  assert skipped == to_skip
+  terminal_set -= to_skip
+  assert automaton.node_count() == len(skipped) + len(drawn)

   start_set = automaton.start_set()
   assert len(start_set) == 1
   start_node = iter(start_set).next()
-  terminal_set = automaton.terminal_set()

   terminals = ["S_%d;" % x.node_number() for x in terminal_set]
   start_number = start_node.node_number()
=======================================
--- /branches/experimental/parser/tools/lexer_generator/generator.py Mon Feb 17 11:26:21 2014 UTC +++ /branches/experimental/parser/tools/lexer_generator/generator.py Wed Feb 19 10:54:59 2014 UTC
@@ -66,7 +66,7 @@
 %s
     </script>'''

-def generate_html(rule_processor, minimize_default):
+def generate_html(rule_processor, minimize_default, merge):
   scripts = []
   loads = []
for i, (name, automata) in enumerate(list(rule_processor.automata_iter())):
@@ -77,10 +77,10 @@
     (nfa_i, dfa_i, mdfa_i) = ("nfa_%d" % i, "dfa_%d" % i, "mdfa_%d" % i)
     scripts.append(script_template % (nfa_i, automaton_to_dot(nfa)))
     loads.append(load_template % ("nfa [%s]" % name, nfa_i))
-    scripts.append(script_template % (dfa_i, automaton_to_dot(dfa)))
+    scripts.append(script_template % (dfa_i, automaton_to_dot(dfa, merge)))
     loads.append(load_template % ("dfa [%s]" % name, dfa_i))
     if mdfa and mdfa.node_count() != dfa.node_count():
-      scripts.append(script_template % (mdfa_i, automaton_to_dot(mdfa)))
+ scripts.append(script_template % (mdfa_i, automaton_to_dot(mdfa, merge)))
       loads.append(load_template % ("mdfa [%s]" % name, mdfa_i))
   body = "\n".join(scripts) + (load_outer_template % "\n".join(loads))
   return file_template % body
@@ -128,6 +128,7 @@

   parser = argparse.ArgumentParser()
   parser.add_argument('--html')
+  parser.add_argument('--no-merge-html', action='store_true')
   parser.add_argument('--re', default='src/lexer/lexer_py.re')
   parser.add_argument('--input')
   parser.add_argument('--code')
@@ -169,7 +170,8 @@

   html_file = args.html
   if html_file:
-    html = generate_html(rule_processor, minimize_default)
+    html = generate_html(
+      rule_processor, minimize_default, not args.no_merge_html)
     with open(args.html, 'w') as f:
       f.write(html)
       if verbose:
=======================================
--- /branches/experimental/parser/tools/lexer_generator/nfa.py Mon Feb 17 11:26:21 2014 UTC +++ /branches/experimental/parser/tools/lexer_generator/nfa.py Wed Feb 19 10:54:59 2014 UTC
@@ -131,7 +131,11 @@
     super(Nfa, self).__init__(encoding)
     self.__start = start
     self.__end = end
-    self.__verify(nodes_created)
+    self.__node_count = nodes_created
+    self.__verify()
+
+  def node_count(self):
+    return self.__node_count

   def start_state(self):
     return self.__start
@@ -139,12 +143,12 @@
   def terminal_set(self):
     return set([self.__end])

-  def __verify(self, nodes_created):
+  def __verify(self):
     def f(node, count):
       node.post_creation_verify()
       return count + 1
     count = self.visit_all_states(f, 0)
-    assert count == nodes_created
+    assert count == self.__node_count

   @staticmethod
   def __gather_transition_keys(encoding, state_set):
=======================================
--- /branches/experimental/parser/tools/lexer_generator/nfa_builder.py Mon Feb 17 11:26:21 2014 UTC +++ /branches/experimental/parser/tools/lexer_generator/nfa_builder.py Wed Feb 19 10:54:59 2014 UTC
@@ -153,15 +153,19 @@
     self.__patch_ends(ends, node)
     return (start, [node])

-  def __match_action(self, action, precedence, subtree):
-    (start, ends) = self.__process(subtree)
+  def __build_match_node(self, action):
     node = self.__new_state()
-    node.set_action(Action(action, precedence))
+    node.set_action(action)
     # Force all match actions to be terminal.
     node.close(self.__global_end)
     # patch via a match edge into existing graph
     match_node = self.__new_state()
     match_node.add_transition(TransitionKey.omega(), node)
+    return match_node
+
+  def __match_action(self, action, precedence, subtree):
+    (start, ends) = self.__process(subtree)
+    match_node =  self.__build_match_node(Action(action, precedence))
     self.__patch_ends(ends, match_node)
     return (start, [match_node])

@@ -291,14 +295,17 @@
     return terms[0] if len(terms) == 1 else Term('CAT', *terms)

   @staticmethod
-  def nfa(encoding, character_classes, subtree_map, name):
+  def nfa(encoding, character_classes, subtree_map,
+          name, default_action = Action.empty_action()):
     self = NfaBuilder(encoding, character_classes, subtree_map)
     (start, ends) = self.__subgraph(name)
     # close all ends
+    for e in ends:
+      match_node =  self.__build_match_node(default_action)
+      e.close(match_node)
+      match_node.close(None)
     end = self.__global_end
     end.close(None)
-    # TODO(dcarney): patch in default action
-    self.__patch_ends(ends, end)
     # Prepare the nfa
     self.__compute_epsilon_closures(start)
     f = lambda node, state: self.__replace_catch_all(self.__encoding, node)
=======================================
--- /branches/experimental/parser/tools/lexer_generator/term.py Mon Feb 17 11:26:21 2014 UTC +++ /branches/experimental/parser/tools/lexer_generator/term.py Wed Feb 19 10:54:59 2014 UTC
@@ -67,5 +67,8 @@
   # TODO(dcarney): escape '(', ')' and ',' in strings
   def __str__(self):
     if self.__str == None:
-      self.__str = '(%s)' % ','.join(map(str, self.__tuple))
+      if not self:
+        self.__str = ''
+      else:
+ self.__str = '%s(%s)' % (self.name(), ','.join(map(str, self.args())))
     return self.__str
=======================================
--- /branches/experimental/parser/tools/lexer_generator/test/term_test.py Mon Feb 17 11:26:21 2014 UTC +++ /branches/experimental/parser/tools/lexer_generator/test/term_test.py Wed Feb 19 10:54:59 2014 UTC
@@ -35,17 +35,17 @@
     t = Term('a')
     self.assertEqual('a', t.name())
     self.assertEqual((), t.args())
-    self.assertEqual("(a)", str(t))
+    self.assertEqual("a()", str(t))
     self.assertEqual(Term('a'), t)

     t = Term('a', 'b', 'c')
     self.assertEqual('a', t.name())
     self.assertEqual(('b', 'c'), t.args())
-    self.assertEqual("(a,b,c)", str(t))
+    self.assertEqual("a(b,c)", str(t))
     self.assertEqual(Term('a', 'b', 'c'), t)

     t = Term('a', Term('b', 'c'))
     self.assertEqual('a', t.name())
     self.assertEqual((Term('b', 'c'), ), t.args())
-    self.assertEqual("(a,(b,c))", str(t))
+    self.assertEqual("a(b(c))", str(t))
     self.assertEqual(Term('a', Term('b', 'c')), t)

--
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
--- You received this message because you are subscribed to the Google Groups "v8-dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
For more options, visit https://groups.google.com/groups/opt_out.

Reply via email to