Revision: 18043
Author:   [email protected]
Date:     Mon Nov 25 12:18:26 2013 UTC
Log: Experimental parser: dfa optimization to remove transitions from keyword lexing

[email protected]

BUG=

Review URL: https://codereview.chromium.org/85413006
http://code.google.com/p/v8/source/detail?r=18043

Added:
 /branches/experimental/parser/tools/lexer_generator/dfa_optimizer.py
Modified:
 /branches/experimental/parser/tools/lexer_generator/generator.py
 /branches/experimental/parser/tools/lexer_generator/rule_parser.py
 /branches/experimental/parser/tools/lexer_generator/transition_keys.py

=======================================
--- /dev/null
+++ /branches/experimental/parser/tools/lexer_generator/dfa_optimizer.py Mon Nov 25 12:18:26 2013 UTC
@@ -0,0 +1,167 @@
+# Copyright 2013 the V8 project authors. All rights reserved.
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above
+#       copyright notice, this list of conditions and the following
+#       disclaimer in the documentation and/or other materials provided
+#       with the distribution.
+#     * Neither the name of Google Inc. nor the names of its
+#       contributors may be used to endorse or promote products derived
+#       from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from transition_keys import TransitionKey
+from automaton import Action
+from dfa import Dfa
+
+class DfaOptimizer(object):
+
+  def __init__(self, dfa, log):
+    self.__dfa = dfa
+    self.__log = log
+
+  @staticmethod
+  def transistions_match(encoding, incoming_key, incoming_state, state):
+    keys = set(state.key_iter())
+    keys.add(incoming_key)
+    disjoint_keys = TransitionKey.disjoint_keys(encoding, keys)
+    for key in disjoint_keys:
+      if not incoming_key.is_superset_of_key(key):
+        continue
+      transition_state = state.transition_state_for_key(key)
+      if incoming_state.transition_state_for_key(key) != transition_state:
+        return False
+    return True
+
+  def replace_tokens_with_gotos(self):
+ '''replace states with no entry action and match action of type 'token(X)'
+    with new states with entry action store_token(X) and match action
+    goto(state_id) which has (far) fewer transitions'''
+
+    dfa = self.__dfa
+    encoding = dfa.encoding()
+
+    incoming_transitions = {}
+    def build_incoming_transitions(state, visitor_state):
+      for key, transition_state in state.key_state_iter():
+        if not transition_state in incoming_transitions:
+          incoming_transitions[transition_state] = []
+        incoming_transitions[transition_state].append((key,state))
+    dfa.visit_all_states(build_incoming_transitions)
+
+    def is_replacement_candidate(state):
+      action = state.action()
+      if not action or action.entry_action() or not action.match_action():
+        return False
+      if (action.match_action()[0] == 'token' or
+          action.match_action()[0] == 'harmony_token'):
+        return True
+      return False
+
+    replacements = {}
+    for state, incoming in incoming_transitions.items():
+      if len(incoming) < 10:
+        continue
+      if not is_replacement_candidate(state):
+        continue
+      # check to see if incoming edges are matched by an outgoing edge
+      def match_filter((incoming_key, incoming_state)):
+        return (incoming_state != state and  # drop self transitions
+                is_replacement_candidate(incoming_state) and
+                self.transistions_match(
+                  encoding, incoming_key, incoming_state, state))
+      for incoming_key_state in incoming:
+        if not match_filter(incoming_key_state):
+          continue
+        (incoming_key, incoming_state) = incoming_key_state
+        # set this transition as to be replaced
+        if not incoming_state in replacements:
+          replacements[incoming_state] = {}
+        replacements[incoming_state][incoming_key] = state
+        # now see if we can gather other transistions into the goto
+        for key in incoming_state.key_iter():
+          if key == incoming_key:
+            continue
+          if not self.transistions_match(
+              encoding, key, incoming_state, state):
+            continue
+          # this transition can be removed
+          replacements[incoming_state][key] = None
+    if not replacements:
+      return
+    # perform replacement
+    states = {}
+    name = lambda state : str(state.node_number())
+    counters = {'removals' : 0, 'gotos' : 0}
+    store_states = set([])
+    # generate a store action to replace an existing action
+    def replacement_action(old_action, match_action):
+      assert not old_action.entry_action()
+      assert old_action.match_action()
+      if old_action.match_action()[0] == 'token':
+        entry_action = ('store_token', old_action.match_action()[1])
+      elif old_action.match_action()[0] == 'harmony_token':
+ entry_action = ('store_harmony_token', old_action.match_action()[1])
+      else:
+        raise Exception(old_action.match_action())
+      return Action(entry_action, match_action, old_action.precedence())
+    # map the old state to the new state, with fewer transitions and
+    # goto actions
+    def replace_state(state, acc):
+      new_state = {
+        'transitions' : {},
+        'terminal' : state in self.__dfa.terminal_set(),
+        'action' : state.action(),
+      }
+      states[name(state)] = new_state
+ state_replacements = replacements[state] if state in replacements else {}
+      for transition_key, transition_state in state.transitions().items():
+        if not transition_key in state_replacements:
+          new_state['transitions'][transition_key] = name(transition_state)
+          continue
+        replacement = state_replacements[transition_key]
+        del state_replacements[transition_key]
+        # just drop the transition
+        if replacement == None:
+          counters['removals'] += 1
+          continue
+        # do goto replacement
+        counters['gotos'] += 1
+        match_action = ('goto', name(transition_state))
+ new_state['action'] = replacement_action(state.action(), match_action)
+        # will need to patch up transition_state
+        store_states.add(name(transition_state))
+      assert not state_replacements
+    self.__dfa.visit_all_states(replace_state)
+    # now patch up all states with stores
+    for state in store_states:
+      old_action = states[state]['action']
+      match_action = ('do_stored_token', None)
+ states[state]['action'] = replacement_action(old_action, match_action)
+    start_name = name(self.__dfa.start_state())
+    if self.__log:
+      print 'gotos inserted %s' % counters['gotos']
+      print 'transitions removed %s' % counters['removals']
+      print 'do_stored_token actions added %s' % len(store_states)
+    self.__dfa = Dfa(self.__dfa.encoding(), start_name, states)
+
+  @staticmethod
+  def optimize(dfa, log):
+    optimizer = DfaOptimizer(dfa, log)
+    optimizer.replace_tokens_with_gotos()
+    return optimizer.__dfa
=======================================
--- /branches/experimental/parser/tools/lexer_generator/generator.py Fri Nov 22 08:40:59 2013 UTC +++ /branches/experimental/parser/tools/lexer_generator/generator.py Mon Nov 25 12:18:26 2013 UTC
@@ -98,6 +98,7 @@
   parser.add_argument('--input')
   parser.add_argument('--code')
   parser.add_argument('--encoding', default='latin1')
+  parser.add_argument('--optimize-default', action='store_true')
   parser.add_argument('--no-minimize-default', action='store_true')
   parser.add_argument('--no-verify-default', action='store_true')
   parser.add_argument('--no-inline', action='store_true')
@@ -114,6 +115,9 @@
   with open(re_file, 'r') as f:
     rule_processor = RuleProcessor.parse(f.read(), args.encoding)

+  if args.optimize_default:
+    rule_processor.default_automata().optimize_dfa(log = args.verbose)
+
   if minimize_default:
     if args.no_verify_default:
       DfaMinimizer.set_verify(False)
=======================================
--- /branches/experimental/parser/tools/lexer_generator/rule_parser.py Fri Nov 22 08:40:59 2013 UTC +++ /branches/experimental/parser/tools/lexer_generator/rule_parser.py Mon Nov 25 12:18:26 2013 UTC
@@ -31,6 +31,7 @@
 from regex_parser import RegexParser
 from nfa_builder import NfaBuilder
 from dfa import Dfa
+from dfa_optimizer import DfaOptimizer
 from transition_keys import TransitionKey, KeyEncoding

 class RuleParserState:
@@ -293,6 +294,10 @@
         self.__dfa = Dfa(self.nfa().encoding(), start, dfa_nodes)
       return self.__dfa

+    def optimize_dfa(self, log = False):
+      assert not self.__dfa
+      self.__dfa = DfaOptimizer.optimize(self.dfa(), log)
+
     def minimal_dfa(self):
       if not self.__minimial_dfa:
         self.__minimial_dfa = self.dfa().minimize()
=======================================
--- /branches/experimental/parser/tools/lexer_generator/transition_keys.py Fri Nov 22 13:16:25 2013 UTC +++ /branches/experimental/parser/tools/lexer_generator/transition_keys.py Mon Nov 25 12:18:26 2013 UTC
@@ -329,7 +329,7 @@
     return ', '.join(strings)

   def __str__(self):
-    self.to_string(None)
+    return self.to_string(None)

   @staticmethod
   def __disjoint_keys(encoding, range_map):

--
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
--- You received this message because you are subscribed to the Google Groups "v8-dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
For more options, visit https://groups.google.com/groups/opt_out.

Reply via email to