Revision: 17699
Author:   [email protected]
Date:     Wed Nov 13 15:06:07 2013 UTC
Log:      Experimental parser: dfa minimization

[email protected]

BUG=

Review URL: https://codereview.chromium.org/63183007
http://code.google.com/p/v8/source/detail?r=17699

Modified:
 /branches/experimental/parser/tools/lexer_generator/dfa.py
 /branches/experimental/parser/tools/lexer_generator/generator.py
 /branches/experimental/parser/tools/lexer_generator/rule_parser.py
 /branches/experimental/parser/tools/lexer_generator/transition_keys.py
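
For readers new to the algorithm: the new DfaMinimizer below implements
partition refinement (Hopcroft-style). States start out grouped by action
and terminal membership; each group is then split against the set of states
whose transition on a given key lands inside a chosen test partition, until
no split applies. A self-contained sketch of the same idea, using assumed
plain-Python types in place of the patch's Dfa and TransitionKey classes
(states as ints, transitions as a (state, symbol) -> state dict, accepting
as a set of states):

  def refine(states, alphabet, transitions, accepting):
    # Initial partition: accepting vs. non-accepting (the patch refines
    # further, splitting by action and terminal_set membership).
    partitions = set([frozenset(accepting), frozenset(states - accepting)])
    partitions.discard(frozenset())
    working_set = set(partitions)
    while working_set:
      test = working_set.pop()
      for symbol in alphabet:
        # All states whose transition on symbol lands inside test.
        preimage = set(s for s in states
                       if transitions.get((s, symbol)) in test)
        new_partitions = set()
        for p in partitions:
          inter = p & preimage
          diff = p - preimage
          if not inter or not diff:
            new_partitions.add(p)
            continue
          new_partitions.update([inter, diff])
          if p in working_set:
            working_set.discard(p)
            working_set.update([inter, diff])
          else:
            # Hopcroft's optimization: only the smaller half needs
            # to be reprocessed as a splitter.
            working_set.add(min(inter, diff, key=len))
        partitions = new_partitions
    return partitions

  # Example: states 1 and 2 are equivalent accepting states and collapse
  # into a single class; state 0 stays separate.
  states = set([0, 1, 2])
  transitions = {(0, 'a'): 1, (1, 'a'): 2, (2, 'a'): 1}
  print refine(states, set(['a']), transitions, set([1, 2]))
  # two classes: frozenset([0]) and frozenset([1, 2])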

=======================================
--- /branches/experimental/parser/tools/lexer_generator/dfa.py	Wed Nov 13 09:58:59 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/dfa.py	Wed Nov 13 15:06:07 2013 UTC
@@ -25,6 +25,7 @@
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

+from itertools import chain
 from automaton import *
 from transition_keys import TransitionKey

@@ -52,6 +53,21 @@
   def transitions(self):
     return self.__transitions

+  # TODO abstract state matching
+  def __matches(self, match_func, value):
+    f = lambda acc, (k, vs): acc | set([vs]) if match_func(k, value) else acc
+    matches = reduce(f, self.__transitions.items(), set())
+    if not matches:
+      return None
+    assert len(matches) == 1
+    return iter(matches).next()
+
+  def char_matches(self, value):
+    return self.__matches(lambda k, v : k.matches_char(v), value)
+
+  def key_matches(self, value):
+    return self.__matches(lambda k, v : k.matches_key(v), value)
+
 class Dfa(Automaton):

   def __init__(self, start_name, mapping):
@@ -82,6 +98,9 @@
     state_count = self.visit_all_states(lambda state, count: count + 1, 0)
     assert self.__node_count == state_count

+  def node_count(self):
+    return self.__node_count
+
   def start_state(self):
     return self.__start

@@ -133,22 +152,146 @@
     yield (state.action(), last_position, len(string))

   def minimize(self):
-    paritions = []
-    working_set = []
+    return DfaMinimizer(self).minimize()
+
+class StatePartition(object):
+
+  def __init__(self, node_numbers):
+    self.__node_numbers = frozenset(iter(node_numbers))
+    assert self.__node_numbers
+    self.__hash = reduce(lambda acc, x: acc ^ hash(x), self.__node_numbers, 0)
+
+  def set(self):
+    return self.__node_numbers
+
+  def __hash__(self):
+    return self.__hash
+
+  def __eq__(self, other):
+    return (isinstance(other, self.__class__) and
+            self.__node_numbers == other.__node_numbers)
+
+  def __len__(self):
+    return len(self.__node_numbers)
+
+  def __iter__(self):
+    return self.__node_numbers.__iter__()
+
+  def __str__(self):
+    return str([x for x in self.__node_numbers])
+
+class DfaMinimizer:
+
+  def __init__(self, dfa):
+    self.__dfa = dfa
+
+  def __partition(self):
     action_map = {}
     id_map = {}
+    terminal_set = self.__dfa.terminal_set()
     def f(state, visitor_state):
       node_number = state.node_number()
       assert not node_number in id_map
       id_map[node_number] = state
       action = state.action()
+      if action:
+        # TODO add this back
+        # assert state in self.__terminal_set
+        if state not in terminal_set:
+          action = "nonterminal action " + str(action)
+      elif state in terminal_set:
+        action = "terminal_set"
       if not action in action_map:
         action_map[action] = set()
       action_map[action].add(node_number)
-    self.visit_all_states(f)
-    total = 0
+    self.__dfa.visit_all_states(f)
+    partitions = set()
     for p in action_map.values():
-      paritions.append(p)
-      total += len(p)
-    assert total == self.__node_count
+      assert p
+      partitions.add(StatePartition(p))
+    return (id_map, partitions)

+  def __generate_alphabet(self):
+    keys = []
+    self.__dfa.visit_all_states(lambda s, acc: keys.append(s.key_iter()))
+    return TransitionKey.disjoint_keys(chain(*keys))
+
+  @staticmethod
+  def __find_partition(partitions, id):
+    for p in partitions:
+      if id in p:
+        return p
+    assert False
+
+  def __verify_partitions(self, id_map, partitions):
+    assert len(partitions) <= len(id_map)
+    alphabet = self.__generate_alphabet()
+    for partition in partitions:
+      for key in alphabet:
+        first = True
+        mapped_partition = None
+        for state_id in partition:
+          s = id_map[state_id].key_matches(key)
+          if s:
+            s = self.__find_partition(partitions, s.node_number())
+          if first:
+            first = False
+            mapped_partition = s
+          assert mapped_partition == s
+
+  @staticmethod
+  def __partition_count(partitions):
+    return len(reduce(lambda acc, p: acc | p.set(), partitions, set()))
+
+  def minimize(self):
+    (id_map, partitions) = self.__partition()
+    node_count = self.__dfa.node_count()
+    assert self.__partition_count(partitions) == node_count
+    if len(partitions) == 1:
+      return self.__dfa
+    working_set = set(partitions)
+    alphabet = self.__generate_alphabet()
+    all_state_ids = set(id_map.keys())
+    while working_set:
+      # print "working_set %s partitions %s nodes %s" % (len(working_set),
+      #                                                  len(partitions),
+      #                                                  node_count)
+      assert working_set <= partitions
+      assert self.__partition_count(partitions) == node_count
+      test_partition = iter(working_set).next()
+      working_set.remove(test_partition)
+      to_split = None
+      for key in alphabet:
+        # print key
+        transition_map = {}
+        map_into_partition = set()
+        for state_id in all_state_ids:
+          maps_to = id_map[state_id].key_matches(key)
+          if maps_to and maps_to.node_number() in test_partition:
+            map_into_partition.add(state_id)
+        if not map_into_partition:
+          continue
+        new_partitions = set()
+        for p in partitions:
+          intersection = p.set().intersection(map_into_partition)
+          difference = p.set().difference(map_into_partition)
+          if not intersection or not difference:
+            new_partitions.add(p)
+            continue
+          intersection = StatePartition(intersection)
+          difference = StatePartition(difference)
+          new_partitions.add(intersection)
+          new_partitions.add(difference)
+          if p in working_set:
+            working_set.remove(p)
+            working_set.add(intersection)
+            working_set.add(difference)
+          elif len(intersection) <= len(difference):
+            working_set.add(intersection)
+          else:
+            working_set.add(difference)
+        partitions = new_partitions
+    self.__verify_partitions(id_map, partitions)
+    if len(partitions) == len(id_map):
+      return self.__dfa
+    # merge partitions
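
The merge step is still open at this point in the branch: minimize() above
ends at the "# merge partitions" comment once the refined partitions differ
from the original states. Conceptually it rebuilds the automaton with one
representative state per partition. Under the same assumed plain-dict
encoding as the sketch earlier (the real Dfa mapping format is not visible
in this diff), that step could look like:

  def merge_partitions(partitions, transitions, start):
    # Map every state to a canonical representative of its class.
    representative = {}
    for p in partitions:
      rep = min(p)
      for state in p:
        representative[state] = rep
    # Rewrite transitions between representatives; transitions that
    # become identical collapse automatically in the dict.
    merged = {}
    for (state, symbol), target in transitions.items():
      merged[(representative[state], symbol)] = representative[target]
    return representative[start], merged
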
=======================================
--- /branches/experimental/parser/tools/lexer_generator/generator.py	Wed Nov 13 13:34:33 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/generator.py	Wed Nov 13 15:06:07 2013 UTC
@@ -96,7 +96,7 @@
   re_file = args.re
   print "parsing %s" % re_file
   with open(re_file, 'r') as f:
-    rule_processor = RuleProcessor.parse(f.read())
+    rule_processor = RuleProcessor.parse(f.read(), False)

   html_file = args.html
   if html_file:
=======================================
--- /branches/experimental/parser/tools/lexer_generator/rule_parser.py	Wed Nov 13 13:34:33 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/rule_parser.py	Wed Nov 13 15:06:07 2013 UTC
@@ -217,15 +217,15 @@

 class RuleProcessor(object):

-  def __init__(self, parser_state):
+  def __init__(self, parser_state, minimize_dfa):
     self.__automata = {}
-    self.__process_parser_state(parser_state)
+    self.__process_parser_state(parser_state, minimize_dfa)

   @staticmethod
-  def parse(string):
+  def parse(string, minimize_dfa = True):
     parser_state = RuleParserState()
     RuleParser.parse(string, parser_state)
-    return RuleProcessor(parser_state)
+    return RuleProcessor(parser_state, minimize_dfa)

   def automata_iter(self):
     return iter(self.__automata.items())
@@ -239,11 +239,11 @@

   class Automata(object):

-    def __init__(self, nfa):
+    def __init__(self, nfa, minimize_dfa):
       (start, dfa_nodes) = nfa.compute_dfa()
       self.__nfa = nfa
       self.__dfa = Dfa(start, dfa_nodes)
-      self.__minimial_dfa = self.__dfa.minimize()
+      self.__minimial_dfa = self.__dfa.minimize() if minimize_dfa else None

     def nfa(self):
       return self.__nfa
@@ -254,7 +254,7 @@
     def minimal_dfa(self):
       return self.__minimial_dfa

-  def __process_parser_state(self, parser_state):
+  def __process_parser_state(self, parser_state, minimize_dfa):
     rule_map = {}
     builder = NfaBuilder()
     builder.set_character_classes(parser_state.character_classes)
@@ -298,5 +298,5 @@
     # build the automata
     for rule_name, graph in rule_map.items():
       nfa = builder.nfa(graph)
-      self.__automata[rule_name] = RuleProcessor.Automata(nfa)
+      self.__automata[rule_name] = RuleProcessor.Automata(nfa, minimize_dfa)
     self.default_action = parser_state.rules['default']['default_action']
=======================================
--- /branches/experimental/parser/tools/lexer_generator/transition_keys.py	Tue Nov 12 18:47:52 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/transition_keys.py	Wed Nov 13 15:06:07 2013 UTC
@@ -41,12 +41,6 @@

   __unique_key_counter = -1

-  @staticmethod
-  def alphabet_iter():
-    for k, (lower, upper) in TransitionKey.__class_bounds.items():
-      for i in range(lower, upper + 1):
-        yield i
-
   @staticmethod
   def __in_latin_1(char):
     bound = TransitionKey.__class_bounds["latin_1"]
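
The removed brute-force alphabet_iter is superseded by
TransitionKey.disjoint_keys, which __generate_alphabet uses above to build
the minimization alphabet. The underlying idea, splitting overlapping
character ranges at every boundary so the result is pairwise disjoint, can
be sketched independently of TransitionKey (this is not its actual
implementation; ranges are assumed inclusive (lo, hi) code-point tuples):

  def disjoint_ranges(ranges):
    # Collect every boundary point, then keep each elementary interval
    # that is covered by at least one input range.
    points = set()
    for lo, hi in ranges:
      points.add(lo)
      points.add(hi + 1)
    points = sorted(points)
    out = []
    for lo, hi in zip(points, points[1:]):
      if any(r_lo <= lo and hi - 1 <= r_hi for r_lo, r_hi in ranges):
        out.append((lo, hi - 1))
    return out

  print disjoint_ranges([(97, 122), (97, 99)])  # [(97, 99), (100, 122)]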
