Revision: 17895
Author: [email protected]
Date: Wed Nov 20 08:59:01 2013 UTC
Log: Experimental lexer generator: rename_things(Dfa minimization) +=
comments
- DfaState.key_matches -> transition_state_with_key.
- action_map -> initial_partitions
- __partition -> __create_initial_partitions
- if k[0] == "terminal_set" or k[0] == "terminal action" or True: this
always happens
- other small variable renamings around transition -> transition_state
BUG=
[email protected]
Review URL: https://codereview.chromium.org/77733005
http://code.google.com/p/v8/source/detail?r=17895
Modified:
/branches/experimental/parser/tools/lexer_generator/dfa.py
=======================================
--- /branches/experimental/parser/tools/lexer_generator/dfa.py Tue Nov 19
18:25:42 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/dfa.py Wed Nov 20
08:59:01 2013 UTC
@@ -68,8 +68,10 @@
def next_state_with_char(self, value):
return self.__matches(lambda k, v : k.matches_char(v), value)
- def key_matches(self, value):
- return self.__matches(lambda k, v : k.is_superset_of_key(v), value)
+ def transition_state_with_key(self, key):
+ # 'key' can be a subkey of one of the TransitionKeys for this state,
but it
+ # cannot partially overlap a TransitionKey for this state.
+ return self.__matches(lambda k, v : k.is_superset_of_key(v), key)
class Dfa(Automaton):
@@ -207,14 +209,27 @@
self.__reverse_id_map = None
self.__alphabet = None
- def __partition(self):
+ def __create_initial_partitions(self):
assert not self.__id_map
assert not self.__reverse_id_map
assert not self.__alphabet
- action_map = {}
+
+ # For the minimization, we associate each state with an ID. A set of
states
+ # is represented as a set of state IDs.
id_map = {}
+
+ # First we partition the states into the following groups:
+ # - terminal states without action
+ # - nonterminal states without action
+ # - one group per action: terminal states which have the action
+ # - one group per action: nonterminal states which have the action
+ # These are the keys of initial_partitions. The values are lists of
state
+ # IDs.
+ initial_partitions = {}
terminal_set = self.__dfa.terminal_set()
- all_keys = []
+ all_keys = [] # Will contain all TransitionKeys in the dfa.
+
+ # f fills in initial_partitions, id_map and all_keys.
def f(state, visitor_state):
state_id = len(id_map)
id_map[state_id] = state
@@ -230,29 +245,43 @@
key = ("terminal set",)
else:
key = ("nonterminal set",)
- if not key in action_map:
- action_map[key] = set()
- action_map[key].add(state_id)
+ if not key in initial_partitions:
+ initial_partitions[key] = set()
+ initial_partitions[key].add(state_id)
self.__dfa.visit_all_states(f)
partitions = set()
working_set = set()
- for k, p in action_map.items():
+
+ # Create StatePartition objects:
+ for k, p in initial_partitions.items():
p = StatePartition(p)
partitions.add(p)
- if k[0] == "terminal_set" or k[0] == "terminal action" or True:
- working_set.add(p)
+ working_set.add(p)
reverse_id_map = {v : k for k, v in id_map.items()}
assert len(id_map) == len(reverse_id_map)
self.__reverse_id_map = reverse_id_map
self.__id_map = id_map
+
+ # The alphabet defines the TransitionKeys we need to check when
dedicing if
+ # to states of the dfa can be in the same partition. See the doc
string of
+ # TransitionKey.disjoint_keys.
+
+ # For example, if the TransitionKeys are {[a-d], [c-h]}, the alphabet
is
+ # {[a-b], [c-d], [e-h]}. If state S1 has a transition to S2 with
+ # TransitionKey [a-d], and state S3 has a transition to S4 with
+ # TransitionKey [c-h], S1 and S3 cannot be in the same partition. This
will
+ # become clear when we check the transition for TransitionKey [c-d]
(S1 has
+ # a transition to S2, S3 has a transition to S4).
self.__alphabet = list(TransitionKey.disjoint_keys(chain(*all_keys)))
- # map transitions wrt alphabet mapping
+
+ # For each state and each TransitionKey in the alphabet, find out which
+ # transition we take from the state with the TransitionKey.
transitions = {}
for state_id, state in id_map.items():
def f((key_id, key)):
- transition = state.key_matches(key)
- if transition:
- return reverse_id_map[transition]
+ transition_state = state.transition_state_with_key(key)
+ if transition_state:
+ return reverse_id_map[transition_state]
return None
transitions[state_id] = map(f, enumerate(self.__alphabet))
self.__transitions = transitions
@@ -260,16 +289,16 @@
if self.__verify:
for partition in partitions:
for state_id in partition:
- transition_array = transitions[state_id]
+ transition_state_array = transitions[state_id]
state = id_map[state_id]
for key_id, key in enumerate(self.__alphabet):
- transition_id = transition_array[key_id]
- transition_state = state.key_matches(key)
+ transition_state_id = transition_state_array[key_id]
+ transition_state = state.transition_state_with_key(key)
if not transition_state:
- assert transition_id == None
+ assert transition_state_id == None
else:
- assert transition_id != None
- assert transition_id == reverse_id_map[transition_state]
+ assert transition_state_id != None
+ assert transition_state_id ==
reverse_id_map[transition_state]
return (working_set, partitions)
@staticmethod
@@ -303,15 +332,15 @@
transition_id = transition_array[key_id]
if transition_id == None:
if verify:
- assert not state.key_matches(key)
+ assert not state.transition_state_with_key(key)
for s_id in state_ids:
- assert not id_map[s_id].key_matches(key)
+ assert not id_map[s_id].transition_state_with_key(key)
continue
transition_partition = reverse_partition_map[transition_id]
assert transition_partition
if verify:
for s_id in state_ids:
- transition = id_map[s_id].key_matches(key)
+ transition = id_map[s_id].transition_state_with_key(key)
assert transition
test_partition =
reverse_partition_map[reverse_id_map[transition]]
assert test_partition == transition_partition
@@ -321,7 +350,11 @@
return (start_name, mapping)
def minimize(self):
- (working_set, partitions) = self.__partition()
+ '''Minimize the dfa. For the general idea of minimizing automata, see
+ http://en.wikipedia.org/wiki/DFA_minimization. In addition, we need to
take
+ account the actions associated to stages, i.e., we cannot merge two
states
+ which have different actions.'''
+ (working_set, partitions) = self.__create_initial_partitions()
node_count = self.__dfa.node_count()
id_map = self.__id_map
reverse_id_map = self.__reverse_id_map
--
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
---
You received this message because you are subscribed to the Google Groups "v8-dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email
to [email protected].
For more options, visit https://groups.google.com/groups/opt_out.