Revision: 17904
Author: [email protected]
Date: Wed Nov 20 10:31:17 2013 UTC
Log: Experimental lexer generator: rename_things(Dfa minimization) +=
comments part 2
- id_map -> state_to_id
- reverse_id_map -> id_to_state
- __merge_partitions -> __create_states_from_partitions
- name_map -> partition_to_name
- reverse_partition_map -> state_id_to_partition
- mapping -> partition_name_to_node
- transition_id -> transition_state_id (moar)
- x -> state_id
- map_into_partition -> states_transitioning_to_test_partition (still not a
great name)
BUG=
Review URL: https://codereview.chromium.org/78053002
http://code.google.com/p/v8/source/detail?r=17904
Modified:
/branches/experimental/parser/tools/lexer_generator/dfa.py
=======================================
--- /branches/experimental/parser/tools/lexer_generator/dfa.py Wed Nov 20
10:06:26 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/dfa.py Wed Nov 20
10:31:17 2013 UTC
@@ -158,18 +158,18 @@
def __init__(self, dfa):
self.__dfa = dfa
- self.__id_map = None
- self.__reverse_id_map = None
+ self.__id_to_state = None
+ self.__state_to_id = None
self.__alphabet = None
def __create_initial_partitions(self):
- assert not self.__id_map
- assert not self.__reverse_id_map
+ assert not self.__id_to_state
+ assert not self.__state_to_id
assert not self.__alphabet
# For the minimization, we associate each state with an ID. A set of
states
# is represented as a set of state IDs.
- id_map = {}
+ id_to_state = {}
# First we partition the states into the following groups:
# - terminal states without action
@@ -182,10 +182,10 @@
terminal_set = self.__dfa.terminal_set()
all_keys = [] # Will contain all TransitionKeys in the dfa.
- # f fills in initial_partitions, id_map and all_keys.
+ # f fills in initial_partitions, id_to_state and all_keys.
def f(state, visitor_state):
- state_id = len(id_map)
- id_map[state_id] = state
+ state_id = len(id_to_state)
+ id_to_state[state_id] = state
action = state.action()
all_keys.append(state.key_iter())
if action:
@@ -210,13 +210,13 @@
p = StatePartition(p)
partitions.add(p)
working_set.add(p)
- reverse_id_map = {v : k for k, v in id_map.items()}
- assert len(id_map) == len(reverse_id_map)
- self.__reverse_id_map = reverse_id_map
- self.__id_map = id_map
+ state_to_id = {v : k for k, v in id_to_state.items()}
+ assert len(id_to_state) == len(state_to_id)
+ self.__state_to_id = state_to_id
+ self.__id_to_state = id_to_state
# The alphabet defines the TransitionKeys we need to check when
dedicing if
- # to states of the dfa can be in the same partition. See the doc
string of
+ # two states of the dfa can be in the same partition. See the doc
string of
# TransitionKey.disjoint_keys.
# For example, if the TransitionKeys are {[a-d], [c-h]}, the alphabet
is
@@ -230,11 +230,11 @@
# For each state and each TransitionKey in the alphabet, find out which
# transition we take from the state with the TransitionKey.
transitions = {}
- for state_id, state in id_map.items():
+ for state_id, state in id_to_state.items():
def f((key_id, key)):
transition_state = state.transition_state_for_key(key)
if transition_state:
- return reverse_id_map[transition_state]
+ return state_to_id[transition_state]
return None
transitions[state_id] = map(f, enumerate(self.__alphabet))
self.__transitions = transitions
@@ -243,7 +243,7 @@
for partition in partitions:
for state_id in partition:
transition_state_array = transitions[state_id]
- state = id_map[state_id]
+ state = id_to_state[state_id]
for key_id, key in enumerate(self.__alphabet):
transition_state_id = transition_state_array[key_id]
transition_state = state.transition_state_for_key(key)
@@ -251,56 +251,74 @@
assert transition_state_id == None
else:
assert transition_state_id != None
- assert transition_state_id ==
reverse_id_map[transition_state]
+ assert transition_state_id == state_to_id[transition_state]
return (working_set, partitions)
@staticmethod
def __partition_count(partitions):
return len(reduce(lambda acc, p: acc | p.set(), partitions, set()))
- def __merge_partitions(self, partitions):
- id_map = self.__id_map
- reverse_id_map = self.__reverse_id_map
+ def __create_states_from_partitions(self, partitions):
+ '''Creates a new set of states where each state corresponds to a
+ partition.'''
+ id_to_state = self.__id_to_state
+ state_to_id = self.__state_to_id
verify = self.__verify
- mapping = {}
- name_map = {}
- reverse_partition_map = {}
+ partition_to_name = {}
+ state_id_to_partition = {}
+
+ # Fill in partition_to_name and state_id_to_partition.
for partition in partitions:
- name_map[partition] = str(partition)
+ partition_to_name[partition] = str(partition)
for state_id in partition:
- reverse_partition_map[state_id] = partition
+ state_id_to_partition[state_id] = partition
transitions = self.__transitions
+
+ # Create nodes for partitions.
+ partition_name_to_node = {}
for partition in partitions:
state_ids = list(partition)
+ # state is a representative state for the partition, and state_id is
it's
+ # ID. To check the transitions between partitions, it's enough to
check
+ # transitions from the representative state. All other states will
have
+ # equivalent transitions, that is, transitions which transition into
the
+ # same partition.
state_id = state_ids[0]
- state = id_map[state_id]
+ state = id_to_state[state_id]
node = {
'transitions' : {},
'terminal' : state in self.__dfa.terminal_set(),
'action' : state.action(),
}
- mapping[str(partition)] = node
- transition_array = transitions[state_id]
+ partition_name_to_node[str(partition)] = node
+ transition_key_to_state_id = transitions[state_id]
for key_id, key in enumerate(self.__alphabet):
- transition_id = transition_array[key_id]
- if transition_id == None:
+ transition_state_id = transition_key_to_state_id[key_id]
+ if transition_state_id == None:
if verify:
+ # There is no transition for that key from state; check that no
+ # other states in the partition have a transition with that key
+ # either.
assert not state.transition_state_for_key(key)
for s_id in state_ids:
- assert not id_map[s_id].transition_state_for_key(key)
+ assert not id_to_state[s_id].transition_state_for_key(key)
continue
- transition_partition = reverse_partition_map[transition_id]
+ transition_partition = state_id_to_partition[transition_state_id]
assert transition_partition
if verify:
+ # There is a transition for that key from state; check that all
other
+ # states in the partition have an equivalent transition
(transition
+ # into the same partition).
for s_id in state_ids:
- transition = id_map[s_id].transition_state_for_key(key)
+ transition = id_to_state[s_id].transition_state_for_key(key)
assert transition
- test_partition =
reverse_partition_map[reverse_id_map[transition]]
+ test_partition = state_id_to_partition[state_to_id[transition]]
assert test_partition == transition_partition
- node['transitions'][key] = name_map[transition_partition]
- start_id = reverse_id_map[self.__dfa.start_state()]
- start_name = name_map[reverse_partition_map[start_id]]
- return (start_name, mapping)
+ # Record the transition between partitions.
+ node['transitions'][key] = partition_to_name[transition_partition]
+ start_id = state_to_id[self.__dfa.start_state()]
+ start_name = partition_to_name[state_id_to_partition[start_id]]
+ return (start_name, partition_name_to_node)
def minimize(self):
'''Minimize the dfa. For the general idea of minimizing automata, see
@@ -309,33 +327,49 @@
which have different actions.'''
(working_set, partitions) = self.__create_initial_partitions()
node_count = self.__dfa.node_count()
- id_map = self.__id_map
- reverse_id_map = self.__reverse_id_map
+ id_to_state = self.__id_to_state
+ state_to_id = self.__state_to_id
+ # transitions is a 2-dimensional array indexed by state_id and index
of the
+ # TransitionKey in the alphabet.
+ # transitions[state_id][transition_key_ix] = transition_state_id
transitions = self.__transitions
key_range = range(0, len(self.__alphabet))
while working_set:
assert working_set <= partitions
assert self.__partition_count(partitions) == node_count
+ # We split other partitions according to test_partition: If a
partition
+ # contains two states S1 and S2, such that S1 transitions to a state
in
+ # test_partition with a TransitionKey K, and S2 transitions to a
state
+ # *not* in test_partition with the same K, then S1 and S2 cannot be
in the
+ # same partition.
test_partition = iter(working_set).next()
working_set.remove(test_partition)
- test_array = [False for x in range(0, len(id_map))]
- for x in test_partition:
- test_array[x] = True
+ state_in_test_partition = [False] * len(id_to_state)
+ for state_id in test_partition:
+ state_in_test_partition[state_id] = True
for key_index in key_range:
- map_into_partition = set()
- for state_id, transition_array in transitions.items():
- transition_id = transition_array[key_index]
- if transition_id != None and test_array[transition_id]:
- map_into_partition.add(state_id)
- if not map_into_partition:
+ # states_transitioning_to_test_partition will contain the
state_ids of
+ # the states (across all partitions) which transition into
+ # test_partition (any state within test_partition) with that key.
+ states_transitioning_to_test_partition = set()
+ for state_id, transition_key_to_state_id in transitions.items():
+ transition_state_id = transition_key_to_state_id[key_index]
+ if (transition_state_id and
+ state_in_test_partition[transition_state_id]):
+ states_transitioning_to_test_partition.add(state_id)
+ if not states_transitioning_to_test_partition:
continue
+ # For each partition, we need to split it in two: {states which
+ # transition into test_partition, states which don't}.
new_partitions = set()
old_partitions = set()
for p in partitions:
- intersection = p.set().intersection(map_into_partition)
+ intersection = p.set().intersection(
+ states_transitioning_to_test_partition)
if not intersection:
continue
- difference = p.set().difference(map_into_partition)
+ difference = p.set().difference(
+ states_transitioning_to_test_partition)
if not difference:
continue
intersection = StatePartition(intersection)
@@ -355,5 +389,5 @@
partitions -= old_partitions
if new_partitions:
partitions |= new_partitions
- (start_name, mapping) = self.__merge_partitions(partitions)
+ (start_name, mapping) =
self.__create_states_from_partitions(partitions)
return Dfa(start_name, mapping)
--
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
---
You received this message because you are subscribed to the Google Groups "v8-dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email
to [email protected].
For more options, visit https://groups.google.com/groups/opt_out.