Revision: 17726
Author: [email protected]
Date: Thu Nov 14 10:16:32 2013 UTC
Log: Experimental parser: faster dfa minimization
[email protected]
BUG=
Review URL: https://codereview.chromium.org/69953022
http://code.google.com/p/v8/source/detail?r=17726
Modified:
/branches/experimental/parser/tools/lexer_generator/dfa.py
/branches/experimental/parser/tools/lexer_generator/generator.py
/branches/experimental/parser/tools/lexer_generator/rule_parser.py
=======================================
--- /branches/experimental/parser/tools/lexer_generator/dfa.py Thu Nov 14 06:57:18 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/dfa.py Thu Nov 14 10:16:32 2013 UTC
@@ -159,12 +159,14 @@
def __init__(self, node_numbers):
self.__node_numbers = node_numbers
assert self.__node_numbers
- self.__hash = reduce(lambda acc, x: acc ^ hash(x), self.__node_numbers, 0)
+ self.__hash = None
def set(self):
return self.__node_numbers
def __hash__(self):
+ if not self.__hash:
+ self.__hash = reduce(lambda acc, x: acc ^ hash(x), self.__node_numbers, 0)
return self.__hash
def __eq__(self, other):
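
[Note: the hunk above makes StatePartition compute its hash lazily, on first use, instead of eagerly in the constructor. A minimal standalone illustration of that memoization pattern, using a hypothetical class rather than the real StatePartition:

  from functools import reduce  # reduce is a builtin on Python 2; the functools import works on both 2 and 3

  class FrozenIntSet(object):
    # Hypothetical example: the hash is computed on the first __hash__ call
    # and cached, so partitions that are only iterated never pay for hashing.
    def __init__(self, numbers):
      self.__numbers = frozenset(numbers)
      self.__hash = None

    def __hash__(self):
      if self.__hash is None:
        self.__hash = reduce(lambda acc, x: acc ^ hash(x), self.__numbers, 0)
      return self.__hash

    def __eq__(self, other):
      return self.__numbers == other.__numbers
]
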
@@ -182,17 +184,31 @@
class DfaMinimizer:
+ __verify = True
+
+ @staticmethod
+ def set_verify(verify):
+ DfaMinimizer.__verify = verify
+
def __init__(self, dfa):
self.__dfa = dfa
+ self.__id_map = None
+ self.__reverse_id_map = None
+ self.__alphabet = None
def __partition(self):
+ assert not self.__id_map
+ assert not self.__reverse_id_map
+ assert not self.__alphabet
action_map = {}
id_map = {}
terminal_set = self.__dfa.terminal_set()
+ all_keys = []
def f(state, visitor_state):
- node_number = len(id_map)
- id_map[node_number] = state
+ state_id = len(id_map)
+ id_map[state_id] = state
action = state.action()
+ all_keys.append(state.key_iter())
if action:
# TODO add this back
# assert state in self.__terminal_set
@@ -202,55 +218,61 @@
action = "terminal_set"
if not action in action_map:
action_map[action] = set()
- action_map[action].add(node_number)
+ action_map[action].add(state_id)
self.__dfa.visit_all_states(f)
partitions = set()
for p in action_map.values():
- assert p
partitions.add(StatePartition(p))
- return (id_map, partitions)
-
- def __generate_alphabet(self):
- keys = []
- self.__dfa.visit_all_states(lambda s, acc: keys.append(s.key_iter()))
- return TransitionKey.disjoint_keys(chain(*keys))
-
- @staticmethod
- def __find_partition(partitions, id):
- for p in partitions:
- if id in p:
- return p
- assert False
-
- def __verify_partitions(self, id_map, partitions):
- assert len(partitions) <= len(id_map)
- alphabet = self.__generate_alphabet()
- for partition in partitions:
- for key in alphabet:
- first = True
- mapped_partition = None
+ reverse_id_map = {v : k for k, v in id_map.items()}
+ assert len(id_map) == len(reverse_id_map)
+ self.__reverse_id_map = reverse_id_map
+ self.__id_map = id_map
+ self.__alphabet = list(TransitionKey.disjoint_keys(chain(*all_keys)))
+ # map transitions wrt alphabet mapping
+ transitions = {}
+ for state_id, state in id_map.items():
+ def f((key_id, key)):
+ transition = state.key_matches(key)
+ if transition:
+ return reverse_id_map[transition]
+ return None
+ transitions[state_id] = map(f, enumerate(self.__alphabet))
+ self.__transitions = transitions
+ # verify created structures
+ if self.__verify:
+ for partition in partitions:
for state_id in partition:
- s = id_map[state_id].key_matches(key)
- if s:
- s = self.__find_partition(partitions, s.node_number())
- if first:
- first = False
- mapped_partition = s
- assert mapped_partition == s
+ transition_array = transitions[state_id]
+ state = id_map[state_id]
+ for key_id, key in enumerate(self.__alphabet):
+ transition_id = transition_array[key_id]
+ transition_state = state.key_matches(key)
+ if not transition_state:
+ assert transition_id == None
+ else:
+ assert transition_id != None
+ assert transition_id == reverse_id_map[transition_state]
+ return partitions
@staticmethod
def __partition_count(partitions):
return len(reduce(lambda acc, p: acc | p.set(), partitions, set()))
- def __merge_partitions(self, id_map, partitions):
+ def __merge_partitions(self, partitions):
+ id_map = self.__id_map
+ reverse_id_map = self.__reverse_id_map
+ verify = self.__verify
mapping = {}
name_map = {}
+ reverse_partition_map = {}
for partition in partitions:
name_map[partition] = str(partition)
- alphabet = self.__generate_alphabet()
- reverse_id_map = {v:k for k, v in id_map.items()}
+ for state_id in partition:
+ reverse_partition_map[state_id] = partition
+ transitions = self.__transitions
for partition in partitions:
- state_id = iter(partition).next()
+ state_ids = list(partition)
+ state_id = state_ids[0]
state = id_map[state_id]
node = {
'transitions' : {},
@@ -258,45 +280,37 @@
'action' : state.action(),
}
mapping[str(partition)] = node
- for key in alphabet:
- transition_state = state.key_matches(key)
- if not transition_state:
+ transition_array = transitions[state_id]
+ for key_id, key in enumerate(self.__alphabet):
+ transition_id = transition_array[key_id]
+ if transition_id == None:
+ if verify:
+ assert not state.key_matches(key)
+ for s_id in state_ids:
+ assert not id_map[s_id].key_matches(key)
continue
- transition_id = reverse_id_map[transition_state]
- transition_partition = self.__find_partition(partitions, transition_id)
+ transition_partition = reverse_partition_map[transition_id]
assert transition_partition
+ if verify:
+ for s_id in state_ids:
+ transition = id_map[s_id].key_matches(key)
+ assert transition
+ test_partition = reverse_partition_map[reverse_id_map[transition]]
+ assert test_partition == transition_partition
node['transitions'][key] = name_map[transition_partition]
start_id = reverse_id_map[self.__dfa.start_state()]
- start_name = name_map[self.__find_partition(partitions, start_id)]
+ start_name = name_map[reverse_partition_map[start_id]]
return (start_name, mapping)
def minimize(self):
- (id_map, partitions) = self.__partition()
+ partitions = self.__partition()
node_count = self.__dfa.node_count()
- assert self.__partition_count(partitions) == node_count
- if len(partitions) == 1:
- return self.__dfa
+ id_map = self.__id_map
+ reverse_id_map = self.__reverse_id_map
+ transitions = self.__transitions
+ key_range = range(0, len(self.__alphabet))
working_set = set(partitions)
- # map alphabet
- alphabet_mapping = {}
- for i, key in enumerate(self.__generate_alphabet()):
- alphabet_mapping[key] = i
- key_range = range(0, len(alphabet_mapping))
- # map transitions wrt alphabet mapping
- reverse_id_map = {v:k for k, v in id_map.items()}
- transitions = {}
- for id, state in id_map.items():
- def f((key, key_id)):
- transition = state.key_matches(key)
- if transition:
- return reverse_id_map[transition]
- return None
- transitions[id] = map(f, alphabet_mapping.items())
-
while working_set:
- # print "working_set %s partitions %s nodes %s" % (len(working_set),
- # len(partitions),
- # node_count)
assert working_set <= partitions
assert self.__partition_count(partitions) == node_count
test_partition = iter(working_set).next()
@@ -308,7 +322,7 @@
map_into_partition = set()
for state_id, transition_array in transitions.items():
transition_id = transition_array[key_index]
- if transition_id and test_array[transition_id]:
+ if transition_id != None and test_array[transition_id]:
map_into_partition.add(state_id)
if not map_into_partition:
continue
@@ -338,8 +352,5 @@
partitions -= old_partitions
if new_partitions:
partitions |= new_partitions
- # self.__verify_partitions(id_map, partitions)
- if len(partitions) == len(id_map):
- return self.__dfa
- (start_name, mapping) = self.__merge_partitions(id_map, partitions)
+ (start_name, mapping) = self.__merge_partitions(partitions)
return Dfa(start_name, mapping)
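
[Note on the approach in this file: most of the speedup comes from replacing repeated key matching and linear partition searches with precomputed lookups. __partition now builds, per state, an array of target-state ids indexed by position in the disjoint alphabet, and __merge_partitions consults a reverse map from state id to partition. A self-contained sketch of the transition-table precomputation, with hypothetical names rather than the dfa.py API:

  def build_transition_table(states, alphabet, match):
    # states: list of hashable states, alphabet: list of disjoint keys,
    # match(state, key): returns the target state or None.
    state_ids = dict((s, i) for i, s in enumerate(states))
    table = {}
    for s, sid in state_ids.items():
      row = []
      for key in alphabet:
        target = match(s, key)
        row.append(state_ids[target] if target is not None else None)
      table[sid] = row
    return table

  # Example with two states over the alphabet ['a', 'b']:
  edges = {('s0', 'a'): 's1', ('s1', 'b'): 's1'}
  table = build_transition_table(['s0', 's1'], ['a', 'b'],
                                 lambda s, k: edges.get((s, k)))
  # table == {0: [1, None], 1: [None, 1]}

With such a table, the refinement loop can test table[state_id][key_index] against a candidate partition using integer comparisons only.]
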
=======================================
--- /branches/experimental/parser/tools/lexer_generator/generator.py Thu Nov 14 07:25:37 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/generator.py Thu Nov 14 10:16:32 2013 UTC
@@ -28,7 +28,7 @@
import argparse
from nfa import Nfa
from nfa_builder import NfaBuilder
-from dfa import Dfa
+from dfa import Dfa, DfaMinimizer
from rule_parser import RuleParser, RuleParserState, RuleProcessor
from code_generator import CodeGenerator
@@ -98,6 +98,7 @@
parser.add_argument('--input')
parser.add_argument('--code')
parser.add_argument('--minimize-default', action='store_true')
+ parser.add_argument('--no-verify-default', action='store_true')
args = parser.parse_args()
re_file = args.re
@@ -105,6 +106,12 @@
with open(re_file, 'r') as f:
rule_processor = RuleProcessor.parse(f.read())
+ if args.minimize_default:
+ if args.no_verify_default:
+ DfaMinimizer.set_verify(False)
+ rule_processor.default_automata().minimal_dfa()
+ DfaMinimizer.set_verify(True)
+
html_file = args.html
if html_file:
html = generate_html(rule_processor, args.minimize_default)
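
[Note: the new flag only has an effect together with --minimize-default; it skips the comparatively slow verification pass while still minimizing the default automaton. An invocation sketched from the options visible in this file (the rules file name is a placeholder):

  python generator.py --re lexer_rules.re --minimize-default --no-verify-default
]
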
=======================================
--- /branches/experimental/parser/tools/lexer_generator/rule_parser.py Thu Nov 14 07:25:37 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/rule_parser.py Thu Nov 14 10:16:32 2013 UTC
@@ -235,21 +235,27 @@
class Automata(object):
- def __init__(self, nfa):
- (start, dfa_nodes) = nfa.compute_dfa()
- self.__nfa = nfa
- self.__dfa = Dfa(start, dfa_nodes)
+ def __init__(self, builder, graph):
+ self.__builder = builder
+ self.__graph = graph
+ self.__nfa = None
+ self.__dfa = None
self.__minimial_dfa = None
def nfa(self):
+ if not self.__nfa:
+ self.__nfa = self.__builder.nfa(self.__graph)
return self.__nfa
def dfa(self):
+ if not self.__dfa:
+ (start, dfa_nodes) = self.nfa().compute_dfa()
+ self.__dfa = Dfa(start, dfa_nodes)
return self.__dfa
def minimal_dfa(self):
if not self.__minimial_dfa:
- self.__minimial_dfa = self.__dfa.minimize()
+ self.__minimial_dfa = self.dfa().minimize()
return self.__minimial_dfa
def __process_parser_state(self, parser_state):
@@ -295,6 +301,5 @@
process('default', parser_state.rules['default'])
# build the automata
for rule_name, graph in rule_map.items():
- nfa = builder.nfa(graph)
- self.__automata[rule_name] = RuleProcessor.Automata(nfa)
+ self.__automata[rule_name] = RuleProcessor.Automata(builder, graph)
self.default_action = parser_state.rules['default']['default_action']
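
[Note: the effect of this last change is that NFAs and DFAs are built lazily, per rule, on first access, rather than eagerly for every rule at parse time. A usage sketch based on the methods shown in this diff (rules_source is a placeholder string of lexer rules):

  rule_processor = RuleProcessor.parse(rules_source)
  # No NFA or DFA has been constructed yet at this point.
  automata = rule_processor.default_automata()
  dfa = automata.dfa()                  # builds the NFA and DFA for 'default' only
  minimal_dfa = automata.minimal_dfa()  # minimizes, reusing the cached DFA
]
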