Revision: 18043
Author: [email protected]
Date: Mon Nov 25 12:18:26 2013 UTC
Log: Experimental parser: dfa optimization to remove transitions from
keyword lexing
[email protected]
BUG=
Review URL: https://codereview.chromium.org/85413006
http://code.google.com/p/v8/source/detail?r=18043
Added:
/branches/experimental/parser/tools/lexer_generator/dfa_optimizer.py
Modified:
/branches/experimental/parser/tools/lexer_generator/generator.py
/branches/experimental/parser/tools/lexer_generator/rule_parser.py
/branches/experimental/parser/tools/lexer_generator/transition_keys.py
=======================================
--- /dev/null
+++ /branches/experimental/parser/tools/lexer_generator/dfa_optimizer.py
Mon Nov 25 12:18:26 2013 UTC
@@ -0,0 +1,167 @@
+# Copyright 2013 the V8 project authors. All rights reserved.
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following
+# disclaimer in the documentation and/or other materials provided
+# with the distribution.
+# * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from transition_keys import TransitionKey
+from automaton import Action
+from dfa import Dfa
+
+class DfaOptimizer(object):
+
+ def __init__(self, dfa, log):
+ self.__dfa = dfa
+ self.__log = log
+
+ @staticmethod
+ def transistions_match(encoding, incoming_key, incoming_state, state):
+ keys = set(state.key_iter())
+ keys.add(incoming_key)
+ disjoint_keys = TransitionKey.disjoint_keys(encoding, keys)
+ for key in disjoint_keys:
+ if not incoming_key.is_superset_of_key(key):
+ continue
+ transition_state = state.transition_state_for_key(key)
+ if incoming_state.transition_state_for_key(key) != transition_state:
+ return False
+ return True
+
+ def replace_tokens_with_gotos(self):
+ '''replace states with no entry action and match action of
type 'token(X)'
+ with new states with entry action store_token(X) and match action
+ goto(state_id) which has (far) fewer transitions'''
+
+ dfa = self.__dfa
+ encoding = dfa.encoding()
+
+ incoming_transitions = {}
+ def build_incoming_transitions(state, visitor_state):
+ for key, transition_state in state.key_state_iter():
+ if not transition_state in incoming_transitions:
+ incoming_transitions[transition_state] = []
+ incoming_transitions[transition_state].append((key,state))
+ dfa.visit_all_states(build_incoming_transitions)
+
+ def is_replacement_candidate(state):
+ action = state.action()
+ if not action or action.entry_action() or not action.match_action():
+ return False
+ if (action.match_action()[0] == 'token' or
+ action.match_action()[0] == 'harmony_token'):
+ return True
+ return False
+
+ replacements = {}
+ for state, incoming in incoming_transitions.items():
+ if len(incoming) < 10:
+ continue
+ if not is_replacement_candidate(state):
+ continue
+ # check to see if incoming edges are matched by an outgoing edge
+ def match_filter((incoming_key, incoming_state)):
+ return (incoming_state != state and # drop self transitions
+ is_replacement_candidate(incoming_state) and
+ self.transistions_match(
+ encoding, incoming_key, incoming_state, state))
+ for incoming_key_state in incoming:
+ if not match_filter(incoming_key_state):
+ continue
+ (incoming_key, incoming_state) = incoming_key_state
+ # set this transition as to be replaced
+ if not incoming_state in replacements:
+ replacements[incoming_state] = {}
+ replacements[incoming_state][incoming_key] = state
+ # now see if we can gather other transistions into the goto
+ for key in incoming_state.key_iter():
+ if key == incoming_key:
+ continue
+ if not self.transistions_match(
+ encoding, key, incoming_state, state):
+ continue
+ # this transition can be removed
+ replacements[incoming_state][key] = None
+ if not replacements:
+ return
+ # perform replacement
+ states = {}
+ name = lambda state : str(state.node_number())
+ counters = {'removals' : 0, 'gotos' : 0}
+ store_states = set([])
+ # generate a store action to replace an existing action
+ def replacement_action(old_action, match_action):
+ assert not old_action.entry_action()
+ assert old_action.match_action()
+ if old_action.match_action()[0] == 'token':
+ entry_action = ('store_token', old_action.match_action()[1])
+ elif old_action.match_action()[0] == 'harmony_token':
+ entry_action = ('store_harmony_token',
old_action.match_action()[1])
+ else:
+ raise Exception(old_action.match_action())
+ return Action(entry_action, match_action, old_action.precedence())
+ # map the old state to the new state, with fewer transitions and
+ # goto actions
+ def replace_state(state, acc):
+ new_state = {
+ 'transitions' : {},
+ 'terminal' : state in self.__dfa.terminal_set(),
+ 'action' : state.action(),
+ }
+ states[name(state)] = new_state
+ state_replacements = replacements[state] if state in replacements
else {}
+ for transition_key, transition_state in state.transitions().items():
+ if not transition_key in state_replacements:
+ new_state['transitions'][transition_key] = name(transition_state)
+ continue
+ replacement = state_replacements[transition_key]
+ del state_replacements[transition_key]
+ # just drop the transition
+ if replacement == None:
+ counters['removals'] += 1
+ continue
+ # do goto replacement
+ counters['gotos'] += 1
+ match_action = ('goto', name(transition_state))
+ new_state['action'] = replacement_action(state.action(),
match_action)
+ # will need to patch up transition_state
+ store_states.add(name(transition_state))
+ assert not state_replacements
+ self.__dfa.visit_all_states(replace_state)
+ # now patch up all states with stores
+ for state in store_states:
+ old_action = states[state]['action']
+ match_action = ('do_stored_token', None)
+ states[state]['action'] = replacement_action(old_action,
match_action)
+ start_name = name(self.__dfa.start_state())
+ if self.__log:
+ print 'gotos inserted %s' % counters['gotos']
+ print 'transitions removed %s' % counters['removals']
+ print 'do_stored_token actions added %s' % len(store_states)
+ self.__dfa = Dfa(self.__dfa.encoding(), start_name, states)
+
+ @staticmethod
+ def optimize(dfa, log):
+ optimizer = DfaOptimizer(dfa, log)
+ optimizer.replace_tokens_with_gotos()
+ return optimizer.__dfa
=======================================
--- /branches/experimental/parser/tools/lexer_generator/generator.py Fri
Nov 22 08:40:59 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/generator.py Mon
Nov 25 12:18:26 2013 UTC
@@ -98,6 +98,7 @@
parser.add_argument('--input')
parser.add_argument('--code')
parser.add_argument('--encoding', default='latin1')
+ parser.add_argument('--optimize-default', action='store_true')
parser.add_argument('--no-minimize-default', action='store_true')
parser.add_argument('--no-verify-default', action='store_true')
parser.add_argument('--no-inline', action='store_true')
@@ -114,6 +115,9 @@
with open(re_file, 'r') as f:
rule_processor = RuleProcessor.parse(f.read(), args.encoding)
+ if args.optimize_default:
+ rule_processor.default_automata().optimize_dfa(log = args.verbose)
+
if minimize_default:
if args.no_verify_default:
DfaMinimizer.set_verify(False)
=======================================
--- /branches/experimental/parser/tools/lexer_generator/rule_parser.py Fri
Nov 22 08:40:59 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/rule_parser.py Mon
Nov 25 12:18:26 2013 UTC
@@ -31,6 +31,7 @@
from regex_parser import RegexParser
from nfa_builder import NfaBuilder
from dfa import Dfa
+from dfa_optimizer import DfaOptimizer
from transition_keys import TransitionKey, KeyEncoding
class RuleParserState:
@@ -293,6 +294,10 @@
self.__dfa = Dfa(self.nfa().encoding(), start, dfa_nodes)
return self.__dfa
+ def optimize_dfa(self, log = False):
+ assert not self.__dfa
+ self.__dfa = DfaOptimizer.optimize(self.dfa(), log)
+
def minimal_dfa(self):
if not self.__minimial_dfa:
self.__minimial_dfa = self.dfa().minimize()
=======================================
--- /branches/experimental/parser/tools/lexer_generator/transition_keys.py
Fri Nov 22 13:16:25 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/transition_keys.py
Mon Nov 25 12:18:26 2013 UTC
@@ -329,7 +329,7 @@
return ', '.join(strings)
def __str__(self):
- self.to_string(None)
+ return self.to_string(None)
@staticmethod
def __disjoint_keys(encoding, range_map):
--
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
---
You received this message because you are subscribed to the Google Groups "v8-dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email
to [email protected].
For more options, visit https://groups.google.com/groups/opt_out.