Revision: 20226
Author: [email protected]
Date: Tue Mar 25 09:21:16 2014 UTC
Log: Experimental parser: add DfaPathWriter
[email protected]
BUG=
Review URL: https://codereview.chromium.org/209473005
http://code.google.com/p/v8/source/detail?r=20226
Added:
/branches/experimental/parser/tools/lexer_generator/dfa_path_writer.py
Modified:
/branches/experimental/parser/tools/lexer_generator/generator.py
/branches/experimental/parser/tools/lexer_generator/transition_key.py
=======================================
--- /dev/null
+++ /branches/experimental/parser/tools/lexer_generator/dfa_path_writer.py
Tue Mar 25 09:21:16 2014 UTC
@@ -0,0 +1,207 @@
+# Copyright 2014 the V8 project authors. All rights reserved.
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following
+# disclaimer in the documentation and/or other materials provided
+# with the distribution.
+# * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import logging
+from transition_key import TransitionKey
+
+class Path(object):
+
+ def __init__(self, keys, states):
+ self.__keys = keys
+ self.__states = states
+ assert self.__states
+ self.__hash = None
+
+ def sub_path(self):
+ if len(self) == 1:
+ return None
+ return Path(self.__keys[:-1], self.__states[:-1])
+
+ def key_iter(self):
+ return iter(self.__keys)
+
+ def state_at(self, index):
+ return self.__states[index]
+
+ @staticmethod
+ def __hash_function((reps, l), state):
+ n = state.node_number()
+ h = (reps >> (l % 31) + 1) ^ reps ^ n ^ (n << (l % 31) + 1)
+ return (h, l + 1)
+
+ def __hash__(self):
+ if self.__hash == None:
+ self.__hash = reduce(self.__hash_function, self.__states, (101,
-74))[0]
+ return self.__hash
+
+ def __eq__(self, other):
+ return (isinstance(other, self.__class__) and
+ self.__states == other.__states)
+
+ def __len__(self):
+ return len(self.__states)
+
+class DfaPathWriter(object):
+
+ def __init__(self, dfa):
+ self.__dfa = dfa
+
+ def write_lexer_shell_test_file(self, f):
+ dfa = self.__dfa
+ complete_paths = set()
+ total_paths = 0
+ encoding = dfa.encoding()
+ start_state = dfa.start_state()
+ for path, states_in_path, is_non_looping in dfa.path_iter():
+ states = []
+ keys = []
+ for i, (key, state) in enumerate(path[1:]):
+ states.append(state)
+ keys.append(key)
+ if key == TransitionKey.omega() and is_non_looping:
+ assert i == len(path) - 2
+ p = Path(keys, states)
+ assert not p in complete_paths
+ add_tail_transition = False
+ total_paths += len(p)
+ while p != None and (p not in complete_paths):
+ complete_paths.add(p)
+ self.__write_lines(p, start_state, add_tail_transition, encoding,
f)
+ p = p.sub_path()
+ add_tail_transition = True
+ logging.info("complete_paths %d" % len(complete_paths))
+ logging.info("total_paths %d" % total_paths)
+
+ __representative_map = {
+ 'non_primary_letter' : 256,
+ 'non_primary_whitespace' : 5760,
+ 'non_primary_line_terminator' : 8232,
+ 'non_primary_everything_else' : 706,
+ 'non_primary_identifier_part_not_letter' : 768,
+ }
+
+ __eos_rep = -1
+ __omega_rep = -2
+
+ @staticmethod
+ def get_representatives(key, remaining = -1):
+ reps = []
+ contains_special = False
+ for name, r in key.range_iter(None):
+ if name == 'PRIMARY_RANGE':
+ reps.append(r[0])
+ if r[0] != r[1]:
+ reps.append(r[1])
+ elif name == 'CLASS':
+ reps.append(DfaPathWriter.__representative_map[r])
+ elif name == 'UNIQUE':
+ if r == 'eos':
+ assert remaining == 1
+ reps.append(DfaPathWriter.__eos_rep)
+ contains_special = True
+ else:
+ raise Exception('unimplemented %s %s' % (name, str(r)))
+ elif name == 'OMEGA':
+ assert remaining == 0
+ reps.append(DfaPathWriter.__omega_rep)
+ contains_special = True
+ else:
+ raise Exception('unimplemented %s %s' % (name, str(r)))
+ if contains_special:
+ assert len(reps) == 1
+ return reps
+
+ @staticmethod
+ def no_transition_keys(state, encoding):
+ all_keys = set(state.key_iter())
+ eos_found = False
+ all_keys.discard(TransitionKey.omega())
+ eos_key = TransitionKey.unique('eos')
+ if eos_key in all_keys:
+ all_keys.remove(eos_key)
+ eos_found = True
+ # TODO(dcarney): have inverse key return uniques as a separate list
+ inverse_key = TransitionKey.inverse_key(encoding, all_keys)
+ if not inverse_key:
+ return []
+ reps = DfaPathWriter.get_representatives(inverse_key)
+ if not eos_found:
+ reps.append(DfaPathWriter.__eos_rep)
+ return reps
+
+ @staticmethod
+ def __write_line(head, tail, f):
+ length = len(head)
+ if tail != DfaPathWriter.__eos_rep:
+ length += 1
+ if not length:
+ return
+ if tail == DfaPathWriter.__eos_rep:
+ f.write('%s\n' % ' '.join(map(str, head)))
+ elif head:
+ f.write('%s %s\n' % (' '.join(map(str, head)), str(tail)))
+ else:
+ f.write("%s\n" % str(tail))
+
+ def __write_lines(self, path, start_state, add_tail_transition,
encoding, f):
+ current_state = start_state
+ reps = []
+ last_index = len(path) - 1
+ if add_tail_transition:
+ last_index += 1
+ for i, key in enumerate(path.key_iter()):
+ # current_state = current_state.transition_state_for_key(key)
+ # assert current_state == self.__states[i]
+ reps.append(self.get_representatives(key, last_index - i))
+ head = []
+ for i, x in enumerate(reps):
+ assert x
+ if x[0] >= 0:
+ # TODO(dcarney): produce a file with x[-1] here
+ head.append(x[0])
+ elif x[0] == DfaPathWriter.__eos_rep:
+ # this is a trailing eos or a trailing omega following eos.
+ assert i == len(reps) - 1 or (
+ len(x) == 1 and i == len(reps) - 2 and
+ len(reps[-1]) == 1 and reps[-1][0] == DfaPathWriter.__omega_rep)
+ assert add_tail_transition == (i == len(reps) - 1)
+ if i == len(reps) - 1:
+ add_tail_transition = False
+ elif x[0] == DfaPathWriter.__omega_rep:
+ # this is a trailing omega
+ # TODO(dcarney): produce an omega transition, not eos.
+ assert not add_tail_transition
+ assert len(x) == 1 and i == len(reps) - 1
+ else:
+ raise Exception('unreachable')
+ if not add_tail_transition:
+ self.__write_line(head, self.__eos_rep, f)
+ return
+ no_transition_keys = self.no_transition_keys(path.state_at(-1),
encoding)
+ # TODO(dcarney): don't test all edges here.
+ for tail in no_transition_keys:
+ self.__write_line(head, tail, f)
=======================================
--- /branches/experimental/parser/tools/lexer_generator/generator.py Fri
Feb 28 13:29:06 2014 UTC
+++ /branches/experimental/parser/tools/lexer_generator/generator.py Tue
Mar 25 09:21:16 2014 UTC
@@ -34,6 +34,7 @@
from dfa_minimizer import DfaMinimizer
from rule_parser import RuleParser, RuleParserState, RuleProcessor
from code_generator import CodeGenerator
+from dfa_path_writer import DfaPathWriter
file_template = '''
<html>
@@ -141,6 +142,7 @@
parser.add_argument('--verbose', action='store_true')
parser.add_argument('--debug-code', action='store_true')
parser.add_argument('--profile', action='store_true')
+ parser.add_argument('--lexer-shell-test-file')
parser.add_argument('--rule-html')
args = parser.parse_args()
@@ -168,35 +170,40 @@
dfa.node_count(), mdfa.node_count()))
DfaMinimizer.set_verify(True)
- html_file = args.html
- if html_file:
+ if args.lexer_shell_test_file:
+ dfa = rule_processor.default_automata().dfa()
+ if minimize_default:
+ dfa = rule_processor.default_automata().minimal_dfa()
+ path_writer = DfaPathWriter(dfa)
+ with open(args.lexer_shell_test_file, 'w') as f:
+ path_writer.write_lexer_shell_test_file(f)
+ logging.info("wrote lexer_shell file to %s" %
args.lexer_shell_test_file)
+
+ if args.html:
html = generate_html(
rule_processor, minimize_default, not args.no_merge_html)
with open(args.html, 'w') as f:
f.write(html)
- logging.info("wrote html to %s" % html_file)
+ logging.info("wrote html to %s" % args.html)
- rule_html_file = args.rule_html
- if rule_html_file:
+ if args.rule_html:
html = generate_rule_tree_html(rule_processor)
- with open(rule_html_file, 'w') as f:
+ with open(args.rule_html, 'w') as f:
f.write(html)
- logging.info("wrote html to %s" % rule_html_file)
+ logging.info("wrote rule html to %s" % args.rule_html)
- code_file = args.code
- if code_file:
+ if args.code:
code_generator = CodeGenerator(rule_processor,
minimize_default = minimize_default,
inline = not args.no_inline,
debug_print = args.debug_code)
code = code_generator.process()
- with open(code_file, 'w') as f:
+ with open(args.code, 'w') as f:
f.write(code)
- logging.info("wrote code to %s" % code_file)
+ logging.info("wrote code to %s" % args.code)
- input_file = args.input
- if input_file:
- with open(input_file, 'r') as f:
+ if args.input:
+ with open(args.input, 'r') as f:
lex(rule_processor, f.read())
if args.profile:
=======================================
--- /branches/experimental/parser/tools/lexer_generator/transition_key.py
Wed Feb 19 15:52:12 2014 UTC
+++ /branches/experimental/parser/tools/lexer_generator/transition_key.py
Tue Mar 25 09:21:16 2014 UTC
@@ -425,6 +425,7 @@
components = TransitionKey.__disjoint_components(encoding, components,
True)
if invert:
components = TransitionKey.__invert_components(encoding, components)
+ components = list(components) # must materialize for compare below
return None if not components else TransitionKey(encoding, components)
@staticmethod
--
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
---
You received this message because you are subscribed to the Google Groups "v8-dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email
to [email protected].
For more options, visit https://groups.google.com/d/optout.