Revision: 20226
Author:   [email protected]
Date:     Tue Mar 25 09:21:16 2014 UTC
Log:      Experimental parser: add DfaPathWriter

[email protected]

BUG=

Review URL: https://codereview.chromium.org/209473005
http://code.google.com/p/v8/source/detail?r=20226

Added:
 /branches/experimental/parser/tools/lexer_generator/dfa_path_writer.py
Modified:
 /branches/experimental/parser/tools/lexer_generator/generator.py
 /branches/experimental/parser/tools/lexer_generator/transition_key.py

=======================================
--- /dev/null
+++ /branches/experimental/parser/tools/lexer_generator/dfa_path_writer.py Tue Mar 25 09:21:16 2014 UTC
@@ -0,0 +1,207 @@
+# Copyright 2014 the V8 project authors. All rights reserved.
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above
+#       copyright notice, this list of conditions and the following
+#       disclaimer in the documentation and/or other materials provided
+#       with the distribution.
+#     * Neither the name of Google Inc. nor the names of its
+#       contributors may be used to endorse or promote products derived
+#       from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import logging
+from transition_key import TransitionKey
+
+class Path(object):  # hashable sequence of transition keys / dfa states
+
+  def __init__(self, keys, states):
+    self.__keys = keys
+    self.__states = states
+    assert self.__states  # a path always contains at least one state
+    self.__hash = None  # computed lazily in __hash__
+
+  def sub_path(self):  # strict prefix dropping the last step, or None
+    if len(self) == 1:
+      return None
+    return Path(self.__keys[:-1], self.__states[:-1])
+
+  def key_iter(self):
+    return iter(self.__keys)
+
+  def state_at(self, index):
+    return self.__states[index]
+
+  @staticmethod
+  def __hash_function((reps, l), state):  # Python 2 tuple parameter
+    n = state.node_number()
+    h = (reps >> (l % 31) + 1) ^ reps ^ n ^ (n << (l % 31) + 1)
+    return (h, l + 1)  # fold node numbers, rotating by position l
+
+  def __hash__(self):
+    if self.__hash == None:
+      self.__hash = reduce(self.__hash_function, self.__states, (101, -74))[0]
+    return self.__hash
+
+  def __eq__(self, other):  # note: equality (like hash) ignores __keys
+    return (isinstance(other, self.__class__) and
+            self.__states == other.__states)
+
+  def __len__(self):
+    return len(self.__states)
+
+class DfaPathWriter(object):
+
+  def __init__(self, dfa):
+    self.__dfa = dfa
+
+  def write_lexer_shell_test_file(self, f):
+    dfa = self.__dfa
+    complete_paths = set()
+    total_paths = 0
+    encoding = dfa.encoding()
+    start_state = dfa.start_state()
+    for path, states_in_path, is_non_looping in dfa.path_iter():
+      states = []
+      keys = []
+      for i, (key, state) in enumerate(path[1:]):
+        states.append(state)
+        keys.append(key)
+        if key == TransitionKey.omega() and is_non_looping:
+          assert i == len(path) - 2  # omega only as the final transition
+      p = Path(keys, states)
+      assert not p in complete_paths
+      add_tail_transition = False
+      total_paths += len(p)
+      while p != None and (p not in complete_paths):
+        complete_paths.add(p)
+        self.__write_lines(p, start_state, add_tail_transition, encoding, f)
+        p = p.sub_path()  # also emit every not-yet-seen prefix
+        add_tail_transition = True
+    logging.info("complete_paths %d" % len(complete_paths))
+    logging.info("total_paths %d" % total_paths)
+
+  __representative_map = {
+    'non_primary_letter' : 256,
+    'non_primary_whitespace' : 5760,
+    'non_primary_line_terminator' : 8232,
+    'non_primary_everything_else' : 706,
+    'non_primary_identifier_part_not_letter' : 768,
+  }
+
+  __eos_rep = -1
+  __omega_rep = -2
+
+  @staticmethod
+  def get_representatives(key, remaining = -1):
+    reps = []
+    contains_special = False
+    for name, r in key.range_iter(None):
+      if name == 'PRIMARY_RANGE':
+        reps.append(r[0])
+        if r[0] != r[1]:
+          reps.append(r[1])  # test both endpoints of the range
+      elif name == 'CLASS':
+        reps.append(DfaPathWriter.__representative_map[r])
+      elif name == 'UNIQUE':
+        if r == 'eos':
+          assert remaining == 1
+          reps.append(DfaPathWriter.__eos_rep)
+          contains_special = True
+        else:
+          raise Exception('unimplemented %s %s' % (name, str(r)))
+      elif name == 'OMEGA':
+        assert remaining == 0  # omega is only valid as the last key
+        reps.append(DfaPathWriter.__omega_rep)
+        contains_special = True
+      else:
+        raise Exception('unimplemented %s %s' % (name, str(r)))
+    if contains_special:
+      assert len(reps) == 1  # eos/omega never mix with other ranges
+    return reps
+
+  @staticmethod
+  def no_transition_keys(state, encoding):
+    all_keys = set(state.key_iter())
+    eos_found = False
+    all_keys.discard(TransitionKey.omega())
+    eos_key = TransitionKey.unique('eos')
+    if eos_key in all_keys:
+      all_keys.remove(eos_key)
+      eos_found = True
+    # TODO(dcarney): have inverse key return uniques as a separate list
+    inverse_key = TransitionKey.inverse_key(encoding, all_keys)
+    if not inverse_key:
+      return []
+    reps = DfaPathWriter.get_representatives(inverse_key)
+    if not eos_found:
+      reps.append(DfaPathWriter.__eos_rep)
+    return reps
+
+  @staticmethod
+  def __write_line(head, tail, f):
+    length = len(head)
+    if tail != DfaPathWriter.__eos_rep:
+      length += 1
+    if not length:
+      return  # nothing to write
+    if tail == DfaPathWriter.__eos_rep:
+      f.write('%s\n' % ' '.join(map(str, head)))
+    elif head:
+      f.write('%s %s\n' % (' '.join(map(str, head)), str(tail)))
+    else:
+      f.write("%s\n" % str(tail))
+
+  def __write_lines(self, path, start_state, add_tail_transition, encoding, f):
+    current_state = start_state
+    reps = []
+    last_index = len(path) - 1
+    if add_tail_transition:
+      last_index += 1
+    for i, key in enumerate(path.key_iter()):
+      # current_state = current_state.transition_state_for_key(key)
+      # assert current_state == self.__states[i]
+      reps.append(self.get_representatives(key, last_index - i))
+    head = []
+    for i, x in enumerate(reps):
+      assert x
+      if x[0] >= 0:
+        # TODO(dcarney): produce a file with x[-1] here
+        head.append(x[0])
+      elif x[0] == DfaPathWriter.__eos_rep:
+        # this is a trailing eos or a trailing omega following eos.
+        assert i == len(reps) - 1 or (
+          len(x) == 1 and i == len(reps) - 2 and
+          len(reps[-1]) == 1 and reps[-1][0] == DfaPathWriter.__omega_rep)
+        assert add_tail_transition == (i == len(reps) - 1)
+        if i == len(reps) - 1:
+          add_tail_transition = False
+      elif x[0] == DfaPathWriter.__omega_rep:
+        # this is a trailing omega
+        # TODO(dcarney): produce an omega transition, not eos.
+        assert not add_tail_transition
+        assert len(x) == 1 and i == len(reps) - 1
+      else:
+        raise Exception('unreachable')
+    if not add_tail_transition:
+      self.__write_line(head, self.__eos_rep, f)
+      return
+    no_transition_keys = self.no_transition_keys(path.state_at(-1), encoding)
+    # TODO(dcarney): don't test all edges here.
+    for tail in no_transition_keys:
+      self.__write_line(head, tail, f)
=======================================
--- /branches/experimental/parser/tools/lexer_generator/generator.py Fri Feb 28 13:29:06 2014 UTC
+++ /branches/experimental/parser/tools/lexer_generator/generator.py Tue Mar 25 09:21:16 2014 UTC
@@ -34,6 +34,7 @@
 from dfa_minimizer import DfaMinimizer
 from rule_parser import RuleParser, RuleParserState, RuleProcessor
 from code_generator import CodeGenerator
+from dfa_path_writer import DfaPathWriter

 file_template = '''
 <html>
@@ -141,6 +142,7 @@
   parser.add_argument('--verbose', action='store_true')
   parser.add_argument('--debug-code', action='store_true')
   parser.add_argument('--profile', action='store_true')
+  parser.add_argument('--lexer-shell-test-file')
   parser.add_argument('--rule-html')
   args = parser.parse_args()

@@ -168,35 +170,40 @@
         dfa.node_count(), mdfa.node_count()))
     DfaMinimizer.set_verify(True)

-  html_file = args.html
-  if html_file:
+  if args.lexer_shell_test_file:
+    dfa = rule_processor.default_automata().dfa()
+    if minimize_default:
+      dfa = rule_processor.default_automata().minimal_dfa()
+    path_writer = DfaPathWriter(dfa)
+    with open(args.lexer_shell_test_file, 'w') as f:
+      path_writer.write_lexer_shell_test_file(f)
+ logging.info("wrote lexer_shell file to %s" % args.lexer_shell_test_file)
+
+  if args.html:
     html = generate_html(
       rule_processor, minimize_default, not args.no_merge_html)
     with open(args.html, 'w') as f:
       f.write(html)
-      logging.info("wrote html to %s" % html_file)
+      logging.info("wrote html to %s" % args.html)

-  rule_html_file = args.rule_html
-  if rule_html_file:
+  if args.rule_html:
     html = generate_rule_tree_html(rule_processor)
-    with open(rule_html_file, 'w') as f:
+    with open(args.rule_html, 'w') as f:
       f.write(html)
-      logging.info("wrote html to %s" % rule_html_file)
+      logging.info("wrote rule html to %s" % args.rule_html)

-  code_file = args.code
-  if code_file:
+  if args.code:
     code_generator = CodeGenerator(rule_processor,
                                    minimize_default = minimize_default,
                                    inline = not args.no_inline,
                                    debug_print = args.debug_code)
     code = code_generator.process()
-    with open(code_file, 'w') as f:
+    with open(args.code, 'w') as f:
       f.write(code)
-      logging.info("wrote code to %s" % code_file)
+      logging.info("wrote code to %s" % args.code)

-  input_file = args.input
-  if input_file:
-    with open(input_file, 'r') as f:
+  if args.input:
+    with open(args.input, 'r') as f:
       lex(rule_processor, f.read())

   if args.profile:
=======================================
--- /branches/experimental/parser/tools/lexer_generator/transition_key.py Wed Feb 19 15:52:12 2014 UTC
+++ /branches/experimental/parser/tools/lexer_generator/transition_key.py Tue Mar 25 09:21:16 2014 UTC
@@ -425,6 +425,7 @@
     components = TransitionKey.__disjoint_components(encoding, components, True)
     if invert:
       components = TransitionKey.__invert_components(encoding, components)
+    components = list(components)  # must materialize for compare below
     return None if not components else TransitionKey(encoding, components)

   @staticmethod

--
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
--- You received this message because you are subscribed to the Google Groups "v8-dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
For more options, visit https://groups.google.com/d/optout.

Reply via email to