Revision: 19308
Author:   [email protected]
Date:     Wed Feb 12 08:31:34 2014 UTC
Log:      Experimental parser: split off dot processing

[email protected]

BUG=

Review URL: https://codereview.chromium.org/159853009
http://code.google.com/p/v8/source/detail?r=19308

Added:
 /branches/experimental/parser/tools/lexer_generator/dot_utilities.py
Modified:
 /branches/experimental/parser/tools/lexer_generator/action.py
 /branches/experimental/parser/tools/lexer_generator/automata_test.py
 /branches/experimental/parser/tools/lexer_generator/automaton.py
 /branches/experimental/parser/tools/lexer_generator/encoding.py
 /branches/experimental/parser/tools/lexer_generator/generator.py
 /branches/experimental/parser/tools/lexer_generator/rule_parser.py
 /branches/experimental/parser/tools/lexer_generator/transition_keys.py

=======================================
--- /dev/null
+++ /branches/experimental/parser/tools/lexer_generator/dot_utilities.py Wed Feb 12 08:31:34 2014 UTC
@@ -0,0 +1,117 @@
+# Copyright 2014 the V8 project authors. All rights reserved.
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above
+#       copyright notice, this list of conditions and the following
+#       disclaimer in the documentation and/or other materials provided
+#       with the distribution.
+#     * Neither the name of Google Inc. nor the names of its
+#       contributors may be used to endorse or promote products derived
+#       from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from types import IntType, StringType
+from encoding import KeyEncoding
+from action import Term
+from transition_keys import TransitionKey
+
def escape_for_dot(v):
  """Escape an arbitrary value for use inside a double-quoted dot label.

  Backslashes are doubled FIRST; doing it after the whitespace
  substitutions (as the previous version did) re-escaped the backslashes
  those substitutions introduce, turning a newline into four backslashes
  plus 'n' instead of the intended visible '\\n'.
  """
  v = str(v)
  # Escape pre-existing backslashes and double quotes before anything else.
  v = v.replace('\\', '\\\\').replace('\"', '\\\"')
  # Render control characters as their visible two-character escapes.
  v = v.replace('\r', '\\\\r').replace('\t', '\\\\t').replace('\n', '\\\\n')
  return v
+
def map_characters(encoding, term):
  """Rewrite LITERAL and RANGE terms with human-readable character text.

  Arguments of a LITERAL are joined into one quoted string; each bound of
  a RANGE is quoted individually. Any other term is returned untouched.
  """
  kind = term.name()
  if kind == 'LITERAL':
    chars = [KeyEncoding.to_str(encoding, c) for c in term.args()]
    return Term('LITERAL', "'%s'" % ''.join(chars))
  if kind == 'RANGE':
    bounds = ["'%s'" % KeyEncoding.to_str(encoding, b) for b in term.args()]
    return Term('RANGE', *bounds)
  return term
+
def term_to_dot(term, term_mapper = None):
  """Render a Term tree as a dot 'digraph' string.

  Each leaf (string or int) and each Term becomes a labelled node;
  edges point from a Term to its arguments. term_mapper, if given,
  may rewrite a Term (e.g. map_characters) before it is rendered.
  Raises if an unsupported value is encountered.
  """
  node_ix = [0]  # boxed counter so the nested function can mutate it
  node_template = 'node [label="%s"]; N_%d;'
  edge_template = 'N_%d -> N_%d'
  nodes = []
  edges = []

  def process(term):
    # isinstance instead of type() == StringType/IntType: idiomatic, same
    # behaviour on Python 2, and no longer tied to the removed `types` names.
    if isinstance(term, (str, int)):
      node_ix[0] += 1
      nodes.append(node_template % (escape_for_dot(str(term)), node_ix[0]))
      return node_ix[0]
    elif isinstance(term, Term):
      term = term if not term_mapper else term_mapper(term)
      # List comprehension (not a lazy map) so children are numbered
      # BEFORE the parent increments the counter below.
      child_ixs = [process(arg) for arg in term.args()]
      node_ix[0] += 1
      nodes.append(node_template % (escape_for_dot(term.name()), node_ix[0]))
      for child_ix in child_ixs:
        edges.append(edge_template % (node_ix[0], child_ix))
      return node_ix[0]
    raise Exception('cannot render %r in a term graph' % (term,))

  process(term)
  return 'digraph { %s %s }' % ('\n'.join(nodes), '\n'.join(edges))
+
def automaton_to_dot(automaton):
  """Render an automaton as a dot finite-state-machine graph.

  States become circle nodes named S_<n>; a state carrying an action
  additionally gets a box node S_l<n> attached by an unlabelled edge.
  Transitions become labelled edges; epsilon transitions are labelled
  with the literal character 'ε'. Assumes exactly one start state.
  NOTE: uses Python 2-only syntax (tuple parameter, iterator .next()).
  """

  # Per-state visitor; threads the (node_content, edge_content) accumulator
  # pair through automaton.visit_all_states below.
  def f(node, (node_content, edge_content)):
    if node.action():
      # Attach the action as a separate box node linked to its state.
      action_text = escape_for_dot(node.action())
      node_content.append('  S_l%s[shape = box, label="%s"];' %
                          (node.node_number(), action_text))
      node_content.append('  S_%s -> S_l%s [arrowhead = none];' %
                          (node.node_number(), node.node_number()))
    for key, state in node.key_state_iter():
      if key == TransitionKey.epsilon():
        key = "ε"
      else:
        # Non-epsilon keys print via the automaton's encoding.
        key = key.to_string(automaton.encoding())
      edge_content.append("  S_%s -> S_%s [ label = \"%s\" ];" % (
          node.node_number(), state.node_number(), escape_for_dot(key)))
    return (node_content, edge_content)

  (node_content, edge_content) = automaton.visit_all_states(f, ([], []))

  start_set = automaton.start_set()
  # Rendering supports only a single start state.
  assert len(start_set) == 1
  start_node = iter(start_set).next()
  terminal_set = automaton.terminal_set()

  terminals = ["S_%d;" % x.node_number() for x in terminal_set]
  start_number = start_node.node_number()
  # A start state that is also terminal is drawn as a double circle.
  start_shape = "circle"
  if start_node in terminal_set:
    start_shape = "doublecircle"

  return '''
digraph finite_state_machine {
  rankdir=LR;
  node [shape = %s, style=filled, bgcolor=lightgrey]; S_%s
  node [shape = doublecircle, style=unfilled]; %s
  node [shape = circle];
  %s
  %s
}
  ''' % (start_shape,
         start_number,
         " ".join(terminals),
         "\n".join(edge_content),
         "\n".join(node_content))
=======================================
--- /branches/experimental/parser/tools/lexer_generator/action.py Tue Feb 11 12:22:19 2014 UTC
+++ /branches/experimental/parser/tools/lexer_generator/action.py Wed Feb 12 08:31:34 2014 UTC
@@ -74,36 +74,6 @@
       self.__str = '(%s)' % ','.join(map(str, self.__tuple))
     return self.__str

-  def to_dot(self):
-    node_ix = [0]
-    node_template = 'node [label="%s"]; N_%d;'
-    edge_template = 'N_%d -> N_%d'
-    nodes = []
-    edges = []
-
-    def escape(v):  # TODO(dcarney): abstract into utilities
-      v = str(v)
- v = v.replace('\r', '\\\\r').replace('\t', '\\\\t').replace('\n', '\\\\n')
-      v = v.replace('\\', '\\\\').replace('\"', '\\\"')
-      return v
-
-    def process(term):
-      if type(term) == StringType or type(term) == IntType:
-        node_ix[0] += 1
-        nodes.append(node_template % (escape(str(term)), node_ix[0]))
-        return node_ix[0]
-      elif isinstance(term, Term):
-        child_ixs = map(process, term.args())
-        node_ix[0] += 1
-        nodes.append(node_template % (escape(term.name()), node_ix[0]))
-        for child_ix in child_ixs:
-          edges.append(edge_template % (node_ix[0], child_ix))
-        return node_ix[0]
-      raise Exception
-
-    process(self)
-    return 'digraph { %s %s }' % ('\n'.join(nodes), '\n'.join(edges))
-
 class Action(object):

   __empty_action = None
=======================================
--- /branches/experimental/parser/tools/lexer_generator/automata_test.py Tue Feb 11 18:23:07 2014 UTC
+++ /branches/experimental/parser/tools/lexer_generator/automata_test.py Wed Feb 12 08:31:34 2014 UTC
@@ -26,6 +26,7 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

 import unittest
+from dot_utilities import *
 from automaton import Action
 from regex_parser import RegexParser
 from transition_keys import TransitionKey, KeyEncoding
@@ -81,7 +82,7 @@
   def test_can_construct_dot(self):
     for (regex, matches, not_matches) in self.__test_data:
       for automaton in self.__build_automata(regex):
-        automaton.to_dot()
+        automaton_to_dot(automaton)

   def test_minimization(self):
     encoding = self.__encoding
=======================================
--- /branches/experimental/parser/tools/lexer_generator/automaton.py Fri Feb 7 16:06:01 2014 UTC
+++ /branches/experimental/parser/tools/lexer_generator/automaton.py Wed Feb 12 08:31:34 2014 UTC
@@ -145,55 +145,3 @@
       assert default_action
       action = default_action
     yield (action, last_position, len(string))
-
-  def to_dot(self):
-
-    def escape(v):
-      v = str(v)
- v = v.replace('\r', '\\\\r').replace('\t', '\\\\t').replace('\n', '\\\\n')
-      v = v.replace('\\', '\\\\').replace('\"', '\\\"')
-      return v
-
-    def f(node, (node_content, edge_content)):
-      if node.action():
-        action_text = escape(node.action())
-        node_content.append('  S_l%s[shape = box, label="%s"];' %
-                            (node.node_number(), action_text))
-        node_content.append('  S_%s -> S_l%s [arrowhead = none];' %
-                            (node.node_number(), node.node_number()))
-      for key, state in node.key_state_iter():
-        if key == TransitionKey.epsilon():
-          key = "ε"
-        else:
-          key = key.to_string(self.encoding())
-        edge_content.append("  S_%s -> S_%s [ label = \"%s\" ];" % (
-            node.node_number(), state.node_number(), escape(key)))
-      return (node_content, edge_content)
-
-    (node_content, edge_content) = self.visit_all_states(f, ([], []))
-
-    start_set = self.start_set()
-    assert len(start_set) == 1
-    start_node = iter(start_set).next()
-    terminal_set = self.terminal_set()
-
-    terminals = ["S_%d;" % x.node_number() for x in terminal_set]
-    start_number = start_node.node_number()
-    start_shape = "circle"
-    if start_node in terminal_set:
-      start_shape = "doublecircle"
-
-    return '''
-digraph finite_state_machine {
-  rankdir=LR;
-  node [shape = %s, style=filled, bgcolor=lightgrey]; S_%s
-  node [shape = doublecircle, style=unfilled]; %s
-  node [shape = circle];
-%s
-%s
-}
-    ''' % (start_shape,
-           start_number,
-           " ".join(terminals),
-           "\n".join(edge_content),
-           "\n".join(node_content))
=======================================
--- /branches/experimental/parser/tools/lexer_generator/encoding.py Wed Feb 12 07:46:44 2014 UTC
+++ /branches/experimental/parser/tools/lexer_generator/encoding.py Wed Feb 12 08:31:34 2014 UTC
@@ -1,4 +1,4 @@
-# Copyright 2013 the V8 project authors. All rights reserved.
+# Copyright 2014 the V8 project authors. All rights reserved.
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions are
 # met:
@@ -46,7 +46,7 @@
     if x > 127:
       return str(x)
     if not x in KeyEncoding.__printable_cache:
-      res = "'%s'" % chr(x) if chr(x) in printable else str(x)
+      res = "%s" % chr(x) if chr(x) in printable else str(x)
       KeyEncoding.__printable_cache[x] = res
     return KeyEncoding.__printable_cache[x]

=======================================
--- /branches/experimental/parser/tools/lexer_generator/generator.py Tue Feb 11 12:22:19 2014 UTC
+++ /branches/experimental/parser/tools/lexer_generator/generator.py Wed Feb 12 08:31:34 2014 UTC
@@ -26,6 +26,7 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

 import argparse
+from dot_utilities import *
 from nfa import Nfa
 from nfa_builder import NfaBuilder
 from dfa import Dfa, DfaMinimizer
@@ -73,12 +74,12 @@
     if name != 'default' or minimize_default:
       mdfa = automata.minimal_dfa()
     (nfa_i, dfa_i, mdfa_i) = ("nfa_%d" % i, "dfa_%d" % i, "mdfa_%d" % i)
-    scripts.append(script_template % (nfa_i, nfa.to_dot()))
+    scripts.append(script_template % (nfa_i, automaton_to_dot(nfa)))
     loads.append(load_template % ("nfa [%s]" % name, nfa_i))
-    scripts.append(script_template % (dfa_i, dfa.to_dot()))
+    scripts.append(script_template % (dfa_i, automaton_to_dot(dfa)))
     loads.append(load_template % ("dfa [%s]" % name, dfa_i))
     if mdfa and mdfa.node_count() != dfa.node_count():
-      scripts.append(script_template % (mdfa_i, mdfa.to_dot()))
+      scripts.append(script_template % (mdfa_i, automaton_to_dot(mdfa)))
       loads.append(load_template % ("mdfa [%s]" % name, mdfa_i))
   body = "\n".join(scripts) + (load_outer_template % "\n".join(loads))
   return file_template % body
@@ -86,14 +87,15 @@
 def generate_rule_tree_html(rule_processor):
   scripts = []
   loads = []
+  mapper = lambda x : map_characters(rule_processor.encoding(), x)
   for i, (name, alias) in enumerate(list(rule_processor.alias_iter())):
     alias_i = "alias_%d" % i
-    dot = alias.to_dot()
+    dot = term_to_dot(alias, mapper)
     scripts.append(script_template % (alias_i, dot))
     loads.append(load_template % ("alias [%s]" % name, alias_i))
 for i, (name, automata) in enumerate(list(rule_processor.automata_iter())):
     rule_i = "rule_%d" % i
-    dot = automata.rule_term().to_dot()
+    dot = term_to_dot(automata.rule_term(), mapper)
     scripts.append(script_template % (rule_i, dot))
     loads.append(load_template % ("rule [%s]" % name, rule_i))
   body = "\n".join(scripts) + (load_outer_template % "\n".join(loads))
=======================================
--- /branches/experimental/parser/tools/lexer_generator/rule_parser.py Fri Feb 7 08:42:33 2014 UTC
+++ /branches/experimental/parser/tools/lexer_generator/rule_parser.py Wed Feb 12 08:31:34 2014 UTC
@@ -331,10 +331,14 @@
   def __init__(self, string, encoding_name):
     self.__automata = {}
     self.__default_action = None
-    self.__parser_state = RuleParserState(KeyEncoding.get(encoding_name))
+    self.__encoding = KeyEncoding.get(encoding_name)
+    self.__parser_state = RuleParserState(self.__encoding)
     RuleParser.parse(string, self.__parser_state)
     self.__process_parser_state()

+  def encoding(self):
+    return self.__encoding
+
   def alias_iter(self):
     return iter(self.__parser_state.aliases.items())

=======================================
--- /branches/experimental/parser/tools/lexer_generator/transition_keys.py Wed Feb 12 07:46:44 2014 UTC
+++ /branches/experimental/parser/tools/lexer_generator/transition_keys.py Wed Feb 12 08:31:34 2014 UTC
@@ -241,8 +241,8 @@
       r = component.args()
       to_str = lambda x: KeyEncoding.to_str(encoding, x)
       if r[0] == r[1]:
-        return '%s' % to_str(r[0])
-      return '[%s-%s]' % (to_str(r[0]), to_str(r[1]))
+        return "'%s'" % to_str(r[0])
+      return "['%s'-'%s']" % (to_str(r[0]), to_str(r[1]))
     raise Exception('unprintable %s' % component)

   def __flatten(self):

--
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
--- You received this message because you are subscribed to the Google Groups "v8-dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
For more options, visit https://groups.google.com/groups/opt_out.

Reply via email to