Revision: 19308
Author: [email protected]
Date: Wed Feb 12 08:31:34 2014 UTC
Log: Experimental parser: split off dot processing
[email protected]
BUG=
Review URL: https://codereview.chromium.org/159853009
http://code.google.com/p/v8/source/detail?r=19308
Added:
/branches/experimental/parser/tools/lexer_generator/dot_utilities.py
Modified:
/branches/experimental/parser/tools/lexer_generator/action.py
/branches/experimental/parser/tools/lexer_generator/automata_test.py
/branches/experimental/parser/tools/lexer_generator/automaton.py
/branches/experimental/parser/tools/lexer_generator/encoding.py
/branches/experimental/parser/tools/lexer_generator/generator.py
/branches/experimental/parser/tools/lexer_generator/rule_parser.py
/branches/experimental/parser/tools/lexer_generator/transition_keys.py
=======================================
--- /dev/null
+++ /branches/experimental/parser/tools/lexer_generator/dot_utilities.py	Wed Feb 12 08:31:34 2014 UTC
@@ -0,0 +1,117 @@
+# Copyright 2014 the V8 project authors. All rights reserved.
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following
+# disclaimer in the documentation and/or other materials provided
+# with the distribution.
+# * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from types import IntType, StringType
+from encoding import KeyEncoding
+from action import Term
+from transition_keys import TransitionKey
+
+def escape_for_dot(v):
+ v = str(v)
+ v = v.replace('\r', '\\\\r').replace('\t', '\\\\t').replace('\n', '\\\\n')
+ v = v.replace('\\', '\\\\').replace('\"', '\\\"')
+ return v
+
+def map_characters(encoding, term):
+ if term.name() == 'LITERAL':
+ f = lambda x : KeyEncoding.to_str(encoding, x)
+ term = Term('LITERAL', "'%s'" % ''.join(map(f, term.args())))
+ elif term.name() == 'RANGE':
+ f = lambda x : "'%s'" % KeyEncoding.to_str(encoding, x)
+ term = Term('RANGE', *map(f, term.args()))
+ return term
+
+def term_to_dot(term, term_mapper = None):
+ node_ix = [0]
+ node_template = 'node [label="%s"]; N_%d;'
+ edge_template = 'N_%d -> N_%d'
+ nodes = []
+ edges = []
+
+ def process(term):
+ if type(term) == StringType or type(term) == IntType:
+ node_ix[0] += 1
+ nodes.append(node_template % (escape_for_dot(str(term)), node_ix[0]))
+ return node_ix[0]
+ elif isinstance(term, Term):
+ term = term if not term_mapper else term_mapper(term)
+ child_ixs = map(process, term.args())
+ node_ix[0] += 1
+ nodes.append(node_template % (escape_for_dot(term.name()), node_ix[0]))
+ for child_ix in child_ixs:
+ edges.append(edge_template % (node_ix[0], child_ix))
+ return node_ix[0]
+ raise Exception
+
+ process(term)
+ return 'digraph { %s %s }' % ('\n'.join(nodes), '\n'.join(edges))
+
+def automaton_to_dot(automaton):
+
+ def f(node, (node_content, edge_content)):
+ if node.action():
+ action_text = escape_for_dot(node.action())
+ node_content.append(' S_l%s[shape = box, label="%s"];' %
+ (node.node_number(), action_text))
+ node_content.append(' S_%s -> S_l%s [arrowhead = none];' %
+ (node.node_number(), node.node_number()))
+ for key, state in node.key_state_iter():
+ if key == TransitionKey.epsilon():
+ key = "ε"
+ else:
+ key = key.to_string(automaton.encoding())
+ edge_content.append(" S_%s -> S_%s [ label = \"%s\" ];" % (
+ node.node_number(), state.node_number(), escape_for_dot(key)))
+ return (node_content, edge_content)
+
+ (node_content, edge_content) = automaton.visit_all_states(f, ([], []))
+
+ start_set = automaton.start_set()
+ assert len(start_set) == 1
+ start_node = iter(start_set).next()
+ terminal_set = automaton.terminal_set()
+
+ terminals = ["S_%d;" % x.node_number() for x in terminal_set]
+ start_number = start_node.node_number()
+ start_shape = "circle"
+ if start_node in terminal_set:
+ start_shape = "doublecircle"
+
+ return '''
+digraph finite_state_machine {
+ rankdir=LR;
+ node [shape = %s, style=filled, bgcolor=lightgrey]; S_%s
+ node [shape = doublecircle, style=unfilled]; %s
+ node [shape = circle];
+ %s
+ %s
+}
+ ''' % (start_shape,
+ start_number,
+ " ".join(terminals),
+ "\n".join(edge_content),
+ "\n".join(node_content))
=======================================
--- /branches/experimental/parser/tools/lexer_generator/action.py	Tue Feb 11 12:22:19 2014 UTC
+++ /branches/experimental/parser/tools/lexer_generator/action.py	Wed Feb 12 08:31:34 2014 UTC
@@ -74,36 +74,6 @@
self.__str = '(%s)' % ','.join(map(str, self.__tuple))
return self.__str
- def to_dot(self):
- node_ix = [0]
- node_template = 'node [label="%s"]; N_%d;'
- edge_template = 'N_%d -> N_%d'
- nodes = []
- edges = []
-
- def escape(v): # TODO(dcarney): abstract into utilities
- v = str(v)
- v = v.replace('\r', '\\\\r').replace('\t', '\\\\t').replace('\n', '\\\\n')
- v = v.replace('\\', '\\\\').replace('\"', '\\\"')
- return v
-
- def process(term):
- if type(term) == StringType or type(term) == IntType:
- node_ix[0] += 1
- nodes.append(node_template % (escape(str(term)), node_ix[0]))
- return node_ix[0]
- elif isinstance(term, Term):
- child_ixs = map(process, term.args())
- node_ix[0] += 1
- nodes.append(node_template % (escape(term.name()), node_ix[0]))
- for child_ix in child_ixs:
- edges.append(edge_template % (node_ix[0], child_ix))
- return node_ix[0]
- raise Exception
-
- process(self)
- return 'digraph { %s %s }' % ('\n'.join(nodes), '\n'.join(edges))
-
class Action(object):
__empty_action = None
=======================================
--- /branches/experimental/parser/tools/lexer_generator/automata_test.py	Tue Feb 11 18:23:07 2014 UTC
+++ /branches/experimental/parser/tools/lexer_generator/automata_test.py	Wed Feb 12 08:31:34 2014 UTC
@@ -26,6 +26,7 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import unittest
+from dot_utilities import *
from automaton import Action
from regex_parser import RegexParser
from transition_keys import TransitionKey, KeyEncoding
@@ -81,7 +82,7 @@
def test_can_construct_dot(self):
for (regex, matches, not_matches) in self.__test_data:
for automaton in self.__build_automata(regex):
- automaton.to_dot()
+ automaton_to_dot(automaton)
def test_minimization(self):
encoding = self.__encoding
=======================================
--- /branches/experimental/parser/tools/lexer_generator/automaton.py	Fri Feb 7 16:06:01 2014 UTC
+++ /branches/experimental/parser/tools/lexer_generator/automaton.py	Wed Feb 12 08:31:34 2014 UTC
@@ -145,55 +145,3 @@
assert default_action
action = default_action
yield (action, last_position, len(string))
-
- def to_dot(self):
-
- def escape(v):
- v = str(v)
- v = v.replace('\r', '\\\\r').replace('\t', '\\\\t').replace('\n', '\\\\n')
- v = v.replace('\\', '\\\\').replace('\"', '\\\"')
- return v
-
- def f(node, (node_content, edge_content)):
- if node.action():
- action_text = escape(node.action())
- node_content.append(' S_l%s[shape = box, label="%s"];' %
- (node.node_number(), action_text))
- node_content.append(' S_%s -> S_l%s [arrowhead = none];' %
- (node.node_number(), node.node_number()))
- for key, state in node.key_state_iter():
- if key == TransitionKey.epsilon():
- key = "ε"
- else:
- key = key.to_string(self.encoding())
- edge_content.append(" S_%s -> S_%s [ label = \"%s\" ];" % (
- node.node_number(), state.node_number(), escape(key)))
- return (node_content, edge_content)
-
- (node_content, edge_content) = self.visit_all_states(f, ([], []))
-
- start_set = self.start_set()
- assert len(start_set) == 1
- start_node = iter(start_set).next()
- terminal_set = self.terminal_set()
-
- terminals = ["S_%d;" % x.node_number() for x in terminal_set]
- start_number = start_node.node_number()
- start_shape = "circle"
- if start_node in terminal_set:
- start_shape = "doublecircle"
-
- return '''
-digraph finite_state_machine {
- rankdir=LR;
- node [shape = %s, style=filled, bgcolor=lightgrey]; S_%s
- node [shape = doublecircle, style=unfilled]; %s
- node [shape = circle];
-%s
-%s
-}
- ''' % (start_shape,
- start_number,
- " ".join(terminals),
- "\n".join(edge_content),
- "\n".join(node_content))
=======================================
--- /branches/experimental/parser/tools/lexer_generator/encoding.py	Wed Feb 12 07:46:44 2014 UTC
+++ /branches/experimental/parser/tools/lexer_generator/encoding.py	Wed Feb 12 08:31:34 2014 UTC
@@ -1,4 +1,4 @@
-# Copyright 2013 the V8 project authors. All rights reserved.
+# Copyright 2014 the V8 project authors. All rights reserved.
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
@@ -46,7 +46,7 @@
if x > 127:
return str(x)
if not x in KeyEncoding.__printable_cache:
- res = "'%s'" % chr(x) if chr(x) in printable else str(x)
+ res = "%s" % chr(x) if chr(x) in printable else str(x)
KeyEncoding.__printable_cache[x] = res
return KeyEncoding.__printable_cache[x]
=======================================
--- /branches/experimental/parser/tools/lexer_generator/generator.py	Tue Feb 11 12:22:19 2014 UTC
+++ /branches/experimental/parser/tools/lexer_generator/generator.py	Wed Feb 12 08:31:34 2014 UTC
@@ -26,6 +26,7 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import argparse
+from dot_utilities import *
from nfa import Nfa
from nfa_builder import NfaBuilder
from dfa import Dfa, DfaMinimizer
@@ -73,12 +74,12 @@
if name != 'default' or minimize_default:
mdfa = automata.minimal_dfa()
(nfa_i, dfa_i, mdfa_i) = ("nfa_%d" % i, "dfa_%d" % i, "mdfa_%d" % i)
- scripts.append(script_template % (nfa_i, nfa.to_dot()))
+ scripts.append(script_template % (nfa_i, automaton_to_dot(nfa)))
loads.append(load_template % ("nfa [%s]" % name, nfa_i))
- scripts.append(script_template % (dfa_i, dfa.to_dot()))
+ scripts.append(script_template % (dfa_i, automaton_to_dot(dfa)))
loads.append(load_template % ("dfa [%s]" % name, dfa_i))
if mdfa and mdfa.node_count() != dfa.node_count():
- scripts.append(script_template % (mdfa_i, mdfa.to_dot()))
+ scripts.append(script_template % (mdfa_i, automaton_to_dot(mdfa)))
loads.append(load_template % ("mdfa [%s]" % name, mdfa_i))
body = "\n".join(scripts) + (load_outer_template % "\n".join(loads))
return file_template % body
@@ -86,14 +87,15 @@
def generate_rule_tree_html(rule_processor):
scripts = []
loads = []
+ mapper = lambda x : map_characters(rule_processor.encoding(), x)
for i, (name, alias) in enumerate(list(rule_processor.alias_iter())):
alias_i = "alias_%d" % i
- dot = alias.to_dot()
+ dot = term_to_dot(alias, mapper)
scripts.append(script_template % (alias_i, dot))
loads.append(load_template % ("alias [%s]" % name, alias_i))
for i, (name, automata) in enumerate(list(rule_processor.automata_iter())):
rule_i = "rule_%d" % i
- dot = automata.rule_term().to_dot()
+ dot = term_to_dot(automata.rule_term(), mapper)
scripts.append(script_template % (rule_i, dot))
loads.append(load_template % ("rule [%s]" % name, rule_i))
body = "\n".join(scripts) + (load_outer_template % "\n".join(loads))
=======================================
--- /branches/experimental/parser/tools/lexer_generator/rule_parser.py	Fri Feb 7 08:42:33 2014 UTC
+++ /branches/experimental/parser/tools/lexer_generator/rule_parser.py	Wed Feb 12 08:31:34 2014 UTC
@@ -331,10 +331,14 @@
def __init__(self, string, encoding_name):
self.__automata = {}
self.__default_action = None
- self.__parser_state = RuleParserState(KeyEncoding.get(encoding_name))
+ self.__encoding = KeyEncoding.get(encoding_name)
+ self.__parser_state = RuleParserState(self.__encoding)
RuleParser.parse(string, self.__parser_state)
self.__process_parser_state()
+ def encoding(self):
+ return self.__encoding
+
def alias_iter(self):
return iter(self.__parser_state.aliases.items())
=======================================
--- /branches/experimental/parser/tools/lexer_generator/transition_keys.py	Wed Feb 12 07:46:44 2014 UTC
+++ /branches/experimental/parser/tools/lexer_generator/transition_keys.py	Wed Feb 12 08:31:34 2014 UTC
@@ -241,8 +241,8 @@
r = component.args()
to_str = lambda x: KeyEncoding.to_str(encoding, x)
if r[0] == r[1]:
- return '%s' % to_str(r[0])
- return '[%s-%s]' % (to_str(r[0]), to_str(r[1]))
+ return "'%s'" % to_str(r[0])
+ return "['%s'-'%s']" % (to_str(r[0]), to_str(r[1]))
raise Exception('unprintable %s' % component)
def __flatten(self):
--
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
---
You received this message because you are subscribed to the Google Groups "v8-dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email
to [email protected].
For more options, visit https://groups.google.com/groups/opt_out.