Revision: 17634
Author: [email protected]
Date: Tue Nov 12 09:47:12 2013 UTC
Log: Experimental parser: split out NfaBuilder
[email protected]
BUG=
Review URL: https://codereview.chromium.org/61893023
http://code.google.com/p/v8/source/detail?r=17634
Added:
/branches/experimental/parser/tools/lexer_generator/nfa_builder.py
Modified:
/branches/experimental/parser/tools/lexer_generator/action_test.py
/branches/experimental/parser/tools/lexer_generator/automata_test.py
/branches/experimental/parser/tools/lexer_generator/generator.py
/branches/experimental/parser/tools/lexer_generator/nfa.py
/branches/experimental/parser/tools/lexer_generator/rule_parser.py
=======================================
--- /dev/null
+++ /branches/experimental/parser/tools/lexer_generator/nfa_builder.py Tue
Nov 12 09:47:12 2013 UTC
@@ -0,0 +1,251 @@
+# Copyright 2013 the V8 project authors. All rights reserved.
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following
+# disclaimer in the documentation and/or other materials provided
+# with the distribution.
+# * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from types import TupleType
+from inspect import getmembers
+from nfa import *
+
+class NfaBuilder(object):
+
+ def __init__(self):
+ self.__node_number = 0
+ self.__operation_map = {}
+ self.__members = getmembers(self)
+ self.__character_classes = {}
+ self.__states = []
+
+ def set_character_classes(self, classes):
+ self.__character_classes = classes
+
+ def __new_state(self):
+ self.__node_number += 1
+ return NfaState(self.__node_number - 1)
+
+ def __or(self, graph):
+ start = self.__new_state()
+ ends = []
+ for x in [self.__process(graph[1]), self.__process(graph[2])]:
+ start.add_epsilon_transition(x[0])
+ ends += x[1]
+ start.close(None)
+ return (start, ends)
+
+ def __one_or_more(self, graph):
+ (start, ends) = self.__process(graph[1])
+ end = self.__new_state()
+ end.add_epsilon_transition(start)
+ self.__patch_ends(ends, end)
+ return (start, [end])
+
+ def __zero_or_more(self, graph):
+ (node, ends) = self.__process(graph[1])
+ start = self.__new_state()
+ start.add_epsilon_transition(node)
+ self.__patch_ends(ends, start)
+ return (start, [start])
+
+ def __zero_or_one(self, graph):
+ (node, ends) = self.__process(graph[1])
+ start = self.__new_state()
+ start.add_epsilon_transition(node)
+ return (start, ends + [start])
+
+ def __repeat(self, graph):
+ param_min = int(graph[1])
+ param_max = int(graph[2])
+ subgraph = graph[3]
+ (start, ends) = self.__process(subgraph)
+ for i in xrange(1, param_min):
+ (start2, ends2) = self.__process(subgraph)
+ self.__patch_ends(ends, start2)
+ ends = ends2
+ if param_min == param_max:
+ return (start, ends)
+
+ midpoints = []
+ for i in xrange(param_min, param_max):
+ midpoint = self.__new_state()
+ self.__patch_ends(ends, midpoint)
+ (start2, ends) = self.__process(subgraph)
+ midpoint.add_epsilon_transition(start2)
+ midpoints.append(midpoint)
+
+ return (start, ends + midpoints)
+
+ def __cat(self, graph):
+ (left, right) = (self.__process(graph[1]), self.__process(graph[2]))
+ self.__patch_ends(left[1], right[0])
+ return (left[0], right[1])
+
+ def __key_state(self, key):
+ state = self.__new_state()
+ state.add_unclosed_transition(key)
+ return (state, [state])
+
+ def __literal(self, graph):
+ return self.__key_state(TransitionKey.single_char(graph[1]))
+
+ def __class(self, graph):
+ return self.__key_state(
+ TransitionKey.character_class(graph, self.__character_classes))
+
+ def __not_class(self, graph):
+ return self.__key_state(
+ TransitionKey.character_class(graph, self.__character_classes))
+
+ def __any(self, graph):
+ return self.__key_state(TransitionKey.any())
+
+ def __epsilon(self, graph):
+ start = self.__new_state()
+ end = self.__new_state()
+ start.close(end)
+ return (start, [end])
+
+ def __action(self, graph):
+ (start, ends) = self.__process(graph[1])
+ action = graph[2]
+ end = self.__new_state()
+ self.__patch_ends(ends, end)
+ end.set_action(action)
+ return (start, [end])
+
+ def __continue(self, graph):
+ (start, ends) = self.__process(graph[1])
+ state = self.__peek_state()
+ if not state['start_node']:
+ state['start_node'] = self.__new_state()
+ self.__patch_ends(ends, state['start_node'])
+ return (start, [])
+
+ def __catch_all(self, graph):
+ return self.__key_state(TransitionKey.unique('catch_all'))
+
+ def __join(self, graph):
+ (graph, name, subgraph, modifier) = graph[1:]
+ subgraphs = self.__peek_state()['subgraphs']
+ if not name in subgraphs:
+ subgraphs[name] = self.__nfa(subgraph)
+ (subgraph_start, subgraph_end, nodes_in_subgraph) = subgraphs[name]
+ (start, ends) = self.__process(graph)
+ if modifier:
+ assert modifier == 'ZERO_OR_MORE'
+ for end in ends:
+ end.add_epsilon_transition(subgraph_end)
+ self.__patch_ends(ends, subgraph_start)
+ end = self.__new_state()
+ subgraph_end.add_epsilon_transition(end)
+ return (start, [end])
+
+ def __process(self, graph):
+ assert type(graph) == TupleType
+ method = "_NfaBuilder__" + graph[0].lower()
+ if not method in self.__operation_map:
+ matches = filter(lambda (name, func): name == method, self.__members)
+ assert len(matches) == 1
+ self.__operation_map[method] = matches[0][1]
+ return self.__operation_map[method](graph)
+
+ def __patch_ends(self, ends, new_end):
+ for end in ends:
+ end.close(new_end)
+
+ def __push_state(self):
+ self.__states.append({
+ 'start_node' : None,
+ 'subgraphs' : {},
+ 'unpatched_ends' : [],
+ })
+
+ def __pop_state(self):
+ return self.__states.pop()
+
+ def __peek_state(self):
+ return self.__states[len(self.__states) - 1]
+
+ def __nfa(self, graph):
+ start_node_number = self.__node_number
+ self.__push_state()
+ (start, ends) = self.__process(graph)
+ state = self.__pop_state()
+ if state['start_node']:
+ state['start_node'].close(start)
+ start = state['start_node']
+ for k, subgraph in state['subgraphs'].items():
+ subgraph[1].close(None)
+ end = self.__new_state()
+ if self.__states:
+ self.__peek_state()['unpatched_ends'] += state['unpatched_ends']
+ else:
+ self.__patch_ends(state['unpatched_ends'], end)
+ self.__patch_ends(ends, end)
+ return (start, end, self.__node_number - start_node_number)
+
+ def nfa(self, graph):
+ (start, end, nodes_created) = self.__nfa(graph)
+ end.close(None)
+ return Nfa(start, end, nodes_created)
+
+ @staticmethod
+ def add_action(graph, action):
+ return ('ACTION', graph, action)
+
+ @staticmethod
+ def add_continue(graph):
+ return ('CONTINUE', graph)
+
+ @staticmethod
+ def catch_all():
+ return ('CATCH_ALL',)
+
+ @staticmethod
+ def epsilon():
+ return ('EPSILON',)
+
+ @staticmethod
+ def join_subgraph(graph, name, subgraph, modifier):
+ if modifier:
+ modifier = NfaBuilder.__modifer_map[modifier]
+ return ('JOIN', graph, name, subgraph, modifier)
+
+ @staticmethod
+ def or_graphs(graphs):
+ return reduce(lambda acc, g: ('OR', acc, g), graphs)
+
+ @staticmethod
+ def cat_graphs(graphs):
+ return reduce(lambda acc, g: ('CAT', acc, g), graphs)
+
+ __modifer_map = {
+ '+': 'ONE_OR_MORE',
+ '?': 'ZERO_OR_ONE',
+ '*': 'ZERO_OR_MORE',
+ }
+
+ @staticmethod
+ def apply_modifier(modifier, graph):
+ return (NfaBuilder.__modifer_map[modifier], graph)
=======================================
--- /branches/experimental/parser/tools/lexer_generator/action_test.py Thu
Nov 7 12:03:35 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/action_test.py Tue
Nov 12 09:47:12 2013 UTC
@@ -27,7 +27,7 @@
import unittest
from rule_parser import RuleParser, RuleParserState
-from nfa import NfaBuilder
+from nfa_builder import NfaBuilder
from dfa import Dfa
def dfa_from_nfa(nfa):
=======================================
--- /branches/experimental/parser/tools/lexer_generator/automata_test.py
Tue Nov 12 07:12:31 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/automata_test.py
Tue Nov 12 09:47:12 2013 UTC
@@ -27,7 +27,7 @@
import unittest
from regex_parser import RegexParser
-from nfa import NfaBuilder
+from nfa_builder import NfaBuilder
from dfa import Dfa
def dfa_from_nfa(nfa):
=======================================
--- /branches/experimental/parser/tools/lexer_generator/generator.py Tue
Nov 12 07:12:31 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/generator.py Tue
Nov 12 09:47:12 2013 UTC
@@ -26,7 +26,8 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import argparse
-from nfa import Nfa, NfaBuilder
+from nfa import Nfa
+from nfa_builder import NfaBuilder
from dfa import Dfa
from rule_parser import RuleParser, RuleParserState
=======================================
--- /branches/experimental/parser/tools/lexer_generator/nfa.py Tue Nov 12
07:12:31 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/nfa.py Tue Nov 12
09:47:12 2013 UTC
@@ -25,10 +25,8 @@
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-from types import TupleType
from transition_keys import TransitionKey
from automaton import *
-from inspect import getmembers
class NfaState(AutomatonState):
@@ -128,227 +126,6 @@
keys.discard(TransitionKey.epsilon())
return TransitionKey.disjoint_keys(keys)
-class NfaBuilder:
-
- def __init__(self):
- self.__node_number = 0
- self.__operation_map = {}
- self.__members = getmembers(self)
- self.__character_classes = {}
- self.__states = []
-
- def set_character_classes(self, classes):
- self.__character_classes = classes
-
- def __new_state(self):
- self.__node_number += 1
- return NfaState(self.__node_number - 1)
-
- def __or(self, graph):
- start = self.__new_state()
- ends = []
- for x in [self.__process(graph[1]), self.__process(graph[2])]:
- start.add_epsilon_transition(x[0])
- ends += x[1]
- start.close(None)
- return (start, ends)
-
- def __one_or_more(self, graph):
- (start, ends) = self.__process(graph[1])
- end = self.__new_state()
- end.add_epsilon_transition(start)
- self.__patch_ends(ends, end)
- return (start, [end])
-
- def __zero_or_more(self, graph):
- (node, ends) = self.__process(graph[1])
- start = self.__new_state()
- start.add_epsilon_transition(node)
- self.__patch_ends(ends, start)
- return (start, [start])
-
- def __zero_or_one(self, graph):
- (node, ends) = self.__process(graph[1])
- start = self.__new_state()
- start.add_epsilon_transition(node)
- return (start, ends + [start])
-
- def __repeat(self, graph):
- param_min = int(graph[1])
- param_max = int(graph[2])
- subgraph = graph[3]
- (start, ends) = self.__process(subgraph)
- for i in xrange(1, param_min):
- (start2, ends2) = self.__process(subgraph)
- self.__patch_ends(ends, start2)
- ends = ends2
- if param_min == param_max:
- return (start, ends)
-
- midpoints = []
- for i in xrange(param_min, param_max):
- midpoint = self.__new_state()
- self.__patch_ends(ends, midpoint)
- (start2, ends) = self.__process(subgraph)
- midpoint.add_epsilon_transition(start2)
- midpoints.append(midpoint)
-
- return (start, ends + midpoints)
-
- def __cat(self, graph):
- (left, right) = (self.__process(graph[1]), self.__process(graph[2]))
- self.__patch_ends(left[1], right[0])
- return (left[0], right[1])
-
- def __key_state(self, key):
- state = self.__new_state()
- state.add_unclosed_transition(key)
- return (state, [state])
-
- def __literal(self, graph):
- return self.__key_state(TransitionKey.single_char(graph[1]))
-
- def __class(self, graph):
- return self.__key_state(
- TransitionKey.character_class(graph, self.__character_classes))
-
- def __not_class(self, graph):
- return self.__key_state(
- TransitionKey.character_class(graph, self.__character_classes))
-
- def __any(self, graph):
- return self.__key_state(TransitionKey.any())
-
- def __epsilon(self, graph):
- start = self.__new_state()
- end = self.__new_state()
- start.close(end)
- return (start, [end])
-
- def __action(self, graph):
- (start, ends) = self.__process(graph[1])
- action = graph[2]
- end = self.__new_state()
- self.__patch_ends(ends, end)
- end.set_action(action)
- return (start, [end])
-
- def __continue(self, graph):
- (start, ends) = self.__process(graph[1])
- state = self.__peek_state()
- if not state['start_node']:
- state['start_node'] = self.__new_state()
- self.__patch_ends(ends, state['start_node'])
- return (start, [])
-
- def __catch_all(self, graph):
- return self.__key_state(TransitionKey.unique('catch_all'))
-
- def __join(self, graph):
- (graph, name, subgraph, modifier) = graph[1:]
- subgraphs = self.__peek_state()['subgraphs']
- if not name in subgraphs:
- subgraphs[name] = self.__nfa(subgraph)
- (subgraph_start, subgraph_end, nodes_in_subgraph) = subgraphs[name]
- (start, ends) = self.__process(graph)
- if modifier:
- assert modifier == 'ZERO_OR_MORE'
- for end in ends:
- end.add_epsilon_transition(subgraph_end)
- self.__patch_ends(ends, subgraph_start)
- end = self.__new_state()
- subgraph_end.add_epsilon_transition(end)
- return (start, [end])
-
- def __process(self, graph):
- assert type(graph) == TupleType
- method = "_NfaBuilder__" + graph[0].lower()
- if not method in self.__operation_map:
- matches = filter(lambda (name, func): name == method, self.__members)
- assert len(matches) == 1
- self.__operation_map[method] = matches[0][1]
- return self.__operation_map[method](graph)
-
- def __patch_ends(self, ends, new_end):
- for end in ends:
- end.close(new_end)
-
- def __push_state(self):
- self.__states.append({
- 'start_node' : None,
- 'subgraphs' : {},
- 'unpatched_ends' : [],
- })
-
- def __pop_state(self):
- return self.__states.pop()
-
- def __peek_state(self):
- return self.__states[len(self.__states) - 1]
-
- def __nfa(self, graph):
- start_node_number = self.__node_number
- self.__push_state()
- (start, ends) = self.__process(graph)
- state = self.__pop_state()
- if state['start_node']:
- state['start_node'].close(start)
- start = state['start_node']
- for k, subgraph in state['subgraphs'].items():
- subgraph[1].close(None)
- end = self.__new_state()
- if self.__states:
- self.__peek_state()['unpatched_ends'] += state['unpatched_ends']
- else:
- self.__patch_ends(state['unpatched_ends'], end)
- self.__patch_ends(ends, end)
- return (start, end, self.__node_number - start_node_number)
-
- def nfa(self, graph):
- (start, end, nodes_created) = self.__nfa(graph)
- end.close(None)
- return Nfa(start, end, nodes_created)
-
- @staticmethod
- def add_action(graph, action):
- return ('ACTION', graph, action)
-
- @staticmethod
- def add_continue(graph):
- return ('CONTINUE', graph)
-
- @staticmethod
- def catch_all():
- return ('CATCH_ALL',)
-
- @staticmethod
- def epsilon():
- return ('EPSILON',)
-
- @staticmethod
- def join_subgraph(graph, name, subgraph, modifier):
- if modifier:
- modifier = NfaBuilder.__modifer_map[modifier]
- return ('JOIN', graph, name, subgraph, modifier)
-
- @staticmethod
- def or_graphs(graphs):
- return reduce(lambda acc, g: ('OR', acc, g), graphs)
-
- @staticmethod
- def cat_graphs(graphs):
- return reduce(lambda acc, g: ('CAT', acc, g), graphs)
-
- __modifer_map = {
- '+': 'ONE_OR_MORE',
- '?': 'ZERO_OR_ONE',
- '*': 'ZERO_OR_MORE',
- }
-
- @staticmethod
- def apply_modifier(modifier, graph):
- return (NfaBuilder.__modifer_map[modifier], graph)
-
class Nfa(Automaton):
def __init__(self, start, end, nodes_created):
=======================================
--- /branches/experimental/parser/tools/lexer_generator/rule_parser.py Tue
Nov 12 07:12:31 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/rule_parser.py Tue
Nov 12 09:47:12 2013 UTC
@@ -28,7 +28,7 @@
import ply.yacc as yacc
from rule_lexer import RuleLexer
from regex_parser import RegexParser
-from nfa import NfaBuilder
+from nfa_builder import NfaBuilder
from transition_keys import TransitionKey
class RuleParserState:
--
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
---
You received this message because you are subscribed to the Google Groups "v8-dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email
to [email protected].
For more options, visit https://groups.google.com/groups/opt_out.