Revision: 19041
Author: [email protected]
Date: Mon Feb 3 21:28:33 2014 UTC
Log: Experimental parser: use Terms instead of tuples
[email protected]
BUG=
Review URL: https://codereview.chromium.org/137883006
http://code.google.com/p/v8/source/detail?r=19041
Added:
/branches/experimental/parser/tools/lexer_generator/action.py
Modified:
/branches/experimental/parser/tools/lexer_generator/automaton.py
/branches/experimental/parser/tools/lexer_generator/code_generator.py
/branches/experimental/parser/tools/lexer_generator/generator.py
/branches/experimental/parser/tools/lexer_generator/lexer_test.py
/branches/experimental/parser/tools/lexer_generator/nfa_builder.py
/branches/experimental/parser/tools/lexer_generator/regex_parser.py
/branches/experimental/parser/tools/lexer_generator/rule_parser.py
/branches/experimental/parser/tools/lexer_generator/term_test.py
/branches/experimental/parser/tools/lexer_generator/transition_key_test.py
/branches/experimental/parser/tools/lexer_generator/transition_keys.py
=======================================
--- /dev/null
+++ /branches/experimental/parser/tools/lexer_generator/action.py Mon Feb
3 21:28:33 2014 UTC
@@ -0,0 +1,173 @@
+# Copyright 2014 the V8 project authors. All rights reserved.
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following
+# disclaimer in the documentation and/or other materials provided
+# with the distribution.
+# * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from types import StringType, IntType
+
+class Term(object):
+ '''A class representing a function and its arguments.
+ f(a,b,c) would be represented as ('f', a, b, c) where
+ a, b, and c are strings or Terms.'''
+
+ __empty_term = None
+
+ @staticmethod
+ def empty_term():
+ if Term.__empty_term == None:
+ Term.__empty_term = Term('')
+ return Term.__empty_term
+
+ def __init__(self, name, *args):
+ assert type(name) == StringType
+ if not name:
+ assert not args, 'empty term must not have args'
+ for v in args:
+ if type(v) == StringType:
+ assert v, 'string args must be non empty'
+ else:
+ assert isinstance(v, Term)
+ self.__tuple = tuple([name] + list(args))
+ self.__str = None
+
+ def name(self):
+ return self.__tuple[0]
+
+ def args(self):
+ return self.__tuple[1:]
+
+ def __hash__(self):
+ return hash(self.__tuple)
+
+ def __nonzero__(self):
+ return bool(self.__tuple[0])
+
+ def __eq__(self, other):
+ return (isinstance(other, self.__class__) and self.__tuple ==
other.__tuple)
+
+ # TODO(dcarney): escape '(', ')' and ',' in strings
+ def __str__(self):
+ if self.__str == None:
+ self.__str = '(%s)' % ','.join(map(str, self.__tuple))
+ return self.__str
+
+ def to_dot(self):
+ node_ix = [0]
+ node_template = 'node [label="%s"]; N_%d;'
+ edge_template = 'N_%d -> N_%d'
+ nodes = []
+ edges = []
+
+ def escape(v):
+ v = str(v)
+ v =
v.replace('\r', '\\\\r').replace('\t', '\\\\t').replace('\n', '\\\\n')
+ v = v.replace('\\', '\\\\').replace('\"', '\\\"')
+ return v
+
+ def process(term):
+ if isinstance(term, str):
+ node_ix[0] += 1
+ nodes.append(node_template % (escape(term), node_ix[0]))
+ return node_ix[0]
+ elif isinstance(term, Term):
+ child_ixs = map(process, term.args())
+ node_ix[0] += 1
+ nodes.append(node_template % (escape(term.name()), node_ix[0]))
+ for child_ix in child_ixs:
+ edges.append(edge_template % (node_ix[0], child_ix))
+ return node_ix[0]
+ raise Exception
+
+ process(self)
+ return 'digraph { %s %s }' % ('\n'.join(nodes), '\n'.join(edges))
+
+class Action(object):
+
+ __empty_action = None
+
+ @staticmethod
+ def empty_action():
+ if Action.__empty_action == None:
+ Action.__empty_action = Action(Term.empty_term(), Term.empty_term())
+ return Action.__empty_action
+
+ @staticmethod
+ def dominant_action(state_set):
+ action = Action.empty_action()
+ for state in state_set:
+ if not state.action():
+ continue
+ if not action:
+ action = state.action()
+ continue
+ if state.action().precedence() == action.precedence():
+ assert state.action() == action
+ elif state.action().precedence() < action.precedence():
+ action = state.action()
+ return action
+
+ def __init__(self, entry_action, match_action, precedence = -1):
+ assert isinstance(match_action, Term)
+ assert isinstance(entry_action, Term)
+ assert type(precedence) == IntType
+ self.__entry_action = entry_action
+ self.__match_action = match_action
+ self.__precedence = precedence
+
+ def entry_action(self):
+ return self.__entry_action
+
+ def match_action(self):
+ return self.__match_action
+
+ def precedence(self):
+ return self.__precedence
+
+ def to_term(self):
+ return Term(
+ 'action_serialization',
+ self.__entry_action, self.__match_action, str(self.__precedence))
+
+ @staticmethod
+ def from_term(term):
+ assert term.name() == 'action_serialization'
+ return Action(term.args()[0], term.args()[1], int(term.args()[2]))
+
+ def __nonzero__(self):
+ return bool(self.__entry_action) or bool(self.__match_action)
+
+ def __hash__(self):
+ return hash((self.__entry_action, self.__match_action))
+
+ def __eq__(self, other):
+ return (isinstance(other, self.__class__) and
+ self.__entry_action == other.__entry_action and
+ self.__match_action == other.__match_action)
+
+ def __str__(self):
+ parts = []
+ for action in [self.__entry_action, self.__match_action]:
+ parts.append('' if not action else str(action))
+ return "action< %s >" % " | ".join(parts)
=======================================
--- /branches/experimental/parser/tools/lexer_generator/automaton.py Mon
Feb 3 15:08:36 2014 UTC
+++ /branches/experimental/parser/tools/lexer_generator/automaton.py Mon
Feb 3 21:28:33 2014 UTC
@@ -25,120 +25,13 @@
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-from types import TupleType, ListType, StringType
+from types import TupleType, ListType
from itertools import chain
+from action import Term, Action
from transition_keys import TransitionKey
-class Term(object):
- '''A class representing a function and its arguments.
- f(a,b,c) would be represented as ('f', a, b, c) where
- a, b, and c are strings or Terms.'''
-
- __empty_term = None
-
- @staticmethod
- def empty_term():
- if Term.__empty_term == None:
- Term.__empty_term = Term('')
- return Term.__empty_term
-
- @staticmethod
- def __verify_string(v):
- assert (not ',' in v) and (not '(' in v)
-
- def __init__(self, name, *args):
- assert type(name) == StringType
- self.__verify_string(name)
- if not name:
- assert not args, 'empty term must not have args'
- for v in args:
- assert v, 'args must be non empty'
- if type(v) == StringType:
- self.__verify_string(v)
- else:
- assert isinstance(v, self.__class__)
- self.__tuple = tuple([name] + list(args))
- self.__str = None
-
- def name(self):
- return self.__tuple[0]
-
- def args(self):
- return self.__tuple[1:]
-
- def __hash__(self):
- return hash(self.__tuple)
-
- def __nonzero__(self):
- return bool(self.__tuple[0])
-
- def __eq__(self, other):
- return (isinstance(other, self.__class__) and self.__tuple ==
other.__tuple)
-
- def __str__(self):
- if self.__str == None:
- self.__str = '(%s)' % ','.join(map(str, self.__tuple))
- return self.__str
-
-class Action(object):
-
- __empty_action = None
-
- @staticmethod
- def empty_action():
- if Action.__empty_action == None:
- Action.__empty_action = Action(Term.empty_term(), Term.empty_term())
- return Action.__empty_action
-
- @staticmethod
- def dominant_action(state_set):
- action = Action.empty_action()
- for state in state_set:
- if not state.action():
- continue
- if not action:
- action = state.action()
- continue
- if state.action().precedence() == action.precedence():
- assert state.action() == action
- elif state.action().precedence() < action.precedence():
- action = state.action()
- return action
-
- def __init__(self, entry_action, match_action, precedence = -1):
- for action in [entry_action, match_action]:
- assert isinstance(action, Term)
- self.__entry_action = entry_action
- self.__match_action = match_action
- self.__precedence = precedence
-
- def entry_action(self):
- return self.__entry_action
-
- def match_action(self):
- return self.__match_action
-
- def precedence(self):
- return self.__precedence
-
- def __nonzero__(self):
- return bool(self.__entry_action) or bool(self.__match_action)
-
- def __hash__(self):
- return hash((self.__entry_action, self.__match_action))
-
- def __eq__(self, other):
- return (isinstance(other, self.__class__) and
- self.__entry_action == other.__entry_action and
- self.__match_action == other.__match_action)
-
- def __str__(self):
- parts = []
- for action in [self.__entry_action, self.__match_action]:
- parts.append('' if not action else str(action))
- return "action< %s >" % " | ".join(parts)
-
class AutomatonState(object):
+ '''A base class for dfa and nfa states. Immutable'''
__node_number_counter = 0
=======================================
--- /branches/experimental/parser/tools/lexer_generator/code_generator.py
Mon Feb 3 15:08:36 2014 UTC
+++ /branches/experimental/parser/tools/lexer_generator/code_generator.py
Mon Feb 3 21:28:33 2014 UTC
@@ -47,7 +47,7 @@
else:
dfa = rule_processor.default_automata().dfa()
self.__dfa = dfa
- self.__default_action = rule_processor.default_action
+ self.__default_action = rule_processor.default_action()
self.__debug_print = debug_print
self.__start_node_number = self.__dfa.start_state().node_number()
self.__log = log
=======================================
--- /branches/experimental/parser/tools/lexer_generator/generator.py Fri
Jan 17 13:38:07 2014 UTC
+++ /branches/experimental/parser/tools/lexer_generator/generator.py Mon
Feb 3 21:28:33 2014 UTC
@@ -86,11 +86,11 @@
def generate_rule_tree_html(rule_processor):
scripts = []
loads = []
- rule_tree_dots = rule_processor.rule_tree_dots()
- for i, (rule, dot) in enumerate(rule_tree_dots.items()):
+ for i, (name, automata) in
enumerate(list(rule_processor.automata_iter())):
rule_i = "rule_%d" % i
+ dot = automata.rule_term().to_dot()
scripts.append(script_template % (rule_i, dot))
- loads.append(load_template % ("rules [%s]" % rule, rule_i))
+ loads.append(load_template % ("rules [%s]" % name, rule_i))
body = "\n".join(scripts) + (load_outer_template % "\n".join(loads))
return file_template % body
=======================================
--- /branches/experimental/parser/tools/lexer_generator/lexer_test.py Mon
Feb 3 15:08:36 2014 UTC
+++ /branches/experimental/parser/tools/lexer_generator/lexer_test.py Mon
Feb 3 21:28:33 2014 UTC
@@ -39,7 +39,7 @@
automata = rule_processor.default_automata()
for automaton in [automata.nfa(), automata.dfa(),
automata.minimal_dfa()]:
for i, (action, start, stop) in enumerate(
- automaton.lex(string, rule_processor.default_action)):
+ automaton.lex(string, rule_processor.default_action())):
self.assertEquals(expected[i][0], action)
self.assertEquals(expected[i][1], string[start : stop])
=======================================
--- /branches/experimental/parser/tools/lexer_generator/nfa_builder.py Thu
Jan 23 11:39:04 2014 UTC
+++ /branches/experimental/parser/tools/lexer_generator/nfa_builder.py Mon
Feb 3 21:28:33 2014 UTC
@@ -27,6 +27,7 @@
from types import TupleType
from inspect import getmembers
+from action import *
from nfa import *
# Nfa is built in two stages:
@@ -40,7 +41,7 @@
class NfaBuilder(object):
- def __init__(self, encoding, character_classes = {}):
+ def __init__(self, encoding, character_classes):
self.__node_number = 0
self.__operation_map = {}
self.__members = getmembers(self)
@@ -58,39 +59,39 @@
self.__global_end_node = self.__new_state()
return self.__global_end_node
- def __or(self, graph):
+ def __or(self, args):
start = self.__new_state()
ends = []
- for x in [self.__process(graph[1]), self.__process(graph[2])]:
+ for x in [self.__process(args[0]), self.__process(args[1])]:
start.add_epsilon_transition(x[0])
ends += x[1]
start.close(None)
return (start, ends)
- def __one_or_more(self, graph):
- (start, ends) = self.__process(graph[1])
+ def __one_or_more(self, args):
+ (start, ends) = self.__process(args[0])
end = self.__new_state()
end.add_epsilon_transition(start)
self.__patch_ends(ends, end)
return (start, [end])
- def __zero_or_more(self, graph):
- (node, ends) = self.__process(graph[1])
+ def __zero_or_more(self, args):
+ (node, ends) = self.__process(args[0])
start = self.__new_state()
start.add_epsilon_transition(node)
self.__patch_ends(ends, start)
return (start, [start])
- def __zero_or_one(self, graph):
- (node, ends) = self.__process(graph[1])
+ def __zero_or_one(self, args):
+ (node, ends) = self.__process(args[0])
start = self.__new_state()
start.add_epsilon_transition(node)
return (start, ends + [start])
- def __repeat(self, graph):
- param_min = int(graph[1])
- param_max = int(graph[2])
- subgraph = graph[3]
+ def __repeat(self, args):
+ param_min = int(args[0])
+ param_max = int(args[1])
+ subgraph = args[2]
(start, ends) = self.__process(subgraph)
for i in xrange(1, param_min):
(start2, ends2) = self.__process(subgraph)
@@ -109,8 +110,8 @@
return (start, ends + midpoints)
- def __cat(self, graph):
- (left, right) = (self.__process(graph[1]), self.__process(graph[2]))
+ def __cat(self, args):
+ (left, right) = (self.__process(args[0]), self.__process(args[1]))
self.__patch_ends(left[1], right[0])
return (left[0], right[1])
@@ -119,31 +120,28 @@
state.add_unclosed_transition(key)
return (state, [state])
- def __literal(self, graph):
+ def __literal(self, args):
+ assert len(args) == 1
return self.__key_state(
- TransitionKey.single_char(self.__encoding, graph[1]))
+ TransitionKey.single_char(self.__encoding, args[0]))
- def __class(self, graph):
+ def __class(self, args):
+ assert len(args) == 1
return self.__key_state(TransitionKey.character_class(
- self.__encoding, graph, self.__character_classes))
+ self.__encoding, Term('CLASS', args[0]), self.__character_classes))
- def __not_class(self, graph):
+ def __not_class(self, args):
+ assert len(args) == 1
return self.__key_state(TransitionKey.character_class(
- self.__encoding, graph, self.__character_classes))
+ self.__encoding, Term('NOT_CLASS', args[0]),
self.__character_classes))
- def __any(self, graph):
+ def __any(self, args):
return self.__key_state(TransitionKey.any(self.__encoding))
- def __epsilon(self, graph):
- start = self.__new_state()
+ def __action(self, args):
+ (start, ends) = self.__process(args[0])
+ action = Action.from_term(args[1])
end = self.__new_state()
- start.close(end)
- return (start, [end])
-
- def __action(self, graph):
- (start, ends) = self.__process(graph[1])
- action = graph[2]
- end = self.__new_state()
self.__patch_ends(ends, end)
end.set_action(action)
if action.match_action():
@@ -151,19 +149,18 @@
end.add_epsilon_transition(global_end)
return (start, [end])
- def __continue(self, graph):
- (start, ends) = self.__process(graph[1])
+ def __continue(self, args):
+ (start, ends) = self.__process(args[0])
state = self.__peek_state()
if not state['start_node']:
state['start_node'] = self.__new_state()
self.__patch_ends(ends, state['start_node'])
return (start, [])
- def __unique_key(self, graph):
- return self.__key_state(TransitionKey.unique(graph[1]))
+ def __unique_key(self, args):
+ return self.__key_state(TransitionKey.unique(args[0]))
- def __join(self, graph):
- (graph, name, subgraph) = graph[1:]
+ def __join(self, (graph, name, subgraph)):
subgraphs = self.__peek_state()['subgraphs']
if not name in subgraphs:
subgraphs[name] = self.__nfa(subgraph)
@@ -177,14 +174,14 @@
else:
return (start, [])
- def __process(self, graph):
- assert type(graph) == TupleType
- method = "_NfaBuilder__" + graph[0].lower()
+ def __process(self, term):
+ assert isinstance(term, Term)
+ method = "_NfaBuilder__" + term.name().lower()
if not method in self.__operation_map:
matches = filter(lambda (name, func): name == method, self.__members)
assert len(matches) == 1
self.__operation_map[method] = matches[0][1]
- return self.__operation_map[method](graph)
+ return self.__operation_map[method](term.args())
def __patch_ends(self, ends, new_end):
for end in ends:
@@ -203,10 +200,10 @@
def __peek_state(self):
return self.__states[-1]
- def __nfa(self, graph):
+ def __nfa(self, term):
start_node_number = self.__node_number
self.__push_state()
- (start, ends) = self.__process(graph)
+ (start, ends) = self.__process(term)
state = self.__pop_state()
if state['start_node']:
state['start_node'].close(start)
@@ -257,8 +254,11 @@
transitions[inverse_key] = transitions[catch_all]
del transitions[catch_all]
- def nfa(self, graph):
- (start, end, nodes_created) = self.__nfa(graph)
+ @staticmethod
+ def nfa(encoding, character_classes, rule_term):
+
+ self = NfaBuilder(encoding, character_classes)
+ (start, end, nodes_created) = self.__nfa(rule_term)
global_end_to_return = self.__global_end_node or end
assert global_end_to_return
@@ -267,74 +267,35 @@
global_end_to_return.close(None)
- # FIXME: the same NfaBuilder should not be called twice, so this
cleanup
- # should be unnecessary.
- self.__global_end_node = None
-
self.__compute_epsilon_closures(start)
f = lambda node, state: self.__replace_catch_all(self.__encoding, node)
Automaton.visit_states(set([start]), f)
return Nfa(self.__encoding, start, global_end_to_return, nodes_created)
- @staticmethod
- def rule_tree_dot(graph):
- node_ix = [0]
- node_template = 'node [label="%s"]; N_%d;'
- edge_template = 'N_%d -> N_%d'
- nodes = []
- edges = []
-
- def escape(v):
- v = str(v)
- v =
v.replace('\r', '\\\\r').replace('\t', '\\\\t').replace('\n', '\\\\n')
- v = v.replace('\\', '\\\\').replace('\"', '\\\"')
- return v
-
- def process_thing(thing):
- if isinstance(thing, str):
- node_ix[0] += 1
- nodes.append(node_template % (escape(thing), node_ix[0]))
- return node_ix[0]
- if isinstance(thing, tuple):
- child_ixs = map(process_thing, list(thing)[1:])
- node_ix[0] += 1
- nodes.append(node_template % (escape(thing[0]), node_ix[0]))
- for child_ix in child_ixs:
- edges.append(edge_template % (node_ix[0], child_ix))
- return node_ix[0]
- if isinstance(thing, Action):
- node_ix[0] += 1
- nodes.append(node_template % (str(thing), node_ix[0]))
- return node_ix[0]
- raise Exception
-
- process_thing(graph)
- return 'digraph { %s %s }' % ('\n'.join(nodes), '\n'.join(edges))
-
@staticmethod
- def add_action(graph, action):
- return ('ACTION', graph, action)
+ def add_action(term, action):
+ return Term('ACTION', term, action.to_term())
@staticmethod
- def add_continue(graph):
- return ('CONTINUE', graph)
+ def add_continue(term):
+ return Term('CONTINUE', term)
@staticmethod
def unique_key(name):
- return ('UNIQUE_KEY', name)
+ return Term('UNIQUE_KEY', name)
@staticmethod
- def join_subgraph(graph, name, subgraph):
- return ('JOIN', graph, name, subgraph)
+ def join_subgraph(term, name, subgraph_term):
+ return Term('JOIN', term, name, subgraph_term)
@staticmethod
- def or_graphs(graphs):
- return reduce(lambda acc, g: ('OR', acc, g), graphs)
+ def or_terms(terms):
+ return reduce(lambda acc, g: Term('OR', acc, g), terms)
@staticmethod
- def cat_graphs(graphs):
- return reduce(lambda acc, g: ('CAT', acc, g), graphs)
+ def cat_terms(terms):
+ return reduce(lambda acc, g: Term('CAT', acc, g), terms)
__modifer_map = {
'+': 'ONE_OR_MORE',
@@ -343,5 +304,5 @@
}
@staticmethod
- def apply_modifier(modifier, graph):
- return (NfaBuilder.__modifer_map[modifier], graph)
+ def apply_modifier(modifier, term):
+ return Term(NfaBuilder.__modifer_map[modifier], term)
=======================================
--- /branches/experimental/parser/tools/lexer_generator/regex_parser.py Thu
Nov 7 12:03:35 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/regex_parser.py Mon
Feb 3 21:28:33 2014 UTC
@@ -26,8 +26,9 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import ply.yacc as yacc
+from types import ListType, TupleType
from regex_lexer import RegexLexer
-from types import ListType, TupleType
+from action import Term
class RegexParser:
@@ -47,7 +48,7 @@
if len(p) == 2:
p[0] = p[1]
else:
- p[0] = (self.token_map[p[2]], p[1], p[3])
+ p[0] = Term(self.token_map[p[2]], p[1], p[3])
def p_fragments(self, p):
'''fragments : fragment
@@ -65,9 +66,9 @@
'''
if p[2] != None:
if isinstance(p[2], tuple) and p[2][0] == 'REPEAT':
- p[0] = (p[2][0], p[2][1], p[2][2], p[1])
+ p[0] = Term(p[2][0], p[2][1], p[2][2], p[1])
else:
- p[0] = (p[2], p[1])
+ p[0] = Term(p[2], p[1])
else:
p[0] = p[1]
@@ -91,19 +92,19 @@
def p_literal(self, p):
'''literal : LITERAL'''
- p[0] = ('LITERAL', p[1])
+ p[0] = Term('LITERAL', p[1])
def p_any(self, p):
'''any : ANY'''
- p[0] = (self.token_map[p[1]],)
+ p[0] = Term(self.token_map[p[1]])
def p_class(self, p):
'''class : CLASS_BEGIN class_content CLASS_END
| CLASS_BEGIN NOT class_content CLASS_END'''
if len(p) == 4:
- p[0] = ("CLASS", p[2])
+ p[0] = Term("CLASS", p[2])
else:
- p[0] = ("NOT_CLASS", p[3])
+ p[0] = Term("NOT_CLASS", p[3])
def p_group(self, p):
'''group : GROUP_BEGIN start GROUP_END'''
@@ -116,14 +117,14 @@
| CLASS_LITERAL_AS_OCTAL maybe_class_content
'''
if len(p) == 5:
- left = ("RANGE", p[1], p[3])
+ left = Term("RANGE", p[1], p[3])
else:
if len(p[1]) == 1:
- left = ('LITERAL', p[1])
+ left = Term('LITERAL', p[1])
elif p[1][0] == '\\':
- left = ('LITERAL', chr(int(p[1][1:], 8)))
+ left = Term('LITERAL', chr(int(p[1][1:], 8)))
else:
- left = ('CHARACTER_CLASS', p[1][1:-1])
+ left = Term('CHARACTER_CLASS', p[1][1:-1])
p[0] = self.__cat(left, p[len(p)-1])
def p_maybe_class_content(self, p):
@@ -139,9 +140,8 @@
@staticmethod
def __cat(left, right):
- if right == None:
- return left
- return ('CAT', left, right)
+ assert left
+ return left if not right else Term('CAT', left, right)
def build(self, **kwargs):
self.parser = yacc.yacc(module=self, debug=0, write_tables=0, **kwargs)
=======================================
--- /branches/experimental/parser/tools/lexer_generator/rule_parser.py Mon
Feb 3 15:08:36 2014 UTC
+++ /branches/experimental/parser/tools/lexer_generator/rule_parser.py Mon
Feb 3 21:28:33 2014 UTC
@@ -26,7 +26,7 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import ply.yacc as yacc
-from automaton import Term, Action
+from action import Term, Action
from rule_lexer import RuleLexer
from regex_parser import RegexParser
from nfa_builder import NfaBuilder
@@ -67,13 +67,13 @@
'alias_rule : IDENTIFIER EQUALS composite_regex SEMICOLON'
state = self.__state
assert not p[1] in state.aliases
- graph = p[3]
- state.aliases[p[1]] = graph
- if graph[0] == 'CLASS' or graph[0] == 'NOT_CLASS':
+ term = p[3]
+ state.aliases[p[1]] = term
+ if term.name() == 'CLASS' or term.name() == 'NOT_CLASS':
classes = state.character_classes
- assert not p[1] in classes
+ assert not p[1] in classes, "cannot reassign alias"
encoding = state.encoding
- classes[p[1]] = TransitionKey.character_class(encoding, graph,
classes)
+ classes[p[1]] = TransitionKey.character_class(encoding, term,
classes)
def p_rules(self, p):
'''rules : state_change transition_rules rules
@@ -171,12 +171,12 @@
if len(p) == 2:
p[0] = p[1]
else:
- p[0] = NfaBuilder.or_graphs([p[1], p[3]])
+ p[0] = NfaBuilder.or_terms([p[1], p[3]])
def p_regex_parts(self, p):
'''regex_parts : regex_part
| regex_part regex_parts'''
- p[0] = NfaBuilder.cat_graphs(p[1:])
+ p[0] = NfaBuilder.cat_terms(p[1:])
def p_regex_part(self, p):
'''regex_part : LEFT_PARENTHESIS composite_regex RIGHT_PARENTHESIS
modifier
@@ -185,11 +185,11 @@
| regex modifier
| regex_alias modifier'''
modifier = p[len(p)-1]
- graph = p[2] if len(p) == 5 else p[1]
+ term = p[2] if len(p) == 5 else p[1]
if modifier:
- p[0] = NfaBuilder.apply_modifier(modifier, graph)
+ p[0] = NfaBuilder.apply_modifier(modifier, term)
else:
- p[0] = graph
+ p[0] = term
def p_regex_string_literal(self, p):
'regex_string_literal : STRING'
@@ -197,7 +197,6 @@
escape_char = lambda string, char: string.replace(char, "\\" + char)
string = reduce(escape_char, "+?*|.[](){}",
string).replace("\\\"", "\"")
p[0] = RegexParser.parse(string)
-
def p_regex(self, p):
'regex : REGEX'
string = p[1][1:-1].replace("\\/", "/")
@@ -251,6 +250,7 @@
def __init__(self, parser_state):
self.__automata = {}
self.__rule_trees = {}
+ self.__default_action = None
self.__process_parser_state(parser_state)
@staticmethod
@@ -265,24 +265,26 @@
def default_automata(self):
return self.__automata['default']
- def rule_tree_dots(self):
- result = {}
- for rule in self.__rule_trees:
- result[rule] = NfaBuilder.rule_tree_dot(self.__rule_trees[rule])
- return result
+ def default_action(self):
+ return self.__default_action
class Automata(object):
- def __init__(self, builder, graph):
- self.__builder = builder
- self.__graph = graph
+ def __init__(self, encoding, character_classes, rule_term):
+ self.__encoding = encoding
+ self.__character_classes = character_classes
+ self.__rule_term = rule_term
self.__nfa = None
self.__dfa = None
self.__minimial_dfa = None
+ def rule_term(self):
+ return self.__rule_term
+
def nfa(self):
if not self.__nfa:
- self.__nfa = self.__builder.nfa(self.__graph)
+ self.__nfa = NfaBuilder.nfa(
+ self.__encoding, self.__character_classes, self.__rule_term)
return self.__nfa
def dfa(self):
@@ -302,15 +304,14 @@
def __process_parser_state(self, parser_state):
rule_map = {}
- builder = NfaBuilder(parser_state.encoding,
parser_state.character_classes)
assert 'default' in parser_state.rules
def process(subgraph, v):
graphs = []
for graph, precedence, action in v['regex']:
(entry_action, match_action, transition) = action
if entry_action or match_action:
- action = Action(entry_action, match_action, precedence)
- graph = NfaBuilder.add_action(graph, action)
+ graph = NfaBuilder.add_action(
+ graph, Action(entry_action, match_action, precedence))
if not transition:
pass
elif transition == 'continue':
@@ -321,7 +322,7 @@
graph = NfaBuilder.join_subgraph(
graph, transition, rule_map[transition])
graphs.append(graph)
- graph = NfaBuilder.or_graphs(graphs)
+ graph = NfaBuilder.or_terms(graphs)
rule_map[subgraph] = graph
# process first the subgraphs, then the default graph
for k, v in parser_state.rules.items():
@@ -330,8 +331,9 @@
process('default', parser_state.rules['default'])
# build the automata
for rule_name, graph in rule_map.items():
- self.__automata[rule_name] = RuleProcessor.Automata(builder, graph)
+ self.__automata[rule_name] = RuleProcessor.Automata(
+ parser_state.encoding, parser_state.character_classes, graph)
self.__rule_trees[rule_name] = graph
# process default_action
default_action = parser_state.rules['default']['default_action']
- self.default_action = Action(Term.empty_term(), default_action)
+ self.__default_action = Action(Term.empty_term(), default_action)
=======================================
--- /branches/experimental/parser/tools/lexer_generator/term_test.py Mon
Feb 3 14:12:24 2014 UTC
+++ /branches/experimental/parser/tools/lexer_generator/term_test.py Mon
Feb 3 21:28:33 2014 UTC
@@ -26,7 +26,7 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import unittest
-from automaton import Term
+from action import Term
class TermTestCase(unittest.TestCase):
=======================================
---
/branches/experimental/parser/tools/lexer_generator/transition_key_test.py
Fri Nov 22 08:40:59 2013 UTC
+++
/branches/experimental/parser/tools/lexer_generator/transition_key_test.py
Mon Feb 3 21:28:33 2014 UTC
@@ -66,9 +66,9 @@
else:
regex = "[%s]" % string
token = "CLASS"
- graph = RegexParser.parse(regex)
- assert graph[0] == token
- key = TransitionKey.character_class(encoding, graph, classes)
+ term = RegexParser.parse(regex)
+ assert term.name() == token
+ key = TransitionKey.character_class(encoding, term, classes)
for c in match:
self.assertEqual(invert, not key.matches_char(c))
for c in no_match:
=======================================
--- /branches/experimental/parser/tools/lexer_generator/transition_keys.py
Wed Jan 29 13:19:19 2014 UTC
+++ /branches/experimental/parser/tools/lexer_generator/transition_keys.py
Mon Feb 3 21:28:33 2014 UTC
@@ -182,17 +182,18 @@
return TransitionKey.__cached_key(None, name, get_bounds)
@staticmethod
- def __process_graph(encoding, graph, ranges, key_map):
- key = graph[0]
+ def __process_term(encoding, term, ranges, key_map):
+ key = term.name()
+ args = term.args()
if key == 'RANGE':
- ranges.append((ord(graph[1]), ord(graph[2])))
+ ranges.append((ord(args[0]), ord(args[1])))
elif key == 'LITERAL':
- ranges.append((ord(graph[1]), ord(graph[1])))
+ ranges.append((ord(args[0]), ord(args[0])))
elif key == 'CAT':
- for x in [graph[1], graph[2]]:
- TransitionKey.__process_graph(encoding, x, ranges, key_map)
+ for x in [args[0], args[1]]:
+ TransitionKey.__process_term(encoding, x, ranges, key_map)
elif key == 'CHARACTER_CLASS':
- class_name = graph[1]
+ class_name = args[0]
if encoding.class_range(class_name):
r = encoding.class_range(class_name)
if class_name in key_map:
@@ -206,13 +207,13 @@
elif class_name in key_map:
ranges += key_map[class_name].__ranges
else:
- raise Exception('unknown character class [%s]' % graph[1])
+ raise Exception('unknown character class [%s]' % args[0])
else:
raise Exception('bad key [%s]' % key)
@staticmethod
- def character_class(encoding, graph, key_map):
- '''Processes 'graph' (a representation of a character class in the rule
+ def character_class(encoding, term, key_map):
+ '''Processes 'term' (a representation of a character class in the rule
file), and constructs a TransitionKey based on it. 'key_map' contains
already constructed aliases for character classes (they can be used in
the
new character class). It is a map from strings (character class names)
to
@@ -220,9 +221,10 @@
[a-z:digit:] where 'digit' is a previously constructed character class
found
in "key_map".'''
ranges = []
- assert graph[0] == 'CLASS' or graph[0] == 'NOT_CLASS'
- invert = graph[0] == 'NOT_CLASS'
- TransitionKey.__process_graph(encoding, graph[1], ranges, key_map)
+ assert term.name() == 'CLASS' or term.name() == 'NOT_CLASS'
+ invert = term.name() == 'NOT_CLASS'
+ assert len(term.args()) == 1
+ TransitionKey.__process_term(encoding, term.args()[0], ranges, key_map)
return TransitionKey.__key_from_ranges(encoding, invert, ranges)
def matches_char(self, char):
--
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
---
You received this message because you are subscribed to the Google Groups "v8-dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email
to [email protected].
For more options, visit https://groups.google.com/groups/opt_out.