Revision: 19041
Author:   [email protected]
Date:     Mon Feb  3 21:28:33 2014 UTC
Log:      Experimental parser: use Terms instead of tuples

[email protected]

BUG=

Review URL: https://codereview.chromium.org/137883006
http://code.google.com/p/v8/source/detail?r=19041

Added:
 /branches/experimental/parser/tools/lexer_generator/action.py
Modified:
 /branches/experimental/parser/tools/lexer_generator/automaton.py
 /branches/experimental/parser/tools/lexer_generator/code_generator.py
 /branches/experimental/parser/tools/lexer_generator/generator.py
 /branches/experimental/parser/tools/lexer_generator/lexer_test.py
 /branches/experimental/parser/tools/lexer_generator/nfa_builder.py
 /branches/experimental/parser/tools/lexer_generator/regex_parser.py
 /branches/experimental/parser/tools/lexer_generator/rule_parser.py
 /branches/experimental/parser/tools/lexer_generator/term_test.py
 /branches/experimental/parser/tools/lexer_generator/transition_key_test.py
 /branches/experimental/parser/tools/lexer_generator/transition_keys.py

=======================================
--- /dev/null
+++ /branches/experimental/parser/tools/lexer_generator/action.py Mon Feb 3 21:28:33 2014 UTC
@@ -0,0 +1,173 @@
+# Copyright 2014 the V8 project authors. All rights reserved.
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above
+#       copyright notice, this list of conditions and the following
+#       disclaimer in the documentation and/or other materials provided
+#       with the distribution.
+#     * Neither the name of Google Inc. nor the names of its
+#       contributors may be used to endorse or promote products derived
+#       from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from types import StringType, IntType
+
+class Term(object):
+  '''A class representing a function and its arguments.
+  f(a,b,c) would be represented as ('f', a, b, c) where
+  a, b, and c are strings or Terms.'''
+
+  __empty_term = None
+
+  @staticmethod
+  def empty_term():
+    if Term.__empty_term == None:
+      Term.__empty_term = Term('')
+    return Term.__empty_term
+
+  def __init__(self, name, *args):
+    assert type(name) == StringType
+    if not name:
+      assert not args, 'empty term must not have args'
+    for v in args:
+      if type(v) == StringType:
+        assert v, 'string args must be non empty'
+      else:
+        assert isinstance(v, Term)
+    self.__tuple = tuple([name] + list(args))
+    self.__str = None
+
+  def name(self):
+    return self.__tuple[0]
+
+  def args(self):
+    return self.__tuple[1:]
+
+  def  __hash__(self):
+    return hash(self.__tuple)
+
+  def __nonzero__(self):
+    return bool(self.__tuple[0])
+
+  def __eq__(self, other):
+ return (isinstance(other, self.__class__) and self.__tuple == other.__tuple)
+
+  # TODO(dcarney): escape '(', ')' and ',' in strings
+  def __str__(self):
+    if self.__str == None:
+      self.__str = '(%s)' % ','.join(map(str, self.__tuple))
+    return self.__str
+
+  def to_dot(self):
+    node_ix = [0]
+    node_template = 'node [label="%s"]; N_%d;'
+    edge_template = 'N_%d -> N_%d'
+    nodes = []
+    edges = []
+
+    def escape(v):
+      v = str(v)
+ v = v.replace('\r', '\\\\r').replace('\t', '\\\\t').replace('\n', '\\\\n')
+      v = v.replace('\\', '\\\\').replace('\"', '\\\"')
+      return v
+
+    def process(term):
+      if isinstance(term, str):
+        node_ix[0] += 1
+        nodes.append(node_template % (escape(term), node_ix[0]))
+        return node_ix[0]
+      elif isinstance(term, Term):
+        child_ixs = map(process, term.args())
+        node_ix[0] += 1
+        nodes.append(node_template % (escape(term.name()), node_ix[0]))
+        for child_ix in child_ixs:
+          edges.append(edge_template % (node_ix[0], child_ix))
+        return node_ix[0]
+      raise Exception
+
+    process(self)
+    return 'digraph { %s %s }' % ('\n'.join(nodes), '\n'.join(edges))
+
+class Action(object):
+
+  __empty_action = None
+
+  @staticmethod
+  def empty_action():
+    if Action.__empty_action == None:
+      Action.__empty_action = Action(Term.empty_term(), Term.empty_term())
+    return Action.__empty_action
+
+  @staticmethod
+  def dominant_action(state_set):
+    action = Action.empty_action()
+    for state in state_set:
+      if not state.action():
+        continue
+      if not action:
+        action = state.action()
+        continue
+      if state.action().precedence() == action.precedence():
+        assert state.action() == action
+      elif state.action().precedence() < action.precedence():
+        action = state.action()
+    return action
+
+  def __init__(self, entry_action, match_action, precedence = -1):
+    assert isinstance(match_action, Term)
+    assert isinstance(entry_action, Term)
+    assert type(precedence) == IntType
+    self.__entry_action = entry_action
+    self.__match_action = match_action
+    self.__precedence = precedence
+
+  def entry_action(self):
+    return self.__entry_action
+
+  def match_action(self):
+    return self.__match_action
+
+  def precedence(self):
+    return self.__precedence
+
+  def to_term(self):
+    return Term(
+      'action_serialization',
+      self.__entry_action, self.__match_action, str(self.__precedence))
+
+  @staticmethod
+  def from_term(term):
+    assert term.name() == 'action_serialization'
+    return Action(term.args()[0], term.args()[1], int(term.args()[2]))
+
+  def __nonzero__(self):
+    return bool(self.__entry_action) or bool(self.__match_action)
+
+  def __hash__(self):
+    return hash((self.__entry_action, self.__match_action))
+
+  def __eq__(self, other):
+    return (isinstance(other, self.__class__) and
+            self.__entry_action == other.__entry_action and
+            self.__match_action == other.__match_action)
+
+  def __str__(self):
+    parts = []
+    for action in [self.__entry_action, self.__match_action]:
+      parts.append('' if not action else str(action))
+    return "action< %s >" % " | ".join(parts)
=======================================
--- /branches/experimental/parser/tools/lexer_generator/automaton.py Mon Feb 3 15:08:36 2014 UTC +++ /branches/experimental/parser/tools/lexer_generator/automaton.py Mon Feb 3 21:28:33 2014 UTC
@@ -25,120 +25,13 @@
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

-from types import TupleType, ListType, StringType
+from types import TupleType, ListType
 from itertools import chain
+from action import Term, Action
 from transition_keys import TransitionKey

-class Term(object):
-  '''A class representing a function and its arguments.
-  f(a,b,c) would be represented as ('f', a, b, c) where
-  a, b, and c are strings or Terms.'''
-
-  __empty_term = None
-
-  @staticmethod
-  def empty_term():
-    if Term.__empty_term == None:
-      Term.__empty_term = Term('')
-    return Term.__empty_term
-
-  @staticmethod
-  def __verify_string(v):
-    assert (not ',' in v) and (not '(' in v)
-
-  def __init__(self, name, *args):
-    assert type(name) == StringType
-    self.__verify_string(name)
-    if not name:
-      assert not args, 'empty term must not have args'
-    for v in args:
-      assert v, 'args must be non empty'
-      if type(v) == StringType:
-        self.__verify_string(v)
-      else:
-        assert isinstance(v, self.__class__)
-    self.__tuple = tuple([name] + list(args))
-    self.__str = None
-
-  def name(self):
-    return self.__tuple[0]
-
-  def args(self):
-    return self.__tuple[1:]
-
-  def  __hash__(self):
-    return hash(self.__tuple)
-
-  def __nonzero__(self):
-    return bool(self.__tuple[0])
-
-  def __eq__(self, other):
- return (isinstance(other, self.__class__) and self.__tuple == other.__tuple)
-
-  def __str__(self):
-    if self.__str == None:
-      self.__str = '(%s)' % ','.join(map(str, self.__tuple))
-    return self.__str
-
-class Action(object):
-
-  __empty_action = None
-
-  @staticmethod
-  def empty_action():
-    if Action.__empty_action == None:
-      Action.__empty_action = Action(Term.empty_term(), Term.empty_term())
-    return Action.__empty_action
-
-  @staticmethod
-  def dominant_action(state_set):
-    action = Action.empty_action()
-    for state in state_set:
-      if not state.action():
-        continue
-      if not action:
-        action = state.action()
-        continue
-      if state.action().precedence() == action.precedence():
-        assert state.action() == action
-      elif state.action().precedence() < action.precedence():
-        action = state.action()
-    return action
-
-  def __init__(self, entry_action, match_action, precedence = -1):
-    for action in [entry_action, match_action]:
-      assert isinstance(action, Term)
-    self.__entry_action = entry_action
-    self.__match_action = match_action
-    self.__precedence = precedence
-
-  def entry_action(self):
-    return self.__entry_action
-
-  def match_action(self):
-    return self.__match_action
-
-  def precedence(self):
-    return self.__precedence
-
-  def __nonzero__(self):
-    return bool(self.__entry_action) or bool(self.__match_action)
-
-  def __hash__(self):
-    return hash((self.__entry_action, self.__match_action))
-
-  def __eq__(self, other):
-    return (isinstance(other, self.__class__) and
-            self.__entry_action == other.__entry_action and
-            self.__match_action == other.__match_action)
-
-  def __str__(self):
-    parts = []
-    for action in [self.__entry_action, self.__match_action]:
-      parts.append('' if not action else str(action))
-    return "action< %s >" % " | ".join(parts)
-
 class AutomatonState(object):
+  '''A base class for dfa and nfa states.  Immutable'''

   __node_number_counter = 0

=======================================
--- /branches/experimental/parser/tools/lexer_generator/code_generator.py Mon Feb 3 15:08:36 2014 UTC +++ /branches/experimental/parser/tools/lexer_generator/code_generator.py Mon Feb 3 21:28:33 2014 UTC
@@ -47,7 +47,7 @@
     else:
       dfa = rule_processor.default_automata().dfa()
     self.__dfa = dfa
-    self.__default_action = rule_processor.default_action
+    self.__default_action = rule_processor.default_action()
     self.__debug_print = debug_print
     self.__start_node_number = self.__dfa.start_state().node_number()
     self.__log = log
=======================================
--- /branches/experimental/parser/tools/lexer_generator/generator.py Fri Jan 17 13:38:07 2014 UTC +++ /branches/experimental/parser/tools/lexer_generator/generator.py Mon Feb 3 21:28:33 2014 UTC
@@ -86,11 +86,11 @@
 def generate_rule_tree_html(rule_processor):
   scripts = []
   loads = []
-  rule_tree_dots = rule_processor.rule_tree_dots()
-  for i, (rule, dot) in enumerate(rule_tree_dots.items()):
+ for i, (name, automata) in enumerate(list(rule_processor.automata_iter())):
     rule_i = "rule_%d" % i
+    dot = automata.rule_term().to_dot()
     scripts.append(script_template % (rule_i, dot))
-    loads.append(load_template % ("rules [%s]" % rule, rule_i))
+    loads.append(load_template % ("rules [%s]" % name, rule_i))
   body = "\n".join(scripts) + (load_outer_template % "\n".join(loads))
   return file_template % body

=======================================
--- /branches/experimental/parser/tools/lexer_generator/lexer_test.py Mon Feb 3 15:08:36 2014 UTC +++ /branches/experimental/parser/tools/lexer_generator/lexer_test.py Mon Feb 3 21:28:33 2014 UTC
@@ -39,7 +39,7 @@
     automata = rule_processor.default_automata()
for automaton in [automata.nfa(), automata.dfa(), automata.minimal_dfa()]:
         for i, (action, start, stop) in enumerate(
-            automaton.lex(string, rule_processor.default_action)):
+            automaton.lex(string, rule_processor.default_action())):
           self.assertEquals(expected[i][0], action)
           self.assertEquals(expected[i][1], string[start : stop])

=======================================
--- /branches/experimental/parser/tools/lexer_generator/nfa_builder.py Thu Jan 23 11:39:04 2014 UTC +++ /branches/experimental/parser/tools/lexer_generator/nfa_builder.py Mon Feb 3 21:28:33 2014 UTC
@@ -27,6 +27,7 @@

 from types import TupleType
 from inspect import getmembers
+from action import *
 from nfa import *

 # Nfa is built in two stages:
@@ -40,7 +41,7 @@

 class NfaBuilder(object):

-  def __init__(self, encoding, character_classes = {}):
+  def __init__(self, encoding, character_classes):
     self.__node_number = 0
     self.__operation_map = {}
     self.__members = getmembers(self)
@@ -58,39 +59,39 @@
       self.__global_end_node = self.__new_state()
     return self.__global_end_node

-  def __or(self, graph):
+  def __or(self, args):
     start = self.__new_state()
     ends = []
-    for x in [self.__process(graph[1]), self.__process(graph[2])]:
+    for x in [self.__process(args[0]), self.__process(args[1])]:
       start.add_epsilon_transition(x[0])
       ends += x[1]
     start.close(None)
     return (start, ends)

-  def __one_or_more(self, graph):
-    (start, ends) = self.__process(graph[1])
+  def __one_or_more(self, args):
+    (start, ends) = self.__process(args[0])
     end =  self.__new_state()
     end.add_epsilon_transition(start)
     self.__patch_ends(ends, end)
     return (start, [end])

-  def __zero_or_more(self, graph):
-    (node, ends) = self.__process(graph[1])
+  def __zero_or_more(self, args):
+    (node, ends) = self.__process(args[0])
     start =  self.__new_state()
     start.add_epsilon_transition(node)
     self.__patch_ends(ends, start)
     return (start, [start])

-  def __zero_or_one(self, graph):
-    (node, ends) = self.__process(graph[1])
+  def __zero_or_one(self, args):
+    (node, ends) = self.__process(args[0])
     start =  self.__new_state()
     start.add_epsilon_transition(node)
     return (start, ends + [start])

-  def __repeat(self, graph):
-    param_min = int(graph[1])
-    param_max = int(graph[2])
-    subgraph = graph[3]
+  def __repeat(self, args):
+    param_min = int(args[0])
+    param_max = int(args[1])
+    subgraph = args[2]
     (start, ends) = self.__process(subgraph)
     for i in xrange(1, param_min):
       (start2, ends2) = self.__process(subgraph)
@@ -109,8 +110,8 @@

     return (start, ends + midpoints)

-  def __cat(self, graph):
-    (left, right) = (self.__process(graph[1]), self.__process(graph[2]))
+  def __cat(self, args):
+    (left, right) = (self.__process(args[0]), self.__process(args[1]))
     self.__patch_ends(left[1], right[0])
     return (left[0], right[1])

@@ -119,31 +120,28 @@
     state.add_unclosed_transition(key)
     return (state, [state])

-  def __literal(self, graph):
+  def __literal(self, args):
+    assert len(args) == 1
     return self.__key_state(
-      TransitionKey.single_char(self.__encoding, graph[1]))
+      TransitionKey.single_char(self.__encoding, args[0]))

-  def __class(self, graph):
+  def __class(self, args):
+    assert len(args) == 1
     return self.__key_state(TransitionKey.character_class(
-      self.__encoding, graph, self.__character_classes))
+      self.__encoding, Term('CLASS', args[0]), self.__character_classes))

-  def __not_class(self, graph):
+  def __not_class(self, args):
+    assert len(args) == 1
     return self.__key_state(TransitionKey.character_class(
-      self.__encoding, graph, self.__character_classes))
+ self.__encoding, Term('NOT_CLASS', args[0]), self.__character_classes))

-  def __any(self, graph):
+  def __any(self, args):
     return self.__key_state(TransitionKey.any(self.__encoding))

-  def __epsilon(self, graph):
-    start = self.__new_state()
+  def __action(self, args):
+    (start, ends) = self.__process(args[0])
+    action = Action.from_term(args[1])
     end = self.__new_state()
-    start.close(end)
-    return (start, [end])
-
-  def __action(self, graph):
-    (start, ends) = self.__process(graph[1])
-    action = graph[2]
-    end = self.__new_state()
     self.__patch_ends(ends, end)
     end.set_action(action)
     if action.match_action():
@@ -151,19 +149,18 @@
       end.add_epsilon_transition(global_end)
     return (start, [end])

-  def __continue(self, graph):
-    (start, ends) = self.__process(graph[1])
+  def __continue(self, args):
+    (start, ends) = self.__process(args[0])
     state = self.__peek_state()
     if not state['start_node']:
       state['start_node'] = self.__new_state()
     self.__patch_ends(ends, state['start_node'])
     return (start, [])

-  def __unique_key(self, graph):
-    return self.__key_state(TransitionKey.unique(graph[1]))
+  def __unique_key(self, args):
+    return self.__key_state(TransitionKey.unique(args[0]))

-  def __join(self, graph):
-    (graph, name, subgraph) = graph[1:]
+  def __join(self, (graph, name, subgraph)):
     subgraphs = self.__peek_state()['subgraphs']
     if not name in subgraphs:
       subgraphs[name] = self.__nfa(subgraph)
@@ -177,14 +174,14 @@
     else:
       return (start, [])

-  def __process(self, graph):
-    assert type(graph) == TupleType
-    method = "_NfaBuilder__" + graph[0].lower()
+  def __process(self, term):
+    assert isinstance(term, Term)
+    method = "_NfaBuilder__" + term.name().lower()
     if not method in self.__operation_map:
       matches = filter(lambda (name, func): name == method, self.__members)
       assert len(matches) == 1
       self.__operation_map[method] = matches[0][1]
-    return self.__operation_map[method](graph)
+    return self.__operation_map[method](term.args())

   def __patch_ends(self, ends, new_end):
     for end in ends:
@@ -203,10 +200,10 @@
   def __peek_state(self):
     return self.__states[-1]

-  def __nfa(self, graph):
+  def __nfa(self, term):
     start_node_number = self.__node_number
     self.__push_state()
-    (start, ends) = self.__process(graph)
+    (start, ends) = self.__process(term)
     state = self.__pop_state()
     if state['start_node']:
       state['start_node'].close(start)
@@ -257,8 +254,11 @@
       transitions[inverse_key] = transitions[catch_all]
     del transitions[catch_all]

-  def nfa(self, graph):
-    (start, end, nodes_created) = self.__nfa(graph)
+  @staticmethod
+  def nfa(encoding, character_classes, rule_term):
+
+    self = NfaBuilder(encoding, character_classes)
+    (start, end, nodes_created) = self.__nfa(rule_term)

     global_end_to_return = self.__global_end_node or end
     assert global_end_to_return
@@ -267,74 +267,35 @@

     global_end_to_return.close(None)

- # FIXME: the same NfaBuilder should not be called twice, so this cleanup
-    # should be unnecessary.
-    self.__global_end_node = None
-
     self.__compute_epsilon_closures(start)
     f = lambda node, state: self.__replace_catch_all(self.__encoding, node)
     Automaton.visit_states(set([start]), f)

     return Nfa(self.__encoding, start, global_end_to_return, nodes_created)

-  @staticmethod
-  def rule_tree_dot(graph):
-    node_ix = [0]
-    node_template = 'node [label="%s"]; N_%d;'
-    edge_template = 'N_%d -> N_%d'
-    nodes = []
-    edges = []
-
-    def escape(v):
-      v = str(v)
- v = v.replace('\r', '\\\\r').replace('\t', '\\\\t').replace('\n', '\\\\n')
-      v = v.replace('\\', '\\\\').replace('\"', '\\\"')
-      return v
-
-    def process_thing(thing):
-      if isinstance(thing, str):
-        node_ix[0] += 1
-        nodes.append(node_template % (escape(thing), node_ix[0]))
-        return node_ix[0]
-      if isinstance(thing, tuple):
-        child_ixs = map(process_thing, list(thing)[1:])
-        node_ix[0] += 1
-        nodes.append(node_template % (escape(thing[0]), node_ix[0]))
-        for child_ix in child_ixs:
-          edges.append(edge_template % (node_ix[0], child_ix))
-        return node_ix[0]
-      if isinstance(thing, Action):
-        node_ix[0] += 1
-        nodes.append(node_template % (str(thing), node_ix[0]))
-        return node_ix[0]
-      raise Exception
-
-    process_thing(graph)
-    return 'digraph { %s %s }' % ('\n'.join(nodes), '\n'.join(edges))
-
   @staticmethod
-  def add_action(graph, action):
-    return ('ACTION', graph, action)
+  def add_action(term, action):
+    return Term('ACTION', term, action.to_term())

   @staticmethod
-  def add_continue(graph):
-    return ('CONTINUE', graph)
+  def add_continue(term):
+    return Term('CONTINUE', term)

   @staticmethod
   def unique_key(name):
-    return ('UNIQUE_KEY', name)
+    return Term('UNIQUE_KEY', name)

   @staticmethod
-  def join_subgraph(graph, name, subgraph):
-    return ('JOIN', graph, name, subgraph)
+  def join_subgraph(term, name, subgraph_term):
+    return Term('JOIN', term, name, subgraph_term)

   @staticmethod
-  def or_graphs(graphs):
-    return reduce(lambda acc, g: ('OR', acc, g), graphs)
+  def or_terms(terms):
+    return reduce(lambda acc, g: Term('OR', acc, g), terms)

   @staticmethod
-  def cat_graphs(graphs):
-    return reduce(lambda acc, g: ('CAT', acc, g), graphs)
+  def cat_terms(terms):
+    return reduce(lambda acc, g: Term('CAT', acc, g), terms)

   __modifer_map = {
     '+': 'ONE_OR_MORE',
@@ -343,5 +304,5 @@
   }

   @staticmethod
-  def apply_modifier(modifier, graph):
-    return (NfaBuilder.__modifer_map[modifier], graph)
+  def apply_modifier(modifier, term):
+    return Term(NfaBuilder.__modifer_map[modifier], term)
=======================================
--- /branches/experimental/parser/tools/lexer_generator/regex_parser.py Thu Nov 7 12:03:35 2013 UTC +++ /branches/experimental/parser/tools/lexer_generator/regex_parser.py Mon Feb 3 21:28:33 2014 UTC
@@ -26,8 +26,9 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

 import ply.yacc as yacc
+from types import ListType, TupleType
 from regex_lexer import RegexLexer
-from types import ListType, TupleType
+from action import Term

 class RegexParser:

@@ -47,7 +48,7 @@
     if len(p) == 2:
       p[0] = p[1]
     else:
-      p[0] = (self.token_map[p[2]], p[1], p[3])
+      p[0] = Term(self.token_map[p[2]], p[1], p[3])

   def p_fragments(self, p):
     '''fragments : fragment
@@ -65,9 +66,9 @@
     '''
     if p[2] != None:
       if isinstance(p[2], tuple) and p[2][0] == 'REPEAT':
-        p[0] = (p[2][0], p[2][1], p[2][2], p[1])
+        p[0] = Term(p[2][0], p[2][1], p[2][2], p[1])
       else:
-        p[0] = (p[2], p[1])
+        p[0] = Term(p[2], p[1])
     else:
       p[0] = p[1]

@@ -91,19 +92,19 @@

   def p_literal(self, p):
     '''literal : LITERAL'''
-    p[0] = ('LITERAL', p[1])
+    p[0] = Term('LITERAL', p[1])

   def p_any(self, p):
     '''any : ANY'''
-    p[0] = (self.token_map[p[1]],)
+    p[0] = Term(self.token_map[p[1]])

   def p_class(self, p):
     '''class : CLASS_BEGIN class_content CLASS_END
              | CLASS_BEGIN NOT class_content CLASS_END'''
     if len(p) == 4:
-      p[0] = ("CLASS", p[2])
+      p[0] = Term("CLASS", p[2])
     else:
-      p[0] = ("NOT_CLASS", p[3])
+      p[0] = Term("NOT_CLASS", p[3])

   def p_group(self, p):
     '''group : GROUP_BEGIN start GROUP_END'''
@@ -116,14 +117,14 @@
                      | CLASS_LITERAL_AS_OCTAL maybe_class_content
     '''
     if len(p) == 5:
-      left = ("RANGE", p[1], p[3])
+      left = Term("RANGE", p[1], p[3])
     else:
       if len(p[1]) == 1:
-        left = ('LITERAL', p[1])
+        left = Term('LITERAL', p[1])
       elif p[1][0] == '\\':
-        left = ('LITERAL', chr(int(p[1][1:], 8)))
+        left = Term('LITERAL', chr(int(p[1][1:], 8)))
       else:
-        left = ('CHARACTER_CLASS', p[1][1:-1])
+        left = Term('CHARACTER_CLASS', p[1][1:-1])
     p[0] = self.__cat(left, p[len(p)-1])

   def p_maybe_class_content(self, p):
@@ -139,9 +140,8 @@

   @staticmethod
   def __cat(left, right):
-    if right == None:
-      return left
-    return ('CAT', left, right)
+    assert left
+    return left if not right else Term('CAT', left, right)

   def build(self, **kwargs):
     self.parser = yacc.yacc(module=self, debug=0, write_tables=0, **kwargs)
=======================================
--- /branches/experimental/parser/tools/lexer_generator/rule_parser.py Mon Feb 3 15:08:36 2014 UTC +++ /branches/experimental/parser/tools/lexer_generator/rule_parser.py Mon Feb 3 21:28:33 2014 UTC
@@ -26,7 +26,7 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

 import ply.yacc as yacc
-from automaton import Term, Action
+from action import Term, Action
 from rule_lexer import RuleLexer
 from regex_parser import RegexParser
 from nfa_builder import NfaBuilder
@@ -67,13 +67,13 @@
     'alias_rule : IDENTIFIER EQUALS composite_regex SEMICOLON'
     state = self.__state
     assert not p[1] in state.aliases
-    graph = p[3]
-    state.aliases[p[1]] = graph
-    if graph[0] == 'CLASS' or graph[0] == 'NOT_CLASS':
+    term = p[3]
+    state.aliases[p[1]] = term
+    if term.name() == 'CLASS' or term.name() == 'NOT_CLASS':
       classes = state.character_classes
-      assert not p[1] in classes
+      assert not p[1] in classes, "cannot reassign alias"
       encoding = state.encoding
- classes[p[1]] = TransitionKey.character_class(encoding, graph, classes) + classes[p[1]] = TransitionKey.character_class(encoding, term, classes)

   def p_rules(self, p):
     '''rules : state_change transition_rules rules
@@ -171,12 +171,12 @@
     if len(p) == 2:
       p[0] = p[1]
     else:
-      p[0] = NfaBuilder.or_graphs([p[1], p[3]])
+      p[0] = NfaBuilder.or_terms([p[1], p[3]])

   def p_regex_parts(self, p):
     '''regex_parts : regex_part
                    | regex_part regex_parts'''
-    p[0] = NfaBuilder.cat_graphs(p[1:])
+    p[0] = NfaBuilder.cat_terms(p[1:])

   def p_regex_part(self, p):
'''regex_part : LEFT_PARENTHESIS composite_regex RIGHT_PARENTHESIS modifier
@@ -185,11 +185,11 @@
                   | regex modifier
                   | regex_alias modifier'''
     modifier = p[len(p)-1]
-    graph = p[2] if len(p) == 5 else p[1]
+    term = p[2] if len(p) == 5 else p[1]
     if modifier:
-      p[0] = NfaBuilder.apply_modifier(modifier, graph)
+      p[0] = NfaBuilder.apply_modifier(modifier, term)
     else:
-      p[0] = graph
+      p[0] = term

   def p_regex_string_literal(self, p):
     'regex_string_literal : STRING'
@@ -197,7 +197,6 @@
     escape_char = lambda string, char: string.replace(char, "\\" + char)
string = reduce(escape_char, "+?*|.[](){}", string).replace("\\\"", "\"")
     p[0] = RegexParser.parse(string)
-
   def p_regex(self, p):
     'regex : REGEX'
     string = p[1][1:-1].replace("\\/", "/")
@@ -251,6 +250,7 @@
   def __init__(self, parser_state):
     self.__automata = {}
     self.__rule_trees = {}
+    self.__default_action = None
     self.__process_parser_state(parser_state)

   @staticmethod
@@ -265,24 +265,26 @@
   def default_automata(self):
     return self.__automata['default']

-  def rule_tree_dots(self):
-    result = {}
-    for rule in self.__rule_trees:
-      result[rule] = NfaBuilder.rule_tree_dot(self.__rule_trees[rule])
-    return result
+  def default_action(self):
+    return self.__default_action

   class Automata(object):

-    def __init__(self, builder, graph):
-      self.__builder = builder
-      self.__graph = graph
+    def __init__(self, encoding, character_classes, rule_term):
+      self.__encoding = encoding
+      self.__character_classes = character_classes
+      self.__rule_term = rule_term
       self.__nfa = None
       self.__dfa = None
       self.__minimial_dfa = None

+    def rule_term(self):
+      return self.__rule_term
+
     def nfa(self):
       if not self.__nfa:
-        self.__nfa = self.__builder.nfa(self.__graph)
+        self.__nfa = NfaBuilder.nfa(
+          self.__encoding, self.__character_classes, self.__rule_term)
       return self.__nfa

     def dfa(self):
@@ -302,15 +304,14 @@

   def __process_parser_state(self, parser_state):
     rule_map = {}
- builder = NfaBuilder(parser_state.encoding, parser_state.character_classes)
     assert 'default' in parser_state.rules
     def process(subgraph, v):
       graphs = []
       for graph, precedence, action in v['regex']:
         (entry_action, match_action, transition) = action
         if entry_action or match_action:
-          action = Action(entry_action, match_action, precedence)
-          graph = NfaBuilder.add_action(graph, action)
+          graph = NfaBuilder.add_action(
+            graph, Action(entry_action, match_action, precedence))
         if not transition:
           pass
         elif transition == 'continue':
@@ -321,7 +322,7 @@
           graph = NfaBuilder.join_subgraph(
             graph, transition, rule_map[transition])
         graphs.append(graph)
-      graph = NfaBuilder.or_graphs(graphs)
+      graph = NfaBuilder.or_terms(graphs)
       rule_map[subgraph] = graph
     # process first the subgraphs, then the default graph
     for k, v in parser_state.rules.items():
@@ -330,8 +331,9 @@
     process('default', parser_state.rules['default'])
     # build the automata
     for rule_name, graph in rule_map.items():
-      self.__automata[rule_name] = RuleProcessor.Automata(builder, graph)
+      self.__automata[rule_name] = RuleProcessor.Automata(
+        parser_state.encoding, parser_state.character_classes, graph)
       self.__rule_trees[rule_name] = graph
     # process default_action
     default_action = parser_state.rules['default']['default_action']
-    self.default_action = Action(Term.empty_term(), default_action)
+    self.__default_action = Action(Term.empty_term(), default_action)
=======================================
--- /branches/experimental/parser/tools/lexer_generator/term_test.py Mon Feb 3 14:12:24 2014 UTC +++ /branches/experimental/parser/tools/lexer_generator/term_test.py Mon Feb 3 21:28:33 2014 UTC
@@ -26,7 +26,7 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

 import unittest
-from automaton import Term
+from action import Term

 class TermTestCase(unittest.TestCase):

=======================================
--- /branches/experimental/parser/tools/lexer_generator/transition_key_test.py Fri Nov 22 08:40:59 2013 UTC +++ /branches/experimental/parser/tools/lexer_generator/transition_key_test.py Mon Feb 3 21:28:33 2014 UTC
@@ -66,9 +66,9 @@
         else:
           regex = "[%s]" % string
           token = "CLASS"
-        graph = RegexParser.parse(regex)
-        assert graph[0] == token
-        key = TransitionKey.character_class(encoding, graph, classes)
+        term = RegexParser.parse(regex)
+        assert term.name() == token
+        key = TransitionKey.character_class(encoding, term, classes)
         for c in match:
           self.assertEqual(invert, not key.matches_char(c))
         for c in no_match:
=======================================
--- /branches/experimental/parser/tools/lexer_generator/transition_keys.py Wed Jan 29 13:19:19 2014 UTC +++ /branches/experimental/parser/tools/lexer_generator/transition_keys.py Mon Feb 3 21:28:33 2014 UTC
@@ -182,17 +182,18 @@
     return TransitionKey.__cached_key(None, name, get_bounds)

   @staticmethod
-  def __process_graph(encoding, graph, ranges, key_map):
-    key = graph[0]
+  def __process_term(encoding, term, ranges, key_map):
+    key = term.name()
+    args = term.args()
     if key == 'RANGE':
-      ranges.append((ord(graph[1]), ord(graph[2])))
+      ranges.append((ord(args[0]), ord(args[1])))
     elif key == 'LITERAL':
-      ranges.append((ord(graph[1]), ord(graph[1])))
+      ranges.append((ord(args[0]), ord(args[0])))
     elif key == 'CAT':
-      for x in [graph[1], graph[2]]:
-        TransitionKey.__process_graph(encoding, x, ranges, key_map)
+      for x in [args[0], args[1]]:
+        TransitionKey.__process_term(encoding, x, ranges, key_map)
     elif key == 'CHARACTER_CLASS':
-      class_name = graph[1]
+      class_name = args[0]
       if encoding.class_range(class_name):
         r = encoding.class_range(class_name)
         if class_name in key_map:
@@ -206,13 +207,13 @@
       elif class_name in key_map:
         ranges += key_map[class_name].__ranges
       else:
-        raise Exception('unknown character class [%s]' % graph[1])
+        raise Exception('unknown character class [%s]' % args[0])
     else:
       raise Exception('bad key [%s]' % key)

   @staticmethod
-  def character_class(encoding, graph, key_map):
-    '''Processes 'graph' (a representation of a character class in the rule
+  def character_class(encoding, term, key_map):
+    '''Processes 'term' (a representation of a character class in the rule
     file), and constructs a TransitionKey based on it. 'key_map' contains
already constructed aliases for character classes (they can be used in the new character class). It is a map from strings (character class names) to
@@ -220,9 +221,10 @@
[a-z:digit:] where 'digit' is a previously constructed character class found
     in "key_map".'''
     ranges = []
-    assert graph[0] == 'CLASS' or graph[0] == 'NOT_CLASS'
-    invert = graph[0] == 'NOT_CLASS'
-    TransitionKey.__process_graph(encoding, graph[1], ranges, key_map)
+    assert term.name() == 'CLASS' or term.name() == 'NOT_CLASS'
+    invert = term.name() == 'NOT_CLASS'
+    assert len(term.args()) == 1
+    TransitionKey.__process_term(encoding, term.args()[0], ranges, key_map)
     return TransitionKey.__key_from_ranges(encoding, invert, ranges)

   def matches_char(self, char):

--
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
--- You received this message because you are subscribed to the Google Groups "v8-dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
For more options, visit https://groups.google.com/groups/opt_out.

Reply via email to