Revision: 19028
Author:   [email protected]
Date:     Mon Feb  3 14:12:24 2014 UTC
Log:      Experimental parser: some tuple removal

[email protected]

BUG=

Review URL: https://codereview.chromium.org/152513004
http://code.google.com/p/v8/source/detail?r=19028

Added:
 /branches/experimental/parser/tools/lexer_generator/term_test.py
Modified:
 /branches/experimental/parser/tools/lexer_generator/automaton.py
 /branches/experimental/parser/tools/lexer_generator/code_generator.jinja
 /branches/experimental/parser/tools/lexer_generator/code_generator.py
 /branches/experimental/parser/tools/lexer_generator/dfa_optimizer.py
 /branches/experimental/parser/tools/lexer_generator/lexer_test.py
 /branches/experimental/parser/tools/lexer_generator/rule_parser.py
 /branches/experimental/parser/tools/lexer_generator/test_suite.py

=======================================
--- /dev/null
+++ /branches/experimental/parser/tools/lexer_generator/term_test.py Mon Feb 3 14:12:24 2014 UTC
@@ -0,0 +1,51 @@
+# Copyright 2014 the V8 project authors. All rights reserved.
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above
+#       copyright notice, this list of conditions and the following
+#       disclaimer in the documentation and/or other materials provided
+#       with the distribution.
+#     * Neither the name of Google Inc. nor the names of its
+#       contributors may be used to endorse or promote products derived
+#       from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import unittest
+from automaton import Term
+
+class TermTestCase(unittest.TestCase):
+
+  def test_basic(self):
+
+    t = Term('a')
+    self.assertEqual('a', t.name())
+    self.assertEqual((), t.args())
+    self.assertEqual("(a)", str(t))
+    self.assertEqual(Term('a'), t)
+
+    t = Term('a', 'b', 'c')
+    self.assertEqual('a', t.name())
+    self.assertEqual(('b', 'c'), t.args())
+    self.assertEqual("(a,b,c)", str(t))
+    self.assertEqual(Term('a', 'b', 'c'), t)
+
+    t = Term('a', Term('b', 'c'))
+    self.assertEqual('a', t.name())
+    self.assertEqual((Term('b', 'c'), ), t.args())
+    self.assertEqual("(a,(b,c))", str(t))
+    self.assertEqual(Term('a', Term('b', 'c')), t)
=======================================
--- /branches/experimental/parser/tools/lexer_generator/automaton.py Tue Jan 21 15:59:38 2014 UTC +++ /branches/experimental/parser/tools/lexer_generator/automaton.py Mon Feb 3 14:12:24 2014 UTC
@@ -25,10 +25,48 @@
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

-from types import TupleType, ListType
+from types import TupleType, ListType, StringType
 from itertools import chain
 from transition_keys import TransitionKey

+class Term(object):
+  '''A class representing a function and its arguments.
+  f(a,b,c) would be represented as ('f', a, b, c) where
+  a, b, and c are strings or Terms.'''
+
+  @staticmethod
+  def __verify_string(v):
+    assert (not ',' in v) and (not '(' in v)
+
+  def __init__(self, name, *args):
+    assert type(name) == StringType
+    self.__verify_string(name)
+    for v in args:
+      if type(v) == StringType:
+        self.__verify_string(v)
+      else:
+        assert isinstance(v, self.__class__)
+    self.__tuple = tuple([name] + list(args))
+    self.__str = None
+
+  def name(self):
+    return self.__tuple[0]
+
+  def args(self):
+    return self.__tuple[1:]
+
+  def  __hash__(self):
+    return hash(self.__tuple)
+
+  def __eq__(self, other):
+    return (isinstance(other, self.__class__) and
+            self.__tuple == other.__tuple)
+
+  def __str__(self):
+    if self.__str == None:
+      self.__str = '(%s)' % ','.join(map(str, self.__tuple))
+    return self.__str
+
 class Action(object):

   @staticmethod
@@ -50,8 +88,7 @@
     for action in [entry_action, match_action]:
       if action == None:
         continue
-      assert type(action) == TupleType and len(action)
-      assert action[0] != None
+      assert isinstance(action, Term)
     assert entry_action or match_action
     self.__entry_action = entry_action
     self.__match_action = match_action
@@ -79,9 +116,7 @@
     for action in [self.__entry_action, self.__match_action]:
       part = ""
       if action:
-        part += action[0]
-        if action[1]:
-          part += "(%s)" % str(action[1])
+        part += str(action)
       parts.append(part)
     return "action< %s >" % " | ".join(parts)

=======================================
--- /branches/experimental/parser/tools/lexer_generator/code_generator.jinja Wed Jan 29 13:19:19 2014 UTC +++ /branches/experimental/parser/tools/lexer_generator/code_generator.jinja Mon Feb 3 14:12:24 2014 UTC
@@ -65,11 +65,11 @@

 {#- entry actions must not change the value of the cursor
     unless they jump -#}
-{% macro dispatch_entry_action(type, value) -%}
+{% macro dispatch_entry_action(type, args) -%}
   {% if type == 'store_token' %}
-    stored_token = Token::{{value}};
+    stored_token = Token::{{args[0]}};
   {% elif type == 'set_marker' %}
-    marker = cursor_ - {{value}};
+    marker = cursor_ - {{args[0]}};
   {% elif type == 'set_has_escapes' %}
     next_.has_escapes = true;
   {% elif type == 'octal_inside_string' %}
@@ -85,16 +85,16 @@
     next_.has_escapes = true;
   {% elif type == 'if_line_terminator_backtrack' %}
     if (!has_line_terminator_before_next_) {
-      {{dispatch_match_action('backtrack', value)}}
+      {{dispatch_match_action('backtrack', args)}}
     }
   {% else %}
-    uncompilable code for {{type}} {{value}}
+    uncompilable code for {{type}} {{args}}
   {% endif -%}
 {%- endmacro -%}


 {#- match actions must all explicitly jump or return -#}
-{% macro dispatch_match_action(type, value) -%}
+{% macro dispatch_match_action(type, args) -%}
   {% if type == 'terminate' %}
     {{dispatch_match_action('backtrack', ('1', 'EOS'))}}
   {% elif type == 'terminate_illegal' %}
@@ -118,45 +118,45 @@
     has_line_terminator_before_next_ = true;
     goto state_entry_0;
   {% elif type == 'token' %}
-    DO_TOKEN(Token::{{value}})
+    DO_TOKEN(Token::{{args[0]}})
     return;
   {% elif type == 'goto_start' %}
-    {{ jump(value[0]) }}
+    {{ jump(args[0]|int) }}
   {% elif type == 'store_token_and_goto' %}
-    stored_token = Token::{{value[0]}};
-    {{ jump(value[1]) }}
+    stored_token = Token::{{args[0]}};
+    {{ jump(args[1]|int) }}
   {% elif type == 'do_stored_token' %}
     DO_TOKEN(stored_token)
     return;
   {% elif type == 'do_token_and_go_forward' %}
-    DO_TOKEN(Token::{{value}});
+    DO_TOKEN(Token::{{args[0]}});
     FORWARD();
     RESET_START();
     return;
   {% elif type == 'harmony_token' %}
-    if (harmony_{{value[0]}}_) {
-      DO_TOKEN(Token::{{value[1]}});
+    if (harmony_{{args[0]}}_) {
+      DO_TOKEN(Token::{{args[1]}});
     } else {
-      DO_TOKEN(Token::{{value[2]}});
+      DO_TOKEN(Token::{{args[2]}});
     }
     return;
   {% elif type == 'store_harmony_token_and_goto' %}
-    if (harmony_{{value[0]}}_) {
-      stored_token = Token::{{value[1]}};
+    if (harmony_{{args[0]}}_) {
+      stored_token = Token::{{args[1]}};
     } else {
-      stored_token = Token::{{value[2]}};
+      stored_token = Token::{{args[2]}};
     }
-    {{ jump(value[3]) }}
+    {{ jump(args[3]|int) }}
   {% elif type == 'octal_number' %}
     last_octal_end_ = cursor_;
     DO_TOKEN(Token::NUMBER);
     return;
   {% elif type == 'backtrack' %}
-    BACKWARD({{value[0]}});
-    DO_TOKEN(Token::{{value[1]}});
+    BACKWARD({{args[0]}});
+    DO_TOKEN(Token::{{args[1]}});
     return;
   {% else %}
-    uncompilable code for {{type}} {{value}}
+    uncompilable code for {{type}} {{args}}
   {% endif -%}
 {%- endmacro -%}

@@ -219,7 +219,7 @@

   {%- set entry_action = state.entry_action -%}
   {%- if entry_action %}
-    {{ dispatch_entry_action(entry_action[0], entry_action[1]) }}
+    {{ dispatch_entry_action(entry_action.name(), entry_action.args()) }}
   {%- endif %}

   {{ write_label('after_entry_code', node_number) }}
@@ -279,7 +279,7 @@

   {%- set match_action = state.match_action -%}
   {%- if match_action %}
-    {{ dispatch_match_action(match_action[0], match_action[1]) }}
+    {{ dispatch_match_action(match_action.name(), match_action.args()) }}
     CRASH();
   {% else %}
     goto default_action;
@@ -343,7 +343,7 @@
 {%- if debug_print %}
     fprintf(stderr, "default action\n");
 {% endif -%}
-    {{dispatch_match_action(default_action[0], default_action[1])}}
+    {{dispatch_match_action(default_action.name(), default_action.args())}}
   CRASH();

   if (false) {
=======================================
--- /branches/experimental/parser/tools/lexer_generator/code_generator.py Wed Jan 29 13:19:19 2014 UTC +++ /branches/experimental/parser/tools/lexer_generator/code_generator.py Mon Feb 3 14:12:24 2014 UTC
@@ -30,6 +30,7 @@
 import jinja2
 from copy import deepcopy
 from dfa import Dfa
+from automaton import Term
 from transition_keys import TransitionKey

 class CodeGenerator:
@@ -297,9 +298,9 @@
     states = self.__dfa_states
     for state in states:
       if (state['match_action'] and
-          state['match_action'][0] == 'do_stored_token'):
-        assert not state['match_action'][1] in goto_map
-        goto_map[state['match_action'][1]] = state['node_number']
+          state['match_action'].name() == 'do_stored_token'):
+        assert not state['match_action'].args()[0] in goto_map
+        goto_map[state['match_action'].args()[0]] = state['node_number']
     mapped_actions = set([
       'store_harmony_token_and_goto',
       'store_token_and_goto',
@@ -307,9 +308,9 @@
     for state in states:
       if not state['match_action']:
         continue
-      action = state['match_action'][0]
+      action = state['match_action'].name()
       if action in mapped_actions:
-        value = state['match_action'][1]
+        value = state['match_action'].args()
         target_id = goto_map[value[-1]]
         states[target_id]['must_not_inline'] = True
         if action != 'goto_start':
@@ -319,7 +320,8 @@
           states[target_id]['can_elide_read'] = False
           label = 'state_entry'
         jump_label = self.__register_jump(target_id, label)
- state['match_action'] = (action, tuple(list(value[:-1]) + [jump_label]))
+        new_args = list(value[:-1]) + [str(jump_label)]
+        state['match_action'] = Term(action, *new_args)

   def __inlined_state(self, target_id):
     state = deepcopy(self.__dfa_states[target_id])
=======================================
--- /branches/experimental/parser/tools/lexer_generator/dfa_optimizer.py Wed Jan 22 13:29:05 2014 UTC +++ /branches/experimental/parser/tools/lexer_generator/dfa_optimizer.py Mon Feb 3 14:12:24 2014 UTC
@@ -26,7 +26,7 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

 from transition_keys import TransitionKey
-from automaton import Action
+from automaton import Term, Action
 from dfa import Dfa

 # --- Optimization: Replacing tokens with gotos ---
@@ -175,8 +175,8 @@
       action = state.action()
       if not action or not action.match_action():
         return False
-      if (action.match_action()[0] == 'token' or
-          action.match_action()[0] == 'harmony_token'):
+      if (action.match_action().name() == 'token' or
+          action.match_action().name() == 'harmony_token'):
         return True
       return False

@@ -225,22 +225,19 @@
     def replacement_action(old_action, transition_state):
       assert old_action.match_action()
       state_id = name(transition_state)
-      if old_action.match_action()[0] == 'token':
-        old_token = old_action.match_action()[1]
-        if (transition_state.action().match_action()[0] == 'token' and
-            transition_state.action().match_action()[1] == old_token):
+      if old_action.match_action().name() == 'token':
+        old_token = old_action.match_action().args()[0]
+        if (transition_state.action().match_action().name() == 'token' and
+ transition_state.action().match_action().args()[0] == old_token):
           # no need to store token
-          match_action = ('goto_start', (state_id,))
+          match_action = Term('goto_start', state_id)
           counters['goto_start'] += 1
         else:
           counters['store_token_and_goto'] += 1
-          match_action = ('store_token_and_goto', (old_token, state_id))
-      elif old_action.match_action()[0] == 'harmony_token':
-        match_action = ('store_harmony_token_and_goto',
-                        (old_action.match_action()[1][0],
-                         old_action.match_action()[1][1],
-                         old_action.match_action()[1][2],
-                         state_id))
+          match_action = Term('store_token_and_goto', old_token, state_id)
+      elif old_action.match_action().name() == 'harmony_token':
+        new_args = list(old_action.match_action().args()) + [state_id]
+        match_action = Term('store_harmony_token_and_goto', *new_args)
         counters['store_harmony_token_and_goto'] += 1
       else:
         raise Exception(old_action.match_action())
@@ -278,9 +275,9 @@
     for state_id in store_states:
       old_action = states[state_id]['action']
       assert not old_action.entry_action()
-      assert old_action.match_action()[0] == 'token', 'unimplemented'
-      entry_action = ('store_token', old_action.match_action()[1])
-      match_action = ('do_stored_token', state_id)
+      assert old_action.match_action().name() == 'token', 'unimplemented'
+ entry_action = Term('store_token', old_action.match_action().args()[0])
+      match_action = Term('do_stored_token', state_id)
       precedence = old_action.precedence()
       states[state_id]['action'] = Action(
         entry_action, match_action, precedence)
=======================================
--- /branches/experimental/parser/tools/lexer_generator/lexer_test.py Thu Jan 23 11:39:04 2014 UTC +++ /branches/experimental/parser/tools/lexer_generator/lexer_test.py Mon Feb 3 14:12:24 2014 UTC
@@ -26,13 +26,13 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

 import unittest
-from automaton import Action
+from automaton import Term, Action
 from rule_parser import RuleProcessor

 class LexerTestCase(unittest.TestCase):

   def __verify_action_stream(self, rules, string, expected):
-    expected = map(lambda (action, s) : (Action(None, (action, None)), s),
+    expected = map(lambda (action, s) : (Action(None, Term(action)), s),
                    expected)
     rule_processor = RuleProcessor.parse(rules, 'latin1')
     automata = rule_processor.default_automata()
@@ -44,7 +44,7 @@

   @staticmethod
   def __terminate():
-    return (Action(None, ('terminate', None)), '\0')
+    return ('terminate', '\0')

   def test_simple(self):
     rules = '''
=======================================
--- /branches/experimental/parser/tools/lexer_generator/rule_parser.py Thu Jan 23 11:39:04 2014 UTC +++ /branches/experimental/parser/tools/lexer_generator/rule_parser.py Mon Feb 3 14:12:24 2014 UTC
@@ -26,7 +26,7 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

 import ply.yacc as yacc
-from automaton import Action
+from automaton import Term, Action
 from rule_lexer import RuleLexer
 from regex_parser import RegexParser
 from nfa_builder import NfaBuilder
@@ -148,12 +148,9 @@
                          | IDENTIFIER LEFT_PARENTHESIS RIGHT_PARENTHESIS
| IDENTIFIER LEFT_PARENTHESIS action_params RIGHT_PARENTHESIS'''
     if len(p) == 2 or len(p) == 4:
-      p[0] = (p[1], None)
+      p[0] = Term(p[1])
     elif len(p) == 5:
-      if len(p[3]) == 1:
-        p[0] = (p[1], p[3][0])
-      else:
-        p[0] = (p[1], p[3])
+        p[0] = Term(p[1], *p[3])
     else:
       raise Exception()

=======================================
--- /branches/experimental/parser/tools/lexer_generator/test_suite.py Wed Nov 20 11:29:01 2013 UTC +++ /branches/experimental/parser/tools/lexer_generator/test_suite.py Mon Feb 3 14:12:24 2014 UTC
@@ -27,6 +27,7 @@

 from unittest import TestLoader, TextTestRunner, TestSuite

+from term_test import *
 from automata_test import *
 from code_generator_test import *
 from lexer_test import *
@@ -36,6 +37,7 @@
 if __name__ == "__main__":
   loader = TestLoader()
   suite = TestSuite((
+    loader.loadTestsFromTestCase(TermTestCase),
     loader.loadTestsFromTestCase(TransitionKeyTestCase),
     loader.loadTestsFromTestCase(AutomataTestCase),
     loader.loadTestsFromTestCase(RuleParserTestCase),

--
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
--- You received this message because you are subscribed to the Google Groups "v8-dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
For more options, visit https://groups.google.com/groups/opt_out.

Reply via email to