Revision: 17442
Author:   [email protected]
Date:     Thu Oct 31 11:01:22 2013 UTC
Log:      Experimental parser: more tests, better key logic

[email protected]
BUG=

Review URL: https://codereview.chromium.org/54533002
http://code.google.com/p/v8/source/detail?r=17442

Added:
 /branches/experimental/parser/tools/lexer_generator/transition_key_test.py
Modified:
 /branches/experimental/parser/tools/lexer_generator/automata_test.py
 /branches/experimental/parser/tools/lexer_generator/nfa.py
 /branches/experimental/parser/tools/lexer_generator/transition_keys.py

=======================================
--- /dev/null
+++ /branches/experimental/parser/tools/lexer_generator/transition_key_test.py Thu Oct 31 11:01:22 2013 UTC
@@ -0,0 +1,48 @@
+# Copyright 2013 the V8 project authors. All rights reserved.
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above
+#       copyright notice, this list of conditions and the following
+#       disclaimer in the documentation and/or other materials provided
+#       with the distribution.
+#     * Neither the name of Google Inc. nor the names of its
+#       contributors may be used to endorse or promote products derived
+#       from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import unittest
+from transition_keys import TransitionKey
+
+class TransitionKeyTestCase(unittest.TestCase):
+
+  __equal_pairs = [
+    (TransitionKey.epsilon(), TransitionKey.epsilon()),
+    (TransitionKey.any(), TransitionKey.any()),
+    (TransitionKey.single_char('a'), TransitionKey.single_char('a')),
+  ]
+
+  def test_eq(self):
+    for (left, right) in self.__equal_pairs:
+      self.assertEqual(left, right)
+
+  def test_hash(self):
+    for (left, right) in self.__equal_pairs:
+      self.assertEqual(hash(left), hash(right))
+
+if __name__ == '__main__':
+    unittest.main()
=======================================
--- /branches/experimental/parser/tools/lexer_generator/automata_test.py Thu Oct 31 08:19:46 2013 UTC +++ /branches/experimental/parser/tools/lexer_generator/automata_test.py Thu Oct 31 11:01:22 2013 UTC
@@ -43,16 +43,22 @@

     # (pattern, should match, shouldn't match)
     __test_data = [
-      ("a", ["a"], ["b"]),
-      ("ab", ["ab"], ["bb"]),
-      ("a+b", ["ab", "aab", "aaab"], ["a", "b"]),
-      ("a?b", ["ab", "b"], ["a", "c"]),
-      ("a*b", ["ab", "aaab", "b"], ["a", "c"]),
-      ("a|b", ["a", "b"], ["ab", "c"]),
+      ("a", ["a"], ["b", ""]),
+      ("ab", ["ab"], ["bb", ""]),
+      ("a+b", ["ab", "aab", "aaab"], ["a", "b", ""]),
+      ("a?b", ["ab", "b"], ["a", "c", ""]),
+      ("a*b", ["ab", "aaab", "b"], ["a", "c", ""]),
+      ("a|b", ["a", "b"], ["ab", "c", ""]),
+      (".", ["a", "b"], ["", "aa"]),
+      (".*", ["", "a", "abcaabbcc"], []),
+      ("a.b", ["aab", "abb", "acb"], ["ab", ""]),
+      # ("a.?b", ["aab", "abb", "acb", "ab"], ["aaab", ""]),
+      # ("a.+b", ["aab", "abb", "acb", "ab"], ["aaab", ""]),
+      (".|.", ["a", "b"], ["aa", ""]),
     ]

     def test_matches(self):
-      for (regex, matches, not_matches) in AutomataTestCase.__test_data:
+      for (regex, matches, not_matches) in self.__test_data:
         (nfa, dfa) = build_automata(regex)
         for string in matches:
           self.assertTrue(nfa.matches(string))
@@ -61,5 +67,11 @@
           self.assertFalse(nfa.matches(string))
           self.assertFalse(dfa.matches(string))

+    def test_can_construct_dot(self):
+      for (regex, matches, not_matches) in self.__test_data:
+        (nfa, dfa) = build_automata(regex)
+        nfa.to_dot()
+        dfa.to_dot()
+
 if __name__ == '__main__':
     unittest.main()
=======================================
--- /branches/experimental/parser/tools/lexer_generator/nfa.py Thu Oct 31 08:19:46 2013 UTC +++ /branches/experimental/parser/tools/lexer_generator/nfa.py Thu Oct 31 11:01:22 2013 UTC
@@ -93,15 +93,15 @@
     return reduce(f, self.__transitions.items(), set())

   def char_matches(self, value):
-    return self.__matches((lambda k, v : k.matches_char(v)), value)
+    return self.__matches(lambda k, v : k.matches_char(v), value)

   def key_matches(self, value):
-    return self.__matches((lambda k, v : k.matches_key(v)), value)
+    return self.__matches(lambda k, v : k.matches_key(v), value)

   @staticmethod
   def gather_transition_keys(state_set):
     f = lambda acc, state: acc | set(state.__transitions.keys())
-    return TransitionKey.merge_key_set(reduce(f, state_set, set()))
+    return TransitionKey.disjoint_keys(reduce(f, state_set, set()))

 class NfaBuilder:

@@ -169,7 +169,7 @@
     assert type(graph) == TupleType
     method = "_NfaBuilder__" + graph[0].lower()
     if not method in self.__operation_map:
- matches = filter((lambda (name, func): name == method), self.__members)
+      matches = filter(lambda (name, func): name == method, self.__members)
       assert len(matches) == 1
       self.__operation_map[method] = matches[0][1]
     return self.__operation_map[method](graph)
@@ -253,6 +253,7 @@
   @staticmethod
   def __to_dfa(nfa_state_set, builder):
     nfa_state_set = Nfa.__close(nfa_state_set)
+    assert nfa_state_set
     name = str([x.node_number() for x in nfa_state_set])
     (dfa_nodes, end_nodes, end_node) = builder
     if name in dfa_nodes:
=======================================
--- /branches/experimental/parser/tools/lexer_generator/transition_keys.py Thu Oct 31 08:19:46 2013 UTC +++ /branches/experimental/parser/tools/lexer_generator/transition_keys.py Thu Oct 31 11:01:22 2013 UTC
@@ -25,7 +25,6 @@
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

-
 class TransitionKey:

   __lower_bound = 0
@@ -34,9 +33,20 @@
   __unicode_literal_bounds = (257, 257)
   __upper_bound = 257

+  @staticmethod
+  def __verify_ranges(ranges):
+ last = (TransitionKey.__lower_bound - 1, TransitionKey.__lower_bound - 1)
+    for r in ranges:
+      assert TransitionKey.__lower_bound <= r[0]
+      assert r[1] <= TransitionKey.__upper_bound
+      assert r[0] <= r[1]
+      assert last[1] < r[0]
+      # TODO check classes are always disjoint points
+      last = r
+
   @staticmethod
   def __create(ranges):
-    # TODO - verify ranges
+    TransitionKey.__verify_ranges(ranges)
     key = TransitionKey()
     key.__ranges = ranges
     key.__cached_hash = None
@@ -53,7 +63,11 @@
   @staticmethod
   def any():
     if (TransitionKey.__cached_any == None):
-      bounds = [(TransitionKey.__lower_bound, TransitionKey.__upper_bound)]
+      bounds = [
+        (TransitionKey.__lower_bound, TransitionKey.__latin_1_upper_bound),
+        TransitionKey.__unicode_whitespace_bounds,
+        TransitionKey.__unicode_literal_bounds,
+      ]
       TransitionKey.__cached_any = TransitionKey.__create(bounds)
     return TransitionKey.__cached_any

@@ -71,6 +85,7 @@

   def matches_char(self, char):
     char = ord(char)
+    # TODO class checks
     for r in self.__ranges:
       if r[0] <= char and char <= r[1]: return True
     return False
@@ -78,9 +93,10 @@
   def matches_key(self, key):
     assert isinstance(key, self.__class__)
     assert key != TransitionKey.epsilon()
-    if self == key: return True
-    if self == TransitionKey.epsilon(): return False
-    # TODO
+    assert len(key.__ranges) == 1
+    subkey = key.__ranges[0]
+    for k in self.__ranges:
+      if k[0] <= subkey[0] and k[1] >= subkey[1]: return True
     return False

   def __hash__(self):
@@ -95,10 +111,18 @@

   @staticmethod
   def __print_range(r):
+    def to_str(x):
+      if x <= TransitionKey.__latin_1_upper_bound:
+        return chr(x)
+      if x == TransitionKey.__unicode_literal_bounds[0]:
+        return "literal"
+      if x == TransitionKey.__unicode_whitespace_bounds[0]:
+        return "whitespace"
+      assert False
     if r[0] == r[1]:
-      return "'%s'" % chr(r[0])
+      return "'%s'" % to_str(r[0])
     else:
-      return "['%s'-'%s']" % (chr(r[0]), chr(r[1]))
+      return "['%s'-'%s']" % (to_str(r[0]), to_str(r[1]))

   def __str__(self):
     if self == self.epsilon():
@@ -108,7 +132,18 @@
     return ", ".join(TransitionKey.__print_range(x) for x in self.__ranges)

   @staticmethod
-  def merge_key_set(key_set):
-    key_set -= set([TransitionKey.epsilon()])
-    # TODO need intersections and symmetric differences
-    return key_set
+  def disjoint_keys(key_set):
+    ranges = sorted(reduce(lambda acc, x : acc + x.__ranges, key_set, []))
+    for i in range(0, len(ranges)):
+      right = ranges[i][1]
+      limit = i
+      for j in range(i + 1, len(ranges)):
+        if ranges[j][0] > right:
+          break
+        assert ranges[j][0] >= ranges[i][0]
+        limit = j
+      if limit == i: continue
+      for j in range(i + 1, limit + 1):
+        ranges[j] = (right, ranges[j][1])
+    TransitionKey.__verify_ranges(ranges)
+    return map(lambda x : TransitionKey.__create([x]), ranges)

--
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
--- You received this message because you are subscribed to the Google Groups "v8-dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
For more options, visit https://groups.google.com/groups/opt_out.

Reply via email to