Revision: 17442
Author: [email protected]
Date: Thu Oct 31 11:01:22 2013 UTC
Log: Experimental parser: more tests, better key logic
[email protected]
BUG=
Review URL: https://codereview.chromium.org/54533002
http://code.google.com/p/v8/source/detail?r=17442
Added:
/branches/experimental/parser/tools/lexer_generator/transition_key_test.py
Modified:
/branches/experimental/parser/tools/lexer_generator/automata_test.py
/branches/experimental/parser/tools/lexer_generator/nfa.py
/branches/experimental/parser/tools/lexer_generator/transition_keys.py
=======================================
--- /dev/null
+++
/branches/experimental/parser/tools/lexer_generator/transition_key_test.py
Thu Oct 31 11:01:22 2013 UTC
@@ -0,0 +1,48 @@
+# Copyright 2013 the V8 project authors. All rights reserved.
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following
+# disclaimer in the documentation and/or other materials provided
+# with the distribution.
+# * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import unittest
+from transition_keys import TransitionKey
+
+class TransitionKeyTestCase(unittest.TestCase):
+
+ __equal_pairs = [
+ (TransitionKey.epsilon(), TransitionKey.epsilon()),
+ (TransitionKey.any(), TransitionKey.any()),
+ (TransitionKey.single_char('a'), TransitionKey.single_char('a')),
+ ]
+
+ def test_eq(self):
+ for (left, right) in self.__equal_pairs:
+ self.assertEqual(left, right)
+
+ def test_hash(self):
+ for (left, right) in self.__equal_pairs:
+ self.assertEqual(hash(left), hash(right))
+
+if __name__ == '__main__':
+ unittest.main()
=======================================
--- /branches/experimental/parser/tools/lexer_generator/automata_test.py
Thu Oct 31 08:19:46 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/automata_test.py
Thu Oct 31 11:01:22 2013 UTC
@@ -43,16 +43,22 @@
# (pattern, should match, shouldn't match)
__test_data = [
- ("a", ["a"], ["b"]),
- ("ab", ["ab"], ["bb"]),
- ("a+b", ["ab", "aab", "aaab"], ["a", "b"]),
- ("a?b", ["ab", "b"], ["a", "c"]),
- ("a*b", ["ab", "aaab", "b"], ["a", "c"]),
- ("a|b", ["a", "b"], ["ab", "c"]),
+ ("a", ["a"], ["b", ""]),
+ ("ab", ["ab"], ["bb", ""]),
+ ("a+b", ["ab", "aab", "aaab"], ["a", "b", ""]),
+ ("a?b", ["ab", "b"], ["a", "c", ""]),
+ ("a*b", ["ab", "aaab", "b"], ["a", "c", ""]),
+ ("a|b", ["a", "b"], ["ab", "c", ""]),
+ (".", ["a", "b"], ["", "aa"]),
+ (".*", ["", "a", "abcaabbcc"], []),
+ ("a.b", ["aab", "abb", "acb"], ["ab", ""]),
+ # ("a.?b", ["aab", "abb", "acb", "ab"], ["aaab", ""]),
+ # ("a.+b", ["aab", "abb", "acb", "ab"], ["aaab", ""]),
+ (".|.", ["a", "b"], ["aa", ""]),
]
def test_matches(self):
- for (regex, matches, not_matches) in AutomataTestCase.__test_data:
+ for (regex, matches, not_matches) in self.__test_data:
(nfa, dfa) = build_automata(regex)
for string in matches:
self.assertTrue(nfa.matches(string))
@@ -61,5 +67,11 @@
self.assertFalse(nfa.matches(string))
self.assertFalse(dfa.matches(string))
+ def test_can_construct_dot(self):
+ for (regex, matches, not_matches) in self.__test_data:
+ (nfa, dfa) = build_automata(regex)
+ nfa.to_dot()
+ dfa.to_dot()
+
if __name__ == '__main__':
unittest.main()
=======================================
--- /branches/experimental/parser/tools/lexer_generator/nfa.py Thu Oct 31
08:19:46 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/nfa.py Thu Oct 31
11:01:22 2013 UTC
@@ -93,15 +93,15 @@
return reduce(f, self.__transitions.items(), set())
def char_matches(self, value):
- return self.__matches((lambda k, v : k.matches_char(v)), value)
+ return self.__matches(lambda k, v : k.matches_char(v), value)
def key_matches(self, value):
- return self.__matches((lambda k, v : k.matches_key(v)), value)
+ return self.__matches(lambda k, v : k.matches_key(v), value)
@staticmethod
def gather_transition_keys(state_set):
f = lambda acc, state: acc | set(state.__transitions.keys())
- return TransitionKey.merge_key_set(reduce(f, state_set, set()))
+ return TransitionKey.disjoint_keys(reduce(f, state_set, set()))
class NfaBuilder:
@@ -169,7 +169,7 @@
assert type(graph) == TupleType
method = "_NfaBuilder__" + graph[0].lower()
if not method in self.__operation_map:
- matches = filter((lambda (name, func): name == method),
self.__members)
+ matches = filter(lambda (name, func): name == method, self.__members)
assert len(matches) == 1
self.__operation_map[method] = matches[0][1]
return self.__operation_map[method](graph)
@@ -253,6 +253,7 @@
@staticmethod
def __to_dfa(nfa_state_set, builder):
nfa_state_set = Nfa.__close(nfa_state_set)
+ assert nfa_state_set
name = str([x.node_number() for x in nfa_state_set])
(dfa_nodes, end_nodes, end_node) = builder
if name in dfa_nodes:
=======================================
--- /branches/experimental/parser/tools/lexer_generator/transition_keys.py
Thu Oct 31 08:19:46 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/transition_keys.py
Thu Oct 31 11:01:22 2013 UTC
@@ -25,7 +25,6 @@
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
class TransitionKey:
__lower_bound = 0
@@ -34,9 +33,20 @@
__unicode_literal_bounds = (257, 257)
__upper_bound = 257
+ @staticmethod
+ def __verify_ranges(ranges):
+ last = (TransitionKey.__lower_bound - 1, TransitionKey.__lower_bound -
1)
+ for r in ranges:
+ assert TransitionKey.__lower_bound <= r[0]
+ assert r[1] <= TransitionKey.__upper_bound
+ assert r[0] <= r[1]
+ assert last[1] < r[0]
+ # TODO check classes are always disjoint points
+ last = r
+
@staticmethod
def __create(ranges):
- # TODO - verify ranges
+ TransitionKey.__verify_ranges(ranges)
key = TransitionKey()
key.__ranges = ranges
key.__cached_hash = None
@@ -53,7 +63,11 @@
@staticmethod
def any():
if (TransitionKey.__cached_any == None):
- bounds = [(TransitionKey.__lower_bound, TransitionKey.__upper_bound)]
+ bounds = [
+ (TransitionKey.__lower_bound, TransitionKey.__latin_1_upper_bound),
+ TransitionKey.__unicode_whitespace_bounds,
+ TransitionKey.__unicode_literal_bounds,
+ ]
TransitionKey.__cached_any = TransitionKey.__create(bounds)
return TransitionKey.__cached_any
@@ -71,6 +85,7 @@
def matches_char(self, char):
char = ord(char)
+ # TODO class checks
for r in self.__ranges:
if r[0] <= char and char <= r[1]: return True
return False
@@ -78,9 +93,10 @@
def matches_key(self, key):
assert isinstance(key, self.__class__)
assert key != TransitionKey.epsilon()
- if self == key: return True
- if self == TransitionKey.epsilon(): return False
- # TODO
+ assert len(key.__ranges) == 1
+ subkey = key.__ranges[0]
+ for k in self.__ranges:
+ if k[0] <= subkey[0] and k[1] >= subkey[1]: return True
return False
def __hash__(self):
@@ -95,10 +111,18 @@
@staticmethod
def __print_range(r):
+ def to_str(x):
+ if x <= TransitionKey.__latin_1_upper_bound:
+ return chr(x)
+ if x == TransitionKey.__unicode_literal_bounds[0]:
+ return "literal"
+ if x == TransitionKey.__unicode_whitespace_bounds[0]:
+ return "whitespace"
+ assert False
if r[0] == r[1]:
- return "'%s'" % chr(r[0])
+ return "'%s'" % to_str(r[0])
else:
- return "['%s'-'%s']" % (chr(r[0]), chr(r[1]))
+ return "['%s'-'%s']" % (to_str(r[0]), to_str(r[1]))
def __str__(self):
if self == self.epsilon():
@@ -108,7 +132,18 @@
return ", ".join(TransitionKey.__print_range(x) for x in self.__ranges)
@staticmethod
- def merge_key_set(key_set):
- key_set -= set([TransitionKey.epsilon()])
- # TODO need intersections and symmetric differences
- return key_set
+ def disjoint_keys(key_set):
+ ranges = sorted(reduce(lambda acc, x : acc + x.__ranges, key_set, []))
+ for i in range(0, len(ranges)):
+ right = ranges[i][1]
+ limit = i
+ for j in range(i + 1, len(ranges)):
+ if ranges[j][0] > right:
+ break
+ assert ranges[j][0] >= ranges[i][0]
+ limit = j
+ if limit == i: continue
+ for j in range(i + 1, limit + 1):
+ ranges[j] = (right, ranges[j][1])
+ TransitionKey.__verify_ranges(ranges)
+ return map(lambda x : TransitionKey.__create([x]), ranges)
--
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
---
You received this message because you are subscribed to the Google Groups "v8-dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email
to [email protected].
For more options, visit https://groups.google.com/groups/opt_out.