Revision: 17439
Author: [email protected]
Date: Thu Oct 31 08:19:46 2013 UTC
Log: Experimental parser: some unit tests
[email protected]
BUG=
Review URL: https://codereview.chromium.org/54303002
http://code.google.com/p/v8/source/detail?r=17439
Added:
/branches/experimental/parser/tools/lexer_generator/automata_test.py
Modified:
/branches/experimental/parser/tools/lexer_generator/dfa.py
/branches/experimental/parser/tools/lexer_generator/nfa.py
/branches/experimental/parser/tools/lexer_generator/regex_parser.py
/branches/experimental/parser/tools/lexer_generator/transition_keys.py
=======================================
--- /dev/null
+++ /branches/experimental/parser/tools/lexer_generator/automata_test.py
Thu Oct 31 08:19:46 2013 UTC
@@ -0,0 +1,65 @@
+# Copyright 2013 the V8 project authors. All rights reserved.
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following
+# disclaimer in the documentation and/or other materials provided
+# with the distribution.
+# * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import unittest
+from regex_parser import RegexParser
+from nfa import NfaBuilder
+from dfa import Dfa
+
+def build_automata(string):
+ parser = RegexParser()
+ parser.build()
+ graph = parser.parse(string)
+ nfa = NfaBuilder().nfa(graph)
+ (start_name, dfa_nodes, end_nodes) = nfa.compute_dfa()
+ dfa = Dfa(start_name, dfa_nodes, end_nodes)
+ return (nfa, dfa)
+
+class AutomataTestCase(unittest.TestCase):
+
+ # (pattern, should match, shouldn't match)
+ __test_data = [
+ ("a", ["a"], ["b"]),
+ ("ab", ["ab"], ["bb"]),
+ ("a+b", ["ab", "aab", "aaab"], ["a", "b"]),
+ ("a?b", ["ab", "b"], ["a", "c"]),
+ ("a*b", ["ab", "aaab", "b"], ["a", "c"]),
+ ("a|b", ["a", "b"], ["ab", "c"]),
+ ]
+
+ def test_matches(self):
+ for (regex, matches, not_matches) in AutomataTestCase.__test_data:
+ (nfa, dfa) = build_automata(regex)
+ for string in matches:
+ self.assertTrue(nfa.matches(string))
+ self.assertTrue(dfa.matches(string))
+ for string in not_matches:
+ self.assertFalse(nfa.matches(string))
+ self.assertFalse(dfa.matches(string))
+
+if __name__ == '__main__':
+ unittest.main()
=======================================
--- /branches/experimental/parser/tools/lexer_generator/dfa.py Thu Oct 31
07:24:53 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/dfa.py Thu Oct 31
08:19:46 2013 UTC
@@ -78,6 +78,19 @@
edge = next_edge - visited
return state
+ def matches(self, string):
+ state = self.__start
+ for c in string:
+ next_state = None
+ for key, transition_state in state.transitions().items():
+ if key.matches_char(c):
+ next_state = transition_state
+ break
+ if not next_state:
+ return False
+ state = next_state
+ return state in self.__terminal_set
+
def to_dot(self):
def f(node, node_content):
=======================================
--- /branches/experimental/parser/tools/lexer_generator/nfa.py Thu Oct 31
07:24:53 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/nfa.py Thu Oct 31
08:19:46 2013 UTC
@@ -245,7 +245,7 @@
valid_states = Nfa.__close(set([self.__start]))
for c in string:
f = lambda acc, state: acc | state.char_matches(c)
- valid_states = Nfa.__close(reduce(valid_states, f, set()))
+ valid_states = Nfa.__close(reduce(f, valid_states, set()))
if not valid_states:
return False
return self.__end in valid_states
=======================================
--- /branches/experimental/parser/tools/lexer_generator/regex_parser.py Tue
Oct 29 15:10:46 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/regex_parser.py Thu
Oct 31 08:19:46 2013 UTC
@@ -125,7 +125,7 @@
return ('CAT', left, right)
def build(self, **kwargs):
- self.parser = yacc.yacc(module=self, **kwargs)
+ self.parser = yacc.yacc(module=self, debug=0, write_tables=0, **kwargs)
self.lexer = RegexLexer()
self.lexer.build(**kwargs)
=======================================
--- /branches/experimental/parser/tools/lexer_generator/transition_keys.py
Thu Oct 31 07:24:53 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/transition_keys.py
Thu Oct 31 08:19:46 2013 UTC
@@ -70,6 +70,7 @@
return TransitionKey.__create([(129, 129)])
def matches_char(self, char):
+ char = ord(char)
for r in self.__ranges:
if r[0] <= char and char <= r[1]: return True
return False
--
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
---
You received this message because you are subscribed to the Google Groups "v8-dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email
to [email protected].
For more options, visit https://groups.google.com/groups/opt_out.