Revision: 17447
Author:   [email protected]
Date:     Thu Oct 31 13:00:18 2013 UTC
Log: Experimental lexer generator: fixing the rule parser + adding tests.

Also parse this: <cond> regex => newcond { body }

BUG=
[email protected]

Review URL: https://codereview.chromium.org/48783004
http://code.google.com/p/v8/source/detail?r=17447

Added:
 /branches/experimental/parser/tools/lexer_generator/rule_parser_test.py
Modified:
 /branches/experimental/parser/tools/lexer_generator/rule_lexer.py
 /branches/experimental/parser/tools/lexer_generator/rule_parser.py

=======================================
--- /dev/null
+++ /branches/experimental/parser/tools/lexer_generator/rule_parser_test.py Thu Oct 31 13:00:18 2013 UTC
@@ -0,0 +1,99 @@
+# Copyright 2013 the V8 project authors. All rights reserved.
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above
+#       copyright notice, this list of conditions and the following
+#       disclaimer in the documentation and/or other materials provided
+#       with the distribution.
+#     * Neither the name of Google Inc. nor the names of its
+#       contributors may be used to endorse or promote products derived
+#       from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import unittest
+from rule_parser import RuleParser
+
+class RuleParserTestCase(unittest.TestCase):
+
+   def setUp(self):
+     self.parser = RuleParser()
+     self.parser.build()
+
+   def test_basic(self):
+     self.parser.parse('alias = regex1;')
+     self.parser.parse('<cond1> regex2 :=> cond2')
+     self.parser.parse('<cond2> regex3 {body}')
+     self.parser.parse('<cond3> regex4 => cond4 {body}')
+
+     self.assertTrue(len(self.parser.aliases), 1)
+     self.assertTrue('alias' in self.parser.aliases)
+     self.assertEquals(self.parser.aliases['alias'], 'regex1')
+
+     self.assertTrue(len(self.parser.transitions), 2)
+     self.assertTrue('cond1' in self.parser.transitions)
+     self.assertEquals(len(self.parser.transitions['cond1']), 1)
+     self.assertTrue('regex2' in self.parser.transitions['cond1'])
+     self.assertEquals(self.parser.transitions['cond1']['regex2'],
+                       ('condition', 'cond2'))
+
+     self.assertTrue('cond2' in self.parser.transitions)
+     self.assertEquals(len(self.parser.transitions['cond2']), 1)
+     self.assertTrue('regex3' in self.parser.transitions['cond2'])
+     self.assertEquals(self.parser.transitions['cond2']['regex3'],
+                       ('body', 'body'))
+
+     self.assertTrue('cond3' in self.parser.transitions)
+     self.assertEquals(len(self.parser.transitions['cond3']), 1)
+     self.assertTrue('regex4' in self.parser.transitions['cond3'])
+     self.assertEquals(self.parser.transitions['cond3']['regex4'],
+                       ('condition_and_body', 'cond4', 'body'))
+
+   def test_more_complicated(self):
+     self.parser.parse('alias = regex;with;semicolon;')
+     self.parser.parse('<cond1> regex2 :=> with :=> arrow :=> cond2')
+     self.parser.parse('<cond1> regex3}with}braces} {body {with} braces } }')
+     self.parser.parse('<cond1> regex4{with{braces} {body {with} braces } }')
+
+     self.assertEquals(self.parser.aliases['alias'], 'regex;with;semicolon')
+
+     self.assertEquals(
+         self.parser.transitions['cond1']['regex2 :=> with :=> arrow'],
+         ('condition', 'cond2'))
+
+     self.assertEquals(
+         self.parser.transitions['cond1']['regex3}with}braces}'],
+         ('body', 'body {with} braces }'))
+
+     self.assertEquals(
+         self.parser.transitions['cond1']['regex4{with{braces}'],
+         ('body', 'body {with} braces }'))
+
+   def test_body_with_if(self):
+     self.parser.parse('<cond> regex { if (foo) { bar } }')
+     self.assertEquals(
+         self.parser.transitions['cond']['regex'],
+         ('body', 'if (foo) { bar }'))
+
+   def test_regexp_with_count(self):
+     self.parser.parse('<cond> regex{1,3} { if (foo) { bar } }')
+     self.assertEquals(
+         self.parser.transitions['cond']['regex{1,3}'],
+         ('body', 'if (foo) { bar }'))
+
+if __name__ == '__main__':
+    unittest.main()
=======================================
--- /branches/experimental/parser/tools/lexer_generator/rule_lexer.py Wed Oct 30 16:36:45 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/rule_lexer.py Thu Oct 31 13:00:18 2013 UTC
@@ -36,8 +36,9 @@
       'CONDITION',
       'CONDITION_BEGIN',
       'CONDITION_END',
-      'REGEX_AND_TRANSITION',
-      'REGEX_AND_BODY',
+      'REGEX_TRANSITION_BODY',
+      'REGEX_TRANSITION',
+      'REGEX_BODY',
       )

   t_ANY_ignore = " \t\n"
@@ -75,21 +76,26 @@
     return t

   def t_seenCondition_CONDITION_END(self, t):
-    r'>'
+    r'>\s*'
     self.lexer.begin('afterCondition')
     return t

-  def t_afterCondition_REGEX_AND_TRANSITION(self, t):
+  def t_afterCondition_REGEX_TRANSITION_BODY(self, t):
+    r'(?P<regex>.+)\s*=>\s*(?P<new>.+)\s*{\s*(?P<body>.+)\s*}\s*'
+    self.lexer.begin('INITIAL')
+    return t
+
+  def t_afterCondition_REGEX_TRANSITION(self, t):
     r'(?P<regex>.+)\s*:=>\s*(?P<new>.+)\s*'
     self.lexer.begin('INITIAL')
     return t

-  def t_afterCondition_REGEX_AND_BODY(self, t):
-    r'(?P<regex>.+)\s*{\s*(?P<body>.+)\s*}\s*'
+  def t_afterCondition_REGEX_BODY(self, t):
+    r'(?P<regex>.+?)\s+{\s*(?P<body>.+)\s*}\s*'
     self.lexer.begin('INITIAL')
     return t

-  def t_error(self, t):
+  def t_ANY_error(self, t):
     raise Exception("Illegal character '%s'" % t.value[0])

   def build(self, **kwargs):
=======================================
--- /branches/experimental/parser/tools/lexer_generator/rule_parser.py Wed Oct 30 16:36:45 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/rule_parser.py Thu Oct 31 13:00:18 2013 UTC
@@ -32,8 +32,9 @@

   tokens = RuleLexer.tokens

-  aliases = dict()
-  transitions = dict()
+  def __init__(self):
+    self.aliases = dict()
+    self.transitions = dict()

   def p_statement_alias(self, p):
     'statement : ALIAS EQUALS REGEX'
@@ -41,25 +42,33 @@
     self.aliases[p[1]] = regex

   def p_statement_condition_transition(self, p):
-    'statement : CONDITION_BEGIN CONDITION CONDITION_END REGEX_AND_TRANSITION'
+    'statement : CONDITION_BEGIN CONDITION CONDITION_END REGEX_TRANSITION'
     old_condition = p[2]
-    regex = self.lexer.lexer.lexmatch.group('regex')
+    regex = self.lexer.lexer.lexmatch.group('regex').strip()
     new_condition = self.lexer.lexer.lexmatch.group('new')
     if old_condition not in self.transitions:
-      self.transitions[old_condition] = []
-    self.transitions[old_condition].append((regex, new_condition))
+      self.transitions[old_condition] = dict()
+    self.transitions[old_condition][regex] = ('condition', new_condition)

   def p_statement_condition_body(self, p):
-    'statement : CONDITION_BEGIN CONDITION CONDITION_END REGEX_AND_BODY'
+    'statement : CONDITION_BEGIN CONDITION CONDITION_END REGEX_BODY'
     old_condition = p[2]
-    regex = self.lexer.lexer.lexmatch.group('regex')
-    body = self.lexer.lexer.lexmatch.group('body')
+    regex = self.lexer.lexer.lexmatch.group('regex').strip()
+    body = self.lexer.lexer.lexmatch.group('body').strip()
     if old_condition not in self.transitions:
-      self.transitions[old_condition] = []
-    self.transitions[old_condition].append((regex, body))
+      self.transitions[old_condition] = dict()
+    self.transitions[old_condition][regex] = ('body', body)

-  def p_empty(self, p):
-    'empty :'
+  def p_statement_condition_transition_body(self, p):
+    'statement : CONDITION_BEGIN CONDITION CONDITION_END REGEX_TRANSITION_BODY'
+    old_condition = p[2]
+    regex = self.lexer.lexer.lexmatch.group('regex').strip()
+    new_condition = self.lexer.lexer.lexmatch.group('new').strip()
+    body = self.lexer.lexer.lexmatch.group('body').strip()
+    if old_condition not in self.transitions:
+      self.transitions[old_condition] = dict()
+    self.transitions[old_condition][regex] = (
+        'condition_and_body', new_condition, body)

   def p_error(self, p):
     raise Exception("Syntax error in input '%s'" % p)

--
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
---
You received this message because you are subscribed to the Google Groups "v8-dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
For more options, visit https://groups.google.com/groups/opt_out.

Reply via email to