Revision: 17447
Author: [email protected]
Date: Thu Oct 31 13:00:18 2013 UTC
Log: Experimental lexer generator: fixing the rule parser + adding
tests.
Also parse this: <cond> regex => newcond { body }
BUG=
[email protected]
Review URL: https://codereview.chromium.org/48783004
http://code.google.com/p/v8/source/detail?r=17447
Added:
/branches/experimental/parser/tools/lexer_generator/rule_parser_test.py
Modified:
/branches/experimental/parser/tools/lexer_generator/rule_lexer.py
/branches/experimental/parser/tools/lexer_generator/rule_parser.py
=======================================
--- /dev/null
+++ /branches/experimental/parser/tools/lexer_generator/rule_parser_test.py Thu Oct 31 13:00:18 2013 UTC
@@ -0,0 +1,99 @@
+# Copyright 2013 the V8 project authors. All rights reserved.
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following
+# disclaimer in the documentation and/or other materials provided
+# with the distribution.
+# * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import unittest
+from rule_parser import RuleParser
+
+class RuleParserTestCase(unittest.TestCase):
+  """Checks the aliases and transitions RuleParser builds from rule text."""
+  def setUp(self):
+    self.parser = RuleParser()
+    self.parser.build()
+
+  def test_basic(self):
+    self.parser.parse('alias = regex1;')
+    self.parser.parse('<cond1> regex2 :=> cond2')
+    self.parser.parse('<cond2> regex3 {body}')
+    self.parser.parse('<cond3> regex4 => cond4 {body}')
+    # Exactly one alias is recorded, without the terminating ';'.
+    self.assertEqual(len(self.parser.aliases), 1)
+    self.assertIn('alias', self.parser.aliases)
+    self.assertEqual(self.parser.aliases['alias'], 'regex1')
+    # cond1, cond2 and cond3 each get a table; ':=>' stores only the target.
+    self.assertEqual(len(self.parser.transitions), 3)
+    self.assertIn('cond1', self.parser.transitions)
+    self.assertEqual(len(self.parser.transitions['cond1']), 1)
+    self.assertIn('regex2', self.parser.transitions['cond1'])
+    self.assertEqual(self.parser.transitions['cond1']['regex2'],
+                     ('condition', 'cond2'))
+    # A '{...}' rule stores only the body text.
+    self.assertIn('cond2', self.parser.transitions)
+    self.assertEqual(len(self.parser.transitions['cond2']), 1)
+    self.assertIn('regex3', self.parser.transitions['cond2'])
+    self.assertEqual(self.parser.transitions['cond2']['regex3'],
+                     ('body', 'body'))
+    # '=> cond {...}' stores the new condition together with the body.
+    self.assertIn('cond3', self.parser.transitions)
+    self.assertEqual(len(self.parser.transitions['cond3']), 1)
+    self.assertIn('regex4', self.parser.transitions['cond3'])
+    self.assertEqual(self.parser.transitions['cond3']['regex4'],
+                     ('condition_and_body', 'cond4', 'body'))
+
+  def test_more_complicated(self):
+    self.parser.parse('alias = regex;with;semicolon;')
+    self.parser.parse('<cond1> regex2 :=> with :=> arrow :=> cond2')
+    self.parser.parse('<cond1> regex3}with}braces} {body {with} braces } }')
+    self.parser.parse('<cond1> regex4{with{braces} {body {with} braces } }')
+    # ';', ':=>', '{' and '}' inside a regex or body must not end it early.
+    self.assertEqual(self.parser.aliases['alias'], 'regex;with;semicolon')
+
+    self.assertEqual(
+        self.parser.transitions['cond1']['regex2 :=> with :=> arrow'],
+        ('condition', 'cond2'))
+
+    self.assertEqual(
+        self.parser.transitions['cond1']['regex3}with}braces}'],
+        ('body', 'body {with} braces }'))
+
+    self.assertEqual(
+        self.parser.transitions['cond1']['regex4{with{braces}'],
+        ('body', 'body {with} braces }'))
+
+  def test_body_with_if(self):
+    # Nested braces inside the body are kept verbatim.
+    self.parser.parse('<cond> regex { if (foo) { bar } }')
+    self.assertEqual(self.parser.transitions['cond']['regex'],
+                     ('body', 'if (foo) { bar }'))
+
+  def test_regexp_with_count(self):
+    # A '{m,n}' repetition count belongs to the regex, not to the body.
+    self.parser.parse('<cond> regex{1,3} { if (foo) { bar } }')
+    self.assertEqual(self.parser.transitions['cond']['regex{1,3}'],
+                     ('body', 'if (foo) { bar }'))
+
+if __name__ == '__main__':
+ unittest.main()
=======================================
--- /branches/experimental/parser/tools/lexer_generator/rule_lexer.py Wed Oct 30 16:36:45 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/rule_lexer.py Thu Oct 31 13:00:18 2013 UTC
@@ -36,8 +36,9 @@
'CONDITION',
'CONDITION_BEGIN',
'CONDITION_END',
- 'REGEX_AND_TRANSITION',
- 'REGEX_AND_BODY',
+ 'REGEX_TRANSITION_BODY',
+ 'REGEX_TRANSITION',
+ 'REGEX_BODY',
)
t_ANY_ignore = " \t\n"
@@ -75,21 +76,26 @@
return t
def t_seenCondition_CONDITION_END(self, t):
- r'>'
+ r'>\s*'
self.lexer.begin('afterCondition')
return t
- def t_afterCondition_REGEX_AND_TRANSITION(self, t):
+ def t_afterCondition_REGEX_TRANSITION_BODY(self, t):
+ r'(?P<regex>.+)\s*=>\s*(?P<new>.+)\s*{\s*(?P<body>.+)\s*}\s*'
+ self.lexer.begin('INITIAL')
+ return t
+
+ def t_afterCondition_REGEX_TRANSITION(self, t):
r'(?P<regex>.+)\s*:=>\s*(?P<new>.+)\s*'
self.lexer.begin('INITIAL')
return t
- def t_afterCondition_REGEX_AND_BODY(self, t):
- r'(?P<regex>.+)\s*{\s*(?P<body>.+)\s*}\s*'
+ def t_afterCondition_REGEX_BODY(self, t):
+ r'(?P<regex>.+?)\s+{\s*(?P<body>.+)\s*}\s*'
self.lexer.begin('INITIAL')
return t
- def t_error(self, t):
+ def t_ANY_error(self, t):
raise Exception("Illegal character '%s'" % t.value[0])
def build(self, **kwargs):
=======================================
--- /branches/experimental/parser/tools/lexer_generator/rule_parser.py Wed Oct 30 16:36:45 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/rule_parser.py Thu Oct 31 13:00:18 2013 UTC
@@ -32,8 +32,9 @@
tokens = RuleLexer.tokens
- aliases = dict()
- transitions = dict()
+ def __init__(self):
+ self.aliases = dict()
+ self.transitions = dict()
def p_statement_alias(self, p):
'statement : ALIAS EQUALS REGEX'
@@ -41,25 +42,33 @@
self.aliases[p[1]] = regex
def p_statement_condition_transition(self, p):
- 'statement : CONDITION_BEGIN CONDITION CONDITION_END REGEX_AND_TRANSITION'
+ 'statement : CONDITION_BEGIN CONDITION CONDITION_END REGEX_TRANSITION'
old_condition = p[2]
- regex = self.lexer.lexer.lexmatch.group('regex')
+ regex = self.lexer.lexer.lexmatch.group('regex').strip()
new_condition = self.lexer.lexer.lexmatch.group('new')
if old_condition not in self.transitions:
- self.transitions[old_condition] = []
- self.transitions[old_condition].append((regex, new_condition))
+ self.transitions[old_condition] = dict()
+ self.transitions[old_condition][regex] = ('condition', new_condition)
def p_statement_condition_body(self, p):
- 'statement : CONDITION_BEGIN CONDITION CONDITION_END REGEX_AND_BODY'
+ 'statement : CONDITION_BEGIN CONDITION CONDITION_END REGEX_BODY'
old_condition = p[2]
- regex = self.lexer.lexer.lexmatch.group('regex')
- body = self.lexer.lexer.lexmatch.group('body')
+ regex = self.lexer.lexer.lexmatch.group('regex').strip()
+ body = self.lexer.lexer.lexmatch.group('body').strip()
if old_condition not in self.transitions:
- self.transitions[old_condition] = []
- self.transitions[old_condition].append((regex, body))
+ self.transitions[old_condition] = dict()
+ self.transitions[old_condition][regex] = ('body', body)
- def p_empty(self, p):
- 'empty :'
+ def p_statement_condition_transition_body(self, p):
+ 'statement : CONDITION_BEGIN CONDITION CONDITION_END REGEX_TRANSITION_BODY'
+ old_condition = p[2]
+ regex = self.lexer.lexer.lexmatch.group('regex').strip()
+ new_condition = self.lexer.lexer.lexmatch.group('new').strip()
+ body = self.lexer.lexer.lexmatch.group('body').strip()
+ if old_condition not in self.transitions:
+ self.transitions[old_condition] = dict()
+ self.transitions[old_condition][regex] = (
+ 'condition_and_body', new_condition, body)
def p_error(self, p):
raise Exception("Syntax error in input '%s'" % p)
--
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
---
You received this message because you are subscribed to the Google Groups "v8-dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email
to [email protected].
For more options, visit https://groups.google.com/groups/opt_out.