Reviewers: marja,
Message:
Committed patchset #1 manually as r17499.
Description:
Experimental parser: user-defined key classes
[email protected]
BUG=
Committed: https://code.google.com/p/v8/source/detail?r=17499
Please review this at https://codereview.chromium.org/59973005/
SVN Base: https://v8.googlecode.com/svn/branches/experimental/parser
Affected files (+41, -12 lines):
M tools/lexer_generator/nfa.py
M tools/lexer_generator/rule_parser.py
M tools/lexer_generator/transition_key_test.py
M tools/lexer_generator/transition_keys.py
Index: tools/lexer_generator/nfa.py
diff --git a/tools/lexer_generator/nfa.py b/tools/lexer_generator/nfa.py
index 9a172c9beb19da78df4707dc44bd334515e784c8..bba36efc6d6ed7fd29485e16b6f75ad123165353 100644
--- a/tools/lexer_generator/nfa.py
+++ b/tools/lexer_generator/nfa.py
@@ -122,6 +122,10 @@ class NfaBuilder:
self.__node_number = 0
self.__operation_map = {}
self.__members = getmembers(self)
+ self.__character_classes = {}
+
+ def set_character_classes(self, classes):
+ self.__character_classes = classes
def __new_state(self):
self.__node_number += 1
@@ -192,10 +196,12 @@ class NfaBuilder:
return self.__key_state(TransitionKey.single_char(graph[1]))
def __class(self, graph):
- return self.__key_state(TransitionKey.character_class(False, graph[1]))
+ return self.__key_state(
+ TransitionKey.character_class(graph, self.__character_classes))
def __not_class(self, graph):
- return self.__key_state(TransitionKey.character_class(True, graph[1]))
+ return self.__key_state(
+ TransitionKey.character_class(graph, self.__character_classes))
def __any(self, graph):
return self.__key_state(TransitionKey.any())
Index: tools/lexer_generator/rule_parser.py
diff --git a/tools/lexer_generator/rule_parser.py b/tools/lexer_generator/rule_parser.py
index eac1e2faac1973b374d063060e482572b1860245..bfaf179fc8f948debb4a58f12bcae63fadc59410 100644
--- a/tools/lexer_generator/rule_parser.py
+++ b/tools/lexer_generator/rule_parser.py
@@ -29,6 +29,7 @@ import ply.yacc as yacc
from rule_lexer import RuleLexer
from regex_parser import RegexParser
from nfa import NfaBuilder
+from transition_keys import TransitionKey
class RuleParser:
@@ -39,6 +40,7 @@ class RuleParser:
'eof' : RegexParser.parse("eof"), #RegexParser.parse("[\0]"),
'any' : RegexParser.parse("."),
}
+ self.character_classes = {}
self.current_transition = None
self.rules = {}
@@ -56,7 +58,12 @@ class RuleParser:
def p_alias_rule(self, p):
'alias_rule : IDENTIFIER EQUALS composite_regex SEMICOLON'
assert not p[1] in self.aliases
+ graph = p[3]
self.aliases[p[1]] = p[3]
+ if graph[0] == 'CLASS' or graph[0] == 'NOT_CLASS':
+ classes = self.character_classes
+ assert not p[1] in classes
+ classes[p[1]] = TransitionKey.character_class(graph, classes)
def p_transition_rule(self, p):
'''transition_rule : transition composite_regex code
@@ -91,7 +98,9 @@ class RuleParser:
p[0] = p[1]
else:
p[0] = NfaBuilder.or_graphs([p[1], p[3]])
- # NfaBuilder().nfa(p[0])
+ # builder = NfaBuilder()
+ # builder.set_character_classes(self.character_classes)
+ # builder.nfa(p[0])
def p_regex_parts(self, p):
'''regex_parts : regex_part
Index: tools/lexer_generator/transition_key_test.py
diff --git a/tools/lexer_generator/transition_key_test.py b/tools/lexer_generator/transition_key_test.py
index 480d6c0830304b21d664bd9a6839e1b6874b7ca1..d2fcd40e921fa486960ae625978c1c6c0e0fa66a 100644
--- a/tools/lexer_generator/transition_key_test.py
+++ b/tools/lexer_generator/transition_key_test.py
@@ -53,6 +53,7 @@ class TransitionKeyTestCase(unittest.TestCase):
("a-zA-Z0g" , "abyzABYZ0" , "123"),
("a-z:ws::lit:" , "abc" , "123"),
]
+ classes = {}
for (string, match, no_match) in data:
for invert in [False, True]:
if invert:
@@ -63,11 +64,19 @@ class TransitionKeyTestCase(unittest.TestCase):
token = "CLASS"
graph = RegexParser.parse(regex)
assert graph[0] == token
- key = TransitionKey.character_class(invert, graph[1])
+ key = TransitionKey.character_class(graph, classes)
for c in match:
self.assertEqual(invert, not key.matches_char(c))
for c in no_match:
self.assertEqual(invert, key.matches_char(c))
+ def test_self_defined_classes(self):
+ graph = RegexParser.parse("[a-z]")
+ classes = {
+ 'self_defined' : TransitionKey.character_class(graph, {})}
+ graph = RegexParser.parse("[^:self_defined:]")
+ key = TransitionKey.character_class(graph, classes)
+ self.assertTrue(key.matches_char('A'))
+
if __name__ == '__main__':
unittest.main()
Index: tools/lexer_generator/transition_keys.py
diff --git a/tools/lexer_generator/transition_keys.py b/tools/lexer_generator/transition_keys.py
index d759a4f5b14f01aa4a3f10a40eef84c0e400a7bd..1c0de4d2b15b667477f52a3c2121c76d9b683c3f 100644
--- a/tools/lexer_generator/transition_keys.py
+++ b/tools/lexer_generator/transition_keys.py
@@ -91,29 +91,34 @@ class TransitionKey:
return TransitionKey.__create([(char, char)])
@staticmethod
- def __process_graph(graph, ranges):
+ def __process_graph(graph, ranges, key_map):
key = graph[0]
if key == 'RANGE':
ranges.append((ord(graph[1]), ord(graph[2])))
elif key == 'LITERAL':
ranges.append((ord(graph[1]), ord(graph[1])))
+ elif key == 'CAT':
+ for x in [graph[1], graph[2]]:
+ TransitionKey.__process_graph(x, ranges, key_map)
elif key == 'CHARACTER_CLASS':
- if graph[1] == 'ws':
+ class_name = graph[1]
+ if class_name == 'ws':
ranges.append(TransitionKey.__unicode_whitespace_bounds)
- elif graph[1] == 'lit':
+ elif class_name == 'lit':
ranges.append(TransitionKey.__unicode_literal_bounds)
+ elif class_name in key_map:
+ ranges += key_map[class_name].__ranges
else:
raise Exception("unknown character class [%s]" % graph[1])
- elif key == 'CAT':
- for x in [graph[1], graph[2]]:
- TransitionKey.__process_graph(x, ranges)
else:
raise Exception("bad key [%s]" % key)
@staticmethod
- def character_class(invert, graph):
+ def character_class(graph, key_map):
ranges = []
- TransitionKey.__process_graph(graph, ranges)
+ assert graph[0] == 'CLASS' or graph[0] == 'NOT_CLASS'
+ invert = graph[0] == 'NOT_CLASS'
+ TransitionKey.__process_graph(graph[1], ranges, key_map)
return TransitionKey.__key_from_ranges(invert, ranges)
def matches_char(self, char):
--
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
---
You received this message because you are subscribed to the Google Groups "v8-dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email
to [email protected].
For more options, visit https://groups.google.com/groups/opt_out.