Revision: 17499
Author: [email protected]
Date: Tue Nov 5 15:47:39 2013 UTC
Log: Experimental parser: user defined key classes
[email protected]
BUG=
Review URL: https://codereview.chromium.org/59973005
http://code.google.com/p/v8/source/detail?r=17499
Modified:
/branches/experimental/parser/tools/lexer_generator/nfa.py
/branches/experimental/parser/tools/lexer_generator/rule_parser.py
/branches/experimental/parser/tools/lexer_generator/transition_key_test.py
/branches/experimental/parser/tools/lexer_generator/transition_keys.py
=======================================
--- /branches/experimental/parser/tools/lexer_generator/nfa.py Tue Nov 5 15:16:01 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/nfa.py Tue Nov 5 15:47:39 2013 UTC
@@ -122,6 +122,10 @@
self.__node_number = 0
self.__operation_map = {}
self.__members = getmembers(self)
+ self.__character_classes = {}
+
+ def set_character_classes(self, classes):
+ self.__character_classes = classes
def __new_state(self):
self.__node_number += 1
@@ -192,10 +196,12 @@
return self.__key_state(TransitionKey.single_char(graph[1]))
def __class(self, graph):
- return self.__key_state(TransitionKey.character_class(False, graph[1]))
+ return self.__key_state(
+ TransitionKey.character_class(graph, self.__character_classes))
def __not_class(self, graph):
- return self.__key_state(TransitionKey.character_class(True, graph[1]))
+ return self.__key_state(
+ TransitionKey.character_class(graph, self.__character_classes))
def __any(self, graph):
return self.__key_state(TransitionKey.any())
=======================================
--- /branches/experimental/parser/tools/lexer_generator/rule_parser.py Tue Nov 5 14:52:18 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/rule_parser.py Tue Nov 5 15:47:39 2013 UTC
@@ -29,6 +29,7 @@
from rule_lexer import RuleLexer
from regex_parser import RegexParser
from nfa import NfaBuilder
+from transition_keys import TransitionKey
class RuleParser:
@@ -39,6 +40,7 @@
'eof' : RegexParser.parse("eof"), #RegexParser.parse("[\0]"),
'any' : RegexParser.parse("."),
}
+ self.character_classes = {}
self.current_transition = None
self.rules = {}
@@ -56,7 +58,12 @@
def p_alias_rule(self, p):
'alias_rule : IDENTIFIER EQUALS composite_regex SEMICOLON'
assert not p[1] in self.aliases
+ graph = p[3]
self.aliases[p[1]] = p[3]
+ if graph[0] == 'CLASS' or graph[0] == 'NOT_CLASS':
+ classes = self.character_classes
+ assert not p[1] in classes
+ classes[p[1]] = TransitionKey.character_class(graph, classes)
def p_transition_rule(self, p):
'''transition_rule : transition composite_regex code
@@ -91,7 +98,9 @@
p[0] = p[1]
else:
p[0] = NfaBuilder.or_graphs([p[1], p[3]])
- # NfaBuilder().nfa(p[0])
+ # builder = NfaBuilder()
+ # builder.set_character_classes(self.character_classes)
+ # builder.nfa(p[0])
def p_regex_parts(self, p):
'''regex_parts : regex_part
=======================================
--- /branches/experimental/parser/tools/lexer_generator/transition_key_test.py Thu Oct 31 14:46:33 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/transition_key_test.py Tue Nov 5 15:47:39 2013 UTC
@@ -53,6 +53,7 @@
("a-zA-Z0g" , "abyzABYZ0" , "123"),
("a-z:ws::lit:" , "abc" , "123"),
]
+ classes = {}
for (string, match, no_match) in data:
for invert in [False, True]:
if invert:
@@ -63,11 +64,19 @@
token = "CLASS"
graph = RegexParser.parse(regex)
assert graph[0] == token
- key = TransitionKey.character_class(invert, graph[1])
+ key = TransitionKey.character_class(graph, classes)
for c in match:
self.assertEqual(invert, not key.matches_char(c))
for c in no_match:
self.assertEqual(invert, key.matches_char(c))
+ def test_self_defined_classes(self):
+ graph = RegexParser.parse("[a-z]")
+ classes = {
+ 'self_defined' : TransitionKey.character_class(graph, {})}
+ graph = RegexParser.parse("[^:self_defined:]")
+ key = TransitionKey.character_class(graph, classes)
+ self.assertTrue(key.matches_char('A'))
+
if __name__ == '__main__':
unittest.main()
=======================================
--- /branches/experimental/parser/tools/lexer_generator/transition_keys.py Tue Nov 5 15:16:01 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/transition_keys.py Tue Nov 5 15:47:39 2013 UTC
@@ -91,29 +91,34 @@
return TransitionKey.__create([(char, char)])
@staticmethod
- def __process_graph(graph, ranges):
+ def __process_graph(graph, ranges, key_map):
key = graph[0]
if key == 'RANGE':
ranges.append((ord(graph[1]), ord(graph[2])))
elif key == 'LITERAL':
ranges.append((ord(graph[1]), ord(graph[1])))
+ elif key == 'CAT':
+ for x in [graph[1], graph[2]]:
+ TransitionKey.__process_graph(x, ranges, key_map)
elif key == 'CHARACTER_CLASS':
- if graph[1] == 'ws':
+ class_name = graph[1]
+ if class_name == 'ws':
ranges.append(TransitionKey.__unicode_whitespace_bounds)
- elif graph[1] == 'lit':
+ elif class_name == 'lit':
ranges.append(TransitionKey.__unicode_literal_bounds)
+ elif class_name in key_map:
+ ranges += key_map[class_name].__ranges
else:
raise Exception("unknown character class [%s]" % graph[1])
- elif key == 'CAT':
- for x in [graph[1], graph[2]]:
- TransitionKey.__process_graph(x, ranges)
else:
raise Exception("bad key [%s]" % key)
@staticmethod
- def character_class(invert, graph):
+ def character_class(graph, key_map):
ranges = []
- TransitionKey.__process_graph(graph, ranges)
+ assert graph[0] == 'CLASS' or graph[0] == 'NOT_CLASS'
+ invert = graph[0] == 'NOT_CLASS'
+ TransitionKey.__process_graph(graph[1], ranges, key_map)
return TransitionKey.__key_from_ranges(invert, ranges)
def matches_char(self, char):
--
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
---
You received this message because you are subscribed to the Google Groups "v8-dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email
to [email protected].
For more options, visit https://groups.google.com/groups/opt_out.