Revision: 17495
Author: [email protected]
Date: Tue Nov 5 15:16:01 2013 UTC
Log: Experimental lexer generator: parse {} in regexps.
BUG=
[email protected]
Review URL: https://codereview.chromium.org/59953002
http://code.google.com/p/v8/source/detail?r=17495
Modified:
/branches/experimental/parser/tools/lexer_generator/automata_test.py
/branches/experimental/parser/tools/lexer_generator/nfa.py
/branches/experimental/parser/tools/lexer_generator/regex_lexer.py
/branches/experimental/parser/tools/lexer_generator/regex_parser.py
/branches/experimental/parser/tools/lexer_generator/transition_keys.py
=======================================
--- /branches/experimental/parser/tools/lexer_generator/automata_test.py
Tue Nov 5 12:37:55 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/automata_test.py
Tue Nov 5 15:16:01 2013 UTC
@@ -55,6 +55,11 @@
("a.+b", ["aab", "abb", "acb"], ["aaac", "ab", ""]),
(".|.", ["a", "b"], ["aa", ""]),
("//.", ["//a"], ["aa", ""]),
+ ("[ab]{2}", ["aa", "ab", "ba", "bb"], ["", "a", "b", "aaa", "bbb"]),
+ ("[ab]{2,3}", ["aa", "ab", "ba", "bb", "aab", "baa", "bbb"],
+ ["", "a", "b", "aaaa", "bbba"]),
+ ("[ab]{2,4}", ["aa", "ab", "ba", "bb", "aab", "baa", "bbb", "abab"],
+ ["", "a", "b", "aaaba", "bbbaa"])
]
def test_matches(self):
=======================================
--- /branches/experimental/parser/tools/lexer_generator/nfa.py Tue Nov 5
14:52:18 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/nfa.py Tue Nov 5
15:16:01 2013 UTC
@@ -108,6 +108,9 @@
def key_matches(self, value):
return self.__matches(lambda k, v : k.matches_key(v), value)
+ def __str__(self):
+ return "NfaState(" + str(self.__node_number) + ")"
+
@staticmethod
def gather_transition_keys(state_set):
f = lambda acc, state: acc | set(state.__transitions.keys())
@@ -153,6 +156,28 @@
start.add_epsilon_transition(node)
return (start, ends + [start])
+ def __repeat(self, graph):
+ param_min = int(graph[1])
+ param_max = int(graph[2])
+ subgraph = graph[3]
+ (start, ends) = self.__process(subgraph)
+ for i in xrange(1, param_min):
+ (start2, ends2) = self.__process(subgraph)
+ self.__patch_ends(ends, start2)
+ ends = ends2
+ if param_min == param_max:
+ return (start, ends)
+
+ midpoints = []
+ for i in xrange(param_min, param_max):
+ midpoint = self.__new_state()
+ self.__patch_ends(ends, midpoint)
+ (start2, ends) = self.__process(subgraph)
+ midpoint.add_epsilon_transition(start2)
+ midpoints.append(midpoint)
+
+ return (start, ends + midpoints)
+
def __cat(self, graph):
(left, right) = (self.__process(graph[1]), self.__process(graph[2]))
self.__patch_ends(left[1], right[0])
=======================================
--- /branches/experimental/parser/tools/lexer_generator/regex_lexer.py Tue
Nov 5 14:52:18 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/regex_lexer.py Tue
Nov 5 15:16:01 2013 UTC
@@ -43,6 +43,11 @@
'ZERO_OR_ONE',
'ANY',
+ 'REPEAT_BEGIN',
+ 'REPEAT_END',
+
+ 'NUMBER',
+ 'COMMA',
'LITERAL',
'RANGE',
@@ -64,11 +69,17 @@
t_GROUP_BEGIN = r'\('
t_GROUP_END = r'\)'
+ t_REPEAT_BEGIN = r'\{'
+ t_REPEAT_END = r'\}'
+
t_OR = r'\|'
t_ONE_OR_MORE = r'\+'
t_ZERO_OR_MORE = r'\*'
t_ZERO_OR_ONE = r'\?'
+ t_NUMBER = r'[0-9]+'
+ t_COMMA = r','
+
t_ANY = r'\.'
t_LITERAL = r'.'
=======================================
--- /branches/experimental/parser/tools/lexer_generator/regex_parser.py Tue
Nov 5 14:52:18 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/regex_parser.py Tue
Nov 5 15:16:01 2013 UTC
@@ -64,7 +64,10 @@
| any maybe_modifier
'''
if p[2] != None:
- p[0] = (p[2], p[1])
+ if isinstance(p[2], tuple) and p[2][0] == 'REPEAT':
+ p[0] = (p[2][0], p[2][1], p[2][2], p[1])
+ else:
+ p[0] = (p[2], p[1])
else:
p[0] = p[1]
@@ -72,11 +75,20 @@
'''maybe_modifier : ONE_OR_MORE
| ZERO_OR_ONE
| ZERO_OR_MORE
+ | repetition
| empty'''
p[0] = p[1]
- if p[1] != None:
+ if p[1] in self.token_map:
p[0] = self.token_map[p[1]]
+ def p_repetition(self, p):
+ '''repetition : REPEAT_BEGIN NUMBER REPEAT_END
+ | REPEAT_BEGIN NUMBER COMMA NUMBER REPEAT_END'''
+ if len(p) == 4:
+ p[0] = ("REPEAT", p[2], p[2])
+ else:
+ p[0] = ("REPEAT", p[2], p[4])
+
def p_literal(self, p):
'''literal : LITERAL'''
p[0] = ('LITERAL', p[1])
=======================================
--- /branches/experimental/parser/tools/lexer_generator/transition_keys.py
Tue Nov 5 14:52:18 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/transition_keys.py
Tue Nov 5 15:16:01 2013 UTC
@@ -195,6 +195,7 @@
@staticmethod
def disjoint_keys(key_set):
+ key_set.discard(TransitionKey.epsilon())
if not key_set:
return []
range_map = {}
--
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
---
You received this message because you are subscribed to the Google Groups "v8-dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email
to [email protected].
For more options, visit https://groups.google.com/groups/opt_out.