Revision: 17453
Author: [email protected]
Date: Thu Oct 31 14:46:33 2013 UTC
Log: Experimental parser: parse character classes
[email protected]
BUG=
Review URL: https://codereview.chromium.org/50293010
http://code.google.com/p/v8/source/detail?r=17453
Modified:
/branches/experimental/parser/tools/lexer_generator/regex_lexer.py
/branches/experimental/parser/tools/lexer_generator/regex_parser.py
/branches/experimental/parser/tools/lexer_generator/transition_key_test.py
/branches/experimental/parser/tools/lexer_generator/transition_keys.py
=======================================
--- /branches/experimental/parser/tools/lexer_generator/regex_lexer.py Thu Oct 31 14:36:42 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/regex_lexer.py Thu Oct 31 14:46:33 2013 UTC
@@ -48,6 +48,7 @@
'RANGE',
'NOT',
'CLASS_LITERAL',
+ 'CHARACTER_CLASS',
)
states = (
@@ -84,9 +85,10 @@
t_class_RANGE = '-'
t_class_NOT = '\^'
+ t_class_CHARACTER_CLASS = ':ws:|:lit:'
def t_class_ESCAPED_CLASS_LITERAL(self, t):
- r'\\\^|\\-|\\\[|\\\]'
+ r'\\\^|\\-|\\\[|\\\]\\:'
t.type = 'CLASS_LITERAL'
t.value = t.value[1:]
return t
=======================================
--- /branches/experimental/parser/tools/lexer_generator/regex_parser.py Thu Oct 31 14:36:42 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/regex_parser.py Thu Oct 31 14:46:33 2013 UTC
@@ -100,11 +100,15 @@
def p_class_content(self, p):
'''class_content : CLASS_LITERAL RANGE CLASS_LITERAL
maybe_class_content
| CLASS_LITERAL maybe_class_content
+ | CHARACTER_CLASS maybe_class_content
'''
if len(p) == 5:
left = ("RANGE", p[1], p[3])
else:
- left = ('LITERAL', p[1])
+ if len(p[1]) == 1:
+ left = ('LITERAL', p[1])
+ else:
+ left = ('CHARACTER_CLASS', p[1:-1])
p[0] = self.__cat(left, p[len(p)-1])
def p_maybe_class_content(self, p):
=======================================
--- /branches/experimental/parser/tools/lexer_generator/transition_key_test.py Thu Oct 31 14:36:42 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/transition_key_test.py Thu Oct 31 14:46:33 2013 UTC
@@ -51,6 +51,7 @@
("1-2", "12", "ab"),
("a-zA-Z", "abyzABYZ" , "123"),
("a-zA-Z0g" , "abyzABYZ0" , "123"),
+ ("a-z:ws::lit:" , "abc" , "123"),
]
for (string, match, no_match) in data:
for invert in [False, True]:
=======================================
--- /branches/experimental/parser/tools/lexer_generator/transition_keys.py Thu Oct 31 14:36:42 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/transition_keys.py Thu Oct 31 14:46:33 2013 UTC
@@ -97,6 +97,13 @@
ranges.append((ord(graph[1]), ord(graph[2])))
elif key == 'LITERAL':
ranges.append((ord(graph[1]), ord(graph[1])))
+ elif key == 'CHARACTER_CLASS':
+ if graph[1] == 'ws':
+ ranges.append(TransitionKey.__unicode_whitespace_bounds)
+ elif graph[1] == 'lit':
+ ranges.append(TransitionKey.__unicode_literal_bounds)
+ else:
+ assert "unknown character class %s" % graph[1]
elif key == 'CAT':
for x in [graph[1], graph[2]]:
TransitionKey.__process_graph(x, ranges)
--
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
---
You received this message because you are subscribed to the Google Groups "v8-dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email
to [email protected].
For more options, visit https://groups.google.com/groups/opt_out.