Reviewers: marja,
Message:
Committed patchset #1 manually as r17453.
Description:
Experimental parser: parse character classes
[email protected]
BUG=
Committed: https://code.google.com/p/v8/source/detail?r=17453
Please review this at https://codereview.chromium.org/50293010/
SVN Base: https://v8.googlecode.com/svn/branches/experimental/parser
Affected files (+16, -2 lines):
M tools/lexer_generator/regex_lexer.py
M tools/lexer_generator/regex_parser.py
M tools/lexer_generator/transition_key_test.py
M tools/lexer_generator/transition_keys.py
Index: tools/lexer_generator/regex_lexer.py
diff --git a/tools/lexer_generator/regex_lexer.py b/tools/lexer_generator/regex_lexer.py
index d45f202c054a04563d81aabbfb167685ff716f7a..26c44870b807d2c68282ccda9fc59516206647ce 100644
--- a/tools/lexer_generator/regex_lexer.py
+++ b/tools/lexer_generator/regex_lexer.py
@@ -48,6 +48,7 @@ class RegexLexer:
'RANGE',
'NOT',
'CLASS_LITERAL',
+ 'CHARACTER_CLASS',
)
states = (
@@ -84,9 +85,10 @@ class RegexLexer:
t_class_RANGE = '-'
t_class_NOT = '\^'
+ t_class_CHARACTER_CLASS = ':ws:|:lit:'
def t_class_ESCAPED_CLASS_LITERAL(self, t):
- r'\\\^|\\-|\\\[|\\\]'
+ r'\\\^|\\-|\\\[|\\\]\\:'
t.type = 'CLASS_LITERAL'
t.value = t.value[1:]
return t
Index: tools/lexer_generator/regex_parser.py
diff --git a/tools/lexer_generator/regex_parser.py b/tools/lexer_generator/regex_parser.py
index e0dc502844d01136b66fd77fadc5144571fd8aaa..2e370371d9489217f57157434a0a03129be8d543 100644
--- a/tools/lexer_generator/regex_parser.py
+++ b/tools/lexer_generator/regex_parser.py
@@ -100,11 +100,15 @@ class RegexParser:
def p_class_content(self, p):
'''class_content : CLASS_LITERAL RANGE CLASS_LITERAL maybe_class_content
| CLASS_LITERAL maybe_class_content
+ | CHARACTER_CLASS maybe_class_content
'''
if len(p) == 5:
left = ("RANGE", p[1], p[3])
else:
- left = ('LITERAL', p[1])
+ if len(p[1]) == 1:
+ left = ('LITERAL', p[1])
+ else:
+ left = ('CHARACTER_CLASS', p[1:-1])
p[0] = self.__cat(left, p[len(p)-1])
def p_maybe_class_content(self, p):
Index: tools/lexer_generator/transition_key_test.py
diff --git a/tools/lexer_generator/transition_key_test.py b/tools/lexer_generator/transition_key_test.py
index ad60f00e7ae35f64ec9509873140e06145a4b05f..480d6c0830304b21d664bd9a6839e1b6874b7ca1 100644
--- a/tools/lexer_generator/transition_key_test.py
+++ b/tools/lexer_generator/transition_key_test.py
@@ -51,6 +51,7 @@ class TransitionKeyTestCase(unittest.TestCase):
("1-2", "12", "ab"),
("a-zA-Z", "abyzABYZ" , "123"),
("a-zA-Z0g" , "abyzABYZ0" , "123"),
+ ("a-z:ws::lit:" , "abc" , "123"),
]
for (string, match, no_match) in data:
for invert in [False, True]:
Index: tools/lexer_generator/transition_keys.py
diff --git a/tools/lexer_generator/transition_keys.py b/tools/lexer_generator/transition_keys.py
index 3fd378cc787437344b4e502a09fc0c076bce8437..e39cdc0e6469a0e0f5afb73cb6c2e86097fd4a96 100644
--- a/tools/lexer_generator/transition_keys.py
+++ b/tools/lexer_generator/transition_keys.py
@@ -97,6 +97,13 @@ class TransitionKey:
ranges.append((ord(graph[1]), ord(graph[2])))
elif key == 'LITERAL':
ranges.append((ord(graph[1]), ord(graph[1])))
+ elif key == 'CHARACTER_CLASS':
+ if graph[1] == 'ws':
+ ranges.append(TransitionKey.__unicode_whitespace_bounds)
+ elif graph[1] == 'lit':
+ ranges.append(TransitionKey.__unicode_literal_bounds)
+ else:
+ assert "unknown character class %s" % graph[1]
elif key == 'CAT':
for x in [graph[1], graph[2]]:
TransitionKey.__process_graph(x, ranges)
--
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
---
You received this message because you are subscribed to the Google Groups "v8-dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email
to [email protected].
For more options, visit https://groups.google.com/groups/opt_out.