Reviewers: marja
Message:
Committed patchset #1 manually as r17582.
Description:
Experimental parser: better escaping
[email protected]
BUG=
Committed: https://code.google.com/p/v8/source/detail?r=17582
Please review this at https://codereview.chromium.org/66313005/
SVN Base: https://v8.googlecode.com/svn/branches/experimental/parser
Affected files (+22, -11 lines):
M tools/lexer_generator/generator.py
M tools/lexer_generator/regex_lexer.py
M tools/lexer_generator/rule_parser.py
Index: tools/lexer_generator/generator.py
diff --git a/tools/lexer_generator/generator.py b/tools/lexer_generator/generator.py
index 6137a7e386864a86076a0e58d564bb23f2828814..213ee6bab28c67a36564724c9695bea30ba4c5ee 100644
--- a/tools/lexer_generator/generator.py
+++ b/tools/lexer_generator/generator.py
@@ -88,11 +88,12 @@ def process_rules(parser_state):
if code:
graph = NfaBuilder.add_action(graph, (precedence, code, None))
if transition == 'continue':
- graph = NfaBuilder.add_continue(graph)
+ if not v['default'][1][2] == 'continue':
+ graph = NfaBuilder.add_continue(graph)
elif (transition == 'break' or
transition == 'terminate' or
transition == 'terminate_illegal'):
- pass
+ NfaBuilder.add_action(graph, (-1, transition, None))
else:
assert k == 'default'
graph = NfaBuilder.join_subgraph(graph, transition,
rule_map[transition])
@@ -101,6 +102,9 @@ def process_rules(parser_state):
# merge default action
(precedence, code, transition) = v['default'][1]
assert transition == 'continue' or transition == 'break'
+ if transition == 'continue':
+ assert k != 'default'
+ # graph = NfaBuilder.apply_modifier('*', graph)
if code:
graph = NfaBuilder.add_incoming_action(graph, (precedence, code,
None))
rule_map[k] = graph
Index: tools/lexer_generator/regex_lexer.py
diff --git a/tools/lexer_generator/regex_lexer.py b/tools/lexer_generator/regex_lexer.py
index 78f36484c1ab85e347fbcad89a9083e2452afaf8..ea5b3e06aa254072d95db4bf5ed8243df03928a0 100644
--- a/tools/lexer_generator/regex_lexer.py
+++ b/tools/lexer_generator/regex_lexer.py
@@ -27,6 +27,13 @@
import ply.lex as lex
+def build_escape_map(chars):
+ def add_escape(d, char):
+ d['\\' + char] = char
+ return d
+ return reduce(add_escape, chars,
+ {'\\t' : '\t', '\\r' : '\r', '\\n' : '\n', '\\v' : '\v', '\\f' : '\f'})
+
class RegexLexer:
tokens = (
@@ -62,10 +69,12 @@ class RegexLexer:
('repeat','exclusive'),
)
+ __escaped_literals = build_escape_map("(){}[]?+.*|\\")
+
def t_ESCAPED_LITERAL(self, t):
- r'\\\(|\\\)|\\\[|\\\]|\\\||\\\+|\\\*|\\\?|\\\.|\\\\|\\\{|\\\}'
+ r'\\.'
t.type = 'LITERAL'
- t.value = t.value[1:]
+ t.value = RegexLexer.__escaped_literals[t.value]
return t
t_GROUP_BEGIN = r'\('
@@ -98,15 +107,12 @@ class RegexLexer:
r'\\\d+'
return t
- escaped_class_literals = {
- '\\t' : '\t', '\\r' : '\r', '\\n' : '\n', '\\v' : '\v', '\\f' : '\f',
- '\\^' : '^', '\\[' : '[', '\\]' : ']', '\\-' : '-', '\\:' : ':',
- }
+ __escaped_class_literals = build_escape_map("^[]-:")
def t_class_ESCAPED_CLASS_LITERAL(self, t):
- r'\\\^|\\-|\\\[|\\\]|\\\:|\\\w'
+ r'\\.'
t.type = 'CLASS_LITERAL'
- t.value = RegexLexer.escaped_class_literals[t.value]
+ t.value = RegexLexer.__escaped_class_literals[t.value]
return t
t_class_CLASS_LITERAL = r'[\w $_+]'
Index: tools/lexer_generator/rule_parser.py
diff --git a/tools/lexer_generator/rule_parser.py b/tools/lexer_generator/rule_parser.py
index 046806fd0e4f36461f2b66485540fd8609222365..86f62d641012c6384030495579c189738b00619b 100644
--- a/tools/lexer_generator/rule_parser.py
+++ b/tools/lexer_generator/rule_parser.py
@@ -150,8 +150,9 @@ class RuleParser:
def p_regex_string_literal(self, p):
'regex_string_literal : STRING'
+ string = p[1][1:-1]
escape_char = lambda string, char: string.replace(char, "\\" + char)
- string = reduce(escape_char, "\+?*|.[](){}", p[1][1:-1])
+ string = reduce(escape_char, "+?*|.[](){}",
string).replace("\\\"", "\"")
p[0] = RegexParser.parse(string)
def p_regex(self, p):
--
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
---
You received this message because you are subscribed to the Google Groups "v8-dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email
to [email protected].
For more options, visit https://groups.google.com/groups/opt_out.