Revision: 17582
Author: [email protected]
Date: Fri Nov 8 10:33:44 2013 UTC
Log: Experimental parser: better escaping
[email protected]
BUG=
Review URL: https://codereview.chromium.org/66313005
http://code.google.com/p/v8/source/detail?r=17582
Modified:
/branches/experimental/parser/tools/lexer_generator/generator.py
/branches/experimental/parser/tools/lexer_generator/regex_lexer.py
/branches/experimental/parser/tools/lexer_generator/rule_parser.py
=======================================
--- /branches/experimental/parser/tools/lexer_generator/generator.py Fri Nov 8 07:29:01 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/generator.py Fri Nov 8 10:33:44 2013 UTC
@@ -88,11 +88,12 @@
if code:
graph = NfaBuilder.add_action(graph, (precedence, code, None))
if transition == 'continue':
- graph = NfaBuilder.add_continue(graph)
+ if not v['default'][1][2] == 'continue':
+ graph = NfaBuilder.add_continue(graph)
elif (transition == 'break' or
transition == 'terminate' or
transition == 'terminate_illegal'):
- pass
+ NfaBuilder.add_action(graph, (-1, transition, None))
else:
assert k == 'default'
graph = NfaBuilder.join_subgraph(graph, transition,
rule_map[transition])
@@ -101,6 +102,9 @@
# merge default action
(precedence, code, transition) = v['default'][1]
assert transition == 'continue' or transition == 'break'
+ if transition == 'continue':
+ assert k != 'default'
+ # graph = NfaBuilder.apply_modifier('*', graph)
if code:
graph = NfaBuilder.add_incoming_action(graph, (precedence, code, None))
rule_map[k] = graph
=======================================
--- /branches/experimental/parser/tools/lexer_generator/regex_lexer.py Wed Nov 6 08:50:55 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/regex_lexer.py Fri Nov 8 10:33:44 2013 UTC
@@ -27,6 +27,13 @@
import ply.lex as lex
+def build_escape_map(chars):
+ def add_escape(d, char):
+ d['\\' + char] = char
+ return d
+ return reduce(add_escape, chars,
+ {'\\t' : '\t', '\\r' : '\r', '\\n' : '\n', '\\v' : '\v', '\\f' : '\f'})
+
class RegexLexer:
tokens = (
@@ -62,10 +69,12 @@
('repeat','exclusive'),
)
+ __escaped_literals = build_escape_map("(){}[]?+.*|\\")
+
def t_ESCAPED_LITERAL(self, t):
- r'\\\(|\\\)|\\\[|\\\]|\\\||\\\+|\\\*|\\\?|\\\.|\\\\|\\\{|\\\}'
+ r'\\.'
t.type = 'LITERAL'
- t.value = t.value[1:]
+ t.value = RegexLexer.__escaped_literals[t.value]
return t
t_GROUP_BEGIN = r'\('
@@ -98,15 +107,12 @@
r'\\\d+'
return t
- escaped_class_literals = {
- '\\t' : '\t', '\\r' : '\r', '\\n' : '\n', '\\v' : '\v', '\\f' : '\f',
- '\\^' : '^', '\\[' : '[', '\\]' : ']', '\\-' : '-', '\\:' : ':',
- }
+ __escaped_class_literals = build_escape_map("^[]-:")
def t_class_ESCAPED_CLASS_LITERAL(self, t):
- r'\\\^|\\-|\\\[|\\\]|\\\:|\\\w'
+ r'\\.'
t.type = 'CLASS_LITERAL'
- t.value = RegexLexer.escaped_class_literals[t.value]
+ t.value = RegexLexer.__escaped_class_literals[t.value]
return t
t_class_CLASS_LITERAL = r'[\w $_+]'
=======================================
--- /branches/experimental/parser/tools/lexer_generator/rule_parser.py Thu Nov 7 12:55:33 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/rule_parser.py Fri Nov 8 10:33:44 2013 UTC
@@ -150,8 +150,9 @@
def p_regex_string_literal(self, p):
'regex_string_literal : STRING'
+ string = p[1][1:-1]
escape_char = lambda string, char: string.replace(char, "\\" + char)
- string = reduce(escape_char, "\+?*|.[](){}", p[1][1:-1])
+ string = reduce(escape_char, "+?*|.[](){}", string).replace("\\\"", "\"")
p[0] = RegexParser.parse(string)
def p_regex(self, p):
--
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
---
You received this message because you are subscribed to the Google Groups "v8-dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email
to [email protected].
For more options, visit https://groups.google.com/groups/opt_out.