Revision: 17582
Author:   [email protected]
Date:     Fri Nov  8 10:33:44 2013 UTC
Log:      Experimental parser: better escaping

[email protected]

BUG=

Review URL: https://codereview.chromium.org/66313005
http://code.google.com/p/v8/source/detail?r=17582

Modified:
 /branches/experimental/parser/tools/lexer_generator/generator.py
 /branches/experimental/parser/tools/lexer_generator/regex_lexer.py
 /branches/experimental/parser/tools/lexer_generator/rule_parser.py

=======================================
--- /branches/experimental/parser/tools/lexer_generator/generator.py Fri Nov 8 07:29:01 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/generator.py Fri Nov 8 10:33:44 2013 UTC
@@ -88,11 +88,12 @@
       if code:
         graph = NfaBuilder.add_action(graph, (precedence, code, None))
       if transition == 'continue':
-        graph = NfaBuilder.add_continue(graph)
+        if not v['default'][1][2] == 'continue':
+          graph = NfaBuilder.add_continue(graph)
       elif (transition == 'break' or
             transition == 'terminate' or
             transition == 'terminate_illegal'):
-        pass
+        NfaBuilder.add_action(graph, (-1, transition, None))
       else:
         assert k == 'default'
         graph = NfaBuilder.join_subgraph(graph, transition, rule_map[transition])
@@ -101,6 +102,9 @@
     # merge default action
     (precedence, code, transition) = v['default'][1]
     assert transition == 'continue' or transition == 'break'
+    if transition == 'continue':
+      assert k != 'default'
+      # graph = NfaBuilder.apply_modifier('*', graph)
     if code:
       graph = NfaBuilder.add_incoming_action(graph, (precedence, code, None))
     rule_map[k] = graph
=======================================
--- /branches/experimental/parser/tools/lexer_generator/regex_lexer.py Wed Nov 6 08:50:55 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/regex_lexer.py Fri Nov 8 10:33:44 2013 UTC
@@ -27,6 +27,13 @@

 import ply.lex as lex

+def build_escape_map(chars):
+  def add_escape(d, char):
+    d['\\' + char] = char
+    return d
+  return reduce(add_escape, chars,
+    {'\\t' : '\t', '\\r' : '\r', '\\n' : '\n', '\\v' : '\v', '\\f' : '\f'})
+
 class RegexLexer:

   tokens = (
@@ -62,10 +69,12 @@
     ('repeat','exclusive'),
   )

+  __escaped_literals = build_escape_map("(){}[]?+.*|\\")
+
   def t_ESCAPED_LITERAL(self, t):
-    r'\\\(|\\\)|\\\[|\\\]|\\\||\\\+|\\\*|\\\?|\\\.|\\\\|\\\{|\\\}'
+    r'\\.'
     t.type = 'LITERAL'
-    t.value = t.value[1:]
+    t.value = RegexLexer.__escaped_literals[t.value]
     return t

   t_GROUP_BEGIN = r'\('
@@ -98,15 +107,12 @@
     r'\\\d+'
     return t

-  escaped_class_literals = {
-    '\\t' : '\t', '\\r' : '\r', '\\n' : '\n', '\\v' : '\v', '\\f' : '\f',
-    '\\^' : '^', '\\[' : '[', '\\]' : ']', '\\-' : '-', '\\:' : ':',
-  }
+  __escaped_class_literals = build_escape_map("^[]-:")

   def t_class_ESCAPED_CLASS_LITERAL(self, t):
-    r'\\\^|\\-|\\\[|\\\]|\\\:|\\\w'
+    r'\\.'
     t.type = 'CLASS_LITERAL'
-    t.value = RegexLexer.escaped_class_literals[t.value]
+    t.value = RegexLexer.__escaped_class_literals[t.value]
     return t

   t_class_CLASS_LITERAL = r'[\w $_+]'
=======================================
--- /branches/experimental/parser/tools/lexer_generator/rule_parser.py Thu Nov 7 12:55:33 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/rule_parser.py Fri Nov 8 10:33:44 2013 UTC
@@ -150,8 +150,9 @@

   def p_regex_string_literal(self, p):
     'regex_string_literal : STRING'
+    string = p[1][1:-1]
     escape_char = lambda string, char: string.replace(char, "\\" + char)
-    string = reduce(escape_char, "\+?*|.[](){}", p[1][1:-1])
+    string = reduce(escape_char, "+?*|.[](){}", string).replace("\\\"", "\"")
     p[0] = RegexParser.parse(string)

   def p_regex(self, p):

--
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
---
You received this message because you are subscribed to the Google Groups "v8-dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email to [email protected].
For more options, visit https://groups.google.com/groups/opt_out.

Reply via email to