Reviewers: marja,

Message:
Committed patchset #1 manually as r17582.

Description:
Experimental parser: better escaping

[email protected]

BUG=

Committed: https://code.google.com/p/v8/source/detail?r=17582

Please review this at https://codereview.chromium.org/66313005/

SVN Base: https://v8.googlecode.com/svn/branches/experimental/parser

Affected files (+22, -11 lines):
  M tools/lexer_generator/generator.py
  M tools/lexer_generator/regex_lexer.py
  M tools/lexer_generator/rule_parser.py


Index: tools/lexer_generator/generator.py
diff --git a/tools/lexer_generator/generator.py b/tools/lexer_generator/generator.py
index 6137a7e386864a86076a0e58d564bb23f2828814..213ee6bab28c67a36564724c9695bea30ba4c5ee 100644
--- a/tools/lexer_generator/generator.py
+++ b/tools/lexer_generator/generator.py
@@ -88,11 +88,12 @@ def process_rules(parser_state):
       if code:
         graph = NfaBuilder.add_action(graph, (precedence, code, None))
       if transition == 'continue':
-        graph = NfaBuilder.add_continue(graph)
+        if not v['default'][1][2] == 'continue':
+          graph = NfaBuilder.add_continue(graph)
       elif (transition == 'break' or
             transition == 'terminate' or
             transition == 'terminate_illegal'):
-        pass
+        NfaBuilder.add_action(graph, (-1, transition, None))
       else:
         assert k == 'default'
         graph = NfaBuilder.join_subgraph(graph, transition, rule_map[transition])
@@ -101,6 +102,9 @@ def process_rules(parser_state):
     # merge default action
     (precedence, code, transition) = v['default'][1]
     assert transition == 'continue' or transition == 'break'
+    if transition == 'continue':
+      assert k != 'default'
+      # graph = NfaBuilder.apply_modifier('*', graph)
     if code:
       graph = NfaBuilder.add_incoming_action(graph, (precedence, code, None))
     rule_map[k] = graph
Index: tools/lexer_generator/regex_lexer.py
diff --git a/tools/lexer_generator/regex_lexer.py b/tools/lexer_generator/regex_lexer.py
index 78f36484c1ab85e347fbcad89a9083e2452afaf8..ea5b3e06aa254072d95db4bf5ed8243df03928a0 100644
--- a/tools/lexer_generator/regex_lexer.py
+++ b/tools/lexer_generator/regex_lexer.py
@@ -27,6 +27,13 @@

 import ply.lex as lex

+def build_escape_map(chars):
+  def add_escape(d, char):
+    d['\\' + char] = char
+    return d
+  return reduce(add_escape, chars,
+    {'\\t' : '\t', '\\r' : '\r', '\\n' : '\n', '\\v' : '\v', '\\f' : '\f'})
+
 class RegexLexer:

   tokens = (
@@ -62,10 +69,12 @@ class RegexLexer:
     ('repeat','exclusive'),
   )

+  __escaped_literals = build_escape_map("(){}[]?+.*|\\")
+
   def t_ESCAPED_LITERAL(self, t):
-    r'\\\(|\\\)|\\\[|\\\]|\\\||\\\+|\\\*|\\\?|\\\.|\\\\|\\\{|\\\}'
+    r'\\.'
     t.type = 'LITERAL'
-    t.value = t.value[1:]
+    t.value = RegexLexer.__escaped_literals[t.value]
     return t

   t_GROUP_BEGIN = r'\('
@@ -98,15 +107,12 @@ class RegexLexer:
     r'\\\d+'
     return t

-  escaped_class_literals = {
-    '\\t' : '\t', '\\r' : '\r', '\\n' : '\n', '\\v' : '\v', '\\f' : '\f',
-    '\\^' : '^', '\\[' : '[', '\\]' : ']', '\\-' : '-', '\\:' : ':',
-  }
+  __escaped_class_literals = build_escape_map("^[]-:")

   def t_class_ESCAPED_CLASS_LITERAL(self, t):
-    r'\\\^|\\-|\\\[|\\\]|\\\:|\\\w'
+    r'\\.'
     t.type = 'CLASS_LITERAL'
-    t.value = RegexLexer.escaped_class_literals[t.value]
+    t.value = RegexLexer.__escaped_class_literals[t.value]
     return t

   t_class_CLASS_LITERAL = r'[\w $_+]'
Index: tools/lexer_generator/rule_parser.py
diff --git a/tools/lexer_generator/rule_parser.py b/tools/lexer_generator/rule_parser.py
index 046806fd0e4f36461f2b66485540fd8609222365..86f62d641012c6384030495579c189738b00619b 100644
--- a/tools/lexer_generator/rule_parser.py
+++ b/tools/lexer_generator/rule_parser.py
@@ -150,8 +150,9 @@ class RuleParser:

   def p_regex_string_literal(self, p):
     'regex_string_literal : STRING'
+    string = p[1][1:-1]
     escape_char = lambda string, char: string.replace(char, "\\" + char)
-    string = reduce(escape_char, "\+?*|.[](){}", p[1][1:-1])
+    string = reduce(escape_char, "+?*|.[](){}", string).replace("\\\"", "\"")
     p[0] = RegexParser.parse(string)

   def p_regex(self, p):


--
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
---
You received this message because you are subscribed to the Google Groups "v8-dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email to [email protected].
For more options, visit https://groups.google.com/groups/opt_out.

Reply via email to