Revision: 17500
Author:   [email protected]
Date:     Tue Nov  5 16:32:06 2013 UTC
Log:      Experimental lexer generator: parse \000 etc. inside char classes.

[email protected]
BUG=

Review URL: https://codereview.chromium.org/59263003
http://code.google.com/p/v8/source/detail?r=17500

Modified:
 /branches/experimental/parser/tools/lexer_generator/regex_lexer.py
 /branches/experimental/parser/tools/lexer_generator/regex_parser.py
 /branches/experimental/parser/tools/lexer_generator/rule_parser.py

=======================================
--- /branches/experimental/parser/tools/lexer_generator/regex_lexer.py Tue Nov 5 15:42:39 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/regex_lexer.py Tue Nov 5 16:32:06 2013 UTC
@@ -53,6 +53,7 @@
     'RANGE',
     'NOT',
     'CLASS_LITERAL',
+    'CLASS_LITERAL_AS_OCTAL',
     'CHARACTER_CLASS',
   )

@@ -93,13 +94,17 @@
   t_class_NOT = '\^'
   t_class_CHARACTER_CLASS = r':\w+:'

+  def t_class_CLASS_LITERAL_AS_OCTAL(self, t):
+    r'\\\d+'
+    return t
+
   def t_class_ESCAPED_CLASS_LITERAL(self, t):
     r'\\\^|\\-|\\\[|\\\]|\\\:|\\\w'
     t.type = 'CLASS_LITERAL'
     t.value = t.value[1:]
     return t

-  t_class_CLASS_LITERAL = r'[\w $_+]' # fix this
+  t_class_CLASS_LITERAL = r'[\w $_+]'

   def t_REPEAT_BEGIN(self, t):
     r'\{'
=======================================
--- /branches/experimental/parser/tools/lexer_generator/regex_parser.py Tue Nov 5 15:16:01 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/regex_parser.py Tue Nov 5 16:32:06 2013 UTC
@@ -113,12 +113,15 @@
     '''class_content : CLASS_LITERAL RANGE CLASS_LITERAL maybe_class_content
                      | CLASS_LITERAL maybe_class_content
                      | CHARACTER_CLASS maybe_class_content
+                     | CLASS_LITERAL_AS_OCTAL maybe_class_content
     '''
     if len(p) == 5:
       left = ("RANGE", p[1], p[3])
     else:
       if len(p[1]) == 1:
         left = ('LITERAL', p[1])
+      elif p[1][0] == '\\':
+        left = ('LITERAL', chr(int(p[1][1:], 8)))
       else:
         left = ('CHARACTER_CLASS', p[1][1:-1])
     p[0] = self.__cat(left, p[len(p)-1])
=======================================
--- /branches/experimental/parser/tools/lexer_generator/rule_parser.py Tue Nov 5 15:47:39 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/rule_parser.py Tue Nov 5 16:32:06 2013 UTC
@@ -37,7 +37,7 @@

   def __init__(self):
     self.aliases = {
-      'eof' : RegexParser.parse("eof"), #RegexParser.parse("[\0]"),
+      'eof' : RegexParser.parse("[\\0]"),
       'any' : RegexParser.parse("."),
     }
     self.character_classes = {}

--
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
---
You received this message because you are subscribed to the Google Groups "v8-dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email to [email protected].
For more options, visit https://groups.google.com/groups/opt_out.

Reply via email to