Reviewers: dcarney,

Message:
Committed patchset #2 manually as r17500.

Description:
Experimental lexer generator: parse \000 etc. inside char classes.

[email protected]
BUG=

Committed: https://code.google.com/p/v8/source/detail?r=17500

Please review this at https://codereview.chromium.org/59263003/

SVN Base: https://v8.googlecode.com/svn/branches/experimental/parser

Affected files (+10, -2 lines):
  M tools/lexer_generator/regex_lexer.py
  M tools/lexer_generator/regex_parser.py
  M tools/lexer_generator/rule_parser.py


Index: tools/lexer_generator/regex_lexer.py
diff --git a/tools/lexer_generator/regex_lexer.py b/tools/lexer_generator/regex_lexer.py
index 55c3bda94192668f6c085c55e5561103b680f5d2..8fb80b5d16cd71c207ab8eb1070996fdf5b7741f 100644
--- a/tools/lexer_generator/regex_lexer.py
+++ b/tools/lexer_generator/regex_lexer.py
@@ -53,6 +53,7 @@ class RegexLexer:
     'RANGE',
     'NOT',
     'CLASS_LITERAL',
+    'CLASS_LITERAL_AS_OCTAL',
     'CHARACTER_CLASS',
   )

@@ -93,13 +94,17 @@ class RegexLexer:
   t_class_NOT = '\^'
   t_class_CHARACTER_CLASS = r':\w+:'

+  def t_class_CLASS_LITERAL_AS_OCTAL(self, t):
+    r'\\\d+'
+    return t
+
   def t_class_ESCAPED_CLASS_LITERAL(self, t):
     r'\\\^|\\-|\\\[|\\\]|\\\:|\\\w'
     t.type = 'CLASS_LITERAL'
     t.value = t.value[1:]
     return t

-  t_class_CLASS_LITERAL = r'[\w $_+]' # fix this
+  t_class_CLASS_LITERAL = r'[\w $_+]'

   def t_REPEAT_BEGIN(self, t):
     r'\{'
Index: tools/lexer_generator/regex_parser.py
diff --git a/tools/lexer_generator/regex_parser.py b/tools/lexer_generator/regex_parser.py
index c533215ab863dd535df8b4377c2b869d88f0a5f0..0afd356cd2336172390c44e3514e031dc8809416 100644
--- a/tools/lexer_generator/regex_parser.py
+++ b/tools/lexer_generator/regex_parser.py
@@ -113,12 +113,15 @@ class RegexParser:
     '''class_content : CLASS_LITERAL RANGE CLASS_LITERAL maybe_class_content
                      | CLASS_LITERAL maybe_class_content
                      | CHARACTER_CLASS maybe_class_content
+                     | CLASS_LITERAL_AS_OCTAL maybe_class_content
     '''
     if len(p) == 5:
       left = ("RANGE", p[1], p[3])
     else:
       if len(p[1]) == 1:
         left = ('LITERAL', p[1])
+      elif p[1][0] == '\\':
+        left = ('LITERAL', chr(int(p[1][1:], 8)))
       else:
         left = ('CHARACTER_CLASS', p[1][1:-1])
     p[0] = self.__cat(left, p[len(p)-1])
Index: tools/lexer_generator/rule_parser.py
diff --git a/tools/lexer_generator/rule_parser.py b/tools/lexer_generator/rule_parser.py
index bfaf179fc8f948debb4a58f12bcae63fadc59410..2702915030ac075b607f7bad8d0cefac110d4a39 100644
--- a/tools/lexer_generator/rule_parser.py
+++ b/tools/lexer_generator/rule_parser.py
@@ -37,7 +37,7 @@ class RuleParser:

   def __init__(self):
     self.aliases = {
-      'eof' : RegexParser.parse("eof"), #RegexParser.parse("[\0]"),
+      'eof' : RegexParser.parse("[\\0]"),
       'any' : RegexParser.parse("."),
     }
     self.character_classes = {}


--
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
---
You received this message because you are subscribed to the Google Groups "v8-dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email to [email protected].
For more options, visit https://groups.google.com/groups/opt_out.

Reply via email to