Revision: 17453
Author:   [email protected]
Date:     Thu Oct 31 14:46:33 2013 UTC
Log:      Experimental parser: parse character classes

[email protected]
BUG=

Review URL: https://codereview.chromium.org/50293010
http://code.google.com/p/v8/source/detail?r=17453

Modified:
 /branches/experimental/parser/tools/lexer_generator/regex_lexer.py
 /branches/experimental/parser/tools/lexer_generator/regex_parser.py
 /branches/experimental/parser/tools/lexer_generator/transition_key_test.py
 /branches/experimental/parser/tools/lexer_generator/transition_keys.py

=======================================
--- /branches/experimental/parser/tools/lexer_generator/regex_lexer.py Thu Oct 31 14:36:42 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/regex_lexer.py Thu Oct 31 14:46:33 2013 UTC
@@ -48,6 +48,7 @@
     'RANGE',
     'NOT',
     'CLASS_LITERAL',
+    'CHARACTER_CLASS',
   )

   states = (
@@ -84,9 +85,10 @@

   t_class_RANGE = '-'
   t_class_NOT = '\^'
+  t_class_CHARACTER_CLASS = ':ws:|:lit:'

   def t_class_ESCAPED_CLASS_LITERAL(self, t):
-    r'\\\^|\\-|\\\[|\\\]'
+    r'\\\^|\\-|\\\[|\\\]\\:'
     t.type = 'CLASS_LITERAL'
     t.value = t.value[1:]
     return t
=======================================
--- /branches/experimental/parser/tools/lexer_generator/regex_parser.py Thu Oct 31 14:36:42 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/regex_parser.py Thu Oct 31 14:46:33 2013 UTC
@@ -100,11 +100,15 @@
   def p_class_content(self, p):
     '''class_content : CLASS_LITERAL RANGE CLASS_LITERAL maybe_class_content
                      | CLASS_LITERAL maybe_class_content
+                     | CHARACTER_CLASS maybe_class_content
     '''
     if len(p) == 5:
       left = ("RANGE", p[1], p[3])
     else:
-      left = ('LITERAL', p[1])
+      if len(p[1]) == 1:
+        left = ('LITERAL', p[1])
+      else:
+        left = ('CHARACTER_CLASS', p[1:-1])
     p[0] = self.__cat(left, p[len(p)-1])

   def p_maybe_class_content(self, p):
=======================================
--- /branches/experimental/parser/tools/lexer_generator/transition_key_test.py Thu Oct 31 14:36:42 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/transition_key_test.py Thu Oct 31 14:46:33 2013 UTC
@@ -51,6 +51,7 @@
       ("1-2", "12", "ab"),
       ("a-zA-Z", "abyzABYZ" , "123"),
       ("a-zA-Z0g" , "abyzABYZ0" , "123"),
+      ("a-z:ws::lit:" , "abc" , "123"),
     ]
     for (string, match, no_match) in data:
       for invert in [False, True]:
=======================================
--- /branches/experimental/parser/tools/lexer_generator/transition_keys.py Thu Oct 31 14:36:42 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/transition_keys.py Thu Oct 31 14:46:33 2013 UTC
@@ -97,6 +97,13 @@
       ranges.append((ord(graph[1]), ord(graph[2])))
     elif key == 'LITERAL':
       ranges.append((ord(graph[1]), ord(graph[1])))
+    elif key == 'CHARACTER_CLASS':
+      if graph[1] == 'ws':
+        ranges.append(TransitionKey.__unicode_whitespace_bounds)
+      elif graph[1] == 'lit':
+        ranges.append(TransitionKey.__unicode_literal_bounds)
+      else:
+        assert "unknown character class %s" % graph[1]
     elif key == 'CAT':
       for x in [graph[1], graph[2]]:
         TransitionKey.__process_graph(x, ranges)

--
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
---
You received this message because you are subscribed to the Google Groups "v8-dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
For more options, visit https://groups.google.com/groups/opt_out.

Reply via email to