Revision: 17499
Author:   [email protected]
Date:     Tue Nov  5 15:47:39 2013 UTC
Log:      Experimental parser: user defined key classes

[email protected]

BUG=

Review URL: https://codereview.chromium.org/59973005
http://code.google.com/p/v8/source/detail?r=17499

Modified:
 /branches/experimental/parser/tools/lexer_generator/nfa.py
 /branches/experimental/parser/tools/lexer_generator/rule_parser.py
 /branches/experimental/parser/tools/lexer_generator/transition_key_test.py
 /branches/experimental/parser/tools/lexer_generator/transition_keys.py

=======================================
--- /branches/experimental/parser/tools/lexer_generator/nfa.py Tue Nov 5 15:16:01 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/nfa.py Tue Nov 5 15:47:39 2013 UTC
@@ -122,6 +122,10 @@
     self.__node_number = 0
     self.__operation_map = {}
     self.__members = getmembers(self)
+    self.__character_classes = {}
+
+  def set_character_classes(self, classes):
+    self.__character_classes = classes

   def __new_state(self):
     self.__node_number += 1
@@ -192,10 +196,12 @@
     return self.__key_state(TransitionKey.single_char(graph[1]))

   def __class(self, graph):
-    return self.__key_state(TransitionKey.character_class(False, graph[1]))
+    return self.__key_state(
+      TransitionKey.character_class(graph, self.__character_classes))

   def __not_class(self, graph):
-    return self.__key_state(TransitionKey.character_class(True, graph[1]))
+    return self.__key_state(
+      TransitionKey.character_class(graph, self.__character_classes))

   def __any(self, graph):
     return self.__key_state(TransitionKey.any())
=======================================
--- /branches/experimental/parser/tools/lexer_generator/rule_parser.py Tue Nov 5 14:52:18 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/rule_parser.py Tue Nov 5 15:47:39 2013 UTC
@@ -29,6 +29,7 @@
 from rule_lexer import RuleLexer
 from regex_parser import RegexParser
 from nfa import NfaBuilder
+from transition_keys import TransitionKey

 class RuleParser:

@@ -39,6 +40,7 @@
       'eof' : RegexParser.parse("eof"), #RegexParser.parse("[\0]"),
       'any' : RegexParser.parse("."),
     }
+    self.character_classes = {}
     self.current_transition = None
     self.rules = {}

@@ -56,7 +58,12 @@
   def p_alias_rule(self, p):
     'alias_rule : IDENTIFIER EQUALS composite_regex SEMICOLON'
     assert not p[1] in self.aliases
+    graph = p[3]
     self.aliases[p[1]] = p[3]
+    if graph[0] == 'CLASS' or graph[0] == 'NOT_CLASS':
+      classes = self.character_classes
+      assert not p[1] in classes
+      classes[p[1]] = TransitionKey.character_class(graph, classes)

   def p_transition_rule(self, p):
     '''transition_rule : transition composite_regex code
@@ -91,7 +98,9 @@
       p[0] = p[1]
     else:
       p[0] = NfaBuilder.or_graphs([p[1], p[3]])
-    # NfaBuilder().nfa(p[0])
+    # builder = NfaBuilder()
+    # builder.set_character_classes(self.character_classes)
+    # builder.nfa(p[0])

   def p_regex_parts(self, p):
     '''regex_parts : regex_part
=======================================
--- /branches/experimental/parser/tools/lexer_generator/transition_key_test.py Thu Oct 31 14:46:33 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/transition_key_test.py Tue Nov 5 15:47:39 2013 UTC
@@ -53,6 +53,7 @@
       ("a-zA-Z0g" , "abyzABYZ0" , "123"),
       ("a-z:ws::lit:" , "abc" , "123"),
     ]
+    classes = {}
     for (string, match, no_match) in data:
       for invert in [False, True]:
         if invert:
@@ -63,11 +64,19 @@
           token = "CLASS"
         graph = RegexParser.parse(regex)
         assert graph[0] == token
-        key = TransitionKey.character_class(invert, graph[1])
+        key = TransitionKey.character_class(graph, classes)
         for c in match:
           self.assertEqual(invert, not key.matches_char(c))
         for c in no_match:
           self.assertEqual(invert, key.matches_char(c))

+  def test_self_defined_classes(self):
+    graph = RegexParser.parse("[a-z]")
+    classes = {
+      'self_defined' : TransitionKey.character_class(graph, {})}
+    graph = RegexParser.parse("[^:self_defined:]")
+    key = TransitionKey.character_class(graph, classes)
+    self.assertTrue(key.matches_char('A'))
+
 if __name__ == '__main__':
     unittest.main()
=======================================
--- /branches/experimental/parser/tools/lexer_generator/transition_keys.py Tue Nov 5 15:16:01 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/transition_keys.py Tue Nov 5 15:47:39 2013 UTC
@@ -91,29 +91,34 @@
     return TransitionKey.__create([(char, char)])

   @staticmethod
-  def __process_graph(graph, ranges):
+  def __process_graph(graph, ranges, key_map):
     key = graph[0]
     if key == 'RANGE':
       ranges.append((ord(graph[1]), ord(graph[2])))
     elif key == 'LITERAL':
       ranges.append((ord(graph[1]), ord(graph[1])))
+    elif key == 'CAT':
+      for x in [graph[1], graph[2]]:
+        TransitionKey.__process_graph(x, ranges, key_map)
     elif key == 'CHARACTER_CLASS':
-      if graph[1] == 'ws':
+      class_name = graph[1]
+      if class_name == 'ws':
         ranges.append(TransitionKey.__unicode_whitespace_bounds)
-      elif graph[1] == 'lit':
+      elif class_name == 'lit':
         ranges.append(TransitionKey.__unicode_literal_bounds)
+      elif class_name in key_map:
+        ranges += key_map[class_name].__ranges
       else:
         raise Exception("unknown character class [%s]" % graph[1])
-    elif key == 'CAT':
-      for x in [graph[1], graph[2]]:
-        TransitionKey.__process_graph(x, ranges)
     else:
       raise Exception("bad key [%s]" % key)

   @staticmethod
-  def character_class(invert, graph):
+  def character_class(graph, key_map):
     ranges = []
-    TransitionKey.__process_graph(graph, ranges)
+    assert graph[0] == 'CLASS' or graph[0] == 'NOT_CLASS'
+    invert = graph[0] == 'NOT_CLASS'
+    TransitionKey.__process_graph(graph[1], ranges, key_map)
     return TransitionKey.__key_from_ranges(invert, ranges)

   def matches_char(self, char):

--
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
---
You received this message because you are subscribed to the Google Groups "v8-dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
For more options, visit https://groups.google.com/groups/opt_out.

Reply via email to