Reviewers: marja

Message:
Committed patchset #1 manually as r17499.

Description:
Experimental parser: user defined key classes

[email protected]

BUG=

Committed: https://code.google.com/p/v8/source/detail?r=17499

Please review this at https://codereview.chromium.org/59973005/

SVN Base: https://v8.googlecode.com/svn/branches/experimental/parser

Affected files (+41, -12 lines):
  M tools/lexer_generator/nfa.py
  M tools/lexer_generator/rule_parser.py
  M tools/lexer_generator/transition_key_test.py
  M tools/lexer_generator/transition_keys.py


Index: tools/lexer_generator/nfa.py
diff --git a/tools/lexer_generator/nfa.py b/tools/lexer_generator/nfa.py
index 9a172c9beb19da78df4707dc44bd334515e784c8..bba36efc6d6ed7fd29485e16b6f75ad123165353 100644
--- a/tools/lexer_generator/nfa.py
+++ b/tools/lexer_generator/nfa.py
@@ -122,6 +122,10 @@ class NfaBuilder:
     self.__node_number = 0
     self.__operation_map = {}
     self.__members = getmembers(self)
+    self.__character_classes = {}
+
+  def set_character_classes(self, classes):
+    self.__character_classes = classes

   def __new_state(self):
     self.__node_number += 1
@@ -192,10 +196,12 @@ class NfaBuilder:
     return self.__key_state(TransitionKey.single_char(graph[1]))

   def __class(self, graph):
-    return self.__key_state(TransitionKey.character_class(False, graph[1]))
+    return self.__key_state(
+      TransitionKey.character_class(graph, self.__character_classes))

   def __not_class(self, graph):
-    return self.__key_state(TransitionKey.character_class(True, graph[1]))
+    return self.__key_state(
+      TransitionKey.character_class(graph, self.__character_classes))

   def __any(self, graph):
     return self.__key_state(TransitionKey.any())
Index: tools/lexer_generator/rule_parser.py
diff --git a/tools/lexer_generator/rule_parser.py b/tools/lexer_generator/rule_parser.py
index eac1e2faac1973b374d063060e482572b1860245..bfaf179fc8f948debb4a58f12bcae63fadc59410 100644
--- a/tools/lexer_generator/rule_parser.py
+++ b/tools/lexer_generator/rule_parser.py
@@ -29,6 +29,7 @@ import ply.yacc as yacc
 from rule_lexer import RuleLexer
 from regex_parser import RegexParser
 from nfa import NfaBuilder
+from transition_keys import TransitionKey

 class RuleParser:

@@ -39,6 +40,7 @@ class RuleParser:
       'eof' : RegexParser.parse("eof"), #RegexParser.parse("[\0]"),
       'any' : RegexParser.parse("."),
     }
+    self.character_classes = {}
     self.current_transition = None
     self.rules = {}

@@ -56,7 +58,12 @@ class RuleParser:
   def p_alias_rule(self, p):
     'alias_rule : IDENTIFIER EQUALS composite_regex SEMICOLON'
     assert not p[1] in self.aliases
+    graph = p[3]
     self.aliases[p[1]] = p[3]
+    if graph[0] == 'CLASS' or graph[0] == 'NOT_CLASS':
+      classes = self.character_classes
+      assert not p[1] in classes
+      classes[p[1]] = TransitionKey.character_class(graph, classes)

   def p_transition_rule(self, p):
     '''transition_rule : transition composite_regex code
@@ -91,7 +98,9 @@ class RuleParser:
       p[0] = p[1]
     else:
       p[0] = NfaBuilder.or_graphs([p[1], p[3]])
-    # NfaBuilder().nfa(p[0])
+    # builder = NfaBuilder()
+    # builder.set_character_classes(self.character_classes)
+    # builder.nfa(p[0])

   def p_regex_parts(self, p):
     '''regex_parts : regex_part
Index: tools/lexer_generator/transition_key_test.py
diff --git a/tools/lexer_generator/transition_key_test.py b/tools/lexer_generator/transition_key_test.py
index 480d6c0830304b21d664bd9a6839e1b6874b7ca1..d2fcd40e921fa486960ae625978c1c6c0e0fa66a 100644
--- a/tools/lexer_generator/transition_key_test.py
+++ b/tools/lexer_generator/transition_key_test.py
@@ -53,6 +53,7 @@ class TransitionKeyTestCase(unittest.TestCase):
       ("a-zA-Z0g" , "abyzABYZ0" , "123"),
       ("a-z:ws::lit:" , "abc" , "123"),
     ]
+    classes = {}
     for (string, match, no_match) in data:
       for invert in [False, True]:
         if invert:
@@ -63,11 +64,19 @@ class TransitionKeyTestCase(unittest.TestCase):
           token = "CLASS"
         graph = RegexParser.parse(regex)
         assert graph[0] == token
-        key = TransitionKey.character_class(invert, graph[1])
+        key = TransitionKey.character_class(graph, classes)
         for c in match:
           self.assertEqual(invert, not key.matches_char(c))
         for c in no_match:
           self.assertEqual(invert, key.matches_char(c))

+  def test_self_defined_classes(self):
+    graph = RegexParser.parse("[a-z]")
+    classes = {
+      'self_defined' : TransitionKey.character_class(graph, {})}
+    graph = RegexParser.parse("[^:self_defined:]")
+    key = TransitionKey.character_class(graph, classes)
+    self.assertTrue(key.matches_char('A'))
+
 if __name__ == '__main__':
     unittest.main()
Index: tools/lexer_generator/transition_keys.py
diff --git a/tools/lexer_generator/transition_keys.py b/tools/lexer_generator/transition_keys.py
index d759a4f5b14f01aa4a3f10a40eef84c0e400a7bd..1c0de4d2b15b667477f52a3c2121c76d9b683c3f 100644
--- a/tools/lexer_generator/transition_keys.py
+++ b/tools/lexer_generator/transition_keys.py
@@ -91,29 +91,34 @@ class TransitionKey:
     return TransitionKey.__create([(char, char)])

   @staticmethod
-  def __process_graph(graph, ranges):
+  def __process_graph(graph, ranges, key_map):
     key = graph[0]
     if key == 'RANGE':
       ranges.append((ord(graph[1]), ord(graph[2])))
     elif key == 'LITERAL':
       ranges.append((ord(graph[1]), ord(graph[1])))
+    elif key == 'CAT':
+      for x in [graph[1], graph[2]]:
+        TransitionKey.__process_graph(x, ranges, key_map)
     elif key == 'CHARACTER_CLASS':
-      if graph[1] == 'ws':
+      class_name = graph[1]
+      if class_name == 'ws':
         ranges.append(TransitionKey.__unicode_whitespace_bounds)
-      elif graph[1] == 'lit':
+      elif class_name == 'lit':
         ranges.append(TransitionKey.__unicode_literal_bounds)
+      elif class_name in key_map:
+        ranges += key_map[class_name].__ranges
       else:
         raise Exception("unknown character class [%s]" % graph[1])
-    elif key == 'CAT':
-      for x in [graph[1], graph[2]]:
-        TransitionKey.__process_graph(x, ranges)
     else:
       raise Exception("bad key [%s]" % key)

   @staticmethod
-  def character_class(invert, graph):
+  def character_class(graph, key_map):
     ranges = []
-    TransitionKey.__process_graph(graph, ranges)
+    assert graph[0] == 'CLASS' or graph[0] == 'NOT_CLASS'
+    invert = graph[0] == 'NOT_CLASS'
+    TransitionKey.__process_graph(graph[1], ranges, key_map)
     return TransitionKey.__key_from_ranges(invert, ranges)

   def matches_char(self, char):


--
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
---
You received this message because you are subscribed to the Google Groups "v8-dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
For more options, visit https://groups.google.com/groups/opt_out.

Reply via email to