Revision: 17495
Author:   [email protected]
Date:     Tue Nov  5 15:16:01 2013 UTC
Log:      Experimental lexer generator: parse {} in regexps.

BUG=
[email protected]

Review URL: https://codereview.chromium.org/59953002
http://code.google.com/p/v8/source/detail?r=17495

Modified:
 /branches/experimental/parser/tools/lexer_generator/automata_test.py
 /branches/experimental/parser/tools/lexer_generator/nfa.py
 /branches/experimental/parser/tools/lexer_generator/regex_lexer.py
 /branches/experimental/parser/tools/lexer_generator/regex_parser.py
 /branches/experimental/parser/tools/lexer_generator/transition_keys.py

=======================================
--- /branches/experimental/parser/tools/lexer_generator/automata_test.py Tue Nov 5 12:37:55 2013 UTC +++ /branches/experimental/parser/tools/lexer_generator/automata_test.py Tue Nov 5 15:16:01 2013 UTC
@@ -55,6 +55,11 @@
       ("a.+b", ["aab", "abb", "acb"], ["aaac", "ab", ""]),
       (".|.", ["a", "b"], ["aa", ""]),
       ("//.", ["//a"], ["aa", ""]),
+      ("[ab]{2}", ["aa", "ab", "ba", "bb"], ["", "a", "b", "aaa", "bbb"]),
+      ("[ab]{2,3}", ["aa", "ab", "ba", "bb", "aab", "baa", "bbb"],
+       ["", "a", "b", "aaaa", "bbba"]),
+      ("[ab]{2,4}", ["aa", "ab", "ba", "bb", "aab", "baa", "bbb", "abab"],
+       ["", "a", "b", "aaaba", "bbbaa"])
     ]

     def test_matches(self):
=======================================
--- /branches/experimental/parser/tools/lexer_generator/nfa.py Tue Nov 5 14:52:18 2013 UTC +++ /branches/experimental/parser/tools/lexer_generator/nfa.py Tue Nov 5 15:16:01 2013 UTC
@@ -108,6 +108,9 @@
   def key_matches(self, value):
     return self.__matches(lambda k, v : k.matches_key(v), value)

+  def __str__(self):
+    return "NfaState(" + str(self.__node_number) + ")"
+
   @staticmethod
   def gather_transition_keys(state_set):
     f = lambda acc, state: acc | set(state.__transitions.keys())
@@ -153,6 +156,28 @@
     start.add_epsilon_transition(node)
     return (start, ends + [start])

+  def __repeat(self, graph):
+    param_min = int(graph[1])
+    param_max = int(graph[2])
+    subgraph = graph[3]
+    (start, ends) = self.__process(subgraph)
+    for i in xrange(1, param_min):
+      (start2, ends2) = self.__process(subgraph)
+      self.__patch_ends(ends, start2)
+      ends = ends2
+    if param_min == param_max:
+      return (start, ends)
+
+    midpoints = []
+    for i in xrange(param_min, param_max):
+      midpoint =  self.__new_state()
+      self.__patch_ends(ends, midpoint)
+      (start2, ends) = self.__process(subgraph)
+      midpoint.add_epsilon_transition(start2)
+      midpoints.append(midpoint)
+
+    return (start, ends + midpoints)
+
   def __cat(self, graph):
     (left, right) = (self.__process(graph[1]), self.__process(graph[2]))
     self.__patch_ends(left[1], right[0])
=======================================
--- /branches/experimental/parser/tools/lexer_generator/regex_lexer.py Tue Nov 5 14:52:18 2013 UTC +++ /branches/experimental/parser/tools/lexer_generator/regex_lexer.py Tue Nov 5 15:16:01 2013 UTC
@@ -43,6 +43,11 @@
     'ZERO_OR_ONE',
     'ANY',

+    'REPEAT_BEGIN',
+    'REPEAT_END',
+
+    'NUMBER',
+    'COMMA',
     'LITERAL',

     'RANGE',
@@ -64,11 +69,17 @@
   t_GROUP_BEGIN = r'\('
   t_GROUP_END = r'\)'

+  t_REPEAT_BEGIN = r'\{'
+  t_REPEAT_END = r'\}'
+
   t_OR = r'\|'
   t_ONE_OR_MORE = r'\+'
   t_ZERO_OR_MORE = r'\*'
   t_ZERO_OR_ONE = r'\?'

+  t_NUMBER = r'[0-9]+'
+  t_COMMA = r','
+
   t_ANY = r'\.'

   t_LITERAL = r'.'
=======================================
--- /branches/experimental/parser/tools/lexer_generator/regex_parser.py Tue Nov 5 14:52:18 2013 UTC +++ /branches/experimental/parser/tools/lexer_generator/regex_parser.py Tue Nov 5 15:16:01 2013 UTC
@@ -64,7 +64,10 @@
                 | any maybe_modifier
     '''
     if p[2] != None:
-      p[0] = (p[2], p[1])
+      if isinstance(p[2], tuple) and p[2][0] == 'REPEAT':
+        p[0] = (p[2][0], p[2][1], p[2][2], p[1])
+      else:
+        p[0] = (p[2], p[1])
     else:
       p[0] = p[1]

@@ -72,11 +75,20 @@
     '''maybe_modifier : ONE_OR_MORE
                       | ZERO_OR_ONE
                       | ZERO_OR_MORE
+                      | repetition
                       | empty'''
     p[0] = p[1]
-    if p[1] != None:
+    if p[1] in self.token_map:
       p[0] = self.token_map[p[1]]

+  def p_repetition(self, p):
+    '''repetition : REPEAT_BEGIN NUMBER REPEAT_END
+                  | REPEAT_BEGIN NUMBER COMMA NUMBER REPEAT_END'''
+    if len(p) == 4:
+      p[0] = ("REPEAT", p[2], p[2])
+    else:
+      p[0] = ("REPEAT", p[2], p[4])
+
   def p_literal(self, p):
     '''literal : LITERAL'''
     p[0] = ('LITERAL', p[1])
=======================================
--- /branches/experimental/parser/tools/lexer_generator/transition_keys.py Tue Nov 5 14:52:18 2013 UTC +++ /branches/experimental/parser/tools/lexer_generator/transition_keys.py Tue Nov 5 15:16:01 2013 UTC
@@ -195,6 +195,7 @@

   @staticmethod
   def disjoint_keys(key_set):
+    key_set.discard(TransitionKey.epsilon())
     if not key_set:
       return []
     range_map = {}

--
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
--- You received this message because you are subscribed to the Google Groups "v8-dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
For more options, visit https://groups.google.com/groups/opt_out.

Reply via email to