Revision: 17494
Author:   [email protected]
Date:     Tue Nov  5 14:52:18 2013 UTC
Log:      Experimental parser: build regex parse trees for all rules

[email protected]

BUG=

Review URL: https://codereview.chromium.org/59033005
http://code.google.com/p/v8/source/detail?r=17494

Modified:
 /branches/experimental/parser/tools/lexer_generator/nfa.py
 /branches/experimental/parser/tools/lexer_generator/regex_lexer.py
 /branches/experimental/parser/tools/lexer_generator/regex_parser.py
 /branches/experimental/parser/tools/lexer_generator/rule_parser.py
 /branches/experimental/parser/tools/lexer_generator/transition_keys.py

=======================================
--- /branches/experimental/parser/tools/lexer_generator/nfa.py Mon Nov 4 15:04:49 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/nfa.py Tue Nov 5 14:52:18 2013 UTC
@@ -214,6 +214,16 @@
   def cat_graphs(graphs):
     return reduce(lambda acc, g: ('CAT', acc, g), graphs)

+  __modifer_map = {
+    '+': 'ONE_OR_MORE',
+    '?': 'ZERO_OR_ONE',
+    '*': 'ZERO_OR_MORE',
+  }
+
+  @staticmethod
+  def apply_modifier(modifier, graph):
+    return (NfaBuilder.__modifer_map[modifier], graph)
+
 class Nfa:

   def __init__(self, start, end, nodes_created):
=======================================
--- /branches/experimental/parser/tools/lexer_generator/regex_lexer.py Tue Nov 5 12:37:55 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/regex_lexer.py Tue Nov 5 14:52:18 2013 UTC
@@ -93,7 +93,7 @@
     t.value = t.value[1:]
     return t

-  t_class_CLASS_LITERAL = r'[\w $_:+]' # fix this
+  t_class_CLASS_LITERAL = r'[\w $_+]' # fix this

   t_ANY_ignore  = '\n'

=======================================
--- /branches/experimental/parser/tools/lexer_generator/regex_parser.py Thu Oct 31 14:46:33 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/regex_parser.py Tue Nov 5 14:52:18 2013 UTC
@@ -108,7 +108,7 @@
       if len(p[1]) == 1:
         left = ('LITERAL', p[1])
       else:
-        left = ('CHARACTER_CLASS', p[1:-1])
+        left = ('CHARACTER_CLASS', p[1][1:-1])
     p[0] = self.__cat(left, p[len(p)-1])

   def p_maybe_class_content(self, p):
=======================================
--- /branches/experimental/parser/tools/lexer_generator/rule_parser.py Tue Nov 5 12:37:55 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/rule_parser.py Tue Nov 5 14:52:18 2013 UTC
@@ -28,6 +28,7 @@
 import ply.yacc as yacc
 from rule_lexer import RuleLexer
 from regex_parser import RegexParser
+from nfa import NfaBuilder

 class RuleParser:

@@ -35,8 +36,8 @@

   def __init__(self):
     self.aliases = {
-      'eof' : "eof rule",
-      'any' : "any rule",
+      'eof' : RegexParser.parse("eof"), #RegexParser.parse("[\0]"),
+      'any' : RegexParser.parse("."),
     }
     self.current_transition = None
     self.rules = {}
@@ -86,15 +87,16 @@
   def p_composite_regex(self, p):
     '''composite_regex : regex_parts OR regex_parts
                        | regex_parts'''
-    if p[len(p)-1]:
-      p[0] = p[1:]
+    if len(p) == 2:
+      p[0] = p[1]
     else:
-      p[0] = p[1:-1]
+      p[0] = NfaBuilder.or_graphs([p[1], p[3]])
+    # NfaBuilder().nfa(p[0])

   def p_regex_parts(self, p):
     '''regex_parts : regex_part
                    | regex_part regex_parts'''
-    p[0] = p[1:]
+    p[0] = NfaBuilder.cat_graphs(p[1:])

   def p_regex_part(self, p):
     '''regex_part : LEFT_PARENTHESIS composite_regex RIGHT_PARENTHESIS modifier
@@ -102,15 +104,17 @@
                   | regex_class modifier
                   | regex modifier
                   | regex_alias modifier'''
-    if p[len(p)-1]:
-      p[0] = p[1:]
+    modifier = p[len(p)-1]
+    graph = p[2] if len(p) == 5 else p[1]
+    if modifier:
+      p[0] = NfaBuilder.apply_modifier(modifier, graph)
     else:
-      p[0] = p[1:-1]
+      p[0] = graph

   def p_regex_string_literal(self, p):
     'regex_string_literal : STRING'
     string = p[1][1:-1]
-    for c in "\+?|*[]()":
+    for c in "\+?*|.[](){}":
       string = string.replace(c, "\\" + c)
     p[0] = RegexParser.parse(string)

=======================================
--- /branches/experimental/parser/tools/lexer_generator/transition_keys.py Mon Nov 4 15:04:49 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/transition_keys.py Tue Nov 5 14:52:18 2013 UTC
@@ -103,12 +103,12 @@
       elif graph[1] == 'lit':
         ranges.append(TransitionKey.__unicode_literal_bounds)
       else:
-        assert "unknown character class %s" % graph[1]
+        raise Exception("unknown character class [%s]" % graph[1])
     elif key == 'CAT':
       for x in [graph[1], graph[2]]:
         TransitionKey.__process_graph(x, ranges)
     else:
-      assert False, "bad key %s" % key
+      raise Exception("bad key [%s]" % key)

   @staticmethod
   def character_class(invert, graph):

--
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
---
You received this message because you are subscribed to the Google Groups "v8-dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email to [email protected].
For more options, visit https://groups.google.com/groups/opt_out.

Reply via email to