Reviewers: marja,

Message:
Committed patchset #1 manually as r17494.

Description:
Experimental parser: build regex parse trees for all rules

[email protected]

BUG=

Committed: https://code.google.com/p/v8/source/detail?r=17494

Please review this at https://codereview.chromium.org/59033005/

SVN Base: https://v8.googlecode.com/svn/branches/experimental/parser

Affected files (+28, -14 lines):
  M tools/lexer_generator/nfa.py
  M tools/lexer_generator/regex_lexer.py
  M tools/lexer_generator/regex_parser.py
  M tools/lexer_generator/rule_parser.py
  M tools/lexer_generator/transition_keys.py


Index: tools/lexer_generator/nfa.py
diff --git a/tools/lexer_generator/nfa.py b/tools/lexer_generator/nfa.py
index 7e77c014ccdb25d62b039a4914108eb6ccb096d7..c5fa5dc21508577999cd65f89589809e7fe8635e 100644
--- a/tools/lexer_generator/nfa.py
+++ b/tools/lexer_generator/nfa.py
@@ -214,6 +214,16 @@ class NfaBuilder:
   def cat_graphs(graphs):
     return reduce(lambda acc, g: ('CAT', acc, g), graphs)

+  __modifer_map = {
+    '+': 'ONE_OR_MORE',
+    '?': 'ZERO_OR_ONE',
+    '*': 'ZERO_OR_MORE',
+  }
+
+  @staticmethod
+  def apply_modifier(modifier, graph):
+    return (NfaBuilder.__modifer_map[modifier], graph)
+
 class Nfa:

   def __init__(self, start, end, nodes_created):
Index: tools/lexer_generator/regex_lexer.py
diff --git a/tools/lexer_generator/regex_lexer.py b/tools/lexer_generator/regex_lexer.py
index bd25b7b24f3d14428a55e97f57ad2287fd698d46..1186069d452237f4e0e2bd077dba8ef907689c43 100644
--- a/tools/lexer_generator/regex_lexer.py
+++ b/tools/lexer_generator/regex_lexer.py
@@ -93,7 +93,7 @@ class RegexLexer:
     t.value = t.value[1:]
     return t

-  t_class_CLASS_LITERAL = r'[\w $_:+]' # fix this
+  t_class_CLASS_LITERAL = r'[\w $_+]' # fix this

   t_ANY_ignore  = '\n'

Index: tools/lexer_generator/regex_parser.py
diff --git a/tools/lexer_generator/regex_parser.py b/tools/lexer_generator/regex_parser.py
index 2e370371d9489217f57157434a0a03129be8d543..7049aa51733ef4bed02018b6eeca63585bb0db94 100644
--- a/tools/lexer_generator/regex_parser.py
+++ b/tools/lexer_generator/regex_parser.py
@@ -108,7 +108,7 @@ class RegexParser:
       if len(p[1]) == 1:
         left = ('LITERAL', p[1])
       else:
-        left = ('CHARACTER_CLASS', p[1:-1])
+        left = ('CHARACTER_CLASS', p[1][1:-1])
     p[0] = self.__cat(left, p[len(p)-1])

   def p_maybe_class_content(self, p):
Index: tools/lexer_generator/rule_parser.py
diff --git a/tools/lexer_generator/rule_parser.py b/tools/lexer_generator/rule_parser.py
index 61e259a70a5cac2d759a0bd75360f20091c48645..eac1e2faac1973b374d063060e482572b1860245 100644
--- a/tools/lexer_generator/rule_parser.py
+++ b/tools/lexer_generator/rule_parser.py
@@ -28,6 +28,7 @@
 import ply.yacc as yacc
 from rule_lexer import RuleLexer
 from regex_parser import RegexParser
+from nfa import NfaBuilder

 class RuleParser:

@@ -35,8 +36,8 @@ class RuleParser:

   def __init__(self):
     self.aliases = {
-      'eof' : "eof rule",
-      'any' : "any rule",
+      'eof' : RegexParser.parse("eof"), #RegexParser.parse("[\0]"),
+      'any' : RegexParser.parse("."),
     }
     self.current_transition = None
     self.rules = {}
@@ -86,15 +87,16 @@ class RuleParser:
   def p_composite_regex(self, p):
     '''composite_regex : regex_parts OR regex_parts
                        | regex_parts'''
-    if p[len(p)-1]:
-      p[0] = p[1:]
+    if len(p) == 2:
+      p[0] = p[1]
     else:
-      p[0] = p[1:-1]
+      p[0] = NfaBuilder.or_graphs([p[1], p[3]])
+    # NfaBuilder().nfa(p[0])

   def p_regex_parts(self, p):
     '''regex_parts : regex_part
                    | regex_part regex_parts'''
-    p[0] = p[1:]
+    p[0] = NfaBuilder.cat_graphs(p[1:])

   def p_regex_part(self, p):
     '''regex_part : LEFT_PARENTHESIS composite_regex RIGHT_PARENTHESIS modifier
@@ -102,15 +104,17 @@ class RuleParser:
                   | regex_class modifier
                   | regex modifier
                   | regex_alias modifier'''
-    if p[len(p)-1]:
-      p[0] = p[1:]
+    modifier = p[len(p)-1]
+    graph = p[2] if len(p) == 5 else p[1]
+    if modifier:
+      p[0] = NfaBuilder.apply_modifier(modifier, graph)
     else:
-      p[0] = p[1:-1]
+      p[0] = graph

   def p_regex_string_literal(self, p):
     'regex_string_literal : STRING'
     string = p[1][1:-1]
-    for c in "\+?|*[]()":
+    for c in "\+?*|.[](){}":
       string = string.replace(c, "\\" + c)
     p[0] = RegexParser.parse(string)

Index: tools/lexer_generator/transition_keys.py
diff --git a/tools/lexer_generator/transition_keys.py b/tools/lexer_generator/transition_keys.py
index a68ea30f527f8ce3e508be321adeefb211a03a1a..8ddbfb3b073f770ab3740b0d0b331b0345782053 100644
--- a/tools/lexer_generator/transition_keys.py
+++ b/tools/lexer_generator/transition_keys.py
@@ -103,12 +103,12 @@ class TransitionKey:
       elif graph[1] == 'lit':
         ranges.append(TransitionKey.__unicode_literal_bounds)
       else:
-        assert "unknown character class %s" % graph[1]
+        raise Exception("unknown character class [%s]" % graph[1])
     elif key == 'CAT':
       for x in [graph[1], graph[2]]:
         TransitionKey.__process_graph(x, ranges)
     else:
-      assert False, "bad key %s" % key
+      raise Exception("bad key [%s]" % key)

   @staticmethod
   def character_class(invert, graph):


--
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
---
You received this message because you are subscribed to the Google Groups "v8-dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
For more options, visit https://groups.google.com/groups/opt_out.

Reply via email to