Reviewers: marja,
Message:
Committed patchset #1 manually as r17494.
Description:
Experimental parser: build regex parse trees for all rules
[email protected]
BUG=
Committed: https://code.google.com/p/v8/source/detail?r=17494
Please review this at https://codereview.chromium.org/59033005/
SVN Base: https://v8.googlecode.com/svn/branches/experimental/parser
Affected files (+28, -14 lines):
M tools/lexer_generator/nfa.py
M tools/lexer_generator/regex_lexer.py
M tools/lexer_generator/regex_parser.py
M tools/lexer_generator/rule_parser.py
M tools/lexer_generator/transition_keys.py
Index: tools/lexer_generator/nfa.py
diff --git a/tools/lexer_generator/nfa.py b/tools/lexer_generator/nfa.py
index 7e77c014ccdb25d62b039a4914108eb6ccb096d7..c5fa5dc21508577999cd65f89589809e7fe8635e 100644
--- a/tools/lexer_generator/nfa.py
+++ b/tools/lexer_generator/nfa.py
@@ -214,6 +214,16 @@ class NfaBuilder:
def cat_graphs(graphs):
return reduce(lambda acc, g: ('CAT', acc, g), graphs)
+ __modifer_map = {
+ '+': 'ONE_OR_MORE',
+ '?': 'ZERO_OR_ONE',
+ '*': 'ZERO_OR_MORE',
+ }
+
+ @staticmethod
+ def apply_modifier(modifier, graph):
+ return (NfaBuilder.__modifer_map[modifier], graph)
+
class Nfa:
def __init__(self, start, end, nodes_created):
Index: tools/lexer_generator/regex_lexer.py
diff --git a/tools/lexer_generator/regex_lexer.py b/tools/lexer_generator/regex_lexer.py
index bd25b7b24f3d14428a55e97f57ad2287fd698d46..1186069d452237f4e0e2bd077dba8ef907689c43 100644
--- a/tools/lexer_generator/regex_lexer.py
+++ b/tools/lexer_generator/regex_lexer.py
@@ -93,7 +93,7 @@ class RegexLexer:
t.value = t.value[1:]
return t
- t_class_CLASS_LITERAL = r'[\w $_:+]' # fix this
+ t_class_CLASS_LITERAL = r'[\w $_+]' # fix this
t_ANY_ignore = '\n'
Index: tools/lexer_generator/regex_parser.py
diff --git a/tools/lexer_generator/regex_parser.py b/tools/lexer_generator/regex_parser.py
index 2e370371d9489217f57157434a0a03129be8d543..7049aa51733ef4bed02018b6eeca63585bb0db94 100644
--- a/tools/lexer_generator/regex_parser.py
+++ b/tools/lexer_generator/regex_parser.py
@@ -108,7 +108,7 @@ class RegexParser:
if len(p[1]) == 1:
left = ('LITERAL', p[1])
else:
- left = ('CHARACTER_CLASS', p[1:-1])
+ left = ('CHARACTER_CLASS', p[1][1:-1])
p[0] = self.__cat(left, p[len(p)-1])
def p_maybe_class_content(self, p):
Index: tools/lexer_generator/rule_parser.py
diff --git a/tools/lexer_generator/rule_parser.py b/tools/lexer_generator/rule_parser.py
index 61e259a70a5cac2d759a0bd75360f20091c48645..eac1e2faac1973b374d063060e482572b1860245 100644
--- a/tools/lexer_generator/rule_parser.py
+++ b/tools/lexer_generator/rule_parser.py
@@ -28,6 +28,7 @@
import ply.yacc as yacc
from rule_lexer import RuleLexer
from regex_parser import RegexParser
+from nfa import NfaBuilder
class RuleParser:
@@ -35,8 +36,8 @@ class RuleParser:
def __init__(self):
self.aliases = {
- 'eof' : "eof rule",
- 'any' : "any rule",
+ 'eof' : RegexParser.parse("eof"), #RegexParser.parse("[\0]"),
+ 'any' : RegexParser.parse("."),
}
self.current_transition = None
self.rules = {}
@@ -86,15 +87,16 @@ class RuleParser:
def p_composite_regex(self, p):
'''composite_regex : regex_parts OR regex_parts
| regex_parts'''
- if p[len(p)-1]:
- p[0] = p[1:]
+ if len(p) == 2:
+ p[0] = p[1]
else:
- p[0] = p[1:-1]
+ p[0] = NfaBuilder.or_graphs([p[1], p[3]])
+ # NfaBuilder().nfa(p[0])
def p_regex_parts(self, p):
'''regex_parts : regex_part
| regex_part regex_parts'''
- p[0] = p[1:]
+ p[0] = NfaBuilder.cat_graphs(p[1:])
def p_regex_part(self, p):
'''regex_part : LEFT_PARENTHESIS composite_regex RIGHT_PARENTHESIS
modifier
@@ -102,15 +104,17 @@ class RuleParser:
| regex_class modifier
| regex modifier
| regex_alias modifier'''
- if p[len(p)-1]:
- p[0] = p[1:]
+ modifier = p[len(p)-1]
+ graph = p[2] if len(p) == 5 else p[1]
+ if modifier:
+ p[0] = NfaBuilder.apply_modifier(modifier, graph)
else:
- p[0] = p[1:-1]
+ p[0] = graph
def p_regex_string_literal(self, p):
'regex_string_literal : STRING'
string = p[1][1:-1]
- for c in "\+?|*[]()":
+ for c in "\+?*|.[](){}":
string = string.replace(c, "\\" + c)
p[0] = RegexParser.parse(string)
Index: tools/lexer_generator/transition_keys.py
diff --git a/tools/lexer_generator/transition_keys.py b/tools/lexer_generator/transition_keys.py
index a68ea30f527f8ce3e508be321adeefb211a03a1a..8ddbfb3b073f770ab3740b0d0b331b0345782053 100644
--- a/tools/lexer_generator/transition_keys.py
+++ b/tools/lexer_generator/transition_keys.py
@@ -103,12 +103,12 @@ class TransitionKey:
elif graph[1] == 'lit':
ranges.append(TransitionKey.__unicode_literal_bounds)
else:
- assert "unknown character class %s" % graph[1]
+ raise Exception("unknown character class [%s]" % graph[1])
elif key == 'CAT':
for x in [graph[1], graph[2]]:
TransitionKey.__process_graph(x, ranges)
else:
- assert False, "bad key %s" % key
+ raise Exception("bad key [%s]" % key)
@staticmethod
def character_class(invert, graph):
--
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
---
You received this message because you are subscribed to the Google Groups "v8-dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email
to [email protected].
For more options, visit https://groups.google.com/groups/opt_out.