Revision: 17494
Author: [email protected]
Date: Tue Nov 5 14:52:18 2013 UTC
Log: Experimental parser: build regex parse trees for all rules
[email protected]
BUG=
Review URL: https://codereview.chromium.org/59033005
http://code.google.com/p/v8/source/detail?r=17494
Modified:
/branches/experimental/parser/tools/lexer_generator/nfa.py
/branches/experimental/parser/tools/lexer_generator/regex_lexer.py
/branches/experimental/parser/tools/lexer_generator/regex_parser.py
/branches/experimental/parser/tools/lexer_generator/rule_parser.py
/branches/experimental/parser/tools/lexer_generator/transition_keys.py
=======================================
--- /branches/experimental/parser/tools/lexer_generator/nfa.py Mon Nov 4 15:04:49 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/nfa.py Tue Nov 5 14:52:18 2013 UTC
@@ -214,6 +214,16 @@
def cat_graphs(graphs):
return reduce(lambda acc, g: ('CAT', acc, g), graphs)
+ __modifer_map = {
+ '+': 'ONE_OR_MORE',
+ '?': 'ZERO_OR_ONE',
+ '*': 'ZERO_OR_MORE',
+ }
+
+ @staticmethod
+ def apply_modifier(modifier, graph):
+ return (NfaBuilder.__modifer_map[modifier], graph)
+
class Nfa:
def __init__(self, start, end, nodes_created):
=======================================
--- /branches/experimental/parser/tools/lexer_generator/regex_lexer.py Tue Nov 5 12:37:55 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/regex_lexer.py Tue Nov 5 14:52:18 2013 UTC
@@ -93,7 +93,7 @@
t.value = t.value[1:]
return t
- t_class_CLASS_LITERAL = r'[\w $_:+]' # fix this
+ t_class_CLASS_LITERAL = r'[\w $_+]' # fix this
t_ANY_ignore = '\n'
=======================================
--- /branches/experimental/parser/tools/lexer_generator/regex_parser.py Thu Oct 31 14:46:33 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/regex_parser.py Tue Nov 5 14:52:18 2013 UTC
@@ -108,7 +108,7 @@
if len(p[1]) == 1:
left = ('LITERAL', p[1])
else:
- left = ('CHARACTER_CLASS', p[1:-1])
+ left = ('CHARACTER_CLASS', p[1][1:-1])
p[0] = self.__cat(left, p[len(p)-1])
def p_maybe_class_content(self, p):
=======================================
--- /branches/experimental/parser/tools/lexer_generator/rule_parser.py Tue Nov 5 12:37:55 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/rule_parser.py Tue Nov 5 14:52:18 2013 UTC
@@ -28,6 +28,7 @@
import ply.yacc as yacc
from rule_lexer import RuleLexer
from regex_parser import RegexParser
+from nfa import NfaBuilder
class RuleParser:
@@ -35,8 +36,8 @@
def __init__(self):
self.aliases = {
- 'eof' : "eof rule",
- 'any' : "any rule",
+ 'eof' : RegexParser.parse("eof"), #RegexParser.parse("[\0]"),
+ 'any' : RegexParser.parse("."),
}
self.current_transition = None
self.rules = {}
@@ -86,15 +87,16 @@
def p_composite_regex(self, p):
'''composite_regex : regex_parts OR regex_parts
| regex_parts'''
- if p[len(p)-1]:
- p[0] = p[1:]
+ if len(p) == 2:
+ p[0] = p[1]
else:
- p[0] = p[1:-1]
+ p[0] = NfaBuilder.or_graphs([p[1], p[3]])
+ # NfaBuilder().nfa(p[0])
def p_regex_parts(self, p):
'''regex_parts : regex_part
| regex_part regex_parts'''
- p[0] = p[1:]
+ p[0] = NfaBuilder.cat_graphs(p[1:])
def p_regex_part(self, p):
'''regex_part : LEFT_PARENTHESIS composite_regex RIGHT_PARENTHESIS modifier
@@ -102,15 +104,17 @@
| regex_class modifier
| regex modifier
| regex_alias modifier'''
- if p[len(p)-1]:
- p[0] = p[1:]
+ modifier = p[len(p)-1]
+ graph = p[2] if len(p) == 5 else p[1]
+ if modifier:
+ p[0] = NfaBuilder.apply_modifier(modifier, graph)
else:
- p[0] = p[1:-1]
+ p[0] = graph
def p_regex_string_literal(self, p):
'regex_string_literal : STRING'
string = p[1][1:-1]
- for c in "\+?|*[]()":
+ for c in "\+?*|.[](){}":
string = string.replace(c, "\\" + c)
p[0] = RegexParser.parse(string)
=======================================
--- /branches/experimental/parser/tools/lexer_generator/transition_keys.py Mon Nov 4 15:04:49 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/transition_keys.py Tue Nov 5 14:52:18 2013 UTC
@@ -103,12 +103,12 @@
elif graph[1] == 'lit':
ranges.append(TransitionKey.__unicode_literal_bounds)
else:
- assert "unknown character class %s" % graph[1]
+ raise Exception("unknown character class [%s]" % graph[1])
elif key == 'CAT':
for x in [graph[1], graph[2]]:
TransitionKey.__process_graph(x, ranges)
else:
- assert False, "bad key %s" % key
+ raise Exception("bad key [%s]" % key)
@staticmethod
def character_class(invert, graph):
--
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
---
You received this message because you are subscribed to the Google Groups "v8-dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email
to [email protected].
For more options, visit https://groups.google.com/groups/opt_out.