Revision: 17672
Author: [email protected]
Date: Tue Nov 12 19:10:03 2013 UTC
Log: Parser generator: more layering refactoring
[email protected]
BUG=
Review URL: https://codereview.chromium.org/68683007
http://code.google.com/p/v8/source/detail?r=17672
Modified:
/branches/experimental/parser/tools/lexer_generator/generator.py
/branches/experimental/parser/tools/lexer_generator/lexer_test.py
/branches/experimental/parser/tools/lexer_generator/rule_parser.py
=======================================
--- /branches/experimental/parser/tools/lexer_generator/generator.py Tue
Nov 12 18:47:52 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/generator.py Tue
Nov 12 19:10:03 2013 UTC
@@ -29,7 +29,7 @@
from nfa import Nfa
from nfa_builder import NfaBuilder
from dfa import Dfa
-from rule_parser import RuleParser, RuleParserState
+from rule_parser import RuleParser, RuleParserState, RuleProcessor
from code_generator import CodeGenerator
file_template = '''
@@ -64,85 +64,26 @@
%s
</script>'''
-class Generator(object):
+def generate_html(rule_processor):
+ scripts = []
+ loads = []
+ for i, (name, (nfa, dfa)) in
enumerate(list(rule_processor.automata_iter())):
+ if name == 'Normal': continue
+ (nfa_i, dfa_i) = ("nfa_%d" % i, "dfa_%d" % i)
+ scripts.append(script_template % (nfa_i, nfa.to_dot()))
+ scripts.append(script_template % (dfa_i, dfa.to_dot()))
+ loads.append(load_template % ("nfa [%s]" % name, nfa_i))
+ loads.append(load_template % ("dfa [%s]" % name, dfa_i))
+ body = "\n".join(scripts) + (load_outer_template % "\n".join(loads))
+ return file_template % body
- def __init__(self, rules):
- parser_state = RuleParserState()
- RuleParser.parse(rules, parser_state)
- self.__automata = {}
- self.process_rules(parser_state)
+def generate_code(rule_processor):
+ (nfa, dfa) = rule_processor.default_automata()
+ return CodeGenerator.dfa_to_code(dfa)
- def generate_html(self):
- scripts = []
- loads = []
- for i, name in enumerate(self.__automata):
- (nfa, dfa) = self.__automata[name]
- if name == 'Normal': continue
- (nfa_i, dfa_i) = ("nfa_%d" % i, "dfa_%d" % i)
- scripts.append(script_template % (nfa_i, nfa.to_dot()))
- scripts.append(script_template % (dfa_i, dfa.to_dot()))
- loads.append(load_template % ("nfa [%s]" % name, nfa_i))
- loads.append(load_template % ("dfa [%s]" % name, dfa_i))
- body = "\n".join(scripts) + (load_outer_template % "\n".join(loads))
- return file_template % body
-
- def process_rules(self, parser_state):
- rule_map = {}
- builder = NfaBuilder()
- builder.set_character_classes(parser_state.character_classes)
- assert 'default' in parser_state.rules
- def process(k, v):
- graphs = []
- continues = 0
- for (graph, (precedence, code, transition)) in v['regex']:
- default_code = v['default_action']
- action = code if code else default_code
- if action:
- graph = NfaBuilder.add_action(graph, (precedence, action))
- if not transition or transition == 'break':
- pass
- elif transition == 'continue':
- assert not k == 'default'
- continues += 1
- graph = NfaBuilder.add_continue(graph)
- elif (transition == 'terminate' or
- transition == 'terminate_illegal'):
- assert not code
- graph = NfaBuilder.add_action(graph, (-1, transition))
- else:
- assert k == 'default'
- subgraph_modifier = '*' if code else None
- graph = NfaBuilder.join_subgraph(
- graph, transition, rule_map[transition], subgraph_modifier)
- graphs.append(graph)
- if continues == len(graphs):
- graphs.append(NfaBuilder.epsilon())
- if v['catch_all']:
- assert v['catch_all'] == 'continue'
- graphs.append(NfaBuilder.add_continue(NfaBuilder.catch_all()))
- graph = NfaBuilder.or_graphs(graphs)
- rule_map[k] = graph
- # process first the subgraphs, then the default graph
- for k, v in parser_state.rules.items():
- if k == 'default': continue
- process(k, v)
- process('default', parser_state.rules['default'])
- # build the automata
- for rule_name, graph in rule_map.items():
- nfa = builder.nfa(graph)
- (start, dfa_nodes) = nfa.compute_dfa()
- dfa = Dfa(start, dfa_nodes)
- self.__automata[rule_name] = (nfa, dfa)
-
- # Lexes strings with the help of DFAs procuded by the grammar. For sanity
- # checking the automata.
- def lex(self, string):
- (nfa, dfa) = self.__automata['default']
- return dfa.lex(string)
-
- def generate_code(self):
- (nfa, dfa) = self.__automata['default']
- return CodeGenerator.dfa_to_code(dfa)
+def lex(rule_processor, string):
+ for t in rule_processor.lex(string + '\0'):
+ print t
if __name__ == '__main__':
@@ -154,21 +95,20 @@
args = parser.parse_args()
re_file = args.re
- parser_state = RuleParserState()
print "parsing %s" % re_file
with open(re_file, 'r') as f:
- generator = Generator(f.read())
+ rule_processor = RuleProcessor.parse(f.read())
html_file = args.html
if html_file:
- html = generator.generate_html()
+ html = generate_html(rule_processor)
with open(args.html, 'w') as f:
f.write(html)
print "wrote html to %s" % html_file
code_file = args.code
if code_file:
- code = generator.generate_code()
+ code = generate_code(rule_processor)
with open(code_file, 'w') as f:
f.write(code)
print "wrote code to %s" % code_file
@@ -176,6 +116,4 @@
input_file = args.input
if input_file:
with open(input_file, 'r') as f:
- input_text = f.read() + '\0'
- for t in generator.lex(input_text):
- print t
+ lex(rule_processor, f.read())
=======================================
--- /branches/experimental/parser/tools/lexer_generator/lexer_test.py Mon
Nov 11 07:52:15 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/lexer_test.py Tue
Nov 12 19:10:03 2013 UTC
@@ -26,13 +26,14 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import unittest
-from generator import Generator
+from rule_parser import RuleProcessor
class LexerTestCase(unittest.TestCase):
def __verify_action_stream(self, rules, string, expected_stream):
expected_stream.append(('terminate', '\0'))
- for i, (action, start, stop) in
enumerate(Generator(rules).lex(string)):
+ rule_processor = RuleProcessor.parse(rules)
+ for i, (action, start, stop) in enumerate(rule_processor.lex(string)):
self.assertEquals(expected_stream[i][0], action)
self.assertEquals(expected_stream[i][1], string[start : stop])
=======================================
--- /branches/experimental/parser/tools/lexer_generator/rule_parser.py Tue
Nov 12 09:47:12 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/rule_parser.py Tue
Nov 12 19:10:03 2013 UTC
@@ -29,6 +29,7 @@
from rule_lexer import RuleLexer
from regex_parser import RegexParser
from nfa_builder import NfaBuilder
+from dfa import Dfa
from transition_keys import TransitionKey
class RuleParserState:
@@ -212,3 +213,73 @@
raise
parser.__state = None
assert parser_state.transitions <= set(parser_state.rules.keys())
+
+class RuleProcessor(object):
+
+ def __init__(self, parser_state):
+ self.__automata = {}
+ self.__process_parser_state(parser_state)
+
+ @staticmethod
+ def parse(string):
+ parser_state = RuleParserState()
+ RuleParser.parse(string, parser_state)
+ return RuleProcessor(parser_state)
+
+ def automata_iter(self):
+ return iter(self.__automata.items())
+
+ def default_automata(self):
+ return self.__automata['default']
+
+ def lex(self, string):
+ (nfa, dfa) = self.default_automata()
+ return dfa.lex(string)
+
+ def __process_parser_state(self, parser_state):
+ rule_map = {}
+ builder = NfaBuilder()
+ builder.set_character_classes(parser_state.character_classes)
+ assert 'default' in parser_state.rules
+ def process(k, v):
+ graphs = []
+ continues = 0
+ for (graph, (precedence, code, transition)) in v['regex']:
+ default_code = v['default_action']
+ action = code if code else default_code
+ if action:
+ graph = NfaBuilder.add_action(graph, (precedence, action))
+ if not transition or transition == 'break':
+ pass
+ elif transition == 'continue':
+ assert not k == 'default'
+ continues += 1
+ graph = NfaBuilder.add_continue(graph)
+ elif (transition == 'terminate' or
+ transition == 'terminate_illegal'):
+ assert not code
+ graph = NfaBuilder.add_action(graph, (-1, transition))
+ else:
+ assert k == 'default'
+ subgraph_modifier = '*' if code else None
+ graph = NfaBuilder.join_subgraph(
+ graph, transition, rule_map[transition], subgraph_modifier)
+ graphs.append(graph)
+ if continues == len(graphs):
+ graphs.append(NfaBuilder.epsilon())
+ if v['catch_all']:
+ assert v['catch_all'] == 'continue'
+ graphs.append(NfaBuilder.add_continue(NfaBuilder.catch_all()))
+ graph = NfaBuilder.or_graphs(graphs)
+ rule_map[k] = graph
+ # process first the subgraphs, then the default graph
+ for k, v in parser_state.rules.items():
+ if k == 'default': continue
+ process(k, v)
+ process('default', parser_state.rules['default'])
+ # build the automata
+ for rule_name, graph in rule_map.items():
+ nfa = builder.nfa(graph)
+ (start, dfa_nodes) = nfa.compute_dfa()
+ dfa = Dfa(start, dfa_nodes)
+ self.__automata[rule_name] = (nfa, dfa)
--
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
---
You received this message because you are subscribed to the Google Groups "v8-dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email
to [email protected].
For more options, visit https://groups.google.com/groups/opt_out.