Revision: 17676
Author: [email protected]
Date: Wed Nov 13 08:20:20 2013 UTC
Log: Experimental parser: action refactor
[email protected]
BUG=
Review URL: https://codereview.chromium.org/71313002
http://code.google.com/p/v8/source/detail?r=17676
Modified:
/branches/experimental/parser/tools/lexer_generator/action_test.py
/branches/experimental/parser/tools/lexer_generator/automata_test.py
/branches/experimental/parser/tools/lexer_generator/automaton.py
/branches/experimental/parser/tools/lexer_generator/dfa.py
/branches/experimental/parser/tools/lexer_generator/lexer_test.py
/branches/experimental/parser/tools/lexer_generator/nfa.py
/branches/experimental/parser/tools/lexer_generator/nfa_builder.py
/branches/experimental/parser/tools/lexer_generator/rule_parser.py
=======================================
--- /branches/experimental/parser/tools/lexer_generator/action_test.py Tue
Nov 12 09:47:12 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/action_test.py Wed
Nov 13 08:20:20 2013 UTC
@@ -26,64 +26,49 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import unittest
-from rule_parser import RuleParser, RuleParserState
+from automaton import Action
+from rule_parser import RuleProcessor
from nfa_builder import NfaBuilder
from dfa import Dfa
-def dfa_from_nfa(nfa):
- (start_name, dfa_nodes) = nfa.compute_dfa()
- return Dfa(start_name, dfa_nodes)
-
-def process_rules(parser_state):
+def process_rules(rules):
rule_map = {}
- builder = NfaBuilder()
- for k, v in parser_state.rules.items():
- graphs = []
- for (graph, action) in v['regex']:
- graphs.append(NfaBuilder.add_action(graph, action))
- nfa = builder.nfa(NfaBuilder.or_graphs(graphs))
- dfa = dfa_from_nfa(nfa)
- rule_map[k] = (nfa, dfa)
+ for name, automata in RuleProcessor.parse(rules).automata_iter():
+ rule_map[name] = automata
return rule_map
class ActionTestCase(unittest.TestCase):
- def __verify_last_action(self, dfa, string, expected_code,
- expected_condition):
+ def __verify_last_action(self, dfa, string, expected_code):
actions = list(dfa.collect_actions(string))
- self.assertEqual(actions[-1], ('TERMINATE',))
- self.assertEqual(actions[-2][1], expected_code)
- self.assertEqual(actions[-2][2], expected_condition)
+ self.assertEqual(actions[-1], Action('TERMINATE'))
+ self.assertEqual(actions[-2].data(), expected_code)
def test_action_precedence(self):
- parser_state = RuleParserState()
rules = '''<default>
"key" { KEYWORD } <<break>>
/[a-z]+/ { ID } <<break>>'''
- RuleParser.parse(rules, parser_state)
- automata_for_conditions = process_rules(parser_state)
+ automata_for_conditions = process_rules(rules)
self.assertEqual(len(automata_for_conditions), 1)
self.assertTrue('default' in automata_for_conditions)
(nfa, dfa) = automata_for_conditions['default']
- self.__verify_last_action(dfa, 'foo', 'ID', 'break')
- self.__verify_last_action(dfa, 'key', 'KEYWORD', 'break')
- self.__verify_last_action(dfa, 'k', 'ID', 'break')
- self.__verify_last_action(dfa, 'ke', 'ID', 'break')
- self.__verify_last_action(dfa, 'keys', 'ID', 'break')
+ self.__verify_last_action(dfa, 'foo', 'ID')
+ self.__verify_last_action(dfa, 'key', 'KEYWORD')
+ self.__verify_last_action(dfa, 'k', 'ID')
+ self.__verify_last_action(dfa, 'ke', 'ID')
+ self.__verify_last_action(dfa, 'keys', 'ID')
def test_wrong_action_precedence(self):
- parser_state = RuleParserState()
rules = '''<default>
/[a-z]+/ { ID } <<break>>
"key" { KEYWORD } <<break>>'''
- RuleParser.parse(rules, parser_state)
- automata_for_conditions = process_rules(parser_state)
+ automata_for_conditions = process_rules(rules)
self.assertEqual(len(automata_for_conditions), 1)
self.assertTrue('default' in automata_for_conditions)
(nfa, dfa) = automata_for_conditions['default']
# The keyword is not recognized because of the rule preference order
(ID
# is preferred over KEYWORD).
- self.__verify_last_action(dfa, 'foo', 'ID', 'break')
- self.__verify_last_action(dfa, 'key', 'ID', 'break')
+ self.__verify_last_action(dfa, 'foo', 'ID')
+ self.__verify_last_action(dfa, 'key', 'ID')
=======================================
--- /branches/experimental/parser/tools/lexer_generator/automata_test.py
Tue Nov 12 09:47:12 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/automata_test.py
Wed Nov 13 08:20:20 2013 UTC
@@ -26,6 +26,7 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import unittest
+from automaton import Action
from regex_parser import RegexParser
from nfa_builder import NfaBuilder
from dfa import Dfa
@@ -80,8 +81,8 @@
dfa.to_dot()
def test_actions(self):
- left_action = ("LEFT_ACTION", "LEFT_CONDITION")
- right_action = ("RIGHT_ACTION", "RIGHT_CONDITION")
+ left_action = Action("LEFT_ACTION")
+ right_action = Action("RIGHT_ACTION")
left = RegexParser.parse("left")
right = RegexParser.parse("right")
left = NfaBuilder.add_action(left, left_action)
@@ -95,9 +96,9 @@
for i, action in enumerate(actions):
self.assertEqual(action, expected[i])
def verify_miss(string, expected):
- verify(string, expected + [('MISS',)])
+ verify(string, expected + [Action('MISS',)])
def verify_hit(string, expected):
- verify(string, expected + [('TERMINATE',)])
+ verify(string, expected + [Action('TERMINATE',)])
(l, r) = left_action, right_action
verify_hit("left", [l])
verify_miss("lefta", [l])
=======================================
--- /branches/experimental/parser/tools/lexer_generator/automaton.py Tue
Nov 12 16:04:01 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/automaton.py Wed
Nov 13 08:20:20 2013 UTC
@@ -30,12 +30,49 @@
from transition_keys import TransitionKey
class Action(object):
- pass
+
+ def __init__(self, type, data = None, precedence = -1):
+ assert type
+ self.__type = type
+ self.__data = data
+ self.__precedence = precedence
+
+ def type(self):
+ return self.__type
+
+ def data(self):
+ return self.__data
+
+ def precedence(self):
+ return self.__precedence
+
+ def __hash__(self):
+ return hash((self.__type, self.__data))
+
+ def __eq__(self, other):
+ return (isinstance(other, self.__class__) and
+ self.__type == other.__type and
+ self.__data == other.__data)
+
+ def __str__(self):
+ if not self.__data:
+ return "action<%s>" % self.__type
+ return "action<%s, %s>" % (self.__type, self.__data)
class AutomatonState(object):
- def __init__(self, node_number):
- self.__node_number = node_number
+ __node_number_counter = 0
+
+ def __init__(self):
+ self.__node_number = AutomatonState.__node_number_counter
+ AutomatonState.__node_number_counter += 1
+
+ def __hash__(self):
+ return hash(self.__node_number)
+
+ def __eq__(self, other):
+ return (isinstance(other, self.__class__) and
+ self.__node_number == other.__node_number)
def node_number(self):
return self.__node_number
@@ -91,16 +128,20 @@
def to_dot(self):
def escape(v):
- v = str(v).replace('\r', '\\\\r')
- v = str(v).replace('\t', '\\\\t')
- v = str(v).replace('\n', '\\\\n')
- v = str(v).replace('\\', '\\\\')
- v = str(v).replace('\"', '\\\"')
+ v = str(v)
+ v =
v.replace('\r', '\\\\r').replace('\t', '\\\\t').replace('\n', '\\\\n')
+ v = v.replace('\\', '\\\\').replace('\"', '\\\"')
return v
def f(node, (node_content, edge_content)):
if node.action():
- action_text = node.action()[1].split('\n')[0]
+ action = node.action()
+ if action.type() == 'code':
+ # assert action.data()
+ action_text = action.data()
+ else:
+ action_text = action.type()
+ action_text = escape(action_text)
node_content.append(' S_l%s[shape = box, label="%s"];' %
(node.node_number(), action_text))
node_content.append(' S_%s -> S_l%s [arrowhead = none];' %
=======================================
--- /branches/experimental/parser/tools/lexer_generator/dfa.py Tue Nov 12
18:47:52 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/dfa.py Wed Nov 13
08:20:20 2013 UTC
@@ -31,11 +31,11 @@
class DfaState(AutomatonState):
- def __init__(self, name, node_number, actions):
- super(DfaState, self).__init__(node_number)
+ def __init__(self, name, action):
+ super(DfaState, self).__init__()
self.__name = name
self.__transitions = {}
- self.__actions = actions
+ self.__action = action
def transitions_to_multiple_states(self):
return False
@@ -44,10 +44,7 @@
return self.__name
def action(self):
- return self.__actions[0] if self.__actions else None
-
- def actions(self):
- return self.__actions
+ return self.__action
def add_transition(self, key, state):
assert not self.__transitions.has_key(key)
@@ -62,8 +59,8 @@
super(Dfa, self).__init__()
self.__terminal_set = set()
self.__name_map = {}
- for i, (name, node_data) in enumerate(mapping.items()):
- node = DfaState(name, i, node_data['actions'])
+ for name, node_data in mapping.items():
+ node = DfaState(name, node_data['action'])
self.__name_map[name] = node
if node_data['terminal']:
self.__terminal_set.add(node)
@@ -104,18 +101,18 @@
for c in string:
state = Dfa.__match_char(state, c)
if not state:
- yield ('MISS',)
+ yield Action('MISS')
return
if state.action():
yield state.action()
if state in self.__terminal_set:
- yield ('TERMINATE', )
+ yield Action('TERMINATE')
else:
- yield ('MISS',)
+ yield Action('MISS')
def matches(self, string):
actions = list(self.collect_actions(string))
- return actions and actions[-1][0] == 'TERMINATE'
+ return actions and actions[-1].type() == 'TERMINATE'
def lex(self, string):
state = self.__start
@@ -124,14 +121,14 @@
next = Dfa.__match_char(state, c)
if not next:
assert state.action() # must invoke default action here
- yield (state.action()[1], last_position, pos)
+ yield (state.action(), last_position, pos)
last_position = pos
# lex next token
next = Dfa.__match_char(self.__start, c)
assert next
state = next
assert state.action() # must invoke default action here
- yield (state.action()[1], last_position, len(string))
+ yield (state.action(), last_position, len(string))
def minimize(self):
pass
=======================================
--- /branches/experimental/parser/tools/lexer_generator/lexer_test.py Tue
Nov 12 19:10:03 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/lexer_test.py Wed
Nov 13 08:20:20 2013 UTC
@@ -26,16 +26,18 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import unittest
+from automaton import Action
from rule_parser import RuleProcessor
class LexerTestCase(unittest.TestCase):
- def __verify_action_stream(self, rules, string, expected_stream):
- expected_stream.append(('terminate', '\0'))
+ def __verify_action_stream(self, rules, string, expected):
+ expected = map(lambda (action, s) : (Action('code', action), s),
expected)
+ expected.append((Action('terminate'), '\0'))
rule_processor = RuleProcessor.parse(rules)
for i, (action, start, stop) in enumerate(rule_processor.lex(string)):
- self.assertEquals(expected_stream[i][0], action)
- self.assertEquals(expected_stream[i][1], string[start : stop])
+ self.assertEquals(expected[i][0], action)
+ self.assertEquals(expected[i][1], string[start : stop])
def test_simple(self):
rules = '''
=======================================
--- /branches/experimental/parser/tools/lexer_generator/nfa.py Tue Nov 12
16:24:44 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/nfa.py Wed Nov 13
08:20:20 2013 UTC
@@ -30,8 +30,8 @@
class NfaState(AutomatonState):
- def __init__(self, node_number):
- super(NfaState, self).__init__(node_number)
+ def __init__(self):
+ super(NfaState, self).__init__()
self.__transitions = {}
self.__unclosed = set()
self.__epsilon_closure = None
@@ -149,6 +149,21 @@
keys.discard(TransitionKey.epsilon())
return TransitionKey.disjoint_keys(keys)
+ @staticmethod
+ def __gather_actions(states):
+ action = None
+ for state in states:
+ if not state.action():
+ continue
+ if not action:
+ action = state.action()
+ continue
+ if state.action().precedence() == action.precedence():
+ assert state.action() == action
+ elif state.action().precedence() < action.precedence():
+ action = state.action()
+ return action
+
@staticmethod
def __to_dfa(nfa_state_set, dfa_nodes, end_node):
nfa_state_set = Nfa.__close(nfa_state_set)
@@ -156,18 +171,10 @@
name = ".".join(str(x.node_number()) for x in sorted(nfa_state_set))
if name in dfa_nodes:
return name
- def gather_actions(states):
- actions = set()
- for state in states:
- if state.action():
- actions.add(state.action())
- actions = list(actions)
- actions.sort()
- return actions
dfa_nodes[name] = {
'transitions': {},
'terminal': end_node in nfa_state_set,
- 'actions' : gather_actions(nfa_state_set)}
+ 'action' : Nfa.__gather_actions(nfa_state_set)}
for key in Nfa.__gather_transition_keys(nfa_state_set):
match_states = set()
f = lambda acc, state: acc | state.key_matches(key)
=======================================
--- /branches/experimental/parser/tools/lexer_generator/nfa_builder.py Tue
Nov 12 16:04:01 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/nfa_builder.py Wed
Nov 13 08:20:20 2013 UTC
@@ -43,7 +43,7 @@
def __new_state(self):
self.__node_number += 1
- return NfaState(self.__node_number - 1)
+ return NfaState()
def __or(self, graph):
start = self.__new_state()
=======================================
--- /branches/experimental/parser/tools/lexer_generator/rule_parser.py Tue
Nov 12 19:10:03 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/rule_parser.py Wed
Nov 13 08:20:20 2013 UTC
@@ -26,6 +26,7 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import ply.yacc as yacc
+from automaton import Action
from rule_lexer import RuleLexer
from regex_parser import RegexParser
from nfa_builder import NfaBuilder
@@ -116,7 +117,7 @@
assert not rules['catch_all']
rules['catch_all'] = transition
else:
- rule = (p[1], (RuleParser.__rule_precedence_counter, code,
transition))
+ rule = (p[1], RuleParser.__rule_precedence_counter, code, transition)
rules['regex'].append(rule)
def p_action(self, p):
@@ -244,11 +245,11 @@
def process(k, v):
graphs = []
continues = 0
- for (graph, (precedence, code, transition)) in v['regex']:
+ for (graph, precedence, code, transition) in v['regex']:
default_code = v['default_action']
- action = code if code else default_code
- if action:
- graph = NfaBuilder.add_action(graph, (precedence, action))
+ if code or default_code:
+ action = Action('code', code if code else default_code,
precedence)
+ graph = NfaBuilder.add_action(graph, action)
if not transition or transition == 'break':
pass
elif transition == 'continue':
@@ -258,7 +259,7 @@
elif (transition == 'terminate' or
transition == 'terminate_illegal'):
assert not code
- graph = NfaBuilder.add_action(graph, (-1, transition))
+ graph = NfaBuilder.add_action(graph, Action(transition, None,
-1))
else:
assert k == 'default'
subgraph_modifier = '*' if code else None
--
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
---
You received this message because you are subscribed to the Google Groups "v8-dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email
to [email protected].
For more options, visit https://groups.google.com/groups/opt_out.