Revision: 17460
Author: [email protected]
Date: Mon Nov 4 15:04:49 2013 UTC
Log: Experimental parser: add embedded actions
[email protected]
BUG=
Review URL: https://codereview.chromium.org/57923002
http://code.google.com/p/v8/source/detail?r=17460
Modified:
/branches/experimental/parser/tools/lexer_generator/automata_test.py
/branches/experimental/parser/tools/lexer_generator/dfa.py
/branches/experimental/parser/tools/lexer_generator/nfa.py
/branches/experimental/parser/tools/lexer_generator/transition_keys.py
=======================================
--- /branches/experimental/parser/tools/lexer_generator/automata_test.py
Thu Oct 31 14:36:42 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/automata_test.py
Mon Nov 4 15:04:49 2013 UTC
@@ -30,16 +30,17 @@
from nfa import NfaBuilder
from dfa import Dfa
+def dfa_from_nfa(nfa):
+ (start_name, dfa_nodes) = nfa.compute_dfa()
+ return Dfa(start_name, dfa_nodes)
+
def build_automata(string):
- graph = RegexParser.parse(string)
- nfa = NfaBuilder().nfa(graph)
- (start_name, dfa_nodes, end_nodes) = nfa.compute_dfa()
- dfa = Dfa(start_name, dfa_nodes, end_nodes)
- return (nfa, dfa)
+ nfa = NfaBuilder().nfa(RegexParser.parse(string))
+ return (nfa, dfa_from_nfa(nfa))
class AutomataTestCase(unittest.TestCase):
- # (pattern, should match, shouldn't match)
+ # (pattern, should match, should not match)
__test_data = [
("a", ["a"], ["b", ""]),
("ab", ["ab"], ["bb", ""]),
@@ -71,5 +72,30 @@
nfa.to_dot()
dfa.to_dot()
+ def test_actions(self):
+ left_action = ("LEFT_ACTION",)
+ right_action = ("RIGHT_ACTION",)
+ left = RegexParser.parse("left")
+ right = RegexParser.parse("right")
+ left = NfaBuilder.add_action(left, left_action)
+ right = NfaBuilder.add_action(right, right_action)
+ composite = ('ONE_OR_MORE', NfaBuilder.or_graphs([left, right]))
+ nfa = NfaBuilder().nfa(composite)
+ dfa = dfa_from_nfa(nfa)
+ def verify(string, expected):
+ actions = list(dfa.collect_actions(string))
+ assertEqual(len(expected), len(actions))
+ for i, action in enumerate(actions):
+ assertEqual(action[i], expected[i])
+ def verify_miss(string, expected):
+ verify(string, expected + [('MISS',)])
+ def verify_hit(string, expected):
+ verify(string, expected + [('TERMINATE',)])
+ (l, r) = left_action, right_action
+ verify_hit("left", [l])
+ verify_miss("lefta", [l])
+ verify_hit("leftrightleftright", [l, r, l, r])
+ verify_miss("leftrightleftrightx", [l, r, l, r])
+
if __name__ == '__main__':
- unittest.main()
+ unittest.main()
=======================================
--- /branches/experimental/parser/tools/lexer_generator/dfa.py Thu Oct 31
08:19:46 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/dfa.py Mon Nov 4
15:04:49 2013 UTC
@@ -26,6 +26,7 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
from nfa import Nfa
+from transition_keys import TransitionKey
class DfaState:
@@ -40,28 +41,45 @@
def node_number(self):
return self.__node_number
- def add_transition(self, key, state):
+ def add_transition(self, key, action, state):
assert not self.__transitions.has_key(key)
- self.__transitions[key] = state
+ self.__transitions[key] = (state, action)
+
+ def set_action(self, action):
+ assert self.__action == None
+ self.__action = action
def transitions(self):
return self.__transitions
class Dfa:
- def __init__(self, start_name, mapping, end_names):
+ def __init__(self, start_name, mapping):
+ self.__terminal_set = set()
name_map = {}
- offset = 0
- self.__terminal_set = set()
- for name in mapping.keys():
- dfa_state = DfaState(name, offset)
- name_map[name] = dfa_state
- offset += 1
- if name in end_names:
- self.__terminal_set.add(dfa_state)
- for name, values in mapping.items():
- for key, value in values.items():
- name_map[name].add_transition(key, name_map[value])
+ action_map = {}
+ for i, name in enumerate(mapping.keys()):
+ name_map[name] = DfaState(name, i)
+ for name, node_data in mapping.items():
+ node = name_map[name]
+ if node_data['terminal']:
+ self.__terminal_set.add(node)
+ inversion = {}
+ for key, (state, action) in node_data['transitions'].items():
+ if not state in inversion:
+ inversion[state] = {}
+ # TODO fix this
+ action_key = str(action)
+ if not action_key in action_map:
+ action_map[action_key] = action
+ if not action_key in inversion[state]:
+ inversion[state][action_key] = []
+ inversion[state][action_key].append(key)
+ for state, values in inversion.items():
+ for action_key, keys in values.items():
+ merged_key = TransitionKey.merged_key(keys)
+ action = action_map[action_key]
+ node.add_transition(merged_key, action, name_map[state])
self.__start = name_map[start_name]
assert self.__terminal_set
@@ -69,35 +87,43 @@
def __visit_edges(start, visitor, state):
edge = set([start])
visited = set()
+ first = lambda v: [x[0] for x in v]
while edge:
f = lambda (next_edge, state), node: (
- next_edge | set(node.transitions().values()),
+ next_edge | set(first(node.transitions().values())),
visitor(node, state))
(next_edge, state) = reduce(f, edge, (set(), state))
visited |= edge
edge = next_edge - visited
return state
- def matches(self, string):
+ def collect_actions(self, string):
state = self.__start
for c in string:
- next_state = None
- for key, transition_state in state.transitions().items():
- if key.matches_char(c):
- next_state = transition_state
- break
- if not next_state:
- return False
- state = next_state
- return state in self.__terminal_set
+ next = [s for k, s in state.transitions().items() if
k.matches_char(c)]
+ if not next:
+ yield ('MISS',)
+ return
+ assert len(next) == 1
+ (state, action) = next[0]
+ if action:
+ yield action
+ if state in self.__terminal_set:
+ yield ('TERMINATE', )
+ else:
+ yield ('MISS',)
+
+ def matches(self, string):
+ actions = list(self.collect_actions(string))
+ return actions and actions[-1][0] == 'TERMINATE'
def to_dot(self):
def f(node, node_content):
- for key, value in node.transitions().items():
+ for key, (state, action) in node.transitions().items():
node_content.append(
" S_%s -> S_%s [ label = \"%s\" ];" %
- (node.node_number(), value.node_number(), key))
+ (node.node_number(), state.node_number(), key))
return node_content
node_content = self.__visit_edges(self.__start, f, [])
=======================================
--- /branches/experimental/parser/tools/lexer_generator/nfa.py Thu Oct 31
11:01:22 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/nfa.py Mon Nov 4
15:04:49 2013 UTC
@@ -36,6 +36,7 @@
self.__unclosed = set()
self.__node_number = node_number
self.__epsilon_closure = None
+ self.__transition_action = None
def node_number(self):
return self.__node_number
@@ -48,45 +49,54 @@
assert self.__epsilon_closure == None
self.__epsilon_closure = frozenset(closure)
+ def set_transition_action(self, action):
+ assert not self.is_closed()
+ assert self.__transition_action == None
+ self.__transition_action = action
+
def transitions(self):
assert self.is_closed()
return self.__transitions
- def get_epsilon_transitions(self):
- key = TransitionKey.epsilon();
- if not key in self.__transitions: return frozenset()
- return frozenset(self.__transitions[key])
+ def next_states(self, key_filter):
+ assert self.is_closed()
+ first = lambda v: [x[0] for x in v]
+ f = lambda acc, (k, v) : acc if not key_filter(k) else acc |
set(first(v))
+ return reduce(f, self.__transitions.items(), set())
- def __add_transition(self, key, value):
- if value == None:
+ def __add_transition(self, key, action, next_state):
+ if next_state == None:
+ assert not action
assert not self.is_closed(), "already closed"
self.__unclosed.add(key)
return
if not key in self.__transitions:
self.__transitions[key] = set()
- self.__transitions[key].add(value)
+ self.__transitions[key].add((next_state, action))
def add_unclosed_transition(self, key):
assert key != TransitionKey.epsilon()
- self.__add_transition(key, None)
+ self.__add_transition(key, None, None)
def add_epsilon_transition(self, state):
assert state != None
- self.__add_transition(TransitionKey.epsilon(), state)
+ self.__add_transition(TransitionKey.epsilon(), None, state)
def is_closed(self):
return self.__unclosed == None
def close(self, end):
assert not self.is_closed()
+ unclosed, self.__unclosed = self.__unclosed, None
+ action, self.__transition_action = self.__transition_action, None
if end == None:
- assert not self.__unclosed
- else:
- for key in self.__unclosed:
- self.__add_transition(key, end)
- if not self.__unclosed:
- self.add_epsilon_transition(end)
- self.__unclosed = None
+ assert not unclosed
+ assert not action
+ return
+ for key in unclosed:
+ self.__add_transition(key, action, end)
+ if not unclosed:
+ self.__add_transition(TransitionKey.epsilon(), action, end)
def __matches(self, match_func, value):
f = lambda acc, (k, vs): acc | vs if match_func(k, value) else acc
@@ -106,13 +116,13 @@
class NfaBuilder:
def __init__(self):
+ self.__node_number = 0
self.__operation_map = {}
self.__members = getmembers(self)
def __new_state(self):
- node_number = self.node_number
- self.node_number = self.node_number + 1
- return NfaState(node_number)
+ self.__node_number += 1
+ return NfaState(self.__node_number - 1)
def __or(self, graph):
start = self.__new_state()
@@ -165,6 +175,12 @@
def __any(self, graph):
return self.__key_state(TransitionKey.any())
+ def __action(self, graph):
+ result = self.__process(graph[1])
+ for end in result[1]:
+ end.set_transition_action(graph[2])
+ return result
+
def __process(self, graph):
assert type(graph) == TupleType
method = "_NfaBuilder__" + graph[0].lower()
@@ -179,12 +195,24 @@
end.close(new_end)
def nfa(self, graph):
- self.node_number = 0
+ start_node_number = self.__node_number
(start, ends) = self.__process(graph)
end = self.__new_state()
self.__patch_ends(ends, end)
end.close(None)
- return Nfa(start, end, self.node_number)
+ return Nfa(start, end, self.__node_number - start_node_number)
+
+ @staticmethod
+ def add_action(graph, action):
+ return ('ACTION', graph, action)
+
+ @staticmethod
+ def or_graphs(graphs):
+ return reduce(lambda acc, g: ('OR', acc, g), graphs)
+
+ @staticmethod
+ def cat_graphs(graphs):
+ return reduce(lambda acc, g: ('CAT', acc, g), graphs)
class Nfa:
@@ -208,9 +236,7 @@
def __visit_all_edges(self, visitor, state):
edge = set([self.__start])
- def next_edge(node):
- f = lambda acc, values: acc | values
- return reduce(f, node.transitions().values(), set())
+ next_edge = lambda node: node.next_states(lambda x : True)
return Nfa.__visit_edges(edge, next_edge, visitor, state)
def __verify(self, nodes_created):
@@ -229,7 +255,8 @@
def inner(node, closure):
closure.add(node)
return closure
- next_edge = lambda node : node.get_epsilon_transitions()
+ is_epsilon = lambda k: k == TransitionKey.epsilon()
+ next_edge = lambda node : node.next_states(is_epsilon)
edge = next_edge(node)
closure = Nfa.__visit_edges(edge, next_edge, inner, set())
node.set_epsilon_closure(closure)
@@ -245,35 +272,43 @@
valid_states = Nfa.__close(set([self.__start]))
for c in string:
f = lambda acc, state: acc | state.char_matches(c)
- valid_states = Nfa.__close(reduce(f, valid_states, set()))
- if not valid_states:
+ transitions = reduce(f, valid_states, set())
+ if not transitions:
return False
+ valid_states = Nfa.__close(set([x[0] for x in transitions]))
return self.__end in valid_states
@staticmethod
- def __to_dfa(nfa_state_set, builder):
+ def __to_dfa(nfa_state_set, dfa_nodes, end_node):
nfa_state_set = Nfa.__close(nfa_state_set)
assert nfa_state_set
- name = str([x.node_number() for x in nfa_state_set])
- (dfa_nodes, end_nodes, end_node) = builder
+ name = ".".join(str(x.node_number()) for x in sorted(nfa_state_set))
if name in dfa_nodes:
return name
- dfa_node = {}
- dfa_nodes[name] = dfa_node
+ dfa_nodes[name] = {
+ 'transitions': {},
+ 'terminal': end_node in nfa_state_set}
for key in NfaState.gather_transition_keys(nfa_state_set):
f = lambda acc, state: acc | state.key_matches(key)
- dfa_node[key] = Nfa.__to_dfa(reduce(f, nfa_state_set, set()),
builder)
- if end_node in nfa_state_set:
- end_nodes.add(name)
+ transitions = reduce(f, nfa_state_set, set())
+ match_states = set()
+ actions = set()
+ for (state, action) in transitions:
+ match_states.add(state)
+ if action:
+ actions.add(action)
+ assert len(match_states) == len(transitions)
+ assert not actions or len(actions) == 1
+ action = iter(actions).next() if actions else None
+ transition_state = Nfa.__to_dfa(match_states, dfa_nodes, end_node)
+ dfa_nodes[name]['transitions'][key] = (transition_state, action)
return name
def compute_dfa(self):
self.__compute_epsilon_closures()
dfa_nodes = {}
- end_nodes = set()
- dfa_builder = (dfa_nodes, end_nodes, self.__end)
- start_name = self.__to_dfa(set([self.__start]), dfa_builder)
- return (start_name, dfa_nodes, end_nodes)
+ start_name = self.__to_dfa(set([self.__start]), dfa_nodes, self.__end)
+ return (start_name, dfa_nodes)
def to_dot(self):
@@ -284,7 +319,7 @@
for value in values:
node_content.append(
" S_%d -> S_%d [ label = \"%s\" ];" %
- (node.node_number(), value.node_number(), key))
+ (node.node_number(), value[0].node_number(), key))
return node_content
node_content = self.__visit_all_edges(f, [])
=======================================
--- /branches/experimental/parser/tools/lexer_generator/transition_keys.py
Thu Oct 31 14:46:33 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/transition_keys.py
Mon Nov 4 15:04:49 2013 UTC
@@ -238,6 +238,11 @@
merged.append(last)
return merged
+ @staticmethod
+ def merged_key(keys):
+ f = lambda acc, key: acc + list(key.__ranges)
+ return TransitionKey.__key_from_ranges(False, reduce(f, keys, []))
+
@staticmethod
def __invert_ranges(ranges):
inverted = []
--
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
---
You received this message because you are subscribed to the Google Groups "v8-dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email
to [email protected].
For more options, visit https://groups.google.com/groups/opt_out.