Revision: 17900
Author: [email protected]
Date: Wed Nov 20 09:31:53 2013 UTC
Log: Experimental parser: abstract lexing
[email protected]
BUG=
Review URL: https://codereview.chromium.org/77153005
http://code.google.com/p/v8/source/detail?r=17900
Modified:
/branches/experimental/parser/tools/lexer_generator/automata_test.py
/branches/experimental/parser/tools/lexer_generator/automaton.py
/branches/experimental/parser/tools/lexer_generator/dfa.py
/branches/experimental/parser/tools/lexer_generator/lexer_test.py
/branches/experimental/parser/tools/lexer_generator/nfa.py
=======================================
--- /branches/experimental/parser/tools/lexer_generator/automata_test.py Fri Nov 15 10:06:54 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/automata_test.py Wed Nov 20 09:31:53 2013 UTC
@@ -84,31 +84,6 @@
for automaton in build_automata(regex):
automaton.to_dot()
- def test_actions(self):
- left_action = simple_action("LEFT_ACTION")
- right_action = simple_action("RIGHT_ACTION")
- left = RegexParser.parse("left")
- right = RegexParser.parse("right")
- left = NfaBuilder.add_action(left, left_action)
- right = NfaBuilder.add_action(right, right_action)
- composite = ('ONE_OR_MORE', NfaBuilder.or_graphs([left, right]))
- nfa = NfaBuilder().nfa(composite)
- dfa = dfa_from_nfa(nfa)
- def verify(string, expected):
- actions = list(dfa.collect_actions(string))
- self.assertEqual(len(expected), len(actions))
- for i, action in enumerate(actions):
- self.assertEqual(action, expected[i])
- def verify_miss(string, expected):
- verify(string, expected + [Dfa.miss_action()])
- def verify_hit(string, expected):
- verify(string, expected + [Dfa.terminal_action()])
- (l, r) = left_action, right_action
- verify_hit("left", [l])
- verify_miss("lefta", [l])
- verify_hit("leftrightleftright", [l, r, l, r])
- verify_miss("leftrightleftrightx", [l, r, l, r])
-
def test_minimization(self):
def empty_node():
return { 'transitions' : {}, 'terminal' : False, 'action' : None }
=======================================
--- /branches/experimental/parser/tools/lexer_generator/automaton.py Thu Nov 14 21:09:14 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/automaton.py Wed Nov 20 09:31:53 2013 UTC
@@ -31,6 +31,21 @@
class Action(object):
+ @staticmethod
+ def dominant_action(state_set):
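+ # Collapse a state set to a single action: the action with the lowest
+ # precedence value wins; states whose precedences tie must carry the
+ # identical action.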
+ action = None
+ for state in state_set:
+ if not state.action():
+ continue
+ if not action:
+ action = state.action()
+ continue
+ if state.action().precedence() == action.precedence():
+ assert state.action() == action
+ elif state.action().precedence() < action.precedence():
+ action = state.action()
+ return action
+
def __init__(self, entry_action, match_action, precedence = -1):
for action in [entry_action, match_action]:
if action == None:
@@ -133,9 +148,52 @@
edge = next_edge - visited
return visit_state
+ def start_set(self):
+ return set([self.start_state()])
+
def visit_all_states(self, visitor, visit_state = None, state_iter = None):
return self.visit_states(self.start_set(), visitor, visit_state, state_iter)
+ # TODO use iters
+ @staticmethod
+ def epsilon_closure(states):
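+ # Union of the given states with each state's own epsilon closure.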
+ f = lambda acc, node: acc | node.epsilon_closure()
+ return reduce(f, states, set(iter(states)))
+
+ @staticmethod
+ def __transition_states_for_char(valid_states, c):
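+ # States reachable on character c from the epsilon closure of valid_states.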
+ f = lambda state : state.transition_state_iter_for_char(c)
+ return set(chain(*map(f, Automaton.epsilon_closure(valid_states))))
+
+ def matches(self, string):
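+ # True iff consuming the whole string can leave the automaton in a
+ # terminal state.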
+ valid_states = self.start_set()
+ for c in string:
+ valid_states = Automaton.__transition_states_for_char(valid_states, c)
+ if not valid_states:
+ return False
+ valid_states = self.epsilon_closure(valid_states)
+ return len(self.terminal_set().intersection(valid_states)) > 0
+
+ def lex(self, string):
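+ # Greedy tokenizer: consume characters until no transition is possible,
+ # yield (dominant action, token start, token end) for the text consumed
+ # so far, then restart from the start set on the offending character.
+ # A final yield covers the trailing token at end of input.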
+ last_position = 0
+ valid_states = self.start_set()
+ for pos, c in enumerate(string):
+ transitions = Automaton.__transition_states_for_char(valid_states, c)
+ if transitions:
+ valid_states = transitions
+ continue
+ action = Action.dominant_action(valid_states)
+ assert action # must invoke default action here
+ yield (action, last_position, pos)
+ last_position = pos
+ # lex next token
+ valid_states = Automaton.__transition_states_for_char(self.start_set(), c)
+ assert valid_states
+ valid_states = self.epsilon_closure(valid_states)
+ action = Action.dominant_action(valid_states)
+ assert action # must invoke default action here
+ yield (action, last_position, len(string))
+
def to_dot(self):
def escape(v):
=======================================
--- /branches/experimental/parser/tools/lexer_generator/dfa.py Wed Nov 20 08:59:01 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/dfa.py Wed Nov 20 09:31:53 2013 UTC
@@ -47,32 +47,37 @@
return self.__action
def add_transition(self, key, state):
+ assert not key == TransitionKey.epsilon()
assert not self.__transitions.has_key(key)
self.__transitions[key] = state
def transitions(self):
return self.__transitions
+ def epsilon_closure(self):
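+ # A DFA state has no epsilon transitions, so its closure is just itself.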
+ return set([self])
+
# TODO abstract state matching
def __matches(self, match_func, value):
# f collects states whose corresponding TransitionKey matches 'value'.
f = (lambda acc, (key, state):
acc | set([state]) if match_func(key, value) else acc)
- matches = reduce(f, self.__transitions.items(), set())
- # Since this is a dfa, we should have at most one such state. Return it.
+ return reduce(f, self.__transitions.items(), set())
+
+ def transition_state_iter_for_char(self, value):
+ return iter(self.__matches(lambda k, v : k.matches_char(v), value))
+
+ def transition_state_iter_for_key(self, value):
+ return iter(self.__matches(lambda k, v : k.is_superset_of_key(v), value))
+
+ def transition_state_for_key(self, value):
+ matches = self.__matches(lambda k, v : k.is_superset_of_key(v), value)
if not matches:
return None
+ # Since this is a dfa, we should have at most one such state. Return it.
assert len(matches) == 1
return iter(matches).next()
- def next_state_with_char(self, value):
- return self.__matches(lambda k, v : k.matches_char(v), value)
-
- def transition_state_with_key(self, key):
- # 'key' can be a subkey of one of the TransitionKeys for this state,
but it
- # cannot partially overlap a TransitionKey for this state.
- return self.__matches(lambda k, v : k.is_superset_of_key(v), key)
-
class Dfa(Automaton):
def __init__(self, start_name, mapping):
@@ -109,61 +114,9 @@
def start_state(self):
return self.__start
- def start_set(self):
- return set([self.__start])
-
def terminal_set(self):
return set(self.__terminal_set)
- @staticmethod
- def __match_char(state, char):
- match = list(state.state_iter(key_filter = lambda k: k.matches_char(char)))
- if not match: return None
- assert len(match) == 1
- return match[0]
-
- @staticmethod
- def terminal_action():
- return Action(None, ('TERMINATE', None))
-
- @staticmethod
- def miss_action():
- return Action(None, ('Miss', None))
-
- def collect_actions(self, string):
- state = self.__start
- for c in string:
- state = Dfa.__match_char(state, c)
- if not state:
- yield self.miss_action()
- return
- if state.action():
- yield state.action()
- if state in self.__terminal_set:
- yield self.terminal_action()
- else:
- yield self.miss_action()
-
- def matches(self, string):
- actions = list(self.collect_actions(string))
- return actions and actions[-1] == self.terminal_action()
-
- def lex(self, string):
- state = self.__start
- last_position = 0
- for pos, c in enumerate(string):
- next = Dfa.__match_char(state, c)
- if not next:
- assert state.action() # must invoke default action here
- yield (state.action(), last_position, pos)
- last_position = pos
- # lex next token
- next = Dfa.__match_char(self.__start, c)
- assert next
- state = next
- assert state.action() # must invoke default action here
- yield (state.action(), last_position, len(string))
-
def minimize(self):
return DfaMinimizer(self).minimize()
@@ -279,7 +232,7 @@
transitions = {}
for state_id, state in id_map.items():
def f((key_id, key)):
- transition_state = state.transition_state_with_key(key)
+ transition_state = state.transition_state_for_key(key)
if transition_state:
return reverse_id_map[transition_state]
return None
@@ -293,7 +246,7 @@
state = id_map[state_id]
for key_id, key in enumerate(self.__alphabet):
transition_state_id = transition_state_array[key_id]
- transition_state = state.transition_state_with_key(key)
+ transition_state = state.transition_state_for_key(key)
if not transition_state:
assert transition_state_id == None
else:
@@ -332,15 +285,15 @@
transition_id = transition_array[key_id]
if transition_id == None:
if verify:
- assert not state.transition_state_with_key(key)
+ assert not state.transition_state_for_key(key)
for s_id in state_ids:
- assert not id_map[s_id].transition_state_with_key(key)
+ assert not id_map[s_id].transition_state_for_key(key)
continue
transition_partition = reverse_partition_map[transition_id]
assert transition_partition
if verify:
for s_id in state_ids:
- transition = id_map[s_id].transition_state_with_key(key)
+ transition = id_map[s_id].transition_state_for_key(key)
assert transition
test_partition = reverse_partition_map[reverse_id_map[transition]]
assert test_partition == transition_partition
=======================================
--- /branches/experimental/parser/tools/lexer_generator/lexer_test.py Tue Nov 19 12:28:18 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/lexer_test.py Wed Nov 20 09:31:53 2013 UTC
@@ -35,7 +35,7 @@
expected = map(lambda (action, s) : (Action(None, (action, None)), s),
expected)
expected.append((Action(None, ('terminate', None)), '\0'))
automata = RuleProcessor.parse(rules).default_automata()
- for automaton in [automata.dfa(), automata.minimal_dfa()]:
+ for automaton in [automata.nfa(), automata.dfa(), automata.minimal_dfa()]:
for i, (action, start, stop) in enumerate(automaton.lex(string)):
self.assertEquals(expected[i][0], action)
self.assertEquals(expected[i][1], string[start : stop])
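
(For context, a minimal sketch of the interface this hunk now exercises: lex() is defined once on Automaton, so the same loop drives the NFA and both DFAs. Here rules and string stand in for whatever the test fixture supplies; they are placeholders, not values from this change.)

  automata = RuleProcessor.parse(rules).default_automata()
  # Each yielded item is (action, token_start, token_stop); the token text is
  # recovered by slicing the input, exactly as the assertions above do.
  for automaton in [automata.nfa(), automata.dfa(), automata.minimal_dfa()]:
    for action, start, stop in automaton.lex(string):
      print action, string[start : stop]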
=======================================
--- /branches/experimental/parser/tools/lexer_generator/nfa.py Tue Nov 19 18:25:42 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/nfa.py Wed Nov 20 09:31:53 2013 UTC
@@ -103,11 +103,11 @@
acc | states if match_func(key, value) else acc)
return reduce(f, self.__transitions.items(), set())
- def next_states_with_char(self, value):
- return self.__matches(lambda k, v : k.matches_char(v), value)
+ def transition_state_iter_for_char(self, value):
+ return iter(self.__matches(lambda k, v : k.matches_char(v), value))
- def key_matches(self, value):
- return self.__matches(lambda k, v : k.is_superset_of_key(v), value)
+ def transition_state_iter_for_key(self, value):
+ return iter(self.__matches(lambda k, v : k.is_superset_of_key(v), value))
class Nfa(Automaton):
@@ -117,8 +117,8 @@
self.__end = end
self.__verify(nodes_created)
- def start_set(self):
- return set([self.__start])
+ def start_state(self):
+ return self.__start
def terminal_set(self):
return set([self.__end])
@@ -130,45 +130,15 @@
count = self.visit_all_states(f, 0)
assert count == nodes_created
- @staticmethod
- def __close(states):
- f = lambda acc, node: acc | node.epsilon_closure()
- return reduce(f, states, set(states))
-
- def matches(self, string):
- valid_states = Nfa.__close(set([self.__start]))
- for c in string:
- f = lambda acc, state: acc | state.next_states_with_char(c)
- transitions = reduce(f, valid_states, set())
- if not transitions:
- return False
- valid_states = Nfa.__close(transitions)
- return self.__end in valid_states
-
@staticmethod
def __gather_transition_keys(state_set):
keys = set(chain(*map(lambda state: state.key_iter(), state_set)))
keys.discard(TransitionKey.epsilon())
return TransitionKey.disjoint_keys(keys)
- @staticmethod
- def __gather_actions(states):
- action = None
- for state in states:
- if not state.action():
- continue
- if not action:
- action = state.action()
- continue
- if state.action().precedence() == action.precedence():
- assert state.action() == action
- elif state.action().precedence() < action.precedence():
- action = state.action()
- return action
-
@staticmethod
def __to_dfa(nfa_state_set, dfa_nodes, end_node):
- nfa_state_set = Nfa.__close(nfa_state_set)
+ nfa_state_set = Automaton.epsilon_closure(nfa_state_set)
assert nfa_state_set
name = ".".join(str(x.node_number()) for x in sorted(nfa_state_set))
if name in dfa_nodes:
@@ -176,12 +146,11 @@
dfa_nodes[name] = {
'transitions': {},
'terminal': end_node in nfa_state_set,
- 'action' : Nfa.__gather_actions(nfa_state_set)}
+ 'action' : Action.dominant_action(nfa_state_set)}
for key in Nfa.__gather_transition_keys(nfa_state_set):
match_states = set()
- f = lambda acc, state: acc | state.key_matches(key)
- for state in reduce(f, nfa_state_set, set()):
- match_states.add(state)
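+ # Collect every NFA state reachable from this set under the current key.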
+ f = lambda state: state.transition_state_iter_for_key(key)
+ match_states |= set(chain(*map(f, nfa_state_set)))
transition_state = Nfa.__to_dfa(match_states, dfa_nodes, end_node)
dfa_nodes[name]['transitions'][key] = transition_state
return name