Revision: 17900
Author:   [email protected]
Date:     Wed Nov 20 09:31:53 2013 UTC
Log:      Experimental parser: abstract lexing

[email protected]

BUG=

Review URL: https://codereview.chromium.org/77153005
http://code.google.com/p/v8/source/detail?r=17900

Modified:
 /branches/experimental/parser/tools/lexer_generator/automata_test.py
 /branches/experimental/parser/tools/lexer_generator/automaton.py
 /branches/experimental/parser/tools/lexer_generator/dfa.py
 /branches/experimental/parser/tools/lexer_generator/lexer_test.py
 /branches/experimental/parser/tools/lexer_generator/nfa.py

=======================================
--- /branches/experimental/parser/tools/lexer_generator/automata_test.py Fri Nov 15 10:06:54 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/automata_test.py Wed Nov 20 09:31:53 2013 UTC
@@ -84,31 +84,6 @@
         for automaton in build_automata(regex):
           automaton.to_dot()

-    def test_actions(self):
-      left_action = simple_action("LEFT_ACTION")
-      right_action = simple_action("RIGHT_ACTION")
-      left = RegexParser.parse("left")
-      right = RegexParser.parse("right")
-      left = NfaBuilder.add_action(left, left_action)
-      right = NfaBuilder.add_action(right, right_action)
-      composite = ('ONE_OR_MORE', NfaBuilder.or_graphs([left, right]))
-      nfa = NfaBuilder().nfa(composite)
-      dfa = dfa_from_nfa(nfa)
-      def verify(string, expected):
-        actions = list(dfa.collect_actions(string))
-        self.assertEqual(len(expected), len(actions))
-        for i, action in enumerate(actions):
-          self.assertEqual(action, expected[i])
-      def verify_miss(string, expected):
-        verify(string, expected + [Dfa.miss_action()])
-      def verify_hit(string, expected):
-        verify(string, expected + [Dfa.terminal_action()])
-      (l, r) = left_action, right_action
-      verify_hit("left", [l])
-      verify_miss("lefta", [l])
-      verify_hit("leftrightleftright", [l, r, l, r])
-      verify_miss("leftrightleftrightx", [l, r, l, r])
-
     def test_minimization(self):
       def empty_node():
         return { 'transitions' : {}, 'terminal' : False, 'action' : None }
=======================================
--- /branches/experimental/parser/tools/lexer_generator/automaton.py Thu Nov 14 21:09:14 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/automaton.py Wed Nov 20 09:31:53 2013 UTC
@@ -31,6 +31,21 @@

 class Action(object):

+  @staticmethod
+  def dominant_action(state_set):
+    action = None
+    for state in state_set:
+      if not state.action():
+        continue
+      if not action:
+        action = state.action()
+        continue
+      if state.action().precedence() == action.precedence():
+        assert state.action() == action
+      elif state.action().precedence() < action.precedence():
+        action = state.action()
+    return action
+
   def __init__(self, entry_action, match_action, precedence = -1):
     for action in [entry_action, match_action]:
       if action == None:
@@ -133,9 +148,52 @@
       edge = next_edge - visited
     return visit_state

+  def start_set(self):
+    return set([self.start_state()])
+
   def visit_all_states(self, visitor, visit_state = None, state_iter = None):
     return self.visit_states(self.start_set(), visitor, visit_state, state_iter)

+  # TODO use iters
+  @staticmethod
+  def epsilon_closure(states):
+    f = lambda acc, node: acc | node.epsilon_closure()
+    return reduce(f, states, set(iter(states)))
+
+  @staticmethod
+  def __transition_states_for_char(valid_states, c):
+    f = lambda state : state.transition_state_iter_for_char(c)
+    return set(chain(*map(f, Automaton.epsilon_closure(valid_states))))
+
+  def matches(self, string):
+    valid_states = self.start_set()
+    for c in string:
+      valid_states = Automaton.__transition_states_for_char(valid_states, c)
+      if not valid_states:
+        return False
+    valid_states = self.epsilon_closure(valid_states)
+    return len(self.terminal_set().intersection(valid_states)) > 0
+
+  def lex(self, string):
+    last_position = 0
+    valid_states = self.start_set()
+    for pos, c in enumerate(string):
+      transitions = Automaton.__transition_states_for_char(valid_states, c)
+      if transitions:
+        valid_states = transitions
+        continue
+      action = Action.dominant_action(valid_states)
+      assert action # must invoke default action here
+      yield (action, last_position, pos)
+      last_position = pos
+      # lex next token
+      valid_states = Automaton.__transition_states_for_char(self.start_set(), c)
+      assert valid_states
+    valid_states = self.epsilon_closure(valid_states)
+    action = Action.dominant_action(valid_states)
+    assert action # must invoke default action here
+    yield (action, last_position, len(string))
+
   def to_dot(self):

     def escape(v):
=======================================
--- /branches/experimental/parser/tools/lexer_generator/dfa.py Wed Nov 20 08:59:01 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/dfa.py Wed Nov 20 09:31:53 2013 UTC
@@ -47,32 +47,37 @@
     return self.__action

   def add_transition(self, key, state):
+    assert not key == TransitionKey.epsilon()
     assert not self.__transitions.has_key(key)
     self.__transitions[key] = state

   def transitions(self):
     return self.__transitions

+  def epsilon_closure(self):
+    return set([self])
+
   # TODO abstract state matching
   def __matches(self, match_func, value):
     # f collects states whose corresponding TransitionKey matches 'value'.
     f = (lambda acc, (key, state):
            acc | set([state]) if match_func(key, value) else acc)
-    matches = reduce(f, self.__transitions.items(), set())
-    # Since this is a dfa, we should have at most one such state. Return it.
+    return reduce(f, self.__transitions.items(), set())
+
+  def transition_state_iter_for_char(self, value):
+    return iter(self.__matches(lambda k, v : k.matches_char(v), value))
+
+  def transition_state_iter_for_key(self, value):
+    return iter(self.__matches(lambda k, v : k.is_superset_of_key(v), value))
+
+  def transition_state_for_key(self, value):
+    matches = self.__matches(lambda k, v : k.is_superset_of_key(v), value)
     if not matches:
       return None
+    # Since this is a dfa, we should have at most one such state. Return it.
     assert len(matches) == 1
     return iter(matches).next()

-  def next_state_with_char(self, value):
-    return self.__matches(lambda k, v : k.matches_char(v), value)
-
-  def transition_state_with_key(self, key):
-    # 'key' can be a subkey of one of the TransitionKeys for this state, but it
-    # cannot partially overlap a TransitionKey for this state.
-    return self.__matches(lambda k, v : k.is_superset_of_key(v), key)
-
 class Dfa(Automaton):

   def __init__(self, start_name, mapping):
@@ -109,61 +114,9 @@
   def start_state(self):
     return self.__start

-  def start_set(self):
-    return set([self.__start])
-
   def terminal_set(self):
     return set(self.__terminal_set)

-  @staticmethod
-  def __match_char(state, char):
-    match = list(state.state_iter(key_filter = lambda k: k.matches_char(char)))
-    if not match: return None
-    assert len(match) == 1
-    return match[0]
-
-  @staticmethod
-  def terminal_action():
-    return Action(None, ('TERMINATE', None))
-
-  @staticmethod
-  def miss_action():
-    return Action(None, ('Miss', None))
-
-  def collect_actions(self, string):
-    state = self.__start
-    for c in string:
-      state = Dfa.__match_char(state, c)
-      if not state:
-        yield self.miss_action()
-        return
-      if state.action():
-        yield state.action()
-    if state in self.__terminal_set:
-      yield self.terminal_action()
-    else:
-      yield self.miss_action()
-
-  def matches(self, string):
-    actions = list(self.collect_actions(string))
-    return actions and actions[-1] == self.terminal_action()
-
-  def lex(self, string):
-    state = self.__start
-    last_position = 0
-    for pos, c in enumerate(string):
-      next = Dfa.__match_char(state, c)
-      if not next:
-        assert state.action() # must invoke default action here
-        yield (state.action(), last_position, pos)
-        last_position = pos
-        # lex next token
-        next = Dfa.__match_char(self.__start, c)
-        assert next
-      state = next
-    assert state.action() # must invoke default action here
-    yield (state.action(), last_position, len(string))
-
   def minimize(self):
     return DfaMinimizer(self).minimize()

@@ -279,7 +232,7 @@
     transitions = {}
     for state_id, state in id_map.items():
       def f((key_id, key)):
-        transition_state = state.transition_state_with_key(key)
+        transition_state = state.transition_state_for_key(key)
         if transition_state:
           return reverse_id_map[transition_state]
         return None
@@ -293,7 +246,7 @@
           state = id_map[state_id]
           for key_id, key in enumerate(self.__alphabet):
             transition_state_id = transition_state_array[key_id]
-            transition_state = state.transition_state_with_key(key)
+            transition_state = state.transition_state_for_key(key)
             if not transition_state:
               assert transition_state_id == None
             else:
@@ -332,15 +285,15 @@
         transition_id = transition_array[key_id]
         if transition_id == None:
           if verify:
-            assert not state.transition_state_with_key(key)
+            assert not state.transition_state_for_key(key)
             for s_id in state_ids:
-              assert not id_map[s_id].transition_state_with_key(key)
+              assert not id_map[s_id].transition_state_for_key(key)
           continue
         transition_partition = reverse_partition_map[transition_id]
         assert transition_partition
         if verify:
           for s_id in state_ids:
-            transition = id_map[s_id].transition_state_with_key(key)
+            transition = id_map[s_id].transition_state_for_key(key)
             assert transition
             test_partition = reverse_partition_map[reverse_id_map[transition]]
             assert test_partition == transition_partition
=======================================
--- /branches/experimental/parser/tools/lexer_generator/lexer_test.py Tue Nov 19 12:28:18 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/lexer_test.py Wed Nov 20 09:31:53 2013 UTC
@@ -35,7 +35,7 @@
     expected = map(lambda (action, s) : (Action(None, (action, None)), s), expected)
     expected.append((Action(None, ('terminate', None)), '\0'))
     automata = RuleProcessor.parse(rules).default_automata()
-    for automaton in [automata.dfa(), automata.minimal_dfa()]:
+    for automaton in [automata.nfa(), automata.dfa(), automata.minimal_dfa()]:
         for i, (action, start, stop) in enumerate(automaton.lex(string)):
           self.assertEquals(expected[i][0], action)
           self.assertEquals(expected[i][1], string[start : stop])
=======================================
--- /branches/experimental/parser/tools/lexer_generator/nfa.py Tue Nov 19 18:25:42 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/nfa.py Wed Nov 20 09:31:53 2013 UTC
@@ -103,11 +103,11 @@
            acc | states if match_func(key, value) else acc)
     return reduce(f, self.__transitions.items(), set())

-  def next_states_with_char(self, value):
-    return self.__matches(lambda k, v : k.matches_char(v), value)
+  def transition_state_iter_for_char(self, value):
+    return iter(self.__matches(lambda k, v : k.matches_char(v), value))

-  def key_matches(self, value):
-    return self.__matches(lambda k, v : k.is_superset_of_key(v), value)
+  def transition_state_iter_for_key(self, value):
+    return iter(self.__matches(lambda k, v : k.is_superset_of_key(v), value))

 class Nfa(Automaton):

@@ -117,8 +117,8 @@
     self.__end = end
     self.__verify(nodes_created)

-  def start_set(self):
-    return set([self.__start])
+  def start_state(self):
+    return self.__start

   def terminal_set(self):
     return set([self.__end])
@@ -130,45 +130,15 @@
     count = self.visit_all_states(f, 0)
     assert count == nodes_created

-  @staticmethod
-  def __close(states):
-    f = lambda acc, node: acc | node.epsilon_closure()
-    return reduce(f, states, set(states))
-
-  def matches(self, string):
-    valid_states = Nfa.__close(set([self.__start]))
-    for c in string:
-      f = lambda acc, state: acc | state.next_states_with_char(c)
-      transitions = reduce(f, valid_states, set())
-      if not transitions:
-        return False
-      valid_states = Nfa.__close(transitions)
-    return self.__end in valid_states
-
   @staticmethod
   def __gather_transition_keys(state_set):
     keys = set(chain(*map(lambda state: state.key_iter(), state_set)))
     keys.discard(TransitionKey.epsilon())
     return TransitionKey.disjoint_keys(keys)

-  @staticmethod
-  def __gather_actions(states):
-    action = None
-    for state in states:
-      if not state.action():
-        continue
-      if not action:
-        action = state.action()
-        continue
-      if state.action().precedence() == action.precedence():
-        assert state.action() == action
-      elif state.action().precedence() < action.precedence():
-        action = state.action()
-    return action
-
   @staticmethod
   def __to_dfa(nfa_state_set, dfa_nodes, end_node):
-    nfa_state_set = Nfa.__close(nfa_state_set)
+    nfa_state_set = Automaton.epsilon_closure(nfa_state_set)
     assert nfa_state_set
     name = ".".join(str(x.node_number()) for x in sorted(nfa_state_set))
     if name in dfa_nodes:
@@ -176,12 +146,11 @@
     dfa_nodes[name] = {
       'transitions': {},
       'terminal': end_node in nfa_state_set,
-      'action' : Nfa.__gather_actions(nfa_state_set)}
+      'action' : Action.dominant_action(nfa_state_set)}
     for key in Nfa.__gather_transition_keys(nfa_state_set):
       match_states = set()
-      f = lambda acc, state: acc | state.key_matches(key)
-      for state in reduce(f, nfa_state_set, set()):
-        match_states.add(state)
+      f = lambda state: state.transition_state_iter_for_key(key)
+      match_states |= set(chain(*map(f, nfa_state_set)))
       transition_state = Nfa.__to_dfa(match_states, dfa_nodes, end_node)
       dfa_nodes[name]['transitions'][key] = transition_state
     return name

--
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
--- You received this message because you are subscribed to the Google Groups "v8-dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
For more options, visit https://groups.google.com/groups/opt_out.

Reply via email to