Revision: 17460
Author:   [email protected]
Date:     Mon Nov  4 15:04:49 2013 UTC
Log:      Experimental parser: add embedded actions

[email protected]
BUG=

Review URL: https://codereview.chromium.org/57923002
http://code.google.com/p/v8/source/detail?r=17460

Modified:
 /branches/experimental/parser/tools/lexer_generator/automata_test.py
 /branches/experimental/parser/tools/lexer_generator/dfa.py
 /branches/experimental/parser/tools/lexer_generator/nfa.py
 /branches/experimental/parser/tools/lexer_generator/transition_keys.py

=======================================
--- /branches/experimental/parser/tools/lexer_generator/automata_test.py Thu Oct 31 14:36:42 2013 UTC +++ /branches/experimental/parser/tools/lexer_generator/automata_test.py Mon Nov 4 15:04:49 2013 UTC
@@ -30,16 +30,17 @@
 from nfa import NfaBuilder
 from dfa import Dfa

+def dfa_from_nfa(nfa):
+  (start_name, dfa_nodes) = nfa.compute_dfa()
+  return Dfa(start_name, dfa_nodes)
+
 def build_automata(string):
-  graph = RegexParser.parse(string)
-  nfa = NfaBuilder().nfa(graph)
-  (start_name, dfa_nodes, end_nodes) = nfa.compute_dfa()
-  dfa = Dfa(start_name, dfa_nodes, end_nodes)
-  return (nfa, dfa)
+  nfa = NfaBuilder().nfa(RegexParser.parse(string))
+  return (nfa, dfa_from_nfa(nfa))

 class AutomataTestCase(unittest.TestCase):

-    # (pattern, should match, shouldn't match)
+    # (pattern, should match, should not match)
     __test_data = [
       ("a", ["a"], ["b", ""]),
       ("ab", ["ab"], ["bb", ""]),
@@ -71,5 +72,30 @@
         nfa.to_dot()
         dfa.to_dot()

+    def test_actions(self):
+      left_action = ("LEFT_ACTION",)
+      right_action = ("RIGHT_ACTION",)
+      left = RegexParser.parse("left")
+      right = RegexParser.parse("right")
+      left = NfaBuilder.add_action(left, left_action)
+      right = NfaBuilder.add_action(right, right_action)
+      composite = ('ONE_OR_MORE', NfaBuilder.or_graphs([left, right]))
+      nfa = NfaBuilder().nfa(composite)
+      dfa = dfa_from_nfa(nfa)
+      def verify(string, expected):
+        actions = list(dfa.collect_actions(string))
+        assertEqual(len(expected), len(actions))
+        for i, action in enumerate(actions):
+          assertEqual(action[i], expected[i])
+      def verify_miss(string, expected):
+        verify(string, expected + [('MISS',)])
+      def verify_hit(string, expected):
+        verify(string, expected + [('TERMINATE',)])
+      (l, r) = left_action, right_action
+      verify_hit("left", [l])
+      verify_miss("lefta", [l])
+      verify_hit("leftrightleftright", [l, r, l, r])
+      verify_miss("leftrightleftrightx", [l, r, l, r])
+
 if __name__ == '__main__':
-    unittest.main()
+   unittest.main()
=======================================
--- /branches/experimental/parser/tools/lexer_generator/dfa.py Thu Oct 31 08:19:46 2013 UTC +++ /branches/experimental/parser/tools/lexer_generator/dfa.py Mon Nov 4 15:04:49 2013 UTC
@@ -26,6 +26,7 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

 from nfa import Nfa
+from transition_keys import TransitionKey

 class DfaState:

@@ -40,28 +41,45 @@
   def node_number(self):
     return self.__node_number

-  def add_transition(self, key, state):
+  def add_transition(self, key, action, state):
     assert not self.__transitions.has_key(key)
-    self.__transitions[key] = state
+    self.__transitions[key] = (state, action)
+
+  def set_action(self, action):
+    assert self.__action == None
+    self.__action = action

   def transitions(self):
     return self.__transitions

 class Dfa:

-  def __init__(self, start_name, mapping, end_names):
+  def __init__(self, start_name, mapping):
+    self.__terminal_set = set()
     name_map = {}
-    offset = 0
-    self.__terminal_set = set()
-    for name in mapping.keys():
-      dfa_state = DfaState(name, offset)
-      name_map[name] = dfa_state
-      offset += 1
-      if name in end_names:
-        self.__terminal_set.add(dfa_state)
-    for name, values in mapping.items():
-      for key, value in values.items():
-        name_map[name].add_transition(key, name_map[value])
+    action_map = {}
+    for i, name in enumerate(mapping.keys()):
+      name_map[name] = DfaState(name, i)
+    for name, node_data in mapping.items():
+      node = name_map[name]
+      if node_data['terminal']:
+        self.__terminal_set.add(node)
+      inversion = {}
+      for key, (state, action) in node_data['transitions'].items():
+        if not state in inversion:
+          inversion[state] = {}
+        # TODO fix this
+        action_key = str(action)
+        if not action_key in action_map:
+          action_map[action_key] = action
+        if not action_key in inversion[state]:
+          inversion[state][action_key] = []
+        inversion[state][action_key].append(key)
+      for state, values in inversion.items():
+        for action_key, keys in values.items():
+          merged_key = TransitionKey.merged_key(keys)
+          action = action_map[action_key]
+          node.add_transition(merged_key, action, name_map[state])
     self.__start = name_map[start_name]
     assert self.__terminal_set

@@ -69,35 +87,43 @@
   def __visit_edges(start, visitor, state):
     edge = set([start])
     visited = set()
+    first = lambda v: [x[0] for x in v]
     while edge:
       f = lambda (next_edge, state), node: (
-        next_edge | set(node.transitions().values()),
+        next_edge | set(first(node.transitions().values())),
         visitor(node, state))
       (next_edge, state) = reduce(f, edge, (set(), state))
       visited |= edge
       edge = next_edge - visited
     return state

-  def matches(self, string):
+  def collect_actions(self, string):
     state = self.__start
     for c in string:
-      next_state = None
-      for key, transition_state in state.transitions().items():
-        if key.matches_char(c):
-          next_state = transition_state
-          break
-      if not next_state:
-        return False
-      state = next_state
-    return state in self.__terminal_set
+ next = [s for k, s in state.transitions().items() if k.matches_char(c)]
+      if not next:
+        yield ('MISS',)
+        return
+      assert len(next) == 1
+      (state, action) = next[0]
+      if action:
+        yield action
+    if state in self.__terminal_set:
+      yield ('TERMINATE', )
+    else:
+      yield ('MISS',)
+
+  def matches(self, string):
+    actions = list(self.collect_actions(string))
+    return actions and actions[-1][0] == 'TERMINATE'

   def to_dot(self):

     def f(node, node_content):
-      for key, value in node.transitions().items():
+      for key, (state, action) in node.transitions().items():
         node_content.append(
           "  S_%s -> S_%s [ label = \"%s\" ];" %
-            (node.node_number(), value.node_number(), key))
+            (node.node_number(), state.node_number(), key))
       return node_content

     node_content = self.__visit_edges(self.__start, f, [])
=======================================
--- /branches/experimental/parser/tools/lexer_generator/nfa.py Thu Oct 31 11:01:22 2013 UTC +++ /branches/experimental/parser/tools/lexer_generator/nfa.py Mon Nov 4 15:04:49 2013 UTC
@@ -36,6 +36,7 @@
     self.__unclosed = set()
     self.__node_number = node_number
     self.__epsilon_closure = None
+    self.__transition_action = None

   def node_number(self):
     return self.__node_number
@@ -48,45 +49,54 @@
     assert self.__epsilon_closure == None
     self.__epsilon_closure = frozenset(closure)

+  def set_transition_action(self, action):
+    assert not self.is_closed()
+    assert self.__transition_action == None
+    self.__transition_action = action
+
   def transitions(self):
     assert self.is_closed()
     return self.__transitions

-  def get_epsilon_transitions(self):
-    key = TransitionKey.epsilon();
-    if not key in self.__transitions: return frozenset()
-    return frozenset(self.__transitions[key])
+  def next_states(self, key_filter):
+    assert self.is_closed()
+    first = lambda v: [x[0] for x in v]
+ f = lambda acc, (k, v) : acc if not key_filter(k) else acc | set(first(v))
+    return reduce(f, self.__transitions.items(), set())

-  def __add_transition(self, key, value):
-    if value == None:
+  def __add_transition(self, key, action, next_state):
+    if next_state == None:
+      assert not action
       assert not self.is_closed(), "already closed"
       self.__unclosed.add(key)
       return
     if not key in self.__transitions:
       self.__transitions[key] = set()
-    self.__transitions[key].add(value)
+    self.__transitions[key].add((next_state, action))

   def add_unclosed_transition(self, key):
     assert key != TransitionKey.epsilon()
-    self.__add_transition(key, None)
+    self.__add_transition(key, None, None)

   def add_epsilon_transition(self, state):
     assert state != None
-    self.__add_transition(TransitionKey.epsilon(), state)
+    self.__add_transition(TransitionKey.epsilon(), None, state)

   def is_closed(self):
     return self.__unclosed == None

   def close(self, end):
     assert not self.is_closed()
+    unclosed, self.__unclosed = self.__unclosed, None
+    action, self.__transition_action = self.__transition_action, None
     if end == None:
-      assert not self.__unclosed
-    else:
-      for key in self.__unclosed:
-        self.__add_transition(key, end)
-      if not self.__unclosed:
-        self.add_epsilon_transition(end)
-    self.__unclosed = None
+      assert not unclosed
+      assert not action
+      return
+    for key in unclosed:
+      self.__add_transition(key, action, end)
+    if not unclosed:
+      self.__add_transition(TransitionKey.epsilon(), action, end)

   def __matches(self, match_func, value):
     f = lambda acc, (k, vs): acc | vs if match_func(k, value) else acc
@@ -106,13 +116,13 @@
 class NfaBuilder:

   def __init__(self):
+    self.__node_number = 0
     self.__operation_map = {}
     self.__members = getmembers(self)

   def __new_state(self):
-    node_number = self.node_number
-    self.node_number = self.node_number + 1
-    return NfaState(node_number)
+    self.__node_number += 1
+    return NfaState(self.__node_number - 1)

   def __or(self, graph):
     start = self.__new_state()
@@ -165,6 +175,12 @@
   def __any(self, graph):
     return self.__key_state(TransitionKey.any())

+  def __action(self, graph):
+    result = self.__process(graph[1])
+    for end in result[1]:
+      end.set_transition_action(graph[2])
+    return result
+
   def __process(self, graph):
     assert type(graph) == TupleType
     method = "_NfaBuilder__" + graph[0].lower()
@@ -179,12 +195,24 @@
       end.close(new_end)

   def nfa(self, graph):
-    self.node_number = 0
+    start_node_number = self.__node_number
     (start, ends) = self.__process(graph)
     end =  self.__new_state()
     self.__patch_ends(ends, end)
     end.close(None)
-    return Nfa(start, end, self.node_number)
+    return Nfa(start, end, self.__node_number - start_node_number)
+
+  @staticmethod
+  def add_action(graph, action):
+    return ('ACTION', graph, action)
+
+  @staticmethod
+  def or_graphs(graphs):
+    return reduce(lambda acc, g: ('OR', acc, g), graphs)
+
+  @staticmethod
+  def cat_graphs(graphs):
+    return reduce(lambda acc, g: ('CAT', acc, g), graphs)

 class Nfa:

@@ -208,9 +236,7 @@

   def __visit_all_edges(self, visitor, state):
     edge = set([self.__start])
-    def next_edge(node):
-      f = lambda acc, values: acc | values
-      return reduce(f, node.transitions().values(), set())
+    next_edge = lambda node: node.next_states(lambda x : True)
     return Nfa.__visit_edges(edge, next_edge, visitor, state)

   def __verify(self, nodes_created):
@@ -229,7 +255,8 @@
       def inner(node, closure):
         closure.add(node)
         return closure
-      next_edge = lambda node : node.get_epsilon_transitions()
+      is_epsilon = lambda k: k == TransitionKey.epsilon()
+      next_edge = lambda node : node.next_states(is_epsilon)
       edge = next_edge(node)
       closure = Nfa.__visit_edges(edge, next_edge, inner, set())
       node.set_epsilon_closure(closure)
@@ -245,35 +272,43 @@
     valid_states = Nfa.__close(set([self.__start]))
     for c in string:
       f = lambda acc, state: acc | state.char_matches(c)
-      valid_states = Nfa.__close(reduce(f, valid_states, set()))
-      if not valid_states:
+      transitions = reduce(f, valid_states, set())
+      if not transitions:
         return False
+      valid_states = Nfa.__close(set([x[0] for x in transitions]))
     return self.__end in valid_states

   @staticmethod
-  def __to_dfa(nfa_state_set, builder):
+  def __to_dfa(nfa_state_set, dfa_nodes, end_node):
     nfa_state_set = Nfa.__close(nfa_state_set)
     assert nfa_state_set
-    name = str([x.node_number() for x in nfa_state_set])
-    (dfa_nodes, end_nodes, end_node) = builder
+    name = ".".join(str(x.node_number()) for x in sorted(nfa_state_set))
     if name in dfa_nodes:
       return name
-    dfa_node = {}
-    dfa_nodes[name] = dfa_node
+    dfa_nodes[name] = {
+      'transitions': {},
+      'terminal': end_node in nfa_state_set}
     for key in NfaState.gather_transition_keys(nfa_state_set):
       f = lambda acc, state: acc | state.key_matches(key)
- dfa_node[key] = Nfa.__to_dfa(reduce(f, nfa_state_set, set()), builder)
-    if end_node in nfa_state_set:
-      end_nodes.add(name)
+      transitions = reduce(f, nfa_state_set, set())
+      match_states = set()
+      actions = set()
+      for (state, action) in transitions:
+        match_states.add(state)
+        if action:
+          actions.add(action)
+      assert len(match_states) == len(transitions)
+      assert not actions or len(actions) == 1
+      action = iter(actions).next() if actions else None
+      transition_state = Nfa.__to_dfa(match_states, dfa_nodes, end_node)
+      dfa_nodes[name]['transitions'][key] = (transition_state, action)
     return name

   def compute_dfa(self):
     self.__compute_epsilon_closures()
     dfa_nodes = {}
-    end_nodes = set()
-    dfa_builder = (dfa_nodes, end_nodes, self.__end)
-    start_name = self.__to_dfa(set([self.__start]), dfa_builder)
-    return (start_name, dfa_nodes, end_nodes)
+    start_name = self.__to_dfa(set([self.__start]), dfa_nodes, self.__end)
+    return (start_name, dfa_nodes)

   def to_dot(self):

@@ -284,7 +319,7 @@
         for value in values:
           node_content.append(
             "  S_%d -> S_%d [ label = \"%s\" ];" %
-              (node.node_number(), value.node_number(), key))
+              (node.node_number(), value[0].node_number(), key))
       return node_content

     node_content = self.__visit_all_edges(f, [])
=======================================
--- /branches/experimental/parser/tools/lexer_generator/transition_keys.py Thu Oct 31 14:46:33 2013 UTC +++ /branches/experimental/parser/tools/lexer_generator/transition_keys.py Mon Nov 4 15:04:49 2013 UTC
@@ -238,6 +238,11 @@
       merged.append(last)
     return merged

+  @staticmethod
+  def merged_key(keys):
+    f = lambda acc, key: acc + list(key.__ranges)
+    return TransitionKey.__key_from_ranges(False, reduce(f, keys, []))
+
   @staticmethod
   def __invert_ranges(ranges):
     inverted = []

--
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
--- You received this message because you are subscribed to the Google Groups "v8-dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
For more options, visit https://groups.google.com/groups/opt_out.

Reply via email to