Revision: 19502
Author:   [email protected]
Date:     Thu Feb 20 08:59:23 2014 UTC
Log:      Experimental parser: add dfa path iterator

[email protected]

BUG=

Review URL: https://codereview.chromium.org/172893003
http://code.google.com/p/v8/source/detail?r=19502

Modified:
 /branches/experimental/parser/src/lexer/lexer_py.re
/branches/experimental/parser/tools/lexer_generator/backtracking_generator.py
 /branches/experimental/parser/tools/lexer_generator/code_generator.jinja
 /branches/experimental/parser/tools/lexer_generator/dfa.py
 /branches/experimental/parser/tools/lexer_generator/generator.py

=======================================
--- /branches/experimental/parser/src/lexer/lexer_py.re Wed Feb 19 15:52:12 2014 UTC +++ /branches/experimental/parser/src/lexer/lexer_py.re Thu Feb 20 08:59:23 2014 UTC
@@ -142,8 +142,8 @@
 line_terminator+   <|line_terminator|>
 /[:whitespace:]+/  <|skip|>

-"\""           <set_marker(1)||DoubleQuoteString>
-"'"            <set_marker(1)||SingleQuoteString>
+"\""           <set_marker(1)|token(ILLEGAL)|DoubleQuoteString>
+"'"            <set_marker(1)|token(ILLEGAL)|SingleQuoteString>

 # all keywords
 "break"       <|token(BREAK)|>
@@ -192,11 +192,12 @@
 "with"        <|token(WITH)|>
 "yield"       <|token(YIELD)|>

-identifier_start <|token(IDENTIFIER)|Identifier>
-unicode_escape <check_escaped_identifier_start|token(IDENTIFIER)| Identifier>
+identifier_start  <|token(IDENTIFIER)|Identifier>
+unicode_escape <check_escaped_identifier_start|token(IDENTIFIER)| Identifier>
+"\\"              <|token(ILLEGAL)|>  # ambiguous backtracking otherwise

 eos             <terminate>
-default_action  <do_token_and_go_forward(ILLEGAL)>
+default_action  <default>

 <<DoubleQuoteString>>
 epsilon                       <StringSubgraph>
@@ -222,8 +223,9 @@
 line_terminator               <|token(ILLEGAL)|>

 <<Identifier>>
-identifier_char <|token(IDENTIFIER)|continue>
-/\\u[:hex_digit:]{4}/ <check_escaped_identifier_part|token(IDENTIFIER)| continue>
+identifier_char  <|token(IDENTIFIER)|continue>
+unicode_escape   <check_escaped_identifier_part|token(IDENTIFIER)|continue>
+"\\"             <|token(ILLEGAL)|>  # ambiguous backtracking otherwise

 <<SingleLineComment>>
 line_terminator  <|line_terminator|>
@@ -232,7 +234,6 @@

 <<MultiLineComment>>
 /\*+\//          <|skip|>
-# TODO(dcarney): find a way to generate the below rule
 /\*+[^\/*]/      <||continue>
 line_terminator  <line_terminator_in_multiline_comment||continue>
 catch_all        <||continue>
=======================================
--- /branches/experimental/parser/tools/lexer_generator/backtracking_generator.py Wed Feb 19 15:52:12 2014 UTC +++ /branches/experimental/parser/tools/lexer_generator/backtracking_generator.py Thu Feb 20 08:59:23 2014 UTC
@@ -40,6 +40,36 @@
     self.__default_action = default_action
     self.__log = log

+  @staticmethod
+  def __process_error_states(dfa, error_map, action_state_map):
+    path_map = { k : [] for k in error_map.iterkeys()}
+    error_states = set(error_map.iterkeys())
+    for path, states_in_path, terminal in dfa.path_iter():
+      matching = error_states & states_in_path
+      if not matching:
+        continue
+      last_action = None
+      path_length = 0
+      for (key, state) in path:
+        if state in error_states:
+          assert last_action != None
+          path_map[state].append([k for k, s in path[1:path_length]])
+        elif state in action_state_map:
+          last_action = state
+        path_length += 1
+    print reduce(lambda acc, x : acc + len(x), path_map.itervalues(), 0)
+    for state, key_paths in path_map.iteritems():
+      count = 0
+      action_str = map(str, error_map[state][0])
+      for key_path in key_paths:
+        print "path %d %s %s" % (
+          state.node_number(),
+          action_str,
+          " --> ".join(map(str, key_path)))
+        count += 1
+        if count > 10:
+          break
+
   def __generate(self):
     dfa = self.__dfa
     default_action = self.__default_action
@@ -72,6 +102,7 @@
     if (not start_state in incoming_transitions and
         not start_state in action_states):
       action_states[start_state] = default_action
+    error_map = {}
     for state in incoming_transitions.iterkeys():
       if state in omega_states or state in action_states:
         continue
@@ -98,12 +129,11 @@
               next.add(incoming_state)
         seen |= unchecked
         unchecked = next - seen
-      # accumulate unique actions
+      # Accumulate unique actions.
       actions = set(map(lambda x : action_states[x].term(), match_edge))
       assert actions
       if not len(actions) == 1:
-        # TODO(dcarney): need to warn here after second pass
-        # actions = set([default_action])
+        error_map[state] = actions, match_edge
         continue
       action = iter(actions).next()
       # don't install default actions
@@ -111,6 +141,11 @@
         continue
       store_states |= match_edge
       backtracking_map[state.node_number()] = (action, match_edge)
+    # Bail if error occurred.
+    if error_map:
+      self.__process_error_states(dfa, error_map, action_states)
+      raise Exception('ambiguous backtracking')
+    # Now install backtracking everywhere.
     def install_backtracking_action(new_states, node_number):
       omega_state_id = str(node_number) + '_bt'
       key = TransitionKey.omega()
=======================================
--- /branches/experimental/parser/tools/lexer_generator/code_generator.jinja Wed Feb 19 15:52:12 2014 UTC +++ /branches/experimental/parser/tools/lexer_generator/code_generator.jinja Thu Feb 20 08:59:23 2014 UTC
@@ -139,8 +139,8 @@
   {% elif type == 'do_stored_token' %}
     DO_TOKEN(stored_token)
     return;
-  {% elif type == 'do_token_and_go_forward' %}
-    DO_TOKEN(Token::{{args[0]}});
+  {% elif type == 'default' %}
+    DO_TOKEN(Token::ILLEGAL);
     FORWARD();
     RESET_START();
     return;
=======================================
--- /branches/experimental/parser/tools/lexer_generator/dfa.py Wed Feb 19 15:52:12 2014 UTC +++ /branches/experimental/parser/tools/lexer_generator/dfa.py Thu Feb 20 08:59:23 2014 UTC
@@ -168,3 +168,37 @@
         incoming_transitions[transition_state].append((key, state))
     self.visit_all_states(f)
     return incoming_transitions
+
+  def path_iter(self):
+    '''Walk all paths through the dfa, ending at nodes with no transitions
+ or the first reappearance of a node. Yields data structures which cannot
+    be modified or stored.
+    Yields path: [(key, state)], states_in_path: set, is_terminal : boolean
+    path always starts with (None, state_state)'''
+    start_state = self.start_state()
+    path = [(None, start_state)]
+    in_path = set([start_state])
+    iters = [(start_state.key_state_iter(), False)]
+    while path:
+      assert len(path) == len(in_path)
+      assert len(path) == len(iters)
+      try:
+        (it, called) = iters[-1]
+        (key, state) = it.next()
+        if not called:
+          iters[-1] = (it, True)
+        path.append((key, state))
+        if state in in_path:
+          # Terminate nested iteration.
+          yield path, in_path, False
+          path.pop()
+          continue
+        # Continue nesting.
+        in_path.add(state)
+        iters.append((state.key_state_iter(), False))
+      except StopIteration:
+        # No more transitions in iterator, pop everything.
+        (it, called) = iters.pop()
+        if not called:
+          yield path, in_path, True
+        in_path.remove(path.pop()[1])
=======================================
--- /branches/experimental/parser/tools/lexer_generator/generator.py Wed Feb 19 10:54:59 2014 UTC +++ /branches/experimental/parser/tools/lexer_generator/generator.py Thu Feb 20 08:59:23 2014 UTC
@@ -141,6 +141,7 @@
   parser.add_argument('--debug-code', action='store_true')
   parser.add_argument('--profile', action='store_true')
   parser.add_argument('--rule-html')
+  parser.add_argument('--count-paths', action='store_true')
   args = parser.parse_args()

   minimize_default = not args.no_minimize_default
@@ -168,6 +169,13 @@
         dfa.node_count(), mdfa.node_count())
     DfaMinimizer.set_verify(True)

+  if args.count_paths:
+    path_count = 0
+    print 'counting'
+ for path in rule_processor.default_automata().minimal_dfa().path_iter():
+      path_count += 1
+    print 'done', path_count
+
   html_file = args.html
   if html_file:
     html = generate_html(

--
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
--- You received this message because you are subscribed to the Google Groups "v8-dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
For more options, visit https://groups.google.com/groups/opt_out.

Reply via email to