Hi,
it seems I forgot to send the updated patch. Here it is.
We now dump info like:
Checking profitability of path:  5 (16 insns) 3 (2 insns) 34 (2 insns) 33 (4 insns) 32 (1 insns) 10 (3 insns) 6
  Control statement insns: 16
  Overall: 12 insns
  Registering FSM jump thread: (6, 10) incoming edge;  (10, 32)  (32, 33)  (33, 34)  (34, 3)  (3, 5)  (5, 16) nocopy; (5, 16) 

The path is printed backwards, because that is the order in which the loop processes it.

Bootstrapped/regtested x86_64-linux, OK?

Honza

        PR middle-end/77445
        * gcc.dg/tree-ssa/pr77445-2.c: Update testcase to check that all
        threading is done.
        * tree-ssa-threadbackward.c (profitable_jump_thread_path): Dump
        statistics of the analyzed path; allow threading for speed when
        any of the BBs along the path is optimized for speed.

Index: testsuite/gcc.dg/tree-ssa/pr77445-2.c
===================================================================
--- testsuite/gcc.dg/tree-ssa/pr77445-2.c       (revision 245124)
+++ testsuite/gcc.dg/tree-ssa/pr77445-2.c       (working copy)
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -fdump-tree-thread1-details-blocks-stats" } */
+/* { dg-options "-O2 -fdump-tree-thread1-details-blocks-stats -fdump-tree-thread2-details-blocks-stats -fdump-tree-thread3-details-blocks-stats -fdump-tree-thread4-details-blocks-stats" } */
 typedef enum STATES {
        START=0,
        INVALID,
@@ -121,3 +121,7 @@ enum STATES FMS( u8 **in , u32 *transiti
    increase much.  */
 /* { dg-final { scan-tree-dump "Jumps threaded: 1\[1-9\]" "thread1" } } */
 /* { dg-final { scan-tree-dump-times "Invalid sum" 2 "thread1" } } */
+/* { dg-final { scan-tree-dump-not "not considered" "thread1" } } */
+/* { dg-final { scan-tree-dump-not "not considered" "thread2" } } */
+/* { dg-final { scan-tree-dump-not "not considered" "thread3" } } */
+/* { dg-final { scan-tree-dump-not "not considered" "thread4" } } */
Index: tree-ssa-threadbackward.c
===================================================================
--- tree-ssa-threadbackward.c   (revision 245124)
+++ tree-ssa-threadbackward.c   (working copy)
@@ -159,6 +159,10 @@ profitable_jump_thread_path (vec<basic_b
   bool threaded_through_latch = false;
   bool multiway_branch_in_path = false;
   bool threaded_multiway_branch = false;
+  bool contains_hot_bb = false;
+
+  if (dump_file && (dump_flags & TDF_DETAILS))
+    fprintf (dump_file, "Checking profitability of path: ");
 
   /* Count the number of instructions on the path: as these instructions
      will have to be duplicated, we will not record the path if there
@@ -168,6 +172,8 @@ profitable_jump_thread_path (vec<basic_b
     {
       basic_block bb = (*path)[j];
 
+      if (dump_file && (dump_flags & TDF_DETAILS))
+       fprintf (dump_file, " %i", bb->index);
       /* Remember, blocks in the path are stored in opposite order
         in the PATH array.  The last entry in the array represents
         the block with an outgoing edge that we will redirect to the
@@ -177,6 +183,7 @@ profitable_jump_thread_path (vec<basic_b
         branch.  */
       if (j < path_length - 1)
        {
+         int orig_n_insns = n_insns;
          if (bb->loop_father != loop)
            {
              path_crosses_loops = true;
@@ -219,6 +226,9 @@ profitable_jump_thread_path (vec<basic_b
                }
            }
 
+
+         if (!contains_hot_bb && speed_p && j < path_length - 1)
+           contains_hot_bb |= optimize_bb_for_speed_p (bb);
          for (gsi = gsi_after_labels (bb);
               !gsi_end_p (gsi);
               gsi_next_nondebug (&gsi))
@@ -229,8 +239,10 @@ profitable_jump_thread_path (vec<basic_b
                  && !(gimple_code (stmt) == GIMPLE_ASSIGN
                       && gimple_assign_rhs_code (stmt) == ASSERT_EXPR)
                  && !is_gimple_debug (stmt))
-               n_insns += estimate_num_insns (stmt, &eni_size_weights);
+               n_insns += estimate_num_insns (stmt, &eni_size_weights);
            }
+         if (dump_file && (dump_flags & TDF_DETAILS))
+           fprintf (dump_file, " (%i insns)", n_insns-orig_n_insns);
 
          /* We do not look at the block with the threaded branch
             in this loop.  So if any block with a last statement that
@@ -264,7 +276,13 @@ profitable_jump_thread_path (vec<basic_b
      last block in the threading path.  So don't count it against our
      statement count.  */
 
-  n_insns-= estimate_num_insns (stmt, &eni_size_weights);
+  int stmt_insns = estimate_num_insns (stmt, &eni_size_weights);
+  n_insns-= stmt_insns;
+
+  if (dump_file && (dump_flags & TDF_DETAILS))
+    fprintf (dump_file, "\n  Control statement insns: %i\n"
+            "  Overall: %i insns\n",
+            stmt_insns, n_insns);
 
   /* We have found a constant value for ARG.  For GIMPLE_SWITCH
      and GIMPLE_GOTO, we use it as-is.  However, for a GIMPLE_COND
@@ -311,7 +329,11 @@ profitable_jump_thread_path (vec<basic_b
       return NULL;
     }
 
-  if (speed_p && optimize_edge_for_speed_p (taken_edge))
+  /* Threading is profitable if the duplicated path is hot, but also
+     when it separates a cold path from a hot path and so permits later
+     optimization of the hot path.  Be on the aggressive side here.  In some
+     testcases, as in PR 78407, this leads to noticeable improvements.  */
+  if (speed_p && (optimize_edge_for_speed_p (taken_edge) || contains_hot_bb))
     {
       if (n_insns >= PARAM_VALUE (PARAM_MAX_FSM_THREAD_PATH_INSNS))
        {

Reply via email to