Hi,
this patch updates the profile after the hoist_guard transformation that was
added in 2015.  I wonder why this transform is bundled in tree-ssa-loop-unswitch
and not enabled at -O2/-Os.  It converts

     while (1)                                                                  
       {                                                                        
         [header]
         loop_phi_nodes;                                                        
         something1;                                                            
         if (cond1)                                                             
           body;                                                                
         nvar = phi(orig, bvar) ... for all variables changed in body;          
         [guard_end]                                                            
         something2;                                                            
         if (cond2)                                                             
           break;                                                               
         something3;                                                            
       }                                                                        

to

   if (cond1)
     while (1)                                                                  
       {                                                                        
         [header]
         loop_phi_nodes;                                                        
         something1;                                                            
         body;                                                                
         [guard_end]                                                            
         something2;                                                            
         if (cond2)                                                             
           break;                                                               
         something3;                                                            
       }                                                                        

Which, unlike normal if conversion, seems to almost always be a win because it
does not duplicate any code.  While the path where the loop executes 0 times has
one extra if (cond1) on it, this seems to be quite a reasonable tradeoff.

Bootstrapped/regtested x86_64-linux, will commit it tomorrow unless there
are complaints.

        * gcc.dg/loop-unswitch-2.c: Scan the unswitch dump for absence of
        "Invalid sum".
        * gcc.dg/loop-unswitch-3.c: Likewise.

        * tree-ssa-loop-unswitch.c (hoist_guard): Update profile.
Index: testsuite/gcc.dg/loop-unswitch-2.c
===================================================================
--- testsuite/gcc.dg/loop-unswitch-2.c  (revision 245196)
+++ testsuite/gcc.dg/loop-unswitch-2.c  (working copy)
@@ -12,4 +12,5 @@ void foo (float **a, float **b, float *c
 }
 
 /* { dg-final { scan-tree-dump-times "guard hoisted" 3 "unswitch" } } */
+/* { dg-final { scan-tree-dump-not "Invalid sum" "unswitch" } } */
 
Index: testsuite/gcc.dg/loop-unswitch-3.c
===================================================================
--- testsuite/gcc.dg/loop-unswitch-3.c  (revision 245196)
+++ testsuite/gcc.dg/loop-unswitch-3.c  (working copy)
@@ -22,5 +22,6 @@ float *foo(int ustride, int size, float
 }
 
 /* { dg-final { scan-tree-dump-times "guard hoisted" 1 "unswitch" } } */
+/* { dg-final { scan-tree-dump-not "Invalid sum" "unswitch" } } */
 
 
Index: tree-ssa-loop-unswitch.c
===================================================================
--- tree-ssa-loop-unswitch.c    (revision 245196)
+++ tree-ssa-loop-unswitch.c    (working copy)
@@ -787,6 +787,7 @@ hoist_guard (struct loop *loop, edge gua
   edge te, fe, e, new_edge;
   gimple *stmt;
   basic_block guard_bb = guard->src;
+  edge not_guard;
   gimple_stmt_iterator gsi;
   int flags = 0;
   bool fix_dom_of_exit;
@@ -818,18 +819,80 @@ hoist_guard (struct loop *loop, edge gua
   update_stmt (cond_stmt);
   /* Create new loop pre-header.  */
   e = split_block (pre_header, last_stmt (pre_header));
+  if (dump_file && (dump_flags & TDF_DETAILS))
+    fprintf (dump_file, "  Moving guard %i->%i (prob %i) to bb %i, "   
+            "new preheader is %i\n",
+            guard->src->index, guard->dest->index, guard->probability,
+            e->src->index, e->dest->index);
+
   gcc_assert (loop_preheader_edge (loop)->src == e->dest);
+
   if (guard == fe)
     {
       e->flags = EDGE_TRUE_VALUE;
       flags |= EDGE_FALSE_VALUE;
+      not_guard = te;
     }
   else
     {
       e->flags = EDGE_FALSE_VALUE;
       flags |= EDGE_TRUE_VALUE;
+      not_guard = fe;
     }
   new_edge = make_edge (pre_header, exit->dest, flags);
+
+  /* Determine the probability that we skip the loop.  Assume that loop has
+     same average number of iterations regardless outcome of guard.  */
+  new_edge->probability = guard->probability;
+  int skip_count = guard->src->count
+                  ? RDIV (guard->count * pre_header->count, guard->src->count)
+                  : apply_probability (guard->count, new_edge->probability);
+
+  if (skip_count > e->count)
+    {
+      fprintf (dump_file, "  Capping count; expect profile inconsistency\n");
+      skip_count = e->count;
+    }
+  new_edge->count = skip_count;
+  if (dump_file && (dump_flags & TDF_DETAILS))
+    fprintf (dump_file, "  Estimated probability of skipping loop is %i\n",
+            new_edge->probability);
+
+  /* Update profile after the transform:
+
+     First decrease count of path from newly hoisted loop guard
+     to loop header...  */
+  e->count -= skip_count;
+  e->probability = REG_BR_PROB_BASE - new_edge->probability;
+  e->dest->count = e->count;
+  e->dest->frequency = EDGE_FREQUENCY (e);
+
+  /* ... now update profile to represent that original guard will be optimized
+     away ...  */
+  guard->probability = 0;
+  guard->count = 0;
+  not_guard->probability = REG_BR_PROB_BASE;
+  /* This count is wrong (frequency of not_guard does not change),
+     but will be scaled later.  */
+  not_guard->count = guard->src->count;
+
+  /* ... finally scale everything in the loop except for guarded basic blocks
+     where profile does not change.  */
+  basic_block *body = get_loop_body (loop);
+  
+  if (dump_file && (dump_flags & TDF_DETAILS))
+    fprintf (dump_file, "  Scaling nonguarded BBs in loop:");
+  for (unsigned int i = 0; i < loop->num_nodes; i++)
+    {
+      basic_block bb = body[i];
+      if (!dominated_by_p (CDI_DOMINATORS, bb, not_guard->dest))
+       {
+         if (dump_file && (dump_flags & TDF_DETAILS))
+           fprintf (dump_file, " %i", bb->index);
+          scale_bbs_frequencies_int (&bb, 1, e->probability, REG_BR_PROB_BASE);
+       }
+    }
+
   if (fix_dom_of_exit)
     set_immediate_dominator (CDI_DOMINATORS, exit->dest, pre_header);
   /* Add NEW_ADGE argument for all phi in post-header block.  */
@@ -856,7 +919,7 @@ hoist_guard (struct loop *loop, edge gua
     }
 
   if (dump_file && (dump_flags & TDF_DETAILS))
-    fprintf (dump_file, "  guard hoisted.\n");
+    fprintf (dump_file, "\n  guard hoisted.\n");
 }
 
 /* Return true if phi argument for exit edge can be used

Reply via email to