Hi,
This patch fixes invalid profile count information in vectorization peeling.
Current implementation is a bit confusing for me since it tries to compute
an overall probability based on scaling probability and change of estimated
niters.  This patch does it in two steps.  Firstly it does the scaling, then
it adjusts to new estimated niters by simply adjusting loop's latch count
information; scaling the loop's count information by the proportion
new_estimated_niters/old_estimate_niters.  Of course we have to adjust loop
latch's count information back after scaling.

Bootstrap and test on x86_64 and AArch64.  gcc.dg/vect/pr79347.c is fixed
for both PR82965 and PR83991.  Is this OK?

Thanks,
bin

2018-01-30  Bin Cheng  <bin.ch...@arm.com>

        PR tree-optimization/82965
        PR tree-optimization/83991
        * cfgloopmanip.c (scale_loop_profile): Further scale loop's profile
        information if the loop was predicted to iterate too many times.
diff --git a/gcc/cfgloopmanip.c b/gcc/cfgloopmanip.c
index b9b76d8..1f560b8 100644
--- a/gcc/cfgloopmanip.c
+++ b/gcc/cfgloopmanip.c
@@ -509,7 +509,7 @@ scale_loop_profile (struct loop *loop, profile_probability 
p,
                    gcov_type iteration_bound)
 {
   gcov_type iterations = expected_loop_iterations_unbounded (loop);
-  edge e;
+  edge e, preheader_e;
   edge_iterator ei;
 
   if (dump_file && (dump_flags & TDF_DETAILS))
@@ -521,77 +521,66 @@ scale_loop_profile (struct loop *loop, 
profile_probability p,
               (int)iteration_bound, (int)iterations);
     }
 
+  /* Scale the probabilities.  */
+  scale_loop_frequencies (loop, p);
+
   /* See if loop is predicted to iterate too many times.  */
-  if (iteration_bound && iterations > 0
-      && p.apply (iterations) > iteration_bound)
+  if (iteration_bound == 0 || iterations <= 0
+      || p.apply (iterations) <= iteration_bound)
+    return;
+
+  e = single_exit (loop);
+  preheader_e = loop_preheader_edge (loop);
+  profile_count count_in = preheader_e->count ();
+  if (e && preheader_e
+      && count_in > profile_count::zero ()
+      && loop->header->count.initialized_p ())
     {
-      /* Fixing loop profile for different trip count is not trivial; the exit
-        probabilities has to be updated to match and frequencies propagated 
down
-        to the loop body.
-
-        We fully update only the simple case of loop with single exit that is
-        either from the latch or BB just before latch and leads from BB with
-        simple conditional jump.   This is OK for use in vectorizer.  */
-      e = single_exit (loop);
-      if (e)
-       {
-         edge other_e;
-         profile_count count_delta;
+      edge other_e;
+      profile_count count_delta;
 
-          FOR_EACH_EDGE (other_e, ei, e->src->succs)
-           if (!(other_e->flags & (EDGE_ABNORMAL | EDGE_FAKE))
-               && e != other_e)
-             break;
+      FOR_EACH_EDGE (other_e, ei, e->src->succs)
+       if (!(other_e->flags & (EDGE_ABNORMAL | EDGE_FAKE))
+           && e != other_e)
+         break;
 
-         /* Probability of exit must be 1/iterations.  */
-         count_delta = e->count ();
-         e->probability = profile_probability::always ()
+      /* Probability of exit must be 1/iterations.  */
+      count_delta = e->count ();
+      e->probability = profile_probability::always ()
                                .apply_scale (1, iteration_bound);
-         other_e->probability = e->probability.invert ();
-         count_delta -= e->count ();
-
-         /* If latch exists, change its count, since we changed
-            probability of exit.  Theoretically we should update everything 
from
-            source of exit edge to latch, but for vectorizer this is enough.  
*/
-         if (loop->latch
-             && loop->latch != e->src)
-           {
-             loop->latch->count += count_delta;
-           }
-       }
+      other_e->probability = e->probability.invert ();
 
       /* Roughly speaking we want to reduce the loop body profile by the
         difference of loop iterations.  We however can do better if
         we look at the actual profile, if it is available.  */
-      p = p.apply_scale (iteration_bound, iterations);
-
-      if (loop->header->count.initialized_p ())
-       {
-         profile_count count_in = profile_count::zero ();
+      p = profile_probability::always ();
 
-         FOR_EACH_EDGE (e, ei, loop->header->preds)
-           if (e->src != loop->latch)
-             count_in += e->count ();
-
-         if (count_in > profile_count::zero () )
-           {
-             p = count_in.probability_in (loop->header->count.apply_scale
-                                                (iteration_bound, 1));
-           }
-       }
+      count_in = count_in.apply_scale (iteration_bound, 1);
+      p = count_in.probability_in (loop->header->count);
       if (!(p > profile_probability::never ()))
        p = profile_probability::very_unlikely ();
-    }
 
-  if (p >= profile_probability::always ()
-      || !p.initialized_p ())
-    return;
+      if (p >= profile_probability::always ()
+         || !p.initialized_p ())
+       return;
 
-  /* Scale the actual probabilities.  */
-  scale_loop_frequencies (loop, p);
-  if (dump_file && (dump_flags & TDF_DETAILS))
-    fprintf (dump_file, ";; guessed iterations are now %i\n",
-            (int)expected_loop_iterations_unbounded (loop));
+      /* If latch exists, change its count, since we changed
+        probability of exit.  Theoretically we should update everything from
+        source of exit edge to latch, but for vectorizer this is enough.  */
+      if (loop->latch && loop->latch != e->src)
+       loop->latch->count += count_delta;
+
+      /* Scale the probabilities.  */
+      scale_loop_frequencies (loop, p);
+
+      /* Change latch's count back.  */
+      if (loop->latch && loop->latch != e->src)
+       loop->latch->count -= count_delta;
+
+      if (dump_file && (dump_flags & TDF_DETAILS))
+       fprintf (dump_file, ";; guessed iterations are now %i\n",
+                (int)expected_loop_iterations_unbounded (loop));
+    }
 }
 
 /* Recompute dominance information for basic blocks outside LOOP.  */

Reply via email to