Hi!

Honza, do you think you could have a look at this P1 fix?

Thanks.

On Wed, Jan 31, 2018 at 10:03:51AM +0000, Bin Cheng wrote:
> Hi,
> This patch fixes invalid profile count information in vectorization peeling.
> Current implementation is a bit confusing for me since it tries to compute
> an overall probability based on scaling probability and change of estimated
> niters.  This patch does it in two steps.  Firstly it does the scaling, then
> it adjusts to new estimated niters by simply adjusting loop's latch count
> information; scaling the loop's count information by the proportion
> new_estimated_niters/old_estimate_niters.  Of course we have to adjust loop
> latch's count information back after scaling.
> 
> Bootstrap and test on x86_64 and AArch64.  gcc.dg/vect/pr79347.c is fixed
> for both PR82965 and PR83991.  Is this OK?
> 
> Thanks,
> bin
> 
> 2018-01-30  Bin Cheng  <bin.ch...@arm.com>
> 
>       PR tree-optimization/82965
>       PR tree-optimization/83991
>       * cfgloopmanip.c (scale_loop_profile): Further scale loop's profile
>       information if the loop was predicted to iterate too many times.

> diff --git a/gcc/cfgloopmanip.c b/gcc/cfgloopmanip.c
> index b9b76d8..1f560b8 100644
> --- a/gcc/cfgloopmanip.c
> +++ b/gcc/cfgloopmanip.c
> @@ -509,7 +509,7 @@ scale_loop_profile (struct loop *loop, 
> profile_probability p,
>                   gcov_type iteration_bound)
>  {
>    gcov_type iterations = expected_loop_iterations_unbounded (loop);
> -  edge e;
> +  edge e, preheader_e;
>    edge_iterator ei;
>  
>    if (dump_file && (dump_flags & TDF_DETAILS))
> @@ -521,77 +521,66 @@ scale_loop_profile (struct loop *loop, 
> profile_probability p,
>              (int)iteration_bound, (int)iterations);
>      }
>  
> +  /* Scale the probabilities.  */
> +  scale_loop_frequencies (loop, p);
> +
>    /* See if loop is predicted to iterate too many times.  */
> -  if (iteration_bound && iterations > 0
> -      && p.apply (iterations) > iteration_bound)
> +  if (iteration_bound == 0 || iterations <= 0
> +      || p.apply (iterations) <= iteration_bound)
> +    return;
> +
> +  e = single_exit (loop);
> +  preheader_e = loop_preheader_edge (loop);
> +  profile_count count_in = preheader_e->count ();
> +  if (e && preheader_e
> +      && count_in > profile_count::zero ()
> +      && loop->header->count.initialized_p ())
>      {
> -      /* Fixing loop profile for different trip count is not trivial; the 
> exit
> -      probabilities has to be updated to match and frequencies propagated 
> down
> -      to the loop body.
> -
> -      We fully update only the simple case of loop with single exit that is
> -      either from the latch or BB just before latch and leads from BB with
> -      simple conditional jump.   This is OK for use in vectorizer.  */
> -      e = single_exit (loop);
> -      if (e)
> -     {
> -       edge other_e;
> -       profile_count count_delta;
> +      edge other_e;
> +      profile_count count_delta;
>  
> -          FOR_EACH_EDGE (other_e, ei, e->src->succs)
> -         if (!(other_e->flags & (EDGE_ABNORMAL | EDGE_FAKE))
> -             && e != other_e)
> -           break;
> +      FOR_EACH_EDGE (other_e, ei, e->src->succs)
> +     if (!(other_e->flags & (EDGE_ABNORMAL | EDGE_FAKE))
> +         && e != other_e)
> +       break;
>  
> -       /* Probability of exit must be 1/iterations.  */
> -       count_delta = e->count ();
> -       e->probability = profile_probability::always ()
> +      /* Probability of exit must be 1/iterations.  */
> +      count_delta = e->count ();
> +      e->probability = profile_probability::always ()
>                               .apply_scale (1, iteration_bound);
> -       other_e->probability = e->probability.invert ();
> -       count_delta -= e->count ();
> -
> -       /* If latch exists, change its count, since we changed
> -          probability of exit.  Theoretically we should update everything 
> from
> -          source of exit edge to latch, but for vectorizer this is enough.  
> */
> -       if (loop->latch
> -           && loop->latch != e->src)
> -         {
> -           loop->latch->count += count_delta;
> -         }
> -     }
> +      other_e->probability = e->probability.invert ();
>  
>        /* Roughly speaking we want to reduce the loop body profile by the
>        difference of loop iterations.  We however can do better if
>        we look at the actual profile, if it is available.  */
> -      p = p.apply_scale (iteration_bound, iterations);
> -
> -      if (loop->header->count.initialized_p ())
> -     {
> -       profile_count count_in = profile_count::zero ();
> +      p = profile_probability::always ();
>  
> -       FOR_EACH_EDGE (e, ei, loop->header->preds)
> -         if (e->src != loop->latch)
> -           count_in += e->count ();
> -
> -       if (count_in > profile_count::zero () )
> -         {
> -           p = count_in.probability_in (loop->header->count.apply_scale
> -                                              (iteration_bound, 1));
> -         }
> -     }
> +      count_in = count_in.apply_scale (iteration_bound, 1);
> +      p = count_in.probability_in (loop->header->count);
>        if (!(p > profile_probability::never ()))
>       p = profile_probability::very_unlikely ();
> -    }
>  
> -  if (p >= profile_probability::always ()
> -      || !p.initialized_p ())
> -    return;
> +      if (p >= profile_probability::always ()
> +       || !p.initialized_p ())
> +     return;
>  
> -  /* Scale the actual probabilities.  */
> -  scale_loop_frequencies (loop, p);
> -  if (dump_file && (dump_flags & TDF_DETAILS))
> -    fprintf (dump_file, ";; guessed iterations are now %i\n",
> -          (int)expected_loop_iterations_unbounded (loop));
> +      /* If latch exists, change its count, since we changed
> +      probability of exit.  Theoretically we should update everything from
> +      source of exit edge to latch, but for vectorizer this is enough.  */
> +      if (loop->latch && loop->latch != e->src)
> +     loop->latch->count += count_delta;
> +
> +      /* Scale the probabilities.  */
> +      scale_loop_frequencies (loop, p);
> +
> +      /* Change latch's count back.  */
> +      if (loop->latch && loop->latch != e->src)
> +     loop->latch->count -= count_delta;
> +
> +      if (dump_file && (dump_flags & TDF_DETAILS))
> +     fprintf (dump_file, ";; guessed iterations are now %i\n",
> +              (int)expected_loop_iterations_unbounded (loop));
> +    }
>  }
>  
>  /* Recompute dominance information for basic blocks outside LOOP.  */


        Jakub

Reply via email to