On Tue, May 19, 2026 at 1:58 PM Abhishek Kaushik
<[email protected]> wrote:
>
> SCEV sometimes fails to compute the final value of a loop PHI when the
> entry value does not match the value produced by later iterations.  For
> example:
>
>   # y = PHI <y_next, 666>
>   i_next = i + 1;
>   y_next = i_next * 10;
>
> The PHI value is:
>
>   666, 10, 20, 30, ...
>
> This is not a normal affine recurrence.  But if the latch count is
> known to be nonzero, the entry value cannot be the value seen after
> the loop.  The final value is the latch argument evaluated at iteration
> niter - 1.
>
> Teach SCEV final value replacement to handle this case.
>
> This patch was bootstrapped and regression tested on aarch64-linux-gnu.
>
> gcc/:
>
>         * tree-scalar-evolution.cc
>         (compute_final_value_from_loop_phi_latch): New function.
>         (final_value_replacement_loop): Use it.
>
> gcc/testsuite/:
>
>         * gcc.dg/tree-ssa/sccp-loop-phi.c: New test.
>         * gcc.dg/tree-ssa/sccp-loop-phi-not-optimized.c: New test.
> ---
> Hello,
>
> When compute_final_value_from_loop_phi_latch was guarded with just
> integer_zerop (may_be_zero), and niter was a symbolic value it would result in
> a miscompile as number of latch executions could still be zero. So I've added 
> a
> new if condition `if (!tree_expr_nonzero_p (niter))` and a new test file for
> the non-optimized cases.
> But tree_expr_nonzero_p returns false for some non-zero values (e.g. i + 1
> where i is known non-zero value) as tree_binary_nonzero_warnv_p checks
>
>     case PLUS_EXPR:
>       if (ANY_INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_UNDEFINED (type))
>
> which doesn't do anything for unsigned types.
> I also looked at passing just phi instead of phidef, but going from the exit
> phi to the loop header phi needed more calls, so I didn't change it. Please 
> let
> me know if I missed something easier.
>
> Thanks
> Abhishek
>
> Changes since v2:
> - Replace call to number_of_latch_executions with number_of_iterations_exit
> - Add tree_expr_nonzero_p (niter) check to avoid miscompilations.
>
> Changes since v1:
> - Removed no use requirement from loop phi
> - Use number_of_iterations_exit in caller
> - Removed redundant null checks
>
>  .../tree-ssa/sccp-loop-phi-not-optimized.c    | 67 +++++++++++++++
>  gcc/testsuite/gcc.dg/tree-ssa/sccp-loop-phi.c | 55 ++++++++++++
>  gcc/tree-scalar-evolution.cc                  | 86 ++++++++++++++++++-
>  3 files changed, 206 insertions(+), 2 deletions(-)
>  create mode 100644 
> gcc/testsuite/gcc.dg/tree-ssa/sccp-loop-phi-not-optimized.c
>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/sccp-loop-phi.c
>
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/sccp-loop-phi-not-optimized.c 
> b/gcc/testsuite/gcc.dg/tree-ssa/sccp-loop-phi-not-optimized.c
> new file mode 100644
> index 00000000000..139d5216851
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/sccp-loop-phi-not-optimized.c
> @@ -0,0 +1,67 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-sccp-details" } */
> +
> +int
> +nested_unused_phi1 (void)
> +{
> +  int acc = 0;
> +
> +  for (int i = 0; i < 20; i++)
> +    {
> +      int x = 0;
> +      for (int j = 0, y = 123; j != i; j++, y = i + 10)
> +       x = y;
> +      acc += x;
> +    }
> +
> +  return acc;
> +}
> +
> +int
> +nested_unused_phi2 (void)
> +{
> +  int acc = 0;
> +
> +  for (int i = 0; i != 20; i++)
> +    {
> +      int x = 0;
> +      for (int j = 0, y = 123; j != i + 1; j++, y = i + 10)
> +       x = y;
> +      acc += x;
> +    }
> +
> +  return acc;
> +}
> +
> +int
> +nested_unused_phi3 (void)
> +{
> +  int acc = 0;
> +
> +  for (int i = 0; i != 20; i++)
> +    {
> +      int x = 0;
> +      for (int j = 0, y = 123; j != i + 2; j++, y = i + 10)
> +       x = y;
> +      acc += x;
> +    }
> +
> +  return acc;
> +}
> +
> +/* Expected:
> +   nested_unused_phi1 -> 123 + sum_{i=2}^{19} (i + 10) = 492
> +   nested_unused_phi2 -> 123 + sum_{i=1}^{19} (i + 10) = 503
> +   nested_unused_phi3 -> sum_{i=0}^{19} (i + 10)       = 390
> +
> +   These functions could be optimized by sccp, but currently are not because
> +   they are not normal affine recurrence.
> +   compute_final_value_from_loop_phi_latch tries to optimize these cases but
> +   fails to do so for nested_unsed_phi1/2 as the niter in the inner loop 
> might
> +   be zero.
> +   It should technically be able to optimized nested_unused_phi3 as we know
> +   that niter > 0 for the inner loop but tree_expr_nonzero_p (niter) returns
> +   false and we don't.
> +*/
> +
> +/* { dg-final { scan-tree-dump-times "not replacing:" 6 "sccp" } } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/sccp-loop-phi.c 
> b/gcc/testsuite/gcc.dg/tree-ssa/sccp-loop-phi.c
> new file mode 100644
> index 00000000000..3518a1957b3
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/sccp-loop-phi.c
> @@ -0,0 +1,55 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-sccp -fdump-tree-optimized" } */
> +
> +int
> +const_init_unused_phi (void)
> +{
> +  int x = 0;
> +  for (int i = 0, y = 666; i != 19; i++, y = i * 10)
> +    x = y;
> +  return x;
> +}
> +
> +int
> +conditional_init_unused_phi (int c)
> +{
> +  int x = 0;
> +  int y;
> +
> +  if (c != 0)
> +    y = 20;
> +  else
> +    y = 30;
> +
> +  for (int i = 0; i != 19; i++, y = i * 10)
> +    x = y;
> +
> +  return x;
> +}
> +
> +int
> +nested_unused_phi (void)
> +{
> +  int acc = 0;
> +
> +  for (int i = 0; i < 20; i++)
> +    {
> +      int x = 0;
> +      for (int j = 0, y = 123; j != 19; j++, y = i + 10)
> +       x = y;
> +      acc += x;
> +    }
> +
> +  return acc;
> +}
> +
> +/* Expected:
> +   const_init_unused_phi         -> 180
> +   conditional_init_unused_phi   -> 180
> +   nested_unused_phi             -> sum_{i=0}^{19} (i + 10) = 390
> +   There should be no loop left in the optimized dump.
> +*/
> +
> +/* { dg-final { scan-tree-dump-times "return 180" 2 "sccp" } } */
> +/* { dg-final { scan-tree-dump "return 390" "sccp" } } */
> +/* { dg-final { scan-tree-dump-times "goto" 0 "optimized" } } */
> diff --git a/gcc/tree-scalar-evolution.cc b/gcc/tree-scalar-evolution.cc
> index f524786f33b..4b67baaeaf0 100644
> --- a/gcc/tree-scalar-evolution.cc
> +++ b/gcc/tree-scalar-evolution.cc
> @@ -3845,6 +3845,75 @@ analyze_and_compute_bitop_with_inv_effect (class loop* 
> loop, tree phidef,
>    return fold_build2 (code1, type, inv, match_op[0]);
>  }
>
> +/* Try to compute the final value of PHIDEF when PHIDEF is the result of a
> +   loop-header PHI.
> +
> +   This handles the nonzero-latch-count delayed-value form:
> +
> +     y_phi = PHI <latch_arg (latch), init (preheader)>
> +
> +   If the latch count is known to be nonzero, the final value is:
> +
> +     latch_arg evaluated at iteration niter - 1
> +
> +   Return NULL_TREE if the pattern does not apply.  */
> +static tree
> +compute_final_value_from_loop_phi_latch (class loop *loop,
> +       class loop *ex_loop, tree phidef, tree niter, bool* folded_casts)
> +{
> +  if (TREE_CODE (phidef) != SSA_NAME)
> +    return NULL_TREE;
> +
> +  gimple* def_stmt = SSA_NAME_DEF_STMT (phidef);
> +  gphi *header_phi = dyn_cast<gphi*> (def_stmt);
> +  if (!header_phi

As mentioned it feels more natural to pass the phi to this function to avoid
all this.

> +      || gimple_bb (header_phi) != loop->header
> +      || gimple_phi_num_args (header_phi) != 2)
> +    return NULL_TREE;
> +
> +  /* If niter is a symbolic value make sure it can never be zero, otherwise 
> we
> +     do a bad replacement.  */
> +  if (!tree_expr_nonzero_p (niter))

Interesting - may_be_zero is false when niter is actually zero?

Otherwise the patch is OK.

Richard.

> +    return NULL_TREE;
> +
> +  tree latch_arg = PHI_ARG_DEF_FROM_EDGE (header_phi,
> +                                         loop_latch_edge (loop));
> +
> +  tree ev = analyze_scalar_evolution_in_loop (ex_loop,
> +                                             loop,
> +                                             latch_arg,
> +                                             folded_casts);
> +  if (ev == chrec_dont_know)
> +    return NULL_TREE;
> +
> +  bool invariant_p;
> +  if (no_evolution_in_loop_p (ev, ex_loop->num, &invariant_p) && invariant_p)
> +    return ev;
> +  else if (TREE_CODE (ev) == POLYNOMIAL_CHREC
> +          && get_chrec_loop (ev) == ex_loop)
> +  {
> +    tree niter_type = TREE_TYPE (niter);
> +    tree prev_iter = fold_build2 (MINUS_EXPR,
> +                                 niter_type,
> +                                 niter,
> +                                 build_one_cst (niter_type));
> +
> +    tree res = chrec_apply (ex_loop->num, ev, prev_iter);
> +    if (res == chrec_dont_know)
> +      return NULL_TREE;
> +
> +    if (chrec_contains_symbols_defined_in_loop (res, ex_loop->num))
> +    {
> +      res = instantiate_parameters (ex_loop, res);
> +      if (res == chrec_dont_know)
> +       return NULL_TREE;
> +    }
> +
> +    return res;
> +  }
> +  return NULL_TREE;
> +}
> +
>  /* Do final value replacement for LOOP, return true if we did anything.  */
>
>  bool
> @@ -3856,7 +3925,11 @@ final_value_replacement_loop (class loop *loop)
>    if (!exit)
>      return false;
>
> -  tree niter = number_of_latch_executions (loop);
> +  class tree_niter_desc niter_desc;
> +  if (!number_of_iterations_exit (loop, exit, &niter_desc, false))
> +    return false;
> +
> +  tree niter = niter_desc.niter;
>    if (niter == chrec_dont_know)
>      return false;
>
> @@ -3895,7 +3968,7 @@ final_value_replacement_loop (class loop *loop)
>        def = analyze_scalar_evolution_in_loop (ex_loop, loop, def,
>                                               &folded_casts);
>
> -      tree bitinv_def, bit_def;
> +      tree bitinv_def, bit_def, phi_latch_final_value;
>        unsigned HOST_WIDE_INT niter_num;
>
>        if (def != chrec_dont_know)
> @@ -3932,6 +4005,15 @@ final_value_replacement_loop (class loop *loop)
>                                                                    
> niter_num)))
>         def = bit_def;
>
> +      else if (integer_zerop (niter_desc.may_be_zero)
> +              && (phi_latch_final_value
> +                  = compute_final_value_from_loop_phi_latch (loop,
> +                                                             ex_loop,
> +                                                             phidef,
> +                                                             niter,
> +                                                             &folded_casts)))
> +       def = phi_latch_final_value;
> +
>        bool cond_overflow_p;
>        if (!tree_does_not_contain_chrecs (def)
>           || chrec_contains_symbols_defined_in_loop (def, ex_loop->num)
> --
> 2.43.0
>

Reply via email to