On Mon, May 11, 2026 at 3:58 PM Abhishek Kaushik
<[email protected]> wrote:
>
> SCEV sometimes fails to compute the final value of a loop PHI when the
> entry value does not match the value produced by later iterations.  For
> example:
>
>   # y = PHI <y_next, 666>
>   i_next = i + 1;
>   y_next = i_next * 10;
>
> The PHI value is:
>
>   666, 10, 20, 30, ...
>
> This is not a normal affine recurrence.  But if the PHI result is not
> used inside the loop and the latch count is known to be nonzero, the
> entry value cannot be the value seen after the loop.

Why does it matter the PHI result is not used inside of the loop?

> The final value is
> the latch argument evaluated at iteration niter - 1.

That's always true if the latch is known executed, no?

>
> Teach SCEV final value replacement to handle this case.
>
> This patch was bootstrapped and regression tested on aarch64-linux-gnu.
>
> gcc/:
>
>         * tree-scalar-evolution.cc (ssa_name_has_nondebug_use_in_loop_p):
>         New function.
>         (compute_final_value_from_unused_loop_phi): New function.
>         (final_value_replacement_loop): Use it.
>
> gcc/testsuite/:
>
>         * gcc.dg/tree-ssa/sccp-unused-loop-phi.c: New test.
> ---
>  .../gcc.dg/tree-ssa/sccp-unused-loop-phi.c    |  55 +++++++++
>  gcc/tree-scalar-evolution.cc                  | 108 +++++++++++++++++-
>  2 files changed, 162 insertions(+), 1 deletion(-)
>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/sccp-unused-loop-phi.c
>
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/sccp-unused-loop-phi.c 
> b/gcc/testsuite/gcc.dg/tree-ssa/sccp-unused-loop-phi.c
> new file mode 100644
> index 00000000000..3518a1957b3
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/sccp-unused-loop-phi.c
> @@ -0,0 +1,55 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-sccp -fdump-tree-optimized" } */
> +
> +int
> +const_init_unused_phi (void)
> +{
> +  int x = 0;
> +  for (int i = 0, y = 666; i != 19; i++, y = i * 10)
> +    x = y;
> +  return x;
> +}
> +
> +int
> +conditional_init_unused_phi (int c)
> +{
> +  int x = 0;
> +  int y;
> +
> +  if (c != 0)
> +    y = 20;
> +  else
> +    y = 30;
> +
> +  for (int i = 0; i != 19; i++, y = i * 10)
> +    x = y;
> +
> +  return x;
> +}
> +
> +int
> +nested_unused_phi (void)
> +{
> +  int acc = 0;
> +
> +  for (int i = 0; i < 20; i++)
> +    {
> +      int x = 0;
> +      for (int j = 0, y = 123; j != 19; j++, y = i + 10)
> +       x = y;
> +      acc += x;
> +    }
> +
> +  return acc;
> +}
> +
> +/* Expected:
> +   const_init_unused_phi         -> 180
> +   conditional_init_unused_phi   -> 180
> +   nested_unused_phi             -> sum_{i=0}^{19} (i + 10) = 390
> +   There should be no loop left in the optimized dump.
> +*/
> +
> +/* { dg-final { scan-tree-dump-times "return 180" 2 "sccp" } } */
> +/* { dg-final { scan-tree-dump "return 390" "sccp" } } */
> +/* { dg-final { scan-tree-dump-times "goto" 0 "optimized" } } */
> diff --git a/gcc/tree-scalar-evolution.cc b/gcc/tree-scalar-evolution.cc
> index f524786f33b..56be952cda0 100644
> --- a/gcc/tree-scalar-evolution.cc
> +++ b/gcc/tree-scalar-evolution.cc
> @@ -3845,6 +3845,104 @@ analyze_and_compute_bitop_with_inv_effect (class 
> loop* loop, tree phidef,
>    return fold_build2 (code1, type, inv, match_op[0]);
>  }
>
> +static bool
> +ssa_name_has_nondebug_use_in_loop_p (tree name, class loop* loop)
> +{
> +  gcc_checking_assert (TREE_CODE (name) == SSA_NAME);
> +
> +  imm_use_iterator imm_iter;
> +  use_operand_p use_p;
> +
> +  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name)
> +  {
> +    gimple* use_stmt = USE_STMT (use_p);
> +
> +    if (!use_stmt || is_gimple_debug (use_stmt))
> +      continue;
> +
> +    basic_block use_bb = gimple_bb (use_stmt);
> +    if (use_bb && flow_bb_inside_loop_p (loop, use_bb))
> +      return true;
> +  }
> +
> +  return false;
> +}
> +
> +/* Try to compute the final value of PHIDEF when PHIDEF is the result of a
> +   loop-header PHI whose value is not used inside LOOP.
> +
> +   This handles the nonzero-latch-count delayed-value form:
> +
> +     y_phi = PHI <latch_arg (latch), init (preheader)>
> +
> +   If the latch count is known to be nonzero, the final value is:
> +
> +     latch_arg evaluated at iteration niter - 1
> +
> +   Return NULL_TREE if the pattern does not apply.  */
> +static tree
> +compute_final_value_from_unused_loop_phi (class loop *loop,
> +       class loop *ex_loop, tree phidef, tree niter, bool* folded_casts)
> +{
> +  if (TREE_CODE (phidef) != SSA_NAME)
> +    return NULL_TREE;
> +
> +  gimple* def_stmt = SSA_NAME_DEF_STMT (phidef);
> +  gphi *header_phi = dyn_cast<gphi*> (def_stmt);
> +  if (!header_phi
> +      || gimple_bb (header_phi) != loop->header
> +      || gimple_phi_num_args (header_phi) != 2
> +      || ssa_name_has_nondebug_use_in_loop_p (phidef, loop))
> +    return NULL_TREE;
> +
> +  /* This helper only handles the case where at least one latch
> +     edge is definitely taken.  */
> +  if (!tree_fits_uhwi_p (niter) || tree_to_uhwi (niter) == 0)
> +    return NULL_TREE;

It would be nice to improve this by using number_of_iterations_exit in the
caller and then check may_be_zero.

> +
> +  edge latch = loop_latch_edge (loop);
> +  if (!latch)
> +    return NULL_TREE;

this cannot be NULL

> +
> +  tree latch_arg = PHI_ARG_DEF_FROM_EDGE (header_phi, latch);
> +  if (!latch_arg)
> +    return NULL_TREE;

this cannot be NULL.

Otherwise looks good to me.

Richard.

> +
> +  tree ev = analyze_scalar_evolution_in_loop (ex_loop,
> +                                             loop,
> +                                             latch_arg,
> +                                             folded_casts);
> +  if (ev == chrec_dont_know)
> +    return NULL_TREE;
> +
> +  bool invariant_p;
> +  if (no_evolution_in_loop_p (ev, ex_loop->num, &invariant_p) && invariant_p)
> +    return ev;
> +  else if (TREE_CODE (ev) == POLYNOMIAL_CHREC
> +          && get_chrec_loop (ev) == ex_loop)
> +  {
> +    tree niter_type = TREE_TYPE (niter);
> +    tree prev_iter = fold_build2 (MINUS_EXPR,
> +                                 niter_type,
> +                                 niter,
> +                                 build_one_cst (niter_type));
> +
> +    tree res = chrec_apply (ex_loop->num, ev, prev_iter);
> +    if (res == chrec_dont_know)
> +      return NULL_TREE;
> +
> +    if (chrec_contains_symbols_defined_in_loop (res, ex_loop->num))
> +    {
> +      res = instantiate_parameters (ex_loop, res);
> +      if (res == chrec_dont_know)
> +       return NULL_TREE;
> +    }
> +
> +    return res;
> +  }
> +  return NULL_TREE;
> +}
> +
>  /* Do final value replacement for LOOP, return true if we did anything.  */
>
>  bool
> @@ -3895,7 +3993,7 @@ final_value_replacement_loop (class loop *loop)
>        def = analyze_scalar_evolution_in_loop (ex_loop, loop, def,
>                                               &folded_casts);
>
> -      tree bitinv_def, bit_def;
> +      tree bitinv_def, bit_def, unused_phi_final_value;
>        unsigned HOST_WIDE_INT niter_num;
>
>        if (def != chrec_dont_know)
> @@ -3932,6 +4030,14 @@ final_value_replacement_loop (class loop *loop)
>                                                                    
> niter_num)))
>         def = bit_def;
>
> +      else if ((unused_phi_final_value
> +               = compute_final_value_from_unused_loop_phi (loop,
> +                                                           ex_loop,
> +                                                           phidef,
> +                                                           niter,
> +                                                           &folded_casts)))
> +       def = unused_phi_final_value;
> +
>        bool cond_overflow_p;
>        if (!tree_does_not_contain_chrecs (def)
>           || chrec_contains_symbols_defined_in_loop (def, ex_loop->num)
> --
> 2.43.0
>

Reply via email to