On Thu, May 14, 2026 at 5:11 PM Abhishek Kaushik
<[email protected]> wrote:
>
> SCEV sometimes fails to compute the final value of a loop PHI when the
> entry value does not match the value produced by later iterations. For
> example:
>
> # y = PHI <y_next, 666>
> i_next = i + 1;
> y_next = i_next * 10;
>
> The PHI value is:
>
> 666, 10, 20, 30, ...
>
> This is not a normal affine recurrence. But if the latch count is
> known to be nonzero, the entry value cannot be the value seen after
> the loop. The final value is the latch argument evaluated at iteration
> niter - 1.
>
> Teach SCEV final value replacement to handle this case.
>
> This patch was bootstrapped and regression tested on aarch64-linux-gnu.
>
> gcc/:
>
> * tree-scalar-evolution.cc
> (compute_final_value_from_loop_phi_latch): New function.
> (final_value_replacement_loop): Use it.
>
> gcc/testsuite/:
>
> * gcc.dg/tree-ssa/sccp-unused-loop-phi.c: New test.
> ---
> Changes since v1:
> - Removed no use requirement from loop phi
> - Use number_of_iterations_exit in caller
> - Removed redundant null checks
>
> gcc/testsuite/gcc.dg/tree-ssa/sccp-loop-phi.c | 55 +++++++++++++
> gcc/tree-scalar-evolution.cc | 77 ++++++++++++++++++-
> 2 files changed, 131 insertions(+), 1 deletion(-)
> create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/sccp-loop-phi.c
>
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/sccp-loop-phi.c
> b/gcc/testsuite/gcc.dg/tree-ssa/sccp-loop-phi.c
> new file mode 100644
> index 00000000000..3518a1957b3
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/sccp-loop-phi.c
> @@ -0,0 +1,55 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-sccp -fdump-tree-optimized" } */
> +
> +int
> +const_init_unused_phi (void)
> +{
> + int x = 0;
> + for (int i = 0, y = 666; i != 19; i++, y = i * 10)
> + x = y;
> + return x;
> +}
> +
> +int
> +conditional_init_unused_phi (int c)
> +{
> + int x = 0;
> + int y;
> +
> + if (c != 0)
> + y = 20;
> + else
> + y = 30;
> +
> + for (int i = 0; i != 19; i++, y = i * 10)
> + x = y;
> +
> + return x;
> +}
> +
> +int
> +nested_unused_phi (void)
> +{
> + int acc = 0;
> +
> + for (int i = 0; i < 20; i++)
> + {
> + int x = 0;
> + for (int j = 0, y = 123; j != 19; j++, y = i + 10)
> + x = y;
> + acc += x;
> + }
> +
> + return acc;
> +}
> +
> +/* Expected:
> + const_init_unused_phi -> 180
> + conditional_init_unused_phi -> 180
> + nested_unused_phi -> sum_{i=0}^{19} (i + 10) = 390
> + There should be no loop left in the optimized dump.
> +*/
> +
> +/* { dg-final { scan-tree-dump-times "return 180" 2 "sccp" } } */
> +/* { dg-final { scan-tree-dump "return 390" "sccp" } } */
> +/* { dg-final { scan-tree-dump-times "goto" 0 "optimized" } } */
> diff --git a/gcc/tree-scalar-evolution.cc b/gcc/tree-scalar-evolution.cc
> index f524786f33b..18dc198fe96 100644
> --- a/gcc/tree-scalar-evolution.cc
> +++ b/gcc/tree-scalar-evolution.cc
> @@ -3845,6 +3845,70 @@ analyze_and_compute_bitop_with_inv_effect (class loop*
> loop, tree phidef,
> return fold_build2 (code1, type, inv, match_op[0]);
> }
>
> +/* Try to compute the final value of PHIDEF when PHIDEF is the result of a
> + loop-header PHI.
> +
> + This handles the nonzero-latch-count delayed-value form:
> +
> + y_phi = PHI <latch_arg (latch), init (preheader)>
> +
> + If the latch count is known to be nonzero, the final value is:
> +
> + latch_arg evaluated at iteration niter - 1
> +
> + Return NULL_TREE if the pattern does not apply. */
> +static tree
> +compute_final_value_from_loop_phi_latch (class loop *loop,
> + class loop *ex_loop, tree phidef, tree niter, bool* folded_casts)
> +{
> + if (TREE_CODE (phidef) != SSA_NAME)
> + return NULL_TREE;
> +
> + gimple* def_stmt = SSA_NAME_DEF_STMT (phidef);
> + gphi *header_phi = dyn_cast<gphi*> (def_stmt);
I'd suggest you pass the function the PHI instead of the PHi def.
> + if (!header_phi
> + || gimple_bb (header_phi) != loop->header
> + || gimple_phi_num_args (header_phi) != 2)
> + return NULL_TREE;
> +
> + tree latch_arg = PHI_ARG_DEF_FROM_EDGE (header_phi,
> + loop_latch_edge (loop));
> +
> + tree ev = analyze_scalar_evolution_in_loop (ex_loop,
> + loop,
> + latch_arg,
> + folded_casts);
> + if (ev == chrec_dont_know)
> + return NULL_TREE;
> +
> + bool invariant_p;
> + if (no_evolution_in_loop_p (ev, ex_loop->num, &invariant_p) && invariant_p)
> + return ev;
> + else if (TREE_CODE (ev) == POLYNOMIAL_CHREC
> + && get_chrec_loop (ev) == ex_loop)
> + {
> + tree niter_type = TREE_TYPE (niter);
> + tree prev_iter = fold_build2 (MINUS_EXPR,
> + niter_type,
> + niter,
> + build_one_cst (niter_type));
> +
> + tree res = chrec_apply (ex_loop->num, ev, prev_iter);
> + if (res == chrec_dont_know)
> + return NULL_TREE;
> +
> + if (chrec_contains_symbols_defined_in_loop (res, ex_loop->num))
> + {
> + res = instantiate_parameters (ex_loop, res);
> + if (res == chrec_dont_know)
> + return NULL_TREE;
> + }
> +
> + return res;
> + }
> + return NULL_TREE;
> +}
> +
> /* Do final value replacement for LOOP, return true if we did anything. */
>
> bool
> @@ -3895,8 +3959,9 @@ final_value_replacement_loop (class loop *loop)
> def = analyze_scalar_evolution_in_loop (ex_loop, loop, def,
> &folded_casts);
>
> - tree bitinv_def, bit_def;
> + tree bitinv_def, bit_def, phi_latch_final_value;
> unsigned HOST_WIDE_INT niter_num;
> + class tree_niter_desc niter_desc;
>
> if (def != chrec_dont_know)
> def = compute_overall_effect_of_inner_loop (ex_loop, def);
> @@ -3932,6 +3997,16 @@ final_value_replacement_loop (class loop *loop)
>
> niter_num)))
> def = bit_def;
>
> + else if (number_of_iterations_exit (loop, exit, &niter_desc, false)
Please compute this at the start of the function, replacing
tree niter = number_of_latch_executions (loop);
with
tree niter = niter_desc.niter;
otherwise looks OK to me.
Richard.
> + && integer_zerop (niter_desc.may_be_zero)
> + && (phi_latch_final_value
> + = compute_final_value_from_loop_phi_latch (loop,
> + ex_loop,
> + phidef,
> +
> niter_desc.niter,
> + &folded_casts)))
> + def = phi_latch_final_value;
> +
> bool cond_overflow_p;
> if (!tree_does_not_contain_chrecs (def)
> || chrec_contains_symbols_defined_in_loop (def, ex_loop->num)
> --
> 2.43.0
>