On Wed, 6 May 2026, Tamar Christina wrote:
> The loop
>
> void add(int N, int *__restrict a, int *__restrict b, int *__restrict c) {
> for (int i = 0; i < N; i++) {
> c[i] = a[i] + b[i];
> if (i > 1000) {
> break;
> }
> }
> }
>
> is an example of a loop that we have started vectorizing using early break
> vectorization. However the given loop is not actually an early break loop as
> the condition on the exit introduces a MAX bound on N.
>
> This patch teaches IVcannon to identify such cases and rewrite the latch
> condition from i < N to i < MIN (N, C) where C is a constant.
The pattern-matching in IVCANON looks quite ugly. loop header copying
(run after ifcombine...) exposes
if (i_21 == 1001)
goto <bb 5>; [1.00%]
else
goto <bb 4>; [99.00%]
<bb 4> [local count: 1004539166]:
i_18 = i_21 + 1;
if (N_13(D) > i_18)
goto <bb 3>; [94.50%]
else
goto <bb 5>; [5.50%]
<bb 5> [local count: 69202658]:
which looks like a ifcombine opportunity to me. I recently reviewed
a patch to do extra if-combining from tail merging.
That said, IVCANON doesn't look like the correct place to fuse
two exit tests?
> It then tries to find an appropriate edge to remove from the loop iteration.
>
> Concretely instead of
>
> .L8:
> add v31.4s, v30.4s, v27.4s
> add v30.4s, v30.4s, v28.4s
> cmpeq p15.s, p7/z, z31.s, #0
> b.any .L41
> ldr q31, [x9, x4]
> add w5, w5, 4
> ldr q29, [x8, x4]
> add v31.4s, v31.4s, v29.4s
> str q31, [x6, x4]
> add x4, x4, 16
> cmp w7, w5
> bne .L8
>
> we now generate:
>
> .L4:
> ldr q31, [x1, x0]
> ldr q30, [x2, x0]
> add v30.4s, v31.4s, v30.4s
> str q30, [x3, x0]
> add x0, x0, 16
> cmp x4, x0
> bne .L4
>
> Bootstrapped Regtested on aarch64-none-linux-gnu,
> arm-none-linux-gnueabihf, x86_64-pc-linux-gnu
> -m32, -m64 and no issues.
> Ok for master?
>
> Thanks,
> Tamar
>
> gcc/ChangeLog:
>
> PR tree-optimization/113134
> * tree-ssa-loop-ivcanon.cc (redundant_with_constrained_latch_exit_p,
> (find_latch_constrained_niter): New.
> (try_unroll_loop_completely): Explicitly check exit.
> (canonicalize_loop_induction_variables): Use them.
>
> gcc/testsuite/ChangeLog:
>
> PR tree-optimization/113134
> * gcc.dg/ivcannon-pr113134.c: New test.
>
> ---
> diff --git a/gcc/testsuite/gcc.dg/ivcannon-pr113134.c
> b/gcc/testsuite/gcc.dg/ivcannon-pr113134.c
> new file mode 100644
> index
> 0000000000000000000000000000000000000000..49fd231cf436211ae3779c96def8a833446ca4fd
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/ivcannon-pr113134.c
> @@ -0,0 +1,13 @@
> +/* { dg-do compile } */
> +/* { dg-additional-options "-O3 -fdump-tree-ivcanon-details -std=c99" } */
> +
> +void add(int N, int *__restrict a, int *__restrict b, int *__restrict c) {
> + for (int i = 0; i < N; i++) {
> + c[i] = a[i] + b[i];
> + if (i > 1000) {
> + break;
> + }
> + }
> +}
> +
> +/* { dg-final { scan-tree-dump {exit edges updated and IV rewritten to}
> "ivcanon" } } */
> diff --git a/gcc/tree-ssa-loop-ivcanon.cc b/gcc/tree-ssa-loop-ivcanon.cc
> index
> fb2ef44a3d3c8504823b75747da1656ce2854bfd..beb3157db44b51bf656d8af143cc1706e322ea6b
> 100644
> --- a/gcc/tree-ssa-loop-ivcanon.cc
> +++ b/gcc/tree-ssa-loop-ivcanon.cc
> @@ -129,6 +129,133 @@ create_canonical_iv (class loop *loop, edge exit, tree
> niter,
> update_stmt (cond);
> }
>
> +/* Return true if exit E becomes redundant once LATCH_EXIT is rewritten with
> a
> + constrained latch count. This is intentionally narrow: the non-exit path
> + must flow directly to LATCH_EXIT in the same iteration. */
> +
> +static bool
> +redundant_with_constrained_latch_exit_p (class loop *loop, edge e,
> + edge latch_exit,
> + HOST_WIDE_INT maxiter)
> +{
> + class tree_niter_desc desc;
> + edge nonexit;
> + gphi_iterator gpi;
> +
> + /* Candidate exit must not be the latch exit and must execute at most
> + once per iteration. i.e. no other flow through the loop. */
> + if (e == latch_exit || !just_once_each_iteration_p (loop, e->src))
> + return false;
> +
> + /* The candidate exit must dominate the latch, otherwise it would be unsafe
> + to elide the candidate edge and have the latch assume it's role. */
> + if (!dominated_by_p (CDI_DOMINATORS, latch_exit->src, e->src))
> + return false;
> +
> + nonexit = EDGE_SUCC (e->src, 0);
> + if (nonexit == e)
> + nonexit = EDGE_SUCC (e->src, 1);
> +
> + /* The condidate edge must fall through to the latch. This might be too
> + strict, but it's safe for now. The reason for this is to prevent a
> second
> + branch like a diamond shape that can reach the latch from the edge. */
> + if (nonexit->dest != latch_exit->src)
> + return false;
> +
> + /* Since we want to use the latch for the candidate edge exit, they must
> have
> + been going to the same destination. */
> + if (e->dest != latch_exit->dest)
> + return false;
> +
> + /* The candidate edge must have a known, constant niters and it must be
> + smaller or equal to our maximum iteration count. */
> + if (!number_of_iterations_exit (loop, e, &desc, false)
> + || !integer_zerop (desc.may_be_zero)
> + || TREE_CODE (desc.niter) != INTEGER_CST
> + || !wi::geu_p (wi::to_widest (desc.niter), maxiter))
> + return false;
> +
> + /* Check that the candidate exit and the latch exit compute the same value
> + otherwise rewriting the candidate exit to go through the latch is not a
> + valid thing to do. */
> + for (gpi = gsi_start_phis (e->dest); !gsi_end_p (gpi); gsi_next (&gpi))
> + {
> + gphi *phi = gpi.phi ();
> + tree e_arg = PHI_ARG_DEF_FROM_EDGE (phi, e);
> + tree latch_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_exit);
> +
> + if (!operand_equal_p (e_arg, latch_arg, 0))
> + return false;
> + }
> +
> + return true;
> +}
> +
> +/* Return a constrained latch bound for LATCH_EXIT, using only exits that
> + dominate LATCH_EXIT and are executed once per iteration. The original
> + latch count is stored in *LATCH_NITER_OUT. */
> +
> +static tree
> +find_latch_constrained_niter (class loop *loop, edge latch_exit,
> + tree *latch_niter_out)
> +{
> + class tree_niter_desc desc;
> +
> + *latch_niter_out = NULL_TREE;
> + if (!number_of_iterations_exit (loop, latch_exit, &desc, false)
> + || !integer_zerop (desc.may_be_zero))
> + return chrec_dont_know;
> +
> + tree niter = desc.niter;
> + *latch_niter_out = niter;
> +
> + for (auto e : get_loop_exit_edges (loop))
> + {
> + /* Ignore the latch edge, and the exit must dominate the latch. If it
> + doesn't then the form is broken. */
> + if (e == latch_exit
> + || !just_once_each_iteration_p (loop, e->src)
> + || !dominated_by_p (CDI_DOMINATORS, latch_exit->src, e->src))
> + continue;
> +
> + /* The edge must have a non-zero iteration count. */
> + if (!number_of_iterations_exit (loop, e, &desc, false)
> + || !integer_zerop (desc.may_be_zero))
> + continue;
> +
> + tree aniter = desc.niter;
> + tree ty1 = TREE_TYPE (niter);
> + tree ty2 = TREE_TYPE (aniter);
> + if (ty1 != ty2)
> + {
> + if (TYPE_UNSIGNED (ty1) != TYPE_UNSIGNED (ty2))
> + return chrec_dont_know;
> +
> + if (CONSTANT_CLASS_P (niter)
> + && TYPE_PRECISION (ty1) <= TYPE_PRECISION (ty2))
> + niter = fold_convert (ty2, niter);
> + else if (CONSTANT_CLASS_P (aniter)
> + && TYPE_PRECISION (ty2) <= TYPE_PRECISION (ty1))
> + aniter = fold_convert (ty1, aniter);
> + else
> + return chrec_dont_know;
> + }
> +
> + if (TREE_CODE (aniter) != INTEGER_CST)
> + continue;
> +
> + if (TREE_CODE (niter) != INTEGER_CST)
> + {
> + niter = fold_build2 (MIN_EXPR, TREE_TYPE (niter), niter, aniter);
> + continue;
> + }
> +
> + niter = tree_int_cst_lt (aniter, niter) ? aniter : niter;
> + }
> +
> + return niter;
> +}
> +
> /* Describe size of loop as detected by tree_estimate_loop_size. */
> struct loop_size
> {
> @@ -766,7 +893,7 @@ try_unroll_loop_completely (class loop *loop,
> If the number of execution of loop is determined by standard induction
> variable test, then EXIT and EDGE_TO_CANCEL are the two edges leaving
> from the iv test. */
> - if (tree_fits_uhwi_p (niter))
> + if (exit && tree_fits_uhwi_p (niter))
> {
> n_unroll = tree_to_uhwi (niter);
> n_unroll_found = true;
> @@ -1354,7 +1481,77 @@ canonicalize_loop_induction_variables (class loop
> *loop,
> innermost_cunrolli_p))
> return true;
>
> - if ((create_iv || by_eval)
> + edge latch_e = NULL;
> + bool aggr_rewritten_p = false;
> + if (create_iv && !single_exit (loop))
> + {
> + tree latch_niter = NULL_TREE;
> + tree niter_aggr = chrec_dont_know;
> +
> + latch_e = loop_latch_edge (loop);
> + if (single_pred_p (latch_e->src))
> + {
> + latch_e = single_pred_edge (latch_e->src);
> + auto bb_exit = latch_e->src;
> + if (EDGE_COUNT (bb_exit->succs) != 2)
> + latch_e = NULL;
> + else if (loop_exit_edge_p (loop, EDGE_SUCC (bb_exit, 0)))
> + latch_e = EDGE_SUCC (bb_exit, 0);
> + else if (loop_exit_edge_p (loop, EDGE_SUCC (bb_exit, 1)))
> + latch_e = EDGE_SUCC (bb_exit, 1);
> + else
> + latch_e = NULL;
> + }
> + else
> + latch_e = NULL;
> +
> + if (latch_e)
> + {
> + niter_aggr = find_latch_constrained_niter (loop, latch_e,
> + &latch_niter);
> + if (!niter_aggr)
> + niter_aggr = chrec_dont_know;
> + if (chrec_contains_undetermined (latch_niter)
> + || chrec_contains_undetermined (niter_aggr)
> + || operand_equal_p (latch_niter, niter_aggr, 0))
> + latch_e = NULL;
> + }
> +
> + if (latch_e)
> + {
> + create_canonical_iv (loop, latch_e, niter_aggr);
> + aggr_rewritten_p = true;
> +
> + for (auto e : get_loop_exit_edges (loop))
> + if (redundant_with_constrained_latch_exit_p (loop, e, latch_e,
> + maxiter))
> + {
> + gcond *cond_stmt
> + = as_a <gcond *> (gsi_stmt (gsi_last_nondebug_bb
> + (e->src)));
> + if (e->flags & EDGE_TRUE_VALUE)
> + gimple_cond_make_false (cond_stmt);
> + else
> + gimple_cond_make_true (cond_stmt);
> + update_stmt (cond_stmt);
> + }
> +
> + if (dump_file && (dump_flags & TDF_DETAILS))
> + {
> + fprintf (dump_file,
> + "Loop %d exit edges updated and IV rewritten to ",
> + loop->num);
> + print_generic_expr (dump_file, niter_aggr, TDF_SLIM);
> + fprintf (dump_file, ".\n");
> + }
> + }
> + }
> +
> + /* Only accept the new MIN based bounds if we can actually simplify the
> loop,
> + otherwise the MIN_EXPR just causes more instruction in loop pre-headers
> + without any benefits. */
> + if (!aggr_rewritten_p
> + && (create_iv || by_eval)
> && niter && !chrec_contains_undetermined (niter)
> && exit && just_once_each_iteration_p (loop, exit->src))
> {
> @@ -1798,5 +1995,3 @@ make_pass_complete_unrolli (gcc::context *ctxt)
> {
> return new pass_complete_unrolli (ctxt);
> }
> -
> -
>
>
>
--
Richard Biener <[email protected]>
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Jochen Jaser, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)