The loop
void add(int N, int *__restrict a, int *__restrict b, int *__restrict c) {
for (int i = 0; i < N; i++) {
c[i] = a[i] + b[i];
if (i > 1000) {
break;
}
}
}
is an example of a loop that we have started vectorizing using early break
vectorization. However the given loop is not actually an early break loop as
the condition on the exit introduces a MAX bound on N.
This patch teaches IVcannon to identify such cases and rewrite the latch
condition from i < N to i < MIN (N, C) where C is a constant.
It then tries to find an appropriate edge to remove from the loop iteration.
Concretely instead of
.L8:
add v31.4s, v30.4s, v27.4s
add v30.4s, v30.4s, v28.4s
cmpeq p15.s, p7/z, z31.s, #0
b.any .L41
ldr q31, [x9, x4]
add w5, w5, 4
ldr q29, [x8, x4]
add v31.4s, v31.4s, v29.4s
str q31, [x6, x4]
add x4, x4, 16
cmp w7, w5
bne .L8
we now generate:
.L4:
ldr q31, [x1, x0]
ldr q30, [x2, x0]
add v30.4s, v31.4s, v30.4s
str q30, [x3, x0]
add x0, x0, 16
cmp x4, x0
bne .L4
Bootstrapped Regtested on aarch64-none-linux-gnu,
arm-none-linux-gnueabihf, x86_64-pc-linux-gnu
-m32, -m64 and no issues.
Ok for master?
Thanks,
Tamar
gcc/ChangeLog:
PR tree-optimization/113134
* tree-ssa-loop-ivcanon.cc (redundant_with_constrained_latch_exit_p,
(find_latch_constrained_niter): New.
(try_unroll_loop_completely): Explicitly check exit.
(canonicalize_loop_induction_variables): Use them.
gcc/testsuite/ChangeLog:
PR tree-optimization/113134
* gcc.dg/ivcannon-pr113134.c: New test.
---
diff --git a/gcc/testsuite/gcc.dg/ivcannon-pr113134.c
b/gcc/testsuite/gcc.dg/ivcannon-pr113134.c
new file mode 100644
index
0000000000000000000000000000000000000000..49fd231cf436211ae3779c96def8a833446ca4fd
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/ivcannon-pr113134.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O3 -fdump-tree-ivcanon-details -std=c99" } */
+
+void add(int N, int *__restrict a, int *__restrict b, int *__restrict c) {
+ for (int i = 0; i < N; i++) {
+ c[i] = a[i] + b[i];
+ if (i > 1000) {
+ break;
+ }
+ }
+}
+
+/* { dg-final { scan-tree-dump {exit edges updated and IV rewritten to}
"ivcanon" } } */
diff --git a/gcc/tree-ssa-loop-ivcanon.cc b/gcc/tree-ssa-loop-ivcanon.cc
index
fb2ef44a3d3c8504823b75747da1656ce2854bfd..beb3157db44b51bf656d8af143cc1706e322ea6b
100644
--- a/gcc/tree-ssa-loop-ivcanon.cc
+++ b/gcc/tree-ssa-loop-ivcanon.cc
@@ -129,6 +129,133 @@ create_canonical_iv (class loop *loop, edge exit, tree
niter,
update_stmt (cond);
}
+/* Return true if exit E becomes redundant once LATCH_EXIT is rewritten with a
+ constrained latch count. This is intentionally narrow: the non-exit path
+ must flow directly to LATCH_EXIT in the same iteration. */
+
+static bool
+redundant_with_constrained_latch_exit_p (class loop *loop, edge e,
+ edge latch_exit,
+ HOST_WIDE_INT maxiter)
+{
+ class tree_niter_desc desc;
+ edge nonexit;
+ gphi_iterator gpi;
+
+ /* Candidate exit must not be the latch exit and must execute at most
+ once per iteration. i.e. no other flow through the loop. */
+ if (e == latch_exit || !just_once_each_iteration_p (loop, e->src))
+ return false;
+
+ /* The candidate exit must dominate the latch, otherwise it would be unsafe
+ to elide the candidate edge and have the latch assume it's role. */
+ if (!dominated_by_p (CDI_DOMINATORS, latch_exit->src, e->src))
+ return false;
+
+ nonexit = EDGE_SUCC (e->src, 0);
+ if (nonexit == e)
+ nonexit = EDGE_SUCC (e->src, 1);
+
+ /* The condidate edge must fall through to the latch. This might be too
+ strict, but it's safe for now. The reason for this is to prevent a second
+ branch like a diamond shape that can reach the latch from the edge. */
+ if (nonexit->dest != latch_exit->src)
+ return false;
+
+ /* Since we want to use the latch for the candidate edge exit, they must have
+ been going to the same destination. */
+ if (e->dest != latch_exit->dest)
+ return false;
+
+ /* The candidate edge must have a known, constant niters and it must be
+ smaller or equal to our maximum iteration count. */
+ if (!number_of_iterations_exit (loop, e, &desc, false)
+ || !integer_zerop (desc.may_be_zero)
+ || TREE_CODE (desc.niter) != INTEGER_CST
+ || !wi::geu_p (wi::to_widest (desc.niter), maxiter))
+ return false;
+
+ /* Check that the candidate exit and the latch exit compute the same value
+ otherwise rewriting the candidate exit to go through the latch is not a
+ valid thing to do. */
+ for (gpi = gsi_start_phis (e->dest); !gsi_end_p (gpi); gsi_next (&gpi))
+ {
+ gphi *phi = gpi.phi ();
+ tree e_arg = PHI_ARG_DEF_FROM_EDGE (phi, e);
+ tree latch_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_exit);
+
+ if (!operand_equal_p (e_arg, latch_arg, 0))
+ return false;
+ }
+
+ return true;
+}
+
+/* Return a constrained latch bound for LATCH_EXIT, using only exits that
+ dominate LATCH_EXIT and are executed once per iteration. The original
+ latch count is stored in *LATCH_NITER_OUT. */
+
+static tree
+find_latch_constrained_niter (class loop *loop, edge latch_exit,
+ tree *latch_niter_out)
+{
+ class tree_niter_desc desc;
+
+ *latch_niter_out = NULL_TREE;
+ if (!number_of_iterations_exit (loop, latch_exit, &desc, false)
+ || !integer_zerop (desc.may_be_zero))
+ return chrec_dont_know;
+
+ tree niter = desc.niter;
+ *latch_niter_out = niter;
+
+ for (auto e : get_loop_exit_edges (loop))
+ {
+ /* Ignore the latch edge, and the exit must dominate the latch. If it
+ doesn't then the form is broken. */
+ if (e == latch_exit
+ || !just_once_each_iteration_p (loop, e->src)
+ || !dominated_by_p (CDI_DOMINATORS, latch_exit->src, e->src))
+ continue;
+
+ /* The edge must have a non-zero iteration count. */
+ if (!number_of_iterations_exit (loop, e, &desc, false)
+ || !integer_zerop (desc.may_be_zero))
+ continue;
+
+ tree aniter = desc.niter;
+ tree ty1 = TREE_TYPE (niter);
+ tree ty2 = TREE_TYPE (aniter);
+ if (ty1 != ty2)
+ {
+ if (TYPE_UNSIGNED (ty1) != TYPE_UNSIGNED (ty2))
+ return chrec_dont_know;
+
+ if (CONSTANT_CLASS_P (niter)
+ && TYPE_PRECISION (ty1) <= TYPE_PRECISION (ty2))
+ niter = fold_convert (ty2, niter);
+ else if (CONSTANT_CLASS_P (aniter)
+ && TYPE_PRECISION (ty2) <= TYPE_PRECISION (ty1))
+ aniter = fold_convert (ty1, aniter);
+ else
+ return chrec_dont_know;
+ }
+
+ if (TREE_CODE (aniter) != INTEGER_CST)
+ continue;
+
+ if (TREE_CODE (niter) != INTEGER_CST)
+ {
+ niter = fold_build2 (MIN_EXPR, TREE_TYPE (niter), niter, aniter);
+ continue;
+ }
+
+ niter = tree_int_cst_lt (aniter, niter) ? aniter : niter;
+ }
+
+ return niter;
+}
+
/* Describe size of loop as detected by tree_estimate_loop_size. */
struct loop_size
{
@@ -766,7 +893,7 @@ try_unroll_loop_completely (class loop *loop,
If the number of execution of loop is determined by standard induction
variable test, then EXIT and EDGE_TO_CANCEL are the two edges leaving
from the iv test. */
- if (tree_fits_uhwi_p (niter))
+ if (exit && tree_fits_uhwi_p (niter))
{
n_unroll = tree_to_uhwi (niter);
n_unroll_found = true;
@@ -1354,7 +1481,77 @@ canonicalize_loop_induction_variables (class loop *loop,
innermost_cunrolli_p))
return true;
- if ((create_iv || by_eval)
+ edge latch_e = NULL;
+ bool aggr_rewritten_p = false;
+ if (create_iv && !single_exit (loop))
+ {
+ tree latch_niter = NULL_TREE;
+ tree niter_aggr = chrec_dont_know;
+
+ latch_e = loop_latch_edge (loop);
+ if (single_pred_p (latch_e->src))
+ {
+ latch_e = single_pred_edge (latch_e->src);
+ auto bb_exit = latch_e->src;
+ if (EDGE_COUNT (bb_exit->succs) != 2)
+ latch_e = NULL;
+ else if (loop_exit_edge_p (loop, EDGE_SUCC (bb_exit, 0)))
+ latch_e = EDGE_SUCC (bb_exit, 0);
+ else if (loop_exit_edge_p (loop, EDGE_SUCC (bb_exit, 1)))
+ latch_e = EDGE_SUCC (bb_exit, 1);
+ else
+ latch_e = NULL;
+ }
+ else
+ latch_e = NULL;
+
+ if (latch_e)
+ {
+ niter_aggr = find_latch_constrained_niter (loop, latch_e,
+ &latch_niter);
+ if (!niter_aggr)
+ niter_aggr = chrec_dont_know;
+ if (chrec_contains_undetermined (latch_niter)
+ || chrec_contains_undetermined (niter_aggr)
+ || operand_equal_p (latch_niter, niter_aggr, 0))
+ latch_e = NULL;
+ }
+
+ if (latch_e)
+ {
+ create_canonical_iv (loop, latch_e, niter_aggr);
+ aggr_rewritten_p = true;
+
+ for (auto e : get_loop_exit_edges (loop))
+ if (redundant_with_constrained_latch_exit_p (loop, e, latch_e,
+ maxiter))
+ {
+ gcond *cond_stmt
+ = as_a <gcond *> (gsi_stmt (gsi_last_nondebug_bb
+ (e->src)));
+ if (e->flags & EDGE_TRUE_VALUE)
+ gimple_cond_make_false (cond_stmt);
+ else
+ gimple_cond_make_true (cond_stmt);
+ update_stmt (cond_stmt);
+ }
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file,
+ "Loop %d exit edges updated and IV rewritten to ",
+ loop->num);
+ print_generic_expr (dump_file, niter_aggr, TDF_SLIM);
+ fprintf (dump_file, ".\n");
+ }
+ }
+ }
+
+ /* Only accept the new MIN based bounds if we can actually simplify the loop,
+ otherwise the MIN_EXPR just causes more instruction in loop pre-headers
+ without any benefits. */
+ if (!aggr_rewritten_p
+ && (create_iv || by_eval)
&& niter && !chrec_contains_undetermined (niter)
&& exit && just_once_each_iteration_p (loop, exit->src))
{
@@ -1798,5 +1995,3 @@ make_pass_complete_unrolli (gcc::context *ctxt)
{
return new pass_complete_unrolli (ctxt);
}
-
-
--
diff --git a/gcc/testsuite/gcc.dg/ivcannon-pr113134.c b/gcc/testsuite/gcc.dg/ivcannon-pr113134.c
new file mode 100644
index 0000000000000000000000000000000000000000..49fd231cf436211ae3779c96def8a833446ca4fd
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/ivcannon-pr113134.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O3 -fdump-tree-ivcanon-details -std=c99" } */
+
+void add(int N, int *__restrict a, int *__restrict b, int *__restrict c) {
+ for (int i = 0; i < N; i++) {
+ c[i] = a[i] + b[i];
+ if (i > 1000) {
+ break;
+ }
+ }
+}
+
+/* { dg-final { scan-tree-dump {exit edges updated and IV rewritten to} "ivcanon" } } */
diff --git a/gcc/tree-ssa-loop-ivcanon.cc b/gcc/tree-ssa-loop-ivcanon.cc
index fb2ef44a3d3c8504823b75747da1656ce2854bfd..beb3157db44b51bf656d8af143cc1706e322ea6b 100644
--- a/gcc/tree-ssa-loop-ivcanon.cc
+++ b/gcc/tree-ssa-loop-ivcanon.cc
@@ -129,6 +129,133 @@ create_canonical_iv (class loop *loop, edge exit, tree niter,
update_stmt (cond);
}
+/* Return true if exit E becomes redundant once LATCH_EXIT is rewritten with a
+ constrained latch count. This is intentionally narrow: the non-exit path
+ must flow directly to LATCH_EXIT in the same iteration. */
+
+static bool
+redundant_with_constrained_latch_exit_p (class loop *loop, edge e,
+ edge latch_exit,
+ HOST_WIDE_INT maxiter)
+{
+ class tree_niter_desc desc;
+ edge nonexit;
+ gphi_iterator gpi;
+
+ /* Candidate exit must not be the latch exit and must execute at most
+ once per iteration. i.e. no other flow through the loop. */
+ if (e == latch_exit || !just_once_each_iteration_p (loop, e->src))
+ return false;
+
+ /* The candidate exit must dominate the latch, otherwise it would be unsafe
+ to elide the candidate edge and have the latch assume it's role. */
+ if (!dominated_by_p (CDI_DOMINATORS, latch_exit->src, e->src))
+ return false;
+
+ nonexit = EDGE_SUCC (e->src, 0);
+ if (nonexit == e)
+ nonexit = EDGE_SUCC (e->src, 1);
+
+ /* The condidate edge must fall through to the latch. This might be too
+ strict, but it's safe for now. The reason for this is to prevent a second
+ branch like a diamond shape that can reach the latch from the edge. */
+ if (nonexit->dest != latch_exit->src)
+ return false;
+
+ /* Since we want to use the latch for the candidate edge exit, they must have
+ been going to the same destination. */
+ if (e->dest != latch_exit->dest)
+ return false;
+
+ /* The candidate edge must have a known, constant niters and it must be
+ smaller or equal to our maximum iteration count. */
+ if (!number_of_iterations_exit (loop, e, &desc, false)
+ || !integer_zerop (desc.may_be_zero)
+ || TREE_CODE (desc.niter) != INTEGER_CST
+ || !wi::geu_p (wi::to_widest (desc.niter), maxiter))
+ return false;
+
+ /* Check that the candidate exit and the latch exit compute the same value
+ otherwise rewriting the candidate exit to go through the latch is not a
+ valid thing to do. */
+ for (gpi = gsi_start_phis (e->dest); !gsi_end_p (gpi); gsi_next (&gpi))
+ {
+ gphi *phi = gpi.phi ();
+ tree e_arg = PHI_ARG_DEF_FROM_EDGE (phi, e);
+ tree latch_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_exit);
+
+ if (!operand_equal_p (e_arg, latch_arg, 0))
+ return false;
+ }
+
+ return true;
+}
+
+/* Return a constrained latch bound for LATCH_EXIT, using only exits that
+ dominate LATCH_EXIT and are executed once per iteration. The original
+ latch count is stored in *LATCH_NITER_OUT. */
+
+static tree
+find_latch_constrained_niter (class loop *loop, edge latch_exit,
+ tree *latch_niter_out)
+{
+ class tree_niter_desc desc;
+
+ *latch_niter_out = NULL_TREE;
+ if (!number_of_iterations_exit (loop, latch_exit, &desc, false)
+ || !integer_zerop (desc.may_be_zero))
+ return chrec_dont_know;
+
+ tree niter = desc.niter;
+ *latch_niter_out = niter;
+
+ for (auto e : get_loop_exit_edges (loop))
+ {
+ /* Ignore the latch edge, and the exit must dominate the latch. If it
+ doesn't then the form is broken. */
+ if (e == latch_exit
+ || !just_once_each_iteration_p (loop, e->src)
+ || !dominated_by_p (CDI_DOMINATORS, latch_exit->src, e->src))
+ continue;
+
+ /* The edge must have a non-zero iteration count. */
+ if (!number_of_iterations_exit (loop, e, &desc, false)
+ || !integer_zerop (desc.may_be_zero))
+ continue;
+
+ tree aniter = desc.niter;
+ tree ty1 = TREE_TYPE (niter);
+ tree ty2 = TREE_TYPE (aniter);
+ if (ty1 != ty2)
+ {
+ if (TYPE_UNSIGNED (ty1) != TYPE_UNSIGNED (ty2))
+ return chrec_dont_know;
+
+ if (CONSTANT_CLASS_P (niter)
+ && TYPE_PRECISION (ty1) <= TYPE_PRECISION (ty2))
+ niter = fold_convert (ty2, niter);
+ else if (CONSTANT_CLASS_P (aniter)
+ && TYPE_PRECISION (ty2) <= TYPE_PRECISION (ty1))
+ aniter = fold_convert (ty1, aniter);
+ else
+ return chrec_dont_know;
+ }
+
+ if (TREE_CODE (aniter) != INTEGER_CST)
+ continue;
+
+ if (TREE_CODE (niter) != INTEGER_CST)
+ {
+ niter = fold_build2 (MIN_EXPR, TREE_TYPE (niter), niter, aniter);
+ continue;
+ }
+
+ niter = tree_int_cst_lt (aniter, niter) ? aniter : niter;
+ }
+
+ return niter;
+}
+
/* Describe size of loop as detected by tree_estimate_loop_size. */
struct loop_size
{
@@ -766,7 +893,7 @@ try_unroll_loop_completely (class loop *loop,
If the number of execution of loop is determined by standard induction
variable test, then EXIT and EDGE_TO_CANCEL are the two edges leaving
from the iv test. */
- if (tree_fits_uhwi_p (niter))
+ if (exit && tree_fits_uhwi_p (niter))
{
n_unroll = tree_to_uhwi (niter);
n_unroll_found = true;
@@ -1354,7 +1481,77 @@ canonicalize_loop_induction_variables (class loop *loop,
innermost_cunrolli_p))
return true;
- if ((create_iv || by_eval)
+ edge latch_e = NULL;
+ bool aggr_rewritten_p = false;
+ if (create_iv && !single_exit (loop))
+ {
+ tree latch_niter = NULL_TREE;
+ tree niter_aggr = chrec_dont_know;
+
+ latch_e = loop_latch_edge (loop);
+ if (single_pred_p (latch_e->src))
+ {
+ latch_e = single_pred_edge (latch_e->src);
+ auto bb_exit = latch_e->src;
+ if (EDGE_COUNT (bb_exit->succs) != 2)
+ latch_e = NULL;
+ else if (loop_exit_edge_p (loop, EDGE_SUCC (bb_exit, 0)))
+ latch_e = EDGE_SUCC (bb_exit, 0);
+ else if (loop_exit_edge_p (loop, EDGE_SUCC (bb_exit, 1)))
+ latch_e = EDGE_SUCC (bb_exit, 1);
+ else
+ latch_e = NULL;
+ }
+ else
+ latch_e = NULL;
+
+ if (latch_e)
+ {
+ niter_aggr = find_latch_constrained_niter (loop, latch_e,
+ &latch_niter);
+ if (!niter_aggr)
+ niter_aggr = chrec_dont_know;
+ if (chrec_contains_undetermined (latch_niter)
+ || chrec_contains_undetermined (niter_aggr)
+ || operand_equal_p (latch_niter, niter_aggr, 0))
+ latch_e = NULL;
+ }
+
+ if (latch_e)
+ {
+ create_canonical_iv (loop, latch_e, niter_aggr);
+ aggr_rewritten_p = true;
+
+ for (auto e : get_loop_exit_edges (loop))
+ if (redundant_with_constrained_latch_exit_p (loop, e, latch_e,
+ maxiter))
+ {
+ gcond *cond_stmt
+ = as_a <gcond *> (gsi_stmt (gsi_last_nondebug_bb
+ (e->src)));
+ if (e->flags & EDGE_TRUE_VALUE)
+ gimple_cond_make_false (cond_stmt);
+ else
+ gimple_cond_make_true (cond_stmt);
+ update_stmt (cond_stmt);
+ }
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file,
+ "Loop %d exit edges updated and IV rewritten to ",
+ loop->num);
+ print_generic_expr (dump_file, niter_aggr, TDF_SLIM);
+ fprintf (dump_file, ".\n");
+ }
+ }
+ }
+
+ /* Only accept the new MIN based bounds if we can actually simplify the loop,
+ otherwise the MIN_EXPR just causes more instruction in loop pre-headers
+ without any benefits. */
+ if (!aggr_rewritten_p
+ && (create_iv || by_eval)
&& niter && !chrec_contains_undetermined (niter)
&& exit && just_once_each_iteration_p (loop, exit->src))
{
@@ -1798,5 +1995,3 @@ make_pass_complete_unrolli (gcc::context *ctxt)
{
return new pass_complete_unrolli (ctxt);
}
-
-