SCEV sometimes fails to compute the final value of a loop PHI when the
entry value does not match the value produced by later iterations. For
example:
# y = PHI <y_next, 666>
i_next = i + 1;
y_next = i_next * 10;
The PHI value is:
666, 10, 20, 30, ...
This is not a normal affine recurrence. But if the latch count is
known to be nonzero, the entry value cannot be the value seen after
the loop. The final value is the latch argument evaluated at iteration
niter - 1.
Teach SCEV final value replacement to handle this case.
This patch was bootstrapped and regression tested on aarch64-linux-gnu.
gcc/:
* tree-scalar-evolution.cc
(compute_final_value_from_loop_phi_latch): New function.
(final_value_replacement_loop): Use it.
gcc/testsuite/:
* gcc.dg/tree-ssa/sccp-loop-phi.c: New test.
* gcc.dg/tree-ssa/sccp-loop-phi-not-optimized.c: New test.
---
Hello,
When compute_final_value_from_loop_phi_latch was guarded with just
integer_zerop (may_be_zero), and niter was a symbolic value it would result in
a miscompile as number of latch executions could still be zero. So I've added a
new if condition `if (!tree_expr_nonzero_p (niter))` and a new test file for
the non-optimized cases.
But tree_expr_nonzero_p returns false for some non-zero values (e.g. i + 1
where i is known non-zero value) as tree_binary_nonzero_warnv_p checks
case PLUS_EXPR:
if (ANY_INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_UNDEFINED (type))
which doesn't do anything for unsigned types.
I also looked at passing just phi instead of phidef, but going from the exit
phi to the loop header phi needed more calls, so I didn't change it. Please let
me know if I missed something easier.
Thanks
Abhishek
Changes since v2:
- Replace call to number_of_latch_executions with number_of_iterations_exit
- Add tree_expr_nonzero_p (niter) check to avoid miscompilations.
Changes since v1:
- Removed no use requirement from loop phi
- Use number_of_iterations_exit in caller
- Removed redundant null checks
.../tree-ssa/sccp-loop-phi-not-optimized.c | 67 +++++++++++++++
gcc/testsuite/gcc.dg/tree-ssa/sccp-loop-phi.c | 55 ++++++++++++
gcc/tree-scalar-evolution.cc | 86 ++++++++++++++++++-
3 files changed, 206 insertions(+), 2 deletions(-)
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/sccp-loop-phi-not-optimized.c
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/sccp-loop-phi.c
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/sccp-loop-phi-not-optimized.c
b/gcc/testsuite/gcc.dg/tree-ssa/sccp-loop-phi-not-optimized.c
new file mode 100644
index 00000000000..139d5216851
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/sccp-loop-phi-not-optimized.c
@@ -0,0 +1,67 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-sccp-details" } */
+
+int
+nested_unused_phi1 (void)
+{
+ int acc = 0;
+
+ for (int i = 0; i < 20; i++)
+ {
+ int x = 0;
+ for (int j = 0, y = 123; j != i; j++, y = i + 10)
+ x = y;
+ acc += x;
+ }
+
+ return acc;
+}
+
+int
+nested_unused_phi2 (void)
+{
+ int acc = 0;
+
+ for (int i = 0; i != 20; i++)
+ {
+ int x = 0;
+ for (int j = 0, y = 123; j != i + 1; j++, y = i + 10)
+ x = y;
+ acc += x;
+ }
+
+ return acc;
+}
+
+int
+nested_unused_phi3 (void)
+{
+ int acc = 0;
+
+ for (int i = 0; i != 20; i++)
+ {
+ int x = 0;
+ for (int j = 0, y = 123; j != i + 2; j++, y = i + 10)
+ x = y;
+ acc += x;
+ }
+
+ return acc;
+}
+
+/* Expected:
+ nested_unused_phi1 -> 123 + sum_{i=2}^{19} (i + 10) = 492
+ nested_unused_phi2 -> 123 + sum_{i=1}^{19} (i + 10) = 503
+ nested_unused_phi3 -> sum_{i=0}^{19} (i + 10) = 390
+
+ These functions could be optimized by sccp, but currently are not because
+ they are not normal affine recurrence.
+ compute_final_value_from_loop_phi_latch tries to optimize these cases but
+ fails to do so for nested_unsed_phi1/2 as the niter in the inner loop might
+ be zero.
+ It should technically be able to optimized nested_unused_phi3 as we know
+ that niter > 0 for the inner loop but tree_expr_nonzero_p (niter) returns
+ false and we don't.
+*/
+
+/* { dg-final { scan-tree-dump-times "not replacing:" 6 "sccp" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/sccp-loop-phi.c
b/gcc/testsuite/gcc.dg/tree-ssa/sccp-loop-phi.c
new file mode 100644
index 00000000000..3518a1957b3
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/sccp-loop-phi.c
@@ -0,0 +1,55 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-sccp -fdump-tree-optimized" } */
+
+int
+const_init_unused_phi (void)
+{
+ int x = 0;
+ for (int i = 0, y = 666; i != 19; i++, y = i * 10)
+ x = y;
+ return x;
+}
+
+int
+conditional_init_unused_phi (int c)
+{
+ int x = 0;
+ int y;
+
+ if (c != 0)
+ y = 20;
+ else
+ y = 30;
+
+ for (int i = 0; i != 19; i++, y = i * 10)
+ x = y;
+
+ return x;
+}
+
+int
+nested_unused_phi (void)
+{
+ int acc = 0;
+
+ for (int i = 0; i < 20; i++)
+ {
+ int x = 0;
+ for (int j = 0, y = 123; j != 19; j++, y = i + 10)
+ x = y;
+ acc += x;
+ }
+
+ return acc;
+}
+
+/* Expected:
+ const_init_unused_phi -> 180
+ conditional_init_unused_phi -> 180
+ nested_unused_phi -> sum_{i=0}^{19} (i + 10) = 390
+ There should be no loop left in the optimized dump.
+*/
+
+/* { dg-final { scan-tree-dump-times "return 180" 2 "sccp" } } */
+/* { dg-final { scan-tree-dump "return 390" "sccp" } } */
+/* { dg-final { scan-tree-dump-times "goto" 0 "optimized" } } */
diff --git a/gcc/tree-scalar-evolution.cc b/gcc/tree-scalar-evolution.cc
index f524786f33b..4b67baaeaf0 100644
--- a/gcc/tree-scalar-evolution.cc
+++ b/gcc/tree-scalar-evolution.cc
@@ -3845,6 +3845,75 @@ analyze_and_compute_bitop_with_inv_effect (class loop*
loop, tree phidef,
return fold_build2 (code1, type, inv, match_op[0]);
}
+/* Try to compute the final value of PHIDEF when PHIDEF is the result of a
+ loop-header PHI.
+
+ This handles the nonzero-latch-count delayed-value form:
+
+ y_phi = PHI <latch_arg (latch), init (preheader)>
+
+ If the latch count is known to be nonzero, the final value is:
+
+ latch_arg evaluated at iteration niter - 1
+
+ Return NULL_TREE if the pattern does not apply. */
+static tree
+compute_final_value_from_loop_phi_latch (class loop *loop,
+ class loop *ex_loop, tree phidef, tree niter, bool* folded_casts)
+{
+ if (TREE_CODE (phidef) != SSA_NAME)
+ return NULL_TREE;
+
+ gimple* def_stmt = SSA_NAME_DEF_STMT (phidef);
+ gphi *header_phi = dyn_cast<gphi*> (def_stmt);
+ if (!header_phi
+ || gimple_bb (header_phi) != loop->header
+ || gimple_phi_num_args (header_phi) != 2)
+ return NULL_TREE;
+
+ /* If niter is a symbolic value make sure it can never be zero, otherwise we
+ do a bad replacement. */
+ if (!tree_expr_nonzero_p (niter))
+ return NULL_TREE;
+
+ tree latch_arg = PHI_ARG_DEF_FROM_EDGE (header_phi,
+ loop_latch_edge (loop));
+
+ tree ev = analyze_scalar_evolution_in_loop (ex_loop,
+ loop,
+ latch_arg,
+ folded_casts);
+ if (ev == chrec_dont_know)
+ return NULL_TREE;
+
+ bool invariant_p;
+ if (no_evolution_in_loop_p (ev, ex_loop->num, &invariant_p) && invariant_p)
+ return ev;
+ else if (TREE_CODE (ev) == POLYNOMIAL_CHREC
+ && get_chrec_loop (ev) == ex_loop)
+ {
+ tree niter_type = TREE_TYPE (niter);
+ tree prev_iter = fold_build2 (MINUS_EXPR,
+ niter_type,
+ niter,
+ build_one_cst (niter_type));
+
+ tree res = chrec_apply (ex_loop->num, ev, prev_iter);
+ if (res == chrec_dont_know)
+ return NULL_TREE;
+
+ if (chrec_contains_symbols_defined_in_loop (res, ex_loop->num))
+ {
+ res = instantiate_parameters (ex_loop, res);
+ if (res == chrec_dont_know)
+ return NULL_TREE;
+ }
+
+ return res;
+ }
+ return NULL_TREE;
+}
+
/* Do final value replacement for LOOP, return true if we did anything. */
bool
@@ -3856,7 +3925,11 @@ final_value_replacement_loop (class loop *loop)
if (!exit)
return false;
- tree niter = number_of_latch_executions (loop);
+ class tree_niter_desc niter_desc;
+ if (!number_of_iterations_exit (loop, exit, &niter_desc, false))
+ return false;
+
+ tree niter = niter_desc.niter;
if (niter == chrec_dont_know)
return false;
@@ -3895,7 +3968,7 @@ final_value_replacement_loop (class loop *loop)
def = analyze_scalar_evolution_in_loop (ex_loop, loop, def,
&folded_casts);
- tree bitinv_def, bit_def;
+ tree bitinv_def, bit_def, phi_latch_final_value;
unsigned HOST_WIDE_INT niter_num;
if (def != chrec_dont_know)
@@ -3932,6 +4005,15 @@ final_value_replacement_loop (class loop *loop)
niter_num)))
def = bit_def;
+ else if (integer_zerop (niter_desc.may_be_zero)
+ && (phi_latch_final_value
+ = compute_final_value_from_loop_phi_latch (loop,
+ ex_loop,
+ phidef,
+ niter,
+ &folded_casts)))
+ def = phi_latch_final_value;
+
bool cond_overflow_p;
if (!tree_does_not_contain_chrecs (def)
|| chrec_contains_symbols_defined_in_loop (def, ex_loop->num)
--
2.43.0