In the PR we see that the new scalar IV tricks other passes to think there's an
overflow to the use of a signed counter:
The loop is known to iterate 8191 times and we have a VF of 8 and it starts
at 2.
The codegen out of the vectorizer is the same as before, except we now have a
scalar variable counting the scalar iteration count vs a vector one.
i.e. we have
_45 = _39 + 8;
vs
_46 = _45 + { 16, 16, 16, 16, ... }
we pick a lower VF now since costing allows it to but that's not important.
When we get to cunroll since the value is now scalar, it sees that 8 * 8191
would overflow a signed short and so it changes the loop bounds to the largest
possible signed value and then uses this to elide the ivtmp_50 < 8191 as always
true and so you get an infinite loop:
Analyzing # of iterations of loop 1
exit condition [1, + , 1](no_overflow) < 8191
bounds on difference of bases: 8190 ... 8190
result:
# of iterations 8190, bounded by 8190
Statement (exit)if (ivtmp_50 < 8191)
is executed at most 8190 (bounded by 8190) + 1 times in loop 1.
Induction variable (signed short) 8 + 8 * iteration does not wrap in statement
_45 = _39 + 8;
in loop 1.
Statement _45 = _39 + 8;
is executed at most 4094 (bounded by 4094) + 1 times in loop 1.
The signed type was originally chosen because of the negative offset we use when
adjusting for peeling for alignments with masks. However this then introduces
issues as we see here with signed overflow. This patch instead uses an
unsigned type, and adjusts the MAX to always be a signed max. Since unsigned
underflow is defined and wraps around the negative offset for the PFA isn't an
issue.
Bootstrapped Regtested on aarch64-none-linux-gnu,
arm-none-linux-gnueabihf, x86_64-pc-linux-gnu
-m32, -m64 and no issues.
Pushed.
Thanks,
Tamar
gcc/ChangeLog:
PR tree-optimization/123089
* tree-vect-loop-manip.cc (vect_do_peeling): Use unsigned type for
scalar IV.
* tree-vect-loop.cc (vect_update_ivs_after_vectorizer_for_early_breaks):
Use signed MAX for PFA offset update.
gcc/testsuite/ChangeLog:
PR tree-optimization/123089
* gcc.dg/vect/vect-early-break_141-pr123089.c: New test.
---
diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_141-pr123089.c
b/gcc/testsuite/gcc.dg/vect/vect-early-break_141-pr123089.c
new file mode 100644
index
0000000000000000000000000000000000000000..431edbfbde6731e205788495a93d90e252e717f0
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_141-pr123089.c
@@ -0,0 +1,40 @@
+/* { dg-add-options vect_early_break } */
+/* { dg-require-effective-target vect_early_break_hw } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target avx2_runtime { target { i?86-*-* x86_64-*-* }
} } */
+
+/* { dg-additional-options "-O3 -fno-strict-aliasing -march=znver3" { target {
i?86-*-* x86_64-*-* } } } */
+/* { dg-final { scan-tree-dump "loop vectorized" "vect" { target { i?86-*-*
x86_64-*-* } } } } */
+
+#include "tree-vect.h"
+
+struct
+{
+ int d;
+ short e;
+} i;
+
+int b;
+int *h = &b;
+
+int
+main ()
+{
+ check_vect ();
+
+ short f = 1;
+ short *g = &i.e;
+
+a:
+ if (*g = 0 & ++f, *h)
+ ;
+ else
+ {
+ int c = 0;
+ if (f)
+ goto a;
+ h = &c;
+ }
+
+ return 0;
+}
diff --git a/gcc/tree-vect-loop-manip.cc b/gcc/tree-vect-loop-manip.cc
index
c6d8b05b617f2e73e329a72516964ba0f48b677b..0e3cf327708831d69e2a6e73b103edec078a3d81
100644
--- a/gcc/tree-vect-loop-manip.cc
+++ b/gcc/tree-vect-loop-manip.cc
@@ -3662,7 +3662,7 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters,
tree nitersm1,
tree vector_iters_vf = niters_vector_mult_vf;
if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo))
{
- tree scal_iv_ty = signed_type_for (TREE_TYPE (vector_iters_vf));
+ tree scal_iv_ty = unsigned_type_for (TREE_TYPE (vector_iters_vf));
tree tmp_niters_vf = make_ssa_name (scal_iv_ty);
basic_block exit_bb = NULL;
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index
f9dd88ed82468923ce6f1d01324a247a47169cde..2c014ed27371d8d8a1e731ffcd66b9fd99626e9d
100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -11005,10 +11005,15 @@ vect_update_ivs_after_vectorizer_for_early_breaks
(loop_vec_info loop_vinfo)
final IV. */
if (niters_skip)
{
- induc_def = gimple_build (&iv_stmts, MAX_EXPR, TREE_TYPE (induc_def),
- induc_def,
- build_zero_cst (TREE_TYPE (induc_def)));
- auto stmt = gimple_build_assign (phi_var, induc_def);
+ tree induc_type = TREE_TYPE (induc_def);
+ tree s_induc_type = signed_type_for (induc_type);
+ induc_def = gimple_build (&iv_stmts, MAX_EXPR, s_induc_type,
+ gimple_convert (&iv_stmts, s_induc_type,
+ induc_def),
+ build_zero_cst (s_induc_type));
+ auto stmt = gimple_build_assign (phi_var,
+ gimple_convert (&iv_stmts, induc_type,
+ induc_def));
gimple_seq_add_stmt_without_update (&iv_stmts, stmt);
basic_block exit_bb = NULL;
/* Identify the early exit merge block. I wish we had stored this. */
--
diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_141-pr123089.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_141-pr123089.c
new file mode 100644
index 0000000000000000000000000000000000000000..431edbfbde6731e205788495a93d90e252e717f0
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_141-pr123089.c
@@ -0,0 +1,40 @@
+/* { dg-add-options vect_early_break } */
+/* { dg-require-effective-target vect_early_break_hw } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target avx2_runtime { target { i?86-*-* x86_64-*-* } } } */
+
+/* { dg-additional-options "-O3 -fno-strict-aliasing -march=znver3" { target { i?86-*-* x86_64-*-* } } } */
+/* { dg-final { scan-tree-dump "loop vectorized" "vect" { target { i?86-*-* x86_64-*-* } } } } */
+
+#include "tree-vect.h"
+
+struct
+{
+ int d;
+ short e;
+} i;
+
+int b;
+int *h = &b;
+
+int
+main ()
+{
+ check_vect ();
+
+ short f = 1;
+ short *g = &i.e;
+
+a:
+ if (*g = 0 & ++f, *h)
+ ;
+ else
+ {
+ int c = 0;
+ if (f)
+ goto a;
+ h = &c;
+ }
+
+ return 0;
+}
diff --git a/gcc/tree-vect-loop-manip.cc b/gcc/tree-vect-loop-manip.cc
index c6d8b05b617f2e73e329a72516964ba0f48b677b..0e3cf327708831d69e2a6e73b103edec078a3d81 100644
--- a/gcc/tree-vect-loop-manip.cc
+++ b/gcc/tree-vect-loop-manip.cc
@@ -3662,7 +3662,7 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
tree vector_iters_vf = niters_vector_mult_vf;
if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo))
{
- tree scal_iv_ty = signed_type_for (TREE_TYPE (vector_iters_vf));
+ tree scal_iv_ty = unsigned_type_for (TREE_TYPE (vector_iters_vf));
tree tmp_niters_vf = make_ssa_name (scal_iv_ty);
basic_block exit_bb = NULL;
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index f9dd88ed82468923ce6f1d01324a247a47169cde..2c014ed27371d8d8a1e731ffcd66b9fd99626e9d 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -11005,10 +11005,15 @@ vect_update_ivs_after_vectorizer_for_early_breaks (loop_vec_info loop_vinfo)
final IV. */
if (niters_skip)
{
- induc_def = gimple_build (&iv_stmts, MAX_EXPR, TREE_TYPE (induc_def),
- induc_def,
- build_zero_cst (TREE_TYPE (induc_def)));
- auto stmt = gimple_build_assign (phi_var, induc_def);
+ tree induc_type = TREE_TYPE (induc_def);
+ tree s_induc_type = signed_type_for (induc_type);
+ induc_def = gimple_build (&iv_stmts, MAX_EXPR, s_induc_type,
+ gimple_convert (&iv_stmts, s_induc_type,
+ induc_def),
+ build_zero_cst (s_induc_type));
+ auto stmt = gimple_build_assign (phi_var,
+ gimple_convert (&iv_stmts, induc_type,
+ induc_def));
gimple_seq_add_stmt_without_update (&iv_stmts, stmt);
basic_block exit_bb = NULL;
/* Identify the early exit merge block. I wish we had stored this. */