In the PR we see that the new scalar IV tricks other passes into thinking
there's an overflow, due to the use of a signed counter:
The loop is known to iterate 8191 times and we have a VF of 8 and it starts
at 2.
The codegen out of the vectorizer is the same as before, except we now have a
scalar variable counting the scalar iteration count vs a vector one.
i.e. we have
_45 = _39 + 8;
vs
_46 = _45 + { 16, 16, 16, 16, ... }
we pick a lower VF now since costing allows it, but that's not important.
When we get to cunroll since the value is now scalar, it sees that 8 * 8191
would overflow a signed short and so it changes the loop bounds to the largest
possible signed value and then uses this to elide the ivtmp_50 < 8191 as always
true and so you get an infinite loop:
Analyzing # of iterations of loop 1
exit condition [1, + , 1](no_overflow) < 8191
bounds on difference of bases: 8190 ... 8190
result:
# of iterations 8190, bounded by 8190
Statement (exit)if (ivtmp_50 < 8191)
is executed at most 8190 (bounded by 8190) + 1 times in loop 1.
Induction variable (signed short) 8 + 8 * iteration does not wrap in statement
_45 = _39 + 8;
in loop 1.
Statement _45 = _39 + 8;
is executed at most 4094 (bounded by 4094) + 1 times in loop 1.
The signed type was originally chosen because of the negative offset we use when
adjusting for peeling for alignment with masks. However this then introduces
issues as we see here with signed overflow. This patch instead determines the
smallest possible signed type for use by the scalar IV where the overflow won't
happen when we include the extra bit for the sign. i.e. if the scalar IV is
an unsigned 8-bit value we pick a signed 16-bit type.  But if a signed 8-bit
value we pick a signed 8-bit type.
I've also added some testcases for masking around the boundary values. I've
only added them for char to reduce the runtime of the tests.
Bootstrapped Regtested on aarch64-none-linux-gnu,
arm-none-linux-gnueabihf, x86_64-pc-linux-gnu
-m32, -m64 and no issues.
Any comments? otherwise will push tomorrow.
Thanks,
Tamar
gcc/ChangeLog:
PR tree-optimization/123089
* tree-vect-loop.cc (vect_update_ivs_after_vectorizer_for_early_breaks):
Add conversion if required.  Note that if we did truncate, the original
scalar loop had an overflow here anyway.
* tree-vect-stmts.cc (vectorizable_early_exit): Find smallest type where
we won't have UB in the signed IV and store it.
* tree-vectorizer.h (LOOP_VINFO_EARLY_BRK_IV_TYPE): New.
(class _loop_vec_info): Add early_break_iv_type.
* tree-vect-loop-manip.cc (vect_do_peeling): Use it.
gcc/testsuite/ChangeLog:
PR tree-optimization/123089
* gcc.dg/vect/vect-early-break_141-pr123089.c: New test.
* gcc.target/aarch64/sve/peel_ind_14_run.c: New test.
* gcc.target/aarch64/sve/peel_ind_15_run.c: New test.
* gcc.target/aarch64/sve/peel_ind_16_run.c: New test.
---
diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_141-pr123089.c
b/gcc/testsuite/gcc.dg/vect/vect-early-break_141-pr123089.c
new file mode 100644
index
0000000000000000000000000000000000000000..431edbfbde6731e205788495a93d90e252e717f0
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_141-pr123089.c
@@ -0,0 +1,40 @@
+/* { dg-add-options vect_early_break } */
+/* { dg-require-effective-target vect_early_break_hw } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target avx2_runtime { target { i?86-*-* x86_64-*-* }
} } */
+
+/* { dg-additional-options "-O3 -fno-strict-aliasing -march=znver3" { target {
i?86-*-* x86_64-*-* } } } */
+/* { dg-final { scan-tree-dump "loop vectorized" "vect" { target { i?86-*-*
x86_64-*-* } } } } */
+
+#include "tree-vect.h"
+
+struct
+{
+ int d;
+ short e;
+} i;
+
+int b;
+int *h = &b;
+
+int
+main ()
+{
+ check_vect ();
+
+ short f = 1;
+ short *g = &i.e;
+
+a:
+ if (*g = 0 & ++f, *h)
+ ;
+ else
+ {
+ int c = 0;
+ if (f)
+ goto a;
+ h = &c;
+ }
+
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_14_run.c
b/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_14_run.c
new file mode 100644
index
0000000000000000000000000000000000000000..fab939bb25e4b87597cd5183e7143410eff3c596
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_14_run.c
@@ -0,0 +1,42 @@
+/* Fix for PR123089 alignment peeling with vectors and VLS and overflows. */
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-Ofast --param aarch64-autovec-preference=sve-only" } */
+/* { dg-additional-options "-msve-vector-bits=256" { target aarch64_sve256_hw
} } */
+/* { dg-additional-options "-msve-vector-bits=128" { target aarch64_sve128_hw
} } */
+
+#define START 2
+
+int __attribute__((noipa))
+foo (unsigned char n, int *x)
+{
+ unsigned char i = 0;
+#pragma GCC unroll 0
+ for (i = START; i < n; ++i)
+ {
+ if (x[i] == 0)
+ return i;
+ x[i] += 1;
+ }
+ return i;
+}
+
+int main ()
+{
+ int max = 255 - START;
+ int x[255 - START];
+#pragma GCC unroll 0
+ for (int i = 0; i < max; i++)
+ x[i] = 1;
+
+ x[200] = 0;
+ int res = foo (max, x);
+ if (res != 200)
+ __builtin_abort ();
+
+ if (x[START] != 2)
+ __builtin_abort ();
+
+ if (x[0] != 1)
+ __builtin_abort ();
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_15_run.c
b/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_15_run.c
new file mode 100644
index
0000000000000000000000000000000000000000..13763f5ebfbea798f85142cec3fe824764609dd5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_15_run.c
@@ -0,0 +1,42 @@
+/* Fix for PR123089 alignment peeling with vectors and VLS and overflows. */
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-Ofast --param aarch64-autovec-preference=sve-only" } */
+/* { dg-additional-options "-msve-vector-bits=256" { target aarch64_sve256_hw
} } */
+/* { dg-additional-options "-msve-vector-bits=128" { target aarch64_sve128_hw
} } */
+
+#define START 2
+
+int __attribute__((noipa))
+foo (unsigned char n, int *x)
+{
+ unsigned char i = 0;
+#pragma GCC unroll 0
+ for (i = START; i < n; ++i)
+ {
+ if (x[i] == 0)
+ return i;
+ x[i] += 1;
+ }
+ return i;
+}
+
+int main ()
+{
+ int max = 255 - START;
+ int x[255 - START];
+#pragma GCC unroll 0
+ for (int i = 0; i < max; i++)
+ x[i] = 1;
+
+ x[33] = 0;
+ int res = foo (max, x);
+ if (res != 33)
+ __builtin_abort ();
+
+ if (x[START] != 2)
+ __builtin_abort ();
+
+ if (x[0] != 1)
+ __builtin_abort ();
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_16_run.c
b/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_16_run.c
new file mode 100644
index
0000000000000000000000000000000000000000..120f737d23128ca2e7627695ccff567dd2293915
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_16_run.c
@@ -0,0 +1,41 @@
+/* Fix for PR123089 alignment peeling with vectors and VLS and overflows. */
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-Ofast --param aarch64-autovec-preference=sve-only" } */
+/* { dg-additional-options "-msve-vector-bits=256" { target aarch64_sve256_hw
} } */
+/* { dg-additional-options "-msve-vector-bits=128" { target aarch64_sve128_hw
} } */
+
+#define START 2
+
+int __attribute__((noipa))
+foo (unsigned char n, int *x)
+{
+ unsigned char i = 0;
+#pragma GCC unroll 0
+ for (i = START; i < n; ++i)
+ {
+ if (x[i] == 0)
+ return i;
+ x[i] += 1;
+ }
+ return i;
+}
+
+int main ()
+{
+ int max = 255 - START;
+ int x[255 - START];
+#pragma GCC unroll 0
+ for (int i = 0; i < max; i++)
+ x[i] = 1;
+
+ int res = foo (max, x);
+ if (res != max)
+ __builtin_abort ();
+
+ if (x[START] != 2)
+ __builtin_abort ();
+
+ if (x[0] != 1)
+ __builtin_abort ();
+ return 0;
+}
diff --git a/gcc/tree-vect-loop-manip.cc b/gcc/tree-vect-loop-manip.cc
index
5d7d599b9749060063ca324bed21d59e2f158541..d4fa3d517d2d3fe52603e24a5a42d32e2e9c7556
100644
--- a/gcc/tree-vect-loop-manip.cc
+++ b/gcc/tree-vect-loop-manip.cc
@@ -3738,10 +3738,9 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters,
tree nitersm1,
tree vector_iters_vf = niters_vector_mult_vf;
if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo))
{
- tree vector_iters_vf_type = uncounted_p ? sizetype
- : TREE_TYPE (vector_iters_vf);
- tree scal_iv_ty = signed_type_for (vector_iters_vf_type);
- tree tmp_niters_vf = make_ssa_name (scal_iv_ty);
+ tree tmp_niters_vf
+ = make_ssa_name (LOOP_VINFO_EARLY_BRK_IV_TYPE (loop_vinfo));
+ LOOP_VINFO_EARLY_BRK_NITERS_VAR (loop_vinfo) = tmp_niters_vf;
if (!(LOOP_VINFO_NITERS_UNCOUNTED_P (loop_vinfo)
&& get_loop_exit_edges (loop).length () == 1))
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index
00b21ecfc9586501d0e68e2ecacdfb5c013df0fa..d7ef9445cd9e09adcd792b07dd9adc8c3fbd186f
100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -11055,10 +11055,12 @@ vect_update_ivs_after_vectorizer_for_early_breaks
(loop_vec_info loop_vinfo)
final IV. */
if (niters_skip)
{
- induc_def = gimple_build (&iv_stmts, MAX_EXPR, TREE_TYPE (induc_def),
- induc_def,
- build_zero_cst (TREE_TYPE (induc_def)));
- auto stmt = gimple_build_assign (phi_var, induc_def);
+ tree induc_type = TREE_TYPE (induc_def);
+ induc_def = gimple_build (&iv_stmts, MAX_EXPR, induc_type, induc_def,
+ build_zero_cst (induc_type));
+ auto stmt = gimple_build_assign (phi_var,
+ gimple_convert (&iv_stmts, induc_type,
+ induc_def));
gimple_seq_add_stmt_without_update (&iv_stmts, stmt);
basic_block exit_bb = NULL;
/* Identify the early exit merge block. I wish we had stored this. */
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index
56ff1c846d1e0e9e90cf57a685d62085eb482a45..2669eb2ead091783e1a3623216c6296efd5d5bb8
100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -12897,6 +12897,54 @@ vectorizable_early_exit (loop_vec_info loop_vinfo,
stmt_vec_info stmt_info,
vect_record_loop_mask (loop_vinfo, masks, vec_num, vectype, NULL);
}
+ /* Check if we have a usable scalar IV type for vectorization. */
+ tree iters_vf_type = sizetype;
+ if (!LOOP_VINFO_NITERS_UNCOUNTED_P (loop_vinfo))
+ {
+ /* Find the type with the minimum precision we can use
+ for the scalar IV. */
+ tree cand_type = TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo));
+ unsigned int min_ni_width = TYPE_PRECISION (cand_type);
+ /* If the niters type is unsigned, account for the space of
+ the sign bit to still be able to store the full range. */
+ if (TYPE_UNSIGNED (cand_type))
+ min_ni_width += 1;
+
+ if (TYPE_PRECISION (cand_type) >= min_ni_width)
+ iters_vf_type = cand_type;
+ else
+ {
+ opt_scalar_int_mode cmp_mode_iter;
+ tree iv_type = NULL_TREE;
+ FOR_EACH_MODE_IN_CLASS (cmp_mode_iter, MODE_INT)
+ {
+ auto cmp_mode = cmp_mode_iter.require ();
+ unsigned int cmp_bits = GET_MODE_BITSIZE (cmp_mode);
+ if (cmp_bits >= min_ni_width
+ && targetm.scalar_mode_supported_p (cmp_mode))
+ {
+ iv_type = build_nonstandard_integer_type (cmp_bits,
+ false);
+ if (iv_type)
+ break;
+ }
+ }
+
+ if (!iv_type)
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "can't vectorize early exit because the "
"target doesn't support a scalar type "
"wide enough to hold niters.\n");
+ return false;
+ }
+ iters_vf_type = iv_type;
+ }
+ }
+
+ tree scal_iv_ty = signed_type_for (iters_vf_type);
+ LOOP_VINFO_EARLY_BRK_IV_TYPE (loop_vinfo) = scal_iv_ty;
return true;
}
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index
8f7483297ea8d578c465c8bc4be0186ea81ba333..de30422b3d32e63e33971159cb7036ce44b49aa9
100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -1249,6 +1249,10 @@ public:
inside the relavent exit blocks in order to adjust for early break. */
tree early_break_niters_var;
+ /* The type of the variable to be used to create the scalar IV for early
break
+ loops. */
+ tree early_break_iv_type;
+
/* Record statements that are needed to be live for early break vectorization
but may not have an LC PHI node materialized yet in the exits. */
auto_vec<stmt_vec_info> early_break_live_ivs;
@@ -1320,6 +1324,7 @@ public:
#define LOOP_VINFO_EARLY_BRK_DEST_BB(L) (L)->early_break_dest_bb
#define LOOP_VINFO_EARLY_BRK_VUSES(L) (L)->early_break_vuses
#define LOOP_VINFO_EARLY_BRK_NITERS_VAR(L) (L)->early_break_niters_var
+#define LOOP_VINFO_EARLY_BRK_IV_TYPE(L) (L)->early_break_iv_type
#define LOOP_VINFO_LOOP_CONDS(L) (L)->conds
#define LOOP_VINFO_LOOP_IV_COND(L) (L)->loop_iv_cond
#define LOOP_VINFO_NO_DATA_DEPENDENCIES(L) (L)->no_data_dependencies
--
diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_141-pr123089.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_141-pr123089.c
new file mode 100644
index 0000000000000000000000000000000000000000..431edbfbde6731e205788495a93d90e252e717f0
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_141-pr123089.c
@@ -0,0 +1,40 @@
+/* { dg-add-options vect_early_break } */
+/* { dg-require-effective-target vect_early_break_hw } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target avx2_runtime { target { i?86-*-* x86_64-*-* } } } */
+
+/* { dg-additional-options "-O3 -fno-strict-aliasing -march=znver3" { target { i?86-*-* x86_64-*-* } } } */
+/* { dg-final { scan-tree-dump "loop vectorized" "vect" { target { i?86-*-* x86_64-*-* } } } } */
+
+#include "tree-vect.h"
+
+struct
+{
+ int d;
+ short e;
+} i;
+
+int b;
+int *h = &b;
+
+int
+main ()
+{
+ check_vect ();
+
+ short f = 1;
+ short *g = &i.e;
+
+a:
+ if (*g = 0 & ++f, *h)
+ ;
+ else
+ {
+ int c = 0;
+ if (f)
+ goto a;
+ h = &c;
+ }
+
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_14_run.c b/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_14_run.c
new file mode 100644
index 0000000000000000000000000000000000000000..fab939bb25e4b87597cd5183e7143410eff3c596
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_14_run.c
@@ -0,0 +1,42 @@
+/* Fix for PR123089 alignment peeling with vectors and VLS and overflows. */
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-Ofast --param aarch64-autovec-preference=sve-only" } */
+/* { dg-additional-options "-msve-vector-bits=256" { target aarch64_sve256_hw } } */
+/* { dg-additional-options "-msve-vector-bits=128" { target aarch64_sve128_hw } } */
+
+#define START 2
+
+int __attribute__((noipa))
+foo (unsigned char n, int *x)
+{
+ unsigned char i = 0;
+#pragma GCC unroll 0
+ for (i = START; i < n; ++i)
+ {
+ if (x[i] == 0)
+ return i;
+ x[i] += 1;
+ }
+ return i;
+}
+
+int main ()
+{
+ int max = 255 - START;
+ int x[255 - START];
+#pragma GCC unroll 0
+ for (int i = 0; i < max; i++)
+ x[i] = 1;
+
+ x[200] = 0;
+ int res = foo (max, x);
+ if (res != 200)
+ __builtin_abort ();
+
+ if (x[START] != 2)
+ __builtin_abort ();
+
+ if (x[0] != 1)
+ __builtin_abort ();
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_15_run.c b/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_15_run.c
new file mode 100644
index 0000000000000000000000000000000000000000..13763f5ebfbea798f85142cec3fe824764609dd5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_15_run.c
@@ -0,0 +1,42 @@
+/* Fix for PR123089 alignment peeling with vectors and VLS and overflows. */
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-Ofast --param aarch64-autovec-preference=sve-only" } */
+/* { dg-additional-options "-msve-vector-bits=256" { target aarch64_sve256_hw } } */
+/* { dg-additional-options "-msve-vector-bits=128" { target aarch64_sve128_hw } } */
+
+#define START 2
+
+int __attribute__((noipa))
+foo (unsigned char n, int *x)
+{
+ unsigned char i = 0;
+#pragma GCC unroll 0
+ for (i = START; i < n; ++i)
+ {
+ if (x[i] == 0)
+ return i;
+ x[i] += 1;
+ }
+ return i;
+}
+
+int main ()
+{
+ int max = 255 - START;
+ int x[255 - START];
+#pragma GCC unroll 0
+ for (int i = 0; i < max; i++)
+ x[i] = 1;
+
+ x[33] = 0;
+ int res = foo (max, x);
+ if (res != 33)
+ __builtin_abort ();
+
+ if (x[START] != 2)
+ __builtin_abort ();
+
+ if (x[0] != 1)
+ __builtin_abort ();
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_16_run.c b/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_16_run.c
new file mode 100644
index 0000000000000000000000000000000000000000..120f737d23128ca2e7627695ccff567dd2293915
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_16_run.c
@@ -0,0 +1,41 @@
+/* Fix for PR123089 alignment peeling with vectors and VLS and overflows. */
+/* { dg-do run { target aarch64_sve_hw } } */
+/* { dg-options "-Ofast --param aarch64-autovec-preference=sve-only" } */
+/* { dg-additional-options "-msve-vector-bits=256" { target aarch64_sve256_hw } } */
+/* { dg-additional-options "-msve-vector-bits=128" { target aarch64_sve128_hw } } */
+
+#define START 2
+
+int __attribute__((noipa))
+foo (unsigned char n, int *x)
+{
+ unsigned char i = 0;
+#pragma GCC unroll 0
+ for (i = START; i < n; ++i)
+ {
+ if (x[i] == 0)
+ return i;
+ x[i] += 1;
+ }
+ return i;
+}
+
+int main ()
+{
+ int max = 255 - START;
+ int x[255 - START];
+#pragma GCC unroll 0
+ for (int i = 0; i < max; i++)
+ x[i] = 1;
+
+ int res = foo (max, x);
+ if (res != max)
+ __builtin_abort ();
+
+ if (x[START] != 2)
+ __builtin_abort ();
+
+ if (x[0] != 1)
+ __builtin_abort ();
+ return 0;
+}
diff --git a/gcc/tree-vect-loop-manip.cc b/gcc/tree-vect-loop-manip.cc
index 5d7d599b9749060063ca324bed21d59e2f158541..d4fa3d517d2d3fe52603e24a5a42d32e2e9c7556 100644
--- a/gcc/tree-vect-loop-manip.cc
+++ b/gcc/tree-vect-loop-manip.cc
@@ -3738,10 +3738,9 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
tree vector_iters_vf = niters_vector_mult_vf;
if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo))
{
- tree vector_iters_vf_type = uncounted_p ? sizetype
- : TREE_TYPE (vector_iters_vf);
- tree scal_iv_ty = signed_type_for (vector_iters_vf_type);
- tree tmp_niters_vf = make_ssa_name (scal_iv_ty);
+ tree tmp_niters_vf
+ = make_ssa_name (LOOP_VINFO_EARLY_BRK_IV_TYPE (loop_vinfo));
+ LOOP_VINFO_EARLY_BRK_NITERS_VAR (loop_vinfo) = tmp_niters_vf;
if (!(LOOP_VINFO_NITERS_UNCOUNTED_P (loop_vinfo)
&& get_loop_exit_edges (loop).length () == 1))
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 00b21ecfc9586501d0e68e2ecacdfb5c013df0fa..d7ef9445cd9e09adcd792b07dd9adc8c3fbd186f 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -11055,10 +11055,12 @@ vect_update_ivs_after_vectorizer_for_early_breaks (loop_vec_info loop_vinfo)
final IV. */
if (niters_skip)
{
- induc_def = gimple_build (&iv_stmts, MAX_EXPR, TREE_TYPE (induc_def),
- induc_def,
- build_zero_cst (TREE_TYPE (induc_def)));
- auto stmt = gimple_build_assign (phi_var, induc_def);
+ tree induc_type = TREE_TYPE (induc_def);
+ induc_def = gimple_build (&iv_stmts, MAX_EXPR, induc_type, induc_def,
+ build_zero_cst (induc_type));
+ auto stmt = gimple_build_assign (phi_var,
+ gimple_convert (&iv_stmts, induc_type,
+ induc_def));
gimple_seq_add_stmt_without_update (&iv_stmts, stmt);
basic_block exit_bb = NULL;
/* Identify the early exit merge block. I wish we had stored this. */
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 56ff1c846d1e0e9e90cf57a685d62085eb482a45..2669eb2ead091783e1a3623216c6296efd5d5bb8 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -12897,6 +12897,54 @@ vectorizable_early_exit (loop_vec_info loop_vinfo, stmt_vec_info stmt_info,
vect_record_loop_mask (loop_vinfo, masks, vec_num, vectype, NULL);
}
+ /* Check if we have a usable scalar IV type for vectorization. */
+ tree iters_vf_type = sizetype;
+ if (!LOOP_VINFO_NITERS_UNCOUNTED_P (loop_vinfo))
+ {
+ /* Find the type with the minimum precision we can use
+ for the scalar IV. */
+ tree cand_type = TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo));
+ unsigned int min_ni_width = TYPE_PRECISION (cand_type);
+ /* If the niters type is unsigned, account for the space of
+ the sign bit to still be able to store the full range. */
+ if (TYPE_UNSIGNED (cand_type))
+ min_ni_width += 1;
+
+ if (TYPE_PRECISION (cand_type) >= min_ni_width)
+ iters_vf_type = cand_type;
+ else
+ {
+ opt_scalar_int_mode cmp_mode_iter;
+ tree iv_type = NULL_TREE;
+ FOR_EACH_MODE_IN_CLASS (cmp_mode_iter, MODE_INT)
+ {
+ auto cmp_mode = cmp_mode_iter.require ();
+ unsigned int cmp_bits = GET_MODE_BITSIZE (cmp_mode);
+ if (cmp_bits >= min_ni_width
+ && targetm.scalar_mode_supported_p (cmp_mode))
+ {
+ iv_type = build_nonstandard_integer_type (cmp_bits,
+ false);
+ if (iv_type)
+ break;
+ }
+ }
+
+ if (!iv_type)
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "can't vectorize early exit because the "
+ "target doesn't support a scalar type "
+ "wide enough to hold niters.\n");
+ return false;
+ }
+ iters_vf_type = iv_type;
+ }
+ }
+
+ tree scal_iv_ty = signed_type_for (iters_vf_type);
+ LOOP_VINFO_EARLY_BRK_IV_TYPE (loop_vinfo) = scal_iv_ty;
return true;
}
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 8f7483297ea8d578c465c8bc4be0186ea81ba333..de30422b3d32e63e33971159cb7036ce44b49aa9 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -1249,6 +1249,10 @@ public:
inside the relavent exit blocks in order to adjust for early break. */
tree early_break_niters_var;
+ /* The type of the variable to be used to create the scalar IV for early break
+ loops. */
+ tree early_break_iv_type;
+
/* Record statements that are needed to be live for early break vectorization
but may not have an LC PHI node materialized yet in the exits. */
auto_vec<stmt_vec_info> early_break_live_ivs;
@@ -1320,6 +1324,7 @@ public:
#define LOOP_VINFO_EARLY_BRK_DEST_BB(L) (L)->early_break_dest_bb
#define LOOP_VINFO_EARLY_BRK_VUSES(L) (L)->early_break_vuses
#define LOOP_VINFO_EARLY_BRK_NITERS_VAR(L) (L)->early_break_niters_var
+#define LOOP_VINFO_EARLY_BRK_IV_TYPE(L) (L)->early_break_iv_type
#define LOOP_VINFO_LOOP_CONDS(L) (L)->conds
#define LOOP_VINFO_LOOP_IV_COND(L) (L)->loop_iv_cond
#define LOOP_VINFO_NO_DATA_DEPENDENCIES(L) (L)->no_data_dependencies