When load/store with length is used and only QImode versions are
available, vectorizable_live_operation produces wrong results for
VEC_EXTRACT: the loop length it obtains may be bias-adjusted and
measured in bytes, while the VEC_EXTRACT index has to be an element
count. Provide a flag for vect_get_loop_len to specify whether the
bias-adjusted length should be used, and rescale the returned length
when the requested factor differs from the factor recorded for the
rgroup.
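To make the distinction concrete, here is an illustrative sketch (not
part of the patch itself; the variables are the ones used at the
existing call sites):

  /* The length feeds a length-controlled internal function together
     with the load/store bias operand: request the bias-adjusted
     control.  */
  len = vect_get_loop_len (loop_vinfo, gsi, lens, vec_num, vectype, i, 1,
                           true);

  /* The length is turned into the VEC_EXTRACT element index: request
     the unadjusted length and extract element LEN - 1.  */
  len = vect_get_loop_len (loop_vinfo, &gsi, &LOOP_VINFO_LENS (loop_vinfo),
                           1, vectype, 0, 1, false);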
PR tree-optimization/122297
gcc/ChangeLog:
* tree-vect-loop.cc (vectorize_fold_left_reduction): Adjust call to
vect_get_loop_len.
(vectorizable_induction): Likewise.
(vectorizable_live_operation_1): Use the unadjusted length and
extract element LEN - 1 instead of LEN - (BIAS + 1).
(vect_get_loop_len): Add parameter to select whether the
bias-adjusted length is returned; rescale the length if the
requested factor differs from the rgroup factor.
(vect_gen_loop_len_mask): Adjust call to vect_get_loop_len.
(vect_update_ivs_after_vectorizer_for_early_breaks): Likewise.
* tree-vect-stmts.cc (vect_get_strided_load_store_ops): Likewise.
(vect_get_loop_variant_data_ptr_increment): Likewise.
(vectorizable_call): Likewise.
(vectorizable_operation): Likewise.
(vectorizable_store): Likewise.
(vectorizable_load): Likewise.
(vectorizable_condition): Likewise.
* tree-vectorizer.h (vect_get_loop_len): Add parameter.
gcc/testsuite/ChangeLog:
* gcc.dg/vect/nodump-extractlast-1.c: Fix typo.
* gcc.dg/vect/nodump-extractlast-2.c: New test.
Bootstrapped and regtested on s390, x86, and ppc64le. Ok for trunk?
Signed-off-by: Juergen Christ <[email protected]>
---
.../gcc.dg/vect/nodump-extractlast-1.c | 2 +-
.../gcc.dg/vect/nodump-extractlast-2.c | 23 +++++++
gcc/tree-vect-loop.cc | 62 +++++++++++++------
gcc/tree-vect-stmts.cc | 24 +++----
gcc/tree-vectorizer.h | 2 +-
5 files changed, 82 insertions(+), 31 deletions(-)
create mode 100644 gcc/testsuite/gcc.dg/vect/nodump-extractlast-2.c
diff --git a/gcc/testsuite/gcc.dg/vect/nodump-extractlast-1.c b/gcc/testsuite/gcc.dg/vect/nodump-extractlast-1.c
index 980ac3e42188..83d8a38f13e3 100644
--- a/gcc/testsuite/gcc.dg/vect/nodump-extractlast-1.c
+++ b/gcc/testsuite/gcc.dg/vect/nodump-extractlast-1.c
@@ -1,4 +1,4 @@
-/* Check for a bung in the treatment of LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS when
+/* Check for a bug in the treatment of LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS when
using VEC_EXTRACT. */
/* { dg-require-effective-target vect_int } */
diff --git a/gcc/testsuite/gcc.dg/vect/nodump-extractlast-2.c b/gcc/testsuite/gcc.dg/vect/nodump-extractlast-2.c
new file mode 100644
index 000000000000..9697687c1084
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/nodump-extractlast-2.c
@@ -0,0 +1,23 @@
+/* Check for a bug in the treatment of LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS when
+ using VEC_EXTRACT. This variant uses .LEN_LOAD which might use QImode
+ vectors during load, but SImode vectors for the extraction. */
+int __attribute__ ((noinline, noclone))
+test_int (int *x, int n, int value)
+{
+ int last;
+ for (int j = 0; j < n; ++j)
+ {
+ last = x[j];
+ x[j] = last * value;
+ }
+ return last;
+}
+
+int
+main ()
+{
+ int arr[] = {1,2,3,4,5,1};
+ if (test_int (arr, sizeof (arr) / sizeof (arr[0]), 42) != 1)
+ __builtin_abort();
+ return 0;
+}
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 8e60a433596f..8e40f0ceb93b 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -6503,8 +6503,11 @@ vectorize_fold_left_reduction (loop_vec_info loop_vinfo,
mask = vec_opmask[i];
if (LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo))
{
+ /* ??? Why do we use LOAD_STORE_BIAS here when we do not do a load or
+ a store? Shouldn't we instead just ensure that LEN represents the
+ number of elements in the vector? */
len = vect_get_loop_len (loop_vinfo, gsi, lens, vec_num, vectype_in,
- i, 1);
+ i, 1, false);
signed char biasval = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
bias = build_int_cst (intQI_type_node, biasval);
if (!is_cond_op)
@@ -9885,7 +9888,7 @@ vectorizable_induction (loop_vec_info loop_vinfo,
_21 = vect_vec_iv_.6_22 + vect_cst__22; */
vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
tree len = vect_get_loop_len (loop_vinfo, NULL, lens, 1,
- vectype, 0, 0);
+ vectype, 0, 0, false);
if (SCALAR_FLOAT_TYPE_P (stept))
expr = gimple_build (&stmts, FLOAT_EXPR, stept, len);
else
@@ -10032,7 +10035,7 @@ vectorizable_live_operation_1 (loop_vec_info loop_vinfo, basic_block exit_bb,
{
/* Emit:
- SCALAR_RES = VEC_EXTRACT <VEC_LHS, LEN - (BIAS + 1)>
+ SCALAR_RES = VEC_EXTRACT <VEC_LHS, LEN - 1>
where VEC_LHS is the vectorized live-out result, LEN is the length of
the vector, BIAS is the load-store bias. The bias should not be used
@@ -10043,21 +10046,14 @@ vectorizable_live_operation_1 (loop_vec_info loop_vinfo, basic_block exit_bb,
gimple_stmt_iterator gsi = gsi_last (tem);
tree len = vect_get_loop_len (loop_vinfo, &gsi,
&LOOP_VINFO_LENS (loop_vinfo),
- 1, vectype, 0, 1);
+ 1, vectype, 0, 1, false);
gimple_seq_add_seq (&stmts, tem);
- /* BIAS + 1. */
- signed char biasval = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
- tree bias_plus_one
- = int_const_binop (PLUS_EXPR,
- build_int_cst (TREE_TYPE (len), biasval),
- build_one_cst (TREE_TYPE (len)));
-
- /* LAST_INDEX = LEN - (BIAS + 1). */
+ /* LAST_INDEX = LEN - 1. */
tree last_index = gimple_build (&stmts, MINUS_EXPR, TREE_TYPE (len),
- len, bias_plus_one);
+ len, build_one_cst (TREE_TYPE (len)));
- /* SCALAR_RES = VEC_EXTRACT <VEC_LHS, LEN - (BIAS + 1)>. */
+ /* SCALAR_RES = VEC_EXTRACT <VEC_LHS, LEN - 1>. */
tree scalar_res
= gimple_build (&stmts, CFN_VEC_EXTRACT, TREE_TYPE (vectype),
vec_lhs_phi, last_index);
@@ -10678,7 +10674,7 @@ vect_record_loop_len (loop_vec_info loop_vinfo, vec_loop_lens *lens,
tree
vect_get_loop_len (loop_vec_info loop_vinfo, gimple_stmt_iterator *gsi,
vec_loop_lens *lens, unsigned int nvectors, tree vectype,
- unsigned int index, unsigned int factor)
+ unsigned int index, unsigned int factor, bool adjusted)
{
rgroup_controls *rgl = &(*lens)[nvectors - 1];
bool use_bias_adjusted_len =
@@ -10711,7 +10707,7 @@ vect_get_loop_len (loop_vec_info loop_vinfo, gimple_stmt_iterator *gsi,
}
}
- if (use_bias_adjusted_len)
+ if (use_bias_adjusted_len && adjusted)
return rgl->bias_adjusted_ctrl;
tree loop_len = rgl->controls[index];
@@ -10734,6 +10730,36 @@ vect_get_loop_len (loop_vec_info loop_vinfo, gimple_stmt_iterator *gsi,
gsi_insert_seq_before (gsi, seq, GSI_SAME_STMT);
}
}
+ else if (factor && rgl->factor != factor)
+ {
+ /* The number of scalars per iteration, the number of bytes per
+ scalar and the number of vectors are all compile-time constants. */
+ unsigned int nscalars_per_iter
+ = exact_div (nvectors * TYPE_VECTOR_SUBPARTS (vectype),
+ LOOP_VINFO_VECT_FACTOR (loop_vinfo)).to_constant ();
+ unsigned int rglvecsize = rgl->factor * rgl->max_nscalars_per_iter;
+ unsigned int vecsize = nscalars_per_iter * factor;
+ if (rglvecsize > vecsize)
+ {
+ unsigned int fac = rglvecsize / vecsize;
+ tree iv_type = LOOP_VINFO_RGROUP_IV_TYPE (loop_vinfo);
+ gimple_seq seq = NULL;
+ loop_len = gimple_build (&seq, EXACT_DIV_EXPR, iv_type, loop_len,
+ build_int_cst (iv_type, fac));
+ if (seq)
+ gsi_insert_seq_before (gsi, seq, GSI_SAME_STMT);
+ }
+ else if (rglvecsize < vecsize)
+ {
+ unsigned int fac = vecsize / rglvecsize;
+ tree iv_type = LOOP_VINFO_RGROUP_IV_TYPE (loop_vinfo);
+ gimple_seq seq = NULL;
+ loop_len = gimple_build (&seq, MULT_EXPR, iv_type, loop_len,
+ build_int_cst (iv_type, fac));
+ if (seq)
+ gsi_insert_seq_before (gsi, seq, GSI_SAME_STMT);
+ }
+ }
return loop_len;
}
@@ -10751,7 +10777,7 @@ vect_gen_loop_len_mask (loop_vec_info loop_vinfo, gimple_stmt_iterator *gsi,
tree all_one_mask = build_all_ones_cst (vectype);
tree all_zero_mask = build_zero_cst (vectype);
tree len = vect_get_loop_len (loop_vinfo, gsi, lens, nvectors, vectype, index,
- factor);
+ factor, true);
tree bias = build_int_cst (intQI_type_node,
LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo));
tree len_mask = make_temp_ssa_name (TREE_TYPE (stmt), NULL, "vec_len_mask");
@@ -11022,7 +11048,7 @@ vect_update_ivs_after_vectorizer_for_early_breaks (loop_vec_info loop_vinfo)
{
vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
tree_vf = vect_get_loop_len (loop_vinfo, NULL, lens, 1,
- NULL_TREE, 0, 0);
+ NULL_TREE, 0, 0, true);
}
tree iter_var;
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index ed2486451ca0..a97f7ec07c4f 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -3182,7 +3182,7 @@ vect_get_strided_load_store_ops (stmt_vec_info stmt_info, slp_tree node,
.MASK_LEN_SCATTER_STORE (vectp_a.9_7, ... );
vectp_a.9_26 = vectp_a.9_7 + ivtmp_8; */
tree loop_len
- = vect_get_loop_len (loop_vinfo, gsi, loop_lens, 1, vectype, 0, 0);
+ = vect_get_loop_len (loop_vinfo, gsi, loop_lens, 1, vectype, 0, 0, true);
tree tmp
= fold_build2 (MULT_EXPR, sizetype,
fold_convert (sizetype, unshare_expr (DR_STEP (dr))),
@@ -3253,7 +3253,7 @@ vect_get_loop_variant_data_ptr_increment (
addr = addr + .SELECT_VL (ARG..) * step;
*/
tree loop_len
- = vect_get_loop_len (loop_vinfo, gsi, loop_lens, 1, aggr_type, 0, 0);
+ = vect_get_loop_len (loop_vinfo, gsi, loop_lens, 1, aggr_type, 0, 0, true);
tree len_type = TREE_TYPE (loop_len);
/* Since the outcome of .SELECT_VL is element size, we should adjust
it into bytesize so that it can be used in address pointer variable
@@ -3842,7 +3842,7 @@ vectorizable_call (vec_info *vinfo,
{
unsigned int vec_num = vec_oprnds0.length ();
tree len = vect_get_loop_len (loop_vinfo, gsi, lens,
- vec_num, vectype_out, i, 1);
+ vec_num, vectype_out, i, 1, true);
signed char biasval
= LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
tree bias = build_int_cst (intQI_type_node, biasval);
@@ -7041,7 +7041,7 @@ vectorizable_operation (vec_info *vinfo,
if (len_loop_p)
{
tree len = vect_get_loop_len (loop_vinfo, gsi, lens,
- vec_num, vectype, i, 1);
+ vec_num, vectype, i, 1, true);
signed char biasval
= LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
tree bias = build_int_cst (intQI_type_node, biasval);
@@ -8757,7 +8757,7 @@ vectorizable_store (vec_info *vinfo,
{
if (loop_lens)
final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
- ncopies, vectype, j, 1);
+ ncopies, vectype, j, 1, true);
else
final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
signed char biasval
@@ -8950,7 +8950,7 @@ vectorizable_store (vec_info *vinfo,
if (loop_lens)
final_len = vect_get_loop_len (loop_vinfo, gsi,
loop_lens, num_stmts,
- vectype, j, 1);
+ vectype, j, 1, true);
else
final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
@@ -9334,7 +9334,7 @@ vectorizable_store (vec_info *vinfo,
unsigned factor
= (new_ovmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vmode);
final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
- vec_num, vectype, i, factor);
+ vec_num, vectype, i, factor, true);
}
else if (final_mask)
{
@@ -10697,7 +10697,7 @@ vectorizable_load (vec_info *vinfo,
{
if (loop_lens)
final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
- ncopies, vectype, j, 1);
+ ncopies, vectype, j, 1, true);
else
final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
signed char biasval
@@ -10900,7 +10900,7 @@ vectorizable_load (vec_info *vinfo,
{
if (loop_lens)
final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
- vec_num, vectype, i, 1);
+ vec_num, vectype, i, 1, true);
else
final_len = build_int_cst (sizetype,
TYPE_VECTOR_SUBPARTS (vectype));
@@ -11352,7 +11352,7 @@ vectorizable_load (vec_info *vinfo,
unsigned factor
= (new_ovmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vmode);
final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
- vec_num, vectype, i, factor);
+ vec_num, vectype, i, factor, true);
}
else if (final_mask)
{
@@ -12429,8 +12429,10 @@ vectorizable_condition (vec_info *vinfo,
{
if (lens)
{
+ /* ??? Do we really want the adjusted LEN here? Isn't this
+ based on the number of elements? */
len = vect_get_loop_len (loop_vinfo, gsi, lens,
- vec_num, vectype, i, 1);
+ vec_num, vectype, i, 1, true);
signed char biasval
= LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
bias = build_int_cst (intQI_type_node, biasval);
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 7a38d4969cf2..2116507b69ce 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -2673,7 +2673,7 @@ extern void vect_record_loop_len (loop_vec_info, vec_loop_lens *, unsigned int,
tree, unsigned int);
extern tree vect_get_loop_len (loop_vec_info, gimple_stmt_iterator *,
vec_loop_lens *, unsigned int, tree,
- unsigned int, unsigned int);
+ unsigned int, unsigned int, bool);
extern tree vect_gen_loop_len_mask (loop_vec_info, gimple_stmt_iterator *,
gimple_stmt_iterator *, vec_loop_lens *,
unsigned int, tree, tree, unsigned int,
--
2.43.7