When loads/stores with length are used and only QImode versions are
available, vectorizable_live_operation produces wrong results for
VEC_EXTRACT.  Provide a flag to vect_get_loop_len to specify whether
the bias-adjusted length should be used.

        PR tree-optimization/122297

gcc/ChangeLog:

        * tree-vect-loop.cc (vectorize_fold_left_reduction): Adjust.
        (vectorizable_induction): Adjust.
        (vectorizable_live_operation_1): Adjust.
        (vect_get_loop_len): Provide parameter to select bias-adjusted
        length.
        (vect_gen_loop_len_mask): Adjust.
        (vect_update_ivs_after_vectorizer_for_early_breaks): Adjust.
        * tree-vect-stmts.cc (vect_get_strided_load_store_ops): Adjust.
        (vect_get_loop_variant_data_ptr_increment): Adjust.
        (vectorizable_call): Adjust.
        (vectorizable_operation): Adjust.
        (vectorizable_store): Adjust.
        (vectorizable_load): Adjust.
        (vectorizable_condition): Adjust.
        * tree-vectorizer.h (vect_get_loop_len): Add parameter.

gcc/testsuite/ChangeLog:

        * gcc.dg/vect/nodump-extractlast-1.c: Fix typo.
        * gcc.dg/vect/nodump-extractlast-2.c: New test.

Bootstrapped and regtested on s390, x86, and ppc64le.  Ok for trunk?

Signed-off-by: Juergen Christ <[email protected]>
---
 .../gcc.dg/vect/nodump-extractlast-1.c        |  2 +-
 .../gcc.dg/vect/nodump-extractlast-2.c        | 23 +++++++
 gcc/tree-vect-loop.cc                         | 62 +++++++++++++------
 gcc/tree-vect-stmts.cc                        | 24 +++----
 gcc/tree-vectorizer.h                         |  2 +-
 5 files changed, 82 insertions(+), 31 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/vect/nodump-extractlast-2.c

diff --git a/gcc/testsuite/gcc.dg/vect/nodump-extractlast-1.c b/gcc/testsuite/gcc.dg/vect/nodump-extractlast-1.c
index 980ac3e42188..83d8a38f13e3 100644
--- a/gcc/testsuite/gcc.dg/vect/nodump-extractlast-1.c
+++ b/gcc/testsuite/gcc.dg/vect/nodump-extractlast-1.c
@@ -1,4 +1,4 @@
-/* Check for a bung in the treatment of LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS when
+/* Check for a bug in the treatment of LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS when
    using VEC_EXTRACT.  */
 /* { dg-require-effective-target vect_int } */
 
diff --git a/gcc/testsuite/gcc.dg/vect/nodump-extractlast-2.c b/gcc/testsuite/gcc.dg/vect/nodump-extractlast-2.c
new file mode 100644
index 000000000000..9697687c1084
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/nodump-extractlast-2.c
@@ -0,0 +1,23 @@
+/* Check for a bug in the treatment of LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS when
+   using VEC_EXTRACT.  This variant uses .LEN_LOAD which might use QImode
+   vectors during load, but SImode vectors for the extraction.  */
+int __attribute__ ((noinline, noclone))
+test_int (int *x, int n, int value)
+{
+  int last;
+  for (int j = 0; j < n; ++j)
+    {
+      last = x[j];
+      x[j] = last * value;
+    }
+  return last;
+}
+
+int
+main ()
+{
+  int arr[] = {1,2,3,4,5,1};
+  if (test_int (arr, sizeof (arr) / sizeof (arr[0]), 42) != 1)
+    __builtin_abort();
+  return 0;
+}
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 8e60a433596f..8e40f0ceb93b 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -6503,8 +6503,11 @@ vectorize_fold_left_reduction (loop_vec_info loop_vinfo,
        mask = vec_opmask[i];
       if (LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo))
        {
+         /* ??? Why do we use LOAD_STORE_BIAS here when we do not do a load or
+            a store?  Shouldn't we instead just ensure that LEN represents the
+            number of elements in the vector?  */
          len = vect_get_loop_len (loop_vinfo, gsi, lens, vec_num, vectype_in,
-                                  i, 1);
+                                  i, 1, false);
          signed char biasval = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
          bias = build_int_cst (intQI_type_node, biasval);
          if (!is_cond_op)
@@ -9885,7 +9888,7 @@ vectorizable_induction (loop_vec_info loop_vinfo,
                   _21 = vect_vec_iv_.6_22 + vect_cst__22;  */
              vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
              tree len = vect_get_loop_len (loop_vinfo, NULL, lens, 1,
-                                           vectype, 0, 0);
+                                           vectype, 0, 0, false);
              if (SCALAR_FLOAT_TYPE_P (stept))
                expr = gimple_build (&stmts, FLOAT_EXPR, stept, len);
              else
@@ -10032,7 +10035,7 @@ vectorizable_live_operation_1 (loop_vec_info loop_vinfo, basic_block exit_bb,
     {
       /* Emit:
 
-        SCALAR_RES = VEC_EXTRACT <VEC_LHS, LEN - (BIAS + 1)>
+        SCALAR_RES = VEC_EXTRACT <VEC_LHS, LEN - 1>
 
         where VEC_LHS is the vectorized live-out result, LEN is the length of
         the vector, BIAS is the load-store bias.  The bias should not be used
@@ -10043,21 +10046,14 @@ vectorizable_live_operation_1 (loop_vec_info loop_vinfo, basic_block exit_bb,
       gimple_stmt_iterator gsi = gsi_last (tem);
       tree len = vect_get_loop_len (loop_vinfo, &gsi,
                                    &LOOP_VINFO_LENS (loop_vinfo),
-                                   1, vectype, 0, 1);
+                                   1, vectype, 0, 1, false);
       gimple_seq_add_seq (&stmts, tem);
 
-      /* BIAS + 1.  */
-      signed char biasval = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
-      tree bias_plus_one
-       = int_const_binop (PLUS_EXPR,
-                          build_int_cst (TREE_TYPE (len), biasval),
-                          build_one_cst (TREE_TYPE (len)));
-
-      /* LAST_INDEX = LEN - (BIAS + 1).  */
+      /* LAST_INDEX = LEN - 1.  */
       tree last_index = gimple_build (&stmts, MINUS_EXPR, TREE_TYPE (len),
-                                    len, bias_plus_one);
+                                    len, build_one_cst (TREE_TYPE (len)));
 
-      /* SCALAR_RES = VEC_EXTRACT <VEC_LHS, LEN - (BIAS + 1)>.  */
+      /* SCALAR_RES = VEC_EXTRACT <VEC_LHS, LEN - 1>.  */
       tree scalar_res
        = gimple_build (&stmts, CFN_VEC_EXTRACT, TREE_TYPE (vectype),
                        vec_lhs_phi, last_index);
@@ -10678,7 +10674,7 @@ vect_record_loop_len (loop_vec_info loop_vinfo, vec_loop_lens *lens,
 tree
 vect_get_loop_len (loop_vec_info loop_vinfo, gimple_stmt_iterator *gsi,
                   vec_loop_lens *lens, unsigned int nvectors, tree vectype,
-                  unsigned int index, unsigned int factor)
+                  unsigned int index, unsigned int factor, bool adjusted)
 {
   rgroup_controls *rgl = &(*lens)[nvectors - 1];
   bool use_bias_adjusted_len =
@@ -10711,7 +10707,7 @@ vect_get_loop_len (loop_vec_info loop_vinfo, gimple_stmt_iterator *gsi,
        }
     }
 
-  if (use_bias_adjusted_len)
+  if (use_bias_adjusted_len && adjusted)
     return rgl->bias_adjusted_ctrl;
 
   tree loop_len = rgl->controls[index];
@@ -10734,6 +10730,36 @@ vect_get_loop_len (loop_vec_info loop_vinfo, gimple_stmt_iterator *gsi,
            gsi_insert_seq_before (gsi, seq, GSI_SAME_STMT);
        }
     }
+  else if (factor && rgl->factor != factor)
+    {
+      /* The number of scalars per iteration, scalar occupied bytes and
+        the number of vectors are all compile-time constants.  */
+      unsigned int nscalars_per_iter
+       = exact_div (nvectors * TYPE_VECTOR_SUBPARTS (vectype),
+                    LOOP_VINFO_VECT_FACTOR (loop_vinfo)).to_constant ();
+      unsigned int rglvecsize = rgl->factor * rgl->max_nscalars_per_iter;
+      unsigned int vecsize = nscalars_per_iter * factor;
+      if (rglvecsize > vecsize)
+       {
+         unsigned int fac = rglvecsize / vecsize;
+         tree iv_type = LOOP_VINFO_RGROUP_IV_TYPE (loop_vinfo);
+         gimple_seq seq = NULL;
+         loop_len = gimple_build (&seq, EXACT_DIV_EXPR, iv_type, loop_len,
+                                  build_int_cst (iv_type, fac));
+         if (seq)
+           gsi_insert_seq_before (gsi, seq, GSI_SAME_STMT);
+       }
+      else if (rglvecsize < vecsize)
+       {
+         unsigned int fac = vecsize / rglvecsize;
+         tree iv_type = LOOP_VINFO_RGROUP_IV_TYPE (loop_vinfo);
+         gimple_seq seq = NULL;
+         loop_len = gimple_build (&seq, MULT_EXPR, iv_type, loop_len,
+                                  build_int_cst (iv_type, fac));
+         if (seq)
+           gsi_insert_seq_before (gsi, seq, GSI_SAME_STMT);
+       }
+    }
   return loop_len;
 }
 
@@ -10751,7 +10777,7 @@ vect_gen_loop_len_mask (loop_vec_info loop_vinfo, gimple_stmt_iterator *gsi,
   tree all_one_mask = build_all_ones_cst (vectype);
   tree all_zero_mask = build_zero_cst (vectype);
   tree len = vect_get_loop_len (loop_vinfo, gsi, lens, nvectors, vectype, index,
-                               factor);
+                               factor, true);
   tree bias = build_int_cst (intQI_type_node,
                             LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo));
   tree len_mask = make_temp_ssa_name (TREE_TYPE (stmt), NULL, "vec_len_mask");
@@ -11022,7 +11048,7 @@ vect_update_ivs_after_vectorizer_for_early_breaks (loop_vec_info loop_vinfo)
     {
       vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
       tree_vf = vect_get_loop_len (loop_vinfo, NULL, lens, 1,
-                                  NULL_TREE, 0, 0);
+                                  NULL_TREE, 0, 0, true);
     }
 
   tree iter_var;
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index ed2486451ca0..a97f7ec07c4f 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -3182,7 +3182,7 @@ vect_get_strided_load_store_ops (stmt_vec_info stmt_info, slp_tree node,
         .MASK_LEN_SCATTER_STORE (vectp_a.9_7, ... );
         vectp_a.9_26 = vectp_a.9_7 + ivtmp_8;  */
       tree loop_len
-       = vect_get_loop_len (loop_vinfo, gsi, loop_lens, 1, vectype, 0, 0);
+       = vect_get_loop_len (loop_vinfo, gsi, loop_lens, 1, vectype, 0, 0, true);
       tree tmp
        = fold_build2 (MULT_EXPR, sizetype,
                       fold_convert (sizetype, unshare_expr (DR_STEP (dr))),
@@ -3253,7 +3253,7 @@ vect_get_loop_variant_data_ptr_increment (
      addr = addr + .SELECT_VL (ARG..) * step;
   */
   tree loop_len
-    = vect_get_loop_len (loop_vinfo, gsi, loop_lens, 1, aggr_type, 0, 0);
+    = vect_get_loop_len (loop_vinfo, gsi, loop_lens, 1, aggr_type, 0, 0, true);
   tree len_type = TREE_TYPE (loop_len);
   /* Since the outcome of .SELECT_VL is element size, we should adjust
      it into bytesize so that it can be used in address pointer variable
@@ -3842,7 +3842,7 @@ vectorizable_call (vec_info *vinfo,
                    {
                      unsigned int vec_num = vec_oprnds0.length ();
                      tree len = vect_get_loop_len (loop_vinfo, gsi, lens,
-                                                   vec_num, vectype_out, i, 1);
+                                                   vec_num, vectype_out, i, 1, true);
                      signed char biasval
                        = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
                      tree bias = build_int_cst (intQI_type_node, biasval);
@@ -7041,7 +7041,7 @@ vectorizable_operation (vec_info *vinfo,
          if (len_loop_p)
            {
              tree len = vect_get_loop_len (loop_vinfo, gsi, lens,
-                                           vec_num, vectype, i, 1);
+                                           vec_num, vectype, i, 1, true);
              signed char biasval
                = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
              tree bias = build_int_cst (intQI_type_node, biasval);
@@ -8757,7 +8757,7 @@ vectorizable_store (vec_info *vinfo,
            {
              if (loop_lens)
                final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
-                                              ncopies, vectype, j, 1);
+                                              ncopies, vectype, j, 1, true);
              else
                final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
              signed char biasval
@@ -8950,7 +8950,7 @@ vectorizable_store (vec_info *vinfo,
                  if (loop_lens)
                    final_len = vect_get_loop_len (loop_vinfo, gsi,
                                                   loop_lens, num_stmts,
-                                                  vectype, j, 1);
+                                                  vectype, j, 1, true);
                  else
                    final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
 
@@ -9334,7 +9334,7 @@ vectorizable_store (vec_info *vinfo,
          unsigned factor
            = (new_ovmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vmode);
          final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
-                                        vec_num, vectype, i, factor);
+                                        vec_num, vectype, i, factor, true);
        }
       else if (final_mask)
        {
@@ -10697,7 +10697,7 @@ vectorizable_load (vec_info *vinfo,
            {
              if (loop_lens)
                final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
-                                              ncopies, vectype, j, 1);
+                                              ncopies, vectype, j, 1, true);
              else
                final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
              signed char biasval
@@ -10900,7 +10900,7 @@ vectorizable_load (vec_info *vinfo,
                {
                  if (loop_lens)
                    final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
-                                                  vec_num, vectype, i, 1);
+                                                  vec_num, vectype, i, 1, true);
                  else
                    final_len = build_int_cst (sizetype,
                                               TYPE_VECTOR_SUBPARTS (vectype));
@@ -11352,7 +11352,7 @@ vectorizable_load (vec_info *vinfo,
                unsigned factor
                  = (new_ovmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vmode);
                final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
-                                              vec_num, vectype, i, factor);
+                                              vec_num, vectype, i, factor, true);
              }
            else if (final_mask)
              {
@@ -12429,8 +12429,10 @@ vectorizable_condition (vec_info *vinfo,
            {
              if (lens)
                {
+                 /* ??? Do we really want the adjusted LEN here?  Isn't this
+                    based on number of elements?  */
                  len = vect_get_loop_len (loop_vinfo, gsi, lens,
-                                          vec_num, vectype, i, 1);
+                                          vec_num, vectype, i, 1, true);
                  signed char biasval
                    = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
                  bias = build_int_cst (intQI_type_node, biasval);
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 7a38d4969cf2..2116507b69ce 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -2673,7 +2673,7 @@ extern void vect_record_loop_len (loop_vec_info, vec_loop_lens *, unsigned int,
                                  tree, unsigned int);
 extern tree vect_get_loop_len (loop_vec_info, gimple_stmt_iterator *,
                               vec_loop_lens *, unsigned int, tree,
-                              unsigned int, unsigned int);
+                              unsigned int, unsigned int, bool);
 extern tree vect_gen_loop_len_mask (loop_vec_info, gimple_stmt_iterator *,
                                    gimple_stmt_iterator *, vec_loop_lens *,
                                    unsigned int, tree, tree, unsigned int,
-- 
2.43.7

Reply via email to