vect: Elide single-iteration loops over ncopies in vectorizable_load.

With SLP, ncopies is always 1 here, so the `for (j = 0; j < 1; j++)`
loops and every `vec_num * j + i` index can be simplified away.

        * tree-vect-stmts.cc (vectorizable_load): Elide single-iteration
        loops over ncopies and simplify the resulting vector indices.
---
 gcc/tree-vect-stmts.cc | 54 ++++++++++++++++--------------------------
 1 file changed, 20 insertions(+), 34 deletions(-)

diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 2efa000034c..717d4694b88 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -11007,10 +11007,9 @@ vectorizable_load (vec_info *vinfo,
       gcc_assert (!grouped_load && !slp_perm);
 
       unsigned int inside_cost = 0, prologue_cost = 0;
-      for (j = 0; j < 1; j++)
        {
          /* 1. Create the vector or array pointer update chain.  */
-         if (j == 0 && !costing_p)
+         if (!costing_p)
            {
              if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
                vect_get_gather_scatter_ops (loop_vinfo, loop, stmt_info,
@@ -11022,13 +11021,6 @@ vectorizable_load (vec_info *vinfo,
                                              at_loop, offset, &dummy, gsi,
                                              &ptr_incr, false, bump);
            }
-         else if (!costing_p)
-           {
-             gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
-             if (!STMT_VINFO_GATHER_SCATTER_P (stmt_info))
-               dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
-                                              gsi, stmt_info, bump);
-           }
 
          gimple *new_stmt = NULL;
          for (i = 0; i < vec_num; i++)
@@ -11039,12 +11031,11 @@ vectorizable_load (vec_info *vinfo,
              if (!costing_p)
                {
                  if (mask)
-                   vec_mask = vec_masks[vec_num * j + i];
+                   vec_mask = vec_masks[i];
                  if (loop_masks)
                    final_mask
                      = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
-                                           vec_num, vectype,
-                                           vec_num * j + i);
+                                           vec_num, vectype, i);
                  if (vec_mask)
                    final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
                                                   final_mask, vec_mask, gsi);
@@ -11067,7 +11058,7 @@ vectorizable_load (vec_info *vinfo,
                      continue;
                    }
                  if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
-                   vec_offset = vec_offsets[vec_num * j + i];
+                   vec_offset = vec_offsets[i];
                  tree zero = build_zero_cst (vectype);
                  tree scale = size_int (gs_info.scale);
 
@@ -11076,8 +11067,7 @@ vectorizable_load (vec_info *vinfo,
                      if (loop_lens)
                        final_len
                          = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
-                                              vec_num, vectype,
-                                              vec_num * j + i, 1);
+                                              vec_num, vectype, i, 1);
                      else
                        final_len
                          = build_int_cst (sizetype,
@@ -11148,7 +11138,7 @@ vectorizable_load (vec_info *vinfo,
                    {
                      new_stmt = vect_build_one_gather_load_call
                                   (vinfo, stmt_info, gsi, &gs_info,
-                                   dataref_ptr, vec_offsets[vec_num * j + i],
+                                   dataref_ptr, vec_offsets[i],
                                    final_mask);
                      data_ref = NULL_TREE;
                    }
@@ -11159,7 +11149,7 @@ vectorizable_load (vec_info *vinfo,
                         data with just the lower lanes filled.  */
                      new_stmt = vect_build_one_gather_load_call
                          (vinfo, stmt_info, gsi, &gs_info,
-                          dataref_ptr, vec_offsets[2 * vec_num * j + 2 * i],
+                          dataref_ptr, vec_offsets[2 * i],
                           final_mask);
                      tree low = make_ssa_name (vectype);
                      gimple_set_lhs (new_stmt, low);
@@ -11204,7 +11194,7 @@ vectorizable_load (vec_info *vinfo,
                      new_stmt = vect_build_one_gather_load_call
                                   (vinfo, stmt_info, gsi, &gs_info,
                                    dataref_ptr,
-                                   vec_offsets[2 * vec_num * j + 2 * i + 1],
+                                   vec_offsets[2 * i + 1],
                                    final_mask);
                      tree high = make_ssa_name (vectype);
                      gimple_set_lhs (new_stmt, high);
@@ -11229,8 +11219,8 @@ vectorizable_load (vec_info *vinfo,
                    {
                      /* We have a offset vector with double the number of
                         lanes.  Select the low/high part accordingly.  */
-                     vec_offset = vec_offsets[(vec_num * j + i) / 2];
-                     if ((vec_num * j + i) & 1)
+                     vec_offset = vec_offsets[i / 2];
+                     if (i & 1)
                        {
                          int count = offset_nunits.to_constant ();
                          vec_perm_builder sel (count, count, 1);
@@ -11290,9 +11280,8 @@ vectorizable_load (vec_info *vinfo,
                     than the data vector for now.  */
                  unsigned HOST_WIDE_INT factor
                    = const_offset_nunits / const_nunits;
-                 vec_offset = vec_offsets[(vec_num * j + i) / factor];
-                 unsigned elt_offset
-                   = ((vec_num * j + i) % factor) * const_nunits;
+                 vec_offset = vec_offsets[i / factor];
+                 unsigned elt_offset = (i % factor) * const_nunits;
                  tree idx_type = TREE_TYPE (TREE_TYPE (vec_offset));
                  tree scale = size_int (gs_info.scale);
                  align = get_object_alignment (DR_REF (first_dr_info->dr));
@@ -11372,10 +11361,9 @@ vectorizable_load (vec_info *vinfo,
   /* For costing some adjacent vector loads, we'd like to cost with
      the total number of them once instead of cost each one by one. */
   unsigned int n_adjacent_loads = 0;
-  for (j = 0; j < 1; j++)
     {
       /* 1. Create the vector or array pointer update chain.  */
-      if (j == 0 && !costing_p)
+      if (!costing_p)
        {
          bool simd_lane_access_p
            = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
@@ -11450,11 +11438,10 @@ vectorizable_load (vec_info *vinfo,
          if (!costing_p)
            {
              if (mask)
-               vec_mask = vec_masks[vec_num * j + i];
+               vec_mask = vec_masks[i];
              if (loop_masks)
                final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
-                                                vec_num, vectype,
-                                                vec_num * j + i);
+                                                vec_num, vectype, i);
              if (vec_mask)
                final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
                                               final_mask, vec_mask, gsi);
@@ -11504,8 +11491,7 @@ vectorizable_load (vec_info *vinfo,
                    unsigned factor
                      = (new_ovmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vmode);
                    final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
-                                                  vec_num, vectype,
-                                                  vec_num * j + i, factor);
+                                                  vec_num, vectype, i, factor);
                  }
                else if (final_mask)
                  {
@@ -11612,12 +11598,12 @@ vectorizable_load (vec_info *vinfo,
                    /* Try to use a single smaller load when we are about
                       to load excess elements compared to the unrolled
                       scalar loop.  */
-                   if (known_gt ((vec_num * j + i + 1) * nunits,
+                   if (known_gt ((i + 1) * nunits,
                                       (group_size * vf - gap)))
                      {
                        poly_uint64 remain = ((group_size * vf - gap)
-                                             - (vec_num * j + i) * nunits);
-                       if (known_ge ((vec_num * j + i + 1) * nunits
+                                             - i * nunits);
+                       if (known_ge ((i + 1) * nunits
                                      - (group_size * vf - gap), nunits))
                          /* DR will be unused.  */
                          ltype = NULL_TREE;
@@ -11920,7 +11906,7 @@ vectorizable_load (vec_info *vinfo,
              if (alignment_support_scheme == dr_explicit_realign_optimized)
                {
                  gcc_assert (phi);
-                 if (i == vec_num - 1 && j == 0)
+                 if (i == vec_num - 1)
                    add_phi_arg (phi, lsq, loop_latch_edge (containing_loop),
                                 UNKNOWN_LOCATION);
                  msq = lsq;
-- 
2.43.0

Reply via email to