This cleans the rest of vectorizable_load from non-SLP code paths.

	* tree-vect-stmts.cc (vectorizable_load): Remove non-SLP paths.
	Step 2.
---
 gcc/tree-vect-stmts.cc | 185 +++++++++++------------------------------
 1 file changed, 50 insertions(+), 135 deletions(-)
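The change is mechanical: with non-SLP vectorization gone, the former "slp"
flag is constant true, so guards like "if (1)" and "... && 1" fold away and
dead "if (0)" arms are deleted.  A minimal self-contained sketch of the
pattern, using hypothetical stand-in names rather than GCC internals:

/* Sketch only: hypothetical stand-ins, not GCC code.  */
#include <cassert>

static int
ncopies_before (bool slp, int non_slp_copies)
{
  if (slp)			/* Always true now; substitution left "if (1)".  */
    return 1;			/* NCOPIES is always 1 in case of SLP.  */
  else
    return non_slp_copies;	/* Dead arm, removed by the patch.  */
}

static int
ncopies_after (void)
{
  return 1;			/* Only the SLP arm survives.  */
}

int
main ()
{
  /* Both compute the same NCOPIES once slp is invariantly true.  */
  assert (ncopies_before (true, 4) == ncopies_after ());
  return 0;
}
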
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc index 92739903754..c5fe7879d5a 100644 --- a/gcc/tree-vect-stmts.cc +++ b/gcc/tree-vect-stmts.cc @@ -9850,7 +9850,6 @@ vectorizable_load (vec_info *vinfo, bool compute_in_loop = false; class loop *at_loop; int vec_num; - bool slp = true; bool slp_perm = false; bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo); poly_uint64 vf; @@ -9909,7 +9908,7 @@ vectorizable_load (vec_info *vinfo, return false; mask_index = internal_fn_mask_index (ifn); - if (mask_index >= 0 && 1) + if (mask_index >= 0) mask_index = vect_slp_child_index_for_operand (call, mask_index, STMT_VINFO_GATHER_SCATTER_P (stmt_info)); if (mask_index >= 0 @@ -9918,7 +9917,7 @@ vectorizable_load (vec_info *vinfo, return false; els_index = internal_fn_else_index (ifn); - if (els_index >= 0 && 1) + if (els_index >= 0) els_index = vect_slp_child_index_for_operand (call, els_index, STMT_VINFO_GATHER_SCATTER_P (stmt_info)); if (els_index >= 0 @@ -9942,16 +9941,13 @@ vectorizable_load (vec_info *vinfo, /* Multiple types in SLP are handled by creating the appropriate number of vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in case of SLP. */ - if (1) - ncopies = 1; - else - ncopies = vect_get_num_copies (loop_vinfo, vectype); + ncopies = 1; gcc_assert (ncopies >= 1); /* FORNOW. This restriction should be relaxed. */ if (nested_in_vect_loop - && (ncopies > 1 || (1 && SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) > 1))) + && (ncopies > 1 || SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) > 1)) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -9997,15 +9993,6 @@ vectorizable_load (vec_info *vinfo, first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info); group_size = DR_GROUP_SIZE (first_stmt_info); - /* Refuse non-SLP vectorization of SLP-only groups. */ - if (0 && STMT_VINFO_SLP_VECT_ONLY (first_stmt_info)) - { - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "cannot vectorize load in non-SLP mode.\n"); - return false; - } - /* Invalidate assumptions made by dependence analysis when vectorization on the unrolled body effectively re-orders stmts. */ if (STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0 @@ -10046,8 +10033,7 @@ vectorizable_load (vec_info *vinfo, /* ??? The following checks should really be part of get_group_load_store_type. */ - if (1 - && SLP_TREE_LOAD_PERMUTATION (slp_node).exists () + if (SLP_TREE_LOAD_PERMUTATION (slp_node).exists () && !((memory_access_type == VMAT_ELEMENTWISE || memory_access_type == VMAT_GATHER_SCATTER) && SLP_TREE_LANES (slp_node) == 1)) @@ -10090,8 +10076,7 @@ vectorizable_load (vec_info *vinfo, } } - if (1 - && slp_node->ldst_lanes + if (slp_node->ldst_lanes && memory_access_type != VMAT_LOAD_STORE_LANES) { if (dump_enabled_p ()) @@ -10142,8 +10127,7 @@ vectorizable_load (vec_info *vinfo, if (costing_p) /* transformation not required. 
*/ { - if (1 - && mask + if (mask && !vect_maybe_update_slp_op_vectype (slp_op, mask_vectype)) { @@ -10153,10 +10137,7 @@ vectorizable_load (vec_info *vinfo, return false; } - if (0) - STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type; - else - SLP_TREE_MEMORY_ACCESS_TYPE (slp_node) = memory_access_type; + SLP_TREE_MEMORY_ACCESS_TYPE (slp_node) = memory_access_type; if (loop_vinfo && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)) @@ -10210,12 +10191,7 @@ vectorizable_load (vec_info *vinfo, if (elsvals.length ()) maskload_elsval = *elsvals.begin (); - if (0) - gcc_assert (memory_access_type - == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info)); - else - gcc_assert (memory_access_type - == SLP_TREE_MEMORY_ACCESS_TYPE (slp_node)); + gcc_assert (memory_access_type == SLP_TREE_MEMORY_ACCESS_TYPE (slp_node)); if (dump_enabled_p () && !costing_p) dump_printf_loc (MSG_NOTE, vect_location, @@ -10289,15 +10265,8 @@ vectorizable_load (vec_info *vinfo, vectype, &gsi2); } gimple *new_stmt = SSA_NAME_DEF_STMT (new_temp); - if (1) - for (j = 0; j < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); ++j) - slp_node->push_vec_def (new_stmt); - else - { - for (j = 0; j < ncopies; ++j) - STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt); - *vec_stmt = new_stmt; - } + for (j = 0; j < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); ++j) + slp_node->push_vec_def (new_stmt); return true; } @@ -10615,12 +10584,11 @@ vectorizable_load (vec_info *vinfo, return true; } - if (memory_access_type == VMAT_GATHER_SCATTER - || (0 && memory_access_type == VMAT_CONTIGUOUS)) + if (memory_access_type == VMAT_GATHER_SCATTER) grouped_load = false; if (grouped_load - || (1 && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())) + || SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()) { if (grouped_load) { @@ -10634,7 +10602,7 @@ vectorizable_load (vec_info *vinfo, } /* For SLP vectorization we directly vectorize a subchain without permutation. */ - if (1 && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()) + if (! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()) first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0]; /* For BB vectorization always use the first stmt to base the data ref pointer on. */ @@ -10642,60 +10610,39 @@ vectorizable_load (vec_info *vinfo, first_stmt_info_for_drptr = vect_find_first_scalar_stmt_in_slp (slp_node); - /* Check if the chain of loads is already vectorized. */ - if (STMT_VINFO_VEC_STMTS (first_stmt_info).exists () - /* For SLP we would need to copy over SLP_TREE_VEC_DEFS. - ??? But we can only do so if there is exactly one - as we have no way to get at the rest. Leave the CSE - opportunity alone. - ??? With the group load eventually participating - in multiple different permutations (having multiple - slp nodes which refer to the same group) the CSE - is even wrong code. See PR56270. */ - && 0) - { - *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0]; - return true; - } first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info); group_gap_adj = 0; /* VEC_NUM is the number of vect stmts to be created for this group. */ - if (1) - { - grouped_load = false; - /* If an SLP permutation is from N elements to N elements, - and if one vector holds a whole number of N, we can load - the inputs to the permutation in the same way as an - unpermuted sequence. In other cases we need to load the - whole group, not only the number of vector stmts the - permutation result fits in. 
*/ - unsigned scalar_lanes = SLP_TREE_LANES (slp_node); - if (nested_in_vect_loop) - /* We do not support grouped accesses in a nested loop, - instead the access is contiguous but it might be - permuted. No gap adjustment is needed though. */ - vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); - else if (slp_perm - && (group_size != scalar_lanes - || !multiple_p (nunits, group_size))) - { - /* We don't yet generate such SLP_TREE_LOAD_PERMUTATIONs for - variable VF; see vect_transform_slp_perm_load. */ - unsigned int const_vf = vf.to_constant (); - unsigned int const_nunits = nunits.to_constant (); - vec_num = CEIL (group_size * const_vf, const_nunits); - group_gap_adj = vf * group_size - nunits * vec_num; - } - else - { - vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); - group_gap_adj - = group_size - scalar_lanes; - } - } + grouped_load = false; + /* If an SLP permutation is from N elements to N elements, + and if one vector holds a whole number of N, we can load + the inputs to the permutation in the same way as an + unpermuted sequence. In other cases we need to load the + whole group, not only the number of vector stmts the + permutation result fits in. */ + unsigned scalar_lanes = SLP_TREE_LANES (slp_node); + if (nested_in_vect_loop) + /* We do not support grouped accesses in a nested loop, + instead the access is contiguous but it might be + permuted. No gap adjustment is needed though. */ + vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); + else if (slp_perm + && (group_size != scalar_lanes + || !multiple_p (nunits, group_size))) + { + /* We don't yet generate such SLP_TREE_LOAD_PERMUTATIONs for + variable VF; see vect_transform_slp_perm_load. */ + unsigned int const_vf = vf.to_constant (); + unsigned int const_nunits = nunits.to_constant (); + vec_num = CEIL (group_size * const_vf, const_nunits); + group_gap_adj = vf * group_size - nunits * vec_num; + } else - vec_num = group_size; + { + vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); + group_gap_adj = group_size - scalar_lanes; + } ref_type = get_group_alias_ptr_type (first_stmt_info); } @@ -10706,8 +10653,7 @@ vectorizable_load (vec_info *vinfo, group_size = vec_num = 1; group_gap_adj = 0; ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr)); - if (1) - vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); + vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); } gcc_assert (alignment_support_scheme); @@ -10909,14 +10855,8 @@ vectorizable_load (vec_info *vinfo, auto_vec<tree> vec_offsets; auto_vec<tree> vec_masks; if (mask && !costing_p) - { - if (1) - vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[mask_index], - &vec_masks); - else - vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies, mask, - &vec_masks, mask_vectype); - } + vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[mask_index], + &vec_masks); tree vec_mask = NULL_TREE; tree vec_els = NULL_TREE; @@ -10929,8 +10869,7 @@ vectorizable_load (vec_info *vinfo, /* For costing some adjacent vector loads, we'd like to cost with the total number of them once instead of cost each one by one. */ unsigned int n_adjacent_loads = 0; - if (1) - ncopies = slp_node->vec_stmts_size / group_size; + ncopies = slp_node->vec_stmts_size / group_size; for (j = 0; j < ncopies; j++) { if (costing_p) @@ -11053,32 +10992,17 @@ vectorizable_load (vec_info *vinfo, gimple_call_set_nothrow (call, true); vect_finish_stmt_generation (vinfo, stmt_info, call, gsi); - if (0) - dr_chain.create (group_size); /* Extract each vector into an SSA_NAME. 
*/ for (unsigned i = 0; i < group_size; i++) { new_temp = read_vector_array (vinfo, stmt_info, gsi, scalar_dest, vec_array, i, need_zeroing, final_mask); - if (1) - slp_node->push_vec_def (new_temp); - else - dr_chain.quick_push (new_temp); + slp_node->push_vec_def (new_temp); } - if (0) - /* Record the mapping between SSA_NAMEs and statements. */ - vect_record_grouped_load_vectors (vinfo, stmt_info, dr_chain); - /* Record that VEC_ARRAY is now dead. */ vect_clobber_variable (vinfo, stmt_info, gsi, vec_array); - - if (0) - dr_chain.release (); - - if (0) - *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0]; } if (costing_p) @@ -11453,17 +11377,10 @@ vectorizable_load (vec_info *vinfo, } /* Store vector loads in the corresponding SLP_NODE. */ - if (1) - slp_node->push_vec_def (new_stmt); + slp_node->push_vec_def (new_stmt); } - - if (0 && !costing_p) - STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt); } - if (0 && !costing_p) - *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0]; - if (costing_p && dump_enabled_p ()) dump_printf_loc (MSG_NOTE, vect_location, "vect_model_load_cost: inside_cost = %u, " @@ -12052,7 +11969,7 @@ vectorizable_load (vec_info *vinfo, dr_chain.quick_push (new_temp); /* Store vector loads in the corresponding SLP_NODE. */ - if (!costing_p && 1 && !slp_perm) + if (!costing_p && !slp_perm) slp_node->push_vec_def (new_stmt); /* With SLP permutation we load the gaps as well, without @@ -12090,7 +12007,7 @@ vectorizable_load (vec_info *vinfo, stmt_info, bump); } - if (1 && !slp_perm) + if (!slp_perm) continue; if (slp_perm) @@ -12152,8 +12069,6 @@ vectorizable_load (vec_info *vinfo, } dr_chain.release (); } - if (0 && !costing_p) - *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0]; if (costing_p) { -- 2.43.0
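
A stand-alone check of the VEC_NUM computation kept in the load-permutation
path above, vec_num = CEIL (group_size * vf, nunits); the concrete numbers
are assumed, for illustration only:

/* Illustrative only; names mirror the patch but this is not GCC code.  */
#include <cstdio>

static unsigned
ceil_div (unsigned a, unsigned b)
{
  return (a + b - 1) / b;
}

int
main ()
{
  /* Assumed example: group of 3 loads, VF 4, 4 elements per vector.  */
  unsigned group_size = 3, vf = 4, nunits = 4;
  /* Load the whole group across VF iterations: CEIL (3 * 4, 4) = 3.  */
  unsigned vec_num = ceil_div (group_size * vf, nunits);
  /* group_gap_adj is the difference vf * group_size - nunits * vec_num;
     here it is 12 - 12 = 0, i.e. no gap adjustment is needed.  */
  int group_gap_adj = (int) (vf * group_size) - (int) (nunits * vec_num);
  printf ("vec_num = %u, group_gap_adj = %d\n", vec_num, group_gap_adj);
  return 0;
}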