Calls to vect_(get|record)_loop_(mask|len) are replaced with
calls to new wrappers that take an extra SLP node parameter
and that can operate on any vec_info, not just a loop_vec_info.
These wrappers pass calls through to the original functions
(and ignore the SLP node) when invoked with a loop_vec_info;
otherwise, vect_record_(mask|len) do nothing (for now).

Direct uses of LOOP_VINFO_FULLY_(MASKED|WITH_LENGTH)_P and
LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P are replaced with wrappers
named vect_can_use_(mask|len)_p and vect_can_use_partial_vectors_p
(for queries) and vect_cannot_use_partial_vectors (for updates).
For BB SLP, vect_cannot_use_partial_vectors does nothing and the
getters return false (for now).

To minimize code churn, the new wrappers are only used in code shared
between loop vectorization and BB SLP.
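
As an illustrative sketch (not a hunk from this patch), a mask
lookup in code shared between loop and BB vectorization changes
roughly as follows; vinfo, slp_node, gsi, nvectors, vectype and i
stand for whatever the surrounding code already has in scope:

    /* Before: loop-only query and per-rgroup mask lookup.  */
    if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
      mask = vect_get_loop_mask (loop_vinfo, gsi,
				 &LOOP_VINFO_MASKS (loop_vinfo),
				 nvectors, vectype, i);

    /* After: vec_info-agnostic wrappers that also take the SLP node.  */
    if (vect_can_use_mask_p (vinfo, slp_node))
      mask = vect_get_mask (vinfo, slp_node, gsi, nvectors, vectype, i);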
---
gcc/tree-vect-stmts.cc | 361 ++++++++++++++++++++++-------------------
gcc/tree-vectorizer.h | 74 ++++++++-
2 files changed, 266 insertions(+), 169 deletions(-)
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 83acbb3ff67..3115c610736 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -1405,12 +1405,88 @@ vectorizable_internal_function (combined_fn cfn, tree fndecl,
return IFN_LAST;
}
+/* Record that a complete set of masks associated with VINFO would need to
+ contain a sequence of NVECTORS masks that each control a vector of type
+ VECTYPE. If SCALAR_MASK is nonnull, the fully-masked loop would AND
+ these vector masks with the vector version of SCALAR_MASK. */
+static void
+vect_record_mask (vec_info *vinfo, slp_tree slp_node, unsigned int nvectors,
+ tree vectype, tree scalar_mask)
+{
+ loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (vinfo);
+ if (loop_vinfo)
+ vect_record_loop_mask (loop_vinfo, &LOOP_VINFO_MASKS (loop_vinfo), nvectors,
+ vectype, scalar_mask);
+ else
+ (void) slp_node; // FORNOW
+}
+
+/* Given a complete set of masks associated with VINFO, extract mask number
+ INDEX for an rgroup that operates on NVECTORS vectors of type VECTYPE, where
+ 0 <= INDEX < NVECTORS. Alternatively, if doing basic block vectorization,
+ materialize an equivalent mask for SLP_NODE. Insert any set-up statements
+ before GSI. */
+static tree
+vect_get_mask (vec_info *vinfo, slp_tree slp_node, gimple_stmt_iterator *gsi,
+ unsigned int nvectors, tree vectype, unsigned int index)
+{
+ gcc_assert (vect_can_use_mask_p (vinfo, slp_node));
+ loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (vinfo);
+ if (loop_vinfo)
+ return vect_get_loop_mask (loop_vinfo, gsi, &LOOP_VINFO_MASKS (loop_vinfo),
+ nvectors, vectype, index);
+ else
+ {
+ (void) slp_node; // FORNOW
+ return NULL_TREE;
+ }
+}
+
+/* Record that a complete set of lengths associated with VINFO would need to
+ contain a sequence of NVECTORS lengths for controlling an operation on
+ VECTYPE. The operation splits each element of VECTYPE into FACTOR separate
+ subelements, measuring the length as a number of these subelements. */
+static void
+vect_record_len (vec_info *vinfo, slp_tree slp_node, unsigned int nvectors,
+ tree vectype, unsigned int factor)
+{
+ loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (vinfo);
+ if (loop_vinfo)
+ vect_record_loop_len (loop_vinfo, &LOOP_VINFO_LENS (loop_vinfo), nvectors,
+ vectype, factor);
+ else
+ (void) slp_node; // FORNOW
+}
+
+/* Given a complete set of lengths associated with VINFO, extract length number
+ INDEX for an rgroup that operates on NVECTORS vectors of type VECTYPE, where
+ 0 <= INDEX < NVECTORS. Alternatively, if doing basic block vectorization,
+ materialize an equivalent length for SLP_NODE. Return a value that contains
+ FACTOR multiplied by the number of elements that should be processed. Insert
+ any set-up statements before GSI. */
+
+static tree
+vect_get_len (vec_info *vinfo, slp_tree slp_node, gimple_stmt_iterator *gsi,
+ unsigned int nvectors, tree vectype, unsigned int index,
+ unsigned int factor)
+{
+ gcc_assert (vect_can_use_len_p (vinfo, slp_node));
+ loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (vinfo);
+ if (loop_vinfo)
+ return vect_get_loop_len (loop_vinfo, gsi, &LOOP_VINFO_LENS (loop_vinfo),
+ nvectors, vectype, index, factor);
+ else
+ {
+ (void) slp_node; // FORNOW
+ return NULL_TREE;
+ }
+}
static tree permute_vec_elements (vec_info *, tree, tree, tree, stmt_vec_info,
gimple_stmt_iterator *);
-/* Check whether a load or store statement in the loop described by
- LOOP_VINFO is possible in a loop using partial vectors. This is
+/* Check whether a load or store statement in the loop or SLP group described by
+ VINFO is possible using partial vectors. This is
testing whether the vectorizer pass has the appropriate support,
as well as whether the target does.
@@ -1431,7 +1507,7 @@ static tree permute_vec_elements (vec_info *, tree, tree, tree, stmt_vec_info,
else values will be added to the vector ELSVALS points to. */
static void
-check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
+check_load_store_for_partial_vectors (vec_info *vinfo, tree vectype,
slp_tree slp_node,
vec_load_store_type vls_type,
int group_size,
@@ -1439,6 +1515,7 @@ check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
slp_tree mask_node,
vec<int> *elsvals = nullptr)
{
+ loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
vect_memory_access_type memory_access_type = ls->memory_access_type;
/* Invariant loads need no special support. */
@@ -1462,9 +1539,7 @@ check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
scalar_mask = def;
}
- unsigned int nvectors = vect_get_num_copies (loop_vinfo, slp_node);
- vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
- vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
+ unsigned int nvectors = vect_get_num_copies (vinfo, slp_node);
machine_mode vecmode = TYPE_MODE (vectype);
bool is_load = (vls_type == VLS_LOAD);
if (memory_access_type == VMAT_LOAD_STORE_LANES)
@@ -1475,10 +1550,10 @@ check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
elsvals)
: vect_store_lanes_supported (vectype, group_size, true));
if (ifn == IFN_MASK_LEN_LOAD_LANES || ifn == IFN_MASK_LEN_STORE_LANES)
- vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, 1);
+ vect_record_len (vinfo, slp_node, nvectors, vectype, 1);
else if (ifn == IFN_MASK_LOAD_LANES || ifn == IFN_MASK_STORE_LANES)
- vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype,
- scalar_mask);
+ vect_record_mask (vinfo, slp_node, nvectors, vectype,
+ scalar_mask);
else
{
if (dump_enabled_p ())
@@ -1486,7 +1561,7 @@ check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
"can't operate on partial vectors because"
" the target doesn't have an appropriate"
" load/store-lanes instruction.\n");
- LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
+ vect_cannot_use_partial_vectors (vinfo, slp_node);
}
return;
}
@@ -1509,14 +1584,13 @@ check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
memory_type,
off_vectype, scale,
elsvals))
- vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, 1);
+ vect_record_len (vinfo, slp_node, nvectors, vectype, 1);
else if (internal_gather_scatter_fn_supported_p (ifn, vectype,
memory_type,
off_vectype, scale,
elsvals)
|| memory_access_type == VMAT_GATHER_SCATTER_LEGACY)
- vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype,
- scalar_mask);
+ vect_record_mask (vinfo, slp_node, nvectors, vectype, scalar_mask);
else
{
if (dump_enabled_p ())
@@ -1524,7 +1598,7 @@ check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
"can't operate on partial vectors because"
" the target doesn't have an appropriate"
" gather load or scatter store instruction.\n");
- LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
+ vect_cannot_use_partial_vectors (vinfo, slp_node);
}
return;
}
@@ -1537,7 +1611,7 @@ check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"can't operate on partial vectors because an"
" access isn't contiguous.\n");
- LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
+ vect_cannot_use_partial_vectors (vinfo, slp_node);
return;
}
@@ -1547,7 +1621,7 @@ check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"can't operate on partial vectors when emulating"
" vector operations.\n");
- LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
+ vect_cannot_use_partial_vectors (vinfo, slp_node);
return;
}
@@ -1572,7 +1646,7 @@ check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
{
nvectors = group_memory_nvectors (group_size * vf, nunits);
unsigned factor = (vecmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vecmode);
- vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, factor);
+ vect_record_len (vinfo, slp_node, nvectors, vectype, factor);
using_partial_vectors_p = true;
}
else if (targetm.vectorize.get_mask_mode (vecmode).exists (&mask_mode)
@@ -1580,7 +1654,7 @@ check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
elsvals))
{
nvectors = group_memory_nvectors (group_size * vf, nunits);
- vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype, scalar_mask);
+ vect_record_mask (vinfo, slp_node, nvectors, vectype, scalar_mask);
using_partial_vectors_p = true;
}
@@ -1591,35 +1665,35 @@ check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
"can't operate on partial vectors because the"
" target doesn't have the appropriate partial"
" vectorization load or store.\n");
- LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
+ vect_cannot_use_partial_vectors (vinfo, slp_node);
}
}
/* Return the mask input to a masked load or store. VEC_MASK is the vectorized
- form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
- that needs to be applied to all loads and stores in a vectorized loop.
- Return VEC_MASK if LOOP_MASK is null or if VEC_MASK is already masked,
- otherwise return VEC_MASK & LOOP_MASK.
+ form of the scalar mask condition and LOOP_OR_TAIL_MASK, if nonnull, is the
+ mask that needs to be applied to all loads and stores in a vectorized loop.
+ Return VEC_MASK if LOOP_OR_TAIL_MASK is null or if VEC_MASK is already
+ masked, otherwise return VEC_MASK & LOOP_OR_TAIL_MASK.
MASK_TYPE is the type of both masks. If new statements are needed,
insert them before GSI. */
tree
-prepare_vec_mask (loop_vec_info loop_vinfo, tree mask_type, tree loop_mask,
+prepare_vec_mask (vec_info *vinfo, tree mask_type, tree loop_or_tail_mask,
tree vec_mask, gimple_stmt_iterator *gsi)
{
gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
- if (!loop_mask)
+ if (!loop_or_tail_mask)
return vec_mask;
- gcc_assert (TREE_TYPE (loop_mask) == mask_type);
+ gcc_assert (TREE_TYPE (loop_or_tail_mask) == mask_type);
- if (loop_vinfo->vec_cond_masked_set.contains ({ vec_mask, loop_mask }))
+ if (vinfo->vec_cond_masked_set.contains ({ vec_mask, loop_or_tail_mask }))
return vec_mask;
tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
- gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
- vec_mask, loop_mask);
+ gimple *and_stmt
+ = gimple_build_assign (and_res, BIT_AND_EXPR, vec_mask, loop_or_tail_mask);
gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
return and_res;
@@ -3619,8 +3693,6 @@ vectorizable_call (vec_info *vinfo,
internal_fn cond_fn = get_conditional_internal_fn (ifn);
internal_fn cond_len_fn = get_len_internal_fn (ifn);
int len_opno = internal_fn_len_index (cond_len_fn);
- vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
- vec_loop_lens *lens = (loop_vinfo ? &LOOP_VINFO_LENS (loop_vinfo) : NULL);
unsigned int nvectors = vect_get_num_copies (vinfo, slp_node);
if (cost_vec) /* transformation not required. */
{
@@ -3638,8 +3710,7 @@ vectorizable_call (vec_info *vinfo,
DUMP_VECT_SCOPE ("vectorizable_call");
vect_model_simple_cost (vinfo, 1, slp_node, cost_vec);
- if (loop_vinfo
- && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
+ if (vect_can_use_partial_vectors_p (vinfo, slp_node)
&& (reduc_idx >= 0 || mask_opno >= 0))
{
if (reduc_idx >= 0
@@ -3654,7 +3725,7 @@ vectorizable_call (vec_info *vinfo,
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"can't use a fully-masked loop because no"
" conditional operation is available.\n");
- LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
+ vect_cannot_use_partial_vectors (vinfo, slp_node);
}
else
{
@@ -3664,11 +3735,10 @@ vectorizable_call (vec_info *vinfo,
if (cond_len_fn != IFN_LAST
&& direct_internal_fn_supported_p (cond_len_fn, vectype_out,
OPTIMIZE_FOR_SPEED))
- vect_record_loop_len (loop_vinfo, lens, nvectors, vectype_out,
- 1);
+ vect_record_len (vinfo, slp_node, nvectors, vectype_out, 1);
else
- vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype_out,
- scalar_mask);
+ vect_record_mask (vinfo, slp_node, nvectors, vectype_out,
+ scalar_mask);
}
}
return true;
@@ -3683,8 +3753,8 @@ vectorizable_call (vec_info *vinfo,
scalar_dest = gimple_call_lhs (stmt);
vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
- bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
- bool len_loop_p = loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo);
+ bool masked_loop_p = vect_can_use_mask_p (vinfo, slp_node);
+ bool len_loop_p = vect_can_use_len_p (vinfo, slp_node);
unsigned int vect_nargs = nargs;
if (len_loop_p)
{
@@ -3743,8 +3813,8 @@ vectorizable_call (vec_info *vinfo,
if (masked_loop_p && reduc_idx >= 0)
{
unsigned int vec_num = vec_oprnds0.length ();
- vargs[varg++] = vect_get_loop_mask (loop_vinfo, gsi, masks,
- vec_num, vectype_out, i);
+ vargs[varg++] = vect_get_mask (vinfo, slp_node, gsi, vec_num,
+ vectype_out, i);
}
size_t k;
for (k = 0; k < nargs; k++)
@@ -3783,8 +3853,8 @@ vectorizable_call (vec_info *vinfo,
if (len_opno >= 0 && len_loop_p)
{
unsigned int vec_num = vec_oprnds0.length ();
- tree len = vect_get_loop_len (loop_vinfo, gsi, lens,
- vec_num, vectype_out, i, 1);
+ tree len = vect_get_len (vinfo, slp_node, gsi, vec_num,
+ vectype_out, i, 1);
signed char biasval
= LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
tree bias = build_int_cst (intQI_type_node, biasval);
@@ -3794,10 +3864,10 @@ vectorizable_call (vec_info *vinfo,
else if (mask_opno >= 0 && masked_loop_p)
{
unsigned int vec_num = vec_oprnds0.length ();
- tree mask = vect_get_loop_mask (loop_vinfo, gsi, masks,
- vec_num, vectype_out, i);
+ tree mask = vect_get_mask (vinfo, slp_node, gsi, vec_num,
+ vectype_out, i);
vargs[mask_opno]
- = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
+ = prepare_vec_mask (vinfo, TREE_TYPE (mask), mask,
vargs[mask_opno], gsi);
}
@@ -4409,24 +4479,21 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
}
break;
case SIMD_CLONE_ARG_TYPE_MASK:
- if (loop_vinfo
- && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
- vect_record_loop_mask (loop_vinfo,
- &LOOP_VINFO_MASKS (loop_vinfo),
- ncopies, vectype, op);
+ if (vect_can_use_partial_vectors_p (vinfo, slp_node))
+ vect_record_mask (vinfo, slp_node, ncopies, vectype, op);
break;
}
}
- if (!bestn->simdclone->inbranch && loop_vinfo)
+ if (!bestn->simdclone->inbranch)
{
if (dump_enabled_p ()
- && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
+ && vect_can_use_partial_vectors_p (vinfo, slp_node))
dump_printf_loc (MSG_NOTE, vect_location,
"can't use a fully-masked loop because a"
" non-masked simd clone was selected.\n");
- LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
+ vect_cannot_use_partial_vectors (vinfo, slp_node);
}
SLP_TREE_TYPE (slp_node) = call_simd_clone_vec_info_type;
@@ -4597,23 +4664,15 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
if (m == 0)
vec_oprnds_i[i] = 0;
vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
- if (loop_vinfo
- && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
+ if (vect_can_use_mask_p (vinfo, slp_node))
{
- vec_loop_masks *loop_masks
- = &LOOP_VINFO_MASKS (loop_vinfo);
- tree loop_mask
- = vect_get_loop_mask (loop_vinfo, gsi,
- loop_masks, ncopies,
- vectype, j);
+ tree mask = vect_get_mask (vinfo, slp_node, gsi,
+ ncopies, vectype, j);
vec_oprnd0
- = prepare_vec_mask (loop_vinfo,
- TREE_TYPE (loop_mask),
- loop_mask, vec_oprnd0,
- gsi);
- loop_vinfo->vec_cond_masked_set.add ({ vec_oprnd0,
- loop_mask });
-
+ = prepare_vec_mask (vinfo, TREE_TYPE (mask),
+ mask, vec_oprnd0, gsi);
+ vinfo->vec_cond_masked_set.add (
+ {vec_oprnd0, mask});
}
vec_oprnd0
= build3 (VEC_COND_EXPR, atype, vec_oprnd0,
@@ -4791,12 +4850,9 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
o = vector_unroll_factor (nunits, callee_nelements);
for (m = j * o; m < (j + 1) * o; m++)
{
- if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
- {
- vec_loop_masks *loop_masks = &LOOP_VINFO_MASKS (loop_vinfo);
- mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
- ncopies, masktype, j);
- }
+ if (vect_can_use_mask_p (vinfo, slp_node))
+ mask
+ = vect_get_mask (vinfo, slp_node, gsi, ncopies, masktype, j);
else
mask = vect_build_all_ones_mask (vinfo, stmt_info, masktype);
@@ -5207,7 +5263,6 @@ vectorizable_conversion (vec_info *vinfo,
vec<tree> vec_oprnds1 = vNULL;
tree vop0;
bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
- loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
int multi_step_cvt = 0;
vec<tree> interm_types = vNULL;
tree intermediate_type, cvt_type = NULL_TREE;
@@ -5551,8 +5606,7 @@ vectorizable_conversion (vec_info *vinfo,
}
if (modifier == WIDEN
- && loop_vinfo
- && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
+ && vect_can_use_partial_vectors_p (vinfo, slp_node)
&& (code1 == VEC_WIDEN_MULT_EVEN_EXPR
|| widening_evenodd_fn_p (code1)))
{
@@ -5561,7 +5615,7 @@ vectorizable_conversion (vec_info *vinfo,
"can't use a fully-masked loop because"
" widening operation on even/odd elements"
" mixes up lanes.\n");
- LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
+ vect_cannot_use_partial_vectors (vinfo, slp_node);
}
if (cost_vec) /* transformation not required. */
@@ -6641,7 +6695,6 @@ vectorizable_operation (vec_info *vinfo,
int reduc_idx = SLP_TREE_REDUC_IDX (slp_node);
vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
- vec_loop_lens *lens = (loop_vinfo ? &LOOP_VINFO_LENS (loop_vinfo) : NULL);
internal_fn cond_fn = get_conditional_internal_fn (code);
internal_fn cond_len_fn = get_conditional_len_internal_fn (code);
@@ -6658,27 +6711,24 @@ vectorizable_operation (vec_info *vinfo,
if (cost_vec) /* transformation not required. */
{
- if (loop_vinfo
- && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
+ if (vect_can_use_partial_vectors_p (vinfo, slp_node)
&& mask_out_inactive)
{
if (cond_len_fn != IFN_LAST
&& direct_internal_fn_supported_p (cond_len_fn, vectype,
OPTIMIZE_FOR_SPEED))
- vect_record_loop_len (loop_vinfo, lens, vec_num, vectype,
- 1);
+ vect_record_len (vinfo, slp_node, vec_num, vectype, 1);
else if (cond_fn != IFN_LAST
&& direct_internal_fn_supported_p (cond_fn, vectype,
OPTIMIZE_FOR_SPEED))
- vect_record_loop_mask (loop_vinfo, masks, vec_num,
- vectype, NULL);
+ vect_record_mask (vinfo, slp_node, vec_num, vectype, NULL);
else
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"can't use a fully-masked loop because no"
" conditional operation is available.\n");
- LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
+ vect_cannot_use_partial_vectors (vinfo, slp_node);
}
}
@@ -6738,8 +6788,8 @@ vectorizable_operation (vec_info *vinfo,
dump_printf_loc (MSG_NOTE, vect_location,
"transform binary/unary operation.\n");
- bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
- bool len_loop_p = loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo);
+ bool masked_loop_p = vect_can_use_mask_p (vinfo, slp_node);
+ bool len_loop_p = vect_can_use_len_p (vinfo, slp_node);
/* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
vectors with unsigned elements, but the result is signed. So, we
@@ -6931,8 +6981,7 @@ vectorizable_operation (vec_info *vinfo,
{
tree mask;
if (masked_loop_p)
- mask = vect_get_loop_mask (loop_vinfo, gsi, masks,
- vec_num, vectype, i);
+ mask = vect_get_mask (vinfo, slp_node, gsi, vec_num, vectype, i);
else
/* Dummy mask. */
mask = build_minus_one_cst (truth_type_for (vectype));
@@ -6958,8 +7007,8 @@ vectorizable_operation (vec_info *vinfo,
}
if (len_loop_p)
{
- tree len = vect_get_loop_len (loop_vinfo, gsi, lens,
- vec_num, vectype, i, 1);
+ tree len
+ = vect_get_len (vinfo, slp_node, gsi, vec_num, vectype, i, 1);
signed char biasval
= LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
tree bias = build_int_cst (intQI_type_node, biasval);
@@ -6982,8 +7031,7 @@ vectorizable_operation (vec_info *vinfo,
/* When combining two masks check if either of them is elsewhere
combined with a loop mask, if that's the case we can mark that the
new combined mask doesn't need to be combined with a loop mask. */
- if (masked_loop_p
- && code == BIT_AND_EXPR
+ if (loop_vinfo && masked_loop_p && code == BIT_AND_EXPR
&& VECTOR_BOOLEAN_TYPE_P (vectype))
{
if (loop_vinfo->scalar_cond_masked_set.contains ({ op0, vec_num }))
@@ -7015,7 +7063,7 @@ vectorizable_operation (vec_info *vinfo,
/* Enter the combined value into the vector cond hash so we don't
AND it with a loop mask again. */
if (mask)
- loop_vinfo->vec_cond_masked_set.add ({ new_temp, mask });
+ vinfo->vec_cond_masked_set.add ({ new_temp, mask });
}
if (vec_cvt_dest)
@@ -8151,9 +8199,8 @@ vectorizable_store (vec_info *vinfo,
bool costing_p = cost_vec;
if (costing_p) /* transformation not required. */
{
- if (loop_vinfo
- && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
- check_load_store_for_partial_vectors (loop_vinfo, vectype, slp_node,
+ if (vect_can_use_partial_vectors_p (vinfo, slp_node))
+ check_load_store_for_partial_vectors (vinfo, vectype, slp_node,
vls_type, group_size, &ls,
mask_node);
@@ -8506,10 +8553,6 @@ vectorizable_store (vec_info *vinfo,
}
gcc_assert (alignment_support_scheme);
- vec_loop_masks *loop_masks
- = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
- ? &LOOP_VINFO_MASKS (loop_vinfo)
- : NULL);
vec_loop_lens *loop_lens
= (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
? &LOOP_VINFO_LENS (loop_vinfo)
@@ -8521,14 +8564,14 @@ vectorizable_store (vec_info *vinfo,
Shouldn't go with length-based approach if fully masked. */
if (cost_vec == NULL)
/* The cost_vec is NULL during transfrom. */
- gcc_assert ((!loop_lens || !loop_masks));
+ gcc_assert ((!vect_can_use_len_p (vinfo, slp_node)
+ || !vect_can_use_mask_p (vinfo, slp_node)));
/* Targets with store-lane instructions must not require explicit
realignment. vect_supportable_dr_alignment always returns either
dr_aligned or dr_unaligned_supported for masked operations. */
- gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
- && !mask_node
- && !loop_masks)
+ gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES && !mask_node
+ && !vect_can_use_mask_p (vinfo, slp_node))
|| alignment_support_scheme == dr_aligned
|| alignment_support_scheme == dr_unaligned_supported);
@@ -8658,18 +8701,18 @@ vectorizable_store (vec_info *vinfo,
tree final_mask = NULL;
tree final_len = NULL;
tree bias = NULL;
- if (loop_masks)
- final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
- ncopies, vectype, j);
+ if (vect_can_use_mask_p (vinfo, slp_node))
+ final_mask
+ = vect_get_mask (vinfo, slp_node, gsi, ncopies, vectype, j);
if (vec_mask)
- final_mask = prepare_vec_mask (loop_vinfo, mask_vectype, final_mask,
+ final_mask = prepare_vec_mask (vinfo, mask_vectype, final_mask,
vec_mask, gsi);
if (lanes_ifn == IFN_MASK_LEN_STORE_LANES)
{
- if (loop_lens)
- final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
- ncopies, vectype, j, 1);
+ if (vect_can_use_len_p (vinfo, slp_node))
+ final_len
+ = vect_get_len (vinfo, slp_node, gsi, ncopies, vectype, j, 1);
else
final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
signed char biasval
@@ -8797,12 +8840,11 @@ vectorizable_store (vec_info *vinfo,
tree bias = NULL_TREE;
if (!costing_p)
{
- if (loop_masks)
- final_mask = vect_get_loop_mask (loop_vinfo, gsi,
- loop_masks, num_stmts,
- vectype, j);
+ if (vect_can_use_mask_p (vinfo, slp_node))
+ final_mask
+ = vect_get_mask (vinfo, slp_node, gsi, num_stmts, vectype, j);
if (vec_mask)
- final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
+ final_mask = prepare_vec_mask (vinfo, mask_vectype,
final_mask, vec_mask, gsi);
}
@@ -8827,9 +8869,8 @@ vectorizable_store (vec_info *vinfo,
if (ls.gs.ifn == IFN_MASK_LEN_SCATTER_STORE)
{
if (loop_lens)
- final_len = vect_get_loop_len (loop_vinfo, gsi,
- loop_lens, num_stmts,
- vectype, j, 1);
+ final_len = vect_get_len (vinfo, slp_node, gsi, num_stmts,
+ vectype, j, 1);
else
final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
@@ -9114,9 +9155,7 @@ vectorizable_store (vec_info *vinfo,
|| useless_type_conversion_p (vectype, TREE_TYPE (vec_oprnd)));
bool simd_lane_access_p
= STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
- if (!costing_p
- && simd_lane_access_p
- && !loop_masks
+ if (!costing_p && simd_lane_access_p && !vect_can_use_mask_p (vinfo, slp_node)
&& TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
&& VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
&& integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
@@ -9170,13 +9209,12 @@ vectorizable_store (vec_info *vinfo,
tree final_mask = NULL_TREE;
tree final_len = NULL_TREE;
tree bias = NULL_TREE;
- if (loop_masks)
- final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
- vec_num, vectype, i);
+ if (vect_can_use_mask_p (vinfo, slp_node))
+ final_mask = vect_get_mask (vinfo, slp_node, gsi, vec_num, vectype, i);
if (vec_mask)
vec_mask = vec_masks[i];
if (vec_mask)
- final_mask = prepare_vec_mask (loop_vinfo, mask_vectype, final_mask,
+ final_mask = prepare_vec_mask (vinfo, mask_vectype, final_mask,
vec_mask, gsi);
if (i > 0)
@@ -9212,8 +9250,8 @@ vectorizable_store (vec_info *vinfo,
new_vmode = new_ovmode.require ();
unsigned factor
= (new_ovmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vmode);
- final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
- vec_num, vectype, i, factor);
+ final_len
+ = vect_get_len (vinfo, slp_node, gsi, vec_num, vectype, i, factor);
}
else if (final_mask)
{
@@ -9737,9 +9775,8 @@ vectorizable_load (vec_info *vinfo,
return false;
}
- if (loop_vinfo
- && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
- check_load_store_for_partial_vectors (loop_vinfo, vectype, slp_node,
+ if (vect_can_use_partial_vectors_p (vinfo, slp_node))
+ check_load_store_for_partial_vectors (vinfo, vectype, slp_node,
VLS_LOAD, group_size, &ls,
mask_node, &ls.elsvals);
@@ -10252,14 +10289,10 @@ vectorizable_load (vec_info *vinfo,
ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
}
- vec_loop_masks *loop_masks
- = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
- ? &LOOP_VINFO_MASKS (loop_vinfo)
- : NULL);
vec_loop_lens *loop_lens
= (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
- ? &LOOP_VINFO_LENS (loop_vinfo)
- : NULL);
+ ? &LOOP_VINFO_LENS (loop_vinfo)
+ : NULL);
/* The vect_transform_stmt and vect_analyze_stmt will go here but there
are some difference here. We cannot enable both the lens and masks
@@ -10267,15 +10300,15 @@ vectorizable_load (vec_info *vinfo,
Shouldn't go with length-based approach if fully masked. */
if (cost_vec == NULL)
/* The cost_vec is NULL during transfrom. */
- gcc_assert ((!loop_lens || !loop_masks));
+ gcc_assert ((!vect_can_use_len_p (vinfo, slp_node)
+ || !vect_can_use_mask_p (vinfo, slp_node)));
/* Targets with store-lane instructions must not require explicit
realignment. vect_supportable_dr_alignment always returns either
dr_aligned or dr_unaligned_supported for (non-length) masked
operations. */
- gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
- && !mask_node
- && !loop_masks)
+ gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES && !mask_node
+ && !vect_can_use_mask_p (vinfo, slp_node))
|| mat_gather_scatter_p (memory_access_type)
|| alignment_support_scheme == dr_aligned
|| alignment_support_scheme == dr_unaligned_supported);
@@ -10474,18 +10507,18 @@ vectorizable_load (vec_info *vinfo,
tree final_mask = NULL_TREE;
tree final_len = NULL_TREE;
tree bias = NULL_TREE;
- if (loop_masks)
- final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
- ncopies, vectype, j);
+ if (vect_can_use_mask_p (vinfo, slp_node))
+ final_mask
+ = vect_get_mask (vinfo, slp_node, gsi, ncopies, vectype, j);
if (vec_mask)
- final_mask = prepare_vec_mask (loop_vinfo, mask_vectype, final_mask,
+ final_mask = prepare_vec_mask (vinfo, mask_vectype, final_mask,
vec_mask, gsi);
if (lanes_ifn == IFN_MASK_LEN_LOAD_LANES)
{
- if (loop_lens)
- final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
- ncopies, vectype, j, 1);
+ if (vect_can_use_len_p (vinfo, slp_node))
+ final_len
+ = vect_get_len (vinfo, slp_node, gsi, ncopies, vectype, j, 1);
else
final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
signed char biasval
@@ -10619,11 +10652,11 @@ vectorizable_load (vec_info *vinfo,
{
if (mask_node)
vec_mask = vec_masks[i];
- if (loop_masks)
- final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
- vec_num, vectype, i);
+ if (vect_can_use_mask_p (vinfo, slp_node))
+ final_mask
+ = vect_get_mask (vinfo, slp_node, gsi, vec_num, vectype, i);
if (vec_mask)
- final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
+ final_mask = prepare_vec_mask (vinfo, mask_vectype,
final_mask, vec_mask, gsi);
if (i > 0 && !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
@@ -10652,8 +10685,8 @@ vectorizable_load (vec_info *vinfo,
if (ls.gs.ifn == IFN_MASK_LEN_GATHER_LOAD)
{
if (loop_lens)
- final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
- vec_num, vectype, i, 1);
+ final_len = vect_get_len (vinfo, slp_node, gsi, vec_num,
+ vectype, i, 1);
else
final_len = build_int_cst (sizetype,
TYPE_VECTOR_SUBPARTS (vectype));
@@ -11053,11 +11086,11 @@ vectorizable_load (vec_info *vinfo,
{
if (mask_node)
vec_mask = vec_masks[i];
- if (loop_masks)
- final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
- vec_num, vectype, i);
+ if (vect_can_use_mask_p (vinfo, slp_node))
+ final_mask
+ = vect_get_mask (vinfo, slp_node, gsi, vec_num, vectype, i);
if (vec_mask)
- final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
+ final_mask = prepare_vec_mask (vinfo, mask_vectype,
final_mask, vec_mask, gsi);
if (i > 0)
@@ -11103,8 +11136,8 @@ vectorizable_load (vec_info *vinfo,
new_vmode = new_ovmode.require ();
unsigned factor
= (new_ovmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vmode);
- final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
- vec_num, vectype, i, factor);
+ final_len = vect_get_len (vinfo, slp_node, gsi, vec_num,
+ vectype, i, factor);
}
else if (final_mask)
{
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index ecfdb7d88ef..582953bd8e8 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -676,6 +676,9 @@ public:
and are reset when undoing patterns. */
gimple_seq inv_pattern_def_seq;
+ /* Set of vector conditions that have loop mask applied. */
+ vec_cond_masked_set_type vec_cond_masked_set;
+
private:
stmt_vec_info new_stmt_vec_info (gimple *stmt);
void set_vinfo_for_stmt (gimple *, stmt_vec_info, bool = true);
@@ -1007,9 +1010,6 @@ public:
/* Set of scalar conditions that have loop mask applied. */
scalar_cond_masked_set_type scalar_cond_masked_set;
- /* Set of vector conditions that have loop mask applied. */
- vec_cond_masked_set_type vec_cond_masked_set;
-
/* If we are using a loop mask to align memory addresses, this variable
contains the number of vector elements that we should skip in the
first iteration of the vector loop (i.e. the number of leading
@@ -2793,8 +2793,7 @@ extern slp_tree vect_create_new_slp_node (unsigned, tree_code);
extern void vect_free_slp_tree (slp_tree);
extern bool compatible_calls_p (gcall *, gcall *, bool);
extern int vect_slp_child_index_for_operand (const gimple *, int op, bool);
-
-extern tree prepare_vec_mask (loop_vec_info, tree, tree, tree,
+extern tree prepare_vec_mask (vec_info *, tree, tree, tree,
gimple_stmt_iterator *);
extern tree vect_get_mask_load_else (int, tree);
@@ -2929,6 +2928,71 @@ vect_is_reduction (slp_tree slp_node)
return SLP_TREE_REDUC_IDX (slp_node) != -1;
}
+/* Return true if VINFO is vectorizer state for loop vectorization and we
+ still have the option of vectorizing the loop using partially-populated
+ vectors; or, true if VINFO is for basic-block vectorization and we still have
+ the option of vectorizing the given SLP_NODE using partial vectors;
+ otherwise, return false. */
+inline bool
+vect_can_use_partial_vectors_p (vec_info *vinfo, slp_tree slp_node)
+{
+ loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (vinfo);
+ if (loop_vinfo)
+ return LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo);
+
+ (void) slp_node; // FORNOW
+ return false;
+}
+
+/* If VINFO is vectorizer state for loop vectorization then record that we no
+ longer have the option of vectorizing the loop using partially-populated
+ vectors; or, if VINFO is for basic-block vectorization then record that we no
+ longer have the option of vectorizing the given SLP_NODE using partial
+ vectors. */
+inline void
+vect_cannot_use_partial_vectors (vec_info *vinfo, slp_tree slp_node)
+{
+ loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (vinfo);
+ if (loop_vinfo)
+ LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
+ else
+ (void) slp_node; // FORNOW
+}
+
+/* Return true if VINFO is vectorizer state for loop vectorization, we've
+ decided to use partially-populated vectors so that the loop can handle fewer
+ than VF scalars, and at least one length has been recorded; or, if VINFO is
+ for basic-block vectorization and SLP_NODE can be vectorized by using lengths
+ to prevent use of inactive scalar lanes. */
+inline bool
+vect_can_use_len_p (vec_info *vinfo, slp_tree slp_node)
+{
+ if (loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (vinfo))
+ return LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo);
+ else
+ {
+ (void) slp_node; // FORNOW
+ return false;
+ }
+}
+
+/* Return true if VINFO is vectorizer state for loop vectorization, we've
+ decided to use partially-populated vectors so that the loop can handle fewer
+ than VF scalars, and at least one mask has been recorded; or, if VINFO is
+ for basic-block vectorization and SLP_NODE can be vectorized by using masks
+ to prevent use of inactive scalar lanes. */
+inline bool
+vect_can_use_mask_p (vec_info *vinfo, slp_tree slp_node)
+{
+ if (loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (vinfo))
+ return LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
+ else
+ {
+ (void) slp_node; // FORNOW
+ return false;
+ }
+}
+
/* If STMT_INFO describes a reduction, return the vect_reduction_type
of the reduction it describes, otherwise return -1. */
inline int
--
2.43.0