Add a new function, gimple_build_vector_with_zero_padding,
for use when creating vectorized definitions for basic block
vectorization in vect_create_constant_vectors.

The existing gimple_build_vector function cannot be used
for SVE vector types because it relies on the type
associated with the tree_vector_builder having a constant
number of subparts. Even if that limitation were lifted, the
value of elements beyond the lower bound (e.g., 16 for VNx16QI)
would be indeterminate.

The new function instead guarantees that any trailing elements
for which the tree_vector_builder provides no values are
nevertheless initialized (to zero, implicitly). If necessary,
it builds a mask that exposes only those lanes of the
destination vector type for which values were provided, and
uses this mask when copying values from a temporary object to
the destination vector; all other lanes are set to zero.

gcc/ChangeLog:

        * gimple-fold.cc (gimple_build_vector_with_zero_padding):
        Define the new function to build a vector, implementing the
        same interface as gimple_build_vector.
        * gimple-fold.h (gimple_build_vector_with_zero_padding):
        Declare the new function and a simpler overloaded version
        with fewer parameters.
        * tree-vect-slp.cc (vect_create_constant_vectors):
        For basic block SLP vectorization, pad each constant to the
        minimum vector length (which is the same as the actual vector
        length, except in the case of variable-length vector types).
        Use gimple_build_vector_with_zero_padding instead of
        duplicate_and_interleave to create non-uniform constant
        vectors for BB SLP vectorization.

---
 gcc/gimple-fold.cc   | 86 ++++++++++++++++++++++++++++++++++++++++++++
 gcc/gimple-fold.h    | 15 ++++++++
 gcc/tree-vect-slp.cc | 42 +++++++++++++++++-----
 3 files changed, 135 insertions(+), 8 deletions(-)

diff --git a/gcc/gimple-fold.cc b/gcc/gimple-fold.cc
index 3fc76313622..e3aacf1b8a0 100644
--- a/gcc/gimple-fold.cc
+++ b/gcc/gimple-fold.cc
@@ -11274,6 +11274,92 @@ gimple_build_vector (gimple_stmt_iterator *gsi,
   return builder->build ();
 }
 
+/* Build a vector from BUILDER, handling cases in which some elements
+   are non-constant, the vector type is variable-length, or both.
+   Return a gimple value for the result, inserting any new statements
+   at GSI (honoring BEFORE and UPDATE) and giving them location LOC.
+
+   Unlike gimple_build_vector, this function generates explicit zero
+   padding to ensure that all elements of the vector that it builds
+   are assigned a value.  This occurs for both fixed- and
+   variable-length vector types.  */
+
+tree
+gimple_build_vector_with_zero_padding (gimple_stmt_iterator *gsi, bool before,
+                                      gsi_iterator_update update,
+                                      location_t loc,
+                                      tree_vector_builder *builder)
+{
+  gcc_assert (builder->nelts_per_pattern () <= 2);
+  unsigned int encoded_nelts = builder->encoded_nelts ();
+  tree type = builder->type ();
+  gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
+  unsigned int lower_bound = constant_lower_bound (TYPE_VECTOR_SUBPARTS (type));
+  gcc_assert (encoded_nelts <= lower_bound);
+
+  if (encoded_nelts == 0)
+    return build_zero_cst (type);
+
+  /* Prepare a vector of constructor elements and find out whether all
+     of the element values are constant.  */
+  vec<constructor_elt, va_gc> *v;
+  vec_alloc (v, encoded_nelts);
+  bool is_constant = true;
+
+  for (unsigned int i = 0; i < encoded_nelts; ++i)
+    {
+      if (!CONSTANT_CLASS_P (builder->elt (i)))
+       is_constant = false;
+
+      CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, builder->elt (i));
+    }
+
+  /* If all element values are constant then we can return a new VECTOR_CST
+     node.  Any elements for which no value is supplied will be zero.  */
+  if (is_constant)
+    return build_vector_from_ctor (type, v);
+
+  /* Build a constructor for only those element values that our caller
+     provided, then assign its result to a temporary object.  */
+  tree res, lower_elems = gimple_in_ssa_p (cfun) ? make_ssa_name (type)
+                                                : create_tmp_reg (type);
+
+  gimple_seq seq = NULL;
+  gimple *stmt = gimple_build_assign (lower_elems, build_constructor (type, v));
+  gimple_set_location (stmt, loc);
+  gimple_seq_add_stmt_without_update (&seq, stmt);
+
+  /* If there are no implicitly-zero trailing elements then we are finished.  */
+  if (encoded_nelts == lower_bound)
+    res = lower_elems;
+  else
+    {
+      /* Build a mask that exposes only the lanes for which our caller provided
+        values.  A true lane is all-ones (-1) in a vector boolean, not 1.  */
+      vec<constructor_elt, va_gc> *vm;
+      vec_alloc (vm, encoded_nelts);
+      tree mask_type = truth_type_for (type);
+      tree true_elt = build_all_ones_cst (TREE_TYPE (mask_type));
+      for (unsigned int i = 0; i < encoded_nelts; ++i)
+       CONSTRUCTOR_APPEND_ELT (vm, NULL_TREE, true_elt);
+      tree lower_mask = build_vector_from_ctor (mask_type, vm);
+
+      /* Copy unmasked elements from the temporary object to the destination
+        vector and assign zero to other elements.  */
+      res
+       = gimple_in_ssa_p (cfun) ? make_ssa_name (type) : create_tmp_reg (type);
+
+      tree zero_vec = build_zero_cst (type);
+      stmt = gimple_build_assign (res, VEC_COND_EXPR, lower_mask, lower_elems,
+                                 zero_vec);
+      gimple_set_location (stmt, loc);
+      gimple_seq_add_stmt_without_update (&seq, stmt);
+    }
+
+  gimple_build_insert_seq (gsi, before, update, seq);
+  return res;
+}
+
 /* Emit gimple statements into &stmts that take a value given in OLD_SIZE
    and generate a value guaranteed to be rounded upwards to ALIGN.
 
diff --git a/gcc/gimple-fold.h b/gcc/gimple-fold.h
index 7244941722d..074a4915d5d 100644
--- a/gcc/gimple-fold.h
+++ b/gcc/gimple-fold.h
@@ -243,6 +243,21 @@ gimple_build_vector (gimple_seq *seq, tree_vector_builder *builder)
                              UNKNOWN_LOCATION, builder);
 }
 
+extern tree gimple_build_vector_with_zero_padding
+  (gimple_stmt_iterator *, bool, enum gsi_iterator_update, location_t,
+   tree_vector_builder *);
+
+/* As above, but insert the new statements after the last one in SEQ.  */
+inline tree
+gimple_build_vector_with_zero_padding (gimple_seq *seq,
+                                      tree_vector_builder *builder)
+{
+  gimple_stmt_iterator last = gsi_last (*seq);
+  return gimple_build_vector_with_zero_padding (&last, false,
+                                               GSI_CONTINUE_LINKING,
+                                               UNKNOWN_LOCATION, builder);
+}
+
 extern tree gimple_build_round_up (gimple_stmt_iterator *, bool,
                                   enum gsi_iterator_update,
                                   location_t, tree, tree,
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index 8fed4e5b6b7..244b0c52c48 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -10832,7 +10832,7 @@ vect_create_constant_vectors (vec_info *vinfo, slp_tree op_node)
   unsigned j, number_of_places_left_in_vector;
   tree vector_type;
   tree vop;
-  int group_size = op_node->ops.length ();
+  unsigned int group_size = op_node->ops.length ();
   unsigned int vec_num, i;
   unsigned number_of_copies = 1;
   bool constant_p;
@@ -10862,12 +10862,27 @@ vect_create_constant_vectors (vec_info *vinfo, slp_tree op_node)
      (s1, s2, ..., s8).  We will create two vectors {s1, s2, s3, s4} and
      {s5, s6, s7, s8}.  */
 
-  /* When using duplicate_and_interleave, we just need one element for
-     each scalar statement.  */
-  if (!TYPE_VECTOR_SUBPARTS (vector_type).is_constant (&nunits))
-    nunits = group_size;
+  unsigned int elt_count = group_size;
+  if (is_a<bb_vec_info> (vinfo))
+    {
+      /* We don't use duplicate_and_interleave for basic block vectorization.
+        We know that either the group size is exactly divisible by the vector
+        length or it fits within a single vector, so all we need to do for
+        VLA is to pad the constant to the minimum vector length.  */
+      nunits = constant_lower_bound (TYPE_VECTOR_SUBPARTS (vector_type));
+      gcc_checking_assert (multiple_p (group_size, nunits)
+                          || known_le (group_size, nunits));
+      elt_count = MAX (nunits, group_size);
+    }
+  else
+    {
+      /* When using duplicate_and_interleave, we just need one element for
+        each scalar statement.  */
+      if (!TYPE_VECTOR_SUBPARTS (vector_type).is_constant (&nunits))
+       nunits = group_size;
+    }
 
-  number_of_copies = nunits * number_of_vectors / group_size;
+  number_of_copies = nunits * number_of_vectors / elt_count;
 
   number_of_places_left_in_vector = nunits;
   constant_p = true;
@@ -10877,9 +10892,15 @@ vect_create_constant_vectors (vec_info *vinfo, slp_tree op_node)
   stmt_vec_info insert_after = NULL;
   for (j = 0; j < number_of_copies; j++)
     {
-      tree op;
-      for (i = group_size - 1; op_node->ops.iterate (i, &op); i--)
+      for (i = elt_count; i-- > 0;)
         {
+         tree op;
+         if (i < group_size)
+           op = op_node->ops[i];
+         else
+           /* Pad with zeros.  */
+           op = build_zero_cst (TREE_TYPE (vector_type));
+
           /* Create 'vect_ = {op0,op1,...,opn}'.  */
          tree orig_op = op;
          if (number_of_places_left_in_vector == nunits)
@@ -10965,6 +10986,11 @@ vect_create_constant_vectors (vec_info *vinfo, slp_tree op_node)
                       ? multiple_p (type_nunits, nunits)
                       : known_eq (type_nunits, nunits))
                vec_cst = gimple_build_vector (&ctor_seq, &elts);
+             else if (is_a<bb_vec_info> (vinfo))
+               {
+                 vec_cst
+                   = gimple_build_vector_with_zero_padding (&ctor_seq, &elts);
+               }
              else
                {
                  if (permute_results.is_empty ())
-- 
2.43.0

Reply via email to