For basic block superword-level parallelism, modify the
definitions of the recently introduced wrapper functions,
vect_record_(len|mask), to simply set one of two flags
to indicate that a mask or length should be used for a
given SLP node. The passed-in vec_info is ignored.
Likewise, implement vect_can_use_(mask|len)_p,
vect_can_use_partial_vectors_p (for queries) and
vect_cannot_use_partial_vectors (for updates) for
BB SLP. The relevant flag is accessed in the passed-in
SLP node instead of the passed-in vec_info.
The implementations of vect_get_mask and vect_get_len
for BB SLP are non-trivial (albeit simpler than for loop
vectorization), therefore they are delegated to SLP-specific
functions defined in tree-vect-slp.cc.
To prevent regressions, vect_can_use_partial_vectors_p still returns
false for BB SLP (for now).
---
gcc/tree-vect-slp.cc | 86 ++++++++++++++++++++++++++++++++++++++++++
gcc/tree-vect-stmts.cc | 57 +++++++++++++++++-----------
gcc/tree-vectorizer.h | 30 ++++++++++-----
3 files changed, 143 insertions(+), 30 deletions(-)
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index 3d78f91c93a..4a30cf3d187 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -124,6 +124,9 @@ _slp_tree::_slp_tree ()
SLP_TREE_GS_BASE (this) = NULL_TREE;
this->ldst_lanes = false;
this->avoid_stlf_fail = false;
+ SLP_TREE_CAN_USE_MASK_P (this) = false;
+ SLP_TREE_CAN_USE_LEN_P (this) = false;
+ SLP_TREE_CAN_USE_PARTIAL_VECTORS_P (this) = true;
SLP_TREE_VECTYPE (this) = NULL_TREE;
SLP_TREE_REPRESENTATIVE (this) = NULL;
this->cycle_info.id = -1;
@@ -12310,3 +12313,86 @@ vect_schedule_slp (vec_info *vinfo, const
vec<slp_instance> &slp_instances)
}
}
}
+
+/* Materialize mask number INDEX for a group of scalar stmts in SLP_NODE that
+ operate on NVECTORS vectors of type VECTYPE, where 0 <= INDEX < NVECTORS.
+ Return NULL_TREE unless NVECTORS is 1 and VECTYPE is not known to have
+ exactly SLP_TREE_LANES (SLP_NODE) elements; in that case return a mask
+ enabling only the leading lanes, with set-up statements inserted before GSI. */
+tree
+vect_slp_get_bb_mask (slp_tree slp_node, gimple_stmt_iterator *gsi,
+ unsigned int nvectors, tree vectype, unsigned int index)
+{
+ gcc_checking_assert (SLP_TREE_CAN_USE_MASK_P (slp_node));
+
+ /* Only the last vector can be a partial vector. */
+ if (index < nvectors - 1)
+ return NULL_TREE;
+
+ /* vect_get_num_copies only allows a partial vector if it is the only
+ vector. */
+ if (nvectors > 1)
+ return NULL_TREE;
+
+ gcc_checking_assert (nvectors == 1);
+
+ poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
+ unsigned int group_size = SLP_TREE_LANES (slp_node);
+
+ /* A single vector can be a full vector, in which case no mask is
+ needed. */
+ if (known_eq (nunits, group_size))
+ return NULL_TREE;
+
+ /* Return a mask for a single partial vector.
+ FORNOW: don't bother maintaining a set of mask constants to allow
+ sharing between nodes belonging to the same instance of bb_vec_info. */
+ gcc_checking_assert (known_le (group_size, nunits));
+ gimple_seq stmts = NULL;
+ tree cmp_type = size_type_node;
+ tree start_index = build_zero_cst (cmp_type);
+ tree end_index = build_int_cst (cmp_type, group_size);
+ tree masktype = truth_type_for (vectype);
+ tree mask = make_temp_ssa_name (masktype, NULL, "slp_mask");
+ vect_gen_while_ssa_name (&stmts, masktype, start_index, end_index, mask);
+ gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
+
+ return mask;
+}
+
+/* Materialize length number INDEX for a group of scalar stmts in SLP_NODE that
+ operate on NVECTORS vectors of type VECTYPE, where 0 <= INDEX < NVECTORS. A
+ length limit is only required for a lone partial vector, so NULL_TREE is
+ returned if NVECTORS > 1 or if VECTYPE is known to have exactly as many
+ elements as SLP_NODE has lanes; otherwise, return a constant equal to
+ FACTOR multiplied by the number of lanes in SLP_NODE. */
+
+tree
+vect_slp_get_bb_len (slp_tree slp_node, unsigned int nvectors, tree vectype,
+ unsigned int index, unsigned int factor)
+{
+ gcc_checking_assert (SLP_TREE_CAN_USE_LEN_P (slp_node));
+
+ /* Only the last vector can be a partial vector. */
+ if (index < nvectors - 1)
+ return NULL_TREE;
+
+ /* vect_get_num_copies only allows a partial vector if it is the only
+ vector. */
+ if (nvectors > 1)
+ return NULL_TREE;
+
+ gcc_checking_assert (nvectors == 1);
+
+ poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
+ unsigned int group_size = SLP_TREE_LANES (slp_node);
+
+ /* A single full vector needs no length limit. */
+ if (known_eq (nunits, group_size))
+ return NULL_TREE;
+
+ /* Return the scaled length of a single partial vector. NUNITS is a
+ poly_uint64, so failing known_eq does not imply known_lt: use known_le. */
+ gcc_checking_assert (known_le (group_size, nunits));
+ return size_int (group_size * factor);
+}
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 3115c610736..5ec65b2b2de 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -1408,7 +1408,9 @@ vectorizable_internal_function (combined_fn cfn, tree
fndecl,
/* Record that a complete set of masks associated with VINFO would need to
contain a sequence of NVECTORS masks that each control a vector of type
VECTYPE. If SCALAR_MASK is nonnull, the fully-masked loop would AND
- these vector masks with the vector version of SCALAR_MASK. */
+ these vector masks with the vector version of SCALAR_MASK. Alternatively,
+ if doing basic block vectorization, record that an equivalent mask would be
+ required to vectorize SLP_NODE. */
static void
vect_record_mask (vec_info *vinfo, slp_tree slp_node, unsigned int nvectors,
tree vectype, tree scalar_mask)
@@ -1418,7 +1420,10 @@ vect_record_mask (vec_info *vinfo, slp_tree slp_node,
unsigned int nvectors,
vect_record_loop_mask (loop_vinfo, &LOOP_VINFO_MASKS (loop_vinfo),
nvectors,
vectype, scalar_mask);
else
- (void) slp_node; // FORNOW
+ {
+ gcc_checking_assert (!SLP_TREE_CAN_USE_LEN_P (slp_node));
+ SLP_TREE_CAN_USE_MASK_P (slp_node) = true;
+ }
}
/* Given a complete set of masks associated with VINFO, extract mask number
@@ -1436,16 +1441,15 @@ vect_get_mask (vec_info *vinfo, slp_tree slp_node,
gimple_stmt_iterator *gsi,
return vect_get_loop_mask (loop_vinfo, gsi, &LOOP_VINFO_MASKS (loop_vinfo),
nvectors, vectype, index);
else
- {
- (void) slp_node; // FORNOW
- return NULL_TREE;
- }
+ return vect_slp_get_bb_mask (slp_node, gsi, nvectors, vectype, index);
}
/* Record that a complete set of lengths associated with VINFO would need to
contain a sequence of NVECTORS lengths for controlling an operation on
VECTYPE. The operation splits each element of VECTYPE into FACTOR separate
- subelements, measuring the length as a number of these subelements. */
+ subelements, measuring the length as a number of these subelements.
+ Alternatively, if doing basic block vectorization, record that an equivalent
+ length would be required to vectorize SLP_NODE. */
static void
vect_record_len (vec_info *vinfo, slp_tree slp_node, unsigned int nvectors,
tree vectype, unsigned int factor)
@@ -1455,7 +1459,10 @@ vect_record_len (vec_info *vinfo, slp_tree slp_node,
unsigned int nvectors,
vect_record_loop_len (loop_vinfo, &LOOP_VINFO_LENS (loop_vinfo), nvectors,
vectype, factor);
else
- (void) slp_node; // FORNOW
+ {
+ gcc_checking_assert (!SLP_TREE_CAN_USE_MASK_P (slp_node));
+ SLP_TREE_CAN_USE_LEN_P (slp_node) = true;
+ }
}
/* Given a complete set of lengths associated with VINFO, extract length number
@@ -1476,10 +1483,7 @@ vect_get_len (vec_info *vinfo, slp_tree slp_node,
gimple_stmt_iterator *gsi,
return vect_get_loop_len (loop_vinfo, gsi, &LOOP_VINFO_LENS (loop_vinfo),
nvectors, vectype, index, factor);
else
- {
- (void) slp_node; // FORNOW
- return NULL_TREE;
- }
+ return vect_slp_get_bb_len (slp_node, nvectors, vectype, index, factor);
}
static tree permute_vec_elements (vec_info *, tree, tree, tree, stmt_vec_info,
@@ -14252,24 +14256,35 @@ supportable_indirect_convert_operation (code_helper
code,
mask[I] is true iff J + START_INDEX < END_INDEX for all J <= I.
Add the statements to SEQ. */
+void
+vect_gen_while_ssa_name (gimple_seq *seq, tree mask_type, tree start_index,
+ tree end_index, tree ssa_name)
+{
+ tree cmp_type = TREE_TYPE (start_index);
+ gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT, cmp_type,
+ mask_type,
+ OPTIMIZE_FOR_SPEED));
+ gcall *call
+ = gimple_build_call_internal (IFN_WHILE_ULT, 3, start_index, end_index,
+ build_zero_cst (mask_type));
+ gimple_call_set_lhs (call, ssa_name); /* Mask is defined in caller's SSA_NAME. */
+ gimple_seq_add_stmt (seq, call); /* Queued on SEQ; nothing is returned. */
+}
+
+/* Like vect_gen_while_ssa_name except that it creates a new SSA_NAME node
+ for type MASK_TYPE defined in the created GIMPLE_CALL statement. If NAME
+ is not a null pointer then it is used for the SSA_NAME in dumps. */
+
tree
vect_gen_while (gimple_seq *seq, tree mask_type, tree start_index,
tree end_index, const char *name)
{
- tree cmp_type = TREE_TYPE (start_index);
- gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
- cmp_type, mask_type,
- OPTIMIZE_FOR_SPEED));
- gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
- start_index, end_index,
- build_zero_cst (mask_type));
tree tmp;
if (name)
tmp = make_temp_ssa_name (mask_type, NULL, name);
else
tmp = make_ssa_name (mask_type);
- gimple_call_set_lhs (call, tmp);
- gimple_seq_add_stmt (seq, call);
+ vect_gen_while_ssa_name (seq, mask_type, start_index, end_index, tmp);
return tmp;
}
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 582953bd8e8..708ce783db7 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -368,6 +368,16 @@ struct _slp_tree {
/* For BB vect, flag to indicate this load node should be vectorized
as to avoid STLF fails because of related stores. */
bool avoid_stlf_fail;
+ /* Flag to indicate this node can be vectorized by using masks to prevent
+ use of inactive scalar lanes. */
+ bool can_use_mask;
+ /* Flag to indicate this node can be vectorized by using lengths to prevent
+ use of inactive scalar lanes. */
+ bool can_use_len;
+ /* Flag to indicate whether we still have the option of vectorizing this node
+ using partial vectors (i.e. using lengths or masks to prevent use of
+ inactive scalar lanes). */
+ bool can_use_partial_vectors;
int vertex;
@@ -466,6 +476,9 @@ public:
#define SLP_TREE_GS_BASE(S) (S)->gs_base
#define SLP_TREE_REDUC_IDX(S) (S)->cycle_info.reduc_idx
#define SLP_TREE_PERMUTE_P(S) ((S)->code == VEC_PERM_EXPR)
+#define SLP_TREE_CAN_USE_MASK_P(S) (S)->can_use_mask
+#define SLP_TREE_CAN_USE_LEN_P(S) (S)->can_use_len
+#define SLP_TREE_CAN_USE_PARTIAL_VECTORS_P(S) (S)->can_use_partial_vectors
inline vect_memory_access_type
SLP_TREE_MEMORY_ACCESS_TYPE (slp_tree node)
@@ -2608,6 +2621,7 @@ extern tree vect_gen_perm_mask_checked (tree, const
vec_perm_indices &);
extern void optimize_mask_stores (class loop*);
extern tree vect_gen_while (gimple_seq *, tree, tree, tree,
const char * = nullptr);
+extern void vect_gen_while_ssa_name (gimple_seq *, tree, tree, tree, tree);
extern tree vect_gen_while_not (gimple_seq *, tree, tree, tree);
extern opt_result vect_get_vector_types_for_stmt (vec_info *,
stmt_vec_info, tree *,
@@ -2793,6 +2807,10 @@ extern slp_tree vect_create_new_slp_node (unsigned,
tree_code);
extern void vect_free_slp_tree (slp_tree);
extern bool compatible_calls_p (gcall *, gcall *, bool);
extern int vect_slp_child_index_for_operand (const gimple *, int op, bool);
+extern tree vect_slp_get_bb_mask (slp_tree, gimple_stmt_iterator *,
+ unsigned int, tree, unsigned int);
+extern tree vect_slp_get_bb_len (slp_tree, unsigned int, tree, unsigned int,
+ unsigned int);
extern tree prepare_vec_mask (vec_info *, tree, tree, tree,
gimple_stmt_iterator *);
extern tree vect_get_mask_load_else (int, tree);
@@ -2956,7 +2974,7 @@ vect_cannot_use_partial_vectors (vec_info *vinfo,
slp_tree slp_node)
if (loop_vinfo)
LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
else
- (void) slp_node; // FORNOW
+ SLP_TREE_CAN_USE_PARTIAL_VECTORS_P (slp_node) = false;
}
/* Return true if VINFO is vectorizer state for loop vectorization, we've
@@ -2970,10 +2988,7 @@ vect_can_use_len_p (vec_info *vinfo, slp_tree slp_node)
if (loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (vinfo))
return LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo);
else
- {
- (void) slp_node; // FORNOW
- return false;
- }
+ return SLP_TREE_CAN_USE_LEN_P (slp_node);
}
/* Return true if VINFO is vectorizer state for loop vectorization, we've
@@ -2987,10 +3002,7 @@ vect_can_use_mask_p (vec_info *vinfo, slp_tree slp_node)
if (loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (vinfo))
return LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
else
- {
- (void) slp_node; // FORNOW
- return false;
- }
+ return SLP_TREE_CAN_USE_MASK_P (slp_node);
}
/* If STMT_INFO describes a reduction, return the vect_reduction_type
--
2.43.0