While the main gather/scatter discovery happens at SLP discovery time, the base address and the offset scale are currently not explicitly represented in the SLP tree. This requires re-discovering them during vectorizable_store/load. The following fixes this by recording this info in the SLP tree. This allows the main vect_check_gather_scatter call to be elided from get_load_store_type and replaced with target support checks for IFN/decl or the fallback emulated mode.
There is still a vect_check_gather_scatter call left in the path that uses gather/scatter for strided load/store. I hope to deal with this later. Bootstrapped and tested on x86_64-unknown-linux-gnu. On aarch64 vect.exp is clean, but I hope the CI is back. * tree-vectorizer.h (_slp_tree::gs_scale): New. (_slp_tree::gs_base): Likewise. (SLP_TREE_GS_SCALE): Likewise. (SLP_TREE_GS_BASE): Likewise. * tree-vect-slp.cc (_slp_tree::_slp_tree): Initialize new members. (vect_build_slp_tree_2): Record gather/scatter base and scale. * tree-vect-data-refs.cc (vect_gather_scatter_fn_p): Add mode of operation with fixed offset vector type. * tree-vect-stmts.cc (get_load_store_type): Do not call vect_check_gather_scatter to fill gs_info, instead populate from the SLP tree. Check which of IFN, decl or fallback is supported and record that decision. --- gcc/tree-vect-data-refs.cc | 22 ++++++++++++---- gcc/tree-vect-slp.cc | 13 ++++++++++ gcc/tree-vect-stmts.cc | 51 ++++++++++++++++++++++---------------- gcc/tree-vectorizer.h | 7 ++++++ 4 files changed, 67 insertions(+), 26 deletions(-) diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc index da700cd1f3d..ce4b9112001 100644 --- a/gcc/tree-vect-data-refs.cc +++ b/gcc/tree-vect-data-refs.cc @@ -4430,8 +4430,9 @@ vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo) MASKED_P is true if the load or store is conditional. MEMORY_TYPE is the type of the memory elements being loaded or stored. OFFSET_TYPE is the type of the offset that is being applied to the invariant - base address. SCALE is the amount by which the offset should - be multiplied *after* it has been converted to address width. + base address. If OFFSET_TYPE is scalar the function chooses an + appropriate vector type for it. SCALE is the amount by which the + offset should be multiplied *after* it has been converted to address width.
Return true if the function is supported, storing the function id in *IFN_OUT and the vector type for the offset in *OFFSET_VECTYPE_OUT. @@ -4474,9 +4475,15 @@ vect_gather_scatter_fn_p (vec_info *vinfo, bool read_p, bool masked_p, for (;;) { - tree offset_vectype = get_vectype_for_scalar_type (vinfo, offset_type); - if (!offset_vectype) - return false; + tree offset_vectype; + if (VECTOR_TYPE_P (offset_type)) + offset_vectype = offset_type; + else + { + offset_vectype = get_vectype_for_scalar_type (vinfo, offset_type); + if (!offset_vectype) + return false; + } /* Test whether the target supports this combination. */ if (internal_gather_scatter_fn_supported_p (ifn, vectype, memory_type, @@ -4507,10 +4514,15 @@ vect_gather_scatter_fn_p (vec_info *vinfo, bool read_p, bool masked_p, return true; } + /* For fixed offset vector type we're done. */ + if (VECTOR_TYPE_P (offset_type)) + return false; + if (TYPE_PRECISION (offset_type) >= POINTER_SIZE && TYPE_PRECISION (offset_type) >= element_bits) return false; + /* Try a larger offset vector type. */ offset_type = build_nonstandard_integer_type (TYPE_PRECISION (offset_type) * 2, TYPE_UNSIGNED (offset_type)); } diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc index ca14a2deed2..5a6f23cf18b 100644 --- a/gcc/tree-vect-slp.cc +++ b/gcc/tree-vect-slp.cc @@ -120,6 +120,8 @@ _slp_tree::_slp_tree () SLP_TREE_LANE_PERMUTATION (this) = vNULL; SLP_TREE_DEF_TYPE (this) = vect_uninitialized_def; SLP_TREE_CODE (this) = ERROR_MARK; + SLP_TREE_GS_SCALE (this) = 0; + SLP_TREE_GS_BASE (this) = NULL_TREE; this->ldst_lanes = false; this->avoid_stlf_fail = false; SLP_TREE_VECTYPE (this) = NULL_TREE; @@ -2723,6 +2725,9 @@ out: stmt_info = stmts[0]; + int gs_scale = 0; + tree gs_base = NULL_TREE; + /* Create SLP_TREE nodes for the definition node/s. 
*/ FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info) { @@ -2745,6 +2750,12 @@ out: continue; } + if (oprnd_info->first_gs_p) + { + gs_scale = oprnd_info->first_gs_info.scale; + gs_base = oprnd_info->first_gs_info.base; + } + if (is_a <bb_vec_info> (vinfo) && oprnd_info->first_dt == vect_internal_def && !oprnd_info->any_pattern) @@ -3134,6 +3145,8 @@ fail: node = vect_create_new_slp_node (node, stmts, nops); SLP_TREE_VECTYPE (node) = vectype; SLP_TREE_CHILDREN (node).splice (children); + SLP_TREE_GS_SCALE (node) = gs_scale; + SLP_TREE_GS_BASE (node) = gs_base; return node; } diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc index 97222f64b7e..96711509c90 100644 --- a/gcc/tree-vect-stmts.cc +++ b/gcc/tree-vect-stmts.cc @@ -2015,31 +2015,40 @@ get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info, else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info)) { *memory_access_type = VMAT_GATHER_SCATTER; - if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info, - elsvals)) - gcc_unreachable (); slp_tree offset_node = SLP_TREE_CHILDREN (slp_node)[0]; tree offset_vectype = SLP_TREE_VECTYPE (offset_node); + memset (gs_info, 0, sizeof (gather_scatter_info)); gs_info->offset_vectype = offset_vectype; - /* When using internal functions, we rely on pattern recognition - to convert the type of the offset to the type that the target - requires, with the result being a call to an internal function. - If that failed for some reason (e.g. because another pattern - took priority), just handle cases in which the offset already - has the right type. */ - if (GATHER_SCATTER_IFN_P (*gs_info) - && !is_gimple_call (stmt_info->stmt) - && !tree_nop_conversion_p (TREE_TYPE (gs_info->offset), - TREE_TYPE (offset_vectype))) - { - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "%s offset requires a conversion\n", - vls_type == VLS_LOAD ? 
"gather" : "scatter"); - return false; - } - else if (GATHER_SCATTER_EMULATED_P (*gs_info)) + gs_info->scale = SLP_TREE_GS_SCALE (slp_node); + gs_info->base = SLP_TREE_GS_BASE (slp_node); + gs_info->memory_type = TREE_TYPE (DR_REF (first_dr_info->dr)); + gs_info->decl = NULL_TREE; + gs_info->ifn = IFN_LAST; + tree tem; + if (vect_gather_scatter_fn_p (loop_vinfo, vls_type == VLS_LOAD, + masked_p, vectype, + gs_info->memory_type, + offset_vectype, gs_info->scale, + &gs_info->ifn, &tem, + elsvals)) + /* GATHER_SCATTER_IFN_P. */; + else if (vls_type == VLS_LOAD + ? (targetm.vectorize.builtin_gather + && (gs_info->decl + = targetm.vectorize.builtin_gather (vectype, + TREE_TYPE + (offset_vectype), + gs_info->scale))) + : (targetm.vectorize.builtin_scatter + && (gs_info->decl + = targetm.vectorize.builtin_scatter (vectype, + TREE_TYPE + (offset_vectype), + gs_info->scale)))) + /* GATHER_SCATTER_LEGACY_P. */; + else { + /* GATHER_SCATTER_EMULATED_P. */ if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant () || !TYPE_VECTOR_SUBPARTS (offset_vectype).is_constant () || VECTOR_BOOLEAN_TYPE_P (offset_vectype) diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index 0a75ee15857..6517c2aef17 100644 --- a/gcc/tree-vectorizer.h +++ b/gcc/tree-vectorizer.h @@ -306,6 +306,11 @@ struct _slp_tree { unsigned int lanes; /* The operation of this node. */ enum tree_code code; + /* For gather/scatter memory operations the scale each offset element + should be multiplied by before being added to the base. */ + int gs_scale; + /* For gather/scatter memory operations the loop-invariant base value. */ + tree gs_base; /* Whether uses of this load or feeders of this store are suitable for load/store-lanes. 
*/ bool ldst_lanes; @@ -412,6 +417,8 @@ public: #define SLP_TREE_CODE(S) (S)->code #define SLP_TREE_MEMORY_ACCESS_TYPE(S) (S)->memory_access_type #define SLP_TREE_TYPE(S) (S)->type +#define SLP_TREE_GS_SCALE(S) (S)->gs_scale +#define SLP_TREE_GS_BASE(S) (S)->gs_base enum vect_partial_vector_style { vect_partial_vectors_none, -- 2.43.0