The following is a prototype-quality patch to make us record the
get_load_store_type results from load/store analysis and re-use them
during transform.  In particular this moves where
SLP_TREE_MEMORY_ACCESS_TYPE is stored.  Rather than mass-replacing
references to the variables I've kept the locals but made them
read-only, only adjusting a few elsvals setters and adding a FIXME to
the strided SLP handling of alignment (allowing a local override
there).
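To make the shape of the change easier to see outside of the diff,
here is a minimal, self-contained C++ sketch of the pattern the patch
introduces: analysis classifies the access once, records the result in
the SLP node's kind-specific data, and the transform phase reads it
back through read-only aliases.  All type and function names in the
sketch are illustrative stand-ins, not the actual GCC declarations,
and it uses std::unique_ptr for brevity where the patch stores a plain
pointer in slp_node->data.

  #include <memory>

  enum memory_access_kind { ACCESS_UNINITIALIZED, ACCESS_CONTIGUOUS };

  /* Base class for kind-specific per-node data (stands in for vect_data).  */
  struct node_data
  {
    virtual ~node_data () = default;
  };

  struct load_store_data : node_data
  {
    memory_access_kind access_kind = ACCESS_UNINITIALIZED;
    int misalignment = 0;
  };

  struct slp_node_t
  {
    std::unique_ptr<node_data> data;

    /* During analysis DATA is not yet populated, so fall back to the
       caller-provided scratch object (mirrors slp_node->get_data).  */
    load_store_data &
    get_data (load_store_data &scratch)
    {
      if (data)
        return *static_cast<load_store_data *> (data.get ());
      return scratch;
    }
  };

  bool
  analyze_load_store (slp_node_t &node)
  {
    load_store_data scratch;
    load_store_data &ls = node.get_data (scratch);
    ls.access_kind = ACCESS_CONTIGUOUS;  /* ... expensive classification ...  */
    ls.misalignment = 0;
    /* Record the result on the node so transform can re-use it.  */
    node.data = std::make_unique<load_store_data> (std::move (ls));
    return true;
  }

  void
  transform_load_store (slp_node_t &node)
  {
    load_store_data scratch;
    /* Read-only alias; transform must not second-guess the analysis.  */
    const load_store_data &ls = node.get_data (scratch);
    (void) ls.access_kind;  /* Drive code generation from the recorded kind.  */
  }

In the patch itself the call to get_load_store_type is additionally
guarded by cost_vec being non-NULL, so the analysis only runs at
costing time and the transform path always sees the recorded data.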
The recorded gs_info data is subject to further cleanup as part of
refactoring how we handle gather/scatter, but this is to be done as a
followup.  The FIXME shows that while a lot of analysis is done in
get_load_store_type, that's far from all of it.

	* tree-vectorizer.h (gather_scatter_info): Move.
	(vect_load_store_data): New.
	(_slp_tree::memory_access_type): Remove.
	(SLP_TREE_MEMORY_ACCESS_TYPE): Turn into inline function.
	* tree-vect-slp.cc (_slp_tree::_slp_tree): Do not initialize
	SLP_TREE_MEMORY_ACCESS_TYPE.
	* tree-vect-stmts.cc (get_load_store_type): Take pointer to
	vect_load_store_data instead of individual pointers.
	(vectorizable_store): Adjust.  Re-use get_load_store_type result
	from analysis time.
	(vectorizable_load): Likewise.
---
 gcc/tree-vect-slp.cc   |   1 -
 gcc/tree-vect-stmts.cc | 108 ++++++++++++++++++++++-------------------
 gcc/tree-vectorizer.h  |  97 +++++++++++++++++++++---------------
 3 files changed, 117 insertions(+), 89 deletions(-)

diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index ca14a2deed2..97e1b87b81f 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -124,7 +124,6 @@ _slp_tree::_slp_tree ()
   this->avoid_stlf_fail = false;
   SLP_TREE_VECTYPE (this) = NULL_TREE;
   SLP_TREE_REPRESENTATIVE (this) = NULL;
-  SLP_TREE_MEMORY_ACCESS_TYPE (this) = VMAT_UNINITIALIZED;
   SLP_TREE_REF_COUNT (this) = 1;
   this->failed = NULL;
   this->max_nunits = 1;
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 97222f64b7e..5b363bb6a4d 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -1951,14 +1951,16 @@ static bool
 get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
		      tree vectype, slp_tree slp_node,
		      bool masked_p, vec_load_store_type vls_type,
-		      vect_memory_access_type *memory_access_type,
-		      poly_int64 *poffset,
-		      dr_alignment_support *alignment_support_scheme,
-		      int *misalignment,
-		      gather_scatter_info *gs_info,
-		      internal_fn *lanes_ifn,
-		      vec<int> *elsvals = nullptr)
+		      vect_load_store_data *ls)
 {
+  vect_memory_access_type *memory_access_type = &ls->memory_access_type;
+  poly_int64 *poffset = &ls->poffset;
+  dr_alignment_support *alignment_support_scheme
+    = &ls->alignment_support_scheme;
+  int *misalignment = &ls->misalignment;
+  gather_scatter_info *gs_info = &ls->gs_info;
+  internal_fn *lanes_ifn = &ls->lanes_ifn;
+  vec<int> *elsvals = &ls->elsvals;
   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
   class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
@@ -7728,7 +7730,6 @@ vectorizable_store (vec_info *vinfo,
   unsigned int vec_num;
   bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
   tree aggr_type;
-  gather_scatter_info gs_info;
   poly_uint64 vf;
   vec_load_store_type vls_type;
   tree ref_type;
@@ -7817,16 +7818,21 @@ vectorizable_store (vec_info *vinfo,
   if (!STMT_VINFO_DATA_REF (stmt_info))
     return false;
 
-  vect_memory_access_type memory_access_type;
-  enum dr_alignment_support alignment_support_scheme;
-  int misalignment;
-  poly_int64 poffset;
-  internal_fn lanes_ifn;
-  if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask_node,
-			    vls_type, &memory_access_type, &poffset,
-			    &alignment_support_scheme, &misalignment, &gs_info,
-			    &lanes_ifn))
+  vect_load_store_data _ls_data;
+  vect_load_store_data &ls = slp_node->get_data (_ls_data);
+  if (cost_vec
+      && !get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask_node,
+			       vls_type, &_ls_data))
     return false;
+  /* Temporary aliases to analysis data, should not be modified through
+     these.  */
+  const vect_memory_access_type memory_access_type = ls.memory_access_type;
+  const dr_alignment_support alignment_support_scheme
+    = ls.alignment_support_scheme;
+  const int misalignment = ls.misalignment;
+  const poly_int64 poffset = ls.poffset;
+  const internal_fn lanes_ifn = ls.lanes_ifn;
+  const gather_scatter_info &gs_info = ls.gs_info;
 
   if (slp_node->ldst_lanes
       && memory_access_type != VMAT_LOAD_STORE_LANES
@@ -7899,8 +7905,6 @@ vectorizable_store (vec_info *vinfo,
   bool costing_p = cost_vec;
   if (costing_p) /* transformation not required.  */
     {
-      SLP_TREE_MEMORY_ACCESS_TYPE (slp_node) = memory_access_type;
-
       if (loop_vinfo
	  && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
	check_load_store_for_partial_vectors (loop_vinfo, vectype, slp_node,
@@ -7928,8 +7932,8 @@ vectorizable_store (vec_info *vinfo,
			 "Vectorizing an unaligned access.\n");
 
       SLP_TREE_TYPE (slp_node) = store_vec_info_type;
+      slp_node->data = new vect_load_store_data (std::move (ls));
     }
-  gcc_assert (memory_access_type == SLP_TREE_MEMORY_ACCESS_TYPE (slp_node));
 
   /* Transform.  */
 
@@ -8024,6 +8028,14 @@ vectorizable_store (vec_info *vinfo,
	 ...
       */
 
+      /* ??? Modify local copies of alignment_support_scheme and
+	 misalignment, but this part of analysis should be done
+	 earlier and remembered, likewise the chosen load mode.  */
+      const dr_alignment_support tem = alignment_support_scheme;
+      dr_alignment_support alignment_support_scheme = tem;
+      const int tem2 = misalignment;
+      int misalignment = tem2;
+
       unsigned nstores = const_nunits;
       unsigned lnel = 1;
       tree ltype = elem_type;
@@ -9242,7 +9254,6 @@ vectorizable_load (vec_info *vinfo,
   bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
   poly_uint64 vf;
   tree aggr_type;
-  gather_scatter_info gs_info;
   tree ref_type;
   enum vect_def_type mask_dt = vect_unknown_def_type;
   enum vect_def_type els_dt = vect_unknown_def_type;
@@ -9376,20 +9387,25 @@ vectorizable_load (vec_info *vinfo,
   else
     group_size = 1;
 
-  vect_memory_access_type memory_access_type;
-  enum dr_alignment_support alignment_support_scheme;
-  int misalignment;
-  poly_int64 poffset;
-  internal_fn lanes_ifn;
-  auto_vec<int> elsvals;
-  int maskload_elsval = 0;
-  bool need_zeroing = false;
-  if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask_node,
-			    VLS_LOAD, &memory_access_type, &poffset,
-			    &alignment_support_scheme, &misalignment, &gs_info,
-			    &lanes_ifn, &elsvals))
+  vect_load_store_data _ls_data;
+  vect_load_store_data &ls = slp_node->get_data (_ls_data);
+  if (cost_vec
+      && !get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask_node,
+			       VLS_LOAD, &ls))
     return false;
+  /* Temporary aliases to analysis data, should not be modified through
+     these.  */
+  const vect_memory_access_type memory_access_type = ls.memory_access_type;
+  const dr_alignment_support alignment_support_scheme
+    = ls.alignment_support_scheme;
+  const int misalignment = ls.misalignment;
+  const poly_int64 poffset = ls.poffset;
+  const internal_fn lanes_ifn = ls.lanes_ifn;
+  const vec<int> &elsvals = ls.elsvals;
+  const gather_scatter_info &gs_info = ls.gs_info;
+  int maskload_elsval = 0;
+  bool need_zeroing = false;
 
   /* We might need to explicitly zero inactive elements if there are
      padding bits in the type that might leak otherwise.  */
@@ -9460,7 +9476,7 @@ vectorizable_load (vec_info *vinfo,
       if (!VECTOR_MODE_P (vec_mode)
	  || !can_vec_mask_load_store_p (vec_mode,
					 TYPE_MODE (mask_vectype),
-					 true, NULL, &elsvals))
+					 true, NULL, &ls.elsvals))
	return false;
     }
   else if (memory_access_type != VMAT_LOAD_STORE_LANES
@@ -9503,14 +9519,12 @@ vectorizable_load (vec_info *vinfo,
       return false;
     }
 
-  SLP_TREE_MEMORY_ACCESS_TYPE (slp_node) = memory_access_type;
-
   if (loop_vinfo
       && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
     check_load_store_for_partial_vectors (loop_vinfo, vectype, slp_node,
					  VLS_LOAD, group_size,
					  memory_access_type, &gs_info,
-					  mask_node, &elsvals);
+					  mask_node, &ls.elsvals);
 
   if (dump_enabled_p ()
       && memory_access_type != VMAT_ELEMENTWISE
@@ -9525,16 +9539,7 @@ vectorizable_load (vec_info *vinfo,
	vinfo->any_known_not_updated_vssa = true;
 
       SLP_TREE_TYPE (slp_node) = load_vec_info_type;
-    }
-  else
-    {
-      /* Here just get the else values.  */
-      if (loop_vinfo
-	  && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
-	check_load_store_for_partial_vectors (loop_vinfo, vectype, slp_node,
-					      VLS_LOAD, group_size,
-					      memory_access_type, &gs_info,
-					      mask_node, &elsvals);
+      slp_node->data = new vect_load_store_data (std::move (ls));
     }
 
   /* If the type needs padding we must zero inactive elements.
@@ -9557,8 +9562,6 @@ vectorizable_load (vec_info *vinfo,
   if (elsvals.length ())
     maskload_elsval = *elsvals.begin ();
 
-  gcc_assert (memory_access_type == SLP_TREE_MEMORY_ACCESS_TYPE (slp_node));
-
   if (dump_enabled_p () && !costing_p)
     dump_printf_loc (MSG_NOTE, vect_location, "transform load.\n");
 
@@ -9727,6 +9730,13 @@ vectorizable_load (vec_info *vinfo,
       tree ltype = TREE_TYPE (vectype);
       tree lvectype = vectype;
       auto_vec<tree> dr_chain;
+      /* ??? Modify local copies of alignment_support_scheme and
+	 misalignment, but this part of analysis should be done
+	 earlier and remembered, likewise the chosen load mode.  */
+      const dr_alignment_support tem = alignment_support_scheme;
+      dr_alignment_support alignment_support_scheme = tem;
+      const int tem2 = misalignment;
+      int misalignment = tem2;
       if (memory_access_type == VMAT_STRIDED_SLP)
	{
	  HOST_WIDE_INT n = gcd (group_size, const_nunits);
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index adc095e3e88..76c72344370 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -233,6 +233,40 @@ enum stmt_vec_info_type {
   loop_exit_ctrl_vec_info_type
 };
 
+/* Information about a gather/scatter call.  */
+struct gather_scatter_info {
+  /* The internal function to use for the gather/scatter operation,
+     or IFN_LAST if a built-in function should be used instead.  */
+  internal_fn ifn;
+
+  /* The FUNCTION_DECL for the built-in gather/scatter function,
+     or null if an internal function should be used instead.  */
+  tree decl;
+
+  /* The loop-invariant base value.  */
+  tree base;
+
+  /* The TBAA alias pointer the value of which determines the alignment
+     of the scalar accesses.  */
+  tree alias_ptr;
+
+  /* The original scalar offset, which is a non-loop-invariant SSA_NAME.  */
+  tree offset;
+
+  /* Each offset element should be multiplied by this amount before
+     being added to the base.  */
+  int scale;
+
+  /* The type of the vectorized offset.  */
+  tree offset_vectype;
+
+  /* The type of the scalar elements after loading or before storing.  */
+  tree element_type;
+
+  /* The type of the memory elements being loaded or stored.  */
+  tree memory_type;
+};
+
 /************************************************************************
   SLP
  ************************************************************************/
@@ -258,6 +292,21 @@ struct vect_simd_clone_data : vect_data {
   auto_vec<tree> simd_clone_info;
 };
 
+/* Analysis data from vectorizable_load and vectorizable_store for
+   load_vec_info_type and store_vec_info_type.  */
+struct vect_load_store_data : vect_data {
+  vect_load_store_data (vect_load_store_data &&other) = default;
+  vect_load_store_data () = default;
+  virtual ~vect_load_store_data () = default;
+  dr_alignment_support alignment_support_scheme;
+  vect_memory_access_type memory_access_type;
+  int misalignment;
+  poly_int64 poffset;
+  internal_fn lanes_ifn;
+  auto_vec<int> elsvals;
+  gather_scatter_info gs_info;
+};
+
 /* A computation tree of an SLP instance.
    Each node corresponds to a group of stmts to be packed in a SIMD stmt.  */
 struct _slp_tree {
@@ -315,10 +364,6 @@ struct _slp_tree {
 
   int vertex;
 
-  /* Classifies how the load or store is going to be implemented
-     for loop vectorization.  */
-  vect_memory_access_type memory_access_type;
-
   /* The kind of operation as determined by analysis and optional
      kind specific data.  */
   enum stmt_vec_info_type type;
@@ -410,9 +455,17 @@ public:
 #define SLP_TREE_REPRESENTATIVE(S) (S)->representative
 #define SLP_TREE_LANES(S) (S)->lanes
 #define SLP_TREE_CODE(S) (S)->code
-#define SLP_TREE_MEMORY_ACCESS_TYPE(S) (S)->memory_access_type
 #define SLP_TREE_TYPE(S) (S)->type
 
+inline vect_memory_access_type
+SLP_TREE_MEMORY_ACCESS_TYPE (slp_tree node)
+{
+  if (SLP_TREE_TYPE (node) == load_vec_info_type
+      || SLP_TREE_TYPE (node) == store_vec_info_type)
+    return static_cast<vect_load_store_data *> (node->data)->memory_access_type;
+  return VMAT_UNINITIALIZED;
+}
+
 enum vect_partial_vector_style {
     vect_partial_vectors_none,
     vect_partial_vectors_while_ult,
@@ -1554,40 +1607,6 @@ public:
   bool slp_vect_pattern_only_p;
 };
 
-/* Information about a gather/scatter call.  */
-struct gather_scatter_info {
-  /* The internal function to use for the gather/scatter operation,
-     or IFN_LAST if a built-in function should be used instead.  */
-  internal_fn ifn;
-
-  /* The FUNCTION_DECL for the built-in gather/scatter function,
-     or null if an internal function should be used instead.  */
-  tree decl;
-
-  /* The loop-invariant base value.  */
-  tree base;
-
-  /* The TBAA alias pointer the value of which determines the alignment
-     of the scalar accesses.  */
-  tree alias_ptr;
-
-  /* The original scalar offset, which is a non-loop-invariant SSA_NAME.  */
-  tree offset;
-
-  /* Each offset element should be multiplied by this amount before
-     being added to the base.  */
-  int scale;
-
-  /* The type of the vectorized offset.  */
-  tree offset_vectype;
-
-  /* The type of the scalar elements after loading or before storing.  */
-  tree element_type;
-
-  /* The type of the memory elements being loaded or stored.  */
-  tree memory_type;
-};
-
 /* Access Functions.  */
 #define STMT_VINFO_STMT(S) (S)->stmt
 #define STMT_VINFO_RELEVANT(S) (S)->relevant
-- 
2.43.0