The following splits up VMAT_GATHER_SCATTER into VMAT_GATHER_SCATTER_LEGACY, VMAT_GATHER_SCATTER_IFN and VMAT_GATHER_SCATTER_EMULATED. The main motivation is to reduce the uses of (full) gs_info, but it also makes the kind representable by a single entry rather than the ifn and decl tristate.
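In call sites this boils down to replacing the gs_info tristate checks
with a plain comparison of the access type, for example (both spellings
are taken from the hunks below):

  /* Before: kind derived from gs_info.decl / gs_info.ifn.  */
  if (GATHER_SCATTER_IFN_P (gs_info))
    ...

  /* After: the kind is carried by the memory access type itself.  */
  if (memory_access_type == VMAT_GATHER_SCATTER_IFN)
    ...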
The strided load with gather case gets to use VMAT_GATHER_SCATTER_IFN,
since that's what we end up checking.

Bootstrap and regtest running on x86_64-unknown-linux-gnu,
aarch64-linux testing running as well.

Richard.

	* tree-vectorizer.h (vect_memory_access_type): Replace
	VMAT_GATHER_SCATTER with three separate access types,
	VMAT_GATHER_SCATTER_LEGACY, VMAT_GATHER_SCATTER_IFN and
	VMAT_GATHER_SCATTER_EMULATED.
	(mat_gather_scatter_p): New predicate.
	(GATHER_SCATTER_LEGACY_P): Remove.
	(GATHER_SCATTER_IFN_P): Likewise.
	(GATHER_SCATTER_EMULATED_P): Likewise.
	* tree-vect-stmts.cc (check_load_store_for_partial_vectors): Adjust.
	(get_load_store_type): Likewise.
	(vect_get_loop_variant_data_ptr_increment): Likewise.
	(vectorizable_store): Likewise.
	(vectorizable_load): Likewise.
	* config/i386/i386.cc (ix86_vector_costs::add_stmt_cost): Likewise.
	* config/riscv/riscv-vector-costs.cc
	(costs::need_additional_vector_vars_p): Likewise.
	* config/aarch64/aarch64.cc (aarch64_detect_vector_stmt_subtype):
	Likewise.
	(aarch64_vector_costs::count_ops): Likewise.
	(aarch64_vector_costs::add_stmt_cost): Likewise.
---
 gcc/config/aarch64/aarch64.cc          | 10 ++---
 gcc/config/i386/i386.cc                |  3 +-
 gcc/config/riscv/riscv-vector-costs.cc |  2 +-
 gcc/tree-vect-stmts.cc                 | 62 ++++++++++++--------------
 gcc/tree-vectorizer.h                  | 21 +++++----
 5 files changed, 49 insertions(+), 49 deletions(-)

diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 039f196c240..f88b7c95f97 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -17551,7 +17551,7 @@ aarch64_detect_vector_stmt_subtype (vec_info *vinfo, vect_cost_for_stmt kind,
   if (kind == scalar_load
       && node
       && sve_costs
-      && SLP_TREE_MEMORY_ACCESS_TYPE (node) == VMAT_GATHER_SCATTER)
+      && mat_gather_scatter_p (SLP_TREE_MEMORY_ACCESS_TYPE (node)))
     {
       unsigned int nunits = vect_nunits_for_cost (vectype);
       /* Test for VNx2 modes, which have 64-bit containers.  */
@@ -17565,7 +17565,7 @@ aarch64_detect_vector_stmt_subtype (vec_info *vinfo, vect_cost_for_stmt kind,
   if (kind == scalar_store
       && node
       && sve_costs
-      && SLP_TREE_MEMORY_ACCESS_TYPE (node) == VMAT_GATHER_SCATTER)
+      && mat_gather_scatter_p (SLP_TREE_MEMORY_ACCESS_TYPE (node)))
     return sve_costs->scatter_store_elt_cost;

   /* Detect cases in which vec_to_scalar represents an in-loop reduction.  */
@@ -17821,7 +17821,7 @@ aarch64_vector_costs::count_ops (unsigned int count, vect_cost_for_stmt kind,
   if (stmt_info
       && kind == vec_to_scalar
       && (m_vec_flags & VEC_ADVSIMD)
-      && SLP_TREE_MEMORY_ACCESS_TYPE (node) == VMAT_GATHER_SCATTER)
+      && mat_gather_scatter_p (SLP_TREE_MEMORY_ACCESS_TYPE (node)))
     {
       auto dr = STMT_VINFO_DATA_REF (stmt_info);
       tree dr_ref = DR_REF (dr);
@@ -17936,7 +17936,7 @@ aarch64_vector_costs::count_ops (unsigned int count, vect_cost_for_stmt kind,
   if (stmt_info
       && sve_issue
       && (kind == scalar_load || kind == scalar_store)
-      && SLP_TREE_MEMORY_ACCESS_TYPE (node) == VMAT_GATHER_SCATTER)
+      && mat_gather_scatter_p (SLP_TREE_MEMORY_ACCESS_TYPE (node)))
     {
       unsigned int pairs = CEIL (count, 2);
       ops->pred_ops += sve_issue->gather_scatter_pair_pred_ops * pairs;
@@ -18094,7 +18094,7 @@ aarch64_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
       && node
       && vectype
       && aarch64_sve_mode_p (TYPE_MODE (vectype))
-      && SLP_TREE_MEMORY_ACCESS_TYPE (node) == VMAT_GATHER_SCATTER)
+      && mat_gather_scatter_p (SLP_TREE_MEMORY_ACCESS_TYPE (node)))
     {
       const sve_vec_cost *sve_costs = aarch64_tune_params.vec_costs->sve;
       if (sve_costs)
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 65e04d3760d..793b478e654 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -26161,8 +26161,7 @@ ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
                   && (TREE_CODE (DR_STEP (STMT_VINFO_DATA_REF
                                             (SLP_TREE_REPRESENTATIVE (node))))
                       != INTEGER_CST))
-                  || (SLP_TREE_MEMORY_ACCESS_TYPE (node)
-                      == VMAT_GATHER_SCATTER)))))
+                  || mat_gather_scatter_p (SLP_TREE_MEMORY_ACCESS_TYPE (node))))))
     {
       stmt_cost = ix86_default_vector_cost (kind, mode);
       stmt_cost *= (TYPE_VECTOR_SUBPARTS (vectype) + 1);
diff --git a/gcc/config/riscv/riscv-vector-costs.cc b/gcc/config/riscv/riscv-vector-costs.cc
index 44ef44a1435..5e6cb671490 100644
--- a/gcc/config/riscv/riscv-vector-costs.cc
+++ b/gcc/config/riscv/riscv-vector-costs.cc
@@ -607,7 +607,7 @@ costs::need_additional_vector_vars_p (stmt_vec_info stmt_info,
   if (type == load_vec_info_type || type == store_vec_info_type)
     {
       if (STMT_VINFO_GATHER_SCATTER_P (stmt_info)
-          && SLP_TREE_MEMORY_ACCESS_TYPE (node) == VMAT_GATHER_SCATTER)
+          && mat_gather_scatter_p (SLP_TREE_MEMORY_ACCESS_TYPE (node)))
        return true;

       machine_mode mode = TYPE_MODE (STMT_VINFO_VECTYPE (stmt_info));
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 2d999da3f4b..36bc9afd093 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -1484,7 +1484,7 @@ check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
       return;
     }

-  if (memory_access_type == VMAT_GATHER_SCATTER)
+  if (mat_gather_scatter_p (memory_access_type))
     {
       internal_fn ifn = (is_load
                         ? IFN_MASK_GATHER_LOAD
@@ -1503,7 +1503,7 @@ check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
                                                   gs_info->offset_vectype,
                                                   gs_info->scale,
                                                   elsvals)
-         || gs_info->decl != NULL_TREE)
+         || memory_access_type == VMAT_GATHER_SCATTER_LEGACY)
        vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype,
                               scalar_mask);
       else
@@ -2025,7 +2025,6 @@ get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
        *memory_access_type = VMAT_STRIDED_SLP;
       else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
        {
-         *memory_access_type = VMAT_GATHER_SCATTER;
          slp_tree offset_node = SLP_TREE_CHILDREN (slp_node)[0];
          tree offset_vectype = SLP_TREE_VECTYPE (offset_node);
          memset (gs_info, 0, sizeof (gather_scatter_info));
@@ -2042,7 +2041,7 @@ get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
                                            offset_vectype,
                                            gs_info->scale,
                                            &gs_info->ifn, &tem, elsvals))
-           /* GATHER_SCATTER_IFN_P.  */;
+           *memory_access_type = VMAT_GATHER_SCATTER_IFN;
          else if (vls_type == VLS_LOAD
                   ? (targetm.vectorize.builtin_gather
                      && (gs_info->decl
@@ -2056,7 +2055,7 @@ get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
                                                          TREE_TYPE
                                                            (offset_vectype),
                                                          gs_info->scale))))
-           /* GATHER_SCATTER_LEGACY_P.  */;
+           *memory_access_type = VMAT_GATHER_SCATTER_LEGACY;
          else
            {
              /* GATHER_SCATTER_EMULATED_P.  */
@@ -2072,6 +2071,7 @@ get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
                                     "gather.\n");
                  return false;
                }
+             *memory_access_type = VMAT_GATHER_SCATTER_EMULATED;
            }
        }
       else
@@ -2323,15 +2323,14 @@ get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
           && vect_use_strided_gather_scatters_p (stmt_info, vectype, loop_vinfo,
                                                  masked_p, gs_info, elsvals,
                                                  group_size, single_element_p))
-    *memory_access_type = VMAT_GATHER_SCATTER;
+    *memory_access_type = VMAT_GATHER_SCATTER_IFN;

   if (*memory_access_type == VMAT_CONTIGUOUS_DOWN
       || *memory_access_type == VMAT_CONTIGUOUS_REVERSE)
     *poffset = neg_ldst_offset;

   if (*memory_access_type == VMAT_ELEMENTWISE
-      || (*memory_access_type == VMAT_GATHER_SCATTER
-          && GATHER_SCATTER_LEGACY_P (*gs_info))
+      || *memory_access_type == VMAT_GATHER_SCATTER_LEGACY
       || *memory_access_type == VMAT_STRIDED_SLP
       || *memory_access_type == VMAT_INVARIANT)
     {
@@ -2340,7 +2339,7 @@ get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
     }
   else
     {
-      if (*memory_access_type == VMAT_GATHER_SCATTER
+      if (mat_gather_scatter_p (*memory_access_type)
          && !first_dr_info)
        *misalignment = DR_MISALIGNMENT_UNKNOWN;
      else
@@ -2348,7 +2347,7 @@ get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
       *alignment_support_scheme
        = vect_supportable_dr_alignment (vinfo, first_dr_info, vectype,
                                         *misalignment,
-                                        *memory_access_type == VMAT_GATHER_SCATTER ? gs_info : nullptr);
+                                        mat_gather_scatter_p (*memory_access_type) ? gs_info : nullptr);
     }

   if (overrun_p)
@@ -2382,7 +2381,7 @@ get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
   if (loop_vinfo
       && dr_safe_speculative_read_required (stmt_info)
       && LOOP_VINFO_EARLY_BREAKS (loop_vinfo)
-      && (*memory_access_type == VMAT_GATHER_SCATTER
+      && (mat_gather_scatter_p (*memory_access_type)
          || *memory_access_type == VMAT_STRIDED_SLP))
     {
       if (dump_enabled_p ())
@@ -2402,7 +2401,7 @@ get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
     vector iteration or force masking.  */
   if (dr_safe_speculative_read_required (stmt_info)
       && (*alignment_support_scheme == dr_aligned
-          && *memory_access_type != VMAT_GATHER_SCATTER))
+          && !mat_gather_scatter_p (*memory_access_type)))
     {
       /* We can only peel for loops, of course.  */
      gcc_checking_assert (loop_vinfo);
@@ -3000,7 +2999,7 @@ vect_get_loop_variant_data_ptr_increment (
   tree step = vect_dr_behavior (vinfo, dr_info)->step;

   /* gather/scatter never reach here.  */
-  gcc_assert (memory_access_type != VMAT_GATHER_SCATTER);
+  gcc_assert (!mat_gather_scatter_p (memory_access_type));

   /* When we support SELECT_VL pattern, we dynamic adjust
      the memory address by .SELECT_VL result.
@@ -7851,17 +7850,15 @@ vectorizable_store (vec_info *vinfo,
          return false;
        }
      else if (memory_access_type != VMAT_LOAD_STORE_LANES
-              && (memory_access_type != VMAT_GATHER_SCATTER
-                  || (GATHER_SCATTER_LEGACY_P (gs_info)
-                      && !VECTOR_BOOLEAN_TYPE_P (mask_vectype))))
+              && (memory_access_type != VMAT_GATHER_SCATTER_LEGACY
+                  || !VECTOR_BOOLEAN_TYPE_P (mask_vectype)))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "unsupported access type for masked store.\n");
          return false;
        }
-      else if (memory_access_type == VMAT_GATHER_SCATTER
-              && GATHER_SCATTER_EMULATED_P (gs_info))
+      else if (memory_access_type == VMAT_GATHER_SCATTER_EMULATED)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -7879,7 +7876,7 @@ vectorizable_store (vec_info *vinfo,
  dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
  grouped_store
    = (STMT_VINFO_GROUPED_ACCESS (stmt_info)
-       && memory_access_type != VMAT_GATHER_SCATTER);
+       && !mat_gather_scatter_p (memory_access_type));
  if (grouped_store)
    {
      first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
@@ -8299,7 +8296,7 @@ vectorizable_store (vec_info *vinfo,
      aggr_type = NULL_TREE;
      bump = NULL_TREE;
    }
-  else if (memory_access_type == VMAT_GATHER_SCATTER)
+  else if (mat_gather_scatter_p (memory_access_type))
    {
      aggr_type = elem_type;
      if (!costing_p)
@@ -8487,7 +8484,7 @@ vectorizable_store (vec_info *vinfo,
      return true;
    }

-  if (memory_access_type == VMAT_GATHER_SCATTER)
+  if (mat_gather_scatter_p (memory_access_type))
    {
      gcc_assert (!grouped_store);
      auto_vec<tree> vec_offsets;
@@ -8555,7 +8552,7 @@ vectorizable_store (vec_info *vinfo,
          unsigned align = get_object_alignment (DR_REF (first_dr_info->dr));
          tree alias_align_ptr = build_int_cst (ref_type, align);

-         if (GATHER_SCATTER_IFN_P (gs_info))
+         if (memory_access_type == VMAT_GATHER_SCATTER_IFN)
            {
              if (costing_p)
                {
@@ -8625,7 +8622,7 @@ vectorizable_store (vec_info *vinfo,
              vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
              new_stmt = call;
            }
-         else if (GATHER_SCATTER_LEGACY_P (gs_info))
+         else if (memory_access_type == VMAT_GATHER_SCATTER_LEGACY)
            {
              /* The builtin decls path for scatter is legacy, x86 only.  */
              gcc_assert (nunits.is_constant ()
@@ -9416,7 +9413,7 @@ vectorizable_load (vec_info *vinfo,
         get_load_store_type.  */
      if (SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()
          && !((memory_access_type == VMAT_ELEMENTWISE
-               || memory_access_type == VMAT_GATHER_SCATTER)
+               || mat_gather_scatter_p (memory_access_type))
               && SLP_TREE_LANES (slp_node) == 1))
        {
          slp_perm = true;
@@ -9478,15 +9475,14 @@ vectorizable_load (vec_info *vinfo,
          return false;
        }
      else if (memory_access_type != VMAT_LOAD_STORE_LANES
-              && memory_access_type != VMAT_GATHER_SCATTER)
+              && !mat_gather_scatter_p (memory_access_type))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "unsupported access type for masked load.\n");
          return false;
        }
-      else if (memory_access_type == VMAT_GATHER_SCATTER
-              && GATHER_SCATTER_EMULATED_P (gs_info))
+      else if (memory_access_type == VMAT_GATHER_SCATTER_EMULATED)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -9526,7 +9522,7 @@ vectorizable_load (vec_info *vinfo,

  if (dump_enabled_p ()
      && memory_access_type != VMAT_ELEMENTWISE
-      && memory_access_type != VMAT_GATHER_SCATTER
+      && !mat_gather_scatter_p (memory_access_type)
      && memory_access_type != VMAT_STRIDED_SLP
      && memory_access_type != VMAT_INVARIANT
      && alignment_support_scheme != dr_aligned)
@@ -9958,7 +9954,7 @@ vectorizable_load (vec_info *vinfo,
      return true;
    }

-  if (memory_access_type == VMAT_GATHER_SCATTER)
+  if (mat_gather_scatter_p (memory_access_type))
    grouped_load = false;

  if (grouped_load
@@ -10054,7 +10050,7 @@ vectorizable_load (vec_info *vinfo,
  gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
               && !mask_node
               && !loop_masks)
-             || memory_access_type == VMAT_GATHER_SCATTER
+             || mat_gather_scatter_p (memory_access_type)
              || alignment_support_scheme == dr_aligned
              || alignment_support_scheme == dr_unaligned_supported);

@@ -10347,7 +10343,7 @@ vectorizable_load (vec_info *vinfo,
      return true;
    }

-  if (memory_access_type == VMAT_GATHER_SCATTER)
+  if (mat_gather_scatter_p (memory_access_type))
    {
      gcc_assert (!grouped_load && !slp_perm);

@@ -10402,7 +10398,7 @@ vectorizable_load (vec_info *vinfo,
      /* 2. Create the vector-load in the loop.  */
      unsigned align = get_object_alignment (DR_REF (first_dr_info->dr));
      tree alias_align_ptr = build_int_cst (ref_type, align);
-      if (GATHER_SCATTER_IFN_P (gs_info))
+      if (memory_access_type == VMAT_GATHER_SCATTER_IFN)
        {
          if (costing_p)
            {
@@ -10477,7 +10473,7 @@ vectorizable_load (vec_info *vinfo,
              new_stmt = call;
              data_ref = NULL_TREE;
            }
-         else if (GATHER_SCATTER_LEGACY_P (gs_info))
+         else if (memory_access_type == VMAT_GATHER_SCATTER_LEGACY)
            {
              /* The builtin decls path for gather is legacy, x86 only.  */
              gcc_assert (!final_len && nunits.is_constant ());
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 7636d8e0cda..a315b98c33d 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -204,9 +204,21 @@
   VMAT_STRIDED_SLP,

   /* The access uses gather loads or scatter stores.  */
-  VMAT_GATHER_SCATTER
+  VMAT_GATHER_SCATTER_LEGACY,
+  VMAT_GATHER_SCATTER_IFN,
+  VMAT_GATHER_SCATTER_EMULATED
 };

+/* Returns whether MAT is any of the VMAT_GATHER_SCATTER_* kinds.  */
+
+inline bool
+mat_gather_scatter_p (vect_memory_access_type mat)
+{
+  return (mat == VMAT_GATHER_SCATTER_LEGACY
+         || mat == VMAT_GATHER_SCATTER_IFN
+         || mat == VMAT_GATHER_SCATTER_EMULATED);
+}
+
 /*-----------------------------------------------------------------*/
 /* Info on vectorized defs.                                         */
 /*-----------------------------------------------------------------*/
@@ -1682,13 +1694,6 @@ public:
 #define PURE_SLP_STMT(S) ((S)->slp_type == pure_slp)
 #define STMT_SLP_TYPE(S) (S)->slp_type

-#define GATHER_SCATTER_LEGACY_P(info) ((info).decl != NULL_TREE \
-                                      && (info).ifn == IFN_LAST)
-#define GATHER_SCATTER_IFN_P(info) ((info).decl == NULL_TREE \
-                                   && (info).ifn != IFN_LAST)
-#define GATHER_SCATTER_EMULATED_P(info) ((info).decl == NULL_TREE \
-                                        && (info).ifn == IFN_LAST)
-
 /* Contains the scalar or vector costs for a vec_info.  */

 class vector_costs
-- 
2.43.0