https://gcc.gnu.org/g:b8ccad471e51056d442794b9301480de9cd7a19c
commit r16-3180-gb8ccad471e51056d442794b9301480de9cd7a19c
Author: Richard Biener <rguent...@suse.de>
Date:   Tue Aug 12 13:34:30 2025 +0200

    Fold GATHER_SCATTER_*_P into vect_memory_access_type

    The following splits up VMAT_GATHER_SCATTER into
    VMAT_GATHER_SCATTER_LEGACY, VMAT_GATHER_SCATTER_IFN and
    VMAT_GATHER_SCATTER_EMULATED.  The main motivation is to reduce the
    uses of (full) gs_info, but it also makes the kind representable by a
    single entry rather than the ifn and decl tristate.  The strided load
    with gather case gets to use VMAT_GATHER_SCATTER_IFN, since that's
    what we end up checking.

            * tree-vectorizer.h (vect_memory_access_type): Replace
            VMAT_GATHER_SCATTER with three separate access types,
            VMAT_GATHER_SCATTER_LEGACY, VMAT_GATHER_SCATTER_IFN and
            VMAT_GATHER_SCATTER_EMULATED.
            (mat_gather_scatter_p): New predicate.
            (GATHER_SCATTER_LEGACY_P): Remove.
            (GATHER_SCATTER_IFN_P): Likewise.
            (GATHER_SCATTER_EMULATED_P): Likewise.
            * tree-vect-stmts.cc (check_load_store_for_partial_vectors):
            Adjust.
            (get_load_store_type): Likewise.
            (vect_get_loop_variant_data_ptr_increment): Likewise.
            (vectorizable_store): Likewise.
            (vectorizable_load): Likewise.
            * config/i386/i386.cc (ix86_vector_costs::add_stmt_cost): Likewise.
            * config/riscv/riscv-vector-costs.cc
            (costs::need_additional_vector_vars_p): Likewise.
            * config/aarch64/aarch64.cc (aarch64_detect_vector_stmt_subtype):
            Likewise.
            (aarch64_vector_costs::count_ops): Likewise.
            (aarch64_vector_costs::add_stmt_cost): Likewise.

Diff:
---
 gcc/config/aarch64/aarch64.cc          | 10 +++---
 gcc/config/i386/i386.cc                |  3 +-
 gcc/config/riscv/riscv-vector-costs.cc |  2 +-
 gcc/tree-vect-stmts.cc                 | 61 ++++++++++++++++------------------
 gcc/tree-vectorizer.h                  | 21 +++++++-----
 5 files changed, 49 insertions(+), 48 deletions(-)

diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 039f196c2408..f88b7c95f97e 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -17551,7 +17551,7 @@ aarch64_detect_vector_stmt_subtype (vec_info *vinfo, vect_cost_for_stmt kind,
   if (kind == scalar_load
       && node
       && sve_costs
-      && SLP_TREE_MEMORY_ACCESS_TYPE (node) == VMAT_GATHER_SCATTER)
+      && mat_gather_scatter_p (SLP_TREE_MEMORY_ACCESS_TYPE (node)))
     {
       unsigned int nunits = vect_nunits_for_cost (vectype);
       /* Test for VNx2 modes, which have 64-bit containers.  */
@@ -17565,7 +17565,7 @@ aarch64_detect_vector_stmt_subtype (vec_info *vinfo, vect_cost_for_stmt kind,
   if (kind == scalar_store
       && node
       && sve_costs
-      && SLP_TREE_MEMORY_ACCESS_TYPE (node) == VMAT_GATHER_SCATTER)
+      && mat_gather_scatter_p (SLP_TREE_MEMORY_ACCESS_TYPE (node)))
     return sve_costs->scatter_store_elt_cost;
 
   /* Detect cases in which vec_to_scalar represents an in-loop reduction.  */
@@ -17821,7 +17821,7 @@ aarch64_vector_costs::count_ops (unsigned int count, vect_cost_for_stmt kind,
   if (stmt_info
       && kind == vec_to_scalar
       && (m_vec_flags & VEC_ADVSIMD)
-      && SLP_TREE_MEMORY_ACCESS_TYPE (node) == VMAT_GATHER_SCATTER)
+      && mat_gather_scatter_p (SLP_TREE_MEMORY_ACCESS_TYPE (node)))
     {
       auto dr = STMT_VINFO_DATA_REF (stmt_info);
       tree dr_ref = DR_REF (dr);
@@ -17936,7 +17936,7 @@ aarch64_vector_costs::count_ops (unsigned int count, vect_cost_for_stmt kind,
   if (stmt_info
       && sve_issue
       && (kind == scalar_load || kind == scalar_store)
-      && SLP_TREE_MEMORY_ACCESS_TYPE (node) == VMAT_GATHER_SCATTER)
+      && mat_gather_scatter_p (SLP_TREE_MEMORY_ACCESS_TYPE (node)))
     {
       unsigned int pairs = CEIL (count, 2);
       ops->pred_ops += sve_issue->gather_scatter_pair_pred_ops * pairs;
@@ -18094,7 +18094,7 @@ aarch64_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
       && node
       && vectype
       && aarch64_sve_mode_p (TYPE_MODE (vectype))
-      && SLP_TREE_MEMORY_ACCESS_TYPE (node) == VMAT_GATHER_SCATTER)
+      && mat_gather_scatter_p (SLP_TREE_MEMORY_ACCESS_TYPE (node)))
     {
       const sve_vec_cost *sve_costs = aarch64_tune_params.vec_costs->sve;
       if (sve_costs)
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 65e04d3760d5..793b478e6548 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -26161,8 +26161,7 @@ ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
                   && (TREE_CODE (DR_STEP (STMT_VINFO_DATA_REF
                                             (SLP_TREE_REPRESENTATIVE (node))))
                       != INTEGER_CST))
-                 || (SLP_TREE_MEMORY_ACCESS_TYPE (node)
-                     == VMAT_GATHER_SCATTER)))))
+                 || mat_gather_scatter_p (SLP_TREE_MEMORY_ACCESS_TYPE (node))))))
     {
       stmt_cost = ix86_default_vector_cost (kind, mode);
       stmt_cost *= (TYPE_VECTOR_SUBPARTS (vectype) + 1);
diff --git a/gcc/config/riscv/riscv-vector-costs.cc b/gcc/config/riscv/riscv-vector-costs.cc
index 44ef44a14353..5e6cb671490c 100644
--- a/gcc/config/riscv/riscv-vector-costs.cc
+++ b/gcc/config/riscv/riscv-vector-costs.cc
@@ -607,7 +607,7 @@ costs::need_additional_vector_vars_p (stmt_vec_info stmt_info,
   if (type == load_vec_info_type || type == store_vec_info_type)
     {
       if (STMT_VINFO_GATHER_SCATTER_P (stmt_info)
-         && SLP_TREE_MEMORY_ACCESS_TYPE (node) == VMAT_GATHER_SCATTER)
+         && mat_gather_scatter_p (SLP_TREE_MEMORY_ACCESS_TYPE (node)))
        return true;
 
       machine_mode mode = TYPE_MODE (STMT_VINFO_VECTYPE (stmt_info));
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 1aa3c37d25e0..86d878ff6e65 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -1484,7 +1484,7 @@ check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
       return;
     }
 
-  if (memory_access_type == VMAT_GATHER_SCATTER)
+  if (mat_gather_scatter_p (memory_access_type))
     {
       internal_fn ifn = (is_load
                         ? IFN_MASK_GATHER_LOAD
@@ -1503,7 +1503,7 @@ check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
                                                       gs_info->offset_vectype,
                                                       gs_info->scale,
                                                       elsvals)
-          || gs_info->decl != NULL_TREE)
+          || memory_access_type == VMAT_GATHER_SCATTER_LEGACY)
        vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype,
                               scalar_mask);
       else
@@ -2023,7 +2023,6 @@ get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
     *memory_access_type = VMAT_STRIDED_SLP;
   else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
     {
-      *memory_access_type = VMAT_GATHER_SCATTER;
       slp_tree offset_node = SLP_TREE_CHILDREN (slp_node)[0];
       tree offset_vectype = SLP_TREE_VECTYPE (offset_node);
       memset (gs_info, 0, sizeof (gather_scatter_info));
@@ -2040,7 +2039,7 @@ get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
                                                   offset_vectype,
                                                   gs_info->scale,
                                                   &gs_info->ifn, &tem, elsvals))
-       /* GATHER_SCATTER_IFN_P.  */;
+       *memory_access_type = VMAT_GATHER_SCATTER_IFN;
       else if (vls_type == VLS_LOAD
               ? (targetm.vectorize.builtin_gather
                  && (gs_info->decl
@@ -2054,7 +2053,7 @@ get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
                                                           TREE_TYPE
                                                             (offset_vectype),
                                                           gs_info->scale))))
-       /* GATHER_SCATTER_LEGACY_P.  */;
+       *memory_access_type = VMAT_GATHER_SCATTER_LEGACY;
       else
        {
          /* GATHER_SCATTER_EMULATED_P.  */
@@ -2070,6 +2069,7 @@ get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
                                "gather.\n");
              return false;
            }
+         *memory_access_type = VMAT_GATHER_SCATTER_EMULATED;
        }
     }
   else
@@ -2321,15 +2321,14 @@ get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
       && vect_use_strided_gather_scatters_p (stmt_info, vectype, loop_vinfo,
                                              masked_p, gs_info, elsvals,
                                              group_size, single_element_p))
-    *memory_access_type = VMAT_GATHER_SCATTER;
+    *memory_access_type = VMAT_GATHER_SCATTER_IFN;
 
   if (*memory_access_type == VMAT_CONTIGUOUS_DOWN
       || *memory_access_type == VMAT_CONTIGUOUS_REVERSE)
     *poffset = neg_ldst_offset;
 
   if (*memory_access_type == VMAT_ELEMENTWISE
-      || (*memory_access_type == VMAT_GATHER_SCATTER
-         && GATHER_SCATTER_LEGACY_P (*gs_info))
+      || *memory_access_type == VMAT_GATHER_SCATTER_LEGACY
       || *memory_access_type == VMAT_STRIDED_SLP
       || *memory_access_type == VMAT_INVARIANT)
     {
@@ -2338,7 +2337,7 @@ get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
     }
   else
     {
-      if (*memory_access_type == VMAT_GATHER_SCATTER
+      if (mat_gather_scatter_p (*memory_access_type)
          && !first_dr_info)
        *misalignment = DR_MISALIGNMENT_UNKNOWN;
       else
@@ -2346,7 +2345,7 @@ get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
        *alignment_support_scheme
          = vect_supportable_dr_alignment (vinfo, first_dr_info, vectype,
                                           *misalignment,
-                                          *memory_access_type == VMAT_GATHER_SCATTER);
+                                          mat_gather_scatter_p (*memory_access_type));
     }
 
   if (overrun_p)
@@ -2380,7 +2379,7 @@ get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
   if (loop_vinfo
       && dr_safe_speculative_read_required (stmt_info)
       && LOOP_VINFO_EARLY_BREAKS (loop_vinfo)
-      && (*memory_access_type == VMAT_GATHER_SCATTER
+      && (mat_gather_scatter_p (*memory_access_type)
          || *memory_access_type == VMAT_STRIDED_SLP))
     {
       if (dump_enabled_p ())
@@ -2400,7 +2399,7 @@ get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
      vector iteration or force masking.  */
   if (dr_safe_speculative_read_required (stmt_info)
       && (*alignment_support_scheme == dr_aligned
-         && *memory_access_type != VMAT_GATHER_SCATTER))
+         && !mat_gather_scatter_p (*memory_access_type)))
     {
       /* We can only peel for loops, of course.  */
       gcc_checking_assert (loop_vinfo);
@@ -2998,7 +2997,7 @@ vect_get_loop_variant_data_ptr_increment (
   tree step = vect_dr_behavior (vinfo, dr_info)->step;
 
   /* gather/scatter never reach here.  */
-  gcc_assert (memory_access_type != VMAT_GATHER_SCATTER);
+  gcc_assert (!mat_gather_scatter_p (memory_access_type));
 
   /* When we support SELECT_VL pattern, we dynamic adjust the memory
      address by .SELECT_VL result.
@@ -7845,8 +7844,8 @@ vectorizable_store (vec_info *vinfo,
          return false;
        }
       else if (memory_access_type != VMAT_LOAD_STORE_LANES
-              && (memory_access_type != VMAT_GATHER_SCATTER
-                  || (GATHER_SCATTER_LEGACY_P (gs_info)
+              && (!mat_gather_scatter_p (memory_access_type)
+                  || (memory_access_type == VMAT_GATHER_SCATTER_LEGACY
                      && !VECTOR_BOOLEAN_TYPE_P (mask_vectype))))
        {
          if (dump_enabled_p ())
@@ -7854,8 +7853,7 @@ vectorizable_store (vec_info *vinfo,
                            "unsupported access type for masked store.\n");
          return false;
        }
-      else if (memory_access_type == VMAT_GATHER_SCATTER
-              && GATHER_SCATTER_EMULATED_P (gs_info))
+      else if (memory_access_type == VMAT_GATHER_SCATTER_EMULATED)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -7873,7 +7871,7 @@ vectorizable_store (vec_info *vinfo,
 
   dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
   grouped_store = (STMT_VINFO_GROUPED_ACCESS (stmt_info)
-                  && memory_access_type != VMAT_GATHER_SCATTER);
+                  && !mat_gather_scatter_p (memory_access_type));
   if (grouped_store)
     {
       first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
@@ -8287,7 +8285,7 @@ vectorizable_store (vec_info *vinfo,
       aggr_type = NULL_TREE;
       bump = NULL_TREE;
     }
-  else if (memory_access_type == VMAT_GATHER_SCATTER)
+  else if (mat_gather_scatter_p (memory_access_type))
     {
       aggr_type = elem_type;
       if (!costing_p)
@@ -8475,7 +8473,7 @@ vectorizable_store (vec_info *vinfo,
       return true;
     }
 
-  if (memory_access_type == VMAT_GATHER_SCATTER)
+  if (mat_gather_scatter_p (memory_access_type))
     {
       gcc_assert (!grouped_store);
       auto_vec<tree> vec_offsets;
@@ -8543,7 +8541,7 @@ vectorizable_store (vec_info *vinfo,
          unsigned align = get_object_alignment (DR_REF (first_dr_info->dr));
          tree alias_align_ptr = build_int_cst (ref_type, align);
 
-         if (GATHER_SCATTER_IFN_P (gs_info))
+         if (memory_access_type == VMAT_GATHER_SCATTER_IFN)
            {
              if (costing_p)
                {
@@ -8613,7 +8611,7 @@ vectorizable_store (vec_info *vinfo,
              vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
              new_stmt = call;
            }
-         else if (GATHER_SCATTER_LEGACY_P (gs_info))
+         else if (memory_access_type == VMAT_GATHER_SCATTER_LEGACY)
            {
              /* The builtin decls path for scatter is legacy, x86 only.  */
              gcc_assert (nunits.is_constant ()
@@ -9400,7 +9398,7 @@ vectorizable_load (vec_info *vinfo,
         get_load_store_type.  */
       if (SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()
          && !((memory_access_type == VMAT_ELEMENTWISE
-               || memory_access_type == VMAT_GATHER_SCATTER)
+               || mat_gather_scatter_p (memory_access_type))
               && SLP_TREE_LANES (slp_node) == 1))
        {
          slp_perm = true;
@@ -9462,15 +9460,14 @@ vectorizable_load (vec_info *vinfo,
          return false;
        }
       else if (memory_access_type != VMAT_LOAD_STORE_LANES
-              && memory_access_type != VMAT_GATHER_SCATTER)
+              && !mat_gather_scatter_p (memory_access_type))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "unsupported access type for masked load.\n");
          return false;
        }
-      else if (memory_access_type == VMAT_GATHER_SCATTER
-              && GATHER_SCATTER_EMULATED_P (gs_info))
+      else if (memory_access_type == VMAT_GATHER_SCATTER_EMULATED)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -9512,7 +9509,7 @@ vectorizable_load (vec_info *vinfo,
 
   if (dump_enabled_p ()
       && memory_access_type != VMAT_ELEMENTWISE
-      && memory_access_type != VMAT_GATHER_SCATTER
+      && !mat_gather_scatter_p (memory_access_type)
       && memory_access_type != VMAT_STRIDED_SLP
       && memory_access_type != VMAT_INVARIANT
       && alignment_support_scheme != dr_aligned)
@@ -9948,7 +9945,7 @@ vectorizable_load (vec_info *vinfo,
       return true;
     }
 
-  if (memory_access_type == VMAT_GATHER_SCATTER)
+  if (mat_gather_scatter_p (memory_access_type))
     grouped_load = false;
 
   if (grouped_load
@@ -10044,7 +10041,7 @@ vectorizable_load (vec_info *vinfo,
   gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
               && !mask_node
               && !loop_masks)
-             || memory_access_type == VMAT_GATHER_SCATTER
+             || mat_gather_scatter_p (memory_access_type)
              || alignment_support_scheme == dr_aligned
              || alignment_support_scheme == dr_unaligned_supported);
 
@@ -10337,7 +10334,7 @@ vectorizable_load (vec_info *vinfo,
       return true;
     }
 
-  if (memory_access_type == VMAT_GATHER_SCATTER)
+  if (mat_gather_scatter_p (memory_access_type))
     {
       gcc_assert (!grouped_load && !slp_perm);
 
@@ -10392,7 +10389,7 @@ vectorizable_load (vec_info *vinfo,
          /* 2. Create the vector-load in the loop.  */
          unsigned align = get_object_alignment (DR_REF (first_dr_info->dr));
          tree alias_align_ptr = build_int_cst (ref_type, align);
-         if (GATHER_SCATTER_IFN_P (gs_info))
+         if (memory_access_type == VMAT_GATHER_SCATTER_IFN)
            {
              if (costing_p)
                {
@@ -10467,7 +10464,7 @@ vectorizable_load (vec_info *vinfo,
              new_stmt = call;
              data_ref = NULL_TREE;
            }
-         else if (GATHER_SCATTER_LEGACY_P (gs_info))
+         else if (memory_access_type == VMAT_GATHER_SCATTER_LEGACY)
            {
              /* The builtin decls path for gather is legacy, x86 only.  */
              gcc_assert (!final_len && nunits.is_constant ());
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 729c7044ef39..4d4f7bc39c6e 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -204,9 +204,21 @@ enum vect_memory_access_type {
   VMAT_STRIDED_SLP,
 
   /* The access uses gather loads or scatter stores.  */
-  VMAT_GATHER_SCATTER
+  VMAT_GATHER_SCATTER_LEGACY,
+  VMAT_GATHER_SCATTER_IFN,
+  VMAT_GATHER_SCATTER_EMULATED
 };
 
+/* Returns whether MAT is any of the VMAT_GATHER_SCATTER_* kinds.  */
+
+inline bool
+mat_gather_scatter_p (vect_memory_access_type mat)
+{
+  return (mat == VMAT_GATHER_SCATTER_LEGACY
+         || mat == VMAT_GATHER_SCATTER_IFN
+         || mat == VMAT_GATHER_SCATTER_EMULATED);
+}
+
 /*-----------------------------------------------------------------*/
 /* Info on vectorized defs.                                         */
 /*-----------------------------------------------------------------*/
@@ -1663,13 +1675,6 @@ struct gather_scatter_info {
 #define PURE_SLP_STMT(S) ((S)->slp_type == pure_slp)
 #define STMT_SLP_TYPE(S) (S)->slp_type
 
-#define GATHER_SCATTER_LEGACY_P(info) ((info).decl != NULL_TREE \
-                                       && (info).ifn == IFN_LAST)
-#define GATHER_SCATTER_IFN_P(info) ((info).decl == NULL_TREE \
-                                    && (info).ifn != IFN_LAST)
-#define GATHER_SCATTER_EMULATED_P(info) ((info).decl == NULL_TREE \
-                                         && (info).ifn == IFN_LAST)
-
 /* Contains the scalar or vector costs for a vec_info.  */
 class vector_costs
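
For readers skimming the patch, here is a small self-contained C++ sketch of the representational change the commit message describes; the names below are illustrative stand-ins, not the actual GCC declarations.  The point is that the gather/scatter flavour is now a single enum value that call sites compare directly, instead of being re-derived from the decl/ifn tristate in gather_scatter_info.

/* Illustrative sketch only -- mirrors the shape of the new
   vect_memory_access_type and mat_gather_scatter_p, not the actual
   GCC declarations.  */

#include <cstdio>

enum sketch_memory_access_type
{
  SKETCH_VMAT_CONTIGUOUS,
  SKETCH_VMAT_GATHER_SCATTER_LEGACY,    /* target builtin decl path  */
  SKETCH_VMAT_GATHER_SCATTER_IFN,       /* internal-fn path  */
  SKETCH_VMAT_GATHER_SCATTER_EMULATED   /* emulated element-wise  */
};

/* Counterpart of the new mat_gather_scatter_p predicate: one query
   answers "is this any gather/scatter flavour?".  */
static bool
sketch_gather_scatter_p (sketch_memory_access_type mat)
{
  return (mat == SKETCH_VMAT_GATHER_SCATTER_LEGACY
          || mat == SKETCH_VMAT_GATHER_SCATTER_IFN
          || mat == SKETCH_VMAT_GATHER_SCATTER_EMULATED);
}

int
main ()
{
  sketch_memory_access_type mat = SKETCH_VMAT_GATHER_SCATTER_IFN;

  /* A call site now compares the enum directly instead of consulting
     a gs_info-style decl/ifn pair.  */
  if (sketch_gather_scatter_p (mat))
    std::printf ("gather/scatter, IFN flavour: %s\n",
                 mat == SKETCH_VMAT_GATHER_SCATTER_IFN ? "yes" : "no");
  return 0;
}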