On Tue, Jul 22, 2025 at 5:21 PM Robin Dapp <[email protected]> wrote:
>
> > Note if-conversion emits IFN_MASK_LOAD/STORE, only the vectorizer later
> > emits the LEN variants. So this is about whether there are (or might) be
> > uarchs that have vector aligned loads (aka target alignment is
> > sizeof(vector))
> > and in addition to that have support for misaligned loads but those with
> > still
> > element alignment. The above even says all masked load/store uarchs
> > support arbitrary byte-aligned (len-)masked vector loads/stores.
>
> Ah yeah, of course, _LEN happens later...
>
> I moved the checks now and we can get away with an optional gs_info* parameter
> for vect_supportable_alignment so I guess that in itself it's at least
> a bit cleaner than before. Also, I removed the IFN_MASK_LOAD/STORE hunk to
> see
> what breaks.
So with removing the check you get them handled the same as
non-masked vector loads/stores. I think that's a reasonable assumption.
If we get any target where this differs the target hook would need an
extra indication that masking is performed (like the gather/scatter flag).
> Attached is the current version and these mentioned changes are
> the only ones.
LGTM.
Thanks,
Richard.
> riscv and power10 didn't show any noticeable problems, aarch64 is still
> running. x86 is unchanged (as it has legacy gathers/scatters anyway).
> Power7 would be interesting I guess but cfarm110 only has a base GCC 4.8.5.
>
> --
> Regards
> Robin
>
>
> [PATCH] vect: Misalign checks for gather/scatter.
>
> This patch adds simple misalignment checks for gather/scatter
> operations. Previously, we assumed that those perform element accesses
> internally so alignment does not matter. The riscv vector spec however
> explicitly states that vector operations are allowed to fault on
> element-misaligned accesses. Reasonable uarchs won't, but...
>
> For gather/scatter we have two paths in the vectorizer:
>
> (1) Regular analysis based on datarefs. Here we can also create
> strided loads.
> (2) Non-affine access where each gather index is relative to the
> initial address.
>
> The assumption this patch works off is that once the alignment for the
> first scalar is correct, all others will fall in line, as the index is
> always a multiple of the first element's size.
>
> For (1) we have a dataref and can check it for alignment as in other
> cases. For (2) this patch checks the object alignment of BASE and
> compares it against the natural alignment of the current vectype's unit.
>
> The patch also adds a pointer argument to the gather/scatter IFNs that
> contains the necessary alignment. Most of the patch is thus mechanical
> in that it merely adjusts indices.
>
> I tested the riscv version with a custom qemu version that faults on
> element-misaligned vector accesses. With this patch applied, there is
> just a single fault left, which is due to PR120782 and which will be
> addressed separately.
>
> Bootstrapped and regtested on x86 and aarch64. Regtested on
> rv64gcv_zvl512b with and without unaligned vector support.
>
> gcc/ChangeLog:
>
> * internal-fn.cc (internal_fn_len_index): Adjust indices for new
> alias_ptr param.
> (internal_fn_else_index): Ditto.
> (internal_fn_mask_index): Ditto.
> (internal_fn_stored_value_index): Ditto.
> (internal_fn_alias_ptr_index): Ditto.
> (internal_fn_offset_index): Ditto.
> (internal_fn_scale_index): Ditto.
> (internal_gather_scatter_fn_supported_p): Ditto.
> * optabs-query.cc (supports_vec_gather_load_p): Ditto.
> * tree-vect-data-refs.cc (vect_check_gather_scatter): Add alias
> pointer.
> * tree-vect-patterns.cc (vect_recog_gather_scatter_pattern): Add
> alias pointer.
> * tree-vect-slp.cc (vect_get_operand_map): Adjust for alias
> pointer.
> * tree-vect-stmts.cc (vect_truncate_gather_scatter_offset): Add
> alias pointer and misalignment handling.
> (get_load_store_type): Move from here...
> (get_group_load_store_type): ...To here.
> (vectorizable_store): Add alias pointer.
> (vectorizable_load): Ditto.
> * tree-vectorizer.h (struct gather_scatter_info): Ditto.
>
>
> ---
> gcc/internal-fn.cc | 43 ++++++---
> gcc/internal-fn.h | 1 +
> gcc/optabs-query.cc | 6 +-
> gcc/tree-vect-data-refs.cc | 61 ++++++++++---
> gcc/tree-vect-patterns.cc | 17 ++--
> gcc/tree-vect-slp.cc | 16 ++--
> gcc/tree-vect-stmts.cc | 179 +++++++++++++++++++++----------------
> gcc/tree-vectorizer.h | 7 +-
> 8 files changed, 206 insertions(+), 124 deletions(-)
>
> diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc
> index 1411f449789..bf2fac81807 100644
> --- a/gcc/internal-fn.cc
> +++ b/gcc/internal-fn.cc
> @@ -4967,11 +4967,13 @@ internal_fn_len_index (internal_fn fn)
> return 2;
>
> case IFN_MASK_LEN_SCATTER_STORE:
> + return 6;
> +
> case IFN_MASK_LEN_STRIDED_LOAD:
> return 5;
>
> case IFN_MASK_LEN_GATHER_LOAD:
> - return 6;
> + return 7;
>
> case IFN_COND_LEN_FMA:
> case IFN_COND_LEN_FMS:
> @@ -5075,7 +5077,7 @@ internal_fn_else_index (internal_fn fn)
>
> case IFN_MASK_GATHER_LOAD:
> case IFN_MASK_LEN_GATHER_LOAD:
> - return 5;
> + return 6;
>
> default:
> return -1;
> @@ -5110,7 +5112,7 @@ internal_fn_mask_index (internal_fn fn)
> case IFN_MASK_SCATTER_STORE:
> case IFN_MASK_LEN_GATHER_LOAD:
> case IFN_MASK_LEN_SCATTER_STORE:
> - return 4;
> + return 5;
>
> case IFN_VCOND_MASK:
> case IFN_VCOND_MASK_LEN:
> @@ -5135,10 +5137,11 @@ internal_fn_stored_value_index (internal_fn fn)
>
> case IFN_MASK_STORE:
> case IFN_MASK_STORE_LANES:
> + return 3;
> case IFN_SCATTER_STORE:
> case IFN_MASK_SCATTER_STORE:
> case IFN_MASK_LEN_SCATTER_STORE:
> - return 3;
> + return 4;
>
> case IFN_LEN_STORE:
> return 4;
> @@ -5152,6 +5155,28 @@ internal_fn_stored_value_index (internal_fn fn)
> }
> }
>
> +/* If FN has an alias pointer return its index, otherwise return -1. */
> +
> +int
> +internal_fn_alias_ptr_index (internal_fn fn)
> +{
> + switch (fn)
> + {
> + case IFN_MASK_LOAD:
> + case IFN_MASK_LEN_LOAD:
> + case IFN_GATHER_LOAD:
> + case IFN_MASK_GATHER_LOAD:
> + case IFN_MASK_LEN_GATHER_LOAD:
> + case IFN_SCATTER_STORE:
> + case IFN_MASK_SCATTER_STORE:
> + case IFN_MASK_LEN_SCATTER_STORE:
> + return 1;
> +
> + default:
> + return -1;
> + }
> +}
> +
> /* If FN is a gather/scatter return the index of its offset argument,
> otherwise return -1. */
>
> @@ -5169,7 +5194,7 @@ internal_fn_offset_index (internal_fn fn)
> case IFN_SCATTER_STORE:
> case IFN_MASK_SCATTER_STORE:
> case IFN_MASK_LEN_SCATTER_STORE:
> - return 1;
> + return 2;
>
> default:
> return -1;
> @@ -5193,7 +5218,7 @@ internal_fn_scale_index (internal_fn fn)
> case IFN_SCATTER_STORE:
> case IFN_MASK_SCATTER_STORE:
> case IFN_MASK_LEN_SCATTER_STORE:
> - return 2;
> + return 3;
>
> default:
> return -1;
> @@ -5277,13 +5302,9 @@ internal_gather_scatter_fn_supported_p (internal_fn
> ifn, tree vector_type,
> && insn_operand_matches (icode, 2 + output_ops, GEN_INT (unsigned_p))
> && insn_operand_matches (icode, 3 + output_ops, GEN_INT (scale));
>
> - /* For gather the optab's operand indices do not match the IFN's because
> - the latter does not have the extension operand (operand 3). It is
> - implicitly added during expansion so we use the IFN's else index + 1.
> - */
> if (ok && elsvals)
> get_supported_else_vals
> - (icode, internal_fn_else_index (IFN_MASK_GATHER_LOAD) + 1, *elsvals);
> + (icode, internal_fn_else_index (IFN_MASK_GATHER_LOAD), *elsvals);
>
> return ok;
> }
> diff --git a/gcc/internal-fn.h b/gcc/internal-fn.h
> index 825381660bb..fd21694dfeb 100644
> --- a/gcc/internal-fn.h
> +++ b/gcc/internal-fn.h
> @@ -242,6 +242,7 @@ extern int internal_fn_else_index (internal_fn);
> extern int internal_fn_stored_value_index (internal_fn);
> extern int internal_fn_offset_index (internal_fn fn);
> extern int internal_fn_scale_index (internal_fn fn);
> +extern int internal_fn_alias_ptr_index (internal_fn fn);
> extern bool internal_gather_scatter_fn_supported_p (internal_fn, tree,
> tree, tree, int,
> vec<int> * = nullptr);
> diff --git a/gcc/optabs-query.cc b/gcc/optabs-query.cc
> index f5ca98da818..5335d0d8401 100644
> --- a/gcc/optabs-query.cc
> +++ b/gcc/optabs-query.cc
> @@ -719,13 +719,9 @@ supports_vec_gather_load_p (machine_mode mode, vec<int>
> *elsvals)
> = (icode != CODE_FOR_nothing) ? 1 : -1;
> }
>
> - /* For gather the optab's operand indices do not match the IFN's because
> - the latter does not have the extension operand (operand 3). It is
> - implicitly added during expansion so we use the IFN's else index + 1.
> - */
> if (elsvals && icode != CODE_FOR_nothing)
> get_supported_else_vals
> - (icode, internal_fn_else_index (IFN_MASK_GATHER_LOAD) + 1, *elsvals);
> + (icode, internal_fn_else_index (IFN_MASK_GATHER_LOAD), *elsvals);
>
> return this_fn_optabs->supports_vec_gather_load[mode] > 0;
> }
> diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc
> index 019f0b6ca36..277bc132bcd 100644
> --- a/gcc/tree-vect-data-refs.cc
> +++ b/gcc/tree-vect-data-refs.cc
> @@ -4539,6 +4539,8 @@ vect_describe_gather_scatter_call (stmt_vec_info
> stmt_info,
> info->ifn = gimple_call_internal_fn (call);
> info->decl = NULL_TREE;
> info->base = gimple_call_arg (call, 0);
> + info->alias_ptr = gimple_call_arg
> + (call, internal_fn_alias_ptr_index (info->ifn));
> info->offset = gimple_call_arg
> (call, internal_fn_offset_index (info->ifn));
> info->offset_dt = vect_unknown_def_type;
> @@ -4869,6 +4871,11 @@ vect_check_gather_scatter (stmt_vec_info stmt_info,
> loop_vec_info loop_vinfo,
> info->ifn = ifn;
> info->decl = decl;
> info->base = base;
> +
> + info->alias_ptr = build_int_cst
> + (reference_alias_ptr_type (DR_REF (dr)),
> + get_object_alignment (DR_REF (dr)));
> +
> info->offset = off;
> info->offset_dt = vect_unknown_def_type;
> info->offset_vectype = offset_vectype;
> @@ -7364,13 +7371,14 @@ vect_can_force_dr_alignment_p (const_tree decl,
> poly_uint64 alignment)
> alignment.
> If CHECK_ALIGNED_ACCESSES is TRUE, check if the access is supported even
> it is aligned, i.e., check if it is possible to vectorize it with
> different
> - alignment. */
> + alignment. If GS_INFO is passed we are dealing with a gather/scatter. */
>
> enum dr_alignment_support
> vect_supportable_dr_alignment (vec_info *vinfo, dr_vec_info *dr_info,
> - tree vectype, int misalignment)
> + tree vectype, int misalignment,
> + gather_scatter_info *gs_info)
> {
> - data_reference *dr = dr_info->dr;
> + data_reference *dr = dr_info ? dr_info->dr : nullptr;
> stmt_vec_info stmt_info = dr_info->stmt;
> machine_mode mode = TYPE_MODE (vectype);
> loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
> @@ -7382,14 +7390,6 @@ vect_supportable_dr_alignment (vec_info *vinfo,
> dr_vec_info *dr_info,
> else if (dr_safe_speculative_read_required (stmt_info))
> return dr_unaligned_unsupported;
>
> - /* For now assume all conditional loads/stores support unaligned
> - access without any special code. */
> - if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
> - if (gimple_call_internal_p (stmt)
> - && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
> - || gimple_call_internal_fn (stmt) == IFN_MASK_STORE))
> - return dr_unaligned_supported;
> -
> if (loop_vinfo)
> {
> vect_loop = LOOP_VINFO_LOOP (loop_vinfo);
> @@ -7459,7 +7459,7 @@ vect_supportable_dr_alignment (vec_info *vinfo,
> dr_vec_info *dr_info,
> }
> } */
>
> - if (DR_IS_READ (dr))
> + if (dr && DR_IS_READ (dr))
> {
> if (can_implement_p (vec_realign_load_optab, mode)
> && (!targetm.vectorize.builtin_mask_for_load
> @@ -7487,10 +7487,43 @@ vect_supportable_dr_alignment (vec_info *vinfo,
> dr_vec_info *dr_info,
>
> bool is_packed = false;
> tree type = TREE_TYPE (DR_REF (dr));
> + bool is_gather_scatter = gs_info != nullptr;
> if (misalignment == DR_MISALIGNMENT_UNKNOWN)
> - is_packed = not_size_aligned (DR_REF (dr));
> + {
> + if (!is_gather_scatter || dr != nullptr)
> + is_packed = not_size_aligned (DR_REF (dr));
> + else
> + {
> + /* Gather-scatter accesses normally perform only component accesses
> + so alignment is irrelevant for them. Targets like riscv do care
> + about scalar alignment in vector accesses, though, so check
> scalar
> + alignment here. We determined the alias pointer as well as the
> + base alignment during pattern recognition and can re-use it here.
> +
> + As we do not have an analyzed dataref we only know the alignment
> + of the reference itself and nothing about init, steps, etc.
> + For now don't try harder to determine misalignment and
> + just assume it is unknown. We consider the type packed if its
> + scalar alignment is lower than the natural alignment of a vector
> + element's type. */
> +
> + gcc_assert (!GATHER_SCATTER_LEGACY_P (*gs_info));
> + gcc_assert (dr == nullptr);
> +
> + tree inner_vectype = TREE_TYPE (vectype);
> +
> + unsigned HOST_WIDE_INT scalar_align
> + = tree_to_uhwi (gs_info->alias_ptr);
> + unsigned HOST_WIDE_INT inner_vectype_sz
> + = tree_to_uhwi (TYPE_SIZE (inner_vectype));
> +
> + bool is_misaligned = scalar_align < inner_vectype_sz;
> + is_packed = scalar_align > 1 && is_misaligned;
> + }
> + }
> if (targetm.vectorize.support_vector_misalignment (mode, type,
> misalignment,
> - is_packed, false))
> + is_packed,
> + is_gather_scatter))
> return dr_unaligned_supported;
>
> /* Unsupported. */
> diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc
> index 0f6d6b77ea1..f0ddbf9660c 100644
> --- a/gcc/tree-vect-patterns.cc
> +++ b/gcc/tree-vect-patterns.cc
> @@ -6042,12 +6042,14 @@ vect_recog_gather_scatter_pattern (vec_info *vinfo,
>
> tree vec_els
> = vect_get_mask_load_else (elsval, TREE_TYPE (gs_vectype));
> - pattern_stmt = gimple_build_call_internal (gs_info.ifn, 6, base,
> + pattern_stmt = gimple_build_call_internal (gs_info.ifn, 7, base,
> + gs_info.alias_ptr,
> offset, scale, zero,
> mask,
> vec_els);
> }
> else
> - pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4, base,
> + pattern_stmt = gimple_build_call_internal (gs_info.ifn, 5, base,
> + gs_info.alias_ptr,
> offset, scale, zero);
> tree lhs = gimple_get_lhs (stmt_info->stmt);
> tree load_lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
> @@ -6057,12 +6059,13 @@ vect_recog_gather_scatter_pattern (vec_info *vinfo,
> {
> tree rhs = vect_get_store_rhs (stmt_info);
> if (mask != NULL)
> - pattern_stmt = gimple_build_call_internal (gs_info.ifn, 5,
> - base, offset, scale, rhs,
> - mask);
> + pattern_stmt = gimple_build_call_internal (gs_info.ifn, 6,
> + base, gs_info.alias_ptr,
> + offset, scale, rhs, mask);
> else
> - pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4,
> - base, offset, scale, rhs);
> + pattern_stmt = gimple_build_call_internal (gs_info.ifn, 5,
> + base, gs_info.alias_ptr,
> + offset, scale, rhs);
> }
> gimple_call_set_nothrow (pattern_stmt, true);
>
> diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
> index af42f455cca..73a3c899295 100644
> --- a/gcc/tree-vect-slp.cc
> +++ b/gcc/tree-vect-slp.cc
> @@ -511,11 +511,11 @@ vect_def_types_match (enum vect_def_type dta, enum
> vect_def_type dtb)
>
> static const int no_arg_map[] = { 0 };
> static const int arg0_map[] = { 1, 0 };
> -static const int arg1_map[] = { 1, 1 };
> +static const int arg2_map[] = { 1, 2 };
> static const int arg2_arg3_map[] = { 2, 2, 3 };
> -static const int arg1_arg3_map[] = { 2, 1, 3 };
> -static const int arg1_arg4_arg5_map[] = { 3, 1, 4, 5 };
> -static const int arg1_arg3_arg4_map[] = { 3, 1, 3, 4 };
> +static const int arg2_arg4_map[] = { 2, 2, 4 };
> +static const int arg2_arg5_arg6_map[] = { 3, 2, 5, 6 };
> +static const int arg2_arg4_arg5_map[] = { 3, 2, 4, 5 };
> static const int arg3_arg2_map[] = { 2, 3, 2 };
> static const int op1_op0_map[] = { 2, 1, 0 };
> static const int off_map[] = { 1, GATHER_SCATTER_OFFSET };
> @@ -570,18 +570,18 @@ vect_get_operand_map (const gimple *stmt, bool
> gather_scatter_p = false,
> return gather_scatter_p ? off_arg2_arg3_map : arg2_arg3_map;
>
> case IFN_GATHER_LOAD:
> - return arg1_map;
> + return arg2_map;
>
> case IFN_MASK_GATHER_LOAD:
> case IFN_MASK_LEN_GATHER_LOAD:
> - return arg1_arg4_arg5_map;
> + return arg2_arg5_arg6_map;
>
> case IFN_SCATTER_STORE:
> - return arg1_arg3_map;
> + return arg2_arg4_map;
>
> case IFN_MASK_SCATTER_STORE:
> case IFN_MASK_LEN_SCATTER_STORE:
> - return arg1_arg3_arg4_map;
> + return arg2_arg4_arg5_map;
>
> case IFN_MASK_STORE:
> return gather_scatter_p ? off_arg3_arg2_map : arg3_arg2_map;
> diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
> index 66e79891b09..d5c5fbe25f4 100644
> --- a/gcc/tree-vect-stmts.cc
> +++ b/gcc/tree-vect-stmts.cc
> @@ -1803,6 +1803,9 @@ vect_truncate_gather_scatter_offset (stmt_vec_info
> stmt_info,
> /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
> but we don't need to store that here. */
> gs_info->base = NULL_TREE;
> + gs_info->alias_ptr = build_int_cst
> + (reference_alias_ptr_type (DR_REF (dr)),
> + get_object_alignment (DR_REF (dr)));
> gs_info->element_type = TREE_TYPE (vectype);
> gs_info->offset = fold_convert (offset_type, step);
> gs_info->offset_dt = vect_constant_def;
> @@ -2106,7 +2109,7 @@ get_group_load_store_type (vec_info *vinfo,
> stmt_vec_info stmt_info,
> separated by the stride, until we have a complete vector.
> Fall back to scalar accesses if that isn't possible. */
> *memory_access_type = VMAT_STRIDED_SLP;
> - else
> + else if (!STMT_VINFO_GATHER_SCATTER_P (stmt_info))
> {
> int cmp = compare_step_with_zero (vinfo, stmt_info);
> if (cmp < 0)
> @@ -2349,19 +2352,71 @@ get_group_load_store_type (vec_info *vinfo,
> stmt_vec_info stmt_info,
> allows us to use contiguous accesses. */
> if ((*memory_access_type == VMAT_ELEMENTWISE
> || *memory_access_type == VMAT_STRIDED_SLP)
> + && !STMT_VINFO_GATHER_SCATTER_P (stmt_info)
> && single_element_p
> && SLP_TREE_LANES (slp_node) == 1
> && loop_vinfo
> && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
> masked_p, gs_info, elsvals))
> *memory_access_type = VMAT_GATHER_SCATTER;
> + else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
> + {
> + *memory_access_type = VMAT_GATHER_SCATTER;
> + if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info,
> + elsvals))
> + gcc_unreachable ();
> + /* When using internal functions, we rely on pattern recognition
> + to convert the type of the offset to the type that the target
> + requires, with the result being a call to an internal function.
> + If that failed for some reason (e.g. because another pattern
> + took priority), just handle cases in which the offset already
> + has the right type. */
> + else if (GATHER_SCATTER_IFN_P (*gs_info)
> + && !is_gimple_call (stmt_info->stmt)
> + && !tree_nop_conversion_p (TREE_TYPE (gs_info->offset),
> + TREE_TYPE
> (gs_info->offset_vectype)))
> + {
> + if (dump_enabled_p ())
> + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> + "%s offset requires a conversion\n",
> + vls_type == VLS_LOAD ? "gather" : "scatter");
> + return false;
> + }
> + else if (!vect_is_simple_use (gs_info->offset, vinfo,
> + &gs_info->offset_dt,
> + &gs_info->offset_vectype))
> + {
> + if (dump_enabled_p ())
> + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> + "%s index use not simple.\n",
> + vls_type == VLS_LOAD ? "gather" : "scatter");
> + return false;
> + }
> + else if (GATHER_SCATTER_EMULATED_P (*gs_info))
> + {
> + if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant ()
> + || !TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype).is_constant
> ()
> + || VECTOR_BOOLEAN_TYPE_P (gs_info->offset_vectype)
> + || !constant_multiple_p (TYPE_VECTOR_SUBPARTS
> + (gs_info->offset_vectype),
> + TYPE_VECTOR_SUBPARTS (vectype)))
> + {
> + if (dump_enabled_p ())
> + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> + "unsupported vector types for emulated "
> + "gather.\n");
> + return false;
> + }
> + }
> + }
>
> if (*memory_access_type == VMAT_CONTIGUOUS_DOWN
> || *memory_access_type == VMAT_CONTIGUOUS_REVERSE)
> *poffset = neg_ldst_offset;
>
> - if (*memory_access_type == VMAT_GATHER_SCATTER
> - || *memory_access_type == VMAT_ELEMENTWISE
> + if (*memory_access_type == VMAT_ELEMENTWISE
> + || (*memory_access_type == VMAT_GATHER_SCATTER
> + && GATHER_SCATTER_LEGACY_P (*gs_info))
> || *memory_access_type == VMAT_STRIDED_SLP
> || *memory_access_type == VMAT_INVARIANT)
> {
> @@ -2370,10 +2425,15 @@ get_group_load_store_type (vec_info *vinfo,
> stmt_vec_info stmt_info,
> }
> else
> {
> - *misalignment = dr_misalignment (first_dr_info, vectype, *poffset);
> + if (*memory_access_type == VMAT_GATHER_SCATTER
> + && !first_dr_info)
> + *misalignment = DR_MISALIGNMENT_UNKNOWN;
> + else
> + *misalignment = dr_misalignment (first_dr_info, vectype, *poffset);
> *alignment_support_scheme
> - = vect_supportable_dr_alignment (vinfo, first_dr_info, vectype,
> - *misalignment);
> + = vect_supportable_dr_alignment
> + (vinfo, first_dr_info, vectype, *misalignment,
> + *memory_access_type == VMAT_GATHER_SCATTER ? gs_info : nullptr);
> }
>
> if (vls_type != VLS_LOAD && first_stmt_info == stmt_info)
> @@ -2443,58 +2503,12 @@ get_load_store_type (vec_info *vinfo, stmt_vec_info
> stmt_info,
> poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
> *misalignment = DR_MISALIGNMENT_UNKNOWN;
> *poffset = 0;
> - if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
> - {
> - *memory_access_type = VMAT_GATHER_SCATTER;
> - if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info,
> - elsvals))
> - gcc_unreachable ();
> - /* When using internal functions, we rely on pattern recognition
> - to convert the type of the offset to the type that the target
> - requires, with the result being a call to an internal function.
> - If that failed for some reason (e.g. because another pattern
> - took priority), just handle cases in which the offset already
> - has the right type. */
> - else if (GATHER_SCATTER_IFN_P (*gs_info)
> - && !is_gimple_call (stmt_info->stmt)
> - && !tree_nop_conversion_p (TREE_TYPE (gs_info->offset),
> - TREE_TYPE
> (gs_info->offset_vectype)))
> - {
> - if (dump_enabled_p ())
> - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> - "%s offset requires a conversion\n",
> - vls_type == VLS_LOAD ? "gather" : "scatter");
> - return false;
> - }
> - slp_tree offset_node = SLP_TREE_CHILDREN (slp_node)[0];
> - gs_info->offset_dt = SLP_TREE_DEF_TYPE (offset_node);
> - gs_info->offset_vectype = SLP_TREE_VECTYPE (offset_node);
> - if (gs_info->ifn == IFN_LAST && !gs_info->decl)
> - {
> - if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant ()
> - || !TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype).is_constant
> ()
> - || VECTOR_BOOLEAN_TYPE_P (gs_info->offset_vectype)
> - || !constant_multiple_p (TYPE_VECTOR_SUBPARTS
> - (gs_info->offset_vectype),
> - TYPE_VECTOR_SUBPARTS (vectype)))
> - {
> - if (dump_enabled_p ())
> - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> - "unsupported vector types for emulated "
> - "gather.\n");
> - return false;
> - }
> - }
> - /* Gather-scatter accesses perform only component accesses, alignment
> - is irrelevant for them. */
> - *alignment_support_scheme = dr_unaligned_supported;
> - }
> - else if (!get_group_load_store_type (vinfo, stmt_info, vectype, slp_node,
> - masked_p,
> - vls_type, memory_access_type, poffset,
> - alignment_support_scheme,
> - misalignment, gs_info, lanes_ifn,
> - elsvals))
> + if (!get_group_load_store_type (vinfo, stmt_info, vectype, slp_node,
> + masked_p,
> + vls_type, memory_access_type, poffset,
> + alignment_support_scheme,
> + misalignment, gs_info, lanes_ifn,
> + elsvals))
> return false;
>
> if ((*memory_access_type == VMAT_ELEMENTWISE
> @@ -2528,17 +2542,18 @@ get_load_store_type (vec_info *vinfo, stmt_vec_info
> stmt_info,
> "alignment. With non-contiguous memory
> vectorization"
> " could read out of bounds at %G ",
> STMT_VINFO_STMT (stmt_info));
> - if (inbounds)
> - LOOP_VINFO_MUST_USE_PARTIAL_VECTORS_P (loop_vinfo) = true;
> - else
> - return false;
> + if (inbounds)
> + LOOP_VINFO_MUST_USE_PARTIAL_VECTORS_P (loop_vinfo) = true;
> + else
> + return false;
> }
>
> /* If this DR needs alignment for correctness, we must ensure the target
> alignment is a constant power-of-two multiple of the amount read per
> vector iteration or force masking. */
> if (dr_safe_speculative_read_required (stmt_info)
> - && *alignment_support_scheme == dr_aligned)
> + && (*alignment_support_scheme == dr_aligned
> + && *memory_access_type != VMAT_GATHER_SCATTER))
> {
> /* We can only peel for loops, of course. */
> gcc_checking_assert (loop_vinfo);
> @@ -8456,7 +8471,6 @@ vectorizable_store (vec_info *vinfo,
>
> if (dump_enabled_p ()
> && memory_access_type != VMAT_ELEMENTWISE
> - && memory_access_type != VMAT_GATHER_SCATTER
> && memory_access_type != VMAT_STRIDED_SLP
> && memory_access_type != VMAT_INVARIANT
> && alignment_support_scheme != dr_aligned)
> @@ -9157,24 +9171,31 @@ vectorizable_store (vec_info *vinfo,
> {
> if (VECTOR_TYPE_P (TREE_TYPE (vec_offset)))
> call = gimple_build_call_internal (
> - IFN_MASK_LEN_SCATTER_STORE, 7, dataref_ptr,
> + IFN_MASK_LEN_SCATTER_STORE, 8, dataref_ptr,
> + gs_info.alias_ptr,
> vec_offset, scale, vec_oprnd, final_mask,
> final_len,
> bias);
> else
> /* Non-vector offset indicates that prefer to take
> MASK_LEN_STRIDED_STORE instead of the
> - IFN_MASK_SCATTER_STORE with direct stride arg. */
> + IFN_MASK_SCATTER_STORE with direct stride arg.
> + Similar to the gather case we have checked the
> + alignment for a scatter already and assume
> + that the strided store has the same requirements. */
> call = gimple_build_call_internal (
> IFN_MASK_LEN_STRIDED_STORE, 6, dataref_ptr,
> vec_offset, vec_oprnd, final_mask, final_len,
> bias);
> }
> else if (final_mask)
> call = gimple_build_call_internal
> - (IFN_MASK_SCATTER_STORE, 5, dataref_ptr,
> + (IFN_MASK_SCATTER_STORE, 6, dataref_ptr,
> + gs_info.alias_ptr,
> vec_offset, scale, vec_oprnd, final_mask);
> else
> - call = gimple_build_call_internal (IFN_SCATTER_STORE, 4,
> - dataref_ptr, vec_offset,
> + call = gimple_build_call_internal (IFN_SCATTER_STORE, 5,
> + dataref_ptr,
> + gs_info.alias_ptr,
> + vec_offset,
> scale, vec_oprnd);
> gimple_call_set_nothrow (call, true);
> vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
> @@ -10641,7 +10662,6 @@ vectorizable_load (vec_info *vinfo,
> vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
> }
>
> - gcc_assert (alignment_support_scheme);
> vec_loop_masks *loop_masks
> = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
> ? &LOOP_VINFO_MASKS (loop_vinfo)
> @@ -10661,10 +10681,12 @@ vectorizable_load (vec_info *vinfo,
>
> /* Targets with store-lane instructions must not require explicit
> realignment. vect_supportable_dr_alignment always returns either
> - dr_aligned or dr_unaligned_supported for masked operations. */
> + dr_aligned or dr_unaligned_supported for (non-length) masked
> + operations. */
> gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
> && !mask
> && !loop_masks)
> + || memory_access_type == VMAT_GATHER_SCATTER
> || alignment_support_scheme == dr_aligned
> || alignment_support_scheme == dr_unaligned_supported);
>
> @@ -11009,8 +11031,6 @@ vectorizable_load (vec_info *vinfo,
>
> if (memory_access_type == VMAT_GATHER_SCATTER)
> {
> - gcc_assert (alignment_support_scheme == dr_aligned
> - || alignment_support_scheme == dr_unaligned_supported);
> gcc_assert (!grouped_load && !slp_perm);
>
> unsigned int inside_cost = 0, prologue_cost = 0;
> @@ -11099,7 +11119,8 @@ vectorizable_load (vec_info *vinfo,
> {
> if (VECTOR_TYPE_P (TREE_TYPE (vec_offset)))
> call = gimple_build_call_internal
> (IFN_MASK_LEN_GATHER_LOAD,
> - 8, dataref_ptr,
> + 9, dataref_ptr,
> + gs_info.alias_ptr,
> vec_offset, scale,
> zero,
> final_mask, vec_els,
> final_len, bias);
> @@ -11114,13 +11135,15 @@ vectorizable_load (vec_info *vinfo,
> }
> else if (final_mask)
> call = gimple_build_call_internal (IFN_MASK_GATHER_LOAD,
> - 6, dataref_ptr,
> + 7, dataref_ptr,
> + gs_info.alias_ptr,
> vec_offset, scale,
> zero, final_mask, vec_els);
> else
> - call = gimple_build_call_internal (IFN_GATHER_LOAD, 4,
> - dataref_ptr, vec_offset,
> - scale, zero);
> + call = gimple_build_call_internal (IFN_GATHER_LOAD, 5,
> + dataref_ptr,
> + gs_info.alias_ptr,
> + vec_offset, scale, zero);
> gimple_call_set_nothrow (call, true);
> new_stmt = call;
> data_ref = NULL_TREE;
> diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
> index 7b927491b1c..4d51ad61fa8 100644
> --- a/gcc/tree-vectorizer.h
> +++ b/gcc/tree-vectorizer.h
> @@ -1557,6 +1557,10 @@ struct gather_scatter_info {
> /* The loop-invariant base value. */
> tree base;
>
> + /* The TBAA alias pointer the value of which determines the alignment
> + of the scalar accesses. */
> + tree alias_ptr;
> +
> /* The original scalar offset, which is a non-loop-invariant SSA_NAME. */
> tree offset;
>
> @@ -2542,7 +2546,8 @@ extern bool ref_within_array_bound (gimple *, tree);
> /* In tree-vect-data-refs.cc. */
> extern bool vect_can_force_dr_alignment_p (const_tree, poly_uint64);
> extern enum dr_alignment_support vect_supportable_dr_alignment
> - (vec_info *, dr_vec_info *, tree, int);
> + (vec_info *, dr_vec_info *, tree, int,
> + gather_scatter_info * = nullptr);
> extern tree vect_get_smallest_scalar_type (stmt_vec_info, tree);
> extern opt_result vect_analyze_data_ref_dependences (loop_vec_info, unsigned
> int *);
> extern bool vect_slp_analyze_instance_dependence (vec_info *, slp_instance);
> --
> 2.50.0
>