On Thu, Oct 9, 2025 at 10:10 AM Robin Dapp <[email protected]> wrote:
>
> Hi,
>
> This patch adjusts vect_gather_scatter_fn_p to always check an offset
> type with swapped signedness (vs. the original offset argument).
> If the target supports the gather/scatter with the new offset type, as
> well as the conversion of the offset, we now emit an explicit offset
> conversion before the actual gather/scatter.
>
> The relaxation is only done for the IFN path of gather/scatter and the
> general idea looks roughly like:
>
>   - vect_gather_scatter_fn_p tries both signed and unsigned offset
>   types and sets supported_offset_vectype to the type that actually
>   worked, while offset_vectype_out is the type that was requested.
>   - vect_check_gather_scatter works as before but uses the relaxed
>   vect_gather_scatter_fn_p.
>   - get_load_store_type sets ls_data->supported_offset_vectype if the
>   requested type wasn't supported but another one was.
>   - check_load_store_for_partial_vectors uses the
>   supported_offset_vectype in order to validate what get_load_store_type
>   determined.
>   - vectorizable_load/store emit and cost a conversion if
>   ls_data->supported_offset_vectype is nonzero.
>
> The new offset type is either pointer-sized (if we started with a
> signed offset) or twice the size of the original offset (when that
> one was unsigned).
>
> Changes from v1:
>  - Check for conversion support.
>  - Rework/refactor vect_gather_scatter_fn_p.
>
> I'm aware it's not exactly pretty but I hope to not have complicated
> things too much.  Suggestions welcome of course.
>
> Bootstrapped on x86 and power10, regtested on aarch64 and rv64gcv_zvl512b.
>
> Regards
>  Robin
>
> gcc/ChangeLog:
>
>         * tree-vect-data-refs.cc (vect_gather_scatter_fn_p):
>         Use vect_gather_scatter_try_ifns.
>         (vect_gather_scatter_try_ifns): New function.
>         (vect_check_gather_scatter): Add argument to
>         vect_gather_scatter_fn_p.
>         * tree-vect-stmts.cc (vect_truncate_gather_scatter_offset):
>         Ditto.
>         (vect_use_grouped_gather): Ditto.
>         (get_load_store_type): Ditto.
>         (vectorizable_store): Cost and emit conversion.
>         (vectorizable_load): Ditto.
>         * tree-vectorizer.h (struct vect_load_store_data): Add
>         supported_offset_vectype.
>         (vect_gather_scatter_fn_p): Add argument.
> ---
>  gcc/tree-vect-data-refs.cc | 115 +++++++++++++++++++++++++++++--------
>  gcc/tree-vect-stmts.cc     |  80 ++++++++++++++++++++++++--
>  gcc/tree-vectorizer.h      |   6 +-
>  3 files changed, 169 insertions(+), 32 deletions(-)
>
> diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc
> index c7941108887..9f88627b533 100644
> --- a/gcc/tree-vect-data-refs.cc
> +++ b/gcc/tree-vect-data-refs.cc
> @@ -4425,32 +4425,16 @@ vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
>    return opt_result::success ();
>  }
>
> -/* Check whether we can use an internal function for a gather load
> -   or scatter store.  READ_P is true for loads and false for stores.
> -   MASKED_P is true if the load or store is conditional.  MEMORY_TYPE is
> -   the type of the memory elements being loaded or stored.  OFFSET_TYPE
> -   is the type of the offset that is being applied to the invariant
> -   base address.  If OFFSET_TYPE is scalar the function chooses an
> -   appropriate vector type for it.  SCALE is the amount by which the
> -   offset should be multiplied *after* it has been converted to address width.
> +/* Helper for vect_gather_scatter_fn_p that checks if there is a supported
> +   gather/scatter internal function with the given parameters.  */
>
> -   Return true if the function is supported, storing the function id in
> -   *IFN_OUT and the vector type for the offset in *OFFSET_VECTYPE_OUT.
> -
> -   If we can use gather and store the possible else values in ELSVALS.  */
> -
> -bool
> -vect_gather_scatter_fn_p (vec_info *vinfo, bool read_p, bool masked_p,
> -                         tree vectype, tree memory_type, tree offset_type,
> -                         int scale, internal_fn *ifn_out,
> -                         tree *offset_vectype_out, vec<int> *elsvals)
> +static bool
> +vect_gather_scatter_try_ifns (vec_info *vinfo, bool read_p, bool masked_p,
> +                             tree vectype, tree memory_type, tree offset_type,
> +                             int scale, internal_fn *ifn_out,
> +                             tree *offset_vectype_out, vec<int> *elsvals)
>  {
> -  unsigned int memory_bits = tree_to_uhwi (TYPE_SIZE (memory_type));
>    unsigned int element_bits = vector_element_bits (vectype);
> -  if (element_bits != memory_bits)
> -    /* For now the vector elements must be the same width as the
> -       memory elements.  */
> -    return false;
>
>    /* Work out which function we need.  */
>    internal_fn ifn, alt_ifn, alt_ifn2;
> @@ -4528,6 +4512,80 @@ vect_gather_scatter_fn_p (vec_info *vinfo, bool read_p, bool masked_p,
>      }
>  }
>
> +/* Check whether we can use an internal function for a gather load
> +   or scatter store.  READ_P is true for loads and false for stores.
> +   MASKED_P is true if the load or store is conditional.  MEMORY_TYPE is
> +   the type of the memory elements being loaded or stored.  OFFSET_TYPE
> +   is the type of the offset that is being applied to the invariant
> +   base address.  If OFFSET_TYPE is scalar the function chooses an
> +   appropriate vector type for it.  SCALE is the amount by which the
> +   offset should be multiplied *after* it has been converted to address width.
> +
> +   Return true if the function is supported, storing the function id in
> +   *IFN_OUT and the vector type for the offset in *OFFSET_VECTYPE_OUT.
> +   If we support an offset vector type with different signedness than
> +   OFFSET_TYPE store it in SUPPORTED_OFFSET_VECTYPE.
> +
> +   If we can use gather/scatter and ELSVALS is nonzero, store the possible
> +   else values in ELSVALS.  */
> +
> +bool
> +vect_gather_scatter_fn_p (vec_info *vinfo, bool read_p, bool masked_p,
> +                         tree vectype, tree memory_type, tree offset_type,
> +                         int scale, internal_fn *ifn_out,
> +                         tree *offset_vectype_out,
> +                         tree *supported_offset_vectype,
> +                         vec<int> *elsvals)
> +{
> +  *supported_offset_vectype = NULL_TREE;
> +  unsigned int memory_bits = tree_to_uhwi (TYPE_SIZE (memory_type));
> +  unsigned int element_bits = vector_element_bits (vectype);
> +  if (element_bits != memory_bits)
> +    /* For now the vector elements must be the same width as the
> +       memory elements.  */
> +    return false;
> +
> +  /* See whether the current offset type and scale are supported directly.  */
> +  if (vect_gather_scatter_try_ifns (vinfo, read_p, masked_p, vectype,
> +                                   memory_type, offset_type, scale,
> +                                   ifn_out, offset_vectype_out, elsvals))
> +    return true;
> +
> +  enum tree_code tmp;
> +  tree offset_vectype_old = VECTOR_TYPE_P (offset_type)
> +    ? offset_type : get_vectype_for_scalar_type (vinfo, offset_type);
> +
> +  /* If the offset type is unsupported try a larger one with swapped
> +     signedness.  If we started out with a signed type we can try a
> +     pointer-sized unsigned type.  For an unsigned type a signed type
> +     of twice the size is sufficient.  */
> +  if (VECTOR_TYPE_P (offset_type))
> +    offset_type = TREE_TYPE (offset_type);
> +  if (!TYPE_OVERFLOW_WRAPS (offset_type))
> +    offset_type = build_nonstandard_integer_type (POINTER_SIZE, 1);

I think it's better to use 'sizetype' here; vect_check_gather_scatter
matches up the precision of the base pointer, but that's not readily
available here.  The concern is address spaces, but the optabs do not
have the pointer mode as discriminator anyway.  I also wonder why you
check !TYPE_OVERFLOW_WRAPS when the comment suggests checking
!TYPE_UNSIGNED?  There's no check that the pointer-sized type is
actually larger; you possibly rely on what vect_check_gather_scatter
does here, but an explicit test would be nice to have (see the sketch
below).
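
I.e. something like the following (a completely untested sketch; the
unsigned branch keeps the build_nonstandard_integer_type call but bails
out instead of capping at POINTER_SIZE):

  if (!TYPE_UNSIGNED (offset_type))
    {
      /* A signed offset needs a strictly wider unsigned type.  */
      if (TYPE_PRECISION (offset_type) >= TYPE_PRECISION (sizetype))
        return false;
      offset_type = sizetype;
    }
  else
    {
      /* For an unsigned offset a signed type of twice the precision
         is needed; give up when even sizetype is too small.  */
      int prec = TYPE_PRECISION (offset_type) * 2;
      if (prec > (int) TYPE_PRECISION (sizetype))
        return false;
      offset_type = build_nonstandard_integer_type (prec, 0);
    }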

The patch otherwise looks OK.  With all the checking we do I now wonder
whether it's possible to do this the other way around: get all supported
offset vector types for a data vector type by walking the optabs?  It
seems the only way is to basically iterate over all (reasonable) vector
integer modes for the offset part and then poke the operand predicate
for the sign, as in the sketch below.  I'm not sure doing this will make
the code much simpler.  It might be something to keep in mind.
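
Something like this completely untested sketch (IFN, VECTYPE,
MEMORY_TYPE and SCALE as in the existing checks; collecting the
candidates into an auto_vec is just for illustration):

  auto_vec<tree> candidates;
  machine_mode mode;
  FOR_EACH_MODE_IN_CLASS (mode, MODE_VECTOR_INT)
    {
      if (maybe_ne (GET_MODE_NUNITS (mode), TYPE_VECTOR_SUBPARTS (vectype)))
        continue;
      /* The mode does not encode the sign, so query both variants.  */
      for (int uns = 0; uns < 2; ++uns)
        {
          tree itype
            = build_nonstandard_integer_type (GET_MODE_UNIT_PRECISION (mode),
                                              uns);
          tree off_vectype = build_vector_type_for_mode (itype, mode);
          if (internal_gather_scatter_fn_supported_p (ifn, vectype,
                                                      memory_type,
                                                      off_vectype, scale))
            candidates.safe_push (off_vectype);
        }
    }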

Thanks,
Richard.

> +  else
> +    {
> +      int prec = TYPE_PRECISION (offset_type) * 2;
> +      prec = std::min ((int) POINTER_SIZE, prec);
> +      offset_type = build_nonstandard_integer_type (prec, 0);
> +    }
> +  if (vect_gather_scatter_try_ifns (vinfo, read_p, masked_p, vectype,
> +                                   memory_type, offset_type, scale,
> +                                   ifn_out, offset_vectype_out, elsvals)
> +      && (tree_nop_conversion_p (*offset_vectype_out, offset_vectype_old)
> +         || supportable_convert_operation (CONVERT_EXPR, *offset_vectype_out,
> +                                           offset_vectype_old, &tmp)))
> +    {
> +      if (!tree_nop_conversion_p (*offset_vectype_out, offset_vectype_old))
> +       *supported_offset_vectype = *offset_vectype_out;
> +      *offset_vectype_out = offset_vectype_old;
> +      return true;
> +    }
> +
> +  return false;
> +}
> +
>  /* STMT_INFO is a call to an internal gather load or scatter store function.
>     Describe the operation in INFO.  */
>
> @@ -4678,6 +4736,7 @@ vect_check_gather_scatter (stmt_vec_info stmt_info, tree vectype,
>
>    base = fold_convert (sizetype, base);
>    base = size_binop (PLUS_EXPR, base, size_int (pbytepos));
> +  tree tmp_offset_vectype;
>
>    /* OFF at this point may be either a SSA_NAME or some tree expression
>       from get_inner_reference.  Try to peel off loop invariants from it
> @@ -4752,12 +4811,14 @@ vect_check_gather_scatter (stmt_vec_info stmt_info, tree vectype,
>                                                 signed_char_type_node,
>                                                 new_scale, &ifn,
>                                                 &offset_vectype,
> +                                               &tmp_offset_vectype,
>                                                 elsvals)
>                   && !vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr),
>                                                 masked_p, vectype, memory_type,
>                                                 unsigned_char_type_node,
>                                                 new_scale, &ifn,
>                                                 &offset_vectype,
> +                                               &tmp_offset_vectype,
>                                                 elsvals))
>                 break;
>               scale = new_scale;
> @@ -4781,7 +4842,9 @@ vect_check_gather_scatter (stmt_vec_info stmt_info, tree vectype,
>               && vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr),
>                                            masked_p, vectype, memory_type,
>                                            TREE_TYPE (off), scale, &ifn,
> -                                          &offset_vectype, elsvals))
> +                                          &offset_vectype,
> +                                          &tmp_offset_vectype,
> +                                          elsvals))
>             break;
>
>           if (TYPE_PRECISION (TREE_TYPE (op0))
> @@ -4835,7 +4898,9 @@ vect_check_gather_scatter (stmt_vec_info stmt_info, tree vectype,
>      {
>        if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
>                                      vectype, memory_type, offtype, scale,
> -                                    &ifn, &offset_vectype, elsvals))
> +                                    &ifn, &offset_vectype,
> +                                    &tmp_offset_vectype,
> +                                    elsvals))
>         ifn = IFN_LAST;
>        decl = NULL_TREE;
>      }
> diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
> index d8b1ee73b19..ff26461f6c5 100644
> --- a/gcc/tree-vect-stmts.cc
> +++ b/gcc/tree-vect-stmts.cc
> @@ -1505,6 +1505,14 @@ check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
>                           : ls->strided_offset_vectype);
>        tree memory_type = TREE_TYPE (DR_REF (STMT_VINFO_DR_INFO (repr)->dr));
>        int scale = SLP_TREE_GS_SCALE (slp_node);
> +
> +      /* The following "supported" checks just verify what we established in
> +        get_load_store_type and don't try different offset types.
> +        Therefore, off_vectype must be a supported offset type.  In case
> +        we chose a different one use this instead.  */
> +      if (ls->supported_offset_vectype)
> +       off_vectype = ls->supported_offset_vectype;
> +
>        if (internal_gather_scatter_fn_supported_p (len_ifn, vectype,
>                                                   memory_type,
>                                                   off_vectype, scale,
> @@ -1697,10 +1705,12 @@ vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info, tree vectype,
>        /* See whether the target supports the operation with an offset
>          no narrower than OFFSET_TYPE.  */
>        tree memory_type = TREE_TYPE (DR_REF (dr));
> +      tree tmp_offset_vectype;
> +      int supported_scale;
>        if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
>                                      vectype, memory_type, offset_type, scale,
>                                      &gs_info->ifn, &gs_info->offset_vectype,
> -                                    elsvals)
> +                                    &tmp_offset_vectype, elsvals)
>           || gs_info->ifn == IFN_LAST)
>         continue;
>
> @@ -1779,10 +1789,11 @@ vect_use_grouped_gather (dr_vec_info *dr_info, tree vectype,
>       type must exist) so it is possible that even though a gather/scatter is
>       not available we still have a strided load/store.  */
>    bool ok = false;
> +  tree tmp_vectype;
>    if (vect_gather_scatter_fn_p
>        (loop_vinfo, DR_IS_READ (dr), masked_p, *pun_vectype,
>         TREE_TYPE (*pun_vectype), *pun_vectype, 1, &ifn,
> -       &offset_vectype, elsvals))
> +       &offset_vectype, &tmp_vectype, elsvals))
>      ok = true;
>    else if (internal_strided_fn_supported_p (strided_ifn, *pun_vectype,
>                                             elsvals))
> @@ -2081,6 +2092,7 @@ get_load_store_type (vec_info  *vinfo, stmt_vec_info stmt_info,
>    internal_fn *lanes_ifn = &ls->lanes_ifn;
>    vec<int> *elsvals = &ls->elsvals;
>    tree *ls_type = &ls->ls_type;
> +  tree *supported_offset_vectype = &ls->supported_offset_vectype;
>    loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
>    poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
>    class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
> @@ -2144,12 +2156,25 @@ get_load_store_type (vec_info  *vinfo, stmt_vec_info stmt_info,
>        tree memory_type = TREE_TYPE (DR_REF (first_dr_info->dr));
>        tree tem;
>        if (vect_gather_scatter_fn_p (loop_vinfo, vls_type == VLS_LOAD,
> -                                   masked_p, vectype,
> -                                   memory_type,
> +                                   masked_p, vectype, memory_type,
>                                     offset_vectype, scale,
>                                     &ls->gs.ifn, &tem,
> -                                   elsvals))
> -       *memory_access_type = VMAT_GATHER_SCATTER_IFN;
> +                                   supported_offset_vectype, elsvals))
> +       {
> +         if (dump_enabled_p ())
> +           {
> +             dump_printf_loc (MSG_NOTE, vect_location,
> +                              "gather/scatter with required "
> +                              "offset vector type "
> +                              "%T and offset scale %d.\n",
> +                              offset_vectype, scale);
> +             if (*supported_offset_vectype)
> +               dump_printf_loc (MSG_NOTE, vect_location,
> +                                " target supports offset vector type %T.\n",
> +                                *supported_offset_vectype);
> +           }
> +         *memory_access_type = VMAT_GATHER_SCATTER_IFN;
> +       }
>        else if (vls_type == VLS_LOAD
>                ? (targetm.vectorize.builtin_gather
>                   && (ls->gs.decl
> @@ -2413,6 +2438,19 @@ get_load_store_type (vec_info  *vinfo, stmt_vec_info stmt_info,
>                                                  masked_p, &gs_info, elsvals,
>                                                  group_size, single_element_p))
>         {
> +         /* vect_use_strided_gather_scatters_p does not save the actually
> +            supported scale and offset type so do that here.
> +            We need it later in check_load_store_for_partial_vectors
> +            where we only check if the given internal function is supported
> +            (to choose whether to use the IFN, LEGACY, or EMULATED flavor
> +            of gather/scatter) and don't re-do the full analysis.  */
> +         tree tmp;
> +         gcc_assert (vect_gather_scatter_fn_p
> +                     (loop_vinfo, vls_type == VLS_LOAD, masked_p, vectype,
> +                      gs_info.memory_type, TREE_TYPE (gs_info.offset),
> +                      gs_info.scale, &gs_info.ifn,
> +                      &tmp, supported_offset_vectype, elsvals));
> +
>           SLP_TREE_GS_SCALE (slp_node) = gs_info.scale;
>           SLP_TREE_GS_BASE (slp_node) = error_mark_node;
>           ls->gs.ifn = gs_info.ifn;
> @@ -8746,6 +8784,11 @@ vectorizable_store (vec_info *vinfo,
>             {
>               if (costing_p)
>                 {
> +                 if (ls.supported_offset_vectype)
> +                   inside_cost
> +                     += record_stmt_cost (cost_vec, 1, vector_stmt,
> +                                          slp_node, 0, vect_body);
> +
>                   unsigned int cnunits = vect_nunits_for_cost (vectype);
>                   inside_cost
>                     += record_stmt_cost (cost_vec, cnunits, scalar_store,
> @@ -8757,6 +8800,16 @@ vectorizable_store (vec_info *vinfo,
>                 vec_offset = vec_offsets[j];
>
>               tree scale = size_int (SLP_TREE_GS_SCALE (slp_node));
> +             bool strided = !VECTOR_TYPE_P (TREE_TYPE (vec_offset));
> +
> +             /* Perform the offset conversion if necessary.  */
> +             if (!strided && ls.supported_offset_vectype)
> +               {
> +                 gimple_seq stmts = NULL;
> +                 vec_offset = gimple_convert
> +                   (&stmts, ls.supported_offset_vectype, vec_offset);
> +                 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
> +               }
>
>               if (ls.gs.ifn == IFN_MASK_LEN_SCATTER_STORE)
>                 {
> @@ -10628,6 +10681,11 @@ vectorizable_load (vec_info *vinfo,
>             {
>               if (costing_p)
>                 {
> +                 if (ls.supported_offset_vectype)
> +                   inside_cost
> +                     += record_stmt_cost (cost_vec, 1, vector_stmt,
> +                                          slp_node, 0, vect_body);
> +
>                   unsigned int cnunits = vect_nunits_for_cost (vectype);
>                   inside_cost
>                     = record_stmt_cost (cost_vec, cnunits, scalar_load,
> @@ -10638,6 +10696,16 @@ vectorizable_load (vec_info *vinfo,
>                 vec_offset = vec_offsets[i];
>               tree zero = build_zero_cst (vectype);
>               tree scale = size_int (SLP_TREE_GS_SCALE (slp_node));
> +             bool strided = !VECTOR_TYPE_P (TREE_TYPE (vec_offset));
> +
> +             /* Perform the offset conversion if necessary.  */
> +             if (!strided && ls.supported_offset_vectype)
> +               {
> +                 gimple_seq stmts = NULL;
> +                 vec_offset = gimple_convert
> +                   (&stmts, ls.supported_offset_vectype, vec_offset);
> +                 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
> +               }
>
>               if (ls.gs.ifn == IFN_MASK_LEN_GATHER_LOAD)
>                 {
> diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
> index 52bc0d672bf..39d67fcd081 100644
> --- a/gcc/tree-vectorizer.h
> +++ b/gcc/tree-vectorizer.h
> @@ -289,6 +289,10 @@ struct vect_load_store_data : vect_data {
>    } gs;
>    tree strided_offset_vectype; // VMAT_GATHER_SCATTER_IFN, originally strided
>    tree ls_type; // VMAT_GATHER_SCATTER_IFN
> +  /* This is set to a supported offset vector type if we don't support the
> +     originally requested offset type.  In that case there will be an
> +     additional offset conversion before the gather/scatter.  */
> +  tree supported_offset_vectype; // VMAT_GATHER_SCATTER_IFN
>    auto_vec<int> elsvals;
>    unsigned n_perms; // SLP_TREE_LOAD_PERMUTATION
>  };
> @@ -2595,7 +2599,7 @@ extern opt_result vect_analyze_data_ref_accesses (vec_info *, vec<int> *);
>  extern opt_result vect_prune_runtime_alias_test_list (loop_vec_info);
>  extern bool vect_gather_scatter_fn_p (vec_info *, bool, bool, tree, tree,
>                                       tree, int, internal_fn *, tree *,
> -                                     vec<int> * = nullptr);
> +                                     tree *, vec<int> * = nullptr);
>  extern bool vect_check_gather_scatter (stmt_vec_info, tree,
>                                        loop_vec_info, gather_scatter_info *,
>                                        vec<int> * = nullptr);
> --
> 2.51.0
>
