On Thu, Sep 18, 2025 at 1:21 PM Robin Dapp <[email protected]> wrote:
>
> Hi,
>
> This patch adds vect_analyze_slp_perm_load, an explicit variant of
> vect_transform_slp_perm_load that performs only the analysis part.
>
> I find it slightly clearer to indicate "analysis" in the
> function name already rather than having to pass "analyze_only = true"
> and set two other params to NULL.
>
> One call of vect_analyze_slp_perm_load is equivalent to
>
>    return vect_transform_slp_perm_load_1 (vinfo, node,
>                                           SLP_TREE_LOAD_PERMUTATION (node),
>                                           vNULL, nullptr, vf, true,
>                                           dump_enabled_p (), n_perms,
>                                           punning_vectype);
>
> The patch also introduces an override for the vectype and moves the
> load-perm analysis from before get_load_store_type into the function.
>
> This is more of a small refactoring than anything, in preparation for the
> "grouped gather" patch.  Should we have more guardrails on the overriding
> vectype like asserting same size etc.?

Wouldn't the overriding vectype need to have the same element size for
the permutation to still reflect reality?  If so, I doubt that overriding
the vectype is what we want.

>
> Bootstrapped and regtested on x86 and power10.  Regtested on rv64gcv_zvl512b,
> still running on aarch64.
>
> Regards
>  Robin
>
> gcc/ChangeLog:
>
>         * tree-vect-slp.cc (vect_transform_slp_perm_load_1): Add
>         type-punning argument.
>         (vect_transform_slp_perm_load): Ditto.
>         (vect_analyze_slp_perm_load): New function.
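>         * tree-vect-stmts.cc (get_load_store_type): Change perm_ok
>         argument to a pointer and perform the load-permutation
>         analysis here.
>         (vectorizable_store): Adjust call to get_load_store_type.
>         (vectorizable_load): Likewise.  Remove load-permutation analysis
>         and use ls.n_perms instead of a local n_perms.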
>         * tree-vectorizer.h (vect_transform_slp_perm_load): Add
>         argument.
>         (vect_analyze_slp_perm_load): Declare.
> ---
>  gcc/tree-vect-slp.cc   | 35 ++++++++++++++++++++++++++++++-----
>  gcc/tree-vect-stmts.cc | 38 +++++++++++++++++++-------------------
>  gcc/tree-vectorizer.h  |  5 +++++
>  3 files changed, 54 insertions(+), 24 deletions(-)
>
> diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
> index 895fb88ab7f..ead9b558131 100644
> --- a/gcc/tree-vect-slp.cc
> +++ b/gcc/tree-vect-slp.cc
> @@ -59,6 +59,7 @@ static bool vect_transform_slp_perm_load_1 (vec_info *, 
> slp_tree,
>                                             gimple_stmt_iterator *,
>                                             poly_uint64, bool, bool,
>                                             unsigned *,
> +                                           tree = NULL_TREE,
>                                             unsigned * = nullptr,
>                                             bool = false);
>  static int vectorizable_slp_permutation_1 (vec_info *, gimple_stmt_iterator 
> *,
> @@ -10595,12 +10596,16 @@ vect_transform_slp_perm_load_1 (vec_info *vinfo, 
> slp_tree node,
>                                 const vec<tree> &dr_chain,
>                                 gimple_stmt_iterator *gsi, poly_uint64 vf,
>                                 bool analyze_only, bool dump_p,
> -                               unsigned *n_perms, unsigned int *n_loads,
> +                               unsigned *n_perms,
> +                               tree punning_vectype,
> +                               unsigned int *n_loads,
>                                 bool dce_chain)
>  {
>    stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
>    int vec_index = 0;
> -  tree vectype = SLP_TREE_VECTYPE (node);
> +  tree vectype = punning_vectype;
> +  if (!vectype)
> +    vectype = SLP_TREE_VECTYPE (node);
>    unsigned int group_size = SLP_TREE_SCALAR_STMTS (node).length ();
>    unsigned int mask_element;
>    unsigned dr_group_size;
> @@ -10868,22 +10873,42 @@ vect_transform_slp_perm_load_1 (vec_info *vinfo, 
> slp_tree node,
>     permute statements for the SLP node NODE.  Store the number of vector
>     permute instructions in *N_PERMS and the number of vector load
>     instructions in *N_LOADS.  If DCE_CHAIN is true, remove all definitions
> -   that were not needed.  */
> +   that were not needed.
> +   When PUNNING_VECTYPE is passed, use that one instead of NODE's vectype
> +   for calculating the permutations.  This can be used when performing the
> +   load with a different ("punning") vectype and we want to know whether
> +   the load permutation would be a nop with the punning vectype.  */
>
>  bool
>  vect_transform_slp_perm_load (vec_info *vinfo,
>                               slp_tree node, const vec<tree> &dr_chain,
>                               gimple_stmt_iterator *gsi, poly_uint64 vf,
>                               bool analyze_only, unsigned *n_perms,
> -                             unsigned int *n_loads, bool dce_chain)
> +                             tree punning_vectype, unsigned int *n_loads,
> +                             bool dce_chain)
>  {
>    return vect_transform_slp_perm_load_1 (vinfo, node,
>                                          SLP_TREE_LOAD_PERMUTATION (node),
>                                          dr_chain, gsi, vf, analyze_only,
> -                                        dump_enabled_p (), n_perms, n_loads,
> +                                        dump_enabled_p (), n_perms,
> +                                        punning_vectype, n_loads,
>                                          dce_chain);
>  }
>
> +/* Similar to vect_transform_slp_perm_load but only perform analysis
> +   without changing anything.  */
> +
> +bool
> +vect_analyze_slp_perm_load (vec_info *vinfo, slp_tree node, poly_uint64 vf,
> +                           unsigned *n_perms, tree punning_vectype)
> +{
> +  return vect_transform_slp_perm_load_1 (vinfo, node,
> +                                        SLP_TREE_LOAD_PERMUTATION (node),
> +                                        vNULL, nullptr, vf, true,
> +                                        dump_enabled_p (), n_perms,
> +                                        punning_vectype);
> +}
> +
>  /* Produce the next vector result for SLP permutation NODE by adding a vector
>     statement at GSI.  If MASK_VEC is nonnull, add:
>
> diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
> index 6274956e2a5..f3cc54b6c4c 100644
> --- a/gcc/tree-vect-stmts.cc
> +++ b/gcc/tree-vect-stmts.cc
> @@ -1972,7 +1972,7 @@ static bool
>  get_load_store_type (vec_info  *vinfo, stmt_vec_info stmt_info,
>                      tree vectype, slp_tree slp_node,
>                      bool masked_p, vec_load_store_type vls_type,
> -                    bool perm_ok, vect_load_store_data *ls)
> +                    bool *perm_ok, vect_load_store_data *ls)
>  {
>    vect_memory_access_type *memory_access_type = &ls->memory_access_type;
>    poly_int64 *poffset = &ls->poffset;
> @@ -1989,6 +1989,8 @@ get_load_store_type (vec_info  *vinfo, stmt_vec_info 
> stmt_info,
>    unsigned HOST_WIDE_INT gap;
>    bool single_element_p;
>    poly_int64 neg_ldst_offset = 0;
> +  poly_int64 vf = loop_vinfo ? LOOP_VINFO_VECT_FACTOR (loop_vinfo) : 1;
> +  unsigned int *n_perms = &ls->n_perms;
>
>    *misalignment = DR_MISALIGNMENT_UNKNOWN;
>    *poffset = 0;
> @@ -2030,6 +2032,9 @@ get_load_store_type (vec_info  *vinfo, stmt_vec_info 
> stmt_info,
>    if (! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
>      first_dr_info = STMT_VINFO_DR_INFO (SLP_TREE_SCALAR_STMTS (slp_node)[0]);
>
> +  if (SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
> +    *perm_ok = vect_analyze_slp_perm_load (vinfo, slp_node, vf, n_perms);
> +
>    if (STMT_VINFO_STRIDED_P (first_stmt_info))
>      /* Try to use consecutive accesses of as many elements as possible,
>         separated by the stride, until we have a complete vector.
> @@ -2162,7 +2167,7 @@ get_load_store_type (vec_info  *vinfo, stmt_vec_info 
> stmt_info,
>           && (*memory_access_type == VMAT_CONTIGUOUS
>               || *memory_access_type == VMAT_CONTIGUOUS_REVERSE)
>           && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()
> -         && !perm_ok)
> +         && !*perm_ok)
>         {
>           *memory_access_type = VMAT_ELEMENTWISE;
>           if (dump_enabled_p ())
> @@ -7878,11 +7883,13 @@ vectorizable_store (vec_info *vinfo,
>    if (!STMT_VINFO_DATA_REF (stmt_info))
>      return false;
>
> +  bool perm_ok_tmp;
> +
>    vect_load_store_data _ls_data{};
>    vect_load_store_data &ls = slp_node->get_data (_ls_data);
>    if (cost_vec
>        && !get_load_store_type (vinfo, stmt_info, vectype, slp_node, 
> mask_node,
> -                              vls_type, false, &_ls_data))
> +                              vls_type, &perm_ok_tmp, &_ls_data))
>      return false;
>    /* Temporary aliases to analysis data, should not be modified through
>       these.  */
> @@ -9449,16 +9456,12 @@ vectorizable_load (vec_info *vinfo,
>      group_size = 1;
>
>    bool perm_ok = true;
> -  unsigned n_perms = -1U;
> -  if (cost_vec && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
> -    perm_ok = vect_transform_slp_perm_load (vinfo, slp_node, vNULL, NULL, vf,
> -                                           true, &n_perms);
>
>    vect_load_store_data _ls_data{};
>    vect_load_store_data &ls = slp_node->get_data (_ls_data);
>    if (cost_vec
>        && !get_load_store_type (vinfo, stmt_info, vectype, slp_node, 
> mask_node,
> -                              VLS_LOAD, perm_ok, &ls))
> +                              VLS_LOAD, &perm_ok, &ls))
>      return false;
>    /* Temporary aliases to analysis data, should not be modified through
>       these.  */
> @@ -9523,10 +9526,7 @@ vectorizable_load (vec_info *vinfo,
>                                  "unsupported load permutation\n");
>               return false;
>             }
> -         ls.n_perms = n_perms;
>         }
> -      else
> -       n_perms = ls.n_perms;
>      }
>
>    if (slp_node->ldst_lanes
> @@ -9999,8 +9999,8 @@ vectorizable_load (vec_info *vinfo,
>         {
>           if (costing_p)
>             {
> -             gcc_assert (n_perms != -1U);
> -             inside_cost += record_stmt_cost (cost_vec, n_perms, vec_perm,
> +             gcc_assert (ls.n_perms != -1U);
> +             inside_cost += record_stmt_cost (cost_vec, ls.n_perms, vec_perm,
>                                                slp_node, 0, vect_body);
>             }
>           else
> @@ -10008,7 +10008,7 @@ vectorizable_load (vec_info *vinfo,
>               unsigned n_perms2;
>               vect_transform_slp_perm_load (vinfo, slp_node, dr_chain, gsi, 
> vf,
>                                             false, &n_perms2);
> -             gcc_assert (n_perms == n_perms2);
> +             gcc_assert (ls.n_perms == n_perms2);
>             }
>         }
>
> @@ -11393,9 +11393,9 @@ vectorizable_load (vec_info *vinfo,
>          in PR101120 and friends.  */
>        if (costing_p)
>         {
> -         gcc_assert (n_perms != -1U);
> -         if (n_perms != 0)
> -           inside_cost = record_stmt_cost (cost_vec, n_perms, vec_perm,
> +         gcc_assert (ls.n_perms != -1U);
> +         if (ls.n_perms != 0)
> +           inside_cost = record_stmt_cost (cost_vec, ls.n_perms, vec_perm,
>                                             slp_node, 0, vect_body);
>         }
>        else
> @@ -11403,8 +11403,8 @@ vectorizable_load (vec_info *vinfo,
>           unsigned n_perms2;
>           bool ok = vect_transform_slp_perm_load (vinfo, slp_node, dr_chain,
>                                                   gsi, vf, false, &n_perms2,
> -                                                 nullptr, true);
> -         gcc_assert (ok && n_perms == n_perms2);
> +                                                 NULL_TREE, nullptr, true);
> +         gcc_assert (ok && ls.n_perms == n_perms2);
>         }
>        dr_chain.release ();
>      }
> diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
> index b7c2188ab3d..8f0e99b1457 100644
> --- a/gcc/tree-vectorizer.h
> +++ b/gcc/tree-vectorizer.h
> @@ -2755,7 +2755,12 @@ extern void vect_free_slp_instance (slp_instance);
>  extern bool vect_transform_slp_perm_load (vec_info *, slp_tree, const 
> vec<tree> &,
>                                           gimple_stmt_iterator *, poly_uint64,
>                                           bool, unsigned *,
> +                                         tree = NULL_TREE,
>                                           unsigned * = nullptr, bool = false);
> +extern bool vect_analyze_slp_perm_load (vec_info *, slp_tree,
> +                                       poly_uint64,
> +                                       unsigned *,
> +                                       tree = NULL_TREE);
>  extern bool vectorizable_slp_permutation (vec_info *, gimple_stmt_iterator *,
>                                           slp_tree, stmt_vector_for_cost *);
>  extern bool vect_slp_analyze_operations (vec_info *);
> --
> 2.51.0
>

Reply via email to