On Wed, 30 Jul 2025, Tamar Christina wrote:

> > -----Original Message-----
> > From: Richard Biener <rguent...@suse.de>
> > Sent: Monday, July 28, 2025 2:28 PM
> > To: gcc-patches@gcc.gnu.org
> > Cc: Richard Sandiford <richard.sandif...@arm.com>; Tamar Christina
> > <tamar.christ...@arm.com>
> > Subject: [PATCH] Record get_load_store_info results from analysis
> > 
> > The following is a prototype-quality patch to make us record the
> > get_load_store_info results from load/store analysis and re-use
> > them during transform.  Rather than mass-replacing references to
> > variables I've kept the locals but made them read-only, only
> > adjusting a few elsval setters and adding a FIXME to strided SLP
> > handling of alignment (allowing local override there).
> > 
> > In particular for gs_info the recorded data is likely excessive,
> > most data isn't actually necessary later.  I'll fix this up
> > for the final patch.  Also in particular for gs_info this should
> > allow us to scrap the DR updating from update_epilogue_loop_vinfo
> > (to some extent).
> > 
> > The FIXME shows that while a lot of analysis is done in
> > get_load_store_type that's far from all of it.
> > 
> > The patch does provide a recipe for how to handle storing of local
> > analysis results, it adds a vect_load_store_data type, local
> > data in vectorizable_load/store and an alias that either refers
> > to previous analysis results or the local data:
> > 
> > +  vect_load_store_data _ls_data;
> > +  vect_load_store_data &ls = cost_vec ? _ls_data : *slp_node->u.ls;
> > 
> > and it shows how to then record it with the current union abstraction:
> > 
> >        SLP_TREE_TYPE (slp_node) = store_vec_info_type;
> > +      slp_node->u.ls = new vect_load_store_data (ls);
> > 
> >     * tree-vectorizer.h (gather_scatter_info): Move.
> >     (vect_load_store_data): New.
> >     (_slp_tree::u::ls): New.
> >     * tree-vect-stmts.cc (get_load_store_type): Take pointer
> >     to vect_load_store_data instead of individual pointers.
> >     (vectorizable_store): Adjust.  Re-use get_load_store_type
> >     result from analysis time.
> >     (vectorizable_load): Likewise.
> >     * tree-vect-slp.cc (_slp_tree::~_slp_tree): Delete load-store
> >     data.
> > ---
> >  gcc/tree-vect-slp.cc   |   8 ++++
> >  gcc/tree-vect-stmts.cc | 101 ++++++++++++++++++++++++-----------------
> >  gcc/tree-vectorizer.h  |  98 ++++++++++++++++++++++++---------------
> >  3 files changed, 128 insertions(+), 79 deletions(-)
> > 
> > diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
> > index cb27d166c55..1e93fae39d9 100644
> > --- a/gcc/tree-vect-slp.cc
> > +++ b/gcc/tree-vect-slp.cc
> > @@ -153,6 +153,14 @@ _slp_tree::~_slp_tree ()
> >    SLP_TREE_SIMD_CLONE_INFO (this).release ();
> >    if (this->failed)
> >      free (failed);
> > +  switch (SLP_TREE_TYPE (this))
> > +    {
> > +    case load_vec_info_type:
> > +    case store_vec_info_type:
> > +      delete this->u.ls;
> > +      break;
> > +    default:;
> > +    }
> >  }
> 
> I wonder if we could avoid this by changing the union type to
> a base class.  I remember you considered this before.

You mean like a pure virtual

class vect_data {
public:
   virtual ~vect_data ();
   vect_data () = 0;
};

and

class vect_load_store_data : vect_data {
  ...
};

hmm, yeah - that's probably the more "modern" way of coding ;)
The SLP_TREE_TYPE should then be probably a member of vect_data
so it can safely "upcast" via as_a/is_a I guess.
 
> >  /* Push the single SSA definition in DEF to the vector of vector defs.  */
> > diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
> > index 1a2f6344e97..bc288bf82a3 100644
> > --- a/gcc/tree-vect-stmts.cc
> > +++ b/gcc/tree-vect-stmts.cc
> > @@ -2361,14 +2361,17 @@ get_load_store_type (vec_info  *vinfo,
> > stmt_vec_info stmt_info,
> >                  tree vectype, slp_tree slp_node,
> >                  bool masked_p, vec_load_store_type vls_type,
> >                  unsigned int,
> > -                vect_memory_access_type *memory_access_type,
> > -                poly_int64 *poffset,
> > -                dr_alignment_support *alignment_support_scheme,
> > -                int *misalignment,
> > -                gather_scatter_info *gs_info,
> > -                internal_fn *lanes_ifn,
> > -                vec<int> *elsvals = nullptr)
> > +                vect_load_store_data *ls)
> >  {
> > +  vect_memory_access_type *memory_access_type = &ls->memory_access_type;
> > +  poly_int64 *poffset = &ls->poffset;
> > +  dr_alignment_support *alignment_support_scheme
> > +    = &ls->alignment_support_scheme;
> > +  int *misalignment = &ls->misalignment;
> > +  gather_scatter_info *gs_info = &ls->gs_info;
> > +  internal_fn *lanes_ifn = &ls->lanes_ifn;
> > +  vec<int> *elsvals = &ls->elsvals;
> > +
> >    loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
> >    poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
> >    *misalignment = DR_MISALIGNMENT_UNKNOWN;
> > @@ -7773,7 +7776,6 @@ vectorizable_store (vec_info *vinfo,
> >    unsigned int vec_num;
> >    bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
> >    tree aggr_type;
> > -  gather_scatter_info gs_info;
> >    poly_uint64 vf;
> >    vec_load_store_type vls_type;
> >    tree ref_type;
> > @@ -7863,16 +7865,20 @@ vectorizable_store (vec_info *vinfo,
> >    if (!STMT_VINFO_DATA_REF (stmt_info))
> >      return false;
> > 
> > -  vect_memory_access_type memory_access_type;
> > -  enum dr_alignment_support alignment_support_scheme;
> > -  int misalignment;
> > -  poly_int64 poffset;
> > -  internal_fn lanes_ifn;
> > -  if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, 
> > vls_type,
> > -                       1, &memory_access_type, &poffset,
> > -                       &alignment_support_scheme, &misalignment, &gs_info,
> > -                       &lanes_ifn))
> > +  vect_load_store_data _ls_data;
> > +  vect_load_store_data &ls = cost_vec ? _ls_data : *slp_node->u.ls;
> > +  if (cost_vec
> > +      && !get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask,
> > +                          vls_type, 1, &_ls_data))
> >      return false;
> 
> I wonder if it makes the usage easier if get_load_store_type doesn't take the
> extra argument, and we store the value in slp_node->u.ls directly, since we
> should only be calling it during analysis now.
>
> Then we could avoid the ternary and just have
> 
> vect_load_store_data _ls_data = *slp_node->u.ls;
> 
> in both cases? and we could assert on slp_node->u.ls.

Since we currently call vectorizable_* in sequence we can't really
allocate the "proper" kind specific data type in advance, only
when analysis succeeds.  That's why I analyze into a local copy.
I could of course set slp_node->u.ls "temporarily" (and clear
that at each failure point ...).

We could of course analyze into local variables and construct
the data type only later, but I'm not sure this makes things
easier here.

> Starting to look like a nice cleanup though!

Specifically I'd like to decide on SLP_TREE_TYPE early for some kinds,
like SLP discovery already knows what's a load and what's a store
and also we know what's a reduction (and reduction data is another
large bunch of things we want to separate out).

I'll incorporate the class idea with the next round (I again missed
a good point for the CI and the patch failed to apply :/).  I'll
also see to pick another easy vectorizable_* to see how a "final"
conversion would look like (as noticed there's some way to go with
vectorizable_load/store - but I need to make forward progress there
because of gather/scatter stuff re-walking the scalar IL).

Richard.

> Thanks,
> Tamar
> 
> > +  /* Temporary aliases to analysis data, should not be modified through
> > +     these.  */
> > +  const vect_memory_access_type memory_access_type =
> > ls.memory_access_type;
> > +  const dr_alignment_support alignment_support_scheme =
> > ls.alignment_support_scheme;
> > +  const int misalignment = ls.misalignment;
> > +  const poly_int64 poffset = ls.poffset;
> > +  const internal_fn lanes_ifn = ls.lanes_ifn;
> > +  const gather_scatter_info &gs_info = ls.gs_info;
> > 
> >    if (slp_node->ldst_lanes
> >        && memory_access_type != VMAT_LOAD_STORE_LANES)
> > @@ -7974,6 +7980,7 @@ vectorizable_store (vec_info *vinfo,
> >                      "Vectorizing an unaligned access.\n");
> > 
> >        SLP_TREE_TYPE (slp_node) = store_vec_info_type;
> > +      slp_node->u.ls = new vect_load_store_data (ls);
> >      }
> >    gcc_assert (memory_access_type == SLP_TREE_MEMORY_ACCESS_TYPE
> > (slp_node));
> > 
> > @@ -8071,6 +8078,14 @@ vectorizable_store (vec_info *vinfo,
> >          ...
> >           */
> > 
> > +      /* ???  Modify local copies of alignment_support_scheme and
> > +    misalignment, but this part of analysis should be done
> > +    earlier and remembered, likewise the chosen load mode.  */
> > +      const dr_alignment_support tem = alignment_support_scheme;
> > +      dr_alignment_support alignment_support_scheme = tem;
> > +      const int tem2 = misalignment;
> > +      int misalignment = tem2;
> > +
> >        unsigned nstores = const_nunits;
> >        unsigned lnel = 1;
> >        tree ltype = elem_type;
> > @@ -9289,7 +9304,6 @@ vectorizable_load (vec_info *vinfo,
> >    bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
> >    poly_uint64 vf;
> >    tree aggr_type;
> > -  gather_scatter_info gs_info;
> >    tree ref_type;
> >    enum vect_def_type mask_dt = vect_unknown_def_type;
> >    enum vect_def_type els_dt = vect_unknown_def_type;
> > @@ -9423,20 +9437,25 @@ vectorizable_load (vec_info *vinfo,
> >    else
> >      group_size = 1;
> > 
> > -  vect_memory_access_type memory_access_type;
> > -  enum dr_alignment_support alignment_support_scheme;
> > -  int misalignment;
> > -  poly_int64 poffset;
> > -  internal_fn lanes_ifn;
> > -  auto_vec<int> elsvals;
> > -  int maskload_elsval = 0;
> > -  bool need_zeroing = false;
> > -  if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, 
> > VLS_LOAD,
> > -                       1, &memory_access_type, &poffset,
> > -                       &alignment_support_scheme, &misalignment, &gs_info,
> > -                       &lanes_ifn, &elsvals))
> > +  vect_load_store_data _ls_data;
> > +  vect_load_store_data &ls = cost_vec ? _ls_data : *slp_node->u.ls;
> > +  if (cost_vec
> > +      && !get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask,
> > +                          VLS_LOAD, 1, &ls))
> >      return false;
> > +  /* Temporary aliases to analysis data, should not be modified through
> > +     these.  */
> > +  const vect_memory_access_type memory_access_type =
> > ls.memory_access_type;
> > +  const dr_alignment_support alignment_support_scheme
> > +    = ls.alignment_support_scheme;
> > +  const int misalignment = ls.misalignment;
> > +  const poly_int64 poffset = ls.poffset;
> > +  const internal_fn lanes_ifn = ls.lanes_ifn;
> > +  const vec<int> &elsvals = ls.elsvals;
> > +  const gather_scatter_info &gs_info = ls.gs_info;
> > 
> > +  int maskload_elsval = 0;
> > +  bool need_zeroing = false;
> > 
> >    /* We might need to explicitly zero inactive elements if there are
> >       padding bits in the type that might leak otherwise.
> > @@ -9507,7 +9526,7 @@ vectorizable_load (vec_info *vinfo,
> >       if (!VECTOR_MODE_P (vec_mode)
> >           || !can_vec_mask_load_store_p (vec_mode,
> >                                          TYPE_MODE (mask_vectype),
> > -                                        true, NULL, &elsvals))
> > +                                        true, NULL, &ls.elsvals))
> >         return false;
> >     }
> >        else if (memory_access_type != VMAT_LOAD_STORE_LANES
> > @@ -9557,7 +9576,7 @@ vectorizable_load (vec_info *vinfo,
> >     check_load_store_for_partial_vectors (loop_vinfo, vectype, slp_node,
> >                                           VLS_LOAD, group_size,
> >                                           memory_access_type, &gs_info,
> > -                                         mask, &elsvals);
> > +                                         mask, &ls.elsvals);
> > 
> >        if (dump_enabled_p ()
> >       && memory_access_type != VMAT_ELEMENTWISE
> > @@ -9572,16 +9591,7 @@ vectorizable_load (vec_info *vinfo,
> >     vinfo->any_known_not_updated_vssa = true;
> > 
> >        SLP_TREE_TYPE (slp_node) = load_vec_info_type;
> > -    }
> > -  else
> > -    {
> > -      /* Here just get the else values.  */
> > -      if (loop_vinfo
> > -     && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
> > -   check_load_store_for_partial_vectors (loop_vinfo, vectype, slp_node,
> > -                                         VLS_LOAD, group_size,
> > -                                         memory_access_type, &gs_info,
> > -                                         mask, &elsvals);
> > +      slp_node->u.ls = new vect_load_store_data (ls);
> >      }
> > 
> >    /* If the type needs padding we must zero inactive elements.
> > @@ -9774,6 +9784,13 @@ vectorizable_load (vec_info *vinfo,
> >        tree ltype = TREE_TYPE (vectype);
> >        tree lvectype = vectype;
> >        auto_vec<tree> dr_chain;
> > +      /* ???  Modify local copies of alignment_support_scheme and
> > +    misalignment, but this part of analysis should be done
> > +    earlier and remembered, likewise the chosen load mode.  */
> > +      const dr_alignment_support tem = alignment_support_scheme;
> > +      dr_alignment_support alignment_support_scheme = tem;
> > +      const int tem2 = misalignment;
> > +      int misalignment = tem2;
> >        if (memory_access_type == VMAT_STRIDED_SLP)
> >     {
> >       HOST_WIDE_INT n = gcd (group_size, const_nunits);
> > diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
> > index 1db344bbffd..2ca9fbed158 100644
> > --- a/gcc/tree-vectorizer.h
> > +++ b/gcc/tree-vectorizer.h
> > @@ -231,6 +231,43 @@ enum stmt_vec_info_type {
> >    loop_exit_ctrl_vec_info_type
> >  };
> > 
> > +/* Information about a gather/scatter call.  */
> > +struct gather_scatter_info {
> > +  /* The internal function to use for the gather/scatter operation,
> > +     or IFN_LAST if a built-in function should be used instead.  */
> > +  internal_fn ifn;
> > +
> > +  /* The FUNCTION_DECL for the built-in gather/scatter function,
> > +     or null if an internal function should be used instead.  */
> > +  tree decl;
> > +
> > +  /* The loop-invariant base value.  */
> > +  tree base;
> > +
> > +  /* The TBBA alias pointer the value of which determines the alignment
> > +     of the scalar accesses.  */
> > +  tree alias_ptr;
> > +
> > +  /* The original scalar offset, which is a non-loop-invariant SSA_NAME.  
> > */
> > +  tree offset;
> > +
> > +  /* Each offset element should be multiplied by this amount before
> > +     being added to the base.  */
> > +  int scale;
> > +
> > +  /* The definition type for the vectorized offset.  */
> > +  enum vect_def_type offset_dt;
> > +
> > +  /* The type of the vectorized offset.  */
> > +  tree offset_vectype;
> > +
> > +  /* The type of the scalar elements after loading or before storing.  */
> > +  tree element_type;
> > +
> > +  /* The type of the scalar elements being loaded or stored.  */
> > +  tree memory_type;
> > +};
> > +
> > 
> > /**********************************************************************
> > **
> >    SLP
> > 
> > **********************************************************************
> > **/
> > @@ -239,6 +276,28 @@ typedef auto_vec<std::pair<unsigned, unsigned>, 16>
> > auto_lane_permutation_t;
> >  typedef vec<unsigned> load_permutation_t;
> >  typedef auto_vec<unsigned, 16> auto_load_permutation_t;
> > 
> > +/* Analysis data from vectorizable_load and vectorizable_store.  */
> > +struct vect_load_store_data {
> > +  vect_load_store_data (vect_load_store_data &other)
> > +    : alignment_support_scheme (other.alignment_support_scheme),
> > +    memory_access_type (other.memory_access_type),
> > +    misalignment (other.misalignment),
> > +    poffset (other.poffset),
> > +    lanes_ifn (other.lanes_ifn),
> > +    gs_info (other.gs_info)
> > +  {
> > +    elsvals = std::move (other.elsvals);
> > +  }
> > +  vect_load_store_data () = default;
> > +  dr_alignment_support alignment_support_scheme;
> > +  vect_memory_access_type memory_access_type;
> > +  int misalignment;
> > +  poly_int64 poffset;
> > +  internal_fn lanes_ifn;
> > +  auto_vec<int> elsvals;
> > +  gather_scatter_info gs_info;
> > +};
> > +
> >  /* A computation tree of an SLP instance.  Each node corresponds to a 
> > group of
> >     stmts to be packed in a SIMD stmt.  */
> >  struct _slp_tree {
> > @@ -310,6 +369,8 @@ struct _slp_tree {
> >    enum stmt_vec_info_type type;
> >    union {
> >        void *undef;
> > +      /* load_vec_info_type, store_vec_info_type */
> > +      vect_load_store_data *ls;
> >    } u;
> > 
> >    /* If not NULL this is a cached failed SLP discovery attempt with
> > @@ -1536,43 +1597,6 @@ public:
> >    bool slp_vect_pattern_only_p;
> >  };
> > 
> > -/* Information about a gather/scatter call.  */
> > -struct gather_scatter_info {
> > -  /* The internal function to use for the gather/scatter operation,
> > -     or IFN_LAST if a built-in function should be used instead.  */
> > -  internal_fn ifn;
> > -
> > -  /* The FUNCTION_DECL for the built-in gather/scatter function,
> > -     or null if an internal function should be used instead.  */
> > -  tree decl;
> > -
> > -  /* The loop-invariant base value.  */
> > -  tree base;
> > -
> > -  /* The TBBA alias pointer the value of which determines the alignment
> > -     of the scalar accesses.  */
> > -  tree alias_ptr;
> > -
> > -  /* The original scalar offset, which is a non-loop-invariant SSA_NAME.  
> > */
> > -  tree offset;
> > -
> > -  /* Each offset element should be multiplied by this amount before
> > -     being added to the base.  */
> > -  int scale;
> > -
> > -  /* The definition type for the vectorized offset.  */
> > -  enum vect_def_type offset_dt;
> > -
> > -  /* The type of the vectorized offset.  */
> > -  tree offset_vectype;
> > -
> > -  /* The type of the scalar elements after loading or before storing.  */
> > -  tree element_type;
> > -
> > -  /* The type of the scalar elements being loaded or stored.  */
> > -  tree memory_type;
> > -};
> > -
> >  /* Access Functions.  */
> >  #define STMT_VINFO_STMT(S)                 (S)->stmt
> >  #define STMT_VINFO_RELEVANT(S)             (S)->relevant
> > --
> > 2.43.0
> 

-- 
Richard Biener <rguent...@suse.de>
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)

Reply via email to