While the main gather/scatter discovery happens at SLP discovery time,
the base address and the offset scale are currently not explicitly
represented in the SLP tree.  This requires re-discovery of them
during vectorizable_store/load.  The following fixes this by
recording this info into the SLP tree.  This allows the main
vect_check_gather_scatter call to be elided from get_load_store_type
and replaced with target support checks for IFN/decl or fallback
emulated mode.

A vect_check_gather_scatter call is still left in the path that uses
gather/scatter for strided loads/stores.  I hope to deal with this later.

Bootstrapped and tested on x86_64-unknown-linux-gnu.  On aarch64
vect.exp is clean, but I hope the CI is back.

        * tree-vectorizer.h (_slp_tree::gs_scale): New.
        (_slp_tree::gs_base): Likewise.
        (SLP_TREE_GS_SCALE): Likewise.
        (SLP_TREE_GS_BASE): Likewise.
        * tree-vect-slp.cc (_slp_tree::_slp_tree): Initialize
        new members.
        (vect_build_slp_tree_2): Record gather/scatter base and scale.
        * tree-vect-data-refs.cc (vect_gather_scatter_fn_p): Add
        mode of operation with fixed offset vector type.
        * tree-vect-stmts.cc (get_load_store_type): Do not call
        vect_check_gather_scatter to fill gs_info, instead populate
        from the SLP tree.  Check which of IFN, decl or fallback
        is supported and record that decision.
---
 gcc/tree-vect-data-refs.cc | 22 ++++++++++++----
 gcc/tree-vect-slp.cc       | 13 ++++++++++
 gcc/tree-vect-stmts.cc     | 51 ++++++++++++++++++++++----------------
 gcc/tree-vectorizer.h      |  7 ++++++
 4 files changed, 67 insertions(+), 26 deletions(-)

diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc
index da700cd1f3d..ce4b9112001 100644
--- a/gcc/tree-vect-data-refs.cc
+++ b/gcc/tree-vect-data-refs.cc
@@ -4430,8 +4430,9 @@ vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
    MASKED_P is true if the load or store is conditional.  MEMORY_TYPE is
    the type of the memory elements being loaded or stored.  OFFSET_TYPE
    is the type of the offset that is being applied to the invariant
-   base address.  SCALE is the amount by which the offset should
-   be multiplied *after* it has been converted to address width.
+   base address.  If OFFSET_TYPE is scalar the function chooses an
+   appropriate vector type for it.  SCALE is the amount by which the
+   offset should be multiplied *after* it has been converted to address width.
 
    Return true if the function is supported, storing the function id in
    *IFN_OUT and the vector type for the offset in *OFFSET_VECTYPE_OUT.
@@ -4474,9 +4475,15 @@ vect_gather_scatter_fn_p (vec_info *vinfo, bool read_p, bool masked_p,
 
   for (;;)
     {
-      tree offset_vectype = get_vectype_for_scalar_type (vinfo, offset_type);
-      if (!offset_vectype)
-       return false;
+      tree offset_vectype;
+      if (VECTOR_TYPE_P (offset_type))
+       offset_vectype = offset_type;
+      else
+       {
+         offset_vectype = get_vectype_for_scalar_type (vinfo, offset_type);
+         if (!offset_vectype)
+           return false;
+       }
 
       /* Test whether the target supports this combination.  */
       if (internal_gather_scatter_fn_supported_p (ifn, vectype, memory_type,
@@ -4507,10 +4514,15 @@ vect_gather_scatter_fn_p (vec_info *vinfo, bool read_p, bool masked_p,
          return true;
        }
 
+      /* For fixed offset vector type we're done.  */
+      if (VECTOR_TYPE_P (offset_type))
+       return false;
+
       if (TYPE_PRECISION (offset_type) >= POINTER_SIZE
          && TYPE_PRECISION (offset_type) >= element_bits)
        return false;
 
+      /* Try a larger offset vector type.  */
       offset_type = build_nonstandard_integer_type
        (TYPE_PRECISION (offset_type) * 2, TYPE_UNSIGNED (offset_type));
     }
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index ca14a2deed2..5a6f23cf18b 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -120,6 +120,8 @@ _slp_tree::_slp_tree ()
   SLP_TREE_LANE_PERMUTATION (this) = vNULL;
   SLP_TREE_DEF_TYPE (this) = vect_uninitialized_def;
   SLP_TREE_CODE (this) = ERROR_MARK;
+  SLP_TREE_GS_SCALE (this) = 0;
+  SLP_TREE_GS_BASE (this) = NULL_TREE;
   this->ldst_lanes = false;
   this->avoid_stlf_fail = false;
   SLP_TREE_VECTYPE (this) = NULL_TREE;
@@ -2723,6 +2725,9 @@ out:
 
   stmt_info = stmts[0];
 
+  int gs_scale = 0;
+  tree gs_base = NULL_TREE;
+
   /* Create SLP_TREE nodes for the definition node/s.  */
   FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
     {
@@ -2745,6 +2750,12 @@ out:
          continue;
        }
 
+      if (oprnd_info->first_gs_p)
+       {
+         gs_scale = oprnd_info->first_gs_info.scale;
+         gs_base = oprnd_info->first_gs_info.base;
+       }
+
       if (is_a <bb_vec_info> (vinfo)
          && oprnd_info->first_dt == vect_internal_def
          && !oprnd_info->any_pattern)
@@ -3134,6 +3145,8 @@ fail:
   node = vect_create_new_slp_node (node, stmts, nops);
   SLP_TREE_VECTYPE (node) = vectype;
   SLP_TREE_CHILDREN (node).splice (children);
+  SLP_TREE_GS_SCALE (node) = gs_scale;
+  SLP_TREE_GS_BASE (node) = gs_base;
   return node;
 }
 
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 97222f64b7e..96711509c90 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -2015,31 +2015,40 @@ get_load_store_type (vec_info  *vinfo, stmt_vec_info stmt_info,
   else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
     {
       *memory_access_type = VMAT_GATHER_SCATTER;
-      if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info,
-                                     elsvals))
-       gcc_unreachable ();
       slp_tree offset_node = SLP_TREE_CHILDREN (slp_node)[0];
       tree offset_vectype = SLP_TREE_VECTYPE (offset_node);
+      memset (gs_info, 0, sizeof (gather_scatter_info));
       gs_info->offset_vectype = offset_vectype;
-      /* When using internal functions, we rely on pattern recognition
-        to convert the type of the offset to the type that the target
-        requires, with the result being a call to an internal function.
-        If that failed for some reason (e.g. because another pattern
-        took priority), just handle cases in which the offset already
-        has the right type.  */
-      if (GATHER_SCATTER_IFN_P (*gs_info)
-         && !is_gimple_call (stmt_info->stmt)
-         && !tree_nop_conversion_p (TREE_TYPE (gs_info->offset),
-                                    TREE_TYPE (offset_vectype)))
-       {
-         if (dump_enabled_p ())
-           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-                            "%s offset requires a conversion\n",
-                            vls_type == VLS_LOAD ? "gather" : "scatter");
-         return false;
-       }
-      else if (GATHER_SCATTER_EMULATED_P (*gs_info))
+      gs_info->scale = SLP_TREE_GS_SCALE (slp_node);
+      gs_info->base = SLP_TREE_GS_BASE (slp_node);
+      gs_info->memory_type = TREE_TYPE (DR_REF (first_dr_info->dr));
+      gs_info->decl = NULL_TREE;
+      gs_info->ifn = IFN_LAST;
+      tree tem;
+      if (vect_gather_scatter_fn_p (loop_vinfo, vls_type == VLS_LOAD,
+                                   masked_p, vectype,
+                                   gs_info->memory_type,
+                                   offset_vectype, gs_info->scale,
+                                   &gs_info->ifn, &tem,
+                                   elsvals))
+       /* GATHER_SCATTER_IFN_P.  */;
+      else if (vls_type == VLS_LOAD
+              ? (targetm.vectorize.builtin_gather
+                 && (gs_info->decl
+                       = targetm.vectorize.builtin_gather (vectype,
+                                                           TREE_TYPE
+                                                             (offset_vectype),
+                                                           gs_info->scale)))
+              : (targetm.vectorize.builtin_scatter
+                 && (gs_info->decl
+                       = targetm.vectorize.builtin_scatter (vectype,
+                                                            TREE_TYPE
+                                                              (offset_vectype),
+                                                            gs_info->scale))))
+       /* GATHER_SCATTER_LEGACY_P.  */;
+      else
        {
+         /* GATHER_SCATTER_EMULATED_P.  */
          if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant ()
              || !TYPE_VECTOR_SUBPARTS (offset_vectype).is_constant ()
              || VECTOR_BOOLEAN_TYPE_P (offset_vectype)
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 0a75ee15857..6517c2aef17 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -306,6 +306,11 @@ struct _slp_tree {
   unsigned int lanes;
   /* The operation of this node.  */
   enum tree_code code;
+  /* For gather/scatter memory operations the scale each offset element
+     should be multiplied by before being added to the base.  */
+  int gs_scale;
+  /* For gather/scatter memory operations the loop-invariant base value.  */
+  tree gs_base;
   /* Whether uses of this load or feeders of this store are suitable
      for load/store-lanes.  */
   bool ldst_lanes;
@@ -412,6 +417,8 @@ public:
 #define SLP_TREE_CODE(S)                        (S)->code
 #define SLP_TREE_MEMORY_ACCESS_TYPE(S)          (S)->memory_access_type
 #define SLP_TREE_TYPE(S)                        (S)->type
+#define SLP_TREE_GS_SCALE(S)                    (S)->gs_scale
+#define SLP_TREE_GS_BASE(S)                     (S)->gs_base
 
 enum vect_partial_vector_style {
     vect_partial_vectors_none,
-- 
2.43.0

Reply via email to