https://gcc.gnu.org/g:d3b05ff815921ffe525787ed375665ef08a35135

commit r16-7006-gd3b05ff815921ffe525787ed375665ef08a35135
Author: Robin Dapp <[email protected]>
Date:   Fri Jan 23 11:51:57 2026 +0100

    vect: Only scale vec_offset once [PR123767].
    
    Since we started allowing "unsupported" scales by just multiplying the
    offset, there has been an issue with how vec_offset is adjusted:
    
    For "real" gathers/scatters we have a separate vec_offset per stmt copy.
    For strided gather/scatter, however, there is just one vec_offset common
    to all copies.
    
    In case of an unsupported scale we need to multiply vec_offset by the
    required scale, which is currently done like this:
     for (i = 0; i < num_vec; i++)
       vec_offset = vec_offset * scale_constant;
    where vec_offset only differs between copies for real gathers/scatters.
    
    Thus, for more than one copy of a strided gather/scatter, we will
    erroneously multiply an already scaled vec_offset.
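    
    For illustration (assuming an LP64 target): struct partition_elem in the
    new test is 24 bytes, so vec_offset should be scaled by 24 exactly once.
    Scaling it again for the second copy yields 24 * 24 = 576, which is the
    { 0, 576 } offset constant the test's scan-tree-dump-not pattern checks
    must not appear.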
    
    This patch only performs the vec_offset scaling
     - for each copy in real gathers/scatters or
     - once, for the first copy only, for strided gathers/scatters
       (see the sketch below).
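    
    In loop form the fix amounts to the following (a simplified sketch; the
    actual code guards the multiplication with STMT_VINFO_GATHER_SCATTER_P
    and the copy index, see the diff below):
     for (i = 0; i < num_vec; i++)
       if (real_gather_scatter_p || i == 0)
         vec_offset = vec_offset * scale_constant;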
    
            PR tree-optimization/123767
    
    gcc/ChangeLog:
    
            * tree-vect-stmts.cc (vectorizable_store): Only scale offset
            once.
            (vectorizable_load): Ditto.
    
    gcc/testsuite/ChangeLog:
    
            * gcc.target/aarch64/sve/pr123767.c: New test.

Diff:
---
 gcc/testsuite/gcc.target/aarch64/sve/pr123767.c | 35 +++++++++++++++++++++
 gcc/tree-vect-stmts.cc                          | 42 +++++++++++++++----------
 2 files changed, 61 insertions(+), 16 deletions(-)

diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr123767.c b/gcc/testsuite/gcc.target/aarch64/sve/pr123767.c
new file mode 100644
index 000000000000..5b123c057680
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pr123767.c
@@ -0,0 +1,35 @@
+/* { dg-do compile } */
+/* { dg-options "-march=armv9-a -O3 -mmax-vectorization -msve-vector-bits=128 -mautovec-preference=sve-only -fdump-tree-vect-details" } */
+
+struct partition_elem
+{
+  int class_element;
+  struct partition_elem* next;
+  unsigned class_count;
+};
+
+typedef struct partition_def
+{
+  int num_elements;
+  struct partition_elem elements[1];
+} *partition;
+
+partition part;
+
+partition
+partition_new (int num_elements)
+{
+  int e;
+
+  /* No need to allocate memory, just a compile test.  */
+  for (e = 0; e < num_elements; ++e)
+    {
+      part->elements[e].class_element = e;
+      part->elements[e].next = &(part->elements[e]);
+      part->elements[e].class_count = 1;
+    }
+
+  return part;
+}
+
+/* { dg-final { scan-tree-dump-not "\\{ 0, 576 \\}" "vect" } } */
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 234a3aa4f4ba..ee98e72d1e57 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -8972,14 +8972,19 @@ vectorizable_store (vec_info *vinfo,
                      (&stmts, ls.supported_offset_vectype, vec_offset);
                  if (ls.supported_scale)
                    {
-                     tree mult_cst = build_int_cst
-                       (TREE_TYPE (TREE_TYPE (vec_offset)),
-                        SLP_TREE_GS_SCALE (slp_node) / ls.supported_scale);
-                     tree mult = build_vector_from_val
-                       (TREE_TYPE (vec_offset), mult_cst);
-                     vec_offset = gimple_build
-                       (&stmts, MULT_EXPR, TREE_TYPE (vec_offset),
-                        vec_offset, mult);
+                     /* Only scale the vec_offset if we haven't already.  */
+                     if (STMT_VINFO_GATHER_SCATTER_P (stmt_info)
+                         || j == 0)
+                       {
+                         tree mult_cst = build_int_cst
+                           (TREE_TYPE (TREE_TYPE (vec_offset)),
+                            SLP_TREE_GS_SCALE (slp_node) / ls.supported_scale);
+                         tree mult = build_vector_from_val
+                           (TREE_TYPE (vec_offset), mult_cst);
+                         vec_offset = gimple_build
+                           (&stmts, MULT_EXPR, TREE_TYPE (vec_offset),
+                            vec_offset, mult);
+                       }
                      scale = size_int (ls.supported_scale);
                    }
                  gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
@@ -10923,14 +10928,19 @@ vectorizable_load (vec_info *vinfo,
                      (&stmts, ls.supported_offset_vectype, vec_offset);
                  if (ls.supported_scale)
                    {
-                     tree mult_cst = build_int_cst
-                       (TREE_TYPE (TREE_TYPE (vec_offset)),
-                        SLP_TREE_GS_SCALE (slp_node) / ls.supported_scale);
-                     tree mult = build_vector_from_val
-                       (TREE_TYPE (vec_offset), mult_cst);
-                     vec_offset = gimple_build
-                       (&stmts, MULT_EXPR, TREE_TYPE (vec_offset),
-                        vec_offset, mult);
+                     /* Only scale the vec_offset if we haven't already.  */
+                     if (STMT_VINFO_GATHER_SCATTER_P (stmt_info)
+                         || i == 0)
+                       {
+                         tree mult_cst = build_int_cst
+                           (TREE_TYPE (TREE_TYPE (vec_offset)),
+                            SLP_TREE_GS_SCALE (slp_node) / ls.supported_scale);
+                         tree mult = build_vector_from_val
+                           (TREE_TYPE (vec_offset), mult_cst);
+                         vec_offset = gimple_build
+                           (&stmts, MULT_EXPR, TREE_TYPE (vec_offset),
+                            vec_offset, mult);
+                       }
                      scale = size_int (ls.supported_scale);
                    }
                  gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
