https://gcc.gnu.org/g:1ff5f8f8a05dd57620a1e2abbf87bd511b113cce

commit r15-1726-g1ff5f8f8a05dd57620a1e2abbf87bd511b113cce
Author: Feng Xue <f...@os.amperecomputing.com>
Date:   Wed Jun 26 22:02:53 2024 +0800

    vect: Fix shift-by-induction for single-lane slp
    
    Allow shift-by-induction for an SLP node when it has a single lane,
    which aligns with the original loop-based handling.
    
    2024-06-26 Feng Xue <f...@os.amperecomputing.com>
    
    gcc/
            * tree-vect-stmts.cc (vectorizable_shift): Allow shift-by-induction
            for single-lane slp node.
    
    gcc/testsuite/
            * gcc.dg/vect/vect-shift-6.c: New test.
            * gcc.dg/vect/vect-shift-7.c: New test.

Diff:
---
 gcc/testsuite/gcc.dg/vect/vect-shift-6.c | 52 ++++++++++++++++++++++++
 gcc/testsuite/gcc.dg/vect/vect-shift-7.c | 69 ++++++++++++++++++++++++++++++++
 gcc/tree-vect-stmts.cc                   |  2 +-
 3 files changed, 122 insertions(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.dg/vect/vect-shift-6.c b/gcc/testsuite/gcc.dg/vect/vect-shift-6.c
new file mode 100644
index 00000000000..277093bc7bb
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-shift-6.c
@@ -0,0 +1,52 @@
+/* { dg-require-effective-target vect_shift } */
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include <stdint.h>
+#include "tree-vect.h"
+
+#define N 32
+
+int32_t A[N]; 
+int32_t B[N];
+
+#define FN(name)                   \
+__attribute__((noipa))             \
+void name(int32_t *a)                  \
+{                                  \
+  for (int i = 0; i < N / 2; i++)  \
+    {                              \
+       a[2 * i + 0] <<= i;         \
+       a[2 * i + 1] <<= i;         \
+    }                              \
+}
+
+
+FN(foo_vec)
+
+#pragma GCC push_options
+#pragma GCC optimize ("O0")
+FN(foo_novec)
+#pragma GCC pop_options
+
+int main ()
+{
+  int i;
+
+  check_vect ();
+
+#pragma GCC novector
+  for (i = 0; i < N; i++)
+    A[i] = B[i] = -(i + 1);
+
+  foo_vec(A);
+  foo_novec(B);
+
+  /* check results:  */
+#pragma GCC novector
+  for (i = 0; i < N; i++)
+    if (A[i] != B[i])
+      abort ();
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/vect/vect-shift-7.c b/gcc/testsuite/gcc.dg/vect/vect-shift-7.c
new file mode 100644
index 00000000000..6de3f39a87f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-shift-7.c
@@ -0,0 +1,69 @@
+/* { dg-require-effective-target vect_shift } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-additional-options "--param max-completely-peel-times=6" } */
+
+#include <stdarg.h>
+#include <stdint.h>
+#include "tree-vect.h"
+
+#define N 16
+#define M 16
+
+int32_t A[N];
+int32_t B[N];
+
+#define FN(name)                   \
+__attribute__((noipa))             \
+void name(int32_t *a, int m)       \
+{                                  \
+  for (int i = 0; i < N / 2; i++)  \
+    {                              \
+      int s1 = i;                  \
+      int s2 = s1 + 1;             \
+      int32_t r1 = 0;              \
+      int32_t r2 = 7;              \
+      int32_t t1 = m;              \
+                                  \
+      for (int j = 0; j < M; j++)  \
+         {                         \
+            r1 += t1 << s1;        \
+            r2 += t1 << s2;        \
+            t1++;                  \
+            s1++;                  \
+            s2++;                  \
+         }                         \
+                                   \
+       a[2 * i + 0] = r1;          \
+       a[2 * i + 1] = r2;          \
+    }                              \
+}
+
+
+FN(foo_vec)
+
+#pragma GCC push_options
+#pragma GCC optimize ("O0")
+FN(foo_novec)
+#pragma GCC pop_options
+
+int main ()
+{
+  int i;
+
+  check_vect ();
+
+#pragma GCC novector
+  for (i = 0; i < N; i++)
+    A[i] = B[i] = 0;
+
+  foo_vec(A, 0);
+  foo_novec(B, 0);
+
+  /* check results:  */
+#pragma GCC novector
+  for (i = 0; i < N; i++)
+    if (A[i] != B[i])
+      abort ();
+
+  return 0;
+}
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 7b889f31645..aab3aa59962 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -6175,7 +6175,7 @@ vectorizable_shift (vec_info *vinfo,
   if ((dt[1] == vect_internal_def
        || dt[1] == vect_induction_def
        || dt[1] == vect_nested_cycle)
-      && !slp_node)
+      && (!slp_node || SLP_TREE_LANES (slp_node) == 1))
     scalar_shift_arg = false;
   else if (dt[1] == vect_constant_def
           || dt[1] == vect_external_def

Reply via email to