https://gcc.gnu.org/g:1ff5f8f8a05dd57620a1e2abbf87bd511b113cce
commit r15-1726-g1ff5f8f8a05dd57620a1e2abbf87bd511b113cce
Author: Feng Xue <f...@os.amperecomputing.com>
Date:   Wed Jun 26 22:02:53 2024 +0800

    vect: Fix shift-by-induction for single-lane slp

    Allow shift-by-induction for slp node, when it is single lane, which is
    aligned with the original loop-based handling.

    2024-06-26 Feng Xue <f...@os.amperecomputing.com>

    gcc/
            * tree-vect-stmts.cc (vectorizable_shift): Allow shift-by-induction
            for single-lane slp node.

    gcc/testsuite/
            * gcc.dg/vect/vect-shift-6.c
            * gcc.dg/vect/vect-shift-7.c

Diff:
---
 gcc/testsuite/gcc.dg/vect/vect-shift-6.c | 52 ++++++++++++++++++++++++
 gcc/testsuite/gcc.dg/vect/vect-shift-7.c | 69 ++++++++++++++++++++++++++++++++
 gcc/tree-vect-stmts.cc                   |  2 +-
 3 files changed, 122 insertions(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.dg/vect/vect-shift-6.c b/gcc/testsuite/gcc.dg/vect/vect-shift-6.c
new file mode 100644
index 00000000000..277093bc7bb
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-shift-6.c
@@ -0,0 +1,52 @@
+/* { dg-require-effective-target vect_shift } */
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include <stdint.h>
+#include "tree-vect.h"
+
+#define N 32
+
+int32_t A[N];
+int32_t B[N];
+
+#define FN(name)                   \
+__attribute__((noipa))             \
+void name(int32_t *a)              \
+{                                  \
+  for (int i = 0; i < N / 2; i++)  \
+    {                              \
+      a[2 * i + 0] <<= i;          \
+      a[2 * i + 1] <<= i;          \
+    }                              \
+}
+
+
+FN(foo_vec)
+
+#pragma GCC push_options
+#pragma GCC optimize ("O0")
+FN(foo_novec)
+#pragma GCC pop_options
+
+int main ()
+{
+  int i;
+
+  check_vect ();
+
+#pragma GCC novector
+  for (i = 0; i < N; i++)
+    A[i] = B[i] = -(i + 1);
+
+  foo_vec(A);
+  foo_novec(B);
+
+  /* check results: */
+#pragma GCC novector
+  for (i = 0; i < N; i++)
+    if (A[i] != B[i])
+      abort ();
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/vect/vect-shift-7.c b/gcc/testsuite/gcc.dg/vect/vect-shift-7.c
new file mode 100644
index 00000000000..6de3f39a87f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-shift-7.c
@@ -0,0 +1,69 @@
+/* { dg-require-effective-target vect_shift } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-additional-options "--param max-completely-peel-times=6" } */
+
+#include <stdarg.h>
+#include <stdint.h>
+#include "tree-vect.h"
+
+#define N 16
+#define M 16
+
+int32_t A[N];
+int32_t B[N];
+
+#define FN(name)                   \
+__attribute__((noipa))             \
+void name(int32_t *a, int m)       \
+{                                  \
+  for (int i = 0; i < N / 2; i++)  \
+    {                              \
+      int s1 = i;                  \
+      int s2 = s1 + 1;             \
+      int32_t r1 = 0;              \
+      int32_t r2 = 7;              \
+      int32_t t1 = m;              \
+                                   \
+      for (int j = 0; j < M; j++)  \
+        {                          \
+          r1 += t1 << s1;          \
+          r2 += t1 << s2;          \
+          t1++;                    \
+          s1++;                    \
+          s2++;                    \
+        }                          \
+                                   \
+      a[2 * i + 0] = r1;           \
+      a[2 * i + 1] = r2;           \
+    }                              \
+}
+
+
+FN(foo_vec)
+
+#pragma GCC push_options
+#pragma GCC optimize ("O0")
+FN(foo_novec)
+#pragma GCC pop_options
+
+int main ()
+{
+  int i;
+
+  check_vect ();
+
+#pragma GCC novector
+  for (i = 0; i < N; i++)
+    A[i] = B[i] = 0;
+
+  foo_vec(A, 0);
+  foo_novec(B, 0);
+
+  /* check results: */
+#pragma GCC novector
+  for (i = 0; i < N; i++)
+    if (A[i] != B[i])
+      abort ();
+
+  return 0;
+}
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 7b889f31645..aab3aa59962 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -6175,7 +6175,7 @@ vectorizable_shift (vec_info *vinfo,
   if ((dt[1] == vect_internal_def
        || dt[1] == vect_induction_def
        || dt[1] == vect_nested_cycle)
-      && !slp_node)
+      && (!slp_node || SLP_TREE_LANES (slp_node) == 1))
     scalar_shift_arg = false;
   else if (dt[1] == vect_constant_def
            || dt[1] == vect_external_def