This lets the vectorizer handle some simple strides expressed using a left-shift
rather than a multiplication, e.g. a[i << 1] (whereas previously only a[i * 2]
would have been handled).
This patch does *not* handle the general case of shifts - neither a[i << j]
nor a[1 << i] will be handled; that would be a significantly bigger patch
(probably duplicating or generalizing much of chrec_fold_multiply and
chrec_fold_multiply_poly_poly in tree-chrec.c), and would probably also only
be applicable to machines with gather-load support.
Bootstrapped and tested (check-gcc, check-g++, check-gfortran) on x86_64,
AArch64 and ARM; also with Ada on x86_64.
Is this OK for trunk?
gcc/ChangeLog:
PR tree-optimization/65963
* tree-scalar-evolution.c (interpret_rhs_expr): Handle some LSHIFT_EXPRs
as equivalent MULT_EXPRs.
gcc/testsuite/ChangeLog:
* gcc.dg/vect/vect-strided-shift-1.c: New.
---
gcc/testsuite/gcc.dg/vect/vect-strided-shift-1.c | 33 ++++++++++++++++++++++++
gcc/tree-scalar-evolution.c | 18 +++++++++++++
2 files changed, 51 insertions(+)
create mode 100644 gcc/testsuite/gcc.dg/vect/vect-strided-shift-1.c
diff --git a/gcc/testsuite/gcc.dg/vect/vect-strided-shift-1.c b/gcc/testsuite/gcc.dg/vect/vect-strided-shift-1.c
new file mode 100644
index 0000000..b1ce2ec
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-strided-shift-1.c
@@ -0,0 +1,33 @@
+/* PR tree-optimization/65963. */
+#include "tree-vect.h"
+
+#define N 512
+
+int in[2*N], out[N];
+
+__attribute__ ((noinline)) void
+loop (void)
+{
+ for (int i = 0; i < N; i++)
+ out[i] = in[i << 1] + 7;
+}
+
+int
+main (int argc, char **argv)
+{
+ check_vect ();
+ for (int i = 0; i < 2*N; i++)
+ {
+ in[i] = i;
+ __asm__ volatile ("" : : : "memory");
+ }
+ loop ();
+ __asm__ volatile ("" : : : "memory");
+ for (int i = 0; i < N; i++)
+ {
+ if (out[i] != i*2 + 7)
+ abort ();
+ }
+ return 0;
+}
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 1 "vect" { target { vect_strided2 } } } } */
diff --git a/gcc/tree-scalar-evolution.c b/gcc/tree-scalar-evolution.c
index 0753bf3..e478b0e 100644
--- a/gcc/tree-scalar-evolution.c
+++ b/gcc/tree-scalar-evolution.c
@@ -1831,12 +1831,30 @@ interpret_rhs_expr (struct loop *loop, gimple *at_stmt,
break;
case MULT_EXPR:
+ case LSHIFT_EXPR:
+ /* Handle A<<B as A * (1<<B). */
chrec1 = analyze_scalar_evolution (loop, rhs1);
chrec2 = analyze_scalar_evolution (loop, rhs2);
chrec1 = chrec_convert (type, chrec1, at_stmt);
chrec2 = chrec_convert (type, chrec2, at_stmt);
chrec1 = instantiate_parameters (loop, chrec1);
chrec2 = instantiate_parameters (loop, chrec2);
+ if (code == LSHIFT_EXPR)
+ {
+ /* Do the shift in the larger size, as in e.g. (long) << (int)32,
+ we must do 1<<32 as a long or we'd overflow. */
+ tree type = TREE_TYPE (chrec2);
+ if (TYPE_PRECISION (TREE_TYPE (chrec1)) > TYPE_PRECISION (type))
+ type = TREE_TYPE (chrec1);
+ if (TYPE_PRECISION (type) == 0)
+ {
+ res = chrec_dont_know;
+ break;
+ }
+ chrec2 = fold_build2 (LSHIFT_EXPR, type,
+ build_int_cst (type, 1),
+ chrec2);
+ }
res = chrec_fold_multiply (type, chrec1, chrec2);
break;
--
1.9.1