Hi,
The vectorizer performs the following alias checks for data-refs with
unknown dependence:
((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
|| ((load_ptr_0 + load_segment_length_0) <= store_ptr_0)
where segment_length is the data-ref's step in the loop multiplied by
the loop's number of iterations (in the general case). For invariant
data-refs segment_length is 0, since the step is 0. This creates an
incorrect check for:
for (i = 0; i < 1000; i++)
for (j = 0; j < 1000; j++)
a[j] = a[i] + 1;
We check:
&a + 4000 <= &a + i*4
|| &a + i*4 <= &a
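and the second check is wrong for i=0: it reduces to &a <= &a, which is
true, so the accesses are treated as non-aliasing even though a[0] is
both loaded (a[i]) and stored (a[j] for j=0).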
This patch sets segment_length to sizeof (data-ref type) when the step
is zero, changing the checks into
&a + 4000 <= &a + i*4
|| &a + i*4 + 4 <= &a
Bootstrapped and tested on powerpc64-suse-linux.
Committed revision 176434.
Ira
ChangeLog:
PR tree-optimization/49771
* tree-vect-loop-manip.c (vect_vfa_segment_size): In case of
zero step, set segment length to the size of the data-ref's type.
testsuite/ChangeLog:
PR tree-optimization/49771
* gcc.dg/vect/pr49771.c: New test.
Index: tree-vect-loop-manip.c
===================================================================
--- tree-vect-loop-manip.c (revision 176433)
+++ tree-vect-loop-manip.c (working copy)
@@ -2356,9 +2356,14 @@ static tree
vect_vfa_segment_size (struct data_reference *dr, tree length_factor)
{
tree segment_length;
- segment_length = size_binop (MULT_EXPR,
- fold_convert (sizetype, DR_STEP (dr)),
- fold_convert (sizetype, length_factor));
+
+ if (!compare_tree_int (DR_STEP (dr), 0))
+ segment_length = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)));
+ else
+ segment_length = size_binop (MULT_EXPR,
+ fold_convert (sizetype, DR_STEP (dr)),
+ fold_convert (sizetype, length_factor));
+
if (vect_supportable_dr_alignment (dr, false)
== dr_explicit_realign_optimized)
{
Index: testsuite/gcc.dg/vect/pr49771.c
===================================================================
--- testsuite/gcc.dg/vect/pr49771.c (revision 0)
+++ testsuite/gcc.dg/vect/pr49771.c (revision 0)
@@ -0,0 +1,26 @@
+#include <stdlib.h>
+#include <stdarg.h>
+
+static int a[1000];
+
+int
+foo (void)
+{
+ int j;
+ int i;
+ for (i = 0; i < 1000; i++)
+ for (j = 0; j < 1000; j++)
+ a[j] = a[i] + 1;
+ return a[0];
+}
+
+int
+main (void)
+{
+ int res = foo ();
+ if (res != 1999)
+ abort ();
+ return 0;
+}
+
+/* { dg-final { cleanup-tree-dump "vect" } } */