Hi,
As explained in the PR, given below test case:
int a[8][10] = { [2][5] = 4 }, c;

int
main ()
{
  short b;
  int i, d;
  for (b = 4; b >= 0; b--)
    for (c = 0; c <= 6; c++)
      a[c + 1][b + 2] = a[c][b + 1];
  for (i = 0; i < 8; i++)
    for (d = 0; d < 10; d++)
      if (a[i][d] != (i == 3 && d == 6) * 4)
        __builtin_abort ();
  return 0;

the loop nest is illegal for vectorization without reversing inner loop.  The 
issue
is in data dependence checking of vectorizer, I believe the mentioned revision 
just
exposed this.  Previously the vectorization is skipped because of unsupported 
memory
operation.  The outer loop vectorization unrolls the outer loop into:

  for (b = 4; b > 0; b -= 4)
  {
    for (c = 0; c <= 6; c++)
      a[c + 1][6] = a[c][5];
    for (c = 0; c <= 6; c++)
      a[c + 1][5] = a[c][4];
    for (c = 0; c <= 6; c++)
      a[c + 1][4] = a[c][3];
    for (c = 0; c <= 6; c++)
      a[c + 1][3] = a[c][2];
  }
Then four inner loops are fused into:
  for (b = 4; b > 0; b -= 4)
  {
    for (c = 0; c <= 6; c++)
    {
      a[c + 1][6] = a[c][5];  // S1
      a[c + 1][5] = a[c][4];  // S2
      a[c + 1][4] = a[c][3];
      a[c + 1][3] = a[c][2];
    }
  }
The loop fusion needs to meet the dependence requirement.  Basically, GCC's data
dependence analyzer does not model dep between references in sibling loops, but
in practice, fusion requirement can be checked by analyzing all data references
after fusion, and there is no backward data dependence.

Apparently, the requirement is violated because we have backward data dependence
between references (a[c][5], a[c+1][5]) in S1/S2.  Note, if we reverse the inner
loop, the outer loop would become legal for vectorization.

This patch fixes the issue by enforcing dependence check.  It also adds two 
tests
with one shouldn't be vectorized and the other should.  Bootstrap and test on 
x86_64
and AArch64.  Is it OK?

Thanks,
bin
2017-12-15  Bin Cheng  <bin.ch...@arm.com>

        PR tree-optimization/81740
        * tree-vect-data-refs.c (vect_analyze_data_ref_dependence): In case
        of outer loop vectorization, check backward dependence at inner loop
        if dependence at outer loop is reversed.

gcc/testsuite
2017-12-15  Bin Cheng  <bin.ch...@arm.com>

        PR tree-optimization/81740
        * gcc.dg/vect/pr81740-1.c: New test.
        * gcc.dg/vect/pr81740-2.c: Refine test.
From c0c8cfae08c0bde2cec41a8d3abcbfea0bd2e211 Mon Sep 17 00:00:00 2001
From: Bin Cheng <binch...@e108451-lin.cambridge.arm.com>
Date: Thu, 14 Dec 2017 15:32:02 +0000
Subject: [PATCH] pr81740-20171212.txt

---
 gcc/testsuite/gcc.dg/vect/pr81740-1.c | 17 +++++++++++++++++
 gcc/testsuite/gcc.dg/vect/pr81740-2.c | 21 +++++++++++++++++++++
 gcc/tree-vect-data-refs.c             | 11 +++++++++++
 3 files changed, 49 insertions(+)
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr81740-1.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr81740-2.c

diff --git a/gcc/testsuite/gcc.dg/vect/pr81740-1.c 
b/gcc/testsuite/gcc.dg/vect/pr81740-1.c
new file mode 100644
index 0000000..d90aba5
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr81740-1.c
@@ -0,0 +1,17 @@
+/* { dg-do run } */
+int a[8][10] = { [2][5] = 4 }, c;
+
+int
+main ()
+{
+  short b;
+  int i, d;
+  for (b = 4; b >= 0; b--)
+    for (c = 0; c <= 6; c++)
+      a[c + 1][b + 2] = a[c][b + 1];
+  for (i = 0; i < 8; i++)
+    for (d = 0; d < 10; d++)
+      if (a[i][d] != (i == 3 && d == 6) * 4)
+        __builtin_abort ();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/vect/pr81740-2.c 
b/gcc/testsuite/gcc.dg/vect/pr81740-2.c
new file mode 100644
index 0000000..fb5b300
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr81740-2.c
@@ -0,0 +1,21 @@
+/* { dg-do run } */
+/* { dg-require-effective-target vect_int } */
+
+int a[8][10] = { [2][5] = 4 }, c;
+
+int
+main ()
+{
+  short b;
+  int i, d;
+  for (b = 4; b >= 0; b--)
+    for (c = 6; c >= 0; c--)
+      a[c + 1][b + 2] = a[c][b + 1];
+  for (i = 0; i < 8; i++)
+    for (d = 0; d < 10; d++)
+      if (a[i][d] != (i == 3 && d == 6) * 4)
+        __builtin_abort ();
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect"  } } */
diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c
index 996d156..3b780cf1 100644
--- a/gcc/tree-vect-data-refs.c
+++ b/gcc/tree-vect-data-refs.c
@@ -435,6 +435,17 @@ vect_analyze_data_ref_dependence (struct 
data_dependence_relation *ddr,
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "dependence distance negative.\n");
+         /* When doing outer loop vectorization, we need to check if there is
+            backward dependence at inner loop level if dependence at the outer
+            loop is reversed.  See PR81740 for more information.  */
+         if (nested_in_vect_loop_p (loop, DR_STMT (dra))
+             || nested_in_vect_loop_p (loop, DR_STMT (drb)))
+           {
+             unsigned inner_depth = index_in_loop_nest (loop->inner->num,
+                                                        DDR_LOOP_NEST (ddr));
+             if (dist_v[inner_depth] < 0)
+               return true;
+           }
          /* Record a negative dependence distance to later limit the
             amount of stmt copying / unrolling we can perform.
             Only need to handle read-after-write dependence.  */
-- 
1.9.1

Reply via email to