This fixes restarting vectorization without SLP when SLP analysis has
removed an SLP instance - I was relying on the reduction chains
still being present in the SLP instances.  The following detects
this case properly and also handles SLP reductions correctly
(they can be vectorized non-SLP as well).

Bootstrap and regtest running on x86_64-unknown-linux-gnu.

Richard.

2015-12-09  Richard Biener  <rguent...@suse.de>

        PR tree-optimization/68806
        * tree-vect-loop.c (vect_analyze_loop_2): Properly detect
        reduction chains and ignore SLP reductions.

        * gcc.dg/torture/pr68806.c: New testcase.

Index: gcc/tree-vect-loop.c
===================================================================
--- gcc/tree-vect-loop.c        (revision 231451)
+++ gcc/tree-vect-loop.c        (working copy)
@@ -2123,9 +2123,12 @@ again:
   if (!slp)
     return false;
 
+  /* If there are reduction chains re-trying will fail anyway.  */
+  if (! LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo).is_empty ())
+    return false;
+
   /* Likewise if the grouped loads or stores in the SLP cannot be handled
-     via interleaving or lane instructions or if there were any SLP
-     reductions.  */
+     via interleaving or lane instructions.  */
   slp_instance instance;
   slp_tree node;
   unsigned i, j;
@@ -2135,7 +2138,7 @@ again:
       vinfo = vinfo_for_stmt
          (SLP_TREE_SCALAR_STMTS (SLP_INSTANCE_TREE (instance))[0]);
       if (! STMT_VINFO_GROUPED_ACCESS (vinfo))
-       return false;
+       continue;
       vinfo = vinfo_for_stmt (STMT_VINFO_GROUP_FIRST_ELEMENT (vinfo));
       unsigned int size = STMT_VINFO_GROUP_SIZE (vinfo);
       tree vectype = STMT_VINFO_VECTYPE (vinfo);
Index: gcc/testsuite/gcc.dg/torture/pr68806.c
===================================================================
--- gcc/testsuite/gcc.dg/torture/pr68806.c      (revision 0)
+++ gcc/testsuite/gcc.dg/torture/pr68806.c      (working copy)
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* sad: sum the first 12 bytes of each of 16 rows; rows are p2 bytes apart.  */
+int sad(const unsigned char *p1, long p2)
+{
+  int a = 0;
+  for (int y = 0; y < 16; y++)  /* One iteration per row.  */
+    {
+      for (int x = 0; x < 12; x++)  /* Accumulate the row's bytes into a.  */
+       a += p1[x];
+      p1 += p2;  /* Step to the start of the next row.  */
+    }
+  return a;
+}

Reply via email to