[PATCH] Avoid peeling in cunrolli

Richard Biener Wed, 29 Nov 2017 04:39:07 -0800

It turns out that we don't vectorize the 2nd testcase in PR83202
(or rather we do that in weird ways during BB vectorization) because
cunrolli decides to peel the inner loop completely based on
the size of the accessed arrays.  That unfortunately leaves exit
tests in the outer loop body which in turn makes us not vectorize
the loop.


We have a late unrolling pass for these kinds of unrollings so this
patch simply avoids doing this during cunrolli.

Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.

Richard.

2017-11-29  Richard Biener  <rguent...@suse.de>

        PR tree-optimization/83202
        * tree-ssa-loop-ivcanon.c (try_unroll_loop_completely): Add
        allow_peel argument and guard peeling.
        (canonicalize_loop_induction_variables): Likewise.
        (canonicalize_induction_variables): Pass false.
        (tree_unroll_loops_completely_1): Pass unroll_outer to disallow
        peeling from cunrolli.

        * gcc.dg/vect/pr83202-1.c: New testcase.

Index: gcc/tree-ssa-loop-ivcanon.c
===================================================================
--- gcc/tree-ssa-loop-ivcanon.c (revision 255201)
+++ gcc/tree-ssa-loop-ivcanon.c (working copy)
@@ -679,7 +679,7 @@ try_unroll_loop_completely (struct loop
                            edge exit, tree niter,
                            enum unroll_level ul,
                            HOST_WIDE_INT maxiter,
-                           location_t locus)
+                           location_t locus, bool allow_peel)
 {
   unsigned HOST_WIDE_INT n_unroll = 0;
   bool n_unroll_found = false;
@@ -711,7 +711,8 @@ try_unroll_loop_completely (struct loop
     exit = NULL;
 
   /* See if we can improve our estimate by using recorded loop bounds.  */
-  if (maxiter >= 0
+  if (allow_peel
+      && maxiter >= 0
       && (!n_unroll_found || (unsigned HOST_WIDE_INT)maxiter < n_unroll))
     {
       n_unroll = maxiter;
@@ -1139,7 +1140,7 @@ try_peel_loop (struct loop *loop,
 static bool
 canonicalize_loop_induction_variables (struct loop *loop,
                                       bool create_iv, enum unroll_level ul,
-                                      bool try_eval)
+                                      bool try_eval, bool allow_peel)
 {
   edge exit = NULL;
   tree niter;
@@ -1207,7 +1208,8 @@ canonicalize_loop_induction_variables (s
      populates the loop bounds.  */
   modified |= remove_redundant_iv_tests (loop);
 
-  if (try_unroll_loop_completely (loop, exit, niter, ul, maxiter, locus))
+  if (try_unroll_loop_completely (loop, exit, niter, ul, maxiter, locus,
+                                 allow_peel))
     return true;
 
   if (create_iv
@@ -1238,7 +1240,7 @@ canonicalize_induction_variables (void)
     {
       changed |= canonicalize_loop_induction_variables (loop,
                                                        true, UL_SINGLE_ITER,
-                                                       true);
+                                                       true, false);
     }
   gcc_assert (!need_ssa_update_p (cfun));
 
@@ -1353,7 +1355,7 @@ tree_unroll_loops_completely_1 (bool may
     ul = UL_NO_GROWTH;
 
   if (canonicalize_loop_induction_variables
-        (loop, false, ul, !flag_tree_loop_ivcanon))
+        (loop, false, ul, !flag_tree_loop_ivcanon, unroll_outer))
     {
       /* If we'll continue unrolling, we need to propagate constants
         within the new basic blocks to fold away induction variable
Index: gcc/testsuite/gcc.dg/vect/pr83202-1.c
===================================================================
--- gcc/testsuite/gcc.dg/vect/pr83202-1.c       (nonexistent)
+++ gcc/testsuite/gcc.dg/vect/pr83202-1.c       (working copy)
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_double } */
+
+void test(double data[8][8])  /* Squares elements of DATA in place.  */
+{
+  for (int i = 0; i < 8; i++)
+    {
+      for (int j = 0; j < i; j+=4)  /* Inner bound depends on outer index i; j steps by 4.  */
+       {
+         data[i][j] *= data[i][j];  /* Four adjacent elements of row i are squared per iteration.  */
+         data[i][j+1] *= data[i][j+1];
+         data[i][j+2] *= data[i][j+2];
+         data[i][j+3] *= data[i][j+3];
+       }
+    }
+}
+
+/* { dg-final { scan-tree-dump "Loop contains only SLP stmts" "vect" } } */
+/* { dg-final { scan-tree-dump "ectorized 1 loops" "vect" } } */

[PATCH] Avoid peeling in cunrolli

Reply via email to