On 2021-05-18 14:36, Bernd Edlinger wrote:
On 5/17/21 4:01 AM, Jiufu Guo via Gcc-patches wrote:
When there is the possibility that overflow/wrap may happen on the loop index,
a few optimizations would not happen. For example code:

foo (int *a, int *b, unsigned k, unsigned n)
{
  while (++k != n)
    a[k] = b[k]  + 1;
}

For this code, if "k > n", k would wrap.  if "k < n" at begining,
it could be optimized (e.g. vectorization).

We can split the loop into two loops:

  while (++k > n)
    a[k] = b[k]  + 1;
  while (l++ < n)
    a[k] = b[k]  + 1;

then for the second loop, it could be optimized.

This patch is spltting this kind of small loop to achieve better performance.

Bootstrap and regtest pass on ppc64le.  Is this ok for trunk?

Thanks!

Jiufu Guo.

gcc/ChangeLog:

2021-05-15  Jiufu Guo  <guoji...@linux.ibm.com>

        * tree-ssa-loop-split.c (connect_loop_phis): Add new param.
        (get_ne_cond_branch): New function.
        (split_ne_loop): New function.
        (split_loop_on_ne_cond): New function.
        (tree_ssa_split_loops): Use split_loop_on_ne_cond.

gcc/testsuite/ChangeLog:

2021-05-15  Jiufu Guo  <guoji...@linux.ibm.com>

        * gcc.dg/loop-split1.c: New test.
        * g++.dg/vect/pr98064.cc: Suppress warning.
---
 gcc/testsuite/g++.dg/vect/pr98064.cc |   4 +-
 gcc/testsuite/gcc.dg/loop-split1.c   | 108 +++++++++++++++
gcc/tree-ssa-loop-split.c | 188 ++++++++++++++++++++++++++-
 3 files changed, 296 insertions(+), 4 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/loop-split1.c

diff --git a/gcc/testsuite/g++.dg/vect/pr98064.cc b/gcc/testsuite/g++.dg/vect/pr98064.cc
index 74043ce7725..dcb2985d05a 100644
--- a/gcc/testsuite/g++.dg/vect/pr98064.cc
+++ b/gcc/testsuite/g++.dg/vect/pr98064.cc
@@ -1,5 +1,7 @@
 // { dg-do compile }
-// { dg-additional-options "-O3" }
+// { dg-additional-options "-O3 -Wno-stringop-overflow" }
+/* There is warning message when "short g = var_8; g; g++"
+   is optimized/analyzed as string operation,e.g. memset.  */

 const long long &min(const long long &__a, long long &__b) {
   if (__b < __a)
diff --git a/gcc/testsuite/gcc.dg/loop-split1.c b/gcc/testsuite/gcc.dg/loop-split1.c
new file mode 100644
index 00000000000..30b006b1b14
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/loop-split1.c
@@ -0,0 +1,108 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fsplit-loops -fdump-tree-lsplit-details" } */
+
+void
+foo (int *a, int *b, unsigned l, unsigned n)
+{
+  while (++l != n)
+    a[l] = b[l]  + 1;
+}
+void
+foo_1 (int *a, int *b, unsigned n)
+{
+  unsigned l = 0;
+  while (++l != n)
+    a[l] = b[l]  + 1;
+}
+
+void
+foo1 (int *a, int *b, unsigned l, unsigned n)
+{
+  while (l++ != n)
+    a[l] = b[l]  + 1;
+}
+
+/* No wrap.  */
+void
+foo1_1 (int *a, int *b, unsigned n)
+{
+  unsigned l = 0;
+  while (l++ != n)
+    a[l] = b[l]  + 1;
+}
+
+unsigned
+foo2 (char *a, char *b, unsigned l, unsigned n)
+{
+  while (++l != n)
+    if (a[l] != b[l])
+      break;
+
+  return l;
+}
+
+unsigned
+foo2_1 (char *a, char *b, unsigned l, unsigned n)
+{
+  l = 0;
+  while (++l != n)
+    if (a[l] != b[l])
+      break;
+
+  return l;
+}
+
+unsigned
+foo3 (char *a, char *b, unsigned l, unsigned n)
+{
+  while (l++ != n)
+    if (a[l] != b[l])
+      break;
+
+  return l;
+}
+
+/* No wrap.  */
+unsigned
+foo3_1 (char *a, char *b, unsigned l, unsigned n)
+{
+  l = 0;
+  while (l++ != n)
+    if (a[l] != b[l])
+      break;
+
+  return l;
+}
+
+void bar();
+void foo4(unsigned n,  unsigned i)
+{
+  do
+    {
+      if (i == n)
+        return;
+      bar();
+      ++i;
+    }
+  while (1);
+}
+
+unsigned
+foo5 (double *a, unsigned n, unsigned i)
+{
+  while (a[i] > 0 && i != n)
+    i++;
+
+  return i;
+}
+
+unsigned
+find_skip_diff (char *p, char *q, unsigned n, unsigned i)
+{
+  while (p[i] == q[i] && ++i != n)
+    p++,q++;
+
+  return i;
+}
+
+/* { dg-final { scan-tree-dump-times "Loop split" 9 "lsplit" } } */
diff --git a/gcc/tree-ssa-loop-split.c b/gcc/tree-ssa-loop-split.c
index 3a09bbc39e5..5c1742b5ff4 100644
--- a/gcc/tree-ssa-loop-split.c
+++ b/gcc/tree-ssa-loop-split.c
@@ -41,6 +41,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "cfghooks.h"
 #include "gimple-fold.h"
 #include "gimplify-me.h"
+#include "tree-ssa-loop-ivopts.h"

 /* This file implements two kinds of loop splitting.

@@ -233,7 +234,8 @@ easy_exit_values (class loop *loop)
    this.  The loops need to fulfill easy_exit_values().  */

 static void
-connect_loop_phis (class loop *loop1, class loop *loop2, edge new_e)
+connect_loop_phis (class loop *loop1, class loop *loop2, edge new_e,
+                  bool use_prev = false)
 {
   basic_block rest = loop_preheader_edge (loop2)->src;
   gcc_assert (new_e->dest == rest);
@@ -279,7 +281,8 @@ connect_loop_phis (class loop *loop1, class loop *loop2, edge new_e)

       gphi * newphi = create_phi_node (new_init, rest);
       add_phi_arg (newphi, init, skip_first, UNKNOWN_LOCATION);
-      add_phi_arg (newphi, next, new_e, UNKNOWN_LOCATION);
+ add_phi_arg (newphi, use_prev ? PHI_RESULT (phi_first) : next, new_e,
+                  UNKNOWN_LOCATION);
       SET_USE (op, new_init);
     }
 }
@@ -1593,6 +1596,184 @@ split_loop_on_cond (struct loop *loop)
   return do_split;
 }

+/* Check if the LOOP exit branch likes "if (idx != bound)",
+   Return the branch edge which exit loop, if overflow/wrap
+   may happen on "idx".  */
+
+static edge
+get_ne_cond_branch (struct loop *loop)
+{
+  int i;
+  edge e;
+
+  auto_vec<edge> edges = get_loop_exit_edges (loop);
+  FOR_EACH_VEC_ELT (edges, i, e)
+    {
+      /* Check if there is possible wrap/overflow.  */
+      class tree_niter_desc niter;
+      if (!number_of_iterations_exit (loop, e, &niter, false, false))
+       continue;
+      if (niter.control.no_overflow)
+       return NULL;
+      if (niter.cmp != NE_EXPR)
+       continue;
+
+      /* Check loop is simple to split.  */
+      if (single_pred_p (loop->latch)
+         && single_pred_edge (loop->latch)->src == e->src
+         && (gsi_end_p (gsi_start_nondebug_bb (loop->latch))))
+       return e;
+
+      /* Simple header.  */
+      if (e->src == loop->header)
+       {
+         if (get_virtual_phi (e->src))
+           continue;
+
+         /* Only one phi.  */
+         gphi_iterator psi = gsi_start_phis (e->src);
+         if (gsi_end_p (psi))
+           continue;
+         gsi_next (&psi);
+         if (!gsi_end_p (psi))
+           continue;
+
+         /* ++i or ++i */
+         gimple_stmt_iterator gsi = gsi_start_bb (e->src);
+         if (gsi_end_p (gsi))
+           continue;
+
+         gimple *gc = last_stmt (e->src);
+         tree idx = gimple_cond_lhs (gc);
+         if (expr_invariant_in_loop_p (loop, idx))
+           idx = gimple_cond_rhs (gc);
+
+         gimple *s1 = gsi_stmt (gsi);
+         if (!(is_gimple_assign (s1) && idx
+               && (idx == gimple_assign_lhs (s1)
+                   || idx == gimple_assign_rhs1 (s1))))
+           continue;
+
+         gsi_next (&gsi);
+         if (!gsi_end_p (gsi) && gsi_stmt (gsi) == gc)
+           return e;
+       }
+    }
+
+  return NULL;
+}
+
+/* Split the LOOP with NE_EXPR into two loops with GT_EXPR and LT_EXPR. */
+
+static bool
+split_ne_loop (struct loop *loop, edge cond_e)
+{
+  initialize_original_copy_tables ();
+
+  struct loop *loop2 = loop_version (loop, boolean_true_node, NULL,
+                                    profile_probability::always (),
+                                    profile_probability::never (),
+                                    profile_probability::always (),
+                                    profile_probability::always (), true);
+
+  gcc_assert (loop2);
+  update_ssa (TODO_update_ssa);
+
+  basic_block loop2_cond_exit_bb = get_bb_copy (cond_e->src);
+  free_original_copy_tables ();
+
+  gcond *gc = as_a<gcond *> (last_stmt (cond_e->src));
+  gcond *dup_gc = as_a<gcond *> (last_stmt (loop2_cond_exit_bb));
+
+  /* Change if (i != n) to LOOP1:if (i > n) and LOOP2:if (i < n) */
+  bool inv = expr_invariant_in_loop_p (loop, gimple_cond_lhs (gc));
+  enum tree_code up_code = inv ? LT_EXPR : GT_EXPR;
+  enum tree_code down_code = inv ? GT_EXPR : LT_EXPR;
+
+  gimple_cond_set_code (gc, up_code);
+  gimple_cond_set_code (dup_gc, down_code);
+
+  /* Link the exit cond edge to new loop.  */
+  gcond *break_cond = as_a<gcond *> (gimple_copy (gc));
+  edge pred_e = single_pred_edge (loop->latch);
+  bool simple_loop = pred_e && pred_e->src == cond_e->src
+                    && (gsi_end_p (gsi_start_nondebug_bb (loop->latch)));
+  if (simple_loop)
+    gimple_cond_set_code (break_cond, down_code);
+  else
+    gimple_cond_make_true (break_cond);
+
+  basic_block break_bb = split_edge (cond_e);
+  gimple_stmt_iterator gsi = gsi_last_bb (break_bb);
+  gsi_insert_after (&gsi, break_cond, GSI_NEW_STMT);
+
+  edge to_exit = single_succ_edge (break_bb);
+ edge to_new_loop = make_edge (break_bb, loop_preheader_edge (loop2)->src, 0);
+  to_new_loop->flags |= EDGE_TRUE_VALUE;
+  to_exit->flags |= EDGE_FALSE_VALUE;
+  to_exit->flags &= ~EDGE_FALLTHRU;
+  to_exit->probability = cond_e->probability;
+  to_new_loop->probability = to_exit->probability.invert ();
+
+  update_ssa (TODO_update_ssa);
+
+  connect_loop_phis (loop, loop2, to_new_loop, !simple_loop);
+
+  rewrite_into_loop_closed_ssa_1 (NULL, 0, SSA_OP_USE, loop);
+  if (dump_file && (dump_flags & TDF_DETAILS))
+    fprintf (dump_file, ";; Loop split on wrap index.\n");
+
+  return true;
+}
+
+/* Checks if LOOP contains a suitable NE_EXPR conditional block to split.
+L_H:
+ if (i!=N)
+   S;
+ i++;
+ goto L_H;
+
+The "i!=N" is like "i>N || i<N", then it could be transform to:
+
+L_H:
+ if (i>N)
+   S;
+ i++;
+ goto L_H;
+L1_H:
+ if (i<N)
+   S;
+ i++;
+ goto L1_H;
+
+The loop with "i<N" is in favor both GIMPLE and RTL passes.  */
+
+static bool
+split_loop_on_ne_cond (class loop *loop)
+{
+  int num = 0;
+  basic_block *bbs = get_loop_body (loop);
+
+  if (!can_copy_bbs_p (bbs, loop->num_nodes))
+    {
+      free (bbs);
+      return false;
+    }
+
+  for (unsigned i = 0; i < loop->num_nodes; i++)
+ num += estimate_num_insns_seq (bb_seq (bbs[i]), &eni_size_weights);
+  free (bbs);
+
+  if (num > param_max_peeled_insns)
+    return false;
+
+  edge branch_edge = get_ne_cond_branch (loop);
+  if (branch_edge && split_ne_loop (loop, branch_edge))
+    return true;
+
+  return false;
+}
+
/* Main entry point. Perform loop splitting on all suitable loops. */

 static unsigned int
@@ -1622,7 +1803,8 @@ tree_ssa_split_loops (void)
       if (optimize_loop_for_size_p (loop))
        continue;

-      if (split_loop (loop) || split_loop_on_cond (loop))
+      if (split_loop (loop) || split_loop_on_cond (loop)
+         || split_loop_on_ne_cond (loop))
        {
/* Mark our containing loop as having had some split inner loops. */
          loop_outer (loop)->aux = loop;


Hi,

I've tried this patch and it does not seem to pass its own test:

FAIL: gcc.dg/loop-split1.c scan-tree-dump-times lsplit "Loop split" 9

Note you should always do a bootstrap and regression test as described
here: https://gcc.gnu.org/contribute.html#testing

Also please state in your future patch submissions how you tested the patch, like "bootstrap and regression test on x86_64-pc-linux-gnu" for instance.

Hi Bernd,

Thanks for your tests and comments!

It is interesting, this patch would be target independent.
I had a double test, it passes at my side.

# of expected passes            2

And if change the test case slightly, the case fails.
e.g. change to: scan-tree-dump-times "Loop split" 10 "lsplit"

My test machine is powerpc64le-linux-gnu.

BR,
Jiufu Guo.



Bernd.

Reply via email to