[RFC] split loop for NE condition.

guojiufu via Gcc-patches Fri, 14 May 2021 09:31:41 -0700

I've refined the patch as below.
This patch checks for "unsigned type" and iv.no_overflow.

I'm also thinking of using "number_of_iterations_exit (loop, e, &niter,false, false, NULL)"

and "niter.control.no_overflow" to check overflow/wrap, which may be
more accurate, but relatively "expensive".


"nowrap_type_p and scev_probably_wraps_p" may be a little cheaper,
but "number_of_iterations_exit" would be more accurate.

Is this right?

BR,
Jiufu Guo.


diff --git a/gcc/tree-ssa-loop-split.c b/gcc/tree-ssa-loop-split.c
index 3a09bbc39e5..425593ca70f 100644
--- a/gcc/tree-ssa-loop-split.c
+++ b/gcc/tree-ssa-loop-split.c
@@ -41,6 +41,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "cfghooks.h"
 #include "gimple-fold.h"
 #include "gimplify-me.h"
+#include "tree-ssa-loop-ivopts.h"

 /* This file implements two kinds of loop splitting.

@@ -233,7 +234,8 @@ easy_exit_values (class loop *loop)
    this.  The loops need to fulfill easy_exit_values().  */

 static void
-connect_loop_phis (class loop *loop1, class loop *loop2, edge new_e)
+connect_loop_phis (class loop *loop1, class loop *loop2, edge new_e,
+                  bool use_prev = false)
 {
   basic_block rest = loop_preheader_edge (loop2)->src;
   gcc_assert (new_e->dest == rest);

@@ -279,7 +281,8 @@ connect_loop_phis (class loop *loop1, class loop*loop2, edge new_e)


       gphi * newphi = create_phi_node (new_init, rest);
       add_phi_arg (newphi, init, skip_first, UNKNOWN_LOCATION);
-      add_phi_arg (newphi, next, new_e, UNKNOWN_LOCATION);

+ add_phi_arg (newphi, use_prev ? PHI_RESULT (phi_first) : next,new_e,

+                  UNKNOWN_LOCATION);
       SET_USE (op, new_init);
     }
 }
@@ -1593,6 +1596,229 @@ split_loop_on_cond (struct loop *loop)
   return do_split;
 }

+/* Check whether LOOP has an exit branch of the form "if (idx != bound)".
+   Return the edge that exits the loop if overflow/wrap may happen on
+   "idx" and the loop is simple enough to split; otherwise return NULL.  */
+
+static edge
+get_ne_cond_branch (struct loop *loop)
+{
+  int i;
+  edge e;
+
+  auto_vec<edge> edges = get_loop_exit_edges (loop);
+  FOR_EACH_VEC_ELT (edges, i, e)
+    {
+      basic_block bb = e->src;
+
+      /* The source block of the exit edge must end in an NE/EQ GIMPLE_COND.  */
+      gimple *last = last_stmt (bb);
+      if (!last || gimple_code (last) != GIMPLE_COND)
+       continue;
+      gcond *cond = as_a<gcond *> (last);
+      enum tree_code code = gimple_cond_code (cond);
+      if (!(code == NE_EXPR
+           || (code == EQ_EXPR && (e->flags & EDGE_TRUE_VALUE))))
+       continue;
+
+      /* One operand must be loop-invariant; make BND the invariant one.  */
+      tree idx = gimple_cond_lhs (cond);
+      tree bnd = gimple_cond_rhs (cond);
+      if (expr_invariant_in_loop_p (loop, idx))
+       std::swap (idx, bnd);
+      else if (!expr_invariant_in_loop_p (loop, bnd))
+       continue;
+
+      /* Only unsigned integral SSA names whose precision differs from sizetype
+         are considered: by default, unsigned type conversion could cause overflow.  */
+      tree type = TREE_TYPE (idx);
+      if (!INTEGRAL_TYPE_P (type) || TREE_CODE (idx) != SSA_NAME
+         || !TYPE_UNSIGNED (type)
+         || TYPE_PRECISION (type) == TYPE_PRECISION (sizetype))
+       continue;
+
+      /* Avoid splitting if the bound is the MAX/MIN value of its type.  */
+      tree bound_type = TREE_TYPE (bnd);

+ if (TREE_CODE (bnd) == INTEGER_CST && INTEGRAL_TYPE_P(bound_type)

+         && (bnd == TYPE_MAX_VALUE (bound_type)
+             || bnd == TYPE_MIN_VALUE (bound_type)))
+       continue;
+
+      /* Look through a conversion defined inside the loop to its operand.  */
+      if (TREE_CODE (idx) == SSA_NAME)
+       {
+         gimple *stmt = SSA_NAME_DEF_STMT (idx);
+         if (is_gimple_assign (stmt)
+             && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt))
+             && flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
+           idx = gimple_assign_rhs1 (stmt);
+       }
+
+      /* Check if idx is a simple IV; give up entirely if it cannot overflow/wrap.  */
+      class loop *useloop = loop_containing_stmt (cond);
+      affine_iv iv;
+      if (!simple_iv (loop, useloop, idx, &iv, false))
+       continue;
+      if (iv.no_overflow)
+       return NULL;
+

+ /* If base is a known value (especially 0/1), other optimizations may be

+        able to analyze "idx != bnd" as "idx < bnd" or "idx > bnd".  */
+      if (TREE_CODE (iv.base) == INTEGER_CST)
+       continue;
+
+      /* Check that the loop is simple to split.  */
+      gcc_assert (bb != loop->latch);
+
+      if (single_pred_p (loop->latch)
+         && single_pred_edge (loop->latch)->src == bb
+         && (gsi_end_p (gsi_start_nondebug_bb (loop->latch))))
+       return e;
+
+      /* Otherwise accept only a cheap header that holds the exit test.  */
+      if (bb == loop->header)
+       {
+         if (get_virtual_phi (bb))
+           continue;
+
+         /* Require exactly one PHI node.  */
+         gphi_iterator psi = gsi_start_phis (bb);
+         if (gsi_end_p (psi))
+           continue;
+         gsi_next (&psi);
+         if (!gsi_end_p (psi))
+           continue;
+
+         /* The first statement must be an assignment that defines or uses idx (presumably ++i or i++), directly followed by the exit cond.  */
+         gimple_stmt_iterator gsi = gsi_start_bb (bb);
+         if (gsi_end_p (gsi))
+           continue;
+
+         gimple *s1 = gsi_stmt (gsi);
+         if (!(is_gimple_assign (s1)
+               && (idx == gimple_assign_lhs (s1)
+                   || idx == gimple_assign_rhs1 (s1))))
+           continue;
+
+         gsi_next (&gsi);
+         if (!gsi_end_p (gsi) && gsi_stmt (gsi) == cond)
+           return e;
+       }
+    }
+
+  return NULL;
+}
+

+/* Split LOOP, whose exit edge COND_E tests NE_EXPR, into two loops that test GT_EXPR and LT_EXPR.  Always returns true.  */

+
+static bool
+split_ne_loop (struct loop *loop, edge cond_e)
+{
+  initialize_original_copy_tables ();
+
+  struct loop *loop2 = loop_version (loop, boolean_true_node, NULL,
+                                    profile_probability::always (),
+                                    profile_probability::never (),
+                                    profile_probability::always (),
+                                    profile_probability::always (), true);
+
+  gcc_assert (loop2);
+  update_ssa (TODO_update_ssa);
+
+  basic_block loop2_cond_exit_bb = get_bb_copy (cond_e->src);
+  free_original_copy_tables ();
+
+  gcond *gc = as_a<gcond *> (last_stmt (cond_e->src));
+  gcond *dup_gc = as_a<gcond *> (last_stmt (loop2_cond_exit_bb));
+
+  /* Change if (i != n) to LOOP1: if (i > n) and LOOP2: if (i < n); the codes are swapped when the lhs is the invariant operand.  */
+  bool inv = expr_invariant_in_loop_p (loop, gimple_cond_lhs (gc));
+  enum tree_code up_code = inv ? LT_EXPR : GT_EXPR;
+  enum tree_code down_code = inv ? GT_EXPR : LT_EXPR;
+
+  gimple_cond_set_code (gc, up_code);
+  gimple_cond_set_code (dup_gc, down_code);
+
+  /* Link the exit cond edge to the new loop through a new block holding a copy of the condition.  */
+  gcond *break_cond = as_a<gcond *> (gimple_copy (gc));
+  edge pred_e = single_pred_edge (loop->latch);
+  bool simple_loop = pred_e && pred_e->src == cond_e->src
+                    && (gsi_end_p (gsi_start_nondebug_bb (loop->latch)));
+  if (simple_loop)
+    gimple_cond_set_code (break_cond, down_code);
+  else
+    gimple_cond_make_true (break_cond);
+
+  basic_block break_bb = split_edge (cond_e);
+  gimple_stmt_iterator gsi = gsi_last_bb (break_bb);
+  gsi_insert_after (&gsi, break_cond, GSI_NEW_STMT);
+
+  edge to_exit = single_succ_edge (break_bb);

+ edge to_new_loop = make_edge (break_bb, loop_preheader_edge(loop2)->src, 0);

+  to_new_loop->flags |= EDGE_TRUE_VALUE;
+  to_exit->flags |= EDGE_FALSE_VALUE;
+  to_exit->flags &= ~EDGE_FALLTHRU;
+  to_exit->probability = cond_e->probability;
+  to_new_loop->probability = to_exit->probability.invert ();
+
+  update_ssa (TODO_update_ssa);
+
+  connect_loop_phis (loop, loop2, to_new_loop, !simple_loop);
+
+  rewrite_into_loop_closed_ssa_1 (NULL, 0, SSA_OP_USE, loop);
+  if (dump_file && (dump_flags & TDF_DETAILS))
+    fprintf (dump_file, ";; Loop split on wrap index.\n");
+
+  return true;
+}
+

+/* Check if LOOP contains a suitable NE_EXPR conditional block to split.

+L_H:
+ if (i!=N)
+   S;
+ i++;
+ goto L_H;
+
+The "i!=N" is like "i>N || i<N", so it could be transformed to:
+
+L_H:
+ if (i>N)
+   S;
+ i++;
+ goto L_H;
+L1_H:
+ if (i<N)
+   S;
+ i++;
+ goto L1_H;
+
+The loop with "i<N" is favored by both GIMPLE and RTL passes.  Return true if LOOP was split.  */
+
+static bool
+split_loop_on_ne_cond (class loop *loop)
+{
+  int num = 0;
+  basic_block *bbs = get_loop_body (loop);
+
+  if (!can_copy_bbs_p (bbs, loop->num_nodes))
+    {
+      free (bbs);
+      return false;
+    }
+
+  for (unsigned i = 0; i < loop->num_nodes; i++)
+    num += estimate_num_insns_seq (bb_seq (bbs[i]), &eni_size_weights);
+  free (bbs);
+
+  if (num > param_max_peeled_insns)
+    return false;
+
+  edge branch_edge = get_ne_cond_branch (loop);
+  if (branch_edge && split_ne_loop (loop, branch_edge))
+    return true;
+
+  return false;
+}
+
 /* Main entry point.  Perform loop splitting on all suitable loops.  */

 static unsigned int
@@ -1622,7 +1848,8 @@ tree_ssa_split_loops (void)
       if (optimize_loop_for_size_p (loop))
        continue;

-      if (split_loop (loop) || split_loop_on_cond (loop))
+      if (split_loop (loop) || split_loop_on_cond (loop)
+         || split_loop_on_ne_cond (loop))
        {

/* Mark our containing loop as having had some split inner loops.*/

          loop_outer (loop)->aux = loop;

[RFC] split loop for NE condition.

Reply via email to