[PATCH, stage1] Move insns without introducing new temporaries in loop2_invariant

Thomas Preud'homme Thu, 05 Mar 2015 01:53:44 -0800

Note: this is stage1 material.

Currently loop2_invariant pass hoist instructions out of loop by creating a new 
temporary for the destination register of that instruction and leaving there a 
mov from new temporary to old register as shown below:


loop header
start of loop body
//stuff
(set (reg 128) (const_int 0))
//other stuff
end of loop body

becomes:

(set (reg 129) (const_int 0))
loop header
start of loop body
//stuff
(set (reg 128) (reg 128))
//other stuff
end of loop body

This is one of the errors that led to a useless ldr ending up inside a loop 
(PR64616). This patch fix this specific bit (some other bit was fixed in [1]) 
by simply moving the instruction if it's known to be safe. This is decided by 
looking at all the uses of the register set in the instruction and checking 
that (i) they were all dominated by the instruction and (ii) there is no other 
def in the loop that could end up reaching one of the use.

[1] https://gcc.gnu.org/ml/gcc-patches/2015-02/msg00933.html

ChangeLog entries are as follows:

*** gcc/ChangeLog ***

2015-02-16  Thomas Preud'homme  <thomas.preudho...@arm.com>

        * dominance.c (nearest_common_dominator_for_set): Fix A_Dominated_by_B
        code in comment.
        * loop-invariant.c (can_move_invariant_reg): New.
        (move_invariant_reg): Call above new function to decide whether
        instruction can just be moved, skipping creation of temporary
        register.

*** gcc/testsuite/ChangeLog ***

2015-02-16  Thomas Preud'homme  <thomas.preudho...@arm.com>

        * gcc.dg/loop-7.c: Run on all targets and check for loop2_invariant
        being able to move instructions without introducing new temporary
        register.
        * gcc.dg/loop-8.c: New test.

diff --git a/gcc/dominance.c b/gcc/dominance.c
index 33d4ae4..09c8c90 100644
--- a/gcc/dominance.c
+++ b/gcc/dominance.c
@@ -982,7 +982,7 @@ nearest_common_dominator_for_set (enum cdi_direction dir, 
bitmap blocks)
 
    A_Dominated_by_B (node A, node B)
    {
-     return DFS_Number_In(A) >= DFS_Number_In(A)
+     return DFS_Number_In(A) >= DFS_Number_In(B)
             && DFS_Number_Out (A) <= DFS_Number_Out(B);
    }  */
 
diff --git a/gcc/loop-invariant.c b/gcc/loop-invariant.c
index f79b497..ab2a45c 100644
--- a/gcc/loop-invariant.c
+++ b/gcc/loop-invariant.c
@@ -1512,6 +1512,99 @@ replace_uses (struct invariant *inv, rtx reg, bool 
in_group)
   return 1;
 }
 
+/* Whether invariant INV setting REG can be moved out of LOOP, at the end of
+   the block preceding its header.  */
+
+static bool
+can_move_invariant_reg (struct loop *loop, struct invariant *inv, rtx reg)
+{
+  df_ref def, use;
+  bool ret = false;
+  unsigned int i, dest_regno, defs_in_loop_count = 0;
+  rtx_insn *insn = inv->insn;
+  bitmap may_exit, has_exit, always_executed;
+  basic_block *body, bb = BLOCK_FOR_INSN (inv->insn);
+
+  /* We ignore hard register and memory access for cost and complexity reasons.
+     Hard register are few at this stage and expensive to consider as they
+     require building a separate data flow.  Memory access would require using
+     df_simulate_* and can_move_insns_across functions and is more complex.  */
+  if (!REG_P (reg) || HARD_REGISTER_P (reg))
+    return false;
+
+  /* Check whether the set is always executed.  We could omit this condition if
+     we know that the register is unused outside of the loop, but it does not
+     seem worth finding out.  */
+  may_exit = BITMAP_ALLOC (NULL);
+  has_exit = BITMAP_ALLOC (NULL);
+  always_executed = BITMAP_ALLOC (NULL);
+  body = get_loop_body_in_dom_order (loop);
+  find_exits (loop, body, may_exit, has_exit);
+  compute_always_reached (loop, body, has_exit, always_executed);
+  /* Find bit position for basic block bb.  */
+  for (i = 0; i < loop->num_nodes && body[i] != bb; i++);
+  if (!bitmap_bit_p (always_executed, i))
+    goto cleanup;
+
+  /* Check that all uses reached by the def in insn would still be reached
+     it.  */
+  dest_regno = REGNO (reg);
+  for (use = DF_REG_USE_CHAIN (dest_regno); use; use = DF_REF_NEXT_REG (use))
+    {
+      rtx ref;
+      basic_block use_bb;
+
+      ref = DF_REF_INSN (use);
+      use_bb = BLOCK_FOR_INSN (ref);
+
+      /* Ignore instruction considered for moving.  */
+      if (ref == insn)
+       continue;
+
+      /* Don't consider uses outside loop.  */
+      if (!flow_bb_inside_loop_p (loop, use_bb))
+       continue;
+
+      /* Don't move if a use is not dominated by def in insn.  */
+      if (use_bb == bb && DF_INSN_LUID (insn) > DF_INSN_LUID (ref))
+       goto cleanup;
+      if (!dominated_by_p (CDI_DOMINATORS, use_bb, bb))
+       goto cleanup;
+
+      /* Check for other defs.  Any other def in the loop might reach a use
+        currently reached by the def in insn.  */
+      if (!defs_in_loop_count)
+       {
+         for (def = DF_REG_DEF_CHAIN (dest_regno); def; def = DF_REF_NEXT_REG 
(def))
+           {
+             basic_block def_bb = BLOCK_FOR_INSN (DF_REF_INSN (def));
+
+             /* Defs in exit block cannot reach a use they weren't
+                already.  */
+             if (single_succ_p (def_bb))
+               {
+                 basic_block def_bb_succ;
+
+                 def_bb_succ = single_succ (def_bb);
+                 if (!flow_bb_inside_loop_p (loop, def_bb_succ))
+                   continue;
+               }
+
+             if (++defs_in_loop_count > 1)
+               goto cleanup;
+           }
+       }
+    }
+  ret = true;
+
+cleanup:
+  BITMAP_FREE (always_executed);
+  BITMAP_FREE (may_exit);
+  BITMAP_FREE (has_exit);
+  free (body);
+  return ret;
+}
+
 /* Move invariant INVNO out of the LOOP.  Returns true if this succeeds, false
    otherwise.  */
 
@@ -1545,11 +1638,8 @@ move_invariant_reg (struct loop *loop, unsigned invno)
            }
        }
 
-      /* Move the set out of the loop.  If the set is always executed (we could
-        omit this condition if we know that the register is unused outside of
-        the loop, but it does not seem worth finding out) and it has no uses
-        that would not be dominated by it, we may just move it (TODO).
-        Otherwise we need to create a temporary register.  */
+      /* If possible, just move the set out of the loop.  Otherwise, we
+        need to create a temporary register.  */
       set = single_set (inv->insn);
       reg = dest = SET_DEST (set);
       if (GET_CODE (reg) == SUBREG)
@@ -1557,19 +1647,25 @@ move_invariant_reg (struct loop *loop, unsigned invno)
       if (REG_P (reg))
        regno = REGNO (reg);
 
-      reg = gen_reg_rtx_and_attrs (dest);
+      if (!can_move_invariant_reg (loop, inv, reg))
+       {
+         reg = gen_reg_rtx_and_attrs (dest);
 
-      /* Try replacing the destination by a new pseudoregister.  */
-      validate_change (inv->insn, &SET_DEST (set), reg, true);
+         /* Try replacing the destination by a new pseudoregister.  */
+         validate_change (inv->insn, &SET_DEST (set), reg, true);
 
-      /* As well as all the dominated uses.  */
-      replace_uses (inv, reg, true);
+         /* As well as all the dominated uses.  */
+         replace_uses (inv, reg, true);
 
-      /* And validate all the changes.  */
-      if (!apply_change_group ())
-       goto fail;
+         /* And validate all the changes.  */
+         if (!apply_change_group ())
+           goto fail;
 
-      emit_insn_after (gen_move_insn (dest, reg), inv->insn);
+         emit_insn_after (gen_move_insn (dest, reg), inv->insn);
+       }
+      else if (dump_file)
+       fprintf (dump_file, "Invariant %d moved without introducing a new "
+                           "temporary register\n", invno);
       reorder_insns (inv->insn, inv->insn, BB_END (preheader));
 
       /* If there is a REG_EQUAL note on the insn we just moved, and the
diff --git a/gcc/testsuite/gcc.dg/loop-7.c b/gcc/testsuite/gcc.dg/loop-7.c
index 0457add..eac645c 100644
--- a/gcc/testsuite/gcc.dg/loop-7.c
+++ b/gcc/testsuite/gcc.dg/loop-7.c
@@ -1,6 +1,6 @@
 /* PR rtl-optimization/31360  */
 
-/* { dg-do compile { target { powerpc*-*-* } } } */
+/* { dg-do compile { target } } */
 /* { dg-options "-O1 -fdump-rtl-loop2_invariant" } */
 
 void f(int *a)
@@ -12,5 +12,6 @@ void f(int *a)
 
 /* Load of 0 is moved out of the loop.  */
 /* { dg-final { scan-rtl-dump-times "Decided" 1 "loop2_invariant" } } */
+/* { dg-final { scan-rtl-dump-times "without introducing a new temporary 
register" 1 "loop2_invariant" } } */
 /* { dg-final { cleanup-rtl-dump "loop2_invariant" } } */
 
diff --git a/gcc/testsuite/gcc.dg/loop-8.c b/gcc/testsuite/gcc.dg/loop-8.c
new file mode 100644
index 0000000..592e54c
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/loop-8.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-O1 -fdump-rtl-loop2_invariant" } */
+
+void
+f (int *a, int *b)
+{
+  int i;
+
+  for (i = 0; i < 100; i++)
+    {
+      int d = 42;
+
+      a[i] = d;
+      if (i % 2)
+       d = i;
+      b[i] = d;
+    }
+}
+
+/* Load of 42 is moved out of the loop, introducing a new pseudo register.  */
+/* { dg-final { scan-rtl-dump-times "Decided" 1 "loop2_invariant" } } */
+/* { dg-final { scan-rtl-dump-not "without introducing a new temporary 
register" "loop2_invariant" } } */
+/* { dg-final { cleanup-rtl-dump "loop2_invariant" } } */
+

Testsuite was run in QEMU when compiled by an arm-none-eabi GCC cross-compiler 
targeting Cortex-M3 and a bootstrapped x86_64 native GCC without any regression.

Is this ok for stage1?

Best regards,

Thomas

[PATCH, stage1] Move insns without introducing new temporaries in loop2_invariant

Reply via email to