Note: this is stage1 material. Currently loop2_invariant pass hoist instructions out of loop by creating a new temporary for the destination register of that instruction and leaving there a mov from new temporary to old register as shown below:
loop header start of loop body //stuff (set (reg 128) (const_int 0)) //other stuff end of loop body becomes: (set (reg 129) (const_int 0)) loop header start of loop body //stuff (set (reg 128) (reg 128)) //other stuff end of loop body This is one of the errors that led to a useless ldr ending up inside a loop (PR64616). This patch fix this specific bit (some other bit was fixed in [1]) by simply moving the instruction if it's known to be safe. This is decided by looking at all the uses of the register set in the instruction and checking that (i) they were all dominated by the instruction and (ii) there is no other def in the loop that could end up reaching one of the use. [1] https://gcc.gnu.org/ml/gcc-patches/2015-02/msg00933.html ChangeLog entries are as follows: *** gcc/ChangeLog *** 2015-02-16 Thomas Preud'homme <thomas.preudho...@arm.com> * dominance.c (nearest_common_dominator_for_set): Fix A_Dominated_by_B code in comment. * loop-invariant.c (can_move_invariant_reg): New. (move_invariant_reg): Call above new function to decide whether instruction can just be moved, skipping creation of temporary register. *** gcc/testsuite/ChangeLog *** 2015-02-16 Thomas Preud'homme <thomas.preudho...@arm.com> * gcc.dg/loop-7.c: Run on all targets and check for loop2_invariant being able to move instructions without introducing new temporary register. * gcc.dg/loop-8.c: New test. diff --git a/gcc/dominance.c b/gcc/dominance.c index 33d4ae4..09c8c90 100644 --- a/gcc/dominance.c +++ b/gcc/dominance.c @@ -982,7 +982,7 @@ nearest_common_dominator_for_set (enum cdi_direction dir, bitmap blocks) A_Dominated_by_B (node A, node B) { - return DFS_Number_In(A) >= DFS_Number_In(A) + return DFS_Number_In(A) >= DFS_Number_In(B) && DFS_Number_Out (A) <= DFS_Number_Out(B); } */ diff --git a/gcc/loop-invariant.c b/gcc/loop-invariant.c index f79b497..ab2a45c 100644 --- a/gcc/loop-invariant.c +++ b/gcc/loop-invariant.c @@ -1512,6 +1512,99 @@ replace_uses (struct invariant *inv, rtx reg, bool in_group) return 1; } +/* Whether invariant INV setting REG can be moved out of LOOP, at the end of + the block preceding its header. */ + +static bool +can_move_invariant_reg (struct loop *loop, struct invariant *inv, rtx reg) +{ + df_ref def, use; + bool ret = false; + unsigned int i, dest_regno, defs_in_loop_count = 0; + rtx_insn *insn = inv->insn; + bitmap may_exit, has_exit, always_executed; + basic_block *body, bb = BLOCK_FOR_INSN (inv->insn); + + /* We ignore hard register and memory access for cost and complexity reasons. + Hard register are few at this stage and expensive to consider as they + require building a separate data flow. Memory access would require using + df_simulate_* and can_move_insns_across functions and is more complex. */ + if (!REG_P (reg) || HARD_REGISTER_P (reg)) + return false; + + /* Check whether the set is always executed. We could omit this condition if + we know that the register is unused outside of the loop, but it does not + seem worth finding out. */ + may_exit = BITMAP_ALLOC (NULL); + has_exit = BITMAP_ALLOC (NULL); + always_executed = BITMAP_ALLOC (NULL); + body = get_loop_body_in_dom_order (loop); + find_exits (loop, body, may_exit, has_exit); + compute_always_reached (loop, body, has_exit, always_executed); + /* Find bit position for basic block bb. */ + for (i = 0; i < loop->num_nodes && body[i] != bb; i++); + if (!bitmap_bit_p (always_executed, i)) + goto cleanup; + + /* Check that all uses reached by the def in insn would still be reached + it. */ + dest_regno = REGNO (reg); + for (use = DF_REG_USE_CHAIN (dest_regno); use; use = DF_REF_NEXT_REG (use)) + { + rtx ref; + basic_block use_bb; + + ref = DF_REF_INSN (use); + use_bb = BLOCK_FOR_INSN (ref); + + /* Ignore instruction considered for moving. */ + if (ref == insn) + continue; + + /* Don't consider uses outside loop. */ + if (!flow_bb_inside_loop_p (loop, use_bb)) + continue; + + /* Don't move if a use is not dominated by def in insn. */ + if (use_bb == bb && DF_INSN_LUID (insn) > DF_INSN_LUID (ref)) + goto cleanup; + if (!dominated_by_p (CDI_DOMINATORS, use_bb, bb)) + goto cleanup; + + /* Check for other defs. Any other def in the loop might reach a use + currently reached by the def in insn. */ + if (!defs_in_loop_count) + { + for (def = DF_REG_DEF_CHAIN (dest_regno); def; def = DF_REF_NEXT_REG (def)) + { + basic_block def_bb = BLOCK_FOR_INSN (DF_REF_INSN (def)); + + /* Defs in exit block cannot reach a use they weren't + already. */ + if (single_succ_p (def_bb)) + { + basic_block def_bb_succ; + + def_bb_succ = single_succ (def_bb); + if (!flow_bb_inside_loop_p (loop, def_bb_succ)) + continue; + } + + if (++defs_in_loop_count > 1) + goto cleanup; + } + } + } + ret = true; + +cleanup: + BITMAP_FREE (always_executed); + BITMAP_FREE (may_exit); + BITMAP_FREE (has_exit); + free (body); + return ret; +} + /* Move invariant INVNO out of the LOOP. Returns true if this succeeds, false otherwise. */ @@ -1545,11 +1638,8 @@ move_invariant_reg (struct loop *loop, unsigned invno) } } - /* Move the set out of the loop. If the set is always executed (we could - omit this condition if we know that the register is unused outside of - the loop, but it does not seem worth finding out) and it has no uses - that would not be dominated by it, we may just move it (TODO). - Otherwise we need to create a temporary register. */ + /* If possible, just move the set out of the loop. Otherwise, we + need to create a temporary register. */ set = single_set (inv->insn); reg = dest = SET_DEST (set); if (GET_CODE (reg) == SUBREG) @@ -1557,19 +1647,25 @@ move_invariant_reg (struct loop *loop, unsigned invno) if (REG_P (reg)) regno = REGNO (reg); - reg = gen_reg_rtx_and_attrs (dest); + if (!can_move_invariant_reg (loop, inv, reg)) + { + reg = gen_reg_rtx_and_attrs (dest); - /* Try replacing the destination by a new pseudoregister. */ - validate_change (inv->insn, &SET_DEST (set), reg, true); + /* Try replacing the destination by a new pseudoregister. */ + validate_change (inv->insn, &SET_DEST (set), reg, true); - /* As well as all the dominated uses. */ - replace_uses (inv, reg, true); + /* As well as all the dominated uses. */ + replace_uses (inv, reg, true); - /* And validate all the changes. */ - if (!apply_change_group ()) - goto fail; + /* And validate all the changes. */ + if (!apply_change_group ()) + goto fail; - emit_insn_after (gen_move_insn (dest, reg), inv->insn); + emit_insn_after (gen_move_insn (dest, reg), inv->insn); + } + else if (dump_file) + fprintf (dump_file, "Invariant %d moved without introducing a new " + "temporary register\n", invno); reorder_insns (inv->insn, inv->insn, BB_END (preheader)); /* If there is a REG_EQUAL note on the insn we just moved, and the diff --git a/gcc/testsuite/gcc.dg/loop-7.c b/gcc/testsuite/gcc.dg/loop-7.c index 0457add..eac645c 100644 --- a/gcc/testsuite/gcc.dg/loop-7.c +++ b/gcc/testsuite/gcc.dg/loop-7.c @@ -1,6 +1,6 @@ /* PR rtl-optimization/31360 */ -/* { dg-do compile { target { powerpc*-*-* } } } */ +/* { dg-do compile { target } } */ /* { dg-options "-O1 -fdump-rtl-loop2_invariant" } */ void f(int *a) @@ -12,5 +12,6 @@ void f(int *a) /* Load of 0 is moved out of the loop. */ /* { dg-final { scan-rtl-dump-times "Decided" 1 "loop2_invariant" } } */ +/* { dg-final { scan-rtl-dump-times "without introducing a new temporary register" 1 "loop2_invariant" } } */ /* { dg-final { cleanup-rtl-dump "loop2_invariant" } } */ diff --git a/gcc/testsuite/gcc.dg/loop-8.c b/gcc/testsuite/gcc.dg/loop-8.c new file mode 100644 index 0000000..592e54c --- /dev/null +++ b/gcc/testsuite/gcc.dg/loop-8.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-options "-O1 -fdump-rtl-loop2_invariant" } */ + +void +f (int *a, int *b) +{ + int i; + + for (i = 0; i < 100; i++) + { + int d = 42; + + a[i] = d; + if (i % 2) + d = i; + b[i] = d; + } +} + +/* Load of 42 is moved out of the loop, introducing a new pseudo register. */ +/* { dg-final { scan-rtl-dump-times "Decided" 1 "loop2_invariant" } } */ +/* { dg-final { scan-rtl-dump-not "without introducing a new temporary register" "loop2_invariant" } } */ +/* { dg-final { cleanup-rtl-dump "loop2_invariant" } } */ + Testsuite was run in QEMU when compiled by an arm-none-eabi GCC cross-compiler targeting Cortex-M3 and a bootstrapped x86_64 native GCC without any regression. Is this ok for stage1? Best regards, Thomas