Currently, the instruction sinking in "prepare_shrink_wrap" is a bit
conservative, so some further optimization opportunities are missed.

Given that the prologue uses register A via:

  (store A, [sp + offset])

and given that the entry_basic_block contains a simple register copy like:

  (move A, B)

the current "prepare_shrink_wrap" will sink the move instruction as deep as it
can, so that the entry_basic_block can be marked as "don't need prologue".

However, if we replace "(move A, B)" with either one of

  * "(move B, CONST_K)",
  * "(move B, (plus A, CONST_K))"

we could still do the same sinking optimization, but *current gcc does not*.

Patterns like (move B, CONST_K) are very common on some RISC targets.

for example on AArch64, we could have the following pair:

  adrp    x22, global_data_a
  add     x0, x22, :lo12:global_data_a

If "adrp" is scheduled into the entry_basic_block, then the write of x22 may
prevent shrink-wrapping from happening.

When judging whether an instruction is sinkable, move_insn_for_shrink_wrap only
accepts a simple reg copy in which both dest and src are REG_P, while the second
operand of adrp is actually a SYMBOL_REF, so it is rejected by the optimization.

This patch relaxes the restriction on src to accept any one of the following:

  + REG
  + CONST_OBJ, like SYMBOL_REF
  + a combination of a single REG and any other CONST_OBJs.
    (the reg def/use calculation will not be affected by CONST_OBJs)

RISC backends may benefit more from this relaxation, although there are still
minor improvements on x86. For example, 17 more functions are
shrink-wrapped during x86-64 bootstrap, like sort_bucket in ira-color.c.

test done
=========
  no regression on aarch64-none-elf bare-metal.
  no regression on x86-64 check-gcc.
  both aarch64 and x86-64 bootstrap OK.

ok for install?

2014-09-04 Jiong Wang<jiong.w...@arm.com>

gcc/
  * shrink-wrap.c (rtx_search_arg): New structure type.
  (rtx_search_arg_p): New typedef.
  (count_reg_const): New callback function.
  (move_insn_for_shrink_wrap): Relax the restriction on src operand.

diff --git a/gcc/shrink-wrap.c b/gcc/shrink-wrap.c
index 0938f2c..5b5ca85 100644
--- a/gcc/shrink-wrap.c
+++ b/gcc/shrink-wrap.c
@@ -156,6 +156,37 @@ live_edge_for_reg (basic_block bb, int regno, int end_regno)
   return live_edge;
 }
 
+struct rtx_search_arg
+{
+  unsigned int reg_found;
+  unsigned int nonconst_found;
+  rtx reg;
+};
+
+typedef struct rtx_search_arg *rtx_search_arg_p;
+
+/* A for_each_rtx callback used by move_insn_for_shrink_wrap to count the
+   numbers of register and non-constant objects.  */
+
+static int
+count_reg_const (rtx *loc, void *arg)
+{
+  rtx_search_arg_p p = (rtx_search_arg_p) arg;
+  rtx x;
+
+  x = *loc;
+
+  if (REG_P (x))
+    {
+      p->reg_found++;
+      p->reg = x;
+    }
+  else if (! CONSTANT_P (x))
+    p->nonconst_found++;
+
+  return 0;
+}
+
 /* Try to move INSN from BB to a successor.  Return true on success.
    USES and DEFS are the set of registers that are used and defined
    after INSN in BB.  SPLIT_P indicates whether a live edge from BB
@@ -169,7 +200,9 @@ move_insn_for_shrink_wrap (basic_block bb, rtx_insn *insn,
 {
   rtx set, src, dest;
   bitmap live_out, live_in, bb_uses, bb_defs;
-  unsigned int i, dregno, end_dregno, sregno, end_sregno;
+  unsigned int i, dregno, end_dregno;
+  unsigned int sregno = FIRST_PSEUDO_REGISTER;
+  unsigned int end_sregno = FIRST_PSEUDO_REGISTER;
   basic_block next_block;
   edge live_edge;
 
@@ -179,7 +212,25 @@ move_insn_for_shrink_wrap (basic_block bb, rtx_insn *insn,
     return false;
   src = SET_SRC (set);
   dest = SET_DEST (set);
-  if (!REG_P (dest) || !REG_P (src)
+
+  if (!REG_P (src))
+    {
+      struct rtx_search_arg arg;
+
+      arg.reg_found = 0;
+      arg.nonconst_found = 0;
+      arg.reg = NULL_RTX;
+
+      for_each_rtx (&src, count_reg_const, (void *) &arg);
+
+      if (arg.nonconst_found
+	  || arg.reg_found > 1)
+	src = NULL_RTX;
+      else if (arg.reg_found == 1)
+	src = arg.reg;
+    }
+
+  if (!REG_P (dest) || src == NULL_RTX
       /* STACK or FRAME related adjustment might be part of prologue.
 	 So keep them in the entry block.  */
       || dest == stack_pointer_rtx
@@ -188,10 +238,13 @@ move_insn_for_shrink_wrap (basic_block bb, rtx_insn *insn,
     return false;
 
   /* Make sure that the source register isn't defined later in BB.  */
-  sregno = REGNO (src);
-  end_sregno = END_REGNO (src);
-  if (overlaps_hard_reg_set_p (defs, GET_MODE (src), sregno))
-    return false;
+  if (REG_P (src))
+    {
+      sregno = REGNO (src);
+      end_sregno = END_REGNO (src);
+      if (overlaps_hard_reg_set_p (defs, GET_MODE (src), sregno))
+	return false;
+    }
 
   /* Make sure that the destination register isn't referenced later in BB.  */
   dregno = REGNO (dest);
diff --git a/gcc/testsuite/gcc.target/aarch64/shrink_wrap_symbol_ref_1.c b/gcc/testsuite/gcc.target/aarch64/shrink_wrap_symbol_ref_1.c
new file mode 100644
index 0000000..ad2e588
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/shrink_wrap_symbol_ref_1.c
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-rtl-pro_and_epilogue" } */
+
+extern char *asm_out_file;
+extern void default_elf_asm_output_ascii (char *, const char *, int);
+
+void
+assemble_string (const char *p, int size)
+{
+  int pos = 0;
+  int maximum = 2000;
+
+  while (pos < size)
+    {
+      int thissize = size - pos;
+
+      if (thissize > maximum)
+	thissize = maximum;
+
+      default_elf_asm_output_ascii (asm_out_file, p, thissize);;
+
+      pos += thissize;
+      p += thissize;
+    }
+}
+
+/* { dg-final { scan-rtl-dump "Performing shrink-wrapping" "pro_and_epilogue"  } } */
+/* { dg-final { cleanup-rtl-dump "pro_and_epilogue" } } */

Reply via email to