Re: PowerPC prologue and epilogue 3

2012-04-22 Thread David Edelsohn
On Tue, Apr 17, 2012 at 11:13 AM, Alan Modra amo...@gmail.com wrote:
 This continues the prologue and epilogue cleanup.  Not many user
 visible changes here, except for:
 - a bugfix to the LR save RTL emitted by rs6000_emit_savres_rtx which
  may affect SPE,
 - a bugfix for SPE code emitted when using a static chain,
 - vector saves will be done using r1 for large frames just over 32k in
  size, and,
 - using r11 as a frame pointer whenever we need to set up r11 for
  out-of-line saves, and merging two pointer reg setup insns.
 The latter is a necessary prerequisite to enabling out-of-line
 save/restore for large frames, as I do in a later patch.  Currently
 this will only affect abiv4 -Os when using out-of-line saves.

 eg. -m32 -Os -mno-multiple
 int f (double x)
 {
  char a[33];
  __asm __volatile ("#%0" : "=m" (a) : : "fr31", "r27", "r28");
  return (int) x;
 }
        old                     new
        stwu 1,-96(1)           mflr 0
        mflr 0                  addi 11,1,-8
        addi 11,1,88            stwu 1,-96(1)
        stw 0,100(1)            stw 0,12(11)
        stfd 31,88(1)           bl _savegpr_27
        bl _savegpr_27          stfd 31,0(11)


        * config/rs6000/rs6000.c (rs6000_emit_stack_reset): Delete forward
        decl.  Move logic selecting update reg to callers.  Update all callers.
        (rs6000_emit_allocate_stack): Add copy_off param.
        (emit_frame_save): Don't handle reg+reg addressing.
        (ptr_regno_for_savres): New function, extracted from..
        (rs6000_emit_savres_rtx): ..here.  Add lr_offset param.
        (rs6000_emit_prologue): Generate frame_ptr_rtx as we need it.
        Set frame_reg_rtx to r11 whenever r11 is needed, and merge
        frame offset adjustment for out-of-line save with copy from sp.
        Simplify condition controlling whether cr is saved early or
        late.  Use ptr_regno_for_savres to verify correct reg is set
        up for out-of-line saves.  Pass the actual pointer reg used to
        rs6000_emit_savres_rtx so rtl matches insns in out-of-line
        function.  Rearrange spe vars so code is similar to that
        elsewhere in this function.  Don't update frame_off when spe
        save code will restore r11.  Use emit_frame_save for spe and
        gpr saves.  Consolidate darwin out-of-line gpr setup with that
        for other abis.  Don't assume frame_offset is zero and frame
        reg is sp when setting up altivec reg saves, and calculate
        exact offset requirement.
        (rs6000_emit_epilogue): Use HOST_WIDE_INT for frame_off.  Tidy
        spe restore code.  Consolidate darwin out-of-line gpr setup
        with that for other abis.

This patch is okay.

Thanks, David


PowerPC prologue and epilogue 3

2012-04-17 Thread Alan Modra
This continues the prologue and epilogue cleanup.  Not many user
visible changes here, except for:
- a bugfix to the LR save RTL emitted by rs6000_emit_savres_rtx which
  may affect SPE,
- a bugfix for SPE code emitted when using a static chain,
- vector saves will be done using r1 for large frames just over 32k in
  size, and,
- using r11 as a frame pointer whenever we need to set up r11 for
  out-of-line saves, and merging two pointer reg setup insns.
The latter is a necessary prerequisite to enabling out-of-line
save/restore for large frames, as I do in a later patch.  Currently
this will only affect abiv4 -Os when using out-of-line saves.

eg. -m32 -Os -mno-multiple
int f (double x)
{
  char a[33];
  __asm __volatile ("#%0" : "=m" (a) : : "fr31", "r27", "r28");
  return (int) x;
}
        old                     new
        stwu 1,-96(1)           mflr 0
        mflr 0                  addi 11,1,-8
        addi 11,1,88            stwu 1,-96(1)
        stw 0,100(1)            stw 0,12(11)
        stfd 31,88(1)           bl _savegpr_27
        bl _savegpr_27          stfd 31,0(11)


* config/rs6000/rs6000.c (rs6000_emit_stack_reset): Delete forward
decl.  Move logic selecting update reg to callers.  Update all callers.
(rs6000_emit_allocate_stack): Add copy_off param.
(emit_frame_save): Don't handle reg+reg addressing.
(ptr_regno_for_savres): New function, extracted from..
(rs6000_emit_savres_rtx): ..here.  Add lr_offset param.
(rs6000_emit_prologue): Generate frame_ptr_rtx as we need it.
Set frame_reg_rtx to r11 whenever r11 is needed, and merge
frame offset adjustment for out-of-line save with copy from sp.
Simplify condition controlling whether cr is saved early or
late.  Use ptr_regno_for_savres to verify correct reg is set
up for out-of-line saves.  Pass the actual pointer reg used to
rs6000_emit_savres_rtx so rtl matches insns in out-of-line
function.  Rearrange spe vars so code is similar to that
elsewhere in this function.  Don't update frame_off when spe
save code will restore r11.  Use emit_frame_save for spe and
gpr saves.  Consolidate darwin out-of-line gpr setup with that
for other abis.  Don't assume frame_offset is zero and frame
reg is sp when setting up altivec reg saves, and calculate
exact offset requirement.
(rs6000_emit_epilogue): Use HOST_WIDE_INT for frame_off.  Tidy
spe restore code.  Consolidate darwin out-of-line gpr setup
with that for other abis.

diff -urp gcc-alan2/gcc/config/rs6000/rs6000.c gcc-alan3/gcc/config/rs6000/rs6000.c
--- gcc-alan2/gcc/config/rs6000/rs6000.c	2012-04-16 11:58:01.50108 +0930
+++ gcc-alan3/gcc/config/rs6000/rs6000.c	2012-04-17 07:19:42.927931887 +0930
@@ -951,7 +951,6 @@ static void rs6000_eliminate_indexed_mem
 static const char *rs6000_mangle_type (const_tree);
 static void rs6000_set_default_type_attributes (tree);
 static rtx rs6000_savres_routine_sym (rs6000_stack_t *, bool, bool, bool);
-static rtx rs6000_emit_stack_reset (rs6000_stack_t *, rtx, rtx, int, bool);
 static bool rs6000_reg_live_or_pic_offset_p (int);
 static tree rs6000_builtin_vectorized_libmass (tree, tree, tree);
 static tree rs6000_builtin_vectorized_function (tree, tree, tree);
@@ -18534,7 +18533,7 @@ rs6000_emit_stack_tie (rtx fp, bool hard
The generated code may use hard register 0 as a temporary.  */
 
 static void
-rs6000_emit_allocate_stack (HOST_WIDE_INT size, rtx copy_reg)
+rs6000_emit_allocate_stack (HOST_WIDE_INT size, rtx copy_reg, int copy_off)
 {
   rtx insn;
   rtx stack_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
@@ -18578,7 +18577,12 @@ rs6000_emit_allocate_stack (HOST_WIDE_IN
 }
 
   if (copy_reg)
-emit_move_insn (copy_reg, stack_reg);
+{
+  if (copy_off != 0)
+   emit_insn (gen_add3_insn (copy_reg, stack_reg, GEN_INT (copy_off)));
+  else
+   emit_move_insn (copy_reg, stack_reg);
+}
 
   if (size > 32767)
 {
@@ -18921,42 +18925,22 @@ static rtx
 emit_frame_save (rtx frame_reg, enum machine_mode mode,
 unsigned int regno, int offset, HOST_WIDE_INT frame_reg_to_sp)
 {
-  rtx reg, offset_rtx, insn, mem, addr, int_rtx;
-  rtx replacea, replaceb;
-
-  int_rtx = GEN_INT (offset);
+  rtx reg, insn, mem, addr;
 
   /* Some cases that need register indexed addressing.  */
-  if ((TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode))
-  || (TARGET_VSX && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
-  || (TARGET_E500_DOUBLE && mode == DFmode)
-  || (TARGET_SPE_ABI
-  && SPE_VECTOR_MODE (mode)
-  && !SPE_CONST_OFFSET_OK (offset)))
-{
-  /* Whomever calls us must make sure r11 is available in the
-flow path of instructions in the prologue.  */
-  offset_rtx = gen_rtx_REG (Pmode, 11);
-  emit_move_insn (offset_rtx, int_rtx);
-
-  replacea = offset_rtx;
-  replaceb = int_rtx;
-}
-  else