Re: [RS6000] Fix PR53038, cfa_restore out of order

2012-04-30 Thread David Edelsohn
On Thu, Apr 26, 2012 at 1:00 AM, Alan Modra amo...@gmail.com wrote:

        PR target/53038
        * config/rs6000/rs6000.c (load_lr_save, restore_saved_lr,
        load_cr_save, add_crlr_cfa_restore): New functions.
        (rs6000_restore_saved_cr): Rename to..
        (restore_saved_cr): ..this.  Add cfa_restore notes for cr.
        (rs6000_emit_epilogue): Use new functions.  Adjust condition
        for emitting lr and cr cfa_restore.  Emit cfa_restores for fp
        regs when using out-of-line restore only when shrink wrapping.

Okay.

Thanks, David


Re: [RS6000] Fix PR53038, cfa_restore out of order

2012-04-25 Thread Richard Henderson
On 04/20/12 18:58, Alan Modra wrote:
 +  add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
 +  RTX_FRAME_RELATED_P (insn) = 1;

You're probably better off using REG_CFA_REGISTER here,
rather than making dwarf2cfi.c guess what you meant.
The guessing is rather tuned to prologues...


r~


Re: [RS6000] Fix PR53038, cfa_restore out of order

2012-04-25 Thread Alan Modra
On Wed, Apr 25, 2012 at 02:00:48PM -0700, Richard Henderson wrote:
 On 04/20/12 18:58, Alan Modra wrote:
  +  add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
  +  RTX_FRAME_RELATED_P (insn) = 1;
 
 You're probably better off using REG_CFA_REGISTER here,
 rather than making dwarf2cfi.c guess what you meant.
 The guessing is rather tuned to prologues...

It worked!  I take your point though, and thanks for reviewing the
patch!  The new version is just s/FRAME_RELATED_EXPR/CFA_REGISTER/
but I'll post the entire patch again since my other prologue/epilogue
patches mean the original pr53038 fix no longer applies cleanly.

Bootstrapped and regression tested powerpc-linux.

PR target/53038
* config/rs6000/rs6000.c (load_lr_save, restore_saved_lr,
load_cr_save, add_crlr_cfa_restore): New functions.
(rs6000_restore_saved_cr): Rename to..
(restore_saved_cr): ..this.  Add cfa_restore notes for cr.
(rs6000_emit_epilogue): Use new functions.  Adjust condition
for emitting lr and cr cfa_restore.  Emit cfa_restores for fp
regs when using out-of-line restore only when shrink wrapping.

Index: gcc/config/rs6000/rs6000.c
===
--- gcc/config/rs6000/rs6000.c  (revision 186854)
+++ gcc/config/rs6000/rs6000.c  (working copy)
@@ -952,7 +952,6 @@
 static bool rs6000_reg_live_or_pic_offset_p (int);
 static tree rs6000_builtin_vectorized_libmass (tree, tree, tree);
 static tree rs6000_builtin_vectorized_function (tree, tree, tree);
-static void rs6000_restore_saved_cr (rtx, int);
 static bool rs6000_output_addr_const_extra (FILE *, rtx);
 static void rs6000_output_function_prologue (FILE *, HOST_WIDE_INT);
 static void rs6000_output_function_epilogue (FILE *, HOST_WIDE_INT);
@@ -20276,10 +20275,37 @@
we restore after the pop when possible.  */
 #define ALWAYS_RESTORE_ALTIVEC_BEFORE_POP 0
 
+/* Restoring cr is a two step process: loading a reg from the frame
+   save, then moving the reg to cr.  For ABI_V4 we must let the
+   unwinder know that the stack location is no longer valid at or
+   before the stack deallocation, but we can't emit a cfa_restore for
+   cr at the stack deallocation like we do for other registers.
+   The trouble is that it is possible for the move to cr to be
+   scheduled after the stack deallocation.  So say exactly where cr
+   is located on each of the two insns.  */
+
+static rtx
+load_cr_save (int regno, rtx frame_reg_rtx, int offset, bool exit_func)
+{
+  rtx mem = gen_frame_mem_offset (SImode, frame_reg_rtx, offset);
+  rtx reg = gen_rtx_REG (SImode, regno);
+  rtx insn = emit_move_insn (reg, mem);
+
+  if (!exit_func  DEFAULT_ABI == ABI_V4)
+{
+  rtx cr = gen_rtx_REG (SImode, CR2_REGNO);
+  rtx set = gen_rtx_SET (VOIDmode, reg, cr);
+
+  add_reg_note (insn, REG_CFA_REGISTER, set);
+  RTX_FRAME_RELATED_P (insn) = 1;
+}
+  return reg;
+}
+
 /* Reload CR from REG.  */
 
 static void
-rs6000_restore_saved_cr (rtx reg, int using_mfcr_multiple)
+restore_saved_cr (rtx reg, int using_mfcr_multiple, bool exit_func)
 {
   int count = 0;
   int i;
@@ -20317,13 +20343,61 @@
   else
 for (i = 0; i  8; i++)
   if (df_regs_ever_live_p (CR0_REGNO+i)  ! call_used_regs[CR0_REGNO+i])
-   {
- emit_insn (gen_movsi_to_cr_one (gen_rtx_REG (CCmode,
-  CR0_REGNO+i),
- reg));
-   }
+   emit_insn (gen_movsi_to_cr_one (gen_rtx_REG (CCmode, CR0_REGNO+i),
+   reg));
+
+  if (!exit_func  (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap))
+{
+  rtx insn = get_last_insn ();
+  rtx cr = gen_rtx_REG (SImode, CR2_REGNO);
+
+  add_reg_note (insn, REG_CFA_RESTORE, cr);
+  RTX_FRAME_RELATED_P (insn) = 1;
+}
 }
 
+/* Like cr, the move to lr instruction can be scheduled after the
+   stack deallocation, but unlike cr, its stack frame save is still
+   valid.  So we only need to emit the cfa_restore on the correct
+   instruction.  */
+
+static void
+load_lr_save (int regno, rtx frame_reg_rtx, int offset)
+{
+  rtx mem = gen_frame_mem_offset (Pmode, frame_reg_rtx, offset);
+  rtx reg = gen_rtx_REG (Pmode, regno);
+
+  emit_move_insn (reg, mem);
+}
+
+static void
+restore_saved_lr (int regno, bool exit_func)
+{
+  rtx reg = gen_rtx_REG (Pmode, regno);
+  rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
+  rtx insn = emit_move_insn (lr, reg);
+
+  if (!exit_func  flag_shrink_wrap)
+{
+  add_reg_note (insn, REG_CFA_RESTORE, lr);
+  RTX_FRAME_RELATED_P (insn) = 1;
+}
+}
+
+static rtx
+add_crlr_cfa_restore (const rs6000_stack_t *info, rtx cfa_restores)
+{
+  if (info-cr_save_p)
+cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
+  gen_rtx_REG (SImode, CR2_REGNO),
+  cfa_restores);
+  if (info-lr_save_p)
+cfa_restores = alloc_reg_note