Re: PowerPC prologue and epilogue 6

2012-05-31 Thread Alan Modra
On Thu, May 31, 2012 at 10:41:26AM +0930, Alan Modra wrote:
> Looks like it is one I introduced.  gcc-4.6 uses r12 to save altivec
> regs, my new code tries to use r11.  Will fix.

Please try out this patch on Darwin.  Bootstrapped and regression
tested powerpc-linux.

gcc/
* config/rs6000/rs6000.c (ptr_regno_for_savres): Comment.
(rs6000_emit_prologue): Ensure register used for inline saves
of vector regs is not the static chain register.  Revise comment.
gcc/testsuite/
* gcc.target/powerpc/savres.c: Add -static to dg-options.
Check static chain in nested funcs.

Index: gcc/config/rs6000/rs6000.c
===
--- gcc/config/rs6000/rs6000.c  (revision 187999)
+++ gcc/config/rs6000/rs6000.c  (working copy)
@@ -19108,6 +19161,9 @@ rs6000_emit_stack_reset (rs6000_stack_t *info,
   return NULL_RTX;
 }
 
+/* Return the register number used as a pointer by out-of-line
+   save/restore functions.  */
+
 static inline unsigned
 ptr_regno_for_savres (int sel)
 {
@@ -19845,6 +19901,9 @@ rs6000_emit_prologue (void)
  int sel = SAVRES_SAVE | SAVRES_VR;
  unsigned ptr_regno = ptr_regno_for_savres (sel);
 
+ if (using_static_chain_p
+ && ptr_regno == STATIC_CHAIN_REGNUM)
+   ptr_regno = 12;
  if (REGNO (frame_reg_rtx) != ptr_regno)
START_USE (ptr_regno);
  ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
@@ -19953,9 +20012,9 @@ rs6000_emit_prologue (void)
   int offset;
   int save_regno;
 
-  /* Get VRSAVE onto a GPR.  Note that ABI_V4 might be using r12
-as frame_reg_rtx and r11 as the static chain pointer for
-nested functions.  */
+  /* Get VRSAVE onto a GPR.  Note that ABI_V4 and ABI_DARWIN might
+be using r12 as frame_reg_rtx and r11 as the static chain
+pointer for nested functions.  */
   save_regno = 12;
   if (DEFAULT_ABI == ABI_AIX && !using_static_chain_p)
save_regno = 11;
Index: gcc/testsuite/gcc.target/powerpc/savres.c
===
--- gcc/testsuite/gcc.target/powerpc/savres.c   (revision 187999)
+++ gcc/testsuite/gcc.target/powerpc/savres.c   (working copy)
@@ -1,5 +1,5 @@
 /* { dg-do run } */
-/* { dg-options -fno-inline -fomit-frame-pointer } */
+/* { dg-options -fno-inline -fomit-frame-pointer -static } */
 
 /* -fno-inline -maltivec -m32/-m64 -mmultiple/no-multiple -Os/-O2.  */
 #ifndef NO_BODY
@@ -73,6 +73,7 @@ __attribute__ ((vector_size (16))) int val31 = {-3
 
 #else /* NO_BODY */
 /* For looking at prologue and epilogue code without distractions.  */
+#define abort()
 #define TRASH_ALL_CR
 #define TRASH_ALL_VR
 #define TRASH_ALL_FPR
@@ -458,7 +459,7 @@ void s_0 (void)
 void wb_all (void)
 {
   char b[10];
-  void nb_all (void)
+  char *nb_all (void)
   {
 char a[33000];
 TRASH_ALL_CR;
@@ -470,14 +471,16 @@ void wb_all (void)
 USE_ALL_FPR;
 USE_ALL_GPR;
 __asm __volatile (#%0 %1 : =m (a), =m (b) : : cr2, cr3, cr4, 
v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, 
v31, fr14, fr15, fr16, fr17, fr18, fr19, fr20, fr21, fr22, 
fr23, fr24, fr25, fr26, fr27, fr28, fr29, fr30, fr31, r14, 
r15, r16, r17, r18, r19, r20, r21, r22, r23, r24, r25, 
r26, r27, r28, r29, r30, r31);
+return b;
   }
-  nb_all();
+  if (nb_all() != b)
+abort ();
 }
 
 void wb_cvfr (void)
 {
   char b[10];
-  void nb_cvfr (void)
+  char *nb_cvfr (void)
   {
 char a[33000];
 TRASH_SOME_CR;
@@ -489,14 +492,16 @@ void wb_cvfr (void)
 USE_SOME_FPR;
 USE_SOME_GPR;
 __asm __volatile (#%0 %1 : =m (a), =m (b) : : cr2, v26, v27, 
v31, fr28, fr31, r30, r31);
+return b;
   }
-  nb_cvfr ();
+  if (nb_cvfr () != b)
+abort ();
 }
 
 void wb_vfr (void)
 {
   char b[10];
-  void nb_vfr (void)
+  char *nb_vfr (void)
   {
 char a[33000];
 TRASH_SOME_VR;
@@ -506,14 +511,16 @@ void wb_vfr (void)
 USE_SOME_FPR;
 USE_SOME_GPR;
 __asm __volatile (#%0 %1 : =m (a), =m (b) : : v26, v27, v31, 
fr28, fr31, r30, r31);
+return b;
   }
-  nb_vfr ();
+  if (nb_vfr () != b)
+abort ();
 }
 
 void wb_cvf (void)
 {
   char b[10];
-  void nb_cvf (void)
+  char *nb_cvf (void)
   {
 char a[33000];
 TRASH_SOME_CR;
@@ -523,14 +530,16 @@ void wb_cvf (void)
 USE_SOME_VR;
 USE_SOME_FPR;
 __asm __volatile (#%0 %1 : =m (a), =m (b) : : cr2, v26, v27, 
v31, fr28, fr31);
+return b;
   }
-  nb_cvf ();
+  if (nb_cvf () != b)
+abort ();
 }
 
 void wb_vf (void)
 {
   char b[10];
-  void nb_vf (void)
+  char *nb_vf (void)
   {
 char a[33000];
 TRASH_SOME_VR;
@@ -538,15 +547,17 @@ void wb_vf (void)
 USE_SOME_VR;
 USE_SOME_FPR;
 __asm __volatile (#%0 %1 : =m (a), =m (b) : : v26, v27, v31, 
fr28, fr31);
+return b;
   }
-  nb_vf ();
+  if (nb_vf () != b)
+abort ();
 }
 #endif
 
 void wb_cvr (void)
 {
   char b[10];
-  void nb_cvr (void)
+  char *nb_cvr (void)
   {
 char 

Re: PowerPC prologue and epilogue 6

2012-05-31 Thread Dominique Dhumieres
> Please try out this patch on Darwin.  Bootstrapped and regression
> tested powerpc-linux.

I have applied the patch to r188026 and updated the build.
As patched the test gcc.target/powerpc/savres.c now fails with

FAIL: gcc.target/powerpc/savres.c (test for excess errors)
Excess errors:
ld_classic: can't locate file for: -lcrt0.o

According to Iain Sandoe:

> -static is not applicable to Darwin (except for kernel code).
> to make the test non-pic - use -mdynamic-no-pic (Darwin-only).

Replacing -static with -mdynamic-no-pic makes the test pass
(the final patch will require the suitable dg directives;-).

Thanks

Dominique

PS: A clean bootstrap and full regtesting are scheduled for next weekend.


Re: PowerPC prologue and epilogue 6

2012-05-31 Thread Alan Modra
On Thu, May 31, 2012 at 02:16:32PM +0200, Dominique Dhumieres wrote:
 (the final patch will require the suitable dg directives;-).

This is really stretching my testsuite knowledge.  Maybe add

/* { dg-additional-options "-mdynamic-no-pic" { target *-*-darwin* } } */


-- 
Alan Modra
Australia Development Lab, IBM


Re: PowerPC prologue and epilogue 6

2012-05-31 Thread Dominique Dhumieres
 This is really stretching my testsuite knowledge.  Maybe add

/* { dg-additional-options -mdynamic-no-pic { target *-*-darwin* } } */

Using

/* { dg-options "-fno-inline -fomit-frame-pointer" } */
/* { dg-additional-options "-mdynamic-no-pic" { target *-*-darwin* } } */

works for me on powerpc-apple-darwin9, but I can't test it on non-Darwin powerpc.

Dominique


Re: PowerPC prologue and epilogue 6

2012-05-31 Thread Mike Stump
On May 31, 2012, at 6:42 AM, Dominique Dhumieres wrote:
 This is really stretching my testsuite knowledge.  Maybe add
 
 /* { dg-additional-options -mdynamic-no-pic { target *-*-darwin* } } */
 
 Using
 
 /* { dg-options -fno-inline -fomit-frame-pointer } */
 /* { dg-additional-options -mdynamic-no-pic { target *-*-darwin* } } */
 
 works for me on powerpc-apple-darwin9, but I can't test it on nondarwin 
 powerpc.

Looks good...



Re: PowerPC prologue and epilogue 6

2012-05-30 Thread Dominique Dhumieres
 Yes indeed, and it would be wise to ensure torture-options.exp is
 loaded too.  I'm committing the following as obvious.

Thanks

 Hmm, this will be because darwin is PIC by default.  Does adding
 -static to the dg-options line in savres.c fix the darwin fail?

With the following change

--- /opt/gcc/_gcc_clean/gcc/testsuite/gcc.target/powerpc/savres.c   
2012-05-02 14:25:40.0 +0200
+++ /opt/gcc/work/gcc/testsuite/gcc.target/powerpc/savres.c 2012-05-30 
13:45:15.0 +0200
@@ -1,5 +1,5 @@
 /* { dg-do run } */
-/* { dg-options -fno-inline -fomit-frame-pointer } */
+/* { dg-options -fno-inline -fomit-frame-pointer -static } */
 
 /* -fno-inline -maltivec -m32/-m64 -mmultiple/no-multiple -Os/-O2.  */
 #ifndef NO_BODY

I get an ICE of the form

/opt/gcc/work/gcc/testsuite/gcc.target/powerpc/savres.c: In function 'nb_all':
/opt/gcc/work/gcc/testsuite/gcc.target/powerpc/savres.c:473:3: internal 
compiler error: in rs6000_emit_prologue, at config/rs6000/rs6000.c:19850

Is the test intended to work on PIC targets?

Cheers,

Dominique


Re: PowerPC prologue and epilogue 6

2012-05-30 Thread Alan Modra
On Wed, May 30, 2012 at 03:21:28PM +0200, Dominique Dhumieres wrote:
 I get an ICE of the form
 
 /opt/gcc/work/gcc/testsuite/gcc.target/powerpc/savres.c: In function 'nb_all':
 /opt/gcc/work/gcc/testsuite/gcc.target/powerpc/savres.c:473:3: internal 
 compiler error: in rs6000_emit_prologue, at config/rs6000/rs6000.c:19850
 
 Is the test intended to work on PIC targets?

No, but see rs6000/darwin.h CC1_SPEC.  -static makes you non-PIC.

I've just built a darwin cc1 to reproduce the problem.  The ICE is on
START_USE (ptr_regno);
when setting up a reg to use for altivec saves.  The reg clashes with
the static chain pointer (nb_all is a nested function), so this is a
real bug that the register checks have uncovered.  I haven't
determined whether this is a new bug introduced with my prologue
changes, or whether it's a long-standing bug.  I suspect the latter.

-- 
Alan Modra
Australia Development Lab, IBM


Re: PowerPC prologue and epilogue 6

2012-05-30 Thread Alan Modra
On Thu, May 31, 2012 at 09:43:09AM +0930, Alan Modra wrote:
 real bug that the register checks have uncovered.  I haven't
 determined whether this is a new bug introduced with my prologue
 changes, or whether it's a long-standing bug.  I suspect the latter.

Looks like it is one I introduced.  gcc-4.6 uses r12 to save altivec
regs, my new code tries to use r11.  Will fix.

-- 
Alan Modra
Australia Development Lab, IBM


Re: PowerPC prologue and epilogue 6

2012-05-29 Thread Dominique Dhumieres
Alan,

I think the following patch

--- ../_gcc_clean/gcc/testsuite/gcc.target/powerpc/powerpc.exp  2012-05-02 
14:25:40.0 +0200
+++ ../work/gcc/testsuite/gcc.target/powerpc/powerpc.exp2012-05-29 
21:14:48.0 +0200
@@ -47,4 +47,5 @@ set-torture-options $SAVRES_TEST_OPTS
 gcc-dg-runtest [list $srcdir/$subdir/savres.c] $alti
 
 # All done.
+torture-finish
 dg-finish

is required to avoid the errors of the kind 

ERROR: tcl error sourcing 
/home/gccbuild/gcc_trunk_anonsvn/gcc/gcc/testsuite/gcc.target/powerpc/powerpc.exp.
ERROR: torture-init: torture_without_loops is not empty as expected

(see http://gcc.gnu.org/ml/gcc-testresults/2012-05/msg02608.html ).

In addition the tests of savres.c fails on powerpc-apple-darwin9 with

FAIL: gcc.target/powerpc/savres.c (test for excess errors)
Excess errors:
/opt/gcc/work/gcc/testsuite/gcc.target/powerpc/savres.c:109:3: error: PIC 
register clobbered by 'r31' in 'asm'
/opt/gcc/work/gcc/testsuite/gcc.target/powerpc/savres.c:123:3: error: PIC 
register clobbered by 'r31' in 'asm'
/opt/gcc/work/gcc/testsuite/gcc.target/powerpc/savres.c:135:3: error: PIC 
register clobbered by 'r31' in 'asm'
/opt/gcc/work/gcc/testsuite/gcc.target/powerpc/savres.c:170:3: error: PIC 
register clobbered by 'r31' in 'asm'
/opt/gcc/work/gcc/testsuite/gcc.target/powerpc/savres.c:180:3: error: PIC 
register clobbered by 'r31' in 'asm'
/opt/gcc/work/gcc/testsuite/gcc.target/powerpc/savres.c:212:3: error: PIC 
register clobbered by 'r31' in 'asm'
/opt/gcc/work/gcc/testsuite/gcc.target/powerpc/savres.c:222:3: error: PIC 
register clobbered by 'r31' in 'asm'
/opt/gcc/work/gcc/testsuite/gcc.target/powerpc/savres.c:251:3: error: PIC 
register clobbered by 'r31' in 'asm'
/opt/gcc/work/gcc/testsuite/gcc.target/powerpc/savres.c:259:3: error: PIC 
register clobbered by 'r31' in 'asm'
/opt/gcc/work/gcc/testsuite/gcc.target/powerpc/savres.c:289:3: error: PIC 
register clobbered by 'r31' in 'asm'
/opt/gcc/work/gcc/testsuite/gcc.target/powerpc/savres.c:303:3: error: PIC 
register clobbered by 'r31' in 'asm'
/opt/gcc/work/gcc/testsuite/gcc.target/powerpc/savres.c:315:3: error: PIC 
register clobbered by 'r31' in 'asm'
/opt/gcc/work/gcc/testsuite/gcc.target/powerpc/savres.c:350:3: error: PIC 
register clobbered by 'r31' in 'asm'
/opt/gcc/work/gcc/testsuite/gcc.target/powerpc/savres.c:360:3: error: PIC 
register clobbered by 'r31' in 'asm'
/opt/gcc/work/gcc/testsuite/gcc.target/powerpc/savres.c:392:3: error: PIC 
register clobbered by 'r31' in 'asm'
/opt/gcc/work/gcc/testsuite/gcc.target/powerpc/savres.c:402:3: error: PIC 
register clobbered by 'r31' in 'asm'
/opt/gcc/work/gcc/testsuite/gcc.target/powerpc/savres.c:431:3: error: PIC 
register clobbered by 'r31' in 'asm'
/opt/gcc/work/gcc/testsuite/gcc.target/powerpc/savres.c:439:3: error: PIC 
register clobbered by 'r31' in 'asm'
/opt/gcc/work/gcc/testsuite/gcc.target/powerpc/savres.c:472:5: error: PIC 
register clobbered by 'r31' in 'asm'
/opt/gcc/work/gcc/testsuite/gcc.target/powerpc/savres.c:491:5: error: PIC 
register clobbered by 'r31' in 'asm'
/opt/gcc/work/gcc/testsuite/gcc.target/powerpc/savres.c:508:5: error: PIC 
register clobbered by 'r31' in 'asm'
/opt/gcc/work/gcc/testsuite/gcc.target/powerpc/savres.c:558:5: error: PIC 
register clobbered by 'r31' in 'asm'
/opt/gcc/work/gcc/testsuite/gcc.target/powerpc/savres.c:573:5: error: PIC 
register clobbered by 'r31' in 'asm'
/opt/gcc/work/gcc/testsuite/gcc.target/powerpc/savres.c:620:5: error: PIC 
register clobbered by 'r31' in 'asm'
/opt/gcc/work/gcc/testsuite/gcc.target/powerpc/savres.c:635:5: error: PIC 
register clobbered by 'r31' in 'asm'
/opt/gcc/work/gcc/testsuite/gcc.target/powerpc/savres.c:679:5: error: PIC 
register clobbered by 'r31' in 'asm'
/opt/gcc/work/gcc/testsuite/gcc.target/powerpc/savres.c:692:5: error: PIC 
register clobbered by 'r31' in 'asm'
/opt/gcc/work/gcc/testsuite/gcc.target/powerpc/savres.c:737:5: error: PIC 
register clobbered by 'r31' in 'asm'
/opt/gcc/work/gcc/testsuite/gcc.target/powerpc/savres.c:756:5: error: PIC 
register clobbered by 'r31' in 'asm'
/opt/gcc/work/gcc/testsuite/gcc.target/powerpc/savres.c:773:5: error: PIC 
register clobbered by 'r31' in 'asm'
/opt/gcc/work/gcc/testsuite/gcc.target/powerpc/savres.c:823:5: error: PIC 
register clobbered by 'r31' in 'asm'
/opt/gcc/work/gcc/testsuite/gcc.target/powerpc/savres.c:838:5: error: PIC 
register clobbered by 'r31' in 'asm'
/opt/gcc/work/gcc/testsuite/gcc.target/powerpc/savres.c:885:5: error: PIC 
register clobbered by 'r31' in 'asm'
/opt/gcc/work/gcc/testsuite/gcc.target/powerpc/savres.c:900:5: error: PIC 
register clobbered by 'r31' in 'asm'
/opt/gcc/work/gcc/testsuite/gcc.target/powerpc/savres.c:944:5: error: PIC 
register clobbered by 'r31' in 'asm'
/opt/gcc/work/gcc/testsuite/gcc.target/powerpc/savres.c:957:5: error: PIC 
register clobbered by 'r31' in 'asm'

WARNING: gcc.target/powerpc/savres.c compilation failed to produce executable

However, I am not able to say if this is generic or due to 

Re: PowerPC prologue and epilogue 6

2012-04-25 Thread David Edelsohn
On Wed, Apr 25, 2012 at 1:20 AM, Alan Modra amo...@gmail.com wrote:

 This patch adds a testcase to verify register saves and restores.
 I tried to write it so that it will run on all powerpc targets.  From
 past experience it probably won't.  OK to apply anyway, and fix
 fallout later?

        * gcc.target/powerpc/savres.c: New test.
        * gcc.target/powerpc/powerpc.exp: Run it.

Okay.

Thanks, David


Re: PowerPC prologue and epilogue 4

2012-04-24 Thread David Edelsohn
On Tue, Apr 17, 2012 at 11:13 AM, Alan Modra amo...@gmail.com wrote:
 This provides some protection against misuse of r0, r11 and r12.  I
 found it useful when enabling out-of-line saves for large frames.  ;-)

        * config/rs6000/rs6000.c (START_USE, END_USE, NOT_INUSE): Define.
        (rs6000_emit_prologue): Use the above to catch register overlap.

This patch is okay.

Thanks, David


Re: PowerPC prologue and epilogue 5

2012-04-24 Thread David Edelsohn
On Thu, Apr 19, 2012 at 11:36 AM, Alan Modra amo...@gmail.com wrote:
 On Thu, Apr 19, 2012 at 08:00:15PM +0930, Alan Modra wrote:
 On Wed, Apr 18, 2012 at 12:45:16AM +0930, Alan Modra wrote:
  This enables out-of-line save and restore for large frames, and for
  ABI_AIX when using the static chain.

 Further testing revealed two problems when compiling nested
 functions.
 1) The logic I had for cr_save_regno is wrong, resulting in one of my
    NOT_INUSE asserts triggering.

 Fixed in this revised patch.  Bootstrapped etc. powerpc-linux.

 2) In some cases the prologue uses in-line saves while the epilogue
    uses out-of-line restores.  This can lead to restoring regs that
    haven't been saved.

 This turned out to be a pre-existing problem, patch in PR50340.

        * config/rs6000/rs6000.c (rs6000_savres_strategy): Allow
        out-of-line save/restore for large frames.  Don't disable
        out-of-line saves on ABI_AIX when using static chain reg.
        (rs6000_emit_prologue): Adjust cr_save_regno on ABI_AIX to not
        clobber static chain reg, and tweak for out-of-line gpr saves
        that use r1.

This patch is okay.

Thanks, David


Re: PowerPC prologue and epilogue 6

2012-04-24 Thread David Edelsohn
On Sat, Apr 21, 2012 at 2:48 AM, Alan Modra amo...@gmail.com wrote:
 This patch adds out-of-line vector saves and restores.  To do this I
 made some infrastructure changes to various functions like
 rs6000_emit_savres_rtx that currently take boolean parameters (savep,
 gpr, and lr).  Rather than add yet another boolean to specify vector
 regs, I chose to lump them all together in a bitmask.  This made the
 patch a little larger but overall is a better interface, I think.

 I also revert a change I made in
 http://gcc.gnu.org/ml/gcc-patches/2012-04/msg01014.html to always use
 r11 as a frame reg whenever abiv4 emits out-of-line saves.  Code
 quality in functions with small frames is better without that
 particular change.  This however meant some changes are required later
 when setting up pointer regs for gpr and fpr out-of-line saves.

 What else is here?  Improved register selection when saving vrsave in
 the prologue and when restoring cr in the epilogue, allowing better
 scheduling.  A fix to rs6000_output_function_prologue to output the
 correct .extern for ELF, then deciding we don't need such things
 anyway.  And various other little code cleanups.  Bootstrapped and
 regression tested powerpc-linux.

 gcc/
        * config/rs6000/rs6000 (SAVE_INLINE_VRS, REST_INLINE_VRS,
        V_SAVE_INLINE, SAVRES_LR, SAVRES_SAVE, SAVRES_REG,
        SAVRES_GPR, SAVRES_FPR, SAVRES_VR): Define.
        (no_global_regs_above): Delete.
        (no_global_regs): New function.
        (rs6000_savres_strategy): Handle vector regs.  Use proper lr_save_p
        value for load multiple test.
        (savres_routine_syms): Increase size.
        (rs6000_savres_routine_name, rs6000_savres_routine_sym,
        ptr_regno_for_savres, rs6000_emit_savres_rtx): Pass in int selector
        rather than a number of boolean flags.  Update all callers.
        (rs6000_savres_routine_name): Generate vector save/restore names.
        (rs6000_savres_routine_sym): Handle vector regs.  Delete forward decl.
        (ptr_regno_for_savres, rs6000_emit_savres_rtx): Likewise.
        (rs6000_emit_prologue): Delete saving_FPRs_inline, saving_GPRs_inline
        and using_store_multiple.  Expand uses.  Don't always use r11 as
        frame reg when needed for out-of-line saves.  Set up initial offset
        for out-of-line vector saves when buying stack frame.  Handle pointer
        reg setup for out-of-line fp save.  Emit call to out-of-line vector
        save function.  Choose r11 or r12 for vrsave reg when available for
        better scheduling.
        (rs6000_output_function_prologue): Don't emit .extern for ELF.
        (rs6000_emit_epilogue): Choose a better frame reg when restoring
        from back-chain to suit out-of-line vector restore functions.  Emit
        call to out-of-line vector restore function.  Adjust register used
        for cr restore.  Tweak pointer register setup for gpr restore.
        * config/rs6000/rs6000.h (FIRST_SAVED_GP_REGNO): Take into account
        FIXED_R13.
        * config/rs6000/sysv4.h (FP_SAVE_INLINE, GP_SAVE_INLINE): Simplify.
        (V_SAVE_INLINE): Define.
        * config/rs6000/altivec.md (save_vregs_*, restore_vregs_*): New insns.
 libgcc/
        * config/rs6000/crtsavevr.S: New file.
        * config/rs6000/crtrestvr.S: New file.
        * config/rs6000/t-savresfgpr: Build the above.
        * config/rs6000/t-netbsd: Likewise.

This patch is okay with the macro usage fix.

Thanks, David


Re: PowerPC prologue and epilogue 6

2012-04-24 Thread Alan Modra
On Tue, Apr 24, 2012 at 07:19:42PM -0400, David Edelsohn wrote:
 This patch is okay with the macro usage fix.

Thanks, series 2 to 6 committed as 186796, 186797, 186798, 186799,
186800.  I noticed after I committed the lot that 186797 has some
duplicated lines (harmless), corrected in 186798, and 186799 kept the
old cr_save_regno assignment (again harmless), corrected in 186800.
A result of merge conflicts.  I normally start from a clean source
tree, apply patch as posted, commit, repeat.  This time I had a series
of directories with the cumulative patches applied.  Bad idea unless
you use mf to resolve conflicts..

This patch adds a testcase to verify register saves and restores.
I tried to write it so that it will run on all powerpc targets.  From
past experience it probably won't.  OK to apply anyway, and fix
fallout later?

* gcc.target/powerpc/savres.c: New test.
* gcc.target/powerpc/powerpc.exp: Run it.

Index: gcc/testsuite/gcc.target/powerpc/powerpc.exp
===
--- gcc/testsuite/gcc.target/powerpc/powerpc.exp(revision 186800)
+++ gcc/testsuite/gcc.target/powerpc/powerpc.exp(working copy)
@@ -37,5 +37,14 @@
 dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.\[cS\]]] \
 $DEFAULT_CFLAGS
 
+set SAVRES_TEST_OPTS [list -Os -O2 {-Os -mno-multiple} {-O2 -mno-multiple}]
+set alti 
+if [check_vmx_hw_available] {
+set alti -maltivec
+}
+torture-init
+set-torture-options $SAVRES_TEST_OPTS
+gcc-dg-runtest [list $srcdir/$subdir/savres.c] $alti
+
 # All done.
 dg-finish
Index: gcc/testsuite/gcc.target/powerpc/savres.c
===
--- gcc/testsuite/gcc.target/powerpc/savres.c   (revision 0)
+++ gcc/testsuite/gcc.target/powerpc/savres.c   (revision 0)
@@ -0,0 +1,1158 @@
+/* { dg-do run } */
+/* { dg-options -fno-inline -fomit-frame-pointer } */
+
+/* -fno-inline -maltivec -m32/-m64 -mmultiple/no-multiple -Os/-O2.  */
+#ifndef NO_BODY
+#define abort() __builtin_abort ()
+#define vec_all_eq(v1,v2) __builtin_vec_vcmpeq_p (2, v1, v2)
+#define SET(T,R,V) register T R __asm__ (#R) = V
+#define SET_GPR(R,V) SET (long, R, V)
+#define SET_FPR(R,V) SET (double, R, V)
+#define SET_VR(R,V) SET (__attribute__ ((vector_size (16))) int, R, V)
+#define SET_CR(R,V) __asm__ __volatile__ ("mtcrf %0,%1" : : "n" (1<<(7-R)), 
"r" (V<<(4*(7-R))) : "cr" #R)
+#define TRASH_GPR(R) SET_GPR (R, 0)
+#define TRASH_FPR(R) SET_FPR (R, 0)
+#define TRASH_VR(R) SET_VR (R, val0)
+#define TRASH_CR(R) SET_CR (R, 0)
+#define TRASH_SOME_GPR TRASH_GPR (r30); TRASH_GPR (r31)
+#define TRASH_SOME_FPR TRASH_FPR (fr28); TRASH_FPR (fr31)
+#define TRASH_SOME_VR TRASH_VR (v26); TRASH_VR (v27); TRASH_VR (v31)
+#define TRASH_SOME_CR TRASH_CR (2)
+#define TRASH_ALL_GPR TRASH_GPR (r14); TRASH_GPR (r15); TRASH_GPR (r16); 
TRASH_GPR (r17); TRASH_GPR (r18); TRASH_GPR (r19); TRASH_GPR (r20); TRASH_GPR 
(r21); TRASH_GPR (r22); TRASH_GPR (r23); TRASH_GPR (r24); TRASH_GPR (r25); 
TRASH_GPR (r26); TRASH_GPR (r27); TRASH_GPR (r28); TRASH_GPR (r29); TRASH_GPR 
(r30); TRASH_GPR (r31)
+#define TRASH_ALL_FPR TRASH_FPR (fr14); TRASH_FPR (fr15); TRASH_FPR (fr16); 
TRASH_FPR (fr17); TRASH_FPR (fr18); TRASH_FPR (fr19); TRASH_FPR (fr20); 
TRASH_FPR (fr21); TRASH_FPR (fr22); TRASH_FPR (fr23); TRASH_FPR (fr24); 
TRASH_FPR (fr25); TRASH_FPR (fr26); TRASH_FPR (fr27); TRASH_FPR (fr28); 
TRASH_FPR (fr29); TRASH_FPR (fr30); TRASH_FPR (fr31)
+#define TRASH_ALL_VR TRASH_VR (v20); TRASH_VR (v21); TRASH_VR (v22); TRASH_VR 
(v23); TRASH_VR (v24); TRASH_VR (v25); TRASH_VR (v26); TRASH_VR (v27); TRASH_VR 
(v28); TRASH_VR (v29); TRASH_VR (v30); TRASH_VR (v31)
+#define TRASH_ALL_CR TRASH_CR (2); TRASH_CR (3); TRASH_CR (4)
+#define USE_SOME_GPR __asm__ __volatile__ (#%0 %1 : : r (r30), r (r31))
+#define USE_SOME_FPR __asm__ __volatile__ (#%0 %1 : : f (fr28), f (fr31))
+#define USE_SOME_VR __asm__ __volatile__ (#%0 %1 %2 : : v (v26), v 
(v27), v (v31))
+#define USE_SOME_CR
+#define USE_ALL_GPR __asm__ __volatile__ (#%0 %1 %2 %3 %4 %5 %6 %7 %8 %9 %10 
%11 %12 %13 %14 %15 %16 %17 : : r (r14), r (r15), r (r16), r (r17), 
r (r18), r (r19), r (r20), r (r21), r (r22), r (r23), r (r24), 
r (r25), r (r26), r (r27), r (r28), r (r29), r (r30), r (r31))
+#define USE_ALL_FPR __asm__ __volatile__ (#%0 %1 %2 %3 %4 %5 %6 %7 %8 %9 %10 
%11 %12 %13 %14 %15 %16 %17 : : f (fr14), f (fr15), f (fr16), f 
(fr17), f (fr18), f (fr19), f (fr20), f (fr21), f (fr22), f (fr23), 
f (fr24), f (fr25), f (fr26), f (fr27), f (fr28), f (fr29), f 
(fr30), f (fr31))
+#define USE_ALL_VR __asm__ __volatile__ (#%0 %1 %2 %3 %4 %5 %6 %7 %8 %9 %10 
%11 : : v (v20), v (v21), v (v22), v (v23), v (v24), v (v25), v 
(v26), v (v27), v (v28), v (v29), v (v30), v (v31))
+#define USE_ALL_CR
+
+#define INIT_GPR SET_GPR (r14, 14); SET_GPR (r15, 15); SET_GPR (r16, 16); 
SET_GPR (r17, 17); SET_GPR (r18, 18); SET_GPR (r19, 19); SET_GPR (r20, 20); 
SET_GPR (r21, 21); SET_GPR (r22, 22); SET_GPR (r23, 23); 

Re: PowerPC prologue and epilogue 2

2012-04-22 Thread David Edelsohn
On Tue, Apr 17, 2012 at 11:12 AM, Alan Modra amo...@gmail.com wrote:
 This fixes a lot of confusion in rs6000_frame_related call arguments.
 At the time rs6000_frame_related first appeared, the prologue only
 used sp_reg_rtx (r1) or frame_ptr_rtx (r12) as frame_reg_rtx to access
 register save slots.  If r12 was used, it was necessary to add a note
 that gave the equivalent offset relative to r1.

 Nowadays, r11 is used as frame_reg_rtx too, when abiv4 and saving regs
 out-of-line with a large frame.  When that change was made the calls
 to rs6000_frame_related were not updated.  So rs6000_frame_related
 won't replace r11 in register save rtl.  As it happens this isn't a
 bug because when you look closely, out-of-line saves are disabled with
 a large frame!  A fix for that will come later in this patch series.
 I also optimize rs6000_frame_related a little to save generating
 duplicate rtl.

        * config/rs6000/rs6000.c (rs6000_frame_related): Don't emit a
        REG_FRAME_RELATED_EXPR note when the instruction exactly matches
        the replacement.
        (emit_frame_save): Delete frame_ptr param.  Rename total_size to
        frame_reg_to_sp.
        (rs6000_emit_prologue): Add sp_off.  Update rs6000_frame_related
        and emit_frame_save calls.  Cope with possibly missing note.

Please change 1 to STACK_POINTER_REGNUM.

Okay with that change.

Thanks, David


Re: PowerPC prologue and epilogue 3

2012-04-22 Thread David Edelsohn
On Tue, Apr 17, 2012 at 11:13 AM, Alan Modra amo...@gmail.com wrote:
 This continues the prologue and epilogue cleanup.  Not many user
 visible changes here, except for:
 - a bugfix to the LR save RTL emitted by rs6000_emit_savres_rtx which
  may affect SPE,
 - a bugfix for SPE code emitted when using a static chain,
 - vector saves will be done using r1 for large frames just over 32k in
  size, and,
 - using r11 as a frame pointer whenever we need to set up r11 for
  out-of-line saves, and merging two pointer reg setup insns.
 The latter is a necessary prerequisite to enabling out-of-line
 save/restore for large frames, as I do in a later patch.  Currently
 this will only affect abiv4 -Os when using out-of-line saves.

 eg. -m32 -Os -mno-multiple
 int f (double x)
 {
  char a[33];
  __asm __volatile ("#%0" : "=m" (a) : : "fr31", "r27", "r28");
  return (int) x;
 }
        old                     new
        stwu 1,-96(1)           mflr 0
        mflr 0                  addi 11,1,-8
        addi 11,1,88            stwu 1,-96(1)
        stw 0,100(1)            stw 0,12(11)
        stfd 31,88(1)           bl _savegpr_27
        bl _savegpr_27          stfd 31,0(11)


        * config/rs6000/rs6000.c (rs6000_emit_stack_reset): Delete forward
        decl.  Move logic selecting update reg to callers.  Update all callers.
        (rs6000_emit_allocate_stack): Add copy_off param.
        (emit_frame_save): Don't handle reg+reg addressing.
        (ptr_regno_for_savres): New function, extracted from..
        (rs6000_emit_savres_rtx): ..here.  Add lr_offset param.
        (rs6000_emit_prologue): Generate frame_ptr_rtx as we need it.
        Set frame_reg_rtx to r11 whenever r11 is needed, and merge
        frame offset adjustment for out-of-line save with copy from sp.
        Simplify condition controlling whether cr is saved early or
        late.  Use ptr_regno_for_savres to verify correct reg is set
        up for out-of-line saves.  Pass the actual pointer reg used to
        rs6000_emit_savres_rtx so rtl matches insns in out-of-line
        function.  Rearrange spe vars so code is similar to that
        elsewhere in this function.  Don't update frame_off when spe
        save code will restore r11.  Use emit_frame_save for spe and
        gpr saves.  Consolidate darwin out-of-line gpr setup with that
        for other abis.  Don't assume frame_offset is zero and frame
        reg is sp when setting up altivec reg saves, and calculate
        exact offset requirement.
        (rs6000_emit_epilogue): Use HOST_WIDE_INT for frame_off.  Tidy
        spe restore code.  Consolidate darwin out-of-line gpr setup
        with that for other abis.

This patch is okay.

Thanks, David


PowerPC prologue and epilogue 6

2012-04-21 Thread Alan Modra
This patch adds out-of-line vector saves and restores.  To do this I
made some infrastructure changes to various functions like
rs6000_emit_savres_rtx that currently take boolean parameters (savep,
gpr, and lr).  Rather than add yet another boolean to specify vector
regs, I chose to lump them all together in a bitmask.  This made the
patch a little larger but overall is a better interface, I think.

I also revert a change I made in
http://gcc.gnu.org/ml/gcc-patches/2012-04/msg01014.html to always use
r11 as a frame reg whenever abiv4 emits out-of-line saves.  Code
quality in functions with small frames is better without that
particular change.  This however meant some changes are required later
when setting up pointer regs for gpr and fpr out-of-line saves.

What else is here?  Improved register selection when saving vrsave in
the prologue and when restoring cr in the epilogue, allowing better
scheduling.  A fix to rs6000_output_function_prologue to output the
correct .extern for ELF, then deciding we don't need such things
anyway.  And various other little code cleanups.  Bootstrapped and
regression tested powerpc-linux.

gcc/
* config/rs6000/rs6000.c (SAVE_INLINE_VRS, REST_INLINE_VRS,
V_SAVE_INLINE, SAVRES_LR, SAVRES_SAVE, SAVRES_REG,
SAVRES_GPR, SAVRES_FPR, SAVRES_VR): Define.
(no_global_regs_above): Delete.
(no_global_regs): New function.
(rs6000_savres_strategy): Handle vector regs.  Use proper lr_save_p
value for load multiple test.
(savres_routine_syms): Increase size.
(rs6000_savres_routine_name, rs6000_savres_routine_sym,
ptr_regno_for_savres, rs6000_emit_savres_rtx): Pass in int selector
rather than a number of boolean flags.  Update all callers.
(rs6000_savres_routine_name): Generate vector save/restore names.
(rs6000_savres_routine_sym): Handle vector regs.  Delete forward decl.
(ptr_regno_for_savres, rs6000_emit_savres_rtx): Likewise.
(rs6000_emit_prologue): Delete saving_FPRs_inline, saving_GPRs_inline
and using_store_multiple.  Expand uses.  Don't always use r11 as
frame reg when needed for out-of-line saves.  Set up initial offset
for out-of-line vector saves when buying stack frame.  Handle pointer
reg setup for out-of-line fp save.  Emit call to out-of-line vector
save function.  Choose r11 or r12 for vrsave reg when available for
better scheduling.
(rs6000_output_function_prologue): Don't emit .extern for ELF.
(rs6000_emit_epilogue): Choose a better frame reg when restoring
from back-chain to suit out-of-line vector restore functions.  Emit
call to out-of-line vector restore function.  Adjust register used
for cr restore.  Tweak pointer register setup for gpr restore.
* config/rs6000/rs6000.h (FIRST_SAVED_GP_REGNO): Take into account
FIXED_R13.
* config/rs6000/sysv4.h (FP_SAVE_INLINE, GP_SAVE_INLINE): Simplify.
(V_SAVE_INLINE): Define.
* config/rs6000/altivec.md (save_vregs_*, restore_vregs_*): New insns.
libgcc/
* config/rs6000/crtsavevr.S: New file.
* config/rs6000/crtrestvr.S: New file.
* config/rs6000/t-savresfgpr: Build the above.
* config/rs6000/t-netbsd: Likewise.

diff -urpN gcc-alan5/gcc/config/rs6000/rs6000.c 
gcc-alan6/gcc/config/rs6000/rs6000.c
--- gcc-alan5/gcc/config/rs6000/rs6000.c2012-04-19 20:55:02.214727782 
+0930
+++ gcc-alan6/gcc/config/rs6000/rs6000.c2012-04-21 15:47:44.193462791 
+0930
@@ -937,7 +937,6 @@ static bool legitimate_small_data_p (enu
 static bool legitimate_lo_sum_address_p (enum machine_mode, rtx, int);
 static struct machine_function * rs6000_init_machine_status (void);
 static bool rs6000_assemble_integer (rtx, unsigned int, int);
-static bool no_global_regs_above (int, bool);
 #if defined (HAVE_GAS_HIDDEN)  !TARGET_MACHO
 static void rs6000_assemble_visibility (tree, int);
 #endif
@@ -950,7 +949,6 @@ static tree rs6000_handle_struct_attribu
 static void rs6000_eliminate_indexed_memrefs (rtx operands[2]);
 static const char *rs6000_mangle_type (const_tree);
 static void rs6000_set_default_type_attributes (tree);
-static rtx rs6000_savres_routine_sym (rs6000_stack_t *, bool, bool, bool);
 static bool rs6000_reg_live_or_pic_offset_p (int);
 static tree rs6000_builtin_vectorized_libmass (tree, tree, tree);
 static tree rs6000_builtin_vectorized_function (tree, tree, tree);
@@ -17405,6 +17403,21 @@ is_altivec_return_reg (rtx reg, void *xy
 }
 
 
+/* Look for user-defined global regs in the range FIRST to LAST-1.
+   We should not restore these, and so cannot use lmw or out-of-line
+   restore functions if there are any.  We also can't save them
+   (well, emit frame notes for them), because frame unwinding during
+   exception handling will restore saved registers.  */
+
+static bool
+global_regs_p (unsigned first, unsigned last)
+{
+  while (first  last)
+if 

Re: PowerPC prologue and epilogue 5

2012-04-19 Thread Alan Modra
On Wed, Apr 18, 2012 at 12:45:16AM +0930, Alan Modra wrote:
> This enables out-of-line save and restore for large frames, and for
> ABI_AIX when using the static chain.

Further testing revealed two problems when compiling nested
functions.
1) The logic I had for cr_save_regno is wrong, resulting in one of my
   NOT_INUSE asserts triggering.
2) In some cases the prologue uses in-line saves while the epilogue
   uses out-of-line restores.  This can lead to restoring regs that
   haven't been saved.

Patch withdrawn.

-- 
Alan Modra
Australia Development Lab, IBM


Re: PowerPC prologue and epilogue 5

2012-04-19 Thread Alan Modra
On Thu, Apr 19, 2012 at 08:00:15PM +0930, Alan Modra wrote:
> On Wed, Apr 18, 2012 at 12:45:16AM +0930, Alan Modra wrote:
> > This enables out-of-line save and restore for large frames, and for
> > ABI_AIX when using the static chain.
>
> Further testing revealed two problems when compiling nested
> functions.
> 1) The logic I had for cr_save_regno is wrong, resulting in one of my
>    NOT_INUSE asserts triggering.

Fixed in this revised patch.  Bootstrapped etc. powerpc-linux.

> 2) In some cases the prologue uses in-line saves while the epilogue
>    uses out-of-line restores.  This can lead to restoring regs that
>    haven't been saved.

This turned out to be a pre-existing problem, patch in PR50340.

* config/rs6000/rs6000.c (rs6000_savres_strategy): Allow
out-of-line save/restore for large frames.  Don't disable
out-of-line saves on ABI_AIX when using static chain reg.
(rs6000_emit_prologue): Adjust cr_save_regno on ABI_AIX to not
clobber static chain reg, and tweak for out-of-line gpr saves
that use r1.

diff -urp gcc-alan5a/gcc/config/rs6000/rs6000.c 
gcc-alan5/gcc/config/rs6000/rs6000.c
--- gcc-alan5a/gcc/config/rs6000/rs6000.c   2012-04-19 21:24:46.643632761 
+0930
+++ gcc-alan5/gcc/config/rs6000/rs6000.c2012-04-19 20:55:02.214727782 
+0930
@@ -17432,8 +17432,7 @@ rs6000_savres_strategy (rs6000_stack_t *
 strategy |= SAVRES_MULTIPLE;
 
   if (crtl-calls_eh_return
-  || cfun-machine-ra_need_lr
-  || info-total_size  32767)
+  || cfun-machine-ra_need_lr)
 strategy |= (SAVE_INLINE_FPRS | REST_INLINE_FPRS
 | SAVE_INLINE_GPRS | REST_INLINE_GPRS);
 
@@ -17454,8 +17453,8 @@ rs6000_savres_strategy (rs6000_stack_t *
   /* Don't bother to try to save things out-of-line if r11 is occupied
  by the static chain.  It would require too much fiddling and the
  static chain is rarely used anyway.  FPRs are saved w.r.t the stack
- pointer on Darwin.  */
-  if (using_static_chain_p)
+ pointer on Darwin, and AIX uses r1 or r12.  */
+  if (using_static_chain_p  DEFAULT_ABI != ABI_AIX)
 strategy |= ((DEFAULT_ABI == ABI_DARWIN
  ? 0 : SAVE_INLINE_FPRS | REST_INLINE_FPRS)
 | SAVE_INLINE_GPRS);
@@ -19555,11 +19554,16 @@ rs6000_emit_prologue (void)
}
 }
 
-  /* If we need to save CR, put it into r12 or r11.  */
-  cr_save_regno = DEFAULT_ABI == ABI_AIX  !saving_GPRs_inline ? 11 : 12;
+  /* If we need to save CR, put it into r12 or r11.  Choose r12 except when
+ r12 will be needed by out-of-line gpr restore.  */
+  cr_save_regno = (DEFAULT_ABI == ABI_AIX
+   !(strategy  (SAVE_INLINE_GPRS
+   | SAVE_NOINLINE_GPRS_SAVES_LR))
+  ? 11 : 12);
   if (!WORLD_SAVE_P (info)
info-cr_save_p
-   REGNO (frame_reg_rtx) != cr_save_regno)
+   REGNO (frame_reg_rtx) != cr_save_regno
+   !(using_static_chain_p  cr_save_regno == 11))
 {
   rtx set;
 
-- 
Alan Modra
Australia Development Lab, IBM


Re: PowerPC prologue and epilogue

2012-04-19 Thread David Edelsohn
On Tue, Apr 17, 2012 at 11:08 AM, Alan Modra amo...@gmail.com wrote:
> This is the first in a series of patches cleaning up rs6000 prologue
> and epilogue generating code.  This one is just the formatting/style
> changes plus renaming two variables to better reflect their usage,
> and moving code around.
>
> The patch series has been bootstrapped and regression tested
> powerpc-linux, powerpc64-linux and powerpc-linux-gnuspe.  Please test
> on darwin and aix.
>
>         * config/rs6000/rs6000.c (rs6000_emit_savres_rtx): Formatting.
>         (rs6000_emit_prologue, rs6000_emit_epilogue): Likewise.  Rename
>         sp_offset to frame_off.  Move world save code earlier.

This first patch in the series is okay.

I will start testing the next ones on AIX.

Thanks, David


PowerPC prologue and epilogue

2012-04-17 Thread Alan Modra
This is the first in a series of patches cleaning up rs6000 prologue
and epilogue generating code.  This one is just the formatting/style
changes plus renaming two variables to better reflect their usage,
and moving code around.

The patch series has been bootstrapped and regression tested
powerpc-linux, powerpc64-linux and powerpc-linux-gnuspe.  Please test
on darwin and aix.

* config/rs6000/rs6000.c (rs6000_emit_savres_rtx): Formatting.
(rs6000_emit_prologue, rs6000_emit_epilogue): Likewise.  Rename
sp_offset to frame_off.  Move world save code earlier.

diff -urp gcc-virgin/gcc/config/rs6000/rs6000.c 
gcc-alan1/gcc/config/rs6000/rs6000.c
--- gcc-virgin/gcc/config/rs6000/rs6000.c   2012-04-14 22:48:44.108432893 
+0930
+++ gcc-alan1/gcc/config/rs6000/rs6000.c2012-04-16 11:57:37.282242636 
+0930
@@ -19212,9 +19212,9 @@ rs6000_emit_savres_rtx (rs6000_stack_t *
 
   sym = rs6000_savres_routine_sym (info, savep, gpr, lr);
   RTVEC_ELT (p, offset++) = gen_rtx_USE (VOIDmode, sym);
-  use_reg = DEFAULT_ABI == ABI_AIX ? (gpr  !lr ? 12 : 1)
-  : DEFAULT_ABI == ABI_DARWIN  !gpr ? 1
-  : 11;
+  use_reg = (DEFAULT_ABI == ABI_AIX ? (gpr  !lr ? 12 : 1)
+: DEFAULT_ABI == ABI_DARWIN  !gpr ? 1
+: 11);
   RTVEC_ELT (p, offset++)
 = gen_rtx_USE (VOIDmode,
   gen_rtx_REG (Pmode, use_reg));
@@ -19224,7 +19224,7 @@ rs6000_emit_savres_rtx (rs6000_stack_t *
   rtx addr, reg, mem;
   reg = gen_rtx_REG (reg_mode, start_reg + i);
   addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
-  GEN_INT (save_area_offset + reg_size*i));
+  GEN_INT (save_area_offset + reg_size * i));
   mem = gen_frame_mem (reg_mode, addr);
 
   RTVEC_ELT (p, i + offset) = gen_rtx_SET (VOIDmode,
@@ -19293,9 +19293,9 @@ rs6000_emit_prologue (void)
   int saving_GPRs_inline;
   int using_store_multiple;
   int using_static_chain_p = (cfun-static_chain_decl != NULL_TREE
-   df_regs_ever_live_p (STATIC_CHAIN_REGNUM)
+  df_regs_ever_live_p (STATIC_CHAIN_REGNUM)
   call_used_regs[STATIC_CHAIN_REGNUM]);
-  HOST_WIDE_INT sp_offset = 0;
+  HOST_WIDE_INT frame_off = 0;
 
   if (flag_stack_usage_info)
 current_function_static_stack_size = info-total_size;
@@ -19323,52 +19323,6 @@ rs6000_emit_prologue (void)
   reg_size = 8;
 }
 
-  strategy = info-savres_strategy;
-  using_store_multiple = strategy  SAVRES_MULTIPLE;
-  saving_FPRs_inline = strategy  SAVE_INLINE_FPRS;
-  saving_GPRs_inline = strategy  SAVE_INLINE_GPRS;
-
-  /* For V.4, update stack before we do any saving and set back pointer.  */
-  if (! WORLD_SAVE_P (info)
-   info-push_p
-   (DEFAULT_ABI == ABI_V4
- || crtl-calls_eh_return))
-{
-  bool need_r11 = (TARGET_SPE
-  ? (!saving_GPRs_inline
-  info-spe_64bit_regs_used == 0)
-  : (!saving_FPRs_inline || !saving_GPRs_inline));
-  rtx copy_reg = need_r11 ? gen_rtx_REG (Pmode, 11) : NULL;
-
-  if (info-total_size  32767)
-   sp_offset = info-total_size;
-  else if (need_r11)
-   frame_reg_rtx = copy_reg;
-  else if (info-cr_save_p
-  || info-lr_save_p
-  || info-first_fp_reg_save  64
-  || info-first_gp_reg_save  32
-  || info-altivec_size != 0
-  || info-vrsave_mask != 0
-  || crtl-calls_eh_return)
-   {
- copy_reg = frame_ptr_rtx;
- frame_reg_rtx = copy_reg;
-   }
-  else
-   {
- /* The prologue won't be saving any regs so there is no need
-to set up a frame register to access any frame save area.
-We also won't be using sp_offset anywhere below, but set
-the correct value anyway to protect against future
-changes to this function.  */
- sp_offset = info-total_size;
-   }
-  rs6000_emit_allocate_stack (info-total_size, copy_reg);
-  if (frame_reg_rtx != sp_reg_rtx)
-   rs6000_emit_stack_tie (frame_reg_rtx, false);
-}
-
   /* Handle world saves specially here.  */
   if (WORLD_SAVE_P (info))
 {
@@ -19396,7 +19350,7 @@ rs6000_emit_prologue (void)
   info-push_p
   info-lr_save_p
   (!crtl-calls_eh_return
-  || info-ehrd_offset == -432)
+ || info-ehrd_offset == -432)
   info-vrsave_save_offset == -224
   info-altivec_save_offset == -416);
 
@@ -19423,14 +19377,14 @@ rs6000_emit_prologue (void)
 properly.  */
   for (i = 0; i  64 - info-first_fp_reg_save; i++)
{
- rtx reg = gen_rtx_REG (((TARGET_HARD_FLOAT  TARGET_DOUBLE_FLOAT)
-  ? DFmode : SFmode), 
- 

PowerPC prologue and epilogue 2

2012-04-17 Thread Alan Modra
This fixes a lot of confusion in rs6000_frame_related call arguments.
At the time rs6000_frame_related first appeared, the prologue only
used sp_reg_rtx (r1) or frame_ptr_rtx (r12) as frame_reg_rtx to access
register save slots.  If r12 was used, it was necessary to add a note
that gave the equivalent offset relative to r1.

Nowadays, r11 is used as frame_reg_rtx too, when abiv4 and saving regs
out-of-line with a large frame.  When that change was made the calls
to rs6000_frame_related were not updated.  So rs6000_frame_related
won't replace r11 in register save rtl.  As it happens this isn't a
bug because when you look closely, out-of-line saves are disabled with
a large frame!  A fix for that will come later in this patch series.
I also optimize rs6000_frame_related a little to save generating
duplicate rtl.

* config/rs6000/rs6000.c (rs6000_frame_related): Don't emit a
REG_FRAME_RELATED_EXPR note when the instruction exactly matches
the replacement.
(emit_frame_save): Delete frame_ptr param.  Rename total_size to
frame_reg_to_sp.
(rs6000_emit_prologue): Add sp_off.  Update rs6000_frame_related
and emit_frame_save calls.  Cope with possibly missing note.

diff -urp gcc-alan1/gcc/config/rs6000/rs6000.c 
gcc-alan2/gcc/config/rs6000/rs6000.c
--- gcc-alan1/gcc/config/rs6000/rs6000.c2012-04-16 11:57:37.282242636 
+0930
+++ gcc-alan2/gcc/config/rs6000/rs6000.c2012-04-16 11:58:01.50108 
+0930
@@ -18751,7 +18751,10 @@ output_probe_stack_range (rtx reg1, rtx
with (plus:P (reg 1) VAL), and with REG2 replaced with RREG if REG2
is not NULL.  It would be nice if dwarf2out_frame_debug_expr could
deduce these equivalences by itself so it wasn't necessary to hold
-   its hand so much.  */
+   its hand so much.  Don't be tempted to always supply d2_f_d_e with
+   the actual cfa register, ie. r31 when we are using a hard frame
+   pointer.  That fails when saving regs off r1, and sched moves the
+   r31 setup past the reg saves.  */
 
 static rtx
 rs6000_frame_related (rtx insn, rtx reg, HOST_WIDE_INT val,
@@ -18759,6 +18762,25 @@ rs6000_frame_related (rtx insn, rtx reg,
 {
   rtx real, temp;
 
+  if (REGNO (reg) == 1  reg2 == NULL_RTX)
+{
+  /* No need for any replacement.  Just set RTX_FRAME_RELATED_P.  */
+  int i;
+
+  gcc_checking_assert (val == 0);
+  real = PATTERN (insn);
+  if (GET_CODE (real) == PARALLEL)
+   for (i = 0; i  XVECLEN (real, 0); i++)
+ if (GET_CODE (XVECEXP (real, 0, i)) == SET)
+   {
+ rtx set = XVECEXP (real, 0, i);
+
+ RTX_FRAME_RELATED_P (set) = 1;
+   }
+  RTX_FRAME_RELATED_P (insn) = 1;
+  return insn;
+}
+
   /* copy_rtx will not make unique copies of registers, so we need to
  ensure we don't have unwanted sharing here.  */
   if (reg == reg2)
@@ -18772,10 +18794,13 @@ rs6000_frame_related (rtx insn, rtx reg,
   if (reg2 != NULL_RTX)
 real = replace_rtx (real, reg2, rreg);
 
-  real = replace_rtx (real, reg,
- gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode,
-   STACK_POINTER_REGNUM),
-   GEN_INT (val)));
+  if (REGNO (reg) == 1)
+gcc_checking_assert (val == 0);
+  else
+real = replace_rtx (real, reg,
+   gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode,
+ STACK_POINTER_REGNUM),
+ GEN_INT (val)));
 
   /* We expect that 'real' is either a SET or a PARALLEL containing
  SETs (and possibly other stuff).  In a PARALLEL, all the SETs
@@ -18893,8 +18918,8 @@ generate_set_vrsave (rtx reg, rs6000_sta
Save REGNO into [FRAME_REG + OFFSET] in mode MODE.  */
 
 static rtx
-emit_frame_save (rtx frame_reg, rtx frame_ptr, enum machine_mode mode,
-unsigned int regno, int offset, HOST_WIDE_INT total_size)
+emit_frame_save (rtx frame_reg, enum machine_mode mode,
+unsigned int regno, int offset, HOST_WIDE_INT frame_reg_to_sp)
 {
   rtx reg, offset_rtx, insn, mem, addr, int_rtx;
   rtx replacea, replaceb;
@@ -18930,7 +18955,8 @@ emit_frame_save (rtx frame_reg, rtx fram
 
   insn = emit_move_insn (mem, reg);
 
-  return rs6000_frame_related (insn, frame_ptr, total_size, replacea, 
replaceb);
+  return rs6000_frame_related (insn, frame_reg, frame_reg_to_sp,
+  replacea, replaceb);
 }
 
 /* Emit an offset memory reference suitable for a frame store, while
@@ -19295,7 +19321,9 @@ rs6000_emit_prologue (void)
   int using_static_chain_p = (cfun-static_chain_decl != NULL_TREE
   df_regs_ever_live_p (STATIC_CHAIN_REGNUM)
   call_used_regs[STATIC_CHAIN_REGNUM]);
+  /* Offset to top of frame for frame_reg and sp respectively.  */
   HOST_WIDE_INT frame_off = 0;
+  HOST_WIDE_INT sp_off = 0;
 
   if 

PowerPC prologue and epilogue 3

2012-04-17 Thread Alan Modra
This continues the prologue and epilogue cleanup.  Not many user
visible changes here, except for:
- a bugfix to the LR save RTL emitted by rs6000_emit_savres_rtx which
  may affect SPE,
- a bugfix for SPE code emitted when using a static chain,
- vector saves will be done using r1 for large frames just over 32k in
  size, and,
- using r11 as a frame pointer whenever we need to set up r11 for
  out-of-line saves, and merging two pointer reg setup insns.
The latter is a necessary prerequisite to enabling out-of-line
save/restore for large frames, as I do in a later patch.  Currently
this will only affect abiv4 -Os when using out-of-line saves.

eg. -m32 -Os -mno-multiple
int f (double x)
{
  char a[33];
  __asm __volatile ("#%0" : "=m" (a) : : "fr31", "r27", "r28");
  return (int) x;
}
old                     new
stwu 1,-96(1)           mflr 0
mflr 0                  addi 11,1,-8
addi 11,1,88            stwu 1,-96(1)
stw 0,100(1)            stw 0,12(11)
stfd 31,88(1)           bl _savegpr_27
bl _savegpr_27          stfd 31,0(11)


* config/rs6000/rs6000.c (rs6000_emit_stack_reset): Delete forward
decl.  Move logic selecting update reg to callers.  Update all callers.
(rs6000_emit_allocate_stack): Add copy_off param.
(emit_frame_save): Don't handle reg+reg addressing.
(ptr_regno_for_savres): New function, extracted from..
(rs6000_emit_savres_rtx): ..here.  Add lr_offset param.
(rs6000_emit_prologue): Generate frame_ptr_rtx as we need it.
Set frame_reg_rtx to r11 whenever r11 is needed, and merge
frame offset adjustment for out-of-line save with copy from sp.
Simplify condition controlling whether cr is saved early or
late.  Use ptr_regno_for_savres to verify correct reg is set
up for out-of-line saves.  Pass the actual pointer reg used to
rs6000_emit_savres_rtx so rtl matches insns in out-of-line
function.  Rearrange spe vars so code is similar to that
elsewhere in this function.  Don't update frame_off when spe
save code will restore r11.  Use emit_frame_save for spe and
gpr saves.  Consolidate darwin out-of-line gpr setup with that
for other abis.  Don't assume frame_offset is zero and frame
reg is sp when setting up altivec reg saves, and calculate
exact offset requirement.
(rs6000_emit_epilogue): Use HOST_WIDE_INT for frame_off.  Tidy
spe restore code.  Consolidate darwin out-of-line gpr setup
with that for other abis.

diff -urp gcc-alan2/gcc/config/rs6000/rs6000.c 
gcc-alan3/gcc/config/rs6000/rs6000.c
--- gcc-alan2/gcc/config/rs6000/rs6000.c2012-04-16 11:58:01.50108 
+0930
+++ gcc-alan3/gcc/config/rs6000/rs6000.c2012-04-17 07:19:42.927931887 
+0930
@@ -951,7 +951,6 @@ static void rs6000_eliminate_indexed_mem
 static const char *rs6000_mangle_type (const_tree);
 static void rs6000_set_default_type_attributes (tree);
 static rtx rs6000_savres_routine_sym (rs6000_stack_t *, bool, bool, bool);
-static rtx rs6000_emit_stack_reset (rs6000_stack_t *, rtx, rtx, int, bool);
 static bool rs6000_reg_live_or_pic_offset_p (int);
 static tree rs6000_builtin_vectorized_libmass (tree, tree, tree);
 static tree rs6000_builtin_vectorized_function (tree, tree, tree);
@@ -18534,7 +18533,7 @@ rs6000_emit_stack_tie (rtx fp, bool hard
The generated code may use hard register 0 as a temporary.  */
 
 static void
-rs6000_emit_allocate_stack (HOST_WIDE_INT size, rtx copy_reg)
+rs6000_emit_allocate_stack (HOST_WIDE_INT size, rtx copy_reg, int copy_off)
 {
   rtx insn;
   rtx stack_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
@@ -18578,7 +18577,12 @@ rs6000_emit_allocate_stack (HOST_WIDE_IN
 }
 
   if (copy_reg)
-emit_move_insn (copy_reg, stack_reg);
+{
+  if (copy_off != 0)
+   emit_insn (gen_add3_insn (copy_reg, stack_reg, GEN_INT (copy_off)));
+  else
+   emit_move_insn (copy_reg, stack_reg);
+}
 
   if (size  32767)
 {
@@ -18921,42 +18925,22 @@ static rtx
 emit_frame_save (rtx frame_reg, enum machine_mode mode,
 unsigned int regno, int offset, HOST_WIDE_INT frame_reg_to_sp)
 {
-  rtx reg, offset_rtx, insn, mem, addr, int_rtx;
-  rtx replacea, replaceb;
-
-  int_rtx = GEN_INT (offset);
+  rtx reg, insn, mem, addr;
 
   /* Some cases that need register indexed addressing.  */
-  if ((TARGET_ALTIVEC_ABI  ALTIVEC_VECTOR_MODE (mode))
-  || (TARGET_VSX  ALTIVEC_OR_VSX_VECTOR_MODE (mode))
-  || (TARGET_E500_DOUBLE  mode == DFmode)
-  || (TARGET_SPE_ABI
-  SPE_VECTOR_MODE (mode)
-  !SPE_CONST_OFFSET_OK (offset)))
-{
-  /* Whomever calls us must make sure r11 is available in the
-flow path of instructions in the prologue.  */
-  offset_rtx = gen_rtx_REG (Pmode, 11);
-  emit_move_insn (offset_rtx, int_rtx);
-
-  replacea = offset_rtx;
-  replaceb = int_rtx;
-}
-  else

PowerPC prologue and epilogue 4

2012-04-17 Thread Alan Modra
This provides some protection against misuse of r0, r11 and r12.  I
found it useful when enabling out-of-line saves for large frames.  ;-)

* config/rs6000/rs6000.c (START_USE, END_USE, NOT_INUSE): Define.
(rs6000_emit_prologue): Use the above to catch register overlap.

diff -urp gcc-alan3/gcc/config/rs6000/rs6000.c 
gcc-alan4/gcc/config/rs6000/rs6000.c
--- gcc-alan3/gcc/config/rs6000/rs6000.c2012-04-17 07:19:42.927931887 
+0930
+++ gcc-alan4/gcc/config/rs6000/rs6000.c2012-04-17 09:11:31.760669589 
+0930
@@ -19301,6 +19301,29 @@ rs6000_emit_prologue (void)
   HOST_WIDE_INT frame_off = 0;
   HOST_WIDE_INT sp_off = 0;
 
+#ifdef ENABLE_CHECKING
+  /* Track and check usage of r0, r11, r12.  */
+  int reg_inuse = using_static_chain_p ? 1  11 : 0;
+#define START_USE(R) do \
+  {\
+gcc_assert ((reg_inuse  (1  (R))) == 0);\
+reg_inuse |= 1  (R); \
+  } while (0)
+#define END_USE(R) do \
+  {\
+gcc_assert ((reg_inuse  (1  (R))) != 0);\
+reg_inuse = ~(1  (R));  \
+  } while (0)
+#define NOT_INUSE(R) do \
+  {\
+gcc_assert ((reg_inuse  (1  (R))) == 0);\
+  } while (0)
+#else
+#define START_USE(R) do {} while (0)
+#define END_USE(R) do {} while (0)
+#define NOT_INUSE(R) do {} while (0)
+#endif
+
   if (flag_stack_usage_info)
 current_function_static_stack_size = info-total_size;
 
@@ -19465,6 +19488,7 @@ rs6000_emit_prologue (void)
   if (need_r11)
{
  ptr_reg = gen_rtx_REG (Pmode, 11);
+ START_USE (11);
}
   else if (info-total_size  32767)
frame_off = info-total_size;
@@ -19477,6 +19501,7 @@ rs6000_emit_prologue (void)
   || crtl-calls_eh_return)
{
  ptr_reg = gen_rtx_REG (Pmode, 12);
+ START_USE (12);
}
   else
{
@@ -19509,6 +19534,7 @@ rs6000_emit_prologue (void)
   rtx addr, reg, mem;
 
   reg = gen_rtx_REG (Pmode, 0);
+  START_USE (0);
   insn = emit_move_insn (reg, gen_rtx_REG (Pmode, LR_REGNO));
   RTX_FRAME_RELATED_P (insn) = 1;
 
@@ -19524,6 +19550,7 @@ rs6000_emit_prologue (void)
  insn = emit_move_insn (mem, reg);
  rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
NULL_RTX, NULL_RTX);
+ END_USE (0);
}
 }
 
@@ -19536,6 +19563,7 @@ rs6000_emit_prologue (void)
   rtx set;
 
   cr_save_rtx = gen_rtx_REG (SImode, cr_save_regno);
+  START_USE (cr_save_regno);
   insn = emit_insn (gen_movesi_from_cr (cr_save_rtx));
   RTX_FRAME_RELATED_P (insn) = 1;
   /* Now, there's no way that dwarf2out_frame_debug_expr is going
@@ -19579,6 +19607,8 @@ rs6000_emit_prologue (void)
 /*savep=*/true, /*gpr=*/false, lr);
   rs6000_frame_related (insn, frame_reg_rtx, sp_off,
NULL_RTX, NULL_RTX);
+  if (lr)
+   END_USE (0);
 }
 
   /* Save GPRs.  This is done as a PARALLEL if we are using
@@ -19623,10 +19653,15 @@ rs6000_emit_prologue (void)
  if (using_static_chain_p)
{
  rtx r0 = gen_rtx_REG (Pmode, 0);
+
+ START_USE (0);
  gcc_assert (info-first_gp_reg_save  11);
 
  emit_move_insn (r0, spe_save_area_ptr);
}
+ else if (REGNO (frame_reg_rtx) != 11)
+   START_USE (11);
+
  emit_insn (gen_addsi3 (spe_save_area_ptr,
 frame_reg_rtx, GEN_INT (offset)));
  if (!using_static_chain_p  REGNO (frame_reg_rtx) == 11)
@@ -19657,8 +19692,16 @@ rs6000_emit_prologue (void)
}
 
   /* Move the static chain pointer back.  */
-  if (using_static_chain_p  !spe_regs_addressable)
-   emit_move_insn (spe_save_area_ptr, gen_rtx_REG (Pmode, 0));
+  if (!spe_regs_addressable)
+   {
+ if (using_static_chain_p)
+   {
+ emit_move_insn (spe_save_area_ptr, gen_rtx_REG (Pmode, 0));
+ END_USE (0);
+   }
+ else if (REGNO (frame_reg_rtx) != 11)
+   END_USE (11);
+   }
 }
   else if (!WORLD_SAVE_P (info)  !saving_GPRs_inline)
 {
@@ -19679,10 +19722,13 @@ rs6000_emit_prologue (void)
 
  if (ptr_set_up)
frame_off = -end_save;
+ else
+   NOT_INUSE (ptr_regno);
  emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
}
   else if (!ptr_set_up)
{
+ NOT_INUSE (ptr_regno);
  emit_move_insn (ptr_reg, frame_reg_rtx);
}
   ptr_off = -end_save;
@@ -19693,6 +19739,8 @@ rs6000_emit_prologue (void)
 /*savep=*/true, /*gpr=*/true, lr);
   rs6000_frame_related (insn, ptr_reg, sp_off - ptr_off,
NULL_RTX, NULL_RTX);
+  if (lr)
+ 

PowerPC prologue and epilogue 5

2012-04-17 Thread Alan Modra
This enables out-of-line save and restore for large frames, and for
ABI_AIX when using the static chain.

* config/rs6000/rs6000.c (rs6000_savres_strategy): Allow
out-of-line save/restore for large frames.  Don't disable
out-of-line saves on ABI_AIX when using static chain reg.
(rs6000_emit_prologue): Adjust cr_save_regno on ABI_AIX to not
clobber static chain reg, and tweak for out-of-line gpr saves
that use r1.

diff -urp gcc-alan4/gcc/config/rs6000/rs6000.c 
gcc-alan5/gcc/config/rs6000/rs6000.c
--- gcc-alan4/gcc/config/rs6000/rs6000.c2012-04-17 09:11:31.760669589 
+0930
+++ gcc-alan5/gcc/config/rs6000/rs6000.c2012-04-17 11:16:09.369537832 
+0930
@@ -17432,8 +17432,7 @@ rs6000_savres_strategy (rs6000_stack_t *
 strategy |= SAVRES_MULTIPLE;
 
   if (crtl-calls_eh_return
-  || cfun-machine-ra_need_lr
-  || info-total_size  32767)
+  || cfun-machine-ra_need_lr)
 strategy |= (SAVE_INLINE_FPRS | REST_INLINE_FPRS
 | SAVE_INLINE_GPRS | REST_INLINE_GPRS);
 
@@ -17454,10 +17453,10 @@ rs6000_savres_strategy (rs6000_stack_t *
   /* Don't bother to try to save things out-of-line if r11 is occupied
  by the static chain.  It would require too much fiddling and the
  static chain is rarely used anyway.  FPRs are saved w.r.t the stack
- pointer on Darwin.  */
-  if (using_static_chain_p)
-strategy |= (DEFAULT_ABI == ABI_DARWIN ? 0 : SAVE_INLINE_FPRS)
-   | SAVE_INLINE_GPRS;
+ pointer on Darwin, and AIX uses r1 or r12.  */
+  if (using_static_chain_p  DEFAULT_ABI != ABI_AIX)
+strategy |= ((DEFAULT_ABI == ABI_DARWIN ? 0 : SAVE_INLINE_FPRS)
+| SAVE_INLINE_GPRS);
 
   /* If we are going to use store multiple, then don't even bother
  with the out-of-line routines, since the store-multiple
@@ -19555,7 +19554,10 @@ rs6000_emit_prologue (void)
 }
 
   /* If we need to save CR, put it into r12 or r11.  */
-  cr_save_regno = DEFAULT_ABI == ABI_AIX  !saving_GPRs_inline ? 11 : 12;
+  cr_save_regno = (DEFAULT_ABI == ABI_AIX
+   (strategy  SAVE_INLINE_GPRS) == 0
+   (strategy  SAVE_NOINLINE_GPRS_SAVES_LR) == 0
+   !using_static_chain_p ? 11 : 12);
   if (!WORLD_SAVE_P (info)
info-cr_save_p
REGNO (frame_reg_rtx) != cr_save_regno)

-- 
Alan Modra
Australia Development Lab, IBM