Re: PowerPC prologue and epilogue 6
On Thu, May 31, 2012 at 10:41:26AM +0930, Alan Modra wrote: Looks like it is one I introduced. gcc-4.6 uses r12 to save altivec regs, my new code tries to use r11. Will fix. Please try out this patch on Darwin. Bootstrapped and regression tested powerpc-linux. gcc/ * config/rs6000/rs6000.c (ptr_regno_for_savres): Comment. (rs6000_emit_prologue): Ensure register used for inline saves of vector regs is not the static chain register. Revise comment. gcc/testsuite/ * gcc.target/powerpc/savres.c: Add -static to dg-options. Check static chain in nested funcs. Index: gcc/config/rs6000/rs6000.c === --- gcc/config/rs6000/rs6000.c (revision 187999) +++ gcc/config/rs6000/rs6000.c (working copy) @@ -19108,6 +19161,9 @@ rs6000_emit_stack_reset (rs6000_stack_t *info, return NULL_RTX; } +/* Return the register number used as a pointer by out-of-line + save/restore functions. */ + static inline unsigned ptr_regno_for_savres (int sel) { @@ -19845,6 +19901,9 @@ rs6000_emit_prologue (void) int sel = SAVRES_SAVE | SAVRES_VR; unsigned ptr_regno = ptr_regno_for_savres (sel); + if (using_static_chain_p + ptr_regno == STATIC_CHAIN_REGNUM) + ptr_regno = 12; if (REGNO (frame_reg_rtx) != ptr_regno) START_USE (ptr_regno); ptr_reg = gen_rtx_REG (Pmode, ptr_regno); @@ -19953,9 +20012,9 @@ rs6000_emit_prologue (void) int offset; int save_regno; - /* Get VRSAVE onto a GPR. Note that ABI_V4 might be using r12 -as frame_reg_rtx and r11 as the static chain pointer for -nested functions. */ + /* Get VRSAVE onto a GPR. Note that ABI_V4 and ABI_DARWIN might +be using r12 as frame_reg_rtx and r11 as the static chain +pointer for nested functions. 
*/ save_regno = 12; if (DEFAULT_ABI == ABI_AIX !using_static_chain_p) save_regno = 11; Index: gcc/testsuite/gcc.target/powerpc/savres.c === --- gcc/testsuite/gcc.target/powerpc/savres.c (revision 187999) +++ gcc/testsuite/gcc.target/powerpc/savres.c (working copy) @@ -1,5 +1,5 @@ /* { dg-do run } */ -/* { dg-options -fno-inline -fomit-frame-pointer } */ +/* { dg-options -fno-inline -fomit-frame-pointer -static } */ /* -fno-inline -maltivec -m32/-m64 -mmultiple/no-multiple -Os/-O2. */ #ifndef NO_BODY @@ -73,6 +73,7 @@ __attribute__ ((vector_size (16))) int val31 = {-3 #else /* NO_BODY */ /* For looking at prologue and epilogue code without distractions. */ +#define abort() #define TRASH_ALL_CR #define TRASH_ALL_VR #define TRASH_ALL_FPR @@ -458,7 +459,7 @@ void s_0 (void) void wb_all (void) { char b[10]; - void nb_all (void) + char *nb_all (void) { char a[33000]; TRASH_ALL_CR; @@ -470,14 +471,16 @@ void wb_all (void) USE_ALL_FPR; USE_ALL_GPR; __asm __volatile (#%0 %1 : =m (a), =m (b) : : cr2, cr3, cr4, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, fr14, fr15, fr16, fr17, fr18, fr19, fr20, fr21, fr22, fr23, fr24, fr25, fr26, fr27, fr28, fr29, fr30, fr31, r14, r15, r16, r17, r18, r19, r20, r21, r22, r23, r24, r25, r26, r27, r28, r29, r30, r31); +return b; } - nb_all(); + if (nb_all() != b) +abort (); } void wb_cvfr (void) { char b[10]; - void nb_cvfr (void) + char *nb_cvfr (void) { char a[33000]; TRASH_SOME_CR; @@ -489,14 +492,16 @@ void wb_cvfr (void) USE_SOME_FPR; USE_SOME_GPR; __asm __volatile (#%0 %1 : =m (a), =m (b) : : cr2, v26, v27, v31, fr28, fr31, r30, r31); +return b; } - nb_cvfr (); + if (nb_cvfr () != b) +abort (); } void wb_vfr (void) { char b[10]; - void nb_vfr (void) + char *nb_vfr (void) { char a[33000]; TRASH_SOME_VR; @@ -506,14 +511,16 @@ void wb_vfr (void) USE_SOME_FPR; USE_SOME_GPR; __asm __volatile (#%0 %1 : =m (a), =m (b) : : v26, v27, v31, fr28, fr31, r30, r31); +return b; } - nb_vfr (); + if (nb_vfr () != b) +abort (); } void 
wb_cvf (void) { char b[10]; - void nb_cvf (void) + char *nb_cvf (void) { char a[33000]; TRASH_SOME_CR; @@ -523,14 +530,16 @@ void wb_cvf (void) USE_SOME_VR; USE_SOME_FPR; __asm __volatile (#%0 %1 : =m (a), =m (b) : : cr2, v26, v27, v31, fr28, fr31); +return b; } - nb_cvf (); + if (nb_cvf () != b) +abort (); } void wb_vf (void) { char b[10]; - void nb_vf (void) + char *nb_vf (void) { char a[33000]; TRASH_SOME_VR; @@ -538,15 +547,17 @@ void wb_vf (void) USE_SOME_VR; USE_SOME_FPR; __asm __volatile (#%0 %1 : =m (a), =m (b) : : v26, v27, v31, fr28, fr31); +return b; } - nb_vf (); + if (nb_vf () != b) +abort (); } #endif void wb_cvr (void) { char b[10]; - void nb_cvr (void) + char *nb_cvr (void) { char
Re: PowerPC prologue and epilogue 6
Please try out this patch on Darwin. Bootstrapped and regression tested powerpc-linux. I have applied the patch to r188026 and updated the build. As patched, the test gcc.target/powerpc/savres.c now fails with FAIL: gcc.target/powerpc/savres.c (test for excess errors) Excess errors: ld_classic: can't locate file for: -lcrt0.o According to Iain Sandoe, -static is not applicable to Darwin (except for kernel code); to make the test non-PIC, use -mdynamic-no-pic (Darwin-only). Replacing -static with -mdynamic-no-pic makes the test pass (the final patch will require the suitable dg directives ;-). Thanks Dominique PS: Clean bootstrap and full regtesting are scheduled for next weekend.
Re: PowerPC prologue and epilogue 6
On Thu, May 31, 2012 at 02:16:32PM +0200, Dominique Dhumieres wrote: (the final patch will require the suitable dg directives;-). This is really stretching my testsuite knowledge. Maybe add /* { dg-additional-options "-mdynamic-no-pic" { target *-*-darwin* } } */ -- Alan Modra Australia Development Lab, IBM
Re: PowerPC prologue and epilogue 6
This is really stretching my testsuite knowledge. Maybe add /* { dg-additional-options "-mdynamic-no-pic" { target *-*-darwin* } } */ Using /* { dg-options "-fno-inline -fomit-frame-pointer" } */ /* { dg-additional-options "-mdynamic-no-pic" { target *-*-darwin* } } */ works for me on powerpc-apple-darwin9, but I can't test it on non-Darwin powerpc. Dominique
Re: PowerPC prologue and epilogue 6
On May 31, 2012, at 6:42 AM, Dominique Dhumieres wrote: This is really stretching my testsuite knowledge. Maybe add /* { dg-additional-options "-mdynamic-no-pic" { target *-*-darwin* } } */ Using /* { dg-options "-fno-inline -fomit-frame-pointer" } */ /* { dg-additional-options "-mdynamic-no-pic" { target *-*-darwin* } } */ works for me on powerpc-apple-darwin9, but I can't test it on non-Darwin powerpc. Looks good...
Re: PowerPC prologue and epilogue 6
Yes indeed, and it would be wise to ensure torture-options.exp is loaded too. I'm committing the following as obvious. Thanks Hmm, this will be because darwin is PIC by default. Does adding -static to the dg-options line in savres.c fix the darwin fail? With the following change --- /opt/gcc/_gcc_clean/gcc/testsuite/gcc.target/powerpc/savres.c 2012-05-02 14:25:40.0 +0200 +++ /opt/gcc/work/gcc/testsuite/gcc.target/powerpc/savres.c 2012-05-30 13:45:15.0 +0200 @@ -1,5 +1,5 @@ /* { dg-do run } */ -/* { dg-options -fno-inline -fomit-frame-pointer } */ +/* { dg-options -fno-inline -fomit-frame-pointer -static } */ /* -fno-inline -maltivec -m32/-m64 -mmultiple/no-multiple -Os/-O2. */ #ifndef NO_BODY I get an ICE of the form /opt/gcc/work/gcc/testsuite/gcc.target/powerpc/savres.c: In function 'nb_all': /opt/gcc/work/gcc/testsuite/gcc.target/powerpc/savres.c:473:3: internal compiler error: in rs6000_emit_prologue, at config/rs6000/rs6000.c:19850 Is the test intended to work on PIC targets? Cheers, Dominique
Re: PowerPC prologue and epilogue 6
On Wed, May 30, 2012 at 03:21:28PM +0200, Dominique Dhumieres wrote: I get an ICE of the form /opt/gcc/work/gcc/testsuite/gcc.target/powerpc/savres.c: In function 'nb_all': /opt/gcc/work/gcc/testsuite/gcc.target/powerpc/savres.c:473:3: internal compiler error: in rs6000_emit_prologue, at config/rs6000/rs6000.c:19850 Is the test intended to work on PIC targets? No, but see rs6000/darwin.h CC1_SPEC. -static makes you non-PIC. I've just built a darwin cc1 to reproduce the problem. The ICE is on START_USE (ptr_regno); when setting up a reg to use for altivec saves. The reg clashes with the static chain pointer (nb_all is a nested function), so this is a real bug that the register checks have uncovered. I haven't determined whether this is a new bug introduced with my prologue changes, or whether it's a long-standing bug. I suspect the latter. -- Alan Modra Australia Development Lab, IBM
Re: PowerPC prologue and epilogue 6
On Thu, May 31, 2012 at 09:43:09AM +0930, Alan Modra wrote: real bug that the register checks have uncovered. I haven't determined whether this is a new bug introduced with my prologue changes, or whether it's a long-standing bug. I suspect the latter. Looks like it is one I introduced. gcc-4.6 uses r12 to save altivec regs, my new code tries to use r11. Will fix. -- Alan Modra Australia Development Lab, IBM
Re: PowerPC prologue and epilogue 6
Alan, I think the following patch --- ../_gcc_clean/gcc/testsuite/gcc.target/powerpc/powerpc.exp 2012-05-02 14:25:40.0 +0200 +++ ../work/gcc/testsuite/gcc.target/powerpc/powerpc.exp2012-05-29 21:14:48.0 +0200 @@ -47,4 +47,5 @@ set-torture-options $SAVRES_TEST_OPTS gcc-dg-runtest [list $srcdir/$subdir/savres.c] $alti # All done. +torture-finish dg-finish is required to avoid the errors of the kind ERROR: tcl error sourcing /home/gccbuild/gcc_trunk_anonsvn/gcc/gcc/testsuite/gcc.target/powerpc/powerpc.exp. ERROR: torture-init: torture_without_loops is not empty as expected (see http://gcc.gnu.org/ml/gcc-testresults/2012-05/msg02608.html ). In addition the tests of savres.c fails on powerpc-apple-darwin9 with FAIL: gcc.target/powerpc/savres.c (test for excess errors) Excess errors: /opt/gcc/work/gcc/testsuite/gcc.target/powerpc/savres.c:109:3: error: PIC register clobbered by 'r31' in 'asm' /opt/gcc/work/gcc/testsuite/gcc.target/powerpc/savres.c:123:3: error: PIC register clobbered by 'r31' in 'asm' /opt/gcc/work/gcc/testsuite/gcc.target/powerpc/savres.c:135:3: error: PIC register clobbered by 'r31' in 'asm' /opt/gcc/work/gcc/testsuite/gcc.target/powerpc/savres.c:170:3: error: PIC register clobbered by 'r31' in 'asm' /opt/gcc/work/gcc/testsuite/gcc.target/powerpc/savres.c:180:3: error: PIC register clobbered by 'r31' in 'asm' /opt/gcc/work/gcc/testsuite/gcc.target/powerpc/savres.c:212:3: error: PIC register clobbered by 'r31' in 'asm' /opt/gcc/work/gcc/testsuite/gcc.target/powerpc/savres.c:222:3: error: PIC register clobbered by 'r31' in 'asm' /opt/gcc/work/gcc/testsuite/gcc.target/powerpc/savres.c:251:3: error: PIC register clobbered by 'r31' in 'asm' /opt/gcc/work/gcc/testsuite/gcc.target/powerpc/savres.c:259:3: error: PIC register clobbered by 'r31' in 'asm' /opt/gcc/work/gcc/testsuite/gcc.target/powerpc/savres.c:289:3: error: PIC register clobbered by 'r31' in 'asm' /opt/gcc/work/gcc/testsuite/gcc.target/powerpc/savres.c:303:3: error: PIC register clobbered by 
'r31' in 'asm' /opt/gcc/work/gcc/testsuite/gcc.target/powerpc/savres.c:315:3: error: PIC register clobbered by 'r31' in 'asm' /opt/gcc/work/gcc/testsuite/gcc.target/powerpc/savres.c:350:3: error: PIC register clobbered by 'r31' in 'asm' /opt/gcc/work/gcc/testsuite/gcc.target/powerpc/savres.c:360:3: error: PIC register clobbered by 'r31' in 'asm' /opt/gcc/work/gcc/testsuite/gcc.target/powerpc/savres.c:392:3: error: PIC register clobbered by 'r31' in 'asm' /opt/gcc/work/gcc/testsuite/gcc.target/powerpc/savres.c:402:3: error: PIC register clobbered by 'r31' in 'asm' /opt/gcc/work/gcc/testsuite/gcc.target/powerpc/savres.c:431:3: error: PIC register clobbered by 'r31' in 'asm' /opt/gcc/work/gcc/testsuite/gcc.target/powerpc/savres.c:439:3: error: PIC register clobbered by 'r31' in 'asm' /opt/gcc/work/gcc/testsuite/gcc.target/powerpc/savres.c:472:5: error: PIC register clobbered by 'r31' in 'asm' /opt/gcc/work/gcc/testsuite/gcc.target/powerpc/savres.c:491:5: error: PIC register clobbered by 'r31' in 'asm' /opt/gcc/work/gcc/testsuite/gcc.target/powerpc/savres.c:508:5: error: PIC register clobbered by 'r31' in 'asm' /opt/gcc/work/gcc/testsuite/gcc.target/powerpc/savres.c:558:5: error: PIC register clobbered by 'r31' in 'asm' /opt/gcc/work/gcc/testsuite/gcc.target/powerpc/savres.c:573:5: error: PIC register clobbered by 'r31' in 'asm' /opt/gcc/work/gcc/testsuite/gcc.target/powerpc/savres.c:620:5: error: PIC register clobbered by 'r31' in 'asm' /opt/gcc/work/gcc/testsuite/gcc.target/powerpc/savres.c:635:5: error: PIC register clobbered by 'r31' in 'asm' /opt/gcc/work/gcc/testsuite/gcc.target/powerpc/savres.c:679:5: error: PIC register clobbered by 'r31' in 'asm' /opt/gcc/work/gcc/testsuite/gcc.target/powerpc/savres.c:692:5: error: PIC register clobbered by 'r31' in 'asm' /opt/gcc/work/gcc/testsuite/gcc.target/powerpc/savres.c:737:5: error: PIC register clobbered by 'r31' in 'asm' /opt/gcc/work/gcc/testsuite/gcc.target/powerpc/savres.c:756:5: error: PIC register clobbered by 
'r31' in 'asm' /opt/gcc/work/gcc/testsuite/gcc.target/powerpc/savres.c:773:5: error: PIC register clobbered by 'r31' in 'asm' /opt/gcc/work/gcc/testsuite/gcc.target/powerpc/savres.c:823:5: error: PIC register clobbered by 'r31' in 'asm' /opt/gcc/work/gcc/testsuite/gcc.target/powerpc/savres.c:838:5: error: PIC register clobbered by 'r31' in 'asm' /opt/gcc/work/gcc/testsuite/gcc.target/powerpc/savres.c:885:5: error: PIC register clobbered by 'r31' in 'asm' /opt/gcc/work/gcc/testsuite/gcc.target/powerpc/savres.c:900:5: error: PIC register clobbered by 'r31' in 'asm' /opt/gcc/work/gcc/testsuite/gcc.target/powerpc/savres.c:944:5: error: PIC register clobbered by 'r31' in 'asm' /opt/gcc/work/gcc/testsuite/gcc.target/powerpc/savres.c:957:5: error: PIC register clobbered by 'r31' in 'asm' WARNING: gcc.target/powerpc/savres.c compilation failed to produce executable However I am not able to say if this generic or due to
Re: PowerPC prologue and epilogue 6
On Wed, Apr 25, 2012 at 1:20 AM, Alan Modra amo...@gmail.com wrote: This patch adds a testcase to verify register saves and restores. I tried to write it so that it will run on all powerpc targets. From past experience it probably won't. OK to apply anyway, and fix fallout later? * gcc.target/powerpc/savres.c: New test. * gcc.target/powerpc/powerpc.exp: Run it. Okay. Thanks, David
Re: PowerPC prologue and epilogue 4
On Tue, Apr 17, 2012 at 11:13 AM, Alan Modra amo...@gmail.com wrote: This provides some protection against misuse of r0, r11 and r12. I found it useful when enabling out-of-line saves for large frames. ;-) * config/rs6000/rs6000.c (START_USE, END_USE, NOT_INUSE): Define. (rs6000_emit_prologue): Use the above to catch register overlap. This patch is okay. Thanks, David
Re: PowerPC prologue and epilogue 5
On Thu, Apr 19, 2012 at 11:36 AM, Alan Modra amo...@gmail.com wrote: On Thu, Apr 19, 2012 at 08:00:15PM +0930, Alan Modra wrote: On Wed, Apr 18, 2012 at 12:45:16AM +0930, Alan Modra wrote: This enables out-of-line save and restore for large frames, and for ABI_AIX when using the static chain. Further testing revealed two problems when compiling nested functions. 1) The logic I had for cr_save_regno is wrong, resulting in one of my NOT_INUSE asserts triggering. Fixed in this revised patch. Bootstrapped etc. powerpc-linux. 2) In some cases the prologue uses in-line saves while the epilogue uses out-of-line restores. This can lead to restoring regs that haven't been saved. This turned out to be a pre-existing problem, patch in PR50340. * config/rs6000/rs6000.c (rs6000_savres_strategy): Allow out-of-line save/restore for large frames. Don't disable out-of-line saves on ABI_AIX when using static chain reg. (rs6000_emit_prologue): Adjust cr_save_regno on ABI_AIX to not clobber static chain reg, and tweak for out-of-line gpr saves that use r1. This patch is okay. Thanks, David
Re: PowerPC prologue and epilogue 6
On Sat, Apr 21, 2012 at 2:48 AM, Alan Modra amo...@gmail.com wrote: This patch adds out-of-line vector saves and restores. To do this I made some infrastructure changes to various functions like rs6000_emit_savres_rtx that currently take boolean parameters (savep, gpr, and lr). Rather than add yet another boolean to specify vector regs, I chose to lump them all together in a bitmask. This made the patch a little larger but overall is a better interface, I think. I also revert a change I made in http://gcc.gnu.org/ml/gcc-patches/2012-04/msg01014.html to always use r11 as a frame reg whenever abiv4 emits out-of-line saves. Code quality in functions with small frames is better without that particular change. This however meant some changes are required later when setting up pointer regs for gpr and fpr out-of-line saves. What else is here? Improved register selection when saving vrsave in the prologue and when restoring cr in the epilogue, allowing better scheduling. A fix to rs6000_output_function_prologue to output the correct .extern for ELF, then deciding we don't need such things anyway. And various other little code cleanups. Bootstrapped and regression tested powerpc-linux. gcc/ * config/rs6000/rs6000 (SAVE_INLINE_VRS, REST_INLINE_VRS, V_SAVE_INLINE, SAVRES_LR, SAVRES_SAVE, SAVRES_REG, SAVRES_GPR, SAVRES_FPR, SAVRES_VR): Define. (no_global_regs_above): Delete. (no_global_regs): New function. (rs6000_savres_strategy): Handle vector regs. Use proper lr_save_p value for load multiple test. (savres_routine_syms): Increase size. (rs6000_savres_routine_name, rs6000_savres_routine_sym, ptr_regno_for_savres, rs6000_emit_savres_rtx): Pass in int selector rather than a number of boolean flags. Update all callers. (rs6000_savres_routine_name): Generate vector save/restore names. (rs6000_savres_routine_sym): Handle vector regs. Delete forward decl. (ptr_regno_for_savres, rs6000_emit_savres_rtx): Likewise. 
(rs6000_emit_prologue): Delete saving_FPRs_inline, saving_GPRs_inline and using_store_multiple. Expand uses. Don't always use r11 as frame reg when needed for out-of-line saves. Set up initial offset for out-of-line vector saves when buying stack frame. Handle pointer reg setup for out-of-line fp save. Emit call to out-of-line vector save function. Choose r11 or r12 for vrsave reg when available for better scheduling. (rs6000_output_function_prologue): Don't emit .extern for ELF. (rs6000_emit_epilogue): Choose a better frame reg when restoring from back-chain to suit out-of-line vector restore functions. Emit call to out-of-line vector restore function. Adjust register used for cr restore. Tweak pointer register setup for gpr restore. * config/rs6000/rs6000.h (FIRST_SAVED_GP_REGNO): Take into account FIXED_R13. * config/rs6000/sysv4.h (FP_SAVE_INLINE, GP_SAVE_INLINE): Simplify. (V_SAVE_INLINE): Define. * config/rs6000/altivec.md (save_vregs_*, restore_vregs_*): New insns. libgcc/ * config/rs6000/crtsavevr.S: New file. * config/rs6000/crtrestvr.S: New file. * config/rs6000/t-savresfgpr: Build the above. * config/rs6000/t-netbsd: Likewise. This patch is okay with the macro usage fix. Thanks, David
Re: PowerPC prologue and epilogue 6
On Tue, Apr 24, 2012 at 07:19:42PM -0400, David Edelsohn wrote: This patch is okay with the macro usage fix. Thanks, series 2 to 6 committed as 186796, 186797, 186798, 186799, 186800. I noticed after I committed the lot that 186797 has some duplicated lines (harmless), corrected in 186798, and 186799 kept the old cr_save_regno assignment (again harmless), corrected in 186800. A result of merge conflicts. I normally start from a clean source tree, apply patch as posted, commit, repeat. This time I had a series of directories with the cumulative patches applied. Bad idea unless you use mf to resolve conflicts.. This patch adds a testcase to verify register saves and restores. I tried to write it so that it will run on all powerpc targets. From past experience it probably won't. OK to apply anyway, and fix fallout later? * gcc.target/powerpc/savres.c: New test. * gcc.target/powerpc/powerpc.exp: Run it. Index: gcc/testsuite/gcc.target/powerpc/powerpc.exp === --- gcc/testsuite/gcc.target/powerpc/powerpc.exp(revision 186800) +++ gcc/testsuite/gcc.target/powerpc/powerpc.exp(working copy) @@ -37,5 +37,14 @@ dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.\[cS\]]] \ $DEFAULT_CFLAGS +set SAVRES_TEST_OPTS [list -Os -O2 {-Os -mno-multiple} {-O2 -mno-multiple}] +set alti +if [check_vmx_hw_available] { +set alti -maltivec +} +torture-init +set-torture-options $SAVRES_TEST_OPTS +gcc-dg-runtest [list $srcdir/$subdir/savres.c] $alti + # All done. dg-finish Index: gcc/testsuite/gcc.target/powerpc/savres.c === --- gcc/testsuite/gcc.target/powerpc/savres.c (revision 0) +++ gcc/testsuite/gcc.target/powerpc/savres.c (revision 0) @@ -0,0 +1,1158 @@ +/* { dg-do run } */ +/* { dg-options -fno-inline -fomit-frame-pointer } */ + +/* -fno-inline -maltivec -m32/-m64 -mmultiple/no-multiple -Os/-O2. 
*/ +#ifndef NO_BODY +#define abort() __builtin_abort () +#define vec_all_eq(v1,v2) __builtin_vec_vcmpeq_p (2, v1, v2) +#define SET(T,R,V) register T R __asm__ (#R) = V +#define SET_GPR(R,V) SET (long, R, V) +#define SET_FPR(R,V) SET (double, R, V) +#define SET_VR(R,V) SET (__attribute__ ((vector_size (16))) int, R, V) +#define SET_CR(R,V) __asm__ __volatile__ (mtcrf %0,%1 : : n (1(7-R)), r (V(4*(7-R))) : cr #R) +#define TRASH_GPR(R) SET_GPR (R, 0) +#define TRASH_FPR(R) SET_FPR (R, 0) +#define TRASH_VR(R) SET_VR (R, val0) +#define TRASH_CR(R) SET_CR (R, 0) +#define TRASH_SOME_GPR TRASH_GPR (r30); TRASH_GPR (r31) +#define TRASH_SOME_FPR TRASH_FPR (fr28); TRASH_FPR (fr31) +#define TRASH_SOME_VR TRASH_VR (v26); TRASH_VR (v27); TRASH_VR (v31) +#define TRASH_SOME_CR TRASH_CR (2) +#define TRASH_ALL_GPR TRASH_GPR (r14); TRASH_GPR (r15); TRASH_GPR (r16); TRASH_GPR (r17); TRASH_GPR (r18); TRASH_GPR (r19); TRASH_GPR (r20); TRASH_GPR (r21); TRASH_GPR (r22); TRASH_GPR (r23); TRASH_GPR (r24); TRASH_GPR (r25); TRASH_GPR (r26); TRASH_GPR (r27); TRASH_GPR (r28); TRASH_GPR (r29); TRASH_GPR (r30); TRASH_GPR (r31) +#define TRASH_ALL_FPR TRASH_FPR (fr14); TRASH_FPR (fr15); TRASH_FPR (fr16); TRASH_FPR (fr17); TRASH_FPR (fr18); TRASH_FPR (fr19); TRASH_FPR (fr20); TRASH_FPR (fr21); TRASH_FPR (fr22); TRASH_FPR (fr23); TRASH_FPR (fr24); TRASH_FPR (fr25); TRASH_FPR (fr26); TRASH_FPR (fr27); TRASH_FPR (fr28); TRASH_FPR (fr29); TRASH_FPR (fr30); TRASH_FPR (fr31) +#define TRASH_ALL_VR TRASH_VR (v20); TRASH_VR (v21); TRASH_VR (v22); TRASH_VR (v23); TRASH_VR (v24); TRASH_VR (v25); TRASH_VR (v26); TRASH_VR (v27); TRASH_VR (v28); TRASH_VR (v29); TRASH_VR (v30); TRASH_VR (v31) +#define TRASH_ALL_CR TRASH_CR (2); TRASH_CR (3); TRASH_CR (4) +#define USE_SOME_GPR __asm__ __volatile__ (#%0 %1 : : r (r30), r (r31)) +#define USE_SOME_FPR __asm__ __volatile__ (#%0 %1 : : f (fr28), f (fr31)) +#define USE_SOME_VR __asm__ __volatile__ (#%0 %1 %2 : : v (v26), v (v27), v (v31)) +#define USE_SOME_CR +#define 
USE_ALL_GPR __asm__ __volatile__ (#%0 %1 %2 %3 %4 %5 %6 %7 %8 %9 %10 %11 %12 %13 %14 %15 %16 %17 : : r (r14), r (r15), r (r16), r (r17), r (r18), r (r19), r (r20), r (r21), r (r22), r (r23), r (r24), r (r25), r (r26), r (r27), r (r28), r (r29), r (r30), r (r31)) +#define USE_ALL_FPR __asm__ __volatile__ (#%0 %1 %2 %3 %4 %5 %6 %7 %8 %9 %10 %11 %12 %13 %14 %15 %16 %17 : : f (fr14), f (fr15), f (fr16), f (fr17), f (fr18), f (fr19), f (fr20), f (fr21), f (fr22), f (fr23), f (fr24), f (fr25), f (fr26), f (fr27), f (fr28), f (fr29), f (fr30), f (fr31)) +#define USE_ALL_VR __asm__ __volatile__ (#%0 %1 %2 %3 %4 %5 %6 %7 %8 %9 %10 %11 : : v (v20), v (v21), v (v22), v (v23), v (v24), v (v25), v (v26), v (v27), v (v28), v (v29), v (v30), v (v31)) +#define USE_ALL_CR + +#define INIT_GPR SET_GPR (r14, 14); SET_GPR (r15, 15); SET_GPR (r16, 16); SET_GPR (r17, 17); SET_GPR (r18, 18); SET_GPR (r19, 19); SET_GPR (r20, 20); SET_GPR (r21, 21); SET_GPR (r22, 22); SET_GPR (r23, 23);
Re: PowerPC prologue and epilogue 2
On Tue, Apr 17, 2012 at 11:12 AM, Alan Modra amo...@gmail.com wrote: This fixes a lot of confusion in rs6000_frame_related call arguments. At the time rs6000_frame_related first appeared, the prologue only used sp_reg_rtx (r1) or frame_ptr_rtx (r12) as frame_reg_rtx to access register save slots. If r12 was used, it was necessary to add a note that gave the equivalent offset relative to r1. Nowadays, r11 is used as frame_reg_rtx too, when abiv4 and saving regs out-of-line with a large frame. When that change was made the calls to rs6000_frame_related were not updated. So rs6000_frame_related won't replace r11 in register save rtl. As it happens this isn't a bug because when you look closely, out-of-line saves are disabled with a large frame! A fix for that will come later in this patch series. I also optimize rs6000_frame_related a little to save generating duplicate rtl. * config/rs6000/rs6000.c (rs6000_frame_related): Don't emit a REG_FRAME_RELATED_EXPR note when the instruction exactly matches the replacement. (emit_frame_save): Delete frame_ptr param. Rename total_size to frame_reg_to_sp. (rs6000_emit_prologue): Add sp_off. Update rs6000_frame_related and emit_frame_save calls. Cope with possibly missing note. Please change 1 to STACK_POINTER_REGNUM. Okay with that change. Thanks, David
Re: PowerPC prologue and epilogue 3
On Tue, Apr 17, 2012 at 11:13 AM, Alan Modra amo...@gmail.com wrote: This continues the prologue and epilogue cleanup. Not many user visible changes here, except for: - a bugfix to the LR save RTL emitted by rs6000_emit_savres_rtx which may affect SPE, - a bugfix for SPE code emitted when using a static chain, - vector saves will be done using r1 for large frames just over 32k in size, and, - using r11 as a frame pointer whenever we need to set up r11 for out-of-line saves, and merging two pointer reg setup insns. The latter is a necessary prerequisite to enabling out-of-line save/restore for large frames, as I do in a later patch. Currently this will only affect abiv4 -Os when using out-of-line saves. eg. -m32 -Os -mno-multiple
int f (double x) { char a[33]; __asm __volatile ("#%0" : "=m" (a) : : "fr31", "r27", "r28"); return (int) x; }
old                     new
stwu 1,-96(1)           mflr 0
mflr 0                  addi 11,1,-8
addi 11,1,88            stwu 1,-96(1)
stw 0,100(1)            stw 0,12(11)
stfd 31,88(1)           bl _savegpr_27
bl _savegpr_27          stfd 31,0(11)
* config/rs6000/rs6000.c (rs6000_emit_stack_reset): Delete forward decl. Move logic selecting update reg to callers. Update all callers. (rs6000_emit_allocate_stack): Add copy_off param. (emit_frame_save): Don't handle reg+reg addressing. (ptr_regno_for_savres): New function, extracted from.. (rs6000_emit_savres_rtx): ..here. Add lr_offset param. (rs6000_emit_prologue): Generate frame_ptr_rtx as we need it. Set frame_reg_rtx to r11 whenever r11 is needed, and merge frame offset adjustment for out-of-line save with copy from sp. Simplify condition controlling whether cr is saved early or late. Use ptr_regno_for_savres to verify correct reg is set up for out-of-line saves. Pass the actual pointer reg used to rs6000_emit_savres_rtx so rtl matches insns in out-of-line function. Rearrange spe vars so code is similar to that elsewhere in this function. Don't update frame_off when spe save code will restore r11. Use emit_frame_save for spe and gpr saves. 
Consolidate darwin out-of-line gpr setup with that for other abis. Don't assume frame_offset is zero and frame reg is sp when setting up altivec reg saves, and calculate exact offset requirement. (rs6000_emit_epilogue): Use HOST_WIDE_INT for frame_off. Tidy spe restore code. Consolidate darwin out-of-line gpr setup with that for other abis. This patch is okay. Thanks, David
PowerPC prologue and epilogue 6
This patch adds out-of-line vector saves and restores. To do this I made some infrastructure changes to various functions like rs6000_emit_savres_rtx that currently take boolean parameters (savep, gpr, and lr). Rather than add yet another boolean to specify vector regs, I chose to lump them all together in a bitmask. This made the patch a little larger but overall is a better interface, I think. I also revert a change I made in http://gcc.gnu.org/ml/gcc-patches/2012-04/msg01014.html to always use r11 as a frame reg whenever abiv4 emits out-of-line saves. Code quality in functions with small frames is better without that particular change. This however meant some changes are required later when setting up pointer regs for gpr and fpr out-of-line saves. What else is here? Improved register selection when saving vrsave in the prologue and when restoring cr in the epilogue, allowing better scheduling. A fix to rs6000_output_function_prologue to output the correct .extern for ELF, then deciding we don't need such things anyway. And various other little code cleanups. Bootstrapped and regression tested powerpc-linux. gcc/ * config/rs6000/rs6000 (SAVE_INLINE_VRS, REST_INLINE_VRS, V_SAVE_INLINE, SAVRES_LR, SAVRES_SAVE, SAVRES_REG, SAVRES_GPR, SAVRES_FPR, SAVRES_VR): Define. (no_global_regs_above): Delete. (no_global_regs): New function. (rs6000_savres_strategy): Handle vector regs. Use proper lr_save_p value for load multiple test. (savres_routine_syms): Increase size. (rs6000_savres_routine_name, rs6000_savres_routine_sym, ptr_regno_for_savres, rs6000_emit_savres_rtx): Pass in int selector rather than a number of boolean flags. Update all callers. (rs6000_savres_routine_name): Generate vector save/restore names. (rs6000_savres_routine_sym): Handle vector regs. Delete forward decl. (ptr_regno_for_savres, rs6000_emit_savres_rtx): Likewise. (rs6000_emit_prologue): Delete saving_FPRs_inline, saving_GPRs_inline and using_store_multiple. Expand uses. 
Don't always use r11 as frame reg when needed for out-of-line saves. Set up initial offset for out-of-line vector saves when buying stack frame. Handle pointer reg setup for out-of-line fp save. Emit call to out-of-line vector save function. Choose r11 or r12 for vrsave reg when available for better scheduling. (rs6000_output_function_prologue): Don't emit .extern for ELF. (rs6000_emit_epilogue): Choose a better frame reg when restoring from back-chain to suit out-of-line vector restore functions. Emit call to out-of-line vector restore function. Adjust register used for cr restore. Tweak pointer register setup for gpr restore. * config/rs6000/rs6000.h (FIRST_SAVED_GP_REGNO): Take into account FIXED_R13. * config/rs6000/sysv4.h (FP_SAVE_INLINE, GP_SAVE_INLINE): Simplify. (V_SAVE_INLINE): Define. * config/rs6000/altivec.md (save_vregs_*, restore_vregs_*): New insns. libgcc/ * config/rs6000/crtsavevr.S: New file. * config/rs6000/crtrestvr.S: New file. * config/rs6000/t-savresfgpr: Build the above. * config/rs6000/t-netbsd: Likewise. 
diff -urpN gcc-alan5/gcc/config/rs6000/rs6000.c gcc-alan6/gcc/config/rs6000/rs6000.c --- gcc-alan5/gcc/config/rs6000/rs6000.c2012-04-19 20:55:02.214727782 +0930 +++ gcc-alan6/gcc/config/rs6000/rs6000.c2012-04-21 15:47:44.193462791 +0930 @@ -937,7 +937,6 @@ static bool legitimate_small_data_p (enu static bool legitimate_lo_sum_address_p (enum machine_mode, rtx, int); static struct machine_function * rs6000_init_machine_status (void); static bool rs6000_assemble_integer (rtx, unsigned int, int); -static bool no_global_regs_above (int, bool); #if defined (HAVE_GAS_HIDDEN) !TARGET_MACHO static void rs6000_assemble_visibility (tree, int); #endif @@ -950,7 +949,6 @@ static tree rs6000_handle_struct_attribu static void rs6000_eliminate_indexed_memrefs (rtx operands[2]); static const char *rs6000_mangle_type (const_tree); static void rs6000_set_default_type_attributes (tree); -static rtx rs6000_savres_routine_sym (rs6000_stack_t *, bool, bool, bool); static bool rs6000_reg_live_or_pic_offset_p (int); static tree rs6000_builtin_vectorized_libmass (tree, tree, tree); static tree rs6000_builtin_vectorized_function (tree, tree, tree); @@ -17405,6 +17403,21 @@ is_altivec_return_reg (rtx reg, void *xy } +/* Look for user-defined global regs in the range FIRST to LAST-1. + We should not restore these, and so cannot use lmw or out-of-line + restore functions if there are any. We also can't save them + (well, emit frame notes for them), because frame unwinding during + exception handling will restore saved registers. */ + +static bool +global_regs_p (unsigned first, unsigned last) +{ + while (first last) +if
Re: PowerPC prologue and epilogue 5
On Wed, Apr 18, 2012 at 12:45:16AM +0930, Alan Modra wrote: "This enables out-of-line save and restore for large frames, and for ABI_AIX when using the static chain." Further testing revealed two problems when compiling nested functions. 1) The logic I had for cr_save_regno is wrong, resulting in one of my NOT_INUSE asserts triggering. 2) In some cases the prologue uses in-line saves while the epilogue uses out-of-line restores. This can lead to restoring regs that haven't been saved. Patch withdrawn. -- Alan Modra Australia Development Lab, IBM
Re: PowerPC prologue and epilogue 5
On Thu, Apr 19, 2012 at 08:00:15PM +0930, Alan Modra wrote: On Wed, Apr 18, 2012 at 12:45:16AM +0930, Alan Modra wrote: This enables out-of-line save and restore for large frames, and for ABI_AIX when using the static chain. Further testing revealed two problems when compiling nested functions. 1) The logic I had for cr_save_regno is wrong, resulting in one of my NOT_INUSE asserts triggering. Fixed in this revised patch. Bootstrapped etc. powerpc-linux. 2) In some cases the prologue uses in-line saves while the epilogue uses out-of-line restores. This can lead to restoring regs that haven't been saved. This turned out to be a pre-existing problem, patch in PR50340. * config/rs6000/rs6000.c (rs6000_savres_strategy): Allow out-of-line save/restore for large frames. Don't disable out-of-line saves on ABI_AIX when using static chain reg. (rs6000_emit_prologue): Adjust cr_save_regno on ABI_AIX to not clobber static chain reg, and tweak for out-of-line gpr saves that use r1. diff -urp gcc-alan5a/gcc/config/rs6000/rs6000.c gcc-alan5/gcc/config/rs6000/rs6000.c --- gcc-alan5a/gcc/config/rs6000/rs6000.c 2012-04-19 21:24:46.643632761 +0930 +++ gcc-alan5/gcc/config/rs6000/rs6000.c2012-04-19 20:55:02.214727782 +0930 @@ -17432,8 +17432,7 @@ rs6000_savres_strategy (rs6000_stack_t * strategy |= SAVRES_MULTIPLE; if (crtl-calls_eh_return - || cfun-machine-ra_need_lr - || info-total_size 32767) + || cfun-machine-ra_need_lr) strategy |= (SAVE_INLINE_FPRS | REST_INLINE_FPRS | SAVE_INLINE_GPRS | REST_INLINE_GPRS); @@ -17454,8 +17453,8 @@ rs6000_savres_strategy (rs6000_stack_t * /* Don't bother to try to save things out-of-line if r11 is occupied by the static chain. It would require too much fiddling and the static chain is rarely used anyway. FPRs are saved w.r.t the stack - pointer on Darwin. */ - if (using_static_chain_p) + pointer on Darwin, and AIX uses r1 or r12. */ + if (using_static_chain_p DEFAULT_ABI != ABI_AIX) strategy |= ((DEFAULT_ABI == ABI_DARWIN ? 
0 : SAVE_INLINE_FPRS | REST_INLINE_FPRS) | SAVE_INLINE_GPRS); @@ -19555,11 +19554,16 @@ rs6000_emit_prologue (void) } } - /* If we need to save CR, put it into r12 or r11. */ - cr_save_regno = DEFAULT_ABI == ABI_AIX !saving_GPRs_inline ? 11 : 12; + /* If we need to save CR, put it into r12 or r11. Choose r12 except when + r12 will be needed by out-of-line gpr restore. */ + cr_save_regno = (DEFAULT_ABI == ABI_AIX + !(strategy (SAVE_INLINE_GPRS + | SAVE_NOINLINE_GPRS_SAVES_LR)) + ? 11 : 12); if (!WORLD_SAVE_P (info) info-cr_save_p - REGNO (frame_reg_rtx) != cr_save_regno) + REGNO (frame_reg_rtx) != cr_save_regno + !(using_static_chain_p cr_save_regno == 11)) { rtx set; -- Alan Modra Australia Development Lab, IBM
Re: PowerPC prologue and epilogue
On Tue, Apr 17, 2012 at 11:08 AM, Alan Modra <amo...@gmail.com> wrote: "This is the first in a series of patches cleaning up rs6000 prologue and epilogue generating code. This one is just the formatting/style changes plus renaming two variables to better reflect their usage, and moving code around. The patch series has been bootstrapped and regression tested powerpc-linux, powerpc64-linux and powerpc-linux-gnuspe. Please test on darwin and aix. * config/rs6000/rs6000.c (rs6000_emit_savres_rtx): Formatting. (rs6000_emit_prologue, rs6000_emit_epilogue): Likewise. Rename sp_offset to frame_off. Move world save code earlier." This first patch in the series is okay. I will start testing the next ones on AIX. Thanks, David
PowerPC prologue and epilogue
This is the first in a series of patches cleaning up rs6000 prologue and epilogue generating code. This one is just the formatting/style changes plus renaming two variables to better reflect their usage, and moving code around. The patch series has been bootstrapped and regression tested powerpc-linux, powerpc64-linux and powerpc-linux-gnuspe. Please test on darwin and aix. * config/rs6000/rs6000.c (rs6000_emit_savres_rtx): Formatting. (rs6000_emit_prologue, rs6000_emit_epilogue): Likewise. Rename sp_offset to frame_off. Move world save code earlier. diff -urp gcc-virgin/gcc/config/rs6000/rs6000.c gcc-alan1/gcc/config/rs6000/rs6000.c --- gcc-virgin/gcc/config/rs6000/rs6000.c 2012-04-14 22:48:44.108432893 +0930 +++ gcc-alan1/gcc/config/rs6000/rs6000.c2012-04-16 11:57:37.282242636 +0930 @@ -19212,9 +19212,9 @@ rs6000_emit_savres_rtx (rs6000_stack_t * sym = rs6000_savres_routine_sym (info, savep, gpr, lr); RTVEC_ELT (p, offset++) = gen_rtx_USE (VOIDmode, sym); - use_reg = DEFAULT_ABI == ABI_AIX ? (gpr !lr ? 12 : 1) - : DEFAULT_ABI == ABI_DARWIN !gpr ? 1 - : 11; + use_reg = (DEFAULT_ABI == ABI_AIX ? (gpr !lr ? 12 : 1) +: DEFAULT_ABI == ABI_DARWIN !gpr ? 
1 +: 11); RTVEC_ELT (p, offset++) = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, use_reg)); @@ -19224,7 +19224,7 @@ rs6000_emit_savres_rtx (rs6000_stack_t * rtx addr, reg, mem; reg = gen_rtx_REG (reg_mode, start_reg + i); addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, - GEN_INT (save_area_offset + reg_size*i)); + GEN_INT (save_area_offset + reg_size * i)); mem = gen_frame_mem (reg_mode, addr); RTVEC_ELT (p, i + offset) = gen_rtx_SET (VOIDmode, @@ -19293,9 +19293,9 @@ rs6000_emit_prologue (void) int saving_GPRs_inline; int using_store_multiple; int using_static_chain_p = (cfun-static_chain_decl != NULL_TREE - df_regs_ever_live_p (STATIC_CHAIN_REGNUM) + df_regs_ever_live_p (STATIC_CHAIN_REGNUM) call_used_regs[STATIC_CHAIN_REGNUM]); - HOST_WIDE_INT sp_offset = 0; + HOST_WIDE_INT frame_off = 0; if (flag_stack_usage_info) current_function_static_stack_size = info-total_size; @@ -19323,52 +19323,6 @@ rs6000_emit_prologue (void) reg_size = 8; } - strategy = info-savres_strategy; - using_store_multiple = strategy SAVRES_MULTIPLE; - saving_FPRs_inline = strategy SAVE_INLINE_FPRS; - saving_GPRs_inline = strategy SAVE_INLINE_GPRS; - - /* For V.4, update stack before we do any saving and set back pointer. */ - if (! WORLD_SAVE_P (info) - info-push_p - (DEFAULT_ABI == ABI_V4 - || crtl-calls_eh_return)) -{ - bool need_r11 = (TARGET_SPE - ? (!saving_GPRs_inline - info-spe_64bit_regs_used == 0) - : (!saving_FPRs_inline || !saving_GPRs_inline)); - rtx copy_reg = need_r11 ? 
gen_rtx_REG (Pmode, 11) : NULL; - - if (info-total_size 32767) - sp_offset = info-total_size; - else if (need_r11) - frame_reg_rtx = copy_reg; - else if (info-cr_save_p - || info-lr_save_p - || info-first_fp_reg_save 64 - || info-first_gp_reg_save 32 - || info-altivec_size != 0 - || info-vrsave_mask != 0 - || crtl-calls_eh_return) - { - copy_reg = frame_ptr_rtx; - frame_reg_rtx = copy_reg; - } - else - { - /* The prologue won't be saving any regs so there is no need -to set up a frame register to access any frame save area. -We also won't be using sp_offset anywhere below, but set -the correct value anyway to protect against future -changes to this function. */ - sp_offset = info-total_size; - } - rs6000_emit_allocate_stack (info-total_size, copy_reg); - if (frame_reg_rtx != sp_reg_rtx) - rs6000_emit_stack_tie (frame_reg_rtx, false); -} - /* Handle world saves specially here. */ if (WORLD_SAVE_P (info)) { @@ -19396,7 +19350,7 @@ rs6000_emit_prologue (void) info-push_p info-lr_save_p (!crtl-calls_eh_return - || info-ehrd_offset == -432) + || info-ehrd_offset == -432) info-vrsave_save_offset == -224 info-altivec_save_offset == -416); @@ -19423,14 +19377,14 @@ rs6000_emit_prologue (void) properly. */ for (i = 0; i 64 - info-first_fp_reg_save; i++) { - rtx reg = gen_rtx_REG (((TARGET_HARD_FLOAT TARGET_DOUBLE_FLOAT) - ? DFmode : SFmode), -
PowerPC prologue and epilogue 2
This fixes a lot of confusion in rs6000_frame_related call arguments. At the time rs6000_frame_related first appeared, the prologue only used sp_reg_rtx (r1) or frame_ptr_rtx (r12) as frame_reg_rtx to access register save slots. If r12 was used, it was necessary to add a note that gave the equivalent offset relative to r1. Nowadays, r11 is used as frame_reg_rtx too, when abiv4 and saving regs out-of-line with a large frame. When that change was made the calls to rs6000_frame_related were not updated. So rs6000_frame_related won't replace r11 in register save rtl. As it happens this isn't a bug because when you look closely, out-of-line saves are disabled with a large frame! A fix for that will come later in this patch series. I also optimize rs6000_frame_related a little to save generating duplicate rtl. * config/rs6000/rs6000.c (rs6000_frame_related): Don't emit a REG_FRAME_RELATED_EXPR note when the instruction exactly matches the replacement. (emit_frame_save): Delete frame_ptr param. Rename total_size to frame_reg_to_sp. (rs6000_emit_prologue): Add sp_off. Update rs6000_frame_related and emit_frame_save calls. Cope with possibly missing note. diff -urp gcc-alan1/gcc/config/rs6000/rs6000.c gcc-alan2/gcc/config/rs6000/rs6000.c --- gcc-alan1/gcc/config/rs6000/rs6000.c2012-04-16 11:57:37.282242636 +0930 +++ gcc-alan2/gcc/config/rs6000/rs6000.c2012-04-16 11:58:01.50108 +0930 @@ -18751,7 +18751,10 @@ output_probe_stack_range (rtx reg1, rtx with (plus:P (reg 1) VAL), and with REG2 replaced with RREG if REG2 is not NULL. It would be nice if dwarf2out_frame_debug_expr could deduce these equivalences by itself so it wasn't necessary to hold - its hand so much. */ + its hand so much. Don't be tempted to always supply d2_f_d_e with + the actual cfa register, ie. r31 when we are using a hard frame + pointer. That fails when saving regs off r1, and sched moves the + r31 setup past the reg saves. 
*/ static rtx rs6000_frame_related (rtx insn, rtx reg, HOST_WIDE_INT val, @@ -18759,6 +18762,25 @@ rs6000_frame_related (rtx insn, rtx reg, { rtx real, temp; + if (REGNO (reg) == 1 reg2 == NULL_RTX) +{ + /* No need for any replacement. Just set RTX_FRAME_RELATED_P. */ + int i; + + gcc_checking_assert (val == 0); + real = PATTERN (insn); + if (GET_CODE (real) == PARALLEL) + for (i = 0; i XVECLEN (real, 0); i++) + if (GET_CODE (XVECEXP (real, 0, i)) == SET) + { + rtx set = XVECEXP (real, 0, i); + + RTX_FRAME_RELATED_P (set) = 1; + } + RTX_FRAME_RELATED_P (insn) = 1; + return insn; +} + /* copy_rtx will not make unique copies of registers, so we need to ensure we don't have unwanted sharing here. */ if (reg == reg2) @@ -18772,10 +18794,13 @@ rs6000_frame_related (rtx insn, rtx reg, if (reg2 != NULL_RTX) real = replace_rtx (real, reg2, rreg); - real = replace_rtx (real, reg, - gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, - STACK_POINTER_REGNUM), - GEN_INT (val))); + if (REGNO (reg) == 1) +gcc_checking_assert (val == 0); + else +real = replace_rtx (real, reg, + gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, + STACK_POINTER_REGNUM), + GEN_INT (val))); /* We expect that 'real' is either a SET or a PARALLEL containing SETs (and possibly other stuff). In a PARALLEL, all the SETs @@ -18893,8 +18918,8 @@ generate_set_vrsave (rtx reg, rs6000_sta Save REGNO into [FRAME_REG + OFFSET] in mode MODE. 
*/ static rtx -emit_frame_save (rtx frame_reg, rtx frame_ptr, enum machine_mode mode, -unsigned int regno, int offset, HOST_WIDE_INT total_size) +emit_frame_save (rtx frame_reg, enum machine_mode mode, +unsigned int regno, int offset, HOST_WIDE_INT frame_reg_to_sp) { rtx reg, offset_rtx, insn, mem, addr, int_rtx; rtx replacea, replaceb; @@ -18930,7 +18955,8 @@ emit_frame_save (rtx frame_reg, rtx fram insn = emit_move_insn (mem, reg); - return rs6000_frame_related (insn, frame_ptr, total_size, replacea, replaceb); + return rs6000_frame_related (insn, frame_reg, frame_reg_to_sp, + replacea, replaceb); } /* Emit an offset memory reference suitable for a frame store, while @@ -19295,7 +19321,9 @@ rs6000_emit_prologue (void) int using_static_chain_p = (cfun-static_chain_decl != NULL_TREE df_regs_ever_live_p (STATIC_CHAIN_REGNUM) call_used_regs[STATIC_CHAIN_REGNUM]); + /* Offset to top of frame for frame_reg and sp respectively. */ HOST_WIDE_INT frame_off = 0; + HOST_WIDE_INT sp_off = 0; if
PowerPC prologue and epilogue 3
This continues the prologue and epilogue cleanup. Not many user visible changes here, except for: - a bugfix to the LR save RTL emitted by rs6000_emit_savres_rtx which may affect SPE, - a bugfix for SPE code emitted when using a static chain, - vector saves will be done using r1 for large frames just over 32k in size, and, - using r11 as a frame pointer whenever we need to set up r11 for out-of-line saves, and merging two pointer reg setup insns. The latter is a necessary prerequisite to enabling out-of-line save/restore for large frames, as I do in a later patch. Currently this will only affect abiv4 -Os when using out-of-line saves. eg. -m32 -Os -mno-multiple int f (double x) { char a[33]; __asm __volatile (#%0 : =m (a) : : fr31, r27, r28); return (int) x; } old new stwu 1,-96(1) mflr 0 mflr 0 addi 11,1,-8 addi 11,1,88stwu 1,-96(1) stw 0,100(1)stw 0,12(11) stfd 31,88(1) bl _savegpr_27 bl _savegpr_27 stfd 31,0(11) * config/rs6000/rs6000.c (rs6000_emit_stack_reset): Delete forward decl. Move logic selecting update reg to callers. Update all callers. (rs6000_emit_allocate_stack): Add copy_off param. (emit_frame_save): Don't handle reg+reg addressing. (ptr_regno_for_savres): New function, extracted from.. (rs6000_emit_savres_rtx): ..here. Add lr_offset param. (rs6000_emit_prologue): Generate frame_ptr_rtx as we need it. Set frame_reg_rtx to r11 whenever r11 is needed, and merge frame offset adjustment for out-of-line save with copy from sp. Simplify condition controlling whether cr is saved early or late. Use ptr_regno_for_savres to verify correct reg is set up for out-of-line saves. Pass the actual pointer reg used to rs6000_emit_savres_rtx so rtl matches insns in out-of-line function. Rearrange spe vars so code is similar to that elsewhere in this function. Don't update frame_off when spe save code will restore r11. Use emit_frame_save for spe and gpr saves. Consolidate darwin out-of-line gpr setup with that for other abis. 
Don't assume frame_offset is zero and frame reg is sp when setting up altivec reg saves, and calculate exact offset requirement. (rs6000_emit_epilogue): Use HOST_WIDE_INT for frame_off. Tidy spe restore code. Consolidate darwin out-of-line gpr setup with that for other abis. diff -urp gcc-alan2/gcc/config/rs6000/rs6000.c gcc-alan3/gcc/config/rs6000/rs6000.c --- gcc-alan2/gcc/config/rs6000/rs6000.c2012-04-16 11:58:01.50108 +0930 +++ gcc-alan3/gcc/config/rs6000/rs6000.c2012-04-17 07:19:42.927931887 +0930 @@ -951,7 +951,6 @@ static void rs6000_eliminate_indexed_mem static const char *rs6000_mangle_type (const_tree); static void rs6000_set_default_type_attributes (tree); static rtx rs6000_savres_routine_sym (rs6000_stack_t *, bool, bool, bool); -static rtx rs6000_emit_stack_reset (rs6000_stack_t *, rtx, rtx, int, bool); static bool rs6000_reg_live_or_pic_offset_p (int); static tree rs6000_builtin_vectorized_libmass (tree, tree, tree); static tree rs6000_builtin_vectorized_function (tree, tree, tree); @@ -18534,7 +18533,7 @@ rs6000_emit_stack_tie (rtx fp, bool hard The generated code may use hard register 0 as a temporary. */ static void -rs6000_emit_allocate_stack (HOST_WIDE_INT size, rtx copy_reg) +rs6000_emit_allocate_stack (HOST_WIDE_INT size, rtx copy_reg, int copy_off) { rtx insn; rtx stack_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM); @@ -18578,7 +18577,12 @@ rs6000_emit_allocate_stack (HOST_WIDE_IN } if (copy_reg) -emit_move_insn (copy_reg, stack_reg); +{ + if (copy_off != 0) + emit_insn (gen_add3_insn (copy_reg, stack_reg, GEN_INT (copy_off))); + else + emit_move_insn (copy_reg, stack_reg); +} if (size 32767) { @@ -18921,42 +18925,22 @@ static rtx emit_frame_save (rtx frame_reg, enum machine_mode mode, unsigned int regno, int offset, HOST_WIDE_INT frame_reg_to_sp) { - rtx reg, offset_rtx, insn, mem, addr, int_rtx; - rtx replacea, replaceb; - - int_rtx = GEN_INT (offset); + rtx reg, insn, mem, addr; /* Some cases that need register indexed addressing. 
*/ - if ((TARGET_ALTIVEC_ABI ALTIVEC_VECTOR_MODE (mode)) - || (TARGET_VSX ALTIVEC_OR_VSX_VECTOR_MODE (mode)) - || (TARGET_E500_DOUBLE mode == DFmode) - || (TARGET_SPE_ABI - SPE_VECTOR_MODE (mode) - !SPE_CONST_OFFSET_OK (offset))) -{ - /* Whomever calls us must make sure r11 is available in the -flow path of instructions in the prologue. */ - offset_rtx = gen_rtx_REG (Pmode, 11); - emit_move_insn (offset_rtx, int_rtx); - - replacea = offset_rtx; - replaceb = int_rtx; -} - else
PowerPC prologue and epilogue 4
This provides some protection against misuse of r0, r11 and r12. I found it useful when enabling out-of-line saves for large frames. ;-) * config/rs6000/rs6000.c (START_USE, END_USE, NOT_INUSE): Define. (rs6000_emit_prologue): Use the above to catch register overlap. diff -urp gcc-alan3/gcc/config/rs6000/rs6000.c gcc-alan4/gcc/config/rs6000/rs6000.c --- gcc-alan3/gcc/config/rs6000/rs6000.c2012-04-17 07:19:42.927931887 +0930 +++ gcc-alan4/gcc/config/rs6000/rs6000.c2012-04-17 09:11:31.760669589 +0930 @@ -19301,6 +19301,29 @@ rs6000_emit_prologue (void) HOST_WIDE_INT frame_off = 0; HOST_WIDE_INT sp_off = 0; +#ifdef ENABLE_CHECKING + /* Track and check usage of r0, r11, r12. */ + int reg_inuse = using_static_chain_p ? 1 11 : 0; +#define START_USE(R) do \ + {\ +gcc_assert ((reg_inuse (1 (R))) == 0);\ +reg_inuse |= 1 (R); \ + } while (0) +#define END_USE(R) do \ + {\ +gcc_assert ((reg_inuse (1 (R))) != 0);\ +reg_inuse = ~(1 (R)); \ + } while (0) +#define NOT_INUSE(R) do \ + {\ +gcc_assert ((reg_inuse (1 (R))) == 0);\ + } while (0) +#else +#define START_USE(R) do {} while (0) +#define END_USE(R) do {} while (0) +#define NOT_INUSE(R) do {} while (0) +#endif + if (flag_stack_usage_info) current_function_static_stack_size = info-total_size; @@ -19465,6 +19488,7 @@ rs6000_emit_prologue (void) if (need_r11) { ptr_reg = gen_rtx_REG (Pmode, 11); + START_USE (11); } else if (info-total_size 32767) frame_off = info-total_size; @@ -19477,6 +19501,7 @@ rs6000_emit_prologue (void) || crtl-calls_eh_return) { ptr_reg = gen_rtx_REG (Pmode, 12); + START_USE (12); } else { @@ -19509,6 +19534,7 @@ rs6000_emit_prologue (void) rtx addr, reg, mem; reg = gen_rtx_REG (Pmode, 0); + START_USE (0); insn = emit_move_insn (reg, gen_rtx_REG (Pmode, LR_REGNO)); RTX_FRAME_RELATED_P (insn) = 1; @@ -19524,6 +19550,7 @@ rs6000_emit_prologue (void) insn = emit_move_insn (mem, reg); rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off, NULL_RTX, NULL_RTX); + END_USE (0); } } @@ -19536,6 +19563,7 @@ 
rs6000_emit_prologue (void) rtx set; cr_save_rtx = gen_rtx_REG (SImode, cr_save_regno); + START_USE (cr_save_regno); insn = emit_insn (gen_movesi_from_cr (cr_save_rtx)); RTX_FRAME_RELATED_P (insn) = 1; /* Now, there's no way that dwarf2out_frame_debug_expr is going @@ -19579,6 +19607,8 @@ rs6000_emit_prologue (void) /*savep=*/true, /*gpr=*/false, lr); rs6000_frame_related (insn, frame_reg_rtx, sp_off, NULL_RTX, NULL_RTX); + if (lr) + END_USE (0); } /* Save GPRs. This is done as a PARALLEL if we are using @@ -19623,10 +19653,15 @@ rs6000_emit_prologue (void) if (using_static_chain_p) { rtx r0 = gen_rtx_REG (Pmode, 0); + + START_USE (0); gcc_assert (info-first_gp_reg_save 11); emit_move_insn (r0, spe_save_area_ptr); } + else if (REGNO (frame_reg_rtx) != 11) + START_USE (11); + emit_insn (gen_addsi3 (spe_save_area_ptr, frame_reg_rtx, GEN_INT (offset))); if (!using_static_chain_p REGNO (frame_reg_rtx) == 11) @@ -19657,8 +19692,16 @@ rs6000_emit_prologue (void) } /* Move the static chain pointer back. */ - if (using_static_chain_p !spe_regs_addressable) - emit_move_insn (spe_save_area_ptr, gen_rtx_REG (Pmode, 0)); + if (!spe_regs_addressable) + { + if (using_static_chain_p) + { + emit_move_insn (spe_save_area_ptr, gen_rtx_REG (Pmode, 0)); + END_USE (0); + } + else if (REGNO (frame_reg_rtx) != 11) + END_USE (11); + } } else if (!WORLD_SAVE_P (info) !saving_GPRs_inline) { @@ -19679,10 +19722,13 @@ rs6000_emit_prologue (void) if (ptr_set_up) frame_off = -end_save; + else + NOT_INUSE (ptr_regno); emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset)); } else if (!ptr_set_up) { + NOT_INUSE (ptr_regno); emit_move_insn (ptr_reg, frame_reg_rtx); } ptr_off = -end_save; @@ -19693,6 +19739,8 @@ rs6000_emit_prologue (void) /*savep=*/true, /*gpr=*/true, lr); rs6000_frame_related (insn, ptr_reg, sp_off - ptr_off, NULL_RTX, NULL_RTX); + if (lr) +
PowerPC prologue and epilogue 5
This enables out-of-line save and restore for large frames, and for ABI_AIX when using the static chain. * config/rs6000/rs6000.c (rs6000_savres_strategy): Allow out-of-line save/restore for large frames. Don't disable out-of-line saves on ABI_AIX when using static chain reg. (rs6000_emit_prologue): Adjust cr_save_regno on ABI_AIX to not clobber static chain reg, and tweak for out-of-line gpr saves that use r1. diff -urp gcc-alan4/gcc/config/rs6000/rs6000.c gcc-alan5/gcc/config/rs6000/rs6000.c --- gcc-alan4/gcc/config/rs6000/rs6000.c2012-04-17 09:11:31.760669589 +0930 +++ gcc-alan5/gcc/config/rs6000/rs6000.c2012-04-17 11:16:09.369537832 +0930 @@ -17432,8 +17432,7 @@ rs6000_savres_strategy (rs6000_stack_t * strategy |= SAVRES_MULTIPLE; if (crtl-calls_eh_return - || cfun-machine-ra_need_lr - || info-total_size 32767) + || cfun-machine-ra_need_lr) strategy |= (SAVE_INLINE_FPRS | REST_INLINE_FPRS | SAVE_INLINE_GPRS | REST_INLINE_GPRS); @@ -17454,10 +17453,10 @@ rs6000_savres_strategy (rs6000_stack_t * /* Don't bother to try to save things out-of-line if r11 is occupied by the static chain. It would require too much fiddling and the static chain is rarely used anyway. FPRs are saved w.r.t the stack - pointer on Darwin. */ - if (using_static_chain_p) -strategy |= (DEFAULT_ABI == ABI_DARWIN ? 0 : SAVE_INLINE_FPRS) - | SAVE_INLINE_GPRS; + pointer on Darwin, and AIX uses r1 or r12. */ + if (using_static_chain_p DEFAULT_ABI != ABI_AIX) +strategy |= ((DEFAULT_ABI == ABI_DARWIN ? 0 : SAVE_INLINE_FPRS) +| SAVE_INLINE_GPRS); /* If we are going to use store multiple, then don't even bother with the out-of-line routines, since the store-multiple @@ -19555,7 +19554,10 @@ rs6000_emit_prologue (void) } /* If we need to save CR, put it into r12 or r11. */ - cr_save_regno = DEFAULT_ABI == ABI_AIX !saving_GPRs_inline ? 11 : 12; + cr_save_regno = (DEFAULT_ABI == ABI_AIX + (strategy SAVE_INLINE_GPRS) == 0 + (strategy SAVE_NOINLINE_GPRS_SAVES_LR) == 0 + !using_static_chain_p ? 
11 : 12); if (!WORLD_SAVE_P (info) info-cr_save_p REGNO (frame_reg_rtx) != cr_save_regno) -- Alan Modra Australia Development Lab, IBM