On Dec 7, 2012, at 7:38 AM, Jimi Xenidis <ji...@pobox.com> wrote: > > On Dec 6, 2012, at 11:54 PM, Michael Neuling <mi...@neuling.org> wrote: > >>> commit 279c0615917b959a652e81f4ad0d886e2d426d85 >>> Author: Jimi Xenidis <ji...@pobox.com> >>> Date: Wed Dec 5 13:43:22 2012 -0500 >>> >>> powerpc/book3e: IBM Blue Gene/Q Quad Processing eXtention (QPX) >>> >>> This enables kernel support for the QPX extention and is intended for >>> processors that support it, usually an IBM Blue Gene processor. >>> Turning it on does not effect other processors but it does add code >>> and will quadruple the per thread save and restore area for the FPU >>> (hense the name). If you have enabled VSX it will only double the >>> space. >>> >>> Signed-off-by: Jimi Xenidis <ji...@pobox.com> >>
<<snip>> >> >> >> >> +BEGIN_FTR_SECTION \ >> + SAVE_32VSRS(n,c,base); \ >> +END_FTR_SECTION_IFSET(CPU_FTR_VSX); \ >> +BEGIN_FTR_SECTION \ >> + SAVE_32QRS(n,c,base); \ >> +END_FTR_SECTION_IFSET(CPU_FTR_QPX); >> >> I don't think we want to do this. We are going to end up with 64 >> NOPS here somewhere. > > Excellent point, NOPs are cheap on most processors but not A2 and a lot of > embedded, I can wrap some branches with the FTR instead. > Do you have a concern on the code size? Thought about it a bit and came up with this solution for arch/powerpc/kernel/fpu.S. This should address the following issues - MSR_VSX vs MSR_VEC - Big chunks of NOPs in the code path - Less FTR space fixups at boot time. - IMNHSO easier to read especially when disassembled I did consider using the LR and BLR, but the !CONFIG_SMP case only adds one more special block and uses a different register set. Also if this is agreeable I would like us to consider removing the *_32FPVSRS* macros entirely and put the FTR tests in the actual code. This would allow us to use #ifdefs and reduce the amount of code that actually gets compiled. Thoughts? diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S index e0ada05..5964067 100644 --- a/arch/powerpc/kernel/fpu.S +++ b/arch/powerpc/kernel/fpu.S @@ -25,30 +25,81 @@ #include <asm/asm-offsets.h> #include <asm/ptrace.h> -#ifdef CONFIG_VSX -#define __REST_32FPVSRS(n,c,base) \ -BEGIN_FTR_SECTION \ - b 2f; \ -END_FTR_SECTION_IFSET(CPU_FTR_VSX); \ - REST_32FPRS(n,base); \ - b 3f; \ -2: REST_32VSRS(n,c,base); \ -3: - -#define __SAVE_32FPVSRS(n,c,base) \ -BEGIN_FTR_SECTION \ - b 2f; \ -END_FTR_SECTION_IFSET(CPU_FTR_VSX); \ - SAVE_32FPRS(n,base); \ - b 3f; \ -2: SAVE_32VSRS(n,c,base); \ -3: -#else -#define __REST_32FPVSRS(n,b,base) REST_32FPRS(n, base) -#define __SAVE_32FPVSRS(n,b,base) SAVE_32FPRS(n, base) -#endif -#define REST_32FPVSRS(n,c,base) __REST_32FPVSRS(n,__REG_##c,__REG_##base) -#define SAVE_32FPVSRS(n,c,base) __SAVE_32FPVSRS(n,__REG_##c,__REG_##base) + +/* + * Restore subroutines, R4 is scratch and R5 is base + */ +vsx_restore: + REST_32VSRS(0, __REG_R4, __REG_R5) + b after_restore +qpx_restore: + REST_32QRS(0, __REG_R4, __REG_R5) + b after_restore +fpu_restore: + REST_32FPRS(0, __REG_R5) + b after_restore + +#define REST_32FPVSRS(n, c, base) \ +BEGIN_FTR_SECTION \ + b vsx_restore; \ +END_FTR_SECTION_IFSET(CPU_FTR_VSX) \ +BEGIN_FTR_SECTION \ + b qpx_restore; \ +END_FTR_SECTION_IFSET(CPU_FTR_QPX) \ + b fpu_restore; \ +after_restore: + +/* + * Save subroutines, R4 is scratch and R3 is base + */ +vsx_save: + SAVE_32VSRS(0, __REG_R4, __REG_R3) + b after_save +qpx_save: + SAVE_32QRS(0, __REG_R4, __REG_R3) + b after_save +fpu_save: + SAVE_32FPRS(0, __REG_R3) + b after_save + +#define SAVE_32FPVSRS(n, c, base) \ +BEGIN_FTR_SECTION \ + b vsx_save; \ +END_FTR_SECTION_IFSET(CPU_FTR_VSX) \ +BEGIN_FTR_SECTION \ + b qpx_save; \ +END_FTR_SECTION_IFSET(CPU_FTR_QPX) \ + b fpu_save; \ +after_save: + +#ifndef CONFIG_SMP +/* + * we need an extra save set for the !CONFIG_SMP case, see below + * Scratch it R5 and base is R4 + */ +vsx_save_nosmp: + SAVE_32VSRS(0,R5,R4) + b after_save_nosmp + +qpx_save_nosmp: + SAVE_32QRS(0,R5,R4) + b after_save_nosmp + +fpu_save_nosmp: + SAVE_32FPRS(0,R4) + b after_save_nosmp + +#define SAVE_32FPVSRS_NOSMP(n,c,base) \ +BEGIN_FTR_SECTION \ + b vsx_save_nosmp; \ +END_FTR_SECTION_IFSET(CPU_FTR_VSX) \ +BEGIN_FTR_SECTION \ + b qpx_save_nosmp; \ +END_FTR_SECTION_IFSET(CPU_FTR_QPX) \ + b fpu_save_nosmp; \ +after_save_nosmp: + +#endif /* !CONFIG_SMP */ /* * This task wants to use the FPU now. @@ -65,6 +116,11 @@ BEGIN_FTR_SECTION oris r5,r5,MSR_VSX@h END_FTR_SECTION_IFSET(CPU_FTR_VSX) #endif +#ifdef CONFIG_PPC_QPX +BEGIN_FTR_SECTION + oris r5,r5,MSR_VEC@h +END_FTR_SECTION_IFSET(CPU_FTR_QPX) +#endif SYNC MTMSRD(r5) /* enable use of fpu now */ isync @@ -81,7 +137,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX) beq 1f toreal(r4) addi r4,r4,THREAD /* want last_task_used_math->thread */ - SAVE_32FPVSRS(0, R5, R4) + SAVE_32FPVSRS_NOSMP(0, R5, R4) mffs fr0 stfd fr0,THREAD_FPSCR(r4) PPC_LL r5,PT_REGS(r4) @@ -94,7 +150,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX) #endif /* CONFIG_SMP */ /* enable use of FP after return */ #ifdef CONFIG_PPC32 - mfspr r5,SPRN_SPRG_THREAD /* current task's THREAD (phys) */ + mfspr r5,SPRN_SPRG_THREAD /* current task's THREAD (phys) */ lwz r4,THREAD_FPEXC_MODE(r5) ori r9,r9,MSR_FP /* enable FP for current */ or r9,r9,r4 @@ -132,6 +188,11 @@ BEGIN_FTR_SECTION oris r5,r5,MSR_VSX@h END_FTR_SECTION_IFSET(CPU_FTR_VSX) #endif +#ifdef CONFIG_PPC_QPX +BEGIN_FTR_SECTION + oris r5,r5,MSR_VEC@h +END_FTR_SECTION_IFSET(CPU_FTR_QPX) +#endif SYNC_601 ISYNC_601 MTMSRD(r5) /* enable use of fpu now */ @@ -148,10 +209,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX) beq 1f PPC_LL r4,_MSR-STACK_FRAME_OVERHEAD(r5) li r3,MSR_FP|MSR_FE0|MSR_FE1 -#ifdef CONFIG_VSX +#if defined(CONFIG_VSX) || defined(CONFIG_PPC_QPX) BEGIN_FTR_SECTION oris r3,r3,MSR_VSX@h -END_FTR_SECTION_IFSET(CPU_FTR_VSX) +END_FTR_SECTION_IFSET(CPU_FTR_VSX | CPU_FTR_QPX) #endif andc r4,r4,r3 /* disable FP for previous task */ PPC_STL r4,_MSR-STACK_FRAME_OVERHEAD(r5) -jx > >> >> I'd like to see this patch broken into different parts. > > I'm not sure how _this_ patch: > > <https://github.com/jimix/linux-bgq/commit/279c0615917b959a652e81f4ad0d886e2d426d85> > could be broken up, please advise. > >> >> Also, have you boot tested this change on a VSX enabled box? > > I can try, I may bug you for help. > Is there a commonly test (or apps) I should run? > > -jx > > >> >> Mikey > _______________________________________________ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev