Re: [PATCH] powerpc: SMT priority (PPR) save and restore

2012-07-22 Thread Michael Neuling
Haren Myneni ha...@linux.vnet.ibm.com wrote:
 On Mon, 2012-07-16 at 13:41 +1000, Michael Neuling wrote:
  Haren Myneni ha...@linux.vnet.ibm.com wrote:
  
   powerpc: SMT priority (PPR) save and restore

snip

  Can you break this patch into a few parts that are easier to review than
  one giant patch.  Start by adding the PPR ftr bits, then the extra space
  in the paca, then the new macros, then use the new infrastructure.  I'm
  sure you can get 5 or so patches which will be much easier to review.
  
  Also this has been white space munged.  See here:
http://patchwork.ozlabs.org/patch/170993/
  All the #defines are broken.
  
  Also, do you know what the impacts of this are on null syscall/page
  faults etc on machines which need the PPR switched?  If it's big, we
  might want to have this as a CONFIG option for those who don't care and
  want the speed bump.
 
 Thanks for your comments. Sure, I will split this patch into 5/6 patches. 
 With Anton's num_syscall test  -
 http://ozlabs.org/~anton/junkcode/null_syscall.c, noticed 6% overhead.
 As you suggested, will add CONFIG option for this feature. 

Eek.. 6%.. yes, definitely a CONFIG option then.

 Sorry, my e-mail client messed-up some tabs. will post next time
 properly.
 
 Please see my responses below. 

ok

   +
#define __EXCEPTION_PROLOG_1(area, extra, vec)   
   \
 GET_PACA(r13);  \
   - std r9,area+EX_R9(r13); /* save r9 - r12 */ \
   - std r10,area+EX_R10(r13);   \
   + std r9,area+EX_R9(r13); /* save r9 */   \
   + HMT_FTR_HAS_PPR(area,r9);   \
   + std r10,area+EX_R10(r13);   /* save r10 - r12 */\
 BEGIN_FTR_SECTION_NESTED(66);   \
 mfspr   r10,SPRN_CFAR;  \
 std r10,area+EX_CFAR(r13);  \
   @@ -176,8 +213,10 @@ do_kvm_##n:  
   \
 std r10,0(r1);  /* make stack chain pointer */ \
 std r0,GPR0(r1);/* save r0 in stackframe*/ \
 std r10,GPR1(r1);   /* save r1 in stackframe*/ \
   + beq 4f;\
 ACCOUNT_CPU_USER_ENTRY(r9, r10);   \
   - std r2,GPR2(r1);/* save r2 in stackframe*/ \
   + SAVE_PPR(area, r9, r10);   \
   +4:   std r2,GPR2(r1);/* save r2 in stackframe
   */ \
  
  why are we no longer doing ACCOUNT_CPU_USER_ENTRY here?
 
 No, we still doing ACCOUNT_CPU_USER_ENTRY for the user level exceptions.
 The existing code (ACCOUNT_CPU_USER_ENTRY macro) has the same branch
 instruction and skipping for exceptions coming from kernel. I just
 removed 'beq' in ACCOUNT_CPU_USER_ENTRY macro and added here since PPR
 saving will be done only for user level exceptions. 
 
 Adding comment for this branch instruction and create a separate patch
 just for the related changes should make it more clear. 

OK.

This is why it's good to split the patch into multiple parts so that you
can explain these in the associated checking comments.

   - HMT_MEDIUM;
   + HMT_FTR_NO_PPR  
  
  Can we call this something else like HMT_MEDIUM_NO_PPR?
 
 I will make the change. Similarly HMT_FTR_HAS_PPR should be changed to
 HMT_MEDIUM_HAS_PPR. 

Cool, bike shedding complete :-)

Mikey

 
 
  
  
  
 SET_SCRATCH0(r13)
#ifdef CONFIG_PPC_P7_NAP
BEGIN_FTR_SECTION
   @@ -94,7 +94,7 @@ machine_check_pSeries_1:
 . = 0x300
 .globl data_access_pSeries
data_access_pSeries:
   - HMT_MEDIUM
   + HMT_FTR_NO_PPR
 SET_SCRATCH0(r13)
BEGIN_FTR_SECTION
 b   data_access_check_stab
   @@ -106,7 +106,7 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_SLB)
 . = 0x380
 .globl data_access_slb_pSeries
data_access_slb_pSeries:
   - HMT_MEDIUM
   + HMT_FTR_NO_PPR  
 SET_SCRATCH0(r13)
 EXCEPTION_PROLOG_1(PACA_EXSLB, KVMTEST, 0x380)
 std r3,PACA_EXSLB+EX_R3(r13)
   @@ -137,7 +137,7 @@ data_access_slb_pSeries:
 . = 0x480
 .globl instruction_access_slb_pSeries
instruction_access_slb_pSeries:
   - HMT_MEDIUM
   + HMT_FTR_NO_PPR  
 SET_SCRATCH0(r13)
 EXCEPTION_PROLOG_1(PACA_EXSLB, KVMTEST_PR, 0x480)
 std r3,PACA_EXSLB+EX_R3(r13)
   @@ -197,7 +197,7 @@ hardware_interrupt_hv:
 . = 0xc00
 .globl  system_call_pSeries
system_call_pSeries:
   - HMT_MEDIUM
   + HMT_FTR_NO_PPR  
#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
 SET_SCRATCH0(r13)
 GET_PACA(r13)
   @@ -213,6 +213,7 @@ BEGIN_FTR_SECTION
END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE)
 mr  r9,r13
 GET_PACA(r13)
   + HMT_FTR_HAS_PPR(PACA_EXGEN,r10)
 mfspr   r11,SPRN_SRR0
 mfspr   r12,SPRN_SRR1
 ld  

Re: [PATCH] powerpc: SMT priority (PPR) save and restore

2012-07-22 Thread Michael Neuling
   Can you break this patch into a few parts that are easier to review than
   one giant patch.  Start by adding the PPR ftr bits, then the extra space
   in the paca, then the new macros, then use the new infrastructure.  I'm
   sure you can get 5 or so patches which will be much easier to review.
   
   Also this has been white space munged.  See here:
 http://patchwork.ozlabs.org/patch/170993/
   All the #defines are broken.
   
   Also, do you know what the impacts of this are on null syscall/page
   faults etc on machines which need the PPR switched?  If it's big, we
   might want to have this as a CONFIG option for those who don't care and
   want the speed bump.
  
  Thanks for your comments. Sure, I will split this patch into 5/6 patches. 
  With Anton's num_syscall test  -
  http://ozlabs.org/~anton/junkcode/null_syscall.c, noticed 6% overhead.
  As you suggested, will add CONFIG option for this feature. 
 
 Eek.. 6%.. yes, definitely a CONFIG option then.

... maybe even a boot time option so we can turn it on and off easily
for distros.

Mikey
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH] powerpc: SMT priority (PPR) save and restore

2012-07-21 Thread Haren Myneni
On Mon, 2012-07-16 at 13:41 +1000, Michael Neuling wrote:
 Haren Myneni ha...@linux.vnet.ibm.com wrote:
 
  powerpc: SMT priority (PPR) save and restore
  
  On P7 systems, users can define SMT priority levels 2,3 and 4 for
  processes so that some can run higher priority than the other ones.
  In the current kernel, the default priority is set to 4, which prohibits
  processes from using a higher priority. Also, the kernel boosts the priority to
  4 during exceptions without saving the user-defined priority values when
  the task enters the kernel. So we will be losing the process PPR value,
  and it cannot be restored when the task exits the kernel.
  
  This patch sets the default priority to 3 when tasks are created such
  that users can use 4 for higher priority tasks. It also provides to save
  and restore the user defined priorities for all tasks.
  
  When the task enters kernel space, the user-defined priority (PPR)
  will be saved into the PACA at the beginning of the first-level exception
  vector and then copied from the PACA to thread_info in the second-level vector.
  The PPR will be restored from thread_info before the task exits kernel space.
  
  P7 temporarily raises the thread priority to a higher level during an
  exception until the program executes HMT_* calls, but it will not modify the
  PPR register. So we save the PPR value whenever some register is available
  to use, and then call HMT_MEDIUM to increase the priority. This feature is
  supported on P7 or later processors.
  
  Signed-off-by: Haren Myneni hb...@us.ibm.com
 
 Can you break this patch into a few parts that are easier to review than
 one giant patch.  Start by adding the PPR ftr bits, then the extra space
 in the paca, then the new macros, then use the new infrastructure.  I'm
 sure you can get 5 or so patches which will be much easier to review.
 
 Also this has been white space munged.  See here:
   http://patchwork.ozlabs.org/patch/170993/
 All the #defines are broken.
 
 Also, do you know what the impacts of this are on null syscall/page
 faults etc on machines which need the PPR switched?  If it's big, we
 might want to have this as a CONFIG option for those who don't care and
 want the speed bump.

Thanks for your comments. Sure, I will split this patch into 5/6 patches. 
With Anton's num_syscall test  -
http://ozlabs.org/~anton/junkcode/null_syscall.c, noticed 6% overhead.
As you suggested, will add CONFIG option for this feature. 

Sorry, my e-mail client messed-up some tabs. will post next time
properly.

Please see my responses below. 

 More comments below.
 
  
  diff --git a/arch/powerpc/include/asm/cputable.h
  b/arch/powerpc/include/asm/cputable.h
  index 50d82c8..e7b80d6 100644
  --- a/arch/powerpc/include/asm/cputable.h
  +++ b/arch/powerpc/include/asm/cputable.h
  @@ -203,6 +203,7 @@ extern const char *powerpc_base_platform;
   #define CPU_FTR_POPCNTD
  LONG_ASM_CONST(0x0800)
   #define CPU_FTR_ICSWX  
  LONG_ASM_CONST(0x1000)
   #define CPU_FTR_VMX_COPY   LONG_ASM_CONST(0x2000)
  +#define CPU_FTR_HAS_PPR
  LONG_ASM_CONST(0x4000)
   
   #ifndef __ASSEMBLY__
   
  @@ -432,7 +433,8 @@ extern const char *powerpc_base_platform;
  CPU_FTR_PURR | CPU_FTR_SPURR | CPU_FTR_REAL_LE | \
  CPU_FTR_DSCR | CPU_FTR_SAO  | CPU_FTR_ASYM_SMT | \
  CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD |
  \
  -   CPU_FTR_ICSWX | CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY)
  +   CPU_FTR_ICSWX | CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY |
  \
  +   CPU_FTR_HAS_PPR)
 
 Add CPU_FTR_HAS_PPR to CPU_FTRS_POSSIBLE as well.

 CPU_FTRS_POWER7 is already defined to CPU_FTRS_POSSIBLE. So do we still
need CPU_FTR_HAS_PPR to CPU_FTRS_POSSIBLE?

#define CPU_FTRS_POSSIBLE   \
(CPU_FTRS_POWER3 | CPU_FTRS_RS64 | CPU_FTRS_POWER4 |
\
CPU_FTRS_PPC970 | CPU_FTRS_POWER5 | CPU_FTRS_POWER6 |
\
CPU_FTRS_POWER7 | CPU_FTRS_CELL | CPU_FTRS_PA6T |
\
CPU_FTR_VSX)


 
 
   #define CPU_FTRS_CELL  (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
  CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
  CPU_FTR_ALTIVEC_COMP | CPU_FTR_MMCRA | CPU_FTR_SMT | \
  diff --git a/arch/powerpc/include/asm/exception-64s.h
  b/arch/powerpc/include/asm/exception-64s.h
  index d58fc4e..1fae8aa 100644
  --- a/arch/powerpc/include/asm/exception-64s.h
  +++ b/arch/powerpc/include/asm/exception-64s.h
  @@ -47,6 +47,7 @@
   #define EX_R3  64
   #define EX_LR  72
   #define EX_CFAR80
  +#define EX_PPR 88  /* SMT thread status register 
  (priority) */
   
   /*
* We're short on space and time in the exception prolog, so we can't
  @@ -61,10 +62,46 @@
   #define EXC_HV H
   #define EXC_STD
   
  +/* 
  + * PPR save/restore macros - Used only on P7 or later processors
  + */
  +#define SAVE_PPR(area, ra, rb)
  \
  

Re: [PATCH] powerpc: SMT priority (PPR) save and restore

2012-07-15 Thread Michael Neuling
Haren Myneni ha...@linux.vnet.ibm.com wrote:

 powerpc: SMT priority (PPR) save and restore
 
 On P7 systems, users can define SMT priority levels 2,3 and 4 for
 processes so that some can run higher priority than the other ones.
 In the current kernel, the default priority is set to 4, which prohibits
 processes from using a higher priority. Also, the kernel boosts the priority to
 4 during exceptions without saving the user-defined priority values when
 the task enters the kernel. So we will be losing the process PPR value,
 and it cannot be restored when the task exits the kernel.
 
 This patch sets the default priority to 3 when tasks are created such
 that users can use 4 for higher priority tasks. It also provides to save
 and restore the user defined priorities for all tasks.
 
 When the task enters kernel space, the user-defined priority (PPR)
 will be saved into the PACA at the beginning of the first-level exception
 vector and then copied from the PACA to thread_info in the second-level vector.
 The PPR will be restored from thread_info before the task exits kernel space.
 
 P7 temporarily raises the thread priority to a higher level during an
 exception until the program executes HMT_* calls, but it will not modify the
 PPR register. So we save the PPR value whenever some register is available
 to use, and then call HMT_MEDIUM to increase the priority. This feature is
 supported on P7 or later processors.
 
 Signed-off-by: Haren Myneni hb...@us.ibm.com

Can you break this patch into a few parts that are easier to review than
one giant patch.  Start by adding the PPR ftr bits, then the extra space
in the paca, then the new macros, then use the new infrastructure.  I'm
sure you can get 5 or so patches which will be much easier to review.

Also this has been white space munged.  See here:
  http://patchwork.ozlabs.org/patch/170993/
All the #defines are broken.

Also, do you know what the impacts of this are on null syscall/page
faults etc on machines which need the PPR switched?  If it's big, we
might want to have this as a CONFIG option for those who don't care and
want the speed bump.

More comments below.

 
 diff --git a/arch/powerpc/include/asm/cputable.h
 b/arch/powerpc/include/asm/cputable.h
 index 50d82c8..e7b80d6 100644
 --- a/arch/powerpc/include/asm/cputable.h
 +++ b/arch/powerpc/include/asm/cputable.h
 @@ -203,6 +203,7 @@ extern const char *powerpc_base_platform;
  #define CPU_FTR_POPCNTD  
 LONG_ASM_CONST(0x0800)
  #define CPU_FTR_ICSWX
 LONG_ASM_CONST(0x1000)
  #define CPU_FTR_VMX_COPY LONG_ASM_CONST(0x2000)
 +#define CPU_FTR_HAS_PPR  
 LONG_ASM_CONST(0x4000)
  
  #ifndef __ASSEMBLY__
  
 @@ -432,7 +433,8 @@ extern const char *powerpc_base_platform;
   CPU_FTR_PURR | CPU_FTR_SPURR | CPU_FTR_REAL_LE | \
   CPU_FTR_DSCR | CPU_FTR_SAO  | CPU_FTR_ASYM_SMT | \
   CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD |
 \
 - CPU_FTR_ICSWX | CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY)
 + CPU_FTR_ICSWX | CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY |
 \
 + CPU_FTR_HAS_PPR)

Add CPU_FTR_HAS_PPR to CPU_FTRS_POSSIBLE as well.


  #define CPU_FTRS_CELL(CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
   CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
   CPU_FTR_ALTIVEC_COMP | CPU_FTR_MMCRA | CPU_FTR_SMT | \
 diff --git a/arch/powerpc/include/asm/exception-64s.h
 b/arch/powerpc/include/asm/exception-64s.h
 index d58fc4e..1fae8aa 100644
 --- a/arch/powerpc/include/asm/exception-64s.h
 +++ b/arch/powerpc/include/asm/exception-64s.h
 @@ -47,6 +47,7 @@
  #define EX_R364
  #define EX_LR72
  #define EX_CFAR  80
 +#define EX_PPR   88  /* SMT thread status register 
 (priority) */
  
  /*
   * We're short on space and time in the exception prolog, so we can't
 @@ -61,10 +62,46 @@
  #define EXC_HV   H
  #define EXC_STD
  
 +/* 
 + * PPR save/restore macros - Used only on P7 or later processors
 + */
 +#define SAVE_PPR(area, ra, rb)
 \
 +BEGIN_FTR_SECTION_NESTED(940)
 \
 +   ld  ra,area+EX_PPR(r13);/* Read PPR from paca */
 \
 +   clrrdi  rb,r1,THREAD_SHIFT; /* thread_info struct */
 \
 +   std ra,TI_PPR(rb);  /* Save PPR in thread_info */
 \
 +END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,940)
 +
 +#define RESTORE_PPR(ra,rb)
 \
 +BEGIN_FTR_SECTION_NESTED(941)
 \
 +   clrrdi  ra,r1,THREAD_SHIFT;
 \
 +   ld  rb,TI_PPR(ra);  /* Read PPR from thread_info */
 \
 +   mtspr   SPRN_PPR,rb;/* Restore PPR */
 \
 +END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,941)
 +
 +#define RESTORE_PPR_PACA(area,ra)
 \
 +BEGIN_FTR_SECTION_NESTED(942)
 \
 +   ld  ra,area+EX_PPR(r13);
 \
 +   mtspr   SPRN_PPR,ra;
 \
 +END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,942)
 +
 +#define