Re: [PATCH 2/2] KVM: PPC: Book3S HV: lockless tlbie for HPT hcalls

2018-05-16 Thread Paul Mackerras
On Mon, May 14, 2018 at 02:04:10PM +1000, Michael Ellerman wrote:
[snip]
> OK good, in commit:
> 
> c17b98cf6028 ("KVM: PPC: Book3S HV: Remove code for PPC970 processors") (Dec 
> 2014)
> 
> So we should be able to do the patch below.
> 
> cheers
> 
> 
> diff --git a/arch/powerpc/include/asm/kvm_host.h 
> b/arch/powerpc/include/asm/kvm_host.h
> index 17498e9a26e4..7756b0c6da75 100644
> --- a/arch/powerpc/include/asm/kvm_host.h
> +++ b/arch/powerpc/include/asm/kvm_host.h
> @@ -269,7 +269,6 @@ struct kvm_arch {
>   unsigned long host_lpcr;
>   unsigned long sdr1;
>   unsigned long host_sdr1;
> - int tlbie_lock;
>   unsigned long lpcr;
>   unsigned long vrma_slb_v;
>   int mmu_ready;
> diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c 
> b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
> index 78e6a392330f..89d909b3b881 100644
> --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
> +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
> @@ -434,24 +434,6 @@ static inline int is_mmio_hpte(unsigned long v, unsigned 
> long r)
>   (HPTE_R_KEY_HI | HPTE_R_KEY_LO));
>  }
>  
> -static inline int try_lock_tlbie(unsigned int *lock)
> -{
> - unsigned int tmp, old;
> - unsigned int token = LOCK_TOKEN;
> -
> - asm volatile("1:lwarx   %1,0,%2\n"
> -  "  cmpwi   cr0,%1,0\n"
> -  "  bne 2f\n"
> -  "  stwcx.  %3,0,%2\n"
> -  "  bne-1b\n"
> -  "  isync\n"
> -  "2:"
> -  : "=&r" (tmp), "=&r" (old)
> -  : "r" (lock), "r" (token)
> -  : "cc", "memory");
> - return old == 0;
> -}
> -
>  static void do_tlbies(struct kvm *kvm, unsigned long *rbvalues,
> long npages, int global, bool need_sync)
>  {
> @@ -463,8 +445,6 @@ static void do_tlbies(struct kvm *kvm, unsigned long 
> *rbvalues,
>* the RS field, this is backwards-compatible with P7 and P8.
>*/
>   if (global) {
> - while (!try_lock_tlbie(&kvm->arch.tlbie_lock))
> - cpu_relax();
>   if (need_sync)
>   asm volatile("ptesync" : : : "memory");
>   for (i = 0; i < npages; ++i) {
> @@ -483,7 +463,6 @@ static void do_tlbies(struct kvm *kvm, unsigned long 
> *rbvalues,
>   }
>  
>   asm volatile("eieio; tlbsync; ptesync" : : : "memory");
> - kvm->arch.tlbie_lock = 0;
>   } else {
>   if (need_sync)
>   asm volatile("ptesync" : : : "memory");

Seems reasonable; is that a patch submission? :)

Paul.


Re: [PATCH 2/2] KVM: PPC: Book3S HV: lockless tlbie for HPT hcalls

2018-05-13 Thread Michael Ellerman
Paul Mackerras  writes:

> On Fri, Apr 06, 2018 at 04:12:32PM +1000, Michael Ellerman wrote:
>> Nicholas Piggin  writes:
>> > diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c 
>> > b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
>> > index 78e6a392330f..0221a0f74f07 100644
>> > --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
>> > +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
>> > @@ -439,6 +439,9 @@ static inline int try_lock_tlbie(unsigned int *lock)
>> >unsigned int tmp, old;
>> >unsigned int token = LOCK_TOKEN;
>> >  
>> > +  if (mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE))
>> > +  return 1;
>> > +
>> >asm volatile("1:lwarx   %1,0,%2\n"
>> > "  cmpwi   cr0,%1,0\n"
>> > "  bne 2f\n"
>> > @@ -452,6 +455,12 @@ static inline int try_lock_tlbie(unsigned int *lock)
>> >return old == 0;
>> >  }
>> >  
>> > +static inline void unlock_tlbie_after_sync(unsigned int *lock)
>> > +{
>> > +  if (mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE))
>> > +  return;
>> > +}
>> 
>> So this is a bit hard to follow:
>> 
>> #define MMU_FTRS_DEFAULT_HPTE_ARCH_V2\
>>  MMU_FTR_HPTE_TABLE | MMU_FTR_PPCAS_ARCH_V2
>> #define MMU_FTRS_POWER   MMU_FTRS_DEFAULT_HPTE_ARCH_V2
>> #define MMU_FTRS_PPC970  MMU_FTRS_POWER | MMU_FTR_TLBIE_CROP_VA  
>> // does NOT
>> #define MMU_FTRS_POWER5  MMU_FTRS_POWER | MMU_FTR_LOCKLESS_TLBIE
>> #define MMU_FTRS_POWER6  MMU_FTRS_POWER5 | MMU_FTR_KERNEL_RO | 
>> MMU_FTR_68_BIT_VA // includes lockless TLBIE
>> #define MMU_FTRS_POWER7  MMU_FTRS_POWER6 
>> // includes lockless TLBIE
>> #define MMU_FTRS_POWER8  MMU_FTRS_POWER6 
>> // includes lockless TLBIE
>> #define MMU_FTRS_POWER9  MMU_FTRS_POWER6 
>> // includes lockless TLBIE
>> #define MMU_FTRS_CELLMMU_FTRS_DEFAULT_HPTE_ARCH_V2 | 
>> // does NOT
>>  MMU_FTR_CI_LARGE_PAGE
>> #define MMU_FTRS_PA6TMMU_FTRS_DEFAULT_HPTE_ARCH_V2 | \   
>> // does NOT
>>  MMU_FTR_CI_LARGE_PAGE | MMU_FTR_NO_SLBIE_B
>> 
>> 
>> So it's only 970, Cell and Pasemi that *don't* have lockless TLBIE.
>> 
>> And KVM HV doesn't run on any of those.
>> 
>> So we can just not check for the feature in the KVM HV code.
>> 
>> Am I right?
>
> Yes; that code was written when we still supported HV KVM on 970,
> but we ripped that out some time ago.

OK good, in commit:

c17b98cf6028 ("KVM: PPC: Book3S HV: Remove code for PPC970 processors") (Dec 
2014)

So we should be able to do the patch below.

cheers


diff --git a/arch/powerpc/include/asm/kvm_host.h 
b/arch/powerpc/include/asm/kvm_host.h
index 17498e9a26e4..7756b0c6da75 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -269,7 +269,6 @@ struct kvm_arch {
unsigned long host_lpcr;
unsigned long sdr1;
unsigned long host_sdr1;
-   int tlbie_lock;
unsigned long lpcr;
unsigned long vrma_slb_v;
int mmu_ready;
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c 
b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index 78e6a392330f..89d909b3b881 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -434,24 +434,6 @@ static inline int is_mmio_hpte(unsigned long v, unsigned 
long r)
(HPTE_R_KEY_HI | HPTE_R_KEY_LO));
 }
 
-static inline int try_lock_tlbie(unsigned int *lock)
-{
-   unsigned int tmp, old;
-   unsigned int token = LOCK_TOKEN;
-
-   asm volatile("1:lwarx   %1,0,%2\n"
-"  cmpwi   cr0,%1,0\n"
-"  bne 2f\n"
-"  stwcx.  %3,0,%2\n"
-"  bne-1b\n"
-"  isync\n"
-"2:"
-: "=&r" (tmp), "=&r" (old)
-: "r" (lock), "r" (token)
-: "cc", "memory");
-   return old == 0;
-}
-
 static void do_tlbies(struct kvm *kvm, unsigned long *rbvalues,
  long npages, int global, bool need_sync)
 {
@@ -463,8 +445,6 @@ static void do_tlbies(struct kvm *kvm, unsigned long 
*rbvalues,
 * the RS field, this is backwards-compatible with P7 and P8.
 */
if (global) {
-   while (!try_lock_tlbie(&kvm->arch.tlbie_lock))
-   cpu_relax();
if (need_sync)
asm volatile("ptesync" : : : "memory");
for (i = 0; i < npages; ++i) {
@@ -483,7 +463,6 @@ static void do_tlbies(struct kvm *kvm, unsigned long 
*rbvalues,
}
 
asm volatile("eieio; tlbsync; ptesync" : : : "memory");
-   kvm->arch.tlbie_lock = 0;
} else {
if (need_sync)
  

Re: [PATCH 2/2] KVM: PPC: Book3S HV: lockless tlbie for HPT hcalls

2018-05-10 Thread Paul Mackerras
On Fri, Apr 06, 2018 at 04:12:32PM +1000, Michael Ellerman wrote:
> Nicholas Piggin  writes:
> > diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c 
> > b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
> > index 78e6a392330f..0221a0f74f07 100644
> > --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
> > +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
> > @@ -439,6 +439,9 @@ static inline int try_lock_tlbie(unsigned int *lock)
> > unsigned int tmp, old;
> > unsigned int token = LOCK_TOKEN;
> >  
> > +   if (mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE))
> > +   return 1;
> > +
> > asm volatile("1:lwarx   %1,0,%2\n"
> >  "  cmpwi   cr0,%1,0\n"
> >  "  bne 2f\n"
> > @@ -452,6 +455,12 @@ static inline int try_lock_tlbie(unsigned int *lock)
> > return old == 0;
> >  }
> >  
> > +static inline void unlock_tlbie_after_sync(unsigned int *lock)
> > +{
> > +   if (mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE))
> > +   return;
> > +}
> 
> So this is a bit hard to follow:
> 
> #define MMU_FTRS_DEFAULT_HPTE_ARCH_V2 \
>   MMU_FTR_HPTE_TABLE | MMU_FTR_PPCAS_ARCH_V2
> #define MMU_FTRS_POWERMMU_FTRS_DEFAULT_HPTE_ARCH_V2
> #define MMU_FTRS_PPC970   MMU_FTRS_POWER | MMU_FTR_TLBIE_CROP_VA  
> // does NOT
> #define MMU_FTRS_POWER5   MMU_FTRS_POWER | MMU_FTR_LOCKLESS_TLBIE
> #define MMU_FTRS_POWER6   MMU_FTRS_POWER5 | MMU_FTR_KERNEL_RO | 
> MMU_FTR_68_BIT_VA // includes lockless TLBIE
> #define MMU_FTRS_POWER7   MMU_FTRS_POWER6 
> // includes lockless TLBIE
> #define MMU_FTRS_POWER8   MMU_FTRS_POWER6 
> // includes lockless TLBIE
> #define MMU_FTRS_POWER9   MMU_FTRS_POWER6 
> // includes lockless TLBIE
> #define MMU_FTRS_CELL MMU_FTRS_DEFAULT_HPTE_ARCH_V2 | 
> // does NOT
>   MMU_FTR_CI_LARGE_PAGE
> #define MMU_FTRS_PA6T MMU_FTRS_DEFAULT_HPTE_ARCH_V2 | \   
> // does NOT
>   MMU_FTR_CI_LARGE_PAGE | MMU_FTR_NO_SLBIE_B
> 
> 
> So it's only 970, Cell and Pasemi that *don't* have lockless TLBIE.
> 
> And KVM HV doesn't run on any of those.
> 
> So we can just not check for the feature in the KVM HV code.
> 
> Am I right?

Yes; that code was written when we still supported HV KVM on 970,
but we ripped that out some time ago.

Paul.


Re: [PATCH 2/2] KVM: PPC: Book3S HV: lockless tlbie for HPT hcalls

2018-04-06 Thread Michael Ellerman
Nicholas Piggin  writes:
> diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c 
> b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
> index 78e6a392330f..0221a0f74f07 100644
> --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
> +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
> @@ -439,6 +439,9 @@ static inline int try_lock_tlbie(unsigned int *lock)
>   unsigned int tmp, old;
>   unsigned int token = LOCK_TOKEN;
>  
> + if (mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE))
> + return 1;
> +
>   asm volatile("1:lwarx   %1,0,%2\n"
>"  cmpwi   cr0,%1,0\n"
>"  bne 2f\n"
> @@ -452,6 +455,12 @@ static inline int try_lock_tlbie(unsigned int *lock)
>   return old == 0;
>  }
>  
> +static inline void unlock_tlbie_after_sync(unsigned int *lock)
> +{
> + if (mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE))
> + return;
> +}

So this is a bit hard to follow:

#define MMU_FTRS_DEFAULT_HPTE_ARCH_V2   \
MMU_FTR_HPTE_TABLE | MMU_FTR_PPCAS_ARCH_V2
#define MMU_FTRS_POWER  MMU_FTRS_DEFAULT_HPTE_ARCH_V2
#define MMU_FTRS_PPC970 MMU_FTRS_POWER | MMU_FTR_TLBIE_CROP_VA  
// does NOT
#define MMU_FTRS_POWER5 MMU_FTRS_POWER | MMU_FTR_LOCKLESS_TLBIE
#define MMU_FTRS_POWER6 MMU_FTRS_POWER5 | MMU_FTR_KERNEL_RO | 
MMU_FTR_68_BIT_VA // includes lockless TLBIE
#define MMU_FTRS_POWER7 MMU_FTRS_POWER6 
// includes lockless TLBIE
#define MMU_FTRS_POWER8 MMU_FTRS_POWER6 
// includes lockless TLBIE
#define MMU_FTRS_POWER9 MMU_FTRS_POWER6 
// includes lockless TLBIE
#define MMU_FTRS_CELL   MMU_FTRS_DEFAULT_HPTE_ARCH_V2 | 
// does NOT
MMU_FTR_CI_LARGE_PAGE
#define MMU_FTRS_PA6T   MMU_FTRS_DEFAULT_HPTE_ARCH_V2 | \   
// does NOT
MMU_FTR_CI_LARGE_PAGE | MMU_FTR_NO_SLBIE_B


So it's only 970, Cell and Pasemi that *don't* have lockless TLBIE.

And KVM HV doesn't run on any of those.

So we can just not check for the feature in the KVM HV code.

Am I right?

cheers


Re: [PATCH 2/2] KVM: PPC: Book3S HV: lockless tlbie for HPT hcalls

2018-04-05 Thread Nicholas Piggin
On Fri,  6 Apr 2018 03:56:31 +1000
Nicholas Piggin  wrote:

> tlbies to an LPAR do not have to be serialised since POWER4,
> MMU_FTR_LOCKLESS_TLBIE can be used to avoid the spin lock in
> do_tlbies.
> 
> Testing was done on a POWER9 system in HPT mode, with a -smp 32 guest
> in HPT mode. 32 instances of the powerpc fork benchmark from selftests
> were run with --fork, and the results measured.
> 
> Without this patch, total throughput was about 13.5K/sec, and this is
> the top of the host profile:
> 
>74.52%  [k] do_tlbies
> 2.95%  [k] kvmppc_book3s_hv_page_fault
> 1.80%  [k] calc_checksum
> 1.80%  [k] kvmppc_vcpu_run_hv
> 1.49%  [k] kvmppc_run_core
> 
> After this patch, throughput was about 51K/sec, with this profile:
> 
>21.28%  [k] do_tlbies
> 5.26%  [k] kvmppc_run_core
> 4.88%  [k] kvmppc_book3s_hv_page_fault
> 3.30%  [k] _raw_spin_lock_irqsave
> 3.25%  [k] gup_pgd_range
> 
> Signed-off-by: Nicholas Piggin 
> ---
>  arch/powerpc/kvm/book3s_hv_rm_mmu.c | 11 ++-
>  1 file changed, 10 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c 
> b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
> index 78e6a392330f..0221a0f74f07 100644
> --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
> +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
> @@ -439,6 +439,9 @@ static inline int try_lock_tlbie(unsigned int *lock)
>   unsigned int tmp, old;
>   unsigned int token = LOCK_TOKEN;
>  
> + if (mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE))
> + return 1;
> +
>   asm volatile("1:lwarx   %1,0,%2\n"
>"  cmpwi   cr0,%1,0\n"
>"  bne 2f\n"
> @@ -452,6 +455,12 @@ static inline int try_lock_tlbie(unsigned int *lock)
>   return old == 0;
>  }
>  
> +static inline void unlock_tlbie_after_sync(unsigned int *lock)
> +{
> + if (mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE))
> + return;
> +}
> +
>  static void do_tlbies(struct kvm *kvm, unsigned long *rbvalues,
> long npages, int global, bool need_sync)
>  {
> @@ -483,7 +492,7 @@ static void do_tlbies(struct kvm *kvm, unsigned long 
> *rbvalues,
>   }
>  
>   asm volatile("eieio; tlbsync; ptesync" : : : "memory");
> - kvm->arch.tlbie_lock = 0;
> + unlock_tlbie_after_sync(&kvm->arch.tlbie_lock);

Well that's a silly bug in the !LOCKLESS path, that was supposed
to move to unlock, of course. Will fix it up after some time for
comments.

Thanks,
Nick