tlbies to an LPAR do not have to be serialised since POWER4, so
MMU_FTR_LOCKLESS_TLBIE can be used to avoid taking the spin lock in
do_tlbies.
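
For reference, a minimal user-space sketch of the feature-gated lock
elision applied here (illustrative only, not the kernel code:
mmu_lockless stands in for mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE),
the C11 atomics model the real lwarx/stwcx. lock, and do_tlbies_global
is a simplified stand-in for the global path of do_tlbies):

    #include <stdatomic.h>
    #include <stdbool.h>

    static atomic_uint tlbie_lock;  /* models kvm->arch.tlbie_lock */
    static bool mmu_lockless;       /* set at init from MMU feature bits */

    static bool try_lock_tlbie(void)
    {
            unsigned int expected = 0;

            if (mmu_lockless)
                    return true;    /* no serialisation needed: skip the lock */
            return atomic_compare_exchange_strong(&tlbie_lock, &expected, 1);
    }

    static void unlock_tlbie_after_sync(void)
    {
            if (mmu_lockless)
                    return;         /* lock was never taken */
            atomic_store(&tlbie_lock, 0);
    }

    static void do_tlbies_global(void)
    {
            while (!try_lock_tlbie())
                    ;               /* spin until the lock is free */
            /* issue tlbies, then eieio; tlbsync; ptesync */
            unlock_tlbie_after_sync();
    }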

Testing was done on a POWER9 system in HPT mode, running a -smp 32
guest, also in HPT mode. 32 instances of the powerpc fork benchmark
from selftests were run with --fork, and aggregate throughput and host
profiles were measured.

Without this patch, total throughput was about 13.5K/sec, and this is
the top of the host profile:

   74.52%  [k] do_tlbies
    2.95%  [k] kvmppc_book3s_hv_page_fault
    1.80%  [k] calc_checksum
    1.80%  [k] kvmppc_vcpu_run_hv
    1.49%  [k] kvmppc_run_core

After this patch, throughput was about 51K/sec, with this profile:

   21.28%  [k] do_tlbies
    5.26%  [k] kvmppc_run_core
    4.88%  [k] kvmppc_book3s_hv_page_fault
    3.30%  [k] _raw_spin_lock_irqsave
    3.25%  [k] gup_pgd_range

Signed-off-by: Nicholas Piggin <npig...@gmail.com>
---
 arch/powerpc/kvm/book3s_hv_rm_mmu.c | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index 78e6a392330f..0221a0f74f07 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -439,6 +439,9 @@ static inline int try_lock_tlbie(unsigned int *lock)
        unsigned int tmp, old;
        unsigned int token = LOCK_TOKEN;
 
+       if (mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE))
+               return 1;
+
        asm volatile("1:lwarx   %1,0,%2\n"
                     "  cmpwi   cr0,%1,0\n"
                     "  bne     2f\n"
@@ -452,6 +455,14 @@ static inline int try_lock_tlbie(unsigned int *lock)
        return old == 0;
 }
 
+static inline void unlock_tlbie_after_sync(unsigned int *lock)
+{
+       /* with MMU_FTR_LOCKLESS_TLBIE, the lock was never taken */
+       if (mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE))
+               return;
+       *lock = 0;
+}
+
 static void do_tlbies(struct kvm *kvm, unsigned long *rbvalues,
                      long npages, int global, bool need_sync)
 {
@@ -483,7 +494,7 @@ static void do_tlbies(struct kvm *kvm, unsigned long *rbvalues,
                }
 
                asm volatile("eieio; tlbsync; ptesync" : : : "memory");
-               kvm->arch.tlbie_lock = 0;
+               unlock_tlbie_after_sync(&kvm->arch.tlbie_lock);
        } else {
                if (need_sync)
                        asm volatile("ptesync" : : : "memory");
-- 
2.16.3
