cpu_in_guest is used to determine whether a CPU needs to be IPI'ed so
that it exits the guest and notices the need_tlb_flush bit.

This can be implemented as a global per-CPU pointer to the currently
running guest instead of per-guest cpumasks, saving 2 atomics per
entry/exit. P7/8 don't require cpu_in_guest, nor does a nested HV
(only the L0 does), so move it to the P9 HV path.

Signed-off-by: Nicholas Piggin <npig...@gmail.com>
---
 arch/powerpc/include/asm/kvm_book3s_64.h |  1 -
 arch/powerpc/include/asm/kvm_host.h      |  1 -
 arch/powerpc/kvm/book3s_hv.c             | 38 +++++++++++++-----------
 3 files changed, 21 insertions(+), 19 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
index 96f0fda50a07..fe07558173ef 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -44,7 +44,6 @@ struct kvm_nested_guest {
        struct mutex tlb_lock;          /* serialize page faults and tlbies */
        struct kvm_nested_guest *next;
        cpumask_t need_tlb_flush;
-       cpumask_t cpu_in_guest;
        short prev_cpu[NR_CPUS];
        u8 radix;                       /* is this nested guest radix */
 };
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 92925f82a1e3..4de418f6c0a2 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -287,7 +287,6 @@ struct kvm_arch {
        u32 online_vcores;
        atomic_t hpte_mod_interest;
        cpumask_t need_tlb_flush;
-       cpumask_t cpu_in_guest;
        u8 radix;
        u8 fwnmi_enabled;
        u8 secure_guest;
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 6e072e2e130a..6574e8a3731e 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -3009,30 +3009,33 @@ static void kvmppc_release_hwthread(int cpu)
        tpaca->kvm_hstate.kvm_split_mode = NULL;
 }
 
+static DEFINE_PER_CPU(struct kvm *, cpu_in_guest);
+
 static void radix_flush_cpu(struct kvm *kvm, int cpu, struct kvm_vcpu *vcpu)
 {
        struct kvm_nested_guest *nested = vcpu->arch.nested;
-       cpumask_t *cpu_in_guest;
        int i;
 
        cpu = cpu_first_tlb_thread_sibling(cpu);
-       if (nested) {
+       if (nested)
                cpumask_set_cpu(cpu, &nested->need_tlb_flush);
-               cpu_in_guest = &nested->cpu_in_guest;
-       } else {
+       else
                cpumask_set_cpu(cpu, &kvm->arch.need_tlb_flush);
-               cpu_in_guest = &kvm->arch.cpu_in_guest;
-       }
        /*
-        * Make sure setting of bit in need_tlb_flush precedes
-        * testing of cpu_in_guest bits.  The matching barrier on
-        * the other side is the first smp_mb() in kvmppc_run_core().
+        * Make sure setting of bit in need_tlb_flush precedes testing of
+        * cpu_in_guest. The matching barrier on the other side is hwsync
+        * when switching to guest MMU mode, which happens between
+        * cpu_in_guest being set to the guest kvm, and need_tlb_flush bit
+        * being tested.
         */
        smp_mb();
        for (i = cpu; i <= cpu_last_tlb_thread_sibling(cpu);
-                                       i += cpu_tlb_thread_sibling_step())
-               if (cpumask_test_cpu(i, cpu_in_guest))
+                                       i += cpu_tlb_thread_sibling_step()) {
+               struct kvm *running = *per_cpu_ptr(&cpu_in_guest, i);
+
+               if (running == kvm)
                        smp_call_function_single(i, do_nothing, NULL, 1);
+       }
 }
 
 static void do_migrate_away_vcpu(void *arg)
@@ -3100,7 +3103,6 @@ static void kvmppc_start_thread(struct kvm_vcpu *vcpu, struct kvmppc_vcore *vc)
 {
        int cpu;
        struct paca_struct *tpaca;
-       struct kvm *kvm = vc->kvm;
 
        cpu = vc->pcpu;
        if (vcpu) {
@@ -3111,7 +3113,6 @@ static void kvmppc_start_thread(struct kvm_vcpu *vcpu, struct kvmppc_vcore *vc)
                cpu += vcpu->arch.ptid;
                vcpu->cpu = vc->pcpu;
                vcpu->arch.thread_cpu = cpu;
-               cpumask_set_cpu(cpu, &kvm->arch.cpu_in_guest);
        }
        tpaca = paca_ptrs[cpu];
        tpaca->kvm_hstate.kvm_vcpu = vcpu;
@@ -3829,7 +3830,6 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
                kvmppc_release_hwthread(pcpu + i);
                if (sip && sip->napped[i])
                        kvmppc_ipi_thread(pcpu + i);
-               cpumask_clear_cpu(pcpu + i, &vc->kvm->arch.cpu_in_guest);
        }
 
        spin_unlock(&vc->lock);
@@ -3997,8 +3997,14 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
                }
 
        } else {
+               struct kvm *kvm = vcpu->kvm;
+
                kvmppc_xive_push_vcpu(vcpu);
+
+               __this_cpu_write(cpu_in_guest, kvm);
                trap = kvmhv_vcpu_entry_p9(vcpu, time_limit, lpcr, tb);
+               __this_cpu_write(cpu_in_guest, NULL);
+
                if (trap == BOOK3S_INTERRUPT_SYSCALL && !vcpu->arch.nested &&
                    !(vcpu->arch.shregs.msr & MSR_PR)) {
                        unsigned long req = kvmppc_get_gpr(vcpu, 3);
@@ -4023,7 +4029,7 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
                }
                kvmppc_xive_pull_vcpu(vcpu);
 
-               if (kvm_is_radix(vcpu->kvm))
+               if (kvm_is_radix(kvm))
                        vcpu->arch.slb_max = 0;
        }
 
@@ -4500,8 +4506,6 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
 
        powerpc_local_irq_pmu_restore(flags);
 
-       cpumask_clear_cpu(pcpu, &kvm->arch.cpu_in_guest);
-
        preempt_enable();
 
        /*
-- 
2.23.0

Reply via email to