Rather than partition the guest PID space and catch and flush a rogue
guest, work around this issue by ensuring the MMU is always disabled
in HV mode while the guest MMU context is switched in.

This may be a bit less efficient, but it is a lot less complicated and
allows the P9 path to trivially implement the workaround too. Newer CPUs
are not subject to this issue.

Signed-off-by: Nicholas Piggin <npig...@gmail.com>
---
 arch/powerpc/include/asm/mmu_context.h   |  6 ----
 arch/powerpc/kvm/book3s_hv.c             | 10 ++++--
 arch/powerpc/kvm/book3s_hv_interrupt.c   | 14 ++++++--
 arch/powerpc/kvm/book3s_hv_rmhandlers.S  | 34 ------------------
 arch/powerpc/mm/book3s64/radix_pgtable.c | 27 +++++---------
 arch/powerpc/mm/book3s64/radix_tlb.c     | 46 ------------------------
 arch/powerpc/mm/mmu_context.c            |  4 +--
 7 files changed, 28 insertions(+), 113 deletions(-)
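
For reference, the net effect on the P9 entry/exit path is roughly the
following (a simplified sketch of the book3s_hv_interrupt.c changes
below, not the literal code; "msr" is the host MSR value saved on
entry):

	/*
	 * Entry: on affected CPUs, drop to real mode (and clear RI)
	 * before the guest MMU context is switched in, so the core
	 * cannot prefetch host translations under the guest context.
	 */
	if (cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG))
		__mtmsrd(msr & ~(MSR_IR|MSR_DR|MSR_RI), 0);

	switch_mmu_to_guest_radix(kvm, vcpu, lpcr);

	/* ... enter and run the guest, take the interrupt return ... */

	/*
	 * Exit: switch the MMU context back to the host first, and
	 * only then turn translation (and RI) back on.
	 */
	switch_mmu_to_host_radix(kvm, host_pidr);
	__mtmsrd(msr, 0);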

diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
index 652ce85f9410..bb5c7e5e142e 100644
--- a/arch/powerpc/include/asm/mmu_context.h
+++ b/arch/powerpc/include/asm/mmu_context.h
@@ -122,12 +122,6 @@ static inline bool need_extra_context(struct mm_struct *mm, unsigned long ea)
 }
 #endif
 
-#if defined(CONFIG_KVM_BOOK3S_HV_POSSIBLE) && defined(CONFIG_PPC_RADIX_MMU)
-extern void radix_kvm_prefetch_workaround(struct mm_struct *mm);
-#else
-static inline void radix_kvm_prefetch_workaround(struct mm_struct *mm) { }
-#endif
-
 extern void switch_cop(struct mm_struct *next);
 extern int use_cop(unsigned long acop, struct mm_struct *mm);
 extern void drop_cop(unsigned long acop, struct mm_struct *mm);
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index ad16331c3370..c3064075f1d7 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -806,6 +806,10 @@ static int kvmppc_h_set_mode(struct kvm_vcpu *vcpu, unsigned long mflags,
                /* KVM does not support mflags=2 (AIL=2) */
                if (mflags != 0 && mflags != 3)
                        return H_UNSUPPORTED_FLAG_START;
+               /* Prefetch bug */
+               if (cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG) &&
+                               kvmhv_vcpu_is_radix(vcpu) && mflags == 3)
+                       return H_UNSUPPORTED_FLAG_START;
                return H_TOO_HARD;
        default:
                return H_TOO_HARD;
@@ -4286,8 +4290,7 @@ static int kvmppc_vcpu_run_hv(struct kvm_vcpu *vcpu)
                 * The TLB prefetch bug fixup is only in the kvmppc_run_vcpu
                 * path, which also handles hash and dependent threads mode.
                 */
-               if (kvm->arch.threads_indep && kvm_is_radix(kvm) &&
-                   !cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG))
+               if (kvm->arch.threads_indep && kvm_is_radix(kvm))
                        r = kvmhv_run_single_vcpu(vcpu, ~(u64)0,
                                                  vcpu->arch.vcore->lpcr);
                else
@@ -4914,6 +4917,9 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm)
                if (!indep_threads_mode && !cpu_has_feature(CPU_FTR_HVMODE)) {
                        pr_warn("KVM: Ignoring indep_threads_mode=N in nested 
hypervisor\n");
                        kvm->arch.threads_indep = true;
+               } else if (!indep_threads_mode && cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG)) {
+                       pr_warn("KVM: Ignoring indep_threads_mode=N on 
pre-DD2.2 POWER9\n");
+                       kvm->arch.threads_indep = true;
                } else {
                        kvm->arch.threads_indep = indep_threads_mode;
                }
diff --git a/arch/powerpc/kvm/book3s_hv_interrupt.c b/arch/powerpc/kvm/book3s_hv_interrupt.c
index b93d861d8538..9784da3f8565 100644
--- a/arch/powerpc/kvm/book3s_hv_interrupt.c
+++ b/arch/powerpc/kvm/book3s_hv_interrupt.c
@@ -223,6 +223,9 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc
 
        mtspr(SPRN_AMOR, ~0UL);
 
+       if (cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG))
+               __mtmsrd(msr & ~(MSR_IR|MSR_DR|MSR_RI), 0);
+
        switch_mmu_to_guest_radix(kvm, vcpu, lpcr);
 
        /*
@@ -231,7 +234,8 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc
         */
        mtspr(SPRN_HDEC, hdec);
 
-       __mtmsrd(0, 1); /* clear RI */
+       if (!cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG))
+               __mtmsrd(0, 1); /* clear RI */
 
        mtspr(SPRN_DAR, vcpu->arch.shregs.dar);
        mtspr(SPRN_DSISR, vcpu->arch.shregs.dsisr);
@@ -338,8 +342,6 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc
 
        radix_clear_slb();
 
-       __mtmsrd(msr, 0);
-
        accumulate_time(vcpu, &vcpu->arch.rm_exit);
 
        /* Advance host PURR/SPURR by the amount used by guest */
@@ -406,6 +408,12 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc
 
        switch_mmu_to_host_radix(kvm, host_pidr);
 
+       /*
+        * If we are in real mode, don't turn the MMU back on until the host
+        * MMU context is switched in, to avoid the P9 radix prefetch bug.
+        */
+       __mtmsrd(msr, 0);
+
        end_timing(vcpu);
 
        return trap;
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 6118e8a97ddd..61f71a7df238 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -1710,40 +1710,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
        eieio
        tlbsync
        ptesync
-
-BEGIN_FTR_SECTION
-       /* Radix: Handle the case where the guest used an illegal PID */
-       LOAD_REG_ADDR(r4, mmu_base_pid)
-       lwz     r3, VCPU_GUEST_PID(r9)
-       lwz     r5, 0(r4)
-       cmpw    cr0,r3,r5
-       blt     2f
-
-       /*
-        * Illegal PID, the HW might have prefetched and cached in the TLB
-        * some translations for the  LPID 0 / guest PID combination which
-        * Linux doesn't know about, so we need to flush that PID out of
-        * the TLB. First we need to set LPIDR to 0 so tlbiel applies to
-        * the right context.
-       */
-       li      r0,0
-       mtspr   SPRN_LPID,r0
-       isync
-
-       /* Then do a congruence class local flush */
-       ld      r6,VCPU_KVM(r9)
-       lwz     r0,KVM_TLB_SETS(r6)
-       mtctr   r0
-       li      r7,0x400                /* IS field = 0b01 */
-       ptesync
-       sldi    r0,r3,32                /* RS has PID */
-1:     PPC_TLBIEL(7,0,2,1,1)           /* RIC=2, PRS=1, R=1 */
-       addi    r7,r7,0x1000
-       bdnz    1b
-       ptesync
-END_FTR_SECTION_IFSET(CPU_FTR_P9_RADIX_PREFETCH_BUG)
-
-2:
 #endif /* CONFIG_PPC_RADIX_MMU */
 
        /*
diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c
index 98f0b243c1ab..1ea95891a79e 100644
--- a/arch/powerpc/mm/book3s64/radix_pgtable.c
+++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
@@ -357,30 +357,19 @@ static void __init radix_init_pgtable(void)
        }
 
        /* Find out how many PID bits are supported */
-       if (!cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG)) {
-               if (!mmu_pid_bits)
-                       mmu_pid_bits = 20;
-               mmu_base_pid = 1;
-       } else if (cpu_has_feature(CPU_FTR_HVMODE)) {
-               if (!mmu_pid_bits)
-                       mmu_pid_bits = 20;
-#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+       if (!cpu_has_feature(CPU_FTR_HVMODE) &&
+                       cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG)) {
                /*
-                * When KVM is possible, we only use the top half of the
-                * PID space to avoid collisions between host and guest PIDs
-                * which can cause problems due to prefetch when exiting the
-                * guest with AIL=3
+                * Older versions of KVM on these machines prefer that the
+                * guest only uses the low 19 PID bits.
                 */
-               mmu_base_pid = 1 << (mmu_pid_bits - 1);
-#else
-               mmu_base_pid = 1;
-#endif
-       } else {
-               /* The guest uses the bottom half of the PID space */
                if (!mmu_pid_bits)
                        mmu_pid_bits = 19;
-               mmu_base_pid = 1;
+       } else {
+               if (!mmu_pid_bits)
+                       mmu_pid_bits = 20;
        }
+       mmu_base_pid = 1;
 
        /*
         * Allocate Partition table and process table for the
diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c
index 409e61210789..312236a6b085 100644
--- a/arch/powerpc/mm/book3s64/radix_tlb.c
+++ b/arch/powerpc/mm/book3s64/radix_tlb.c
@@ -1336,49 +1336,3 @@ void radix__flush_tlb_all(void)
                     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(0) : 
"memory");
        asm volatile("eieio; tlbsync; ptesync": : :"memory");
 }
-
-#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
-extern void radix_kvm_prefetch_workaround(struct mm_struct *mm)
-{
-       unsigned long pid = mm->context.id;
-
-       if (unlikely(pid == MMU_NO_CONTEXT))
-               return;
-
-       if (!cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG))
-               return;
-
-       /*
-        * If this context hasn't run on that CPU before and KVM is
-        * around, there's a slim chance that the guest on another
-        * CPU just brought in obsolete translation into the TLB of
-        * this CPU due to a bad prefetch using the guest PID on
-        * the way into the hypervisor.
-        *
-        * We work around this here. If KVM is possible, we check if
-        * any sibling thread is in KVM. If it is, the window may exist
-        * and thus we flush that PID from the core.
-        *
-        * A potential future improvement would be to mark which PIDs
-        * have never been used on the system and avoid it if the PID
-        * is new and the process has no other cpumask bit set.
-        */
-       if (cpu_has_feature(CPU_FTR_HVMODE) && radix_enabled()) {
-               int cpu = smp_processor_id();
-               int sib = cpu_first_thread_sibling(cpu);
-               bool flush = false;
-
-               for (; sib <= cpu_last_thread_sibling(cpu) && !flush; sib++) {
-                       if (sib == cpu)
-                               continue;
-                       if (!cpu_possible(sib))
-                               continue;
-                       if (paca_ptrs[sib]->kvm_hstate.kvm_vcpu)
-                               flush = true;
-               }
-               if (flush)
-                       _tlbiel_pid(pid, RIC_FLUSH_ALL);
-       }
-}
-EXPORT_SYMBOL_GPL(radix_kvm_prefetch_workaround);
-#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
diff --git a/arch/powerpc/mm/mmu_context.c b/arch/powerpc/mm/mmu_context.c
index 18f20da0d348..7479d39976c9 100644
--- a/arch/powerpc/mm/mmu_context.c
+++ b/arch/powerpc/mm/mmu_context.c
@@ -81,9 +81,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
        if (cpu_has_feature(CPU_FTR_ALTIVEC))
                asm volatile ("dssall");
 
-       if (new_on_cpu)
-               radix_kvm_prefetch_workaround(next);
-       else
+       if (!new_on_cpu)
                membarrier_arch_switch_mm(prev, next, tsk);
 
        /*
-- 
2.23.0
