[PATCH v1 55/55] KVM: PPC: Book3S HV P9: Remove subcore HMI handling

2021-07-25 Thread Nicholas Piggin
On POWER9 and newer, rather than the complex HMI synchronisation and
subcore state, have each thread un-apply the guest TB offset before
calling into the early HMI handler.

This allows the subcore state to be avoided, including subcore enter
/ exit guest, which includes an expensive divide that shows up
slightly in profiles.

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/kvm/book3s_hv.c  | 12 +-
 arch/powerpc/kvm/book3s_hv_hmi.c  |  7 +-
 arch/powerpc/kvm/book3s_hv_p9_entry.c | 32 ++-
 arch/powerpc/kvm/book3s_hv_ras.c  |  4 
 4 files changed, 46 insertions(+), 9 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index b727b2cfad98..3f62ada1a669 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -3994,8 +3994,6 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, 
u64 time_limit,
 
vcpu->arch.ceded = 0;
 
-   kvmppc_subcore_enter_guest();
-
vcpu_vpa_increment_dispatch(vcpu);
 
if (kvmhv_on_pseries()) {
@@ -4048,8 +4046,6 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, 
u64 time_limit,
 
vcpu_vpa_increment_dispatch(vcpu);
 
-   kvmppc_subcore_exit_guest();
-
return trap;
 }
 
@@ -6031,9 +6027,11 @@ static int kvmppc_book3s_init_hv(void)
if (r)
return r;
 
-   r = kvm_init_subcore_bitmap();
-   if (r)
-   return r;
+   if (!cpu_has_feature(CPU_FTR_ARCH_300)) {
+   r = kvm_init_subcore_bitmap();
+   if (r)
+   return r;
+   }
 
/*
 * We need a way of accessing the XICS interrupt controller,
diff --git a/arch/powerpc/kvm/book3s_hv_hmi.c b/arch/powerpc/kvm/book3s_hv_hmi.c
index 9af660476314..1ec50c69678b 100644
--- a/arch/powerpc/kvm/book3s_hv_hmi.c
+++ b/arch/powerpc/kvm/book3s_hv_hmi.c
@@ -20,10 +20,15 @@ void wait_for_subcore_guest_exit(void)
 
/*
 * NULL bitmap pointer indicates that KVM module hasn't
-* been loaded yet and hence no guests are running.
+* been loaded yet and hence no guests are running, or running
+* on POWER9 or newer CPU.
+*
 * If no KVM is in use, no need to co-ordinate among threads
 * as all of them will always be in host and no one is going
 * to modify TB other than the opal hmi handler.
+*
+* POWER9 and newer don't need this synchronisation.
+*
 * Hence, just return from here.
 */
if (!local_paca->sibling_subcore_state)
diff --git a/arch/powerpc/kvm/book3s_hv_p9_entry.c 
b/arch/powerpc/kvm/book3s_hv_p9_entry.c
index 032ca6dfd83c..d23e1ef2e3a7 100644
--- a/arch/powerpc/kvm/book3s_hv_p9_entry.c
+++ b/arch/powerpc/kvm/book3s_hv_p9_entry.c
@@ -3,6 +3,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -927,7 +928,36 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 
time_limit, unsigned long lpc
kvmppc_realmode_machine_check(vcpu);
 
} else if (unlikely(trap == BOOK3S_INTERRUPT_HMI)) {
-   kvmppc_realmode_hmi_handler();
+   /*
+* Unapply and clear the offset first. That way, if the TB
+* was fine then no harm done, if it is corrupted then the
+* HMI resync will bring it back to host mode. This way, we
+* don't need to actually know whether or not OPAL resynced the
+* timebase. Although it would be cleaner if we could rely
+* on that, early POWER9 OPAL did not support the
+* OPAL_HANDLE_HMI2 call.
+*/
+   if (vc->tb_offset_applied) {
+   u64 new_tb = mftb() - vc->tb_offset_applied;
+   mtspr(SPRN_TBU40, new_tb);
+   if ((mftb() & 0xff) < (new_tb & 0xff)) {
+   new_tb += 0x100;
+   mtspr(SPRN_TBU40, new_tb);
+   }
+   vc->tb_offset_applied = 0;
+   }
+
+   hmi_exception_realmode(NULL);
+
+   if (vc->tb_offset) {
+   u64 new_tb = mftb() + vc->tb_offset;
+   mtspr(SPRN_TBU40, new_tb);
+   if ((mftb() & 0xff) < (new_tb & 0xff)) {
+   new_tb += 0x100;
+   mtspr(SPRN_TBU40, new_tb);
+   }
+   vc->tb_offset_applied = vc->tb_offset;
+   }
 
} else if (trap == BOOK3S_INTERRUPT_H_EMUL_ASSIST) {
vcpu->arch.emul_inst = mfspr(SPRN_HEIR);
diff --git a/arch/powerpc/kvm/book3s_hv_ras.c b/arch/powerpc/kvm/book3s_hv_ras.c
index d4bca93b79f6..a49ee9bdab67 100644
--- a/arch/powerpc/kvm/book3s_hv_ras.c
+++ b/arch/powerpc/kvm/book3s_hv_ras.c
@@ -136,6 +136,10 @@ void 

[PATCH v1 54/55] KVM: PPC: Book3S HV P9: Stop using vc->dpdes

2021-07-25 Thread Nicholas Piggin
The P9 path uses vc->dpdes only for msgsndp / SMT emulation. This adds
an ordering requirement between vcpu->doorbell_request and vc->dpdes for
no real benefit. Use vcpu->doorbell_request directly.

XXX: verify msgsndp / DPDES emulation works properly.

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/kvm/book3s_hv.c  | 18 ++
 arch/powerpc/kvm/book3s_hv_builtin.c  |  2 ++
 arch/powerpc/kvm/book3s_hv_p9_entry.c | 14 ++
 3 files changed, 22 insertions(+), 12 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index f233ff1c18e1..b727b2cfad98 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -766,6 +766,8 @@ static bool kvmppc_doorbell_pending(struct kvm_vcpu *vcpu)
 
if (vcpu->arch.doorbell_request)
return true;
+   if (cpu_has_feature(CPU_FTR_ARCH_300))
+   return false;
/*
 * Ensure that the read of vcore->dpdes comes after the read
 * of vcpu->doorbell_request.  This barrier matches the
@@ -2166,8 +2168,10 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, 
u64 id,
 * either vcore->dpdes or doorbell_request.
 * On POWER8, doorbell_request is 0.
 */
-   *val = get_reg_val(id, vcpu->arch.vcore->dpdes |
-  vcpu->arch.doorbell_request);
+   if (cpu_has_feature(CPU_FTR_ARCH_300))
+   *val = get_reg_val(id, vcpu->arch.doorbell_request);
+   else
+   *val = get_reg_val(id, vcpu->arch.vcore->dpdes);
break;
case KVM_REG_PPC_VTB:
*val = get_reg_val(id, vcpu->arch.vcore->vtb);
@@ -2404,7 +2408,10 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, 
u64 id,
vcpu->arch.pspb = set_reg_val(id, *val);
break;
case KVM_REG_PPC_DPDES:
-   vcpu->arch.vcore->dpdes = set_reg_val(id, *val);
+   if (cpu_has_feature(CPU_FTR_ARCH_300))
+   vcpu->arch.doorbell_request = set_reg_val(id, *val) & 1;
+   else
+   vcpu->arch.vcore->dpdes = set_reg_val(id, *val);
break;
case KVM_REG_PPC_VTB:
vcpu->arch.vcore->vtb = set_reg_val(id, *val);
@@ -4440,11 +4447,6 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 
time_limit,
 
if (!nested) {
kvmppc_core_prepare_to_enter(vcpu);
-   if (vcpu->arch.doorbell_request) {
-   vc->dpdes = 1;
-   smp_wmb();
-   vcpu->arch.doorbell_request = 0;
-   }
if (test_bit(BOOK3S_IRQPRIO_EXTERNAL,
 >arch.pending_exceptions))
lpcr |= LPCR_MER;
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c 
b/arch/powerpc/kvm/book3s_hv_builtin.c
index a10bf93054ca..3ed90149ed2e 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -660,6 +660,8 @@ void kvmppc_guest_entry_inject_int(struct kvm_vcpu *vcpu)
int ext;
unsigned long lpcr;
 
+   WARN_ON_ONCE(cpu_has_feature(CPU_FTR_ARCH_300));
+
/* Insert EXTERNAL bit into LPCR at the MER bit position */
ext = (vcpu->arch.pending_exceptions >> BOOK3S_IRQPRIO_EXTERNAL) & 1;
lpcr = mfspr(SPRN_LPCR);
diff --git a/arch/powerpc/kvm/book3s_hv_p9_entry.c 
b/arch/powerpc/kvm/book3s_hv_p9_entry.c
index 338873f90c72..032ca6dfd83c 100644
--- a/arch/powerpc/kvm/book3s_hv_p9_entry.c
+++ b/arch/powerpc/kvm/book3s_hv_p9_entry.c
@@ -695,6 +695,7 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 
time_limit, unsigned long lpc
unsigned long host_pidr;
unsigned long host_dawr1;
unsigned long host_dawrx1;
+   unsigned long dpdes;
 
hdec = time_limit - *tb;
if (hdec < 0)
@@ -757,8 +758,10 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 
time_limit, unsigned long lpc
 
if (vc->pcr)
mtspr(SPRN_PCR, vc->pcr | PCR_MASK);
-   if (vc->dpdes)
-   mtspr(SPRN_DPDES, vc->dpdes);
+   if (vcpu->arch.doorbell_request) {
+   vcpu->arch.doorbell_request = 0;
+   mtspr(SPRN_DPDES, 1);
+   }
 
if (dawr_enabled()) {
if (vcpu->arch.dawr0 != host_dawr0)
@@ -995,7 +998,10 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 
time_limit, unsigned long lpc
vcpu->arch.shregs.sprg2 = mfspr(SPRN_SPRG2);
vcpu->arch.shregs.sprg3 = mfspr(SPRN_SPRG3);
 
-   vc->dpdes = mfspr(SPRN_DPDES);
+   dpdes = mfspr(SPRN_DPDES);
+   if (dpdes)
+   vcpu->arch.doorbell_request = 1;
+
vc->vtb = mfspr(SPRN_VTB);
 
dec = mfspr(SPRN_DEC);
@@ -1057,7 +1063,7 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 
time_limit, unsigned long lpc
}

[PATCH v1 53/55] KVM: PPC: Book3S HV P9: Tidy kvmppc_create_dtl_entry

2021-07-25 Thread Nicholas Piggin
This goes further towards removing vcores from the P9 path. Also avoid the
memset in favour of explicitly initialising all fields.

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/kvm/book3s_hv.c | 61 +---
 1 file changed, 35 insertions(+), 26 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index f83ae33e875c..f233ff1c18e1 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -703,41 +703,30 @@ static u64 vcore_stolen_time(struct kvmppc_vcore *vc, u64 
now)
return p;
 }
 
-static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu,
-   struct kvmppc_vcore *vc, u64 tb)
+static void __kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu,
+   unsigned int pcpu, u64 now,
+   unsigned long stolen)
 {
struct dtl_entry *dt;
struct lppaca *vpa;
-   unsigned long stolen;
-   unsigned long core_stolen;
-   u64 now;
-   unsigned long flags;
 
dt = vcpu->arch.dtl_ptr;
vpa = vcpu->arch.vpa.pinned_addr;
-   now = tb;
-
-   if (cpu_has_feature(CPU_FTR_ARCH_300)) {
-   stolen = 0;
-   } else {
-   core_stolen = vcore_stolen_time(vc, now);
-   stolen = core_stolen - vcpu->arch.stolen_logged;
-   vcpu->arch.stolen_logged = core_stolen;
-   spin_lock_irqsave(>arch.tbacct_lock, flags);
-   stolen += vcpu->arch.busy_stolen;
-   vcpu->arch.busy_stolen = 0;
-   spin_unlock_irqrestore(>arch.tbacct_lock, flags);
-   }
 
if (!dt || !vpa)
return;
-   memset(dt, 0, sizeof(struct dtl_entry));
+
dt->dispatch_reason = 7;
-   dt->processor_id = cpu_to_be16(vc->pcpu + vcpu->arch.ptid);
-   dt->timebase = cpu_to_be64(now + vc->tb_offset);
+   dt->preempt_reason = 0;
+   dt->processor_id = cpu_to_be16(pcpu + vcpu->arch.ptid);
dt->enqueue_to_dispatch_time = cpu_to_be32(stolen);
+   dt->ready_to_enqueue_time = 0;
+   dt->waiting_to_ready_time = 0;
+   dt->timebase = cpu_to_be64(now);
+   dt->fault_addr = 0;
dt->srr0 = cpu_to_be64(kvmppc_get_pc(vcpu));
dt->srr1 = cpu_to_be64(vcpu->arch.shregs.msr);
+
++dt;
if (dt == vcpu->arch.dtl.pinned_end)
dt = vcpu->arch.dtl.pinned_addr;
@@ -748,6 +737,27 @@ static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu,
vcpu->arch.dtl.dirty = true;
 }
 
+static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu,
+   struct kvmppc_vcore *vc)
+{
+   unsigned long stolen;
+   unsigned long core_stolen;
+   u64 now;
+   unsigned long flags;
+
+   now = mftb();
+
+   core_stolen = vcore_stolen_time(vc, now);
+   stolen = core_stolen - vcpu->arch.stolen_logged;
+   vcpu->arch.stolen_logged = core_stolen;
+   spin_lock_irqsave(>arch.tbacct_lock, flags);
+   stolen += vcpu->arch.busy_stolen;
+   vcpu->arch.busy_stolen = 0;
+   spin_unlock_irqrestore(>arch.tbacct_lock, flags);
+
+   __kvmppc_create_dtl_entry(vcpu, vc->pcpu, now + vc->tb_offset, stolen);
+}
+
 /* See if there is a doorbell interrupt pending for a vcpu */
 static bool kvmppc_doorbell_pending(struct kvm_vcpu *vcpu)
 {
@@ -3730,7 +3740,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore 
*vc)
pvc->pcpu = pcpu + thr;
for_each_runnable_thread(i, vcpu, pvc) {
kvmppc_start_thread(vcpu, pvc);
-   kvmppc_create_dtl_entry(vcpu, pvc, mftb());
+   kvmppc_create_dtl_entry(vcpu, pvc);
trace_kvm_guest_enter(vcpu);
if (!vcpu->arch.ptid)
thr0_done = true;
@@ -4281,7 +4291,7 @@ static int kvmppc_run_vcpu(struct kvm_vcpu *vcpu)
if ((vc->vcore_state == VCORE_PIGGYBACK ||
 vc->vcore_state == VCORE_RUNNING) &&
   !VCORE_IS_EXITING(vc)) {
-   kvmppc_create_dtl_entry(vcpu, vc, mftb());
+   kvmppc_create_dtl_entry(vcpu, vc);
kvmppc_start_thread(vcpu, vc);
trace_kvm_guest_enter(vcpu);
} else if (vc->vcore_state == VCORE_SLEEPING) {
@@ -4458,8 +4468,7 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 
time_limit,
local_paca->kvm_hstate.ptid = 0;
local_paca->kvm_hstate.fake_suspend = 0;
 
-   vc->pcpu = pcpu; // for kvmppc_create_dtl_entry
-   kvmppc_create_dtl_entry(vcpu, vc, tb);
+   __kvmppc_create_dtl_entry(vcpu, pcpu, tb + vc->tb_offset, 0);
 
trace_kvm_guest_enter(vcpu);
 
-- 
2.23.0



[PATCH v1 52/55] KVM: PPC: Book3S HV P9: Remove most of the vcore logic

2021-07-25 Thread Nicholas Piggin
The P9 path always uses one vcpu per vcore, so none of the vcore,
locks, stolen time, blocking logic, shared waitq, etc., is required.

Remove most of it.

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/kvm/book3s_hv.c | 147 ---
 1 file changed, 85 insertions(+), 62 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 6f29fa7d77cc..f83ae33e875c 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -281,6 +281,8 @@ static void kvmppc_core_start_stolen(struct kvmppc_vcore 
*vc, u64 tb)
 {
unsigned long flags;
 
+   WARN_ON_ONCE(cpu_has_feature(CPU_FTR_ARCH_300));
+
spin_lock_irqsave(>stoltb_lock, flags);
vc->preempt_tb = tb;
spin_unlock_irqrestore(>stoltb_lock, flags);
@@ -290,6 +292,8 @@ static void kvmppc_core_end_stolen(struct kvmppc_vcore *vc, 
u64 tb)
 {
unsigned long flags;
 
+   WARN_ON_ONCE(cpu_has_feature(CPU_FTR_ARCH_300));
+
spin_lock_irqsave(>stoltb_lock, flags);
if (vc->preempt_tb != TB_NIL) {
vc->stolen_tb += tb - vc->preempt_tb;
@@ -302,7 +306,12 @@ static void kvmppc_core_vcpu_load_hv(struct kvm_vcpu 
*vcpu, int cpu)
 {
struct kvmppc_vcore *vc = vcpu->arch.vcore;
unsigned long flags;
-   u64 now = mftb();
+   u64 now;
+
+   if (cpu_has_feature(CPU_FTR_ARCH_300))
+   return;
+
+   now = mftb();
 
/*
 * We can test vc->runner without taking the vcore lock,
@@ -326,7 +335,12 @@ static void kvmppc_core_vcpu_put_hv(struct kvm_vcpu *vcpu)
 {
struct kvmppc_vcore *vc = vcpu->arch.vcore;
unsigned long flags;
-   u64 now = mftb();
+   u64 now;
+
+   if (cpu_has_feature(CPU_FTR_ARCH_300))
+   return;
+
+   now = mftb();
 
if (vc->runner == vcpu && vc->vcore_state >= VCORE_SLEEPING)
kvmppc_core_start_stolen(vc, now);
@@ -678,6 +692,8 @@ static u64 vcore_stolen_time(struct kvmppc_vcore *vc, u64 
now)
u64 p;
unsigned long flags;
 
+   WARN_ON_ONCE(cpu_has_feature(CPU_FTR_ARCH_300));
+
spin_lock_irqsave(>stoltb_lock, flags);
p = vc->stolen_tb;
if (vc->vcore_state != VCORE_INACTIVE &&
@@ -700,13 +716,19 @@ static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu,
dt = vcpu->arch.dtl_ptr;
vpa = vcpu->arch.vpa.pinned_addr;
now = tb;
-   core_stolen = vcore_stolen_time(vc, now);
-   stolen = core_stolen - vcpu->arch.stolen_logged;
-   vcpu->arch.stolen_logged = core_stolen;
-   spin_lock_irqsave(>arch.tbacct_lock, flags);
-   stolen += vcpu->arch.busy_stolen;
-   vcpu->arch.busy_stolen = 0;
-   spin_unlock_irqrestore(>arch.tbacct_lock, flags);
+
+   if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+   stolen = 0;
+   } else {
+   core_stolen = vcore_stolen_time(vc, now);
+   stolen = core_stolen - vcpu->arch.stolen_logged;
+   vcpu->arch.stolen_logged = core_stolen;
+   spin_lock_irqsave(>arch.tbacct_lock, flags);
+   stolen += vcpu->arch.busy_stolen;
+   vcpu->arch.busy_stolen = 0;
+   spin_unlock_irqrestore(>arch.tbacct_lock, flags);
+   }
+
if (!dt || !vpa)
return;
memset(dt, 0, sizeof(struct dtl_entry));
@@ -903,13 +925,14 @@ static int kvm_arch_vcpu_yield_to(struct kvm_vcpu *target)
 * mode handler is not called but no other threads are in the
 * source vcore.
 */
-
-   spin_lock(>lock);
-   if (target->arch.state == KVMPPC_VCPU_RUNNABLE &&
-   vcore->vcore_state != VCORE_INACTIVE &&
-   vcore->runner)
-   target = vcore->runner;
-   spin_unlock(>lock);
+   if (!cpu_has_feature(CPU_FTR_ARCH_300)) {
+   spin_lock(>lock);
+   if (target->arch.state == KVMPPC_VCPU_RUNNABLE &&
+   vcore->vcore_state != VCORE_INACTIVE &&
+   vcore->runner)
+   target = vcore->runner;
+   spin_unlock(>lock);
+   }
 
return kvm_vcpu_yield_to(target);
 }
@@ -3105,13 +3128,6 @@ static void kvmppc_start_thread(struct kvm_vcpu *vcpu, 
struct kvmppc_vcore *vc)
kvmppc_ipi_thread(cpu);
 }
 
-/* Old path does this in asm */
-static void kvmppc_stop_thread(struct kvm_vcpu *vcpu)
-{
-   vcpu->cpu = -1;
-   vcpu->arch.thread_cpu = -1;
-}
-
 static void kvmppc_wait_for_nap(int n_threads)
 {
int cpu = smp_processor_id();
@@ -3200,6 +3216,8 @@ static void kvmppc_vcore_preempt(struct kvmppc_vcore *vc)
 {
struct preempted_vcore_list *lp = this_cpu_ptr(_vcores);
 
+   WARN_ON_ONCE(cpu_has_feature(CPU_FTR_ARCH_300));
+
vc->vcore_state = VCORE_PREEMPT;
vc->pcpu = smp_processor_id();
if (vc->num_threads < threads_per_vcore(vc->kvm)) {
@@ -3216,6 +3234,8 @@ static void 

[PATCH v1 51/55] KVM: PPC: Book3S HV P9: Avoid cpu_in_guest atomics on entry and exit

2021-07-25 Thread Nicholas Piggin
cpu_in_guest is set to determine if a CPU needs to be IPI'ed to exit
the guest and notice the need_tlb_flush bit.

This can be implemented as a global per-CPU pointer to the currently
running guest instead of per-guest cpumasks, saving 2 atomics per
entry/exit. P7/8 doesn't require cpu_in_guest, nor does a nested HV
(only the L0 does), so move it to the P9 HV path.

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/include/asm/kvm_book3s_64.h |  1 -
 arch/powerpc/include/asm/kvm_host.h  |  1 -
 arch/powerpc/kvm/book3s_hv.c | 38 +---
 3 files changed, 21 insertions(+), 19 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h 
b/arch/powerpc/include/asm/kvm_book3s_64.h
index 4b0753e03731..793aa2868c3f 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -44,7 +44,6 @@ struct kvm_nested_guest {
struct mutex tlb_lock;  /* serialize page faults and tlbies */
struct kvm_nested_guest *next;
cpumask_t need_tlb_flush;
-   cpumask_t cpu_in_guest;
short prev_cpu[NR_CPUS];
u8 radix;   /* is this nested guest radix */
 };
diff --git a/arch/powerpc/include/asm/kvm_host.h 
b/arch/powerpc/include/asm/kvm_host.h
index 74ee3a5b110e..650e1c0d118c 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -288,7 +288,6 @@ struct kvm_arch {
u32 online_vcores;
atomic_t hpte_mod_interest;
cpumask_t need_tlb_flush;
-   cpumask_t cpu_in_guest;
u8 radix;
u8 fwnmi_enabled;
u8 secure_guest;
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 2bd000e2c269..6f29fa7d77cc 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -2989,30 +2989,33 @@ static void kvmppc_release_hwthread(int cpu)
tpaca->kvm_hstate.kvm_split_mode = NULL;
 }
 
+static DEFINE_PER_CPU(struct kvm *, cpu_in_guest);
+
 static void radix_flush_cpu(struct kvm *kvm, int cpu, struct kvm_vcpu *vcpu)
 {
struct kvm_nested_guest *nested = vcpu->arch.nested;
-   cpumask_t *cpu_in_guest;
int i;
 
cpu = cpu_first_tlb_thread_sibling(cpu);
-   if (nested) {
+   if (nested)
cpumask_set_cpu(cpu, >need_tlb_flush);
-   cpu_in_guest = >cpu_in_guest;
-   } else {
+   else
cpumask_set_cpu(cpu, >arch.need_tlb_flush);
-   cpu_in_guest = >arch.cpu_in_guest;
-   }
/*
-* Make sure setting of bit in need_tlb_flush precedes
-* testing of cpu_in_guest bits.  The matching barrier on
-* the other side is the first smp_mb() in kvmppc_run_core().
+* Make sure setting of bit in need_tlb_flush precedes testing of
+* cpu_in_guest. The matching barrier on the other side is hwsync
+* when switching to guest MMU mode, which happens between
+* cpu_in_guest being set to the guest kvm, and need_tlb_flush bit
+* being tested.
 */
smp_mb();
for (i = cpu; i <= cpu_last_tlb_thread_sibling(cpu);
-   i += cpu_tlb_thread_sibling_step())
-   if (cpumask_test_cpu(i, cpu_in_guest))
+   i += cpu_tlb_thread_sibling_step()) {
+   struct kvm *running = *per_cpu_ptr(_in_guest, i);
+
+   if (running == kvm)
smp_call_function_single(i, do_nothing, NULL, 1);
+   }
 }
 
 static void do_migrate_away_vcpu(void *arg)
@@ -3080,7 +3083,6 @@ static void kvmppc_start_thread(struct kvm_vcpu *vcpu, 
struct kvmppc_vcore *vc)
 {
int cpu;
struct paca_struct *tpaca;
-   struct kvm *kvm = vc->kvm;
 
cpu = vc->pcpu;
if (vcpu) {
@@ -3091,7 +3093,6 @@ static void kvmppc_start_thread(struct kvm_vcpu *vcpu, 
struct kvmppc_vcore *vc)
cpu += vcpu->arch.ptid;
vcpu->cpu = vc->pcpu;
vcpu->arch.thread_cpu = cpu;
-   cpumask_set_cpu(cpu, >arch.cpu_in_guest);
}
tpaca = paca_ptrs[cpu];
tpaca->kvm_hstate.kvm_vcpu = vcpu;
@@ -3809,7 +3810,6 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore 
*vc)
kvmppc_release_hwthread(pcpu + i);
if (sip && sip->napped[i])
kvmppc_ipi_thread(pcpu + i);
-   cpumask_clear_cpu(pcpu + i, >kvm->arch.cpu_in_guest);
}
 
spin_unlock(>lock);
@@ -3977,8 +3977,14 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, 
u64 time_limit,
}
 
} else {
+   struct kvm *kvm = vcpu->kvm;
+
kvmppc_xive_push_vcpu(vcpu);
+
+   __this_cpu_write(cpu_in_guest, kvm);
trap = kvmhv_vcpu_entry_p9(vcpu, time_limit, lpcr, tb);
+   __this_cpu_write(cpu_in_guest, NULL);
+
if (trap == 

[PATCH v1 50/55] KVM: PPC: Book3S HV P9: Add unlikely annotation for !mmu_ready

2021-07-25 Thread Nicholas Piggin
The mmu will almost always be ready.

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/kvm/book3s_hv.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index ee4e38cf5df4..2bd000e2c269 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -4376,7 +4376,7 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 
time_limit,
vc->runner = vcpu;
 
/* See if the MMU is ready to go */
-   if (!kvm->arch.mmu_ready) {
+   if (unlikely(!kvm->arch.mmu_ready)) {
r = kvmhv_setup_mmu(vcpu);
if (r) {
run->exit_reason = KVM_EXIT_FAIL_ENTRY;
-- 
2.23.0



[PATCH v1 49/55] KVM: PPC: Book3S HV P9: Optimise hash guest SLB saving

2021-07-25 Thread Nicholas Piggin
slbmfee/slbmfev instructions are very expensive, more so than a regular
mfspr instruction, so minimising them significantly improves hash guest
exit performance. The slbmfev is only required if slbmfee found a valid
SLB entry.

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/kvm/book3s_hv_p9_entry.c | 22 ++
 1 file changed, 18 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_hv_p9_entry.c 
b/arch/powerpc/kvm/book3s_hv_p9_entry.c
index 1287dac918a0..338873f90c72 100644
--- a/arch/powerpc/kvm/book3s_hv_p9_entry.c
+++ b/arch/powerpc/kvm/book3s_hv_p9_entry.c
@@ -477,10 +477,22 @@ static void __accumulate_time(struct kvm_vcpu *vcpu, 
struct kvmhv_tb_accumulator
 #define accumulate_time(vcpu, next) do {} while (0)
 #endif
 
-static inline void mfslb(unsigned int idx, u64 *slbee, u64 *slbev)
+static inline u64 mfslbv(unsigned int idx)
 {
-   asm volatile("slbmfev  %0,%1" : "=r" (*slbev) : "r" (idx));
-   asm volatile("slbmfee  %0,%1" : "=r" (*slbee) : "r" (idx));
+   u64 slbev;
+
+   asm volatile("slbmfev  %0,%1" : "=r" (slbev) : "r" (idx));
+
+   return slbev;
+}
+
+static inline u64 mfslbe(unsigned int idx)
+{
+   u64 slbee;
+
+   asm volatile("slbmfee  %0,%1" : "=r" (slbee) : "r" (idx));
+
+   return slbee;
 }
 
 static inline void mtslb(u64 slbee, u64 slbev)
@@ -610,8 +622,10 @@ static void save_clear_guest_mmu(struct kvm *kvm, struct 
kvm_vcpu *vcpu)
 */
for (i = 0; i < vcpu->arch.slb_nr; i++) {
u64 slbee, slbev;
-   mfslb(i, , );
+
+   slbee = mfslbe(i);
if (slbee & SLB_ESID_V) {
+   slbev = mfslbv(i);
vcpu->arch.slb[nr].orige = slbee | i;
vcpu->arch.slb[nr].origv = slbev;
nr++;
-- 
2.23.0



[PATCH v1 48/55] KVM: PPC: Book3S HV P9: Improve mfmsr performance on entry

2021-07-25 Thread Nicholas Piggin
Rearrange the MSR saving on entry so it does not follow the mtmsrd to
disable interrupts, avoiding a possible RAW scoreboard stall.

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/include/asm/kvm_book3s_64.h |  2 +
 arch/powerpc/kvm/book3s_hv.c | 18 ++-
 arch/powerpc/kvm/book3s_hv_p9_entry.c| 66 +++-
 3 files changed, 47 insertions(+), 39 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h 
b/arch/powerpc/include/asm/kvm_book3s_64.h
index 52e2b7a352c7..4b0753e03731 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -154,6 +154,8 @@ static inline bool kvmhv_vcpu_is_radix(struct kvm_vcpu 
*vcpu)
return radix;
 }
 
+unsigned long kvmppc_msr_hard_disable_set_facilities(struct kvm_vcpu *vcpu, 
unsigned long msr);
+
 int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long 
lpcr, u64 *tb);
 
 #define KVM_DEFAULT_HPT_ORDER  24  /* 16MB HPT by default */
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index b95e0c5e5557..ee4e38cf5df4 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -3858,6 +3858,8 @@ static int kvmhv_vcpu_entry_p9_nested(struct kvm_vcpu 
*vcpu, u64 time_limit, uns
s64 dec;
int trap;
 
+   msr = mfmsr();
+
save_p9_host_os_sprs(_os_sprs);
 
/*
@@ -3868,24 +3870,10 @@ static int kvmhv_vcpu_entry_p9_nested(struct kvm_vcpu 
*vcpu, u64 time_limit, uns
 */
host_psscr = mfspr(SPRN_PSSCR_PR);
 
-   hard_irq_disable();
+   kvmppc_msr_hard_disable_set_facilities(vcpu, msr);
if (lazy_irq_pending())
return 0;
 
-   /* MSR bits may have been cleared by context switch */
-   msr = 0;
-   if (IS_ENABLED(CONFIG_PPC_FPU))
-   msr |= MSR_FP;
-   if (cpu_has_feature(CPU_FTR_ALTIVEC))
-   msr |= MSR_VEC;
-   if (cpu_has_feature(CPU_FTR_VSX))
-   msr |= MSR_VSX;
-   if ((cpu_has_feature(CPU_FTR_TM) ||
-   cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)) &&
-   (vcpu->arch.hfscr & HFSCR_TM))
-   msr |= MSR_TM;
-   msr = msr_check_and_set(msr);
-
if (unlikely(load_vcpu_state(vcpu, _os_sprs)))
msr = mfmsr(); /* TM restore can update msr */
 
diff --git a/arch/powerpc/kvm/book3s_hv_p9_entry.c 
b/arch/powerpc/kvm/book3s_hv_p9_entry.c
index 1bb81be09d4f..1287dac918a0 100644
--- a/arch/powerpc/kvm/book3s_hv_p9_entry.c
+++ b/arch/powerpc/kvm/book3s_hv_p9_entry.c
@@ -622,6 +622,44 @@ static void save_clear_guest_mmu(struct kvm *kvm, struct 
kvm_vcpu *vcpu)
}
 }
 
+unsigned long kvmppc_msr_hard_disable_set_facilities(struct kvm_vcpu *vcpu, 
unsigned long msr)
+{
+   unsigned long msr_needed = 0;
+
+   msr &= ~MSR_EE;
+
+   /* MSR bits may have been cleared by context switch so must recheck */
+   if (IS_ENABLED(CONFIG_PPC_FPU))
+   msr_needed |= MSR_FP;
+   if (cpu_has_feature(CPU_FTR_ALTIVEC))
+   msr_needed |= MSR_VEC;
+   if (cpu_has_feature(CPU_FTR_VSX))
+   msr_needed |= MSR_VSX;
+   if ((cpu_has_feature(CPU_FTR_TM) ||
+   cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)) &&
+   (vcpu->arch.hfscr & HFSCR_TM))
+   msr_needed |= MSR_TM;
+
+   /*
+* This could be combined with MSR[RI] clearing, but that expands
+* the unrecoverable window. It would be better to cover unrecoverable
+* with KVM bad interrupt handling rather than use MSR[RI] at all.
+*
+* Much more difficult and less worthwhile to combine with IR/DR
+* disable.
+*/
+   if ((msr & msr_needed) != msr_needed) {
+   msr |= msr_needed;
+   __mtmsrd(msr, 0);
+   } else {
+   __hard_irq_disable();
+   }
+   local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
+
+   return msr;
+}
+EXPORT_SYMBOL_GPL(kvmppc_msr_hard_disable_set_facilities);
+
 int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long 
lpcr, u64 *tb)
 {
struct p9_host_os_sprs host_os_sprs;
@@ -655,6 +693,9 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 
time_limit, unsigned long lpc
 
vcpu->arch.ceded = 0;
 
+   /* Save MSR for restore, with EE clear. */
+   msr = mfmsr() & ~MSR_EE;
+
host_hfscr = mfspr(SPRN_HFSCR);
host_ciabr = mfspr(SPRN_CIABR);
host_psscr = mfspr(SPRN_PSSCR_PR);
@@ -676,35 +717,12 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 
time_limit, unsigned long lpc
 
save_p9_host_os_sprs(_os_sprs);
 
-   /*
-* This could be combined with MSR[RI] clearing, but that expands
-* the unrecoverable window. It would be better to cover unrecoverable
-* with KVM bad interrupt handling rather than use MSR[RI] at all.
-*
-* Much more difficult and less worthwhile to 

[PATCH v1 47/55] KVM: PPC: Book3S HV Nested: Avoid extra mftb() in nested entry

2021-07-25 Thread Nicholas Piggin
mftb() is expensive and one can be avoided on nested guest dispatch.

If the time checking code distinguishes between the L0 timer and the
nested HV timer, then both can be tested in the same place with the
same mftb() value.

This also nicely illustrates the relationship between the L0 and nested
HV timers.

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/include/asm/kvm_asm.h  |  1 +
 arch/powerpc/kvm/book3s_hv.c| 12 
 arch/powerpc/kvm/book3s_hv_nested.c |  5 -
 3 files changed, 13 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_asm.h 
b/arch/powerpc/include/asm/kvm_asm.h
index fbbf3cec92e9..d68d71987d5c 100644
--- a/arch/powerpc/include/asm/kvm_asm.h
+++ b/arch/powerpc/include/asm/kvm_asm.h
@@ -79,6 +79,7 @@
 #define BOOK3S_INTERRUPT_FP_UNAVAIL0x800
 #define BOOK3S_INTERRUPT_DECREMENTER   0x900
 #define BOOK3S_INTERRUPT_HV_DECREMENTER0x980
+#define BOOK3S_INTERRUPT_NESTED_HV_DECREMENTER 0x1980
 #define BOOK3S_INTERRUPT_DOORBELL  0xa00
 #define BOOK3S_INTERRUPT_SYSCALL   0xc00
 #define BOOK3S_INTERRUPT_TRACE 0xd00
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 3e5c6b745394..b95e0c5e5557 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -1491,6 +1491,10 @@ static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu,
run->ready_for_interrupt_injection = 1;
switch (vcpu->arch.trap) {
/* We're good on these - the host merely wanted to get our attention */
+   case BOOK3S_INTERRUPT_NESTED_HV_DECREMENTER:
+   WARN_ON_ONCE(1); /* Should never happen */
+   vcpu->arch.trap = BOOK3S_INTERRUPT_HV_DECREMENTER;
+   fallthrough;
case BOOK3S_INTERRUPT_HV_DECREMENTER:
vcpu->stat.dec_exits++;
r = RESUME_GUEST;
@@ -1821,6 +1825,12 @@ static int kvmppc_handle_nested_exit(struct kvm_vcpu 
*vcpu)
vcpu->stat.ext_intr_exits++;
r = RESUME_GUEST;
break;
+   /* These need to go to the nested HV */
+   case BOOK3S_INTERRUPT_NESTED_HV_DECREMENTER:
+   vcpu->arch.trap = BOOK3S_INTERRUPT_HV_DECREMENTER;
+   vcpu->stat.dec_exits++;
+   r = RESUME_HOST;
+   break;
/* SR/HMI/PMI are HV interrupts that host has handled. Resume guest.*/
case BOOK3S_INTERRUPT_HMI:
case BOOK3S_INTERRUPT_PERFMON:
@@ -3955,6 +3965,8 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, 
u64 time_limit,
return BOOK3S_INTERRUPT_HV_DECREMENTER;
if (next_timer < time_limit)
time_limit = next_timer;
+   else if (*tb >= time_limit) /* nested time limit */
+   return BOOK3S_INTERRUPT_NESTED_HV_DECREMENTER;
 
vcpu->arch.ceded = 0;
 
diff --git a/arch/powerpc/kvm/book3s_hv_nested.c 
b/arch/powerpc/kvm/book3s_hv_nested.c
index fad7bc8736ea..322064564260 100644
--- a/arch/powerpc/kvm/book3s_hv_nested.c
+++ b/arch/powerpc/kvm/book3s_hv_nested.c
@@ -397,11 +397,6 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu)
vcpu->arch.ret = RESUME_GUEST;
vcpu->arch.trap = 0;
do {
-   if (mftb() >= hdec_exp) {
-   vcpu->arch.trap = BOOK3S_INTERRUPT_HV_DECREMENTER;
-   r = RESUME_HOST;
-   break;
-   }
r = kvmhv_run_single_vcpu(vcpu, hdec_exp, lpcr);
} while (is_kvmppc_resume_guest(r));
 
-- 
2.23.0



[PATCH v1 46/55] KVM: PPC: Book3S HV P9: Avoid tlbsync sequence on radix guest exit

2021-07-25 Thread Nicholas Piggin
Use the existing TLB flushing logic to IPI the previous CPU and run the
necessary barriers before running a guest vCPU on a new physical CPU,
to do the necessary radix GTSE barriers for handling the case of an
interrupted guest tlbie sequence.

This results in more IPIs than the TLB flush logic requires, but it's
a significant win for common case scheduling when the vCPU remains on
the same physical CPU.

-522 cycles (5754) POWER9 virt-mode NULL hcall

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/kvm/book3s_hv.c  | 31 +++
 arch/powerpc/kvm/book3s_hv_p9_entry.c |  9 
 2 files changed, 27 insertions(+), 13 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index a37ab798eb7c..3e5c6b745394 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -3005,6 +3005,25 @@ static void radix_flush_cpu(struct kvm *kvm, int cpu, 
struct kvm_vcpu *vcpu)
smp_call_function_single(i, do_nothing, NULL, 1);
 }
 
+static void do_migrate_away_vcpu(void *arg)
+{
+   struct kvm_vcpu *vcpu = arg;
+   struct kvm *kvm = vcpu->kvm;
+
+   /*
+* If the guest has GTSE, it may execute tlbie, so do a eieio; tlbsync;
+* ptesync sequence on the old CPU before migrating to a new one, in
+* case we interrupted the guest between a tlbie ; eieio ;
+* tlbsync; ptesync sequence.
+*
+* Otherwise, ptesync is sufficient.
+*/
+   if (kvm->arch.lpcr & LPCR_GTSE)
+   asm volatile("eieio; tlbsync; ptesync");
+   else
+   asm volatile("ptesync");
+}
+
 static void kvmppc_prepare_radix_vcpu(struct kvm_vcpu *vcpu, int pcpu)
 {
struct kvm_nested_guest *nested = vcpu->arch.nested;
@@ -3032,10 +3051,14 @@ static void kvmppc_prepare_radix_vcpu(struct kvm_vcpu 
*vcpu, int pcpu)
 * so we use a single bit in .need_tlb_flush for all 4 threads.
 */
if (prev_cpu != pcpu) {
-   if (prev_cpu >= 0 &&
-   cpu_first_tlb_thread_sibling(prev_cpu) !=
-   cpu_first_tlb_thread_sibling(pcpu))
-   radix_flush_cpu(kvm, prev_cpu, vcpu);
+   if (prev_cpu >= 0) {
+   if (cpu_first_tlb_thread_sibling(prev_cpu) !=
+   cpu_first_tlb_thread_sibling(pcpu))
+   radix_flush_cpu(kvm, prev_cpu, vcpu);
+
+   smp_call_function_single(prev_cpu,
+   do_migrate_away_vcpu, vcpu, 1);
+   }
if (nested)
nested->prev_cpu[vcpu->arch.nested_vcpu_id] = pcpu;
else
diff --git a/arch/powerpc/kvm/book3s_hv_p9_entry.c 
b/arch/powerpc/kvm/book3s_hv_p9_entry.c
index 52690af66ca9..1bb81be09d4f 100644
--- a/arch/powerpc/kvm/book3s_hv_p9_entry.c
+++ b/arch/powerpc/kvm/book3s_hv_p9_entry.c
@@ -1039,15 +1039,6 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 
time_limit, unsigned long lpc
 
local_paca->kvm_hstate.in_guest = KVM_GUEST_MODE_NONE;
 
-   if (kvm_is_radix(kvm)) {
-   /*
-* Since this is radix, do a eieio; tlbsync; ptesync sequence
-* in case we interrupted the guest between a tlbie and a
-* ptesync.
-*/
-   asm volatile("eieio; tlbsync; ptesync");
-   }
-
/*
 * cp_abort is required if the processor supports local copy-paste
 * to clear the copy buffer that was under control of the guest.
-- 
2.23.0



[PATCH v1 45/55] KVM: PPC: Book3S HV P9: Don't restore PSSCR if not needed

2021-07-25 Thread Nicholas Piggin
In addition to skipping PSSCR restores that are not needed, this moves
the PSSCR update in nested entry to avoid an SPR scoreboard stall.

-45 cycles (6276) POWER9 virt-mode NULL hcall

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/kvm/book3s_hv.c  |  7 +--
 arch/powerpc/kvm/book3s_hv_p9_entry.c | 26 +++---
 2 files changed, 24 insertions(+), 9 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index c0a04ce39e00..a37ab798eb7c 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -3856,7 +3856,9 @@ static int kvmhv_vcpu_entry_p9_nested(struct kvm_vcpu 
*vcpu, u64 time_limit, uns
if (unlikely(load_vcpu_state(vcpu, _os_sprs)))
msr = mfmsr(); /* TM restore can update msr */
 
-   mtspr(SPRN_PSSCR_PR, vcpu->arch.psscr);
+   if (vcpu->arch.psscr != host_psscr)
+   mtspr(SPRN_PSSCR_PR, vcpu->arch.psscr);
+
kvmhv_save_hv_regs(vcpu, );
hvregs.lpcr = lpcr;
vcpu->arch.regs.msr = vcpu->arch.shregs.msr;
@@ -3897,7 +3899,6 @@ static int kvmhv_vcpu_entry_p9_nested(struct kvm_vcpu 
*vcpu, u64 time_limit, uns
vcpu->arch.shregs.dar = mfspr(SPRN_DAR);
vcpu->arch.shregs.dsisr = mfspr(SPRN_DSISR);
vcpu->arch.psscr = mfspr(SPRN_PSSCR_PR);
-   mtspr(SPRN_PSSCR_PR, host_psscr);
 
store_vcpu_state(vcpu);
 
@@ -3910,6 +3911,8 @@ static int kvmhv_vcpu_entry_p9_nested(struct kvm_vcpu 
*vcpu, u64 time_limit, uns
timer_rearm_host_dec(*tb);
 
restore_p9_host_os_sprs(vcpu, _os_sprs);
+   if (vcpu->arch.psscr != host_psscr)
+   mtspr(SPRN_PSSCR_PR, host_psscr);
 
return trap;
 }
diff --git a/arch/powerpc/kvm/book3s_hv_p9_entry.c 
b/arch/powerpc/kvm/book3s_hv_p9_entry.c
index 976687c3709a..52690af66ca9 100644
--- a/arch/powerpc/kvm/book3s_hv_p9_entry.c
+++ b/arch/powerpc/kvm/book3s_hv_p9_entry.c
@@ -639,6 +639,7 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 
time_limit, unsigned long lpc
unsigned long host_dawr0;
unsigned long host_dawrx0;
unsigned long host_psscr;
+   unsigned long host_hpsscr;
unsigned long host_pidr;
unsigned long host_dawr1;
unsigned long host_dawrx1;
@@ -656,7 +657,9 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 
time_limit, unsigned long lpc
 
host_hfscr = mfspr(SPRN_HFSCR);
host_ciabr = mfspr(SPRN_CIABR);
-   host_psscr = mfspr(SPRN_PSSCR);
+   host_psscr = mfspr(SPRN_PSSCR_PR);
+   if (cpu_has_feature(CPU_FTRS_POWER9_DD2_2))
+   host_hpsscr = mfspr(SPRN_PSSCR);
host_pidr = mfspr(SPRN_PID);
 
if (dawr_enabled()) {
@@ -740,8 +743,14 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 
time_limit, unsigned long lpc
if (vcpu->arch.ciabr != host_ciabr)
mtspr(SPRN_CIABR, vcpu->arch.ciabr);
 
-   mtspr(SPRN_PSSCR, vcpu->arch.psscr | PSSCR_EC |
- (local_paca->kvm_hstate.fake_suspend << PSSCR_FAKE_SUSPEND_LG));
+
+   if (cpu_has_feature(CPU_FTRS_POWER9_DD2_2)) {
+   mtspr(SPRN_PSSCR, vcpu->arch.psscr | PSSCR_EC |
+ (local_paca->kvm_hstate.fake_suspend << 
PSSCR_FAKE_SUSPEND_LG));
+   } else {
+   if (vcpu->arch.psscr != host_psscr)
+   mtspr(SPRN_PSSCR_PR, vcpu->arch.psscr);
+   }
 
mtspr(SPRN_HFSCR, vcpu->arch.hfscr);
 
@@ -947,7 +956,7 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 
time_limit, unsigned long lpc
 
vcpu->arch.ic = mfspr(SPRN_IC);
vcpu->arch.pid = mfspr(SPRN_PID);
-   vcpu->arch.psscr = mfspr(SPRN_PSSCR) & PSSCR_GUEST_VIS;
+   vcpu->arch.psscr = mfspr(SPRN_PSSCR_PR);
 
vcpu->arch.shregs.sprg0 = mfspr(SPRN_SPRG0);
vcpu->arch.shregs.sprg1 = mfspr(SPRN_SPRG1);
@@ -993,9 +1002,12 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 
time_limit, unsigned long lpc
mtspr(SPRN_PURR, local_paca->kvm_hstate.host_purr);
mtspr(SPRN_SPURR, local_paca->kvm_hstate.host_spurr);
 
-   /* Preserve PSSCR[FAKE_SUSPEND] until we've called kvmppc_save_tm_hv */
-   mtspr(SPRN_PSSCR, host_psscr |
- (local_paca->kvm_hstate.fake_suspend << PSSCR_FAKE_SUSPEND_LG));
+   if (cpu_has_feature(CPU_FTRS_POWER9_DD2_2)) {
+   /* Preserve PSSCR[FAKE_SUSPEND] until we've called 
kvmppc_save_tm_hv */
+   mtspr(SPRN_PSSCR, host_hpsscr |
+ (local_paca->kvm_hstate.fake_suspend << 
PSSCR_FAKE_SUSPEND_LG));
+   }
+
mtspr(SPRN_HFSCR, host_hfscr);
if (vcpu->arch.ciabr != host_ciabr)
mtspr(SPRN_CIABR, host_ciabr);
-- 
2.23.0



[PATCH v1 44/55] KVM: PPC: Book3S HV P9: Test dawr_enabled() before saving host DAWR SPRs

2021-07-25 Thread Nicholas Piggin
Some of the DAWR SPR accesses are already predicated on dawr_enabled();
apply this to the remainder of the accesses.

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/kvm/book3s_hv_p9_entry.c | 34 ---
 1 file changed, 20 insertions(+), 14 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_hv_p9_entry.c 
b/arch/powerpc/kvm/book3s_hv_p9_entry.c
index 0aad2bf29d6e..976687c3709a 100644
--- a/arch/powerpc/kvm/book3s_hv_p9_entry.c
+++ b/arch/powerpc/kvm/book3s_hv_p9_entry.c
@@ -656,13 +656,16 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 
time_limit, unsigned long lpc
 
host_hfscr = mfspr(SPRN_HFSCR);
host_ciabr = mfspr(SPRN_CIABR);
-   host_dawr0 = mfspr(SPRN_DAWR0);
-   host_dawrx0 = mfspr(SPRN_DAWRX0);
host_psscr = mfspr(SPRN_PSSCR);
host_pidr = mfspr(SPRN_PID);
-   if (cpu_has_feature(CPU_FTR_DAWR1)) {
-   host_dawr1 = mfspr(SPRN_DAWR1);
-   host_dawrx1 = mfspr(SPRN_DAWRX1);
+
+   if (dawr_enabled()) {
+   host_dawr0 = mfspr(SPRN_DAWR0);
+   host_dawrx0 = mfspr(SPRN_DAWRX0);
+   if (cpu_has_feature(CPU_FTR_DAWR1)) {
+   host_dawr1 = mfspr(SPRN_DAWR1);
+   host_dawrx1 = mfspr(SPRN_DAWRX1);
+   }
}
 
local_paca->kvm_hstate.host_purr = mfspr(SPRN_PURR);
@@ -996,15 +999,18 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 
time_limit, unsigned long lpc
mtspr(SPRN_HFSCR, host_hfscr);
if (vcpu->arch.ciabr != host_ciabr)
mtspr(SPRN_CIABR, host_ciabr);
-   if (vcpu->arch.dawr0 != host_dawr0)
-   mtspr(SPRN_DAWR0, host_dawr0);
-   if (vcpu->arch.dawrx0 != host_dawrx0)
-   mtspr(SPRN_DAWRX0, host_dawrx0);
-   if (cpu_has_feature(CPU_FTR_DAWR1)) {
-   if (vcpu->arch.dawr1 != host_dawr1)
-   mtspr(SPRN_DAWR1, host_dawr1);
-   if (vcpu->arch.dawrx1 != host_dawrx1)
-   mtspr(SPRN_DAWRX1, host_dawrx1);
+
+   if (dawr_enabled()) {
+   if (vcpu->arch.dawr0 != host_dawr0)
+   mtspr(SPRN_DAWR0, host_dawr0);
+   if (vcpu->arch.dawrx0 != host_dawrx0)
+   mtspr(SPRN_DAWRX0, host_dawrx0);
+   if (cpu_has_feature(CPU_FTR_DAWR1)) {
+   if (vcpu->arch.dawr1 != host_dawr1)
+   mtspr(SPRN_DAWR1, host_dawr1);
+   if (vcpu->arch.dawrx1 != host_dawrx1)
+   mtspr(SPRN_DAWRX1, host_dawrx1);
+   }
}
 
if (vc->dpdes)
-- 
2.23.0



[PATCH v1 43/55] KVM: PPC: Book3S HV P9: Comment and fix MMU context switching code

2021-07-25 Thread Nicholas Piggin
Tighten up partition switching code synchronisation and comments.

In particular, hwsync ; isync is required after the last access that is
performed in the context of a partition, before the partition is
switched away from.

-301 cycles (6319) POWER9 virt-mode NULL hcall

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/kvm/book3s_64_mmu_radix.c |  4 +++
 arch/powerpc/kvm/book3s_hv_p9_entry.c  | 40 +++---
 2 files changed, 33 insertions(+), 11 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c 
b/arch/powerpc/kvm/book3s_64_mmu_radix.c
index b5905ae4377c..c5508744e14c 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
@@ -54,6 +54,8 @@ unsigned long __kvmhv_copy_tofrom_guest_radix(int lpid, int 
pid,
 
preempt_disable();
 
+   asm volatile("hwsync" ::: "memory");
+   isync();
/* switch the lpid first to avoid running host with unallocated pid */
old_lpid = mfspr(SPRN_LPID);
if (old_lpid != lpid)
@@ -70,6 +72,8 @@ unsigned long __kvmhv_copy_tofrom_guest_radix(int lpid, int 
pid,
else
ret = copy_to_user_nofault((void __user *)to, from, n);
 
+   asm volatile("hwsync" ::: "memory");
+   isync();
/* switch the pid first to avoid running host with unallocated pid */
if (quadrant == 1 && pid != old_pid)
mtspr(SPRN_PID, old_pid);
diff --git a/arch/powerpc/kvm/book3s_hv_p9_entry.c 
b/arch/powerpc/kvm/book3s_hv_p9_entry.c
index 5fca0a09425d..0aad2bf29d6e 100644
--- a/arch/powerpc/kvm/book3s_hv_p9_entry.c
+++ b/arch/powerpc/kvm/book3s_hv_p9_entry.c
@@ -521,17 +521,19 @@ static void switch_mmu_to_guest_radix(struct kvm *kvm, 
struct kvm_vcpu *vcpu, u6
lpid = nested ? nested->shadow_lpid : kvm->arch.lpid;
 
/*
-* All the isync()s are overkill but trivially follow the ISA
-* requirements. Some can likely be replaced with justification
-* comment for why they are not needed.
+* Prior memory accesses to host PID Q3 must be completed before we
+* start switching, and stores must be drained to avoid not-my-LPAR
+* logic (see switch_mmu_to_host).
 */
+   asm volatile("hwsync" ::: "memory");
isync();
mtspr(SPRN_LPID, lpid);
-   isync();
mtspr(SPRN_LPCR, lpcr);
-   isync();
mtspr(SPRN_PID, vcpu->arch.pid);
-   isync();
+   /*
+* isync not required here because we are HRFID'ing to guest before
+* any guest context access, which is context synchronising.
+*/
 }
 
 static void switch_mmu_to_guest_hpt(struct kvm *kvm, struct kvm_vcpu *vcpu, 
u64 lpcr)
@@ -541,25 +543,41 @@ static void switch_mmu_to_guest_hpt(struct kvm *kvm, 
struct kvm_vcpu *vcpu, u64
 
lpid = kvm->arch.lpid;
 
+   /*
+* See switch_mmu_to_guest_radix. ptesync should not be required here
+* even if the host is in HPT mode because speculative accesses would
+* not cause RC updates (we are in real mode).
+*/
+   asm volatile("hwsync" ::: "memory");
+   isync();
mtspr(SPRN_LPID, lpid);
mtspr(SPRN_LPCR, lpcr);
mtspr(SPRN_PID, vcpu->arch.pid);
 
for (i = 0; i < vcpu->arch.slb_max; i++)
mtslb(vcpu->arch.slb[i].orige, vcpu->arch.slb[i].origv);
-
-   isync();
+   /*
+* isync not required here, see switch_mmu_to_guest_radix.
+*/
 }
 
 static void switch_mmu_to_host(struct kvm *kvm, u32 pid)
 {
+   /*
+* The guest has exited, so guest MMU context is no longer being
+* non-speculatively accessed, but a hwsync is needed before the
+* mtLPIDR / mtPIDR switch, in order to ensure all stores are drained,
+* so the not-my-LPAR tlbie logic does not overlook them.
+*/
+   asm volatile("hwsync" ::: "memory");
isync();
mtspr(SPRN_PID, pid);
-   isync();
mtspr(SPRN_LPID, kvm->arch.host_lpid);
-   isync();
mtspr(SPRN_LPCR, kvm->arch.host_lpcr);
-   isync();
+   /*
+* isync is not required after the switch, because mtmsrd with L=0
+* is performed after this switch, which is context synchronising.
+*/
 
if (!radix_enabled())
slb_restore_bolted_realmode();
-- 
2.23.0



[PATCH v1 42/55] KVM: PPC: Book3S HV P9: Use Linux SPR save/restore to manage some host SPRs

2021-07-25 Thread Nicholas Piggin
Linux implements SPR save/restore including storage space for registers
in the task struct for process context switching. Make use of this
similarly to the way we make use of the context switching fp/vec
save/restore.

This improves code reuse, allows some stack space to be saved, and helps
with avoiding VRSAVE updates if they are not required.

-61 cycles (6620) POWER9 virt-mode NULL hcall

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/include/asm/switch_to.h  |  2 +
 arch/powerpc/kernel/process.c |  6 ++
 arch/powerpc/kvm/book3s_hv.c  | 21 +-
 arch/powerpc/kvm/book3s_hv.h  |  3 -
 arch/powerpc/kvm/book3s_hv_p9_entry.c | 93 +++
 5 files changed, 74 insertions(+), 51 deletions(-)

diff --git a/arch/powerpc/include/asm/switch_to.h 
b/arch/powerpc/include/asm/switch_to.h
index 9d1fbd8be1c7..de17c45314bc 100644
--- a/arch/powerpc/include/asm/switch_to.h
+++ b/arch/powerpc/include/asm/switch_to.h
@@ -112,6 +112,8 @@ static inline void clear_task_ebb(struct task_struct *t)
 #endif
 }
 
+void kvmppc_save_current_sprs(void);
+
 extern int set_thread_tidr(struct task_struct *t);
 
 #endif /* _ASM_POWERPC_SWITCH_TO_H */
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 00b55b38a460..d54baa3e20d2 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1180,6 +1180,12 @@ static inline void save_sprs(struct thread_struct *t)
 #endif
 }
 
+void kvmppc_save_current_sprs(void)
+{
+   save_sprs(>thread);
+}
+EXPORT_SYMBOL_GPL(kvmppc_save_current_sprs);
+
 static inline void restore_sprs(struct thread_struct *old_thread,
struct thread_struct *new_thread)
 {
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 5b2114c00c43..c0a04ce39e00 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -4510,9 +4510,6 @@ static int kvmppc_vcpu_run_hv(struct kvm_vcpu *vcpu)
struct kvm_run *run = vcpu->run;
int r;
int srcu_idx;
-   unsigned long ebb_regs[3] = {}; /* shut up GCC */
-   unsigned long user_tar = 0;
-   unsigned int user_vrsave;
struct kvm *kvm;
unsigned long msr;
 
@@ -4573,14 +4570,7 @@ static int kvmppc_vcpu_run_hv(struct kvm_vcpu *vcpu)
 
save_user_regs_kvm();
 
-   /* Save userspace EBB and other register values */
-   if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
-   ebb_regs[0] = mfspr(SPRN_EBBHR);
-   ebb_regs[1] = mfspr(SPRN_EBBRR);
-   ebb_regs[2] = mfspr(SPRN_BESCR);
-   user_tar = mfspr(SPRN_TAR);
-   }
-   user_vrsave = mfspr(SPRN_VRSAVE);
+   kvmppc_save_current_sprs();
 
vcpu->arch.waitp = >arch.vcore->wait;
vcpu->arch.pgdir = kvm->mm->pgd;
@@ -4621,15 +4611,6 @@ static int kvmppc_vcpu_run_hv(struct kvm_vcpu *vcpu)
}
} while (is_kvmppc_resume_guest(r));
 
-   /* Restore userspace EBB and other register values */
-   if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
-   mtspr(SPRN_EBBHR, ebb_regs[0]);
-   mtspr(SPRN_EBBRR, ebb_regs[1]);
-   mtspr(SPRN_BESCR, ebb_regs[2]);
-   mtspr(SPRN_TAR, user_tar);
-   }
-   mtspr(SPRN_VRSAVE, user_vrsave);
-
vcpu->arch.state = KVMPPC_VCPU_NOTREADY;
atomic_dec(>arch.vcpus_running);
 
diff --git a/arch/powerpc/kvm/book3s_hv.h b/arch/powerpc/kvm/book3s_hv.h
index a9065a380547..04884e271862 100644
--- a/arch/powerpc/kvm/book3s_hv.h
+++ b/arch/powerpc/kvm/book3s_hv.h
@@ -3,11 +3,8 @@
  * Privileged (non-hypervisor) host registers to save.
  */
 struct p9_host_os_sprs {
-   unsigned long dscr;
-   unsigned long tidr;
unsigned long iamr;
unsigned long amr;
-   unsigned long fscr;
 
unsigned int pmc1;
unsigned int pmc2;
diff --git a/arch/powerpc/kvm/book3s_hv_p9_entry.c 
b/arch/powerpc/kvm/book3s_hv_p9_entry.c
index db5eb83e26d1..5fca0a09425d 100644
--- a/arch/powerpc/kvm/book3s_hv_p9_entry.c
+++ b/arch/powerpc/kvm/book3s_hv_p9_entry.c
@@ -223,15 +223,26 @@ EXPORT_SYMBOL_GPL(switch_pmu_to_host);
 static void load_spr_state(struct kvm_vcpu *vcpu,
struct p9_host_os_sprs *host_os_sprs)
 {
+   /* TAR is very fast */
mtspr(SPRN_TAR, vcpu->arch.tar);
 
+#ifdef CONFIG_ALTIVEC
+   if (cpu_has_feature(CPU_FTR_ALTIVEC) &&
+   current->thread.vrsave != vcpu->arch.vrsave)
+   mtspr(SPRN_VRSAVE, vcpu->arch.vrsave);
+#endif
+
if (vcpu->arch.hfscr & HFSCR_EBB) {
-   mtspr(SPRN_EBBHR, vcpu->arch.ebbhr);
-   mtspr(SPRN_EBBRR, vcpu->arch.ebbrr);
-   mtspr(SPRN_BESCR, vcpu->arch.bescr);
+   if (current->thread.ebbhr != vcpu->arch.ebbhr)
+   mtspr(SPRN_EBBHR, vcpu->arch.ebbhr);
+   if (current->thread.ebbrr != vcpu->arch.ebbrr)
+   mtspr(SPRN_EBBRR, 

[PATCH v1 41/55] KVM: PPC: Book3S HV P9: Demand fault TM facility registers

2021-07-25 Thread Nicholas Piggin
Use HFSCR facility disabling to implement demand faulting for TM, with
a hysteresis counter similar to the load_fp etc counters in context
switching that implement the equivalent demand faulting for userspace
facilities.

This speeds up guest entry/exit by avoiding the register save/restore
when a guest is not frequently using them. When a guest does use them
often, there will be some additional demand fault overhead, but these
are not commonly used facilities.

-304 cycles (6681) POWER9 virt-mode NULL hcall with the previous patch

Reviewed-by: Fabiano Rosas 
Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/include/asm/kvm_host.h   |  1 +
 arch/powerpc/kvm/book3s_hv.c  | 26 --
 arch/powerpc/kvm/book3s_hv_p9_entry.c | 25 +
 3 files changed, 38 insertions(+), 14 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_host.h 
b/arch/powerpc/include/asm/kvm_host.h
index 1c00c4a565f5..74ee3a5b110e 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -581,6 +581,7 @@ struct kvm_vcpu_arch {
ulong ppr;
u32 pspb;
u8 load_ebb;
+   u8 load_tm;
ulong fscr;
ulong shadow_fscr;
ulong ebbhr;
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index dd8199a423cf..5b2114c00c43 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -1451,6 +1451,16 @@ static int kvmppc_ebb_unavailable(struct kvm_vcpu *vcpu)
return RESUME_GUEST;
 }
 
+static int kvmppc_tm_unavailable(struct kvm_vcpu *vcpu)
+{
+   if (!(vcpu->arch.hfscr_permitted & HFSCR_TM))
+   return EMULATE_FAIL;
+
+   vcpu->arch.hfscr |= HFSCR_TM;
+
+   return RESUME_GUEST;
+}
+
 static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu,
 struct task_struct *tsk)
 {
@@ -1747,6 +1757,8 @@ XXX benchmark guest exits
r = kvmppc_pmu_unavailable(vcpu);
if (cause == FSCR_EBB_LG)
r = kvmppc_ebb_unavailable(vcpu);
+   if (cause == FSCR_TM_LG)
+   r = kvmppc_tm_unavailable(vcpu);
}
if (r == EMULATE_FAIL) {
kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
@@ -2763,9 +2775,9 @@ static int kvmppc_core_vcpu_create_hv(struct kvm_vcpu 
*vcpu)
vcpu->arch.hfscr_permitted = vcpu->arch.hfscr;
 
/*
-* PM, EBB is demand-faulted so start with it clear.
+* PM, EBB, TM are demand-faulted so start with it clear.
 */
-   vcpu->arch.hfscr &= ~(HFSCR_PM | HFSCR_EBB);
+   vcpu->arch.hfscr &= ~(HFSCR_PM | HFSCR_EBB | HFSCR_TM);
 
kvmppc_mmu_book3s_hv_init(vcpu);
 
@@ -3835,8 +3847,9 @@ static int kvmhv_vcpu_entry_p9_nested(struct kvm_vcpu 
*vcpu, u64 time_limit, uns
msr |= MSR_VEC;
if (cpu_has_feature(CPU_FTR_VSX))
msr |= MSR_VSX;
-   if (cpu_has_feature(CPU_FTR_TM) ||
-   cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST))
+   if ((cpu_has_feature(CPU_FTR_TM) ||
+   cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)) &&
+   (vcpu->arch.hfscr & HFSCR_TM))
msr |= MSR_TM;
msr = msr_check_and_set(msr);
 
@@ -4552,8 +4565,9 @@ static int kvmppc_vcpu_run_hv(struct kvm_vcpu *vcpu)
msr |= MSR_VEC;
if (cpu_has_feature(CPU_FTR_VSX))
msr |= MSR_VSX;
-   if (cpu_has_feature(CPU_FTR_TM) ||
-   cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST))
+   if ((cpu_has_feature(CPU_FTR_TM) ||
+   cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)) &&
+   (vcpu->arch.hfscr & HFSCR_TM))
msr |= MSR_TM;
msr = msr_check_and_set(msr);
 
diff --git a/arch/powerpc/kvm/book3s_hv_p9_entry.c 
b/arch/powerpc/kvm/book3s_hv_p9_entry.c
index f68a3d107d04..db5eb83e26d1 100644
--- a/arch/powerpc/kvm/book3s_hv_p9_entry.c
+++ b/arch/powerpc/kvm/book3s_hv_p9_entry.c
@@ -295,10 +295,11 @@ bool load_vcpu_state(struct kvm_vcpu *vcpu,
 {
bool ret = false;
 
-   if (cpu_has_feature(CPU_FTR_TM) ||
-   cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)) {
+   if ((cpu_has_feature(CPU_FTR_TM) ||
+   cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)) &&
+  (vcpu->arch.hfscr & HFSCR_TM)) {
unsigned long guest_msr = vcpu->arch.shregs.msr;
-   if (MSR_TM_ACTIVE(guest_msr)) {
+   if (MSR_TM_ACTIVE(guest_msr) || 
local_paca->kvm_hstate.fake_suspend) {
kvmppc_restore_tm_hv(vcpu, guest_msr, true);
ret = true;
} else {
@@ -330,15 +331,22 @@ void store_vcpu_state(struct kvm_vcpu *vcpu)
 #endif
vcpu->arch.vrsave = mfspr(SPRN_VRSAVE);
 
-   if (cpu_has_feature(CPU_FTR_TM) ||
-   cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)) {
+   if 

[PATCH v1 40/55] KVM: PPC: Book3S HV P9: Demand fault EBB facility registers

2021-07-25 Thread Nicholas Piggin
Use HFSCR facility disabling to implement demand faulting for EBB, with
a hysteresis counter similar to the load_fp etc counters in context
switching that implement the equivalent demand faulting for userspace
facilities.

This speeds up guest entry/exit by avoiding the register save/restore
when a guest is not frequently using them. When a guest does use them
often, there will be some additional demand fault overhead, but these
are not commonly used facilities.

Reviewed-by: Fabiano Rosas 
Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/include/asm/kvm_host.h   |  1 +
 arch/powerpc/kvm/book3s_hv.c  | 16 +--
 arch/powerpc/kvm/book3s_hv_p9_entry.c | 28 +--
 3 files changed, 37 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_host.h 
b/arch/powerpc/include/asm/kvm_host.h
index f105eaeb4521..1c00c4a565f5 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -580,6 +580,7 @@ struct kvm_vcpu_arch {
ulong cfar;
ulong ppr;
u32 pspb;
+   u8 load_ebb;
ulong fscr;
ulong shadow_fscr;
ulong ebbhr;
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 47ccea5ffba2..dd8199a423cf 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -1441,6 +1441,16 @@ static int kvmppc_pmu_unavailable(struct kvm_vcpu *vcpu)
return RESUME_GUEST;
 }
 
+static int kvmppc_ebb_unavailable(struct kvm_vcpu *vcpu)
+{
+   if (!(vcpu->arch.hfscr_permitted & HFSCR_EBB))
+   return EMULATE_FAIL;
+
+   vcpu->arch.hfscr |= HFSCR_EBB;
+
+   return RESUME_GUEST;
+}
+
 static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu,
 struct task_struct *tsk)
 {
@@ -1735,6 +1745,8 @@ XXX benchmark guest exits
r = kvmppc_emulate_doorbell_instr(vcpu);
if (cause == FSCR_PM_LG)
r = kvmppc_pmu_unavailable(vcpu);
+   if (cause == FSCR_EBB_LG)
+   r = kvmppc_ebb_unavailable(vcpu);
}
if (r == EMULATE_FAIL) {
kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
@@ -2751,9 +2763,9 @@ static int kvmppc_core_vcpu_create_hv(struct kvm_vcpu 
*vcpu)
vcpu->arch.hfscr_permitted = vcpu->arch.hfscr;
 
/*
-* PM is demand-faulted so start with it clear.
+* PM, EBB is demand-faulted so start with it clear.
 */
-   vcpu->arch.hfscr &= ~HFSCR_PM;
+   vcpu->arch.hfscr &= ~(HFSCR_PM | HFSCR_EBB);
 
kvmppc_mmu_book3s_hv_init(vcpu);
 
diff --git a/arch/powerpc/kvm/book3s_hv_p9_entry.c 
b/arch/powerpc/kvm/book3s_hv_p9_entry.c
index c4e93167d120..f68a3d107d04 100644
--- a/arch/powerpc/kvm/book3s_hv_p9_entry.c
+++ b/arch/powerpc/kvm/book3s_hv_p9_entry.c
@@ -224,9 +224,12 @@ static void load_spr_state(struct kvm_vcpu *vcpu,
struct p9_host_os_sprs *host_os_sprs)
 {
mtspr(SPRN_TAR, vcpu->arch.tar);
-   mtspr(SPRN_EBBHR, vcpu->arch.ebbhr);
-   mtspr(SPRN_EBBRR, vcpu->arch.ebbrr);
-   mtspr(SPRN_BESCR, vcpu->arch.bescr);
+
+   if (vcpu->arch.hfscr & HFSCR_EBB) {
+   mtspr(SPRN_EBBHR, vcpu->arch.ebbhr);
+   mtspr(SPRN_EBBRR, vcpu->arch.ebbrr);
+   mtspr(SPRN_BESCR, vcpu->arch.bescr);
+   }
 
if (!cpu_has_feature(CPU_FTR_ARCH_31))
mtspr(SPRN_TIDR, vcpu->arch.tid);
@@ -257,9 +260,22 @@ static void load_spr_state(struct kvm_vcpu *vcpu,
 static void store_spr_state(struct kvm_vcpu *vcpu)
 {
vcpu->arch.tar = mfspr(SPRN_TAR);
-   vcpu->arch.ebbhr = mfspr(SPRN_EBBHR);
-   vcpu->arch.ebbrr = mfspr(SPRN_EBBRR);
-   vcpu->arch.bescr = mfspr(SPRN_BESCR);
+
+   if (vcpu->arch.hfscr & HFSCR_EBB) {
+   vcpu->arch.ebbhr = mfspr(SPRN_EBBHR);
+   vcpu->arch.ebbrr = mfspr(SPRN_EBBRR);
+   vcpu->arch.bescr = mfspr(SPRN_BESCR);
+   /*
+* This is like load_fp in context switching, turn off the
+* facility after it wraps the u8 to try avoiding saving
+* and restoring the registers each partition switch.
+*/
+   if (!vcpu->arch.nested) {
+   vcpu->arch.load_ebb++;
+   if (!vcpu->arch.load_ebb)
+   vcpu->arch.hfscr &= ~HFSCR_EBB;
+   }
+   }
 
if (!cpu_has_feature(CPU_FTR_ARCH_31))
vcpu->arch.tid = mfspr(SPRN_TIDR);
-- 
2.23.0



[PATCH v1 39/55] KVM: PPC: Book3S HV P9: More SPR speed improvements

2021-07-25 Thread Nicholas Piggin
This avoids more scoreboard stalls and reduces mtSPRs.

-193 cycles (6985) POWER9 virt-mode NULL hcall

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/kvm/book3s_hv_p9_entry.c | 73 ---
 1 file changed, 43 insertions(+), 30 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_hv_p9_entry.c 
b/arch/powerpc/kvm/book3s_hv_p9_entry.c
index d83b5d4d02c1..c4e93167d120 100644
--- a/arch/powerpc/kvm/book3s_hv_p9_entry.c
+++ b/arch/powerpc/kvm/book3s_hv_p9_entry.c
@@ -633,24 +633,29 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 
time_limit, unsigned long lpc
vc->tb_offset_applied = vc->tb_offset;
}
 
-   if (vc->pcr)
-   mtspr(SPRN_PCR, vc->pcr | PCR_MASK);
-   mtspr(SPRN_DPDES, vc->dpdes);
mtspr(SPRN_VTB, vc->vtb);
-
mtspr(SPRN_PURR, vcpu->arch.purr);
mtspr(SPRN_SPURR, vcpu->arch.spurr);
 
+   if (vc->pcr)
+   mtspr(SPRN_PCR, vc->pcr | PCR_MASK);
+   if (vc->dpdes)
+   mtspr(SPRN_DPDES, vc->dpdes);
+
if (dawr_enabled()) {
-   mtspr(SPRN_DAWR0, vcpu->arch.dawr0);
-   mtspr(SPRN_DAWRX0, vcpu->arch.dawrx0);
+   if (vcpu->arch.dawr0 != host_dawr0)
+   mtspr(SPRN_DAWR0, vcpu->arch.dawr0);
+   if (vcpu->arch.dawrx0 != host_dawrx0)
+   mtspr(SPRN_DAWRX0, vcpu->arch.dawrx0);
if (cpu_has_feature(CPU_FTR_DAWR1)) {
-   mtspr(SPRN_DAWR1, vcpu->arch.dawr1);
-   mtspr(SPRN_DAWRX1, vcpu->arch.dawrx1);
+   if (vcpu->arch.dawr1 != host_dawr1)
+   mtspr(SPRN_DAWR1, vcpu->arch.dawr1);
+   if (vcpu->arch.dawrx1 != host_dawrx1)
+   mtspr(SPRN_DAWRX1, vcpu->arch.dawrx1);
}
}
-   mtspr(SPRN_CIABR, vcpu->arch.ciabr);
-   mtspr(SPRN_IC, vcpu->arch.ic);
+   if (vcpu->arch.ciabr != host_ciabr)
+   mtspr(SPRN_CIABR, vcpu->arch.ciabr);
 
mtspr(SPRN_PSSCR, vcpu->arch.psscr | PSSCR_EC |
  (local_paca->kvm_hstate.fake_suspend << PSSCR_FAKE_SUSPEND_LG));
@@ -869,20 +874,6 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 
time_limit, unsigned long lpc
vc->dpdes = mfspr(SPRN_DPDES);
vc->vtb = mfspr(SPRN_VTB);
 
-   save_clear_guest_mmu(kvm, vcpu);
-   switch_mmu_to_host(kvm, host_pidr);
-
-   /*
-* If we are in real mode, only switch MMU on after the MMU is
-* switched to host, to avoid the P9_RADIX_PREFETCH_BUG.
-*/
-   if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) &&
-   vcpu->arch.shregs.msr & MSR_TS_MASK)
-   msr |= MSR_TS_S;
-   __mtmsrd(msr, 0);
-
-   store_vcpu_state(vcpu);
-
dec = mfspr(SPRN_DEC);
if (!(lpcr & LPCR_LD)) /* Sign extend if not using large decrementer */
dec = (s32) dec;
@@ -900,6 +891,22 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 
time_limit, unsigned long lpc
vc->tb_offset_applied = 0;
}
 
+   save_clear_guest_mmu(kvm, vcpu);
+   switch_mmu_to_host(kvm, host_pidr);
+
+   /*
+* Enable MSR here in order to have facilities enabled to save
+* guest registers. This enables MMU (if we were in realmode), so
+* only switch MMU on after the MMU is switched to host, to avoid
+* the P9_RADIX_PREFETCH_BUG or hash guest context.
+*/
+   if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) &&
+   vcpu->arch.shregs.msr & MSR_TS_MASK)
+   msr |= MSR_TS_S;
+   __mtmsrd(msr, 0);
+
+   store_vcpu_state(vcpu);
+
mtspr(SPRN_PURR, local_paca->kvm_hstate.host_purr);
mtspr(SPRN_SPURR, local_paca->kvm_hstate.host_spurr);
 
@@ -907,15 +914,21 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 
time_limit, unsigned long lpc
mtspr(SPRN_PSSCR, host_psscr |
  (local_paca->kvm_hstate.fake_suspend << PSSCR_FAKE_SUSPEND_LG));
mtspr(SPRN_HFSCR, host_hfscr);
-   mtspr(SPRN_CIABR, host_ciabr);
-   mtspr(SPRN_DAWR0, host_dawr0);
-   mtspr(SPRN_DAWRX0, host_dawrx0);
+   if (vcpu->arch.ciabr != host_ciabr)
+   mtspr(SPRN_CIABR, host_ciabr);
+   if (vcpu->arch.dawr0 != host_dawr0)
+   mtspr(SPRN_DAWR0, host_dawr0);
+   if (vcpu->arch.dawrx0 != host_dawrx0)
+   mtspr(SPRN_DAWRX0, host_dawrx0);
if (cpu_has_feature(CPU_FTR_DAWR1)) {
-   mtspr(SPRN_DAWR1, host_dawr1);
-   mtspr(SPRN_DAWRX1, host_dawrx1);
+   if (vcpu->arch.dawr1 != host_dawr1)
+   mtspr(SPRN_DAWR1, host_dawr1);
+   if (vcpu->arch.dawrx1 != host_dawrx1)
+   mtspr(SPRN_DAWRX1, host_dawrx1);
}
 
-   mtspr(SPRN_DPDES, 0);
+   if (vc->dpdes)
+   mtspr(SPRN_DPDES, 0);
if (vc->pcr)
  

[PATCH v1 38/55] KVM: PPC: Book3S HV P9: Restrict DSISR canary workaround to processors that require it

2021-07-25 Thread Nicholas Piggin
Use CPU_FTR_P9_RADIX_PREFETCH_BUG for this, to test for DD2.1 and below
processors.

-43 cycles (7178) POWER9 virt-mode NULL hcall

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/kvm/book3s_hv.c  | 3 ++-
 arch/powerpc/kvm/book3s_hv_p9_entry.c | 6 --
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index e7dfc33e2b38..47ccea5ffba2 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -1598,7 +1598,8 @@ XXX benchmark guest exits
unsigned long vsid;
long err;
 
-   if (vcpu->arch.fault_dsisr == HDSISR_CANARY) {
+   if (cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG) &&
+   unlikely(vcpu->arch.fault_dsisr == HDSISR_CANARY)) {
r = RESUME_GUEST; /* Just retry if it's the canary */
break;
}
diff --git a/arch/powerpc/kvm/book3s_hv_p9_entry.c 
b/arch/powerpc/kvm/book3s_hv_p9_entry.c
index 737d4eaf74bc..d83b5d4d02c1 100644
--- a/arch/powerpc/kvm/book3s_hv_p9_entry.c
+++ b/arch/powerpc/kvm/book3s_hv_p9_entry.c
@@ -671,9 +671,11 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 
time_limit, unsigned long lpc
 * HDSI which should correctly update the HDSISR the second time HDSI
 * entry.
 *
-* Just do this on all p9 processors for now.
+* The "radix prefetch bug" test can be used to test for this bug, as
+* it also exists fo DD2.1 and below.
 */
-   mtspr(SPRN_HDSISR, HDSISR_CANARY);
+   if (cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG))
+   mtspr(SPRN_HDSISR, HDSISR_CANARY);
 
mtspr(SPRN_SPRG0, vcpu->arch.shregs.sprg0);
mtspr(SPRN_SPRG1, vcpu->arch.shregs.sprg1);
-- 
2.23.0



[PATCH v1 37/55] KVM: PPC: Book3S HV P9: Switch PMU to guest as late as possible

2021-07-25 Thread Nicholas Piggin
This moves the PMU switch to guest as late as possible in entry, and the
switch back to host as early as possible at exit. This helps the host get
as much perf coverage of the KVM entry/exit code as possible.

This is slightly suboptimal from an SPR scheduling point of view when the
PMU is enabled, but when perf is disabled there is no real difference.

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/kvm/book3s_hv.c  | 6 ++
 arch/powerpc/kvm/book3s_hv_p9_entry.c | 6 ++
 2 files changed, 4 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 8c1c93ebd669..e7dfc33e2b38 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -3800,8 +3800,6 @@ static int kvmhv_vcpu_entry_p9_nested(struct kvm_vcpu 
*vcpu, u64 time_limit, uns
s64 dec;
int trap;
 
-   switch_pmu_to_guest(vcpu, _os_sprs);
-
save_p9_host_os_sprs(_os_sprs);
 
/*
@@ -3864,9 +3862,11 @@ static int kvmhv_vcpu_entry_p9_nested(struct kvm_vcpu 
*vcpu, u64 time_limit, uns
 
mtspr(SPRN_DAR, vcpu->arch.shregs.dar);
mtspr(SPRN_DSISR, vcpu->arch.shregs.dsisr);
+   switch_pmu_to_guest(vcpu, _os_sprs);
trap = plpar_hcall_norets(H_ENTER_NESTED, __pa(),
  __pa(>arch.regs));
kvmhv_restore_hv_return_state(vcpu, );
+   switch_pmu_to_host(vcpu, _os_sprs);
vcpu->arch.shregs.msr = vcpu->arch.regs.msr;
vcpu->arch.shregs.dar = mfspr(SPRN_DAR);
vcpu->arch.shregs.dsisr = mfspr(SPRN_DSISR);
@@ -3885,8 +3885,6 @@ static int kvmhv_vcpu_entry_p9_nested(struct kvm_vcpu 
*vcpu, u64 time_limit, uns
 
restore_p9_host_os_sprs(vcpu, _os_sprs);
 
-   switch_pmu_to_host(vcpu, _os_sprs);
-
return trap;
 }
 
diff --git a/arch/powerpc/kvm/book3s_hv_p9_entry.c 
b/arch/powerpc/kvm/book3s_hv_p9_entry.c
index 2e7498817b2e..737d4eaf74bc 100644
--- a/arch/powerpc/kvm/book3s_hv_p9_entry.c
+++ b/arch/powerpc/kvm/book3s_hv_p9_entry.c
@@ -589,8 +589,6 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 
time_limit, unsigned long lpc
local_paca->kvm_hstate.host_purr = mfspr(SPRN_PURR);
local_paca->kvm_hstate.host_spurr = mfspr(SPRN_SPURR);
 
-   switch_pmu_to_guest(vcpu, _os_sprs);
-
save_p9_host_os_sprs(_os_sprs);
 
/*
@@ -732,7 +730,9 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 
time_limit, unsigned long lpc
 
accumulate_time(vcpu, >arch.guest_time);
 
+   switch_pmu_to_guest(vcpu, _os_sprs);
kvmppc_p9_enter_guest(vcpu);
+   switch_pmu_to_host(vcpu, _os_sprs);
 
accumulate_time(vcpu, >arch.rm_intr);
 
@@ -943,8 +943,6 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 
time_limit, unsigned long lpc
asm volatile(PPC_CP_ABORT);
 
 out:
-   switch_pmu_to_host(vcpu, _os_sprs);
-
end_timing(vcpu);
 
return trap;
-- 
2.23.0



[PATCH v1 36/55] KVM: PPC: Book3S HV P9: Implement TM fastpath for guest entry/exit

2021-07-25 Thread Nicholas Piggin
If TM is not active, only the TM register state (TEXASR, TFHAR, TFIAR)
needs to be saved and restored.

-348 cycles (7218) POWER9 virt-mode NULL hcall

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/kvm/book3s_hv_p9_entry.c | 23 +++
 1 file changed, 19 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_hv_p9_entry.c 
b/arch/powerpc/kvm/book3s_hv_p9_entry.c
index ea531f76f116..2e7498817b2e 100644
--- a/arch/powerpc/kvm/book3s_hv_p9_entry.c
+++ b/arch/powerpc/kvm/book3s_hv_p9_entry.c
@@ -281,8 +281,15 @@ bool load_vcpu_state(struct kvm_vcpu *vcpu,
 
if (cpu_has_feature(CPU_FTR_TM) ||
cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)) {
-   kvmppc_restore_tm_hv(vcpu, vcpu->arch.shregs.msr, true);
-   ret = true;
+   unsigned long guest_msr = vcpu->arch.shregs.msr;
+   if (MSR_TM_ACTIVE(guest_msr)) {
+   kvmppc_restore_tm_hv(vcpu, guest_msr, true);
+   ret = true;
+   } else {
+   mtspr(SPRN_TEXASR, vcpu->arch.texasr);
+   mtspr(SPRN_TFHAR, vcpu->arch.tfhar);
+   mtspr(SPRN_TFIAR, vcpu->arch.tfiar);
+   }
}
 
load_spr_state(vcpu, host_os_sprs);
@@ -308,8 +315,16 @@ void store_vcpu_state(struct kvm_vcpu *vcpu)
vcpu->arch.vrsave = mfspr(SPRN_VRSAVE);
 
if (cpu_has_feature(CPU_FTR_TM) ||
-   cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST))
-   kvmppc_save_tm_hv(vcpu, vcpu->arch.shregs.msr, true);
+   cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)) {
+   unsigned long guest_msr = vcpu->arch.shregs.msr;
+   if (MSR_TM_ACTIVE(guest_msr)) {
+   kvmppc_save_tm_hv(vcpu, guest_msr, true);
+   } else {
+   vcpu->arch.texasr = mfspr(SPRN_TEXASR);
+   vcpu->arch.tfhar = mfspr(SPRN_TFHAR);
+   vcpu->arch.tfiar = mfspr(SPRN_TFIAR);
+   }
+   }
 }
 EXPORT_SYMBOL_GPL(store_vcpu_state);
 
-- 
2.23.0



[PATCH v1 35/55] KVM: PPC: Book3S HV P9: Move remaining SPR and MSR access into low level entry

2021-07-25 Thread Nicholas Piggin
Move register saving and loading from kvmhv_p9_guest_entry() into the HV
and nested entry handlers.

Accesses are scheduled to reduce mtSPR / mfSPR interleaving which
reduces SPR scoreboard stalls.

XXX +212 cycles here somewhere (7566), investigate  POWER9 virt-mode NULL hcall

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/kvm/book3s_hv.c  | 79 ++
 arch/powerpc/kvm/book3s_hv_p9_entry.c | 96 ---
 2 files changed, 109 insertions(+), 66 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index cb66c9534dbf..8c1c93ebd669 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -3794,9 +3794,15 @@ static int kvmhv_vcpu_entry_p9_nested(struct kvm_vcpu 
*vcpu, u64 time_limit, uns
 {
struct kvmppc_vcore *vc = vcpu->arch.vcore;
unsigned long host_psscr;
+   unsigned long msr;
struct hv_guest_state hvregs;
-   int trap;
+   struct p9_host_os_sprs host_os_sprs;
s64 dec;
+   int trap;
+
+   switch_pmu_to_guest(vcpu, _os_sprs);
+
+   save_p9_host_os_sprs(_os_sprs);
 
/*
 * We need to save and restore the guest visible part of the
@@ -3805,6 +3811,27 @@ static int kvmhv_vcpu_entry_p9_nested(struct kvm_vcpu 
*vcpu, u64 time_limit, uns
 * this is done in kvmhv_vcpu_entry_p9() below otherwise.
 */
host_psscr = mfspr(SPRN_PSSCR_PR);
+
+   hard_irq_disable();
+   if (lazy_irq_pending())
+   return 0;
+
+   /* MSR bits may have been cleared by context switch */
+   msr = 0;
+   if (IS_ENABLED(CONFIG_PPC_FPU))
+   msr |= MSR_FP;
+   if (cpu_has_feature(CPU_FTR_ALTIVEC))
+   msr |= MSR_VEC;
+   if (cpu_has_feature(CPU_FTR_VSX))
+   msr |= MSR_VSX;
+   if (cpu_has_feature(CPU_FTR_TM) ||
+   cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST))
+   msr |= MSR_TM;
+   msr = msr_check_and_set(msr);
+
+   if (unlikely(load_vcpu_state(vcpu, _os_sprs)))
+   msr = mfmsr(); /* TM restore can update msr */
+
mtspr(SPRN_PSSCR_PR, vcpu->arch.psscr);
kvmhv_save_hv_regs(vcpu, );
hvregs.lpcr = lpcr;
@@ -3846,12 +3873,20 @@ static int kvmhv_vcpu_entry_p9_nested(struct kvm_vcpu 
*vcpu, u64 time_limit, uns
vcpu->arch.psscr = mfspr(SPRN_PSSCR_PR);
mtspr(SPRN_PSSCR_PR, host_psscr);
 
+   store_vcpu_state(vcpu);
+
dec = mfspr(SPRN_DEC);
if (!(lpcr & LPCR_LD)) /* Sign extend if not using large decrementer */
dec = (s32) dec;
*tb = mftb();
vcpu->arch.dec_expires = dec + (*tb + vc->tb_offset);
 
+   timer_rearm_host_dec(*tb);
+
+   restore_p9_host_os_sprs(vcpu, _os_sprs);
+
+   switch_pmu_to_host(vcpu, _os_sprs);
+
return trap;
 }
 
@@ -3862,9 +3897,7 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, 
u64 time_limit,
 unsigned long lpcr, u64 *tb)
 {
struct kvmppc_vcore *vc = vcpu->arch.vcore;
-   struct p9_host_os_sprs host_os_sprs;
u64 next_timer;
-   unsigned long msr;
int trap;
 
next_timer = timer_get_next_tb();
@@ -3875,33 +3908,6 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, 
u64 time_limit,
 
vcpu->arch.ceded = 0;
 
-   save_p9_host_os_sprs(_os_sprs);
-
-   /*
-* This could be combined with MSR[RI] clearing, but that expands
-* the unrecoverable window. It would be better to cover unrecoverable
-* with KVM bad interrupt handling rather than use MSR[RI] at all.
-*
-* Much more difficult and less worthwhile to combine with IR/DR
-* disable.
-*/
-   hard_irq_disable();
-   if (lazy_irq_pending())
-   return 0;
-
-   /* MSR bits may have been cleared by context switch */
-   msr = 0;
-   if (IS_ENABLED(CONFIG_PPC_FPU))
-   msr |= MSR_FP;
-   if (cpu_has_feature(CPU_FTR_ALTIVEC))
-   msr |= MSR_VEC;
-   if (cpu_has_feature(CPU_FTR_VSX))
-   msr |= MSR_VSX;
-   if (cpu_has_feature(CPU_FTR_TM) ||
-   cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST))
-   msr |= MSR_TM;
-   msr = msr_check_and_set(msr);
-
kvmppc_subcore_enter_guest();
 
vc->entry_exit_map = 1;
@@ -3909,11 +3915,6 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, 
u64 time_limit,
 
vcpu_vpa_increment_dispatch(vcpu);
 
-   if (unlikely(load_vcpu_state(vcpu, _os_sprs)))
-   msr = mfmsr(); /* MSR may have been updated */
-
-   switch_pmu_to_guest(vcpu, _os_sprs);
-
if (kvmhv_on_pseries()) {
trap = kvmhv_vcpu_entry_p9_nested(vcpu, time_limit, lpcr, tb);
 
@@ -3956,16 +3957,8 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, 
u64 time_limit,
vcpu->arch.slb_max = 0;
}
 
-   switch_pmu_to_host(vcpu, 

[PATCH v1 34/55] KVM: PPC: Book3S HV P9: Move nested guest entry into its own function

2021-07-25 Thread Nicholas Piggin
This is just refactoring.

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/kvm/book3s_hv.c | 125 +++
 1 file changed, 67 insertions(+), 58 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 977712eb74e0..cb66c9534dbf 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -3789,6 +3789,72 @@ static void vcpu_vpa_increment_dispatch(struct kvm_vcpu 
*vcpu)
}
 }
 
+/* call our hypervisor to load up HV regs and go */
+static int kvmhv_vcpu_entry_p9_nested(struct kvm_vcpu *vcpu, u64 time_limit, 
unsigned long lpcr, u64 *tb)
+{
+   struct kvmppc_vcore *vc = vcpu->arch.vcore;
+   unsigned long host_psscr;
+   struct hv_guest_state hvregs;
+   int trap;
+   s64 dec;
+
+   /*
+* We need to save and restore the guest visible part of the
+* psscr (i.e. using SPRN_PSSCR_PR) since the hypervisor
+* doesn't do this for us. Note only required if pseries since
+* this is done in kvmhv_vcpu_entry_p9() below otherwise.
+*/
+   host_psscr = mfspr(SPRN_PSSCR_PR);
+   mtspr(SPRN_PSSCR_PR, vcpu->arch.psscr);
+   kvmhv_save_hv_regs(vcpu, );
+   hvregs.lpcr = lpcr;
+   vcpu->arch.regs.msr = vcpu->arch.shregs.msr;
+   hvregs.version = HV_GUEST_STATE_VERSION;
+   if (vcpu->arch.nested) {
+   hvregs.lpid = vcpu->arch.nested->shadow_lpid;
+   hvregs.vcpu_token = vcpu->arch.nested_vcpu_id;
+   } else {
+   hvregs.lpid = vcpu->kvm->arch.lpid;
+   hvregs.vcpu_token = vcpu->vcpu_id;
+   }
+   hvregs.hdec_expiry = time_limit;
+
+   /*
+* When setting DEC, we must always deal with irq_work_raise
+* via NMI vs setting DEC. The problem occurs right as we
+* switch into guest mode if a NMI hits and sets pending work
+* and sets DEC, then that will apply to the guest and not
+* bring us back to the host.
+*
+* irq_work_raise could check a flag (or possibly LPCR[HDICE]
+* for example) and set HDEC to 1? That wouldn't solve the
+* nested hv case which needs to abort the hcall or zero the
+* time limit.
+*
+* XXX: Another day's problem.
+*/
+   mtspr(SPRN_DEC, kvmppc_dec_expires_host_tb(vcpu) - *tb);
+
+   mtspr(SPRN_DAR, vcpu->arch.shregs.dar);
+   mtspr(SPRN_DSISR, vcpu->arch.shregs.dsisr);
+   trap = plpar_hcall_norets(H_ENTER_NESTED, __pa(),
+ __pa(>arch.regs));
+   kvmhv_restore_hv_return_state(vcpu, );
+   vcpu->arch.shregs.msr = vcpu->arch.regs.msr;
+   vcpu->arch.shregs.dar = mfspr(SPRN_DAR);
+   vcpu->arch.shregs.dsisr = mfspr(SPRN_DSISR);
+   vcpu->arch.psscr = mfspr(SPRN_PSSCR_PR);
+   mtspr(SPRN_PSSCR_PR, host_psscr);
+
+   dec = mfspr(SPRN_DEC);
+   if (!(lpcr & LPCR_LD)) /* Sign extend if not using large decrementer */
+   dec = (s32) dec;
+   *tb = mftb();
+   vcpu->arch.dec_expires = dec + (*tb + vc->tb_offset);
+
+   return trap;
+}
+
 /*
  * Guest entry for POWER9 and later CPUs.
  */
@@ -3797,7 +3863,6 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, 
u64 time_limit,
 {
struct kvmppc_vcore *vc = vcpu->arch.vcore;
struct p9_host_os_sprs host_os_sprs;
-   s64 dec;
u64 next_timer;
unsigned long msr;
int trap;
@@ -3850,63 +3915,7 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, 
u64 time_limit,
switch_pmu_to_guest(vcpu, _os_sprs);
 
if (kvmhv_on_pseries()) {
-   /*
-* We need to save and restore the guest visible part of the
-* psscr (i.e. using SPRN_PSSCR_PR) since the hypervisor
-* doesn't do this for us. Note only required if pseries since
-* this is done in kvmhv_vcpu_entry_p9() below otherwise.
-*/
-   unsigned long host_psscr;
-   /* call our hypervisor to load up HV regs and go */
-   struct hv_guest_state hvregs;
-
-   host_psscr = mfspr(SPRN_PSSCR_PR);
-   mtspr(SPRN_PSSCR_PR, vcpu->arch.psscr);
-   kvmhv_save_hv_regs(vcpu, );
-   hvregs.lpcr = lpcr;
-   vcpu->arch.regs.msr = vcpu->arch.shregs.msr;
-   hvregs.version = HV_GUEST_STATE_VERSION;
-   if (vcpu->arch.nested) {
-   hvregs.lpid = vcpu->arch.nested->shadow_lpid;
-   hvregs.vcpu_token = vcpu->arch.nested_vcpu_id;
-   } else {
-   hvregs.lpid = vcpu->kvm->arch.lpid;
-   hvregs.vcpu_token = vcpu->vcpu_id;
-   }
-   hvregs.hdec_expiry = time_limit;
-
-   /*
-* When setting DEC, we must always deal with irq_work_raise
-* via NMI vs setting DEC. The problem occurs 

[PATCH v1 33/55] KVM: PPC: Book3S HV P9: Move host OS save/restore functions to built-in

2021-07-25 Thread Nicholas Piggin
Move the P9 guest/host register switching functions to the built-in
P9 entry code, and export it for nested to use as well.

This allows more flexibility in scheduling these supervisor privileged
SPR accesses with the HV privileged and PR SPR accesses in the low level
entry code.

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/kvm/book3s_hv.c  | 365 +-
 arch/powerpc/kvm/book3s_hv.h  |  39 +++
 arch/powerpc/kvm/book3s_hv_p9_entry.c | 345 
 3 files changed, 385 insertions(+), 364 deletions(-)
 create mode 100644 arch/powerpc/kvm/book3s_hv.h

diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 45211458ac05..977712eb74e0 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -80,6 +80,7 @@
 #include 
 
 #include "book3s.h"
+#include "book3s_hv.h"
 
 #define CREATE_TRACE_POINTS
 #include "trace_hv.h"
@@ -3772,370 +3773,6 @@ static noinline void kvmppc_run_core(struct 
kvmppc_vcore *vc)
trace_kvmppc_run_core(vc, 1);
 }
 
-/*
- * Privileged (non-hypervisor) host registers to save.
- */
-struct p9_host_os_sprs {
-   unsigned long dscr;
-   unsigned long tidr;
-   unsigned long iamr;
-   unsigned long amr;
-   unsigned long fscr;
-
-   unsigned int pmc1;
-   unsigned int pmc2;
-   unsigned int pmc3;
-   unsigned int pmc4;
-   unsigned int pmc5;
-   unsigned int pmc6;
-   unsigned long mmcr0;
-   unsigned long mmcr1;
-   unsigned long mmcr2;
-   unsigned long mmcr3;
-   unsigned long mmcra;
-   unsigned long siar;
-   unsigned long sier1;
-   unsigned long sier2;
-   unsigned long sier3;
-   unsigned long sdar;
-};
-
-static void freeze_pmu(unsigned long mmcr0, unsigned long mmcra)
-{
-   if (!(mmcr0 & MMCR0_FC))
-   goto do_freeze;
-   if (mmcra & MMCRA_SAMPLE_ENABLE)
-   goto do_freeze;
-   if (cpu_has_feature(CPU_FTR_ARCH_31)) {
-   if (!(mmcr0 & MMCR0_PMCCEXT))
-   goto do_freeze;
-   if (!(mmcra & MMCRA_BHRB_DISABLE))
-   goto do_freeze;
-   }
-   return;
-
-do_freeze:
-   mmcr0 = MMCR0_FC;
-   mmcra = 0;
-   if (cpu_has_feature(CPU_FTR_ARCH_31)) {
-   mmcr0 |= MMCR0_PMCCEXT;
-   mmcra = MMCRA_BHRB_DISABLE;
-   }
-
-   mtspr(SPRN_MMCR0, mmcr0);
-   mtspr(SPRN_MMCRA, mmcra);
-   isync();
-}
-
-static void switch_pmu_to_guest(struct kvm_vcpu *vcpu,
-   struct p9_host_os_sprs *host_os_sprs)
-{
-   struct lppaca *lp;
-   int load_pmu = 1;
-
-   lp = vcpu->arch.vpa.pinned_addr;
-   if (lp)
-   load_pmu = lp->pmcregs_in_use;
-
-   /* Save host */
-   if (ppc_get_pmu_inuse()) {
-   /*
-* It might be better to put PMU handling (at least for the
-* host) in the perf subsystem because it knows more about what
-* is being used.
-*/
-
-   /* POWER9, POWER10 do not implement HPMC or SPMC */
-
-   host_os_sprs->mmcr0 = mfspr(SPRN_MMCR0);
-   host_os_sprs->mmcra = mfspr(SPRN_MMCRA);
-
-   freeze_pmu(host_os_sprs->mmcr0, host_os_sprs->mmcra);
-
-   host_os_sprs->pmc1 = mfspr(SPRN_PMC1);
-   host_os_sprs->pmc2 = mfspr(SPRN_PMC2);
-   host_os_sprs->pmc3 = mfspr(SPRN_PMC3);
-   host_os_sprs->pmc4 = mfspr(SPRN_PMC4);
-   host_os_sprs->pmc5 = mfspr(SPRN_PMC5);
-   host_os_sprs->pmc6 = mfspr(SPRN_PMC6);
-   host_os_sprs->mmcr1 = mfspr(SPRN_MMCR1);
-   host_os_sprs->mmcr2 = mfspr(SPRN_MMCR2);
-   host_os_sprs->sdar = mfspr(SPRN_SDAR);
-   host_os_sprs->siar = mfspr(SPRN_SIAR);
-   host_os_sprs->sier1 = mfspr(SPRN_SIER);
-
-   if (cpu_has_feature(CPU_FTR_ARCH_31)) {
-   host_os_sprs->mmcr3 = mfspr(SPRN_MMCR3);
-   host_os_sprs->sier2 = mfspr(SPRN_SIER2);
-   host_os_sprs->sier3 = mfspr(SPRN_SIER3);
-   }
-   }
-
-#ifdef CONFIG_PPC_PSERIES
-   /* After saving PMU, before loading guest PMU, flip pmcregs_in_use */
-   if (kvmhv_on_pseries()) {
-   barrier();
-   get_lppaca()->pmcregs_in_use = load_pmu;
-   barrier();
-   }
-#endif
-
-   /*
-* Load guest. If the VPA said the PMCs are not in use but the guest
-* tried to access them anyway, HFSCR[PM] will be set by the HFAC
-* fault so we can make forward progress.
-*/
-   if (load_pmu || (vcpu->arch.hfscr & HFSCR_PM)) {
-   mtspr(SPRN_PMC1, vcpu->arch.pmc[0]);
-   mtspr(SPRN_PMC2, vcpu->arch.pmc[1]);
-   mtspr(SPRN_PMC3, vcpu->arch.pmc[2]);
-   mtspr(SPRN_PMC4, vcpu->arch.pmc[3]);
- 

[PATCH v1 32/55] KVM: PPC: Book3S HV P9: Move vcpu register save/restore into functions

2021-07-25 Thread Nicholas Piggin
This should make no functional difference but makes the caller easier
to read.

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/kvm/book3s_hv.c | 65 +++-
 1 file changed, 41 insertions(+), 24 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index c2c72875fca9..45211458ac05 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -4062,6 +4062,44 @@ static void store_spr_state(struct kvm_vcpu *vcpu)
vcpu->arch.ctrl = mfspr(SPRN_CTRLF);
 }
 
+/* Returns true if current MSR and/or guest MSR may have changed */
+static bool load_vcpu_state(struct kvm_vcpu *vcpu,
+  struct p9_host_os_sprs *host_os_sprs)
+{
+   bool ret = false;
+
+   if (cpu_has_feature(CPU_FTR_TM) ||
+   cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)) {
+   kvmppc_restore_tm_hv(vcpu, vcpu->arch.shregs.msr, true);
+   ret = true;
+   }
+
+   load_spr_state(vcpu, host_os_sprs);
+
+   load_fp_state(>arch.fp);
+#ifdef CONFIG_ALTIVEC
+   load_vr_state(>arch.vr);
+#endif
+   mtspr(SPRN_VRSAVE, vcpu->arch.vrsave);
+
+   return ret;
+}
+
+static void store_vcpu_state(struct kvm_vcpu *vcpu)
+{
+   store_spr_state(vcpu);
+
+   store_fp_state(>arch.fp);
+#ifdef CONFIG_ALTIVEC
+   store_vr_state(>arch.vr);
+#endif
+   vcpu->arch.vrsave = mfspr(SPRN_VRSAVE);
+
+   if (cpu_has_feature(CPU_FTR_TM) ||
+   cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST))
+   kvmppc_save_tm_hv(vcpu, vcpu->arch.shregs.msr, true);
+}
+
 static void save_p9_host_os_sprs(struct p9_host_os_sprs *host_os_sprs)
 {
if (!cpu_has_feature(CPU_FTR_ARCH_31))
@@ -4169,19 +4207,8 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, 
u64 time_limit,
 
vcpu_vpa_increment_dispatch(vcpu);
 
-   if (cpu_has_feature(CPU_FTR_TM) ||
-   cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)) {
-   kvmppc_restore_tm_hv(vcpu, vcpu->arch.shregs.msr, true);
-   msr = mfmsr(); /* TM restore can update msr */
-   }
-
-   load_spr_state(vcpu, _os_sprs);
-
-   load_fp_state(>arch.fp);
-#ifdef CONFIG_ALTIVEC
-   load_vr_state(>arch.vr);
-#endif
-   mtspr(SPRN_VRSAVE, vcpu->arch.vrsave);
+   if (unlikely(load_vcpu_state(vcpu, _os_sprs)))
+   msr = mfmsr(); /* MSR may have been updated */
 
switch_pmu_to_guest(vcpu, _os_sprs);
 
@@ -4285,17 +4312,7 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, 
u64 time_limit,
 
switch_pmu_to_host(vcpu, _os_sprs);
 
-   store_spr_state(vcpu);
-
-   store_fp_state(>arch.fp);
-#ifdef CONFIG_ALTIVEC
-   store_vr_state(>arch.vr);
-#endif
-   vcpu->arch.vrsave = mfspr(SPRN_VRSAVE);
-
-   if (cpu_has_feature(CPU_FTR_TM) ||
-   cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST))
-   kvmppc_save_tm_hv(vcpu, vcpu->arch.shregs.msr, true);
+   store_vcpu_state(vcpu);
 
vcpu_vpa_increment_dispatch(vcpu);
 
-- 
2.23.0



[PATCH v1 31/55] KVM: PPC: Book3S HV P9: Juggle SPR switching around

2021-07-25 Thread Nicholas Piggin
This juggles SPR switching on the entry and exit sides to be more
symmetric, which makes the next refactoring patch possible with no
functional change.

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/kvm/book3s_hv.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 56429b53f4dc..c2c72875fca9 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -4175,7 +4175,7 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, 
u64 time_limit,
msr = mfmsr(); /* TM restore can update msr */
}
 
-   switch_pmu_to_guest(vcpu, _os_sprs);
+   load_spr_state(vcpu, _os_sprs);
 
load_fp_state(>arch.fp);
 #ifdef CONFIG_ALTIVEC
@@ -4183,7 +4183,7 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, 
u64 time_limit,
 #endif
mtspr(SPRN_VRSAVE, vcpu->arch.vrsave);
 
-   load_spr_state(vcpu, _os_sprs);
+   switch_pmu_to_guest(vcpu, _os_sprs);
 
if (kvmhv_on_pseries()) {
/*
@@ -4283,6 +4283,8 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, 
u64 time_limit,
vcpu->arch.slb_max = 0;
}
 
+   switch_pmu_to_host(vcpu, _os_sprs);
+
store_spr_state(vcpu);
 
store_fp_state(>arch.fp);
@@ -4297,8 +4299,6 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, 
u64 time_limit,
 
vcpu_vpa_increment_dispatch(vcpu);
 
-   switch_pmu_to_host(vcpu, _os_sprs);
-
timer_rearm_host_dec(*tb);
 
restore_p9_host_os_sprs(vcpu, _os_sprs);
-- 
2.23.0



[PATCH v1 30/55] KVM: PPC: Book3S HV P9: Only execute mtSPR if the value changed

2021-07-25 Thread Nicholas Piggin
Keep better track of the current SPR values in places where
they are to be loaded with a new context, to reduce expensive
mtSPR operations.

-73 cycles (7354) POWER9 virt-mode NULL hcall

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/kvm/book3s_hv.c | 64 ++--
 1 file changed, 39 insertions(+), 25 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 0d97138e6fa4..56429b53f4dc 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -4009,19 +4009,28 @@ static void switch_pmu_to_host(struct kvm_vcpu *vcpu,
}
 }
 
-static void load_spr_state(struct kvm_vcpu *vcpu)
+static void load_spr_state(struct kvm_vcpu *vcpu,
+   struct p9_host_os_sprs *host_os_sprs)
 {
-   mtspr(SPRN_DSCR, vcpu->arch.dscr);
-   mtspr(SPRN_IAMR, vcpu->arch.iamr);
-   mtspr(SPRN_PSPB, vcpu->arch.pspb);
-   mtspr(SPRN_FSCR, vcpu->arch.fscr);
mtspr(SPRN_TAR, vcpu->arch.tar);
mtspr(SPRN_EBBHR, vcpu->arch.ebbhr);
mtspr(SPRN_EBBRR, vcpu->arch.ebbrr);
mtspr(SPRN_BESCR, vcpu->arch.bescr);
-   mtspr(SPRN_TIDR, vcpu->arch.tid);
-   mtspr(SPRN_AMR, vcpu->arch.amr);
-   mtspr(SPRN_UAMOR, vcpu->arch.uamor);
+
+   if (!cpu_has_feature(CPU_FTR_ARCH_31))
+   mtspr(SPRN_TIDR, vcpu->arch.tid);
+   if (host_os_sprs->iamr != vcpu->arch.iamr)
+   mtspr(SPRN_IAMR, vcpu->arch.iamr);
+   if (host_os_sprs->amr != vcpu->arch.amr)
+   mtspr(SPRN_AMR, vcpu->arch.amr);
+   if (vcpu->arch.uamor != 0)
+   mtspr(SPRN_UAMOR, vcpu->arch.uamor);
+   if (host_os_sprs->fscr != vcpu->arch.fscr)
+   mtspr(SPRN_FSCR, vcpu->arch.fscr);
+   if (host_os_sprs->dscr != vcpu->arch.dscr)
+   mtspr(SPRN_DSCR, vcpu->arch.dscr);
+   if (vcpu->arch.pspb != 0)
+   mtspr(SPRN_PSPB, vcpu->arch.pspb);
 
/*
 * DAR, DSISR, and for nested HV, SPRGs must be set with MSR[RI]
@@ -4036,28 +4045,31 @@ static void load_spr_state(struct kvm_vcpu *vcpu)
 
 static void store_spr_state(struct kvm_vcpu *vcpu)
 {
-   vcpu->arch.ctrl = mfspr(SPRN_CTRLF);
-
-   vcpu->arch.iamr = mfspr(SPRN_IAMR);
-   vcpu->arch.pspb = mfspr(SPRN_PSPB);
-   vcpu->arch.fscr = mfspr(SPRN_FSCR);
vcpu->arch.tar = mfspr(SPRN_TAR);
vcpu->arch.ebbhr = mfspr(SPRN_EBBHR);
vcpu->arch.ebbrr = mfspr(SPRN_EBBRR);
vcpu->arch.bescr = mfspr(SPRN_BESCR);
-   vcpu->arch.tid = mfspr(SPRN_TIDR);
+
+   if (!cpu_has_feature(CPU_FTR_ARCH_31))
+   vcpu->arch.tid = mfspr(SPRN_TIDR);
+   vcpu->arch.iamr = mfspr(SPRN_IAMR);
vcpu->arch.amr = mfspr(SPRN_AMR);
vcpu->arch.uamor = mfspr(SPRN_UAMOR);
+   vcpu->arch.fscr = mfspr(SPRN_FSCR);
vcpu->arch.dscr = mfspr(SPRN_DSCR);
+   vcpu->arch.pspb = mfspr(SPRN_PSPB);
+
+   vcpu->arch.ctrl = mfspr(SPRN_CTRLF);
 }
 
 static void save_p9_host_os_sprs(struct p9_host_os_sprs *host_os_sprs)
 {
-   host_os_sprs->dscr = mfspr(SPRN_DSCR);
-   host_os_sprs->tidr = mfspr(SPRN_TIDR);
+   if (!cpu_has_feature(CPU_FTR_ARCH_31))
+   host_os_sprs->tidr = mfspr(SPRN_TIDR);
host_os_sprs->iamr = mfspr(SPRN_IAMR);
host_os_sprs->amr = mfspr(SPRN_AMR);
host_os_sprs->fscr = mfspr(SPRN_FSCR);
+   host_os_sprs->dscr = mfspr(SPRN_DSCR);
 }
 
 /* vcpu guest regs must already be saved */
@@ -4066,18 +4078,20 @@ static void restore_p9_host_os_sprs(struct kvm_vcpu 
*vcpu,
 {
mtspr(SPRN_SPRG_VDSO_WRITE, local_paca->sprg_vdso);
 
-   mtspr(SPRN_PSPB, 0);
-   mtspr(SPRN_UAMOR, 0);
-
-   mtspr(SPRN_DSCR, host_os_sprs->dscr);
-   mtspr(SPRN_TIDR, host_os_sprs->tidr);
-   mtspr(SPRN_IAMR, host_os_sprs->iamr);
-
+   if (!cpu_has_feature(CPU_FTR_ARCH_31))
+   mtspr(SPRN_TIDR, host_os_sprs->tidr);
+   if (host_os_sprs->iamr != vcpu->arch.iamr)
+   mtspr(SPRN_IAMR, host_os_sprs->iamr);
+   if (vcpu->arch.uamor != 0)
+   mtspr(SPRN_UAMOR, 0);
if (host_os_sprs->amr != vcpu->arch.amr)
mtspr(SPRN_AMR, host_os_sprs->amr);
-
if (host_os_sprs->fscr != vcpu->arch.fscr)
mtspr(SPRN_FSCR, host_os_sprs->fscr);
+   if (host_os_sprs->dscr != vcpu->arch.dscr)
+   mtspr(SPRN_DSCR, host_os_sprs->dscr);
+   if (vcpu->arch.pspb != 0)
+   mtspr(SPRN_PSPB, 0);
 
/* Save guest CTRL register, set runlatch to 1 */
if (!(vcpu->arch.ctrl & 1))
@@ -4169,7 +4183,7 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, 
u64 time_limit,
 #endif
mtspr(SPRN_VRSAVE, vcpu->arch.vrsave);
 
-   load_spr_state(vcpu);
+   load_spr_state(vcpu, _os_sprs);
 
if (kvmhv_on_pseries()) {
/*
-- 
2.23.0



[PATCH v1 29/55] KVM: PPC: Book3S HV P9: Avoid SPR scoreboard stalls

2021-07-25 Thread Nicholas Piggin
Avoid interleaving mfSPR and mtSPR.

-151 cycles (7427) POWER9 virt-mode NULL hcall

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/kvm/book3s_hv.c  |  8 
 arch/powerpc/kvm/book3s_hv_p9_entry.c | 19 +++
 2 files changed, 15 insertions(+), 12 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index fa44bbca75e4..0d97138e6fa4 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -4271,10 +4271,6 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, 
u64 time_limit,
 
store_spr_state(vcpu);
 
-   timer_rearm_host_dec(*tb);
-
-   restore_p9_host_os_sprs(vcpu, _os_sprs);
-
store_fp_state(>arch.fp);
 #ifdef CONFIG_ALTIVEC
store_vr_state(>arch.vr);
@@ -4289,6 +4285,10 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, 
u64 time_limit,
 
switch_pmu_to_host(vcpu, _os_sprs);
 
+   timer_rearm_host_dec(*tb);
+
+   restore_p9_host_os_sprs(vcpu, _os_sprs);
+
vc->entry_exit_map = 0x101;
vc->in_guest = 0;
 
diff --git a/arch/powerpc/kvm/book3s_hv_p9_entry.c 
b/arch/powerpc/kvm/book3s_hv_p9_entry.c
index 2bd96d8256d1..bd0021cd3a67 100644
--- a/arch/powerpc/kvm/book3s_hv_p9_entry.c
+++ b/arch/powerpc/kvm/book3s_hv_p9_entry.c
@@ -228,6 +228,9 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 
time_limit, unsigned long lpc
host_dawrx1 = mfspr(SPRN_DAWRX1);
}
 
+   local_paca->kvm_hstate.host_purr = mfspr(SPRN_PURR);
+   local_paca->kvm_hstate.host_spurr = mfspr(SPRN_SPURR);
+
if (vc->tb_offset) {
u64 new_tb = *tb + vc->tb_offset;
mtspr(SPRN_TBU40, new_tb);
@@ -244,8 +247,6 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 
time_limit, unsigned long lpc
mtspr(SPRN_DPDES, vc->dpdes);
mtspr(SPRN_VTB, vc->vtb);
 
-   local_paca->kvm_hstate.host_purr = mfspr(SPRN_PURR);
-   local_paca->kvm_hstate.host_spurr = mfspr(SPRN_SPURR);
mtspr(SPRN_PURR, vcpu->arch.purr);
mtspr(SPRN_SPURR, vcpu->arch.spurr);
 
@@ -448,10 +449,8 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 
time_limit, unsigned long lpc
/* Advance host PURR/SPURR by the amount used by guest */
purr = mfspr(SPRN_PURR);
spurr = mfspr(SPRN_SPURR);
-   mtspr(SPRN_PURR, local_paca->kvm_hstate.host_purr +
- purr - vcpu->arch.purr);
-   mtspr(SPRN_SPURR, local_paca->kvm_hstate.host_spurr +
- spurr - vcpu->arch.spurr);
+   local_paca->kvm_hstate.host_purr += purr - vcpu->arch.purr;
+   local_paca->kvm_hstate.host_spurr += spurr - vcpu->arch.spurr;
vcpu->arch.purr = purr;
vcpu->arch.spurr = spurr;
 
@@ -464,6 +463,9 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 
time_limit, unsigned long lpc
vcpu->arch.shregs.sprg2 = mfspr(SPRN_SPRG2);
vcpu->arch.shregs.sprg3 = mfspr(SPRN_SPRG3);
 
+   vc->dpdes = mfspr(SPRN_DPDES);
+   vc->vtb = mfspr(SPRN_VTB);
+
dec = mfspr(SPRN_DEC);
if (!(lpcr & LPCR_LD)) /* Sign extend if not using large decrementer */
dec = (s32) dec;
@@ -481,6 +483,9 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 
time_limit, unsigned long lpc
vc->tb_offset_applied = 0;
}
 
+   mtspr(SPRN_PURR, local_paca->kvm_hstate.host_purr);
+   mtspr(SPRN_SPURR, local_paca->kvm_hstate.host_spurr);
+
/* Preserve PSSCR[FAKE_SUSPEND] until we've called kvmppc_save_tm_hv */
mtspr(SPRN_PSSCR, host_psscr |
  (local_paca->kvm_hstate.fake_suspend << PSSCR_FAKE_SUSPEND_LG));
@@ -509,8 +514,6 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 
time_limit, unsigned long lpc
if (cpu_has_feature(CPU_FTR_ARCH_31))
asm volatile(PPC_CP_ABORT);
 
-   vc->dpdes = mfspr(SPRN_DPDES);
-   vc->vtb = mfspr(SPRN_VTB);
mtspr(SPRN_DPDES, 0);
if (vc->pcr)
mtspr(SPRN_PCR, PCR_MASK);
-- 
2.23.0



[PATCH v1 28/55] KVM: PPC: Book3S HV P9: Optimise timebase reads

2021-07-25 Thread Nicholas Piggin
Reduce the number of mfTB executed by passing the current timebase
around entry and exit code rather than reading it multiple times.

-213 cycles (7578) POWER9 virt-mode NULL hcall

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/include/asm/kvm_book3s_64.h |  2 +-
 arch/powerpc/kvm/book3s_hv.c | 88 +---
 arch/powerpc/kvm/book3s_hv_p9_entry.c| 33 +
 3 files changed, 65 insertions(+), 58 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h 
b/arch/powerpc/include/asm/kvm_book3s_64.h
index df6bed4b2a46..52e2b7a352c7 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -154,7 +154,7 @@ static inline bool kvmhv_vcpu_is_radix(struct kvm_vcpu 
*vcpu)
return radix;
 }
 
-int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long 
lpcr);
+int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long 
lpcr, u64 *tb);
 
 #define KVM_DEFAULT_HPT_ORDER  24  /* 16MB HPT by default */
 #endif
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 027ae0b60e70..fa44bbca75e4 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -276,22 +276,22 @@ static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu 
*vcpu)
  * they should never fail.)
  */
 
-static void kvmppc_core_start_stolen(struct kvmppc_vcore *vc)
+static void kvmppc_core_start_stolen(struct kvmppc_vcore *vc, u64 tb)
 {
unsigned long flags;
 
spin_lock_irqsave(>stoltb_lock, flags);
-   vc->preempt_tb = mftb();
+   vc->preempt_tb = tb;
spin_unlock_irqrestore(>stoltb_lock, flags);
 }
 
-static void kvmppc_core_end_stolen(struct kvmppc_vcore *vc)
+static void kvmppc_core_end_stolen(struct kvmppc_vcore *vc, u64 tb)
 {
unsigned long flags;
 
spin_lock_irqsave(>stoltb_lock, flags);
if (vc->preempt_tb != TB_NIL) {
-   vc->stolen_tb += mftb() - vc->preempt_tb;
+   vc->stolen_tb += tb - vc->preempt_tb;
vc->preempt_tb = TB_NIL;
}
spin_unlock_irqrestore(>stoltb_lock, flags);
@@ -301,6 +301,7 @@ static void kvmppc_core_vcpu_load_hv(struct kvm_vcpu *vcpu, 
int cpu)
 {
struct kvmppc_vcore *vc = vcpu->arch.vcore;
unsigned long flags;
+   u64 now = mftb();
 
/*
 * We can test vc->runner without taking the vcore lock,
@@ -309,12 +310,12 @@ static void kvmppc_core_vcpu_load_hv(struct kvm_vcpu 
*vcpu, int cpu)
 * ever sets it to NULL.
 */
if (vc->runner == vcpu && vc->vcore_state >= VCORE_SLEEPING)
-   kvmppc_core_end_stolen(vc);
+   kvmppc_core_end_stolen(vc, now);
 
spin_lock_irqsave(>arch.tbacct_lock, flags);
if (vcpu->arch.state == KVMPPC_VCPU_BUSY_IN_HOST &&
vcpu->arch.busy_preempt != TB_NIL) {
-   vcpu->arch.busy_stolen += mftb() - vcpu->arch.busy_preempt;
+   vcpu->arch.busy_stolen += now - vcpu->arch.busy_preempt;
vcpu->arch.busy_preempt = TB_NIL;
}
spin_unlock_irqrestore(>arch.tbacct_lock, flags);
@@ -324,13 +325,14 @@ static void kvmppc_core_vcpu_put_hv(struct kvm_vcpu *vcpu)
 {
struct kvmppc_vcore *vc = vcpu->arch.vcore;
unsigned long flags;
+   u64 now = mftb();
 
if (vc->runner == vcpu && vc->vcore_state >= VCORE_SLEEPING)
-   kvmppc_core_start_stolen(vc);
+   kvmppc_core_start_stolen(vc, now);
 
spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags);
if (vcpu->arch.state == KVMPPC_VCPU_BUSY_IN_HOST)
-   vcpu->arch.busy_preempt = mftb();
+   vcpu->arch.busy_preempt = now;
spin_unlock_irqrestore(&vcpu->arch.tbacct_lock, flags);
 }
 
@@ -685,7 +687,7 @@ static u64 vcore_stolen_time(struct kvmppc_vcore *vc, u64 
now)
 }
 
 static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu,
-   struct kvmppc_vcore *vc)
+   struct kvmppc_vcore *vc, u64 tb)
 {
struct dtl_entry *dt;
struct lppaca *vpa;
@@ -696,7 +698,7 @@ static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu,
 
dt = vcpu->arch.dtl_ptr;
vpa = vcpu->arch.vpa.pinned_addr;
-   now = mftb();
+   now = tb;
core_stolen = vcore_stolen_time(vc, now);
stolen = core_stolen - vcpu->arch.stolen_logged;
vcpu->arch.stolen_logged = core_stolen;
@@ -2889,14 +2891,14 @@ static void kvmppc_set_timer(struct kvm_vcpu *vcpu)
 extern int __kvmppc_vcore_entry(void);
 
 static void kvmppc_remove_runnable(struct kvmppc_vcore *vc,
-  struct kvm_vcpu *vcpu)
+  struct kvm_vcpu *vcpu, u64 tb)
 {
u64 now;
 
if (vcpu->arch.state != KVMPPC_VCPU_RUNNABLE)
return;
spin_lock_irq(&vcpu->arch.tbacct_lock);
-   now = mftb();
+   now = tb;
vcpu->arch.busy_stolen 

[PATCH v1 27/55] KVM: PPC: Book3S HV P9: Move TB updates

2021-07-25 Thread Nicholas Piggin
Move the TB updates between saving and loading guest and host SPRs,
to improve scheduling by keeping issue-NTC operations together as
much as possible.

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/kvm/book3s_hv_p9_entry.c | 36 +--
 1 file changed, 18 insertions(+), 18 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_hv_p9_entry.c 
b/arch/powerpc/kvm/book3s_hv_p9_entry.c
index 814b0dfd590f..e7793bb806eb 100644
--- a/arch/powerpc/kvm/book3s_hv_p9_entry.c
+++ b/arch/powerpc/kvm/book3s_hv_p9_entry.c
@@ -215,15 +215,6 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 
time_limit, unsigned long lpc
 
vcpu->arch.ceded = 0;
 
-   if (vc->tb_offset) {
-   u64 new_tb = tb + vc->tb_offset;
-   mtspr(SPRN_TBU40, new_tb);
-   tb = mftb();
-   if ((tb & 0xff) < (new_tb & 0xff))
-   mtspr(SPRN_TBU40, new_tb + 0x100);
-   vc->tb_offset_applied = vc->tb_offset;
-   }
-
/* Could avoid mfmsr by passing around, but probably no big deal */
msr = mfmsr();
 
@@ -238,6 +229,15 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 
time_limit, unsigned long lpc
host_dawrx1 = mfspr(SPRN_DAWRX1);
}
 
+   if (vc->tb_offset) {
+   u64 new_tb = tb + vc->tb_offset;
+   mtspr(SPRN_TBU40, new_tb);
+   tb = mftb();
+   if ((tb & 0xff) < (new_tb & 0xff))
+   mtspr(SPRN_TBU40, new_tb + 0x100);
+   vc->tb_offset_applied = vc->tb_offset;
+   }
+
if (vc->pcr)
mtspr(SPRN_PCR, vc->pcr | PCR_MASK);
mtspr(SPRN_DPDES, vc->dpdes);
@@ -469,6 +469,15 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 
time_limit, unsigned long lpc
tb = mftb();
vcpu->arch.dec_expires = dec + tb;
 
+   if (vc->tb_offset_applied) {
+   u64 new_tb = tb - vc->tb_offset_applied;
+   mtspr(SPRN_TBU40, new_tb);
+   tb = mftb();
+   if ((tb & 0xff) < (new_tb & 0xff))
+   mtspr(SPRN_TBU40, new_tb + 0x100);
+   vc->tb_offset_applied = 0;
+   }
+
/* Preserve PSSCR[FAKE_SUSPEND] until we've called kvmppc_save_tm_hv */
mtspr(SPRN_PSSCR, host_psscr |
  (local_paca->kvm_hstate.fake_suspend << PSSCR_FAKE_SUSPEND_LG));
@@ -503,15 +512,6 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 
time_limit, unsigned long lpc
if (vc->pcr)
mtspr(SPRN_PCR, PCR_MASK);
 
-   if (vc->tb_offset_applied) {
-   u64 new_tb = mftb() - vc->tb_offset_applied;
-   mtspr(SPRN_TBU40, new_tb);
-   tb = mftb();
-   if ((tb & 0xff) < (new_tb & 0xff))
-   mtspr(SPRN_TBU40, new_tb + 0x100);
-   vc->tb_offset_applied = 0;
-   }
-
/* HDEC must be at least as large as DEC, so decrementer_max fits */
mtspr(SPRN_HDEC, decrementer_max);
 
-- 
2.23.0



[PATCH v1 26/55] KVM: PPC: Book3S HV: Change dec_expires to be relative to guest timebase

2021-07-25 Thread Nicholas Piggin
Change dec_expires to be relative to the guest timebase, and allow
it to be moved into low level P9 guest entry functions, to improve
SPR access scheduling.

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/include/asm/kvm_book3s.h   |  6 +++
 arch/powerpc/include/asm/kvm_host.h |  2 +-
 arch/powerpc/kvm/book3s_hv.c| 58 +
 arch/powerpc/kvm/book3s_hv_nested.c |  3 ++
 arch/powerpc/kvm/book3s_hv_p9_entry.c   | 10 -
 arch/powerpc/kvm/book3s_hv_rmhandlers.S | 14 --
 6 files changed, 49 insertions(+), 44 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_book3s.h 
b/arch/powerpc/include/asm/kvm_book3s.h
index caaa0f592d8e..15b573671f99 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -406,6 +406,12 @@ static inline ulong kvmppc_get_fault_dar(struct kvm_vcpu 
*vcpu)
return vcpu->arch.fault_dar;
 }
 
+/* Expiry time of vcpu DEC relative to host TB */
+static inline u64 kvmppc_dec_expires_host_tb(struct kvm_vcpu *vcpu)
+{
+   return vcpu->arch.dec_expires - vcpu->arch.vcore->tb_offset;
+}
+
 static inline bool is_kvmppc_resume_guest(int r)
 {
return (r == RESUME_GUEST || r == RESUME_GUEST_NV);
diff --git a/arch/powerpc/include/asm/kvm_host.h 
b/arch/powerpc/include/asm/kvm_host.h
index aee41edcfe6b..f105eaeb4521 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -742,7 +742,7 @@ struct kvm_vcpu_arch {
 
struct hrtimer dec_timer;
u64 dec_jiffies;
-   u64 dec_expires;
+   u64 dec_expires;/* Relative to guest timebase. */
unsigned long pending_exceptions;
u8 ceded;
u8 prodded;
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 4d757e4904c4..027ae0b60e70 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -2237,8 +2237,7 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, 
u64 id,
*val = get_reg_val(id, vcpu->arch.vcore->arch_compat);
break;
case KVM_REG_PPC_DEC_EXPIRY:
-   *val = get_reg_val(id, vcpu->arch.dec_expires +
-  vcpu->arch.vcore->tb_offset);
+   *val = get_reg_val(id, vcpu->arch.dec_expires);
break;
case KVM_REG_PPC_ONLINE:
*val = get_reg_val(id, vcpu->arch.online);
@@ -2490,8 +2489,7 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, 
u64 id,
r = kvmppc_set_arch_compat(vcpu, set_reg_val(id, *val));
break;
case KVM_REG_PPC_DEC_EXPIRY:
-   vcpu->arch.dec_expires = set_reg_val(id, *val) -
-   vcpu->arch.vcore->tb_offset;
+   vcpu->arch.dec_expires = set_reg_val(id, *val);
break;
case KVM_REG_PPC_ONLINE:
i = set_reg_val(id, *val);
@@ -2877,13 +2875,13 @@ static void kvmppc_set_timer(struct kvm_vcpu *vcpu)
unsigned long dec_nsec, now;
 
now = get_tb();
-   if (now > vcpu->arch.dec_expires) {
+   if (now > kvmppc_dec_expires_host_tb(vcpu)) {
/* decrementer has already gone negative */
kvmppc_core_queue_dec(vcpu);
kvmppc_core_prepare_to_enter(vcpu);
return;
}
-   dec_nsec = tb_to_ns(vcpu->arch.dec_expires - now);
+   dec_nsec = tb_to_ns(kvmppc_dec_expires_host_tb(vcpu) - now);
hrtimer_start(&vcpu->arch.dec_timer, dec_nsec, HRTIMER_MODE_REL);
vcpu->arch.timer_running = 1;
 }
@@ -3355,7 +3353,7 @@ static void post_guest_process(struct kvmppc_vcore *vc, 
bool is_master)
 */
spin_unlock(&vc->lock);
/* cancel pending dec exception if dec is positive */
-   if (now < vcpu->arch.dec_expires &&
+   if (now < kvmppc_dec_expires_host_tb(vcpu) &&
kvmppc_core_pending_dec(vcpu))
kvmppc_core_dequeue_dec(vcpu);
 
@@ -4174,20 +4172,6 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, 
u64 time_limit,
 
load_spr_state(vcpu);
 
-   /*
-* When setting DEC, we must always deal with irq_work_raise via NMI vs
-* setting DEC. The problem occurs right as we switch into guest mode
-* if a NMI hits and sets pending work and sets DEC, then that will
-* apply to the guest and not bring us back to the host.
-*
-* irq_work_raise could check a flag (or possibly LPCR[HDICE] for
-* example) and set HDEC to 1? That wouldn't solve the nested hv
-* case which needs to abort the hcall or zero the time limit.
-*
-* XXX: Another day's problem.
-*/
-   mtspr(SPRN_DEC, vcpu->arch.dec_expires - tb);
-
if (kvmhv_on_pseries()) {
/*
 * We need to save and restore the guest visible part of the
@@ -4213,6 +4197,23 @@ static int 

[PATCH v1 25/55] KVM: PPC: Book3S HV P9: Add kvmppc_stop_thread to match kvmppc_start_thread

2021-07-25 Thread Nicholas Piggin
Small cleanup makes it a bit easier to match up entry and exit
operations.

Reviewed-by: Fabiano Rosas 
Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/kvm/book3s_hv.c | 11 +--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 7654235c1507..4d757e4904c4 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -3045,6 +3045,13 @@ static void kvmppc_start_thread(struct kvm_vcpu *vcpu, 
struct kvmppc_vcore *vc)
kvmppc_ipi_thread(cpu);
 }
 
+/* Old path does this in asm */
+static void kvmppc_stop_thread(struct kvm_vcpu *vcpu)
+{
+   vcpu->cpu = -1;
+   vcpu->arch.thread_cpu = -1;
+}
+
 static void kvmppc_wait_for_nap(int n_threads)
 {
int cpu = smp_processor_id();
@@ -4260,8 +4267,6 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, 
u64 time_limit,
dec = (s32) dec;
tb = mftb();
vcpu->arch.dec_expires = dec + tb;
-   vcpu->cpu = -1;
-   vcpu->arch.thread_cpu = -1;
 
store_spr_state(vcpu);
 
@@ -4733,6 +4738,8 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 
time_limit,
 
guest_exit_irqoff();
 
+   kvmppc_stop_thread(vcpu);
+
powerpc_local_irq_pmu_restore(flags);
 
cpumask_clear_cpu(pcpu, &kvm->arch.cpu_in_guest);
-- 
2.23.0



[PATCH v1 24/55] KVM: PPC: Book3S HV P9: Improve mtmsrd scheduling by delaying MSR[EE] disable

2021-07-25 Thread Nicholas Piggin
Moving the mtmsrd after the host SPRs are saved and before the guest
SPRs start to be loaded can prevent an SPR scoreboard stall (because
the mtmsrd is L=1 type which does not cause context synchronisation).

This is also now more convenient to combine with the mtmsrd L=0
instruction to enable facilities just below, but that is not done yet.

-12 cycles (7791) POWER9 virt-mode NULL hcall

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/kvm/book3s_hv.c | 23 ++-
 1 file changed, 18 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index dedcf3ddba3b..7654235c1507 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -4119,6 +4119,18 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, 
u64 time_limit,
 
save_p9_host_os_sprs(&host_os_sprs);
 
+   /*
+* This could be combined with MSR[RI] clearing, but that expands
+* the unrecoverable window. It would be better to cover unrecoverable
+* with KVM bad interrupt handling rather than use MSR[RI] at all.
+*
+* Much more difficult and less worthwhile to combine with IR/DR
+* disable.
+*/
+   hard_irq_disable();
+   if (lazy_irq_pending())
+   return 0;
+
/* MSR bits may have been cleared by context switch */
msr = 0;
if (IS_ENABLED(CONFIG_PPC_FPU))
@@ -4618,6 +4630,7 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 
time_limit,
struct kvmppc_vcore *vc;
struct kvm *kvm = vcpu->kvm;
struct kvm_nested_guest *nested = vcpu->arch.nested;
+   unsigned long flags;
 
trace_kvmppc_run_vcpu_enter(vcpu);
 
@@ -4661,11 +4674,11 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 
time_limit,
if (kvm_is_radix(kvm))
kvmppc_prepare_radix_vcpu(vcpu, pcpu);
 
-   local_irq_disable();
-   hard_irq_disable();
+   /* flags save not required, but irq_pmu has no disable/enable API */
+   powerpc_local_irq_pmu_save(flags);
if (signal_pending(current))
goto sigpend;
-   if (lazy_irq_pending() || need_resched() || !kvm->arch.mmu_ready)
+   if (need_resched() || !kvm->arch.mmu_ready)
goto out;
 
if (!nested) {
@@ -4720,7 +4733,7 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 
time_limit,
 
guest_exit_irqoff();
 
-   local_irq_enable();
+   powerpc_local_irq_pmu_restore(flags);
 
cpumask_clear_cpu(pcpu, &kvm->arch.cpu_in_guest);
 
@@ -4778,7 +4791,7 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 
time_limit,
run->exit_reason = KVM_EXIT_INTR;
vcpu->arch.ret = -EINTR;
  out:
-   local_irq_enable();
+   powerpc_local_irq_pmu_restore(flags);
preempt_enable();
goto done;
 }
-- 
2.23.0



[PATCH v1 23/55] KVM: PPC: Book3S HV P9: Reduce mtmsrd instructions required to save host SPRs

2021-07-25 Thread Nicholas Piggin
This reduces the number of mtmsrd required to enable facility bits when
saving/restoring registers, by having the KVM code set all bits up front
rather than using individual facility functions that set their particular
MSR bits.

-42 cycles (7803) POWER9 virt-mode NULL hcall

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/kernel/process.c | 24 +++
 arch/powerpc/kvm/book3s_hv.c  | 61 ++-
 arch/powerpc/kvm/book3s_hv_p9_entry.c |  1 +
 3 files changed, 67 insertions(+), 19 deletions(-)

diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 185beb290580..00b55b38a460 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -593,6 +593,30 @@ static void save_all(struct task_struct *tsk)
msr_check_and_clear(msr_all_available);
 }
 
+void save_user_regs_kvm(void)
+{
+   unsigned long usermsr;
+
+   if (!current->thread.regs)
+   return;
+
+   usermsr = current->thread.regs->msr;
+
+   if (usermsr & MSR_FP)
+   save_fpu(current);
+
+   if (usermsr & MSR_VEC)
+   save_altivec(current);
+
+   if (usermsr & MSR_TM) {
+current->thread.tm_tfhar = mfspr(SPRN_TFHAR);
+current->thread.tm_tfiar = mfspr(SPRN_TFIAR);
+current->thread.tm_texasr = mfspr(SPRN_TEXASR);
+current->thread.regs->msr &= ~MSR_TM;
+   }
+}
+EXPORT_SYMBOL_GPL(save_user_regs_kvm);
+
 void flush_all_to_thread(struct task_struct *tsk)
 {
if (tsk->thread.regs) {
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 2e966d62a583..dedcf3ddba3b 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -4103,6 +4103,7 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, 
u64 time_limit,
struct p9_host_os_sprs host_os_sprs;
s64 dec;
u64 tb, next_timer;
+   unsigned long msr;
int trap;
 
WARN_ON_ONCE(vcpu->arch.ceded);
@@ -4114,8 +4115,23 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, 
u64 time_limit,
if (next_timer < time_limit)
time_limit = next_timer;
 
+   vcpu->arch.ceded = 0;
+
save_p9_host_os_sprs(&host_os_sprs);
 
+   /* MSR bits may have been cleared by context switch */
+   msr = 0;
+   if (IS_ENABLED(CONFIG_PPC_FPU))
+   msr |= MSR_FP;
+   if (cpu_has_feature(CPU_FTR_ALTIVEC))
+   msr |= MSR_VEC;
+   if (cpu_has_feature(CPU_FTR_VSX))
+   msr |= MSR_VSX;
+   if (cpu_has_feature(CPU_FTR_TM) ||
+   cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST))
+   msr |= MSR_TM;
+   msr = msr_check_and_set(msr);
+
kvmppc_subcore_enter_guest();
 
vc->entry_exit_map = 1;
@@ -4124,12 +4140,13 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, 
u64 time_limit,
vcpu_vpa_increment_dispatch(vcpu);
 
if (cpu_has_feature(CPU_FTR_TM) ||
-   cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST))
+   cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)) {
kvmppc_restore_tm_hv(vcpu, vcpu->arch.shregs.msr, true);
+   msr = mfmsr(); /* TM restore can update msr */
+   }
 
switch_pmu_to_guest(vcpu, &host_os_sprs);
 
-   msr_check_and_set(MSR_FP | MSR_VEC | MSR_VSX);
load_fp_state(&vcpu->arch.fp);
 #ifdef CONFIG_ALTIVEC
load_vr_state(&vcpu->arch.vr);
@@ -4238,7 +4255,6 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, 
u64 time_limit,
 
restore_p9_host_os_sprs(vcpu, &host_os_sprs);
 
-   msr_check_and_set(MSR_FP | MSR_VEC | MSR_VSX);
store_fp_state(&vcpu->arch.fp);
 #ifdef CONFIG_ALTIVEC
store_vr_state(&vcpu->arch.vr);
@@ -4767,6 +4783,8 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 
time_limit,
goto done;
 }
 
+void save_user_regs_kvm(void);
+
 static int kvmppc_vcpu_run_hv(struct kvm_vcpu *vcpu)
 {
struct kvm_run *run = vcpu->run;
@@ -4776,19 +4794,24 @@ static int kvmppc_vcpu_run_hv(struct kvm_vcpu *vcpu)
unsigned long user_tar = 0;
unsigned int user_vrsave;
struct kvm *kvm;
+   unsigned long msr;
 
if (!vcpu->arch.sane) {
run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
return -EINVAL;
}
 
+   /* No need to go into the guest when all we'll do is come back out */
+   if (signal_pending(current)) {
+   run->exit_reason = KVM_EXIT_INTR;
+   return -EINTR;
+   }
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
/*
 * Don't allow entry with a suspended transaction, because
 * the guest entry/exit code will lose it.
-* If the guest has TM enabled, save away their TM-related SPRs
-* (they will get restored by the TM unavailable interrupt).
 */
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
if (cpu_has_feature(CPU_FTR_TM) && current->thread.regs &&

[PATCH v1 22/55] KVM: PPC: Book3S HV P9: Move SPRG restore to restore_p9_host_os_sprs

2021-07-25 Thread Nicholas Piggin
Move the SPR update into its relevant helper function. This will
help with SPR scheduling improvements in later changes.

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/kvm/book3s_hv.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index f212d5013622..2e966d62a583 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -4057,6 +4057,8 @@ static void save_p9_host_os_sprs(struct p9_host_os_sprs 
*host_os_sprs)
 static void restore_p9_host_os_sprs(struct kvm_vcpu *vcpu,
struct p9_host_os_sprs *host_os_sprs)
 {
+   mtspr(SPRN_SPRG_VDSO_WRITE, local_paca->sprg_vdso);
+
mtspr(SPRN_PSPB, 0);
mtspr(SPRN_UAMOR, 0);
 
@@ -4256,8 +4258,6 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, 
u64 time_limit,
 
timer_rearm_host_dec(tb);
 
-   mtspr(SPRN_SPRG_VDSO_WRITE, local_paca->sprg_vdso);
-
kvmppc_subcore_exit_guest();
 
return trap;
-- 
2.23.0



[PATCH v1 21/55] KVM: PPC: Book3S HV: CTRL SPR does not require read-modify-write

2021-07-25 Thread Nicholas Piggin
Processors that support KVM HV do not require read-modify-write of
the CTRL SPR to set/clear their thread's runlatch. Just write 1 or 0
to it.

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/kvm/book3s_hv.c|  2 +-
 arch/powerpc/kvm/book3s_hv_rmhandlers.S | 15 ++-
 2 files changed, 7 insertions(+), 10 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 772f1e6c93e1..f212d5013622 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -4024,7 +4024,7 @@ static void load_spr_state(struct kvm_vcpu *vcpu)
 */
 
if (!(vcpu->arch.ctrl & 1))
-   mtspr(SPRN_CTRLT, mfspr(SPRN_CTRLF) & ~1);
+   mtspr(SPRN_CTRLT, 0);
 }
 
 static void store_spr_state(struct kvm_vcpu *vcpu)
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S 
b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 551ce223b40c..05be8648937d 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -775,12 +775,11 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
mtspr   SPRN_AMR,r5
mtspr   SPRN_UAMOR,r6
 
-   /* Restore state of CTRL run bit; assume 1 on entry */
+   /* Restore state of CTRL run bit; the host currently has it set to 1 */
lwz r5,VCPU_CTRL(r4)
andi.   r5,r5,1
bne 4f
-   mfspr   r6,SPRN_CTRLF
-   clrrdi  r6,r6,1
+   li  r6,0
mtspr   SPRN_CTRLT,r6
 4:
/* Secondary threads wait for primary to have done partition switch */
@@ -1203,12 +1202,12 @@ guest_bypass:
stw r0, VCPU_CPU(r9)
stw r0, VCPU_THREAD_CPU(r9)
 
-   /* Save guest CTRL register, set runlatch to 1 */
+   /* Save guest CTRL register, set runlatch to 1 if it was clear */
mfspr   r6,SPRN_CTRLF
stw r6,VCPU_CTRL(r9)
andi.   r0,r6,1
bne 4f
-   ori r6,r6,1
+   li  r6,1
mtspr   SPRN_CTRLT,r6
 4:
/*
@@ -2178,8 +2177,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_TM)
 * Also clear the runlatch bit before napping.
 */
 kvm_do_nap:
-   mfspr   r0, SPRN_CTRLF
-   clrrdi  r0, r0, 1
+   li  r0,0
mtspr   SPRN_CTRLT, r0
 
li  r0,1
@@ -2198,8 +2196,7 @@ kvm_nap_sequence: /* desired LPCR value in r5 */
 
bl  isa206_idle_insn_mayloss
 
-   mfspr   r0, SPRN_CTRLF
-   ori r0, r0, 1
+   li  r0,1
mtspr   SPRN_CTRLT, r0
 
mtspr   SPRN_SRR1, r3
-- 
2.23.0



[PATCH v1 20/55] KVM: PPC: Book3S HV P9: Factor out yield_count increment

2021-07-25 Thread Nicholas Piggin
Factor duplicated code into a helper function.

Reviewed-by: Fabiano Rosas 
Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/kvm/book3s_hv.c | 24 
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 7c75f63648d6..772f1e6c93e1 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -4081,6 +4081,16 @@ static inline bool hcall_is_xics(unsigned long req)
req == H_IPOLL || req == H_XIRR || req == H_XIRR_X;
 }
 
+static void vcpu_vpa_increment_dispatch(struct kvm_vcpu *vcpu)
+{
+   struct lppaca *lp = vcpu->arch.vpa.pinned_addr;
+   if (lp) {
+   u32 yield_count = be32_to_cpu(lp->yield_count) + 1;
+   lp->yield_count = cpu_to_be32(yield_count);
+   vcpu->arch.vpa.dirty = 1;
+   }
+}
+
 /*
  * Guest entry for POWER9 and later CPUs.
  */
@@ -4109,12 +4119,7 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, 
u64 time_limit,
vc->entry_exit_map = 1;
vc->in_guest = 1;
 
-   if (vcpu->arch.vpa.pinned_addr) {
-   struct lppaca *lp = vcpu->arch.vpa.pinned_addr;
-   u32 yield_count = be32_to_cpu(lp->yield_count) + 1;
-   lp->yield_count = cpu_to_be32(yield_count);
-   vcpu->arch.vpa.dirty = 1;
-   }
+   vcpu_vpa_increment_dispatch(vcpu);
 
if (cpu_has_feature(CPU_FTR_TM) ||
cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST))
@@ -4242,12 +4247,7 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, 
u64 time_limit,
cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST))
kvmppc_save_tm_hv(vcpu, vcpu->arch.shregs.msr, true);
 
-   if (vcpu->arch.vpa.pinned_addr) {
-   struct lppaca *lp = vcpu->arch.vpa.pinned_addr;
-   u32 yield_count = be32_to_cpu(lp->yield_count) + 1;
-   lp->yield_count = cpu_to_be32(yield_count);
-   vcpu->arch.vpa.dirty = 1;
-   }
+   vcpu_vpa_increment_dispatch(vcpu);
 
switch_pmu_to_host(vcpu, &host_os_sprs);
 
-- 
2.23.0



[PATCH v1 19/55] KVM: PPC: Book3S HV P9: Demand fault PMU SPRs when marked not inuse

2021-07-25 Thread Nicholas Piggin
The pmcregs_in_use field in the guest VPA can not be trusted to reflect
what the guest is doing with PMU SPRs, so the PMU must always be managed
(stopped) when exiting the guest, and SPR values set when entering the
guest to ensure it can't cause a covert channel or otherwise cause other
guests or the host to misbehave.

So prevent guest access to the PMU with HFSCR[PM] if pmcregs_in_use is
clear, and avoid the PMU SPR access on every partition switch. Guests
that set pmcregs_in_use incorrectly or when first setting it and using
the PMU will take a hypervisor facility unavailable interrupt that will
bring in the PMU SPRs.

-774 cycles (7759) POWER9 virt-mode NULL hcall

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/include/asm/kvm_book3s_64.h |   1 +
 arch/powerpc/include/asm/kvm_host.h  |   1 +
 arch/powerpc/kvm/book3s_hv.c | 133 +--
 arch/powerpc/kvm/book3s_hv_nested.c  |  38 +++
 4 files changed, 119 insertions(+), 54 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h 
b/arch/powerpc/include/asm/kvm_book3s_64.h
index eaf3a562bf1e..df6bed4b2a46 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -39,6 +39,7 @@ struct kvm_nested_guest {
pgd_t *shadow_pgtable;  /* our page table for this guest */
u64 l1_gr_to_hr;/* L1's addr of part'n-scoped table */
u64 process_table;  /* process table entry for this guest */
+   u64 hfscr;  /* L1's HFSCR */
long refcnt;/* number of pointers to this struct */
struct mutex tlb_lock;  /* serialize page faults and tlbies */
struct kvm_nested_guest *next;
diff --git a/arch/powerpc/include/asm/kvm_host.h 
b/arch/powerpc/include/asm/kvm_host.h
index 9f52f282b1aa..aee41edcfe6b 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -804,6 +804,7 @@ struct kvm_vcpu_arch {
struct kvmppc_vpa slb_shadow;
 
spinlock_t tbacct_lock;
+   u64 hfscr_permitted;/* A mask of permitted HFSCR facilities */
u64 busy_stolen;
u64 busy_preempt;
 
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 091b67ef6eba..7c75f63648d6 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -1421,6 +1421,23 @@ static int kvmppc_emulate_doorbell_instr(struct kvm_vcpu 
*vcpu)
return RESUME_GUEST;
 }
 
+/*
+ * If the lppaca had pmcregs_in_use clear when we exited the guest, then
+ * HFSCR_PM is cleared for next entry. If the guest then tries to access
+ * the PMU SPRs, we get this facility unavailable interrupt. Putting HFSCR_PM
+ * back in the guest HFSCR will cause the next entry to load the PMU SPRs and
+ * allow the guest access to continue.
+ */
+static int kvmppc_pmu_unavailable(struct kvm_vcpu *vcpu)
+{
+   if (!(vcpu->arch.hfscr_permitted & HFSCR_PM))
+   return EMULATE_FAIL;
+
+   vcpu->arch.hfscr |= HFSCR_PM;
+
+   return RESUME_GUEST;
+}
+
 static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu,
 struct task_struct *tsk)
 {
@@ -1705,16 +1722,22 @@ XXX benchmark guest exits
 * to emulate.
 * Otherwise, we just generate a program interrupt to the guest.
 */
-   case BOOK3S_INTERRUPT_H_FAC_UNAVAIL:
+   case BOOK3S_INTERRUPT_H_FAC_UNAVAIL: {
r = EMULATE_FAIL;
-   if (((vcpu->arch.hfscr >> 56) == FSCR_MSGP_LG) &&
-   cpu_has_feature(CPU_FTR_ARCH_300))
-   r = kvmppc_emulate_doorbell_instr(vcpu);
+   if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+   unsigned long cause = vcpu->arch.hfscr >> 56;
+
+   if (cause == FSCR_MSGP_LG)
+   r = kvmppc_emulate_doorbell_instr(vcpu);
+   if (cause == FSCR_PM_LG)
+   r = kvmppc_pmu_unavailable(vcpu);
+   }
if (r == EMULATE_FAIL) {
kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
r = RESUME_GUEST;
}
break;
+   }
 
case BOOK3S_INTERRUPT_HV_RM_HARD:
r = RESUME_PASSTHROUGH;
@@ -2723,6 +2746,13 @@ static int kvmppc_core_vcpu_create_hv(struct kvm_vcpu 
*vcpu)
if (cpu_has_feature(CPU_FTR_TM_COMP))
vcpu->arch.hfscr |= HFSCR_TM;
 
+   vcpu->arch.hfscr_permitted = vcpu->arch.hfscr;
+
+   /*
+* PM is demand-faulted so start with it clear.
+*/
+   vcpu->arch.hfscr &= ~HFSCR_PM;
+
kvmppc_mmu_book3s_hv_init(vcpu);
 
vcpu->arch.state = KVMPPC_VCPU_NOTREADY;
@@ -3793,6 +3823,14 @@ static void freeze_pmu(unsigned long mmcr0, unsigned 
long mmcra)
 static void switch_pmu_to_guest(struct kvm_vcpu *vcpu,
 

[PATCH v1 18/55] KVM: PPC: Book3S HV P9: Factor PMU save/load into context switch functions

2021-07-25 Thread Nicholas Piggin
Rather than guest/host save/restore functions, implement context switch
functions that take care of details like the VPA update for nested.

The reason to split these kinds of helpers into explicit save/load
functions is mainly to schedule SPR access nicely, but PMU is a special
case where the load requires mtSPR (to stop counters) and other
difficulties, so there's less possibility to schedule those nicely. The
SPR accesses also have side-effects if the PMU is running, and in later
changes we keep the host PMU running as long as possible so this code
can be better profiled, which also complicates scheduling.

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/kvm/book3s_hv.c | 61 +---
 1 file changed, 28 insertions(+), 33 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index d20b579ddcdf..091b67ef6eba 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -3790,7 +3790,8 @@ static void freeze_pmu(unsigned long mmcr0, unsigned long 
mmcra)
isync();
 }
 
-static void save_p9_host_pmu(struct p9_host_os_sprs *host_os_sprs)
+static void switch_pmu_to_guest(struct kvm_vcpu *vcpu,
+   struct p9_host_os_sprs *host_os_sprs)
 {
if (ppc_get_pmu_inuse()) {
/*
@@ -3824,10 +3825,21 @@ static void save_p9_host_pmu(struct p9_host_os_sprs 
*host_os_sprs)
host_os_sprs->sier3 = mfspr(SPRN_SIER3);
}
}
-}
 
-static void load_p9_guest_pmu(struct kvm_vcpu *vcpu)
-{
+#ifdef CONFIG_PPC_PSERIES
+   if (kvmhv_on_pseries()) {
+   barrier();
+   if (vcpu->arch.vpa.pinned_addr) {
+   struct lppaca *lp = vcpu->arch.vpa.pinned_addr;
+   get_lppaca()->pmcregs_in_use = lp->pmcregs_in_use;
+   } else {
+   get_lppaca()->pmcregs_in_use = 1;
+   }
+   barrier();
+   }
+#endif
+
+   /* load guest */
mtspr(SPRN_PMC1, vcpu->arch.pmc[0]);
mtspr(SPRN_PMC2, vcpu->arch.pmc[1]);
mtspr(SPRN_PMC3, vcpu->arch.pmc[2]);
@@ -3852,7 +3864,8 @@ static void load_p9_guest_pmu(struct kvm_vcpu *vcpu)
/* No isync necessary because we're starting counters */
 }
 
-static void save_p9_guest_pmu(struct kvm_vcpu *vcpu)
+static void switch_pmu_to_host(struct kvm_vcpu *vcpu,
+   struct p9_host_os_sprs *host_os_sprs)
 {
struct lppaca *lp;
int save_pmu = 1;
@@ -3887,10 +3900,15 @@ static void save_p9_guest_pmu(struct kvm_vcpu *vcpu)
} else {
freeze_pmu(mfspr(SPRN_MMCR0), mfspr(SPRN_MMCRA));
}
-}
 
-static void load_p9_host_pmu(struct p9_host_os_sprs *host_os_sprs)
-{
+#ifdef CONFIG_PPC_PSERIES
+   if (kvmhv_on_pseries()) {
+   barrier();
+   get_lppaca()->pmcregs_in_use = ppc_get_pmu_inuse();
+   barrier();
+   }
+#endif
+
if (ppc_get_pmu_inuse()) {
mtspr(SPRN_PMC1, host_os_sprs->pmc1);
mtspr(SPRN_PMC2, host_os_sprs->pmc2);
@@ -4019,8 +4037,6 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, 
u64 time_limit,
 
save_p9_host_os_sprs(&host_os_sprs);
 
-   save_p9_host_pmu(&host_os_sprs);
-
kvmppc_subcore_enter_guest();
 
vc->entry_exit_map = 1;
@@ -4037,19 +4053,7 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, 
u64 time_limit,
cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST))
kvmppc_restore_tm_hv(vcpu, vcpu->arch.shregs.msr, true);
 
-#ifdef CONFIG_PPC_PSERIES
-   if (kvmhv_on_pseries()) {
-   barrier();
-   if (vcpu->arch.vpa.pinned_addr) {
-   struct lppaca *lp = vcpu->arch.vpa.pinned_addr;
-   get_lppaca()->pmcregs_in_use = lp->pmcregs_in_use;
-   } else {
-   get_lppaca()->pmcregs_in_use = 1;
-   }
-   barrier();
-   }
-#endif
-   load_p9_guest_pmu(vcpu);
+   switch_pmu_to_guest(vcpu, &host_os_sprs);
 
msr_check_and_set(MSR_FP | MSR_VEC | MSR_VSX);
load_fp_state(&vcpu->arch.fp);
@@ -4178,14 +4182,7 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, 
u64 time_limit,
vcpu->arch.vpa.dirty = 1;
}
 
-   save_p9_guest_pmu(vcpu);
-#ifdef CONFIG_PPC_PSERIES
-   if (kvmhv_on_pseries()) {
-   barrier();
-   get_lppaca()->pmcregs_in_use = ppc_get_pmu_inuse();
-   barrier();
-   }
-#endif
+   switch_pmu_to_host(vcpu, &host_os_sprs);
 
vc->entry_exit_map = 0x101;
vc->in_guest = 0;
@@ -4194,8 +4191,6 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, 
u64 time_limit,
 
mtspr(SPRN_SPRG_VDSO_WRITE, local_paca->sprg_vdso);
 
-   load_p9_host_pmu(&host_os_sprs);
-
kvmppc_subcore_exit_guest();
 
return trap;
-- 
2.23.0



[PATCH v1 17/55] KVM: PPC: Book3S HV P9: Implement PMU save/restore in C

2021-07-25 Thread Nicholas Piggin
Implement the P9 path PMU save/restore code in C, and remove the
POWER9/10 code from the P7/8 path assembly.

-449 cycles (8533) POWER9 virt-mode NULL hcall

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/include/asm/asm-prototypes.h |   5 -
 arch/powerpc/kvm/book3s_hv.c  | 205 --
 arch/powerpc/kvm/book3s_hv_interrupts.S   |  13 +-
 arch/powerpc/kvm/book3s_hv_rmhandlers.S   |  43 +
 4 files changed, 200 insertions(+), 66 deletions(-)

diff --git a/arch/powerpc/include/asm/asm-prototypes.h 
b/arch/powerpc/include/asm/asm-prototypes.h
index 222823861a67..41b8a1e1144a 100644
--- a/arch/powerpc/include/asm/asm-prototypes.h
+++ b/arch/powerpc/include/asm/asm-prototypes.h
@@ -141,11 +141,6 @@ static inline void kvmppc_restore_tm_hv(struct kvm_vcpu 
*vcpu, u64 msr,
bool preserve_nv) { }
 #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
 
-void kvmhv_save_host_pmu(void);
-void kvmhv_load_host_pmu(void);
-void kvmhv_save_guest_pmu(struct kvm_vcpu *vcpu, bool pmu_in_use);
-void kvmhv_load_guest_pmu(struct kvm_vcpu *vcpu);
-
 void kvmppc_p9_enter_guest(struct kvm_vcpu *vcpu);
 
 long kvmppc_h_set_dabr(struct kvm_vcpu *vcpu, unsigned long dabr);
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 2eef708c4354..d20b579ddcdf 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -3735,6 +3735,188 @@ static noinline void kvmppc_run_core(struct 
kvmppc_vcore *vc)
trace_kvmppc_run_core(vc, 1);
 }
 
+/*
+ * Privileged (non-hypervisor) host registers to save.
+ */
+struct p9_host_os_sprs {
+   unsigned long dscr;
+   unsigned long tidr;
+   unsigned long iamr;
+   unsigned long amr;
+   unsigned long fscr;
+
+   unsigned int pmc1;
+   unsigned int pmc2;
+   unsigned int pmc3;
+   unsigned int pmc4;
+   unsigned int pmc5;
+   unsigned int pmc6;
+   unsigned long mmcr0;
+   unsigned long mmcr1;
+   unsigned long mmcr2;
+   unsigned long mmcr3;
+   unsigned long mmcra;
+   unsigned long siar;
+   unsigned long sier1;
+   unsigned long sier2;
+   unsigned long sier3;
+   unsigned long sdar;
+};
+
+static void freeze_pmu(unsigned long mmcr0, unsigned long mmcra)
+{
+   if (!(mmcr0 & MMCR0_FC))
+   goto do_freeze;
+   if (mmcra & MMCRA_SAMPLE_ENABLE)
+   goto do_freeze;
+   if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+   if (!(mmcr0 & MMCR0_PMCCEXT))
+   goto do_freeze;
+   if (!(mmcra & MMCRA_BHRB_DISABLE))
+   goto do_freeze;
+   }
+   return;
+
+do_freeze:
+   mmcr0 = MMCR0_FC;
+   mmcra = 0;
+   if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+   mmcr0 |= MMCR0_PMCCEXT;
+   mmcra = MMCRA_BHRB_DISABLE;
+   }
+
+   mtspr(SPRN_MMCR0, mmcr0);
+   mtspr(SPRN_MMCRA, mmcra);
+   isync();
+}
+
+static void save_p9_host_pmu(struct p9_host_os_sprs *host_os_sprs)
+{
+   if (ppc_get_pmu_inuse()) {
+   /*
+* It might be better to put PMU handling (at least for the
+* host) in the perf subsystem because it knows more about what
+* is being used.
+*/
+
+   /* POWER9, POWER10 do not implement HPMC or SPMC */
+
+   host_os_sprs->mmcr0 = mfspr(SPRN_MMCR0);
+   host_os_sprs->mmcra = mfspr(SPRN_MMCRA);
+
+   freeze_pmu(host_os_sprs->mmcr0, host_os_sprs->mmcra);
+
+   host_os_sprs->pmc1 = mfspr(SPRN_PMC1);
+   host_os_sprs->pmc2 = mfspr(SPRN_PMC2);
+   host_os_sprs->pmc3 = mfspr(SPRN_PMC3);
+   host_os_sprs->pmc4 = mfspr(SPRN_PMC4);
+   host_os_sprs->pmc5 = mfspr(SPRN_PMC5);
+   host_os_sprs->pmc6 = mfspr(SPRN_PMC6);
+   host_os_sprs->mmcr1 = mfspr(SPRN_MMCR1);
+   host_os_sprs->mmcr2 = mfspr(SPRN_MMCR2);
+   host_os_sprs->sdar = mfspr(SPRN_SDAR);
+   host_os_sprs->siar = mfspr(SPRN_SIAR);
+   host_os_sprs->sier1 = mfspr(SPRN_SIER);
+
+   if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+   host_os_sprs->mmcr3 = mfspr(SPRN_MMCR3);
+   host_os_sprs->sier2 = mfspr(SPRN_SIER2);
+   host_os_sprs->sier3 = mfspr(SPRN_SIER3);
+   }
+   }
+}
+
+static void load_p9_guest_pmu(struct kvm_vcpu *vcpu)
+{
+   mtspr(SPRN_PMC1, vcpu->arch.pmc[0]);
+   mtspr(SPRN_PMC2, vcpu->arch.pmc[1]);
+   mtspr(SPRN_PMC3, vcpu->arch.pmc[2]);
+   mtspr(SPRN_PMC4, vcpu->arch.pmc[3]);
+   mtspr(SPRN_PMC5, vcpu->arch.pmc[4]);
+   mtspr(SPRN_PMC6, vcpu->arch.pmc[5]);
+   mtspr(SPRN_MMCR1, vcpu->arch.mmcr[1]);
+   mtspr(SPRN_MMCR2, vcpu->arch.mmcr[2]);
+   mtspr(SPRN_SDAR, vcpu->arch.sdar);
+   mtspr(SPRN_SIAR, vcpu->arch.siar);
+   mtspr(SPRN_SIER, 

[PATCH v1 16/55] powerpc/64s: Implement PMU override command line option

2021-07-25 Thread Nicholas Piggin
It can be useful in simulators (with very constrained environments)
to allow some PMCs to run from boot so they can be sampled directly
by a test harness, rather than having to run perf.

A previous change freezes counters at boot by default, so provide
a boot time option to un-freeze (plus a bit more flexibility).

Signed-off-by: Nicholas Piggin 
---
 .../admin-guide/kernel-parameters.txt |  7 
 arch/powerpc/perf/core-book3s.c   | 35 +++
 2 files changed, 42 insertions(+)

diff --git a/Documentation/admin-guide/kernel-parameters.txt 
b/Documentation/admin-guide/kernel-parameters.txt
index bdb22006f713..96b7d0ebaa40 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -4089,6 +4089,13 @@
Override pmtimer IOPort with a hex value.
e.g. pmtmr=0x508
 
+   pmu=[PPC] Manually enable the PMU.
+   Enable the PMU by clearing the FC (freeze) bit in MMCR0.
+   This option is implemented for Book3S processors.
+   If a number is given, then MMCR1 is set to that number,
+   otherwise (e.g., 'pmu=on'), it is left 0. The perf
+   subsystem is disabled if this option is used.
+
pm_debug_messages   [SUSPEND,KNL]
Enable suspend/resume debug messages during boot up.
 
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 65795cadb475..e7cef4fe17d7 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -2428,8 +2428,24 @@ int register_power_pmu(struct power_pmu *pmu)
 }
 
 #ifdef CONFIG_PPC64
+static bool pmu_override = false;
+static unsigned long pmu_override_val;
+static void do_pmu_override(void *data)
+{
+   ppc_set_pmu_inuse(1);
+   if (pmu_override_val)
+   mtspr(SPRN_MMCR1, pmu_override_val);
+   mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) & ~MMCR0_FC);
+}
+
 static int __init init_ppc64_pmu(void)
 {
+   if (cpu_has_feature(CPU_FTR_HVMODE) && pmu_override) {
+   printk(KERN_WARNING "perf: disabling perf due to pmu= command 
line option.\n");
+   on_each_cpu(do_pmu_override, NULL, 1);
+   return 0;
+   }
+
/* run through all the pmu drivers one at a time */
if (!init_power5_pmu())
return 0;
@@ -2451,4 +2467,23 @@ static int __init init_ppc64_pmu(void)
return init_generic_compat_pmu();
 }
 early_initcall(init_ppc64_pmu);
+
+static int __init pmu_setup(char *str)
+{
+   unsigned long val;
+
+   if (!early_cpu_has_feature(CPU_FTR_HVMODE))
+   return 0;
+
+   pmu_override = true;
+
+   if (kstrtoul(str, 0, &val))
+   val = 0;
+
+   pmu_override_val = val;
+
+   return 1;
+}
+__setup("pmu=", pmu_setup);
+
 #endif
-- 
2.23.0



[PATCH v1 15/55] powerpc/64s: Always set PMU control registers to frozen/disabled when not in use

2021-07-25 Thread Nicholas Piggin
KVM PMU management code looks for particular frozen/disabled bits in
the PMU registers so it knows whether it must clear them when coming
out of a guest or not. Setting this up helps KVM make these optimisations
without getting confused. Longer term the better approach might be to
move guest/host PMU switching to the perf subsystem.

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/kernel/cpu_setup_power.c | 4 ++--
 arch/powerpc/kernel/dt_cpu_ftrs.c | 6 +++---
 arch/powerpc/kvm/book3s_hv.c  | 5 +
 3 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/kernel/cpu_setup_power.c 
b/arch/powerpc/kernel/cpu_setup_power.c
index a29dc8326622..3dc61e203f37 100644
--- a/arch/powerpc/kernel/cpu_setup_power.c
+++ b/arch/powerpc/kernel/cpu_setup_power.c
@@ -109,7 +109,7 @@ static void init_PMU_HV_ISA207(void)
 static void init_PMU(void)
 {
mtspr(SPRN_MMCRA, 0);
-   mtspr(SPRN_MMCR0, 0);
+   mtspr(SPRN_MMCR0, MMCR0_FC);
mtspr(SPRN_MMCR1, 0);
mtspr(SPRN_MMCR2, 0);
 }
@@ -123,7 +123,7 @@ static void init_PMU_ISA31(void)
 {
mtspr(SPRN_MMCR3, 0);
mtspr(SPRN_MMCRA, MMCRA_BHRB_DISABLE);
-   mtspr(SPRN_MMCR0, MMCR0_PMCCEXT);
+   mtspr(SPRN_MMCR0, MMCR0_FC | MMCR0_PMCCEXT);
 }
 
 /*
diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c 
b/arch/powerpc/kernel/dt_cpu_ftrs.c
index 38ea20fadc4a..a6bb0ee179cd 100644
--- a/arch/powerpc/kernel/dt_cpu_ftrs.c
+++ b/arch/powerpc/kernel/dt_cpu_ftrs.c
@@ -353,7 +353,7 @@ static void init_pmu_power8(void)
}
 
mtspr(SPRN_MMCRA, 0);
-   mtspr(SPRN_MMCR0, 0);
+   mtspr(SPRN_MMCR0, MMCR0_FC);
mtspr(SPRN_MMCR1, 0);
mtspr(SPRN_MMCR2, 0);
mtspr(SPRN_MMCRS, 0);
@@ -392,7 +392,7 @@ static void init_pmu_power9(void)
mtspr(SPRN_MMCRC, 0);
 
mtspr(SPRN_MMCRA, 0);
-   mtspr(SPRN_MMCR0, 0);
+   mtspr(SPRN_MMCR0, MMCR0_FC);
mtspr(SPRN_MMCR1, 0);
mtspr(SPRN_MMCR2, 0);
 }
@@ -428,7 +428,7 @@ static void init_pmu_power10(void)
 
mtspr(SPRN_MMCR3, 0);
mtspr(SPRN_MMCRA, MMCRA_BHRB_DISABLE);
-   mtspr(SPRN_MMCR0, MMCR0_PMCCEXT);
+   mtspr(SPRN_MMCR0, MMCR0_FC | MMCR0_PMCCEXT);
 }
 
 static int __init feat_enable_pmu_power10(struct dt_cpu_feature *f)
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index ab89db561c85..2eef708c4354 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -2691,6 +2691,11 @@ static int kvmppc_core_vcpu_create_hv(struct kvm_vcpu 
*vcpu)
 #endif
 #endif
vcpu->arch.mmcr[0] = MMCR0_FC;
+   if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+   vcpu->arch.mmcr[0] |= MMCR0_PMCCEXT;
+   vcpu->arch.mmcra = MMCRA_BHRB_DISABLE;
+   }
+
vcpu->arch.ctrl = CTRL_RUNLATCH;
/* default to host PVR, since we can't spoof it */
kvmppc_set_pvr_hv(vcpu, mfspr(SPRN_PVR));
-- 
2.23.0



[PATCH v1 14/55] KVM: PPC: Book3S HV: Don't always save PMU for guest capable of nesting

2021-07-25 Thread Nicholas Piggin
Revert the workaround added by commit 63279eeb7f93a ("KVM: PPC: Book3S
HV: Always save guest pmu for guest capable of nesting").

Nested capable guests running with the earlier commit ("KVM: PPC: Book3S
HV Nested: Indicate guest PMU in-use in VPA") will now indicate the PMU
in-use status of their guests, which means the parent does not need to
unconditionally save the PMU for nested capable guests.

This will cause the PMU to break for nested guests when running older
nested hypervisor guests under a kernel with this change. It's unclear
whether there's an easy way to avoid that, so this change could wait a
release or so for the fix to filter into stable kernels.

-134 cycles (8982) POWER9 virt-mode NULL hcall

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/kvm/book3s_hv.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index e7f8cc04944b..ab89db561c85 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -4003,8 +4003,6 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, 
u64 time_limit,
vcpu->arch.vpa.dirty = 1;
save_pmu = lp->pmcregs_in_use;
}
-   /* Must save pmu if this guest is capable of running nested guests */
-   save_pmu |= nesting_enabled(vcpu->kvm);
 
kvmhv_save_guest_pmu(vcpu, save_pmu);
 #ifdef CONFIG_PPC_PSERIES
-- 
2.23.0



[PATCH v1 13/55] powerpc/64s: Keep AMOR SPR a constant ~0 at runtime

2021-07-25 Thread Nicholas Piggin
This register controls supervisor SPR modifications, and as such is only
relevant for KVM. KVM always sets AMOR to ~0 on guest entry, and never
restores it coming back out to the host, so it can be kept constant and
avoid the mtSPR in KVM guest entry.

-21 cycles (9116) POWER9 virt-mode NULL hcall

Reviewed-by: Fabiano Rosas 
Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/kernel/cpu_setup_power.c|  8 
 arch/powerpc/kernel/dt_cpu_ftrs.c|  2 ++
 arch/powerpc/kvm/book3s_hv_p9_entry.c|  2 --
 arch/powerpc/kvm/book3s_hv_rmhandlers.S  |  2 --
 arch/powerpc/mm/book3s64/radix_pgtable.c | 15 ---
 arch/powerpc/platforms/powernv/idle.c|  8 +++-
 6 files changed, 13 insertions(+), 24 deletions(-)

diff --git a/arch/powerpc/kernel/cpu_setup_power.c 
b/arch/powerpc/kernel/cpu_setup_power.c
index 3cca88ee96d7..a29dc8326622 100644
--- a/arch/powerpc/kernel/cpu_setup_power.c
+++ b/arch/powerpc/kernel/cpu_setup_power.c
@@ -137,6 +137,7 @@ void __setup_cpu_power7(unsigned long offset, struct 
cpu_spec *t)
return;
 
mtspr(SPRN_LPID, 0);
+   mtspr(SPRN_AMOR, ~0);
mtspr(SPRN_PCR, PCR_MASK);
init_LPCR_ISA206(mfspr(SPRN_LPCR), LPCR_LPES1 >> LPCR_LPES_SH);
 }
@@ -150,6 +151,7 @@ void __restore_cpu_power7(void)
return;
 
mtspr(SPRN_LPID, 0);
+   mtspr(SPRN_AMOR, ~0);
mtspr(SPRN_PCR, PCR_MASK);
init_LPCR_ISA206(mfspr(SPRN_LPCR), LPCR_LPES1 >> LPCR_LPES_SH);
 }
@@ -164,6 +166,7 @@ void __setup_cpu_power8(unsigned long offset, struct 
cpu_spec *t)
return;
 
mtspr(SPRN_LPID, 0);
+   mtspr(SPRN_AMOR, ~0);
mtspr(SPRN_PCR, PCR_MASK);
init_LPCR_ISA206(mfspr(SPRN_LPCR) | LPCR_PECEDH, 0); /* LPES = 0 */
init_HFSCR();
@@ -184,6 +187,7 @@ void __restore_cpu_power8(void)
return;
 
mtspr(SPRN_LPID, 0);
+   mtspr(SPRN_AMOR, ~0);
mtspr(SPRN_PCR, PCR_MASK);
init_LPCR_ISA206(mfspr(SPRN_LPCR) | LPCR_PECEDH, 0); /* LPES = 0 */
init_HFSCR();
@@ -202,6 +206,7 @@ void __setup_cpu_power9(unsigned long offset, struct 
cpu_spec *t)
mtspr(SPRN_PSSCR, 0);
mtspr(SPRN_LPID, 0);
mtspr(SPRN_PID, 0);
+   mtspr(SPRN_AMOR, ~0);
mtspr(SPRN_PCR, PCR_MASK);
init_LPCR_ISA300((mfspr(SPRN_LPCR) | LPCR_PECEDH | LPCR_PECE_HVEE |\
 LPCR_HVICE | LPCR_HEIC) & ~(LPCR_UPRT | LPCR_HR), 0);
@@ -223,6 +228,7 @@ void __restore_cpu_power9(void)
mtspr(SPRN_PSSCR, 0);
mtspr(SPRN_LPID, 0);
mtspr(SPRN_PID, 0);
+   mtspr(SPRN_AMOR, ~0);
mtspr(SPRN_PCR, PCR_MASK);
init_LPCR_ISA300((mfspr(SPRN_LPCR) | LPCR_PECEDH | LPCR_PECE_HVEE |\
 LPCR_HVICE | LPCR_HEIC) & ~(LPCR_UPRT | LPCR_HR), 0);
@@ -242,6 +248,7 @@ void __setup_cpu_power10(unsigned long offset, struct 
cpu_spec *t)
mtspr(SPRN_PSSCR, 0);
mtspr(SPRN_LPID, 0);
mtspr(SPRN_PID, 0);
+   mtspr(SPRN_AMOR, ~0);
mtspr(SPRN_PCR, PCR_MASK);
init_LPCR_ISA300((mfspr(SPRN_LPCR) | LPCR_PECEDH | LPCR_PECE_HVEE |\
 LPCR_HVICE | LPCR_HEIC) & ~(LPCR_UPRT | LPCR_HR), 0);
@@ -264,6 +271,7 @@ void __restore_cpu_power10(void)
mtspr(SPRN_PSSCR, 0);
mtspr(SPRN_LPID, 0);
mtspr(SPRN_PID, 0);
+   mtspr(SPRN_AMOR, ~0);
mtspr(SPRN_PCR, PCR_MASK);
init_LPCR_ISA300((mfspr(SPRN_LPCR) | LPCR_PECEDH | LPCR_PECE_HVEE |\
 LPCR_HVICE | LPCR_HEIC) & ~(LPCR_UPRT | LPCR_HR), 0);
diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c 
b/arch/powerpc/kernel/dt_cpu_ftrs.c
index af95f337e54b..38ea20fadc4a 100644
--- a/arch/powerpc/kernel/dt_cpu_ftrs.c
+++ b/arch/powerpc/kernel/dt_cpu_ftrs.c
@@ -80,6 +80,7 @@ static void __restore_cpu_cpufeatures(void)
mtspr(SPRN_LPCR, system_registers.lpcr);
if (hv_mode) {
mtspr(SPRN_LPID, 0);
+   mtspr(SPRN_AMOR, ~0);
mtspr(SPRN_HFSCR, system_registers.hfscr);
mtspr(SPRN_PCR, system_registers.pcr);
}
@@ -216,6 +217,7 @@ static int __init feat_enable_hv(struct dt_cpu_feature *f)
}
 
mtspr(SPRN_LPID, 0);
+   mtspr(SPRN_AMOR, ~0);
 
lpcr = mfspr(SPRN_LPCR);
lpcr &=  ~LPCR_LPES0; /* HV external interrupts */
diff --git a/arch/powerpc/kvm/book3s_hv_p9_entry.c 
b/arch/powerpc/kvm/book3s_hv_p9_entry.c
index bd8cf0a65ce8..a7f63082b4e3 100644
--- a/arch/powerpc/kvm/book3s_hv_p9_entry.c
+++ b/arch/powerpc/kvm/book3s_hv_p9_entry.c
@@ -286,8 +286,6 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 
time_limit, unsigned long lpc
mtspr(SPRN_SPRG2, vcpu->arch.shregs.sprg2);
mtspr(SPRN_SPRG3, vcpu->arch.shregs.sprg3);
 
-   mtspr(SPRN_AMOR, ~0UL);
-
local_paca->kvm_hstate.in_guest = KVM_GUEST_MODE_HV_P9;
 
/*
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S 

[PATCH v1 12/55] KVM: PPC: Book3S HV: POWER10 enable HAIL when running radix guests

2021-07-25 Thread Nicholas Piggin
HV interrupts may be taken with the MMU enabled when radix guests are
running. Enable LPCR[HAIL] on ISA v3.1 processors for radix guests.
Make this depend on the host LPCR[HAIL] being enabled. Currently that is
always enabled, but having this test means any issue that might require
LPCR[HAIL] to be disabled in the host will not have to be duplicated in
KVM.

-1380 cycles on P10 NULL hcall entry+exit

Reviewed-by: Fabiano Rosas 
Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/kvm/book3s_hv.c | 29 +
 1 file changed, 25 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 0cef578930f9..e7f8cc04944b 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -5004,6 +5004,8 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
  */
 int kvmppc_switch_mmu_to_hpt(struct kvm *kvm)
 {
+   unsigned long lpcr, lpcr_mask;
+
if (nesting_enabled(kvm))
kvmhv_release_all_nested(kvm);
kvmppc_rmap_reset(kvm);
@@ -5013,8 +5015,13 @@ int kvmppc_switch_mmu_to_hpt(struct kvm *kvm)
kvm->arch.radix = 0;
spin_unlock(&kvm->mmu_lock);
kvmppc_free_radix(kvm);
-   kvmppc_update_lpcr(kvm, LPCR_VPM1,
-  LPCR_VPM1 | LPCR_UPRT | LPCR_GTSE | LPCR_HR);
+
+   lpcr = LPCR_VPM1;
+   lpcr_mask = LPCR_VPM1 | LPCR_UPRT | LPCR_GTSE | LPCR_HR;
+   if (cpu_has_feature(CPU_FTR_ARCH_31))
+   lpcr_mask |= LPCR_HAIL;
+   kvmppc_update_lpcr(kvm, lpcr, lpcr_mask);
+
return 0;
 }
 
@@ -5024,6 +5031,7 @@ int kvmppc_switch_mmu_to_hpt(struct kvm *kvm)
  */
 int kvmppc_switch_mmu_to_radix(struct kvm *kvm)
 {
+   unsigned long lpcr, lpcr_mask;
int err;
 
err = kvmppc_init_vm_radix(kvm);
@@ -5035,8 +5043,17 @@ int kvmppc_switch_mmu_to_radix(struct kvm *kvm)
kvm->arch.radix = 1;
spin_unlock(&kvm->mmu_lock);
kvmppc_free_hpt(&kvm->arch.hpt);
-   kvmppc_update_lpcr(kvm, LPCR_UPRT | LPCR_GTSE | LPCR_HR,
-  LPCR_VPM1 | LPCR_UPRT | LPCR_GTSE | LPCR_HR);
+
+   lpcr = LPCR_UPRT | LPCR_GTSE | LPCR_HR;
+   lpcr_mask = LPCR_VPM1 | LPCR_UPRT | LPCR_GTSE | LPCR_HR;
+   if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+   lpcr_mask |= LPCR_HAIL;
+   if (cpu_has_feature(CPU_FTR_HVMODE) &&
+   (kvm->arch.host_lpcr & LPCR_HAIL))
+   lpcr |= LPCR_HAIL;
+   }
+   kvmppc_update_lpcr(kvm, lpcr, lpcr_mask);
+
return 0;
 }
 
@@ -5200,6 +5217,10 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm)
kvm->arch.mmu_ready = 1;
lpcr &= ~LPCR_VPM1;
lpcr |= LPCR_UPRT | LPCR_GTSE | LPCR_HR;
+   if (cpu_has_feature(CPU_FTR_HVMODE) &&
+   cpu_has_feature(CPU_FTR_ARCH_31) &&
+   (kvm->arch.host_lpcr & LPCR_HAIL))
+   lpcr |= LPCR_HAIL;
ret = kvmppc_init_vm_radix(kvm);
if (ret) {
kvmppc_free_lpid(kvm->arch.lpid);
-- 
2.23.0



[PATCH v1 11/55] powerpc/time: add API for KVM to re-arm the host timer/decrementer

2021-07-25 Thread Nicholas Piggin
Rather than have KVM look up the host timer and fiddle with the
irq-work internal details, have the powerpc/time.c code provide a
function for KVM to re-arm the Linux timer code when exiting a
guest.

This implementation improves on the existing code by marking a
decrementer interrupt as soft-pending if a timer has expired, rather
than setting DEC to a negative value, which tended to cause host
timers to take two interrupts (first hdec to exit the guest, then
the immediate dec).

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/include/asm/time.h | 16 +++---
 arch/powerpc/kernel/time.c  | 52 +++--
 arch/powerpc/kvm/book3s_hv.c|  7 ++---
 3 files changed, 49 insertions(+), 26 deletions(-)

diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h
index 69b6be617772..924b2157882f 100644
--- a/arch/powerpc/include/asm/time.h
+++ b/arch/powerpc/include/asm/time.h
@@ -99,18 +99,6 @@ extern void div128_by_32(u64 dividend_high, u64 dividend_low,
 extern void secondary_cpu_time_init(void);
 extern void __init time_init(void);
 
-#ifdef CONFIG_PPC64
-static inline unsigned long test_irq_work_pending(void)
-{
-   unsigned long x;
-
-   asm volatile("lbz %0,%1(13)"
-   : "=r" (x)
-   : "i" (offsetof(struct paca_struct, irq_work_pending)));
-   return x;
-}
-#endif
-
 DECLARE_PER_CPU(u64, decrementers_next_tb);
 
 static inline u64 timer_get_next_tb(void)
@@ -118,6 +106,10 @@ static inline u64 timer_get_next_tb(void)
return __this_cpu_read(decrementers_next_tb);
 }
 
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+void timer_rearm_host_dec(u64 now);
+#endif
+
 /* Convert timebase ticks to nanoseconds */
 unsigned long long tb_to_ns(unsigned long long tb_ticks);
 
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 72d872b49167..016828b7401b 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -499,6 +499,16 @@ EXPORT_SYMBOL(profile_pc);
  * 64-bit uses a byte in the PACA, 32-bit uses a per-cpu variable...
  */
 #ifdef CONFIG_PPC64
+static inline unsigned long test_irq_work_pending(void)
+{
+   unsigned long x;
+
+   asm volatile("lbz %0,%1(13)"
+   : "=r" (x)
+   : "i" (offsetof(struct paca_struct, irq_work_pending)));
+   return x;
+}
+
 static inline void set_irq_work_pending_flag(void)
 {
asm volatile("stb %0,%1(13)" : :
@@ -542,13 +552,44 @@ void arch_irq_work_raise(void)
preempt_enable();
 }
 
+static void set_dec_or_work(u64 val)
+{
+   set_dec(val);
+   /* We may have raced with new irq work */
+   if (unlikely(test_irq_work_pending()))
+   set_dec(1);
+}
+
 #else  /* CONFIG_IRQ_WORK */
 
 #define test_irq_work_pending()0
 #define clear_irq_work_pending()
 
+static void set_dec_or_work(u64 val)
+{
+   set_dec(val);
+}
 #endif /* CONFIG_IRQ_WORK */
 
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+void timer_rearm_host_dec(u64 now)
+{
+   u64 *next_tb = this_cpu_ptr(&decrementers_next_tb);
+
+   WARN_ON_ONCE(!arch_irqs_disabled());
+   WARN_ON_ONCE(mfmsr() & MSR_EE);
+
+   if (now >= *next_tb) {
+   local_paca->irq_happened |= PACA_IRQ_DEC;
+   } else {
+   now = *next_tb - now;
+   if (now <= decrementer_max)
+   set_dec_or_work(now);
+   }
+}
+EXPORT_SYMBOL_GPL(timer_rearm_host_dec);
+#endif
+
 /*
  * timer_interrupt - gets called when the decrementer overflows,
  * with interrupts disabled.
@@ -609,10 +650,7 @@ DEFINE_INTERRUPT_HANDLER_ASYNC(timer_interrupt)
} else {
now = *next_tb - now;
if (now <= decrementer_max)
-   set_dec(now);
-   /* We may have raced with new irq work */
-   if (test_irq_work_pending())
-   set_dec(1);
+   set_dec_or_work(now);
__this_cpu_inc(irq_stat.timer_irqs_others);
}
 
@@ -854,11 +892,7 @@ static int decrementer_set_next_event(unsigned long evt,
  struct clock_event_device *dev)
 {
__this_cpu_write(decrementers_next_tb, get_tb() + evt);
-   set_dec(evt);
-
-   /* We may have raced with new irq work */
-   if (test_irq_work_pending())
-   set_dec(1);
+   set_dec_or_work(evt);
 
return 0;
 }
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 6e6cfb10e9bb..0cef578930f9 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -4018,11 +4018,8 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, 
u64 time_limit,
vc->entry_exit_map = 0x101;
vc->in_guest = 0;
 
-   next_timer = timer_get_next_tb();
-   set_dec(next_timer - tb);
-   /* We may have raced with new irq work */
-   if (test_irq_work_pending())
-   set_dec(1);
+   timer_rearm_host_dec(tb);
+

[PATCH v1 10/55] KVM: PPC: Book3S HV P9: Reduce mftb per guest entry/exit

2021-07-25 Thread Nicholas Piggin
mftb is serialising (dispatch next-to-complete) so it is heavy weight
for a mfspr. Avoid reading it multiple times in the entry or exit paths.
A small number of cycles delay to timers is tolerable.

-118 cycles (9137) POWER9 virt-mode NULL hcall

Reviewed-by: Fabiano Rosas 
Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/kvm/book3s_hv.c  | 4 ++--
 arch/powerpc/kvm/book3s_hv_p9_entry.c | 5 +++--
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 82976f734bd1..6e6cfb10e9bb 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -3896,7 +3896,7 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, 
u64 time_limit,
 *
 * XXX: Another day's problem.
 */
-   mtspr(SPRN_DEC, vcpu->arch.dec_expires - mftb());
+   mtspr(SPRN_DEC, vcpu->arch.dec_expires - tb);
 
if (kvmhv_on_pseries()) {
/*
@@ -4019,7 +4019,7 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, 
u64 time_limit,
vc->in_guest = 0;
 
next_timer = timer_get_next_tb();
-   set_dec(next_timer - mftb());
+   set_dec(next_timer - tb);
/* We may have raced with new irq work */
if (test_irq_work_pending())
set_dec(1);
diff --git a/arch/powerpc/kvm/book3s_hv_p9_entry.c 
b/arch/powerpc/kvm/book3s_hv_p9_entry.c
index 0ff9ddb5e7ca..bd8cf0a65ce8 100644
--- a/arch/powerpc/kvm/book3s_hv_p9_entry.c
+++ b/arch/powerpc/kvm/book3s_hv_p9_entry.c
@@ -203,7 +203,8 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 
time_limit, unsigned long lpc
unsigned long host_dawr1;
unsigned long host_dawrx1;
 
-   hdec = time_limit - mftb();
+   tb = mftb();
+   hdec = time_limit - tb;
if (hdec < 0)
return BOOK3S_INTERRUPT_HV_DECREMENTER;
 
@@ -215,7 +216,7 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 
time_limit, unsigned long lpc
vcpu->arch.ceded = 0;
 
if (vc->tb_offset) {
-   u64 new_tb = mftb() + vc->tb_offset;
+   u64 new_tb = tb + vc->tb_offset;
mtspr(SPRN_TBU40, new_tb);
tb = mftb();
if ((tb & 0xff) < (new_tb & 0xff))
-- 
2.23.0



[PATCH v1 09/55] KVM: PPC: Book3S HV P9: Use large decrementer for HDEC

2021-07-25 Thread Nicholas Piggin
On processors that don't suppress the HDEC exceptions when LPCR[HDICE]=0,
this could help reduce needless guest exits due to leftover exceptions on
entering the guest.

Reviewed-by: Alexey Kardashevskiy 
Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/include/asm/time.h   | 2 ++
 arch/powerpc/kernel/time.c| 1 +
 arch/powerpc/kvm/book3s_hv_p9_entry.c | 3 ++-
 3 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h
index fd09b4797fd7..69b6be617772 100644
--- a/arch/powerpc/include/asm/time.h
+++ b/arch/powerpc/include/asm/time.h
@@ -18,6 +18,8 @@
 #include 
 
 /* time.c */
+extern u64 decrementer_max;
+
 extern unsigned long tb_ticks_per_jiffy;
 extern unsigned long tb_ticks_per_usec;
 extern unsigned long tb_ticks_per_sec;
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 01df89918aa4..72d872b49167 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -89,6 +89,7 @@ static struct clocksource clocksource_timebase = {
 
 #define DECREMENTER_DEFAULT_MAX 0x7FFF
 u64 decrementer_max = DECREMENTER_DEFAULT_MAX;
+EXPORT_SYMBOL_GPL(decrementer_max); /* for KVM HDEC */
 
 static int decrementer_set_next_event(unsigned long evt,
  struct clock_event_device *dev);
diff --git a/arch/powerpc/kvm/book3s_hv_p9_entry.c 
b/arch/powerpc/kvm/book3s_hv_p9_entry.c
index 961b3d70483c..0ff9ddb5e7ca 100644
--- a/arch/powerpc/kvm/book3s_hv_p9_entry.c
+++ b/arch/powerpc/kvm/book3s_hv_p9_entry.c
@@ -504,7 +504,8 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 
time_limit, unsigned long lpc
vc->tb_offset_applied = 0;
}
 
-   mtspr(SPRN_HDEC, 0x7fff);
+   /* HDEC must be at least as large as DEC, so decrementer_max fits */
+   mtspr(SPRN_HDEC, decrementer_max);
 
save_clear_guest_mmu(kvm, vcpu);
switch_mmu_to_host(kvm, host_pidr);
-- 
2.23.0



[PATCH v1 08/55] KVM: PPC: Book3S HV P9: Use host timer accounting to avoid decrementer read

2021-07-25 Thread Nicholas Piggin
There is no need to save away the host DEC value, as it is derived
from the host timer subsystem which maintains the next timer time,
so it can be restored from there.

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/include/asm/time.h |  5 +
 arch/powerpc/kernel/time.c  |  1 +
 arch/powerpc/kvm/book3s_hv.c| 14 +++---
 3 files changed, 13 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h
index 8c2c3dd4ddba..fd09b4797fd7 100644
--- a/arch/powerpc/include/asm/time.h
+++ b/arch/powerpc/include/asm/time.h
@@ -111,6 +111,11 @@ static inline unsigned long test_irq_work_pending(void)
 
 DECLARE_PER_CPU(u64, decrementers_next_tb);
 
+static inline u64 timer_get_next_tb(void)
+{
+   return __this_cpu_read(decrementers_next_tb);
+}
+
 /* Convert timebase ticks to nanoseconds */
 unsigned long long tb_to_ns(unsigned long long tb_ticks);
 
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index e45ce427bffb..01df89918aa4 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -108,6 +108,7 @@ struct clock_event_device decrementer_clockevent = {
 EXPORT_SYMBOL(decrementer_clockevent);
 
 DEFINE_PER_CPU(u64, decrementers_next_tb);
+EXPORT_SYMBOL_GPL(decrementers_next_tb);
 static DEFINE_PER_CPU(struct clock_event_device, decrementers);
 
 #define XSEC_PER_SEC (1024*1024)
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 7020cbbf3aa1..82976f734bd1 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -3829,18 +3829,17 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, 
u64 time_limit,
struct kvmppc_vcore *vc = vcpu->arch.vcore;
struct p9_host_os_sprs host_os_sprs;
s64 dec;
-   u64 tb;
+   u64 tb, next_timer;
int trap, save_pmu;
 
WARN_ON_ONCE(vcpu->arch.ceded);
 
-   dec = mfspr(SPRN_DEC);
tb = mftb();
-   if (dec < 0)
+   next_timer = timer_get_next_tb();
+   if (tb >= next_timer)
return BOOK3S_INTERRUPT_HV_DECREMENTER;
-   local_paca->kvm_hstate.dec_expires = dec + tb;
-   if (local_paca->kvm_hstate.dec_expires < time_limit)
-   time_limit = local_paca->kvm_hstate.dec_expires;
+   if (next_timer < time_limit)
+   time_limit = next_timer;
 
save_p9_host_os_sprs(&host_os_sprs);
 
@@ -4019,7 +4018,8 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, 
u64 time_limit,
vc->entry_exit_map = 0x101;
vc->in_guest = 0;
 
-   set_dec(local_paca->kvm_hstate.dec_expires - mftb());
+   next_timer = timer_get_next_tb();
+   set_dec(next_timer - mftb());
/* We may have raced with new irq work */
if (test_irq_work_pending())
set_dec(1);
-- 
2.23.0



[PATCH v1 07/55] KVM: PPC: Book3S HV P9: Use set_dec to set decrementer to host

2021-07-25 Thread Nicholas Piggin
The host Linux timer code arms the decrementer with the value
'decrementers_next_tb - current_tb' using set_dec(), which stores
val - 1 on Book3S-64, which is not quite the same as what KVM does
to re-arm the host decrementer when exiting the guest.

This shouldn't be a significant change, but it makes the logic match
and avoids this small extra change being brought into the next patch.

Suggested-by: Alexey Kardashevskiy 
Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/kvm/book3s_hv.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 905bf29940ea..7020cbbf3aa1 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -4019,7 +4019,7 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, 
u64 time_limit,
vc->entry_exit_map = 0x101;
vc->in_guest = 0;
 
-   mtspr(SPRN_DEC, local_paca->kvm_hstate.dec_expires - mftb());
+   set_dec(local_paca->kvm_hstate.dec_expires - mftb());
/* We may have raced with new irq work */
if (test_irq_work_pending())
set_dec(1);
-- 
2.23.0



[PATCH v1 06/55] powerpc/64s: Remove WORT SPR from POWER9/10

2021-07-25 Thread Nicholas Piggin
This register is not architected and not implemented in POWER9 or 10,
it just reads back zeroes for compatibility.

-78 cycles (9255) POWER9 virt-mode NULL hcall

Reviewed-by: Fabiano Rosas 
Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/kvm/book3s_hv.c  | 3 ---
 arch/powerpc/platforms/powernv/idle.c | 2 --
 2 files changed, 5 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index c743020837e7..905bf29940ea 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -3740,7 +3740,6 @@ static void load_spr_state(struct kvm_vcpu *vcpu)
mtspr(SPRN_EBBHR, vcpu->arch.ebbhr);
mtspr(SPRN_EBBRR, vcpu->arch.ebbrr);
mtspr(SPRN_BESCR, vcpu->arch.bescr);
-   mtspr(SPRN_WORT, vcpu->arch.wort);
mtspr(SPRN_TIDR, vcpu->arch.tid);
mtspr(SPRN_AMR, vcpu->arch.amr);
mtspr(SPRN_UAMOR, vcpu->arch.uamor);
@@ -3767,7 +3766,6 @@ static void store_spr_state(struct kvm_vcpu *vcpu)
vcpu->arch.ebbhr = mfspr(SPRN_EBBHR);
vcpu->arch.ebbrr = mfspr(SPRN_EBBRR);
vcpu->arch.bescr = mfspr(SPRN_BESCR);
-   vcpu->arch.wort = mfspr(SPRN_WORT);
vcpu->arch.tid = mfspr(SPRN_TIDR);
vcpu->arch.amr = mfspr(SPRN_AMR);
vcpu->arch.uamor = mfspr(SPRN_UAMOR);
@@ -3799,7 +3797,6 @@ static void restore_p9_host_os_sprs(struct kvm_vcpu *vcpu,
struct p9_host_os_sprs *host_os_sprs)
 {
mtspr(SPRN_PSPB, 0);
-   mtspr(SPRN_WORT, 0);
mtspr(SPRN_UAMOR, 0);
 
mtspr(SPRN_DSCR, host_os_sprs->dscr);
diff --git a/arch/powerpc/platforms/powernv/idle.c 
b/arch/powerpc/platforms/powernv/idle.c
index 1e908536890b..df19e2ff9d3c 100644
--- a/arch/powerpc/platforms/powernv/idle.c
+++ b/arch/powerpc/platforms/powernv/idle.c
@@ -667,7 +667,6 @@ static unsigned long power9_idle_stop(unsigned long psscr)
sprs.purr   = mfspr(SPRN_PURR);
sprs.spurr  = mfspr(SPRN_SPURR);
sprs.dscr   = mfspr(SPRN_DSCR);
-   sprs.wort   = mfspr(SPRN_WORT);
sprs.ciabr  = mfspr(SPRN_CIABR);
 
sprs.mmcra  = mfspr(SPRN_MMCRA);
@@ -785,7 +784,6 @@ static unsigned long power9_idle_stop(unsigned long psscr)
mtspr(SPRN_PURR,sprs.purr);
mtspr(SPRN_SPURR,   sprs.spurr);
mtspr(SPRN_DSCR,sprs.dscr);
-   mtspr(SPRN_WORT,sprs.wort);
mtspr(SPRN_CIABR,   sprs.ciabr);
 
mtspr(SPRN_MMCRA,   sprs.mmcra);
-- 
2.23.0



[PATCH v1 05/55] KVM: PPC: Book3S HV Nested: Reflect guest PMU in-use to L0 when guest SPRs are live

2021-07-25 Thread Nicholas Piggin
After the L1 saves its PMU SPRs but before loading the L2's PMU SPRs,
switch the pmcregs_in_use field in the L1 lppaca to the value advertised
by the L2 in its VPA. On the way out of the L2, set it back after saving
the L2 PMU registers (if they were in-use).

This transfers the PMU liveness indication between the L1 and L2 at the
points where the registers are not live.

This fixes the nested HV bug for which a workaround was added to the L0
HV by commit 63279eeb7f93a ("KVM: PPC: Book3S HV: Always save guest pmu
for guest capable of nesting"), which explains the problem in detail.
That workaround is no longer required for guests that include this bug
fix.

Fixes: 360cae313702 ("KVM: PPC: Book3S HV: Nested guest entry via hypercall")
Reviewed-by: Fabiano Rosas 
Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/include/asm/pmc.h |  7 +++
 arch/powerpc/kvm/book3s_hv.c   | 20 
 2 files changed, 27 insertions(+)

diff --git a/arch/powerpc/include/asm/pmc.h b/arch/powerpc/include/asm/pmc.h
index c6bbe9778d3c..3c09109e708e 100644
--- a/arch/powerpc/include/asm/pmc.h
+++ b/arch/powerpc/include/asm/pmc.h
@@ -34,6 +34,13 @@ static inline void ppc_set_pmu_inuse(int inuse)
 #endif
 }
 
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+static inline int ppc_get_pmu_inuse(void)
+{
+   return get_paca()->pmcregs_in_use;
+}
+#endif
+
 extern void power4_enable_pmcs(void);
 
 #else /* CONFIG_PPC64 */
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index adac1a6431a0..c743020837e7 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -59,6 +59,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -3864,6 +3865,18 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, 
u64 time_limit,
cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST))
kvmppc_restore_tm_hv(vcpu, vcpu->arch.shregs.msr, true);
 
+#ifdef CONFIG_PPC_PSERIES
+   if (kvmhv_on_pseries()) {
+   barrier();
+   if (vcpu->arch.vpa.pinned_addr) {
+   struct lppaca *lp = vcpu->arch.vpa.pinned_addr;
+   get_lppaca()->pmcregs_in_use = lp->pmcregs_in_use;
+   } else {
+   get_lppaca()->pmcregs_in_use = 1;
+   }
+   barrier();
+   }
+#endif
kvmhv_load_guest_pmu(vcpu);
 
msr_check_and_set(MSR_FP | MSR_VEC | MSR_VSX);
@@ -3998,6 +4011,13 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, 
u64 time_limit,
save_pmu |= nesting_enabled(vcpu->kvm);
 
kvmhv_save_guest_pmu(vcpu, save_pmu);
+#ifdef CONFIG_PPC_PSERIES
+   if (kvmhv_on_pseries()) {
+   barrier();
+   get_lppaca()->pmcregs_in_use = ppc_get_pmu_inuse();
+   barrier();
+   }
+#endif
 
vc->entry_exit_map = 0x101;
vc->in_guest = 0;
-- 
2.23.0



[PATCH v1 04/55] KVM: PPC: Book3S HV: Stop forwarding all HFUs to L1

2021-07-25 Thread Nicholas Piggin
From: Fabiano Rosas 

If the nested hypervisor has no access to a facility because it has
been disabled by the host, it should also not be able to see the
Hypervisor Facility Unavailable that arises from one of its guests
trying to access the facility.

This patch turns a HFU that happened in L2 into a Hypervisor Emulation
Assistance interrupt and forwards it to L1 for handling. The ones that
happened because L1 explicitly disabled the facility for L2 are still
let through, along with the corresponding Cause bits in the HFSCR.

Signed-off-by: Fabiano Rosas 
---
 arch/powerpc/kvm/book3s_hv_nested.c | 27 ---
 1 file changed, 24 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_hv_nested.c 
b/arch/powerpc/kvm/book3s_hv_nested.c
index 9bb0788d312c..983628ed4376 100644
--- a/arch/powerpc/kvm/book3s_hv_nested.c
+++ b/arch/powerpc/kvm/book3s_hv_nested.c
@@ -99,7 +99,7 @@ static void byteswap_hv_regs(struct hv_guest_state *hr)
hr->dawrx1 = swab64(hr->dawrx1);
 }
 
-static void save_hv_return_state(struct kvm_vcpu *vcpu, int trap,
+static void save_hv_return_state(struct kvm_vcpu *vcpu,
 struct hv_guest_state *hr)
 {
struct kvmppc_vcore *vc = vcpu->arch.vcore;
@@ -128,7 +128,7 @@ static void save_hv_return_state(struct kvm_vcpu *vcpu, int 
trap,
hr->pidr = vcpu->arch.pid;
hr->cfar = vcpu->arch.cfar;
hr->ppr = vcpu->arch.ppr;
-   switch (trap) {
+   switch (vcpu->arch.trap) {
case BOOK3S_INTERRUPT_H_DATA_STORAGE:
hr->hdar = vcpu->arch.fault_dar;
hr->hdsisr = vcpu->arch.fault_dsisr;
@@ -137,6 +137,27 @@ static void save_hv_return_state(struct kvm_vcpu *vcpu, 
int trap,
case BOOK3S_INTERRUPT_H_INST_STORAGE:
hr->asdr = vcpu->arch.fault_gpa;
break;
+   case BOOK3S_INTERRUPT_H_FAC_UNAVAIL:
+   {
+   u8 cause = vcpu->arch.hfscr >> 56;
+
+   WARN_ON_ONCE(cause >= BITS_PER_LONG);
+
+   if (!(hr->hfscr & (1UL << cause)))
+   break;
+
+   /*
+* We have disabled this facility, so it does not
+* exist from L1's perspective. Turn it into a HEAI.
+*/
+   vcpu->arch.trap = BOOK3S_INTERRUPT_H_EMUL_ASSIST;
+   kvmppc_load_last_inst(vcpu, INST_GENERIC, &vcpu->arch.emul_inst);
+
+   /* Don't leak the cause field */
+   hr->hfscr &= ~HFSCR_INTR_CAUSE;
+
+   fallthrough;
+   }
case BOOK3S_INTERRUPT_H_EMUL_ASSIST:
hr->heir = vcpu->arch.emul_inst;
break;
@@ -394,7 +415,7 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu)
delta_spurr = vcpu->arch.spurr - l2_hv.spurr;
delta_ic = vcpu->arch.ic - l2_hv.ic;
delta_vtb = vc->vtb - l2_hv.vtb;
-   save_hv_return_state(vcpu, vcpu->arch.trap, &l2_hv);
+   save_hv_return_state(vcpu, &l2_hv);
 
/* restore L1 state */
vcpu->arch.nested = NULL;
-- 
2.23.0



[PATCH v1 03/55] KVM: PPC: Book3S HV: Sanitise vcpu registers in nested path

2021-07-25 Thread Nicholas Piggin
From: Fabiano Rosas 

As one of the arguments of the H_ENTER_NESTED hypercall, the nested
hypervisor (L1) prepares a structure containing the values of various
hypervisor-privileged registers with which it wants the nested guest
(L2) to run. Since the nested HV runs in supervisor mode it needs the
host to write to these registers.

To stop a nested HV manipulating this mechanism and using a nested
guest as a proxy to access a facility that has been made unavailable
to it, we have a routine that sanitises the values of the HV registers
before copying them into the nested guest's vcpu struct.

However, when coming out of the guest the values are copied as they
were back into L1 memory, which means that any sanitisation we did
during guest entry will be exposed to L1 after H_ENTER_NESTED returns.

This patch alters this sanitisation to have effect on the vcpu->arch
registers directly before entering and after exiting the guest,
leaving the structure that is copied back into L1 unchanged (except
when we really want L1 to access the value, e.g. the Cause bits of
HFSCR).

Signed-off-by: Fabiano Rosas 
---
 arch/powerpc/kvm/book3s_hv_nested.c | 100 +++-
 1 file changed, 52 insertions(+), 48 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_hv_nested.c 
b/arch/powerpc/kvm/book3s_hv_nested.c
index 898f942eb198..9bb0788d312c 100644
--- a/arch/powerpc/kvm/book3s_hv_nested.c
+++ b/arch/powerpc/kvm/book3s_hv_nested.c
@@ -104,8 +104,17 @@ static void save_hv_return_state(struct kvm_vcpu *vcpu, 
int trap,
 {
struct kvmppc_vcore *vc = vcpu->arch.vcore;
 
+   /*
+* When loading the hypervisor-privileged registers to run L2,
+* we might have used bits from L1 state to restrict what the
+* L2 state is allowed to be. Since L1 is not allowed to read
+* the HV registers, do not include these modifications in the
+* return state.
+*/
+   hr->hfscr = ((~HFSCR_INTR_CAUSE & hr->hfscr) |
+(HFSCR_INTR_CAUSE & vcpu->arch.hfscr));
+
hr->dpdes = vc->dpdes;
-   hr->hfscr = vcpu->arch.hfscr;
hr->purr = vcpu->arch.purr;
hr->spurr = vcpu->arch.spurr;
hr->ic = vcpu->arch.ic;
@@ -134,49 +143,7 @@ static void save_hv_return_state(struct kvm_vcpu *vcpu, 
int trap,
}
 }
 
-/*
- * This can result in some L0 HV register state being leaked to an L1
- * hypervisor when the hv_guest_state is copied back to the guest after
- * being modified here.
- *
- * There is no known problem with such a leak, and in many cases these
- * register settings could be derived by the guest by observing behaviour
- * and timing, interrupts, etc., but it is an issue to consider.
- */
-static void sanitise_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr)
-{
-   struct kvmppc_vcore *vc = vcpu->arch.vcore;
-   u64 mask;
-
-   /*
-* Don't let L1 change LPCR bits for the L2 except these:
-*/
-   mask = LPCR_DPFD | LPCR_ILE | LPCR_TC | LPCR_AIL | LPCR_LD |
-   LPCR_LPES | LPCR_MER;
-
-   /*
-* Additional filtering is required depending on hardware
-* and configuration.
-*/
-   hr->lpcr = kvmppc_filter_lpcr_hv(vcpu->kvm,
-   (vc->lpcr & ~mask) | (hr->lpcr & mask));
-
-   /*
-* Don't let L1 enable features for L2 which we've disabled for L1,
-* but preserve the interrupt cause field.
-*/
-   hr->hfscr &= (HFSCR_INTR_CAUSE | vcpu->arch.hfscr);
-
-   /* Don't let data address watchpoint match in hypervisor state */
-   hr->dawrx0 &= ~DAWRX_HYP;
-   hr->dawrx1 &= ~DAWRX_HYP;
-
-   /* Don't let completed instruction address breakpt match in HV state */
-   if ((hr->ciabr & CIABR_PRIV) == CIABR_PRIV_HYPER)
-   hr->ciabr &= ~CIABR_PRIV;
-}
-
-static void restore_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr)
+static void restore_hv_regs(struct kvm_vcpu *vcpu, const struct hv_guest_state 
*hr)
 {
struct kvmppc_vcore *vc = vcpu->arch.vcore;
 
@@ -288,6 +255,43 @@ static int kvmhv_write_guest_state_and_regs(struct 
kvm_vcpu *vcpu,
 sizeof(struct pt_regs));
 }
 
+static void load_l2_hv_regs(struct kvm_vcpu *vcpu,
+   const struct hv_guest_state *l2_hv,
+   const struct hv_guest_state *l1_hv, u64 *lpcr)
+{
+   struct kvmppc_vcore *vc = vcpu->arch.vcore;
+   u64 mask;
+
+   restore_hv_regs(vcpu, l2_hv);
+
+   /*
+* Don't let L1 change LPCR bits for the L2 except these:
+*/
+   mask = LPCR_DPFD | LPCR_ILE | LPCR_TC | LPCR_AIL | LPCR_LD |
+   LPCR_LPES | LPCR_MER;
+
+   /*
+* Additional filtering is required depending on hardware
+* and configuration.
+*/
+   *lpcr = kvmppc_filter_lpcr_hv(vcpu->kvm,
+ (vc->lpcr & ~mask) | (*lpcr & mask));
+
+   /*
+

[PATCH v1 02/55] KVM: PPC: Book3S HV P9: Fixes for TM softpatch interrupt

2021-07-25 Thread Nicholas Piggin
The softpatch interrupt sets HSRR0 to the faulting instruction +4, so
it should subtract 4 for the faulting instruction address. Also have it
emulate and deliver HFAC interrupts correctly, which is important for
nested HV and facility demand-faulting in future.

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/include/asm/reg.h  |  3 +-
 arch/powerpc/kvm/book3s_hv.c| 35 
 arch/powerpc/kvm/book3s_hv_tm.c | 57 +
 3 files changed, 61 insertions(+), 34 deletions(-)

diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index be85cf156a1f..e9d27265253b 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -415,6 +415,7 @@
 #define   FSCR_TAR __MASK(FSCR_TAR_LG)
 #define   FSCR_EBB __MASK(FSCR_EBB_LG)
 #define   FSCR_DSCR__MASK(FSCR_DSCR_LG)
+#define   FSCR_INTR_CAUSE (ASM_CONST(0xFF) << 56)  /* interrupt cause */
 #define SPRN_HFSCR 0xbe/* HV=1 Facility Status & Control Register */
 #define   HFSCR_PREFIX __MASK(FSCR_PREFIX_LG)
 #define   HFSCR_MSGP   __MASK(FSCR_MSGP_LG)
@@ -426,7 +427,7 @@
 #define   HFSCR_DSCR   __MASK(FSCR_DSCR_LG)
 #define   HFSCR_VECVSX __MASK(FSCR_VECVSX_LG)
 #define   HFSCR_FP __MASK(FSCR_FP_LG)
-#define   HFSCR_INTR_CAUSE (ASM_CONST(0xFF) << 56) /* interrupt cause */
+#define   HFSCR_INTR_CAUSE FSCR_INTR_CAUSE
 #define SPRN_TAR   0x32f   /* Target Address Register */
 #define SPRN_LPCR  0x13E   /* LPAR Control Register */
 #define   LPCR_VPM0 ASM_CONST(0x8000000000000000)
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index ce7ff12cfc03..adac1a6431a0 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -1682,6 +1682,21 @@ XXX benchmark guest exits
r = RESUME_GUEST;
}
break;
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+   case BOOK3S_INTERRUPT_HV_SOFTPATCH:
+   /*
+* This occurs for various TM-related instructions that
+* we need to emulate on POWER9 DD2.2.  We have already
+* handled the cases where the guest was in real-suspend
+* mode and was transitioning to transactional state.
+*/
+   r = kvmhv_p9_tm_emulation(vcpu);
+   if (r != -1)
+   break;
+   fallthrough; /* go to facility unavailable handler */
+#endif
+
/*
 * This occurs if the guest (kernel or userspace), does something that
 * is prohibited by HFSCR.
@@ -1700,18 +1715,6 @@ XXX benchmark guest exits
}
break;
 
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-   case BOOK3S_INTERRUPT_HV_SOFTPATCH:
-   /*
-* This occurs for various TM-related instructions that
-* we need to emulate on POWER9 DD2.2.  We have already
-* handled the cases where the guest was in real-suspend
-* mode and was transitioning to transactional state.
-*/
-   r = kvmhv_p9_tm_emulation(vcpu);
-   break;
-#endif
-
case BOOK3S_INTERRUPT_HV_RM_HARD:
r = RESUME_PASSTHROUGH;
break;
@@ -1814,9 +1817,15 @@ static int kvmppc_handle_nested_exit(struct kvm_vcpu 
*vcpu)
 * mode and was transitioning to transactional state.
 */
r = kvmhv_p9_tm_emulation(vcpu);
-   break;
+   if (r != -1)
+   break;
+   fallthrough; /* go to facility unavailable handler */
 #endif
 
+   case BOOK3S_INTERRUPT_H_FAC_UNAVAIL:
+   r = RESUME_HOST;
+   break;
+
case BOOK3S_INTERRUPT_HV_RM_HARD:
vcpu->arch.trap = 0;
r = RESUME_GUEST;
diff --git a/arch/powerpc/kvm/book3s_hv_tm.c b/arch/powerpc/kvm/book3s_hv_tm.c
index cc90b8b82329..e4fd4a9dee08 100644
--- a/arch/powerpc/kvm/book3s_hv_tm.c
+++ b/arch/powerpc/kvm/book3s_hv_tm.c
@@ -74,19 +74,23 @@ int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu)
case PPC_INST_RFEBB:
if ((msr & MSR_PR) && (vcpu->arch.vcore->pcr & PCR_ARCH_206)) {
/* generate an illegal instruction interrupt */
+   vcpu->arch.regs.nip -= 4;
kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
return RESUME_GUEST;
}
/* check EBB facility is available */
if (!(vcpu->arch.hfscr & HFSCR_EBB)) {
-   /* generate an illegal instruction interrupt */
-   kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
-   return RESUME_GUEST;
+   vcpu->arch.regs.nip -= 4;
+   vcpu->arch.hfscr &= ~HFSCR_INTR_CAUSE;
+   vcpu->arch.hfscr |= 

[PATCH v1 01/55] KVM: PPC: Book3S HV: Remove TM emulation from POWER7/8 path

2021-07-25 Thread Nicholas Piggin
TM fake-suspend emulation is only used by POWER9. Remove it from the old
code path.

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/kvm/book3s_hv_rmhandlers.S | 42 -
 1 file changed, 42 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S 
b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 8dd437d7a2c6..75079397c2a5 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -1088,12 +1088,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
cmpwi   r12, BOOK3S_INTERRUPT_H_INST_STORAGE
beq kvmppc_hisi
 
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-   /* For softpatch interrupt, go off and do TM instruction emulation */
-   cmpwi   r12, BOOK3S_INTERRUPT_HV_SOFTPATCH
-   beq kvmppc_tm_emul
-#endif
-
/* See if this is a leftover HDEC interrupt */
cmpwi   r12,BOOK3S_INTERRUPT_HV_DECREMENTER
bne 2f
@@ -1599,42 +1593,6 @@ maybe_reenter_guest:
blt deliver_guest_interrupt
b   guest_exit_cont
 
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-/*
- * Softpatch interrupt for transactional memory emulation cases
- * on POWER9 DD2.2.  This is early in the guest exit path - we
- * haven't saved registers or done a treclaim yet.
- */
-kvmppc_tm_emul:
-   /* Save instruction image in HEIR */
-   mfspr   r3, SPRN_HEIR
-   stw r3, VCPU_HEIR(r9)
-
-   /*
-* The cases we want to handle here are those where the guest
-* is in real suspend mode and is trying to transition to
-* transactional mode.
-*/
-   lbz r0, HSTATE_FAKE_SUSPEND(r13)
-   cmpwi   r0, 0   /* keep exiting guest if in fake suspend */
-   bne guest_exit_cont
-   rldicl  r3, r11, 64 - MSR_TS_S_LG, 62
-   cmpwi   r3, 1   /* or if not in suspend state */
-   bne guest_exit_cont
-
-   /* Call C code to do the emulation */
-   mr  r3, r9
-   bl  kvmhv_p9_tm_emulation_early
-   nop
-   ld  r9, HSTATE_KVM_VCPU(r13)
-   li  r12, BOOK3S_INTERRUPT_HV_SOFTPATCH
-   cmpwi   r3, 0
-   beq guest_exit_cont /* continue exiting if not handled */
-   ld  r10, VCPU_PC(r9)
-   ld  r11, VCPU_MSR(r9)
-   b   fast_interrupt_c_return /* go back to guest if handled */
-#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
-
 /*
  * Check whether an HDSI is an HPTE not found fault or something else.
  * If it is an HPTE not found fault that is due to the guest accessing
-- 
2.23.0



[PATCH v1 00/55] KVM: PPC: Book3S HV P9: entry/exit optimisations

2021-07-25 Thread Nicholas Piggin
This reduces radix guest full entry/exit latency on POWER9 and POWER10
by almost 2x.

Nested HV guests should see smaller improvements in their L1 entry/exit,
but this is also combined with most L0 speedups also applying to nested
entry. nginx localhost throughput test in a SMP nested guest is improved
about 10% (in a direct guest it doesn't change much because it uses XIVE
for IPIs) when L0 and L1 are patched.

It does this in several main ways:

- Rearrange code to optimise SPR accesses. Mainly, avoid scoreboard
  stalls.

- Test SPR values to avoid mtSPRs where possible. mtSPRs are expensive.

- Reduce mftb. mftb is expensive.

- Demand fault certain facilities to avoid saving and/or restoring them
  (at the cost of a fault when they are used, but this is mitigated over
  a number of entries, like the facilities when context switching 
  processes). PM, TM, and EBB so far.

- Defer some sequences that are made just in case a guest is interrupted
  in the middle of a critical section to the case where the guest is
  scheduled on a different CPU, rather than every time (at the cost of
  an extra IPI in this case). Namely the tlbsync sequence for radix with
  GTSE, which is very expensive.

This also adds the 2nd round patches to the series, which improve
performance mostly by reducing locking, barriers, and atomics related
to the vcpus-per-vcore > 1 handling that the P9 path does not require.

Some of the numbers quoted in changelogs may have changed a bit with
patches being updated, reordered, etc. They give a bit of a guide, but
I might remove them from the final submission because they're too much
to maintain.

Changes since RFC:
- Rebased with Fabiano's HV sanitising patches at the front.
- Several demand faulting bug fixes mostly relating to nested guests.
- Removed facility demand-faulting from L0 nested entry/exit handler.
  Demand faulting is still done in the L1, but not the L0. The reason
  is to reduce complexity (although it's only a small amount of
  complexity), reduce demand faulting overhead that may require several
  interrupts, and allow better testing of the L1 demand faulting,
  because we may run on hypervisors that do not implement L0 demand
  faulting. In future, depending on performance and such, we could add
  demand faulting to L0 nested entry handling and/or remove it from the
  L1.
- Fixed a timebase problem with the HMI subcore patch.


Fabiano Rosas (2):
  KVM: PPC: Book3S HV: Sanitise vcpu registers in nested path
  KVM: PPC: Book3S HV: Stop forwarding all HFUs to L1

Nicholas Piggin (53):
  KVM: PPC: Book3S HV: Remove TM emulation from POWER7/8 path
  KVM: PPC: Book3S HV P9: Fixes for TM softpatch interrupt
  KVM: PPC: Book3S HV Nested: Reflect guest PMU in-use to L0 when guest
SPRs are live
  powerpc/64s: Remove WORT SPR from POWER9/10
  KVM: PPC: Book3S HV P9: Use set_dec to set decrementer to host
  KVM: PPC: Book3S HV P9: Use host timer accounting to avoid decrementer
read
  KVM: PPC: Book3S HV P9: Use large decrementer for HDEC
  KVM: PPC: Book3S HV P9: Reduce mftb per guest entry/exit
  powerpc/time: add API for KVM to re-arm the host timer/decrementer
  KVM: PPC: Book3S HV: POWER10 enable HAIL when running radix guests
  powerpc/64s: Keep AMOR SPR a constant ~0 at runtime
  KVM: PPC: Book3S HV: Don't always save PMU for guest capable of
nesting
  powerpc/64s: Always set PMU control registers to frozen/disabled when
not in use
  powerpc/64s: Implement PMU override command line option
  KVM: PPC: Book3S HV P9: Implement PMU save/restore in C
  KVM: PPC: Book3S HV P9: Factor PMU save/load into context switch
functions
  KVM: PPC: Book3S HV P9: Demand fault PMU SPRs when marked not inuse
  KVM: PPC: Book3S HV P9: Factor out yield_count increment
  KVM: PPC: Book3S HV: CTRL SPR does not require read-modify-write
  KVM: PPC: Book3S HV P9: Move SPRG restore to restore_p9_host_os_sprs
  KVM: PPC: Book3S HV P9: Reduce mtmsrd instructions required to save
host SPRs
  KVM: PPC: Book3S HV P9: Improve mtmsrd scheduling by delaying MSR[EE]
disable
  KVM: PPC: Book3S HV P9: Add kvmppc_stop_thread to match
kvmppc_start_thread
  KVM: PPC: Book3S HV: Change dec_expires to be relative to guest
timebase
  KVM: PPC: Book3S HV P9: Move TB updates
  KVM: PPC: Book3S HV P9: Optimise timebase reads
  KVM: PPC: Book3S HV P9: Avoid SPR scoreboard stalls
  KVM: PPC: Book3S HV P9: Only execute mtSPR if the value changed
  KVM: PPC: Book3S HV P9: Juggle SPR switching around
  KVM: PPC: Book3S HV P9: Move vcpu register save/restore into functions
  KVM: PPC: Book3S HV P9: Move host OS save/restore functions to
built-in
  KVM: PPC: Book3S HV P9: Move nested guest entry into its own function
  KVM: PPC: Book3S HV P9: Move remaining SPR and MSR access into low
level entry
  KVM: PPC: Book3S HV P9: Implement TM fastpath for guest entry/exit
  KVM: PPC: Book3S HV P9: Switch PMU to guest as late as possible
  KVM: PPC: Book3S HV P9: Restrict DSISR canary 

[PATCH 2/2] powerpc/64s: Rename CPU_FTR_POWER9_DD2_1 to CPU_FTR_P9_STOP_FIXED

2021-07-25 Thread Nicholas Piggin
CPU feature flags work best when they are named for behaviour, not for
the CPU variant that first introduced them. Later revisions might also
contain the behaviour, for example. It's confusing for a POWER9 DD2.2
to have CPU_FTR_POWER9_DD2_1, but it's not confusing if DD2.1 and DD2.2
both have CPU_FTR_P9_STOP_FIXED.

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/include/asm/cputable.h   | 8 
 arch/powerpc/platforms/powernv/idle.c | 4 ++--
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/include/asm/cputable.h 
b/arch/powerpc/include/asm/cputable.h
index 46bae9624784..cb9948f318f7 100644
--- a/arch/powerpc/include/asm/cputable.h
+++ b/arch/powerpc/include/asm/cputable.h
@@ -186,7 +186,7 @@ static inline void cpu_feature_keys_init(void) { }
 #define CPU_FTR_DAWR   LONG_ASM_CONST(0x0000008000000000)
 #define CPU_FTR_DABRX  LONG_ASM_CONST(0x0000010000000000)
 #define CPU_FTR_PMAO_BUG   LONG_ASM_CONST(0x0000020000000000)
-#define CPU_FTR_POWER9_DD2_1   LONG_ASM_CONST(0x0000080000000000)
+#define CPU_FTR_P9_STOP_FIXED  LONG_ASM_CONST(0x0000080000000000)
 #define CPU_FTR_P9_TM_HV_ASSIST
LONG_ASM_CONST(0x0000100000000000)
 #define CPU_FTR_P9_TM_XER_SO_BUG   LONG_ASM_CONST(0x0000200000000000)
 #define CPU_FTR_P9_TLBIE_STQ_BUG   LONG_ASM_CONST(0x0000400000000000)
@@ -436,11 +436,11 @@ static inline void cpu_feature_keys_init(void) { }
 #define CPU_FTRS_POWER9_DD2_0 (CPU_FTRS_POWER9 | CPU_FTR_P9_RADIX_PREFETCH_BUG)
 #define CPU_FTRS_POWER9_DD2_1 (CPU_FTRS_POWER9 | \
   CPU_FTR_P9_RADIX_PREFETCH_BUG | \
-  CPU_FTR_POWER9_DD2_1)
-#define CPU_FTRS_POWER9_DD2_2 (CPU_FTRS_POWER9 | CPU_FTR_POWER9_DD2_1 | \
+  CPU_FTR_P9_STOP_FIXED)
+#define CPU_FTRS_POWER9_DD2_2 (CPU_FTRS_POWER9 | CPU_FTR_P9_STOP_FIXED | \
   CPU_FTR_P9_TM_HV_ASSIST | \
   CPU_FTR_P9_TM_XER_SO_BUG)
-#define CPU_FTRS_POWER9_DD2_3 (CPU_FTRS_POWER9 | CPU_FTR_POWER9_DD2_1 | \
+#define CPU_FTRS_POWER9_DD2_3 (CPU_FTRS_POWER9 | CPU_FTR_P9_STOP_FIXED | \
   CPU_FTR_P9_TM_HV_ASSIST)
 #define CPU_FTRS_POWER10 (CPU_FTR_LWSYNC | \
CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | CPU_FTR_ARCH_206 |\
diff --git a/arch/powerpc/platforms/powernv/idle.c 
b/arch/powerpc/platforms/powernv/idle.c
index 528a7e0cf83a..1e908536890b 100644
--- a/arch/powerpc/platforms/powernv/idle.c
+++ b/arch/powerpc/platforms/powernv/idle.c
@@ -650,7 +650,7 @@ static unsigned long power9_idle_stop(unsigned long psscr)
}
 #endif
 
-   if (!cpu_has_feature(CPU_FTR_POWER9_DD2_1)) {
+   if (!cpu_has_feature(CPU_FTR_P9_STOP_FIXED)) {
 /*
  * POWER9 DD2 can incorrectly set PMAO when waking up
  * after a state-loss idle. Saving and restoring MMCR0
@@ -717,7 +717,7 @@ static unsigned long power9_idle_stop(unsigned long psscr)
 * might have been corrupted and needs flushing. We also need
 * to reload MMCR0 (see mmcr0 comment above).
 */
-   if (!cpu_has_feature(CPU_FTR_POWER9_DD2_1)) {
+   if (!cpu_has_feature(CPU_FTR_P9_STOP_FIXED)) {
asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT);
mtspr(SPRN_MMCR0, mmcr0);
}
-- 
2.23.0



[PATCH 1/2] powerpc/64s: POWER9 DD2.3 CPU feature flag fixes

2021-07-25 Thread Nicholas Piggin
DD2.3 missed out on getting its feature flag bits.

This meant when booting with dt-cpu-ftrs, CPU_FTR_P9_TM_HV_ASSIST is
missing (unless the firmware contains it, which mine does not seem to).
And when booting without, CPU_FTR_P9_TM_XER_SO_BUG is set.

In practice this doesn't make any difference to pseries guests, only
powernv.

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/include/asm/cputable.h |  2 ++
 arch/powerpc/kernel/cputable.c  | 22 --
 arch/powerpc/kernel/dt_cpu_ftrs.c   | 14 +-
 3 files changed, 27 insertions(+), 11 deletions(-)

diff --git a/arch/powerpc/include/asm/cputable.h 
b/arch/powerpc/include/asm/cputable.h
index e85c849214a2..46bae9624784 100644
--- a/arch/powerpc/include/asm/cputable.h
+++ b/arch/powerpc/include/asm/cputable.h
@@ -440,6 +440,8 @@ static inline void cpu_feature_keys_init(void) { }
 #define CPU_FTRS_POWER9_DD2_2 (CPU_FTRS_POWER9 | CPU_FTR_POWER9_DD2_1 | \
   CPU_FTR_P9_TM_HV_ASSIST | \
   CPU_FTR_P9_TM_XER_SO_BUG)
+#define CPU_FTRS_POWER9_DD2_3 (CPU_FTRS_POWER9 | CPU_FTR_POWER9_DD2_1 | \
+  CPU_FTR_P9_TM_HV_ASSIST)
 #define CPU_FTRS_POWER10 (CPU_FTR_LWSYNC | \
CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | CPU_FTR_ARCH_206 |\
CPU_FTR_MMCRA | CPU_FTR_SMT | \
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index ae0fdef0ac11..9ab97d1fd5a2 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -487,11 +487,29 @@ static struct cpu_spec __initdata cpu_specs[] = {
.machine_check_early= __machine_check_early_realmode_p9,
.platform   = "power9",
},
-   {   /* Power9 DD2.2 or later */
+   {   /* Power9 DD 2.2 */
+   .pvr_mask   = 0xffffefff,
+   .pvr_value  = 0x004e0202,
+   .cpu_name   = "POWER9 (raw)",
+   .cpu_features   = CPU_FTRS_POWER9_DD2_2,
+   .cpu_user_features  = COMMON_USER_POWER9,
+   .cpu_user_features2 = COMMON_USER2_POWER9,
+   .mmu_features   = MMU_FTRS_POWER9,
+   .icache_bsize   = 128,
+   .dcache_bsize   = 128,
+   .num_pmcs   = 6,
+   .pmc_type   = PPC_PMC_IBM,
+   .oprofile_cpu_type  = "ppc64/power9",
+   .cpu_setup  = __setup_cpu_power9,
+   .cpu_restore= __restore_cpu_power9,
+   .machine_check_early= __machine_check_early_realmode_p9,
+   .platform   = "power9",
+   },
+   {   /* Power9 DD 2.3 or later */
.pvr_mask   = 0xffff0000,
.pvr_value  = 0x004e0000,
.cpu_name   = "POWER9 (raw)",
-   .cpu_features   = CPU_FTRS_POWER9_DD2_2,
+   .cpu_features   = CPU_FTRS_POWER9_DD2_3,
.cpu_user_features  = COMMON_USER_POWER9,
.cpu_user_features2 = COMMON_USER2_POWER9,
.mmu_features   = MMU_FTRS_POWER9,
diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c 
b/arch/powerpc/kernel/dt_cpu_ftrs.c
index 358aee7c2d79..af95f337e54b 100644
--- a/arch/powerpc/kernel/dt_cpu_ftrs.c
+++ b/arch/powerpc/kernel/dt_cpu_ftrs.c
@@ -764,18 +764,14 @@ static __init void cpufeatures_cpu_quirks(void)
 * Not all quirks can be derived from the cpufeatures device tree.
 */
if ((version & 0xefff) == 0x004e0200) {
-   /* DD2.0 has no feature flag */
-   cur_cpu_spec->cpu_features |= CPU_FTR_P9_RADIX_PREFETCH_BUG;
+   cur_cpu_spec->cpu_features |= CPU_FTRS_POWER9_DD2_0;
} else if ((version & 0xefff) == 0x004e0201) {
-   cur_cpu_spec->cpu_features |= CPU_FTR_POWER9_DD2_1;
-   cur_cpu_spec->cpu_features |= CPU_FTR_P9_RADIX_PREFETCH_BUG;
+   cur_cpu_spec->cpu_features |= CPU_FTRS_POWER9_DD2_1;
} else if ((version & 0xefff) == 0x004e0202) {
-   cur_cpu_spec->cpu_features |= CPU_FTR_P9_TM_HV_ASSIST;
-   cur_cpu_spec->cpu_features |= CPU_FTR_P9_TM_XER_SO_BUG;
-   cur_cpu_spec->cpu_features |= CPU_FTR_POWER9_DD2_1;
+   cur_cpu_spec->cpu_features |= CPU_FTRS_POWER9_DD2_2;
} else if ((version & 0x) == 0x004e) {
-   /* DD2.1 and up have DD2_1 */
-   cur_cpu_spec->cpu_features |= CPU_FTR_POWER9_DD2_1;
+   /* DD2.3 and up */
+   cur_cpu_spec->cpu_features |= CPU_FTRS_POWER9_DD2_3;
}
 
if ((version & 0x) == 0x004e) {
-- 
2.23.0



Re: [PATCH v5 6/6] powerpc/pseries: Add support for FORM2 associativity

2021-07-25 Thread David Gibson
On Thu, Jul 22, 2021 at 01:04:42PM +0530, Aneesh Kumar K.V wrote:
> David Gibson  writes:
> 
> > On Mon, Jun 28, 2021 at 08:41:17PM +0530, Aneesh Kumar K.V wrote:
> >> PAPR interface currently supports two different ways of communicating 
> >> resource
> >> grouping details to the OS. These are referred to as Form 0 and Form 1
> >> associativity grouping. Form 0 is the older format and is now considered
> >> deprecated. This patch adds another resource grouping named FORM2.
> >> 
> >> Signed-off-by: Daniel Henrique Barboza 
> >> Signed-off-by: Aneesh Kumar K.V 
> >> ---
> >>  Documentation/powerpc/associativity.rst   | 103 ++
> >>  arch/powerpc/include/asm/firmware.h   |   3 +-
> >>  arch/powerpc/include/asm/prom.h   |   1 +
> >>  arch/powerpc/kernel/prom_init.c   |   3 +-
> >>  arch/powerpc/mm/numa.c| 157 ++
> >>  arch/powerpc/platforms/pseries/firmware.c |   1 +
> >>  6 files changed, 242 insertions(+), 26 deletions(-)
> >>  create mode 100644 Documentation/powerpc/associativity.rst
> >> 
> >> diff --git a/Documentation/powerpc/associativity.rst 
> >> b/Documentation/powerpc/associativity.rst
> >> new file mode 100644
> >> index ..31cc7da2c7a6
> >> --- /dev/null
> >> +++ b/Documentation/powerpc/associativity.rst
> >> @@ -0,0 +1,103 @@
> >> +
> >> +NUMA resource associativity
> >> +=
> >> +
> >> +Associativity represents the groupings of the various platform resources 
> >> into
> >> +domains of substantially similar mean performance relative to resources 
> >> outside
> >> +of that domain. Resource subsets of a given domain that exhibit better
> >> +performance relative to each other than relative to other resource 
> >> subsets
> >> +are represented as being members of a sub-grouping domain. This 
> >> performance
> >> +characteristic is presented in terms of NUMA node distance within the 
> >> Linux kernel.
> >> +From the platform view, these groups are also referred to as domains.
> >
> > Pretty hard to decipher, but that's typical for PAPR.
> >
> >> +PAPR interface currently supports different ways of communicating these 
> >> resource
> >> +grouping details to the OS. These are referred to as Form 0, Form 1 and 
> >> Form2
> >> +associativity grouping. Form 0 is the older format and is now considered 
> >> deprecated.
> >
> > Nit: s/older/oldest/ since there are now >2 forms.
> 
> updated.
> 
> >
> >> +Hypervisor indicates the type/form of associativity used via 
> >> "ibm,architecture-vec-5 property".
> >> +Bit 0 of byte 5 in the "ibm,architecture-vec-5" property indicates usage 
> >> of Form 0 or Form 1.
> >> +A value of 1 indicates the usage of Form 1 associativity. For Form 2 
> >> associativity
> >> +bit 2 of byte 5 in the "ibm,architecture-vec-5" property is used.
> >> +
> >> +Form 0
> >> +-
> >> +Form 0 associativity supports only two NUMA distances (LOCAL and REMOTE).
> >> +
> >> +Form 1
> >> +-
> >> +With Form 1 a combination of ibm,associativity-reference-points, and 
> >> ibm,associativity
> >> +device tree properties are used to determine the NUMA distance between 
> >> resource groups/domains.
> >> +
> >> +The “ibm,associativity” property contains a list of one or more numbers 
> >> (domainID)
> >> +representing the resource’s platform grouping domains.
> >> +
> >> +The “ibm,associativity-reference-points” property contains a list of one 
> >> or more numbers
> >> +(domainID index) that represents the 1 based ordinal in the associativity 
> >> lists.
> >> +The list of domainID indexes represents an increasing hierarchy of 
> >> resource grouping.
> >> +
> >> +ex:
> >> +{ primary domainID index, secondary domainID index, tertiary domainID 
> >> index.. }
> >> +
> >> +Linux kernel uses the domainID at the primary domainID index as the NUMA 
> >> node id.
> >> +Linux kernel computes NUMA distance between two domains by recursively 
> >> comparing
> >> +if they belong to the same higher-level domains. For mismatch at every 
> >> higher
> >> +level of the resource group, the kernel doubles the NUMA distance between 
> >> the
> >> +comparing domains.
> >> +
> >> +Form 2
> >> +---
> >> +Form 2 associativity format adds separate device tree properties 
> >> representing NUMA node distance
> >> +thereby making the node distance computation flexible. Form 2 also allows 
> >> flexible primary
> >> +domain numbering. With numa distance computation now detached from the 
> >> index value in
> >> +"ibm,associativity-reference-points" property, Form 2 allows a large 
> >> number of primary domain
> >> +ids at the same domainID index representing resource groups of different 
> >> performance/latency
> >> +characteristics.
> >> +
> >> +Hypervisor indicates the usage of FORM2 associativity using bit 2 of byte 
> >> 5 in the
> >> +"ibm,architecture-vec-5" property.
> >> +
> >> +"ibm,numa-lookup-index-table" property contains a list of one or more 
> >> 

Re: [PATCH v5 4/6] powerpc/pseries: Consolidate different NUMA distance update code paths

2021-07-25 Thread David Gibson
On Thu, Jul 22, 2021 at 12:37:46PM +0530, Aneesh Kumar K.V wrote:
> David Gibson  writes:
> 
> > On Mon, Jun 28, 2021 at 08:41:15PM +0530, Aneesh Kumar K.V wrote:
> >> The associativity details of the newly added resources are collected from
> >> the hypervisor via "ibm,configure-connector" rtas call. Update the numa
> >> distance details of the newly added numa node after the above call.
> >> 
> >> Instead of updating NUMA distance every time we lookup a node id
> >> from the associativity property, add helpers that can be used
> >> during boot which does this only once. Also remove the distance
> >> update from node id lookup helpers.
> >> 
> >> Signed-off-by: Aneesh Kumar K.V 
> >> ---
> >>  arch/powerpc/mm/numa.c| 173 +-
> >>  arch/powerpc/platforms/pseries/hotplug-cpu.c  |   2 +
> >>  .../platforms/pseries/hotplug-memory.c|   2 +
> >>  arch/powerpc/platforms/pseries/pseries.h  |   1 +
> >>  4 files changed, 132 insertions(+), 46 deletions(-)
> >> 
> >> diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
> >> index 0ec16999beef..7b142f79d600 100644
> >> --- a/arch/powerpc/mm/numa.c
> >> +++ b/arch/powerpc/mm/numa.c
> >> @@ -208,22 +208,6 @@ int __node_distance(int a, int b)
> >>  }
> >>  EXPORT_SYMBOL(__node_distance);
> >>  
> >> -static void initialize_distance_lookup_table(int nid,
> >> -  const __be32 *associativity)
> >> -{
> >> -  int i;
> >> -
> >> -  if (affinity_form != FORM1_AFFINITY)
> >> -  return;
> >> -
> >> -  for (i = 0; i < max_associativity_domain_index; i++) {
> >> -  const __be32 *entry;
> >> -
> >> -  entry = [be32_to_cpu(distance_ref_points[i]) - 1];
> >> -  distance_lookup_table[nid][i] = of_read_number(entry, 1);
> >> -  }
> >> -}
> >> -
> >>  /*
> >>   * Returns nid in the range [0..nr_node_ids], or -1 if no useful NUMA
> >>   * info is found.
> >> @@ -241,15 +225,6 @@ static int associativity_to_nid(const __be32 
> >> *associativity)
> >>/* POWER4 LPAR uses 0x as invalid node */
> >>if (nid == 0x || nid >= nr_node_ids)
> >>nid = NUMA_NO_NODE;
> >> -
> >> -  if (nid > 0 &&
> >> -  of_read_number(associativity, 1) >= 
> >> max_associativity_domain_index) {
> >> -  /*
> >> -   * Skip the length field and send start of associativity array
> >> -   */
> >> -  initialize_distance_lookup_table(nid, associativity + 1);
> >> -  }
> >> -
> >>  out:
> >>return nid;
> >>  }
> >> @@ -287,6 +262,49 @@ int of_node_to_nid(struct device_node *device)
> >>  }
> >>  EXPORT_SYMBOL(of_node_to_nid);
> >>  
> >> +static void __initialize_form1_numa_distance(const __be32 *associativity)
> >> +{
> >> +  int i, nid;
> >> +
> >> +  if (affinity_form != FORM1_AFFINITY)
> >
> > Since this shouldn't be called on a !form1 system, this could be a 
> > WARN_ON().
> 
> The way we call functions currently, instead of doing
> 
> if (affinity_form == FORM1_AFFINITY)
> __initialize_form1_numa_distance()
> 
> We avoid doing the if check in multiple places. For example
> parse_numa_properties will fetch the associativity array to find the
> details of online node and set it online. We use the same code path to
> initialize distance.
> 
>   if (__vphn_get_associativity(i, vphn_assoc) == 0) {
>   nid = associativity_to_nid(vphn_assoc);
>   __initialize_form1_numa_distance(vphn_assoc);
>   } else {
> 
>   cpu = of_get_cpu_node(i, NULL);
>   BUG_ON(!cpu);
> 
>   associativity = of_get_associativity(cpu);
>   if (associativity) {
>   nid = associativity_to_nid(associativity);
>   __initialize_form1_numa_distance(associativity);
>   }
> 
> We avoid the if (affinity_form == FORM1_AFFINITY) check there by
> moving the check inside __initialize_form1_numa_distance().

Oh.. ok.  The only caller I spotted was already doing a test against
affinity_form.

> >> +  return;
> >> +
> >> +  if (of_read_number(associativity, 1) >= primary_domain_index) {
> >> +  nid = of_read_number([primary_domain_index], 1);
> >
> > This computes the nid from the assoc array independently of
> > associativity_to_nid, which doesn't seem like a good idea.  Wouldn't
> > > it be better to call associativity_to_nid(), then make the next bit
> > > conditional on nid != NUMA_NO_NODE?
> 
> @@ -302,9 +302,8 @@ static void __initialize_form1_numa_distance(const __be32 
> *associativity)
>   if (affinity_form != FORM1_AFFINITY)
>   return;
>  
> - if (of_read_number(associativity, 1) >= primary_domain_index) {
> - nid = of_read_number([primary_domain_index], 1);
> -
> + nid = associativity_to_nid(associativity);
> + if (nid != NUMA_NO_NODE) {
>   for (i = 0; i < distance_ref_points_depth; i++) {
>  

Re: [PATCH v5 5/6] powerpc/pseries: Add a helper for form1 cpu distance

2021-07-25 Thread David Gibson
On Thu, Jul 22, 2021 at 12:39:27PM +0530, Aneesh Kumar K.V wrote:
> David Gibson  writes:
> 
> > On Mon, Jun 28, 2021 at 08:41:16PM +0530, Aneesh Kumar K.V wrote:
> >> This helper is only used with the dispatch trace log collection.
> >> A later patch will add Form2 affinity support and this change helps
> >> in keeping that simpler. Also add a comment explaining we don't expect
> >> the code to be called with FORM0
> >> 
> >> Reviewed-by: David Gibson 
> >> Signed-off-by: Aneesh Kumar K.V 
> >
> > What makes it a "relative_distance" rather than just a "distance"?
> 
> I added that to indicate that the function is not returning the actual
> distance but a number indicative of 'near', 'far' etc. (it actually returns
> 1, 2 etc).

Hm... ok.  To me at least it doesn't really convey that meaning, but
then I'm not sure what would.  To me "relative distance" means the
distance relative to some other object, but then all the NUMA
distances are that - the distance of one node relative to another.

> >> ---
> >>  arch/powerpc/include/asm/topology.h   |  4 ++--
> >>  arch/powerpc/mm/numa.c| 10 +-
> >>  arch/powerpc/platforms/pseries/lpar.c |  4 ++--
> >>  3 files changed, 13 insertions(+), 5 deletions(-)
> >> 
> >> diff --git a/arch/powerpc/include/asm/topology.h 
> >> b/arch/powerpc/include/asm/topology.h
> >> index e4db64c0e184..ac8b5ed79832 100644
> >> --- a/arch/powerpc/include/asm/topology.h
> >> +++ b/arch/powerpc/include/asm/topology.h
> >> @@ -36,7 +36,7 @@ static inline int pcibus_to_node(struct pci_bus *bus)
> >> cpu_all_mask : \
> >> cpumask_of_node(pcibus_to_node(bus)))
> >>  
> >> -extern int cpu_distance(__be32 *cpu1_assoc, __be32 *cpu2_assoc);
> >> +int cpu_relative_distance(__be32 *cpu1_assoc, __be32 *cpu2_assoc);
> >>  extern int __node_distance(int, int);
> >>  #define node_distance(a, b) __node_distance(a, b)
> >>  
> >> @@ -83,7 +83,7 @@ static inline void sysfs_remove_device_from_node(struct 
> >> device *dev,
> >>  
> >>  static inline void update_numa_cpu_lookup_table(unsigned int cpu, int 
> >> node) {}
> >>  
> >> -static inline int cpu_distance(__be32 *cpu1_assoc, __be32 *cpu2_assoc)
> >> +static inline int cpu_relative_distance(__be32 *cpu1_assoc, __be32 
> >> *cpu2_assoc)
> >>  {
> >>return 0;
> >>  }
> >> diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
> >> index 7b142f79d600..c6293037a103 100644
> >> --- a/arch/powerpc/mm/numa.c
> >> +++ b/arch/powerpc/mm/numa.c
> >> @@ -166,7 +166,7 @@ static void unmap_cpu_from_node(unsigned long cpu)
> >>  }
> >>  #endif /* CONFIG_HOTPLUG_CPU || CONFIG_PPC_SPLPAR */
> >>  
> >> -int cpu_distance(__be32 *cpu1_assoc, __be32 *cpu2_assoc)
> >> +static int __cpu_form1_relative_distance(__be32 *cpu1_assoc, __be32 
> >> *cpu2_assoc)
> >>  {
> >>int dist = 0;
> >>  
> >> @@ -182,6 +182,14 @@ int cpu_distance(__be32 *cpu1_assoc, __be32 
> >> *cpu2_assoc)
> >>return dist;
> >>  }
> >>  
> >> +int cpu_relative_distance(__be32 *cpu1_assoc, __be32 *cpu2_assoc)
> >> +{
> >> +  /* We should not get called with FORM0 */
> >> +  VM_WARN_ON(affinity_form == FORM0_AFFINITY);
> >> +
> >> +  return __cpu_form1_relative_distance(cpu1_assoc, cpu2_assoc);
> >> +}
> >> +
> >>  /* must hold reference to node during call */
> >>  static const __be32 *of_get_associativity(struct device_node *dev)
> >>  {
> >> diff --git a/arch/powerpc/platforms/pseries/lpar.c 
> >> b/arch/powerpc/platforms/pseries/lpar.c
> >> index dab356e3ff87..afefbdfe768d 100644
> >> --- a/arch/powerpc/platforms/pseries/lpar.c
> >> +++ b/arch/powerpc/platforms/pseries/lpar.c
> >> @@ -261,7 +261,7 @@ static int cpu_relative_dispatch_distance(int 
> >> last_disp_cpu, int cur_disp_cpu)
> >>if (!last_disp_cpu_assoc || !cur_disp_cpu_assoc)
> >>return -EIO;
> >>  
> >> -  return cpu_distance(last_disp_cpu_assoc, cur_disp_cpu_assoc);
> >> +  return cpu_relative_distance(last_disp_cpu_assoc, cur_disp_cpu_assoc);
> >>  }
> >>  
> >>  static int cpu_home_node_dispatch_distance(int disp_cpu)
> >> @@ -281,7 +281,7 @@ static int cpu_home_node_dispatch_distance(int 
> >> disp_cpu)
> >>if (!disp_cpu_assoc || !vcpu_assoc)
> >>return -EIO;
> >>  
> >> -  return cpu_distance(disp_cpu_assoc, vcpu_assoc);
> >> +  return cpu_relative_distance(disp_cpu_assoc, vcpu_assoc);
> >>  }
> >>  
> >>  static void update_vcpu_disp_stat(int disp_cpu)
> >
> 

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature


Re: [PATCH v5 1/6] powerpc/pseries: rename min_common_depth to primary_domain_index

2021-07-25 Thread David Gibson
On Thu, Jul 22, 2021 at 10:47:49AM +0530, Aneesh Kumar K.V wrote:
> On 7/22/21 8:06 AM, David Gibson wrote:
> > On Thu, Jul 22, 2021 at 11:59:15AM +1000, David Gibson wrote:
> > > On Mon, Jun 28, 2021 at 08:41:12PM +0530, Aneesh Kumar K.V wrote:
> > > > No functional change in this patch.
> > > 
> > > The new name does not match how you describe "primary domain index" in
> > > the documentation from patch 6/6.  There it comes from the values in
> > > associativity-reference-points, but here it simply comes from the
> > > lengths of all the associativity properties.
> > 
> > No, sorry, I misread this code... misled by the old name, so it's a
> > good thing you're changing it.
> > 
> > But.. I'm still not sure the new name is accurate, either...
> > 
> > [snip]
> > > > if (form1_affinity) {
> > > > -   depth = of_read_number(distance_ref_points, 1);
> > > > +   index = of_read_number(distance_ref_points, 1);
> > 
> > > AFAICT distance_ref_points hasn't been altered from the
> > of_get_property() at this point, so isn't this setting depth / index
> > to the number of entries in ref-points, rather than the value of the
> > first entry (which is what primary domain index is supposed to be).
> > 
> 
> ibm,associativity-reference-points property format is as below.
> 
> # lsprop  ibm,associativity-reference-points
> ibm,associativity-reference-points
>  0004 0002
> 
> it doesn't have the number of elements as the first item.
> 
> For FORM1, the 1st element is the NUMA boundary index/primary_domain_index.
> For FORM0, the 2nd element is the NUMA boundary index/primary_domain_index.

Sorry, my bad.  I foolishly expected consistency from PAPR.

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature


[powerpc:merge] BUILD SUCCESS e4277861b65960a264040663ac44c0b946ab402b

2021-07-25 Thread kernel test robot
tree/branch: https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git 
merge
branch HEAD: e4277861b65960a264040663ac44c0b946ab402b  Automatic merge of 
'fixes' into merge (2021-07-25 23:55)

elapsed time: 722m

configs tested: 166
configs skipped: 3

The following configs have been built successfully.
More configs may be tested in the coming days.

gcc tested configs:
arm defconfig
arm64allyesconfig
arm64   defconfig
arm  allyesconfig
arm  allmodconfig
i386 randconfig-c001-20210725
i386 randconfig-c001-20210726
m68k  hp300_defconfig
sh  kfr2r09_defconfig
m68k  sun3x_defconfig
armmvebu_v7_defconfig
powerpc tqm8540_defconfig
arm pxa_defconfig
armspear6xx_defconfig
mips   mtx1_defconfig
powerpc  cm5200_defconfig
armdove_defconfig
sh sh03_defconfig
powerpc rainier_defconfig
arm  ixp4xx_defconfig
powerpc mpc8315_rdb_defconfig
sh  landisk_defconfig
shedosk7760_defconfig
powerpc   microwatt_defconfig
mips  ath25_defconfig
xtensa   alldefconfig
ia64  tiger_defconfig
m68k   bvme6000_defconfig
arm   spitz_defconfig
mipsmaltaup_xpa_defconfig
ia64  gensparse_defconfig
powerpcicon_defconfig
powerpc akebono_defconfig
sh   se7206_defconfig
m68k  atari_defconfig
mipsjmr3927_defconfig
mips  ath79_defconfig
mips   bmips_be_defconfig
powerpc  ppc40x_defconfig
mips  decstation_64_defconfig
shmigor_defconfig
powerpc64alldefconfig
arm  tct_hammer_defconfig
sh   se7780_defconfig
sh  rsk7203_defconfig
nios2allyesconfig
powerpc ep8248e_defconfig
pariscgeneric-32bit_defconfig
riscv nommu_k210_sdcard_defconfig
ia64zx1_defconfig
powerpc pseries_defconfig
riscvnommu_virt_defconfig
powerpc  storcenter_defconfig
mips cobalt_defconfig
powerpc  mgcoge_defconfig
arm  pcm027_defconfig
xtensa  defconfig
h8300alldefconfig
powerpc ksi8560_defconfig
m68kstmark2_defconfig
powerpc tqm8541_defconfig
powerpc  ep88xc_defconfig
sh apsh4a3a_defconfig
powerpc sequoia_defconfig
mips  pistachio_defconfig
arcnsim_700_defconfig
powerpc mpc834x_mds_defconfig
sparc64  alldefconfig
armmagician_defconfig
xtensa  iss_defconfig
powerpc  iss476-smp_defconfig
nios2   defconfig
sh ap325rxa_defconfig
mips  malta_kvm_defconfig
m68k amcore_defconfig
um   x86_64_defconfig
arm shannon_defconfig
xtensaxip_kc705_defconfig
openriscdefconfig
archsdk_defconfig
x86_64allnoconfig
ia64defconfig
ia64 allyesconfig
ia64 allmodconfig
m68k allmodconfig
m68kdefconfig
m68k allyesconfig
arc  allyesconfig
nds32 allnoconfig
nds32   defconfig
cskydefconfig
alpha   defconfig
alphaallyesconfig
xtensa   allyesconfig
h8300allyesconfig
arc defconfig
sh   allmodconfig
parisc  defconfig
s390 allyesconfig
s390 allmodconfig
parisc

[powerpc:fixes-test] BUILD SUCCESS d9c57d3ed52a92536f5fa59dc5ccdd58b4875076

2021-07-25 Thread kernel test robot
tree/branch: https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git 
fixes-test
branch HEAD: d9c57d3ed52a92536f5fa59dc5ccdd58b4875076  KVM: PPC: Book3S HV 
Nested: Sanitise H_ENTER_NESTED TM state

elapsed time: 724m

configs tested: 158
configs skipped: 99

The following configs have been built successfully.
More configs may be tested in the coming days.

gcc tested configs:
arm defconfig
arm64   defconfig
arm  allmodconfig
arm  allyesconfig
arm64allyesconfig
i386 randconfig-c001-20210725
i386 randconfig-c001-20210726
arm pxa_defconfig
armspear6xx_defconfig
mips   mtx1_defconfig
powerpc  cm5200_defconfig
armdove_defconfig
powerpc mpc85xx_cds_defconfig
x86_64allnoconfig
h8300   h8s-sim_defconfig
powerpcmvme5100_defconfig
mips   rs90_defconfig
alpha   defconfig
ia64 bigsur_defconfig
openrisc simple_smp_defconfig
sh sh03_defconfig
powerpc rainier_defconfig
arm  ixp4xx_defconfig
powerpc mpc8315_rdb_defconfig
powerpc   microwatt_defconfig
powerpc akebono_defconfig
sh   se7206_defconfig
m68k  atari_defconfig
m68k  sun3x_defconfig
mipsjmr3927_defconfig
mips  ath79_defconfig
powerpc  mgcoge_defconfig
um   x86_64_defconfig
powerpc  g5_defconfig
microblaze  defconfig
mipsnlm_xlp_defconfig
powerpc   allnoconfig
mips   bmips_be_defconfig
powerpc  ppc40x_defconfig
mips  decstation_64_defconfig
shmigor_defconfig
powerpc64alldefconfig
powerpc  obs600_defconfig
sh sh7710voipgw_defconfig
sh  rsk7203_defconfig
nios2allyesconfig
powerpc ep8248e_defconfig
pariscgeneric-32bit_defconfig
riscv nommu_k210_sdcard_defconfig
ia64zx1_defconfig
powerpc pseries_defconfig
riscvnommu_virt_defconfig
powerpc  storcenter_defconfig
mipsmaltaup_xpa_defconfig
mips cobalt_defconfig
sh   se7780_defconfig
arm  pcm027_defconfig
xtensa  defconfig
h8300alldefconfig
powerpc ksi8560_defconfig
m68kstmark2_defconfig
powerpc tqm8541_defconfig
powerpc  ep88xc_defconfig
sh apsh4a3a_defconfig
powerpc sequoia_defconfig
xtensa  iss_defconfig
mips  pistachio_defconfig
sh  landisk_defconfig
arcnsim_700_defconfig
powerpc mpc834x_mds_defconfig
sparc64  alldefconfig
armmagician_defconfig
powerpc  iss476-smp_defconfig
nios2   defconfig
sh ap325rxa_defconfig
mips  malta_kvm_defconfig
m68k amcore_defconfig
ia64defconfig
m68k allmodconfig
m68kdefconfig
m68k allyesconfig
arc  allyesconfig
nds32 allnoconfig
nds32   defconfig
cskydefconfig
alphaallyesconfig
xtensa   allyesconfig
h8300allyesconfig
arc defconfig
sh   allmodconfig
parisc  defconfig
s390 allyesconfig
s390 allmodconfig
parisc   allyesconfig
s390defconfig
sparc   defconfig
i386defconfig
i386 allyesconfig
sparcallyesconfig
mips allyesconfig
mips

Re: [PATCH v4 1/2] KVM: PPC: Book3S HV: Sanitise vcpu registers in nested path

2021-07-25 Thread Nicholas Piggin
Excerpts from Fabiano Rosas's message of July 23, 2021 8:12 am:
> As one of the arguments of the H_ENTER_NESTED hypercall, the nested
> hypervisor (L1) prepares a structure containing the values of various
> hypervisor-privileged registers with which it wants the nested guest
> (L2) to run. Since the nested HV runs in supervisor mode it needs the
> host to write to these registers.
> 
> To stop a nested HV manipulating this mechanism and using a nested
> guest as a proxy to access a facility that has been made unavailable
> to it, we have a routine that sanitises the values of the HV registers
> before copying them into the nested guest's vcpu struct.
> 
> However, when coming out of the guest the values are copied as they
> were back into L1 memory, which means that any sanitisation we did
> during guest entry will be exposed to L1 after H_ENTER_NESTED returns.
> 
> This patch alters this sanitisation to have effect on the vcpu->arch
> registers directly before entering and after exiting the guest,
> leaving the structure that is copied back into L1 unchanged (except
> when we really want L1 to access the value, e.g the Cause bits of
> HFSCR).

These patches look good to me. I ported my demand-faulting patches on 
top of them and things seem to work okay.

Reviewed-by: Nicholas Piggin 

Just one minor nit:

> 
> Signed-off-by: Fabiano Rosas 
> ---
>  arch/powerpc/kvm/book3s_hv_nested.c | 100 +++-
>  1 file changed, 52 insertions(+), 48 deletions(-)
> 
> diff --git a/arch/powerpc/kvm/book3s_hv_nested.c 
> b/arch/powerpc/kvm/book3s_hv_nested.c
> index 8543ad538b0c..3804dc50ebe8 100644
> --- a/arch/powerpc/kvm/book3s_hv_nested.c
> +++ b/arch/powerpc/kvm/book3s_hv_nested.c
> @@ -104,8 +104,17 @@ static void save_hv_return_state(struct kvm_vcpu *vcpu, 
> int trap,
>  {
>   struct kvmppc_vcore *vc = vcpu->arch.vcore;
>  
> + /*
> +  * When loading the hypervisor-privileged registers to run L2,
> +  * we might have used bits from L1 state to restrict what the
> +  * L2 state is allowed to be. Since L1 is not allowed to read
> +  * the HV registers, do not include these modifications in the
> +  * return state.
> +  */
> + hr->hfscr = ((~HFSCR_INTR_CAUSE & hr->hfscr) |
> +  (HFSCR_INTR_CAUSE & vcpu->arch.hfscr));

Can you change this to only update HFSCR intr cause field when we take a 
hfac interrupt? It's possible the L0 can cause other kinds of hfacs 
behind the back of the L1 with demand faulting, so it would be unusual
for L1 to see the register change if it didn't take an hfac interrupt.

Thanks,
Nick

> +
>   hr->dpdes = vc->dpdes;
> - hr->hfscr = vcpu->arch.hfscr;
>   hr->purr = vcpu->arch.purr;
>   hr->spurr = vcpu->arch.spurr;
>   hr->ic = vcpu->arch.ic;
> @@ -134,49 +143,7 @@ static void save_hv_return_state(struct kvm_vcpu *vcpu, 
> int trap,
>   }
>  }
>  
> -/*
> - * This can result in some L0 HV register state being leaked to an L1
> - * hypervisor when the hv_guest_state is copied back to the guest after
> - * being modified here.
> - *
> - * There is no known problem with such a leak, and in many cases these
> - * register settings could be derived by the guest by observing behaviour
> - * and timing, interrupts, etc., but it is an issue to consider.
> - */
> -static void sanitise_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state 
> *hr)
> -{
> - struct kvmppc_vcore *vc = vcpu->arch.vcore;
> - u64 mask;
> -
> - /*
> -  * Don't let L1 change LPCR bits for the L2 except these:
> -  */
> - mask = LPCR_DPFD | LPCR_ILE | LPCR_TC | LPCR_AIL | LPCR_LD |
> - LPCR_LPES | LPCR_MER;
> -
> - /*
> -  * Additional filtering is required depending on hardware
> -  * and configuration.
> -  */
> - hr->lpcr = kvmppc_filter_lpcr_hv(vcpu->kvm,
> - (vc->lpcr & ~mask) | (hr->lpcr & mask));
> -
> - /*
> -  * Don't let L1 enable features for L2 which we've disabled for L1,
> -  * but preserve the interrupt cause field.
> -  */
> - hr->hfscr &= (HFSCR_INTR_CAUSE | vcpu->arch.hfscr);
> -
> - /* Don't let data address watchpoint match in hypervisor state */
> - hr->dawrx0 &= ~DAWRX_HYP;
> - hr->dawrx1 &= ~DAWRX_HYP;
> -
> - /* Don't let completed instruction address breakpt match in HV state */
> - if ((hr->ciabr & CIABR_PRIV) == CIABR_PRIV_HYPER)
> - hr->ciabr &= ~CIABR_PRIV;
> -}
> -
> -static void restore_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr)
> +static void restore_hv_regs(struct kvm_vcpu *vcpu, const struct 
> hv_guest_state *hr)
>  {
>   struct kvmppc_vcore *vc = vcpu->arch.vcore;
>  
> @@ -288,6 +255,43 @@ static int kvmhv_write_guest_state_and_regs(struct 
> kvm_vcpu *vcpu,
>sizeof(struct pt_regs));
>  }
>  
> +static void load_l2_hv_regs(struct kvm_vcpu *vcpu,
> + const struct hv_guest_state *l2_hv,
> +   

Re: [PATCH v4 10/10] net/ps3_gelic: Fix DMA mapping problems

2021-07-25 Thread Christophe Leroy

Geoff Levand  a écrit :


Fixes several DMA mapping problems with the PS3's gelic network driver:

 * Change from checking the return value of dma_map_single to using the
   dma_mapping_error routine.
 * Use the correct buffer length when mapping the RX skb.
 * Improved error checking and debug logging.


The patch is quite big and probably deserves more explanation. For  
instance, explain why the buffer length is not correct today.


Also, as it is a bug-fixing patch, it should include a 'Fixes:' tag and  
a Cc: to stable@vger.kernel.org. Also, when possible, bug fixes should  
be one of the first patches in a series like that so that they can be  
applied to stable without applying the whole series.


Christophe



Fixes runtime errors like these, and also other randomly occurring errors:

  IP-Config: Complete:
  DMA-API: ps3_gelic_driver sb_05: device driver failed to check map error
  WARNING: CPU: 0 PID: 0 at kernel/dma/debug.c:1027 .check_unmap+0x888/0x8dc

Signed-off-by: Geoff Levand 
---
 drivers/net/ethernet/toshiba/ps3_gelic_net.c | 183 +++
 1 file changed, 108 insertions(+), 75 deletions(-)

diff --git a/drivers/net/ethernet/toshiba/ps3_gelic_net.c  
b/drivers/net/ethernet/toshiba/ps3_gelic_net.c

index 42f4de9ad5fe..11ddeacb1159 100644
--- a/drivers/net/ethernet/toshiba/ps3_gelic_net.c
+++ b/drivers/net/ethernet/toshiba/ps3_gelic_net.c
@@ -336,22 +336,31 @@ static int gelic_card_init_chain(struct  
gelic_card *card,

struct gelic_descr_chain *chain, struct gelic_descr *start_descr,
int descr_count)
 {
-   int i;
-   struct gelic_descr *descr;
+   struct gelic_descr *descr = start_descr;
struct device *dev = ctodev(card);
+   unsigned int index;

-   descr = start_descr;
-   memset(descr, 0, sizeof(*descr) *descr_count);
+   memset(start_descr, 0, descr_count * sizeof(*start_descr));

-   for (i = 0; i < descr_count; i++, descr++) {
-   descr->link.size = sizeof(struct gelic_hw_regs);
+   for (index = 0, descr = start_descr; index < descr_count;
+   index++, descr++) {
gelic_descr_set_status(descr, GELIC_DESCR_DMA_NOT_IN_USE);
-   descr->link.cpu_addr =
-   dma_map_single(dev, descr, descr->link.size,
-   DMA_BIDIRECTIONAL);

-   if (!descr->link.cpu_addr)
-   goto iommu_error;
+   descr->link.size = sizeof(struct gelic_hw_regs);
+   descr->link.cpu_addr = dma_map_single(dev, descr,
+   descr->link.size, DMA_BIDIRECTIONAL);
+
+   if (unlikely(dma_mapping_error(dev, descr->link.cpu_addr))) {
+   dev_err(dev, "%s:%d: dma_mapping_error\n", __func__,
+   __LINE__);
+
+   for (index--, descr--; index > 0; index--, descr--) {
+   if (descr->link.cpu_addr) {
+   gelic_unmap_link(dev, descr);
+   }
+   }
+   return -ENOMEM;
+   }

descr->next = descr + 1;
descr->prev = descr - 1;
@@ -360,8 +369,9 @@ static int gelic_card_init_chain(struct gelic_card *card,
(descr - 1)->next = start_descr;
start_descr->prev = (descr - 1);

-   descr = start_descr;
-   for (i = 0; i < descr_count; i++, descr++) {
+   /* chain bus addr of hw descriptor */
+   for (index = 0, descr = start_descr; index < descr_count;
+   index++, descr++) {
descr->hw_regs.next_descr_addr =
cpu_to_be32(descr->next->link.cpu_addr);
}
@@ -373,12 +383,6 @@ static int gelic_card_init_chain(struct  
gelic_card *card,

(descr - 1)->hw_regs.next_descr_addr = 0;

return 0;
-
-iommu_error:
-   for (i--, descr--; 0 <= i; i--, descr--)
-   if (descr->link.cpu_addr)
-   gelic_unmap_link(dev, descr);
-   return -ENOMEM;
 }

 /**
@@ -395,49 +399,63 @@ static int gelic_descr_prepare_rx(struct  
gelic_card *card,

struct gelic_descr *descr)
 {
struct device *dev = ctodev(card);
-   int offset;
-   unsigned int bufsize;
+   struct aligned_buff {
+   unsigned int total_bytes;
+   unsigned int offset;
+   };
+   struct aligned_buff a_buf;
+   dma_addr_t cpu_addr;

if (gelic_descr_get_status(descr) !=  GELIC_DESCR_DMA_NOT_IN_USE) {
dev_err(dev, "%s:%d: ERROR status\n", __func__, __LINE__);
}

-   /* we need to round up the buffer size to a multiple of 128 */
-   bufsize = ALIGN(GELIC_NET_MAX_MTU, GELIC_NET_RXBUF_ALIGN);
+   a_buf.total_bytes = ALIGN(GELIC_NET_MAX_MTU, GELIC_NET_RXBUF_ALIGN)
+   + GELIC_NET_RXBUF_ALIGN;
+
+   descr->skb = dev_alloc_skb(a_buf.total_bytes);

-   /* and we 

Re: [PATCH v4 09/10] net/ps3_gelic: Add new routine gelic_work_to_card

2021-07-25 Thread Christophe Leroy

Geoff Levand  a écrit :


Add new helper routine gelic_work_to_card that converts a work_struct
to a gelic_card.


Is adding a function really needed, given that it is used only once?

Christophe



Signed-off-by: Geoff Levand 
---
 drivers/net/ethernet/toshiba/ps3_gelic_net.c | 8 ++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/toshiba/ps3_gelic_net.c  
b/drivers/net/ethernet/toshiba/ps3_gelic_net.c

index 60fcca5d20dd..42f4de9ad5fe 100644
--- a/drivers/net/ethernet/toshiba/ps3_gelic_net.c
+++ b/drivers/net/ethernet/toshiba/ps3_gelic_net.c
@@ -1420,6 +1420,11 @@ static const struct ethtool_ops  
gelic_ether_ethtool_ops = {

.set_link_ksettings = gelic_ether_set_link_ksettings,
 };

+static struct gelic_card *gelic_work_to_card(struct work_struct *work)
+{
+   return container_of(work, struct gelic_card, tx_timeout_task);
+}
+
 /**
  * gelic_net_tx_timeout_task - task scheduled by the watchdog timeout
  * function (to be called not under interrupt status)
@@ -1429,8 +1434,7 @@ static const struct ethtool_ops  
gelic_ether_ethtool_ops = {

  */
 static void gelic_net_tx_timeout_task(struct work_struct *work)
 {
-   struct gelic_card *card =
-   container_of(work, struct gelic_card, tx_timeout_task);
+   struct gelic_card *card = gelic_work_to_card(work);
struct net_device *netdev = card->netdev[GELIC_PORT_ETHERNET_0];
struct device *dev = ctodev(card);

--
2.25.1





Re: [PATCH v4 08/10] net/ps3_gelic: Rename no to descr_count

2021-07-25 Thread Christophe Leroy

Geoff Levand  a écrit :


In an effort to make the PS3 gelic driver easier to maintain, rename
the gelic_card_init_chain parameter 'no' to 'descr_count'.


Not sure you really need such a long name. 'count' should be good enough.

Read https://www.kernel.org/doc/html/latest/process/coding-style.html#naming



Signed-off-by: Geoff Levand 
---
 drivers/net/ethernet/toshiba/ps3_gelic_net.c | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/toshiba/ps3_gelic_net.c  
b/drivers/net/ethernet/toshiba/ps3_gelic_net.c

index e55aa9fecfeb..60fcca5d20dd 100644
--- a/drivers/net/ethernet/toshiba/ps3_gelic_net.c
+++ b/drivers/net/ethernet/toshiba/ps3_gelic_net.c
@@ -325,7 +325,7 @@ static void gelic_card_free_chain(struct  
gelic_card *card,

  * @card: card structure
  * @chain: address of chain
  * @start_descr: address of descriptor array
- * @no: number of descriptors
+ * @descr_count: number of descriptors
  *
  * we manage a circular list that mirrors the hardware structure,
  * except that the hardware uses bus addresses.
@@ -334,16 +334,16 @@ static void gelic_card_free_chain(struct  
gelic_card *card,

  */
 static int gelic_card_init_chain(struct gelic_card *card,
struct gelic_descr_chain *chain, struct gelic_descr *start_descr,
-   int no)
+   int descr_count)
 {
int i;
struct gelic_descr *descr;
struct device *dev = ctodev(card);

descr = start_descr;
-   memset(descr, 0, sizeof(*descr) * no);
+   memset(descr, 0, sizeof(*descr) *descr_count);


You forgot the space after the *

Christophe



-   for (i = 0; i < no; i++, descr++) {
+   for (i = 0; i < descr_count; i++, descr++) {
descr->link.size = sizeof(struct gelic_hw_regs);
gelic_descr_set_status(descr, GELIC_DESCR_DMA_NOT_IN_USE);
descr->link.cpu_addr =
@@ -361,7 +361,7 @@ static int gelic_card_init_chain(struct gelic_card *card,
start_descr->prev = (descr - 1);

descr = start_descr;
-   for (i = 0; i < no; i++, descr++) {
+   for (i = 0; i < descr_count; i++, descr++) {
descr->hw_regs.next_descr_addr =
cpu_to_be32(descr->next->link.cpu_addr);
}
--
2.25.1





Re: [PATCH v4 04/10] net/ps3_gelic: Add new macro BUG_ON_DEBUG

2021-07-25 Thread Christophe Leroy

Geoff Levand  a écrit :


Add a new preprocessor macro BUG_ON_DEBUG, that expands to BUG_ON when
the preprocessor macro DEBUG is defined, or to WARN_ON when DEBUG is not
defined.  Also, replace all occurrences of BUG_ON with BUG_ON_DEBUG.


Why is BUG_ON() needed at all if WARN_ON() is enough ?

You just have to set panic_on_warn to get the system to stop at the first
warning.


BUG_ON() should be avoided unless vital.

Please read  
https://www.kernel.org/doc/html/latest/process/deprecated.html#bug-and-bug-on


Christophe




Signed-off-by: Geoff Levand 
---
 drivers/net/ethernet/toshiba/ps3_gelic_net.c | 13 ++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/toshiba/ps3_gelic_net.c  
b/drivers/net/ethernet/toshiba/ps3_gelic_net.c

index ded467d81f36..946e9bfa071b 100644
--- a/drivers/net/ethernet/toshiba/ps3_gelic_net.c
+++ b/drivers/net/ethernet/toshiba/ps3_gelic_net.c
@@ -44,6 +44,13 @@ MODULE_AUTHOR("SCE Inc.");
 MODULE_DESCRIPTION("Gelic Network driver");
 MODULE_LICENSE("GPL");

+#define BUG_ON_DEBUG(_cond) do { \
+   if (__is_defined(DEBUG)) \
+   BUG_ON(_cond); \
+   else \
+   WARN_ON(_cond); \
+} while (0)
+
 int gelic_card_set_irq_mask(struct gelic_card *card, u64 mask)
 {
struct device *dev = ctodev(card);
@@ -505,7 +512,7 @@ static void gelic_descr_release_tx(struct  
gelic_card *card,

struct sk_buff *skb = descr->skb;
struct device *dev = ctodev(card);

-   BUG_ON(!(be32_to_cpu(descr->hw_regs.data_status) &
+   BUG_ON_DEBUG(!(be32_to_cpu(descr->hw_regs.data_status) &
GELIC_DESCR_TX_TAIL));

dma_unmap_single(dev, be32_to_cpu(descr->hw_regs.payload.dev_addr),
@@ -1667,7 +1674,7 @@ static void gelic_card_get_vlan_info(struct  
gelic_card *card)

}

if (card->vlan[GELIC_PORT_ETHERNET_0].tx) {
-   BUG_ON(!card->vlan[GELIC_PORT_WIRELESS].tx);
+   BUG_ON_DEBUG(!card->vlan[GELIC_PORT_WIRELESS].tx);
card->vlan_required = 1;
} else
card->vlan_required = 0;
@@ -1709,7 +1716,7 @@ static int ps3_gelic_driver_probe(struct  
ps3_system_bus_device *sb_dev)

if (result) {
dev_err(dev, "%s:%d: ps3_dma_region_create failed: %d\n",
__func__, __LINE__, result);
-   BUG_ON("check region type");
+   BUG_ON_DEBUG("check region type");
goto fail_dma_region;
}

--
2.25.1





Re: [GIT PULL] Please pull powerpc/linux.git powerpc-5.14-3 tag

2021-07-25 Thread pr-tracker-bot
The pull request you sent on Mon, 26 Jul 2021 00:20:47 +1000:

> https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git 
> tags/powerpc-5.14-3

has been merged into torvalds/linux.git:
https://git.kernel.org/torvalds/c/3c0ce1497a449b0d150b455628947152c5f6216a

Thank you!

-- 
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/prtracker.html


[PATCH] powerpc/xmon: use ARRAY_SIZE

2021-07-25 Thread Jason Wang
ARRAY_SIZE is a macro defined as sizeof(a)/sizeof(a[0]); using it is a more
compact and formal way to get an array size.

Signed-off-by: Jason Wang 
---
 arch/powerpc/xmon/ppc-opc.c | 12 
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/xmon/ppc-opc.c b/arch/powerpc/xmon/ppc-opc.c
index dfb80810b16c..6ca4cd26caef 100644
--- a/arch/powerpc/xmon/ppc-opc.c
+++ b/arch/powerpc/xmon/ppc-opc.c
@@ -954,8 +954,7 @@ const struct powerpc_operand powerpc_operands[] =
   { 0xff, 11, NULL, NULL, PPC_OPERAND_SIGNOPT },
 };
 
-const unsigned int num_powerpc_operands = (sizeof (powerpc_operands)
-  / sizeof (powerpc_operands[0]));
+const unsigned int num_powerpc_operands = ARRAY_SIZE(powerpc_operands);
 
 /* The functions used to insert and extract complicated operands.  */
 
@@ -6968,8 +6967,7 @@ const struct powerpc_opcode powerpc_opcodes[] = {
 {"fcfidu.",XRC(63,974,1),  XRA_MASK, POWER7|PPCA2, PPCVLE, {FRT, 
FRB}},
 };
 
-const int powerpc_num_opcodes =
-  sizeof (powerpc_opcodes) / sizeof (powerpc_opcodes[0]);
+const int powerpc_num_opcodes = ARRAY_SIZE(powerpc_opcodes);
 
 /* The VLE opcode table.
 
@@ -7207,8 +7205,7 @@ const struct powerpc_opcode vle_opcodes[] = {
 {"se_bl",  BD8(58,0,1),BD8_MASK,   PPCVLE, 0,  {B8}},
 };
 
-const int vle_num_opcodes =
-  sizeof (vle_opcodes) / sizeof (vle_opcodes[0]);
+const int vle_num_opcodes = ARRAY_SIZE(vle_opcodes);
 
 /* The macro table.  This is only used by the assembler.  */
 
@@ -7276,5 +7273,4 @@ const struct powerpc_macro powerpc_macros[] = {
 {"e_clrlslwi",4, PPCVLE, "e_rlwinm %0,%1,%3,(%2)-(%3),31-(%3)"},
 };
 
-const int powerpc_num_macros =
-  sizeof (powerpc_macros) / sizeof (powerpc_macros[0]);
+const int powerpc_num_macros = ARRAY_SIZE(powerpc_macros);
-- 
2.32.0



[GIT PULL] Please pull powerpc/linux.git powerpc-5.14-3 tag

2021-07-25 Thread Michael Ellerman
-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA256

Hi Linus,

Please pull some more powerpc fixes for 5.14:

The following changes since commit e73f0f0ee7541171d89f2e2491130c7771ba58d3:

  Linux 5.14-rc1 (2021-07-11 15:07:40 -0700)

are available in the git repository at:

  https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git 
tags/powerpc-5.14-3

for you to fetch changes up to d9c57d3ed52a92536f5fa59dc5ccdd58b4875076:

  KVM: PPC: Book3S HV Nested: Sanitise H_ENTER_NESTED TM state (2021-07-23 
16:19:38 +1000)

- --
powerpc fixes for 5.14 #3

 - Fix guest to host memory corruption in H_RTAS due to missing nargs check.

 - Fix guest triggerable host crashes due to bad handling of nested guest TM 
state.

 - Fix possible crashes due to incorrect reference counting in 
kvm_arch_vcpu_ioctl().

 - Two commits fixing some regressions in KVM transactional memory handling 
introduced by
   the recent rework of the KVM code.

Thanks to: Nicholas Piggin, Alexey Kardashevskiy, Michael Neuling.

- --
Nicholas Piggin (5):
  KVM: PPC: Book3S HV P9: Fix guest TM support
  KVM: PPC: Book3S: Fix CONFIG_TRANSACTIONAL_MEM=n crash
  KVM: PPC: Fix kvm_arch_vcpu_ioctl vcpu_load leak
  KVM: PPC: Book3S: Fix H_RTAS rets buffer overflow
  KVM: PPC: Book3S HV Nested: Sanitise H_ENTER_NESTED TM state


 arch/powerpc/kvm/book3s_hv.c  |  2 ++
 arch/powerpc/kvm/book3s_hv_nested.c   | 20 
 arch/powerpc/kvm/book3s_hv_p9_entry.c | 25 +---
 arch/powerpc/kvm/book3s_rtas.c| 25 +---
 arch/powerpc/kvm/powerpc.c|  4 ++--
 5 files changed, 68 insertions(+), 8 deletions(-)
-----BEGIN PGP SIGNATURE-----

iQIzBAEBCAAdFiEEJFGtCPCthwEv2Y/bUevqPMjhpYAFAmD9bJEACgkQUevqPMjh
pYCFLBAAoD0a0fLmoh/LZBvvzD1LNDfoMcxtTF1mVPjv5zfLIDvj+M3+UsMHKBCV
ajDYuz0nd3TBPPBxMBXmQG4r5eYgilgkQ968uXIpQu2wkyAWIFu4hxKzUapLdddy
CN1TuSU9mWjviMsacjCU4zlEgmWpj7TtH+gBmhRKrQlAUMszmXlb5giRS+P6oAyN
OI+3ODlMkj+2CNwhQy7uaPrM13FnarUV0pliItAez4ka+1oWtlkKjXG8DG4k5TZf
7E/7qvxy7yOVXcULBeISJmLfBFttv3nUEAUjtxaA28d7YPZOUgfanSQUVhHl9nv5
KMJlOPAxEjZCF0j6TIEYgqC+DP0eBGDnvvfmGsxoTkYfMI5ykGYxsIEnTUBMZW4+
xi3OMx3cIlGhP0/3e1JFA9O4/zDzM7HxdONW/Wd9xDpLkjucNLZqvE5yP3ct/ATG
W4JmimhXIPFtnNrN0pgfyiU3l9Vodw/UD25AVYAQuamQ9kRtoPh0PtTarttVlw3o
JvZibyMwlds96n78PLThhpyC/SG030RO4oJAQBUpxt5PdAzyRTo4lCy10GQYN/kp
EG7rS7eizU38C379LJT7tkQzrfCgI1aJJ3PsoxggeT78AdybKogN3fHANHbFgOdn
k/SUo/6BF3lVO7C/SFdvPLuldltuYqjddDu24MHXHvtzhJ/pzQo=
=R0jy
-----END PGP SIGNATURE-----


Re: [PATCH v2 01/21] dma-mapping: Allow map_sg() ops to return negative error codes

2021-07-25 Thread Christoph Hellwig
> +int dma_map_sgtable(struct device *dev, struct sg_table *sgt,
> + enum dma_data_direction dir, unsigned long attrs)
> +{
> + int nents;
> +
> + nents = __dma_map_sg_attrs(dev, sgt->sgl, sgt->orig_nents, dir, attrs);
> + if (nents == 0)
> + return -EIO;
> + else if (nents < 0) {
> + if (WARN_ON_ONCE(nents != -EINVAL && nents != -ENOMEM &&
> +  nents != -EIO))
> + return -EIO;

I think this validation of the errnos needs to go into __dma_map_sg_attrs,
so that we catch it for the classic dma_map_sg callers as well.