When userspace uses the KVM_ARM_VCPU_SPE_STOP attribute to request that a
VCPU is no longer allowed to profile, keep all of the SPE register state in
memory, trap accesses to all of the SPE registers (not just the buffer
registers), and don't copy any of this shadow state to the hardware.

Signed-off-by: Alexandru Elisei <[email protected]>
---
 arch/arm64/include/asm/kvm_hyp.h   |  2 +
 arch/arm64/include/asm/kvm_spe.h   | 14 +++++++
 arch/arm64/include/uapi/asm/kvm.h  |  3 ++
 arch/arm64/kvm/arm.c               |  9 ++++
 arch/arm64/kvm/debug.c             | 13 ++++--
 arch/arm64/kvm/hyp/nvhe/debug-sr.c |  4 +-
 arch/arm64/kvm/hyp/nvhe/spe-sr.c   | 24 +++++++++++
 arch/arm64/kvm/hyp/vhe/spe-sr.c    | 56 +++++++++++++++++++++++++
 arch/arm64/kvm/spe.c               | 67 ++++++++++++++++++++++++++++++
 arch/arm64/kvm/sys_regs.c          |  2 +-
 10 files changed, 188 insertions(+), 6 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h
index 03bc51049996..ce365427b483 100644
--- a/arch/arm64/include/asm/kvm_hyp.h
+++ b/arch/arm64/include/asm/kvm_hyp.h
@@ -86,8 +86,10 @@ void __debug_switch_to_host(struct kvm_vcpu *vcpu);
 #ifdef __KVM_NVHE_HYPERVISOR__
 void __debug_save_host_buffers_nvhe(struct kvm_vcpu *vcpu,
                                    struct kvm_cpu_context *host_ctxt);
+void __debug_save_spe(u64 *pmscr_el1);
 void __debug_restore_host_buffers_nvhe(struct kvm_vcpu *vcpu,
                                       struct kvm_cpu_context *host_ctxt);
+void __debug_restore_spe(u64 pmscr_el1);
 #ifdef CONFIG_KVM_ARM_SPE
 void __spe_save_host_state_nvhe(struct kvm_vcpu *vcpu,
                                struct kvm_cpu_context *host_ctxt);
diff --git a/arch/arm64/include/asm/kvm_spe.h b/arch/arm64/include/asm/kvm_spe.h
index d7d7b9e243de..f51561e3b43f 100644
--- a/arch/arm64/include/asm/kvm_spe.h
+++ b/arch/arm64/include/asm/kvm_spe.h
@@ -16,13 +16,23 @@ static __always_inline bool kvm_supports_spe(void)
        return static_branch_likely(&kvm_spe_available);
 }
 
+/* Guest profiling disabled by the user. */
+#define KVM_VCPU_SPE_STOP_USER         (1 << 0)
+/* Stop profiling and exit to userspace when guest starts profiling. */
+#define KVM_VCPU_SPE_STOP_USER_EXIT    (1 << 1)
+
 struct kvm_vcpu_spe {
        bool initialized;       /* SPE initialized for the VCPU */
        int irq_num;            /* Buffer management interrut number */
        bool irq_level;         /* 'true' if the interrupt is asserted at the 
VGIC */
        bool hwirq_level;       /* 'true' if the SPE hardware is asserting the 
interrupt */
+       u64 flags;
 };
 
+#define kvm_spe_profiling_stopped(vcpu)                                        
\
+       (((vcpu)->arch.spe.flags & KVM_VCPU_SPE_STOP_USER) ||           \
+        ((vcpu)->arch.spe.flags & KVM_VCPU_SPE_STOP_USER_EXIT))        \
+
 struct kvm_spe {
        bool perfmon_capable;   /* Is the VM perfmon_capable()? */
 };
@@ -31,6 +41,7 @@ void kvm_spe_init_supported_cpus(void);
 void kvm_spe_vm_init(struct kvm *kvm);
 int kvm_spe_vcpu_first_run_init(struct kvm_vcpu *vcpu);
 void kvm_spe_sync_hwstate(struct kvm_vcpu *vcpu);
+bool kvm_spe_exit_to_user(struct kvm_vcpu *vcpu);
 
 void kvm_spe_write_sysreg(struct kvm_vcpu *vcpu, int reg, u64 val);
 u64 kvm_spe_read_sysreg(struct kvm_vcpu *vcpu, int reg);
@@ -48,6 +59,8 @@ int kvm_spe_has_attr(struct kvm_vcpu *vcpu, struct 
kvm_device_attr *attr);
 struct kvm_vcpu_spe {
 };
 
+#define kvm_spe_profiling_stopped(vcpu)                (false)
+
 struct kvm_spe {
 };
 
@@ -55,6 +68,7 @@ static inline void kvm_spe_init_supported_cpus(void) {}
 static inline void kvm_spe_vm_init(struct kvm *kvm) {}
 static inline int kvm_spe_vcpu_first_run_init(struct kvm_vcpu *vcpu) { return 
-ENOEXEC; }
 static inline void kvm_spe_sync_hwstate(struct kvm_vcpu *vcpu) {}
+static inline bool kvm_spe_exit_to_user(struct kvm_vcpu *vcpu) { return false; 
}
 
 static inline void kvm_spe_write_sysreg(struct kvm_vcpu *vcpu, int reg, u64 
val) {}
 static inline u64 kvm_spe_read_sysreg(struct kvm_vcpu *vcpu, int reg) { return 
0; }
diff --git a/arch/arm64/include/uapi/asm/kvm.h 
b/arch/arm64/include/uapi/asm/kvm.h
index 75a5113f610e..63599ee39a7b 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -376,6 +376,9 @@ struct kvm_arm_copy_mte_tags {
 #define     KVM_ARM_VCPU_SPE_STOP_EXIT         (1 << 1)
 #define     KVM_ARM_VCPU_SPE_RESUME            (1 << 2)
 
+/* run->fail_entry.hardware_entry_failure_reason codes. */
+#define KVM_EXIT_FAIL_ENTRY_SPE                (1 << 0)
+
 /* KVM_IRQ_LINE irq field index values */
 #define KVM_ARM_IRQ_VCPU2_SHIFT                28
 #define KVM_ARM_IRQ_VCPU2_MASK         0xf
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index ec449bc5f811..b7aae25bb9da 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -873,6 +873,15 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
                        continue;
                }
 
+               if (unlikely(kvm_spe_exit_to_user(vcpu))) {
+                       run->exit_reason = KVM_EXIT_FAIL_ENTRY;
+                       run->fail_entry.hardware_entry_failure_reason
+                               = KVM_EXIT_FAIL_ENTRY_SPE;
+                       ret = -EAGAIN;
+                       preempt_enable();
+                       continue;
+               }
+
                kvm_pmu_flush_hwstate(vcpu);
 
                local_irq_disable();
diff --git a/arch/arm64/kvm/debug.c b/arch/arm64/kvm/debug.c
index 6e5fc1887215..6a4277a23bbb 100644
--- a/arch/arm64/kvm/debug.c
+++ b/arch/arm64/kvm/debug.c
@@ -96,11 +96,18 @@ static void kvm_arm_setup_mdcr_el2(struct kvm_vcpu *vcpu)
        if (kvm_supports_spe() && kvm_vcpu_has_spe(vcpu)) {
                /*
                 * Use EL1&0 for the profiling buffer translation regime and
-                * trap accesses to the buffer control registers; leave
-                * MDCR_EL2.TPMS unset and do not trap accesses to the profiling
-                * control registers.
+                * trap accesses to the buffer control registers; if profiling
+                * is stopped, also set MDCR_EL2.TPMS to trap accesses to the
+                * rest of the registers, otherwise leave it clear.
+                *
+                * Leaving MDCR_EL2.E2PB unset, like we do when the VCPU does not
+                * have SPE, means that PMBIDR_EL1.P (which KVM does not trap)
+                * will be set and the guest will detect SPE as being
+                * unavailable.
                 */
                vcpu->arch.mdcr_el2 |= MDCR_EL2_E2PB_EL1_TRAP << 
MDCR_EL2_E2PB_SHIFT;
+               if (kvm_spe_profiling_stopped(vcpu))
+                       vcpu->arch.mdcr_el2 |= MDCR_EL2_TPMS;
        } else {
                /*
                 * Trap accesses to the profiling control registers; leave
diff --git a/arch/arm64/kvm/hyp/nvhe/debug-sr.c 
b/arch/arm64/kvm/hyp/nvhe/debug-sr.c
index 1622615954b2..944972de0944 100644
--- a/arch/arm64/kvm/hyp/nvhe/debug-sr.c
+++ b/arch/arm64/kvm/hyp/nvhe/debug-sr.c
@@ -14,7 +14,7 @@
 #include <asm/kvm_hyp.h>
 #include <asm/kvm_mmu.h>
 
-static void __debug_save_spe(u64 *pmscr_el1)
+void __debug_save_spe(u64 *pmscr_el1)
 {
        u64 reg;
 
@@ -40,7 +40,7 @@ static void __debug_save_spe(u64 *pmscr_el1)
        dsb(nsh);
 }
 
-static void __debug_restore_spe(u64 pmscr_el1)
+void __debug_restore_spe(u64 pmscr_el1)
 {
        if (!pmscr_el1)
                return;
diff --git a/arch/arm64/kvm/hyp/nvhe/spe-sr.c b/arch/arm64/kvm/hyp/nvhe/spe-sr.c
index b74131486a75..8ed03aa4f965 100644
--- a/arch/arm64/kvm/hyp/nvhe/spe-sr.c
+++ b/arch/arm64/kvm/hyp/nvhe/spe-sr.c
@@ -23,6 +23,11 @@ void __spe_save_host_state_nvhe(struct kvm_vcpu *vcpu,
 {
        u64 pmblimitr;
 
+       if (kvm_spe_profiling_stopped(vcpu)) {
+               __debug_save_spe(__ctxt_sys_reg(host_ctxt, PMSCR_EL1));
+               return;
+       }
+
        pmblimitr = read_sysreg_s(SYS_PMBLIMITR_EL1);
        if (pmblimitr & BIT(SYS_PMBLIMITR_EL1_E_SHIFT)) {
                psb_csync();
@@ -49,6 +54,13 @@ void __spe_save_guest_state_nvhe(struct kvm_vcpu *vcpu,
 {
        u64 pmbsr;
 
+       /*
+        * Profiling is stopped and all register accesses are trapped, nothing
+        * to save here.
+        */
+       if (kvm_spe_profiling_stopped(vcpu))
+               return;
+
        if (read_sysreg_s(SYS_PMBLIMITR_EL1) & BIT(SYS_PMBLIMITR_EL1_E_SHIFT)) {
                psb_csync();
                dsb(nsh);
@@ -82,6 +94,11 @@ void __spe_save_guest_state_nvhe(struct kvm_vcpu *vcpu,
 void __spe_restore_host_state_nvhe(struct kvm_vcpu *vcpu,
                                   struct kvm_cpu_context *host_ctxt)
 {
+       if (kvm_spe_profiling_stopped(vcpu)) {
+               __debug_restore_spe(ctxt_sys_reg(host_ctxt, PMSCR_EL1));
+               return;
+       }
+
        __spe_restore_common_state(host_ctxt);
 
        write_sysreg_s(ctxt_sys_reg(host_ctxt, PMBPTR_EL1), SYS_PMBPTR_EL1);
@@ -94,6 +111,13 @@ void __spe_restore_host_state_nvhe(struct kvm_vcpu *vcpu,
 void __spe_restore_guest_state_nvhe(struct kvm_vcpu *vcpu,
                                    struct kvm_cpu_context *guest_ctxt)
 {
+       /*
+        * Profiling is stopped and all register accesses are trapped, nothing
+        * to restore here.
+        */
+       if (kvm_spe_profiling_stopped(vcpu))
+               return;
+
        __spe_restore_common_state(guest_ctxt);
 
        write_sysreg_s(ctxt_sys_reg(guest_ctxt, PMBPTR_EL1), SYS_PMBPTR_EL1);
diff --git a/arch/arm64/kvm/hyp/vhe/spe-sr.c b/arch/arm64/kvm/hyp/vhe/spe-sr.c
index ea4b3b69bb32..024a4c0618cc 100644
--- a/arch/arm64/kvm/hyp/vhe/spe-sr.c
+++ b/arch/arm64/kvm/hyp/vhe/spe-sr.c
@@ -10,6 +10,34 @@
 
 #include <hyp/spe-sr.h>
 
+static void __spe_save_host_buffer(u64 *pmscr_el2)
+{
+       u64 pmblimitr;
+
+       /* Disable guest profiling. */
+       write_sysreg_el1(0, SYS_PMSCR);
+
+       pmblimitr = read_sysreg_s(SYS_PMBLIMITR_EL1);
+       if (!(pmblimitr & BIT(SYS_PMBLIMITR_EL1_E_SHIFT))) {
+               *pmscr_el2 = 0;
+               return;
+       }
+
+       *pmscr_el2 = read_sysreg_el2(SYS_PMSCR);
+
+       /* Disable profiling at EL2 so we can drain the buffer. */
+       write_sysreg_el2(0, SYS_PMSCR);
+       isb();
+
+       /*
+        * We're going to change the buffer owning exception level when we
+        * activate traps, drain the buffer now.
+        */
+       psb_csync();
+       dsb(nsh);
+}
+NOKPROBE_SYMBOL(__spe_save_host_buffer);
+
 /*
  * Disable host profiling, drain the buffer and save the host SPE context.
  * Extra care must be taken because profiling might be in progress.
@@ -19,6 +47,11 @@ void __spe_save_host_state_vhe(struct kvm_vcpu *vcpu,
 {
        u64 pmblimitr, pmscr_el2;
 
+       if (kvm_spe_profiling_stopped(vcpu)) {
+               __spe_save_host_buffer(__ctxt_sys_reg(host_ctxt, PMSCR_EL2));
+               return;
+       }
+
        /* Disable profiling while the SPE context is being switched. */
        pmscr_el2 = read_sysreg_el2(SYS_PMSCR);
        write_sysreg_el2(__vcpu_sys_reg(vcpu, PMSCR_EL2), SYS_PMSCR);
@@ -50,6 +83,9 @@ void __spe_save_guest_state_vhe(struct kvm_vcpu *vcpu,
 {
        u64 pmblimitr, pmbsr;
 
+       if (kvm_spe_profiling_stopped(vcpu))
+               return;
+
        /*
         * We're at EL2 and the buffer owning regime is EL1, which means that
         * profiling is disabled. After we disable traps and restore the host's
@@ -78,6 +114,18 @@ void __spe_save_guest_state_vhe(struct kvm_vcpu *vcpu,
 }
 NOKPROBE_SYMBOL(__spe_save_guest_state_vhe);
 
+static void __spe_restore_host_buffer(u64 pmscr_el2)
+{
+       if (!pmscr_el2)
+               return;
+
+       /* Synchronize MDCR_EL2 write. */
+       isb();
+
+       write_sysreg_el2(pmscr_el2, SYS_PMSCR);
+}
+NOKPROBE_SYMBOL(__spe_restore_host_buffer);
+
 /*
  * Restore the host SPE context. Special care must be taken because we're
  * potentially resuming a profiling session which was stopped when we saved the
@@ -86,6 +134,11 @@ NOKPROBE_SYMBOL(__spe_save_guest_state_vhe);
 void __spe_restore_host_state_vhe(struct kvm_vcpu *vcpu,
                                  struct kvm_cpu_context *host_ctxt)
 {
+       if (kvm_spe_profiling_stopped(vcpu)) {
+               __spe_restore_host_buffer(ctxt_sys_reg(host_ctxt, PMSCR_EL2));
+               return;
+       }
+
        __spe_restore_common_state(host_ctxt);
 
        write_sysreg_s(ctxt_sys_reg(host_ctxt, PMBPTR_EL1), SYS_PMBPTR_EL1);
@@ -115,6 +168,9 @@ NOKPROBE_SYMBOL(__spe_restore_host_state_vhe);
 void __spe_restore_guest_state_vhe(struct kvm_vcpu *vcpu,
                                   struct kvm_cpu_context *guest_ctxt)
 {
+       if (kvm_spe_profiling_stopped(vcpu))
+               return;
+
        __spe_restore_common_state(guest_ctxt);
 
        /*
diff --git a/arch/arm64/kvm/spe.c b/arch/arm64/kvm/spe.c
index 2630e777fe1d..69ca731ba9d3 100644
--- a/arch/arm64/kvm/spe.c
+++ b/arch/arm64/kvm/spe.c
@@ -140,6 +140,28 @@ void kvm_spe_sync_hwstate(struct kvm_vcpu *vcpu)
        kvm_spe_update_irq(vcpu, true);
 }
 
+static bool kvm_spe_buffer_enabled(struct kvm_vcpu *vcpu)
+{
+       return !vcpu->arch.spe.irq_level  &&
+               (__vcpu_sys_reg(vcpu, PMBLIMITR_EL1) & 
BIT(SYS_PMBLIMITR_EL1_E_SHIFT));
+}
+
+bool kvm_spe_exit_to_user(struct kvm_vcpu *vcpu)
+{
+       u64 pmscr_enabled_mask = BIT(SYS_PMSCR_EL1_E0SPE_SHIFT) |
+                                BIT(SYS_PMSCR_EL1_E1SPE_SHIFT);
+
+       if (!(vcpu->arch.spe.flags & KVM_VCPU_SPE_STOP_USER_EXIT))
+               return false;
+
+       /*
+        * We don't trap the guest dropping to EL0, so exit even if profiling is
+        * disabled at EL1, but enabled at EL0.
+        */
+       return kvm_spe_buffer_enabled(vcpu) &&
+               (__vcpu_sys_reg(vcpu, PMSCR_EL1) & pmscr_enabled_mask);
+}
+
 void kvm_spe_write_sysreg(struct kvm_vcpu *vcpu, int reg, u64 val)
 {
        __vcpu_sys_reg(vcpu, reg) = val;
@@ -215,6 +237,31 @@ static bool kvm_spe_irq_is_valid(struct kvm *kvm, int irq)
        return true;
 }
 
+static int kvm_spe_stop_user(struct kvm_vcpu *vcpu, int flags)
+{
+       struct kvm_vcpu_spe *spe = &vcpu->arch.spe;
+
+       if (flags & KVM_ARM_VCPU_SPE_STOP_TRAP) {
+               if (flags & ~KVM_ARM_VCPU_SPE_STOP_TRAP)
+                       return -EINVAL;
+               spe->flags = KVM_VCPU_SPE_STOP_USER;
+       }
+
+       if (flags & KVM_ARM_VCPU_SPE_STOP_EXIT) {
+               if (flags & ~KVM_ARM_VCPU_SPE_STOP_EXIT)
+                       return -EINVAL;
+               spe->flags = KVM_VCPU_SPE_STOP_USER_EXIT;
+       }
+
+       if (flags & KVM_ARM_VCPU_SPE_RESUME) {
+               if (flags & ~KVM_ARM_VCPU_SPE_RESUME)
+                       return -EINVAL;
+               spe->flags = 0;
+       }
+
+       return 0;
+}
+
 int kvm_spe_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
 {
        if (!kvm_vcpu_supports_spe(vcpu))
@@ -268,6 +315,8 @@ int kvm_spe_set_attr(struct kvm_vcpu *vcpu, struct 
kvm_device_attr *attr)
 
                if (!flags)
                        return -EINVAL;
+
+               return kvm_spe_stop_user(vcpu, flags);
        }
        }
 
@@ -293,6 +342,24 @@ int kvm_spe_get_attr(struct kvm_vcpu *vcpu, struct 
kvm_device_attr *attr)
 
                return 0;
        }
+       case KVM_ARM_VCPU_SPE_STOP: {
+               int __user *uaddr = (int __user *)(long)attr->addr;
+               struct kvm_vcpu_spe *spe = &vcpu->arch.spe;
+               int flag = 0;
+
+               if (!vcpu->arch.spe.initialized)
+                       return -EAGAIN;
+
+               if (spe->flags & KVM_VCPU_SPE_STOP_USER)
+                       flag = KVM_ARM_VCPU_SPE_STOP_TRAP;
+               else if (spe->flags & KVM_VCPU_SPE_STOP_USER_EXIT)
+                       flag = KVM_ARM_VCPU_SPE_STOP_EXIT;
+
+               if (put_user(flag, uaddr))
+                       return -EFAULT;
+
+               return 0;
+       }
        }
 
        return -ENXIO;
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 064742cee425..cc711b081f31 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -608,7 +608,7 @@ static bool access_spe_reg(struct kvm_vcpu *vcpu, struct 
sys_reg_params *p,
 {      int reg = r->reg;
        u64 val = p->regval;
 
-       if (reg < PMBLIMITR_EL1) {
+       if (reg < PMBLIMITR_EL1 && !kvm_spe_profiling_stopped(vcpu)) {
                print_sys_reg_msg(p, "Unsupported guest SPE register access at: 
%lx [%08lx]\n",
                                  *vcpu_pc(vcpu), *vcpu_cpsr(vcpu));
        }
-- 
2.33.0

_______________________________________________
kvmarm mailing list
[email protected]
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm

Reply via email to