From: Wanpeng Li <[email protected]>

Allowing a guest to execute MWAIT without interception enables the guest
to put a (physical) CPU into a power-saving state from which it may take
longer to return than the host desires.

Don't give a guest that power over a host by default. (Especially
since nothing prevents a guest from using MWAIT even when it is not
advertised via CPUID.)

Cc: Paolo Bonzini <[email protected]>
Cc: Radim Krčmář <[email protected]>
Cc: Jan H. Schönherr <[email protected]>
Signed-off-by: Wanpeng Li <[email protected]>
---
 Documentation/virtual/kvm/api.txt | 23 ++++++++++++++---------
 arch/x86/include/asm/kvm_host.h   |  2 ++
 arch/x86/kvm/svm.c                |  2 +-
 arch/x86/kvm/vmx.c                |  9 +++++----
 arch/x86/kvm/x86.c                | 24 ++++++++++++++++++++----
 arch/x86/kvm/x86.h                | 10 +++++-----
 include/uapi/linux/kvm.h          |  2 +-
 tools/include/uapi/linux/kvm.h    |  2 +-
 8 files changed, 49 insertions(+), 25 deletions(-)

diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 98de506..76e5a15 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -4358,6 +4358,20 @@ enables QEMU to build error log and branch to guest kernel registered
 machine check handling routine. Without this capability KVM will
 branch to guests' 0x200 interrupt vector.
 
+7.13 KVM_CAP_X86_DISABLE_EXITS
+
+Architectures: x86
+Parameters: args[0] defines which exits are disabled
+Returns: 0 on success, -EINVAL when args[0] contains invalid exits
+
+Valid exits in args[0] are
+
+#define KVM_X86_DISABLE_EXITS_MWAIT            (1 << 0)
+
+Enabling this capability on a VM provides userspace with a way to no
+longer intercept some instructions for improved latency in some
+workloads.
+
 8. Other capabilities.
 ----------------------
 
@@ -4470,15 +4484,6 @@ reserved.
     Both registers and addresses are 64-bits wide.
     It will be possible to run 64-bit or 32-bit guest code.
 
-8.8 KVM_CAP_X86_GUEST_MWAIT
-
-Architectures: x86
-
-This capability indicates that guest using memory monotoring instructions
-(MWAIT/MWAITX) to stop the virtual CPU will not cause a VM exit.  As such time
-spent while virtual CPU is halted in this way will then be accounted for as
-guest running time on the host (as opposed to e.g. HLT).
-
 8.9 KVM_CAP_ARM_USER_IRQ
 
 Architectures: arm, arm64
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 0395c35..e107171 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -811,6 +811,8 @@ struct kvm_arch {
 
        gpa_t wall_clock;
 
+       bool mwait_in_guest;
+
        bool ept_identity_pagetable_done;
        gpa_t ept_identity_map_addr;
 
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index be9c839..321b3fd 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1390,7 +1390,7 @@ static void init_vmcb(struct vcpu_svm *svm)
        set_intercept(svm, INTERCEPT_XSETBV);
        set_intercept(svm, INTERCEPT_RSM);
 
-       if (!kvm_mwait_in_guest()) {
+       if (!kvm_mwait_in_guest(svm->vcpu.kvm)) {
                set_intercept(svm, INTERCEPT_MONITOR);
                set_intercept(svm, INTERCEPT_MWAIT);
        }
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 6cefd7b..2302ae2 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3733,13 +3733,11 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
              CPU_BASED_UNCOND_IO_EXITING |
              CPU_BASED_MOV_DR_EXITING |
              CPU_BASED_USE_TSC_OFFSETING |
+             CPU_BASED_MWAIT_EXITING |
+             CPU_BASED_MONITOR_EXITING |
              CPU_BASED_INVLPG_EXITING |
              CPU_BASED_RDPMC_EXITING;
 
-       if (!kvm_mwait_in_guest())
-               min |= CPU_BASED_MWAIT_EXITING |
-                       CPU_BASED_MONITOR_EXITING;
-
        opt = CPU_BASED_TPR_SHADOW |
              CPU_BASED_USE_MSR_BITMAPS |
              CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
@@ -5531,6 +5529,9 @@ static u32 vmx_exec_control(struct vcpu_vmx *vmx)
                exec_control |= CPU_BASED_CR3_STORE_EXITING |
                                CPU_BASED_CR3_LOAD_EXITING  |
                                CPU_BASED_INVLPG_EXITING;
+       if (kvm_mwait_in_guest(vmx->vcpu.kvm))
+               exec_control &= ~(CPU_BASED_MWAIT_EXITING |
+                               CPU_BASED_MONITOR_EXITING);
        return exec_control;
 }
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 36ef3d8..5fae476 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2809,9 +2809,15 @@ static int msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs __user *user_msrs,
        return r;
 }
 
+static inline bool kvm_can_mwait_in_guest(void)
+{
+       return boot_cpu_has(X86_FEATURE_MWAIT) &&
+               !boot_cpu_has_bug(X86_BUG_MONITOR);
+}
+
 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 {
-       int r;
+       int r = 0;
 
        switch (ext) {
        case KVM_CAP_IRQCHIP:
@@ -2867,8 +2873,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
        case KVM_CAP_ADJUST_CLOCK:
                r = KVM_CLOCK_TSC_STABLE;
                break;
-       case KVM_CAP_X86_GUEST_MWAIT:
-               r = kvm_mwait_in_guest();
+       case KVM_CAP_X86_DISABLE_EXITS:
+               if(kvm_can_mwait_in_guest())
+                       r |= KVM_X86_DISABLE_EXITS_MWAIT;
                break;
        case KVM_CAP_X86_SMM:
                /* SMBASE is usually relocated above 1M on modern chipsets,
@@ -2909,7 +2916,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
                r = KVM_X2APIC_API_VALID_FLAGS;
                break;
        default:
-               r = 0;
                break;
        }
        return r;
@@ -4214,6 +4220,16 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
 
                r = 0;
                break;
+       case KVM_CAP_X86_DISABLE_EXITS:
+               r = -EINVAL;
+               if (cap->args[0] & ~KVM_X86_DISABLE_VALID_EXITS)
+                       break;
+
+               if ((cap->args[0] & KVM_X86_DISABLE_EXITS_MWAIT) &&
+                       kvm_can_mwait_in_guest())
+                       kvm->arch.mwait_in_guest = true;
+               r = 0;
+               break;
        default:
                r = -EINVAL;
                break;
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index b91215d..cd1215e 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -2,8 +2,6 @@
 #ifndef ARCH_X86_KVM_X86_H
 #define ARCH_X86_KVM_X86_H
 
-#include <asm/processor.h>
-#include <asm/mwait.h>
 #include <linux/kvm_host.h>
 #include <asm/pvclock.h>
 #include "kvm_cache_regs.h"
@@ -264,10 +262,12 @@ static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec)
            __rem;                                              \
         })
 
-static inline bool kvm_mwait_in_guest(void)
+#define KVM_X86_DISABLE_EXITS_MWAIT          (1 << 0)
+#define KVM_X86_DISABLE_VALID_EXITS          (KVM_X86_DISABLE_EXITS_MWAIT)
+
+static inline bool kvm_mwait_in_guest(struct kvm *kvm)
 {
-       return boot_cpu_has(X86_FEATURE_MWAIT) &&
-               !boot_cpu_has_bug(X86_BUG_MONITOR);
+       return kvm->arch.mwait_in_guest;
 }
 
 #endif
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 088c2c9..1065006 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -929,7 +929,7 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_S390_GS 140
 #define KVM_CAP_S390_AIS 141
 #define KVM_CAP_SPAPR_TCE_VFIO 142
-#define KVM_CAP_X86_GUEST_MWAIT 143
+#define KVM_CAP_X86_DISABLE_EXITS 143
 #define KVM_CAP_ARM_USER_IRQ 144
 #define KVM_CAP_S390_CMMA_MIGRATION 145
 #define KVM_CAP_PPC_FWNMI 146
diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h
index 0fb5ef9..b13c257 100644
--- a/tools/include/uapi/linux/kvm.h
+++ b/tools/include/uapi/linux/kvm.h
@@ -924,7 +924,7 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_S390_GS 140
 #define KVM_CAP_S390_AIS 141
 #define KVM_CAP_SPAPR_TCE_VFIO 142
-#define KVM_CAP_X86_GUEST_MWAIT 143
+#define KVM_CAP_X86_DISABLE_EXITS 143
 #define KVM_CAP_ARM_USER_IRQ 144
 #define KVM_CAP_S390_CMMA_MIGRATION 145
 #define KVM_CAP_PPC_FWNMI 146
-- 
2.7.4

Reply via email to