This plugs an NMI-related hole in the VCPU synchronization between
kernel and user space. So far, neither pending NMIs nor the NMI-inhibit
mask was properly read or set, which could cause problems during
vmsave/restore, live migration, and system reset. Fix this by making use
of the new VCPU substate interface.

Signed-off-by: Jan Kiszka <[email protected]>
---

 Documentation/kvm/api.txt       |   12 ++++++++++++
 arch/x86/include/asm/kvm.h      |    7 +++++++
 arch/x86/include/asm/kvm_host.h |    2 ++
 arch/x86/kvm/svm.c              |   22 ++++++++++++++++++++++
 arch/x86/kvm/vmx.c              |   30 ++++++++++++++++++++++++++++++
 arch/x86/kvm/x86.c              |   26 ++++++++++++++++++++++++++
 6 files changed, 99 insertions(+), 0 deletions(-)

diff --git a/Documentation/kvm/api.txt b/Documentation/kvm/api.txt
index bee5bbd..e483edb 100644
--- a/Documentation/kvm/api.txt
+++ b/Documentation/kvm/api.txt
@@ -848,3 +848,15 @@ Deprecates: KVM_GET/SET_CPUID2
 Architectures: x86
 Payload: struct kvm_lapic
 Deprecates: KVM_GET/SET_LAPIC
+
+6.8 KVM_X86_VCPU_STATE_NMI
+
+Architectures: x86
+Payload: struct kvm_nmi_state
+Deprecates: -
+
+struct kvm_nmi_state {
+       __u8 pending;
+       __u8 masked;
+       __u8 pad1[6];
+};
diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h
index 326615a..6ad4448 100644
--- a/arch/x86/include/asm/kvm.h
+++ b/arch/x86/include/asm/kvm.h
@@ -256,5 +256,12 @@ struct kvm_reinject_control {
 #define KVM_X86_VCPU_STATE_MSRS                1000
 #define KVM_X86_VCPU_STATE_CPUID       1001
 #define KVM_X86_VCPU_STATE_LAPIC       1002
+#define KVM_X86_VCPU_STATE_NMI         1003
+
+struct kvm_nmi_state {
+       __u8 pending;
+       __u8 masked;
+       __u8 pad1[6];
+};
 
 #endif /* _ASM_X86_KVM_H */
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 179a919..b6b2db4 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -513,6 +513,8 @@ struct kvm_x86_ops {
                                unsigned char *hypercall_addr);
        void (*set_irq)(struct kvm_vcpu *vcpu);
        void (*set_nmi)(struct kvm_vcpu *vcpu);
+       bool (*get_nmi_mask)(struct kvm_vcpu *vcpu);
+       void (*set_nmi_mask)(struct kvm_vcpu *vcpu, bool masked);
        void (*queue_exception)(struct kvm_vcpu *vcpu, unsigned nr,
                                bool has_error_code, u32 error_code);
        int (*interrupt_allowed)(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 170b2d9..a16ee6e 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -2498,6 +2498,26 @@ static int svm_nmi_allowed(struct kvm_vcpu *vcpu)
                !(svm->vcpu.arch.hflags & HF_NMI_MASK);
 }
 
+static bool svm_get_nmi_mask(struct kvm_vcpu *vcpu)
+{
+       struct vcpu_svm *svm = to_svm(vcpu);
+
+       return !!(svm->vcpu.arch.hflags & HF_NMI_MASK);
+}
+
+static void svm_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
+{
+       struct vcpu_svm *svm = to_svm(vcpu);
+
+       if (masked) {
+               svm->vcpu.arch.hflags |= HF_NMI_MASK;
+               svm->vmcb->control.intercept |= (1UL << INTERCEPT_IRET);
+       } else {
+               svm->vcpu.arch.hflags &= ~HF_NMI_MASK;
+               svm->vmcb->control.intercept &= ~(1UL << INTERCEPT_IRET);
+       }
+}
+
 static int svm_interrupt_allowed(struct kvm_vcpu *vcpu)
 {
        struct vcpu_svm *svm = to_svm(vcpu);
@@ -2945,6 +2965,8 @@ static struct kvm_x86_ops svm_x86_ops = {
        .queue_exception = svm_queue_exception,
        .interrupt_allowed = svm_interrupt_allowed,
        .nmi_allowed = svm_nmi_allowed,
+       .get_nmi_mask = svm_get_nmi_mask,
+       .set_nmi_mask = svm_set_nmi_mask,
        .enable_nmi_window = enable_nmi_window,
        .enable_irq_window = enable_irq_window,
        .update_cr8_intercept = update_cr8_intercept,
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 364263a..6e032e4 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2655,6 +2655,34 @@ static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
                                GUEST_INTR_STATE_NMI));
 }
 
+static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu)
+{
+       if (!cpu_has_virtual_nmis())
+               return to_vmx(vcpu)->soft_vnmi_blocked;
+       else
+               return !!(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
+                         GUEST_INTR_STATE_NMI);
+}
+
+static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
+{
+       struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+       if (!cpu_has_virtual_nmis()) {
+               if (vmx->soft_vnmi_blocked != masked) {
+                       vmx->soft_vnmi_blocked = masked;
+                       vmx->vnmi_blocked_time = 0;
+               }
+       } else {
+               if (masked)
+                       vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
+                                     GUEST_INTR_STATE_NMI);
+               else
+                       vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO,
+                                       GUEST_INTR_STATE_NMI);
+       }
+}
+
 static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu)
 {
        return (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
@@ -4006,6 +4034,8 @@ static struct kvm_x86_ops vmx_x86_ops = {
        .queue_exception = vmx_queue_exception,
        .interrupt_allowed = vmx_interrupt_allowed,
        .nmi_allowed = vmx_nmi_allowed,
+       .get_nmi_mask = vmx_get_nmi_mask,
+       .set_nmi_mask = vmx_set_nmi_mask,
        .enable_nmi_window = enable_nmi_window,
        .enable_irq_window = enable_irq_window,
        .update_cr8_intercept = update_cr8_intercept,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 46fad88..e7ce505 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4686,6 +4686,19 @@ out_free_lapic:
                kfree(lapic);
                break;
        }
+       case KVM_X86_VCPU_STATE_NMI: {
+               struct kvm_nmi_state nmi = {}; /* zero pad1: don't leak stack to user */
+
+               vcpu_load(vcpu);
+               nmi.pending = vcpu->arch.nmi_pending;
+               nmi.masked = kvm_x86_ops->get_nmi_mask(vcpu);
+               vcpu_put(vcpu);
+               r = -EFAULT;
+               if (copy_to_user(argp, &nmi, sizeof(struct kvm_nmi_state)))
+                       break;
+               r = 0;
+               break;
+       }
        default:
                r = -EINVAL;
        }
@@ -4733,6 +4746,19 @@ out_free_lapic:
                kfree(lapic);
                break;
        }
+       case KVM_X86_VCPU_STATE_NMI: {
+               struct kvm_nmi_state nmi;
+
+               r = -EFAULT;
+               if (copy_from_user(&nmi, argp, sizeof(struct kvm_nmi_state)))
+                       break;
+               vcpu_load(vcpu);
+               vcpu->arch.nmi_pending = nmi.pending;
+               kvm_x86_ops->set_nmi_mask(vcpu, nmi.masked);
+               vcpu_put(vcpu);
+               r = 0;
+               break;
+       }
        default:
                r = -EINVAL;
        }

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to