From: Wanpeng Li <[email protected]>

Add a nested_apf field to vcpu->arch.exception to identify an async page
fault, and construct the expected VM-exit information fields. Force a
nested VM exit from nested_vmx_check_exception() if the injected #PF is
an async page fault. Extend the userspace interfaces KVM_GET_VCPU_EVENTS
and KVM_SET_VCPU_EVENTS for live migration.

Cc: Paolo Bonzini <[email protected]>
Cc: Radim Krčmář <[email protected]>
Signed-off-by: Wanpeng Li <[email protected]>
---
 Documentation/virtual/kvm/api.txt     |  8 ++++++--
 arch/x86/include/asm/kvm_emulate.h    |  1 +
 arch/x86/include/asm/kvm_host.h       |  2 ++
 arch/x86/include/uapi/asm/kvm.h       |  3 ++-
 arch/x86/kvm/svm.c                    | 16 ++++++++++------
 arch/x86/kvm/vmx.c                    | 17 ++++++++++++++---
 arch/x86/kvm/x86.c                    | 19 +++++++++++++++----
 tools/arch/x86/include/uapi/asm/kvm.h |  3 ++-
 8 files changed, 52 insertions(+), 17 deletions(-)

diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 4029943..a991a7c 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -832,7 +832,7 @@ struct kvm_vcpu_events {
                __u8 injected;
                __u8 nr;
                __u8 has_error_code;
-               __u8 pad;
+               __u8 nested_apf;
                __u32 error_code;
        } exception;
        struct {
@@ -857,7 +857,7 @@ struct kvm_vcpu_events {
        } smi;
 };
 
-Only two fields are defined in the flags field:
+Only three fields are defined in the flags field:
 
 - KVM_VCPUEVENT_VALID_SHADOW may be set in the flags field to signal that
   interrupt.shadow contains a valid state.
@@ -865,6 +865,9 @@ Only two fields are defined in the flags field:
 - KVM_VCPUEVENT_VALID_SMM may be set in the flags field to signal that
   smi contains a valid state.
 
+- KVM_VCPUEVENT_VALID_ASYNC_PF may be set in the flags field to signal that
+  exception.nested_apf contains a valid state.
+
 4.32 KVM_SET_VCPU_EVENTS
 
 Capability: KVM_CAP_VCPU_EVENTS
@@ -887,6 +890,7 @@ suppress overwriting the current in-kernel state. The bits are:
 KVM_VCPUEVENT_VALID_NMI_PENDING - transfer nmi.pending to the kernel
 KVM_VCPUEVENT_VALID_SIPI_VECTOR - transfer sipi_vector
 KVM_VCPUEVENT_VALID_SMM         - transfer the smi sub-struct.
+KVM_VCPUEVENT_VALID_ASYNC_PF    - transfer exception.nested_apf
 
 If KVM_CAP_INTR_SHADOW is available, KVM_VCPUEVENT_VALID_SHADOW can be set in
 the flags field to signal that interrupt.shadow contains a valid state and
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index 722d0e5..fde36f1 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -23,6 +23,7 @@ struct x86_exception {
        u16 error_code;
        bool nested_page_fault;
        u64 address; /* cr2 or nested page fault gpa */
+       u8 async_page_fault;
 };
 
 /*
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index e20d8a8..71aef4b 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -545,6 +545,7 @@ struct kvm_vcpu_arch {
                bool reinject;
                u8 nr;
                u32 error_code;
+               u8 nested_apf;
        } exception;
 
        struct kvm_queued_interrupt {
@@ -646,6 +647,7 @@ struct kvm_vcpu_arch {
                u32 id;
                bool send_user_only;
                u32 host_apf_reason;
+               unsigned long nested_apf_token;
        } apf;
 
        /* OSVW MSRs (AMD only) */
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index c2824d0..c9556ec 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -287,6 +287,7 @@ struct kvm_reinject_control {
 #define KVM_VCPUEVENT_VALID_SIPI_VECTOR        0x00000002
 #define KVM_VCPUEVENT_VALID_SHADOW     0x00000004
 #define KVM_VCPUEVENT_VALID_SMM                0x00000008
+#define KVM_VCPUEVENT_VALID_ASYNC_PF 0x00000010
 
 /* Interrupt shadow states */
 #define KVM_X86_SHADOW_INT_MOV_SS      0x01
@@ -298,7 +299,7 @@ struct kvm_vcpu_events {
                __u8 injected;
                __u8 nr;
                __u8 has_error_code;
-               __u8 pad;
+               __u8 nested_apf;
                __u32 error_code;
        } exception;
        struct {
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 8f263bf..49cdb8e 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -2367,15 +2367,19 @@ static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
        if (!is_guest_mode(&svm->vcpu))
                return 0;
 
+       vmexit = nested_svm_intercept(svm);
+       if (vmexit != NESTED_EXIT_DONE)
+               return 0;
+
        svm->vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + nr;
        svm->vmcb->control.exit_code_hi = 0;
        svm->vmcb->control.exit_info_1 = error_code;
-       svm->vmcb->control.exit_info_2 = svm->vcpu.arch.cr2;
-
-       vmexit = nested_svm_intercept(svm);
-       if (vmexit == NESTED_EXIT_DONE)
-               svm->nested.exit_required = true;
+       if (svm->vcpu.arch.exception.nested_apf)
+               svm->vmcb->control.exit_info_2 = svm->vcpu.arch.apf.nested_apf_token;
+       else
+               svm->vmcb->control.exit_info_2 = svm->vcpu.arch.cr2;
 
+       svm->nested.exit_required = true;
        return vmexit;
 }
 
@@ -2568,7 +2572,7 @@ static int nested_svm_intercept(struct vcpu_svm *svm)
                        vmexit = NESTED_EXIT_DONE;
                /* async page fault always cause vmexit */
                else if ((exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR) &&
-                        svm->vcpu.arch.apf.host_apf_reason != 0)
+                        svm->vcpu.arch.exception.nested_apf != 0)
                        vmexit = NESTED_EXIT_DONE;
                break;
        }
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index d20f794..8724ea6 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2418,13 +2418,24 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
  * KVM wants to inject page-faults which it got to the guest. This function
  * checks whether in a nested guest, we need to inject them to L1 or L2.
  */
-static int nested_vmx_check_exception(struct kvm_vcpu *vcpu, unsigned nr)
+static int nested_vmx_check_exception(struct kvm_vcpu *vcpu)
 {
        struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
+       unsigned int nr = vcpu->arch.exception.nr;
 
-       if (!(vmcs12->exception_bitmap & (1u << nr)))
+       if (!((vmcs12->exception_bitmap & (1u << nr)) ||
+               (nr == PF_VECTOR && vcpu->arch.exception.nested_apf)))
                return 0;
 
+       if (vcpu->arch.exception.nested_apf) {
+               vmcs_write32(VM_EXIT_INTR_ERROR_CODE, vcpu->arch.exception.error_code);
+               nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
+                       PF_VECTOR | INTR_TYPE_HARD_EXCEPTION |
+                       INTR_INFO_DELIVER_CODE_MASK | INTR_INFO_VALID_MASK,
+                       vcpu->arch.apf.nested_apf_token);
+               return 1;
+       }
+
        nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
                          vmcs_read32(VM_EXIT_INTR_INFO),
                          vmcs_readl(EXIT_QUALIFICATION));
@@ -2441,7 +2452,7 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu)
        u32 intr_info = nr | INTR_INFO_VALID_MASK;
 
        if (!reinject && is_guest_mode(vcpu) &&
-           nested_vmx_check_exception(vcpu, nr))
+           nested_vmx_check_exception(vcpu))
                return;
 
        if (has_error_code) {
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 7511c0a..5756811 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -452,7 +452,12 @@ EXPORT_SYMBOL_GPL(kvm_complete_insn_gp);
 void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault)
 {
        ++vcpu->stat.pf_guest;
-       vcpu->arch.cr2 = fault->address;
+       vcpu->arch.exception.nested_apf =
+               is_guest_mode(vcpu) && fault->async_page_fault;
+       if (vcpu->arch.exception.nested_apf)
+               vcpu->arch.apf.nested_apf_token = fault->address;
+       else
+               vcpu->arch.cr2 = fault->address;
        kvm_queue_exception_e(vcpu, PF_VECTOR, fault->error_code);
 }
 EXPORT_SYMBOL_GPL(kvm_inject_page_fault);
@@ -3072,7 +3077,7 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
                !kvm_exception_is_soft(vcpu->arch.exception.nr);
        events->exception.nr = vcpu->arch.exception.nr;
        events->exception.has_error_code = vcpu->arch.exception.has_error_code;
-       events->exception.pad = 0;
+       events->exception.nested_apf = vcpu->arch.exception.nested_apf;
        events->exception.error_code = vcpu->arch.exception.error_code;
 
        events->interrupt.injected =
@@ -3096,7 +3101,8 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
 
        events->flags = (KVM_VCPUEVENT_VALID_NMI_PENDING
                         | KVM_VCPUEVENT_VALID_SHADOW
-                        | KVM_VCPUEVENT_VALID_SMM);
+                        | KVM_VCPUEVENT_VALID_SMM
+                        | KVM_VCPUEVENT_VALID_ASYNC_PF);
        memset(&events->reserved, 0, sizeof(events->reserved));
 }
 
@@ -3108,7 +3114,8 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
        if (events->flags & ~(KVM_VCPUEVENT_VALID_NMI_PENDING
                              | KVM_VCPUEVENT_VALID_SIPI_VECTOR
                              | KVM_VCPUEVENT_VALID_SHADOW
-                             | KVM_VCPUEVENT_VALID_SMM))
+                             | KVM_VCPUEVENT_VALID_SMM
+                             | KVM_VCPUEVENT_VALID_ASYNC_PF))
                return -EINVAL;
 
        if (events->exception.injected &&
@@ -3126,6 +3133,8 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
        vcpu->arch.exception.pending = events->exception.injected;
        vcpu->arch.exception.nr = events->exception.nr;
        vcpu->arch.exception.has_error_code = events->exception.has_error_code;
+       if (events->flags & KVM_VCPUEVENT_VALID_ASYNC_PF)
+               vcpu->arch.exception.nested_apf = events->exception.nested_apf;
        vcpu->arch.exception.error_code = events->exception.error_code;
 
        vcpu->arch.interrupt.pending = events->interrupt.injected;
@@ -8573,6 +8582,7 @@ void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
                fault.error_code = 0;
                fault.nested_page_fault = false;
                fault.address = work->arch.token;
+               fault.async_page_fault = true;
                kvm_inject_page_fault(vcpu, &fault);
        }
 }
@@ -8595,6 +8605,7 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
                fault.error_code = 0;
                fault.nested_page_fault = false;
                fault.address = work->arch.token;
+               fault.async_page_fault = true;
                kvm_inject_page_fault(vcpu, &fault);
        }
        vcpu->arch.apf.halted = false;
diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h
index c2824d0..c9556ec 100644
--- a/tools/arch/x86/include/uapi/asm/kvm.h
+++ b/tools/arch/x86/include/uapi/asm/kvm.h
@@ -287,6 +287,7 @@ struct kvm_reinject_control {
 #define KVM_VCPUEVENT_VALID_SIPI_VECTOR        0x00000002
 #define KVM_VCPUEVENT_VALID_SHADOW     0x00000004
 #define KVM_VCPUEVENT_VALID_SMM                0x00000008
+#define KVM_VCPUEVENT_VALID_ASYNC_PF 0x00000010
 
 /* Interrupt shadow states */
 #define KVM_X86_SHADOW_INT_MOV_SS      0x01
@@ -298,7 +299,7 @@ struct kvm_vcpu_events {
                __u8 injected;
                __u8 nr;
                __u8 has_error_code;
-               __u8 pad;
+               __u8 nested_apf;
                __u32 error_code;
        } exception;
        struct {
-- 
2.7.4

Reply via email to