If we let L1 use EPT, we should also support the INVEPT instruction, which L1
needs in order to invalidate cached EPT translations.

Signed-off-by: Nadav Har'El <[email protected]>
---
 arch/x86/include/asm/vmx.h |    2 
 arch/x86/kvm/vmx.c         |  112 +++++++++++++++++++++++++++++++++++
 2 files changed, 114 insertions(+)

--- .before/arch/x86/include/asm/vmx.h  2011-11-10 11:33:59.000000000 +0200
+++ .after/arch/x86/include/asm/vmx.h   2011-11-10 11:33:59.000000000 +0200
@@ -279,6 +279,7 @@ enum vmcs_field {
 #define EXIT_REASON_APIC_ACCESS         44
 #define EXIT_REASON_EPT_VIOLATION       48
 #define EXIT_REASON_EPT_MISCONFIG       49
+#define EXIT_REASON_INVEPT             50
 #define EXIT_REASON_WBINVD             54
 #define EXIT_REASON_XSETBV             55
 
@@ -404,6 +405,7 @@ enum vmcs_field {
 #define VMX_EPTP_WB_BIT                                (1ull << 14)
 #define VMX_EPT_2MB_PAGE_BIT                   (1ull << 16)
 #define VMX_EPT_1GB_PAGE_BIT                   (1ull << 17)
+#define VMX_EPT_INVEPT_BIT                     (1ull << 20)
 #define VMX_EPT_EXTENT_INDIVIDUAL_BIT          (1ull << 24)
 #define VMX_EPT_EXTENT_CONTEXT_BIT             (1ull << 25)
 #define VMX_EPT_EXTENT_GLOBAL_BIT              (1ull << 26)
--- .before/arch/x86/kvm/vmx.c  2011-11-10 11:33:59.000000000 +0200
+++ .after/arch/x86/kvm/vmx.c   2011-11-10 11:33:59.000000000 +0200
@@ -351,6 +351,8 @@ struct nested_vmx {
        struct list_head vmcs02_pool;
        int vmcs02_num;
        u64 vmcs01_tsc_offset;
+       /* Remember last EPT02, for single-context INVEPT optimization */
+       u64 last_eptp02;
        /* L2 must run next, and mustn't decide to exit to L1. */
        bool nested_run_pending;
        /*
@@ -1987,6 +1989,10 @@ static __init void nested_vmx_setup_ctls
        /* ept capabilities */
        if (nested_ept) {
                nested_vmx_ept_caps = VMX_EPT_PAGE_WALK_4_BIT;
+               nested_vmx_ept_caps |=
+                       VMX_EPT_INVEPT_BIT | VMX_EPT_EXTENT_GLOBAL_BIT |
+                       VMX_EPT_EXTENT_CONTEXT_BIT |
+                       VMX_EPT_EXTENT_INDIVIDUAL_BIT;
                nested_vmx_ept_caps &= vmx_capability.ept;
        } else
                nested_vmx_ept_caps = 0;
@@ -5568,6 +5574,105 @@ static int handle_vmptrst(struct kvm_vcp
        return 1;
 }
 
+/* Emulate the INVEPT instruction on behalf of L1; every path returns 1. */
+static int handle_invept(struct kvm_vcpu *vcpu)
+{
+       u32 vmx_instruction_info;       /* raw VMX_INSTRUCTION_INFO field */
+       unsigned long type;             /* requested INVEPT extent type */
+       gva_t gva;                      /* address of the memory operand */
+       struct x86_exception e;         /* fault from reading the operand */
+       struct {
+               u64 eptp, gpa;
+       } operand;                      /* two u64s read from guest memory */
+
+
+       if (!nested_ept || !(nested_vmx_ept_caps & VMX_EPT_INVEPT_BIT)) {
+               kvm_queue_exception(vcpu, UD_VECTOR);   /* not offered */
+               return 1;
+       }
+
+       if (!nested_vmx_check_permission(vcpu))
+               return 1;       /* presumably the helper queued a fault */
+
+       if (!kvm_read_cr0_bits(vcpu, X86_CR0_PE)) {
+               kvm_queue_exception(vcpu, UD_VECTOR);   /* CR0.PE clear */
+               return 1;
+       }
+
+       /* Per the Intel VMX instruction reference, the memory operand is
+        * read even when it isn't needed (e.g., for type==global).
+        */
+       vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
+       if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION),
+                       vmx_instruction_info, &gva))
+               return 1;       /* presumably the helper queued a fault */
+       if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, &operand,
+                               sizeof(operand), &e)) {
+               kvm_inject_page_fault(vcpu, &e);
+               return 1;
+       }
+
+       type = kvm_register_read(vcpu, (vmx_instruction_info >> 28) & 0xf);
+
+       switch (type) { /* read from the register in info bits 31:28 */
+       case VMX_EPT_EXTENT_GLOBAL:     /* VMfail unless GLOBAL_BIT is set */
+               if (!(nested_vmx_ept_caps & VMX_EPT_EXTENT_GLOBAL_BIT))
+                       nested_vmx_failValid(vcpu,
+                               VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
+               else {
+                       ept_sync_global();
+                       nested_vmx_succeed(vcpu);
+               }
+               break;
+       case VMX_EPT_EXTENT_CONTEXT:    /* VMfail unless CONTEXT_BIT is set */
+               if (!(nested_vmx_ept_caps & VMX_EPT_EXTENT_CONTEXT_BIT))
+                       nested_vmx_failValid(vcpu,
+                               VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
+               else {
+                       /*
+                        * Fast path for the common case: L1 invalidates the
+                        * last eptp it used to run L2, so only last_eptp02
+                        * (saved by prepare_vmcs12()) is synced; otherwise
+                        * sync globally. TODO: look up operand.eptp in the
+                        * shadow EPT table cache instead of last_eptp02.
+                        */
+                       struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
+                       struct vcpu_vmx *vmx = to_vmx(vcpu);
+                       if (vmcs12 && nested_cpu_has_ept(vmcs12) &&
+                           (vmcs12->ept_pointer == operand.eptp) &&
+                           vmx->nested.last_eptp02)
+                               ept_sync_context(vmx->nested.last_eptp02);
+                       else
+                               ept_sync_global();
+                       nested_vmx_succeed(vcpu);
+               }
+               break;
+       case VMX_EPT_EXTENT_INDIVIDUAL_ADDR:    /* needs INDIVIDUAL_BIT */
+               if (!(nested_vmx_ept_caps & VMX_EPT_EXTENT_INDIVIDUAL_BIT))
+                       nested_vmx_failValid(vcpu,
+                               VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
+               else {  /* same eptp match as above, but for operand.gpa */
+                       struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
+                       struct vcpu_vmx *vmx = to_vmx(vcpu);
+                       if (vmcs12 && nested_cpu_has_ept(vmcs12) &&
+                           (vmcs12->ept_pointer == operand.eptp) &&
+                           vmx->nested.last_eptp02)
+                               ept_sync_individual_addr(
+                                       vmx->nested.last_eptp02, operand.gpa);
+                       else
+                               ept_sync_global();
+                       nested_vmx_succeed(vcpu);
+               }
+               break;
+       default:        /* any other type value is an invalid operand */
+               nested_vmx_failValid(vcpu,
+                       VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
+       }
+
+       skip_emulated_instruction(vcpu);        /* success or VMfail */
+       return 1;
+}
+
 /*
  * The exit handlers return 1 if the exit was handled fully and guest execution
  * may resume.  Otherwise they set the kvm_run parameter to indicate what needs
@@ -5609,6 +5714,7 @@ static int (*kvm_vmx_exit_handlers[])(st
        [EXIT_REASON_PAUSE_INSTRUCTION]       = handle_pause,
        [EXIT_REASON_MWAIT_INSTRUCTION]       = handle_invalid_op,
        [EXIT_REASON_MONITOR_INSTRUCTION]     = handle_invalid_op,
+       [EXIT_REASON_INVEPT]                  = handle_invept,
 };
 
 static const int kvm_vmx_max_exit_handlers =
@@ -5793,6 +5899,7 @@ static bool nested_vmx_exit_handled(stru
        case EXIT_REASON_VMPTRST: case EXIT_REASON_VMREAD:
        case EXIT_REASON_VMRESUME: case EXIT_REASON_VMWRITE:
        case EXIT_REASON_VMOFF: case EXIT_REASON_VMON:
+       case EXIT_REASON_INVEPT:
                /*
                 * VMX instructions trap unconditionally. This allows L1 to
                 * emulate them for its L2 guest, i.e., allows 3-level nesting!
@@ -7056,6 +7163,11 @@ void prepare_vmcs12(struct kvm_vcpu *vcp
        /* clear vm-entry fields which are to be cleared on exit */
        if (!(vmcs12->vm_exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY))
                vmcs12->vm_entry_intr_info_field &= ~INTR_INFO_VALID_MASK;
+
+       /* For single-context INVEPT optimization */
+       if (nested_cpu_has_ept(vmcs12))
+               to_vmx(vcpu)->nested.last_eptp02 = vmcs_read64(EPT_POINTER);
+
 }
 
 /*
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to