Implement the VMLAUNCH and VMRESUME instructions, allowing a guest
hypervisor to run its own guests.

Signed-off-by: Nadav Har'El <[email protected]>
---
 arch/x86/kvm/vmx.c |  139 ++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 137 insertions(+), 2 deletions(-)

--- .before/arch/x86/kvm/vmx.c  2011-05-08 10:43:20.000000000 +0300
+++ .after/arch/x86/kvm/vmx.c   2011-05-08 10:43:20.000000000 +0300
@@ -346,6 +346,9 @@ struct nested_vmx {
        /* vmcs02_list cache of VMCSs recently used to run L2 guests */
        struct list_head vmcs02_pool;
        int vmcs02_num;
+
+       /* Saving the VMCS that we used for running L1 */
+       struct saved_vmcs saved_vmcs01;
        u64 vmcs01_tsc_offset;
        /*
         * Guest pages referred to in vmcs02 with host-physical pointers, so
@@ -4880,6 +4883,21 @@ static int handle_vmclear(struct kvm_vcp
        return 1;
 }
 
+static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch);
+
+/* Emulate the VMLAUNCH instruction: a nested entry with launch == true */
+static int handle_vmlaunch(struct kvm_vcpu *vcpu)
+{
+       return nested_vmx_run(vcpu, true);
+}
+
+/* Emulate the VMRESUME instruction */
+static int handle_vmresume(struct kvm_vcpu *vcpu)
+{
+       /* launch == false: fails unless vmcs12 was already launched */
+       return nested_vmx_run(vcpu, false);
+}
+
 enum vmcs_field_type {
        VMCS_FIELD_TYPE_U16 = 0,
        VMCS_FIELD_TYPE_U64 = 1,
@@ -5160,11 +5178,11 @@ static int (*kvm_vmx_exit_handlers[])(st
        [EXIT_REASON_INVLPG]                  = handle_invlpg,
        [EXIT_REASON_VMCALL]                  = handle_vmcall,
        [EXIT_REASON_VMCLEAR]                 = handle_vmclear,
-       [EXIT_REASON_VMLAUNCH]                = handle_vmx_insn,
+       [EXIT_REASON_VMLAUNCH]                = handle_vmlaunch,
        [EXIT_REASON_VMPTRLD]                 = handle_vmptrld,
        [EXIT_REASON_VMPTRST]                 = handle_vmptrst,
        [EXIT_REASON_VMREAD]                  = handle_vmread,
-       [EXIT_REASON_VMRESUME]                = handle_vmx_insn,
+       [EXIT_REASON_VMRESUME]                = handle_vmresume,
        [EXIT_REASON_VMWRITE]                 = handle_vmwrite,
        [EXIT_REASON_VMOFF]                   = handle_vmoff,
        [EXIT_REASON_VMON]                    = handle_vmon,
@@ -6021,6 +6039,123 @@ static int prepare_vmcs02(struct kvm_vcp
        return 0;
 }
 
+/*
+ * nested_vmx_run() handles a nested entry, i.e., a VMLAUNCH or VMRESUME on L1
+ * for running an L2 nested guest. @launch is true for VMLAUNCH and false for
+ * VMRESUME. Returns -ENOMEM if no vmcs02 could be obtained, and 1 otherwise
+ * (including when a failed prerequisite check fails the instruction).
+ */
+static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
+{
+       struct vmcs12 *vmcs12;
+       struct vcpu_vmx *vmx = to_vmx(vcpu);
+       int cpu;
+       struct saved_vmcs *saved_vmcs02;
+       u32 low, high;
+
+       if (!nested_vmx_check_permission(vcpu))
+               return 1;
+       skip_emulated_instruction(vcpu);
+
+       /*
+        * The nested entry process starts by enforcing various prerequisites
+        * on vmcs12 as required by the Intel SDM, and acting appropriately
+        * when they fail: as the SDM explains, some conditions should make
+        * the instruction fail, while others make it seem to succeed but
+        * return an EXIT_REASON_INVALID_STATE. To speed up the normal path,
+        * we avoid checking for misconfigurations which the processor will
+        * catch anyway when using the merged vmcs02.
+        */
+       vmcs12 = get_vmcs12(vcpu);
+       if (vmcs12->launch_state == launch) {
+               nested_vmx_failValid(vcpu,
+                       launch ? VMXERR_VMLAUNCH_NONCLEAR_VMCS
+                              : VMXERR_VMRESUME_NONLAUNCHED_VMCS);
+               return 1;
+       }
+
+       if (vmcs12->guest_interruptibility_info & GUEST_INTR_STATE_MOV_SS) {
+               nested_vmx_failValid(vcpu,
+                       VMXERR_ENTRY_EVENTS_BLOCKED_BY_MOV_SS);
+               return 1;
+       }
+
+       if ((vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_MSR_BITMAPS) &&
+                       !IS_ALIGNED(vmcs12->msr_bitmap, PAGE_SIZE)) {
+               /*TODO: Also verify bits beyond physical address width are 0*/
+               nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
+               return 1;
+       }
+
+       if (vmcs12->vm_entry_msr_load_count > 0 ||
+           vmcs12->vm_exit_msr_load_count > 0 ||
+           vmcs12->vm_exit_msr_store_count > 0) {
+               if (printk_ratelimit())
+                       printk(KERN_WARNING
+                         "%s: VMCS MSR_{LOAD,STORE} unsupported\n", __func__);
+               nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
+               return 1;
+       }
+
+       nested_vmx_pinbased_ctls(&low, &high);
+       if (!vmx_control_verify(vmcs12->pin_based_vm_exec_control, low, high)) {
+               nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
+               return 1;
+       }
+
+       if (((vmcs12->host_cr0 & VMXON_CR0_ALWAYSON) != VMXON_CR0_ALWAYSON) ||
+           ((vmcs12->host_cr4 & VMXON_CR4_ALWAYSON) != VMXON_CR4_ALWAYSON)) {
+               nested_vmx_failValid(vcpu,
+                       VMXERR_ENTRY_INVALID_HOST_STATE_FIELD);
+               return 1;
+       }
+
+       /*
+        * We're finally done with prerequisite checking. Get the vmcs02 to
+        * run L2 on before touching any vcpu state: this lookup can fail, and
+        * bailing out after enter_guest_mode() has been called would return
+        * with L1's VMCS (vmcs01) still being the one in vmx->vmcs.
+        */
+       saved_vmcs02 = nested_get_current_vmcs02(vmx);
+       if (!saved_vmcs02)
+               return -ENOMEM;
+
+       enter_guest_mode(vcpu);
+       vmx->nested.vmcs01_tsc_offset = vmcs_read64(TSC_OFFSET);
+
+       /*
+        * Switch from L1's VMCS (vmcs01), to L2's VMCS (vmcs02). Remember
+        * vmcs01, on which CPU it was last loaded, and whether it was launched
+        * (we need all these values next time we will use L1). Then recall
+        * these values from the last time vmcs02 was used.
+        */
+       cpu = get_cpu();
+       vmx->nested.saved_vmcs01.vmcs = vmx->vmcs;
+       vmx->nested.saved_vmcs01.cpu = vcpu->cpu;
+       vmx->nested.saved_vmcs01.launched = vmx->launched;
+
+       vmx->vmcs = saved_vmcs02->vmcs;
+       vcpu->cpu = saved_vmcs02->cpu;
+       vmx->launched = saved_vmcs02->launched;
+
+       vmx_vcpu_put(vcpu);
+       vmx_vcpu_load(vcpu, cpu);
+       vcpu->cpu = cpu;
+       put_cpu();
+
+       vmcs12->launch_state = 1;
+
+       prepare_vmcs02(vcpu, vmcs12);
+
+       /*
+        * Note no nested_vmx_succeed or nested_vmx_fail here. At this point
+        * we are no longer running L1, and VMLAUNCH/VMRESUME has not yet
+        * returned as far as L1 is concerned. It will only return (and set
+        * the success flag) when L2 exits (see nested_vmx_vmexit()).
+        */
+       return 1;
+}
+
 static int vmx_check_intercept(struct kvm_vcpu *vcpu,
                               struct x86_instruction_info *info,
                               enum x86_intercept_stage stage)
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to