In this patch we add a list of L0 (hardware) VMCSs, which we'll use to hold a 
hardware VMCS for each active vmcs12 (i.e., for each L2 guest).

We call each of these L0 VMCSs a "vmcs02", as it is the VMCS that L0 uses
to run its nested guest L2.

Signed-off-by: Nadav Har'El <[email protected]>
---
 arch/x86/kvm/vmx.c |  142 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 142 insertions(+)

--- .before/arch/x86/kvm/vmx.c  2011-01-26 18:06:03.000000000 +0200
+++ .after/arch/x86/kvm/vmx.c   2011-01-26 18:06:03.000000000 +0200
@@ -117,6 +117,7 @@ static int ple_window = KVM_VMX_DEFAULT_
 module_param(ple_window, int, S_IRUGO);
 
 #define NR_AUTOLOAD_MSRS 1
+#define NESTED_MAX_VMCS 256
 
 struct vmcs {
        u32 revision_id;
@@ -159,6 +160,34 @@ struct __packed vmcs12 {
 #define VMCS12_REVISION 0x11e57ed0
 
 /*
+ * When we temporarily switch a vcpu's VMCS (e.g., stop using an L1's VMCS
+ * while we use L2's VMCS), and wish to save the previous VMCS, we must also
+ * remember on which CPU it was last loaded (vcpu->cpu), so when we return to
+ * using this VMCS we'll know if we're now running on a different CPU and need
+ * to clear the VMCS on the old CPU, and load it on the new one. Additionally,
+ * we need to remember whether this VMCS was launched (vmx->launched), so when
+ * we return to it we know if to VMLAUNCH or to VMRESUME it (we cannot deduce
+ * this from other state, because it's possible that this VMCS had once been
+ * launched, but has since been cleared after a CPU switch, and now
+ * vmx->launched is 0).
+ */
+struct saved_vmcs {
+       struct vmcs *vmcs;      /* the hardware (L0) VMCS itself */
+       int cpu;                /* CPU it was last loaded on, or -1 if none */
+       int launched;           /* non-zero if VMLAUNCH was done on this VMCS */
+};
+
+/*
+ * A cache keeping a VMCS (vmcs02) for each loaded vmcs12. In addition to the
+ * VMCS, we need information on its state - see struct saved_vmcs above.
+ */
+struct vmcs_list {
+       struct list_head list;          /* links entry into nested.vmcs02_list */
+       gpa_t vmcs12_addr;              /* guest-physical address of the vmcs12 */
+       struct saved_vmcs vmcs02;       /* the vmcs02 plus its load/launch state */
+};
+
+/*
  * The nested_vmx structure is part of vcpu_vmx, and holds information we need
  * for correct emulation of VMX (i.e., nested VMX) on this vcpu. For example,
  * the current VMCS set by L1, a list of the VMCSs used to run the active
@@ -173,6 +202,10 @@ struct nested_vmx {
        /* The host-usable pointer to the above */
        struct page *current_vmcs12_page;
        struct vmcs12 *current_vmcs12;
+
+       /* list of real (hardware) VMCSs, one for each L2 guest of L1 */
+       struct list_head vmcs02_list; /* a vmcs_list */
+       int vmcs02_num;
 };
 
 struct vcpu_vmx {
@@ -3964,6 +3997,110 @@ static int handle_invalid_op(struct kvm_
        return 1;
 }
 
+/* Look up the vmcs02 cached for the vmcs12 currently loaded by L1, if any */
+static struct saved_vmcs *nested_get_current_vmcs(struct vcpu_vmx *vmx)
+{
+       struct vmcs_list *entry;
+
+       list_for_each_entry(entry, &vmx->nested.vmcs02_list, list) {
+               if (entry->vmcs12_addr == vmx->nested.current_vmptr)
+                       return &entry->vmcs02;
+       }
+       return NULL;
+}
+
+/*
+ * Allocate an L0 VMCS (vmcs02) for the current L1 VMCS (vmcs12), if one
+ * does not already exist. The allocation is done in L0 memory, so to avoid
+ * denial-of-service attack by guests, we limit the number of concurrently-
+ * allocated VMCSs. A well-behaving L1 will VMCLEAR unused vmcs12s and not
+ * trigger this limit.
+ *
+ * Returns 0 on success (including when a vmcs02 for the current vmcs12
+ * already exists), or -ENOMEM if the limit was hit or an allocation failed.
+ */
+static int nested_create_current_vmcs(struct kvm_vcpu *vcpu)
+{
+       struct vmcs_list *new_l2_guest;
+       struct vmcs *vmcs02;
+       struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+       if (nested_get_current_vmcs(vmx))
+               return 0; /* nothing to do - we already have a VMCS */
+
+       if (vmx->nested.vmcs02_num >= NESTED_MAX_VMCS)
+               return -ENOMEM;
+
+       /*
+        * No cast needed on kmalloc()'s void * return, and sizeof(*ptr)
+        * stays correct even if the variable's type ever changes.
+        */
+       new_l2_guest = kmalloc(sizeof(*new_l2_guest), GFP_KERNEL);
+       if (!new_l2_guest)
+               return -ENOMEM;
+
+       vmcs02 = alloc_vmcs();
+       if (!vmcs02) {
+               kfree(new_l2_guest);
+               return -ENOMEM;
+       }
+
+       new_l2_guest->vmcs12_addr = vmx->nested.current_vmptr;
+       new_l2_guest->vmcs02.vmcs = vmcs02;
+       new_l2_guest->vmcs02.cpu = -1;          /* never loaded on any CPU yet */
+       new_l2_guest->vmcs02.launched = 0;
+       list_add(&new_l2_guest->list, &vmx->nested.vmcs02_list);
+       vmx->nested.vmcs02_num++;
+       return 0;
+}
+
+/*
+ * Run (via smp_call_function_single()) on the CPU where a saved_vmcs was
+ * last loaded: VMCLEAR it so the CPU no longer caches its state, and drop
+ * the per-cpu current_vmcs pointer if it still refers to this VMCS.
+ */
+static void __nested_free_saved_vmcs(void *arg)
+{
+       struct saved_vmcs *saved_vmcs = arg;
+       int cpu = raw_smp_processor_id();
+
+       /*
+        * The caller targets saved_vmcs->cpu, so this test looks like it is
+        * always true here.
+        */
+       if (saved_vmcs->cpu == cpu) /* TODO: how can this not be the case? */
+               vmcs_clear(saved_vmcs->vmcs);
+       if (per_cpu(current_vmcs, cpu) == saved_vmcs->vmcs)
+               per_cpu(current_vmcs, cpu) = NULL;
+}
+
<![CDATA[
+/*
+ * Free a VMCS, but before that VMCLEAR it on the CPU where it was last loaded
+ * (the necessary information is in the saved_vmcs structure).
+ * See also vcpu_clear() (with different parameters and side-effects).
+ *
+ * A cpu of -1 means the VMCS was never loaded on any CPU (it is set to -1
+ * at creation time), so no cross-CPU VMCLEAR is needed before freeing.
+ * @vmx is currently unused.
+ */
+static void nested_free_saved_vmcs(struct vcpu_vmx *vmx,
+               struct saved_vmcs *saved_vmcs)
+{
+       if (saved_vmcs->cpu != -1)
+               smp_call_function_single(saved_vmcs->cpu,
+                               __nested_free_saved_vmcs, saved_vmcs, 1);
+
+       free_vmcs(saved_vmcs->vmcs);
+}
]]>
+
+/*
+ * Drop the vmcs02 cached for the given vmcs12 address, if one exists:
+ * VMCLEAR/free the hardware VMCS and unlink the entry from vmcs02_list.
+ */
+static void nested_free_vmcs(struct vcpu_vmx *vmx, gpa_t vmptr)
+{
+       struct vmcs_list *entry;
+
+       list_for_each_entry(entry, &vmx->nested.vmcs02_list, list) {
+               if (entry->vmcs12_addr != vmptr)
+                       continue;
+               nested_free_saved_vmcs(vmx, &entry->vmcs02);
+               list_del(&entry->list);
+               kfree(entry);
+               vmx->nested.vmcs02_num--;
+               return;
+       }
+}
+
+/* Release every vmcs02 cached for this L1 vcpu and empty vmcs02_list */
+static void nested_free_all_vmcs(struct vcpu_vmx *vmx)
+{
+       struct vmcs_list *entry, *tmp;
+
+       list_for_each_entry_safe(entry, tmp, &vmx->nested.vmcs02_list, list) {
+               nested_free_saved_vmcs(vmx, &entry->vmcs02);
+               list_del(&entry->list);
+               kfree(entry);
+       }
+       vmx->nested.vmcs02_num = 0;
+}
+
 /*
  * Emulate the VMXON instruction.
  * Currently, we just remember that VMX is active, and do not save or even
@@ -4000,6 +4137,9 @@ static int handle_vmon(struct kvm_vcpu *
                return 1;
        }
 
+       INIT_LIST_HEAD(&(vmx->nested.vmcs02_list));
+       vmx->nested.vmcs02_num = 0;
+
        vmx->nested.vmxon = true;
 
        skip_emulated_instruction(vcpu);
@@ -4050,6 +4190,8 @@ static void free_nested(struct vcpu_vmx 
                nested_release_page(vmx->nested.current_vmcs12_page);
                vmx->nested.current_vmptr = -1ull;
        }
+
+       nested_free_all_vmcs(vmx);
 }
 
 /* Emulate the VMXOFF instruction */
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to