From: Orit Wasserman <[email protected]>
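
Move the 'nested' module parameter from svm.c into common x86 code and
export it, so that it gates nested VMX as well as nested SVM. Emulate
VMXON and VMXOFF for the L1 guest, handle reads of the VMX capability
MSRs and writes of MSR_IA32_FEATURE_CONTROL, and allow the guest to set
CR4.VMXE when nested is enabled. The parameter defaults to enabled and
is exposed read-only via module_param().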
---
arch/x86/kvm/svm.c | 3 -
arch/x86/kvm/vmx.c | 187 +++++++++++++++++++++++++++++++++++++++++++++++++++-
arch/x86/kvm/x86.c | 6 ++-
arch/x86/kvm/x86.h | 2 +
4 files changed, 192 insertions(+), 6 deletions(-)
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 2df9b45..3c1f22a 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -124,9 +124,6 @@ static int npt = 1;
module_param(npt, int, S_IRUGO);
-static int nested = 1;
-module_param(nested, int, S_IRUGO);
-
static void svm_flush_tlb(struct kvm_vcpu *vcpu);
static void svm_complete_interrupts(struct vcpu_svm *svm);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 78101dd..abba325 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -67,6 +67,11 @@ struct vmcs {
char data[0];
};
+struct nested_vmx {
+	/* Has the level 1 guest executed VMXON? */
+ bool vmon;
+};
+
struct vcpu_vmx {
struct kvm_vcpu vcpu;
struct list_head local_vcpus_link;
@@ -114,6 +119,9 @@ struct vcpu_vmx {
ktime_t entry_time;
s64 vnmi_blocked_time;
u32 exit_reason;
+
+ /* Nested vmx */
+ struct nested_vmx nested;
};
static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)
@@ -967,6 +975,69 @@ static void guest_write_tsc(u64 guest_tsc, u64 host_tsc)
}
/*
+ * Handles MSR reads for nested virtualization
+ */
+static int nested_vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index,
+ u64 *pdata)
+{
+ u32 vmx_msr_low = 0, vmx_msr_high = 0;
+
+ switch (msr_index) {
+ case MSR_IA32_FEATURE_CONTROL:
+ *pdata = 0;
+ break;
+ case MSR_IA32_VMX_BASIC:
+ rdmsr(MSR_IA32_VMX_BASIC, vmx_msr_low, vmx_msr_high);
+ *pdata = vmx_msr_low | ((u64)vmx_msr_high << 32);
+ break;
+ case MSR_IA32_VMX_PINBASED_CTLS:
+ *pdata = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING |
+ PIN_BASED_VIRTUAL_NMIS;
+ break;
+ case MSR_IA32_VMX_PROCBASED_CTLS:
+ *pdata = CPU_BASED_HLT_EXITING |
+#ifdef CONFIG_X86_64
+ CPU_BASED_CR8_LOAD_EXITING |
+ CPU_BASED_CR8_STORE_EXITING |
+#endif
+ CPU_BASED_CR3_LOAD_EXITING |
+ CPU_BASED_CR3_STORE_EXITING |
+ CPU_BASED_USE_IO_BITMAPS |
+ CPU_BASED_MOV_DR_EXITING |
+ CPU_BASED_USE_TSC_OFFSETING |
+ CPU_BASED_INVLPG_EXITING;
+
+ if (cpu_has_secondary_exec_ctrls())
+ *pdata |= CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
+
+ if (vm_need_tpr_shadow(vcpu->kvm))
+ *pdata |= CPU_BASED_TPR_SHADOW;
+ break;
+ case MSR_IA32_VMX_EXIT_CTLS:
+ *pdata = 0;
+#ifdef CONFIG_X86_64
+ *pdata |= VM_EXIT_HOST_ADDR_SPACE_SIZE;
+#endif
+ break;
+ case MSR_IA32_VMX_ENTRY_CTLS:
+ *pdata = 0;
+ break;
+ case MSR_IA32_VMX_PROCBASED_CTLS2:
+ *pdata = 0;
+ if (vm_need_virtualize_apic_accesses(vcpu->kvm))
+ *pdata |= SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
+ break;
+ case MSR_IA32_VMX_EPT_VPID_CAP:
+ *pdata = 0;
+ break;
+ default:
+ return 1;
+ }
+
+ return 0;
+}
+
+/*
* Reads an msr value (of 'msr_index') into 'pdata'.
* Returns 0 on success, non-0 otherwise.
* Assumes vcpu_load() was already called.
@@ -1005,6 +1076,9 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
data = vmcs_readl(GUEST_SYSENTER_ESP);
break;
default:
+ if (nested &&
+ !nested_vmx_get_msr(vcpu, msr_index, &data))
+ break;
vmx_load_host_state(to_vmx(vcpu));
msr = find_msr_entry(to_vmx(vcpu), msr_index);
if (msr) {
@@ -1019,6 +1093,27 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
}
/*
+ * Writes msr value for nested virtualization
+ * Returns 0 on success, non-0 otherwise.
+ */
+static int nested_vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
+{
+ switch (msr_index) {
+ case MSR_IA32_FEATURE_CONTROL:
+ if ((data & (FEATURE_CONTROL_LOCKED |
+ FEATURE_CONTROL_VMXON_ENABLED))
+ != (FEATURE_CONTROL_LOCKED |
+ FEATURE_CONTROL_VMXON_ENABLED))
+ return 1;
+ break;
+ default:
+ return 1;
+ }
+
+ return 0;
+}
+
+/*
 * Writes msr value into the appropriate "register".
* Returns 0 on success, non-0 otherwise.
* Assumes vcpu_load() was already called.
@@ -1064,6 +1159,9 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
}
/* Otherwise falls through to kvm_set_msr_common */
default:
+ if (nested &&
+ !nested_vmx_set_msr(vcpu, msr_index, data))
+ break;
vmx_load_host_state(vmx);
msr = find_msr_entry(vmx, msr_index);
if (msr) {
@@ -3095,12 +3193,97 @@ static int handle_vmcall(struct kvm_vcpu *vcpu)
return 1;
}
+/*
+ * Check whether the vcpu can execute a VMX instruction.
+ * Inject the corresponding exception if it cannot.
+ */
+static int nested_vmx_check_permission(struct kvm_vcpu *vcpu)
+{
+ struct kvm_segment cs;
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+ struct kvm_msr_entry *msr;
+
+ vmx_get_segment(vcpu, &cs, VCPU_SREG_CS);
+
+ if (!vmx->nested.vmon) {
+ printk(KERN_DEBUG "%s: vmx not on\n", __func__);
+ kvm_queue_exception(vcpu, UD_VECTOR);
+ return 0;
+ }
+
+ msr = find_msr_entry(vmx, MSR_EFER);
+
+ if ((vmx_get_rflags(vcpu) & X86_EFLAGS_VM) ||
+ ((msr->data & EFER_LMA) && !cs.l)) {
+ printk(KERN_DEBUG "%s: invalid mode cs.l %d lma %llu\n",
+ __func__, cs.l, msr->data & EFER_LMA);
+ kvm_queue_exception(vcpu, UD_VECTOR);
+ return 0;
+ }
+
+ if (vmx_get_cpl(vcpu)) {
+ kvm_inject_gp(vcpu, 0);
+ return 0;
+ }
+
+ return 1;
+}
+
+
static int handle_vmx_insn(struct kvm_vcpu *vcpu)
{
kvm_queue_exception(vcpu, UD_VECTOR);
return 1;
}
+static int handle_vmoff(struct kvm_vcpu *vcpu)
+{
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+ if (!nested_vmx_check_permission(vcpu))
+ return 1;
+
+ vmx->nested.vmon = 0;
+
+ skip_emulated_instruction(vcpu);
+ return 1;
+}
+
+static int handle_vmon(struct kvm_vcpu *vcpu)
+{
+ struct kvm_segment cs;
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+ if (!nested) {
+ printk(KERN_DEBUG "%s: nested vmx not enabled\n", __func__);
+ kvm_queue_exception(vcpu, UD_VECTOR);
+ return 1;
+ }
+
+ vmx_get_segment(vcpu, &cs, VCPU_SREG_CS);
+
+ if (!(vcpu->arch.cr4 & X86_CR4_VMXE) ||
+ !(vcpu->arch.cr0 & X86_CR0_PE) ||
+ (vmx_get_rflags(vcpu) & X86_EFLAGS_VM) ||
+ ((find_msr_entry(to_vmx(vcpu),
+ MSR_EFER)->data & EFER_LMA) && !cs.l)) {
+ kvm_queue_exception(vcpu, UD_VECTOR);
+ printk(KERN_INFO "%s invalid register state\n", __func__);
+ return 1;
+ }
+
+ if (vmx_get_cpl(vcpu)) {
+ printk(KERN_INFO "%s no permission\n", __func__);
+ kvm_inject_gp(vcpu, 0);
+ return 1;
+ }
+
+ vmx->nested.vmon = 1;
+
+ skip_emulated_instruction(vcpu);
+ return 1;
+}
+
static int handle_invlpg(struct kvm_vcpu *vcpu)
{
unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
@@ -3376,8 +3559,8 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
[EXIT_REASON_VMREAD] = handle_vmx_insn,
[EXIT_REASON_VMRESUME] = handle_vmx_insn,
[EXIT_REASON_VMWRITE] = handle_vmx_insn,
- [EXIT_REASON_VMOFF] = handle_vmx_insn,
- [EXIT_REASON_VMON] = handle_vmx_insn,
+ [EXIT_REASON_VMOFF] = handle_vmoff,
+ [EXIT_REASON_VMON] = handle_vmon,
[EXIT_REASON_TPR_BELOW_THRESHOLD] = handle_tpr_below_threshold,
[EXIT_REASON_APIC_ACCESS] = handle_apic_access,
[EXIT_REASON_WBINVD] = handle_wbinvd,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 8b3a169..9c39092 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -87,6 +87,10 @@ EXPORT_SYMBOL_GPL(kvm_x86_ops);
int ignore_msrs = 0;
module_param_named(ignore_msrs, ignore_msrs, bool, S_IRUGO | S_IWUSR);
+int nested = 1;
+EXPORT_SYMBOL_GPL(nested);
+module_param(nested, int, S_IRUGO);
+
struct kvm_stats_debugfs_item debugfs_entries[] = {
{ "pf_fixed", VCPU_STAT(pf_fixed) },
{ "pf_guest", VCPU_STAT(pf_guest) },
@@ -373,7 +377,7 @@ void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
return;
}
- if (cr4 & X86_CR4_VMXE) {
+	if ((cr4 & X86_CR4_VMXE) && !nested) {
printk(KERN_DEBUG "set_cr4: #GP, setting VMXE\n");
kvm_inject_gp(vcpu, 0);
return;
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 5eadea5..57204cb 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -35,4 +35,6 @@ static inline bool kvm_exception_is_soft(unsigned int nr)
struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
u32 function, u32 index);
+extern int nested;
+
#endif
--
1.6.0.4