When the guest can use VMX instructions (when the "nested" module option is
on), it should also be able to read and write VMX MSRs, e.g., to query about
VMX capabilities. This patch adds this support.

Signed-off-by: Nadav Har'El <n...@il.ibm.com>
---
 arch/x86/kvm/vmx.c |  117 +++++++++++++++++++++++++++++++++++++++++++
 arch/x86/kvm/x86.c |    6 +-
 2 files changed, 122 insertions(+), 1 deletion(-)

--- .before/arch/x86/kvm/x86.c  2010-12-08 18:56:49.000000000 +0200
+++ .after/arch/x86/kvm/x86.c   2010-12-08 18:56:49.000000000 +0200
@@ -796,7 +796,11 @@ static u32 msrs_to_save[] = {
 #ifdef CONFIG_X86_64
        MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
 #endif
-       MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA
+       MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA,
+       MSR_IA32_FEATURE_CONTROL,  MSR_IA32_VMX_BASIC,
+       MSR_IA32_VMX_PINBASED_CTLS, MSR_IA32_VMX_PROCBASED_CTLS,
+       MSR_IA32_VMX_EXIT_CTLS, MSR_IA32_VMX_ENTRY_CTLS,
+       MSR_IA32_VMX_PROCBASED_CTLS2, MSR_IA32_VMX_EPT_VPID_CAP,
 };
 
 static unsigned num_msrs_to_save;
--- .before/arch/x86/kvm/vmx.c  2010-12-08 18:56:49.000000000 +0200
+++ .after/arch/x86/kvm/vmx.c   2010-12-08 18:56:49.000000000 +0200
@@ -1211,6 +1211,119 @@ static void vmx_adjust_tsc_offset(struct
 }
 
 /*
+ * If we allow our guest to use VMX instructions (i.e., nested VMX), we should
+ * also let it use VMX-specific MSRs.
+ * vmx_get_vmx_msr() and vmx_set_vmx_msr() return 0 when we handled a
+ * VMX-specific MSR, or 1 when we haven't (and the caller should handle it
+ * like all other MSRs).
+ */
+static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
+{
+       u64 vmx_msr = 0;
+       u32 vmx_msr_high, vmx_msr_low;
+
+       switch (msr_index) {
+       case MSR_IA32_FEATURE_CONTROL:
+               *pdata = 0;
+               break;
+       case MSR_IA32_VMX_BASIC:
+               /*
+                * This MSR reports some information about VMX support of the
+                * processor. We should return information about the VMX we
+                * emulate for the guest, and the VMCS structure we give it -
+                * not about the VMX support of the underlying hardware.
+                * However, some capabilities of the underlying hardware are
+                * used directly by our emulation (e.g., the physical address
+                * width), so these are copied from what the hardware reports.
+                */
+               *pdata = VMCS12_REVISION | (((u64)sizeof(struct vmcs12)) << 32);
+               rdmsrl(MSR_IA32_VMX_BASIC, vmx_msr);
+#define VMX_BASIC_64           0x0001000000000000LLU
+#define VMX_BASIC_MEM_TYPE     0x003c000000000000LLU
+#define VMX_BASIC_INOUT                0x0040000000000000LLU
+               *pdata |= vmx_msr &
+                       (VMX_BASIC_64 | VMX_BASIC_MEM_TYPE | VMX_BASIC_INOUT);
+               break;
+#define CORE2_PINBASED_CTLS_MUST_BE_ONE        0x00000016
+#define MSR_IA32_VMX_TRUE_PINBASED_CTLS        0x48d
+       case MSR_IA32_VMX_TRUE_PINBASED_CTLS:
+       case MSR_IA32_VMX_PINBASED_CTLS:
+               vmx_msr_low  = CORE2_PINBASED_CTLS_MUST_BE_ONE;
+               vmx_msr_high = CORE2_PINBASED_CTLS_MUST_BE_ONE |
+                               PIN_BASED_EXT_INTR_MASK |
+                               PIN_BASED_NMI_EXITING |
+                               PIN_BASED_VIRTUAL_NMIS;
+               *pdata = vmx_msr_low | ((u64)vmx_msr_high << 32);
+               break;
+       case MSR_IA32_VMX_PROCBASED_CTLS:
+               /* This MSR determines which vm-execution controls the L1
+                * hypervisor may ask, or may not ask, to enable. Normally we
+                * can only allow enabling features which the hardware can
+                * support, but we limit ourselves to allowing only known
+                * features that have been tested under nesting. We allow
+                * disabling any feature (even if the hardware can't disable it).
+                */
+               rdmsr(MSR_IA32_VMX_PROCBASED_CTLS, vmx_msr_low, vmx_msr_high);
+
+               vmx_msr_low = 0; /* allow disabling any feature */
+               vmx_msr_high &= /* do not expose new untested features */
+                       CPU_BASED_HLT_EXITING | CPU_BASED_CR3_LOAD_EXITING |
+                       CPU_BASED_CR3_STORE_EXITING | CPU_BASED_USE_IO_BITMAPS |
+                       CPU_BASED_MOV_DR_EXITING | CPU_BASED_USE_TSC_OFFSETING |
+                       CPU_BASED_MWAIT_EXITING | CPU_BASED_MONITOR_EXITING |
+                       CPU_BASED_INVLPG_EXITING | CPU_BASED_TPR_SHADOW |
+                       CPU_BASED_USE_MSR_BITMAPS |
+#ifdef CONFIG_X86_64
+                       CPU_BASED_CR8_LOAD_EXITING |
+                       CPU_BASED_CR8_STORE_EXITING |
+#endif
+                       CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
+               *pdata = vmx_msr_low | ((u64)vmx_msr_high << 32);
+               break;
+       case MSR_IA32_VMX_EXIT_CTLS:
+               *pdata = 0;
+#ifdef CONFIG_X86_64
+               *pdata |= VM_EXIT_HOST_ADDR_SPACE_SIZE;
+#endif
+               break;
+       case MSR_IA32_VMX_ENTRY_CTLS:
+               *pdata = 0;
+               break;
+       case MSR_IA32_VMX_PROCBASED_CTLS2:
+               *pdata = 0;
+               if (vm_need_virtualize_apic_accesses(vcpu->kvm))
+                       *pdata |= SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
+               break;
+       case MSR_IA32_VMX_EPT_VPID_CAP:
+               *pdata = 0;
+               break;
+       default:
+               return 1;
+       }
+
+       return 0;
+}
+
+static int vmx_set_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
+{
+       switch (msr_index) {
+       case MSR_IA32_FEATURE_CONTROL:
+       case MSR_IA32_VMX_BASIC:
+       case MSR_IA32_VMX_TRUE_PINBASED_CTLS:
+       case MSR_IA32_VMX_PINBASED_CTLS:
+       case MSR_IA32_VMX_PROCBASED_CTLS:
+       case MSR_IA32_VMX_EXIT_CTLS:
+       case MSR_IA32_VMX_ENTRY_CTLS:
+       case MSR_IA32_VMX_PROCBASED_CTLS2:
+       case MSR_IA32_VMX_EPT_VPID_CAP:
+               pr_unimpl(vcpu, "unimplemented VMX MSR write: 0x%x data %llx\n",
+                         msr_index, data);
+               return 0;
+       default:
+               return 1;
+       }
+}
+/*
  * Reads an msr value (of 'msr_index') into 'pdata'.
  * Returns 0 on success, non-0 otherwise.
  * Assumes vcpu_load() was already called.
@@ -1258,6 +1371,8 @@ static int vmx_get_msr(struct kvm_vcpu *
                /* Otherwise falls through */
        default:
                vmx_load_host_state(to_vmx(vcpu));
+               if (nested && !vmx_get_vmx_msr(vcpu, msr_index, &data))
+                       break;
                msr = find_msr_entry(to_vmx(vcpu), msr_index);
                if (msr) {
                        vmx_load_host_state(to_vmx(vcpu));
@@ -1327,6 +1442,8 @@ static int vmx_set_msr(struct kvm_vcpu *
                        return 1;
                /* Otherwise falls through */
        default:
+               if (nested && !vmx_set_vmx_msr(vcpu, msr_index, data))
+                       break;
                msr = find_msr_entry(vmx, msr_index);
                if (msr) {
                        vmx_load_host_state(vmx);
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to