BitVisor hypervisor fails to start a nested guest due to lack of MSR
load/store support in KVM.

This patch fixes this problem according to Intel SDM.


Signed-off-by: Eugene Korenevsky <[email protected]>
---
 arch/x86/include/asm/vmx.h            |   6 ++
 arch/x86/include/uapi/asm/msr-index.h |   3 +
 arch/x86/include/uapi/asm/vmx.h       |   1 +
 arch/x86/kvm/vmx.c                    | 191 +++++++++++++++++++++++++++++++++-
 virt/kvm/kvm_main.c                   |   1 +
 5 files changed, 197 insertions(+), 5 deletions(-)

diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index bcbfade..e07414c 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -454,6 +454,12 @@ struct vmx_msr_entry {
 #define ENTRY_FAIL_VMCS_LINK_PTR       4
 
 /*
+ * VMX Abort codes
+ */
+#define VMX_ABORT_MSR_STORE            1
+#define VMX_ABORT_MSR_LOAD             4
+
+/*
  * VM-instruction error numbers
  */
 enum vm_instruction_error_number {
diff --git a/arch/x86/include/uapi/asm/msr-index.h 
b/arch/x86/include/uapi/asm/msr-index.h
index e21331c..3c9c601 100644
--- a/arch/x86/include/uapi/asm/msr-index.h
+++ b/arch/x86/include/uapi/asm/msr-index.h
@@ -316,6 +316,9 @@
 #define MSR_IA32_UCODE_WRITE           0x00000079
 #define MSR_IA32_UCODE_REV             0x0000008b
 
+#define MSR_IA32_SMM_MONITOR_CTL       0x0000009b
+#define MSR_IA32_SMBASE                        0x0000009e
+
 #define MSR_IA32_PERF_STATUS           0x00000198
 #define MSR_IA32_PERF_CTL              0x00000199
 #define MSR_AMD_PSTATE_DEF_BASE                0xc0010064
diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h
index 990a2fe..f5f8a62 100644
--- a/arch/x86/include/uapi/asm/vmx.h
+++ b/arch/x86/include/uapi/asm/vmx.h
@@ -56,6 +56,7 @@
 #define EXIT_REASON_MSR_READ            31
 #define EXIT_REASON_MSR_WRITE           32
 #define EXIT_REASON_INVALID_STATE       33
+#define EXIT_REASON_MSR_LOAD_FAILURE    34
 #define EXIT_REASON_MWAIT_INSTRUCTION   36
 #define EXIT_REASON_MONITOR_INSTRUCTION 39
 #define EXIT_REASON_PAUSE_INSTRUCTION   40
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 6a951d8..deebc96 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -8498,6 +8498,163 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, 
struct vmcs12 *vmcs12)
        kvm_register_write(vcpu, VCPU_REGS_RIP, vmcs12->guest_rip);
 }
 
+struct msr_entry {
+       u32 index;
+       u32 reserved;
+       u64 data;
+} __packed;
+
+static bool vmx_msr_switch_area_verify(u32 count, u64 addr, int maxphyaddr)
+{
+       if (count == 0)
+               return true;
+       if ((addr & 0xf) != 0)
+               return false;
+       if ((addr >> maxphyaddr) != 0)
+               return false;
+       if (((addr + count * sizeof(struct msr_entry) - 1) >> maxphyaddr) != 0)
+               return false;
+       return true;
+}
+
+static bool nested_vmx_msr_switch_verify(struct kvm_vcpu *vcpu,
+                                        struct vmcs12 *vmcs12)
+{
+       int maxphyaddr = cpuid_maxphyaddr(vcpu);
+
+#define VMCS12_MSR_SWITCH_VERIFY(name) { \
+       if (!vmx_msr_switch_area_verify(vmcs12->name##_count, \
+                                       vmcs12->name##_addr, maxphyaddr)) { \
+               pr_warn_ratelimited( \
+                       "%s: invalid MSR_{LOAD,STORE} parameters", \
+                       #name); \
+               return false; \
+       } \
+       }
+       VMCS12_MSR_SWITCH_VERIFY(vm_exit_msr_load);
+       VMCS12_MSR_SWITCH_VERIFY(vm_exit_msr_store);
+       VMCS12_MSR_SWITCH_VERIFY(vm_entry_msr_load);
+       return true;
+}
+
+static bool vmx_load_msr_entry_verify(struct kvm_vcpu *vcpu,
+                                     struct msr_entry *e)
+{
+       if (e->index == MSR_FS_BASE || e->index == MSR_GS_BASE)
+               return false;
+       /* SMM is not supported */
+       if (e->index == MSR_IA32_SMM_MONITOR_CTL)
+               return false;
+       /* x2APIC MSR accesses are not allowed */
+       if (apic_x2apic_mode(vcpu->arch.apic) && (e->index >> 24) == 0x800)
+               return false;
+       if (e->reserved != 0)
+               return false;
+       return true;
+}
+
+static bool vmx_msr_switch_is_protected_msr(u32 msr_index)
+{
+       /* Intel SDM: a processor MAY prevent writing to these MSRs when
+        * loading guest states on VM entries or saving guest states on VM
+        * exits.
+        * Assume our emulated processor DOES prevent writing */
+       return msr_index == MSR_IA32_UCODE_WRITE ||
+              msr_index == MSR_IA32_UCODE_REV;
+}
+
+static unsigned int nested_vmx_load_msrs(struct kvm_vcpu *vcpu,
+                                        u32 count, u64 addr)
+{
+       unsigned int i;
+       struct msr_entry msr_entry;
+       struct msr_data msr;
+
+       for (i = 1; i <= count; i++, addr += sizeof(msr_entry)) {
+               if (kvm_read_guest(vcpu->kvm, addr,
+                                  &msr_entry, sizeof(msr_entry))) {
+                       pr_warn_ratelimited(
+                               "Load MSR %d: cannot read GPA: 0x%llx\n",
+                               i, addr);
+                       return i;
+               }
+               if (!vmx_load_msr_entry_verify(vcpu, &msr_entry)) {
+                       pr_warn_ratelimited(
+                              "Load MSR %d: invalid MSR entry 0x%x 0x%x\n",
+                              i, msr_entry.index, msr_entry.reserved);
+                       return i;
+               }
+               msr.host_initiated = 0;
+               msr.index = msr_entry.index;
+               msr.data = msr_entry.data;
+               if (vmx_msr_switch_is_protected_msr(msr.index)) {
+                       pr_warn_ratelimited(
+                               "Load MSR %d: prevent writing to MSR 0x%x\n",
+                               i, msr.index);
+                       return i;
+               }
+               if (kvm_set_msr(vcpu, &msr)) {
+                       pr_warn_ratelimited(
+                              "Load MSR %d: cannot write 0x%llx to MSR 0x%x\n",
+                              i, msr.data, msr.index);
+                       return i;
+               }
+       }
+       return 0;
+}
+
+static bool vmx_store_msr_entry_verify(struct kvm_vcpu *vcpu,
+                                      struct msr_entry *e)
+{
+       /* x2APIC MSR accesses are not allowed */
+       if (apic_x2apic_mode(vcpu->arch.apic) && (e->index >> 24) == 0x800)
+               return false;
+       /* SMM is not supported */
+       if (e->index == MSR_IA32_SMBASE)
+               return false;
+       if (e->reserved != 0)
+               return false;
+       return true;
+}
+
+static unsigned int nested_vmx_store_msrs(struct kvm_vcpu *vcpu,
+                                         u32 count, u64 addr)
+{
+       unsigned int i;
+       struct msr_entry msr_entry;
+
+       for (i = 1; i <= count; i++, addr += sizeof(msr_entry)) {
+               if (kvm_read_guest(vcpu->kvm, addr,
+                                  &msr_entry, 2 * sizeof(u32))) {
+                       pr_warn_ratelimited(
+                               "Store MSR %d: cannot read GPA: 0x%llx\n",
+                               i, addr);
+                       return i;
+               }
+               if (!vmx_store_msr_entry_verify(vcpu, &msr_entry)) {
+                       pr_warn_ratelimited(
+                              "Store MSR %d: invalid MSR entry 0x%x 0x%x\n",
+                              i, msr_entry.index, msr_entry.reserved);
+                       return i;
+               }
+               if (vmx_get_msr(vcpu, msr_entry.index, &msr_entry.data)) {
+                       pr_warn_ratelimited(
+                               "Store MSR %d: cannot read MSR 0x%x\n",
+                               i, msr_entry.index);
+                       return i;
+               }
+               if (kvm_write_guest(vcpu->kvm,
+                                   addr + offsetof(struct msr_entry, data),
+                                   &msr_entry.data, sizeof(msr_entry.data))) {
+                       pr_warn_ratelimited(
+                               "Store MSR %d: cannot write to GPA: 0x%llx\n",
+                               i, addr);
+                       return i;
+               }
+       }
+       return 0;
+}
+
 /*
  * nested_vmx_run() handles a nested entry, i.e., a VMLAUNCH or VMRESUME on L1
  * for running an L2 nested guest.
@@ -8507,6 +8664,7 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool 
launch)
        struct vmcs12 *vmcs12;
        struct vcpu_vmx *vmx = to_vmx(vcpu);
        int cpu;
+       unsigned int msr;
        struct loaded_vmcs *vmcs02;
        bool ia32e;
 
@@ -8556,11 +8714,7 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool 
launch)
                return 1;
        }
 
-       if (vmcs12->vm_entry_msr_load_count > 0 ||
-           vmcs12->vm_exit_msr_load_count > 0 ||
-           vmcs12->vm_exit_msr_store_count > 0) {
-               pr_warn_ratelimited("%s: VMCS MSR_{LOAD,STORE} unsupported\n",
-                                   __func__);
+       if (!nested_vmx_msr_switch_verify(vcpu, vmcs12)) {
                nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
                return 1;
        }
@@ -8670,6 +8824,12 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool 
launch)
 
        prepare_vmcs02(vcpu, vmcs12);
 
+       msr = nested_vmx_load_msrs(vcpu, vmcs12->vm_entry_msr_load_count,
+                                  vmcs12->vm_entry_msr_load_addr);
+       if (msr)
+               nested_vmx_entry_failure(vcpu, vmcs12,
+                                        EXIT_REASON_MSR_LOAD_FAILURE, msr);
+
        if (vmcs12->guest_activity_state == GUEST_ACTIVITY_HLT)
                return kvm_emulate_halt(vcpu);
 
@@ -9099,6 +9259,15 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
        vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
 }
 
+static void nested_vmx_abort(struct kvm_vcpu *vcpu, u32 abort_code)
+{
+       struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
+
+       vmcs12->abort = abort_code;
+       pr_warn("Nested VMX abort %u\n", abort_code);
+       kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
+}
+
 /*
  * Emulate an exit from nested guest (L2) to L1, i.e., prepare to run L1
  * and modify vmcs12 to make it see what it would expect to see there if
@@ -9118,8 +9287,20 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 
exit_reason,
        prepare_vmcs12(vcpu, vmcs12, exit_reason, exit_intr_info,
                       exit_qualification);
 
+       if (exit_reason != EXIT_REASON_INVALID_STATE &&
+           exit_reason != EXIT_REASON_MSR_LOAD_FAILURE) {
+               if (nested_vmx_store_msrs(vcpu,
+                                         vmcs12->vm_exit_msr_store_count,
+                                         vmcs12->vm_exit_msr_store_addr))
+                       nested_vmx_abort(vcpu, VMX_ABORT_MSR_STORE);
+       }
+
        vmx_load_vmcs01(vcpu);
 
+       if (nested_vmx_load_msrs(vcpu, vmcs12->vm_exit_msr_load_count,
+                                vmcs12->vm_exit_msr_load_addr))
+               nested_vmx_abort(vcpu, VMX_ABORT_MSR_LOAD);
+
        if ((exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT)
            && nested_exit_intr_ack_set(vcpu)) {
                int irq = kvm_cpu_get_interrupt(vcpu);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 5b45330..c9d1c4a 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1582,6 +1582,7 @@ int kvm_write_guest(struct kvm *kvm, gpa_t gpa, const 
void *data,
        }
        return 0;
 }
+EXPORT_SYMBOL_GPL(kvm_write_guest);
 
 int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
                              gpa_t gpa, unsigned long len)
-- 
2.0.4

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to