[Ccing a few others who might be interested]

Hi Eugene,

Did you look at the other patch that was posted for this functionality
by Wincy Van ?

https://lkml.org/lkml/2014/11/21/749

Eugene Korenevsky <[email protected]> writes:

> BitVisor hypervisor fails to start a nested guest due to lack of MSR
> load/store support in KVM.
>
> This patch fixes this problem according to Intel SDM.

It would be helpful for the commit message to be a little more 
descriptive. 
>
> Signed-off-by: Eugene Korenevsky <[email protected]>
> ---
>  arch/x86/include/asm/vmx.h            |   6 ++
>  arch/x86/include/uapi/asm/msr-index.h |   3 +
>  arch/x86/include/uapi/asm/vmx.h       |   1 +
>  arch/x86/kvm/vmx.c                    | 191 
> +++++++++++++++++++++++++++++++++-
>  virt/kvm/kvm_main.c                   |   1 +
>  5 files changed, 197 insertions(+), 5 deletions(-)
>
> diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
> index bcbfade..e07414c 100644
> --- a/arch/x86/include/asm/vmx.h
> +++ b/arch/x86/include/asm/vmx.h
> @@ -454,6 +454,12 @@ struct vmx_msr_entry {
>  #define ENTRY_FAIL_VMCS_LINK_PTR     4
>  
>  /*
> + * VMX Abort codes
> + */
> +#define VMX_ABORT_MSR_STORE          1
> +#define VMX_ABORT_MSR_LOAD           4
> +
> +/*
>   * VM-instruction error numbers
>   */
>  enum vm_instruction_error_number {
> diff --git a/arch/x86/include/uapi/asm/msr-index.h 
> b/arch/x86/include/uapi/asm/msr-index.h
> index e21331c..3c9c601 100644
> --- a/arch/x86/include/uapi/asm/msr-index.h
> +++ b/arch/x86/include/uapi/asm/msr-index.h
> @@ -316,6 +316,9 @@
>  #define MSR_IA32_UCODE_WRITE         0x00000079
>  #define MSR_IA32_UCODE_REV           0x0000008b
>  
> +#define MSR_IA32_SMM_MONITOR_CTL     0x0000009b
> +#define MSR_IA32_SMBASE                      0x0000009e
> +
Extra tab here ?

>  #define MSR_IA32_PERF_STATUS         0x00000198
>  #define MSR_IA32_PERF_CTL            0x00000199
>  #define MSR_AMD_PSTATE_DEF_BASE              0xc0010064
> diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h
> index 990a2fe..f5f8a62 100644
> --- a/arch/x86/include/uapi/asm/vmx.h
> +++ b/arch/x86/include/uapi/asm/vmx.h
> @@ -56,6 +56,7 @@
>  #define EXIT_REASON_MSR_READ            31
>  #define EXIT_REASON_MSR_WRITE           32
>  #define EXIT_REASON_INVALID_STATE       33
> +#define EXIT_REASON_MSR_LOAD_FAILURE    34
>  #define EXIT_REASON_MWAIT_INSTRUCTION   36
>  #define EXIT_REASON_MONITOR_INSTRUCTION 39
>  #define EXIT_REASON_PAUSE_INSTRUCTION   40
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 6a951d8..deebc96 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -8498,6 +8498,163 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, 
> struct vmcs12 *vmcs12)
>       kvm_register_write(vcpu, VCPU_REGS_RIP, vmcs12->guest_rip);
>  }
>  
> +struct msr_entry {
> +     u32 index;
> +     u32 reserved;
> +     u64 data;
> +} __packed;
> +

We already have struct vmx_msr_entry in vmx.h

> +static bool vmx_msr_switch_area_verify(u32 count, u64 addr, int maxphyaddr)
> +{
> +     if (count == 0)
> +             return true;
> +     if ((addr & 0xf) != 0)
> +             return false;
> +     if ((addr >> maxphyaddr) != 0)
> +             return false;
> +     if (((addr + count * sizeof(struct msr_entry) - 1) >> maxphyaddr) != 0)
> +             return false;
> +     return true;
> +}
> +
Shouldn't we also check if any of bits 63:32 is set ?

> +static bool nested_vmx_msr_switch_verify(struct kvm_vcpu *vcpu,
> +                                      struct vmcs12 *vmcs12)
> +{
> +     int maxphyaddr = cpuid_maxphyaddr(vcpu);
> +
> +#define VMCS12_MSR_SWITCH_VERIFY(name) { \
> +     if (!vmx_msr_switch_area_verify(vmcs12->name##_count, \
> +                                     vmcs12->name##_addr, maxphyaddr)) { \
> +             pr_warn_ratelimited( \
> +                     "%s: invalid MSR_{LOAD,STORE} parameters", \
> +                     #name); \
> +             return false; \
> +     } \
> +     }
Just a personal opinion: replacing the macro with a switch statement is
probably a little less clumsy

> +     VMCS12_MSR_SWITCH_VERIFY(vm_exit_msr_load);
> +     VMCS12_MSR_SWITCH_VERIFY(vm_exit_msr_store);
> +     VMCS12_MSR_SWITCH_VERIFY(vm_entry_msr_load);
> +     return true;
> +}
> +
> +static bool vmx_load_msr_entry_verify(struct kvm_vcpu *vcpu,
> +                                   struct msr_entry *e)
> +{
> +     if (e->index == MSR_FS_BASE || e->index == MSR_GS_BASE)
> +             return false;
> +     /* SMM is not supported */
> +     if (e->index == MSR_IA32_SMM_MONITOR_CTL)
> +             return false;
> +     /* x2APIC MSR accesses are not allowed */
> +     if (apic_x2apic_mode(vcpu->arch.apic) && (e->index >> 24) == 0x800)
> +             return false;
> +     if (e->reserved != 0)
> +             return false;
> +     return true;
> +}
> +
> +static bool vmx_msr_switch_is_protected_msr(u32 msr_index)
> +{
> +     /* Intel SDM: a processor MAY prevent writing to these MSRs when
> +      * loading guest states on VM entries or saving guest states on VM
> +      * exits.
> +      * Assume our emulated processor DOES prevent writing */
> +     return msr_index == MSR_IA32_UCODE_WRITE ||
> +            msr_index == MSR_IA32_UCODE_REV;
> +}
> +
> +static unsigned int nested_vmx_load_msrs(struct kvm_vcpu *vcpu,
> +                                      u32 count, u64 addr)
> +{
> +     unsigned int i;
> +     struct msr_entry msr_entry;
> +     struct msr_data msr;
> +
> +     for (i = 1; i <= count; i++, addr += sizeof(msr_entry)) {
> +             if (kvm_read_guest(vcpu->kvm, addr,
> +                                &msr_entry, sizeof(msr_entry))) {
> +                     pr_warn_ratelimited(
> +                             "Load MSR %d: cannot read GPA: 0x%llx\n",
> +                             i, addr);
> +                     return i;
> +             }
> +             if (!vmx_load_msr_entry_verify(vcpu, &msr_entry)) {
> +                     pr_warn_ratelimited(
> +                            "Load MSR %d: invalid MSR entry 0x%x 0x%x\n",
> +                            i, msr_entry.index, msr_entry.reserved);
> +                     return i;
> +             }
> +             msr.host_initiated = 0;
> +             msr.index = msr_entry.index;
> +             msr.data = msr_entry.data;
> +             if (vmx_msr_switch_is_protected_msr(msr.index)) {
> +                     pr_warn_ratelimited(
> +                             "Load MSR %d: prevent writing to MSR 0x%x\n",
> +                             i, msr.index);
> +                     return i;
> +             }
> +             if (kvm_set_msr(vcpu, &msr)) {
> +                     pr_warn_ratelimited(
> +                            "Load MSR %d: cannot write 0x%llx to MSR 0x%x\n",
> +                            i, msr.data, msr.index);
> +                     return i;
> +             }
> +     }
> +     return 0;
> +}
> +
To ease debugging, these warning messages should also specify that they are
Nested VMX specific. Also I think that the interfaces/functions you have 
introduced
can be split up into an introductory patch and a second patch can then use these
interfaces to provide the load/store functionality.

> +static bool vmx_store_msr_entry_verify(struct kvm_vcpu *vcpu,
> +                                    struct msr_entry *e)
> +{
> +     /* x2APIC MSR accesses are not allowed */
> +     if (apic_x2apic_mode(vcpu->arch.apic) && (e->index >> 24) == 0x800)
> +             return false;
> +     /* SMM is not supported */
> +     if (e->index == MSR_IA32_SMBASE)
> +             return false;
> +     if (e->reserved != 0)
> +             return false;
> +     return true;
> +}
> +
> +static unsigned int nested_vmx_store_msrs(struct kvm_vcpu *vcpu,
> +                                       u32 count, u64 addr)
> +{
> +     unsigned int i;
> +     struct msr_entry msr_entry;
> +
> +     for (i = 1; i <= count; i++, addr += sizeof(msr_entry)) {
> +             if (kvm_read_guest(vcpu->kvm, addr,
> +                                &msr_entry, 2 * sizeof(u32))) {
> +                     pr_warn_ratelimited(
> +                             "Store MSR %d: cannot read GPA: 0x%llx\n",
> +                             i, addr);
> +                     return i;
> +             }
> +             if (!vmx_store_msr_entry_verify(vcpu, &msr_entry)) {
> +                     pr_warn_ratelimited(
> +                            "Store MSR %d: invalid MSR entry 0x%x 0x%x\n",
> +                            i, msr_entry.index, msr_entry.reserved);
> +                     return i;
> +             }
> +             if (vmx_get_msr(vcpu, msr_entry.index, &msr_entry.data)) {
> +                     pr_warn_ratelimited(
> +                             "Store MSR %d: cannot read MSR 0x%x\n",
> +                             i, msr_entry.index);
> +                     return i;
> +             }
> +             if (kvm_write_guest(vcpu->kvm,
> +                                 addr + offsetof(struct msr_entry, data),
> +                                 &msr_entry.data, sizeof(msr_entry.data))) {
> +                     pr_warn_ratelimited(
> +                             "Store MSR %d: cannot write to GPA: 0x%llx\n",
> +                             i, addr);
> +                     return i;
> +             }
> +     }
> +     return 0;
> +}
> +
>  /*
>   * nested_vmx_run() handles a nested entry, i.e., a VMLAUNCH or VMRESUME on 
> L1
>   * for running an L2 nested guest.
> @@ -8507,6 +8664,7 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool 
> launch)
>       struct vmcs12 *vmcs12;
>       struct vcpu_vmx *vmx = to_vmx(vcpu);
>       int cpu;
> +     unsigned int msr;
>       struct loaded_vmcs *vmcs02;
>       bool ia32e;
>  
> @@ -8556,11 +8714,7 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool 
> launch)
>               return 1;
>       }
>  
> -     if (vmcs12->vm_entry_msr_load_count > 0 ||
> -         vmcs12->vm_exit_msr_load_count > 0 ||
> -         vmcs12->vm_exit_msr_store_count > 0) {
> -             pr_warn_ratelimited("%s: VMCS MSR_{LOAD,STORE} unsupported\n",
> -                                 __func__);
> +     if (!nested_vmx_msr_switch_verify(vcpu, vmcs12)) {
>               nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
>               return 1;
>       }
> @@ -8670,6 +8824,12 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool 
> launch)
>  
>       prepare_vmcs02(vcpu, vmcs12);
>  
> +     msr = nested_vmx_load_msrs(vcpu, vmcs12->vm_entry_msr_load_count,
> +                                vmcs12->vm_entry_msr_load_addr);
> +     if (msr)
> +             nested_vmx_entry_failure(vcpu, vmcs12,
> +                                      EXIT_REASON_MSR_LOAD_FAILURE, msr);
> +
>       if (vmcs12->guest_activity_state == GUEST_ACTIVITY_HLT)
>               return kvm_emulate_halt(vcpu);
>  
> @@ -9099,6 +9259,15 @@ static void load_vmcs12_host_state(struct kvm_vcpu 
> *vcpu,
>       vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
>  }
>  
> +static void nested_vmx_abort(struct kvm_vcpu *vcpu, u32 abort_code)
> +{
> +     struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
> +
> +     vmcs12->abort = abort_code;
> +     pr_warn("Nested VMX abort %u\n", abort_code);
> +     kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
> +}
> +
>  /*
>   * Emulate an exit from nested guest (L2) to L1, i.e., prepare to run L1
>   * and modify vmcs12 to make it see what it would expect to see there if
> @@ -9118,8 +9287,20 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, 
> u32 exit_reason,
>       prepare_vmcs12(vcpu, vmcs12, exit_reason, exit_intr_info,
>                      exit_qualification);
>  
> +     if (exit_reason != EXIT_REASON_INVALID_STATE &&
> +         exit_reason != EXIT_REASON_MSR_LOAD_FAILURE) {
> +             if (nested_vmx_store_msrs(vcpu,
> +                                       vmcs12->vm_exit_msr_store_count,
> +                                       vmcs12->vm_exit_msr_store_addr))
> +                     nested_vmx_abort(vcpu, VMX_ABORT_MSR_STORE);
> +     }
> +
>       vmx_load_vmcs01(vcpu);
>  
> +     if (nested_vmx_load_msrs(vcpu, vmcs12->vm_exit_msr_load_count,
> +                              vmcs12->vm_exit_msr_load_addr))
> +             nested_vmx_abort(vcpu, VMX_ABORT_MSR_LOAD);
> +
>       if ((exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT)
>           && nested_exit_intr_ack_set(vcpu)) {
>               int irq = kvm_cpu_get_interrupt(vcpu);
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index 5b45330..c9d1c4a 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -1582,6 +1582,7 @@ int kvm_write_guest(struct kvm *kvm, gpa_t gpa, const 
> void *data,
>       }
>       return 0;
>  }
> +EXPORT_SYMBOL_GPL(kvm_write_guest);
>  
>  int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
>                             gpa_t gpa, unsigned long len)
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to