Dave Voutila writes:

> The following diff cleans up and improves MSR-related event handling in
> vmm(4) for when the guest attempts a rdmsr/wrmsr instruction. As
> mentioned in a previous email to tech@ about fixing support for 9front
> guests [1], we found some discprepencies between vmm(4)'s handling on
> Intel hosts and AMD hosts.
>
> While the diff has been tested already by abieber@ and brynet@ with
> additional review by mlarkin@, I'm looking for additional testers
> willing to look for regressions.

Last call for additional testers. Plan is to commit the diff later today
as I have an OK from mlarkin@.

>
> This diff specifically improves and standardizes msr-based exit handling
> between Intel and AMD hosts to the following:
>
> 1. All RDMSR instructions that cause a vm-exit must be explicitly
>    handled (e.g. via emulation) or they result in injecting a #GP
>    exception into the guest.
>
> 2. All WRMSR instructions that cause a vm-exit and are not explicitly
>    handled are ignored (i.e. %rax and %rdx values are not inspected or
>    used).
>
> Consequently with the change for (1) above, the diff adds explicit
> handling for the following MSRs:
>
> 1. MSR_CR_PAT: for now reads/writes are shadowed for the guest vcpu and
>    host state is not touched. The shadow state is initialized on vcpu
>    reset to the same value as the host.
>
> 2. MSR_BIOS_SIGN, MSR_INT_PEN_MSG (on AMD), and MSR_PLATFORM_ID are all
>    ignored. This means reads result in vmm(4) setting the guest vcpu's
>    %rax and %rdx to 0. (These msr's are ignored in the same manner by
>    other hypervisors like kvm and nvmm.)
>
> The average user should not see a change in behavior of vmm(4) or
> vmd(8). The biggest change is for *Intel* users as this diff changes the
> current vmx logic which was not injecting #GP for unsupported
> msr's. (Your guests were potentially getting garbage results from rdmsr
> instructions.)
>

If you do test the diff and have issues, I forgot to mention to please
build the kernel with VMM_DEBUG. The output to the kernel buffer will
help diagnose any problematic msr access.

> The folks attempting to host the latest release of 9front as a guest on
> AMD hosts should see their guest boot successfully with this diff :-)
>
> -dv
>
> [1] https://marc.info/?l=openbsd-tech&m=161693517121814&w=2
>
>
> Index: sys/arch/amd64/include/vmmvar.h
> ===================================================================
> RCS file: /cvs/src/sys/arch/amd64/include/vmmvar.h,v
> retrieving revision 1.70
> diff -u -p -r1.70 vmmvar.h
> --- sys/arch/amd64/include/vmmvar.h   8 Apr 2020 07:39:48 -0000       1.70
> +++ sys/arch/amd64/include/vmmvar.h   31 Mar 2021 00:15:43 -0000
> @@ -936,6 +936,9 @@ struct vcpu {
>       paddr_t vc_pvclock_system_gpa;
>       uint32_t vc_pvclock_system_tsc_mul;
>
> +     /* Shadowed MSRs */
> +     uint64_t vc_shadow_pat;
> +
>       /* VMX only */
>       uint64_t vc_vmx_basic;
>       uint64_t vc_vmx_entry_ctls;
> Index: sys/arch/amd64/amd64/vmm.c
> ===================================================================
> RCS file: /cvs/src/sys/arch/amd64/amd64/vmm.c,v
> retrieving revision 1.278
> diff -u -p -r1.278 vmm.c
> --- sys/arch/amd64/amd64/vmm.c        11 Mar 2021 11:16:55 -0000      1.278
> +++ sys/arch/amd64/amd64/vmm.c        31 Mar 2021 00:15:43 -0000
> @@ -207,6 +207,7 @@ void svm_set_dirty(struct vcpu *, uint32
>  int vmm_gpa_is_valid(struct vcpu *vcpu, paddr_t gpa, size_t obj_size);
>  void vmm_init_pvclock(struct vcpu *, paddr_t);
>  int vmm_update_pvclock(struct vcpu *);
> +int vmm_pat_is_valid(uint64_t);
>
>  #ifdef VMM_DEBUG
>  void dump_vcpu(struct vcpu *);
> @@ -3193,6 +3194,9 @@ vcpu_reset_regs_vmx(struct vcpu *vcpu, s
>       /* xcr0 power on default sets bit 0 (x87 state) */
>       vcpu->vc_gueststate.vg_xcr0 = XCR0_X87 & xsave_mask;
>
> +     /* XXX PAT shadow */
> +     vcpu->vc_shadow_pat = rdmsr(MSR_CR_PAT);
> +
>  exit:
>       /* Flush the VMCS */
>       if (vmclear(&vcpu->vc_control_pa)) {
> @@ -3584,6 +3588,10 @@ vcpu_init(struct vcpu *vcpu)
>       vcpu->vc_state = VCPU_STATE_STOPPED;
>       vcpu->vc_vpid = 0;
>       vcpu->vc_pvclock_system_gpa = 0;
> +
> +     /* Shadow PAT MSR, starting with host's value. */
> +     vcpu->vc_shadow_pat = rdmsr(MSR_CR_PAT);
> +
>       if (vmm_softc->mode == VMM_MODE_VMX ||
>           vmm_softc->mode == VMM_MODE_EPT)
>               ret = vcpu_init_vmx(vcpu);
> @@ -6257,26 +6265,24 @@ vmx_handle_rdmsr(struct vcpu *vcpu)
>       rdx = &vcpu->vc_gueststate.vg_rdx;
>
>       switch (*rcx) {
> -     case MSR_SMBASE:
> -             /*
> -              * 34.15.6.3 - Saving Guest State (SMM)
> -              *
> -              * Unsupported, so inject #GP and return without
> -              * advancing %rip.
> -              */
> +     case MSR_BIOS_SIGN:
> +     case MSR_PLATFORM_ID:
> +             /* Ignored */
> +             *rax = 0;
> +             *rdx = 0;
> +             break;
> +     case MSR_CR_PAT:
> +             *rax = (vcpu->vc_shadow_pat & 0xFFFFFFFFULL);
> +             *rdx = (vcpu->vc_shadow_pat >> 32);
> +             break;
> +     default:
> +             /* Unsupported MSRs causes #GP exception, don't advance %rip */
> +             DPRINTF("%s: unsupported rdmsr (msr=0x%llx), injecting #GP\n",
> +                 __func__, *rcx);
>               ret = vmm_inject_gp(vcpu);
>               return (ret);
>       }
>
> -     *rax = 0;
> -     *rdx = 0;
> -
> -#ifdef VMM_DEBUG
> -     /* Log the access, to be able to identify unknown MSRs */
> -     DPRINTF("%s: rdmsr exit, msr=0x%llx, data returned to "
> -         "guest=0x%llx:0x%llx\n", __func__, *rcx, *rdx, *rax);
> -#endif /* VMM_DEBUG */
> -
>       vcpu->vc_gueststate.vg_rip += insn_length;
>
>       return (0);
> @@ -6442,7 +6448,7 @@ vmx_handle_misc_enable_msr(struct vcpu *
>  int
>  vmx_handle_wrmsr(struct vcpu *vcpu)
>  {
> -     uint64_t insn_length;
> +     uint64_t insn_length, val;
>       uint64_t *rax, *rdx, *rcx;
>       int ret;
>
> @@ -6460,8 +6466,16 @@ vmx_handle_wrmsr(struct vcpu *vcpu)
>       rax = &vcpu->vc_gueststate.vg_rax;
>       rcx = &vcpu->vc_gueststate.vg_rcx;
>       rdx = &vcpu->vc_gueststate.vg_rdx;
> +     val = (*rdx << 32) | (*rax & 0xFFFFFFFFULL);
>
>       switch (*rcx) {
> +     case MSR_CR_PAT:
> +             if (!vmm_pat_is_valid(val)) {
> +                     ret = vmm_inject_gp(vcpu);
> +                     return (ret);
> +             }
> +             vcpu->vc_shadow_pat = val;
> +             break;
>       case MSR_MISC_ENABLE:
>               vmx_handle_misc_enable_msr(vcpu);
>               break;
> @@ -6508,7 +6522,7 @@ vmx_handle_wrmsr(struct vcpu *vcpu)
>  int
>  svm_handle_msr(struct vcpu *vcpu)
>  {
> -     uint64_t insn_length;
> +     uint64_t insn_length, val;
>       uint64_t *rax, *rcx, *rdx;
>       struct vmcb *vmcb = (struct vmcb *)vcpu->vc_control_va;
>       int ret;
> @@ -6521,35 +6535,58 @@ svm_handle_msr(struct vcpu *vcpu)
>       rdx = &vcpu->vc_gueststate.vg_rdx;
>
>       if (vmcb->v_exitinfo1 == 1) {
> -             if (*rcx == MSR_EFER) {
> +             /* WRMSR */
> +             val = (*rdx << 32) | (*rax & 0xFFFFFFFFULL);
> +
> +             switch (*rcx) {
> +             case MSR_CR_PAT:
> +                     if (!vmm_pat_is_valid(val)) {
> +                             ret = vmm_inject_gp(vcpu);
> +                             return (ret);
> +                     }
> +                     vcpu->vc_shadow_pat = val;
> +                     break;
> +             case MSR_EFER:
>                       vmcb->v_efer = *rax | EFER_SVME;
> -             } else if (*rcx == KVM_MSR_SYSTEM_TIME) {
> +                     break;
> +             case KVM_MSR_SYSTEM_TIME:
>                       vmm_init_pvclock(vcpu,
>                           (*rax & 0xFFFFFFFFULL) | (*rdx  << 32));
> -             } else {
> -#ifdef VMM_DEBUG
> +                     break;
> +             default:
>                       /* Log the access, to be able to identify unknown MSRs 
> */
>                       DPRINTF("%s: wrmsr exit, msr=0x%llx, discarding data "
>                           "written from guest=0x%llx:0x%llx\n", __func__,
>                           *rcx, *rdx, *rax);
> -#endif /* VMM_DEBUG */
>               }
>       } else {
> +             /* RDMSR */
>               switch (*rcx) {
> -                     case MSR_DE_CFG:
> -                             /* LFENCE seralizing bit is set by host */
> -                             *rax = DE_CFG_SERIALIZE_LFENCE;
> -                             *rdx = 0;
> -                             break;
> -                     case MSR_INT_PEN_MSG:
> -                             *rax = 0;
> -                             *rdx = 0;
> -                             break;
> -                     default:
> -                             DPRINTF("%s: guest read msr 0x%llx, injecting "
> -                                 "#GP\n", __func__, *rcx);
> -                             ret = vmm_inject_gp(vcpu);
> -                             return (ret);
> +             case MSR_BIOS_SIGN:
> +             case MSR_INT_PEN_MSG:
> +             case MSR_PLATFORM_ID:
> +                     /* Ignored */
> +                     *rax = 0;
> +                     *rdx = 0;
> +                     break;
> +             case MSR_CR_PAT:
> +                     *rax = (vcpu->vc_shadow_pat & 0xFFFFFFFFULL);
> +                     *rdx = (vcpu->vc_shadow_pat >> 32);
> +                     break;
> +             case MSR_DE_CFG:
> +                     /* LFENCE seralizing bit is set by host */
> +                     *rax = DE_CFG_SERIALIZE_LFENCE;
> +                     *rdx = 0;
> +                     break;
> +             default:
> +                     /*
> +                      * Unsupported MSRs causes #GP exception, don't advance
> +                      * %rip
> +                      */
> +                     DPRINTF("%s: unsupported rdmsr (msr=0x%llx), "
> +                         "injecting #GP\n", __func__, *rcx);
> +                     ret = vmm_inject_gp(vcpu);
> +                     return (ret);
>               }
>       }
>
> @@ -7271,6 +7308,23 @@ vmm_update_pvclock(struct vcpu *vcpu)
>               pvclock_ti->ti_version &= ~0x1;
>       }
>       return (0);
> +}
> +
> +int
> +vmm_pat_is_valid(uint64_t pat)
> +{
> +     int i;
> +     uint8_t *byte = (uint8_t *)&pat;
> +
> +     /* Intel SDM Vol 3A, 11.12.2: 0x02, 0x03, and 0x08-0xFF result in #GP */
> +     for (i = 0; i < 8; i++) {
> +             if (byte[i] == 0x02 || byte[i] == 0x03 || byte[i] > 0x07) {
> +                     DPRINTF("%s: invalid pat %llx\n", __func__, pat);
> +                     return 0;
> +             }
> +     }
> +
> +     return 1;
>  }
>
>  /*


--
-Dave Voutila

Reply via email to