On Mon, Apr 05, 2021 at 09:54:14AM -0400, Dave Voutila wrote:
> 
> Dave Voutila writes:
> 
> > The following diff cleans up and improves MSR-related event handling in
> > vmm(4) for when the guest attempts a rdmsr/wrmsr instruction. As
> > mentioned in a previous email to tech@ about fixing support for 9front
> > guests [1], we found some discrepancies between vmm(4)'s handling on
> > Intel hosts and AMD hosts.
> >
> > While the diff has been tested already by abieber@ and brynet@ with
> > additional review by mlarkin@, I'm looking for additional testers
> > willing to look for regressions.
> 
> Last call for additional testers. Plan is to commit the diff later today
> as I have an OK from mlarkin@.

This is ok brynet@ too!

> >
> > This diff specifically improves and standardizes MSR-based exit
> > handling between Intel and AMD hosts as follows:
> >
> > 1. All RDMSR instructions that cause a vm-exit must be explicitly
> >    handled (e.g. via emulation) or they result in injecting a #GP
> >    exception into the guest.
> >
> > 2. All WRMSR instructions that cause a vm-exit and are not explicitly
> >    handled are ignored (i.e. %rax and %rdx values are not inspected or
> >    used).
> >
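> > A minimal sketch of these two rules (handle_rdmsr_sketch is a
> > made-up name for illustration only; the real per-architecture
> > handlers are in the diff below):
> >
> >     /* 1. rdmsr exits must be handled or they inject #GP. */
> >     int
> >     handle_rdmsr_sketch(struct vcpu *vcpu, uint64_t msr)
> >     {
> >             switch (msr) {
> >             case MSR_CR_PAT:
> >                     /* emulated: return the shadowed value */
> >                     return (0);
> >             default:
> >                     /* unhandled: #GP, %rip is not advanced */
> >                     return (vmm_inject_gp(vcpu));
> >             }
> >     }
> >
> >     /* 2. wrmsr exits: an unhandled MSR is silently ignored. */
> >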
> > Consequently, with the change for (1) above, the diff adds explicit
> > handling for the following MSRs:
> >
> > 1. MSR_CR_PAT: for now reads/writes are shadowed for the guest vcpu and
> >    host state is not touched. The shadow state is initialized on vcpu
> >    reset to the same value as the host.
> >
> > 2. MSR_BIOS_SIGN, MSR_INT_PEN_MSG (on AMD), and MSR_PLATFORM_ID are all
> >    ignored. This means reads result in vmm(4) setting the guest vcpu's
> >    %rax and %rdx to 0. (These MSRs are ignored in the same manner by
> >    other hypervisors like kvm and nvmm.)
> >
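> > As a worked example of the PAT check added below (per the Intel SDM
> > rule cited in the diff, every byte of the PAT must encode a valid
> > memory type):
> >
> >     vmm_pat_is_valid(0x0007040600070406ULL); /* 1: power-on default */
> >     vmm_pat_is_valid(0x0000000000000002ULL); /* 0: 0x02 is reserved */
> >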
> > The average user should not see a change in behavior of vmm(4) or
> > vmd(8). The biggest change is for *Intel* users as this diff changes the
> > current vmx logic, which was not injecting #GP for unsupported
> > MSRs. (Your guests were potentially getting garbage results from rdmsr
> > instructions.)
> >
> 
> If you do test the diff and have issues, I forgot to mention: please
> build the kernel with VMM_DEBUG. The output in the kernel message
> buffer will help diagnose any problematic MSR access.
> 
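> One way to do that, assuming a standard source tree (VMM_DEBUG just
> enables the DPRINTF calls in vmm(4); GENERIC.VMMDBG is an arbitrary
> config name):
>
>     # cd /usr/src/sys/arch/amd64/conf
>     # cp GENERIC.MP GENERIC.VMMDBG
>     # echo 'option VMM_DEBUG' >> GENERIC.VMMDBG
>     # config GENERIC.VMMDBG
>     # cd ../compile/GENERIC.VMMDBG && make obj && make
>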
> > The folks attempting to host the latest release of 9front as a guest on
> > AMD hosts should see their guest boot successfully with this diff :-)
> >
> > -dv
> >
> > [1] https://marc.info/?l=openbsd-tech&m=161693517121814&w=2
> >
> >
> > Index: sys/arch/amd64/include/vmmvar.h
> > ===================================================================
> > RCS file: /cvs/src/sys/arch/amd64/include/vmmvar.h,v
> > retrieving revision 1.70
> > diff -u -p -r1.70 vmmvar.h
> > --- sys/arch/amd64/include/vmmvar.h 8 Apr 2020 07:39:48 -0000       1.70
> > +++ sys/arch/amd64/include/vmmvar.h 31 Mar 2021 00:15:43 -0000
> > @@ -936,6 +936,9 @@ struct vcpu {
> >     paddr_t vc_pvclock_system_gpa;
> >     uint32_t vc_pvclock_system_tsc_mul;
> >
> > +   /* Shadowed MSRs */
> > +   uint64_t vc_shadow_pat;
> > +
> >     /* VMX only */
> >     uint64_t vc_vmx_basic;
> >     uint64_t vc_vmx_entry_ctls;
> > Index: sys/arch/amd64/amd64/vmm.c
> > ===================================================================
> > RCS file: /cvs/src/sys/arch/amd64/amd64/vmm.c,v
> > retrieving revision 1.278
> > diff -u -p -r1.278 vmm.c
> > --- sys/arch/amd64/amd64/vmm.c      11 Mar 2021 11:16:55 -0000      1.278
> > +++ sys/arch/amd64/amd64/vmm.c      31 Mar 2021 00:15:43 -0000
> > @@ -207,6 +207,7 @@ void svm_set_dirty(struct vcpu *, uint32
> >  int vmm_gpa_is_valid(struct vcpu *vcpu, paddr_t gpa, size_t obj_size);
> >  void vmm_init_pvclock(struct vcpu *, paddr_t);
> >  int vmm_update_pvclock(struct vcpu *);
> > +int vmm_pat_is_valid(uint64_t);
> >
> >  #ifdef VMM_DEBUG
> >  void dump_vcpu(struct vcpu *);
> > @@ -3193,6 +3194,9 @@ vcpu_reset_regs_vmx(struct vcpu *vcpu, s
> >     /* xcr0 power on default sets bit 0 (x87 state) */
> >     vcpu->vc_gueststate.vg_xcr0 = XCR0_X87 & xsave_mask;
> >
> > +   /* XXX PAT shadow */
> > +   vcpu->vc_shadow_pat = rdmsr(MSR_CR_PAT);
> > +
> >  exit:
> >     /* Flush the VMCS */
> >     if (vmclear(&vcpu->vc_control_pa)) {
> > @@ -3584,6 +3588,10 @@ vcpu_init(struct vcpu *vcpu)
> >     vcpu->vc_state = VCPU_STATE_STOPPED;
> >     vcpu->vc_vpid = 0;
> >     vcpu->vc_pvclock_system_gpa = 0;
> > +
> > +   /* Shadow PAT MSR, starting with host's value. */
> > +   vcpu->vc_shadow_pat = rdmsr(MSR_CR_PAT);
> > +
> >     if (vmm_softc->mode == VMM_MODE_VMX ||
> >         vmm_softc->mode == VMM_MODE_EPT)
> >             ret = vcpu_init_vmx(vcpu);
> > @@ -6257,26 +6265,24 @@ vmx_handle_rdmsr(struct vcpu *vcpu)
> >     rdx = &vcpu->vc_gueststate.vg_rdx;
> >
> >     switch (*rcx) {
> > -   case MSR_SMBASE:
> > -           /*
> > -            * 34.15.6.3 - Saving Guest State (SMM)
> > -            *
> > -            * Unsupported, so inject #GP and return without
> > -            * advancing %rip.
> > -            */
> > +   case MSR_BIOS_SIGN:
> > +   case MSR_PLATFORM_ID:
> > +           /* Ignored */
> > +           *rax = 0;
> > +           *rdx = 0;
> > +           break;
> > +   case MSR_CR_PAT:
> > +           *rax = (vcpu->vc_shadow_pat & 0xFFFFFFFFULL);
> > +           *rdx = (vcpu->vc_shadow_pat >> 32);
> > +           break;
> > +   default:
> > +           /* Unsupported MSRs cause a #GP exception, don't advance %rip */
> > +           DPRINTF("%s: unsupported rdmsr (msr=0x%llx), injecting #GP\n",
> > +               __func__, *rcx);
> >             ret = vmm_inject_gp(vcpu);
> >             return (ret);
> >     }
> >
> > -   *rax = 0;
> > -   *rdx = 0;
> > -
> > -#ifdef VMM_DEBUG
> > -   /* Log the access, to be able to identify unknown MSRs */
> > -   DPRINTF("%s: rdmsr exit, msr=0x%llx, data returned to "
> > -       "guest=0x%llx:0x%llx\n", __func__, *rcx, *rdx, *rax);
> > -#endif /* VMM_DEBUG */
> > -
> >     vcpu->vc_gueststate.vg_rip += insn_length;
> >
> >     return (0);
> > @@ -6442,7 +6448,7 @@ vmx_handle_misc_enable_msr(struct vcpu *
> >  int
> >  vmx_handle_wrmsr(struct vcpu *vcpu)
> >  {
> > -   uint64_t insn_length;
> > +   uint64_t insn_length, val;
> >     uint64_t *rax, *rdx, *rcx;
> >     int ret;
> >
> > @@ -6460,8 +6466,16 @@ vmx_handle_wrmsr(struct vcpu *vcpu)
> >     rax = &vcpu->vc_gueststate.vg_rax;
> >     rcx = &vcpu->vc_gueststate.vg_rcx;
> >     rdx = &vcpu->vc_gueststate.vg_rdx;
> > +   val = (*rdx << 32) | (*rax & 0xFFFFFFFFULL);
> >
> >     switch (*rcx) {
> > +   case MSR_CR_PAT:
> > +           if (!vmm_pat_is_valid(val)) {
> > +                   ret = vmm_inject_gp(vcpu);
> > +                   return (ret);
> > +           }
> > +           vcpu->vc_shadow_pat = val;
> > +           break;
> >     case MSR_MISC_ENABLE:
> >             vmx_handle_misc_enable_msr(vcpu);
> >             break;
> > @@ -6508,7 +6522,7 @@ vmx_handle_wrmsr(struct vcpu *vcpu)
> >  int
> >  svm_handle_msr(struct vcpu *vcpu)
> >  {
> > -   uint64_t insn_length;
> > +   uint64_t insn_length, val;
> >     uint64_t *rax, *rcx, *rdx;
> >     struct vmcb *vmcb = (struct vmcb *)vcpu->vc_control_va;
> >     int ret;
> > @@ -6521,35 +6535,58 @@ svm_handle_msr(struct vcpu *vcpu)
> >     rdx = &vcpu->vc_gueststate.vg_rdx;
> >
> >     if (vmcb->v_exitinfo1 == 1) {
> > -           if (*rcx == MSR_EFER) {
> > +           /* WRMSR */
> > +           val = (*rdx << 32) | (*rax & 0xFFFFFFFFULL);
> > +
> > +           switch (*rcx) {
> > +           case MSR_CR_PAT:
> > +                   if (!vmm_pat_is_valid(val)) {
> > +                           ret = vmm_inject_gp(vcpu);
> > +                           return (ret);
> > +                   }
> > +                   vcpu->vc_shadow_pat = val;
> > +                   break;
> > +           case MSR_EFER:
> >                     vmcb->v_efer = *rax | EFER_SVME;
> > -           } else if (*rcx == KVM_MSR_SYSTEM_TIME) {
> > +                   break;
> > +           case KVM_MSR_SYSTEM_TIME:
> >                     vmm_init_pvclock(vcpu,
> >                         (*rax & 0xFFFFFFFFULL) | (*rdx  << 32));
> > -           } else {
> > -#ifdef VMM_DEBUG
> > +                   break;
> > +           default:
> >                     /* Log the access, to be able to identify unknown MSRs */
> >                     DPRINTF("%s: wrmsr exit, msr=0x%llx, discarding data "
> >                         "written from guest=0x%llx:0x%llx\n", __func__,
> >                         *rcx, *rdx, *rax);
> > -#endif /* VMM_DEBUG */
> >             }
> >     } else {
> > +           /* RDMSR */
> >             switch (*rcx) {
> > -                   case MSR_DE_CFG:
> > -                           /* LFENCE seralizing bit is set by host */
> > -                           *rax = DE_CFG_SERIALIZE_LFENCE;
> > -                           *rdx = 0;
> > -                           break;
> > -                   case MSR_INT_PEN_MSG:
> > -                           *rax = 0;
> > -                           *rdx = 0;
> > -                           break;
> > -                   default:
> > -                           DPRINTF("%s: guest read msr 0x%llx, injecting "
> > -                               "#GP\n", __func__, *rcx);
> > -                           ret = vmm_inject_gp(vcpu);
> > -                           return (ret);
> > +           case MSR_BIOS_SIGN:
> > +           case MSR_INT_PEN_MSG:
> > +           case MSR_PLATFORM_ID:
> > +                   /* Ignored */
> > +                   *rax = 0;
> > +                   *rdx = 0;
> > +                   break;
> > +           case MSR_CR_PAT:
> > +                   *rax = (vcpu->vc_shadow_pat & 0xFFFFFFFFULL);
> > +                   *rdx = (vcpu->vc_shadow_pat >> 32);
> > +                   break;
> > +           case MSR_DE_CFG:
> > +                   /* LFENCE serializing bit is set by host */
> > +                   *rax = DE_CFG_SERIALIZE_LFENCE;
> > +                   *rdx = 0;
> > +                   break;
> > +           default:
> > +                   /*
> > +                    * Unsupported MSRs cause a #GP exception, don't advance
> > +                    * %rip
> > +                    */
> > +                   DPRINTF("%s: unsupported rdmsr (msr=0x%llx), "
> > +                       "injecting #GP\n", __func__, *rcx);
> > +                   ret = vmm_inject_gp(vcpu);
> > +                   return (ret);
> >             }
> >     }
> >
> > @@ -7271,6 +7308,23 @@ vmm_update_pvclock(struct vcpu *vcpu)
> >             pvclock_ti->ti_version &= ~0x1;
> >     }
> >     return (0);
> > +}
> > +
> > +int
> > +vmm_pat_is_valid(uint64_t pat)
> > +{
> > +   int i;
> > +   uint8_t *byte = (uint8_t *)&pat;
> > +
> > +   /* Intel SDM Vol 3A, 11.12.2: 0x02, 0x03, and 0x08-0xFF result in #GP */
> > +   for (i = 0; i < 8; i++) {
> > +           if (byte[i] == 0x02 || byte[i] == 0x03 || byte[i] > 0x07) {
> > +                   DPRINTF("%s: invalid pat %llx\n", __func__, pat);
> > +                   return 0;
> > +           }
> > +   }
> > +
> > +   return 1;
> >  }
> >
> >  /*
> 
> 
> --
> -Dave Voutila
> 
> 