Dave Voutila writes:
> Dave Voutila writes: > >> The following diff cleans up and improves MSR-related event handling in >> vmm(4) for when the guest attempts a rdmsr/wrmsr instruction. As >> mentioned in a previous email to tech@ about fixing support for 9front >> guests [1], we found some discrepancies between vmm(4)'s handling on >> Intel hosts and AMD hosts. >> >> While the diff has been tested already by abieber@ and brynet@ with >> additional review by mlarkin@, I'm looking for additional testers >> willing to look for regressions. Confirmed it's working for me - very latest 9front and nixos work fine on my ryzen! > > Last call for additional testers. Plan is to commit the diff later today > as I have an OK from mlarkin@. > >> >> This diff specifically improves and standardizes msr-based exit handling >> between Intel and AMD hosts to the following: >> >> 1. All RDMSR instructions that cause a vm-exit must be explicitly >> handled (e.g. via emulation) or they result in injecting a #GP >> exception into the guest. >> >> 2. All WRMSR instructions that cause a vm-exit and are not explicitly >> handled are ignored (i.e. %rax and %rdx values are not inspected or >> used). >> >> Consequently with the change for (1) above, the diff adds explicit >> handling for the following MSRs: >> >> 1. MSR_CR_PAT: for now reads/writes are shadowed for the guest vcpu and >> host state is not touched. The shadow state is initialized on vcpu >> reset to the same value as the host. >> >> 2. MSR_BIOS_SIGN, MSR_INT_PEN_MSG (on AMD), and MSR_PLATFORM_ID are all >> ignored. This means reads result in vmm(4) setting the guest vcpu's >> %rax and %rdx to 0. (These msr's are ignored in the same manner by >> other hypervisors like kvm and nvmm.) >> >> The average user should not see a change in behavior of vmm(4) or >> vmd(8). The biggest change is for *Intel* users as this diff changes the >> current vmx logic which was not injecting #GP for unsupported >> msr's. 
(Your guests were potentially getting garbage results from rdmsr >> instructions.) >> > > If you do test the diff and have issues, I forgot to mention to please > build the kernel with VMM_DEBUG. The output to the kernel buffer will > help diagnose any problematic msr access. > >> The folks attempting to host the latest release of 9front as a guest on >> AMD hosts should see their guest boot successfully with this diff :-) >> >> -dv >> >> [1] https://marc.info/?l=openbsd-tech&m=161693517121814&w=2 >> >> >> Index: sys/arch/amd64/include/vmmvar.h >> =================================================================== >> RCS file: /cvs/src/sys/arch/amd64/include/vmmvar.h,v >> retrieving revision 1.70 >> diff -u -p -r1.70 vmmvar.h >> --- sys/arch/amd64/include/vmmvar.h 8 Apr 2020 07:39:48 -0000 1.70 >> +++ sys/arch/amd64/include/vmmvar.h 31 Mar 2021 00:15:43 -0000 >> @@ -936,6 +936,9 @@ struct vcpu { >> paddr_t vc_pvclock_system_gpa; >> uint32_t vc_pvclock_system_tsc_mul; >> >> + /* Shadowed MSRs */ >> + uint64_t vc_shadow_pat; >> + >> /* VMX only */ >> uint64_t vc_vmx_basic; >> uint64_t vc_vmx_entry_ctls; >> Index: sys/arch/amd64/amd64/vmm.c >> =================================================================== >> RCS file: /cvs/src/sys/arch/amd64/amd64/vmm.c,v >> retrieving revision 1.278 >> diff -u -p -r1.278 vmm.c >> --- sys/arch/amd64/amd64/vmm.c 11 Mar 2021 11:16:55 -0000 1.278 >> +++ sys/arch/amd64/amd64/vmm.c 31 Mar 2021 00:15:43 -0000 >> @@ -207,6 +207,7 @@ void svm_set_dirty(struct vcpu *, uint32 >> int vmm_gpa_is_valid(struct vcpu *vcpu, paddr_t gpa, size_t obj_size); >> void vmm_init_pvclock(struct vcpu *, paddr_t); >> int vmm_update_pvclock(struct vcpu *); >> +int vmm_pat_is_valid(uint64_t); >> >> #ifdef VMM_DEBUG >> void dump_vcpu(struct vcpu *); >> @@ -3193,6 +3194,9 @@ vcpu_reset_regs_vmx(struct vcpu *vcpu, s >> /* xcr0 power on default sets bit 0 (x87 state) */ >> vcpu->vc_gueststate.vg_xcr0 = XCR0_X87 & xsave_mask; >> >> + /* XXX PAT shadow */ >> + 
vcpu->vc_shadow_pat = rdmsr(MSR_CR_PAT); >> + >> exit: >> /* Flush the VMCS */ >> if (vmclear(&vcpu->vc_control_pa)) { >> @@ -3584,6 +3588,10 @@ vcpu_init(struct vcpu *vcpu) >> vcpu->vc_state = VCPU_STATE_STOPPED; >> vcpu->vc_vpid = 0; >> vcpu->vc_pvclock_system_gpa = 0; >> + >> + /* Shadow PAT MSR, starting with host's value. */ >> + vcpu->vc_shadow_pat = rdmsr(MSR_CR_PAT); >> + >> if (vmm_softc->mode == VMM_MODE_VMX || >> vmm_softc->mode == VMM_MODE_EPT) >> ret = vcpu_init_vmx(vcpu); >> @@ -6257,26 +6265,24 @@ vmx_handle_rdmsr(struct vcpu *vcpu) >> rdx = &vcpu->vc_gueststate.vg_rdx; >> >> switch (*rcx) { >> - case MSR_SMBASE: >> - /* >> - * 34.15.6.3 - Saving Guest State (SMM) >> - * >> - * Unsupported, so inject #GP and return without >> - * advancing %rip. >> - */ >> + case MSR_BIOS_SIGN: >> + case MSR_PLATFORM_ID: >> + /* Ignored */ >> + *rax = 0; >> + *rdx = 0; >> + break; >> + case MSR_CR_PAT: >> + *rax = (vcpu->vc_shadow_pat & 0xFFFFFFFFULL); >> + *rdx = (vcpu->vc_shadow_pat >> 32); >> + break; >> + default: >> + /* Unsupported MSRs causes #GP exception, don't advance %rip */ >> + DPRINTF("%s: unsupported rdmsr (msr=0x%llx), injecting #GP\n", >> + __func__, *rcx); >> ret = vmm_inject_gp(vcpu); >> return (ret); >> } >> >> - *rax = 0; >> - *rdx = 0; >> - >> -#ifdef VMM_DEBUG >> - /* Log the access, to be able to identify unknown MSRs */ >> - DPRINTF("%s: rdmsr exit, msr=0x%llx, data returned to " >> - "guest=0x%llx:0x%llx\n", __func__, *rcx, *rdx, *rax); >> -#endif /* VMM_DEBUG */ >> - >> vcpu->vc_gueststate.vg_rip += insn_length; >> >> return (0); >> @@ -6442,7 +6448,7 @@ vmx_handle_misc_enable_msr(struct vcpu * >> int >> vmx_handle_wrmsr(struct vcpu *vcpu) >> { >> - uint64_t insn_length; >> + uint64_t insn_length, val; >> uint64_t *rax, *rdx, *rcx; >> int ret; >> >> @@ -6460,8 +6466,16 @@ vmx_handle_wrmsr(struct vcpu *vcpu) >> rax = &vcpu->vc_gueststate.vg_rax; >> rcx = &vcpu->vc_gueststate.vg_rcx; >> rdx = &vcpu->vc_gueststate.vg_rdx; >> + val = (*rdx << 
32) | (*rax & 0xFFFFFFFFULL); >> >> switch (*rcx) { >> + case MSR_CR_PAT: >> + if (!vmm_pat_is_valid(val)) { >> + ret = vmm_inject_gp(vcpu); >> + return (ret); >> + } >> + vcpu->vc_shadow_pat = val; >> + break; >> case MSR_MISC_ENABLE: >> vmx_handle_misc_enable_msr(vcpu); >> break; >> @@ -6508,7 +6522,7 @@ vmx_handle_wrmsr(struct vcpu *vcpu) >> int >> svm_handle_msr(struct vcpu *vcpu) >> { >> - uint64_t insn_length; >> + uint64_t insn_length, val; >> uint64_t *rax, *rcx, *rdx; >> struct vmcb *vmcb = (struct vmcb *)vcpu->vc_control_va; >> int ret; >> @@ -6521,35 +6535,58 @@ svm_handle_msr(struct vcpu *vcpu) >> rdx = &vcpu->vc_gueststate.vg_rdx; >> >> if (vmcb->v_exitinfo1 == 1) { >> - if (*rcx == MSR_EFER) { >> + /* WRMSR */ >> + val = (*rdx << 32) | (*rax & 0xFFFFFFFFULL); >> + >> + switch (*rcx) { >> + case MSR_CR_PAT: >> + if (!vmm_pat_is_valid(val)) { >> + ret = vmm_inject_gp(vcpu); >> + return (ret); >> + } >> + vcpu->vc_shadow_pat = val; >> + break; >> + case MSR_EFER: >> vmcb->v_efer = *rax | EFER_SVME; >> - } else if (*rcx == KVM_MSR_SYSTEM_TIME) { >> + break; >> + case KVM_MSR_SYSTEM_TIME: >> vmm_init_pvclock(vcpu, >> (*rax & 0xFFFFFFFFULL) | (*rdx << 32)); >> - } else { >> -#ifdef VMM_DEBUG >> + break; >> + default: >> /* Log the access, to be able to identify unknown MSRs >> */ >> DPRINTF("%s: wrmsr exit, msr=0x%llx, discarding data " >> "written from guest=0x%llx:0x%llx\n", __func__, >> *rcx, *rdx, *rax); >> -#endif /* VMM_DEBUG */ >> } >> } else { >> + /* RDMSR */ >> switch (*rcx) { >> - case MSR_DE_CFG: >> - /* LFENCE seralizing bit is set by host */ >> - *rax = DE_CFG_SERIALIZE_LFENCE; >> - *rdx = 0; >> - break; >> - case MSR_INT_PEN_MSG: >> - *rax = 0; >> - *rdx = 0; >> - break; >> - default: >> - DPRINTF("%s: guest read msr 0x%llx, injecting " >> - "#GP\n", __func__, *rcx); >> - ret = vmm_inject_gp(vcpu); >> - return (ret); >> + case MSR_BIOS_SIGN: >> + case MSR_INT_PEN_MSG: >> + case MSR_PLATFORM_ID: >> + /* Ignored */ >> + *rax = 0; >> + *rdx = 0; 
>> + break; >> + case MSR_CR_PAT: >> + *rax = (vcpu->vc_shadow_pat & 0xFFFFFFFFULL); >> + *rdx = (vcpu->vc_shadow_pat >> 32); >> + break; >> + case MSR_DE_CFG: >> + /* LFENCE seralizing bit is set by host */ >> + *rax = DE_CFG_SERIALIZE_LFENCE; >> + *rdx = 0; >> + break; >> + default: >> + /* >> + * Unsupported MSRs causes #GP exception, don't advance >> + * %rip >> + */ >> + DPRINTF("%s: unsupported rdmsr (msr=0x%llx), " >> + "injecting #GP\n", __func__, *rcx); >> + ret = vmm_inject_gp(vcpu); >> + return (ret); >> } >> } >> >> @@ -7271,6 +7308,23 @@ vmm_update_pvclock(struct vcpu *vcpu) >> pvclock_ti->ti_version &= ~0x1; >> } >> return (0); >> +} >> + >> +int >> +vmm_pat_is_valid(uint64_t pat) >> +{ >> + int i; >> + uint8_t *byte = (uint8_t *)&pat; >> + >> + /* Intel SDM Vol 3A, 11.12.2: 0x02, 0x03, and 0x08-0xFF result in #GP */ >> + for (i = 0; i < 8; i++) { >> + if (byte[i] == 0x02 || byte[i] == 0x03 || byte[i] > 0x07) { >> + DPRINTF("%s: invalid pat %llx\n", __func__, pat); >> + return 0; >> + } >> + } >> + >> + return 1; >> } >> >> /*