Dave Voutila writes:

> Dave Voutila writes:
>
>> The following diff cleans up and improves MSR-related event handling in
>> vmm(4) for when the guest attempts a rdmsr/wrmsr instruction. As
>> mentioned in a previous email to tech@ about fixing support for 9front
>> guests [1], we found some discprepencies between vmm(4)'s handling on
>> Intel hosts and AMD hosts.
>>
>> While the diff has been tested already by abieber@ and brynet@ with
>> additional review by mlarkin@, I'm looking for additional testers
>> willing to look for regressions.

Confirmed it's working for me - very latest 9front and nixos work fine
on my ryzen!

>
> Last call for additional testers. Plan is to commit the diff later today
> as I have an OK from mlarkin@.
>
>>
>> This diff specifically improves and standardizes msr-based exit handling
>> between Intel and AMD hosts to the following:
>>
>> 1. All RDMSR instructions that cause a vm-exit must be explicitly
>>    handled (e.g. via emulation) or they result in injecting a #GP
>>    exception into the guest.
>>
>> 2. All WRMSR instructions that cause a vm-exit and are not explicitly
>>    handled are ignored (i.e. %rax and %rdx values are not inspected or
>>    used).
>>
>> Consequently with the change for (1) above, the diff adds explicit
>> handling for the following MSRs:
>>
>> 1. MSR_CR_PAT: for now reads/writes are shadowed for the guest vcpu and
>>    host state is not touched. The shadow state is initialized on vcpu
>>    reset to the same value as the host.
>>
>> 2. MSR_BIOS_SIGN, MSR_INT_PEN_MSG (on AMD), and MSR_PLATFORM_ID are all
>>    ignored. This means reads result in vmm(4) setting the guest vcpu's
>>    %rax and %rdx to 0. (These msr's are ignored in the same manner by
>>    other hypervisors like kvm and nvmm.)
>>
>> The average user should not see a change in behavior of vmm(4) or
>> vmd(8). The biggest change is for *Intel* users as this diff changes the
>> current vmx logic which was not injecting #GP for unsupported
>> msr's. (Your guests were potentially getting garbage results from rdmsr
>> instructions.)
>>
>
> If you do test the diff and have issues, I forgot to mention to please
> build the kernel with VMM_DEBUG. The output to the kernel buffer will
> help diagnose any problematic msr access.
>
>> The folks attempting to host the latest release of 9front as a guest on
>> AMD hosts should see their guest boot successfully with this diff :-)
>>
>> -dv
>>
>> [1] https://marc.info/?l=openbsd-tech&m=161693517121814&w=2
>>
>>
>> Index: sys/arch/amd64/include/vmmvar.h
>> ===================================================================
>> RCS file: /cvs/src/sys/arch/amd64/include/vmmvar.h,v
>> retrieving revision 1.70
>> diff -u -p -r1.70 vmmvar.h
>> --- sys/arch/amd64/include/vmmvar.h  8 Apr 2020 07:39:48 -0000       1.70
>> +++ sys/arch/amd64/include/vmmvar.h  31 Mar 2021 00:15:43 -0000
>> @@ -936,6 +936,9 @@ struct vcpu {
>>      paddr_t vc_pvclock_system_gpa;
>>      uint32_t vc_pvclock_system_tsc_mul;
>>
>> +    /* Shadowed MSRs */
>> +    uint64_t vc_shadow_pat;
>> +
>>      /* VMX only */
>>      uint64_t vc_vmx_basic;
>>      uint64_t vc_vmx_entry_ctls;
>> Index: sys/arch/amd64/amd64/vmm.c
>> ===================================================================
>> RCS file: /cvs/src/sys/arch/amd64/amd64/vmm.c,v
>> retrieving revision 1.278
>> diff -u -p -r1.278 vmm.c
>> --- sys/arch/amd64/amd64/vmm.c       11 Mar 2021 11:16:55 -0000      1.278
>> +++ sys/arch/amd64/amd64/vmm.c       31 Mar 2021 00:15:43 -0000
>> @@ -207,6 +207,7 @@ void svm_set_dirty(struct vcpu *, uint32
>>  int vmm_gpa_is_valid(struct vcpu *vcpu, paddr_t gpa, size_t obj_size);
>>  void vmm_init_pvclock(struct vcpu *, paddr_t);
>>  int vmm_update_pvclock(struct vcpu *);
>> +int vmm_pat_is_valid(uint64_t);
>>
>>  #ifdef VMM_DEBUG
>>  void dump_vcpu(struct vcpu *);
>> @@ -3193,6 +3194,9 @@ vcpu_reset_regs_vmx(struct vcpu *vcpu, s
>>      /* xcr0 power on default sets bit 0 (x87 state) */
>>      vcpu->vc_gueststate.vg_xcr0 = XCR0_X87 & xsave_mask;
>>
>> +    /* XXX PAT shadow */
>> +    vcpu->vc_shadow_pat = rdmsr(MSR_CR_PAT);
>> +
>>  exit:
>>      /* Flush the VMCS */
>>      if (vmclear(&vcpu->vc_control_pa)) {
>> @@ -3584,6 +3588,10 @@ vcpu_init(struct vcpu *vcpu)
>>      vcpu->vc_state = VCPU_STATE_STOPPED;
>>      vcpu->vc_vpid = 0;
>>      vcpu->vc_pvclock_system_gpa = 0;
>> +
>> +    /* Shadow PAT MSR, starting with host's value. */
>> +    vcpu->vc_shadow_pat = rdmsr(MSR_CR_PAT);
>> +
>>      if (vmm_softc->mode == VMM_MODE_VMX ||
>>          vmm_softc->mode == VMM_MODE_EPT)
>>              ret = vcpu_init_vmx(vcpu);
>> @@ -6257,26 +6265,24 @@ vmx_handle_rdmsr(struct vcpu *vcpu)
>>      rdx = &vcpu->vc_gueststate.vg_rdx;
>>
>>      switch (*rcx) {
>> -    case MSR_SMBASE:
>> -            /*
>> -             * 34.15.6.3 - Saving Guest State (SMM)
>> -             *
>> -             * Unsupported, so inject #GP and return without
>> -             * advancing %rip.
>> -             */
>> +    case MSR_BIOS_SIGN:
>> +    case MSR_PLATFORM_ID:
>> +            /* Ignored */
>> +            *rax = 0;
>> +            *rdx = 0;
>> +            break;
>> +    case MSR_CR_PAT:
>> +            *rax = (vcpu->vc_shadow_pat & 0xFFFFFFFFULL);
>> +            *rdx = (vcpu->vc_shadow_pat >> 32);
>> +            break;
>> +    default:
>> +            /* Unsupported MSRs causes #GP exception, don't advance %rip */
>> +            DPRINTF("%s: unsupported rdmsr (msr=0x%llx), injecting #GP\n",
>> +                __func__, *rcx);
>>              ret = vmm_inject_gp(vcpu);
>>              return (ret);
>>      }
>>
>> -    *rax = 0;
>> -    *rdx = 0;
>> -
>> -#ifdef VMM_DEBUG
>> -    /* Log the access, to be able to identify unknown MSRs */
>> -    DPRINTF("%s: rdmsr exit, msr=0x%llx, data returned to "
>> -        "guest=0x%llx:0x%llx\n", __func__, *rcx, *rdx, *rax);
>> -#endif /* VMM_DEBUG */
>> -
>>      vcpu->vc_gueststate.vg_rip += insn_length;
>>
>>      return (0);
>> @@ -6442,7 +6448,7 @@ vmx_handle_misc_enable_msr(struct vcpu *
>>  int
>>  vmx_handle_wrmsr(struct vcpu *vcpu)
>>  {
>> -    uint64_t insn_length;
>> +    uint64_t insn_length, val;
>>      uint64_t *rax, *rdx, *rcx;
>>      int ret;
>>
>> @@ -6460,8 +6466,16 @@ vmx_handle_wrmsr(struct vcpu *vcpu)
>>      rax = &vcpu->vc_gueststate.vg_rax;
>>      rcx = &vcpu->vc_gueststate.vg_rcx;
>>      rdx = &vcpu->vc_gueststate.vg_rdx;
>> +    val = (*rdx << 32) | (*rax & 0xFFFFFFFFULL);
>>
>>      switch (*rcx) {
>> +    case MSR_CR_PAT:
>> +            if (!vmm_pat_is_valid(val)) {
>> +                    ret = vmm_inject_gp(vcpu);
>> +                    return (ret);
>> +            }
>> +            vcpu->vc_shadow_pat = val;
>> +            break;
>>      case MSR_MISC_ENABLE:
>>              vmx_handle_misc_enable_msr(vcpu);
>>              break;
>> @@ -6508,7 +6522,7 @@ vmx_handle_wrmsr(struct vcpu *vcpu)
>>  int
>>  svm_handle_msr(struct vcpu *vcpu)
>>  {
>> -    uint64_t insn_length;
>> +    uint64_t insn_length, val;
>>      uint64_t *rax, *rcx, *rdx;
>>      struct vmcb *vmcb = (struct vmcb *)vcpu->vc_control_va;
>>      int ret;
>> @@ -6521,35 +6535,58 @@ svm_handle_msr(struct vcpu *vcpu)
>>      rdx = &vcpu->vc_gueststate.vg_rdx;
>>
>>      if (vmcb->v_exitinfo1 == 1) {
>> -            if (*rcx == MSR_EFER) {
>> +            /* WRMSR */
>> +            val = (*rdx << 32) | (*rax & 0xFFFFFFFFULL);
>> +
>> +            switch (*rcx) {
>> +            case MSR_CR_PAT:
>> +                    if (!vmm_pat_is_valid(val)) {
>> +                            ret = vmm_inject_gp(vcpu);
>> +                            return (ret);
>> +                    }
>> +                    vcpu->vc_shadow_pat = val;
>> +                    break;
>> +            case MSR_EFER:
>>                      vmcb->v_efer = *rax | EFER_SVME;
>> -            } else if (*rcx == KVM_MSR_SYSTEM_TIME) {
>> +                    break;
>> +            case KVM_MSR_SYSTEM_TIME:
>>                      vmm_init_pvclock(vcpu,
>>                          (*rax & 0xFFFFFFFFULL) | (*rdx  << 32));
>> -            } else {
>> -#ifdef VMM_DEBUG
>> +                    break;
>> +            default:
>>                      /* Log the access, to be able to identify unknown MSRs 
>> */
>>                      DPRINTF("%s: wrmsr exit, msr=0x%llx, discarding data "
>>                          "written from guest=0x%llx:0x%llx\n", __func__,
>>                          *rcx, *rdx, *rax);
>> -#endif /* VMM_DEBUG */
>>              }
>>      } else {
>> +            /* RDMSR */
>>              switch (*rcx) {
>> -                    case MSR_DE_CFG:
>> -                            /* LFENCE seralizing bit is set by host */
>> -                            *rax = DE_CFG_SERIALIZE_LFENCE;
>> -                            *rdx = 0;
>> -                            break;
>> -                    case MSR_INT_PEN_MSG:
>> -                            *rax = 0;
>> -                            *rdx = 0;
>> -                            break;
>> -                    default:
>> -                            DPRINTF("%s: guest read msr 0x%llx, injecting "
>> -                                "#GP\n", __func__, *rcx);
>> -                            ret = vmm_inject_gp(vcpu);
>> -                            return (ret);
>> +            case MSR_BIOS_SIGN:
>> +            case MSR_INT_PEN_MSG:
>> +            case MSR_PLATFORM_ID:
>> +                    /* Ignored */
>> +                    *rax = 0;
>> +                    *rdx = 0;
>> +                    break;
>> +            case MSR_CR_PAT:
>> +                    *rax = (vcpu->vc_shadow_pat & 0xFFFFFFFFULL);
>> +                    *rdx = (vcpu->vc_shadow_pat >> 32);
>> +                    break;
>> +            case MSR_DE_CFG:
>> +                    /* LFENCE seralizing bit is set by host */
>> +                    *rax = DE_CFG_SERIALIZE_LFENCE;
>> +                    *rdx = 0;
>> +                    break;
>> +            default:
>> +                    /*
>> +                     * Unsupported MSRs causes #GP exception, don't advance
>> +                     * %rip
>> +                     */
>> +                    DPRINTF("%s: unsupported rdmsr (msr=0x%llx), "
>> +                        "injecting #GP\n", __func__, *rcx);
>> +                    ret = vmm_inject_gp(vcpu);
>> +                    return (ret);
>>              }
>>      }
>>
>> @@ -7271,6 +7308,23 @@ vmm_update_pvclock(struct vcpu *vcpu)
>>              pvclock_ti->ti_version &= ~0x1;
>>      }
>>      return (0);
>> +}
>> +
>> +int
>> +vmm_pat_is_valid(uint64_t pat)
>> +{
>> +    int i;
>> +    uint8_t *byte = (uint8_t *)&pat;
>> +
>> +    /* Intel SDM Vol 3A, 11.12.2: 0x02, 0x03, and 0x08-0xFF result in #GP */
>> +    for (i = 0; i < 8; i++) {
>> +            if (byte[i] == 0x02 || byte[i] == 0x03 || byte[i] > 0x07) {
>> +                    DPRINTF("%s: invalid pat %llx\n", __func__, pat);
>> +                    return 0;
>> +            }
>> +    }
>> +
>> +    return 1;
>>  }
>>
>>  /*

Reply via email to