The following diff cleans up and improves MSR-related event handling in
vmm(4) for when the guest attempts a rdmsr/wrmsr instruction. As
mentioned in a previous email to tech@ about fixing support for 9front
guests [1], we found some discrepancies between vmm(4)'s handling on
Intel hosts and AMD hosts.

While the diff has been tested already by abieber@ and brynet@ with
additional review by mlarkin@, I'm looking for additional testers
willing to look for regressions.

This diff specifically improves and standardizes MSR-based exit
handling between Intel and AMD hosts as follows:

1. All RDMSR instructions that cause a vm-exit must be explicitly
   handled (e.g. via emulation) or they result in a #GP exception being
   injected into the guest.

2. All WRMSR instructions that cause a vm-exit and are not explicitly
   handled are ignored (i.e. %rax and %rdx values are not inspected or
   used). A condensed sketch of this shared logic follows the list.
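
Condensed, both exit handlers now share roughly this shape (a
simplified sketch, not the literal code from the diff; is_rdmsr and
SOME_HANDLED_MSR are placeholders standing in for the per-backend exit
decoding and the explicitly handled cases):

	switch (*rcx) {			/* %rcx holds the MSR number */
	case SOME_HANDLED_MSR:
		/* emulate or shadow the access */
		break;
	default:
		if (is_rdmsr) {
			/* (1) unhandled RDMSR: inject #GP, keep %rip */
			return (vmm_inject_gp(vcpu));
		}
		/* (2) unhandled WRMSR: silently discard the write */
		break;
	}
	/* handled instructions advance %rip past rdmsr/wrmsr */
	vcpu->vc_gueststate.vg_rip += insn_length;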

As a consequence of change (1) above, the diff adds explicit
handling for the following MSRs:

1. MSR_CR_PAT: for now, reads and writes are shadowed for the guest
   vcpu and host state is not touched. The shadow state is initialized
   on vcpu reset to the same value as the host's. (See the condensed
   excerpt after this list.)

2. MSR_BIOS_SIGN, MSR_INT_PEN_MSG (on AMD), and MSR_PLATFORM_ID are all
   ignored. This means reads result in vmm(4) setting the guest vcpu's
   %rax and %rdx to 0. (These MSRs are ignored in the same manner by
   other hypervisors like kvm and nvmm.)
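
The PAT shadowing mentioned in (1) reduces to the following on both
backends (condensed from the diff below; the real code returns via a
local ret variable):

	/* RDMSR of MSR_CR_PAT: return the shadow, split across %edx:%eax */
	*rax = (vcpu->vc_shadow_pat & 0xFFFFFFFFULL);
	*rdx = (vcpu->vc_shadow_pat >> 32);

	/* WRMSR of MSR_CR_PAT: validate, then update only the shadow */
	val = (*rdx << 32) | (*rax & 0xFFFFFFFFULL);
	if (!vmm_pat_is_valid(val))
		return (vmm_inject_gp(vcpu));
	vcpu->vc_shadow_pat = val;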

The average user should not see a change in behavior of vmm(4) or
vmd(8). The biggest change is for *Intel* users, as this diff changes
the current vmx logic, which was not injecting #GP for unsupported
MSRs. (Your guests were potentially getting garbage results from rdmsr
instructions.)
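
For illustration, a guest-side helper like the hypothetical one below
now takes a #GP fault when reading an MSR vmm(4) does not handle,
instead of reading back the zeros the old Intel code path returned
(this snippet is not part of the diff):

	/* hypothetical guest code, only to illustrate the new behavior */
	static inline uint64_t
	guest_rdmsr(uint32_t msr)
	{
		uint32_t lo, hi;

		__asm volatile("rdmsr" : "=a"(lo), "=d"(hi) : "c"(msr));
		return (((uint64_t)hi << 32) | lo);
	}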

The folks attempting to host the latest release of 9front as a guest on
AMD hosts should see their guest boot successfully with this diff :-)

-dv

[1] https://marc.info/?l=openbsd-tech&m=161693517121814&w=2


Index: sys/arch/amd64/include/vmmvar.h
===================================================================
RCS file: /cvs/src/sys/arch/amd64/include/vmmvar.h,v
retrieving revision 1.70
diff -u -p -r1.70 vmmvar.h
--- sys/arch/amd64/include/vmmvar.h     8 Apr 2020 07:39:48 -0000       1.70
+++ sys/arch/amd64/include/vmmvar.h     31 Mar 2021 00:15:43 -0000
@@ -936,6 +936,9 @@ struct vcpu {
        paddr_t vc_pvclock_system_gpa;
        uint32_t vc_pvclock_system_tsc_mul;

+       /* Shadowed MSRs */
+       uint64_t vc_shadow_pat;
+
        /* VMX only */
        uint64_t vc_vmx_basic;
        uint64_t vc_vmx_entry_ctls;
Index: sys/arch/amd64/amd64/vmm.c
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/vmm.c,v
retrieving revision 1.278
diff -u -p -r1.278 vmm.c
--- sys/arch/amd64/amd64/vmm.c  11 Mar 2021 11:16:55 -0000      1.278
+++ sys/arch/amd64/amd64/vmm.c  31 Mar 2021 00:15:43 -0000
@@ -207,6 +207,7 @@ void svm_set_dirty(struct vcpu *, uint32
 int vmm_gpa_is_valid(struct vcpu *vcpu, paddr_t gpa, size_t obj_size);
 void vmm_init_pvclock(struct vcpu *, paddr_t);
 int vmm_update_pvclock(struct vcpu *);
+int vmm_pat_is_valid(uint64_t);

 #ifdef VMM_DEBUG
 void dump_vcpu(struct vcpu *);
@@ -3193,6 +3194,9 @@ vcpu_reset_regs_vmx(struct vcpu *vcpu, s
        /* xcr0 power on default sets bit 0 (x87 state) */
        vcpu->vc_gueststate.vg_xcr0 = XCR0_X87 & xsave_mask;

+       /* XXX PAT shadow */
+       vcpu->vc_shadow_pat = rdmsr(MSR_CR_PAT);
+
 exit:
        /* Flush the VMCS */
        if (vmclear(&vcpu->vc_control_pa)) {
@@ -3584,6 +3588,10 @@ vcpu_init(struct vcpu *vcpu)
        vcpu->vc_state = VCPU_STATE_STOPPED;
        vcpu->vc_vpid = 0;
        vcpu->vc_pvclock_system_gpa = 0;
+
+       /* Shadow PAT MSR, starting with host's value. */
+       vcpu->vc_shadow_pat = rdmsr(MSR_CR_PAT);
+
        if (vmm_softc->mode == VMM_MODE_VMX ||
            vmm_softc->mode == VMM_MODE_EPT)
                ret = vcpu_init_vmx(vcpu);
@@ -6257,26 +6265,24 @@ vmx_handle_rdmsr(struct vcpu *vcpu)
        rdx = &vcpu->vc_gueststate.vg_rdx;

        switch (*rcx) {
-       case MSR_SMBASE:
-               /*
-                * 34.15.6.3 - Saving Guest State (SMM)
-                *
-                * Unsupported, so inject #GP and return without
-                * advancing %rip.
-                */
+       case MSR_BIOS_SIGN:
+       case MSR_PLATFORM_ID:
+               /* Ignored */
+               *rax = 0;
+               *rdx = 0;
+               break;
+       case MSR_CR_PAT:
+               *rax = (vcpu->vc_shadow_pat & 0xFFFFFFFFULL);
+               *rdx = (vcpu->vc_shadow_pat >> 32);
+               break;
+       default:
+               /* Unsupported MSRs cause a #GP exception, don't advance %rip */
+               DPRINTF("%s: unsupported rdmsr (msr=0x%llx), injecting #GP\n",
+                   __func__, *rcx);
                ret = vmm_inject_gp(vcpu);
                return (ret);
        }

-       *rax = 0;
-       *rdx = 0;
-
-#ifdef VMM_DEBUG
-       /* Log the access, to be able to identify unknown MSRs */
-       DPRINTF("%s: rdmsr exit, msr=0x%llx, data returned to "
-           "guest=0x%llx:0x%llx\n", __func__, *rcx, *rdx, *rax);
-#endif /* VMM_DEBUG */
-
        vcpu->vc_gueststate.vg_rip += insn_length;

        return (0);
@@ -6442,7 +6448,7 @@ vmx_handle_misc_enable_msr(struct vcpu *
 int
 vmx_handle_wrmsr(struct vcpu *vcpu)
 {
-       uint64_t insn_length;
+       uint64_t insn_length, val;
        uint64_t *rax, *rdx, *rcx;
        int ret;

@@ -6460,8 +6466,16 @@ vmx_handle_wrmsr(struct vcpu *vcpu)
        rax = &vcpu->vc_gueststate.vg_rax;
        rcx = &vcpu->vc_gueststate.vg_rcx;
        rdx = &vcpu->vc_gueststate.vg_rdx;
+       val = (*rdx << 32) | (*rax & 0xFFFFFFFFULL);

        switch (*rcx) {
+       case MSR_CR_PAT:
+               if (!vmm_pat_is_valid(val)) {
+                       ret = vmm_inject_gp(vcpu);
+                       return (ret);
+               }
+               vcpu->vc_shadow_pat = val;
+               break;
        case MSR_MISC_ENABLE:
                vmx_handle_misc_enable_msr(vcpu);
                break;
@@ -6508,7 +6522,7 @@ vmx_handle_wrmsr(struct vcpu *vcpu)
 int
 svm_handle_msr(struct vcpu *vcpu)
 {
-       uint64_t insn_length;
+       uint64_t insn_length, val;
        uint64_t *rax, *rcx, *rdx;
        struct vmcb *vmcb = (struct vmcb *)vcpu->vc_control_va;
        int ret;
@@ -6521,35 +6535,58 @@ svm_handle_msr(struct vcpu *vcpu)
        rdx = &vcpu->vc_gueststate.vg_rdx;

        if (vmcb->v_exitinfo1 == 1) {
-               if (*rcx == MSR_EFER) {
+               /* WRMSR */
+               val = (*rdx << 32) | (*rax & 0xFFFFFFFFULL);
+
+               switch (*rcx) {
+               case MSR_CR_PAT:
+                       if (!vmm_pat_is_valid(val)) {
+                               ret = vmm_inject_gp(vcpu);
+                               return (ret);
+                       }
+                       vcpu->vc_shadow_pat = val;
+                       break;
+               case MSR_EFER:
                        vmcb->v_efer = *rax | EFER_SVME;
-               } else if (*rcx == KVM_MSR_SYSTEM_TIME) {
+                       break;
+               case KVM_MSR_SYSTEM_TIME:
                        vmm_init_pvclock(vcpu,
                            (*rax & 0xFFFFFFFFULL) | (*rdx  << 32));
-               } else {
-#ifdef VMM_DEBUG
+                       break;
+               default:
                        /* Log the access, to be able to identify unknown MSRs */
                        DPRINTF("%s: wrmsr exit, msr=0x%llx, discarding data "
                            "written from guest=0x%llx:0x%llx\n", __func__,
                            *rcx, *rdx, *rax);
-#endif /* VMM_DEBUG */
                }
        } else {
+               /* RDMSR */
                switch (*rcx) {
-                       case MSR_DE_CFG:
-                               /* LFENCE seralizing bit is set by host */
-                               *rax = DE_CFG_SERIALIZE_LFENCE;
-                               *rdx = 0;
-                               break;
-                       case MSR_INT_PEN_MSG:
-                               *rax = 0;
-                               *rdx = 0;
-                               break;
-                       default:
-                               DPRINTF("%s: guest read msr 0x%llx, injecting "
-                                   "#GP\n", __func__, *rcx);
-                               ret = vmm_inject_gp(vcpu);
-                               return (ret);
+               case MSR_BIOS_SIGN:
+               case MSR_INT_PEN_MSG:
+               case MSR_PLATFORM_ID:
+                       /* Ignored */
+                       *rax = 0;
+                       *rdx = 0;
+                       break;
+               case MSR_CR_PAT:
+                       *rax = (vcpu->vc_shadow_pat & 0xFFFFFFFFULL);
+                       *rdx = (vcpu->vc_shadow_pat >> 32);
+                       break;
+               case MSR_DE_CFG:
+                       /* LFENCE serializing bit is set by host */
+                       *rax = DE_CFG_SERIALIZE_LFENCE;
+                       *rdx = 0;
+                       break;
+               default:
+                       /*
+                        * Unsupported MSRs cause a #GP exception, don't
+                        * advance %rip
+                        */
+                       DPRINTF("%s: unsupported rdmsr (msr=0x%llx), "
+                           "injecting #GP\n", __func__, *rcx);
+                       ret = vmm_inject_gp(vcpu);
+                       return (ret);
                }
        }

@@ -7271,6 +7308,23 @@ vmm_update_pvclock(struct vcpu *vcpu)
                pvclock_ti->ti_version &= ~0x1;
        }
        return (0);
+}
+
+int
+vmm_pat_is_valid(uint64_t pat)
+{
+       int i;
+       uint8_t *byte = (uint8_t *)&pat;
+
+       /* Intel SDM Vol 3A, 11.12.2: 0x02, 0x03, and 0x08-0xFF result in #GP */
+       for (i = 0; i < 8; i++) {
+               if (byte[i] == 0x02 || byte[i] == 0x03 || byte[i] > 0x07) {
+                       DPRINTF("%s: invalid pat %llx\n", __func__, pat);
+                       return (0);
+               }
+       }
+
+       return (1);
 }

 /*
