On Wed, 17 Nov 2021, Josh Grosse wrote:
...
> vmm_handle_cpuid: function 0x0a (arch. perf mon) not supported
> vmx_handle_cr: mov to cr0 @ 100149e, data=0x80010031
> vmx_handle_wrmsr: wrmsr exit, msr=0x8b, discarding data written from 
> guest=0x0:0x0
> vmx_handle_wrmsr: wrmsr exit, msr=0x8b, discarding data written from 
> guest=0x0:0x0
> vmm_handle_cpuid: unsupported rax=0x40000100
> vmm_handle_cpuid: invalid cpuid input leaf 0x15, guest rip=0xffffffff81c89979 
> - resetting to 0xd
> vmm_handle_cpuid: function 0x06 (thermal/power mgt) not supported
> vmm_handle_cpuid: function 0x0a (arch. perf mon) not supported

The cpuid leaf clamping added in vmm.c rev 1.185 broke the "fake up to 
cpuid 0x15 if tsc_is_invariant" logic added in vmm.c rev 1.182


The diff below does the following:

 * add vmm_cpuid_level as the max cpuid leaf currently enabled:
    - what the CPU does
    - ...except we'll fake at least to 0x15 if tsc_is_invariant
    - ...unless locked to 0x2 by MISC_ENABLE_LIMIT_CPUID_MAXVAL
   That is then used by the clamp logic and for cpuid(0).eax

 * put the leaf and subleaf input values (from rax/rcx) into local
   variables, truncating them to 32bit as documented by the SDM and 
   verified on an Intel CPU in a Lenovo T510.  Use that in the clamping 
   logic and all the tests

 * only invoke the underlying cpuid instruction if the real CPU might 
   support the leaf and always pass the subleaf.  Delete the CPUID_LEAF() 
   calls made superfluous by always passing the subleaf

 * fix cpuid(0x40000000).eax to return the highest supported vm leaf


OpenBSD seems to work under vmm on my X1gen? with not-unexpected values 
returned by cpuid in the guest, but that really doesn't test the clamping 
behaviors.  Built with VMM_DEBUG to verify format strings, but not run 
with that.


Should this be broken into smaller diffs?


Index: sys/arch/amd64/amd64/vmm.c
===================================================================
RCS file: /data/src/openbsd/src/sys/arch/amd64/amd64/vmm.c,v
retrieving revision 1.294
diff -u -p -r1.294 vmm.c
--- sys/arch/amd64/amd64/vmm.c  26 Oct 2021 16:29:49 -0000      1.294
+++ sys/arch/amd64/amd64/vmm.c  19 Nov 2021 00:46:09 -0000
@@ -6665,10 +6665,15 @@ int
 vmm_handle_cpuid(struct vcpu *vcpu)
 {
        uint64_t insn_length, cr4;
-       uint64_t *rax, *rbx, *rcx, *rdx, cpuid_limit;
+       uint64_t *rax, *rbx, *rcx, *rdx;
        struct vmcb *vmcb;
-       uint32_t eax, ebx, ecx, edx;
+       uint32_t leaf, subleaf, eax, ebx, ecx, edx;
        struct vmx_msr_store *msr_store;
+       int vmm_cpuid_level;
+
+       vmm_cpuid_level = cpuid_level;
+       if (vmm_cpuid_level < 0x15 && tsc_is_invariant)
+               vmm_cpuid_level = 0x15;
 
        if (vmm_softc->mode == VMM_MODE_VMX ||
            vmm_softc->mode == VMM_MODE_EPT) {
@@ -6684,21 +6689,32 @@ vmm_handle_cpuid(struct vcpu *vcpu)
                }
 
                rax = &vcpu->vc_gueststate.vg_rax;
+               leaf = *rax;
                msr_store =
                    (struct vmx_msr_store *)vcpu->vc_vmx_msr_exit_save_va;
-               cpuid_limit = msr_store[VCPU_REGS_MISC_ENABLE].vms_data &
-                   MISC_ENABLE_LIMIT_CPUID_MAXVAL;
+
+               /*
+                * "CPUID leaves above 02H and below 80000000H are only
+                * visible when IA32_MISC_ENABLE MSR has bit 22 set to its
+                * default value 0"
+                */
+               if (msr_store[VCPU_REGS_MISC_ENABLE].vms_data &
+                   MISC_ENABLE_LIMIT_CPUID_MAXVAL) {
+                       vmm_cpuid_level = 0x02;
+               }
        } else {
                /* XXX: validate insn_length 2 */
                insn_length = 2;
                vmcb = (struct vmcb *)vcpu->vc_control_va;
                rax = &vmcb->v_rax;
+               leaf = *rax;
                cr4 = vmcb->v_cr4;
        }
 
        rbx = &vcpu->vc_gueststate.vg_rbx;
        rcx = &vcpu->vc_gueststate.vg_rcx;
        rdx = &vcpu->vc_gueststate.vg_rdx;
+       subleaf = *rcx;
        vcpu->vc_gueststate.vg_rip += insn_length;
 
        /*
@@ -6706,45 +6722,36 @@ vmm_handle_cpuid(struct vcpu *vcpu)
         *  value for basic or extended function for that processor then the
         *  data for the highest basic information leaf is returned."
         *
-        * This means if rax is between cpuid_level and 0x40000000 (the start
-        * of the hypervisor info leaves), clamp to cpuid_level. Also, if
-        * rax is greater than the extended function info, clamp also to
-        * cpuid_level.
+        * "When CPUID returns the highest basic leaf information as a result
+        *  of an invalid input EAX value, any dependence on input ECX value
+        *  in the basic leaf is honored."
         *
-        * Note that %rax may be overwritten here - that's ok since we are
-        * going to reassign it a new value (based on the input parameter)
-        * later anyway.
+        * This means if rax is between vmm_cpuid_level and 0x40000000 (the 
start
+        * of the hypervisor info leaves), clamp to vmm_cpuid_level, but without
+        * altering subleaf.  Also, if rax is greater than the extended function
+        * info, clamp also to vmm_cpuid_level.
         */
-       if ((*rax > cpuid_level && *rax < 0x40000000) ||
-           (*rax > curcpu()->ci_pnfeatset)) {
-               DPRINTF("%s: invalid cpuid input leaf 0x%llx, guest rip="
-                   "0x%llx - resetting to 0x%x\n", __func__, *rax,
+       if ((leaf > vmm_cpuid_level && leaf < 0x40000000) ||
+           (leaf > curcpu()->ci_pnfeatset)) {
+               DPRINTF("%s: invalid cpuid leaf 0x%x subleaf 0x%x, guest rip="
+                   "0x%llx - resetting to 0x%x\n", __func__, leaf, subleaf,
                    vcpu->vc_gueststate.vg_rip - insn_length,
-                   cpuid_level);
-               *rax = cpuid_level;
+                   vmm_cpuid_level);
+               leaf = vmm_cpuid_level;
        }
 
        /*
-        * "CPUID leaves above 02H and below 80000000H are only visible when
-        * IA32_MISC_ENABLE MSR has bit 22 set to its default value 0"
+        * We fake up values in the range (cpuid_level, vmm_cpuid_level]
+        * as well as [0x40000000, 0x40000001]
         */
-       if ((vmm_softc->mode == VMM_MODE_VMX || vmm_softc->mode == VMM_MODE_EPT)
-           && cpuid_limit && (*rax > 0x02 && *rax < 0x80000000)) {
-               *rax = 0;
-               *rbx = 0;
-               *rcx = 0;
-               *rdx = 0;
-               return (0);
-       }
+       if (leaf <= cpuid_level || leaf > 0x80000000)
+               CPUID_LEAF(leaf, subleaf, eax, ebx, ecx, edx);
+       else
+               eax = ebx = ecx = edx = 0;
 
-       CPUID_LEAF(*rax, 0, eax, ebx, ecx, edx);
-
-       switch (*rax) {
+       switch (leaf) {
        case 0x00:      /* Max level and vendor ID */
-               if (cpuid_level < 0x15 && tsc_is_invariant)
-                       *rax = 0x15;
-               else
-                       *rax = cpuid_level;
+               *rax = vmm_cpuid_level;
                *rbx = *((uint32_t *)&cpu_vendor);
                *rdx = *((uint32_t *)&cpu_vendor + 1);
                *rcx = *((uint32_t *)&cpu_vendor + 2);
@@ -6772,25 +6779,17 @@ vmm_handle_cpuid(struct vcpu *vcpu)
                break;
        case 0x03:      /* Processor serial number (not supported) */
                DPRINTF("%s: function 0x03 (processor serial number) not "
-               "supported\n", __func__);
+                   "supported\n", __func__);
                *rax = 0;
                *rbx = 0;
                *rcx = 0;
                *rdx = 0;
                break;
        case 0x04:      /* Deterministic cache info */
-               if (*rcx == 0) {
-                       *rax = eax & VMM_CPUID4_CACHE_TOPOLOGY_MASK;
-                       *rbx = ebx;
-                       *rcx = ecx;
-                       *rdx = edx;
-               } else {
-                       CPUID_LEAF(*rax, *rcx, eax, ebx, ecx, edx);
-                       *rax = eax & VMM_CPUID4_CACHE_TOPOLOGY_MASK;
-                       *rbx = ebx;
-                       *rcx = ecx;
-                       *rdx = edx;
-               }
+               *rax = eax & VMM_CPUID4_CACHE_TOPOLOGY_MASK;
+               *rbx = ebx;
+               *rcx = ecx;
+               *rdx = edx;
                break;
        case 0x05:      /* MONITOR/MWAIT (not supported) */
                DPRINTF("%s: function 0x05 (monitor/mwait) not supported\n",
@@ -6809,7 +6808,7 @@ vmm_handle_cpuid(struct vcpu *vcpu)
                *rdx = 0;
                break;
        case 0x07:      /* SEFF */
-               if (*rcx == 0) {
+               if (subleaf == 0) {
                        *rax = 0;       /* Highest subleaf supported */
                        *rbx = curcpu()->ci_feature_sefflags_ebx & 
VMM_SEFF0EBX_MASK;
                        *rcx = curcpu()->ci_feature_sefflags_ecx & 
VMM_SEFF0ECX_MASK;
@@ -6817,7 +6816,7 @@ vmm_handle_cpuid(struct vcpu *vcpu)
                } else {
                        /* Unsupported subleaf */
                        DPRINTF("%s: function 0x07 (SEFF) unsupported subleaf "
-                           "0x%llx not supported\n", __func__, *rcx);
+                           "0x%x not supported\n", __func__, subleaf);
                        *rax = 0;
                        *rbx = 0;
                        *rcx = 0;
@@ -6849,18 +6848,17 @@ vmm_handle_cpuid(struct vcpu *vcpu)
                *rdx = 0;
                break;
        case 0x0d:      /* Processor ext. state information */
-               if (*rcx == 0) {
+               if (subleaf == 0) {
                        *rax = xsave_mask;
                        *rbx = ebx;
                        *rcx = ecx;
                        *rdx = edx;
-               } else if (*rcx == 1) {
+               } else if (subleaf == 1) {
                        *rax = 0;
                        *rbx = 0;
                        *rcx = 0;
                        *rdx = 0;
                } else {
-                       CPUID_LEAF(*rax, *rcx, eax, ebx, ecx, edx);
                        *rax = eax;
                        *rbx = ebx;
                        *rcx = ecx;
@@ -6885,17 +6883,16 @@ vmm_handle_cpuid(struct vcpu *vcpu)
                break;
        case 0x15:
                if (cpuid_level >= 0x15) {
-                       CPUID(0x15, *rax, *rbx, *rcx, *rdx);
-               } else if (tsc_is_invariant) {
+                       *rax = eax;
+                       *rbx = ebx;
+                       *rcx = ecx;
+                       *rdx = edx;
+               } else {
+                       KASSERT(tsc_is_invariant);
                        *rax = 1;
                        *rbx = 100;
                        *rcx = tsc_frequency / 100;
                        *rdx = 0;
-               } else {
-                       *rax = 0;
-                       *rbx = 0;
-                       *rcx = 0;
-                       *rdx = 0;
                }
                break;
        case 0x16:      /* Processor frequency info */
@@ -6905,7 +6902,8 @@ vmm_handle_cpuid(struct vcpu *vcpu)
                *rdx = edx;
                break;
        case 0x40000000:        /* Hypervisor information */
-               *rax = 0;
+               /* rax has highest supported vm leaf */
+               *rax = 0x40000001;
                *rbx = *((uint32_t *)&vmm_hv_signature[0]);
                *rcx = *((uint32_t *)&vmm_hv_signature[4]);
                *rdx = *((uint32_t *)&vmm_hv_signature[8]);
@@ -6960,16 +6958,20 @@ vmm_handle_cpuid(struct vcpu *vcpu)
                *rdx = curcpu()->ci_extcacheinfo[3];
                break;
        case 0x80000007:        /* apmi */
-               CPUID(0x80000007, *rax, *rbx, *rcx, *rdx);
+               *rax = eax;
+               *rbx = ebx;
+               *rcx = ecx;
+               *rdx = edx;
                break;
        case 0x80000008:        /* Phys bits info and topology (AMD) */
-               CPUID(0x80000008, *rax, *rbx, *rcx, *rdx);
-               *rbx &= VMM_AMDSPEC_EBX_MASK;
+               *rax = eax;
+               *rbx = ebx & VMM_AMDSPEC_EBX_MASK;
                /* Reset %rcx (topology) */
                *rcx = 0;
+               *rdx = edx;
                break;
        default:
-               DPRINTF("%s: unsupported rax=0x%llx\n", __func__, *rax);
+               DPRINTF("%s: unsupported rax=0x%x\n", __func__, leaf);
                *rax = 0;
                *rbx = 0;
                *rcx = 0;

Reply via email to