On Wed, 17 Nov 2021, Josh Grosse wrote:
...
> vmm_handle_cpuid: function 0x0a (arch. perf mon) not supported
> vmx_handle_cr: mov to cr0 @ 100149e, data=0x80010031
> vmx_handle_wrmsr: wrmsr exit, msr=0x8b, discarding data written from
> guest=0x0:0x0
> vmx_handle_wrmsr: wrmsr exit, msr=0x8b, discarding data written from
> guest=0x0:0x0
> vmm_handle_cpuid: unsupported rax=0x40000100
> vmm_handle_cpuid: invalid cpuid input leaf 0x15, guest rip=0xffffffff81c89979
> - resetting to 0xd
> vmm_handle_cpuid: function 0x06 (thermal/power mgt) not supported
> vmm_handle_cpuid: function 0x0a (arch. perf mon) not supported
The cpuid leaf clamping added in vmm.c rev 1.185 broke the "fake up to
cpuid 0x15 if tsc_is_invariant" logic added in vmm.c rev 1.182
The diff below does the following:
* add vmm_cpuid_level as the max cpuid leaf currently enabled:
- what the CPU does
- ...except we'll fake at least to 0x15 if tsc_is_invariant
- ...unless locked to 0x2 by MISC_ENABLE_LIMIT_CPUID_MAXVAL
That is then used by the clamp logic and for cpuid(0).eax
* put the leaf and subleaf input values (from rax/rcx) into local
variables, truncating them to 32bit as documented by the SDM and
verified on an Intel CPU in a Lenovo T510. Use that in the clamping
logic and all the tests
* only invoke the underlying cpuid instruction if the real CPU might
support the leaf and always pass the subleaf. Delete the CPUID_LEAF()
calls made superfluous by always passing the subleaf
* fix cpuid(0x40000000).eax to return the highest supported vm leaf
OpenBSD seems to work under vmm on my X1gen? with not-unexpected values
returned by cpuid in the guest, but that really doesn't test the clamping
behaviors. Built with VMM_DEBUG to verify format strings, but not run
with that.
Should this be broken into smaller diffs?
Index: sys/arch/amd64/amd64/vmm.c
===================================================================
RCS file: /data/src/openbsd/src/sys/arch/amd64/amd64/vmm.c,v
retrieving revision 1.294
diff -u -p -r1.294 vmm.c
--- sys/arch/amd64/amd64/vmm.c 26 Oct 2021 16:29:49 -0000 1.294
+++ sys/arch/amd64/amd64/vmm.c 19 Nov 2021 00:46:09 -0000
@@ -6665,10 +6665,15 @@ int
vmm_handle_cpuid(struct vcpu *vcpu)
{
uint64_t insn_length, cr4;
- uint64_t *rax, *rbx, *rcx, *rdx, cpuid_limit;
+ uint64_t *rax, *rbx, *rcx, *rdx;
struct vmcb *vmcb;
- uint32_t eax, ebx, ecx, edx;
+ uint32_t leaf, subleaf, eax, ebx, ecx, edx;
struct vmx_msr_store *msr_store;
+ int vmm_cpuid_level;
+
+ vmm_cpuid_level = cpuid_level;
+ if (vmm_cpuid_level < 0x15 && tsc_is_invariant)
+ vmm_cpuid_level = 0x15;
if (vmm_softc->mode == VMM_MODE_VMX ||
vmm_softc->mode == VMM_MODE_EPT) {
@@ -6684,21 +6689,32 @@ vmm_handle_cpuid(struct vcpu *vcpu)
}
rax = &vcpu->vc_gueststate.vg_rax;
+ leaf = *rax;
msr_store =
(struct vmx_msr_store *)vcpu->vc_vmx_msr_exit_save_va;
- cpuid_limit = msr_store[VCPU_REGS_MISC_ENABLE].vms_data &
- MISC_ENABLE_LIMIT_CPUID_MAXVAL;
+
+ /*
+ * "CPUID leaves above 02H and below 80000000H are only
+ * visible when IA32_MISC_ENABLE MSR has bit 22 set to its
+ * default value 0"
+ */
+ if (msr_store[VCPU_REGS_MISC_ENABLE].vms_data &
+ MISC_ENABLE_LIMIT_CPUID_MAXVAL) {
+ vmm_cpuid_level = 0x02;
+ }
} else {
/* XXX: validate insn_length 2 */
insn_length = 2;
vmcb = (struct vmcb *)vcpu->vc_control_va;
rax = &vmcb->v_rax;
+ leaf = *rax;
cr4 = vmcb->v_cr4;
}
rbx = &vcpu->vc_gueststate.vg_rbx;
rcx = &vcpu->vc_gueststate.vg_rcx;
rdx = &vcpu->vc_gueststate.vg_rdx;
+ subleaf = *rcx;
vcpu->vc_gueststate.vg_rip += insn_length;
/*
@@ -6706,45 +6722,36 @@ vmm_handle_cpuid(struct vcpu *vcpu)
* value for basic or extended function for that processor then the
* data for the highest basic information leaf is returned."
*
- * This means if rax is between cpuid_level and 0x40000000 (the start
- * of the hypervisor info leaves), clamp to cpuid_level. Also, if
- * rax is greater than the extended function info, clamp also to
- * cpuid_level.
+ * "When CPUID returns the highest basic leaf information as a result
+ * of an invalid input EAX value, any dependence on input ECX value
+ * in the basic leaf is honored."
*
- * Note that %rax may be overwritten here - that's ok since we are
- * going to reassign it a new value (based on the input parameter)
- * later anyway.
+ * This means if rax is between vmm_cpuid_level and 0x40000000 (the
start
+ * of the hypervisor info leaves), clamp to vmm_cpuid_level, but without
+ * altering subleaf. Also, if rax is greater than the extended function
+ * info, clamp also to vmm_cpuid_level.
*/
- if ((*rax > cpuid_level && *rax < 0x40000000) ||
- (*rax > curcpu()->ci_pnfeatset)) {
- DPRINTF("%s: invalid cpuid input leaf 0x%llx, guest rip="
- "0x%llx - resetting to 0x%x\n", __func__, *rax,
+ if ((leaf > vmm_cpuid_level && leaf < 0x40000000) ||
+ (leaf > curcpu()->ci_pnfeatset)) {
+ DPRINTF("%s: invalid cpuid leaf 0x%x subleaf 0x%x, guest rip="
+ "0x%llx - resetting to 0x%x\n", __func__, leaf, subleaf,
vcpu->vc_gueststate.vg_rip - insn_length,
- cpuid_level);
- *rax = cpuid_level;
+ vmm_cpuid_level);
+ leaf = vmm_cpuid_level;
}
/*
- * "CPUID leaves above 02H and below 80000000H are only visible when
- * IA32_MISC_ENABLE MSR has bit 22 set to its default value 0"
+ * We fake up values in the range (cpuid_level, vmm_cpuid_level]
+ * as well as [0x40000000, 0x40000001]
*/
- if ((vmm_softc->mode == VMM_MODE_VMX || vmm_softc->mode == VMM_MODE_EPT)
- && cpuid_limit && (*rax > 0x02 && *rax < 0x80000000)) {
- *rax = 0;
- *rbx = 0;
- *rcx = 0;
- *rdx = 0;
- return (0);
- }
+ if (leaf <= cpuid_level || leaf > 0x80000000)
+ CPUID_LEAF(leaf, subleaf, eax, ebx, ecx, edx);
+ else
+ eax = ebx = ecx = edx = 0;
- CPUID_LEAF(*rax, 0, eax, ebx, ecx, edx);
-
- switch (*rax) {
+ switch (leaf) {
case 0x00: /* Max level and vendor ID */
- if (cpuid_level < 0x15 && tsc_is_invariant)
- *rax = 0x15;
- else
- *rax = cpuid_level;
+ *rax = vmm_cpuid_level;
*rbx = *((uint32_t *)&cpu_vendor);
*rdx = *((uint32_t *)&cpu_vendor + 1);
*rcx = *((uint32_t *)&cpu_vendor + 2);
@@ -6772,25 +6779,17 @@ vmm_handle_cpuid(struct vcpu *vcpu)
break;
case 0x03: /* Processor serial number (not supported) */
DPRINTF("%s: function 0x03 (processor serial number) not "
- "supported\n", __func__);
+ "supported\n", __func__);
*rax = 0;
*rbx = 0;
*rcx = 0;
*rdx = 0;
break;
case 0x04: /* Deterministic cache info */
- if (*rcx == 0) {
- *rax = eax & VMM_CPUID4_CACHE_TOPOLOGY_MASK;
- *rbx = ebx;
- *rcx = ecx;
- *rdx = edx;
- } else {
- CPUID_LEAF(*rax, *rcx, eax, ebx, ecx, edx);
- *rax = eax & VMM_CPUID4_CACHE_TOPOLOGY_MASK;
- *rbx = ebx;
- *rcx = ecx;
- *rdx = edx;
- }
+ *rax = eax & VMM_CPUID4_CACHE_TOPOLOGY_MASK;
+ *rbx = ebx;
+ *rcx = ecx;
+ *rdx = edx;
break;
case 0x05: /* MONITOR/MWAIT (not supported) */
DPRINTF("%s: function 0x05 (monitor/mwait) not supported\n",
@@ -6809,7 +6808,7 @@ vmm_handle_cpuid(struct vcpu *vcpu)
*rdx = 0;
break;
case 0x07: /* SEFF */
- if (*rcx == 0) {
+ if (subleaf == 0) {
*rax = 0; /* Highest subleaf supported */
*rbx = curcpu()->ci_feature_sefflags_ebx &
VMM_SEFF0EBX_MASK;
*rcx = curcpu()->ci_feature_sefflags_ecx &
VMM_SEFF0ECX_MASK;
@@ -6817,7 +6816,7 @@ vmm_handle_cpuid(struct vcpu *vcpu)
} else {
/* Unsupported subleaf */
DPRINTF("%s: function 0x07 (SEFF) unsupported subleaf "
- "0x%llx not supported\n", __func__, *rcx);
+ "0x%x not supported\n", __func__, subleaf);
*rax = 0;
*rbx = 0;
*rcx = 0;
@@ -6849,18 +6848,17 @@ vmm_handle_cpuid(struct vcpu *vcpu)
*rdx = 0;
break;
case 0x0d: /* Processor ext. state information */
- if (*rcx == 0) {
+ if (subleaf == 0) {
*rax = xsave_mask;
*rbx = ebx;
*rcx = ecx;
*rdx = edx;
- } else if (*rcx == 1) {
+ } else if (subleaf == 1) {
*rax = 0;
*rbx = 0;
*rcx = 0;
*rdx = 0;
} else {
- CPUID_LEAF(*rax, *rcx, eax, ebx, ecx, edx);
*rax = eax;
*rbx = ebx;
*rcx = ecx;
@@ -6885,17 +6883,16 @@ vmm_handle_cpuid(struct vcpu *vcpu)
break;
case 0x15:
if (cpuid_level >= 0x15) {
- CPUID(0x15, *rax, *rbx, *rcx, *rdx);
- } else if (tsc_is_invariant) {
+ *rax = eax;
+ *rbx = ebx;
+ *rcx = ecx;
+ *rdx = edx;
+ } else {
+ KASSERT(tsc_is_invariant);
*rax = 1;
*rbx = 100;
*rcx = tsc_frequency / 100;
*rdx = 0;
- } else {
- *rax = 0;
- *rbx = 0;
- *rcx = 0;
- *rdx = 0;
}
break;
case 0x16: /* Processor frequency info */
@@ -6905,7 +6902,8 @@ vmm_handle_cpuid(struct vcpu *vcpu)
*rdx = edx;
break;
case 0x40000000: /* Hypervisor information */
- *rax = 0;
+ /* rax has highest supported vm leaf */
+ *rax = 0x40000001;
*rbx = *((uint32_t *)&vmm_hv_signature[0]);
*rcx = *((uint32_t *)&vmm_hv_signature[4]);
*rdx = *((uint32_t *)&vmm_hv_signature[8]);
@@ -6960,16 +6958,20 @@ vmm_handle_cpuid(struct vcpu *vcpu)
*rdx = curcpu()->ci_extcacheinfo[3];
break;
case 0x80000007: /* apmi */
- CPUID(0x80000007, *rax, *rbx, *rcx, *rdx);
+ *rax = eax;
+ *rbx = ebx;
+ *rcx = ecx;
+ *rdx = edx;
break;
case 0x80000008: /* Phys bits info and topology (AMD) */
- CPUID(0x80000008, *rax, *rbx, *rcx, *rdx);
- *rbx &= VMM_AMDSPEC_EBX_MASK;
+ *rax = eax;
+ *rbx = ebx & VMM_AMDSPEC_EBX_MASK;
/* Reset %rcx (topology) */
*rcx = 0;
+ *rdx = edx;
break;
default:
- DPRINTF("%s: unsupported rax=0x%llx\n", __func__, *rax);
+ DPRINTF("%s: unsupported rax=0x%x\n", __func__, leaf);
*rax = 0;
*rbx = 0;
*rcx = 0;