Hi, a long known problem with Bhyve is that Windows guests are rather slow. With Windows 10 1903 this became much worse, to the point that the guest is unusable. I have found the reason for this: Windows hammers on the %cr8 control register. For example, Windows 10 1909 on an i7-2620M has about 68,000 %cr8 accesses per second. Each of them triggers a vm exit.
The most common solution is TPR shadowing. Many thanks to royger in #bhyve for getting me on the right track. Bhyve already implements TPR shadowing. On AMD SVM it just works, but the implementation for Intel VT-x is bound to APIC virtualization. And APIC virtualization is a Xeon feature that is missing on most (all?) desktop CPUs. The patch - further down inline or under [0] - separates TPR shadowing from APIC virtualization, so TPR shadowing can be used on desktop CPUs as well. The patch doesn't just give a small speed boost, it's a difference like day and night. As an example, without the patch, the installation of Windows 10 1909 takes about 2280 seconds from start to first reboot. With the patch, only 370 seconds. On an old Thinkpad X220, Windows 10 guests were previously unusable, now they are resonable fast. The patch does: * Add a new tuneable 'hw.vmm.vmx.use_tpr_shadowing' to disable TLP shadowing. Also add 'hw.vmm.vmx.cap.tpr_shadowing' to be able to query if TPR shadowing is used. * Detach the initialization of TPR shadowing from the initialization of APIC virtualization. APIC virtualization still needs TPR shadowing, but not vice versa. Any CPU that supports APIC virtualization should also support TPR shadowing. * When TPR shadowing is used, the APIC page of each vCPU is written to the VMCS_VIRTUAL_APIC field of the VMCS so that the CPU can write directly to the page without intercept. * On vm exit, vlapic_update_ppr() is called to update the PPR. The patch was tested on an i7-2620M, an i7-6700k and a Xeon Silver 4110. Both Windows and FreeBSD guests work correctly. Regards, Yamagi 0: https://gist.github.com/Yamagi/de70c08eadeeef14eec4cb42aeb5957f ---- diff --git a/sys/amd64/vmm/intel/vmx.c b/sys/amd64/vmm/intel/vmx.c index 605fd0bda766..324a1e9d0c3c 100644 --- a/sys/amd64/vmm/intel/vmx.c +++ b/sys/amd64/vmm/intel/vmx.c @@ -172,6 +172,10 @@ static int cap_invpcid; SYSCTL_INT(_hw_vmm_vmx_cap, OID_AUTO, invpcid, CTLFLAG_RD, &cap_invpcid, 0, "Guests are allowed to use INVPCID"); +static int tpr_shadowing; +SYSCTL_INT(_hw_vmm_vmx_cap, OID_AUTO, tpr_shadowing, CTLFLAG_RD, + &tpr_shadowing, 0, "TPR shadowin support"); + static int virtual_interrupt_delivery; SYSCTL_INT(_hw_vmm_vmx_cap, OID_AUTO, virtual_interrupt_delivery, CTLFLAG_RD, &virtual_interrupt_delivery, 0, "APICv virtual interrupt delivery support"); @@ -627,7 +631,7 @@ vmx_restore(void) static int vmx_init(int ipinum) { - int error, use_tpr_shadow; + int error; uint64_t basic, fixed0, fixed1, feature_control; uint32_t tmp, procbased2_vid_bits; @@ -750,6 +754,24 @@ vmx_init(int ipinum) MSR_VMX_PROCBASED_CTLS2, PROCBASED2_ENABLE_INVPCID, 0, &tmp) == 0); + /* + * Check support for TPR shadow. + */ + error = vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS, + MSR_VMX_TRUE_PROCBASED_CTLS, PROCBASED_USE_TPR_SHADOW, 0, + &tmp); + if (error == 0) { + tpr_shadowing = 1; + TUNABLE_INT_FETCH("hw.vmm.vmx.use_tpr_shadowing", + &tpr_shadowing); + } + + if (tpr_shadowing) { + procbased_ctls |= PROCBASED_USE_TPR_SHADOW; + procbased_ctls &= ~PROCBASED_CR8_LOAD_EXITING; + procbased_ctls &= ~PROCBASED_CR8_STORE_EXITING; + } + /* * Check support for virtual interrupt delivery. */ @@ -758,13 +780,9 @@ vmx_init(int ipinum) PROCBASED2_APIC_REGISTER_VIRTUALIZATION | PROCBASED2_VIRTUAL_INTERRUPT_DELIVERY); - use_tpr_shadow = (vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS, - MSR_VMX_TRUE_PROCBASED_CTLS, PROCBASED_USE_TPR_SHADOW, 0, - &tmp) == 0); - error = vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS2, MSR_VMX_PROCBASED_CTLS2, procbased2_vid_bits, 0, &tmp); - if (error == 0 && use_tpr_shadow) { + if (error == 0 && tpr_shadowing) { virtual_interrupt_delivery = 1; TUNABLE_INT_FETCH("hw.vmm.vmx.use_apic_vid", &virtual_interrupt_delivery); @@ -775,13 +793,6 @@ vmx_init(int ipinum) procbased_ctls2 |= procbased2_vid_bits; procbased_ctls2 &= ~PROCBASED2_VIRTUALIZE_X2APIC_MODE; - /* - * No need to emulate accesses to %CR8 if virtual - * interrupt delivery is enabled. - */ - procbased_ctls &= ~PROCBASED_CR8_LOAD_EXITING; - procbased_ctls &= ~PROCBASED_CR8_STORE_EXITING; - /* * Check for Posted Interrupts only if Virtual Interrupt * Delivery is enabled. @@ -1051,10 +1062,13 @@ vmx_vminit(struct vm *vm, pmap_t pmap) vmx->ctx[i].guest_dr6 = DBREG_DR6_RESERVED1; error += vmwrite(VMCS_GUEST_DR7, DBREG_DR7_RESERVED1); - if (virtual_interrupt_delivery) { - error += vmwrite(VMCS_APIC_ACCESS, APIC_ACCESS_ADDRESS); + if (tpr_shadowing) { error += vmwrite(VMCS_VIRTUAL_APIC, vtophys(&vmx->apic_page[i])); + } + + if (virtual_interrupt_delivery) { + error += vmwrite(VMCS_APIC_ACCESS, APIC_ACCESS_ADDRESS); error += vmwrite(VMCS_EOI_EXIT0, 0); error += vmwrite(VMCS_EOI_EXIT1, 0); error += vmwrite(VMCS_EOI_EXIT2, 0); @@ -2313,6 +2327,14 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit) } } + /* + * If 'TPR shadowing' is used, update the local APICs PPR. + */ + if (tpr_shadowing) { + vlapic = vm_lapic(vmx->vm, vcpu); + vlapic_update_ppr(vlapic); + } + switch (reason) { case EXIT_REASON_TASK_SWITCH: ts = &vmexit->u.task_switch; diff --git a/sys/amd64/vmm/io/vlapic.c b/sys/amd64/vmm/io/vlapic.c index 74e6cd967396..289fdb7e077d 100644 --- a/sys/amd64/vmm/io/vlapic.c +++ b/sys/amd64/vmm/io/vlapic.c @@ -490,7 +490,7 @@ dump_isrvec_stk(struct vlapic *vlapic) * Algorithm adopted from section "Interrupt, Task and Processor Priority" * in Intel Architecture Manual Vol 3a. */ -static void +void vlapic_update_ppr(struct vlapic *vlapic) { int isrvec, tpr, ppr; diff --git a/sys/amd64/vmm/io/vlapic.h b/sys/amd64/vmm/io/vlapic.h index 2a5f54003253..71b97feab6bc 100644 --- a/sys/amd64/vmm/io/vlapic.h +++ b/sys/amd64/vmm/io/vlapic.h @@ -74,6 +74,8 @@ void vlapic_post_intr(struct vlapic *vlapic, int hostcpu, int ipinum); void vlapic_fire_cmci(struct vlapic *vlapic); int vlapic_trigger_lvt(struct vlapic *vlapic, int vector); +void vlapic_update_ppr(struct vlapic *vlapic); + uint64_t vlapic_get_apicbase(struct vlapic *vlapic); int vlapic_set_apicbase(struct vlapic *vlapic, uint64_t val); void vlapic_set_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state s); -- Homepage: https://www.yamagi.org Github: https://github.com/yamagi GPG: 0x1D502515
pgpo4uU4VyD6D.pgp
Description: PGP signature
