Use Hyper-V's HvCallFlushGuestPhysicalAddressSpace for local TLB flushes. This makes any KVM_REQ_TLB_FLUSH_CURRENT (such as on root alloc) visible to all CPUs which means we no longer need to do a KVM_REQ_TLB_FLUSH on CPU migration.
The goal is to avoid invept-global in KVM_REQ_TLB_FLUSH. Hyper-V uses a shadow page table for the nested hypervisor (KVM) and has to invalidate all EPT roots when invept-global is issued. This has a performance impact on all nested VMs. KVM issues KVM_REQ_TLB_FLUSH on CPU migration, and under load the performance hit causes vCPUs to use up more of their slice of CPU time, leading to more CPU migrations. This has a snowball effect and causes CPU usage spikes. By issuing the hypercall, we are now guaranteed that any root modification that requires a local TLB flush becomes visible to all CPUs. The same hypercall is already used in kvm_arch_flush_remote_tlbs and kvm_arch_flush_remote_tlbs_range. The KVM expectation is that roots are flushed locally on alloc and we achieve consistency on migration by flushing all roots - the new behavior of achieving consistency on alloc on Hyper-V is a superset of the expected guarantees. This makes the KVM_REQ_TLB_FLUSH on CPU migration no longer necessary on Hyper-V. Coincidentally, we now match the behavior of SVM on Hyper-V.
Signed-off-by: Jeremi Piotrowski <jpiotrow...@linux.microsoft.com> --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/vmx/vmx.c | 20 +++++++++++++++++--- arch/x86/kvm/vmx/vmx_onhyperv.h | 6 ++++++ arch/x86/kvm/x86.c | 3 +++ 4 files changed, 27 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index b4a391929cdb..d3acab19f425 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1077,6 +1077,7 @@ struct kvm_vcpu_arch { #if IS_ENABLED(CONFIG_HYPERV) hpa_t hv_root_tdp; + bool hv_vmx_use_flush_guest_mapping; #endif }; diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 4953846cb30d..f537e0df56fc 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -1485,8 +1485,12 @@ void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu) /* * Flush all EPTP/VPID contexts, the new pCPU may have stale * TLB entries from its previous association with the vCPU. + * Unless we are running on Hyper-V, where we promote local TLB + * flushes to be visible across all CPUs, so there is no need + * to do this again on migration. */ - kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); + if (!vmx_hv_use_flush_guest_mapping(vcpu)) + kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); /* * Linux uses per-cpu TSS and GDT, so set these when switching @@ -3243,11 +3247,21 @@ void vmx_flush_tlb_current(struct kvm_vcpu *vcpu) if (!VALID_PAGE(root_hpa)) return; - if (enable_ept) + if (enable_ept) { + /* + * hyperv_flush_guest_mapping() has the semantics of + * invept-single across all pCPUs. This makes root + * modifications consistent across pCPUs, so an invept-global + * on migration is no longer required. 
+ */ + if (vmx_hv_use_flush_guest_mapping(vcpu)) + return (void)WARN_ON_ONCE(hyperv_flush_guest_mapping(root_hpa)); + ept_sync_context(construct_eptp(vcpu, root_hpa, mmu->root_role.level)); - else + } else { vpid_sync_context(vmx_get_current_vpid(vcpu)); + } } void vmx_flush_tlb_gva(struct kvm_vcpu *vcpu, gva_t addr) diff --git a/arch/x86/kvm/vmx/vmx_onhyperv.h b/arch/x86/kvm/vmx/vmx_onhyperv.h index cdf8cbb69209..a5c64c90e49e 100644 --- a/arch/x86/kvm/vmx/vmx_onhyperv.h +++ b/arch/x86/kvm/vmx/vmx_onhyperv.h @@ -119,6 +119,11 @@ static inline void evmcs_load(u64 phys_addr) } void evmcs_sanitize_exec_ctrls(struct vmcs_config *vmcs_conf); + +static inline bool vmx_hv_use_flush_guest_mapping(struct kvm_vcpu *vcpu) +{ + return vcpu->arch.hv_vmx_use_flush_guest_mapping; +} #else /* !IS_ENABLED(CONFIG_HYPERV) */ static __always_inline bool kvm_is_using_evmcs(void) { return false; } static __always_inline void evmcs_write64(unsigned long field, u64 value) {} @@ -128,6 +133,7 @@ static __always_inline u64 evmcs_read64(unsigned long field) { return 0; } static __always_inline u32 evmcs_read32(unsigned long field) { return 0; } static __always_inline u16 evmcs_read16(unsigned long field) { return 0; } static inline void evmcs_load(u64 phys_addr) {} +static inline bool vmx_hv_use_flush_guest_mapping(struct kvm_vcpu *vcpu) { return false; } #endif /* IS_ENABLED(CONFIG_HYPERV) */ #endif /* __ARCH_X86_KVM_VMX_ONHYPERV_H__ */ diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index b58a74c1722d..cbde795096a6 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -25,6 +25,7 @@ #include "tss.h" #include "kvm_cache_regs.h" #include "kvm_emulate.h" +#include "kvm_onhyperv.h" #include "mmu/page_track.h" #include "x86.h" #include "cpuid.h" @@ -12390,6 +12391,8 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) #if IS_ENABLED(CONFIG_HYPERV) vcpu->arch.hv_root_tdp = INVALID_PAGE; + vcpu->arch.hv_vmx_use_flush_guest_mapping = + (kvm_x86_ops.flush_remote_tlbs == 
hv_flush_remote_tlbs); #endif r = kvm_x86_call(vcpu_create)(vcpu); -- 2.39.5