[PATCH v3 2/2] perf/x86/kvm: Avoid unnecessary work in guest filtering
From: Andi Kleen

KVM added a workaround for PEBS events leaking into guests with
commit 26a4f3c08de4 ("perf/x86: disable PEBS on a guest entry"). This
uses the VT entry/exit MSR list to add an extra disable of the
PEBS_ENABLE MSR.

Intel also added a fix for this issue to microcode updates on
Haswell/Broadwell/Skylake.

It turns out using the MSR entry/exit list makes VM exits significantly
slower. The list is only needed for disabling PEBS, because the
GLOBAL_CTRL change gets optimized by KVM into changing the VMCS.

Check for the microcode versions that contain the fix for leaking PEBS
and, when present, drop the extra entry/exit list entry for PEBS_ENABLE.
In addition, always clear the GLOBAL_CTRL bits for the PEBS counters
while running in the guest, which is enough to ensure they never fire
on the wrong side of the host/guest transition.

With the patch, the overhead of VM exits with guest filtering active
drops significantly, from 8% to 4%.

Signed-off-by: Andi Kleen
---
v2:
Use match_ucode, not match_ucode_all
Remove cpu lock
Use INTEL_MIN_UCODE and move to header
Update table to include Skylake clients.
---
 arch/x86/events/intel/core.c | 80
 arch/x86/events/perf_event.h |  3 +-
 2 files changed, 73 insertions(+), 10 deletions(-)

diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 0fb8659b20d8..5c45535c60b4 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -18,6 +18,7 @@
 #include
 #include
 #include
+#include

 #include "../perf_event.h"

@@ -3170,16 +3171,27 @@ static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr)
 	arr[0].msr = MSR_CORE_PERF_GLOBAL_CTRL;
 	arr[0].host = x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_guest_mask;
 	arr[0].guest = x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_host_mask;
-	/*
-	 * If PMU counter has PEBS enabled it is not enough to disable counter
-	 * on a guest entry since PEBS memory write can overshoot guest entry
-	 * and corrupt guest memory. Disabling PEBS solves the problem.
-	 */
-	arr[1].msr = MSR_IA32_PEBS_ENABLE;
-	arr[1].host = cpuc->pebs_enabled;
-	arr[1].guest = 0;
+	if (x86_pmu.flags & PMU_FL_PEBS_ALL)
+		arr[0].guest &= ~cpuc->pebs_enabled;
+	else
+		arr[0].guest &= ~(cpuc->pebs_enabled & PEBS_COUNTER_MASK);
+	*nr = 1;
+
+	if (!x86_pmu.pebs_isolated) {
+		/*
+		 * If PMU counter has PEBS enabled it is not enough to
+		 * disable counter on a guest entry since PEBS memory
+		 * write can overshoot guest entry and corrupt guest
+		 * memory. Disabling PEBS solves the problem.
+		 *
+		 * Don't do this if the CPU already enforces it.
+		 */
+		arr[1].msr = MSR_IA32_PEBS_ENABLE;
+		arr[1].host = cpuc->pebs_enabled;
+		arr[1].guest = 0;
+		*nr = 2;
+	}

-	*nr = 2;
 	return arr;
 }

@@ -3697,6 +3709,45 @@ static __init void intel_clovertown_quirk(void)
 	x86_pmu.pebs_constraints = NULL;
 }

+static const struct x86_ucode_id isolation_ucodes[] = {
+	INTEL_MIN_UCODE(INTEL_FAM6_HASWELL_CORE,	 3, 0x001f),
+	INTEL_MIN_UCODE(INTEL_FAM6_HASWELL_ULT,		 1, 0x001e),
+	INTEL_MIN_UCODE(INTEL_FAM6_HASWELL_GT3E,	 1, 0x0015),
+	INTEL_MIN_UCODE(INTEL_FAM6_HASWELL_X,		 2, 0x0037),
+	INTEL_MIN_UCODE(INTEL_FAM6_HASWELL_X,		 4, 0x000a),
+	INTEL_MIN_UCODE(INTEL_FAM6_BROADWELL_CORE,	 4, 0x0023),
+	INTEL_MIN_UCODE(INTEL_FAM6_BROADWELL_GT3E,	 1, 0x0014),
+	INTEL_MIN_UCODE(INTEL_FAM6_BROADWELL_XEON_D,	 2, 0x0010),
+	INTEL_MIN_UCODE(INTEL_FAM6_BROADWELL_XEON_D,	 3, 0x0709),
+	INTEL_MIN_UCODE(INTEL_FAM6_BROADWELL_XEON_D,	 4, 0x0f09),
+	INTEL_MIN_UCODE(INTEL_FAM6_BROADWELL_XEON_D,	 5, 0x0e02),
+	INTEL_MIN_UCODE(INTEL_FAM6_BROADWELL_X,		 2, 0x0b14),
+	INTEL_MIN_UCODE(INTEL_FAM6_SKYLAKE_X,		 3, 0x0021),
+	INTEL_MIN_UCODE(INTEL_FAM6_SKYLAKE_X,		 4, 0x),
+	INTEL_MIN_UCODE(INTEL_FAM6_SKYLAKE_MOBILE,	 3, 0x007c),
+	INTEL_MIN_UCODE(INTEL_FAM6_SKYLAKE_DESKTOP,	 3, 0x007c),
+	INTEL_MIN_UCODE(INTEL_FAM6_KABYLAKE_DESKTOP,	 9, 0x004e),
+	INTEL_MIN_UCODE(INTEL_FAM6_KABYLAKE_MOBILE,	 9, 0x004e),
+	INTEL_MIN_UCODE(INTEL_FAM6_KABYLAKE_MOBILE,	10, 0x004e),
+	INTEL_MIN_UCODE(INTEL_FAM6_KABYLAKE_MOBILE,	11, 0x004e),
+	INTEL_MIN_UCODE(INTEL_FAM6_KABYLAKE_MOBILE,	12, 0x004e),
+	INTEL_MIN_UCODE(INTEL_FAM6_KABYLAKE_DESKTOP,	10, 0x004e),
+	INTEL_MIN_UCODE(INTEL_FAM6_KABYLAKE_DESKTOP,	11, 0x004e),
+	INTEL_MIN_UCODE(INTEL_FAM6_KABYLAKE_DESKTOP,	12, 0x004e),
+
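A note on the isolation_ucodes[] table above: each entry names an
INTEL_FAM6_* model, a stepping, and the minimum microcode revision that
contains the PEBS isolation fix. The snippet below is only an
illustrative sketch of the matching rule the table implies; the field
names inside struct x86_ucode_id and the helper name
cpu_has_isolated_pebs() are assumptions for illustration, since the real
definitions (INTEL_MIN_UCODE and friends) come from patch 1/2 and may
differ:

/*
 * Illustrative sketch only -- real struct layout and helper come from
 * patch 1/2.  INTEL_FAM6_* expands to a family 6 model number, so an
 * entry matches when model and stepping are equal and the loaded
 * microcode revision is at least the listed minimum.
 */
struct x86_ucode_id {
	u8  model;		/* INTEL_FAM6_* model number (family 6) */
	u8  stepping;
	u32 min_ucode;		/* first revision with the PEBS fix */
};

static bool cpu_has_isolated_pebs(const struct cpuinfo_x86 *c,
				  const struct x86_ucode_id *table,
				  int entries)
{
	int i;

	for (i = 0; i < entries; i++) {
		const struct x86_ucode_id *m = &table[i];

		if (c->x86 == 6 && c->x86_model == m->model &&
		    c->x86_stepping == m->stepping &&
		    c->microcode >= m->min_ucode)
			return true;
	}
	return false;
}

Presumably the remainder of the diff (truncated above) adds a quirk that
walks this table for each online CPU and sets x86_pmu.pebs_isolated only
when every CPU has a fixed microcode revision, which is what the
!x86_pmu.pebs_isolated check in intel_guest_get_msrs() relies on.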
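For context on the intel_guest_get_msrs() change: KVM feeds the returned
perf_guest_switch_msr array into the VMCS MSR entry/exit load lists, and
every entry whose host and guest values differ costs an extra MSR
load/store on each transition. The snippet below is a simplified
rendition of the consuming side in arch/x86/kvm/vmx.c (not part of this
patch, argument lists abridged), showing why dropping the PEBS_ENABLE
entry on fixed microcode makes exits cheaper:

/*
 * Simplified sketch of how KVM consumes the array returned by
 * perf_guest_get_msrs().  Entries whose host and guest values differ
 * land on the VMCS MSR entry/exit load lists, which the CPU walks on
 * every VM entry/exit.
 */
static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx)
{
	struct perf_guest_switch_msr *msrs;
	int i, nr_msrs;

	msrs = perf_guest_get_msrs(&nr_msrs);
	if (!msrs)
		return;

	for (i = 0; i < nr_msrs; i++) {
		if (msrs[i].host == msrs[i].guest)
			clear_atomic_switch_msr(vmx, msrs[i].msr);
		else
			add_atomic_switch_msr(vmx, msrs[i].msr,
					      msrs[i].guest, msrs[i].host);
	}
}

GLOBAL_CTRL itself is special-cased by KVM onto the VMCS guest/host
PERF_GLOBAL_CTRL fields, which is the "optimized into changing the VMCS"
path the commit message refers to, so with pebs_isolated set no slow
load-list entries remain.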