[PATCH v3 2/2] perf/x86/kvm: Avoid unnecessary work in guest filtering

2018-10-19 Thread Andi Kleen
From: Andi Kleen 

KVM added a workaround for PEBS events leaking into guests in commit
26a4f3c08de4 ("perf/x86: disable PEBS on a guest entry."). The workaround
uses the VT MSR entry/exit list to add an extra disable of the
PEBS_ENABLE MSR.

Intel also added a fix for this issue to microcode updates on
Haswell/Broadwell/Skylake.

It turns out using the MSR entry/exit list makes VM exits
significantly slower. The list is only needed for disabling
PEBS, because the GLOBAL_CTRL change gets optimized by
KVM into changing the VMCS.

Check for the microcode revisions that contain the fix for
leaking PEBS, and on those skip the extra entry/exit list
entry for PEBS_ENABLE. In addition, we always clear the
GLOBAL_CTRL bits for the PEBS counters while running in the
guest, which is enough to make them never fire on the wrong
side of the host/guest transition.

With the filtering active, this patch significantly reduces the
overhead of VM exits, from 8% to 4%.

Signed-off-by: Andi Kleen 
---
v2:
Use match_ucode, not match_ucode_all
Remove cpu lock
Use INTEL_MIN_UCODE and move to header
Update Table to include skylake clients.
---
 arch/x86/events/intel/core.c | 80 
 arch/x86/events/perf_event.h |  3 +-
 2 files changed, 73 insertions(+), 10 deletions(-)

diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 0fb8659b20d8..5c45535c60b4 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -18,6 +18,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "../perf_event.h"
 
@@ -3170,16 +3171,27 @@ static struct perf_guest_switch_msr 
*intel_guest_get_msrs(int *nr)
arr[0].msr = MSR_CORE_PERF_GLOBAL_CTRL;
arr[0].host = x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_guest_mask;
arr[0].guest = x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_host_mask;
-   /*
-* If PMU counter has PEBS enabled it is not enough to disable counter
-* on a guest entry since PEBS memory write can overshoot guest entry
-* and corrupt guest memory. Disabling PEBS solves the problem.
-*/
-   arr[1].msr = MSR_IA32_PEBS_ENABLE;
-   arr[1].host = cpuc->pebs_enabled;
-   arr[1].guest = 0;
+   if (x86_pmu.flags & PMU_FL_PEBS_ALL)
+   arr[0].guest &= ~cpuc->pebs_enabled;
+   else
+   arr[0].guest &= ~(cpuc->pebs_enabled & PEBS_COUNTER_MASK);
+   *nr = 1;
+
+   if (!x86_pmu.pebs_isolated) {
+   /*
+* If PMU counter has PEBS enabled it is not enough to
+* disable counter on a guest entry since PEBS memory
+* write can overshoot guest entry and corrupt guest
+* memory. Disabling PEBS solves the problem.
+*
+* Don't do this if the CPU already enforces it.
+*/
+   arr[1].msr = MSR_IA32_PEBS_ENABLE;
+   arr[1].host = cpuc->pebs_enabled;
+   arr[1].guest = 0;
+   *nr = 2;
+   }
 
-   *nr = 2;
return arr;
 }
 
@@ -3697,6 +3709,45 @@ static __init void intel_clovertown_quirk(void)
x86_pmu.pebs_constraints = NULL;
 }
 
+static const struct x86_ucode_id isolation_ucodes[] = {
+   INTEL_MIN_UCODE(INTEL_FAM6_HASWELL_CORE, 3, 0x001f),
+   INTEL_MIN_UCODE(INTEL_FAM6_HASWELL_ULT,  1, 0x001e),
+   INTEL_MIN_UCODE(INTEL_FAM6_HASWELL_GT3E, 1, 0x0015),
+   INTEL_MIN_UCODE(INTEL_FAM6_HASWELL_X,2, 0x0037),
+   INTEL_MIN_UCODE(INTEL_FAM6_HASWELL_X,4, 0x000a),
+   INTEL_MIN_UCODE(INTEL_FAM6_BROADWELL_CORE,   4, 0x0023),
+   INTEL_MIN_UCODE(INTEL_FAM6_BROADWELL_GT3E,   1, 0x0014),
+   INTEL_MIN_UCODE(INTEL_FAM6_BROADWELL_XEON_D, 2, 0x0010),
+   INTEL_MIN_UCODE(INTEL_FAM6_BROADWELL_XEON_D, 3, 0x0709),
+   INTEL_MIN_UCODE(INTEL_FAM6_BROADWELL_XEON_D, 4, 0x0f09),
+   INTEL_MIN_UCODE(INTEL_FAM6_BROADWELL_XEON_D, 5, 0x0e02),
+   INTEL_MIN_UCODE(INTEL_FAM6_BROADWELL_X,  2, 0x0b14),
+   INTEL_MIN_UCODE(INTEL_FAM6_SKYLAKE_X,3, 0x0021),
+   INTEL_MIN_UCODE(INTEL_FAM6_SKYLAKE_X,4, 0x),
+   INTEL_MIN_UCODE(INTEL_FAM6_SKYLAKE_MOBILE,   3, 0x007c),
+   INTEL_MIN_UCODE(INTEL_FAM6_SKYLAKE_DESKTOP,  3, 0x007c),
+   INTEL_MIN_UCODE(INTEL_FAM6_KABYLAKE_DESKTOP, 9, 0x004e),
+   INTEL_MIN_UCODE(INTEL_FAM6_KABYLAKE_MOBILE,  9, 0x004e),
+   INTEL_MIN_UCODE(INTEL_FAM6_KABYLAKE_MOBILE, 10, 0x004e),
+   INTEL_MIN_UCODE(INTEL_FAM6_KABYLAKE_MOBILE, 11, 0x004e),
+   INTEL_MIN_UCODE(INTEL_FAM6_KABYLAKE_MOBILE, 12, 0x004e),
+   INTEL_MIN_UCODE(INTEL_FAM6_KABYLAKE_DESKTOP,10, 0x004e),
+   INTEL_MIN_UCODE(INTEL_FAM6_KABYLAKE_DESKTOP,11, 0x004e),
+   INTEL_MIN_UCODE(INTEL_FAM6_KABYLAKE_DESKTOP,12, 0x004e),
+   

[PATCH v3 2/2] perf/x86/kvm: Avoid unnecessary work in guest filtering

2018-10-19 Thread Andi Kleen
From: Andi Kleen 

KVM added a workaround for PEBS events leaking into guests in commit
26a4f3c08de4 ("perf/x86: disable PEBS on a guest entry."). The workaround
uses the VT MSR entry/exit list to add an extra disable of the
PEBS_ENABLE MSR.

Intel also added a fix for this issue to microcode updates on
Haswell/Broadwell/Skylake.

It turns out using the MSR entry/exit list makes VM exits
significantly slower. The list is only needed for disabling
PEBS, because the GLOBAL_CTRL change gets optimized by
KVM into changing the VMCS.

Check for the microcode revisions that contain the fix for
leaking PEBS, and on those skip the extra entry/exit list
entry for PEBS_ENABLE. In addition, we always clear the
GLOBAL_CTRL bits for the PEBS counters while running in the
guest, which is enough to make them never fire on the wrong
side of the host/guest transition.

With the filtering active, this patch significantly reduces the
overhead of VM exits, from 8% to 4%.

Signed-off-by: Andi Kleen 
---
v2:
Use match_ucode, not match_ucode_all
Remove cpu lock
Use INTEL_MIN_UCODE and move to header
Update Table to include skylake clients.
---
 arch/x86/events/intel/core.c | 80 
 arch/x86/events/perf_event.h |  3 +-
 2 files changed, 73 insertions(+), 10 deletions(-)

diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 0fb8659b20d8..5c45535c60b4 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -18,6 +18,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "../perf_event.h"
 
@@ -3170,16 +3171,27 @@ static struct perf_guest_switch_msr 
*intel_guest_get_msrs(int *nr)
arr[0].msr = MSR_CORE_PERF_GLOBAL_CTRL;
arr[0].host = x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_guest_mask;
arr[0].guest = x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_host_mask;
-   /*
-* If PMU counter has PEBS enabled it is not enough to disable counter
-* on a guest entry since PEBS memory write can overshoot guest entry
-* and corrupt guest memory. Disabling PEBS solves the problem.
-*/
-   arr[1].msr = MSR_IA32_PEBS_ENABLE;
-   arr[1].host = cpuc->pebs_enabled;
-   arr[1].guest = 0;
+   if (x86_pmu.flags & PMU_FL_PEBS_ALL)
+   arr[0].guest &= ~cpuc->pebs_enabled;
+   else
+   arr[0].guest &= ~(cpuc->pebs_enabled & PEBS_COUNTER_MASK);
+   *nr = 1;
+
+   if (!x86_pmu.pebs_isolated) {
+   /*
+* If PMU counter has PEBS enabled it is not enough to
+* disable counter on a guest entry since PEBS memory
+* write can overshoot guest entry and corrupt guest
+* memory. Disabling PEBS solves the problem.
+*
+* Don't do this if the CPU already enforces it.
+*/
+   arr[1].msr = MSR_IA32_PEBS_ENABLE;
+   arr[1].host = cpuc->pebs_enabled;
+   arr[1].guest = 0;
+   *nr = 2;
+   }
 
-   *nr = 2;
return arr;
 }
 
@@ -3697,6 +3709,45 @@ static __init void intel_clovertown_quirk(void)
x86_pmu.pebs_constraints = NULL;
 }
 
+static const struct x86_ucode_id isolation_ucodes[] = {
+   INTEL_MIN_UCODE(INTEL_FAM6_HASWELL_CORE, 3, 0x001f),
+   INTEL_MIN_UCODE(INTEL_FAM6_HASWELL_ULT,  1, 0x001e),
+   INTEL_MIN_UCODE(INTEL_FAM6_HASWELL_GT3E, 1, 0x0015),
+   INTEL_MIN_UCODE(INTEL_FAM6_HASWELL_X,2, 0x0037),
+   INTEL_MIN_UCODE(INTEL_FAM6_HASWELL_X,4, 0x000a),
+   INTEL_MIN_UCODE(INTEL_FAM6_BROADWELL_CORE,   4, 0x0023),
+   INTEL_MIN_UCODE(INTEL_FAM6_BROADWELL_GT3E,   1, 0x0014),
+   INTEL_MIN_UCODE(INTEL_FAM6_BROADWELL_XEON_D, 2, 0x0010),
+   INTEL_MIN_UCODE(INTEL_FAM6_BROADWELL_XEON_D, 3, 0x0709),
+   INTEL_MIN_UCODE(INTEL_FAM6_BROADWELL_XEON_D, 4, 0x0f09),
+   INTEL_MIN_UCODE(INTEL_FAM6_BROADWELL_XEON_D, 5, 0x0e02),
+   INTEL_MIN_UCODE(INTEL_FAM6_BROADWELL_X,  2, 0x0b14),
+   INTEL_MIN_UCODE(INTEL_FAM6_SKYLAKE_X,3, 0x0021),
+   INTEL_MIN_UCODE(INTEL_FAM6_SKYLAKE_X,4, 0x),
+   INTEL_MIN_UCODE(INTEL_FAM6_SKYLAKE_MOBILE,   3, 0x007c),
+   INTEL_MIN_UCODE(INTEL_FAM6_SKYLAKE_DESKTOP,  3, 0x007c),
+   INTEL_MIN_UCODE(INTEL_FAM6_KABYLAKE_DESKTOP, 9, 0x004e),
+   INTEL_MIN_UCODE(INTEL_FAM6_KABYLAKE_MOBILE,  9, 0x004e),
+   INTEL_MIN_UCODE(INTEL_FAM6_KABYLAKE_MOBILE, 10, 0x004e),
+   INTEL_MIN_UCODE(INTEL_FAM6_KABYLAKE_MOBILE, 11, 0x004e),
+   INTEL_MIN_UCODE(INTEL_FAM6_KABYLAKE_MOBILE, 12, 0x004e),
+   INTEL_MIN_UCODE(INTEL_FAM6_KABYLAKE_DESKTOP,10, 0x004e),
+   INTEL_MIN_UCODE(INTEL_FAM6_KABYLAKE_DESKTOP,11, 0x004e),
+   INTEL_MIN_UCODE(INTEL_FAM6_KABYLAKE_DESKTOP,12, 0x004e),
+