Re: [PATCH 1/2] perf, x86: Revamp PEBS event selection

2014-07-14 Thread Stephane Eranian
On Sat, Jul 12, 2014 at 2:01 AM, Andi Kleen  wrote:
> From: Andi Kleen 
>
> The basic idea is that it does not make sense to list all PEBS
> events individually. The list is very long, sometimes outdated
> and the hardware doesn't need it. If an event does not support
> PEBS it will just not count, there is no security issue.
>
> This vastly simplifies the PEBS event selection. It also
> speeds up the scheduling because the scheduler doesn't
> have to walk as many constraints.
>
> Bugs fixed:
> - We do not allow setting forbidden flags with PEBS anymore
> (SDM 18.9.4), except for the special cycle event.
> This is done using a new constraint macro that also
> matches on the event flags.
> - We now allow DataLA on all Haswell events, not just
> a small subset. In general all PEBS events that tag memory
> accesses support DataLA on Haswell. Otherwise the reported
> address is just zero. This allows address profiling
> on vastly more events.
> - We did not allow all PEBS events on Haswell:
> We were missing some valid subevents in d1-d2 (MEM_LOAD_UOPS_RETIRED.*,
> MEM_LOAD_UOPS_RETIRED_L3_HIT_RETIRED.*)
>
> This includes the changes proposed by Stephane earlier and obsoletes
> his patchkit (except for some changes on pre Sandy Bridge/Silvermont
> CPUs)
>
> I only did Sandy Bridge and Silvermont and later so far, mostly because these
> are the parts I could directly confirm the hardware behavior with hardware
> architects. Also I do not believe the older CPUs have any
> missing events in their PEBS list, so there's no pressing
> need to change them.
>
> I did not implement the flag proposed by Peter to allow
> setting forbidden flags. If really needed this could
> be implemented on top of this patch.
>
> Cc: eran...@google.com
> v2: Fix broken store events on SNB/IVB (Stephane Eranian)
> v3: More fixes. Rename some arguments (Stephane Eranian)
> Update description.
> Signed-off-by: Andi Kleen 

Works now for me on SNB/HSW.

Reviewed-by: Stephane Eranian 
> ---
>  arch/x86/include/asm/perf_event.h |  8 +++
>  arch/x86/kernel/cpu/perf_event.h  | 18 +--
>  arch/x86/kernel/cpu/perf_event_intel_ds.c | 88 
> +++
>  3 files changed, 39 insertions(+), 75 deletions(-)
>
> diff --git a/arch/x86/include/asm/perf_event.h 
> b/arch/x86/include/asm/perf_event.h
> index 8249df4..8dfc9fd 100644
> --- a/arch/x86/include/asm/perf_event.h
> +++ b/arch/x86/include/asm/perf_event.h
> @@ -51,6 +51,14 @@
>  ARCH_PERFMON_EVENTSEL_EDGE  |  \
>  ARCH_PERFMON_EVENTSEL_INV   |  \
>  ARCH_PERFMON_EVENTSEL_CMASK)
> +#define X86_ALL_EVENT_FLAGS\
> +   (ARCH_PERFMON_EVENTSEL_EDGE |   \
> +ARCH_PERFMON_EVENTSEL_INV |\
> +ARCH_PERFMON_EVENTSEL_CMASK |  \
> +ARCH_PERFMON_EVENTSEL_ANY |\
> +ARCH_PERFMON_EVENTSEL_PIN_CONTROL |\
> +HSW_IN_TX |\
> +HSW_IN_TX_CHECKPOINTED)
>  #define AMD64_RAW_EVENT_MASK   \
> (X86_RAW_EVENT_MASK  |  \
>  AMD64_EVENTSEL_EVENT)
> diff --git a/arch/x86/kernel/cpu/perf_event.h 
> b/arch/x86/kernel/cpu/perf_event.h
> index a22a34e9..8f32af0 100644
> --- a/arch/x86/kernel/cpu/perf_event.h
> +++ b/arch/x86/kernel/cpu/perf_event.h
> @@ -262,16 +262,24 @@ struct cpu_hw_events {
> EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK)
>
>  #define INTEL_PLD_CONSTRAINT(c, n) \
> -   __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \
> +   __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
>HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LDLAT)
>
>  #define INTEL_PST_CONSTRAINT(c, n) \
> -   __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \
> +   __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
>   HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST)
>
> -/* DataLA version of store sampling without extra enable bit. */
> -#define INTEL_PST_HSW_CONSTRAINT(c, n) \
> -   __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \
> +/* Event constraint, but match on all event flags too. */
> +#define INTEL_FLAGS_EVENT_CONSTRAINT(c, n) \
> +   EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS)
> +
> +/* Check only flags, but allow all event/umask */
> +#define INTEL_ALL_EVENT_CONSTRAINT(code, n)\
> +   EVENT_CONSTRAINT(code, n, X86_ALL_EVENT_FLAGS)
> +
> +/* Same as above, but enable DataLA */
> +#define INTEL_ALL_EVENT_CONSTRAINT_DATALA(code, n) \
> +   __EVENT_CONSTRAINT(code, n, X86_ALL_EVENT_FLAGS, \
>   HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST_HSW)
>
>  /*
> diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c 
> b/arch/x86/kernel/cpu/perf_event_intel_ds.c
> index 980970c..64b4be9 100644
> --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
> +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
> @@ -567,28 +567,10 @@ struct event_constraint 
> 

Re: [PATCH 1/2] perf, x86: Revamp PEBS event selection

2014-07-14 Thread Stephane Eranian
On Sat, Jul 12, 2014 at 2:01 AM, Andi Kleen a...@firstfloor.org wrote:
 From: Andi Kleen a...@linux.intel.com

 The basic idea is that it does not make sense to list all PEBS
 events individually. The list is very long, sometimes outdated
 and the hardware doesn't need it. If an event does not support
 PEBS it will just not count, there is no security issue.

 This vastly simplifies the PEBS event selection. It also
 speeds up the scheduling because the scheduler doesn't
 have to walk as many constraints.

 Bugs fixed:
 - We do not allow setting forbidden flags with PEBS anymore
 (SDM 18.9.4), except for the special cycle event.
 This is done using a new constraint macro that also
 matches on the event flags.
 - We now allow DataLA on all Haswell events, not just
 a small subset. In general all PEBS events that tag memory
 accesses support DataLA on Haswell. Otherwise the reported
 address is just zero. This allows address profiling
 on vastly more events.
 - We did not allow all PEBS events on Haswell:
 We were missing some valid subevents in d1-d2 (MEM_LOAD_UOPS_RETIRED.*,
 MEM_LOAD_UOPS_RETIRED_L3_HIT_RETIRED.*)

 This includes the changes proposed by Stephane earlier and obsoletes
 his patchkit (except for some changes on pre Sandy Bridge/Silvermont
 CPUs)

 I only did Sandy Bridge and Silvermont and later so far, mostly because these
 are the parts I could directly confirm the hardware behavior with hardware
 architects. Also I do not believe the older CPUs have any
 missing events in their PEBS list, so there's no pressing
 need to change them.

 I did not implement the flag proposed by Peter to allow
 setting forbidden flags. If really needed this could
 be implemented on top of this patch.

 Cc: eran...@google.com
 v2: Fix broken store events on SNB/IVB (Stephane Eranian)
 v3: More fixes. Rename some arguments (Stephane Eranian)
 Update description.
 Signed-off-by: Andi Kleen a...@linux.intel.com

Works now for me on SNB/HSW.

Reviewed-by: Stephane Eranian eran...@google.com
 ---
  arch/x86/include/asm/perf_event.h |  8 +++
  arch/x86/kernel/cpu/perf_event.h  | 18 +--
  arch/x86/kernel/cpu/perf_event_intel_ds.c | 88 
 +++
  3 files changed, 39 insertions(+), 75 deletions(-)

 diff --git a/arch/x86/include/asm/perf_event.h 
 b/arch/x86/include/asm/perf_event.h
 index 8249df4..8dfc9fd 100644
 --- a/arch/x86/include/asm/perf_event.h
 +++ b/arch/x86/include/asm/perf_event.h
 @@ -51,6 +51,14 @@
  ARCH_PERFMON_EVENTSEL_EDGE  |  \
  ARCH_PERFMON_EVENTSEL_INV   |  \
  ARCH_PERFMON_EVENTSEL_CMASK)
 +#define X86_ALL_EVENT_FLAGS\
 +   (ARCH_PERFMON_EVENTSEL_EDGE |   \
 +ARCH_PERFMON_EVENTSEL_INV |\
 +ARCH_PERFMON_EVENTSEL_CMASK |  \
 +ARCH_PERFMON_EVENTSEL_ANY |\
 +ARCH_PERFMON_EVENTSEL_PIN_CONTROL |\
 +HSW_IN_TX |\
 +HSW_IN_TX_CHECKPOINTED)
  #define AMD64_RAW_EVENT_MASK   \
 (X86_RAW_EVENT_MASK  |  \
  AMD64_EVENTSEL_EVENT)
 diff --git a/arch/x86/kernel/cpu/perf_event.h 
 b/arch/x86/kernel/cpu/perf_event.h
 index a22a34e9..8f32af0 100644
 --- a/arch/x86/kernel/cpu/perf_event.h
 +++ b/arch/x86/kernel/cpu/perf_event.h
 @@ -262,16 +262,24 @@ struct cpu_hw_events {
 EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK)

  #define INTEL_PLD_CONSTRAINT(c, n) \
 -   __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \
 +   __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LDLAT)

  #define INTEL_PST_CONSTRAINT(c, n) \
 -   __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \
 +   __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
   HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST)

 -/* DataLA version of store sampling without extra enable bit. */
 -#define INTEL_PST_HSW_CONSTRAINT(c, n) \
 -   __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \
 +/* Event constraint, but match on all event flags too. */
 +#define INTEL_FLAGS_EVENT_CONSTRAINT(c, n) \
 +   EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS)
 +
 +/* Check only flags, but allow all event/umask */
 +#define INTEL_ALL_EVENT_CONSTRAINT(code, n)\
 +   EVENT_CONSTRAINT(code, n, X86_ALL_EVENT_FLAGS)
 +
 +/* Same as above, but enable DataLA */
 +#define INTEL_ALL_EVENT_CONSTRAINT_DATALA(code, n) \
 +   __EVENT_CONSTRAINT(code, n, X86_ALL_EVENT_FLAGS, \
   HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST_HSW)

  /*
 diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c 
 b/arch/x86/kernel/cpu/perf_event_intel_ds.c
 index 980970c..64b4be9 100644
 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
 +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
 @@ -567,28 +567,10 @@ struct event_constraint 
 intel_atom_pebs_event_constraints[] = {
  };

  

[PATCH 1/2] perf, x86: Revamp PEBS event selection

2014-07-11 Thread Andi Kleen
From: Andi Kleen 

The basic idea is that it does not make sense to list all PEBS
events individually. The list is very long, sometimes outdated
and the hardware doesn't need it. If an event does not support
PEBS it will just not count, there is no security issue.

This vastly simplifies the PEBS event selection. It also
speeds up the scheduling because the scheduler doesn't
have to walk as many constraints.

Bugs fixed:
- We do not allow setting forbidden flags with PEBS anymore
(SDM 18.9.4), except for the special cycle event.
This is done using a new constraint macro that also
matches on the event flags.
- We now allow DataLA on all Haswell events, not just
a small subset. In general all PEBS events that tag memory
accesses support DataLA on Haswell. Otherwise the reported
address is just zero. This allows address profiling
on vastly more events.
- We did not allow all PEBS events on Haswell:
We were missing some valid subevents in d1-d2 (MEM_LOAD_UOPS_RETIRED.*,
MEM_LOAD_UOPS_RETIRED_L3_HIT_RETIRED.*)

This includes the changes proposed by Stephane earlier and obsoletes
his patchkit (except for some changes on pre Sandy Bridge/Silvermont
CPUs)

I only did Sandy Bridge and Silvermont and later so far, mostly because these
are the parts I could directly confirm the hardware behavior with hardware
architects. Also I do not believe the older CPUs have any
missing events in their PEBS list, so there's no pressing
need to change them.

I did not implement the flag proposed by Peter to allow
setting forbidden flags. If really needed this could
be implemented on top of this patch.

Cc: eran...@google.com
v2: Fix broken store events on SNB/IVB (Stephane Eranian)
v3: More fixes. Rename some arguments (Stephane Eranian)
Update description.
Signed-off-by: Andi Kleen 
---
 arch/x86/include/asm/perf_event.h |  8 +++
 arch/x86/kernel/cpu/perf_event.h  | 18 +--
 arch/x86/kernel/cpu/perf_event_intel_ds.c | 88 +++
 3 files changed, 39 insertions(+), 75 deletions(-)

diff --git a/arch/x86/include/asm/perf_event.h 
b/arch/x86/include/asm/perf_event.h
index 8249df4..8dfc9fd 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -51,6 +51,14 @@
 ARCH_PERFMON_EVENTSEL_EDGE  |  \
 ARCH_PERFMON_EVENTSEL_INV   |  \
 ARCH_PERFMON_EVENTSEL_CMASK)
+#define X86_ALL_EVENT_FLAGS\
+   (ARCH_PERFMON_EVENTSEL_EDGE |   \
+ARCH_PERFMON_EVENTSEL_INV |\
+ARCH_PERFMON_EVENTSEL_CMASK |  \
+ARCH_PERFMON_EVENTSEL_ANY |\
+ARCH_PERFMON_EVENTSEL_PIN_CONTROL |\
+HSW_IN_TX |\
+HSW_IN_TX_CHECKPOINTED)
 #define AMD64_RAW_EVENT_MASK   \
(X86_RAW_EVENT_MASK  |  \
 AMD64_EVENTSEL_EVENT)
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index a22a34e9..8f32af0 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -262,16 +262,24 @@ struct cpu_hw_events {
EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK)
 
 #define INTEL_PLD_CONSTRAINT(c, n) \
-   __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \
+   __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
   HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LDLAT)
 
 #define INTEL_PST_CONSTRAINT(c, n) \
-   __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \
+   __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
  HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST)
 
-/* DataLA version of store sampling without extra enable bit. */
-#define INTEL_PST_HSW_CONSTRAINT(c, n) \
-   __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \
+/* Event constraint, but match on all event flags too. */
+#define INTEL_FLAGS_EVENT_CONSTRAINT(c, n) \
+   EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS)
+
+/* Check only flags, but allow all event/umask */
+#define INTEL_ALL_EVENT_CONSTRAINT(code, n)\
+   EVENT_CONSTRAINT(code, n, X86_ALL_EVENT_FLAGS)
+
+/* Same as above, but enable DataLA */
+#define INTEL_ALL_EVENT_CONSTRAINT_DATALA(code, n) \
+   __EVENT_CONSTRAINT(code, n, X86_ALL_EVENT_FLAGS, \
  HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST_HSW)
 
 /*
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c 
b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 980970c..64b4be9 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -567,28 +567,10 @@ struct event_constraint 
intel_atom_pebs_event_constraints[] = {
 };
 
 struct event_constraint intel_slm_pebs_event_constraints[] = {
-   INTEL_UEVENT_CONSTRAINT(0x0103, 0x1), /* REHABQ.LD_BLOCK_ST_FORWARD_PS 
*/
-   INTEL_UEVENT_CONSTRAINT(0x0803, 0x1), /* REHABQ.LD_SPLITS_PS */
-   INTEL_UEVENT_CONSTRAINT(0x0204, 0x1), /* 

[PATCH 1/2] perf, x86: Revamp PEBS event selection

2014-07-11 Thread Andi Kleen
From: Andi Kleen a...@linux.intel.com

The basic idea is that it does not make sense to list all PEBS
events individually. The list is very long, sometimes outdated
and the hardware doesn't need it. If an event does not support
PEBS it will just not count, there is no security issue.

This vastly simplifies the PEBS event selection. It also
speeds up the scheduling because the scheduler doesn't
have to walk as many constraints.

Bugs fixed:
- We do not allow setting forbidden flags with PEBS anymore
(SDM 18.9.4), except for the special cycle event.
This is done using a new constraint macro that also
matches on the event flags.
- We now allow DataLA on all Haswell events, not just
a small subset. In general all PEBS events that tag memory
accesses support DataLA on Haswell. Otherwise the reported
address is just zero. This allows address profiling
on vastly more events.
- We did not allow all PEBS events on Haswell:
We were missing some valid subevents in d1-d2 (MEM_LOAD_UOPS_RETIRED.*,
MEM_LOAD_UOPS_RETIRED_L3_HIT_RETIRED.*)

This includes the changes proposed by Stephane earlier and obsoletes
his patchkit (except for some changes on pre Sandy Bridge/Silvermont
CPUs)

I only did Sandy Bridge and Silvermont and later so far, mostly because these
are the parts I could directly confirm the hardware behavior with hardware
architects. Also I do not believe the older CPUs have any
missing events in their PEBS list, so there's no pressing
need to change them.

I did not implement the flag proposed by Peter to allow
setting forbidden flags. If really needed this could
be implemented on top of this patch.

Cc: eran...@google.com
v2: Fix broken store events on SNB/IVB (Stephane Eranian)
v3: More fixes. Rename some arguments (Stephane Eranian)
Update description.
Signed-off-by: Andi Kleen a...@linux.intel.com
---
 arch/x86/include/asm/perf_event.h |  8 +++
 arch/x86/kernel/cpu/perf_event.h  | 18 +--
 arch/x86/kernel/cpu/perf_event_intel_ds.c | 88 +++
 3 files changed, 39 insertions(+), 75 deletions(-)

diff --git a/arch/x86/include/asm/perf_event.h 
b/arch/x86/include/asm/perf_event.h
index 8249df4..8dfc9fd 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -51,6 +51,14 @@
 ARCH_PERFMON_EVENTSEL_EDGE  |  \
 ARCH_PERFMON_EVENTSEL_INV   |  \
 ARCH_PERFMON_EVENTSEL_CMASK)
+#define X86_ALL_EVENT_FLAGS\
+   (ARCH_PERFMON_EVENTSEL_EDGE |   \
+ARCH_PERFMON_EVENTSEL_INV |\
+ARCH_PERFMON_EVENTSEL_CMASK |  \
+ARCH_PERFMON_EVENTSEL_ANY |\
+ARCH_PERFMON_EVENTSEL_PIN_CONTROL |\
+HSW_IN_TX |\
+HSW_IN_TX_CHECKPOINTED)
 #define AMD64_RAW_EVENT_MASK   \
(X86_RAW_EVENT_MASK  |  \
 AMD64_EVENTSEL_EVENT)
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index a22a34e9..8f32af0 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -262,16 +262,24 @@ struct cpu_hw_events {
EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK)
 
 #define INTEL_PLD_CONSTRAINT(c, n) \
-   __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \
+   __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
   HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LDLAT)
 
 #define INTEL_PST_CONSTRAINT(c, n) \
-   __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \
+   __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
  HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST)
 
-/* DataLA version of store sampling without extra enable bit. */
-#define INTEL_PST_HSW_CONSTRAINT(c, n) \
-   __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \
+/* Event constraint, but match on all event flags too. */
+#define INTEL_FLAGS_EVENT_CONSTRAINT(c, n) \
+   EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS)
+
+/* Check only flags, but allow all event/umask */
+#define INTEL_ALL_EVENT_CONSTRAINT(code, n)\
+   EVENT_CONSTRAINT(code, n, X86_ALL_EVENT_FLAGS)
+
+/* Same as above, but enable DataLA */
+#define INTEL_ALL_EVENT_CONSTRAINT_DATALA(code, n) \
+   __EVENT_CONSTRAINT(code, n, X86_ALL_EVENT_FLAGS, \
  HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST_HSW)
 
 /*
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c 
b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 980970c..64b4be9 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -567,28 +567,10 @@ struct event_constraint 
intel_atom_pebs_event_constraints[] = {
 };
 
 struct event_constraint intel_slm_pebs_event_constraints[] = {
-   INTEL_UEVENT_CONSTRAINT(0x0103, 0x1), /* REHABQ.LD_BLOCK_ST_FORWARD_PS 
*/
-   INTEL_UEVENT_CONSTRAINT(0x0803, 0x1), /* REHABQ.LD_SPLITS_PS */
-   

Re: [PATCH 1/2] perf, x86: Revamp PEBS event selection v2

2014-07-07 Thread Stephane Eranian
On Tue, Jul 8, 2014 at 12:37 AM, Andi Kleen  wrote:
> From: Andi Kleen 
>
> The basic idea is that it does not make sense to list all PEBS
> events individually. The list is very long, sometimes outdated
> and the hardware doesn't need it. If an event does not support
> PEBS it will just not count, there is no security issue.
>
> This vastly simplifies the PEBS event selection. It also
> speeds up the scheduling because the scheduler doesn't
> have to walk as many constraints.
>
> Bugs fixed:
> - We do not allow setting forbidden flags with PEBS anymore
> (SDM 18.9.4), except for the special cycle event.
> This is done using a new constraint macro that also
> matches on the event flags.
> - We now allow DataLA on all Haswell events, not just
> a small subset. In general all PEBS events that tag memory
> accesses support DataLA on Haswell. Otherwise the reported
> address is just zero. This allows address profiling
> on vastly more events.
> - We did not allow all PEBS events on Haswell:
> We were missing some valid subevents in d1-d2 (MEM_LOAD_UOPS_RETIRED.*,
> MEM_LOAD_UOPS_RETIRED_L3_HIT_RETIRED.*)
>
> This includes the changes proposed by Stephane earlier and obsoletes
> his patchkit (except for some changes on pre Sandy Bridge/Silvermont
> CPUs)
>
> I only did Sandy Bridge and Silvermont and later so far, mostly because these
> are the parts I could directly confirm the hardware behavior with hardware
> architects. Also I do not believe the older CPUs have any
> missing events in their PEBS list, so there's no pressing
> need to change them.
>
> I did not implement the flag proposed by Peter to allow
> setting forbidden flags. If really needed this could
> be implemented on top of this patch.
>
> Cc: eran...@google.com
> v2: Fix broken store events on SNB/IVB (Stephane Eranian)
> Update description.
> Signed-off-by: Andi Kleen 
> ---
>  arch/x86/include/asm/perf_event.h |  8 +++
>  arch/x86/kernel/cpu/perf_event.h  | 18 +--
>  arch/x86/kernel/cpu/perf_event_intel_ds.c | 88 
> +++
>  3 files changed, 39 insertions(+), 75 deletions(-)
>
> diff --git a/arch/x86/include/asm/perf_event.h 
> b/arch/x86/include/asm/perf_event.h
> index 8249df4..8dfc9fd 100644
> --- a/arch/x86/include/asm/perf_event.h
> +++ b/arch/x86/include/asm/perf_event.h
> @@ -51,6 +51,14 @@
>  ARCH_PERFMON_EVENTSEL_EDGE  |  \
>  ARCH_PERFMON_EVENTSEL_INV   |  \
>  ARCH_PERFMON_EVENTSEL_CMASK)
> +#define X86_ALL_EVENT_FLAGS\
> +   (ARCH_PERFMON_EVENTSEL_EDGE |   \
> +ARCH_PERFMON_EVENTSEL_INV |\
> +ARCH_PERFMON_EVENTSEL_CMASK |  \
> +ARCH_PERFMON_EVENTSEL_ANY |\
> +ARCH_PERFMON_EVENTSEL_PIN_CONTROL |\
> +HSW_IN_TX |\
> +HSW_IN_TX_CHECKPOINTED)
>  #define AMD64_RAW_EVENT_MASK   \
> (X86_RAW_EVENT_MASK  |  \
>  AMD64_EVENTSEL_EVENT)
> diff --git a/arch/x86/kernel/cpu/perf_event.h 
> b/arch/x86/kernel/cpu/perf_event.h
> index a22a34e9..70273e8 100644
> --- a/arch/x86/kernel/cpu/perf_event.h
> +++ b/arch/x86/kernel/cpu/perf_event.h
> @@ -262,16 +262,24 @@ struct cpu_hw_events {
> EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK)
>
>  #define INTEL_PLD_CONSTRAINT(c, n) \
> -   __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \
> +   __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
>HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LDLAT)
>
>  #define INTEL_PST_CONSTRAINT(c, n) \
> -   __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \
> +   __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
>   HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST)
>
> -/* DataLA version of store sampling without extra enable bit. */
> -#define INTEL_PST_HSW_CONSTRAINT(c, n) \
> -   __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \
> +/* Event constraint, but match on all event flags too. */
> +#define INTEL_FLAGS_EVENT_CONSTRAINT(c, n) \
> +   EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS)
> +
> +/* Check only flags, but allow all event/umask */
> +#define INTEL_ALL_EVENT_CONSTRAINT(flags, n)   \
> +   EVENT_CONSTRAINT(flags, n, X86_ALL_EVENT_FLAGS)
> +
The first argument is not flags but rather 'code'. This is confusing
otherwise.

> +/* Same as above, but enable DataLA */
> +#define INTEL_ALL_EVENT_CONSTRAINT_DATALA(flags, n) \
> +   __EVENT_CONSTRAINT(flags, n, X86_ALL_EVENT_FLAGS, \
>   HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST_HSW)
>
>  /*
> diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c 
> b/arch/x86/kernel/cpu/perf_event_intel_ds.c
> index 980970c..0e22ce6 100644
> --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
> +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
> @@ -567,28 +567,10 @@ struct event_constraint 
> intel_atom_pebs_event_constraints[] = {
>  };
>
>  

[PATCH 1/2] perf, x86: Revamp PEBS event selection v2

2014-07-07 Thread Andi Kleen
From: Andi Kleen 

The basic idea is that it does not make sense to list all PEBS
events individually. The list is very long, sometimes outdated
and the hardware doesn't need it. If an event does not support
PEBS it will just not count, there is no security issue.

This vastly simplifies the PEBS event selection. It also
speeds up the scheduling because the scheduler doesn't
have to walk as many constraints.

Bugs fixed:
- We do not allow setting forbidden flags with PEBS anymore
(SDM 18.9.4), except for the special cycle event.
This is done using a new constraint macro that also
matches on the event flags.
- We now allow DataLA on all Haswell events, not just
a small subset. In general all PEBS events that tag memory
accesses support DataLA on Haswell. Otherwise the reported
address is just zero. This allows address profiling
on vastly more events.
- We did not allow all PEBS events on Haswell:
We were missing some valid subevents in d1-d2 (MEM_LOAD_UOPS_RETIRED.*,
MEM_LOAD_UOPS_RETIRED_L3_HIT_RETIRED.*)

This includes the changes proposed by Stephane earlier and obsoletes
his patchkit (except for some changes on pre Sandy Bridge/Silvermont
CPUs)

I only did Sandy Bridge and Silvermont and later so far, mostly because these
are the parts I could directly confirm the hardware behavior with hardware
architects. Also I do not believe the older CPUs have any
missing events in their PEBS list, so there's no pressing
need to change them.

I did not implement the flag proposed by Peter to allow
setting forbidden flags. If really needed this could
be implemented on top of this patch.

Cc: eran...@google.com
v2: Fix broken store events on SNB/IVB (Stephane Eranian)
Update description.
Signed-off-by: Andi Kleen 
---
 arch/x86/include/asm/perf_event.h |  8 +++
 arch/x86/kernel/cpu/perf_event.h  | 18 +--
 arch/x86/kernel/cpu/perf_event_intel_ds.c | 88 +++
 3 files changed, 39 insertions(+), 75 deletions(-)

diff --git a/arch/x86/include/asm/perf_event.h 
b/arch/x86/include/asm/perf_event.h
index 8249df4..8dfc9fd 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -51,6 +51,14 @@
 ARCH_PERFMON_EVENTSEL_EDGE  |  \
 ARCH_PERFMON_EVENTSEL_INV   |  \
 ARCH_PERFMON_EVENTSEL_CMASK)
+#define X86_ALL_EVENT_FLAGS\
+   (ARCH_PERFMON_EVENTSEL_EDGE |   \
+ARCH_PERFMON_EVENTSEL_INV |\
+ARCH_PERFMON_EVENTSEL_CMASK |  \
+ARCH_PERFMON_EVENTSEL_ANY |\
+ARCH_PERFMON_EVENTSEL_PIN_CONTROL |\
+HSW_IN_TX |\
+HSW_IN_TX_CHECKPOINTED)
 #define AMD64_RAW_EVENT_MASK   \
(X86_RAW_EVENT_MASK  |  \
 AMD64_EVENTSEL_EVENT)
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index a22a34e9..70273e8 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -262,16 +262,24 @@ struct cpu_hw_events {
EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK)
 
 #define INTEL_PLD_CONSTRAINT(c, n) \
-   __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \
+   __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
   HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LDLAT)
 
 #define INTEL_PST_CONSTRAINT(c, n) \
-   __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \
+   __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
  HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST)
 
-/* DataLA version of store sampling without extra enable bit. */
-#define INTEL_PST_HSW_CONSTRAINT(c, n) \
-   __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \
+/* Event constraint, but match on all event flags too. */
+#define INTEL_FLAGS_EVENT_CONSTRAINT(c, n) \
+   EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS)
+
+/* Check only flags, but allow all event/umask */
+#define INTEL_ALL_EVENT_CONSTRAINT(flags, n)   \
+   EVENT_CONSTRAINT(flags, n, X86_ALL_EVENT_FLAGS)
+
+/* Same as above, but enable DataLA */
+#define INTEL_ALL_EVENT_CONSTRAINT_DATALA(flags, n) \
+   __EVENT_CONSTRAINT(flags, n, X86_ALL_EVENT_FLAGS, \
  HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST_HSW)
 
 /*
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c 
b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 980970c..0e22ce6 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -567,28 +567,10 @@ struct event_constraint 
intel_atom_pebs_event_constraints[] = {
 };
 
 struct event_constraint intel_slm_pebs_event_constraints[] = {
-   INTEL_UEVENT_CONSTRAINT(0x0103, 0x1), /* REHABQ.LD_BLOCK_ST_FORWARD_PS 
*/
-   INTEL_UEVENT_CONSTRAINT(0x0803, 0x1), /* REHABQ.LD_SPLITS_PS */
-   INTEL_UEVENT_CONSTRAINT(0x0204, 0x1), /* 
MEM_UOPS_RETIRED.L2_HIT_LOADS_PS */
-   INTEL_UEVENT_CONSTRAINT(0x0404, 

[PATCH 1/2] perf, x86: Revamp PEBS event selection v2

2014-07-07 Thread Andi Kleen
From: Andi Kleen a...@linux.intel.com

The basic idea is that it does not make sense to list all PEBS
events individually. The list is very long, sometimes outdated
and the hardware doesn't need it. If an event does not support
PEBS it will just not count, there is no security issue.

This vastly simplifies the PEBS event selection. It also
speeds up the scheduling because the scheduler doesn't
have to walk as many constraints.

Bugs fixed:
- We do not allow setting forbidden flags with PEBS anymore
(SDM 18.9.4), except for the special cycle event.
This is done using a new constraint macro that also
matches on the event flags.
- We now allow DataLA on all Haswell events, not just
a small subset. In general all PEBS events that tag memory
accesses support DataLA on Haswell. Otherwise the reported
address is just zero. This allows address profiling
on vastly more events.
- We did not allow all PEBS events on Haswell:
We were missing some valid subevents in d1-d2 (MEM_LOAD_UOPS_RETIRED.*,
MEM_LOAD_UOPS_RETIRED_L3_HIT_RETIRED.*)

This includes the changes proposed by Stephane earlier and obsoletes
his patchkit (except for some changes on pre Sandy Bridge/Silvermont
CPUs)

I only did Sandy Bridge and Silvermont and later so far, mostly because these
are the parts I could directly confirm the hardware behavior with hardware
architects. Also I do not believe the older CPUs have any
missing events in their PEBS list, so there's no pressing
need to change them.

I did not implement the flag proposed by Peter to allow
setting forbidden flags. If really needed this could
be implemented on top of this patch.

Cc: eran...@google.com
v2: Fix broken store events on SNB/IVB (Stephane Eranian)
Update description.
Signed-off-by: Andi Kleen a...@linux.intel.com
---
 arch/x86/include/asm/perf_event.h |  8 +++
 arch/x86/kernel/cpu/perf_event.h  | 18 +--
 arch/x86/kernel/cpu/perf_event_intel_ds.c | 88 +++
 3 files changed, 39 insertions(+), 75 deletions(-)

diff --git a/arch/x86/include/asm/perf_event.h 
b/arch/x86/include/asm/perf_event.h
index 8249df4..8dfc9fd 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -51,6 +51,14 @@
 ARCH_PERFMON_EVENTSEL_EDGE  |  \
 ARCH_PERFMON_EVENTSEL_INV   |  \
 ARCH_PERFMON_EVENTSEL_CMASK)
+#define X86_ALL_EVENT_FLAGS\
+   (ARCH_PERFMON_EVENTSEL_EDGE |   \
+ARCH_PERFMON_EVENTSEL_INV |\
+ARCH_PERFMON_EVENTSEL_CMASK |  \
+ARCH_PERFMON_EVENTSEL_ANY |\
+ARCH_PERFMON_EVENTSEL_PIN_CONTROL |\
+HSW_IN_TX |\
+HSW_IN_TX_CHECKPOINTED)
 #define AMD64_RAW_EVENT_MASK   \
(X86_RAW_EVENT_MASK  |  \
 AMD64_EVENTSEL_EVENT)
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index a22a34e9..70273e8 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -262,16 +262,24 @@ struct cpu_hw_events {
EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK)
 
 #define INTEL_PLD_CONSTRAINT(c, n) \
-   __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \
+   __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
   HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LDLAT)
 
 #define INTEL_PST_CONSTRAINT(c, n) \
-   __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \
+   __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
  HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST)
 
-/* DataLA version of store sampling without extra enable bit. */
-#define INTEL_PST_HSW_CONSTRAINT(c, n) \
-   __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \
+/* Event constraint, but match on all event flags too. */
+#define INTEL_FLAGS_EVENT_CONSTRAINT(c, n) \
+   EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS)
+
+/* Check only flags, but allow all event/umask */
+#define INTEL_ALL_EVENT_CONSTRAINT(flags, n)   \
+   EVENT_CONSTRAINT(flags, n, X86_ALL_EVENT_FLAGS)
+
+/* Same as above, but enable DataLA */
+#define INTEL_ALL_EVENT_CONSTRAINT_DATALA(flags, n) \
+   __EVENT_CONSTRAINT(flags, n, X86_ALL_EVENT_FLAGS, \
  HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST_HSW)
 
 /*
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c 
b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 980970c..0e22ce6 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -567,28 +567,10 @@ struct event_constraint 
intel_atom_pebs_event_constraints[] = {
 };
 
 struct event_constraint intel_slm_pebs_event_constraints[] = {
-   INTEL_UEVENT_CONSTRAINT(0x0103, 0x1), /* REHABQ.LD_BLOCK_ST_FORWARD_PS 
*/
-   INTEL_UEVENT_CONSTRAINT(0x0803, 0x1), /* REHABQ.LD_SPLITS_PS */
-   INTEL_UEVENT_CONSTRAINT(0x0204, 0x1), /* 
MEM_UOPS_RETIRED.L2_HIT_LOADS_PS */
-   

Re: [PATCH 1/2] perf, x86: Revamp PEBS event selection v2

2014-07-07 Thread Stephane Eranian
On Tue, Jul 8, 2014 at 12:37 AM, Andi Kleen a...@firstfloor.org wrote:
 From: Andi Kleen a...@linux.intel.com

 The basic idea is that it does not make sense to list all PEBS
 events individually. The list is very long, sometimes outdated
 and the hardware doesn't need it. If an event does not support
 PEBS it will just not count, there is no security issue.

 This vastly simplifies the PEBS event selection. It also
 speeds up the scheduling because the scheduler doesn't
 have to walk as many constraints.

 Bugs fixed:
 - We do not allow setting forbidden flags with PEBS anymore
 (SDM 18.9.4), except for the special cycle event.
 This is done using a new constraint macro that also
 matches on the event flags.
 - We now allow DataLA on all Haswell events, not just
 a small subset. In general all PEBS events that tag memory
 accesses support DataLA on Haswell. Otherwise the reported
 address is just zero. This allows address profiling
 on vastly more events.
 - We did not allow all PEBS events on Haswell:
 We were missing some valid subevents in d1-d2 (MEM_LOAD_UOPS_RETIRED.*,
 MEM_LOAD_UOPS_RETIRED_L3_HIT_RETIRED.*)

 This includes the changes proposed by Stephane earlier and obsoletes
 his patchkit (except for some changes on pre Sandy Bridge/Silvermont
 CPUs)

 I only did Sandy Bridge and Silvermont and later so far, mostly because these
 are the parts I could directly confirm the hardware behavior with hardware
 architects. Also I do not believe the older CPUs have any
 missing events in their PEBS list, so there's no pressing
 need to change them.

 I did not implement the flag proposed by Peter to allow
 setting forbidden flags. If really needed this could
 be implemented on top of this patch.

 Cc: eran...@google.com
 v2: Fix broken store events on SNB/IVB (Stephane Eranian)
 Update description.
 Signed-off-by: Andi Kleen a...@linux.intel.com
 ---
  arch/x86/include/asm/perf_event.h |  8 +++
  arch/x86/kernel/cpu/perf_event.h  | 18 +--
  arch/x86/kernel/cpu/perf_event_intel_ds.c | 88 
 +++
  3 files changed, 39 insertions(+), 75 deletions(-)

 diff --git a/arch/x86/include/asm/perf_event.h 
 b/arch/x86/include/asm/perf_event.h
 index 8249df4..8dfc9fd 100644
 --- a/arch/x86/include/asm/perf_event.h
 +++ b/arch/x86/include/asm/perf_event.h
 @@ -51,6 +51,14 @@
  ARCH_PERFMON_EVENTSEL_EDGE  |  \
  ARCH_PERFMON_EVENTSEL_INV   |  \
  ARCH_PERFMON_EVENTSEL_CMASK)
 +#define X86_ALL_EVENT_FLAGS\
 +   (ARCH_PERFMON_EVENTSEL_EDGE |   \
 +ARCH_PERFMON_EVENTSEL_INV |\
 +ARCH_PERFMON_EVENTSEL_CMASK |  \
 +ARCH_PERFMON_EVENTSEL_ANY |\
 +ARCH_PERFMON_EVENTSEL_PIN_CONTROL |\
 +HSW_IN_TX |\
 +HSW_IN_TX_CHECKPOINTED)
  #define AMD64_RAW_EVENT_MASK   \
 (X86_RAW_EVENT_MASK  |  \
  AMD64_EVENTSEL_EVENT)
 diff --git a/arch/x86/kernel/cpu/perf_event.h 
 b/arch/x86/kernel/cpu/perf_event.h
 index a22a34e9..70273e8 100644
 --- a/arch/x86/kernel/cpu/perf_event.h
 +++ b/arch/x86/kernel/cpu/perf_event.h
 @@ -262,16 +262,24 @@ struct cpu_hw_events {
 EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK)

  #define INTEL_PLD_CONSTRAINT(c, n) \
 -   __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \
 +   __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LDLAT)

  #define INTEL_PST_CONSTRAINT(c, n) \
 -   __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \
 +   __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
   HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST)

 -/* DataLA version of store sampling without extra enable bit. */
 -#define INTEL_PST_HSW_CONSTRAINT(c, n) \
 -   __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \
 +/* Event constraint, but match on all event flags too. */
 +#define INTEL_FLAGS_EVENT_CONSTRAINT(c, n) \
 +   EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS)
 +
 +/* Check only flags, but allow all event/umask */
 +#define INTEL_ALL_EVENT_CONSTRAINT(flags, n)   \
 +   EVENT_CONSTRAINT(flags, n, X86_ALL_EVENT_FLAGS)
 +
The first argument is not flags but rather 'code'. This is confusing
otherwise.

 +/* Same as above, but enable DataLA */
 +#define INTEL_ALL_EVENT_CONSTRAINT_DATALA(flags, n) \
 +   __EVENT_CONSTRAINT(flags, n, X86_ALL_EVENT_FLAGS, \
   HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST_HSW)

  /*
 diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c 
 b/arch/x86/kernel/cpu/perf_event_intel_ds.c
 index 980970c..0e22ce6 100644
 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
 +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
 @@ -567,28 +567,10 @@ struct event_constraint 
 intel_atom_pebs_event_constraints[] = {
  };

  struct event_constraint 

Re: [PATCH 1/2] perf, x86: Revamp PEBS event selection

2014-07-06 Thread Stephane Eranian
On Wed, Jul 2, 2014 at 8:10 PM, Andi Kleen  wrote:
> On Wed, Jul 02, 2014 at 06:07:31PM +0200, Stephane Eranian wrote:
>> On Wed, Jul 2, 2014 at 5:48 PM, Andi Kleen  wrote:
>> > On Wed, Jul 02, 2014 at 05:44:05PM +0200, Stephane Eranian wrote:
>> >> On Wed, Jul 2, 2014 at 5:33 PM, Andi Kleen  wrote:
>> >> >> No, still needs to be INTEL_ALL_EVENT_CONSTRAINT(0x0, 0x1)
>> >> >> otherwise the get_event_constraint() test I mentioned previously will
>> >> >> fail, even with your ALL_FILTER mask.
>> >> >
>> >> > What events should fail? I verified all PEBS events and they work as 
>> >> > expected.
>> >> >
>> >> Random events should not fail, they should go with precise and not 
>> >> generate
>> >> any samples. That's the whole point of the exercise.
>> >>
>> >> perf record -a -e r6099:p sleep 1
>> >
>> > Like I said I ran all PEBS events and they generated samples.
>> >
>> I understand. I ran some random events to make sure I was not
>> getting PEBS samples and the system was stable.
>
> Not sure we're talking about the same thing. You claimed my patch
> wouldn't let any PEBS events through, but the test results
> disagree with that.
>
I did not say that. I said, it does not let any random event code
use precise > 0. And this is what we want to eliminate. It is
okay to let precise > 1 on any event. The non-PEBS events
will not generate any PEBS records.


> I fixed the broken store events you pointed out.
>
> INST_RETIRED.PREC_DIST
> cpu/event=0xC0,umask=0x01,name=INST_RETIRED_PREC_DIST/pp
> [ perf record: Woken up 1 times to write data ]
> [ perf record: Captured and wrote 0.179 MB perf.data (~7821 samples) ]
> UOPS_RETIRED.ALL
> cpu/event=0xC2,umask=0x01,name=UOPS_RETIRED_ALL/pp
> [ perf record: Woken up 1 times to write data ]
> [ perf record: Captured and wrote 0.179 MB perf.data (~7824 samples) ]
> UOPS_RETIRED.RETIRE_SLOTS
> cpu/event=0xC2,umask=0x02,name=UOPS_RETIRED_RETIRE_SLOTS/pp
> [ perf record: Woken up 1 times to write data ]
> [ perf record: Captured and wrote 0.180 MB perf.data (~7869 samples) ]
> BR_INST_RETIRED.CONDITIONAL
> cpu/event=0xC4,umask=0x01,name=BR_INST_RETIRED_CONDITIONAL/pp
> [ perf record: Woken up 1 times to write data ]
> [ perf record: Captured and wrote 0.177 MB perf.data (~7729 samples) ]
> BR_INST_RETIRED.NEAR_CALL
> cpu/event=0xC4,umask=0x02,name=BR_INST_RETIRED_NEAR_CALL/pp
> [ perf record: Woken up 1 times to write data ]
> [ perf record: Captured and wrote 0.140 MB perf.data (~6112 samples) ]
> BR_INST_RETIRED.NEAR_RETURN
> cpu/event=0xC4,umask=0x08,name=BR_INST_RETIRED_NEAR_RETURN/pp
> [ perf record: Woken up 1 times to write data ]
> [ perf record: Captured and wrote 0.140 MB perf.data (~6124 samples) ]
> BR_INST_RETIRED.NEAR_TAKEN
> cpu/event=0xC4,umask=0x20,name=BR_INST_RETIRED_NEAR_TAKEN/pp
> [ perf record: Woken up 1 times to write data ]
> [ perf record: Captured and wrote 0.176 MB perf.data (~7709 samples) ]
> BR_INST_RETIRED.ALL_BRANCHES_PEBS
> cpu/event=0xC4,umask=0x04,name=BR_INST_RETIRED_ALL_BRANCHES_PEBS/pp
> [ perf record: Woken up 1 times to write data ]
> [ perf record: Captured and wrote 0.177 MB perf.data (~7747 samples) ]
> BR_MISP_RETIRED.CONDITIONAL
> cpu/event=0xC5,umask=0x01,name=BR_MISP_RETIRED_CONDITIONAL/pp
> [ perf record: Woken up 1 times to write data ]
> [ perf record: Captured and wrote 0.132 MB perf.data (~5767 samples) ]
> BR_MISP_RETIRED.ALL_BRANCHES_PEBS
> cpu/event=0xC5,umask=0x04,name=BR_MISP_RETIRED_ALL_BRANCHES_PEBS/pp
> [ perf record: Woken up 1 times to write data ]
> [ perf record: Captured and wrote 0.132 MB perf.data (~5781 samples) ]
> HLE_RETIRED.ABORTED
> cpu/event=0xc8,umask=0x04,name=HLE_RETIRED_ABORTED/pp
> [ perf record: Woken up 1 times to write data ]
> [ perf record: Captured and wrote 0.002 MB perf.data (~86 samples) ]
> RTM_RETIRED.ABORTED
> cpu/event=0xc9,umask=0x04,name=RTM_RETIRED_ABORTED/pp
> [ perf record: Woken up 1 times to write data ]
> [ perf record: Captured and wrote 0.002 MB perf.data (~86 samples) ]
> MEM_TRANS_RETIRED.LOAD_LATENCY_GT_4
> cpu/event=0xCD,umask=0x01,ldlat=0x4,name=MEM_TRANS_RETIRED_LOAD_LATENCY_GT_4/pp
> [ perf record: Woken up 1 times to write data ]
> [ perf record: Captured and wrote 0.179 MB perf.data (~7832 samples) ]
> MEM_TRANS_RETIRED.LOAD_LATENCY_GT_8
> cpu/event=0xCD,umask=0x01,ldlat=0x8,name=MEM_TRANS_RETIRED_LOAD_LATENCY_GT_8/pp
> [ perf record: Woken up 1 times to write data ]
> [ perf record: Captured and wrote 0.126 MB perf.data (~5522 samples) ]
> MEM_TRANS_RETIRED.LOAD_LATENCY_GT_16
> cpu/event=0xCD,umask=0x01,ldlat=0x10,name=MEM_TRANS_RETIRED_LOAD_LATENCY_GT_16/pp
> [ perf record: Woken up 1 times to write data ]
> [ perf record: Captured and wrote 0.090 MB perf.data (~3911 samples) ]
> MEM_TRANS_RETIRED.LOAD_LATENCY_GT_32
> cpu/event=0xCD,umask=0x01,ldlat=0x20,name=MEM_TRANS_RETIRED_LOAD_LATENCY_GT_32/pp
> [ perf record: Woken up 1 times to write data ]
> [ perf record: Captured and wrote 0.056 MB perf.data (~2429 samples) ]
> MEM_TRANS_RETIRED.LOAD_LATENCY_GT_64
> 

Re: [PATCH 1/2] perf, x86: Revamp PEBS event selection

2014-07-06 Thread Stephane Eranian
On Wed, Jul 2, 2014 at 8:10 PM, Andi Kleen a...@linux.intel.com wrote:
 On Wed, Jul 02, 2014 at 06:07:31PM +0200, Stephane Eranian wrote:
 On Wed, Jul 2, 2014 at 5:48 PM, Andi Kleen a...@linux.intel.com wrote:
  On Wed, Jul 02, 2014 at 05:44:05PM +0200, Stephane Eranian wrote:
  On Wed, Jul 2, 2014 at 5:33 PM, Andi Kleen a...@linux.intel.com wrote:
   No, still needs to be INTEL_ALL_EVENT_CONSTRAINT(0x0, 0x1)
   otherwise the get_event_constraint() test I mentioned previously will
   fail, even with your ALL_FILTER mask.
  
   What events should fail? I verified all PEBS events and they work as 
   expected.
  
  Random events should not fail, they should go with precise and not 
  generate
  any samples. That's the whole point of the exercise.
 
  perf record -a -e r6099:p sleep 1
 
  Like I said I ran all PEBS events and they generated samples.
 
 I understand. I ran some random events to make sure I was not
 getting PEBS samples and the system was stable.

 Not sure we're talking about the same thing. You claimed my patch
 wouldn't let any PEBS events through, but the test results
 disagree with that.

I did not say that. I said, it does not let any random event code
use precise > 0. And this is what we want to eliminate. It is
okay to let precise > 1 on any event. The non-PEBS events
will not generate any PEBS records.


 I fixed the broken store events you pointed out.

 INST_RETIRED.PREC_DIST
 cpu/event=0xC0,umask=0x01,name=INST_RETIRED_PREC_DIST/pp
 [ perf record: Woken up 1 times to write data ]
 [ perf record: Captured and wrote 0.179 MB perf.data (~7821 samples) ]
 UOPS_RETIRED.ALL
 cpu/event=0xC2,umask=0x01,name=UOPS_RETIRED_ALL/pp
 [ perf record: Woken up 1 times to write data ]
 [ perf record: Captured and wrote 0.179 MB perf.data (~7824 samples) ]
 UOPS_RETIRED.RETIRE_SLOTS
 cpu/event=0xC2,umask=0x02,name=UOPS_RETIRED_RETIRE_SLOTS/pp
 [ perf record: Woken up 1 times to write data ]
 [ perf record: Captured and wrote 0.180 MB perf.data (~7869 samples) ]
 BR_INST_RETIRED.CONDITIONAL
 cpu/event=0xC4,umask=0x01,name=BR_INST_RETIRED_CONDITIONAL/pp
 [ perf record: Woken up 1 times to write data ]
 [ perf record: Captured and wrote 0.177 MB perf.data (~7729 samples) ]
 BR_INST_RETIRED.NEAR_CALL
 cpu/event=0xC4,umask=0x02,name=BR_INST_RETIRED_NEAR_CALL/pp
 [ perf record: Woken up 1 times to write data ]
 [ perf record: Captured and wrote 0.140 MB perf.data (~6112 samples) ]
 BR_INST_RETIRED.NEAR_RETURN
 cpu/event=0xC4,umask=0x08,name=BR_INST_RETIRED_NEAR_RETURN/pp
 [ perf record: Woken up 1 times to write data ]
 [ perf record: Captured and wrote 0.140 MB perf.data (~6124 samples) ]
 BR_INST_RETIRED.NEAR_TAKEN
 cpu/event=0xC4,umask=0x20,name=BR_INST_RETIRED_NEAR_TAKEN/pp
 [ perf record: Woken up 1 times to write data ]
 [ perf record: Captured and wrote 0.176 MB perf.data (~7709 samples) ]
 BR_INST_RETIRED.ALL_BRANCHES_PEBS
 cpu/event=0xC4,umask=0x04,name=BR_INST_RETIRED_ALL_BRANCHES_PEBS/pp
 [ perf record: Woken up 1 times to write data ]
 [ perf record: Captured and wrote 0.177 MB perf.data (~7747 samples) ]
 BR_MISP_RETIRED.CONDITIONAL
 cpu/event=0xC5,umask=0x01,name=BR_MISP_RETIRED_CONDITIONAL/pp
 [ perf record: Woken up 1 times to write data ]
 [ perf record: Captured and wrote 0.132 MB perf.data (~5767 samples) ]
 BR_MISP_RETIRED.ALL_BRANCHES_PEBS
 cpu/event=0xC5,umask=0x04,name=BR_MISP_RETIRED_ALL_BRANCHES_PEBS/pp
 [ perf record: Woken up 1 times to write data ]
 [ perf record: Captured and wrote 0.132 MB perf.data (~5781 samples) ]
 HLE_RETIRED.ABORTED
 cpu/event=0xc8,umask=0x04,name=HLE_RETIRED_ABORTED/pp
 [ perf record: Woken up 1 times to write data ]
 [ perf record: Captured and wrote 0.002 MB perf.data (~86 samples) ]
 RTM_RETIRED.ABORTED
 cpu/event=0xc9,umask=0x04,name=RTM_RETIRED_ABORTED/pp
 [ perf record: Woken up 1 times to write data ]
 [ perf record: Captured and wrote 0.002 MB perf.data (~86 samples) ]
 MEM_TRANS_RETIRED.LOAD_LATENCY_GT_4
 cpu/event=0xCD,umask=0x01,ldlat=0x4,name=MEM_TRANS_RETIRED_LOAD_LATENCY_GT_4/pp
 [ perf record: Woken up 1 times to write data ]
 [ perf record: Captured and wrote 0.179 MB perf.data (~7832 samples) ]
 MEM_TRANS_RETIRED.LOAD_LATENCY_GT_8
 cpu/event=0xCD,umask=0x01,ldlat=0x8,name=MEM_TRANS_RETIRED_LOAD_LATENCY_GT_8/pp
 [ perf record: Woken up 1 times to write data ]
 [ perf record: Captured and wrote 0.126 MB perf.data (~5522 samples) ]
 MEM_TRANS_RETIRED.LOAD_LATENCY_GT_16
 cpu/event=0xCD,umask=0x01,ldlat=0x10,name=MEM_TRANS_RETIRED_LOAD_LATENCY_GT_16/pp
 [ perf record: Woken up 1 times to write data ]
 [ perf record: Captured and wrote 0.090 MB perf.data (~3911 samples) ]
 MEM_TRANS_RETIRED.LOAD_LATENCY_GT_32
 cpu/event=0xCD,umask=0x01,ldlat=0x20,name=MEM_TRANS_RETIRED_LOAD_LATENCY_GT_32/pp
 [ perf record: Woken up 1 times to write data ]
 [ perf record: Captured and wrote 0.056 MB perf.data (~2429 samples) ]
 MEM_TRANS_RETIRED.LOAD_LATENCY_GT_64
 cpu/event=0xCD,umask=0x01,ldlat=0x40,name=MEM_TRANS_RETIRED_LOAD_LATENCY_GT_64/pp
 [ perf record: 

Re: [PATCH 1/2] perf, x86: Revamp PEBS event selection

2014-07-02 Thread Andi Kleen
> But also, I think we should conditionally allow the filter bits;
> possibly with a sysfs file like I had.
> 
> Back when we had to sort that SNB cycles thing it was tedious that Linus
> could not just try things.

Hmm, the code in your patch to handle it was quite nasty.
I don't really see the situation repeating.

-Andi

-- 
a...@linux.intel.com -- Speaking for myself only
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 1/2] perf, x86: Revamp PEBS event selection

2014-07-02 Thread Andi Kleen
On Wed, Jul 02, 2014 at 06:07:31PM +0200, Stephane Eranian wrote:
> On Wed, Jul 2, 2014 at 5:48 PM, Andi Kleen  wrote:
> > On Wed, Jul 02, 2014 at 05:44:05PM +0200, Stephane Eranian wrote:
> >> On Wed, Jul 2, 2014 at 5:33 PM, Andi Kleen  wrote:
> >> >> No, still needs to be INTEL_ALL_EVENT_CONSTRAINT(0x0, 0x1)
> >> >> otherwise the get_event_constraint() test I mentioned previously will
> >> >> fail, even with your ALL_FILTER mask.
> >> >
> >> > What events should fail? I verified all PEBS events and they work as 
> >> > expected.
> >> >
> >> Random events should not fail, they should go with precise and not generate
> >> any samples. That's the whole point of the exercise.
> >>
> >> perf record -a -e r6099:p sleep 1
> >
> > Like I said I ran all PEBS events and they generated samples.
> >
> I understand. I ran some random events to make sure I was not
> getting PEBS samples and the system was stable.

Not sure we're talking about the same thing. You claimed my patch
wouldn't let any PEBS events through, but the test results 
disagree with that.

I fixed the broken store events you pointed out.

INST_RETIRED.PREC_DIST
cpu/event=0xC0,umask=0x01,name=INST_RETIRED_PREC_DIST/pp
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.179 MB perf.data (~7821 samples) ]
UOPS_RETIRED.ALL
cpu/event=0xC2,umask=0x01,name=UOPS_RETIRED_ALL/pp
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.179 MB perf.data (~7824 samples) ]
UOPS_RETIRED.RETIRE_SLOTS
cpu/event=0xC2,umask=0x02,name=UOPS_RETIRED_RETIRE_SLOTS/pp
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.180 MB perf.data (~7869 samples) ]
BR_INST_RETIRED.CONDITIONAL
cpu/event=0xC4,umask=0x01,name=BR_INST_RETIRED_CONDITIONAL/pp
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.177 MB perf.data (~7729 samples) ]
BR_INST_RETIRED.NEAR_CALL
cpu/event=0xC4,umask=0x02,name=BR_INST_RETIRED_NEAR_CALL/pp
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.140 MB perf.data (~6112 samples) ]
BR_INST_RETIRED.NEAR_RETURN
cpu/event=0xC4,umask=0x08,name=BR_INST_RETIRED_NEAR_RETURN/pp
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.140 MB perf.data (~6124 samples) ]
BR_INST_RETIRED.NEAR_TAKEN
cpu/event=0xC4,umask=0x20,name=BR_INST_RETIRED_NEAR_TAKEN/pp
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.176 MB perf.data (~7709 samples) ]
BR_INST_RETIRED.ALL_BRANCHES_PEBS
cpu/event=0xC4,umask=0x04,name=BR_INST_RETIRED_ALL_BRANCHES_PEBS/pp
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.177 MB perf.data (~7747 samples) ]
BR_MISP_RETIRED.CONDITIONAL
cpu/event=0xC5,umask=0x01,name=BR_MISP_RETIRED_CONDITIONAL/pp
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.132 MB perf.data (~5767 samples) ]
BR_MISP_RETIRED.ALL_BRANCHES_PEBS
cpu/event=0xC5,umask=0x04,name=BR_MISP_RETIRED_ALL_BRANCHES_PEBS/pp
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.132 MB perf.data (~5781 samples) ]
HLE_RETIRED.ABORTED
cpu/event=0xc8,umask=0x04,name=HLE_RETIRED_ABORTED/pp
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.002 MB perf.data (~86 samples) ]
RTM_RETIRED.ABORTED
cpu/event=0xc9,umask=0x04,name=RTM_RETIRED_ABORTED/pp
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.002 MB perf.data (~86 samples) ]
MEM_TRANS_RETIRED.LOAD_LATENCY_GT_4
cpu/event=0xCD,umask=0x01,ldlat=0x4,name=MEM_TRANS_RETIRED_LOAD_LATENCY_GT_4/pp
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.179 MB perf.data (~7832 samples) ]
MEM_TRANS_RETIRED.LOAD_LATENCY_GT_8
cpu/event=0xCD,umask=0x01,ldlat=0x8,name=MEM_TRANS_RETIRED_LOAD_LATENCY_GT_8/pp
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.126 MB perf.data (~5522 samples) ]
MEM_TRANS_RETIRED.LOAD_LATENCY_GT_16
cpu/event=0xCD,umask=0x01,ldlat=0x10,name=MEM_TRANS_RETIRED_LOAD_LATENCY_GT_16/pp
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.090 MB perf.data (~3911 samples) ]
MEM_TRANS_RETIRED.LOAD_LATENCY_GT_32
cpu/event=0xCD,umask=0x01,ldlat=0x20,name=MEM_TRANS_RETIRED_LOAD_LATENCY_GT_32/pp
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.056 MB perf.data (~2429 samples) ]
MEM_TRANS_RETIRED.LOAD_LATENCY_GT_64
cpu/event=0xCD,umask=0x01,ldlat=0x40,name=MEM_TRANS_RETIRED_LOAD_LATENCY_GT_64/pp
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.012 MB perf.data (~516 samples) ]
MEM_TRANS_RETIRED.LOAD_LATENCY_GT_128
cpu/event=0xCD,umask=0x01,ldlat=0x80,name=MEM_TRANS_RETIRED_LOAD_LATENCY_GT_128/pp
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.014 MB perf.data (~604 samples) ]

Re: [PATCH 1/2] perf, x86: Revamp PEBS event selection

2014-07-02 Thread Stephane Eranian
On Wed, Jul 2, 2014 at 5:48 PM, Andi Kleen  wrote:
> On Wed, Jul 02, 2014 at 05:44:05PM +0200, Stephane Eranian wrote:
>> On Wed, Jul 2, 2014 at 5:33 PM, Andi Kleen  wrote:
>> >> No, still needs to be INTEL_ALL_EVENT_CONSTRAINT(0x0, 0x1)
>> >> otherwise the get_event_constraint() test I mentioned previously will
>> >> fail, even with your ALL_FILTER mask.
>> >
>> > What events should fail? I verified all PEBS events and they work as 
>> > expected.
>> >
>> Random events should not fail, they should go with precise and not generate
>> any samples. That's the whole point of the exercise.
>>
>> perf record -a -e r6099:p sleep 1
>
> Like I said I ran all PEBS events and they generated samples.
>
I understand. I ran some random events to make sure I was not
getting PEBS samples and the system was stable.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 1/2] perf, x86: Revamp PEBS event selection

2014-07-02 Thread Andi Kleen
On Wed, Jul 02, 2014 at 05:44:05PM +0200, Stephane Eranian wrote:
> On Wed, Jul 2, 2014 at 5:33 PM, Andi Kleen  wrote:
> >> No, still needs to be INTEL_ALL_EVENT_CONSTRAINT(0x0, 0x1)
> >> otherwise the get_event_constraint() test I mentioned previously will
> >> fail, even with your ALL_FILTER mask.
> >
> > What events should fail? I verified all PEBS events and they work as 
> > expected.
> >
> Random events should not fail, they should go with precise and not generate
> any samples. That's the whole point of the exercise.
> 
> perf record -a -e r6099:p sleep 1

Like I said I ran all PEBS events and they generated samples.

-Andi
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 1/2] perf, x86: Revamp PEBS event selection

2014-07-02 Thread Stephane Eranian
On Wed, Jul 2, 2014 at 5:33 PM, Andi Kleen  wrote:
>> No, still needs to be INTEL_ALL_EVENT_CONSTRAINT(0x0, 0x1)
>> otherwise the get_event_constraint() test I mentioned previously will
>> fail, even with your ALL_FILTER mask.
>
> What events should fail? I verified all PEBS events and they work as expected.
>
Random events should not fail, they should go with precise and not generate
any samples. That's the whole point of the exercise.

perf record -a -e r6099:p sleep 1

>> > -   INTEL_EVENT_CONSTRAINT(0xc4, 0xf),/* BR_INST_RETIRED.* */
>> > -   INTEL_EVENT_CONSTRAINT(0xc5, 0xf),/* BR_MISP_RETIRED.* */
>> > -   INTEL_PLD_CONSTRAINT(0x01cd, 0x8),/* 
>> > MEM_TRANS_RETIRED.LAT_ABOVE_THR */
>> > -   INTEL_PST_CONSTRAINT(0x02cd, 0x8),/* 
>> > MEM_TRANS_RETIRED.PRECISE_STORES */
>> > -   INTEL_EVENT_CONSTRAINT(0xd0, 0xf),/* MEM_UOP_RETIRED.* */
>> > -   INTEL_EVENT_CONSTRAINT(0xd1, 0xf),/* MEM_LOAD_UOPS_RETIRED.* */
>> > -   INTEL_EVENT_CONSTRAINT(0xd2, 0xf),/* 
>> > MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
>> > -   INTEL_EVENT_CONSTRAINT(0xd3, 0xf),/* 
>> > MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
>> > -   INTEL_UEVENT_CONSTRAINT(0x02d4, 0xf), /* 
>> > MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS */
>> > +   INTEL_PLD_CONSTRAINT(0x01cd, 0xf),/* 
>> > MEM_TRANS_RETIRED.LAT_ABOVE_THR */
>> > +   INTEL_PST_CONSTRAINT(0x02cd, 0xf),/* 
>> > MEM_TRANS_RETIRED.PRECISE_STORES */
>>
>> No, precise stores only work on counter 3, keep 0x8 here
>
> Good point.
>
>
>
> -Andi
> --
> a...@linux.intel.com -- Speaking for myself only
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 1/2] perf, x86: Revamp PEBS event selection

2014-07-02 Thread Peter Zijlstra
On Wed, Jul 02, 2014 at 08:34:07AM -0700, Andi Kleen wrote:
> On Wed, Jul 02, 2014 at 02:29:02PM +0200, Peter Zijlstra wrote:
> > On Fri, Jun 27, 2014 at 04:10:11PM -0700, Andi Kleen wrote:
> > > From: Andi Kleen 
> > > 
> > > As already discussed earlier in email.
> > 
> > Is an entirely inappropriate start for a Changelog. Do not assume prior
> > knowledge. If its relevant include it here without reference.
> 
> Thanks. Do you have any other comments?

What Stephane said ;-)

But also, I think we should conditionally allow the filter bits;
possibly with a sysfs file like I had.

Back when we had to sort that SNB cycles thing it was tedious that Linus
could not just try things.


pgpWYwScdSllk.pgp
Description: PGP signature


Re: [PATCH 1/2] perf, x86: Revamp PEBS event selection

2014-07-02 Thread Andi Kleen
On Wed, Jul 02, 2014 at 02:29:02PM +0200, Peter Zijlstra wrote:
> On Fri, Jun 27, 2014 at 04:10:11PM -0700, Andi Kleen wrote:
> > From: Andi Kleen 
> > 
> > As already discussed earlier in email.
> 
> Is an entirely inappropriate start for a Changelog. Do not assume prior
> knowledge. If its relevant include it here without reference.

Thanks. Do you have any other comments?

-Andi

-- 
a...@linux.intel.com -- Speaking for myself only
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 1/2] perf, x86: Revamp PEBS event selection

2014-07-02 Thread Andi Kleen
> No, still needs to be INTEL_ALL_EVENT_CONSTRAINT(0x0, 0x1)
> otherwise the get_event_constraint() test I mentioned previously will
> fail, even with your ALL_FILTER mask.

What events should fail? I verified all PEBS events and they work as expected.

> > -   INTEL_EVENT_CONSTRAINT(0xc4, 0xf),/* BR_INST_RETIRED.* */
> > -   INTEL_EVENT_CONSTRAINT(0xc5, 0xf),/* BR_MISP_RETIRED.* */
> > -   INTEL_PLD_CONSTRAINT(0x01cd, 0x8),/* 
> > MEM_TRANS_RETIRED.LAT_ABOVE_THR */
> > -   INTEL_PST_CONSTRAINT(0x02cd, 0x8),/* 
> > MEM_TRANS_RETIRED.PRECISE_STORES */
> > -   INTEL_EVENT_CONSTRAINT(0xd0, 0xf),/* MEM_UOP_RETIRED.* */
> > -   INTEL_EVENT_CONSTRAINT(0xd1, 0xf),/* MEM_LOAD_UOPS_RETIRED.* */
> > -   INTEL_EVENT_CONSTRAINT(0xd2, 0xf),/* 
> > MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
> > -   INTEL_EVENT_CONSTRAINT(0xd3, 0xf),/* 
> > MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
> > -   INTEL_UEVENT_CONSTRAINT(0x02d4, 0xf), /* 
> > MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS */
> > +   INTEL_PLD_CONSTRAINT(0x01cd, 0xf),/* 
> > MEM_TRANS_RETIRED.LAT_ABOVE_THR */
> > +   INTEL_PST_CONSTRAINT(0x02cd, 0xf),/* 
> > MEM_TRANS_RETIRED.PRECISE_STORES */
> 
> No, precise stores only work on counter 3, keep 0x8 here

Good point.



-Andi
-- 
a...@linux.intel.com -- Speaking for myself only
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 1/2] perf, x86: Revamp PEBS event selection

2014-07-02 Thread Stephane Eranian
Andi,


On Sat, Jun 28, 2014 at 1:10 AM, Andi Kleen  wrote:
> From: Andi Kleen 
>
> As already discussed earlier in email.
>
> The basic idea is that it does not make sense to list all PEBS
> events individually. The list is very long, sometimes outdated
> and the hardware doesn't need it. If an event does not support
> PEBS it will just not count, there is no security issue.
>
> This vastly simplifies the PEBS event selection.
>
> Bugs fixed:
> - We do not allow setting forbidden flags with PEBS anymore
> (SDM 18.9.4), except for the special cycle event.
> This is done using a new constraint macro that also
> matches on the event flags.
> - We now allow DataLA on all Haswell events, not just
> a small subset. In general all PEBS events that tag memory
> accesses support DataLA on Haswell. Otherwise the reported
> address is just zero. This allows address profiling
> on vastly more events.
> - We did not allow all PEBS events on Haswell.
>
> This includes the changes proposed by Stephane earlier and obsoletes
> his patchkit.
>
> I only did Sandy Bridge and Silvermont and later so far, mostly because these
> are the parts I could directly confirm the hardware behavior with hardware
> architects.
>
This patch still does not work as expected on any platforms. See below

> Cc: eran...@google.com
> Signed-off-by: Andi Kleen 
> ---
>  arch/x86/include/asm/perf_event.h |  8 +++
>  arch/x86/kernel/cpu/perf_event.h  | 18 --
>  arch/x86/kernel/cpu/perf_event_intel_ds.c | 96 
> +++
>  3 files changed, 43 insertions(+), 79 deletions(-)
>
> diff --git a/arch/x86/include/asm/perf_event.h 
> b/arch/x86/include/asm/perf_event.h
> index 8249df4..8dfc9fd 100644
> --- a/arch/x86/include/asm/perf_event.h
> +++ b/arch/x86/include/asm/perf_event.h
> @@ -51,6 +51,14 @@
>  ARCH_PERFMON_EVENTSEL_EDGE  |  \
>  ARCH_PERFMON_EVENTSEL_INV   |  \
>  ARCH_PERFMON_EVENTSEL_CMASK)
> +#define X86_ALL_EVENT_FLAGS\
> +   (ARCH_PERFMON_EVENTSEL_EDGE |   \
> +ARCH_PERFMON_EVENTSEL_INV |\
> +ARCH_PERFMON_EVENTSEL_CMASK |  \
> +ARCH_PERFMON_EVENTSEL_ANY |\
> +ARCH_PERFMON_EVENTSEL_PIN_CONTROL |\
> +HSW_IN_TX |\
> +HSW_IN_TX_CHECKPOINTED)
>  #define AMD64_RAW_EVENT_MASK   \
> (X86_RAW_EVENT_MASK  |  \
>  AMD64_EVENTSEL_EVENT)
> diff --git a/arch/x86/kernel/cpu/perf_event.h 
> b/arch/x86/kernel/cpu/perf_event.h
> index 3b2f9bd..9907759 100644
> --- a/arch/x86/kernel/cpu/perf_event.h
> +++ b/arch/x86/kernel/cpu/perf_event.h
> @@ -252,16 +252,24 @@ struct cpu_hw_events {
> EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK)
>
>  #define INTEL_PLD_CONSTRAINT(c, n) \
> -   __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \
> +   __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
>HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LDLAT)
>
>  #define INTEL_PST_CONSTRAINT(c, n) \
> -   __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \
> +   __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
>   HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST)
>
> -/* DataLA version of store sampling without extra enable bit. */
> -#define INTEL_PST_HSW_CONSTRAINT(c, n) \
> -   __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \
> +/* Event constraint, but match on all event flags too. */
> +#define INTEL_FLAGS_EVENT_CONSTRAINT(c, n) \
> +   EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS)
> +
> +/* Check only flags, but allow all event/umask */
> +#define INTEL_ALL_EVENT_CONSTRAINT(flags, n)   \
> +   EVENT_CONSTRAINT(flags, n, X86_ALL_EVENT_FLAGS)
> +
> +/* Same as above, but enable DataLA */
> +#define INTEL_ALL_EVENT_CONSTRAINT_DATALA(flags, n) \
> +   __EVENT_CONSTRAINT(flags, n, X86_ALL_EVENT_FLAGS, \
>   HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST_HSW)
>
>  /*
> diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c 
> b/arch/x86/kernel/cpu/perf_event_intel_ds.c
> index 980970c..d50142e 100644
> --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
> +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
> @@ -567,28 +567,10 @@ struct event_constraint 
> intel_atom_pebs_event_constraints[] = {
>  };
>
>  struct event_constraint intel_slm_pebs_event_constraints[] = {
> -   INTEL_UEVENT_CONSTRAINT(0x0103, 0x1), /* 
> REHABQ.LD_BLOCK_ST_FORWARD_PS */
> -   INTEL_UEVENT_CONSTRAINT(0x0803, 0x1), /* REHABQ.LD_SPLITS_PS */
> -   INTEL_UEVENT_CONSTRAINT(0x0204, 0x1), /* 
> MEM_UOPS_RETIRED.L2_HIT_LOADS_PS */
> -   INTEL_UEVENT_CONSTRAINT(0x0404, 0x1), /* 
> MEM_UOPS_RETIRED.L2_MISS_LOADS_PS */
> -   INTEL_UEVENT_CONSTRAINT(0x0804, 0x1), /* 
> MEM_UOPS_RETIRED.DTLB_MISS_LOADS_PS */
> -   INTEL_UEVENT_CONSTRAINT(0x2004, 0x1), /* MEM_UOPS_RETIRED.HITM_PS */
> -   

Re: [PATCH 1/2] perf, x86: Revamp PEBS event selection

2014-07-02 Thread Peter Zijlstra
On Fri, Jun 27, 2014 at 04:10:11PM -0700, Andi Kleen wrote:
> From: Andi Kleen 
> 
> As already discussed earlier in email.

Is an entirely inappropriate start for a Changelog. Do not assume prior
knowledge. If it's relevant, include it here without reference.


pgprcUiTuNF0F.pgp
Description: PGP signature


Re: [PATCH 1/2] perf, x86: Revamp PEBS event selection

2014-07-02 Thread Peter Zijlstra
On Fri, Jun 27, 2014 at 04:10:11PM -0700, Andi Kleen wrote:
 From: Andi Kleen a...@linux.intel.com
 
 As already discussed earlier in email.

Is an entirely inappropriate start for a Changelog. Do not assume prior
knowledge. If it's relevant, include it here without reference.


pgprcUiTuNF0F.pgp
Description: PGP signature


Re: [PATCH 1/2] perf, x86: Revamp PEBS event selection

2014-07-02 Thread Stephane Eranian
Andi,


On Sat, Jun 28, 2014 at 1:10 AM, Andi Kleen a...@firstfloor.org wrote:
 From: Andi Kleen a...@linux.intel.com

 As already discussed earlier in email.

 The basic idea is that it does not make sense to list all PEBS
 events individually. The list is very long, sometimes outdated
 and the hardware doesn't need it. If an event does not support
 PEBS it will just not count, there is no security issue.

 This vastly simplifies the PEBS event selection.

 Bugs fixed:
 - We do not allow setting forbidden flags with PEBS anymore
 (SDM 18.9.4), except for the special cycle event.
 This is done using a new constraint macro that also
 matches on the event flags.
 - We now allow DataLA on all Haswell events, not just
 a small subset. In general all PEBS events that tag memory
 accesses support DataLA on Haswell. Otherwise the reported
 address is just zero. This allows address profiling
 on vastly more events.
 - We did not allow all PEBS events on Haswell.

 This includes the changes proposed by Stephane earlier and obsoletes
 his patchkit.

 I only did Sandy Bridge and Silvermont and later so far, mostly because these
 are the parts I could directly confirm the hardware behavior with hardware
 architects.

This patch still does not work as expected on any platforms. See below

 Cc: eran...@google.com
 Signed-off-by: Andi Kleen a...@linux.intel.com
 ---
  arch/x86/include/asm/perf_event.h |  8 +++
  arch/x86/kernel/cpu/perf_event.h  | 18 --
  arch/x86/kernel/cpu/perf_event_intel_ds.c | 96 
 +++
  3 files changed, 43 insertions(+), 79 deletions(-)

 diff --git a/arch/x86/include/asm/perf_event.h 
 b/arch/x86/include/asm/perf_event.h
 index 8249df4..8dfc9fd 100644
 --- a/arch/x86/include/asm/perf_event.h
 +++ b/arch/x86/include/asm/perf_event.h
 @@ -51,6 +51,14 @@
  ARCH_PERFMON_EVENTSEL_EDGE  |  \
  ARCH_PERFMON_EVENTSEL_INV   |  \
  ARCH_PERFMON_EVENTSEL_CMASK)
 +#define X86_ALL_EVENT_FLAGS\
 +   (ARCH_PERFMON_EVENTSEL_EDGE |   \
 +ARCH_PERFMON_EVENTSEL_INV |\
 +ARCH_PERFMON_EVENTSEL_CMASK |  \
 +ARCH_PERFMON_EVENTSEL_ANY |\
 +ARCH_PERFMON_EVENTSEL_PIN_CONTROL |\
 +HSW_IN_TX |\
 +HSW_IN_TX_CHECKPOINTED)
  #define AMD64_RAW_EVENT_MASK   \
 (X86_RAW_EVENT_MASK  |  \
  AMD64_EVENTSEL_EVENT)
 diff --git a/arch/x86/kernel/cpu/perf_event.h 
 b/arch/x86/kernel/cpu/perf_event.h
 index 3b2f9bd..9907759 100644
 --- a/arch/x86/kernel/cpu/perf_event.h
 +++ b/arch/x86/kernel/cpu/perf_event.h
 @@ -252,16 +252,24 @@ struct cpu_hw_events {
 EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK)

  #define INTEL_PLD_CONSTRAINT(c, n) \
 -   __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \
 +   __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LDLAT)

  #define INTEL_PST_CONSTRAINT(c, n) \
 -   __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \
 +   __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
   HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST)

 -/* DataLA version of store sampling without extra enable bit. */
 -#define INTEL_PST_HSW_CONSTRAINT(c, n) \
 -   __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \
 +/* Event constraint, but match on all event flags too. */
 +#define INTEL_FLAGS_EVENT_CONSTRAINT(c, n) \
 +   EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS)
 +
 +/* Check only flags, but allow all event/umask */
 +#define INTEL_ALL_EVENT_CONSTRAINT(flags, n)   \
 +   EVENT_CONSTRAINT(flags, n, X86_ALL_EVENT_FLAGS)
 +
 +/* Same as above, but enable DataLA */
 +#define INTEL_ALL_EVENT_CONSTRAINT_DATALA(flags, n) \
 +   __EVENT_CONSTRAINT(flags, n, X86_ALL_EVENT_FLAGS, \
   HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST_HSW)

  /*
 diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c 
 b/arch/x86/kernel/cpu/perf_event_intel_ds.c
 index 980970c..d50142e 100644
 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
 +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
 @@ -567,28 +567,10 @@ struct event_constraint 
 intel_atom_pebs_event_constraints[] = {
  };

  struct event_constraint intel_slm_pebs_event_constraints[] = {
 -   INTEL_UEVENT_CONSTRAINT(0x0103, 0x1), /* 
 REHABQ.LD_BLOCK_ST_FORWARD_PS */
 -   INTEL_UEVENT_CONSTRAINT(0x0803, 0x1), /* REHABQ.LD_SPLITS_PS */
 -   INTEL_UEVENT_CONSTRAINT(0x0204, 0x1), /* 
 MEM_UOPS_RETIRED.L2_HIT_LOADS_PS */
 -   INTEL_UEVENT_CONSTRAINT(0x0404, 0x1), /* 
 MEM_UOPS_RETIRED.L2_MISS_LOADS_PS */
 -   INTEL_UEVENT_CONSTRAINT(0x0804, 0x1), /* 
 MEM_UOPS_RETIRED.DTLB_MISS_LOADS_PS */
 -   INTEL_UEVENT_CONSTRAINT(0x2004, 0x1), /* MEM_UOPS_RETIRED.HITM_PS */
 -   INTEL_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY_PS */
 -   

Re: [PATCH 1/2] perf, x86: Revamp PEBS event selection

2014-07-02 Thread Andi Kleen
 No, still needs to be INTEL_ALL_EVENT_CONSTRAINT(0x0, 0x1)
 otherwise the get_event_constraint() test I mentioned previously will
 fail, even with your ALL_FILTER mask.

What events should fail? I verified all PEBS events and they work as expected.

  -   INTEL_EVENT_CONSTRAINT(0xc4, 0xf),/* BR_INST_RETIRED.* */
  -   INTEL_EVENT_CONSTRAINT(0xc5, 0xf),/* BR_MISP_RETIRED.* */
  -   INTEL_PLD_CONSTRAINT(0x01cd, 0x8),/* 
  MEM_TRANS_RETIRED.LAT_ABOVE_THR */
  -   INTEL_PST_CONSTRAINT(0x02cd, 0x8),/* 
  MEM_TRANS_RETIRED.PRECISE_STORES */
  -   INTEL_EVENT_CONSTRAINT(0xd0, 0xf),/* MEM_UOP_RETIRED.* */
  -   INTEL_EVENT_CONSTRAINT(0xd1, 0xf),/* MEM_LOAD_UOPS_RETIRED.* */
  -   INTEL_EVENT_CONSTRAINT(0xd2, 0xf),/* 
  MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
  -   INTEL_EVENT_CONSTRAINT(0xd3, 0xf),/* 
  MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
  -   INTEL_UEVENT_CONSTRAINT(0x02d4, 0xf), /* 
  MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS */
  +   INTEL_PLD_CONSTRAINT(0x01cd, 0xf),/* 
  MEM_TRANS_RETIRED.LAT_ABOVE_THR */
  +   INTEL_PST_CONSTRAINT(0x02cd, 0xf),/* 
  MEM_TRANS_RETIRED.PRECISE_STORES */
 
 No, precise stores only work on counter 3, keep 0x8 here

Good point.



-Andi
-- 
a...@linux.intel.com -- Speaking for myself only
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 1/2] perf, x86: Revamp PEBS event selection

2014-07-02 Thread Andi Kleen
On Wed, Jul 02, 2014 at 02:29:02PM +0200, Peter Zijlstra wrote:
 On Fri, Jun 27, 2014 at 04:10:11PM -0700, Andi Kleen wrote:
  From: Andi Kleen a...@linux.intel.com
  
  As already discussed earlier in email.
 
 Is an entirely inappropriate start for a Changelog. Do not assume prior
 knowledge. If it's relevant, include it here without reference.

Thanks. Do you have any other comments?

-Andi

-- 
a...@linux.intel.com -- Speaking for myself only
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 1/2] perf, x86: Revamp PEBS event selection

2014-07-02 Thread Peter Zijlstra
On Wed, Jul 02, 2014 at 08:34:07AM -0700, Andi Kleen wrote:
 On Wed, Jul 02, 2014 at 02:29:02PM +0200, Peter Zijlstra wrote:
  On Fri, Jun 27, 2014 at 04:10:11PM -0700, Andi Kleen wrote:
   From: Andi Kleen a...@linux.intel.com
   
   As already discussed earlier in email.
  
  Is an entirely inappropriate start for a Changelog. Do not assume prior
  knowledge. If it's relevant, include it here without reference.
 
 Thanks. Do you have any other comments?

What Stephane said ;-)

But also, I think we should conditionally allow the filter bits;
possibly with a sysfs file like I had.

Back when we had to sort that SNB cycles thing it was tedious that Linus
could not just try things.


pgpWYwScdSllk.pgp
Description: PGP signature


Re: [PATCH 1/2] perf, x86: Revamp PEBS event selection

2014-07-02 Thread Stephane Eranian
On Wed, Jul 2, 2014 at 5:33 PM, Andi Kleen a...@linux.intel.com wrote:
 No, still needs to be INTEL_ALL_EVENT_CONSTRAINT(0x0, 0x1)
 otherwise the get_event_constraint() test I mentioned previously will
 fail, even with your ALL_FILTER mask.

 What events should fail? I verified all PEBS events and they work as expected.

Random events should not fail, they should go with precise and not generate
any samples. That's the whole point of the exercise.

perf record -a -e r6099:p sleep 1

  -   INTEL_EVENT_CONSTRAINT(0xc4, 0xf),/* BR_INST_RETIRED.* */
  -   INTEL_EVENT_CONSTRAINT(0xc5, 0xf),/* BR_MISP_RETIRED.* */
  -   INTEL_PLD_CONSTRAINT(0x01cd, 0x8),/* 
  MEM_TRANS_RETIRED.LAT_ABOVE_THR */
  -   INTEL_PST_CONSTRAINT(0x02cd, 0x8),/* 
  MEM_TRANS_RETIRED.PRECISE_STORES */
  -   INTEL_EVENT_CONSTRAINT(0xd0, 0xf),/* MEM_UOP_RETIRED.* */
  -   INTEL_EVENT_CONSTRAINT(0xd1, 0xf),/* MEM_LOAD_UOPS_RETIRED.* */
  -   INTEL_EVENT_CONSTRAINT(0xd2, 0xf),/* 
  MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
  -   INTEL_EVENT_CONSTRAINT(0xd3, 0xf),/* 
  MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
  -   INTEL_UEVENT_CONSTRAINT(0x02d4, 0xf), /* 
  MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS */
  +   INTEL_PLD_CONSTRAINT(0x01cd, 0xf),/* 
  MEM_TRANS_RETIRED.LAT_ABOVE_THR */
  +   INTEL_PST_CONSTRAINT(0x02cd, 0xf),/* 
  MEM_TRANS_RETIRED.PRECISE_STORES */

 No, precise stores only work on counter 3, keep 0x8 here

 Good point.



 -Andi
 --
 a...@linux.intel.com -- Speaking for myself only
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 1/2] perf, x86: Revamp PEBS event selection

2014-07-02 Thread Andi Kleen
On Wed, Jul 02, 2014 at 05:44:05PM +0200, Stephane Eranian wrote:
 On Wed, Jul 2, 2014 at 5:33 PM, Andi Kleen a...@linux.intel.com wrote:
  No, still needs to be INTEL_ALL_EVENT_CONSTRAINT(0x0, 0x1)
  otherwise the get_event_constraint() test I mentioned previously will
  fail, even with your ALL_FILTER mask.
 
  What events should fail? I verified all PEBS events and they work as 
  expected.
 
 Random events should not fail, they should go with precise and not generate
 any samples. That's the whole point of the exercise.
 
 perf record -a -e r6099:p sleep 1

Like I said I ran all PEBS events and they generated samples.

-Andi
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 1/2] perf, x86: Revamp PEBS event selection

2014-07-02 Thread Stephane Eranian
On Wed, Jul 2, 2014 at 5:48 PM, Andi Kleen a...@linux.intel.com wrote:
 On Wed, Jul 02, 2014 at 05:44:05PM +0200, Stephane Eranian wrote:
 On Wed, Jul 2, 2014 at 5:33 PM, Andi Kleen a...@linux.intel.com wrote:
  No, still needs to be INTEL_ALL_EVENT_CONSTRAINT(0x0, 0x1)
  otherwise the get_event_constraint() test I mentioned previously will
  fail, even with your ALL_FILTER mask.
 
  What events should fail? I verified all PEBS events and they work as 
  expected.
 
 Random events should not fail, they should go with precise and not generate
 any samples. That's the whole point of the exercise.

 perf record -a -e r6099:p sleep 1

 Like I said I ran all PEBS events and they generated samples.

I understand. I ran some random events to make sure I was not
getting PEBS samples and the system was stable.
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 1/2] perf, x86: Revamp PEBS event selection

2014-07-02 Thread Andi Kleen
On Wed, Jul 02, 2014 at 06:07:31PM +0200, Stephane Eranian wrote:
 On Wed, Jul 2, 2014 at 5:48 PM, Andi Kleen a...@linux.intel.com wrote:
  On Wed, Jul 02, 2014 at 05:44:05PM +0200, Stephane Eranian wrote:
  On Wed, Jul 2, 2014 at 5:33 PM, Andi Kleen a...@linux.intel.com wrote:
   No, still needs to be INTEL_ALL_EVENT_CONSTRAINT(0x0, 0x1)
   otherwise the get_event_constraint() test I mentioned previously will
   fail, even with your ALL_FILTER mask.
  
   What events should fail? I verified all PEBS events and they work as 
   expected.
  
  Random events should not fail, they should go with precise and not generate
  any samples. That's the whole point of the exercise.
 
  perf record -a -e r6099:p sleep 1
 
  Like I said I ran all PEBS events and they generated samples.
 
 I understand. I ran some random events to make sure I was not
 getting PEBS samples and the system was stable.

Not sure we're talking about the same thing. You claimed my patch
wouldn't let any PEBS events through, but the test results 
disagree with that.

I fixed the broken store events you pointed out.

INST_RETIRED.PREC_DIST
cpu/event=0xC0,umask=0x01,name=INST_RETIRED_PREC_DIST/pp
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.179 MB perf.data (~7821 samples) ]
UOPS_RETIRED.ALL
cpu/event=0xC2,umask=0x01,name=UOPS_RETIRED_ALL/pp
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.179 MB perf.data (~7824 samples) ]
UOPS_RETIRED.RETIRE_SLOTS
cpu/event=0xC2,umask=0x02,name=UOPS_RETIRED_RETIRE_SLOTS/pp
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.180 MB perf.data (~7869 samples) ]
BR_INST_RETIRED.CONDITIONAL
cpu/event=0xC4,umask=0x01,name=BR_INST_RETIRED_CONDITIONAL/pp
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.177 MB perf.data (~7729 samples) ]
BR_INST_RETIRED.NEAR_CALL
cpu/event=0xC4,umask=0x02,name=BR_INST_RETIRED_NEAR_CALL/pp
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.140 MB perf.data (~6112 samples) ]
BR_INST_RETIRED.NEAR_RETURN
cpu/event=0xC4,umask=0x08,name=BR_INST_RETIRED_NEAR_RETURN/pp
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.140 MB perf.data (~6124 samples) ]
BR_INST_RETIRED.NEAR_TAKEN
cpu/event=0xC4,umask=0x20,name=BR_INST_RETIRED_NEAR_TAKEN/pp
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.176 MB perf.data (~7709 samples) ]
BR_INST_RETIRED.ALL_BRANCHES_PEBS
cpu/event=0xC4,umask=0x04,name=BR_INST_RETIRED_ALL_BRANCHES_PEBS/pp
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.177 MB perf.data (~7747 samples) ]
BR_MISP_RETIRED.CONDITIONAL
cpu/event=0xC5,umask=0x01,name=BR_MISP_RETIRED_CONDITIONAL/pp
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.132 MB perf.data (~5767 samples) ]
BR_MISP_RETIRED.ALL_BRANCHES_PEBS
cpu/event=0xC5,umask=0x04,name=BR_MISP_RETIRED_ALL_BRANCHES_PEBS/pp
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.132 MB perf.data (~5781 samples) ]
HLE_RETIRED.ABORTED
cpu/event=0xc8,umask=0x04,name=HLE_RETIRED_ABORTED/pp
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.002 MB perf.data (~86 samples) ]
RTM_RETIRED.ABORTED
cpu/event=0xc9,umask=0x04,name=RTM_RETIRED_ABORTED/pp
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.002 MB perf.data (~86 samples) ]
MEM_TRANS_RETIRED.LOAD_LATENCY_GT_4
cpu/event=0xCD,umask=0x01,ldlat=0x4,name=MEM_TRANS_RETIRED_LOAD_LATENCY_GT_4/pp
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.179 MB perf.data (~7832 samples) ]
MEM_TRANS_RETIRED.LOAD_LATENCY_GT_8
cpu/event=0xCD,umask=0x01,ldlat=0x8,name=MEM_TRANS_RETIRED_LOAD_LATENCY_GT_8/pp
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.126 MB perf.data (~5522 samples) ]
MEM_TRANS_RETIRED.LOAD_LATENCY_GT_16
cpu/event=0xCD,umask=0x01,ldlat=0x10,name=MEM_TRANS_RETIRED_LOAD_LATENCY_GT_16/pp
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.090 MB perf.data (~3911 samples) ]
MEM_TRANS_RETIRED.LOAD_LATENCY_GT_32
cpu/event=0xCD,umask=0x01,ldlat=0x20,name=MEM_TRANS_RETIRED_LOAD_LATENCY_GT_32/pp
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.056 MB perf.data (~2429 samples) ]
MEM_TRANS_RETIRED.LOAD_LATENCY_GT_64
cpu/event=0xCD,umask=0x01,ldlat=0x40,name=MEM_TRANS_RETIRED_LOAD_LATENCY_GT_64/pp
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.012 MB perf.data (~516 samples) ]
MEM_TRANS_RETIRED.LOAD_LATENCY_GT_128
cpu/event=0xCD,umask=0x01,ldlat=0x80,name=MEM_TRANS_RETIRED_LOAD_LATENCY_GT_128/pp
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.014 MB perf.data (~604 samples) ]

Re: [PATCH 1/2] perf, x86: Revamp PEBS event selection

2014-07-02 Thread Andi Kleen
 But also, I think we should conditionally allow the filter bits;
 possibly with a sysfs file like I had.
 
 Back when we had to sort that SNB cycles thing it was tedious that Linus
 could not just try things.

Hmm, the code in your patch to handle it was quite nasty.
I don't really see the situation repeating.

-Andi

-- 
a...@linux.intel.com -- Speaking for myself only
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 1/2] perf, x86: Revamp PEBS event selection

2014-06-27 Thread Andi Kleen
From: Andi Kleen 

As already discussed earlier in email.

The basic idea is that it does not make sense to list all PEBS
events individually. The list is very long, sometimes outdated
and the hardware doesn't need it. If an event does not support
PEBS it will just not count, there is no security issue.

This vastly simplifies the PEBS event selection.

Bugs fixed:
- We do not allow setting forbidden flags with PEBS anymore
(SDM 18.9.4), except for the special cycle event.
This is done using a new constraint macro that also
matches on the event flags.
- We now allow DataLA on all Haswell events, not just
a small subset. In general all PEBS events that tag memory
accesses support DataLA on Haswell. Otherwise the reported
address is just zero. This allows address profiling
on vastly more events.
- We did not allow all PEBS events on Haswell.

This includes the changes proposed by Stephane earlier and obsoletes
his patchkit.

I only did Sandy Bridge and Silvermont and later so far, mostly because these
are the parts I could directly confirm the hardware behavior with hardware
architects.

Cc: eran...@google.com
Signed-off-by: Andi Kleen 
---
 arch/x86/include/asm/perf_event.h |  8 +++
 arch/x86/kernel/cpu/perf_event.h  | 18 --
 arch/x86/kernel/cpu/perf_event_intel_ds.c | 96 +++
 3 files changed, 43 insertions(+), 79 deletions(-)

diff --git a/arch/x86/include/asm/perf_event.h 
b/arch/x86/include/asm/perf_event.h
index 8249df4..8dfc9fd 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -51,6 +51,14 @@
 ARCH_PERFMON_EVENTSEL_EDGE  |  \
 ARCH_PERFMON_EVENTSEL_INV   |  \
 ARCH_PERFMON_EVENTSEL_CMASK)
+#define X86_ALL_EVENT_FLAGS\
+   (ARCH_PERFMON_EVENTSEL_EDGE |   \
+ARCH_PERFMON_EVENTSEL_INV |\
+ARCH_PERFMON_EVENTSEL_CMASK |  \
+ARCH_PERFMON_EVENTSEL_ANY |\
+ARCH_PERFMON_EVENTSEL_PIN_CONTROL |\
+HSW_IN_TX |\
+HSW_IN_TX_CHECKPOINTED)
 #define AMD64_RAW_EVENT_MASK   \
(X86_RAW_EVENT_MASK  |  \
 AMD64_EVENTSEL_EVENT)
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 3b2f9bd..9907759 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -252,16 +252,24 @@ struct cpu_hw_events {
EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK)
 
 #define INTEL_PLD_CONSTRAINT(c, n) \
-   __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \
+   __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
   HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LDLAT)
 
 #define INTEL_PST_CONSTRAINT(c, n) \
-   __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \
+   __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
  HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST)
 
-/* DataLA version of store sampling without extra enable bit. */
-#define INTEL_PST_HSW_CONSTRAINT(c, n) \
-   __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \
+/* Event constraint, but match on all event flags too. */
+#define INTEL_FLAGS_EVENT_CONSTRAINT(c, n) \
+   EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS)
+
+/* Check only flags, but allow all event/umask */
+#define INTEL_ALL_EVENT_CONSTRAINT(flags, n)   \
+   EVENT_CONSTRAINT(flags, n, X86_ALL_EVENT_FLAGS)
+
+/* Same as above, but enable DataLA */
+#define INTEL_ALL_EVENT_CONSTRAINT_DATALA(flags, n) \
+   __EVENT_CONSTRAINT(flags, n, X86_ALL_EVENT_FLAGS, \
  HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST_HSW)
 
 /*
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c 
b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 980970c..d50142e 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -567,28 +567,10 @@ struct event_constraint 
intel_atom_pebs_event_constraints[] = {
 };
 
 struct event_constraint intel_slm_pebs_event_constraints[] = {
-   INTEL_UEVENT_CONSTRAINT(0x0103, 0x1), /* REHABQ.LD_BLOCK_ST_FORWARD_PS 
*/
-   INTEL_UEVENT_CONSTRAINT(0x0803, 0x1), /* REHABQ.LD_SPLITS_PS */
-   INTEL_UEVENT_CONSTRAINT(0x0204, 0x1), /* 
MEM_UOPS_RETIRED.L2_HIT_LOADS_PS */
-   INTEL_UEVENT_CONSTRAINT(0x0404, 0x1), /* 
MEM_UOPS_RETIRED.L2_MISS_LOADS_PS */
-   INTEL_UEVENT_CONSTRAINT(0x0804, 0x1), /* 
MEM_UOPS_RETIRED.DTLB_MISS_LOADS_PS */
-   INTEL_UEVENT_CONSTRAINT(0x2004, 0x1), /* MEM_UOPS_RETIRED.HITM_PS */
-   INTEL_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY_PS */
-   INTEL_UEVENT_CONSTRAINT(0x00c4, 0x1), /* 
BR_INST_RETIRED.ALL_BRANCHES_PS */
-   INTEL_UEVENT_CONSTRAINT(0x7ec4, 0x1), /* BR_INST_RETIRED.JCC_PS */
-   INTEL_UEVENT_CONSTRAINT(0xbfc4, 0x1), /* BR_INST_RETIRED.FAR_BRANCH_PS 
*/
-   INTEL_UEVENT_CONSTRAINT(0xebc4, 0x1), /* 

[PATCH 1/2] perf, x86: Revamp PEBS event selection

2014-06-27 Thread Andi Kleen
From: Andi Kleen a...@linux.intel.com

As already discussed earlier in email.

The basic idea is that it does not make sense to list all PEBS
events individually. The list is very long, sometimes outdated
and the hardware doesn't need it. If an event does not support
PEBS it will just not count, there is no security issue.

This vastly simplifies the PEBS event selection.

Bugs fixed:
- We do not allow setting forbidden flags with PEBS anymore
(SDM 18.9.4), except for the special cycle event.
This is done using a new constraint macro that also
matches on the event flags.
- We now allow DataLA on all Haswell events, not just
a small subset. In general all PEBS events that tag memory
accesses support DataLA on Haswell. Otherwise the reported
address is just zero. This allows address profiling
on vastly more events.
- We did not allow all PEBS events on Haswell.

This includes the changes proposed by Stephane earlier and obsoletes
his patchkit.

I only did Sandy Bridge and Silvermont and later so far, mostly because these
are the parts I could directly confirm the hardware behavior with hardware
architects.

Cc: eran...@google.com
Signed-off-by: Andi Kleen a...@linux.intel.com
---
 arch/x86/include/asm/perf_event.h |  8 +++
 arch/x86/kernel/cpu/perf_event.h  | 18 --
 arch/x86/kernel/cpu/perf_event_intel_ds.c | 96 +++
 3 files changed, 43 insertions(+), 79 deletions(-)

diff --git a/arch/x86/include/asm/perf_event.h 
b/arch/x86/include/asm/perf_event.h
index 8249df4..8dfc9fd 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -51,6 +51,14 @@
 ARCH_PERFMON_EVENTSEL_EDGE  |  \
 ARCH_PERFMON_EVENTSEL_INV   |  \
 ARCH_PERFMON_EVENTSEL_CMASK)
+#define X86_ALL_EVENT_FLAGS\
+   (ARCH_PERFMON_EVENTSEL_EDGE |   \
+ARCH_PERFMON_EVENTSEL_INV |\
+ARCH_PERFMON_EVENTSEL_CMASK |  \
+ARCH_PERFMON_EVENTSEL_ANY |\
+ARCH_PERFMON_EVENTSEL_PIN_CONTROL |\
+HSW_IN_TX |\
+HSW_IN_TX_CHECKPOINTED)
 #define AMD64_RAW_EVENT_MASK   \
(X86_RAW_EVENT_MASK  |  \
 AMD64_EVENTSEL_EVENT)
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 3b2f9bd..9907759 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -252,16 +252,24 @@ struct cpu_hw_events {
EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK)
 
 #define INTEL_PLD_CONSTRAINT(c, n) \
-   __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \
+   __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
   HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LDLAT)
 
 #define INTEL_PST_CONSTRAINT(c, n) \
-   __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \
+   __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
  HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST)
 
-/* DataLA version of store sampling without extra enable bit. */
-#define INTEL_PST_HSW_CONSTRAINT(c, n) \
-   __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \
+/* Event constraint, but match on all event flags too. */
+#define INTEL_FLAGS_EVENT_CONSTRAINT(c, n) \
+   EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS)
+
+/* Check only flags, but allow all event/umask */
+#define INTEL_ALL_EVENT_CONSTRAINT(flags, n)   \
+   EVENT_CONSTRAINT(flags, n, X86_ALL_EVENT_FLAGS)
+
+/* Same as above, but enable DataLA */
+#define INTEL_ALL_EVENT_CONSTRAINT_DATALA(flags, n) \
+   __EVENT_CONSTRAINT(flags, n, X86_ALL_EVENT_FLAGS, \
  HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST_HSW)
 
 /*
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c 
b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 980970c..d50142e 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -567,28 +567,10 @@ struct event_constraint 
intel_atom_pebs_event_constraints[] = {
 };
 
 struct event_constraint intel_slm_pebs_event_constraints[] = {
-   INTEL_UEVENT_CONSTRAINT(0x0103, 0x1), /* REHABQ.LD_BLOCK_ST_FORWARD_PS 
*/
-   INTEL_UEVENT_CONSTRAINT(0x0803, 0x1), /* REHABQ.LD_SPLITS_PS */
-   INTEL_UEVENT_CONSTRAINT(0x0204, 0x1), /* 
MEM_UOPS_RETIRED.L2_HIT_LOADS_PS */
-   INTEL_UEVENT_CONSTRAINT(0x0404, 0x1), /* 
MEM_UOPS_RETIRED.L2_MISS_LOADS_PS */
-   INTEL_UEVENT_CONSTRAINT(0x0804, 0x1), /* 
MEM_UOPS_RETIRED.DTLB_MISS_LOADS_PS */
-   INTEL_UEVENT_CONSTRAINT(0x2004, 0x1), /* MEM_UOPS_RETIRED.HITM_PS */
-   INTEL_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY_PS */
-   INTEL_UEVENT_CONSTRAINT(0x00c4, 0x1), /* 
BR_INST_RETIRED.ALL_BRANCHES_PS */
-   INTEL_UEVENT_CONSTRAINT(0x7ec4, 0x1), /* BR_INST_RETIRED.JCC_PS */
-   INTEL_UEVENT_CONSTRAINT(0xbfc4, 0x1), /* BR_INST_RETIRED.FAR_BRANCH_PS 
*/
-