Re: [PATCH 1/2] perf, x86: Revamp PEBS event selection
On Sat, Jul 12, 2014 at 2:01 AM, Andi Kleen wrote: > From: Andi Kleen > > The basic idea is that it does not make sense to list all PEBS > events individually. The list is very long, sometimes outdated > and the hardware doesn't need it. If an event does not support > PEBS it will just not count, there is no security issue. > > This vastly simplifies the PEBS event selection. It also > speeds up the scheduling because the scheduler doesn't > have to walk as many constraints. > > Bugs fixed: > - We do not allow setting forbidden flags with PEBS anymore > (SDM 18.9.4), except for the special cycle event. > This is done using a new constraint macro that also > matches on the event flags. > - We now allow DataLA on all Haswell events, not just > a small subset. In general all PEBS events that tag memory > accesses support DataLA on Haswell. Otherwise the reported > address is just zero. This allows address profiling > on vastly more events. > - We did not allow all PEBS events on Haswell: > We were missing some valid subevents in d1-d2 (MEM_LOAD_UOPS_RETIRED.*, > MEM_LOAD_UOPS_RETIRED_L3_HIT_RETIRED.*) > > This includes the changes proposed by Stephane earlier and obsoletes > his patchkit (except for some changes on pre Sandy Bridge/Silvermont > CPUs) > > I only did Sandy Bridge and Silvermont and later so far, mostly because these > are the parts I could directly confirm the hardware behavior with hardware > architects. Also I do not believe the older CPUs have any > missing events in their PEBS list, so there's no pressing > need to change them. > > I did not implement the flag proposed by Peter to allow > setting forbidden flags. If really needed this could > be implemented on to of this patch. > > Cc: eran...@google.com > v2: Fix broken store events on SNB/IVB (Stephane Eranian) > v3: More fixes. Rename some arguments (Stephane Eranian) > Update description. > Signed-off-by: Andi Kleen Works now for me on SNB/HSW. 
Reviewed-by: Stephane Eranian > --- > arch/x86/include/asm/perf_event.h | 8 +++ > arch/x86/kernel/cpu/perf_event.h | 18 +-- > arch/x86/kernel/cpu/perf_event_intel_ds.c | 88 > +++ > 3 files changed, 39 insertions(+), 75 deletions(-) > > diff --git a/arch/x86/include/asm/perf_event.h > b/arch/x86/include/asm/perf_event.h > index 8249df4..8dfc9fd 100644 > --- a/arch/x86/include/asm/perf_event.h > +++ b/arch/x86/include/asm/perf_event.h > @@ -51,6 +51,14 @@ > ARCH_PERFMON_EVENTSEL_EDGE | \ > ARCH_PERFMON_EVENTSEL_INV | \ > ARCH_PERFMON_EVENTSEL_CMASK) > +#define X86_ALL_EVENT_FLAGS\ > + (ARCH_PERFMON_EVENTSEL_EDGE | \ > +ARCH_PERFMON_EVENTSEL_INV |\ > +ARCH_PERFMON_EVENTSEL_CMASK | \ > +ARCH_PERFMON_EVENTSEL_ANY |\ > +ARCH_PERFMON_EVENTSEL_PIN_CONTROL |\ > +HSW_IN_TX |\ > +HSW_IN_TX_CHECKPOINTED) > #define AMD64_RAW_EVENT_MASK \ > (X86_RAW_EVENT_MASK | \ > AMD64_EVENTSEL_EVENT) > diff --git a/arch/x86/kernel/cpu/perf_event.h > b/arch/x86/kernel/cpu/perf_event.h > index a22a34e9..8f32af0 100644 > --- a/arch/x86/kernel/cpu/perf_event.h > +++ b/arch/x86/kernel/cpu/perf_event.h > @@ -262,16 +262,24 @@ struct cpu_hw_events { > EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK) > > #define INTEL_PLD_CONSTRAINT(c, n) \ > - __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \ > + __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \ >HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LDLAT) > > #define INTEL_PST_CONSTRAINT(c, n) \ > - __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \ > + __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \ > HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST) > > -/* DataLA version of store sampling without extra enable bit. */ > -#define INTEL_PST_HSW_CONSTRAINT(c, n) \ > - __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \ > +/* Event constraint, but match on all event flags too. 
*/ > +#define INTEL_FLAGS_EVENT_CONSTRAINT(c, n) \ > + EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS) > + > +/* Check only flags, but allow all event/umask */ > +#define INTEL_ALL_EVENT_CONSTRAINT(code, n)\ > + EVENT_CONSTRAINT(code, n, X86_ALL_EVENT_FLAGS) > + > +/* Same as above, but enable DataLA */ > +#define INTEL_ALL_EVENT_CONSTRAINT_DATALA(code, n) \ > + __EVENT_CONSTRAINT(code, n, X86_ALL_EVENT_FLAGS, \ > HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST_HSW) > > /* > diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c > b/arch/x86/kernel/cpu/perf_event_intel_ds.c > index 980970c..64b4be9 100644 > --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c > +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c > @@ -567,28 +567,10 @@ struct event_constraint >
Re: [PATCH 1/2] perf, x86: Revamp PEBS event selection
On Sat, Jul 12, 2014 at 2:01 AM, Andi Kleen a...@firstfloor.org wrote: From: Andi Kleen a...@linux.intel.com The basic idea is that it does not make sense to list all PEBS events individually. The list is very long, sometimes outdated and the hardware doesn't need it. If an event does not support PEBS it will just not count, there is no security issue. This vastly simplifies the PEBS event selection. It also speeds up the scheduling because the scheduler doesn't have to walk as many constraints. Bugs fixed: - We do not allow setting forbidden flags with PEBS anymore (SDM 18.9.4), except for the special cycle event. This is done using a new constraint macro that also matches on the event flags. - We now allow DataLA on all Haswell events, not just a small subset. In general all PEBS events that tag memory accesses support DataLA on Haswell. Otherwise the reported address is just zero. This allows address profiling on vastly more events. - We did not allow all PEBS events on Haswell: We were missing some valid subevents in d1-d2 (MEM_LOAD_UOPS_RETIRED.*, MEM_LOAD_UOPS_RETIRED_L3_HIT_RETIRED.*) This includes the changes proposed by Stephane earlier and obsoletes his patchkit (except for some changes on pre Sandy Bridge/Silvermont CPUs) I only did Sandy Bridge and Silvermont and later so far, mostly because these are the parts I could directly confirm the hardware behavior with hardware architects. Also I do not believe the older CPUs have any missing events in their PEBS list, so there's no pressing need to change them. I did not implement the flag proposed by Peter to allow setting forbidden flags. If really needed this could be implemented on to of this patch. Cc: eran...@google.com v2: Fix broken store events on SNB/IVB (Stephane Eranian) v3: More fixes. Rename some arguments (Stephane Eranian) Update description. Signed-off-by: Andi Kleen a...@linux.intel.com Works now for me on SNB/HSW. 
Reviewed-by: Stephane Eranian eran...@google.com --- arch/x86/include/asm/perf_event.h | 8 +++ arch/x86/kernel/cpu/perf_event.h | 18 +-- arch/x86/kernel/cpu/perf_event_intel_ds.c | 88 +++ 3 files changed, 39 insertions(+), 75 deletions(-) diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index 8249df4..8dfc9fd 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -51,6 +51,14 @@ ARCH_PERFMON_EVENTSEL_EDGE | \ ARCH_PERFMON_EVENTSEL_INV | \ ARCH_PERFMON_EVENTSEL_CMASK) +#define X86_ALL_EVENT_FLAGS\ + (ARCH_PERFMON_EVENTSEL_EDGE | \ +ARCH_PERFMON_EVENTSEL_INV |\ +ARCH_PERFMON_EVENTSEL_CMASK | \ +ARCH_PERFMON_EVENTSEL_ANY |\ +ARCH_PERFMON_EVENTSEL_PIN_CONTROL |\ +HSW_IN_TX |\ +HSW_IN_TX_CHECKPOINTED) #define AMD64_RAW_EVENT_MASK \ (X86_RAW_EVENT_MASK | \ AMD64_EVENTSEL_EVENT) diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index a22a34e9..8f32af0 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h @@ -262,16 +262,24 @@ struct cpu_hw_events { EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK) #define INTEL_PLD_CONSTRAINT(c, n) \ - __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \ + __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \ HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LDLAT) #define INTEL_PST_CONSTRAINT(c, n) \ - __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \ + __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \ HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST) -/* DataLA version of store sampling without extra enable bit. */ -#define INTEL_PST_HSW_CONSTRAINT(c, n) \ - __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \ +/* Event constraint, but match on all event flags too. 
*/ +#define INTEL_FLAGS_EVENT_CONSTRAINT(c, n) \ + EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS) + +/* Check only flags, but allow all event/umask */ +#define INTEL_ALL_EVENT_CONSTRAINT(code, n)\ + EVENT_CONSTRAINT(code, n, X86_ALL_EVENT_FLAGS) + +/* Same as above, but enable DataLA */ +#define INTEL_ALL_EVENT_CONSTRAINT_DATALA(code, n) \ + __EVENT_CONSTRAINT(code, n, X86_ALL_EVENT_FLAGS, \ HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST_HSW) /* diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index 980970c..64b4be9 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c @@ -567,28 +567,10 @@ struct event_constraint intel_atom_pebs_event_constraints[] = { };
[PATCH 1/2] perf, x86: Revamp PEBS event selection
From: Andi Kleen The basic idea is that it does not make sense to list all PEBS events individually. The list is very long, sometimes outdated and the hardware doesn't need it. If an event does not support PEBS it will just not count, there is no security issue. This vastly simplifies the PEBS event selection. It also speeds up the scheduling because the scheduler doesn't have to walk as many constraints. Bugs fixed: - We do not allow setting forbidden flags with PEBS anymore (SDM 18.9.4), except for the special cycle event. This is done using a new constraint macro that also matches on the event flags. - We now allow DataLA on all Haswell events, not just a small subset. In general all PEBS events that tag memory accesses support DataLA on Haswell. Otherwise the reported address is just zero. This allows address profiling on vastly more events. - We did not allow all PEBS events on Haswell: We were missing some valid subevents in d1-d2 (MEM_LOAD_UOPS_RETIRED.*, MEM_LOAD_UOPS_RETIRED_L3_HIT_RETIRED.*) This includes the changes proposed by Stephane earlier and obsoletes his patchkit (except for some changes on pre Sandy Bridge/Silvermont CPUs) I only did Sandy Bridge and Silvermont and later so far, mostly because these are the parts I could directly confirm the hardware behavior with hardware architects. Also I do not believe the older CPUs have any missing events in their PEBS list, so there's no pressing need to change them. I did not implement the flag proposed by Peter to allow setting forbidden flags. If really needed this could be implemented on top of this patch. Cc: eran...@google.com v2: Fix broken store events on SNB/IVB (Stephane Eranian) v3: More fixes. Rename some arguments (Stephane Eranian) Update description. 
Signed-off-by: Andi Kleen --- arch/x86/include/asm/perf_event.h | 8 +++ arch/x86/kernel/cpu/perf_event.h | 18 +-- arch/x86/kernel/cpu/perf_event_intel_ds.c | 88 +++ 3 files changed, 39 insertions(+), 75 deletions(-) diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index 8249df4..8dfc9fd 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -51,6 +51,14 @@ ARCH_PERFMON_EVENTSEL_EDGE | \ ARCH_PERFMON_EVENTSEL_INV | \ ARCH_PERFMON_EVENTSEL_CMASK) +#define X86_ALL_EVENT_FLAGS\ + (ARCH_PERFMON_EVENTSEL_EDGE | \ +ARCH_PERFMON_EVENTSEL_INV |\ +ARCH_PERFMON_EVENTSEL_CMASK | \ +ARCH_PERFMON_EVENTSEL_ANY |\ +ARCH_PERFMON_EVENTSEL_PIN_CONTROL |\ +HSW_IN_TX |\ +HSW_IN_TX_CHECKPOINTED) #define AMD64_RAW_EVENT_MASK \ (X86_RAW_EVENT_MASK | \ AMD64_EVENTSEL_EVENT) diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index a22a34e9..8f32af0 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h @@ -262,16 +262,24 @@ struct cpu_hw_events { EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK) #define INTEL_PLD_CONSTRAINT(c, n) \ - __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \ + __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \ HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LDLAT) #define INTEL_PST_CONSTRAINT(c, n) \ - __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \ + __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \ HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST) -/* DataLA version of store sampling without extra enable bit. */ -#define INTEL_PST_HSW_CONSTRAINT(c, n) \ - __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \ +/* Event constraint, but match on all event flags too. 
*/ +#define INTEL_FLAGS_EVENT_CONSTRAINT(c, n) \ + EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS) + +/* Check only flags, but allow all event/umask */ +#define INTEL_ALL_EVENT_CONSTRAINT(code, n)\ + EVENT_CONSTRAINT(code, n, X86_ALL_EVENT_FLAGS) + +/* Same as above, but enable DataLA */ +#define INTEL_ALL_EVENT_CONSTRAINT_DATALA(code, n) \ + __EVENT_CONSTRAINT(code, n, X86_ALL_EVENT_FLAGS, \ HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST_HSW) /* diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index 980970c..64b4be9 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c @@ -567,28 +567,10 @@ struct event_constraint intel_atom_pebs_event_constraints[] = { }; struct event_constraint intel_slm_pebs_event_constraints[] = { - INTEL_UEVENT_CONSTRAINT(0x0103, 0x1), /* REHABQ.LD_BLOCK_ST_FORWARD_PS */ - INTEL_UEVENT_CONSTRAINT(0x0803, 0x1), /* REHABQ.LD_SPLITS_PS */ - INTEL_UEVENT_CONSTRAINT(0x0204, 0x1), /*
[PATCH 1/2] perf, x86: Revamp PEBS event selection
From: Andi Kleen a...@linux.intel.com The basic idea is that it does not make sense to list all PEBS events individually. The list is very long, sometimes outdated and the hardware doesn't need it. If an event does not support PEBS it will just not count, there is no security issue. This vastly simplifies the PEBS event selection. It also speeds up the scheduling because the scheduler doesn't have to walk as many constraints. Bugs fixed: - We do not allow setting forbidden flags with PEBS anymore (SDM 18.9.4), except for the special cycle event. This is done using a new constraint macro that also matches on the event flags. - We now allow DataLA on all Haswell events, not just a small subset. In general all PEBS events that tag memory accesses support DataLA on Haswell. Otherwise the reported address is just zero. This allows address profiling on vastly more events. - We did not allow all PEBS events on Haswell: We were missing some valid subevents in d1-d2 (MEM_LOAD_UOPS_RETIRED.*, MEM_LOAD_UOPS_RETIRED_L3_HIT_RETIRED.*) This includes the changes proposed by Stephane earlier and obsoletes his patchkit (except for some changes on pre Sandy Bridge/Silvermont CPUs) I only did Sandy Bridge and Silvermont and later so far, mostly because these are the parts I could directly confirm the hardware behavior with hardware architects. Also I do not believe the older CPUs have any missing events in their PEBS list, so there's no pressing need to change them. I did not implement the flag proposed by Peter to allow setting forbidden flags. If really needed this could be implemented on top of this patch. Cc: eran...@google.com v2: Fix broken store events on SNB/IVB (Stephane Eranian) v3: More fixes. Rename some arguments (Stephane Eranian) Update description. 
Signed-off-by: Andi Kleen a...@linux.intel.com --- arch/x86/include/asm/perf_event.h | 8 +++ arch/x86/kernel/cpu/perf_event.h | 18 +-- arch/x86/kernel/cpu/perf_event_intel_ds.c | 88 +++ 3 files changed, 39 insertions(+), 75 deletions(-) diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index 8249df4..8dfc9fd 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -51,6 +51,14 @@ ARCH_PERFMON_EVENTSEL_EDGE | \ ARCH_PERFMON_EVENTSEL_INV | \ ARCH_PERFMON_EVENTSEL_CMASK) +#define X86_ALL_EVENT_FLAGS\ + (ARCH_PERFMON_EVENTSEL_EDGE | \ +ARCH_PERFMON_EVENTSEL_INV |\ +ARCH_PERFMON_EVENTSEL_CMASK | \ +ARCH_PERFMON_EVENTSEL_ANY |\ +ARCH_PERFMON_EVENTSEL_PIN_CONTROL |\ +HSW_IN_TX |\ +HSW_IN_TX_CHECKPOINTED) #define AMD64_RAW_EVENT_MASK \ (X86_RAW_EVENT_MASK | \ AMD64_EVENTSEL_EVENT) diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index a22a34e9..8f32af0 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h @@ -262,16 +262,24 @@ struct cpu_hw_events { EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK) #define INTEL_PLD_CONSTRAINT(c, n) \ - __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \ + __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \ HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LDLAT) #define INTEL_PST_CONSTRAINT(c, n) \ - __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \ + __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \ HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST) -/* DataLA version of store sampling without extra enable bit. */ -#define INTEL_PST_HSW_CONSTRAINT(c, n) \ - __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \ +/* Event constraint, but match on all event flags too. 
*/ +#define INTEL_FLAGS_EVENT_CONSTRAINT(c, n) \ + EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS) + +/* Check only flags, but allow all event/umask */ +#define INTEL_ALL_EVENT_CONSTRAINT(code, n)\ + EVENT_CONSTRAINT(code, n, X86_ALL_EVENT_FLAGS) + +/* Same as above, but enable DataLA */ +#define INTEL_ALL_EVENT_CONSTRAINT_DATALA(code, n) \ + __EVENT_CONSTRAINT(code, n, X86_ALL_EVENT_FLAGS, \ HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST_HSW) /* diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index 980970c..64b4be9 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c @@ -567,28 +567,10 @@ struct event_constraint intel_atom_pebs_event_constraints[] = { }; struct event_constraint intel_slm_pebs_event_constraints[] = { - INTEL_UEVENT_CONSTRAINT(0x0103, 0x1), /* REHABQ.LD_BLOCK_ST_FORWARD_PS */ - INTEL_UEVENT_CONSTRAINT(0x0803, 0x1), /* REHABQ.LD_SPLITS_PS */ -
Re: [PATCH 1/2] perf, x86: Revamp PEBS event selection v2
On Tue, Jul 8, 2014 at 12:37 AM, Andi Kleen wrote: > From: Andi Kleen > > The basic idea is that it does not make sense to list all PEBS > events individually. The list is very long, sometimes outdated > and the hardware doesn't need it. If an event does not support > PEBS it will just not count, there is no security issue. > > This vastly simplifies the PEBS event selection. It also > speeds up the scheduling because the scheduler doesn't > have to walk as many constraints. > > Bugs fixed: > - We do not allow setting forbidden flags with PEBS anymore > (SDM 18.9.4), except for the special cycle event. > This is done using a new constraint macro that also > matches on the event flags. > - We now allow DataLA on all Haswell events, not just > a small subset. In general all PEBS events that tag memory > accesses support DataLA on Haswell. Otherwise the reported > address is just zero. This allows address profiling > on vastly more events. > - We did not allow all PEBS events on Haswell: > We were missing some valid subevents in d1-d2 (MEM_LOAD_UOPS_RETIRED.*, > MEM_LOAD_UOPS_RETIRED_L3_HIT_RETIRED.*) > > This includes the changes proposed by Stephane earlier and obsoletes > his patchkit (except for some changes on pre Sandy Bridge/Silvermont > CPUs) > > I only did Sandy Bridge and Silvermont and later so far, mostly because these > are the parts I could directly confirm the hardware behavior with hardware > architects. Also I do not believe the older CPUs have any > missing events in their PEBS list, so there's no pressing > need to change them. > > I did not implement the flag proposed by Peter to allow > setting forbidden flags. If really needed this could > be implemented on to of this patch. > > Cc: eran...@google.com > v2: Fix broken store events on SNB/IVB (Stephane Eranian) > Update description. 
> Signed-off-by: Andi Kleen > --- > arch/x86/include/asm/perf_event.h | 8 +++ > arch/x86/kernel/cpu/perf_event.h | 18 +-- > arch/x86/kernel/cpu/perf_event_intel_ds.c | 88 > +++ > 3 files changed, 39 insertions(+), 75 deletions(-) > > diff --git a/arch/x86/include/asm/perf_event.h > b/arch/x86/include/asm/perf_event.h > index 8249df4..8dfc9fd 100644 > --- a/arch/x86/include/asm/perf_event.h > +++ b/arch/x86/include/asm/perf_event.h > @@ -51,6 +51,14 @@ > ARCH_PERFMON_EVENTSEL_EDGE | \ > ARCH_PERFMON_EVENTSEL_INV | \ > ARCH_PERFMON_EVENTSEL_CMASK) > +#define X86_ALL_EVENT_FLAGS\ > + (ARCH_PERFMON_EVENTSEL_EDGE | \ > +ARCH_PERFMON_EVENTSEL_INV |\ > +ARCH_PERFMON_EVENTSEL_CMASK | \ > +ARCH_PERFMON_EVENTSEL_ANY |\ > +ARCH_PERFMON_EVENTSEL_PIN_CONTROL |\ > +HSW_IN_TX |\ > +HSW_IN_TX_CHECKPOINTED) > #define AMD64_RAW_EVENT_MASK \ > (X86_RAW_EVENT_MASK | \ > AMD64_EVENTSEL_EVENT) > diff --git a/arch/x86/kernel/cpu/perf_event.h > b/arch/x86/kernel/cpu/perf_event.h > index a22a34e9..70273e8 100644 > --- a/arch/x86/kernel/cpu/perf_event.h > +++ b/arch/x86/kernel/cpu/perf_event.h > @@ -262,16 +262,24 @@ struct cpu_hw_events { > EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK) > > #define INTEL_PLD_CONSTRAINT(c, n) \ > - __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \ > + __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \ >HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LDLAT) > > #define INTEL_PST_CONSTRAINT(c, n) \ > - __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \ > + __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \ > HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST) > > -/* DataLA version of store sampling without extra enable bit. */ > -#define INTEL_PST_HSW_CONSTRAINT(c, n) \ > - __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \ > +/* Event constraint, but match on all event flags too. 
*/ > +#define INTEL_FLAGS_EVENT_CONSTRAINT(c, n) \ > + EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS) > + > +/* Check only flags, but allow all event/umask */ > +#define INTEL_ALL_EVENT_CONSTRAINT(flags, n) \ > + EVENT_CONSTRAINT(flags, n, X86_ALL_EVENT_FLAGS) > + The first argument is not flags but rather 'code'. This is confusing otherwise. > +/* Same as above, but enable DataLA */ > +#define INTEL_ALL_EVENT_CONSTRAINT_DATALA(flags, n) \ > + __EVENT_CONSTRAINT(flags, n, X86_ALL_EVENT_FLAGS, \ > HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST_HSW) > > /* > diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c > b/arch/x86/kernel/cpu/perf_event_intel_ds.c > index 980970c..0e22ce6 100644 > --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c > +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c > @@ -567,28 +567,10 @@ struct event_constraint > intel_atom_pebs_event_constraints[] = { > }; > >
[PATCH 1/2] perf, x86: Revamp PEBS event selection v2
From: Andi Kleen The basic idea is that it does not make sense to list all PEBS events individually. The list is very long, sometimes outdated and the hardware doesn't need it. If an event does not support PEBS it will just not count, there is no security issue. This vastly simplifies the PEBS event selection. It also speeds up the scheduling because the scheduler doesn't have to walk as many constraints. Bugs fixed: - We do not allow setting forbidden flags with PEBS anymore (SDM 18.9.4), except for the special cycle event. This is done using a new constraint macro that also matches on the event flags. - We now allow DataLA on all Haswell events, not just a small subset. In general all PEBS events that tag memory accesses support DataLA on Haswell. Otherwise the reported address is just zero. This allows address profiling on vastly more events. - We did not allow all PEBS events on Haswell: We were missing some valid subevents in d1-d2 (MEM_LOAD_UOPS_RETIRED.*, MEM_LOAD_UOPS_RETIRED_L3_HIT_RETIRED.*) This includes the changes proposed by Stephane earlier and obsoletes his patchkit (except for some changes on pre Sandy Bridge/Silvermont CPUs) I only did Sandy Bridge and Silvermont and later so far, mostly because these are the parts I could directly confirm the hardware behavior with hardware architects. Also I do not believe the older CPUs have any missing events in their PEBS list, so there's no pressing need to change them. I did not implement the flag proposed by Peter to allow setting forbidden flags. If really needed this could be implemented on top of this patch. Cc: eran...@google.com v2: Fix broken store events on SNB/IVB (Stephane Eranian) Update description. 
Signed-off-by: Andi Kleen --- arch/x86/include/asm/perf_event.h | 8 +++ arch/x86/kernel/cpu/perf_event.h | 18 +-- arch/x86/kernel/cpu/perf_event_intel_ds.c | 88 +++ 3 files changed, 39 insertions(+), 75 deletions(-) diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index 8249df4..8dfc9fd 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -51,6 +51,14 @@ ARCH_PERFMON_EVENTSEL_EDGE | \ ARCH_PERFMON_EVENTSEL_INV | \ ARCH_PERFMON_EVENTSEL_CMASK) +#define X86_ALL_EVENT_FLAGS\ + (ARCH_PERFMON_EVENTSEL_EDGE | \ +ARCH_PERFMON_EVENTSEL_INV |\ +ARCH_PERFMON_EVENTSEL_CMASK | \ +ARCH_PERFMON_EVENTSEL_ANY |\ +ARCH_PERFMON_EVENTSEL_PIN_CONTROL |\ +HSW_IN_TX |\ +HSW_IN_TX_CHECKPOINTED) #define AMD64_RAW_EVENT_MASK \ (X86_RAW_EVENT_MASK | \ AMD64_EVENTSEL_EVENT) diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index a22a34e9..70273e8 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h @@ -262,16 +262,24 @@ struct cpu_hw_events { EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK) #define INTEL_PLD_CONSTRAINT(c, n) \ - __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \ + __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \ HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LDLAT) #define INTEL_PST_CONSTRAINT(c, n) \ - __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \ + __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \ HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST) -/* DataLA version of store sampling without extra enable bit. */ -#define INTEL_PST_HSW_CONSTRAINT(c, n) \ - __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \ +/* Event constraint, but match on all event flags too. 
*/ +#define INTEL_FLAGS_EVENT_CONSTRAINT(c, n) \ + EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS) + +/* Check only flags, but allow all event/umask */ +#define INTEL_ALL_EVENT_CONSTRAINT(flags, n) \ + EVENT_CONSTRAINT(flags, n, X86_ALL_EVENT_FLAGS) + +/* Same as above, but enable DataLA */ +#define INTEL_ALL_EVENT_CONSTRAINT_DATALA(flags, n) \ + __EVENT_CONSTRAINT(flags, n, X86_ALL_EVENT_FLAGS, \ HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST_HSW) /* diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index 980970c..0e22ce6 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c @@ -567,28 +567,10 @@ struct event_constraint intel_atom_pebs_event_constraints[] = { }; struct event_constraint intel_slm_pebs_event_constraints[] = { - INTEL_UEVENT_CONSTRAINT(0x0103, 0x1), /* REHABQ.LD_BLOCK_ST_FORWARD_PS */ - INTEL_UEVENT_CONSTRAINT(0x0803, 0x1), /* REHABQ.LD_SPLITS_PS */ - INTEL_UEVENT_CONSTRAINT(0x0204, 0x1), /* MEM_UOPS_RETIRED.L2_HIT_LOADS_PS */ - INTEL_UEVENT_CONSTRAINT(0x0404,
[PATCH 1/2] perf, x86: Revamp PEBS event selection v2
From: Andi Kleen a...@linux.intel.com The basic idea is that it does not make sense to list all PEBS events individually. The list is very long, sometimes outdated and the hardware doesn't need it. If an event does not support PEBS it will just not count, there is no security issue. This vastly simplifies the PEBS event selection. It also speeds up the scheduling because the scheduler doesn't have to walk as many constraints. Bugs fixed: - We do not allow setting forbidden flags with PEBS anymore (SDM 18.9.4), except for the special cycle event. This is done using a new constraint macro that also matches on the event flags. - We now allow DataLA on all Haswell events, not just a small subset. In general all PEBS events that tag memory accesses support DataLA on Haswell. Otherwise the reported address is just zero. This allows address profiling on vastly more events. - We did not allow all PEBS events on Haswell: We were missing some valid subevents in d1-d2 (MEM_LOAD_UOPS_RETIRED.*, MEM_LOAD_UOPS_RETIRED_L3_HIT_RETIRED.*) This includes the changes proposed by Stephane earlier and obsoletes his patchkit (except for some changes on pre Sandy Bridge/Silvermont CPUs) I only did Sandy Bridge and Silvermont and later so far, mostly because these are the parts I could directly confirm the hardware behavior with hardware architects. Also I do not believe the older CPUs have any missing events in their PEBS list, so there's no pressing need to change them. I did not implement the flag proposed by Peter to allow setting forbidden flags. If really needed this could be implemented on top of this patch. Cc: eran...@google.com v2: Fix broken store events on SNB/IVB (Stephane Eranian) Update description. 
Signed-off-by: Andi Kleen a...@linux.intel.com --- arch/x86/include/asm/perf_event.h | 8 +++ arch/x86/kernel/cpu/perf_event.h | 18 +-- arch/x86/kernel/cpu/perf_event_intel_ds.c | 88 +++ 3 files changed, 39 insertions(+), 75 deletions(-) diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index 8249df4..8dfc9fd 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -51,6 +51,14 @@ ARCH_PERFMON_EVENTSEL_EDGE | \ ARCH_PERFMON_EVENTSEL_INV | \ ARCH_PERFMON_EVENTSEL_CMASK) +#define X86_ALL_EVENT_FLAGS\ + (ARCH_PERFMON_EVENTSEL_EDGE | \ +ARCH_PERFMON_EVENTSEL_INV |\ +ARCH_PERFMON_EVENTSEL_CMASK | \ +ARCH_PERFMON_EVENTSEL_ANY |\ +ARCH_PERFMON_EVENTSEL_PIN_CONTROL |\ +HSW_IN_TX |\ +HSW_IN_TX_CHECKPOINTED) #define AMD64_RAW_EVENT_MASK \ (X86_RAW_EVENT_MASK | \ AMD64_EVENTSEL_EVENT) diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index a22a34e9..70273e8 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h @@ -262,16 +262,24 @@ struct cpu_hw_events { EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK) #define INTEL_PLD_CONSTRAINT(c, n) \ - __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \ + __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \ HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LDLAT) #define INTEL_PST_CONSTRAINT(c, n) \ - __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \ + __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \ HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST) -/* DataLA version of store sampling without extra enable bit. */ -#define INTEL_PST_HSW_CONSTRAINT(c, n) \ - __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \ +/* Event constraint, but match on all event flags too. 
*/ +#define INTEL_FLAGS_EVENT_CONSTRAINT(c, n) \ + EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS) + +/* Check only flags, but allow all event/umask */ +#define INTEL_ALL_EVENT_CONSTRAINT(flags, n) \ + EVENT_CONSTRAINT(flags, n, X86_ALL_EVENT_FLAGS) + +/* Same as above, but enable DataLA */ +#define INTEL_ALL_EVENT_CONSTRAINT_DATALA(flags, n) \ + __EVENT_CONSTRAINT(flags, n, X86_ALL_EVENT_FLAGS, \ HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST_HSW) /* diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index 980970c..0e22ce6 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c @@ -567,28 +567,10 @@ struct event_constraint intel_atom_pebs_event_constraints[] = { }; struct event_constraint intel_slm_pebs_event_constraints[] = { - INTEL_UEVENT_CONSTRAINT(0x0103, 0x1), /* REHABQ.LD_BLOCK_ST_FORWARD_PS */ - INTEL_UEVENT_CONSTRAINT(0x0803, 0x1), /* REHABQ.LD_SPLITS_PS */ - INTEL_UEVENT_CONSTRAINT(0x0204, 0x1), /* MEM_UOPS_RETIRED.L2_HIT_LOADS_PS */ -
Re: [PATCH 1/2] perf, x86: Revamp PEBS event selection v2
On Tue, Jul 8, 2014 at 12:37 AM, Andi Kleen a...@firstfloor.org wrote: From: Andi Kleen a...@linux.intel.com The basic idea is that it does not make sense to list all PEBS events individually. The list is very long, sometimes outdated and the hardware doesn't need it. If an event does not support PEBS it will just not count, there is no security issue. This vastly simplifies the PEBS event selection. It also speeds up the scheduling because the scheduler doesn't have to walk as many constraints. Bugs fixed: - We do not allow setting forbidden flags with PEBS anymore (SDM 18.9.4), except for the special cycle event. This is done using a new constraint macro that also matches on the event flags. - We now allow DataLA on all Haswell events, not just a small subset. In general all PEBS events that tag memory accesses support DataLA on Haswell. Otherwise the reported address is just zero. This allows address profiling on vastly more events. - We did not allow all PEBS events on Haswell: We were missing some valid subevents in d1-d2 (MEM_LOAD_UOPS_RETIRED.*, MEM_LOAD_UOPS_RETIRED_L3_HIT_RETIRED.*) This includes the changes proposed by Stephane earlier and obsoletes his patchkit (except for some changes on pre Sandy Bridge/Silvermont CPUs) I only did Sandy Bridge and Silvermont and later so far, mostly because these are the parts I could directly confirm the hardware behavior with hardware architects. Also I do not believe the older CPUs have any missing events in their PEBS list, so there's no pressing need to change them. I did not implement the flag proposed by Peter to allow setting forbidden flags. If really needed this could be implemented on to of this patch. Cc: eran...@google.com v2: Fix broken store events on SNB/IVB (Stephane Eranian) Update description. 
Signed-off-by: Andi Kleen a...@linux.intel.com --- arch/x86/include/asm/perf_event.h | 8 +++ arch/x86/kernel/cpu/perf_event.h | 18 +-- arch/x86/kernel/cpu/perf_event_intel_ds.c | 88 +++ 3 files changed, 39 insertions(+), 75 deletions(-) diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index 8249df4..8dfc9fd 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -51,6 +51,14 @@ ARCH_PERFMON_EVENTSEL_EDGE | \ ARCH_PERFMON_EVENTSEL_INV | \ ARCH_PERFMON_EVENTSEL_CMASK) +#define X86_ALL_EVENT_FLAGS\ + (ARCH_PERFMON_EVENTSEL_EDGE | \ +ARCH_PERFMON_EVENTSEL_INV |\ +ARCH_PERFMON_EVENTSEL_CMASK | \ +ARCH_PERFMON_EVENTSEL_ANY |\ +ARCH_PERFMON_EVENTSEL_PIN_CONTROL |\ +HSW_IN_TX |\ +HSW_IN_TX_CHECKPOINTED) #define AMD64_RAW_EVENT_MASK \ (X86_RAW_EVENT_MASK | \ AMD64_EVENTSEL_EVENT) diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index a22a34e9..70273e8 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h @@ -262,16 +262,24 @@ struct cpu_hw_events { EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK) #define INTEL_PLD_CONSTRAINT(c, n) \ - __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \ + __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \ HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LDLAT) #define INTEL_PST_CONSTRAINT(c, n) \ - __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \ + __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \ HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST) -/* DataLA version of store sampling without extra enable bit. */ -#define INTEL_PST_HSW_CONSTRAINT(c, n) \ - __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \ +/* Event constraint, but match on all event flags too. 
*/ +#define INTEL_FLAGS_EVENT_CONSTRAINT(c, n) \ + EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS) + +/* Check only flags, but allow all event/umask */ +#define INTEL_ALL_EVENT_CONSTRAINT(flags, n) \ + EVENT_CONSTRAINT(flags, n, X86_ALL_EVENT_FLAGS) + The first argument is not flags but rather 'code'. This is confusing otherwise. +/* Same as above, but enable DataLA */ +#define INTEL_ALL_EVENT_CONSTRAINT_DATALA(flags, n) \ + __EVENT_CONSTRAINT(flags, n, X86_ALL_EVENT_FLAGS, \ HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST_HSW) /* diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index 980970c..0e22ce6 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c @@ -567,28 +567,10 @@ struct event_constraint intel_atom_pebs_event_constraints[] = { }; struct event_constraint
Re: [PATCH 1/2] perf, x86: Revamp PEBS event selection
On Wed, Jul 2, 2014 at 8:10 PM, Andi Kleen wrote: > On Wed, Jul 02, 2014 at 06:07:31PM +0200, Stephane Eranian wrote: >> On Wed, Jul 2, 2014 at 5:48 PM, Andi Kleen wrote: >> > On Wed, Jul 02, 2014 at 05:44:05PM +0200, Stephane Eranian wrote: >> >> On Wed, Jul 2, 2014 at 5:33 PM, Andi Kleen wrote: >> >> >> No, still needs to be INTEL_ALL_EVENT_CONSTRAINT(0x0, 0x1) >> >> >> otherwise the get_event_constraint() test I mentioned previously will >> >> >> fail, event with your ALL_FILTER mask. >> >> > >> >> > What events should fail? I verified all PEBS events and they work as >> >> > expected. >> >> > >> >> Random events should not fail, they should go with precise and not >> >> generate >> >> any samples. That's the whole point of the exercise. >> >> >> >> perf record -a -e r6099:p sleep 1 >> > >> > Like I said I ran all PEBS events and they generated samples. >> > >> I understand. I ran some random events to make sure I was not >> getting PEBS samples and the system was stable. > > Not sure we're talking about the same thing. You claimed my patch > wouldn't let any PEBS events through, but the test results > disagree with that. > I did not say that. I said, it does not let any random event code use precise > 0. And this is what we want to eliminate. It is okay to let precise > 1 on any event. The non-PEBS events will not generate any PEBS records. > I fixed the broken store events you pointed out. 
> > INST_RETIRED.PREC_DIST > cpu/event=0xC0,umask=0x01,name=INST_RETIRED_PREC_DIST/pp > [ perf record: Woken up 1 times to write data ] > [ perf record: Captured and wrote 0.179 MB perf.data (~7821 samples) ] > UOPS_RETIRED.ALL > cpu/event=0xC2,umask=0x01,name=UOPS_RETIRED_ALL/pp > [ perf record: Woken up 1 times to write data ] > [ perf record: Captured and wrote 0.179 MB perf.data (~7824 samples) ] > UOPS_RETIRED.RETIRE_SLOTS > cpu/event=0xC2,umask=0x02,name=UOPS_RETIRED_RETIRE_SLOTS/pp > [ perf record: Woken up 1 times to write data ] > [ perf record: Captured and wrote 0.180 MB perf.data (~7869 samples) ] > BR_INST_RETIRED.CONDITIONAL > cpu/event=0xC4,umask=0x01,name=BR_INST_RETIRED_CONDITIONAL/pp > [ perf record: Woken up 1 times to write data ] > [ perf record: Captured and wrote 0.177 MB perf.data (~7729 samples) ] > BR_INST_RETIRED.NEAR_CALL > cpu/event=0xC4,umask=0x02,name=BR_INST_RETIRED_NEAR_CALL/pp > [ perf record: Woken up 1 times to write data ] > [ perf record: Captured and wrote 0.140 MB perf.data (~6112 samples) ] > BR_INST_RETIRED.NEAR_RETURN > cpu/event=0xC4,umask=0x08,name=BR_INST_RETIRED_NEAR_RETURN/pp > [ perf record: Woken up 1 times to write data ] > [ perf record: Captured and wrote 0.140 MB perf.data (~6124 samples) ] > BR_INST_RETIRED.NEAR_TAKEN > cpu/event=0xC4,umask=0x20,name=BR_INST_RETIRED_NEAR_TAKEN/pp > [ perf record: Woken up 1 times to write data ] > [ perf record: Captured and wrote 0.176 MB perf.data (~7709 samples) ] > BR_INST_RETIRED.ALL_BRANCHES_PEBS > cpu/event=0xC4,umask=0x04,name=BR_INST_RETIRED_ALL_BRANCHES_PEBS/pp > [ perf record: Woken up 1 times to write data ] > [ perf record: Captured and wrote 0.177 MB perf.data (~7747 samples) ] > BR_MISP_RETIRED.CONDITIONAL > cpu/event=0xC5,umask=0x01,name=BR_MISP_RETIRED_CONDITIONAL/pp > [ perf record: Woken up 1 times to write data ] > [ perf record: Captured and wrote 0.132 MB perf.data (~5767 samples) ] > BR_MISP_RETIRED.ALL_BRANCHES_PEBS > 
cpu/event=0xC5,umask=0x04,name=BR_MISP_RETIRED_ALL_BRANCHES_PEBS/pp > [ perf record: Woken up 1 times to write data ] > [ perf record: Captured and wrote 0.132 MB perf.data (~5781 samples) ] > HLE_RETIRED.ABORTED > cpu/event=0xc8,umask=0x04,name=HLE_RETIRED_ABORTED/pp > [ perf record: Woken up 1 times to write data ] > [ perf record: Captured and wrote 0.002 MB perf.data (~86 samples) ] > RTM_RETIRED.ABORTED > cpu/event=0xc9,umask=0x04,name=RTM_RETIRED_ABORTED/pp > [ perf record: Woken up 1 times to write data ] > [ perf record: Captured and wrote 0.002 MB perf.data (~86 samples) ] > MEM_TRANS_RETIRED.LOAD_LATENCY_GT_4 > cpu/event=0xCD,umask=0x01,ldlat=0x4,name=MEM_TRANS_RETIRED_LOAD_LATENCY_GT_4/pp > [ perf record: Woken up 1 times to write data ] > [ perf record: Captured and wrote 0.179 MB perf.data (~7832 samples) ] > MEM_TRANS_RETIRED.LOAD_LATENCY_GT_8 > cpu/event=0xCD,umask=0x01,ldlat=0x8,name=MEM_TRANS_RETIRED_LOAD_LATENCY_GT_8/pp > [ perf record: Woken up 1 times to write data ] > [ perf record: Captured and wrote 0.126 MB perf.data (~5522 samples) ] > MEM_TRANS_RETIRED.LOAD_LATENCY_GT_16 > cpu/event=0xCD,umask=0x01,ldlat=0x10,name=MEM_TRANS_RETIRED_LOAD_LATENCY_GT_16/pp > [ perf record: Woken up 1 times to write data ] > [ perf record: Captured and wrote 0.090 MB perf.data (~3911 samples) ] > MEM_TRANS_RETIRED.LOAD_LATENCY_GT_32 > cpu/event=0xCD,umask=0x01,ldlat=0x20,name=MEM_TRANS_RETIRED_LOAD_LATENCY_GT_32/pp > [ perf record: Woken up 1 times to write data ] > [ perf record: Captured and wrote 0.056 MB perf.data (~2429 samples) ] > MEM_TRANS_RETIRED.LOAD_LATENCY_GT_64 >
Re: [PATCH 1/2] perf, x86: Revamp PEBS event selection
On Wed, Jul 2, 2014 at 8:10 PM, Andi Kleen a...@linux.intel.com wrote: On Wed, Jul 02, 2014 at 06:07:31PM +0200, Stephane Eranian wrote: On Wed, Jul 2, 2014 at 5:48 PM, Andi Kleen a...@linux.intel.com wrote: On Wed, Jul 02, 2014 at 05:44:05PM +0200, Stephane Eranian wrote: On Wed, Jul 2, 2014 at 5:33 PM, Andi Kleen a...@linux.intel.com wrote: No, still needs to be INTEL_ALL_EVENT_CONSTRAINT(0x0, 0x1) otherwise the get_event_constraint() test I mentioned previously will fail, event with your ALL_FILTER mask. What events should fail? I verified all PEBS events and they work as expected. Random events should not fail, they should go with precise and not generate any samples. That's the whole point of the exercise. perf record -a -e r6099:p sleep 1 Like I said I ran all PEBS events and they generated samples. I understand. I ran some random events to make sure I was not getting PEBS samples and the system was stable. Not sure we're talking about the same thing. You claimed my patch wouldn't let any PEBS events through, but the test results disagree with that. I did not say that. I said, it does not let any random event code use precise 0. And this is what we want to eliminate. It is okay to let precise 1 on any event. The non-PEBS events will not generate any PEBS records. I fixed the broken store events you pointed out. 
INST_RETIRED.PREC_DIST cpu/event=0xC0,umask=0x01,name=INST_RETIRED_PREC_DIST/pp [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.179 MB perf.data (~7821 samples) ] UOPS_RETIRED.ALL cpu/event=0xC2,umask=0x01,name=UOPS_RETIRED_ALL/pp [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.179 MB perf.data (~7824 samples) ] UOPS_RETIRED.RETIRE_SLOTS cpu/event=0xC2,umask=0x02,name=UOPS_RETIRED_RETIRE_SLOTS/pp [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.180 MB perf.data (~7869 samples) ] BR_INST_RETIRED.CONDITIONAL cpu/event=0xC4,umask=0x01,name=BR_INST_RETIRED_CONDITIONAL/pp [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.177 MB perf.data (~7729 samples) ] BR_INST_RETIRED.NEAR_CALL cpu/event=0xC4,umask=0x02,name=BR_INST_RETIRED_NEAR_CALL/pp [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.140 MB perf.data (~6112 samples) ] BR_INST_RETIRED.NEAR_RETURN cpu/event=0xC4,umask=0x08,name=BR_INST_RETIRED_NEAR_RETURN/pp [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.140 MB perf.data (~6124 samples) ] BR_INST_RETIRED.NEAR_TAKEN cpu/event=0xC4,umask=0x20,name=BR_INST_RETIRED_NEAR_TAKEN/pp [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.176 MB perf.data (~7709 samples) ] BR_INST_RETIRED.ALL_BRANCHES_PEBS cpu/event=0xC4,umask=0x04,name=BR_INST_RETIRED_ALL_BRANCHES_PEBS/pp [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.177 MB perf.data (~7747 samples) ] BR_MISP_RETIRED.CONDITIONAL cpu/event=0xC5,umask=0x01,name=BR_MISP_RETIRED_CONDITIONAL/pp [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.132 MB perf.data (~5767 samples) ] BR_MISP_RETIRED.ALL_BRANCHES_PEBS cpu/event=0xC5,umask=0x04,name=BR_MISP_RETIRED_ALL_BRANCHES_PEBS/pp [ perf record: Woken up 1 times to write data 
] [ perf record: Captured and wrote 0.132 MB perf.data (~5781 samples) ] HLE_RETIRED.ABORTED cpu/event=0xc8,umask=0x04,name=HLE_RETIRED_ABORTED/pp [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.002 MB perf.data (~86 samples) ] RTM_RETIRED.ABORTED cpu/event=0xc9,umask=0x04,name=RTM_RETIRED_ABORTED/pp [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.002 MB perf.data (~86 samples) ] MEM_TRANS_RETIRED.LOAD_LATENCY_GT_4 cpu/event=0xCD,umask=0x01,ldlat=0x4,name=MEM_TRANS_RETIRED_LOAD_LATENCY_GT_4/pp [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.179 MB perf.data (~7832 samples) ] MEM_TRANS_RETIRED.LOAD_LATENCY_GT_8 cpu/event=0xCD,umask=0x01,ldlat=0x8,name=MEM_TRANS_RETIRED_LOAD_LATENCY_GT_8/pp [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.126 MB perf.data (~5522 samples) ] MEM_TRANS_RETIRED.LOAD_LATENCY_GT_16 cpu/event=0xCD,umask=0x01,ldlat=0x10,name=MEM_TRANS_RETIRED_LOAD_LATENCY_GT_16/pp [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.090 MB perf.data (~3911 samples) ] MEM_TRANS_RETIRED.LOAD_LATENCY_GT_32 cpu/event=0xCD,umask=0x01,ldlat=0x20,name=MEM_TRANS_RETIRED_LOAD_LATENCY_GT_32/pp [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.056 MB perf.data (~2429 samples) ] MEM_TRANS_RETIRED.LOAD_LATENCY_GT_64 cpu/event=0xCD,umask=0x01,ldlat=0x40,name=MEM_TRANS_RETIRED_LOAD_LATENCY_GT_64/pp [ perf record:
Re: [PATCH 1/2] perf, x86: Revamp PEBS event selection
> But also, I think we should conditionally allow the filter bits; > possibly with a sysfs file like I had. > > Back when we had to sort that SNB cycles thing it was tedious that Linus > could not just try things. Hmm, the code in your patch to handle it was quite nasty. I don't really see the situation repeating. -Andi -- a...@linux.intel.com -- Speaking for myself only -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 1/2] perf, x86: Revamp PEBS event selection
On Wed, Jul 02, 2014 at 06:07:31PM +0200, Stephane Eranian wrote: > On Wed, Jul 2, 2014 at 5:48 PM, Andi Kleen wrote: > > On Wed, Jul 02, 2014 at 05:44:05PM +0200, Stephane Eranian wrote: > >> On Wed, Jul 2, 2014 at 5:33 PM, Andi Kleen wrote: > >> >> No, still needs to be INTEL_ALL_EVENT_CONSTRAINT(0x0, 0x1) > >> >> otherwise the get_event_constraint() test I mentioned previously will > >> >> fail, even with your ALL_FILTER mask. > >> > > >> > What events should fail? I verified all PEBS events and they work as > >> > expected. > >> > > >> Random events should not fail, they should go with precise and not generate > >> any samples. That's the whole point of the exercise. > >> > >> perf record -a -e r6099:p sleep 1 > > > > Like I said I ran all PEBS events and they generated samples. > > > I understand. I ran some random events to make sure I was not > getting PEBS samples and the system was stable. Not sure we're talking about the same thing. You claimed my patch wouldn't let any PEBS events through, but the test results disagree with that. I fixed the broken store events you pointed out. 
INST_RETIRED.PREC_DIST cpu/event=0xC0,umask=0x01,name=INST_RETIRED_PREC_DIST/pp [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.179 MB perf.data (~7821 samples) ] UOPS_RETIRED.ALL cpu/event=0xC2,umask=0x01,name=UOPS_RETIRED_ALL/pp [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.179 MB perf.data (~7824 samples) ] UOPS_RETIRED.RETIRE_SLOTS cpu/event=0xC2,umask=0x02,name=UOPS_RETIRED_RETIRE_SLOTS/pp [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.180 MB perf.data (~7869 samples) ] BR_INST_RETIRED.CONDITIONAL cpu/event=0xC4,umask=0x01,name=BR_INST_RETIRED_CONDITIONAL/pp [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.177 MB perf.data (~7729 samples) ] BR_INST_RETIRED.NEAR_CALL cpu/event=0xC4,umask=0x02,name=BR_INST_RETIRED_NEAR_CALL/pp [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.140 MB perf.data (~6112 samples) ] BR_INST_RETIRED.NEAR_RETURN cpu/event=0xC4,umask=0x08,name=BR_INST_RETIRED_NEAR_RETURN/pp [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.140 MB perf.data (~6124 samples) ] BR_INST_RETIRED.NEAR_TAKEN cpu/event=0xC4,umask=0x20,name=BR_INST_RETIRED_NEAR_TAKEN/pp [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.176 MB perf.data (~7709 samples) ] BR_INST_RETIRED.ALL_BRANCHES_PEBS cpu/event=0xC4,umask=0x04,name=BR_INST_RETIRED_ALL_BRANCHES_PEBS/pp [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.177 MB perf.data (~7747 samples) ] BR_MISP_RETIRED.CONDITIONAL cpu/event=0xC5,umask=0x01,name=BR_MISP_RETIRED_CONDITIONAL/pp [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.132 MB perf.data (~5767 samples) ] BR_MISP_RETIRED.ALL_BRANCHES_PEBS cpu/event=0xC5,umask=0x04,name=BR_MISP_RETIRED_ALL_BRANCHES_PEBS/pp [ perf record: Woken up 1 times to write data 
] [ perf record: Captured and wrote 0.132 MB perf.data (~5781 samples) ] HLE_RETIRED.ABORTED cpu/event=0xc8,umask=0x04,name=HLE_RETIRED_ABORTED/pp [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.002 MB perf.data (~86 samples) ] RTM_RETIRED.ABORTED cpu/event=0xc9,umask=0x04,name=RTM_RETIRED_ABORTED/pp [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.002 MB perf.data (~86 samples) ] MEM_TRANS_RETIRED.LOAD_LATENCY_GT_4 cpu/event=0xCD,umask=0x01,ldlat=0x4,name=MEM_TRANS_RETIRED_LOAD_LATENCY_GT_4/pp [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.179 MB perf.data (~7832 samples) ] MEM_TRANS_RETIRED.LOAD_LATENCY_GT_8 cpu/event=0xCD,umask=0x01,ldlat=0x8,name=MEM_TRANS_RETIRED_LOAD_LATENCY_GT_8/pp [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.126 MB perf.data (~5522 samples) ] MEM_TRANS_RETIRED.LOAD_LATENCY_GT_16 cpu/event=0xCD,umask=0x01,ldlat=0x10,name=MEM_TRANS_RETIRED_LOAD_LATENCY_GT_16/pp [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.090 MB perf.data (~3911 samples) ] MEM_TRANS_RETIRED.LOAD_LATENCY_GT_32 cpu/event=0xCD,umask=0x01,ldlat=0x20,name=MEM_TRANS_RETIRED_LOAD_LATENCY_GT_32/pp [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.056 MB perf.data (~2429 samples) ] MEM_TRANS_RETIRED.LOAD_LATENCY_GT_64 cpu/event=0xCD,umask=0x01,ldlat=0x40,name=MEM_TRANS_RETIRED_LOAD_LATENCY_GT_64/pp [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.012 MB perf.data (~516 samples) ] MEM_TRANS_RETIRED.LOAD_LATENCY_GT_128 cpu/event=0xCD,umask=0x01,ldlat=0x80,name=MEM_TRANS_RETIRED_LOAD_LATENCY_GT_128/pp [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.014 MB perf.data (~604 samples) ]
Re: [PATCH 1/2] perf, x86: Revamp PEBS event selection
On Wed, Jul 2, 2014 at 5:48 PM, Andi Kleen wrote: > On Wed, Jul 02, 2014 at 05:44:05PM +0200, Stephane Eranian wrote: >> On Wed, Jul 2, 2014 at 5:33 PM, Andi Kleen wrote: >> >> No, still needs to be INTEL_ALL_EVENT_CONSTRAINT(0x0, 0x1) >> >> otherwise the get_event_constraint() test I mentioned previously will >> >> fail, even with your ALL_FILTER mask. >> > >> > What events should fail? I verified all PEBS events and they work as >> > expected. >> > >> Random events should not fail, they should go with precise and not generate >> any samples. That's the whole point of the exercise. >> >> perf record -a -e r6099:p sleep 1 > > Like I said I ran all PEBS events and they generated samples. > I understand. I ran some random events to make sure I was not getting PEBS samples and the system was stable. -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 1/2] perf, x86: Revamp PEBS event selection
On Wed, Jul 02, 2014 at 05:44:05PM +0200, Stephane Eranian wrote: > On Wed, Jul 2, 2014 at 5:33 PM, Andi Kleen wrote: > >> No, still needs to be INTEL_ALL_EVENT_CONSTRAINT(0x0, 0x1) > >> otherwise the get_event_constraint() test I mentioned previously will > >> fail, even with your ALL_FILTER mask. > > > > What events should fail? I verified all PEBS events and they work as > > expected. > > > Random events should not fail, they should go with precise and not generate > any samples. That's the whole point of the exercise. > > perf record -a -e r6099:p sleep 1 Like I said I ran all PEBS events and they generated samples. -Andi -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 1/2] perf, x86: Revamp PEBS event selection
On Wed, Jul 2, 2014 at 5:33 PM, Andi Kleen wrote: >> No, still needs to be INTEL_ALL_EVENT_CONSTRAINT(0x0, 0x1) >> otherwise the get_event_constraint() test I mentioned previously will >> fail, event with your ALL_FILTER mask. > > What events should fail? I verified all PEBS events and they work as expected. > Random events should not fail, they should go with precise and not generate any samples. That's the whole point of the exercise. perf record -a -e r6099:p sleep 1 >> > - INTEL_EVENT_CONSTRAINT(0xc4, 0xf),/* BR_INST_RETIRED.* */ >> > - INTEL_EVENT_CONSTRAINT(0xc5, 0xf),/* BR_MISP_RETIRED.* */ >> > - INTEL_PLD_CONSTRAINT(0x01cd, 0x8),/* >> > MEM_TRANS_RETIRED.LAT_ABOVE_THR */ >> > - INTEL_PST_CONSTRAINT(0x02cd, 0x8),/* >> > MEM_TRANS_RETIRED.PRECISE_STORES */ >> > - INTEL_EVENT_CONSTRAINT(0xd0, 0xf),/* MEM_UOP_RETIRED.* */ >> > - INTEL_EVENT_CONSTRAINT(0xd1, 0xf),/* MEM_LOAD_UOPS_RETIRED.* */ >> > - INTEL_EVENT_CONSTRAINT(0xd2, 0xf),/* >> > MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */ >> > - INTEL_EVENT_CONSTRAINT(0xd3, 0xf),/* >> > MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */ >> > - INTEL_UEVENT_CONSTRAINT(0x02d4, 0xf), /* >> > MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS */ >> > + INTEL_PLD_CONSTRAINT(0x01cd, 0xf),/* >> > MEM_TRANS_RETIRED.LAT_ABOVE_THR */ >> > + INTEL_PST_CONSTRAINT(0x02cd, 0xf),/* >> > MEM_TRANS_RETIRED.PRECISE_STORES */ >> >> No, precise stores only work on counter 3, keep 0x8 here > > Good point. > > > > -Andi > -- > a...@linux.intel.com -- Speaking for myself only -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 1/2] perf, x86: Revamp PEBS event selection
On Wed, Jul 02, 2014 at 08:34:07AM -0700, Andi Kleen wrote: > On Wed, Jul 02, 2014 at 02:29:02PM +0200, Peter Zijlstra wrote: > > On Fri, Jun 27, 2014 at 04:10:11PM -0700, Andi Kleen wrote: > > > From: Andi Kleen > > > > > > As already discussed earlier in email. > > > > Is an entirely inappropriate start for a Changelog. Do not assume prior > > knowledge. If its relevant include it here without reference. > > Thanks. Do you have any other comments? What Stephane said ;-) But also, I think we should conditionally allow the filter bits; possibly with a sysfs file like I had. Back when we had to sort that SNB cycles thing it was tedious that Linus could not just try things. pgpWYwScdSllk.pgp Description: PGP signature
Re: [PATCH 1/2] perf, x86: Revamp PEBS event selection
On Wed, Jul 02, 2014 at 02:29:02PM +0200, Peter Zijlstra wrote: > On Fri, Jun 27, 2014 at 04:10:11PM -0700, Andi Kleen wrote: > > From: Andi Kleen > > > > As already discussed earlier in email. > > Is an entirely inappropriate start for a Changelog. Do not assume prior > knowledge. If its relevant include it here without reference. Thanks. Do you have any other comments? -Andi -- a...@linux.intel.com -- Speaking for myself only -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 1/2] perf, x86: Revamp PEBS event selection
> No, still needs to be INTEL_ALL_EVENT_CONSTRAINT(0x0, 0x1) > otherwise the get_event_constraint() test I mentioned previously will > fail, event with your ALL_FILTER mask. What events should fail? I verified all PEBS events and they work as expected. > > - INTEL_EVENT_CONSTRAINT(0xc4, 0xf),/* BR_INST_RETIRED.* */ > > - INTEL_EVENT_CONSTRAINT(0xc5, 0xf),/* BR_MISP_RETIRED.* */ > > - INTEL_PLD_CONSTRAINT(0x01cd, 0x8),/* > > MEM_TRANS_RETIRED.LAT_ABOVE_THR */ > > - INTEL_PST_CONSTRAINT(0x02cd, 0x8),/* > > MEM_TRANS_RETIRED.PRECISE_STORES */ > > - INTEL_EVENT_CONSTRAINT(0xd0, 0xf),/* MEM_UOP_RETIRED.* */ > > - INTEL_EVENT_CONSTRAINT(0xd1, 0xf),/* MEM_LOAD_UOPS_RETIRED.* */ > > - INTEL_EVENT_CONSTRAINT(0xd2, 0xf),/* > > MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */ > > - INTEL_EVENT_CONSTRAINT(0xd3, 0xf),/* > > MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */ > > - INTEL_UEVENT_CONSTRAINT(0x02d4, 0xf), /* > > MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS */ > > + INTEL_PLD_CONSTRAINT(0x01cd, 0xf),/* > > MEM_TRANS_RETIRED.LAT_ABOVE_THR */ > > + INTEL_PST_CONSTRAINT(0x02cd, 0xf),/* > > MEM_TRANS_RETIRED.PRECISE_STORES */ > > No, precise stores only work on counter 3, keep 0x8 here Good point. -Andi -- a...@linux.intel.com -- Speaking for myself only -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 1/2] perf, x86: Revamp PEBS event selection
Andi, On Sat, Jun 28, 2014 at 1:10 AM, Andi Kleen wrote: > From: Andi Kleen > > As already discussed earlier in email. > > The basic idea is that it does not make sense to list all PEBS > events individually. The list is very long, sometimes outdated > and the hardware doesn't need it. If an event does not support > PEBS it will just not count, there is no security issue. > > This vastly simplifies the PEBS event selection. > > Bugs fixed: > - We do not allow setting forbidden flags with PEBS anymore > (SDM 18.9.4), except for the special cycle event. > This is done using a new constraint macro that also > matches on the event flags. > - We now allow DataLA on all Haswell events, not just > a small subset. In general all PEBS events that tag memory > accesses support DataLA on Haswell. Otherwise the reported > address is just zero. This allows address profiling > on vastly more events. > - We did not allow all PEBS events on Haswell. > > This includes the changes proposed by Stephane earlier and obsoletes > his patchkit. > > I only did Sandy Bridge and Silvermont and later so far, mostly because these > are the parts I could directly confirm the hardware behavior with hardware > architects. > This patch still does not work as expected on any platforms. 
See below > Cc: eran...@google.com > Signed-off-by: Andi Kleen > --- > arch/x86/include/asm/perf_event.h | 8 +++ > arch/x86/kernel/cpu/perf_event.h | 18 -- > arch/x86/kernel/cpu/perf_event_intel_ds.c | 96 > +++ > 3 files changed, 43 insertions(+), 79 deletions(-) > > diff --git a/arch/x86/include/asm/perf_event.h > b/arch/x86/include/asm/perf_event.h > index 8249df4..8dfc9fd 100644 > --- a/arch/x86/include/asm/perf_event.h > +++ b/arch/x86/include/asm/perf_event.h > @@ -51,6 +51,14 @@ > ARCH_PERFMON_EVENTSEL_EDGE | \ > ARCH_PERFMON_EVENTSEL_INV | \ > ARCH_PERFMON_EVENTSEL_CMASK) > +#define X86_ALL_EVENT_FLAGS\ > + (ARCH_PERFMON_EVENTSEL_EDGE | \ > +ARCH_PERFMON_EVENTSEL_INV |\ > +ARCH_PERFMON_EVENTSEL_CMASK | \ > +ARCH_PERFMON_EVENTSEL_ANY |\ > +ARCH_PERFMON_EVENTSEL_PIN_CONTROL |\ > +HSW_IN_TX |\ > +HSW_IN_TX_CHECKPOINTED) > #define AMD64_RAW_EVENT_MASK \ > (X86_RAW_EVENT_MASK | \ > AMD64_EVENTSEL_EVENT) > diff --git a/arch/x86/kernel/cpu/perf_event.h > b/arch/x86/kernel/cpu/perf_event.h > index 3b2f9bd..9907759 100644 > --- a/arch/x86/kernel/cpu/perf_event.h > +++ b/arch/x86/kernel/cpu/perf_event.h > @@ -252,16 +252,24 @@ struct cpu_hw_events { > EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK) > > #define INTEL_PLD_CONSTRAINT(c, n) \ > - __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \ > + __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \ >HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LDLAT) > > #define INTEL_PST_CONSTRAINT(c, n) \ > - __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \ > + __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \ > HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST) > > -/* DataLA version of store sampling without extra enable bit. */ > -#define INTEL_PST_HSW_CONSTRAINT(c, n) \ > - __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \ > +/* Event constraint, but match on all event flags too. 
*/ > +#define INTEL_FLAGS_EVENT_CONSTRAINT(c, n) \ > + EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS) > + > +/* Check only flags, but allow all event/umask */ > +#define INTEL_ALL_EVENT_CONSTRAINT(flags, n) \ > + EVENT_CONSTRAINT(flags, n, X86_ALL_EVENT_FLAGS) > + > +/* Same as above, but enable DataLA */ > +#define INTEL_ALL_EVENT_CONSTRAINT_DATALA(flags, n) \ > + __EVENT_CONSTRAINT(flags, n, X86_ALL_EVENT_FLAGS, \ > HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST_HSW) > > /* > diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c > b/arch/x86/kernel/cpu/perf_event_intel_ds.c > index 980970c..d50142e 100644 > --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c > +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c > @@ -567,28 +567,10 @@ struct event_constraint > intel_atom_pebs_event_constraints[] = { > }; > > struct event_constraint intel_slm_pebs_event_constraints[] = { > - INTEL_UEVENT_CONSTRAINT(0x0103, 0x1), /* > REHABQ.LD_BLOCK_ST_FORWARD_PS */ > - INTEL_UEVENT_CONSTRAINT(0x0803, 0x1), /* REHABQ.LD_SPLITS_PS */ > - INTEL_UEVENT_CONSTRAINT(0x0204, 0x1), /* > MEM_UOPS_RETIRED.L2_HIT_LOADS_PS */ > - INTEL_UEVENT_CONSTRAINT(0x0404, 0x1), /* > MEM_UOPS_RETIRED.L2_MISS_LOADS_PS */ > - INTEL_UEVENT_CONSTRAINT(0x0804, 0x1), /* > MEM_UOPS_RETIRED.DTLB_MISS_LOADS_PS */ > - INTEL_UEVENT_CONSTRAINT(0x2004, 0x1), /* MEM_UOPS_RETIRED.HITM_PS */ > -
Re: [PATCH 1/2] perf, x86: Revamp PEBS event selection
On Fri, Jun 27, 2014 at 04:10:11PM -0700, Andi Kleen wrote: > From: Andi Kleen > > As already discussed earlier in email. Is an entirely inappropriate start for a Changelog. Do not assume prior knowledge. If its relevant include it here without reference. pgprcUiTuNF0F.pgp Description: PGP signature
Re: [PATCH 1/2] perf, x86: Revamp PEBS event selection
On Fri, Jun 27, 2014 at 04:10:11PM -0700, Andi Kleen wrote: From: Andi Kleen a...@linux.intel.com As already discussed earlier in email. Is an entirely inappropriate start for a Changelog. Do not assume prior knowledge. If its relevant include it here without reference. pgprcUiTuNF0F.pgp Description: PGP signature
Re: [PATCH 1/2] perf, x86: Revamp PEBS event selection
Andi, On Sat, Jun 28, 2014 at 1:10 AM, Andi Kleen a...@firstfloor.org wrote: From: Andi Kleen a...@linux.intel.com As already discussed earlier in email. The basic idea is that it does not make sense to list all PEBS events individually. The list is very long, sometimes outdated and the hardware doesn't need it. If an event does not support PEBS it will just not count, there is no security issue. This vastly simplifies the PEBS event selection. Bugs fixed: - We do not allow setting forbidden flags with PEBS anymore (SDM 18.9.4), except for the special cycle event. This is done using a new constraint macro that also matches on the event flags. - We now allow DataLA on all Haswell events, not just a small subset. In general all PEBS events that tag memory accesses support DataLA on Haswell. Otherwise the reported address is just zero. This allows address profiling on vastly more events. - We did not allow all PEBS events on Haswell. This includes the changes proposed by Stephane earlier and obsoletes his patchkit. I only did Sandy Bridge and Silvermont and later so far, mostly because these are the parts I could directly confirm the hardware behavior with hardware architects. This patch still does not work as expected on any platforms. 
See below Cc: eran...@google.com Signed-off-by: Andi Kleen a...@linux.intel.com --- arch/x86/include/asm/perf_event.h | 8 +++ arch/x86/kernel/cpu/perf_event.h | 18 -- arch/x86/kernel/cpu/perf_event_intel_ds.c | 96 +++ 3 files changed, 43 insertions(+), 79 deletions(-) diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index 8249df4..8dfc9fd 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -51,6 +51,14 @@ ARCH_PERFMON_EVENTSEL_EDGE | \ ARCH_PERFMON_EVENTSEL_INV | \ ARCH_PERFMON_EVENTSEL_CMASK) +#define X86_ALL_EVENT_FLAGS\ + (ARCH_PERFMON_EVENTSEL_EDGE | \ +ARCH_PERFMON_EVENTSEL_INV |\ +ARCH_PERFMON_EVENTSEL_CMASK | \ +ARCH_PERFMON_EVENTSEL_ANY |\ +ARCH_PERFMON_EVENTSEL_PIN_CONTROL |\ +HSW_IN_TX |\ +HSW_IN_TX_CHECKPOINTED) #define AMD64_RAW_EVENT_MASK \ (X86_RAW_EVENT_MASK | \ AMD64_EVENTSEL_EVENT) diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index 3b2f9bd..9907759 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h @@ -252,16 +252,24 @@ struct cpu_hw_events { EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK) #define INTEL_PLD_CONSTRAINT(c, n) \ - __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \ + __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \ HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LDLAT) #define INTEL_PST_CONSTRAINT(c, n) \ - __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \ + __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \ HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST) -/* DataLA version of store sampling without extra enable bit. */ -#define INTEL_PST_HSW_CONSTRAINT(c, n) \ - __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \ +/* Event constraint, but match on all event flags too. 
*/ +#define INTEL_FLAGS_EVENT_CONSTRAINT(c, n) \ + EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS) + +/* Check only flags, but allow all event/umask */ +#define INTEL_ALL_EVENT_CONSTRAINT(flags, n) \ + EVENT_CONSTRAINT(flags, n, X86_ALL_EVENT_FLAGS) + +/* Same as above, but enable DataLA */ +#define INTEL_ALL_EVENT_CONSTRAINT_DATALA(flags, n) \ + __EVENT_CONSTRAINT(flags, n, X86_ALL_EVENT_FLAGS, \ HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST_HSW) /* diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index 980970c..d50142e 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c @@ -567,28 +567,10 @@ struct event_constraint intel_atom_pebs_event_constraints[] = { }; struct event_constraint intel_slm_pebs_event_constraints[] = { - INTEL_UEVENT_CONSTRAINT(0x0103, 0x1), /* REHABQ.LD_BLOCK_ST_FORWARD_PS */ - INTEL_UEVENT_CONSTRAINT(0x0803, 0x1), /* REHABQ.LD_SPLITS_PS */ - INTEL_UEVENT_CONSTRAINT(0x0204, 0x1), /* MEM_UOPS_RETIRED.L2_HIT_LOADS_PS */ - INTEL_UEVENT_CONSTRAINT(0x0404, 0x1), /* MEM_UOPS_RETIRED.L2_MISS_LOADS_PS */ - INTEL_UEVENT_CONSTRAINT(0x0804, 0x1), /* MEM_UOPS_RETIRED.DTLB_MISS_LOADS_PS */ - INTEL_UEVENT_CONSTRAINT(0x2004, 0x1), /* MEM_UOPS_RETIRED.HITM_PS */ - INTEL_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY_PS */ -
Re: [PATCH 1/2] perf, x86: Revamp PEBS event selection
No, still needs to be INTEL_ALL_EVENT_CONSTRAINT(0x0, 0x1) otherwise the get_event_constraint() test I mentioned previously will fail, event with your ALL_FILTER mask. What events should fail? I verified all PEBS events and they work as expected. - INTEL_EVENT_CONSTRAINT(0xc4, 0xf),/* BR_INST_RETIRED.* */ - INTEL_EVENT_CONSTRAINT(0xc5, 0xf),/* BR_MISP_RETIRED.* */ - INTEL_PLD_CONSTRAINT(0x01cd, 0x8),/* MEM_TRANS_RETIRED.LAT_ABOVE_THR */ - INTEL_PST_CONSTRAINT(0x02cd, 0x8),/* MEM_TRANS_RETIRED.PRECISE_STORES */ - INTEL_EVENT_CONSTRAINT(0xd0, 0xf),/* MEM_UOP_RETIRED.* */ - INTEL_EVENT_CONSTRAINT(0xd1, 0xf),/* MEM_LOAD_UOPS_RETIRED.* */ - INTEL_EVENT_CONSTRAINT(0xd2, 0xf),/* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */ - INTEL_EVENT_CONSTRAINT(0xd3, 0xf),/* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */ - INTEL_UEVENT_CONSTRAINT(0x02d4, 0xf), /* MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS */ + INTEL_PLD_CONSTRAINT(0x01cd, 0xf),/* MEM_TRANS_RETIRED.LAT_ABOVE_THR */ + INTEL_PST_CONSTRAINT(0x02cd, 0xf),/* MEM_TRANS_RETIRED.PRECISE_STORES */ No, precise stores only work on counter 3, keep 0x8 here Good point. -Andi -- a...@linux.intel.com -- Speaking for myself only -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 1/2] perf, x86: Revamp PEBS event selection
On Wed, Jul 02, 2014 at 02:29:02PM +0200, Peter Zijlstra wrote: On Fri, Jun 27, 2014 at 04:10:11PM -0700, Andi Kleen wrote: From: Andi Kleen a...@linux.intel.com As already discussed earlier in email. Is an entirely inappropriate start for a Changelog. Do not assume prior knowledge. If its relevant include it here without reference. Thanks. Do you have any other comments? -Andi -- a...@linux.intel.com -- Speaking for myself only -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 1/2] perf, x86: Revamp PEBS event selection
On Wed, Jul 02, 2014 at 08:34:07AM -0700, Andi Kleen wrote: On Wed, Jul 02, 2014 at 02:29:02PM +0200, Peter Zijlstra wrote: On Fri, Jun 27, 2014 at 04:10:11PM -0700, Andi Kleen wrote: From: Andi Kleen a...@linux.intel.com As already discussed earlier in email. Is an entirely inappropriate start for a Changelog. Do not assume prior knowledge. If its relevant include it here without reference. Thanks. Do you have any other comments? What Stephane said ;-) But also, I think we should conditionally allow the filter bits; possibly with a sysfs file like I had. Back when we had to sort that SNB cycles thing it was tedious that Linus could not just try things. pgpWYwScdSllk.pgp Description: PGP signature
Re: [PATCH 1/2] perf, x86: Revamp PEBS event selection
On Wed, Jul 2, 2014 at 5:33 PM, Andi Kleen a...@linux.intel.com wrote: No, still needs to be INTEL_ALL_EVENT_CONSTRAINT(0x0, 0x1) otherwise the get_event_constraint() test I mentioned previously will fail, event with your ALL_FILTER mask. What events should fail? I verified all PEBS events and they work as expected. Random events should not fail, they should go with precise and not generate any samples. That's the whole point of the exercise. perf record -a -e r6099:p sleep 1 - INTEL_EVENT_CONSTRAINT(0xc4, 0xf),/* BR_INST_RETIRED.* */ - INTEL_EVENT_CONSTRAINT(0xc5, 0xf),/* BR_MISP_RETIRED.* */ - INTEL_PLD_CONSTRAINT(0x01cd, 0x8),/* MEM_TRANS_RETIRED.LAT_ABOVE_THR */ - INTEL_PST_CONSTRAINT(0x02cd, 0x8),/* MEM_TRANS_RETIRED.PRECISE_STORES */ - INTEL_EVENT_CONSTRAINT(0xd0, 0xf),/* MEM_UOP_RETIRED.* */ - INTEL_EVENT_CONSTRAINT(0xd1, 0xf),/* MEM_LOAD_UOPS_RETIRED.* */ - INTEL_EVENT_CONSTRAINT(0xd2, 0xf),/* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */ - INTEL_EVENT_CONSTRAINT(0xd3, 0xf),/* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */ - INTEL_UEVENT_CONSTRAINT(0x02d4, 0xf), /* MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS */ + INTEL_PLD_CONSTRAINT(0x01cd, 0xf),/* MEM_TRANS_RETIRED.LAT_ABOVE_THR */ + INTEL_PST_CONSTRAINT(0x02cd, 0xf),/* MEM_TRANS_RETIRED.PRECISE_STORES */ No, precise stores only work on counter 3, keep 0x8 here Good point. -Andi -- a...@linux.intel.com -- Speaking for myself only -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 1/2] perf, x86: Revamp PEBS event selection
On Wed, Jul 02, 2014 at 05:44:05PM +0200, Stephane Eranian wrote: On Wed, Jul 2, 2014 at 5:33 PM, Andi Kleen a...@linux.intel.com wrote: No, still needs to be INTEL_ALL_EVENT_CONSTRAINT(0x0, 0x1) otherwise the get_event_constraint() test I mentioned previously will fail, even with your ALL_FILTER mask. What events should fail? I verified all PEBS events and they work as expected. Random events should not fail, they should go with precise and not generate any samples. That's the whole point of the exercise. perf record -a -e r6099:p sleep 1 Like I said I ran all PEBS events and they generated samples. -Andi -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 1/2] perf, x86: Revamp PEBS event selection
On Wed, Jul 2, 2014 at 5:48 PM, Andi Kleen a...@linux.intel.com wrote: On Wed, Jul 02, 2014 at 05:44:05PM +0200, Stephane Eranian wrote: On Wed, Jul 2, 2014 at 5:33 PM, Andi Kleen a...@linux.intel.com wrote: No, still needs to be INTEL_ALL_EVENT_CONSTRAINT(0x0, 0x1) otherwise the get_event_constraint() test I mentioned previously will fail, even with your ALL_FILTER mask. What events should fail? I verified all PEBS events and they work as expected. Random events should not fail, they should go with precise and not generate any samples. That's the whole point of the exercise. perf record -a -e r6099:p sleep 1 Like I said I ran all PEBS events and they generated samples. I understand. I ran some random events to make sure I was not getting PEBS samples and the system was stable. -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 1/2] perf, x86: Revamp PEBS event selection
On Wed, Jul 02, 2014 at 06:07:31PM +0200, Stephane Eranian wrote: On Wed, Jul 2, 2014 at 5:48 PM, Andi Kleen a...@linux.intel.com wrote: On Wed, Jul 02, 2014 at 05:44:05PM +0200, Stephane Eranian wrote: On Wed, Jul 2, 2014 at 5:33 PM, Andi Kleen a...@linux.intel.com wrote: No, still needs to be INTEL_ALL_EVENT_CONSTRAINT(0x0, 0x1) otherwise the get_event_constraint() test I mentioned previously will fail, even with your ALL_FILTER mask. What events should fail? I verified all PEBS events and they work as expected. Random events should not fail, they should go with precise and not generate any samples. That's the whole point of the exercise. perf record -a -e r6099:p sleep 1 Like I said I ran all PEBS events and they generated samples. I understand. I ran some random events to make sure I was not getting PEBS samples and the system was stable. Not sure we're talking about the same thing. You claimed my patch wouldn't let any PEBS events through, but the test results disagree with that. I fixed the broken store events you pointed out. 
INST_RETIRED.PREC_DIST cpu/event=0xC0,umask=0x01,name=INST_RETIRED_PREC_DIST/pp [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.179 MB perf.data (~7821 samples) ] UOPS_RETIRED.ALL cpu/event=0xC2,umask=0x01,name=UOPS_RETIRED_ALL/pp [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.179 MB perf.data (~7824 samples) ] UOPS_RETIRED.RETIRE_SLOTS cpu/event=0xC2,umask=0x02,name=UOPS_RETIRED_RETIRE_SLOTS/pp [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.180 MB perf.data (~7869 samples) ] BR_INST_RETIRED.CONDITIONAL cpu/event=0xC4,umask=0x01,name=BR_INST_RETIRED_CONDITIONAL/pp [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.177 MB perf.data (~7729 samples) ] BR_INST_RETIRED.NEAR_CALL cpu/event=0xC4,umask=0x02,name=BR_INST_RETIRED_NEAR_CALL/pp [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.140 MB perf.data (~6112 samples) ] BR_INST_RETIRED.NEAR_RETURN cpu/event=0xC4,umask=0x08,name=BR_INST_RETIRED_NEAR_RETURN/pp [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.140 MB perf.data (~6124 samples) ] BR_INST_RETIRED.NEAR_TAKEN cpu/event=0xC4,umask=0x20,name=BR_INST_RETIRED_NEAR_TAKEN/pp [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.176 MB perf.data (~7709 samples) ] BR_INST_RETIRED.ALL_BRANCHES_PEBS cpu/event=0xC4,umask=0x04,name=BR_INST_RETIRED_ALL_BRANCHES_PEBS/pp [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.177 MB perf.data (~7747 samples) ] BR_MISP_RETIRED.CONDITIONAL cpu/event=0xC5,umask=0x01,name=BR_MISP_RETIRED_CONDITIONAL/pp [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.132 MB perf.data (~5767 samples) ] BR_MISP_RETIRED.ALL_BRANCHES_PEBS cpu/event=0xC5,umask=0x04,name=BR_MISP_RETIRED_ALL_BRANCHES_PEBS/pp [ perf record: Woken up 1 times to write data 
] [ perf record: Captured and wrote 0.132 MB perf.data (~5781 samples) ] HLE_RETIRED.ABORTED cpu/event=0xc8,umask=0x04,name=HLE_RETIRED_ABORTED/pp [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.002 MB perf.data (~86 samples) ] RTM_RETIRED.ABORTED cpu/event=0xc9,umask=0x04,name=RTM_RETIRED_ABORTED/pp [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.002 MB perf.data (~86 samples) ] MEM_TRANS_RETIRED.LOAD_LATENCY_GT_4 cpu/event=0xCD,umask=0x01,ldlat=0x4,name=MEM_TRANS_RETIRED_LOAD_LATENCY_GT_4/pp [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.179 MB perf.data (~7832 samples) ] MEM_TRANS_RETIRED.LOAD_LATENCY_GT_8 cpu/event=0xCD,umask=0x01,ldlat=0x8,name=MEM_TRANS_RETIRED_LOAD_LATENCY_GT_8/pp [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.126 MB perf.data (~5522 samples) ] MEM_TRANS_RETIRED.LOAD_LATENCY_GT_16 cpu/event=0xCD,umask=0x01,ldlat=0x10,name=MEM_TRANS_RETIRED_LOAD_LATENCY_GT_16/pp [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.090 MB perf.data (~3911 samples) ] MEM_TRANS_RETIRED.LOAD_LATENCY_GT_32 cpu/event=0xCD,umask=0x01,ldlat=0x20,name=MEM_TRANS_RETIRED_LOAD_LATENCY_GT_32/pp [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.056 MB perf.data (~2429 samples) ] MEM_TRANS_RETIRED.LOAD_LATENCY_GT_64 cpu/event=0xCD,umask=0x01,ldlat=0x40,name=MEM_TRANS_RETIRED_LOAD_LATENCY_GT_64/pp [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.012 MB perf.data (~516 samples) ] MEM_TRANS_RETIRED.LOAD_LATENCY_GT_128 cpu/event=0xCD,umask=0x01,ldlat=0x80,name=MEM_TRANS_RETIRED_LOAD_LATENCY_GT_128/pp [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.014 MB perf.data (~604 samples) ]
Re: [PATCH 1/2] perf, x86: Revamp PEBS event selection
But also, I think we should conditionally allow the filter bits; possibly with a sysfs file like I had. Back when we had to sort that SNB cycles thing it was tedious that Linus could not just try things. Hmm, the code in your patch to handle it was quite nasty. I don't really see the situation repeating. -Andi -- a...@linux.intel.com -- Speaking for myself only -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 1/2] perf, x86: Revamp PEBS event selection
From: Andi Kleen As already discussed earlier in email. The basic idea is that it does not make sense to list all PEBS events individually. The list is very long, sometimes outdated and the hardware doesn't need it. If an event does not support PEBS it will just not count, there is no security issue. This vastly simplifies the PEBS event selection. Bugs fixed: - We do not allow setting forbidden flags with PEBS anymore (SDM 18.9.4), except for the special cycle event. This is done using a new constraint macro that also matches on the event flags. - We now allow DataLA on all Haswell events, not just a small subset. In general all PEBS events that tag memory accesses support DataLA on Haswell. Otherwise the reported address is just zero. This allows address profiling on vastly more events. - We did not allow all PEBS events on Haswell. This includes the changes proposed by Stephane earlier and obsoletes his patchkit. I only did Sandy Bridge and Silvermont and later so far, mostly because these are the parts I could directly confirm the hardware behavior with hardware architects. 
Cc: eran...@google.com Signed-off-by: Andi Kleen --- arch/x86/include/asm/perf_event.h | 8 +++ arch/x86/kernel/cpu/perf_event.h | 18 -- arch/x86/kernel/cpu/perf_event_intel_ds.c | 96 +++ 3 files changed, 43 insertions(+), 79 deletions(-) diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index 8249df4..8dfc9fd 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -51,6 +51,14 @@ ARCH_PERFMON_EVENTSEL_EDGE | \ ARCH_PERFMON_EVENTSEL_INV | \ ARCH_PERFMON_EVENTSEL_CMASK) +#define X86_ALL_EVENT_FLAGS\ + (ARCH_PERFMON_EVENTSEL_EDGE | \ +ARCH_PERFMON_EVENTSEL_INV |\ +ARCH_PERFMON_EVENTSEL_CMASK | \ +ARCH_PERFMON_EVENTSEL_ANY |\ +ARCH_PERFMON_EVENTSEL_PIN_CONTROL |\ +HSW_IN_TX |\ +HSW_IN_TX_CHECKPOINTED) #define AMD64_RAW_EVENT_MASK \ (X86_RAW_EVENT_MASK | \ AMD64_EVENTSEL_EVENT) diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index 3b2f9bd..9907759 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h @@ -252,16 +252,24 @@ struct cpu_hw_events { EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK) #define INTEL_PLD_CONSTRAINT(c, n) \ - __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \ + __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \ HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LDLAT) #define INTEL_PST_CONSTRAINT(c, n) \ - __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \ + __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \ HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST) -/* DataLA version of store sampling without extra enable bit. */ -#define INTEL_PST_HSW_CONSTRAINT(c, n) \ - __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \ +/* Event constraint, but match on all event flags too. 
*/ +#define INTEL_FLAGS_EVENT_CONSTRAINT(c, n) \ + EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS) + +/* Check only flags, but allow all event/umask */ +#define INTEL_ALL_EVENT_CONSTRAINT(flags, n) \ + EVENT_CONSTRAINT(flags, n, X86_ALL_EVENT_FLAGS) + +/* Same as above, but enable DataLA */ +#define INTEL_ALL_EVENT_CONSTRAINT_DATALA(flags, n) \ + __EVENT_CONSTRAINT(flags, n, X86_ALL_EVENT_FLAGS, \ HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST_HSW) /* diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index 980970c..d50142e 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c @@ -567,28 +567,10 @@ struct event_constraint intel_atom_pebs_event_constraints[] = { }; struct event_constraint intel_slm_pebs_event_constraints[] = { - INTEL_UEVENT_CONSTRAINT(0x0103, 0x1), /* REHABQ.LD_BLOCK_ST_FORWARD_PS */ - INTEL_UEVENT_CONSTRAINT(0x0803, 0x1), /* REHABQ.LD_SPLITS_PS */ - INTEL_UEVENT_CONSTRAINT(0x0204, 0x1), /* MEM_UOPS_RETIRED.L2_HIT_LOADS_PS */ - INTEL_UEVENT_CONSTRAINT(0x0404, 0x1), /* MEM_UOPS_RETIRED.L2_MISS_LOADS_PS */ - INTEL_UEVENT_CONSTRAINT(0x0804, 0x1), /* MEM_UOPS_RETIRED.DTLB_MISS_LOADS_PS */ - INTEL_UEVENT_CONSTRAINT(0x2004, 0x1), /* MEM_UOPS_RETIRED.HITM_PS */ - INTEL_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY_PS */ - INTEL_UEVENT_CONSTRAINT(0x00c4, 0x1), /* BR_INST_RETIRED.ALL_BRANCHES_PS */ - INTEL_UEVENT_CONSTRAINT(0x7ec4, 0x1), /* BR_INST_RETIRED.JCC_PS */ - INTEL_UEVENT_CONSTRAINT(0xbfc4, 0x1), /* BR_INST_RETIRED.FAR_BRANCH_PS */ - INTEL_UEVENT_CONSTRAINT(0xebc4, 0x1), /*
[PATCH 1/2] perf, x86: Revamp PEBS event selection
From: Andi Kleen a...@linux.intel.com As already discussed earlier in email. The basic idea is that it does not make sense to list all PEBS events individually. The list is very long, sometimes outdated and the hardware doesn't need it. If an event does not support PEBS it will just not count, there is no security issue. This vastly simplifies the PEBS event selection. Bugs fixed: - We do not allow setting forbidden flags with PEBS anymore (SDM 18.9.4), except for the special cycle event. This is done using a new constraint macro that also matches on the event flags. - We now allow DataLA on all Haswell events, not just a small subset. In general all PEBS events that tag memory accesses support DataLA on Haswell. Otherwise the reported address is just zero. This allows address profiling on vastly more events. - We did not allow all PEBS events on Haswell. This includes the changes proposed by Stephane earlier and obsoletes his patchkit. I only did Sandy Bridge and Silvermont and later so far, mostly because these are the parts I could directly confirm the hardware behavior with hardware architects. 
Cc: eran...@google.com Signed-off-by: Andi Kleen a...@linux.intel.com --- arch/x86/include/asm/perf_event.h | 8 +++ arch/x86/kernel/cpu/perf_event.h | 18 -- arch/x86/kernel/cpu/perf_event_intel_ds.c | 96 +++ 3 files changed, 43 insertions(+), 79 deletions(-) diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index 8249df4..8dfc9fd 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -51,6 +51,14 @@ ARCH_PERFMON_EVENTSEL_EDGE | \ ARCH_PERFMON_EVENTSEL_INV | \ ARCH_PERFMON_EVENTSEL_CMASK) +#define X86_ALL_EVENT_FLAGS\ + (ARCH_PERFMON_EVENTSEL_EDGE | \ +ARCH_PERFMON_EVENTSEL_INV |\ +ARCH_PERFMON_EVENTSEL_CMASK | \ +ARCH_PERFMON_EVENTSEL_ANY |\ +ARCH_PERFMON_EVENTSEL_PIN_CONTROL |\ +HSW_IN_TX |\ +HSW_IN_TX_CHECKPOINTED) #define AMD64_RAW_EVENT_MASK \ (X86_RAW_EVENT_MASK | \ AMD64_EVENTSEL_EVENT) diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index 3b2f9bd..9907759 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h @@ -252,16 +252,24 @@ struct cpu_hw_events { EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK) #define INTEL_PLD_CONSTRAINT(c, n) \ - __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \ + __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \ HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LDLAT) #define INTEL_PST_CONSTRAINT(c, n) \ - __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \ + __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \ HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST) -/* DataLA version of store sampling without extra enable bit. */ -#define INTEL_PST_HSW_CONSTRAINT(c, n) \ - __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \ +/* Event constraint, but match on all event flags too. 
*/ +#define INTEL_FLAGS_EVENT_CONSTRAINT(c, n) \ + EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS) + +/* Check only flags, but allow all event/umask */ +#define INTEL_ALL_EVENT_CONSTRAINT(flags, n) \ + EVENT_CONSTRAINT(flags, n, X86_ALL_EVENT_FLAGS) + +/* Same as above, but enable DataLA */ +#define INTEL_ALL_EVENT_CONSTRAINT_DATALA(flags, n) \ + __EVENT_CONSTRAINT(flags, n, X86_ALL_EVENT_FLAGS, \ HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST_HSW) /* diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index 980970c..d50142e 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c @@ -567,28 +567,10 @@ struct event_constraint intel_atom_pebs_event_constraints[] = { }; struct event_constraint intel_slm_pebs_event_constraints[] = { - INTEL_UEVENT_CONSTRAINT(0x0103, 0x1), /* REHABQ.LD_BLOCK_ST_FORWARD_PS */ - INTEL_UEVENT_CONSTRAINT(0x0803, 0x1), /* REHABQ.LD_SPLITS_PS */ - INTEL_UEVENT_CONSTRAINT(0x0204, 0x1), /* MEM_UOPS_RETIRED.L2_HIT_LOADS_PS */ - INTEL_UEVENT_CONSTRAINT(0x0404, 0x1), /* MEM_UOPS_RETIRED.L2_MISS_LOADS_PS */ - INTEL_UEVENT_CONSTRAINT(0x0804, 0x1), /* MEM_UOPS_RETIRED.DTLB_MISS_LOADS_PS */ - INTEL_UEVENT_CONSTRAINT(0x2004, 0x1), /* MEM_UOPS_RETIRED.HITM_PS */ - INTEL_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY_PS */ - INTEL_UEVENT_CONSTRAINT(0x00c4, 0x1), /* BR_INST_RETIRED.ALL_BRANCHES_PS */ - INTEL_UEVENT_CONSTRAINT(0x7ec4, 0x1), /* BR_INST_RETIRED.JCC_PS */ - INTEL_UEVENT_CONSTRAINT(0xbfc4, 0x1), /* BR_INST_RETIRED.FAR_BRANCH_PS */ -