Re: [PATCH 02/14] perf, core: introduce pmu context switch callback
On 02/06/2014 12:01 AM, Stephane Eranian wrote:
> On Fri, Jan 3, 2014 at 6:47 AM, Yan, Zheng <zheng.z@intel.com> wrote:
>> The callback is invoked when a process is scheduled in or out. It
>> provides a mechanism for later patches to save/restore the LBR stack.
>> It can also replace the flush branch stack callback.
>>
> I think you need to say this callback may be invoked on context
> switches with per-thread events attached. As far as I understand, the
> callback cannot be invoked for system-wide events.

It's also invoked when there are only system-wide events (the flush
branch stack case).

Regards
Yan, Zheng

[...]
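To make the system-wide case concrete, here is a minimal sketch of how a
sched_task implementation could serve both users Yan mentions: per-thread
LBR save/restore, and the flush that previously went through
flush_branch_stack. The helpers __intel_pmu_lbr_save()/__intel_pmu_lbr_restore()
and the assumption that ctx is NULL when only system-wide events are active
are illustrative guesses, not part of this patch:

void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in)
{
        /*
         * No per-thread context: only system-wide events are active.
         * Behave like the old flush_branch_stack callback and clear
         * stale LBR entries when a new task is scheduled in.
         */
        if (!ctx) {
                if (sched_in)
                        intel_pmu_lbr_reset();
                return;
        }

        /*
         * Per-thread events: save the LBR stack when the task schedules
         * out, restore it when the task schedules back in.
         */
        if (sched_in)
                __intel_pmu_lbr_restore(ctx);   /* hypothetical helper */
        else
                __intel_pmu_lbr_save(ctx);      /* hypothetical helper */
}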
Re: [PATCH 02/14] perf, core: introduce pmu context switch callback
On Fri, Jan 3, 2014 at 6:47 AM, Yan, Zheng <zheng.z@intel.com> wrote:
> The callback is invoked when a process is scheduled in or out. It
> provides a mechanism for later patches to save/restore the LBR stack.
> It can also replace the flush branch stack callback.
>
I think you need to say this callback may be invoked on context switches
with per-thread events attached. As far as I understand, the callback
cannot be invoked for system-wide events.

> To avoid unnecessary overhead, the callback is enabled dynamically.
>
> Signed-off-by: Yan, Zheng <zheng.z@intel.com>
> ---
>  arch/x86/kernel/cpu/perf_event.c |  7 +++++++
>  arch/x86/kernel/cpu/perf_event.h |  4 ++++
>  include/linux/perf_event.h       |  8 ++++++++
>  kernel/events/core.c             | 60 +++++++++++++++++++++++++++++++++-
>  4 files changed, 78 insertions(+), 1 deletion(-)

[...]

> @@ -675,6 +677,8 @@ void intel_pmu_pebs_disable_all(void);
>
>  void intel_ds_init(void);
>
> +void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in);
> +
There is no mention of this function anywhere else. Should not be here.

[...]

> +void perf_sched_cb_disable(struct pmu *pmu)
> +{
> +        __get_cpu_var(perf_sched_cb_usages)--;
> +}
> +
> +void perf_sched_cb_enable(struct pmu *pmu)
> +{
> +        __get_cpu_var(perf_sched_cb_usages)++;
> +}
> +
I think you want to use jump_labels instead of this to make the callback
optional. This is already used all over the place in the generic code.

> +/*
> + * This function provides the context switch callback to the lower code
> + * layer. It is invoked ONLY when the context switch callback is enabled.
> + */

[...]
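The jump_label pattern Stephane points to is the same one already visible in
the quoted hunk above (struct static_key_deferred perf_sched_events). For
illustration, a sketch of what the dynamic enable could look like with a
static key in front of the per-CPU count; the key name perf_sched_cb_events
and the perf_sched_cb() wrapper are assumptions, not code from this series:

struct static_key_deferred perf_sched_cb_events __read_mostly;

void perf_sched_cb_enable(struct pmu *pmu)
{
        static_key_slow_inc(&perf_sched_cb_events.key);
        __get_cpu_var(perf_sched_cb_usages)++;
}

void perf_sched_cb_disable(struct pmu *pmu)
{
        static_key_slow_dec(&perf_sched_cb_events.key);
        __get_cpu_var(perf_sched_cb_usages)--;
}

/*
 * With the key off, the branch in the scheduler hook compiles to a
 * patched NOP, so workloads that never use the callback pay nothing:
 */
static inline void perf_sched_cb(struct task_struct *prev,
                                 struct task_struct *next, bool sched_in)
{
        if (static_key_false(&perf_sched_cb_events.key))
                perf_pmu_sched_task(prev, next, sched_in);
}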
[PATCH 02/14] perf, core: introduce pmu context switch callback
The callback is invoked when a process is scheduled in or out. It
provides a mechanism for later patches to save/restore the LBR stack.
It can also replace the flush branch stack callback.

To avoid unnecessary overhead, the callback is enabled dynamically.

Signed-off-by: Yan, Zheng <zheng.z@intel.com>
---
 arch/x86/kernel/cpu/perf_event.c |  7 +++++++
 arch/x86/kernel/cpu/perf_event.h |  4 ++++
 include/linux/perf_event.h       |  8 ++++++++
 kernel/events/core.c             | 60 +++++++++++++++++++++++++++++++++-
 4 files changed, 78 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 8e13293..6703d17 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1846,6 +1846,12 @@ static const struct attribute_group *x86_pmu_attr_groups[] = {
 	NULL,
 };
 
+static void x86_pmu_sched_task(struct perf_event_context *ctx, bool sched_in)
+{
+	if (x86_pmu.sched_task)
+		x86_pmu.sched_task(ctx, sched_in);
+}
+
 static void x86_pmu_flush_branch_stack(void)
 {
 	if (x86_pmu.flush_branch_stack)
@@ -1879,6 +1885,7 @@ static struct pmu pmu = {
 
 	.event_idx		= x86_pmu_event_idx,
 	.flush_branch_stack	= x86_pmu_flush_branch_stack,
+	.sched_task		= x86_pmu_sched_task,
 };
 
 void arch_perf_update_userpage(struct perf_event_mmap_page *userpg, u64 now)
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 745f6fb..3fdb751 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -417,6 +417,8 @@ struct x86_pmu {
 
 	void		(*check_microcode)(void);
 	void		(*flush_branch_stack)(void);
+	void		(*sched_task)(struct perf_event_context *ctx,
+				      bool sched_in);
 
 	/*
 	 * Intel Arch Perfmon v2+
@@ -675,6 +677,8 @@ void intel_pmu_pebs_disable_all(void);
 
 void intel_ds_init(void);
 
+void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in);
+
 void intel_pmu_lbr_reset(void);
 
 void intel_pmu_lbr_enable(struct perf_event *event);
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 8f4a70f..6a3e603 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -251,6 +251,12 @@ struct pmu {
 	 * flush branch stack on context-switches (needed in cpu-wide mode)
 	 */
 	void (*flush_branch_stack)	(void);
+
+	/*
+	 * PMU callback for context-switches. optional
+	 */
+	void (*sched_task)		(struct perf_event_context *ctx,
+					 bool sched_in);
 };
 
 /**
@@ -546,6 +552,8 @@ extern void perf_event_delayed_put(struct task_struct *task);
 extern void perf_event_print_debug(void);
 extern void perf_pmu_disable(struct pmu *pmu);
 extern void perf_pmu_enable(struct pmu *pmu);
+extern void perf_sched_cb_disable(struct pmu *pmu);
+extern void perf_sched_cb_enable(struct pmu *pmu);
 extern int perf_event_task_disable(void);
 extern int perf_event_task_enable(void);
 extern int perf_event_refresh(struct perf_event *event, int refresh);
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 89d34f9..d110a23 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -141,6 +141,7 @@ enum event_type_t {
 struct static_key_deferred perf_sched_events __read_mostly;
 static DEFINE_PER_CPU(atomic_t, perf_cgroup_events);
 static DEFINE_PER_CPU(atomic_t, perf_branch_stack_events);
+static DEFINE_PER_CPU(int, perf_sched_cb_usages);
 
 static atomic_t nr_mmap_events __read_mostly;
 static atomic_t nr_comm_events __read_mostly;
@@ -150,6 +151,7 @@ static atomic_t nr_freq_events __read_mostly;
 static LIST_HEAD(pmus);
 static DEFINE_MUTEX(pmus_lock);
 static struct srcu_struct pmus_srcu;
+static struct idr pmu_idr;
 
 /*
  * perf event paranoia level:
@@ -2327,6 +2329,57 @@ unlock:
 	}
 }
 
+void perf_sched_cb_disable(struct pmu *pmu)
+{
+	__get_cpu_var(perf_sched_cb_usages)--;
+}
+
+void perf_sched_cb_enable(struct pmu *pmu)
+{
+	__get_cpu_var(perf_sched_cb_usages)++;
+}
+
+/*
+ * This function provides the context switch callback to the lower code
+ * layer. It is invoked ONLY when the context switch callback is enabled.
+ */
+static void perf_pmu_sched_task(struct task_struct *prev,
+				struct task_struct *next,
+				bool sched_in)
+{
+	struct perf_cpu_context *cpuctx;
+	struct pmu *pmu;
+	unsigned long flags;
+
+	if (prev == next)
+		return;
+
+	local_irq_save(flags);
+
+	rcu_read_lock();
+
+	pmu = idr_find(&pmu_idr, PERF_TYPE_RAW);
+
+	if (pmu && pmu->sched_task) {
+		cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
+		pmu = cpuctx->ctx.pmu;
+
+		perf_ctx_lock(cpuctx, cpuctx->task_ctx);
+
+		perf_pmu_disable(pmu);
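The archived copy of the message cuts off inside perf_pmu_sched_task(). Given
the acquire sequence above (IRQs off, RCU read lock, ctx lock, PMU disabled),
the function presumably invokes the callback and then unwinds everything in
reverse order; the following completion is a plausible reconstruction, not
the author's verbatim code:

		pmu->sched_task(cpuctx->task_ctx, sched_in);

		perf_pmu_enable(pmu);

		perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
	}

	rcu_read_unlock();

	local_irq_restore(flags);
}

A PMU driver would then bracket its use of the feature with
perf_sched_cb_enable()/perf_sched_cb_disable() -- for example from its event
add/del path, when the first LBR user arrives on a CPU and when the last one
leaves -- so the context-switch path only pays for the callback while
something on that CPU actually needs it.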