Re: [PATCH 02/14] perf, core: introduce pmu context switch callback

2014-02-05 Thread Yan, Zheng
On 02/06/2014 12:01 AM, Stephane Eranian wrote:
> On Fri, Jan 3, 2014 at 6:47 AM, Yan, Zheng  wrote:
>> The callback is invoked when a process is scheduled in or out. It
>> provides a mechanism for later patches to save/restore the LBR stack.
>> It can also replace the flush branch stack callback.
>>
> I think you need to say this callback may be invoked on context switches
> with per-thread events attached. As far as I understand, the callback cannot
> be invoked for system-wide events.

It's also invoked when there are only system-wide events (the flush branch stack
case).
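
Roughly, one callback can cover both users. A hand-wavy sketch (the
sketch_* helpers below are made-up names for illustration, not the
actual later patch):

/* hand-wavy sketch, made-up helper names */
static void sketch_lbr_sched_task(struct perf_event_context *ctx,
                                  bool sched_in)
{
        if (sched_in) {
                if (ctx)
                        sketch_lbr_restore(ctx);  /* per-thread events: restore saved stack */
                else
                        intel_pmu_lbr_reset();    /* system-wide only: old flush behaviour */
        } else if (ctx) {
                sketch_lbr_save(ctx);             /* save the stack on sched out */
        }
}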

Regards
Yan, Zheng


> 
>> To avoid unnecessary overhead, the callback is enabled dynamically.
>>
>> Signed-off-by: Yan, Zheng 
>> ---
>>  arch/x86/kernel/cpu/perf_event.c |  7 +
>>  arch/x86/kernel/cpu/perf_event.h |  4 +++
>>  include/linux/perf_event.h   |  8 ++
>>  kernel/events/core.c | 60 +++-
>>  4 files changed, 78 insertions(+), 1 deletion(-)
>>
>> diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
>> index 8e13293..6703d17 100644
>> --- a/arch/x86/kernel/cpu/perf_event.c
>> +++ b/arch/x86/kernel/cpu/perf_event.c
>> @@ -1846,6 +1846,12 @@ static const struct attribute_group *x86_pmu_attr_groups[] = {
>> NULL,
>>  };
>>
>> +static void x86_pmu_sched_task(struct perf_event_context *ctx, bool sched_in)
>> +{
>> +   if (x86_pmu.sched_task)
>> +   x86_pmu.sched_task(ctx, sched_in);
>> +}
>> +
>>  static void x86_pmu_flush_branch_stack(void)
>>  {
>> if (x86_pmu.flush_branch_stack)
>> @@ -1879,6 +1885,7 @@ static struct pmu pmu = {
>>
>> .event_idx  = x86_pmu_event_idx,
>> .flush_branch_stack = x86_pmu_flush_branch_stack,
>> +   .sched_task = x86_pmu_sched_task,
>>  };
>>
>>  void arch_perf_update_userpage(struct perf_event_mmap_page *userpg, u64 now)
>> diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
>> index 745f6fb..3fdb751 100644
>> --- a/arch/x86/kernel/cpu/perf_event.h
>> +++ b/arch/x86/kernel/cpu/perf_event.h
>> @@ -417,6 +417,8 @@ struct x86_pmu {
>>
>> void(*check_microcode)(void);
>> void(*flush_branch_stack)(void);
>> +   void(*sched_task)(struct perf_event_context *ctx,
>> + bool sched_in);
>>
>> /*
>>  * Intel Arch Perfmon v2+
>> @@ -675,6 +677,8 @@ void intel_pmu_pebs_disable_all(void);
>>
>>  void intel_ds_init(void);
>>
>> +void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in);
>> +
> There is no mention of this function anywhere else. Should not be here.
> 
>>  void intel_pmu_lbr_reset(void);
>>
>>  void intel_pmu_lbr_enable(struct perf_event *event);
>> diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
>> index 8f4a70f..6a3e603 100644
>> --- a/include/linux/perf_event.h
>> +++ b/include/linux/perf_event.h
>> @@ -251,6 +251,12 @@ struct pmu {
>>  * flush branch stack on context-switches (needed in cpu-wide mode)
>>  */
>> void (*flush_branch_stack)  (void);
>> +
>> +   /*
>> +* PMU callback for context-switches. optional
>> +*/
>> +   void (*sched_task)  (struct perf_event_context *ctx,
>> +bool sched_in);
>>  };
>>
>>  /**
>> @@ -546,6 +552,8 @@ extern void perf_event_delayed_put(struct task_struct *task);
>>  extern void perf_event_print_debug(void);
>>  extern void perf_pmu_disable(struct pmu *pmu);
>>  extern void perf_pmu_enable(struct pmu *pmu);
>> +extern void perf_sched_cb_disable(struct pmu *pmu);
>> +extern void perf_sched_cb_enable(struct pmu *pmu);
>>  extern int perf_event_task_disable(void);
>>  extern int perf_event_task_enable(void);
>>  extern int perf_event_refresh(struct perf_event *event, int refresh);
>> diff --git a/kernel/events/core.c b/kernel/events/core.c
>> index 89d34f9..d110a23 100644
>> --- a/kernel/events/core.c
>> +++ b/kernel/events/core.c
>> @@ -141,6 +141,7 @@ enum event_type_t {
>>  struct static_key_deferred perf_sched_events __read_mostly;
>>  static DEFINE_PER_CPU(atomic_t, perf_cgroup_events);
>>  static DEFINE_PER_CPU(atomic_t, perf_branch_stack_events);
>> +static DEFINE_PER_CPU(int, perf_sched_cb_usages);
>>
>>  static atomic_t nr_mmap_events __read_mostly;
>>  static atomic_t nr_comm_events __read_mostly;
>> @@ -150,6 +151,7 @@ static atomic_t nr_freq_events __read_mostly;
>>  static LIST_HEAD(pmus);
>>  static DEFINE_MUTEX(pmus_lock);
>>  static struct srcu_struct pmus_srcu;
>> +static struct idr pmu_idr;
>>
>>  /*
>>   * perf event paranoia level:
>> @@ -2327,6 +2329,57 @@ unlock:
>> }
>>  }
>>
>> +void perf_sched_cb_disable(struct pmu *pmu)
>> +{
>> +   __get_cpu_var(perf_sched_cb_usages)--;
>> +}
>> +
>> +void perf_sched_cb_enable(struct pmu *pmu)
>> +{
>> +   __get_cpu_var(perf_sched_cb_usages)++;
>> +}
>> +
> 
> I think you want to use jump_labels instead of this to make
> the callback optional. This is already used all over the place
> in the generic code.

Re: [PATCH 02/14] perf, core: introduce pmu context switch callback

2014-02-05 Thread Stephane Eranian
On Fri, Jan 3, 2014 at 6:47 AM, Yan, Zheng  wrote:
> The callback is invoked when a process is scheduled in or out. It
> provides a mechanism for later patches to save/restore the LBR stack.
> It can also replace the flush branch stack callback.
>
I think you need to say this callback may be invoked on context switches
with per-thread events attached. As far as I understand, the callback cannot
be invoked for system-wide events.

> To avoid unnecessary overhead, the callback is enabled dynamically.
>
> Signed-off-by: Yan, Zheng 
> ---
>  arch/x86/kernel/cpu/perf_event.c |  7 +
>  arch/x86/kernel/cpu/perf_event.h |  4 +++
>  include/linux/perf_event.h   |  8 ++
>  kernel/events/core.c | 60 +++-
>  4 files changed, 78 insertions(+), 1 deletion(-)
>
> diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
> index 8e13293..6703d17 100644
> --- a/arch/x86/kernel/cpu/perf_event.c
> +++ b/arch/x86/kernel/cpu/perf_event.c
> @@ -1846,6 +1846,12 @@ static const struct attribute_group *x86_pmu_attr_groups[] = {
> NULL,
>  };
>
> +static void x86_pmu_sched_task(struct perf_event_context *ctx, bool sched_in)
> +{
> +   if (x86_pmu.sched_task)
> +   x86_pmu.sched_task(ctx, sched_in);
> +}
> +
>  static void x86_pmu_flush_branch_stack(void)
>  {
> if (x86_pmu.flush_branch_stack)
> @@ -1879,6 +1885,7 @@ static struct pmu pmu = {
>
> .event_idx  = x86_pmu_event_idx,
> .flush_branch_stack = x86_pmu_flush_branch_stack,
> +   .sched_task = x86_pmu_sched_task,
>  };
>
>  void arch_perf_update_userpage(struct perf_event_mmap_page *userpg, u64 now)
> diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
> index 745f6fb..3fdb751 100644
> --- a/arch/x86/kernel/cpu/perf_event.h
> +++ b/arch/x86/kernel/cpu/perf_event.h
> @@ -417,6 +417,8 @@ struct x86_pmu {
>
> void(*check_microcode)(void);
> void(*flush_branch_stack)(void);
> +   void(*sched_task)(struct perf_event_context *ctx,
> + bool sched_in);
>
> /*
>  * Intel Arch Perfmon v2+
> @@ -675,6 +677,8 @@ void intel_pmu_pebs_disable_all(void);
>
>  void intel_ds_init(void);
>
> +void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in);
> +
There is no mention of this function anywhere else. Should not be here.

>  void intel_pmu_lbr_reset(void);
>
>  void intel_pmu_lbr_enable(struct perf_event *event);
> diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
> index 8f4a70f..6a3e603 100644
> --- a/include/linux/perf_event.h
> +++ b/include/linux/perf_event.h
> @@ -251,6 +251,12 @@ struct pmu {
>  * flush branch stack on context-switches (needed in cpu-wide mode)
>  */
> void (*flush_branch_stack)  (void);
> +
> +   /*
> +* PMU callback for context-switches. optional
> +*/
> +   void (*sched_task)  (struct perf_event_context *ctx,
> +bool sched_in);
>  };
>
>  /**
> @@ -546,6 +552,8 @@ extern void perf_event_delayed_put(struct task_struct *task);
>  extern void perf_event_print_debug(void);
>  extern void perf_pmu_disable(struct pmu *pmu);
>  extern void perf_pmu_enable(struct pmu *pmu);
> +extern void perf_sched_cb_disable(struct pmu *pmu);
> +extern void perf_sched_cb_enable(struct pmu *pmu);
>  extern int perf_event_task_disable(void);
>  extern int perf_event_task_enable(void);
>  extern int perf_event_refresh(struct perf_event *event, int refresh);
> diff --git a/kernel/events/core.c b/kernel/events/core.c
> index 89d34f9..d110a23 100644
> --- a/kernel/events/core.c
> +++ b/kernel/events/core.c
> @@ -141,6 +141,7 @@ enum event_type_t {
>  struct static_key_deferred perf_sched_events __read_mostly;
>  static DEFINE_PER_CPU(atomic_t, perf_cgroup_events);
>  static DEFINE_PER_CPU(atomic_t, perf_branch_stack_events);
> +static DEFINE_PER_CPU(int, perf_sched_cb_usages);
>
>  static atomic_t nr_mmap_events __read_mostly;
>  static atomic_t nr_comm_events __read_mostly;
> @@ -150,6 +151,7 @@ static atomic_t nr_freq_events __read_mostly;
>  static LIST_HEAD(pmus);
>  static DEFINE_MUTEX(pmus_lock);
>  static struct srcu_struct pmus_srcu;
> +static struct idr pmu_idr;
>
>  /*
>   * perf event paranoia level:
> @@ -2327,6 +2329,57 @@ unlock:
> }
>  }
>
> +void perf_sched_cb_disable(struct pmu *pmu)
> +{
> +   __get_cpu_var(perf_sched_cb_usages)--;
> +}
> +
> +void perf_sched_cb_enable(struct pmu *pmu)
> +{
> +   __get_cpu_var(perf_sched_cb_usages)++;
> +}
> +
I think you want to use jump_labels instead of this to make
the callback optional. This is already used all over the place
in the generic code.
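
Something along these lines, mirroring how perf_sched_events is
already handled in core.c (untested sketch; perf_sched_cb_events is
a name made up here):

/* untested sketch: gate the callback behind a static key */
static struct static_key_deferred perf_sched_cb_events __read_mostly;

void perf_sched_cb_enable(struct pmu *pmu)
{
        static_key_slow_inc(&perf_sched_cb_events.key);
}

void perf_sched_cb_disable(struct pmu *pmu)
{
        static_key_slow_dec_deferred(&perf_sched_cb_events);
}

/* and in the context-switch hooks: */
        if (static_key_false(&perf_sched_cb_events.key))
                perf_pmu_sched_task(prev, next, sched_in);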

> +/*
> + * This function provides the context switch callback to the lower code
> + * layer. It is invoked ONLY when the context switch callback is enabled.
> + */
> +static void perf_pmu_sched_task(struct task_struct *prev,

[PATCH 02/14] perf, core: introduce pmu context switch callback

2014-01-02 Thread Yan, Zheng
The callback is invoked when a process is scheduled in or out. It
provides a mechanism for later patches to save/restore the LBR stack.
It can also replace the flush branch stack callback.

To avoid unnecessary overhead, the callback is enabled dynamically.

Signed-off-by: Yan, Zheng 
---
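(Illustration only, not part of the diff: a PMU driver with per-task
hardware state would be expected to pair these calls around its
pmu::add/del callbacks, roughly as below. The sketch_* names and the
needs_ctx_switch_state() predicate are made up.)

/* hypothetical usage sketch for perf_sched_cb_enable/disable */
static int sketch_pmu_add(struct perf_event *event, int flags)
{
        /* ... program the counter for 'event' ... */
        if (needs_ctx_switch_state(event))              /* made-up predicate */
                perf_sched_cb_enable(event->pmu);       /* per-cpu count++ */
        return 0;
}

static void sketch_pmu_del(struct perf_event *event, int flags)
{
        if (needs_ctx_switch_state(event))
                perf_sched_cb_disable(event->pmu);      /* per-cpu count-- */
        /* ... stop the counter ... */
}
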
 arch/x86/kernel/cpu/perf_event.c |  7 +
 arch/x86/kernel/cpu/perf_event.h |  4 +++
 include/linux/perf_event.h   |  8 ++
 kernel/events/core.c | 60 +++-
 4 files changed, 78 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 8e13293..6703d17 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1846,6 +1846,12 @@ static const struct attribute_group *x86_pmu_attr_groups[] = {
NULL,
 };
 
+static void x86_pmu_sched_task(struct perf_event_context *ctx, bool sched_in)
+{
+   if (x86_pmu.sched_task)
+   x86_pmu.sched_task(ctx, sched_in);
+}
+
 static void x86_pmu_flush_branch_stack(void)
 {
if (x86_pmu.flush_branch_stack)
@@ -1879,6 +1885,7 @@ static struct pmu pmu = {
 
.event_idx  = x86_pmu_event_idx,
.flush_branch_stack = x86_pmu_flush_branch_stack,
+   .sched_task = x86_pmu_sched_task,
 };
 
 void arch_perf_update_userpage(struct perf_event_mmap_page *userpg, u64 now)
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 745f6fb..3fdb751 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -417,6 +417,8 @@ struct x86_pmu {
 
void(*check_microcode)(void);
void(*flush_branch_stack)(void);
+   void(*sched_task)(struct perf_event_context *ctx,
+ bool sched_in);
 
/*
 * Intel Arch Perfmon v2+
@@ -675,6 +677,8 @@ void intel_pmu_pebs_disable_all(void);
 
 void intel_ds_init(void);
 
+void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in);
+
 void intel_pmu_lbr_reset(void);
 
 void intel_pmu_lbr_enable(struct perf_event *event);
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 8f4a70f..6a3e603 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -251,6 +251,12 @@ struct pmu {
 * flush branch stack on context-switches (needed in cpu-wide mode)
 */
void (*flush_branch_stack)  (void);
+
+   /*
+* PMU callback for context-switches. optional
+*/
+   void (*sched_task)  (struct perf_event_context *ctx,
+bool sched_in);
 };
 
 /**
@@ -546,6 +552,8 @@ extern void perf_event_delayed_put(struct task_struct *task);
 extern void perf_event_print_debug(void);
 extern void perf_pmu_disable(struct pmu *pmu);
 extern void perf_pmu_enable(struct pmu *pmu);
+extern void perf_sched_cb_disable(struct pmu *pmu);
+extern void perf_sched_cb_enable(struct pmu *pmu);
 extern int perf_event_task_disable(void);
 extern int perf_event_task_enable(void);
 extern int perf_event_refresh(struct perf_event *event, int refresh);
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 89d34f9..d110a23 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -141,6 +141,7 @@ enum event_type_t {
 struct static_key_deferred perf_sched_events __read_mostly;
 static DEFINE_PER_CPU(atomic_t, perf_cgroup_events);
 static DEFINE_PER_CPU(atomic_t, perf_branch_stack_events);
+static DEFINE_PER_CPU(int, perf_sched_cb_usages);
 
 static atomic_t nr_mmap_events __read_mostly;
 static atomic_t nr_comm_events __read_mostly;
@@ -150,6 +151,7 @@ static atomic_t nr_freq_events __read_mostly;
 static LIST_HEAD(pmus);
 static DEFINE_MUTEX(pmus_lock);
 static struct srcu_struct pmus_srcu;
+static struct idr pmu_idr;
 
 /*
  * perf event paranoia level:
@@ -2327,6 +2329,57 @@ unlock:
}
 }
 
+void perf_sched_cb_disable(struct pmu *pmu)
+{
+   __get_cpu_var(perf_sched_cb_usages)--;
+}
+
+void perf_sched_cb_enable(struct pmu *pmu)
+{
+   __get_cpu_var(perf_sched_cb_usages)++;
+}
+
+/*
+ * This function provides the context switch callback to the lower code
+ * layer. It is invoked ONLY when the context switch callback is enabled.
+ */
+static void perf_pmu_sched_task(struct task_struct *prev,
+   struct task_struct *next,
+   bool sched_in)
+{
+   struct perf_cpu_context *cpuctx;
+   struct pmu *pmu;
+   unsigned long flags;
+
+   if (prev == next)
+   return;
+
+   local_irq_save(flags);
+
+   rcu_read_lock();
+
+   pmu = idr_find(&pmu_idr, PERF_TYPE_RAW);
+
+   if (pmu && pmu->sched_task) {
+   cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
+   pmu = cpuctx->ctx.pmu;
+
+   perf_ctx_lock(cpuctx, cpuctx->task_ctx);
+
+   perf_pmu_disable(pmu);
