[RFC PATCH 5/6] tracing: extend scheduling tracepoints

2016-09-16 Thread Julien Desfossez
Create alternate versions of the sched_switch, sched_waking and
sched_process_fork tracepoint probes to output priority-related fields
and PI top-waiter if any.

This uses the TRACE_EVENT_MAP macro, so the instrumented code and the
already existing tracepoints are untouched.

We only expose the priority-related fields visible from userspace,
leaving out the "prio" value, which is really a kernel-internal
representation of the task priority and should be expected to be
deprecated eventually. The values output are the effective values, not
necessarily the normal values.

We also output the comm and PID of the process blocked by the task if it
is in a PI situation. These fields make it possible to quickly identify
PI situations without having to keep track of all the
sched_pi_setprio/sched_pi_update_prio events and state.

The values traced are the effective values, which may differ from the
thread normal values in PI scenarios.

Here is an example of the output from these new probes:
sched_process_fork_prio: comm=bash, pid=1988, child_comm=bash,
child_pid=2129, child_policy=SCHED_NORMAL, child_nice=0,
child_rt_priority=0, child_dl_runtime=0,
child_dl_deadline=0, child_dl_period=0

No PI:
sched_switch_prio: prev_comm=swapper/6, prev_pid=0, prev_policy=SCHED_NORMAL,
prev_nice=0, prev_rt_priority=0, prev_dl_runtime=0,
prev_dl_deadline=0, prev_dl_period=0, prev_state=R,
prev_top_waiter_comm=, prev_top_waiter_pid=-1 ==>
next_comm=bash, next_pid=3817, next_policy=SCHED_NORMAL,
next_nice=0, next_rt_priority=0, next_dl_runtime=0,
next_dl_deadline=0, next_dl_period=0, next_top_waiter_comm=,
next_top_waiter_pid=-1

sched_waking_prio: comm=migration/6, pid=38, target_cpu=006,
policy=SCHED_FIFO, nice=0, rt_priority=99, dl_runtime=0,
dl_deadline=0, dl_period=0, top_waiter_comm=, top_waiter_pid=-1

PI:
sched_switch_prio: prev_comm=swapper/1, prev_pid=0, prev_policy=SCHED_NORMAL,
prev_nice=0, prev_rt_priority=0, prev_dl_runtime=0,
prev_dl_deadline=0, prev_dl_period=0, prev_state=R,
prev_top_waiter_comm=, prev_top_waiter_pid=-1 ==>
next_comm=lowprio1, next_pid=3818, next_policy=SCHED_NORMAL,
next_nice=0, next_rt_priority=90, next_dl_runtime=0,
next_dl_deadline=0, next_dl_period=0,
next_top_waiter_comm=highprio0, next_top_waiter_pid=3820

sched_waking_prio: comm=lowprio1, pid=3818, target_cpu=001, policy=SCHED_FIFO,
  nice=0, rt_priority=90, dl_runtime=0, dl_deadline=0,
  dl_period=0, top_waiter_comm=highprio0, top_waiter_pid=3820

Cc: Peter Zijlstra 
Cc: Steven Rostedt (Red Hat) 
Cc: Thomas Gleixner 
Cc: Ingo Molnar 
Signed-off-by: Mathieu Desnoyers 
Signed-off-by: Julien Desfossez 
---
 include/trace/events/sched.h | 222 +++
 1 file changed, 222 insertions(+)

diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
index bc695e4..11b3358 100644
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -131,6 +131,64 @@
 TP_PROTO(struct task_struct *p),
 TP_ARGS(p));
 
+TRACE_EVENT_MAP(sched_waking, sched_waking_prio,
+
+   TP_PROTO(struct task_struct *p),
+
+   TP_ARGS(p),
+
+   TP_STRUCT__entry(
+   __array(char,   comm,   TASK_COMM_LEN   )
+   __field(pid_t,  pid )
+   __field(int,target_cpu  )
+   __field(unsigned int,   policy  )
+   __field(int,nice)
+   __field(unsigned int,   rt_priority )
+   __field(u64,dl_runtime  )
+   __field(u64,dl_deadline )
+   __field(u64,dl_period   )
+   __array(char,   top_waiter_comm,TASK_COMM_LEN   
)
+   __field(pid_t,  top_waiter_pid  )
+   ),
+
+   TP_fast_assign(
+   struct task_struct *top_waiter = rt_mutex_get_top_task(p);
+
+   memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
+   __entry->pid= p->pid;
+   __entry->target_cpu = task_cpu(p);
+   __entry->policy = rt_mutex_get_effective_policy(
+   p->policy, p->prio);
+   __entry->nice   = task_nice(p);
+   __entry->rt_priority= rt_mutex_get_effective_rt_prio(
+   p->prio);
+   __entry->dl_runtime = dl_prio(p->prio) ?
+   p->dl.dl_runtime : 0;
+   __entry->dl_deadline= dl_prio(p->prio) ?
+ 

[RFC PATCH 5/6] tracing: extend scheduling tracepoints

2016-09-16 Thread Julien Desfossez
Create alternate versions of the sched_switch, sched_waking and
sched_process_fork tracepoint probes to output priority-related fields
and PI top-waiter if any.

This uses the TRACE_EVENT_MAP macro, so the instrumented code and the
already existing tracepoints are untouched.

We only expose the priority-related fields visible from userspace,
leaving out the "prio" value, which is really a kernel-internal
representation of the task priority and should be expected to be
deprecated eventually. The values output are the effective values, not
necessarily the normal values.

We also output the comm and PID of the process blocked by the task if it
is in a PI situation. These fields make it possible to quickly identify
PI situations without having to keep track of all the
sched_pi_setprio/sched_pi_update_prio events and state.

The values traced are the effective values, which may differ from the
thread normal values in PI scenarios.

Here is an example of the output from these new probes:
sched_process_fork_prio: comm=bash, pid=1988, child_comm=bash,
child_pid=2129, child_policy=SCHED_NORMAL, child_nice=0,
child_rt_priority=0, child_dl_runtime=0,
child_dl_deadline=0, child_dl_period=0

No PI:
sched_switch_prio: prev_comm=swapper/6, prev_pid=0, prev_policy=SCHED_NORMAL,
prev_nice=0, prev_rt_priority=0, prev_dl_runtime=0,
prev_dl_deadline=0, prev_dl_period=0, prev_state=R,
prev_top_waiter_comm=, prev_top_waiter_pid=-1 ==>
next_comm=bash, next_pid=3817, next_policy=SCHED_NORMAL,
next_nice=0, next_rt_priority=0, next_dl_runtime=0,
next_dl_deadline=0, next_dl_period=0, next_top_waiter_comm=,
next_top_waiter_pid=-1

sched_waking_prio: comm=migration/6, pid=38, target_cpu=006,
policy=SCHED_FIFO, nice=0, rt_priority=99, dl_runtime=0,
dl_deadline=0, dl_period=0, top_waiter_comm=, top_waiter_pid=-1

PI:
sched_switch_prio: prev_comm=swapper/1, prev_pid=0, prev_policy=SCHED_NORMAL,
prev_nice=0, prev_rt_priority=0, prev_dl_runtime=0,
prev_dl_deadline=0, prev_dl_period=0, prev_state=R,
prev_top_waiter_comm=, prev_top_waiter_pid=-1 ==>
next_comm=lowprio1, next_pid=3818, next_policy=SCHED_NORMAL,
next_nice=0, next_rt_priority=90, next_dl_runtime=0,
next_dl_deadline=0, next_dl_period=0,
next_top_waiter_comm=highprio0, next_top_waiter_pid=3820

sched_waking_prio: comm=lowprio1, pid=3818, target_cpu=001, policy=SCHED_FIFO,
  nice=0, rt_priority=90, dl_runtime=0, dl_deadline=0,
  dl_period=0, top_waiter_comm=highprio0, top_waiter_pid=3820

Cc: Peter Zijlstra 
Cc: Steven Rostedt (Red Hat) 
Cc: Thomas Gleixner 
Cc: Ingo Molnar 
Signed-off-by: Mathieu Desnoyers 
Signed-off-by: Julien Desfossez 
---
 include/trace/events/sched.h | 222 +++
 1 file changed, 222 insertions(+)

diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
index bc695e4..11b3358 100644
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -131,6 +131,64 @@
 TP_PROTO(struct task_struct *p),
 TP_ARGS(p));
 
+TRACE_EVENT_MAP(sched_waking, sched_waking_prio,
+
+   TP_PROTO(struct task_struct *p),
+
+   TP_ARGS(p),
+
+   TP_STRUCT__entry(
+   __array(char,   comm,   TASK_COMM_LEN   )
+   __field(pid_t,  pid )
+   __field(int,target_cpu  )
+   __field(unsigned int,   policy  )
+   __field(int,nice)
+   __field(unsigned int,   rt_priority )
+   __field(u64,dl_runtime  )
+   __field(u64,dl_deadline )
+   __field(u64,dl_period   )
+   __array(char,   top_waiter_comm,TASK_COMM_LEN   
)
+   __field(pid_t,  top_waiter_pid  )
+   ),
+
+   TP_fast_assign(
+   struct task_struct *top_waiter = rt_mutex_get_top_task(p);
+
+   memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
+   __entry->pid= p->pid;
+   __entry->target_cpu = task_cpu(p);
+   __entry->policy = rt_mutex_get_effective_policy(
+   p->policy, p->prio);
+   __entry->nice   = task_nice(p);
+   __entry->rt_priority= rt_mutex_get_effective_rt_prio(
+   p->prio);
+   __entry->dl_runtime = dl_prio(p->prio) ?
+   p->dl.dl_runtime : 0;
+   __entry->dl_deadline= dl_prio(p->prio) ?
+   p->dl.dl_deadline : 0;
+   __entry->dl_period  = dl_prio(p->prio) ?
+