Add the following tracepoints:

 * sched_set_need_resched(tsk, cpu, tif)
     Called when the need resched [lazy] flag is set for a task
 * sched_switch_vain(preempt, tsk, tsk_state)
     Called when the same task is selected again during __schedule,
     i.e. prev == next == tsk: no real context switch occurs
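As an aside (not part of the patch): a minimal sketch of how an in-kernel
probe could hook the new tracepoint. It assumes the _tp-suffixed register
helpers that DECLARE_TRACE() generates for these bare (non-event)
tracepoints; the probe and module names are illustrative only:

  #include <linux/module.h>
  #include <linux/sched.h>
  #include <linux/tracepoint.h>
  #include <trace/events/sched.h>

  /* Matches TP_PROTO(struct task_struct *tsk, int cpu, int tif),
   * preceded by the usual probe data pointer. */
  static void probe_need_resched(void *data, struct task_struct *tsk,
                                 int cpu, int tif)
  {
          pr_debug("need_resched: comm=%s pid=%d cpu=%d tif=%d\n",
                   tsk->comm, tsk->pid, cpu, tif);
  }

  static int __init nr_probe_init(void)
  {
          return register_trace_sched_set_need_resched_tp(probe_need_resched,
                                                          NULL);
  }

  static void __exit nr_probe_exit(void)
  {
          unregister_trace_sched_set_need_resched_tp(probe_need_resched, NULL);
          /* Wait for in-flight probes before the module goes away. */
          tracepoint_synchronize_unregister();
  }

  module_init(nr_probe_init);
  module_exit(nr_probe_exit);
  MODULE_LICENSE("GPL");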
Add a new parameter to sched_set_state to identify whether the state
change was due to an explicit call or to a signal pending while
scheduling. We now also trace from try_to_block_task in case a signal
was pending and the task is set back to runnable. Also adapt all
monitors using sched_set_state so the build does not break (a probe
sketch using the new parameter is shown after the patch).

These tracepoints are useful to describe the Linux task model and are
adapted from the patches by Daniel Bristot de Oliveira
(https://bristot.me/linux-task-model/).

Signed-off-by: Gabriele Monaco <gmon...@redhat.com>
---
 include/linux/sched.h                  |  7 ++++++-
 include/trace/events/sched.h           | 17 +++++++++++++++--
 kernel/sched/core.c                    | 10 +++++++++-
 kernel/trace/rv/monitors/sco/sco.c     |  3 ++-
 kernel/trace/rv/monitors/sleep/sleep.c |  3 ++-
 kernel/trace/rv/monitors/snroc/snroc.c |  3 ++-
 6 files changed, 36 insertions(+), 7 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 7bce4c7ae3b4f..19ab4597c97d3 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -339,9 +339,11 @@ extern void io_schedule_finish(int token);
 extern long io_schedule_timeout(long timeout);
 extern void io_schedule(void);
 
-/* wrapper function to trace from this header file */
+/* wrapper functions to trace from this header file */
 DECLARE_TRACEPOINT(sched_set_state_tp);
 extern void __trace_set_current_state(int state_value);
+DECLARE_TRACEPOINT(sched_set_need_resched_tp);
+extern void __trace_set_need_resched(struct task_struct *curr, int tif);
 
 /**
  * struct prev_cputime - snapshot of system and user cputime
@@ -2059,6 +2061,9 @@ static inline int test_tsk_thread_flag(struct task_struct *tsk, int flag)
 
 static inline void set_tsk_need_resched(struct task_struct *tsk)
 {
+	if (tracepoint_enabled(sched_set_need_resched_tp) &&
+	    !test_tsk_thread_flag(tsk, TIF_NEED_RESCHED))
+		__trace_set_need_resched(tsk, TIF_NEED_RESCHED);
 	set_tsk_thread_flag(tsk,TIF_NEED_RESCHED);
 }
 
diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
index 4e6b2910cec3f..c9dec6d38ad2d 100644
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -889,11 +889,24 @@ DECLARE_TRACE(sched_exit,
 	TP_PROTO(bool is_switch, unsigned long ip),
 	TP_ARGS(is_switch, ip));
 
+/*
+ * Tracepoint called when setting the state of a task;
+ * this tracepoint is guaranteed to be called from the waking context of the
+ * task setting the state.
+ */
 DECLARE_TRACE_CONDITION(sched_set_state,
-	TP_PROTO(struct task_struct *tsk, int state),
-	TP_ARGS(tsk, state),
+	TP_PROTO(struct task_struct *tsk, int state, bool from_signal),
+	TP_ARGS(tsk, state, from_signal),
 	TP_CONDITION(!!(tsk->__state) != !!state));
 
+DECLARE_TRACE(sched_set_need_resched,
+	TP_PROTO(struct task_struct *tsk, int cpu, int tif),
+	TP_ARGS(tsk, cpu, tif));
+
+DECLARE_TRACE(sched_switch_vain,
+	TP_PROTO(bool preempt, struct task_struct *tsk, unsigned int prev_state),
+	TP_ARGS(preempt, tsk, prev_state));
+
 #endif /* _TRACE_SCHED_H */
 
 /* This part must be outside protection */
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 81c6df746df17..6cb70e6f7fa17 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -495,7 +495,7 @@ EXPORT_TRACEPOINT_SYMBOL(sched_set_state_tp);
 
 /* Call via the helper macro trace_set_current_state. */
 void __trace_set_current_state(int state_value)
 {
-	trace_sched_set_state_tp(current, state_value);
+	trace_sched_set_state_tp(current, state_value, false);
 }
 EXPORT_SYMBOL(__trace_set_current_state);
 
@@ -1110,6 +1110,7 @@ static void __resched_curr(struct rq *rq, int tif)
 
 	cpu = cpu_of(rq);
 
+	trace_sched_set_need_resched_tp(curr, cpu, tif);
 	if (cpu == smp_processor_id()) {
 		set_ti_thread_flag(cti, tif);
 		if (tif == TIF_NEED_RESCHED)
@@ -1125,6 +1126,11 @@ static void __resched_curr(struct rq *rq, int tif)
 	}
 }
 
+void __trace_set_need_resched(struct task_struct *curr, int tif)
+{
+	trace_sched_set_need_resched_tp(curr, smp_processor_id(), tif);
+}
+
 void resched_curr(struct rq *rq)
 {
 	__resched_curr(rq, TIF_NEED_RESCHED);
@@ -6592,6 +6598,7 @@ static bool try_to_block_task(struct rq *rq, struct task_struct *p,
 	int flags = DEQUEUE_NOCLOCK;
 
 	if (signal_pending_state(task_state, p)) {
+		trace_sched_set_state_tp(p, TASK_RUNNING, true);
 		WRITE_ONCE(p->__state, TASK_RUNNING);
 		*task_state_p = TASK_RUNNING;
 		return false;
@@ -6786,6 +6793,7 @@ static void __sched notrace __schedule(int sched_mode)
 		rq = context_switch(rq, prev, next, &rf);
 	} else {
 		rq_unpin_lock(rq, &rf);
+		trace_sched_switch_vain_tp(preempt, prev, prev_state);
 		__balance_callbacks(rq);
 		raw_spin_rq_unlock_irq(rq);
 	}
diff --git a/kernel/trace/rv/monitors/sco/sco.c b/kernel/trace/rv/monitors/sco/sco.c
index 66f4639d46ac4..c9206aa12c319 100644
--- a/kernel/trace/rv/monitors/sco/sco.c
+++ b/kernel/trace/rv/monitors/sco/sco.c
@@ -19,7 +19,8 @@ static struct rv_monitor rv_sco;
 
 DECLARE_DA_MON_PER_CPU(sco, unsigned char);
 
-static void handle_sched_set_state(void *data, struct task_struct *tsk, int state)
+static void handle_sched_set_state(void *data, struct task_struct *tsk,
+				   int state, bool from_signal)
 {
 	da_handle_start_event_sco(sched_set_state_sco);
 }
diff --git a/kernel/trace/rv/monitors/sleep/sleep.c b/kernel/trace/rv/monitors/sleep/sleep.c
index eea447b069071..5103a98818c53 100644
--- a/kernel/trace/rv/monitors/sleep/sleep.c
+++ b/kernel/trace/rv/monitors/sleep/sleep.c
@@ -82,7 +82,8 @@ static void ltl_atoms_init(struct task_struct *task, struct ltl_monitor *mon, bo
 }
 
-static void handle_sched_set_state(void *data, struct task_struct *task, int state)
+static void handle_sched_set_state(void *data, struct task_struct *task,
+				   int state, bool from_signal)
 {
 	if (state & TASK_INTERRUPTIBLE)
 		ltl_atom_pulse(task, LTL_SLEEP, true);
diff --git a/kernel/trace/rv/monitors/snroc/snroc.c b/kernel/trace/rv/monitors/snroc/snroc.c
index 540e686e699f4..2651f589d1554 100644
--- a/kernel/trace/rv/monitors/snroc/snroc.c
+++ b/kernel/trace/rv/monitors/snroc/snroc.c
@@ -19,7 +19,8 @@ static struct rv_monitor rv_snroc;
 
 DECLARE_DA_MON_PER_TASK(snroc, unsigned char);
 
-static void handle_sched_set_state(void *data, struct task_struct *tsk, int state)
+static void handle_sched_set_state(void *data, struct task_struct *tsk,
+				   int state, bool from_signal)
 {
 	da_handle_event_snroc(tsk, sched_set_state_snroc);
 }
-- 
2.50.1
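The probe sketch referenced above (again illustrative, not part of the
patch; it reuses the hypothetical module skeleton from the earlier
sketch and assumes the register helper generated for sched_set_state_tp):
a probe that uses the new from_signal argument to tell explicit state
changes apart from signal-forced wakeups:

  /* Matches the new TP_PROTO(tsk, state, from_signal). */
  static void probe_set_state(void *data, struct task_struct *tsk,
                              int state, bool from_signal)
  {
          if (from_signal)
                  /* Path added in try_to_block_task(): a pending signal
                   * put the task back to TASK_RUNNING. */
                  pr_debug("pid %d: RUNNING due to pending signal\n",
                           tsk->pid);
          else
                  /* Explicit set_current_state() and friends. */
                  pr_debug("pid %d: state 0x%x set explicitly\n",
                           tsk->pid, state);
  }

  static int __init set_state_probe_init(void)
  {
          return register_trace_sched_set_state_tp(probe_set_state, NULL);
  }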