Add the following tracepoints:
* sched_set_need_resched(tsk, cpu, tif)
    Called when the need resched [lazy] flag is set on a task
* sched_switch_vain(preempt, tsk, tsk_state)
    Called when a task is selected again during __schedule
    i.e. prev == next == tsk : no real context switch

Add a new parameter to sched_set_state to identify whether the state
change was due to an explicit call or a signal pending while scheduling.
We now also trace from try_to_block_task in case a signal was pending
and the task is set to runnable.

Also adapt all monitors using sched_set_state to avoid breaking the build.

These tracepoints are useful to describe the Linux task model and are
adapted from the patches by Daniel Bristot de Oliveira
(https://bristot.me/linux-task-model/).

Signed-off-by: Gabriele Monaco <gmon...@redhat.com>
---
 include/linux/sched.h                  |  7 ++++++-
 include/trace/events/sched.h           | 17 +++++++++++++++--
 kernel/sched/core.c                    | 10 +++++++++-
 kernel/trace/rv/monitors/sco/sco.c     |  3 ++-
 kernel/trace/rv/monitors/sleep/sleep.c |  3 ++-
 kernel/trace/rv/monitors/snroc/snroc.c |  3 ++-
 6 files changed, 36 insertions(+), 7 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 7bce4c7ae3b4f..19ab4597c97d3 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -339,9 +339,11 @@ extern void io_schedule_finish(int token);
 extern long io_schedule_timeout(long timeout);
 extern void io_schedule(void);
 
-/* wrapper function to trace from this header file */
+/* wrapper functions to trace from this header file */
 DECLARE_TRACEPOINT(sched_set_state_tp);
 extern void __trace_set_current_state(int state_value);
+DECLARE_TRACEPOINT(sched_set_need_resched_tp);
+extern void __trace_set_need_resched(struct task_struct *curr, int tif);
 
 /**
  * struct prev_cputime - snapshot of system and user cputime
@@ -2059,6 +2061,9 @@ static inline int test_tsk_thread_flag(struct task_struct *tsk, int flag)
 
 static inline void set_tsk_need_resched(struct task_struct *tsk)
 {
+       if (tracepoint_enabled(sched_set_need_resched_tp) &&
+           !test_tsk_thread_flag(tsk, TIF_NEED_RESCHED))
+               __trace_set_need_resched(tsk, TIF_NEED_RESCHED);
        set_tsk_thread_flag(tsk,TIF_NEED_RESCHED);
 }
 
diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
index 4e6b2910cec3f..c9dec6d38ad2d 100644
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -889,11 +889,24 @@ DECLARE_TRACE(sched_exit,
        TP_PROTO(bool is_switch, unsigned long ip),
        TP_ARGS(is_switch, ip));
 
+/*
+ * Tracepoint called when setting the state of a task;
+ * this tracepoint is guaranteed to be called from the waking context of the
+ * task setting the state.
+ */
 DECLARE_TRACE_CONDITION(sched_set_state,
-       TP_PROTO(struct task_struct *tsk, int state),
-       TP_ARGS(tsk, state),
+       TP_PROTO(struct task_struct *tsk, int state, bool from_signal),
+       TP_ARGS(tsk, state, from_signal),
        TP_CONDITION(!!(tsk->__state) != !!state));
 
+DECLARE_TRACE(sched_set_need_resched,
+       TP_PROTO(struct task_struct *tsk, int cpu, int tif),
+       TP_ARGS(tsk, cpu, tif));
+
+DECLARE_TRACE(sched_switch_vain,
+       TP_PROTO(bool preempt, struct task_struct *tsk, unsigned int prev_state),
+       TP_ARGS(preempt, tsk, prev_state));
+
 #endif /* _TRACE_SCHED_H */
 
 /* This part must be outside protection */
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 81c6df746df17..6cb70e6f7fa17 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -495,7 +495,7 @@ EXPORT_TRACEPOINT_SYMBOL(sched_set_state_tp);
 /* Call via the helper macro trace_set_current_state. */
 void __trace_set_current_state(int state_value)
 {
-       trace_sched_set_state_tp(current, state_value);
+       trace_sched_set_state_tp(current, state_value, false);
 }
 EXPORT_SYMBOL(__trace_set_current_state);
 
@@ -1110,6 +1110,7 @@ static void __resched_curr(struct rq *rq, int tif)
 
        cpu = cpu_of(rq);
 
+       trace_sched_set_need_resched_tp(curr, cpu, tif);
        if (cpu == smp_processor_id()) {
                set_ti_thread_flag(cti, tif);
                if (tif == TIF_NEED_RESCHED)
@@ -1125,6 +1126,11 @@ static void __resched_curr(struct rq *rq, int tif)
        }
 }
 
+void __trace_set_need_resched(struct task_struct *curr, int tif)
+{
+       trace_sched_set_need_resched_tp(curr, smp_processor_id(), tif);
+}
+
 void resched_curr(struct rq *rq)
 {
        __resched_curr(rq, TIF_NEED_RESCHED);
@@ -6592,6 +6598,7 @@ static bool try_to_block_task(struct rq *rq, struct task_struct *p,
        int flags = DEQUEUE_NOCLOCK;
 
        if (signal_pending_state(task_state, p)) {
+               trace_sched_set_state_tp(p, TASK_RUNNING, true);
                WRITE_ONCE(p->__state, TASK_RUNNING);
                *task_state_p = TASK_RUNNING;
                return false;
@@ -6786,6 +6793,7 @@ static void __sched notrace __schedule(int sched_mode)
                rq = context_switch(rq, prev, next, &rf);
        } else {
                rq_unpin_lock(rq, &rf);
+               trace_sched_switch_vain_tp(preempt, prev, prev_state);
                __balance_callbacks(rq);
                raw_spin_rq_unlock_irq(rq);
        }
diff --git a/kernel/trace/rv/monitors/sco/sco.c b/kernel/trace/rv/monitors/sco/sco.c
index 66f4639d46ac4..c9206aa12c319 100644
--- a/kernel/trace/rv/monitors/sco/sco.c
+++ b/kernel/trace/rv/monitors/sco/sco.c
@@ -19,7 +19,8 @@
 static struct rv_monitor rv_sco;
 DECLARE_DA_MON_PER_CPU(sco, unsigned char);
 
-static void handle_sched_set_state(void *data, struct task_struct *tsk, int state)
+static void handle_sched_set_state(void *data, struct task_struct *tsk,
+                                  int state, bool from_signal)
 {
        da_handle_start_event_sco(sched_set_state_sco);
 }
diff --git a/kernel/trace/rv/monitors/sleep/sleep.c b/kernel/trace/rv/monitors/sleep/sleep.c
index eea447b069071..5103a98818c53 100644
--- a/kernel/trace/rv/monitors/sleep/sleep.c
+++ b/kernel/trace/rv/monitors/sleep/sleep.c
@@ -82,7 +82,8 @@ static void ltl_atoms_init(struct task_struct *task, struct ltl_monitor *mon, bo
 
 }
 
-static void handle_sched_set_state(void *data, struct task_struct *task, int state)
+static void handle_sched_set_state(void *data, struct task_struct *task,
+                                  int state, bool from_signal)
 {
        if (state & TASK_INTERRUPTIBLE)
                ltl_atom_pulse(task, LTL_SLEEP, true);
diff --git a/kernel/trace/rv/monitors/snroc/snroc.c b/kernel/trace/rv/monitors/snroc/snroc.c
index 540e686e699f4..2651f589d1554 100644
--- a/kernel/trace/rv/monitors/snroc/snroc.c
+++ b/kernel/trace/rv/monitors/snroc/snroc.c
@@ -19,7 +19,8 @@
 static struct rv_monitor rv_snroc;
 DECLARE_DA_MON_PER_TASK(snroc, unsigned char);
 
-static void handle_sched_set_state(void *data, struct task_struct *tsk, int state)
+static void handle_sched_set_state(void *data, struct task_struct *tsk,
+                                  int state, bool from_signal)
 {
        da_handle_event_snroc(tsk, sched_set_state_snroc);
 }
-- 
2.50.1


Reply via email to