task_work tracepoints can be enabled by:

    echo 1 > /sys/kernel/tracing/events/task_work/enable

and the resulting trace log looks like:

    ... task_work_add_request: target_comm=ls target_pid=227 work=ffff95d20641a508 func=____fput notify=TWA_RESUME
    ... task_work_add_done: target_comm=ls target_pid=227 work=ffff95d20641a508 ret=0
    ... task_work_add_request: target_comm=ls target_pid=227 work=ffff95d20641a5c8 func=____fput notify=TWA_RESUME
    ... task_work_add_done: target_comm=ls target_pid=227 work=ffff95d20641a5c8 ret=0
    ... task_work_add_request: target_comm=ls target_pid=227 work=ffff95d20641a688 func=____fput notify=TWA_RESUME
    ... task_work_add_done: target_comm=ls target_pid=227 work=ffff95d20641a688 ret=0
    ... task_work_add_request: target_comm=ls target_pid=227 work=ffff95d20641a748 func=____fput notify=TWA_RESUME
    ... task_work_add_done: target_comm=ls target_pid=227 work=ffff95d20641a748 ret=0
    ... task_work_run_start: comm=ls pid=227 work=ffff95d20641a748 func=____fput
    ... task_work_run_end: comm=ls pid=227 work=ffff95d20641a748 func=____fput
    ... task_work_run_start: comm=ls pid=227 work=ffff95d20641a688 func=____fput
    ... task_work_run_end: comm=ls pid=227 work=ffff95d20641a688 func=____fput
    ... task_work_run_start: comm=ls pid=227 work=ffff95d20641a5c8 func=____fput
    ... task_work_run_end: comm=ls pid=227 work=ffff95d20641a5c8 func=____fput
    ... task_work_run_start: comm=ls pid=227 work=ffff95d20641a508 func=____fput
    ... task_work_run_end: comm=ls pid=227 work=ffff95d20641a508 func=____fput

where the fields are:
    target_comm=<comm of target task>
    target_pid=<pid of target task>
    work=<callback_head *>
    func=<callback_head->func>
    notify=<way to notify the target task>
    comm=<comm of current task executing func>
    pid=<pid of current task executing func>

Signed-off-by: Imran Khan <[email protected]>
---
 include/trace/events/task_work.h | 129 +++++++++++++++++++++++++++++++
 kernel/task_work.c               |  32 +++++++-
 2 files changed, 159 insertions(+), 2 deletions(-)
 create mode 100644 include/trace/events/task_work.h

diff --git a/include/trace/events/task_work.h b/include/trace/events/task_work.h
new file mode 100644
index 0000000000000..e43ffd607e7ec
--- /dev/null
+++ b/include/trace/events/task_work.h
@@ -0,0 +1,129 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM task_work
+
+#if !defined(_TRACE_TASK_WORK_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_TASK_WORK_H
+
+#include <linux/tracepoint.h>
+#include <linux/task_work.h>
+
+TRACE_DEFINE_ENUM(TWA_NONE);
+TRACE_DEFINE_ENUM(TWA_RESUME);
+TRACE_DEFINE_ENUM(TWA_SIGNAL);
+TRACE_DEFINE_ENUM(TWA_SIGNAL_NO_IPI);
+TRACE_DEFINE_ENUM(TWA_NMI_CURRENT);
+
+#define show_task_work_notify_mode(notify)                     \
+       __print_symbolic(notify,                                \
+               { TWA_NONE,           "TWA_NONE" },             \
+               { TWA_RESUME,         "TWA_RESUME" },           \
+               { TWA_SIGNAL,         "TWA_SIGNAL" },           \
+               { TWA_SIGNAL_NO_IPI,  "TWA_SIGNAL_NO_IPI" },    \
+               { TWA_NMI_CURRENT,    "TWA_NMI_CURRENT" })
+
+/*
+ * task_work_add() is split into two events:
+ *
+ *   task_work:task_work_add_request - fires before the cmpxchg loop that
+ *       enqueues @work, so it is always emitted before any
+ *       task_work_run_start for that @work.
+ *   task_work:task_work_add_done - fires after the cmpxchg loop
+ *       terminates, carrying the final ret value.
+ */
+TRACE_EVENT(task_work_add_request,
+
+       TP_PROTO(struct task_struct *task,
+                struct callback_head *work,
+                task_work_func_t func,
+                enum task_work_notify_mode notify),
+
+       TP_ARGS(task, work, func, notify),
+
+       TP_STRUCT__entry(
+               __field(pid_t,          pid)
+               __array(char,           comm, TASK_COMM_LEN)
+               __field(void *,         work)
+               __field(void *,         func)
+               __field(int,            notify)
+       ),
+
+       TP_fast_assign(
+               __entry->pid            = task->pid;
+               memcpy(__entry->comm, task->comm, TASK_COMM_LEN);
+               __entry->work           = work;
+               __entry->func           = func;
+               __entry->notify         = notify;
+       ),
+
+       TP_printk("target_comm=%s target_pid=%d work=%p func=%ps notify=%s",
+               __entry->comm, __entry->pid, __entry->work,
+               __entry->func, show_task_work_notify_mode(__entry->notify))
+);
+
+TRACE_EVENT(task_work_add_done,
+
+       TP_PROTO(struct task_struct *task,
+                struct callback_head *work,
+                int ret),
+
+       TP_ARGS(task, work, ret),
+
+       TP_STRUCT__entry(
+               __field(pid_t,          pid)
+               __array(char,           comm, TASK_COMM_LEN)
+               __field(void *,         work)
+               __field(int,            ret)
+       ),
+
+       TP_fast_assign(
+               __entry->pid            = task->pid;
+               memcpy(__entry->comm, task->comm, TASK_COMM_LEN);
+               __entry->work           = work;
+               __entry->ret            = ret;
+       ),
+
+       TP_printk("target_comm=%s target_pid=%d work=%p ret=%d",
+               __entry->comm, __entry->pid, __entry->work, __entry->ret)
+);
+
+DECLARE_EVENT_CLASS(task_work_run_template,
+
+       TP_PROTO(struct task_struct *task,
+                struct callback_head *work,
+                task_work_func_t func),
+
+       TP_ARGS(task, work, func),
+
+       TP_STRUCT__entry(
+               __field(pid_t,          pid)
+               __array(char,           comm, TASK_COMM_LEN)
+               __field(void *,         work)
+               __field(void *,         func)
+       ),
+
+       TP_fast_assign(
+               __entry->pid            = task->pid;
+               memcpy(__entry->comm, task->comm, TASK_COMM_LEN);
+               __entry->work           = work;
+               __entry->func           = func;
+       ),
+
+       TP_printk("comm=%s pid=%d work=%p func=%ps",
+               __entry->comm, __entry->pid, __entry->work, __entry->func)
+);
+
+DEFINE_EVENT(task_work_run_template, task_work_run_start,
+       TP_PROTO(struct task_struct *task, struct callback_head *work,
+                task_work_func_t func),
+       TP_ARGS(task, work, func));
+
+DEFINE_EVENT(task_work_run_template, task_work_run_end,
+       TP_PROTO(struct task_struct *task, struct callback_head *work,
+                task_work_func_t func),
+       TP_ARGS(task, work, func));
+
+#endif /* _TRACE_TASK_WORK_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/kernel/task_work.c b/kernel/task_work.c
index 0f7519f8e7c93..ed04a8c7116de 100644
--- a/kernel/task_work.c
+++ b/kernel/task_work.c
@@ -4,6 +4,9 @@
 #include <linux/task_work.h>
 #include <linux/resume_user_mode.h>
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/task_work.h>
+
 static struct callback_head work_exited; /* all we need is ->next == NULL */
 
 #ifdef CONFIG_IRQ_WORK
@@ -60,6 +63,7 @@ int task_work_add(struct task_struct *task, struct callback_head *work,
                  enum task_work_notify_mode notify)
 {
        struct callback_head *head;
+       task_work_func_t func;
 
        if (notify == TWA_NMI_CURRENT) {
                if (WARN_ON_ONCE(task != current))
@@ -70,10 +74,25 @@ int task_work_add(struct task_struct *task, struct callback_head *work,
                kasan_record_aux_stack(work);
        }
 
+       /*
+        * Snapshot work->func before the cmpxchg below publishes @work.
+        * After publish, a concurrent task_work_run() on @task may invoke
+        * the callback and free @work, after which dereferencing work->func
+        * to fill the tracepoint payload would be a use-after-free.
+        */
+       func = work->func;
+
+       /*
+        * Emit add_request BEFORE the cmpxchg loop.
+        * Tracing here guarantees add_request is seen before any possible
+        * run_start.
+        */
+       trace_task_work_add_request(task, work, func, notify);
+
        head = READ_ONCE(task->task_works);
        do {
                if (unlikely(head == &work_exited))
-                       return -ESRCH;
+                       goto out_esrch;
                work->next = head;
        } while (!try_cmpxchg(&task->task_works, &head, work));
 
@@ -100,7 +119,12 @@ int task_work_add(struct task_struct *task, struct callback_head *work,
                break;
        }
 
+       trace_task_work_add_done(task, work, 0);
        return 0;
+
+out_esrch:
+       trace_task_work_add_done(task, work, -ESRCH);
+       return -ESRCH;
 }
 
 /**
@@ -229,8 +253,12 @@ void task_work_run(void)
                raw_spin_unlock_irq(&task->pi_lock);
 
                do {
+                       task_work_func_t func = work->func;
+
                        next = work->next;
-                       work->func(work);
+                       trace_task_work_run_start(task, work, func);
+                       func(work);
+                       trace_task_work_run_end(task, work, func);
                        work = next;
                        cond_resched();
                } while (work);
-- 
2.43.0


Reply via email to