FYI. This patch is upstream since linux-3.5. Backported in order to
bring SystemTap functionality back after the switch to linux-3.4 that
doesn't have utrace. :)

The split-out series is available in the git repository at:

  git://fedorapeople.org/home/fedora/aarapov/public_git/kernel-uprobes.git 

Oleg Nesterov (1):
      task_work_add: generic process-context callbacks

Signed-off-by: Anton Arapov <[email protected]>
---
 include/linux/sched.h     |    2 ++
 include/linux/task_work.h |   33 ++++++++++++++++++
 include/linux/tracehook.h |   11 ++++++
 kernel/Makefile           |    2 +-
 kernel/exit.c             |    5 ++-
 kernel/fork.c             |    1 +
 kernel/task_work.c        |   84 +++++++++++++++++++++++++++++++++++++++++++++
 7 files changed, 136 insertions(+), 2 deletions(-)
 create mode 100644 include/linux/task_work.h
 create mode 100644 kernel/task_work.c

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 6869c60..e011a11 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1445,6 +1445,8 @@ struct task_struct {
        int (*notifier)(void *priv);
        void *notifier_data;
        sigset_t *notifier_mask;
+       struct hlist_head task_works;
+
        struct audit_context *audit_context;
 #ifdef CONFIG_AUDITSYSCALL
        uid_t loginuid;
diff --git a/include/linux/task_work.h b/include/linux/task_work.h
new file mode 100644
index 0000000..294d5d5
--- /dev/null
+++ b/include/linux/task_work.h
@@ -0,0 +1,33 @@
+#ifndef _LINUX_TASK_WORK_H
+#define _LINUX_TASK_WORK_H
+
+#include <linux/list.h>
+#include <linux/sched.h>
+
+struct task_work;
+typedef void (*task_work_func_t)(struct task_work *);
+
+struct task_work {
+       struct hlist_node hlist;
+       task_work_func_t func;
+       void *data;
+};
+
+static inline void
+init_task_work(struct task_work *twork, task_work_func_t func, void *data)
+{
+       twork->func = func;
+       twork->data = data;
+}
+
+int task_work_add(struct task_struct *task, struct task_work *twork, bool);
+struct task_work *task_work_cancel(struct task_struct *, task_work_func_t);
+void task_work_run(void);
+
+static inline void exit_task_work(struct task_struct *task)
+{
+       if (unlikely(!hlist_empty(&task->task_works)))
+               task_work_run();
+}
+
+#endif /* _LINUX_TASK_WORK_H */
diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h
index 51bd91d..48c597d 100644
--- a/include/linux/tracehook.h
+++ b/include/linux/tracehook.h
@@ -49,6 +49,7 @@
 #include <linux/sched.h>
 #include <linux/ptrace.h>
 #include <linux/security.h>
+#include <linux/task_work.h>
 struct linux_binprm;
 
 /*
@@ -165,8 +166,10 @@ static inline void tracehook_signal_handler(int sig, 
siginfo_t *info,
  */
 static inline void set_notify_resume(struct task_struct *task)
 {
+#ifdef TIF_NOTIFY_RESUME
        if (!test_and_set_tsk_thread_flag(task, TIF_NOTIFY_RESUME))
                kick_process(task);
+#endif
 }
 
 /**
@@ -184,6 +187,14 @@ static inline void set_notify_resume(struct task_struct 
*task)
  */
 static inline void tracehook_notify_resume(struct pt_regs *regs)
 {
+       /*
+        * The caller just cleared TIF_NOTIFY_RESUME. This barrier
+        * pairs with task_work_add()->set_notify_resume() after
+        * hlist_add_head(task->task_works);
+        */
+       smp_mb__after_clear_bit();
+       if (unlikely(!hlist_empty(&current->task_works)))
+               task_work_run();
 }
 #endif /* TIF_NOTIFY_RESUME */
 
diff --git a/kernel/Makefile b/kernel/Makefile
index cb41b95..2479528 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -5,7 +5,7 @@
 obj-y     = fork.o exec_domain.o panic.o printk.o \
            cpu.o exit.o itimer.o time.o softirq.o resource.o \
            sysctl.o sysctl_binary.o capability.o ptrace.o timer.o user.o \
-           signal.o sys.o kmod.o workqueue.o pid.o \
+           signal.o sys.o kmod.o workqueue.o pid.o task_work.o \
            rcupdate.o extable.o params.o posix-timers.o \
            kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \
            hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \
diff --git a/kernel/exit.c b/kernel/exit.c
index d8bd3b42..b82c38e 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -946,11 +946,14 @@ void do_exit(long code)
        exit_signals(tsk);  /* sets PF_EXITING */
        /*
         * tsk->flags are checked in the futex code to protect against
-        * an exiting task cleaning up the robust pi futexes.
+        * an exiting task cleaning up the robust pi futexes, and in
+        * task_work_add() to avoid the race with exit_task_work().
         */
        smp_mb();
        raw_spin_unlock_wait(&tsk->pi_lock);
 
+       exit_task_work(tsk);
+
        exit_irq_thread();
 
        if (unlikely(in_atomic()))
diff --git a/kernel/fork.c b/kernel/fork.c
index 5b87e9f..76a961d 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1391,6 +1391,7 @@ static struct task_struct *copy_process(unsigned long 
clone_flags,
         */
        p->group_leader = p;
        INIT_LIST_HEAD(&p->thread_group);
+       INIT_HLIST_HEAD(&p->task_works);
 
        /* Now that the task is set up, run cgroup callbacks if
         * necessary. We need to run them before the task is visible
diff --git a/kernel/task_work.c b/kernel/task_work.c
new file mode 100644
index 0000000..82d1c79
--- /dev/null
+++ b/kernel/task_work.c
@@ -0,0 +1,84 @@
+#include <linux/spinlock.h>
+#include <linux/task_work.h>
+#include <linux/tracehook.h>
+
+int
+task_work_add(struct task_struct *task, struct task_work *twork, bool notify)
+{
+       unsigned long flags;
+       int err = -ESRCH;
+
+#ifndef TIF_NOTIFY_RESUME
+       if (notify)
+               return -ENOTSUPP;
+#endif
+       /*
+        * We must not insert the new work if the task has already passed
+        * exit_task_work(). We rely on do_exit()->raw_spin_unlock_wait()
+        * and check PF_EXITING under pi_lock.
+        */
+       raw_spin_lock_irqsave(&task->pi_lock, flags);
+       if (likely(!(task->flags & PF_EXITING))) {
+               hlist_add_head(&twork->hlist, &task->task_works);
+               err = 0;
+       }
+       raw_spin_unlock_irqrestore(&task->pi_lock, flags);
+
+       /* test_and_set_bit() implies mb(), see tracehook_notify_resume(). */
+       if (likely(!err) && notify)
+               set_notify_resume(task);
+       return err;
+}
+
+struct task_work *
+task_work_cancel(struct task_struct *task, task_work_func_t func)
+{
+       unsigned long flags;
+       struct task_work *twork;
+       struct hlist_node *pos;
+
+       raw_spin_lock_irqsave(&task->pi_lock, flags);
+       hlist_for_each_entry(twork, pos, &task->task_works, hlist) {
+               if (twork->func == func) {
+                       hlist_del(&twork->hlist);
+                       goto found;
+               }
+       }
+       twork = NULL;
+ found:
+       raw_spin_unlock_irqrestore(&task->pi_lock, flags);
+
+       return twork;
+}
+
+void task_work_run(void)
+{
+       struct task_struct *task = current;
+       struct hlist_head task_works;
+       struct hlist_node *pos;
+
+       raw_spin_lock_irq(&task->pi_lock);
+       hlist_move_list(&task->task_works, &task_works);
+       raw_spin_unlock_irq(&task->pi_lock);
+
+       if (unlikely(hlist_empty(&task_works)))
+               return;
+       /*
+        * We use hlist to save the space in task_struct, but we want fifo.
+        * Find the last entry, the list should be short, then process them
+        * in reverse order.
+        */
+       for (pos = task_works.first; pos->next; pos = pos->next)
+               ;
+
+       for (;;) {
+               struct hlist_node **pprev = pos->pprev;
+               struct task_work *twork = container_of(pos, struct task_work,
+                                                       hlist);
+               twork->func(twork);
+
+               if (pprev == &task_works.first)
+                       break;
+               pos = container_of(pprev, struct hlist_node, next);
+       }
+}
_______________________________________________
kernel mailing list
[email protected]
https://admin.fedoraproject.org/mailman/listinfo/kernel

Reply via email to