Several kthread-related functions indirectly take task_struct->pi_lock
while holding worker->lock, via a call chain like this:
    raw_spin_lock_irqsave(&worker->lock, flags)
    kthread_insert_work
    wake_up_process
    try_to_wake_up
    raw_spin_lock_irqsave(&p->pi_lock, flags)

This lock dependency exists whenever kthread_insert_work is called,
either directly or indirectly via __kthread_queue_delayed_work, from the
following functions:
    kthread_queue_work
    kthread_delayed_work_timer_fn
    kthread_queue_delayed_work
    kthread_flush_work
    kthread_mod_delayed_work
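
The dependency only becomes deadlock-prone when some other code path
acquires the same locks in the opposite order. Schematically (the CPU1
side is a hypothetical inverse ordering shown for illustration, not a
call chain taken from the report):

    CPU0                                  CPU1
    ----                                  ----
    raw_spin_lock(&worker->lock)          raw_spin_lock(&p->pi_lock)
    ...                                   ...
    raw_spin_lock(&p->pi_lock)            raw_spin_lock(&worker->lock)

If both CPUs reach their second acquisition concurrently, neither can
make progress.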

This creates the possibility of a circular lock dependency like the one
reported at [1]. Break the dependency by collecting the wakeup in an
on-stack wake_q under worker->lock and waking the task only after
worker->lock has been released.

[1] https://lore.kernel.org/lkml/cajucfpg4nkhpqvzjgxz_3gm6hf1qgn_euoq8ix9cv1k9whl...@mail.gmail.com
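
In short, every queueing path now follows the same pattern: queue the
wakeup on an on-stack wake_q while holding worker->lock, and issue it
after the unlock. A condensed sketch of the kthread_queue_work change,
not a literal excerpt from the diff below:

    DEFINE_WAKE_Q(wake_q);
    unsigned long flags;

    raw_spin_lock_irqsave(&worker->lock, flags);
    /*
     * kthread_insert_work now calls wake_q_add instead of
     * wake_up_process, so p->pi_lock is no longer taken while
     * worker->lock is held.
     */
    kthread_insert_work(worker, work, &worker->work_list, &wake_q);
    raw_spin_unlock_irqrestore(&worker->lock, flags);

    /* Wake the worker only after worker->lock has been dropped. */
    wake_up_q(&wake_q);

wake_q_add takes a reference on the task, so the deferred wakeup stays
safe even if the worker task exits between the unlock and wake_up_q.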

Reported-by: Ke Wang <[email protected]>
Reported-by: Shakeel Butt <[email protected]>
Suggested-by: Peter Zijlstra <[email protected]>
Tested-by: Shakeel Butt <[email protected]>
Signed-off-by: Suren Baghdasaryan <[email protected]>

---
v2:
  * replace lkml link with lore.kernel.org one
  * change a line break to be more consistent with the rest of the code
---
 kernel/kthread.c | 44 +++++++++++++++++++++++++++++++++-----------
 1 file changed, 33 insertions(+), 11 deletions(-)

diff --git a/kernel/kthread.c b/kernel/kthread.c
index bfbfa481be3a..d37cd37d934c 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -9,6 +9,7 @@
 #include <uapi/linux/sched/types.h>
 #include <linux/sched.h>
 #include <linux/sched/task.h>
+#include <linux/sched/wake_q.h>
 #include <linux/kthread.h>
 #include <linux/completion.h>
 #include <linux/err.h>
@@ -806,14 +807,15 @@ static void kthread_insert_work_sanity_check(struct kthread_worker *worker,
 /* insert @work before @pos in @worker */
 static void kthread_insert_work(struct kthread_worker *worker,
                                struct kthread_work *work,
-                               struct list_head *pos)
+                               struct list_head *pos,
+                               struct wake_q_head *wake_q)
 {
        kthread_insert_work_sanity_check(worker, work);
 
        list_add_tail(&work->node, pos);
        work->worker = worker;
        if (!worker->current_work && likely(worker->task))
-               wake_up_process(worker->task);
+               wake_q_add(wake_q, worker->task);
 }
 
 /**
@@ -831,15 +833,19 @@ static void kthread_insert_work(struct kthread_worker *worker,
 bool kthread_queue_work(struct kthread_worker *worker,
                        struct kthread_work *work)
 {
-       bool ret = false;
+       DEFINE_WAKE_Q(wake_q);
        unsigned long flags;
+       bool ret = false;
 
        raw_spin_lock_irqsave(&worker->lock, flags);
        if (!queuing_blocked(worker, work)) {
-               kthread_insert_work(worker, work, &worker->work_list);
+               kthread_insert_work(worker, work, &worker->work_list, &wake_q);
                ret = true;
        }
        raw_spin_unlock_irqrestore(&worker->lock, flags);
+
+       wake_up_q(&wake_q);
+
        return ret;
 }
 EXPORT_SYMBOL_GPL(kthread_queue_work);
@@ -857,6 +863,7 @@ void kthread_delayed_work_timer_fn(struct timer_list *t)
        struct kthread_delayed_work *dwork = from_timer(dwork, t, timer);
        struct kthread_work *work = &dwork->work;
        struct kthread_worker *worker = work->worker;
+       DEFINE_WAKE_Q(wake_q);
        unsigned long flags;
 
        /*
@@ -873,15 +880,18 @@ void kthread_delayed_work_timer_fn(struct timer_list *t)
        /* Move the work from worker->delayed_work_list. */
        WARN_ON_ONCE(list_empty(&work->node));
        list_del_init(&work->node);
-       kthread_insert_work(worker, work, &worker->work_list);
+       kthread_insert_work(worker, work, &worker->work_list, &wake_q);
 
        raw_spin_unlock_irqrestore(&worker->lock, flags);
+
+       wake_up_q(&wake_q);
 }
 EXPORT_SYMBOL(kthread_delayed_work_timer_fn);
 
 static void __kthread_queue_delayed_work(struct kthread_worker *worker,
                                         struct kthread_delayed_work *dwork,
-                                        unsigned long delay)
+                                        unsigned long delay,
+                                        struct wake_q_head *wake_q)
 {
        struct timer_list *timer = &dwork->timer;
        struct kthread_work *work = &dwork->work;
@@ -895,7 +905,7 @@ static void __kthread_queue_delayed_work(struct kthread_worker *worker,
         * on that there's no such delay when @delay is 0.
         */
        if (!delay) {
-               kthread_insert_work(worker, work, &worker->work_list);
+               kthread_insert_work(worker, work, &worker->work_list, wake_q);
                return;
        }
 
@@ -928,17 +938,21 @@ bool kthread_queue_delayed_work(struct kthread_worker *worker,
                                unsigned long delay)
 {
        struct kthread_work *work = &dwork->work;
+       DEFINE_WAKE_Q(wake_q);
        unsigned long flags;
        bool ret = false;
 
        raw_spin_lock_irqsave(&worker->lock, flags);
 
        if (!queuing_blocked(worker, work)) {
-               __kthread_queue_delayed_work(worker, dwork, delay);
+               __kthread_queue_delayed_work(worker, dwork, delay, &wake_q);
                ret = true;
        }
 
        raw_spin_unlock_irqrestore(&worker->lock, flags);
+
+       wake_up_q(&wake_q);
+
        return ret;
 }
 EXPORT_SYMBOL_GPL(kthread_queue_delayed_work);
@@ -967,6 +981,7 @@ void kthread_flush_work(struct kthread_work *work)
                KTHREAD_WORK_INIT(fwork.work, kthread_flush_work_fn),
                COMPLETION_INITIALIZER_ONSTACK(fwork.done),
        };
+       DEFINE_WAKE_Q(wake_q);
        struct kthread_worker *worker;
        bool noop = false;
 
@@ -979,15 +994,18 @@ void kthread_flush_work(struct kthread_work *work)
        WARN_ON_ONCE(work->worker != worker);
 
        if (!list_empty(&work->node))
-               kthread_insert_work(worker, &fwork.work, work->node.next);
+               kthread_insert_work(worker, &fwork.work,
+                                   work->node.next, &wake_q);
        else if (worker->current_work == work)
                kthread_insert_work(worker, &fwork.work,
-                                   worker->work_list.next);
+                                   worker->work_list.next, &wake_q);
        else
                noop = true;
 
        raw_spin_unlock_irq(&worker->lock);
 
+       wake_up_q(&wake_q);
+
        if (!noop)
                wait_for_completion(&fwork.done);
 }
@@ -1065,6 +1083,7 @@ bool kthread_mod_delayed_work(struct kthread_worker *worker,
                              unsigned long delay)
 {
        struct kthread_work *work = &dwork->work;
+       DEFINE_WAKE_Q(wake_q);
        unsigned long flags;
        int ret = false;
 
@@ -1083,9 +1102,12 @@ bool kthread_mod_delayed_work(struct kthread_worker *worker,
 
        ret = __kthread_cancel_work(work, true, &flags);
 fast_queue:
-       __kthread_queue_delayed_work(worker, dwork, delay);
+       __kthread_queue_delayed_work(worker, dwork, delay, &wake_q);
 out:
        raw_spin_unlock_irqrestore(&worker->lock, flags);
+
+       wake_up_q(&wake_q);
+
        return ret;
 }
 EXPORT_SYMBOL_GPL(kthread_mod_delayed_work);
-- 
2.26.2.526.g744177e7f7-goog