I found a kernel crash while playing with deadline PI rtmutex.

    BUG: unable to handle kernel NULL pointer dereference at 0000000000000018
    IP: [<ffffffff810eeb8f>] rt_mutex_get_top_task+0x1f/0x30
    PGD 232a75067 PUD 230947067 PMD 0
    Oops: 0000 [#1] SMP
    CPU: 1 PID: 10994 Comm: a.out Not tainted

    Call Trace:
    [<ffffffff810cf8aa>] ? enqueue_task_dl+0x2a/0x320
    [<ffffffff810b658c>] enqueue_task+0x2c/0x80
    [<ffffffff810ba763>] activate_task+0x23/0x30
    [<ffffffff810d0ab5>] pull_dl_task+0x1d5/0x260
    [<ffffffff810d0be6>] pre_schedule_dl+0x16/0x20
    [<ffffffff8164e783>] __schedule+0xd3/0x900
    [<ffffffff8164efd9>] schedule+0x29/0x70
    [<ffffffff8165035b>] __rt_mutex_slowlock+0x4b/0xc0
    [<ffffffff81650501>] rt_mutex_slowlock+0xd1/0x190
    [<ffffffff810eeb33>] rt_mutex_timed_lock+0x53/0x60
    [<ffffffff810ecbfc>] futex_lock_pi.isra.18+0x28c/0x390
    [<ffffffff810cfa15>] ? enqueue_task_dl+0x195/0x320
    [<ffffffff810d0bac>] ? prio_changed_dl+0x6c/0x90
    [<ffffffff810ed8b0>] do_futex+0x190/0x5b0
    [<ffffffff810edd50>] SyS_futex+0x80/0x180
    [<ffffffff8165a089>] system_call_fastpath+0x16/0x1b
    RIP  [<ffffffff810eeb8f>] rt_mutex_get_top_task+0x1f/0x30

This happens because rt_mutex_enqueue_pi() and rt_mutex_dequeue_pi()
protect the pi waiters only with pi_lock when modifying them, while
rt_mutex_get_top_task() accesses them with the rq lock held but without
taking pi_lock.
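
For reference, the reader side that trips over this looks roughly like the
following (helper bodies quoted for illustration from the rtmutex code of
this kernel version; they are not part of this patch):

    /* kernel/locking/rtmutex_common.h (illustrative excerpt) */
    static inline int task_has_pi_waiters(struct task_struct *p)
    {
            return !RB_EMPTY_ROOT(&p->pi_waiters);
    }

    static inline struct rt_mutex_waiter *task_top_pi_waiter(struct task_struct *p)
    {
            /* Dereferences pi_waiters_leftmost with only the rq lock held
             * on the pull_dl_task() path shown in the trace above. */
            return rb_entry(p->pi_waiters_leftmost, struct rt_mutex_waiter,
                            pi_tree_entry);
    }

So pull_dl_task() -> activate_task() -> enqueue_task_dl() ->
rt_mutex_get_top_task() -> task_top_pi_waiter() can observe "pi_waiters"
and "pi_waiters_leftmost" in the middle of an update done on another CPU
that holds only pi_lock, hence the oops above.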

It's hard to make rt_mutex_get_top_task() take pi_lock on that path, so
this patch instead makes rt_mutex_enqueue_pi() and rt_mutex_dequeue_pi()
take the rq lock while operating on "pi_waiters" and
"pi_waiters_leftmost". This is only needed when the lock owner has
deadline priority.
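
The resulting locking rules for "pi_waiters" and "pi_waiters_leftmost" are
summarized below (my summary, assuming the usual pi_lock -> rq lock
nesting; this comment is not part of the diff):

    /*
     * Writers: rt_mutex_enqueue_pi() / rt_mutex_dequeue_pi()
     *   - always hold task->pi_lock
     *   - additionally take the task's rq lock via dl_pi_waiters_lock()
     *     when dl_task(task), so a deadline owner's pi waiters cannot
     *     change under its rq lock
     *
     * Reader: rt_mutex_get_top_task(), e.g. from enqueue_task_dl()
     *   - holds the task's rq lock but not pi_lock, which is now
     *     sufficient for deadline owners
     */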

Signed-off-by: Xunlei Pang <xlp...@redhat.com>
---
 include/linux/sched/deadline.h |  3 +++
 kernel/locking/rtmutex.c       | 18 ++++++++++++++++++
 kernel/sched/deadline.c        | 17 +++++++++++++++++
 3 files changed, 38 insertions(+)

diff --git a/include/linux/sched/deadline.h b/include/linux/sched/deadline.h
index 9089a2a..3083f6b 100644
--- a/include/linux/sched/deadline.h
+++ b/include/linux/sched/deadline.h
@@ -26,4 +26,7 @@ static inline bool dl_time_before(u64 a, u64 b)
        return (s64)(a - b) < 0;
 }
 
+extern void *dl_pi_waiters_lock(struct task_struct *p);
+extern void dl_pi_waiters_unlock(void *lockdata);
+
 #endif /* _SCHED_DEADLINE_H */
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index 3e74660..0fb247a 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -224,6 +224,7 @@ rt_mutex_enqueue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter)
        struct rb_node *parent = NULL;
        struct rt_mutex_waiter *entry;
        int leftmost = 1;
+       void *dl_lockdata = NULL;
 
        while (*link) {
                parent = *link;
@@ -236,24 +237,38 @@ rt_mutex_enqueue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter)
                }
        }
 
+       if (dl_task(task))
+               dl_lockdata = dl_pi_waiters_lock(task);
+
        if (leftmost)
                task->pi_waiters_leftmost = &waiter->pi_tree_entry;
 
        rb_link_node(&waiter->pi_tree_entry, parent, link);
        rb_insert_color(&waiter->pi_tree_entry, &task->pi_waiters);
+
+       if (dl_lockdata)
+               dl_pi_waiters_unlock(dl_lockdata);
 }
 
 static void
 rt_mutex_dequeue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter)
 {
+       void *dl_lockdata = NULL;
+
        if (RB_EMPTY_NODE(&waiter->pi_tree_entry))
                return;
 
+       if (dl_task(task))
+               dl_lockdata = dl_pi_waiters_lock(task);
+
        if (task->pi_waiters_leftmost == &waiter->pi_tree_entry)
                task->pi_waiters_leftmost = rb_next(&waiter->pi_tree_entry);
 
        rb_erase(&waiter->pi_tree_entry, &task->pi_waiters);
        RB_CLEAR_NODE(&waiter->pi_tree_entry);
+
+       if (dl_lockdata)
+               dl_pi_waiters_unlock(dl_lockdata);
 }
 
 /*
@@ -271,6 +286,9 @@ int rt_mutex_getprio(struct task_struct *task)
                   task->normal_prio);
 }
 
+/*
+ * rq->lock of @task must be held.
+ */
 struct task_struct *rt_mutex_get_top_task(struct task_struct *task)
 {
        if (likely(!task_has_pi_waiters(task)))
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index a3048fa..7b8aa93 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -926,6 +926,23 @@ static void dequeue_dl_entity(struct sched_dl_entity *dl_se)
        __dequeue_dl_entity(dl_se);
 }
 
+/*
+ * dl_pi_waiters_lock()/dl_pi_waiters_unlock() are needed by
+ * rt_mutex_enqueue_pi() and rt_mutex_dequeue_pi() to protect
+ * PI waiters accessed by rt_mutex_get_top_task().
+ */
+void *dl_pi_waiters_lock(struct task_struct *p)
+{
+       lockdep_assert_held(&p->pi_lock);
+
+       return __task_rq_lock(p);
+}
+
+void dl_pi_waiters_unlock(void *lockdata)
+{
+       __task_rq_unlock((struct rq *)lockdata);
+}
+
 static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)
 {
        struct task_struct *pi_task = rt_mutex_get_top_task(p);
-- 
1.8.3.1
