The following splat was caught when setting the uclamp value of a task:

        BUG: sleeping function called from invalid context at ./include/linux/percpu-rwsem.h:49

        in_atomic(): 1, irqs_disabled(): 128, non_block: 0, pid: 731, name: l_3-1
        INFO: lockdep is turned off.
        irq event stamp: 150
        hardirqs last  enabled at (149): el0_svc_common.constprop.4+0xe4/0x200
        hardirqs last disabled at (150): _raw_spin_lock_irqsave+0x38/0xa8
        softirqs last  enabled at (0): copy_process+0x620/0x18f0
        softirqs last disabled at (0): 0x0
        CPU: 5 PID: 731 Comm: l_3-1 Not tainted 5.8.0-rc4-00040-g6345b3305877-dirty #864
        Hardware name: ARM Juno development board (r2) (DT)
        Call trace:
         dump_backtrace+0x0/0x1f0
         show_stack+0x2c/0x38
         dump_stack+0xf0/0x170
         ___might_sleep+0x144/0x200
         __might_sleep+0x54/0x88
         cpus_read_lock+0x2c/0x130
         static_key_enable+0x1c/0x38
         __sched_setscheduler+0x900/0xad8
         __arm64_sys_sched_setattr+0x2e0/0x4f8
         el0_svc_common.constprop.4+0x84/0x200
         do_el0_svc+0x34/0xa0
         el0_sync_handler+0x16c/0x340
         el0_sync+0x140/0x180

        ======================================================
        WARNING: possible circular locking dependency detected
        5.8.0-rc4-00040-g6345b3305877-dirty #864 Not tainted
        ------------------------------------------------------
        l_0-0/730 is trying to acquire lock:
        ffff80001343dea0 (cpu_hotplug_lock){++++}-{0:0}, at: static_key_enable+0x1c/0x38

        but task is already holding lock:
        ffff00097ef4ca58 (&rq->lock){-.-.}-{2:2}, at: task_rq_lock+0x60/0xf0

        which lock already depends on the new lock.

        the existing dependency chain (in reverse order) is:

        -> #1 (&rq->lock){-.-.}-{2:2}:
               _raw_spin_lock+0x64/0x80
               __schedule+0x108/0x910
               schedule+0x7c/0x108
               schedule_timeout+0x2b0/0x448
               wait_for_completion_killable+0xb8/0x1a8
               __kthread_create_on_node+0xe0/0x1c0
               kthread_create_on_node+0x8c/0xb8
               create_worker+0xd0/0x1b8
               workqueue_prepare_cpu+0x5c/0xa0
               cpuhp_invoke_callback+0xe8/0xe30
               _cpu_up+0xf4/0x1c0
               cpu_up+0xa0/0xc0
               bringup_nonboot_cpus+0x88/0xc0
               smp_init+0x34/0x90
               kernel_init_freeable+0x1b8/0x338
               kernel_init+0x18/0x118
               ret_from_fork+0x10/0x18

        -> #0 (cpu_hotplug_lock){++++}-{0:0}:
               __lock_acquire+0x11a0/0x1550
               lock_acquire+0xf8/0x460
               cpus_read_lock+0x68/0x130
               static_key_enable+0x1c/0x38
               __sched_setscheduler+0x900/0xad8
               __arm64_sys_sched_setattr+0x2e0/0x4f8
               el0_svc_common.constprop.4+0x84/0x200
               do_el0_svc+0x34/0xa0
               el0_sync_handler+0x16c/0x340
               el0_sync+0x140/0x180

        other info that might help us debug this:

         Possible unsafe locking scenario:

               CPU0                    CPU1
               ----                    ----
          lock(&rq->lock);
                                       lock(cpu_hotplug_lock);
                                       lock(&rq->lock);
          lock(cpu_hotplug_lock);

         *** DEADLOCK ***

        3 locks held by l_0-0/730:
         #0: ffff80001345b4d0 (&cpuset_rwsem){++++}-{0:0}, at: __sched_setscheduler+0x4c0/0xad8
         #1: ffff00096e83b718 (&p->pi_lock){-.-.}-{2:2}, at: task_rq_lock+0x44/0xf0
         #2: ffff00097ef4ca58 (&rq->lock){-.-.}-{2:2}, at: task_rq_lock+0x60/0xf0

        stack backtrace:
        CPU: 1 PID: 730 Comm: l_0-0 Tainted: G        W         5.8.0-rc4-00040-g6345b3305877-dirty #864
        Hardware name: ARM Juno development board (r2) (DT)
        Call trace:
         dump_backtrace+0x0/0x1f0
         show_stack+0x2c/0x38
         dump_stack+0xf0/0x170
         print_circular_bug.isra.40+0x228/0x280
         check_noncircular+0x14c/0x1b0
         __lock_acquire+0x11a0/0x1550
         lock_acquire+0xf8/0x460
         cpus_read_lock+0x68/0x130
         static_key_enable+0x1c/0x38
         __sched_setscheduler+0x900/0xad8
         __arm64_sys_sched_setattr+0x2e0/0x4f8
         el0_svc_common.constprop.4+0x84/0x200
         do_el0_svc+0x34/0xa0
         el0_sync_handler+0x16c/0x340
         el0_sync+0x140/0x180
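
For illustration, the inversion above boils down to the following
userspace sketch (pthread mutexes standing in for the kernel locks;
the names and call sites only mirror the splat). Running the two paths
concurrently could deadlock, so the sketch runs them back to back:

        #include <pthread.h>
        #include <stdio.h>

        static pthread_mutex_t rq_lock = PTHREAD_MUTEX_INITIALIZER;
        static pthread_mutex_t cpu_hotplug_lock = PTHREAD_MUTEX_INITIALIZER;

        /* CPU0 in the splat: sched_setattr() takes rq->lock, then
         * static_key_enable() takes cpu_hotplug_lock. */
        static void path_cpu0(void)
        {
                pthread_mutex_lock(&rq_lock);
                pthread_mutex_lock(&cpu_hotplug_lock);
                puts("CPU0: rq->lock, then cpu_hotplug_lock");
                pthread_mutex_unlock(&cpu_hotplug_lock);
                pthread_mutex_unlock(&rq_lock);
        }

        /* CPU1 in the splat: cpu_up() takes cpu_hotplug_lock, then ends
         * up taking rq->lock (e.g. via __schedule()). */
        static void path_cpu1(void)
        {
                pthread_mutex_lock(&cpu_hotplug_lock);
                pthread_mutex_lock(&rq_lock);
                puts("CPU1: cpu_hotplug_lock, then rq->lock");
                pthread_mutex_unlock(&rq_lock);
                pthread_mutex_unlock(&cpu_hotplug_lock);
        }

        int main(void)
        {
                path_cpu0();
                path_cpu1();
                return 0;
        }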

Fix by ensuring we enable the sched_uclamp_used static key outside of
the rq->lock critical section in __sched_setscheduler().
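
The shape of the fix, as a minimal userspace sketch (enable_key() and
set_scheduler() are made-up stand-ins for static_branch_enable(), which
takes cpu_hotplug_lock via cpus_read_lock(), and for
__sched_setscheduler()): record the request while holding the lock, act
on it after unlocking.

        #include <pthread.h>
        #include <stdbool.h>
        #include <stdio.h>

        static pthread_mutex_t rq_lock = PTHREAD_MUTEX_INITIALIZER;
        static pthread_mutex_t hotplug_lock = PTHREAD_MUTEX_INITIALIZER;

        /* Stand-in for static_branch_enable(): may block on its own lock. */
        static void enable_key(void)
        {
                pthread_mutex_lock(&hotplug_lock);
                puts("static key enabled");
                pthread_mutex_unlock(&hotplug_lock);
        }

        /* Stand-in for __sched_setscheduler(): the blocking part is
         * deferred until the critical section is over. */
        static void set_scheduler(bool uclamp_requested)
        {
                bool enable = false;

                pthread_mutex_lock(&rq_lock);
                /* ... update the task's clamp values under the lock ... */
                if (uclamp_requested)
                        enable = true;  /* only record the request here */
                pthread_mutex_unlock(&rq_lock);

                /* The blocking call now runs outside the critical section. */
                if (enable)
                        enable_key();
        }

        int main(void)
        {
                set_scheduler(true);
                return 0;
        }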

Fixes: 46609ce22703 ("sched/uclamp: Protect uclamp fast path code with static key")
Signed-off-by: Qais Yousef <[email protected]>
Cc: Juri Lelli <[email protected]>
Cc: Vincent Guittot <[email protected]>
Cc: Dietmar Eggemann <[email protected]>
Cc: Steven Rostedt <[email protected]>
Cc: Ben Segall <[email protected]>
Cc: Mel Gorman <[email protected]>
Cc: Patrick Bellasi <[email protected]>
Cc: Chris Redpath <[email protected]>
Cc: Lukasz Luba <[email protected]>
Cc: [email protected]
---
 kernel/sched/core.c | 30 +++++++++++++++++++++++-------
 1 file changed, 23 insertions(+), 7 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index e1578c3ad40c..947a1f4fa112 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1348,7 +1348,7 @@ static int uclamp_validate(struct task_struct *p,
        return 0;
 }
 
-static void __setscheduler_uclamp(struct task_struct *p,
+static bool __setscheduler_uclamp(struct task_struct *p,
                                  const struct sched_attr *attr)
 {
        enum uclamp_id clamp_id;
@@ -1376,9 +1376,7 @@ static void __setscheduler_uclamp(struct task_struct *p,
        }
 
        if (likely(!(attr->sched_flags & SCHED_FLAG_UTIL_CLAMP)))
-               return;
-
-       static_branch_enable(&sched_uclamp_used);
+               return false;
 
        if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP_MIN) {
                uclamp_se_set(&p->uclamp_req[UCLAMP_MIN],
@@ -1389,6 +1387,8 @@ static void __setscheduler_uclamp(struct task_struct *p,
                uclamp_se_set(&p->uclamp_req[UCLAMP_MAX],
                              attr->sched_util_max, true);
        }
+
+       return true;
 }
 
 static void uclamp_fork(struct task_struct *p)
@@ -1465,8 +1465,11 @@ static inline int uclamp_validate(struct task_struct *p,
 {
        return -EOPNOTSUPP;
 }
-static void __setscheduler_uclamp(struct task_struct *p,
-                                 const struct sched_attr *attr) { }
+static bool __setscheduler_uclamp(struct task_struct *p,
+                                 const struct sched_attr *attr)
+{
+       return false;
+}
 static inline void uclamp_fork(struct task_struct *p) { }
 static inline void uclamp_post_fork(struct task_struct *p) { }
 static inline void init_uclamp(void) { }
@@ -5305,7 +5308,8 @@ static int __sched_setscheduler(struct task_struct *p,
        prev_class = p->sched_class;
 
        __setscheduler(rq, p, attr, pi);
-       __setscheduler_uclamp(p, attr);
+
+       retval = __setscheduler_uclamp(p, attr);
 
        if (queued) {
                /*
@@ -5335,6 +5339,18 @@ static int __sched_setscheduler(struct task_struct *p,
        balance_callback(rq);
        preempt_enable();
 
+#ifdef CONFIG_UCLAMP_TASK
+       /*
+        * Enable the uclamp static key outside of the critical section.
+        * static_branch_enable() takes cpu_hotplug_lock; if it is called
+        * from the critical section above, which holds rq->lock, the two
+        * locks can be acquired in opposite orders on different paths,
+        * leading to deadlock.
+        */
+       if (retval)
+               static_branch_enable(&sched_uclamp_used);
+#endif
+
        return 0;
 
 unlock:
-- 
2.17.1
