> Thanks for pointing this out. I think the ideal fix would be to
> correctly initialize/clean up the coresched attributes in the cpu
> hotplug code path so that the lock can be taken successfully if the
> sibling is offlined/onlined after coresched was enabled. We are
> working on another bug related to the hotplug path and shall introduce
> the fix in v3.
>
A possible fix for handling the runqueues during cpu offline/online is
attached below.
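
In rough outline, the hotplug-side idea is to keep a sibling runqueue's
core_enabled flag in sync with its online state. A minimal sketch follows
(kernel context assumed; the helper name sched_core_toggle_cpu() is
hypothetical, while __sched_core_enabled and rq->core_enabled come from
the coresched series; in the patch below the same logic sits directly in
sched_cpu_activate()/sched_cpu_deactivate()):

#ifdef CONFIG_SCHED_CORE
/*
 * Hypothetical helper: mirrors what the patch open-codes in the
 * activate/deactivate paths.
 */
static void sched_core_toggle_cpu(unsigned int cpu, bool online)
{
	struct rq *rq = cpu_rq(cpu);

	/* Only track per-rq state while core scheduling is enabled globally. */
	if (static_branch_unlikely(&__sched_core_enabled))
		rq->core_enabled = online;
}
#endif

In the pick_next_task() loop, offline siblings in the smt_mask are then
simply skipped, so no core_pick is expected for them.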

Thanks,
Vineeth

---
 kernel/sched/core.c | 28 +++++++++++++++++++++++++---
 1 file changed, 25 insertions(+), 3 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index e8e5f26db052..1a809849a1e7 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -253,7 +253,7 @@ static int __sched_core_stopper(void *data)
        bool enabled = !!(unsigned long)data;
        int cpu;
 
-       for_each_possible_cpu(cpu)
+       for_each_online_cpu(cpu)
                cpu_rq(cpu)->core_enabled = enabled;
 
        return 0;
@@ -3764,6 +3764,9 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
                        struct rq *rq_i = cpu_rq(i);
                        struct task_struct *p;
 
+                       if (cpu_is_offline(i))
+                               continue;
+
                        if (rq_i->core_pick)
                                continue;
 
@@ -3866,6 +3869,9 @@ next_class:;
        for_each_cpu(i, smt_mask) {
                struct rq *rq_i = cpu_rq(i);
 
+               if (cpu_is_offline(i))
+                       continue;
+
                WARN_ON_ONCE(!rq_i->core_pick);
 
                rq_i->core_pick->core_occupation = occ;
@@ -6410,8 +6416,14 @@ int sched_cpu_activate(unsigned int cpu)
        /*
         * When going up, increment the number of cores with SMT present.
         */
-       if (cpumask_weight(cpu_smt_mask(cpu)) == 2)
+       if (cpumask_weight(cpu_smt_mask(cpu)) == 2) {
                static_branch_inc_cpuslocked(&sched_smt_present);
+#ifdef CONFIG_SCHED_CORE
+               if (static_branch_unlikely(&__sched_core_enabled)) {
+                       rq->core_enabled = true;
+               }
+#endif
+       }
 #endif
        set_cpu_active(cpu, true);
 
@@ -6459,8 +6471,15 @@ int sched_cpu_deactivate(unsigned int cpu)
        /*
         * When going down, decrement the number of cores with SMT present.
         */
-       if (cpumask_weight(cpu_smt_mask(cpu)) == 2)
+       if (cpumask_weight(cpu_smt_mask(cpu)) == 2) {
+#ifdef CONFIG_SCHED_CORE
+               struct rq *rq = cpu_rq(cpu);
+               if (static_branch_unlikely(&__sched_core_enabled)) {
+                       rq->core_enabled = false;
+               }
+#endif
                static_branch_dec_cpuslocked(&sched_smt_present);
+       }
 #endif
 
        if (!sched_smp_initialized)
@@ -6537,6 +6556,9 @@ int sched_cpu_dying(unsigned int cpu)
        update_max_interval();
        nohz_balance_exit_idle(rq);
        hrtick_clear(rq);
+#ifdef CONFIG_SCHED_CORE
+       rq->core = NULL;
+#endif
        return 0;
 }
 #endif
-- 
2.17.1
