stop_kthread() is the offline callback for "trace/osnoise:online", since
commit 5bfbcd1ee57b ("tracing/timerlat: Add interface_lock around clearing
of kthread in stop_kthread()"), the following ABBA deadlock scenario is
introduced:
T1 | T2 [BP] | T3 [AP]
osnoise_hotplug_workfn() | work_for_cpu_fn() | cpuhp_thread_fun()
| _cpu_down() | osnoise_cpu_die()
mutex_lock(&interface_lock) | | stop_kthread()
| cpus_write_lock() |
mutex_lock(&interface_lock)
cpus_read_lock() | cpuhp_kick_ap() |
As the interface_lock here in just for protecting the "kthread" field of
the osn_var, use xchg() instead to fix this issue. Also use
for_each_online_cpu() back in stop_per_cpu_kthreads() as it can take
cpu_read_lock() again.
Fixes: 5bfbcd1ee57b ("tracing/timerlat: Add interface_lock around clearing of
kthread in stop_kthread()")
Signed-off-by: Wei Li <[email protected]>
---
kernel/trace/trace_osnoise.c | 13 ++++++-------
1 file changed, 6 insertions(+), 7 deletions(-)
diff --git a/kernel/trace/trace_osnoise.c b/kernel/trace/trace_osnoise.c
index 934a14bc72e6..ddc9afb9b7d4 100644
--- a/kernel/trace/trace_osnoise.c
+++ b/kernel/trace/trace_osnoise.c
@@ -1953,12 +1953,8 @@ static void stop_kthread(unsigned int cpu)
{
struct task_struct *kthread;
- mutex_lock(&interface_lock);
- kthread = per_cpu(per_cpu_osnoise_var, cpu).kthread;
+ kthread = xchg_relaxed(&(per_cpu(per_cpu_osnoise_var, cpu).kthread),
NULL);
if (kthread) {
- per_cpu(per_cpu_osnoise_var, cpu).kthread = NULL;
- mutex_unlock(&interface_lock);
-
if (cpumask_test_and_clear_cpu(cpu, &kthread_cpumask) &&
!WARN_ON(!test_bit(OSN_WORKLOAD, &osnoise_options))) {
kthread_stop(kthread);
@@ -1972,7 +1968,6 @@ static void stop_kthread(unsigned int cpu)
put_task_struct(kthread);
}
} else {
- mutex_unlock(&interface_lock);
/* if no workload, just return */
if (!test_bit(OSN_WORKLOAD, &osnoise_options)) {
/*
@@ -1994,8 +1989,12 @@ static void stop_per_cpu_kthreads(void)
{
int cpu;
- for_each_possible_cpu(cpu)
+ cpus_read_lock();
+
+ for_each_online_cpu(cpu)
stop_kthread(cpu);
+
+ cpus_read_unlock();
}
/*
--
2.25.1