On 28.05.24 13:22, Roger Pau Monné wrote:
Hello,

When the stop_machine_run() call in cpu_down() fails and calls the CPU
notifier CPU_DOWN_FAILED hook the following assert triggers in the
scheduling code:

Assertion '!cpumask_test_cpu(cpu, &prv->initialized)' failed at 
common/sched/cred1
----[ Xen-4.19-unstable  x86_64  debug=y  Tainted:   C    ]----
CPU:    0
RIP:    e008:[<ffff82d040248299>] 
common/sched/credit2.c#csched2_free_pdata+0xc8/0x177
RFLAGS: 0000000000010093   CONTEXT: hypervisor
rax: 0000000000000000   rbx: ffff83202ecc2f80   rcx: ffff83202f3e64c0
rdx: 0000000000000001   rsi: 0000000000000002   rdi: ffff83202ecc2f88
rbp: ffff83203ffffd58   rsp: ffff83203ffffd30   r8:  0000000000000000
r9:  ffff83202f3e6e01   r10: 0000000000000000   r11: 0f0f0f0f0f0f0f0f
r12: ffff83202ecb80b0   r13: 0000000000000001   r14: 0000000000000282
r15: ffff83202ecbbf00   cr0: 000000008005003b   cr4: 00000000007526e0
cr3: 00000000574c2000   cr2: 0000000000000000
fsb: 0000000000000000   gsb: 0000000000000000   gss: 0000000000000000
ds: 0000   es: 0000   fs: 0000   gs: 0000   ss: 0000   cs: e008
Xen code around <ffff82d040248299> 
(common/sched/credit2.c#csched2_free_pdata+0xc8/0x177):
  fe ff eb 9a 0f 0b 0f 0b <0f> 0b 49 8d 4f 08 49 8b 47 08 48 3b 48 08 75 2e
Xen stack trace from rsp=ffff83203ffffd30:
    ffff83202d74d100 0000000000000001 ffff82d0404c4430 0000000000000006
    0000000000000000 ffff83203ffffd78 ffff82d040257454 0000000000000000
    0000000000000001 ffff83203ffffda8 ffff82d04021f303 ffff82d0404c4628
    ffff82d0404c4620 ffff82d0404c4430 0000000000000006 ffff83203ffffdf0
    ffff82d04022bc4c ffff83203ffffe18 0000000000000001 0000000000000001
    00000000fffffff0 0000000000000000 0000000000000000 ffff82d0405e6500
    ffff83203ffffe08 ffff82d040204fd5 0000000000000001 ffff83203ffffe30
    ffff82d0402054f0 ffff82d0404c5860 0000000000000001 ffff83202ec75000
    ffff83203ffffe48 ffff82d040348c25 ffff83202d74d0d0 ffff83203ffffe68
    ffff82d0402071aa ffff83202ec751d0 ffff82d0405ce210 ffff83203ffffe80
    ffff82d0402343c9 ffff82d0405ce200 ffff83203ffffeb0 ffff82d040234631
    0000000000000000 0000000000007fff ffff82d0405d5080 ffff82d0405ce210
    ffff83203ffffee8 ffff82d040321411 ffff82d040321399 ffff83202f3a9000
    0000000000000000 0000001d91a6fa2d ffff82d0405e6500 ffff83203ffffde0
    ffff82d040324391 0000000000000000 0000000000000000 0000000000000000
    0000000000000000 0000000000000000 0000000000000000 0000000000000000
    0000000000000000 0000000000000000 0000000000000000 0000000000000000
    0000000000000000 0000000000000000 0000000000000000 0000000000000000
    0000000000000000 0000000000000000 0000000000000000 0000000000000000
    0000000000000000 0000000000000000 0000000000000000 0000000000000000
Xen call trace:
    [<ffff82d040248299>] R common/sched/credit2.c#csched2_free_pdata+0xc8/0x177
    [<ffff82d040257454>] F free_cpu_rm_data+0x41/0x58
    [<ffff82d04021f303>] F common/sched/cpupool.c#cpu_callback+0xfb/0x466
    [<ffff82d04022bc4c>] F notifier_call_chain+0x6c/0x96
    [<ffff82d040204fd5>] F common/cpu.c#cpu_notifier_call_chain+0x1b/0x36
    [<ffff82d0402054f0>] F cpu_down+0xa7/0x143
    [<ffff82d040348c25>] F cpu_down_helper+0x11/0x27
    [<ffff82d0402071aa>] F 
common/domain.c#continue_hypercall_tasklet_handler+0x50/0xbd
    [<ffff82d0402343c9>] F common/tasklet.c#do_tasklet_work+0x76/0xaf
    [<ffff82d040234631>] F do_tasklet+0x5b/0x8d
    [<ffff82d040321411>] F arch/x86/domain.c#idle_loop+0x78/0xe6
    [<ffff82d040324391>] F continue_running+0x5b/0x5d


****************************************
Panic on CPU 0:
Assertion '!cpumask_test_cpu(cpu, &prv->initialized)' failed at 
common/sched/credit2.c:4111
****************************************

The issue seems to be that since the CPU hasn't been removed, it's
still part of prv->initialized and the assert in csched2_free_pdata()
called as part of free_cpu_rm_data() triggers.

It's easy to reproduce by substituting the stop_machine_run() call in
cpu_down() with an error.

Could you please give the attached patch a try?

Only compile tested, though...


Juergen
From 5925f15ace8be186c9f41c0cda2d4a675b0f7bb9 Mon Sep 17 00:00:00 2001
From: Juergen Gross <[email protected]>
Date: Wed, 29 May 2024 13:24:36 +0200
Subject: [PATCH] xen/sched: fix error path of cpu removal
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In case removal of a cpu fails at the CPU_DYING step, an ASSERT() in
the credit2 scheduler might trigger:

Assertion '!cpumask_test_cpu(cpu, &prv->initialized)' failed at common/sched/cred1
----[ Xen-4.19-unstable  x86_64  debug=y  Tainted:   C    ]----
CPU:    0
RIP:    e008:[<ffff82d040248299>] common/sched/credit2.c#csched2_free_pdata+0xc8/0x177
RFLAGS: 0000000000010093   CONTEXT: hypervisor
rax: 0000000000000000   rbx: ffff83202ecc2f80   rcx: ffff83202f3e64c0
rdx: 0000000000000001   rsi: 0000000000000002   rdi: ffff83202ecc2f88
rbp: ffff83203ffffd58   rsp: ffff83203ffffd30   r8:  0000000000000000
r9:  ffff83202f3e6e01   r10: 0000000000000000   r11: 0f0f0f0f0f0f0f0f
r12: ffff83202ecb80b0   r13: 0000000000000001   r14: 0000000000000282
r15: ffff83202ecbbf00   cr0: 000000008005003b   cr4: 00000000007526e0
cr3: 00000000574c2000   cr2: 0000000000000000
fsb: 0000000000000000   gsb: 0000000000000000   gss: 0000000000000000
ds: 0000   es: 0000   fs: 0000   gs: 0000   ss: 0000   cs: e008
Xen code around <ffff82d040248299> (common/sched/credit2.c#csched2_free_pdata+0xc8/0x177):
 fe ff eb 9a 0f 0b 0f 0b <0f> 0b 49 8d 4f 08 49 8b 47 08 48 3b 48 08 75 2e
Xen stack trace from rsp=ffff83203ffffd30:
   ffff83202d74d100 0000000000000001 ffff82d0404c4430 0000000000000006
   0000000000000000 ffff83203ffffd78 ffff82d040257454 0000000000000000
   0000000000000001 ffff83203ffffda8 ffff82d04021f303 ffff82d0404c4628
   ffff82d0404c4620 ffff82d0404c4430 0000000000000006 ffff83203ffffdf0
   ffff82d04022bc4c ffff83203ffffe18 0000000000000001 0000000000000001
   00000000fffffff0 0000000000000000 0000000000000000 ffff82d0405e6500
   ffff83203ffffe08 ffff82d040204fd5 0000000000000001 ffff83203ffffe30
   ffff82d0402054f0 ffff82d0404c5860 0000000000000001 ffff83202ec75000
   ffff83203ffffe48 ffff82d040348c25 ffff83202d74d0d0 ffff83203ffffe68
   ffff82d0402071aa ffff83202ec751d0 ffff82d0405ce210 ffff83203ffffe80
   ffff82d0402343c9 ffff82d0405ce200 ffff83203ffffeb0 ffff82d040234631
   0000000000000000 0000000000007fff ffff82d0405d5080 ffff82d0405ce210
   ffff83203ffffee8 ffff82d040321411 ffff82d040321399 ffff83202f3a9000
   0000000000000000 0000001d91a6fa2d ffff82d0405e6500 ffff83203ffffde0
   ffff82d040324391 0000000000000000 0000000000000000 0000000000000000
   0000000000000000 0000000000000000 0000000000000000 0000000000000000
   0000000000000000 0000000000000000 0000000000000000 0000000000000000
   0000000000000000 0000000000000000 0000000000000000 0000000000000000
   0000000000000000 0000000000000000 0000000000000000 0000000000000000
   0000000000000000 0000000000000000 0000000000000000 0000000000000000
Xen call trace:
   [<ffff82d040248299>] R common/sched/credit2.c#csched2_free_pdata+0xc8/0x177
   [<ffff82d040257454>] F free_cpu_rm_data+0x41/0x58
   [<ffff82d04021f303>] F common/sched/cpupool.c#cpu_callback+0xfb/0x466
   [<ffff82d04022bc4c>] F notifier_call_chain+0x6c/0x96
   [<ffff82d040204fd5>] F common/cpu.c#cpu_notifier_call_chain+0x1b/0x36
   [<ffff82d0402054f0>] F cpu_down+0xa7/0x143
   [<ffff82d040348c25>] F cpu_down_helper+0x11/0x27
   [<ffff82d0402071aa>] F common/domain.c#continue_hypercall_tasklet_handler+0x50/0xbd
   [<ffff82d0402343c9>] F common/tasklet.c#do_tasklet_work+0x76/0xaf
   [<ffff82d040234631>] F do_tasklet+0x5b/0x8d
   [<ffff82d040321411>] F arch/x86/domain.c#idle_loop+0x78/0xe6
   [<ffff82d040324391>] F continue_running+0x5b/0x5d

This can be fixed by not freeing the cpu's scheduling data in case the
CPU_DYING step hasn't been performed. Instead the not used struct
sched_resource instances need to be freed in order to avoid memory
leaking.

Fixes: d84473689611 ("xen/sched: fix cpu hotplug")
Reported-by: Roger Pau Monné <[email protected]>
Signed-off-by: Juergen Gross <[email protected]>
---
 xen/common/sched/core.c    | 18 ++++++++++++++++--
 xen/common/sched/private.h |  1 +
 2 files changed, 17 insertions(+), 2 deletions(-)

diff --git a/xen/common/sched/core.c b/xen/common/sched/core.c
index d84b65f197..cbbeeaf0ca 100644
--- a/xen/common/sched/core.c
+++ b/xen/common/sched/core.c
@@ -3264,6 +3264,8 @@ struct cpu_rm_data *alloc_cpu_rm_data(unsigned int cpu, bool aff_alloc)
         data->sr[idx]->schedule_lock = sr->schedule_lock;
     }
 
+    data->n_sr_unused = idx;
+
  out:
     rcu_read_unlock(&sched_res_rculock);
 
@@ -3272,8 +3274,18 @@ struct cpu_rm_data *alloc_cpu_rm_data(unsigned int cpu, bool aff_alloc)
 
 void free_cpu_rm_data(struct cpu_rm_data *mem, unsigned int cpu)
 {
-    sched_free_udata(mem->old_ops, mem->vpriv_old);
-    sched_free_pdata(mem->old_ops, mem->ppriv_old, cpu);
+    unsigned int idx;
+
+    if ( !mem->n_sr_unused )
+    {
+        sched_free_udata(mem->old_ops, mem->vpriv_old);
+        sched_free_pdata(mem->old_ops, mem->ppriv_old, cpu);
+    }
+    else
+    {
+        for ( idx = 0; idx < mem->n_sr_unused; idx++ )
+            sched_res_free(&mem->sr[idx]->rcu);
+    }
     free_affinity_masks(&mem->affinity);
 
     xfree(mem);
@@ -3312,6 +3324,8 @@ int schedule_cpu_rm(unsigned int cpu, struct cpu_rm_data *data)
     /* See comment in schedule_cpu_add() regarding lock switching. */
     old_lock = pcpu_schedule_lock_irqsave(cpu, &flags);
 
+    data->n_sr_unused = 0;
+
     for_each_cpu ( cpu_iter, sr->cpus )
     {
         per_cpu(sched_res_idx, cpu_iter) = 0;
diff --git a/xen/common/sched/private.h b/xen/common/sched/private.h
index c0e7c96d24..f0129fd9cc 100644
--- a/xen/common/sched/private.h
+++ b/xen/common/sched/private.h
@@ -645,6 +645,7 @@ struct cpu_rm_data {
     const struct scheduler *old_ops;
     void *ppriv_old;
     void *vpriv_old;
+    unsigned int n_sr_unused;
     struct sched_resource *sr[];
 };
 
-- 
2.35.3

Reply via email to