On Sun, Mar 08 2026 at 18:23, Matthieu Baerts wrote:
> 08 Mar 2026 17:58:26 Thomas Gleixner <[email protected]>:
>> So I'm back to square one. I go and do what I should have done in the
>> first place. Write a debug patch with trace_printks and let the people
>> who can actually trigger the problem run with it.
>
> Happy to test such debug patches!
See below.
Enable the tracepoints either on the kernel command line:
trace_event=sched_switch,mmcid:*
or before starting the test case:
echo 1 >/sys/kernel/tracing/events/sched/sched_switch/enable
echo 1 >/sys/kernel/tracing/events/mmcid/enable
I added a 50ms timeout into mm_get_cid() which freezes the trace and
emits a warning. If you enable panic_on_warn and ftrace_dump_on_oops,
then it dumps the trace buffer once it hits the warning.
Either kernel command line:
panic_on_warn ftrace_dump_on_oops
or
echo 1 >/proc/sys/kernel/panic_on_warn
echo 1 >/proc/sys/kernel/ftrace_dump_on_oops
That should provide enough information to decode this mystery.
Thanks,
tglx
---
include/trace/events/mmcid.h | 138 +++++++++++++++++++++++++++++++++++++++++++
kernel/sched/core.c | 10 +++
kernel/sched/sched.h | 20 +++++-
3 files changed, 165 insertions(+), 3 deletions(-)
--- /dev/null
+++ b/include/trace/events/mmcid.h
@@ -0,0 +1,138 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM mmcid
+
+#if !defined(_TRACE_MMCID_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_MMCID_H
+
+#include <linux/sched.h>
+#include <linux/tracepoint.h>
+
+DECLARE_EVENT_CLASS(mmcid_class,
+
+ TP_PROTO(struct mm_struct *mm, unsigned int cid),
+
+ TP_ARGS(mm, cid),
+
+ TP_STRUCT__entry(
+ __field( void *, mm )
+ __field( unsigned int, cid )
+ ),
+
+ TP_fast_assign(
+ __entry->mm = mm;
+ __entry->cid = cid;
+ ),
+
+ TP_printk("mm=%p cid=%08x", __entry->mm, __entry->cid)
+);
+
+DEFINE_EVENT(mmcid_class, mmcid_getcid,
+
+ TP_PROTO(struct mm_struct *mm, unsigned int cid),
+
+ TP_ARGS(mm, cid)
+);
+
+DEFINE_EVENT(mmcid_class, mmcid_putcid,
+
+ TP_PROTO(struct mm_struct *mm, unsigned int cid),
+
+ TP_ARGS(mm, cid)
+);
+
+DECLARE_EVENT_CLASS(mmcid_task_class,
+
+ TP_PROTO(struct task_struct *t, struct mm_struct *mm, unsigned int cid),
+
+ TP_ARGS(t, mm, cid),
+
+ TP_STRUCT__entry(
+ __field( void *, t )
+ __field( void *, mm )
+ __field( unsigned int, cid )
+ ),
+
+ TP_fast_assign(
+ __entry->t = t;
+ __entry->mm = mm;
+ __entry->cid = cid;
+ ),
+
+ TP_printk("t=%p mm=%p cid=%08x", __entry->t, __entry->mm, __entry->cid)
+);
+
+DEFINE_EVENT(mmcid_task_class, mmcid_task_update,
+
+ TP_PROTO(struct task_struct *t, struct mm_struct *mm, unsigned int cid),
+
+ TP_ARGS(t, mm, cid)
+);
+
+DECLARE_EVENT_CLASS(mmcid_cpu_class,
+
+ TP_PROTO(unsigned int cpu, struct mm_struct *mm, unsigned int cid),
+
+ TP_ARGS(cpu, mm, cid),
+
+ TP_STRUCT__entry(
+ __field( unsigned int, cpu )
+ __field( void *, mm )
+ __field( unsigned int, cid )
+ ),
+
+ TP_fast_assign(
+ __entry->cpu = cpu;
+ __entry->mm = mm;
+ __entry->cid = cid;
+ ),
+
+ TP_printk("cpu=%u mm=%p cid=%08x", __entry->cpu, __entry->mm, __entry->cid)
+);
+
+DEFINE_EVENT(mmcid_cpu_class, mmcid_cpu_update,
+
+ TP_PROTO(unsigned int cpu, struct mm_struct *mm, unsigned int cid),
+
+ TP_ARGS(cpu, mm, cid)
+);
+
+DECLARE_EVENT_CLASS(mmcid_user_class,
+
+ TP_PROTO(struct task_struct *t, struct mm_struct *mm),
+
+ TP_ARGS(t, mm),
+
+ TP_STRUCT__entry(
+ __field( void *, t )
+ __field( void *, mm )
+ __field( unsigned int, users )
+ ),
+
+ TP_fast_assign(
+ __entry->t = t;
+ __entry->mm = mm;
+ __entry->users = mm->mm_cid.users;
+ ),
+
+ TP_printk("t=%p mm=%p users=%u", __entry->t, __entry->mm, __entry->users)
+);
+
+DEFINE_EVENT(mmcid_user_class, mmcid_user_add,
+
+ TP_PROTO(struct task_struct *t, struct mm_struct *mm),
+
+ TP_ARGS(t, mm)
+);
+
+DEFINE_EVENT(mmcid_user_class, mmcid_user_del,
+
+ TP_PROTO(struct task_struct *t, struct mm_struct *mm),
+
+ TP_ARGS(t, mm)
+);
+
+#endif
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -86,6 +86,7 @@
#include <linux/sched/rseq_api.h>
#include <trace/events/sched.h>
#include <trace/events/ipi.h>
+#include <trace/events/mmcid.h>
#undef CREATE_TRACE_POINTS
#include "sched.h"
@@ -10569,7 +10570,9 @@ static inline void mm_cid_transit_to_tas
unsigned int cid = cpu_cid_to_cid(t->mm_cid.cid);
t->mm_cid.cid = cid_to_transit_cid(cid);
+ trace_mmcid_task_update(t, t->mm, t->mm_cid.cid);
pcp->cid = t->mm_cid.cid;
+ trace_mmcid_cpu_update(task_cpu(t), t->mm, pcp->cid);
}
}
@@ -10602,7 +10605,9 @@ static void mm_cid_fixup_cpus_to_tasks(s
if (!cid_in_transit(cid)) {
cid = cid_to_transit_cid(cid);
rq->curr->mm_cid.cid = cid;
+ trace_mmcid_task_update(rq->curr, rq->curr->mm, cid);
pcp->cid = cid;
+ trace_mmcid_cpu_update(cpu, mm, cid);
}
}
}
@@ -10613,7 +10618,9 @@ static inline void mm_cid_transit_to_cpu
{
if (cid_on_task(t->mm_cid.cid)) {
t->mm_cid.cid = cid_to_transit_cid(t->mm_cid.cid);
+ trace_mmcid_task_update(t, t->mm, t->mm_cid.cid);
pcp->cid = t->mm_cid.cid;
+ trace_mmcid_cpu_update(task_cpu(t), t->mm, pcp->cid);
}
}
@@ -10685,6 +10692,7 @@ static bool sched_mm_cid_add_user(struct
{
t->mm_cid.active = 1;
mm->mm_cid.users++;
+ trace_mmcid_user_add(t, mm);
return mm_update_max_cids(mm);
}
@@ -10727,6 +10735,7 @@ void sched_mm_cid_fork(struct task_struc
} else {
mm_cid_fixup_cpus_to_tasks(mm);
t->mm_cid.cid = mm_get_cid(mm);
+ trace_mmcid_task_update(t, t->mm, t->mm_cid.cid);
}
}
@@ -10739,6 +10748,7 @@ static bool sched_mm_cid_remove_user(str
mm_unset_cid_on_task(t);
}
t->mm->mm_cid.users--;
+ trace_mmcid_user_del(t, t->mm);
return mm_update_max_cids(t->mm);
}
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -75,6 +75,7 @@
#include <linux/delayacct.h>
#include <linux/mmu_context.h>
+#include <trace/events/mmcid.h>
#include <trace/events/power.h>
#include <trace/events/sched.h>
@@ -3809,6 +3810,7 @@ static __always_inline bool cid_on_task(
static __always_inline void mm_drop_cid(struct mm_struct *mm, unsigned int cid)
{
+ trace_mmcid_putcid(mm, cid);
clear_bit(cid, mm_cidmask(mm));
}
@@ -3817,6 +3819,7 @@ static __always_inline void mm_unset_cid
unsigned int cid = t->mm_cid.cid;
t->mm_cid.cid = MM_CID_UNSET;
+ trace_mmcid_task_update(t, t->mm, t->mm_cid.cid);
if (cid_on_task(cid))
mm_drop_cid(t->mm, cid);
}
@@ -3838,6 +3841,7 @@ static inline unsigned int __mm_get_cid(
return MM_CID_UNSET;
if (test_and_set_bit(cid, mm_cidmask(mm)))
return MM_CID_UNSET;
+ trace_mmcid_getcid(mm, cid);
return cid;
}
@@ -3845,9 +3849,17 @@ static inline unsigned int mm_get_cid(st
{
unsigned int cid = __mm_get_cid(mm, READ_ONCE(mm->mm_cid.max_cids));
- while (cid == MM_CID_UNSET) {
- cpu_relax();
- cid = __mm_get_cid(mm, num_possible_cpus());
+ if (cid == MM_CID_UNSET) {
+ ktime_t t0 = ktime_get();
+
+ while (cid == MM_CID_UNSET) {
+ cpu_relax();
+ cid = __mm_get_cid(mm, num_possible_cpus());
+ if (ktime_get() - t0 > 50 * NSEC_PER_MSEC) {
+ tracing_off();
+ WARN_ON_ONCE(1);
+ }
+ }
}
return cid;
}
@@ -3874,6 +3886,7 @@ static inline unsigned int mm_cid_conver
static __always_inline void mm_cid_update_task_cid(struct task_struct *t,
unsigned int cid)
{
if (t->mm_cid.cid != cid) {
+ trace_mmcid_task_update(t, t->mm, cid);
t->mm_cid.cid = cid;
rseq_sched_set_ids_changed(t);
}
@@ -3881,6 +3894,7 @@ static __always_inline void mm_cid_updat
static __always_inline void mm_cid_update_pcpu_cid(struct mm_struct *mm,
unsigned int cid)
{
+ trace_mmcid_cpu_update(smp_processor_id(), mm, cid);
__this_cpu_write(mm->mm_cid.pcpu->cid, cid);
}