On Sun, Mar 08 2026 at 18:23, Matthieu Baerts wrote:
> 08 Mar 2026 17:58:26 Thomas Gleixner <[email protected]>:
>> So I'm back to square one. I go and do what I should have done in the
>> first place. Write a debug patch with trace_printks and let the people
>> who can actually trigger the problem run with it.
>
> Happy to test such debug patches!

See below.

Enable the tracepoints either on the kernel command line:

    trace_event=sched_switch,mmcid:*

or before starting the test case:

    echo 1 >/sys/kernel/tracing/events/sched/sched_switch/enable
    echo 1 >/sys/kernel/tracing/events/mmcid/enable

I added a 50ms timeout into mm_cid_get() which freezes the trace and
emits a warning. If you enable panic_on_warn and ftrace_dump_on_oops,
then it dumps the trace buffer once it hits the warning.

Either kernel command line:

   panic_on_warn ftrace_dump_on_oops

or

  echo 1 >/proc/sys/kernel/panic_on_warn
  echo 1 >/proc/sys/kernel/ftrace_dump_on_oops

That should provide enough information to decode this mystery.

Thanks,

        tglx
---
 include/trace/events/mmcid.h |  138 +++++++++++++++++++++++++++++++++++++++++++
 kernel/sched/core.c          |   10 +++
 kernel/sched/sched.h         |   20 +++++-
 3 files changed, 165 insertions(+), 3 deletions(-)

--- /dev/null
+++ b/include/trace/events/mmcid.h
@@ -0,0 +1,138 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM mmcid
+
+#if !defined(_TRACE_MMCID_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_MMCID_H
+
+#include <linux/sched.h>
+#include <linux/tracepoint.h>
+
+DECLARE_EVENT_CLASS(mmcid_class,
+
+       TP_PROTO(struct mm_struct *mm, unsigned int cid),
+
+       TP_ARGS(mm, cid),
+
+       TP_STRUCT__entry(
+               __field( void *,        mm      )
+               __field( unsigned int,  cid     )
+       ),
+
+       TP_fast_assign(
+               __entry->mm     = mm;
+               __entry->cid    = cid;
+       ),
+
+       TP_printk("mm=%p cid=%08x", __entry->mm, __entry->cid)
+);
+
+DEFINE_EVENT(mmcid_class, mmcid_getcid,
+
+       TP_PROTO(struct mm_struct *mm, unsigned int cid),
+
+       TP_ARGS(mm, cid)
+);
+
+DEFINE_EVENT(mmcid_class, mmcid_putcid,
+
+       TP_PROTO(struct mm_struct *mm, unsigned int cid),
+
+       TP_ARGS(mm, cid)
+);
+
+DECLARE_EVENT_CLASS(mmcid_task_class,
+
+       TP_PROTO(struct task_struct *t, struct mm_struct *mm, unsigned int cid),
+
+       TP_ARGS(t, mm, cid),
+
+       TP_STRUCT__entry(
+               __field( void *,        t       )
+               __field( void *,        mm      )
+               __field( unsigned int,  cid     )
+       ),
+
+       TP_fast_assign(
+               __entry->t      = t;
+               __entry->mm     = mm;
+               __entry->cid    = cid;
+       ),
+
+       TP_printk("t=%p mm=%p cid=%08x", __entry->t, __entry->mm, __entry->cid)
+);
+
+DEFINE_EVENT(mmcid_task_class, mmcid_task_update,
+
+       TP_PROTO(struct task_struct *t, struct mm_struct *mm, unsigned int cid),
+
+       TP_ARGS(t, mm, cid)
+);
+
+DECLARE_EVENT_CLASS(mmcid_cpu_class,
+
+       TP_PROTO(unsigned int cpu, struct mm_struct *mm, unsigned int cid),
+
+       TP_ARGS(cpu, mm, cid),
+
+       TP_STRUCT__entry(
+               __field( unsigned int,  cpu     )
+               __field( void *,        mm      )
+               __field( unsigned int,  cid     )
+       ),
+
+       TP_fast_assign(
+               __entry->cpu    = cpu;
+               __entry->mm     = mm;
+               __entry->cid    = cid;
+       ),
+
+       TP_printk("cpu=%u mm=%p cid=%08x", __entry->cpu, __entry->mm, 
__entry->cid)
+);
+
+DEFINE_EVENT(mmcid_cpu_class, mmcid_cpu_update,
+
+       TP_PROTO(unsigned int cpu, struct mm_struct *mm, unsigned int cid),
+
+       TP_ARGS(cpu, mm, cid)
+);
+
+DECLARE_EVENT_CLASS(mmcid_user_class,
+
+       TP_PROTO(struct task_struct *t, struct mm_struct *mm),
+
+       TP_ARGS(t, mm),
+
+       TP_STRUCT__entry(
+               __field( void *,        t       )
+               __field( void *,        mm      )
+               __field( unsigned int,  users   )
+       ),
+
+       TP_fast_assign(
+               __entry->t      = t;
+               __entry->mm     = mm;
+               __entry->users  = mm->mm_cid.users;
+       ),
+
+       TP_printk("t=%p mm=%p users=%u", __entry->t, __entry->mm, 
__entry->users)
+);
+
+DEFINE_EVENT(mmcid_user_class, mmcid_user_add,
+
+       TP_PROTO(struct task_struct *t, struct mm_struct *mm),
+
+       TP_ARGS(t, mm)
+);
+
+DEFINE_EVENT(mmcid_user_class, mmcid_user_del,
+
+       TP_PROTO(struct task_struct *t, struct mm_struct *mm),
+
+            TP_ARGS(t, mm)
+);
+
+#endif
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -86,6 +86,7 @@
 #include <linux/sched/rseq_api.h>
 #include <trace/events/sched.h>
 #include <trace/events/ipi.h>
+#include <trace/events/mmcid.h>
 #undef CREATE_TRACE_POINTS
 
 #include "sched.h"
@@ -10569,7 +10570,9 @@ static inline void mm_cid_transit_to_tas
                unsigned int cid = cpu_cid_to_cid(t->mm_cid.cid);
 
                t->mm_cid.cid = cid_to_transit_cid(cid);
+               trace_mmcid_task_update(t, t->mm, t->mm_cid.cid);
                pcp->cid = t->mm_cid.cid;
+               trace_mmcid_cpu_update(task_cpu(t), t->mm, pcp->cid);
        }
 }
 
@@ -10602,7 +10605,9 @@ static void mm_cid_fixup_cpus_to_tasks(s
                        if (!cid_in_transit(cid)) {
                                cid = cid_to_transit_cid(cid);
                                rq->curr->mm_cid.cid = cid;
+                               trace_mmcid_task_update(rq->curr, rq->curr->mm, 
cid);
                                pcp->cid = cid;
+                               trace_mmcid_cpu_update(cpu, mm, cid);
                        }
                }
        }
@@ -10613,7 +10618,9 @@ static inline void mm_cid_transit_to_cpu
 {
        if (cid_on_task(t->mm_cid.cid)) {
                t->mm_cid.cid = cid_to_transit_cid(t->mm_cid.cid);
+               trace_mmcid_task_update(t, t->mm, t->mm_cid.cid);
                pcp->cid = t->mm_cid.cid;
+               trace_mmcid_cpu_update(task_cpu(t), t->mm, pcp->cid);
        }
 }
 
@@ -10685,6 +10692,7 @@ static bool sched_mm_cid_add_user(struct
 {
        t->mm_cid.active = 1;
        mm->mm_cid.users++;
+       trace_mmcid_user_add(t, mm);
        return mm_update_max_cids(mm);
 }
 
@@ -10727,6 +10735,7 @@ void sched_mm_cid_fork(struct task_struc
        } else {
                mm_cid_fixup_cpus_to_tasks(mm);
                t->mm_cid.cid = mm_get_cid(mm);
+               trace_mmcid_task_update(t, t->mm, t->mm_cid.cid);
        }
 }
 
@@ -10739,6 +10748,7 @@ static bool sched_mm_cid_remove_user(str
                mm_unset_cid_on_task(t);
        }
        t->mm->mm_cid.users--;
+       trace_mmcid_user_del(t, t->mm);
        return mm_update_max_cids(t->mm);
 }
 
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -75,6 +75,7 @@
 #include <linux/delayacct.h>
 #include <linux/mmu_context.h>
 
+#include <trace/events/mmcid.h>
 #include <trace/events/power.h>
 #include <trace/events/sched.h>
 
@@ -3809,6 +3810,7 @@ static __always_inline bool cid_on_task(
 
 static __always_inline void mm_drop_cid(struct mm_struct *mm, unsigned int cid)
 {
+       trace_mmcid_putcid(mm, cid);
        clear_bit(cid, mm_cidmask(mm));
 }
 
@@ -3817,6 +3819,7 @@ static __always_inline void mm_unset_cid
        unsigned int cid = t->mm_cid.cid;
 
        t->mm_cid.cid = MM_CID_UNSET;
+       trace_mmcid_task_update(t, t->mm, t->mm_cid.cid);
        if (cid_on_task(cid))
                mm_drop_cid(t->mm, cid);
 }
@@ -3838,6 +3841,7 @@ static inline unsigned int __mm_get_cid(
                return MM_CID_UNSET;
        if (test_and_set_bit(cid, mm_cidmask(mm)))
                return MM_CID_UNSET;
+       trace_mmcid_getcid(mm, cid);
        return cid;
 }
 
@@ -3845,9 +3849,17 @@ static inline unsigned int mm_get_cid(st
 {
        unsigned int cid = __mm_get_cid(mm, READ_ONCE(mm->mm_cid.max_cids));
 
-       while (cid == MM_CID_UNSET) {
-               cpu_relax();
-               cid = __mm_get_cid(mm, num_possible_cpus());
+       if (cid == MM_CID_UNSET) {
+               ktime_t t0 = ktime_get();
+
+               while (cid == MM_CID_UNSET) {
+                       cpu_relax();
+                       cid = __mm_get_cid(mm, num_possible_cpus());
+                       if (ktime_get() - t0 > 50 * NSEC_PER_MSEC) {
+                               tracing_off();
+                               WARN_ON_ONCE(1);
+                       }
+               }
        }
        return cid;
 }
@@ -3874,6 +3886,7 @@ static inline unsigned int mm_cid_conver
 static __always_inline void mm_cid_update_task_cid(struct task_struct *t, 
unsigned int cid)
 {
        if (t->mm_cid.cid != cid) {
+               trace_mmcid_task_update(t, t->mm, cid);
                t->mm_cid.cid = cid;
                rseq_sched_set_ids_changed(t);
        }
@@ -3881,6 +3894,7 @@ static __always_inline void mm_cid_updat
 
 static __always_inline void mm_cid_update_pcpu_cid(struct mm_struct *mm, 
unsigned int cid)
 {
+       trace_mmcid_cpu_update(smp_processor_id(), mm, cid);
        __this_cpu_write(mm->mm_cid.pcpu->cid, cid);
 }
 

Reply via email to