The branch main has been updated by kib: URL: https://cgit.FreeBSD.org/src/commit/?id=245157fd8a382c3989075789ee98582665f3b31d
commit 245157fd8a382c3989075789ee98582665f3b31d Author: Konstantin Belousov <[email protected]> AuthorDate: 2026-01-22 21:46:01 +0000 Commit: Konstantin Belousov <[email protected]> CommitDate: 2026-01-30 18:45:48 +0000 ktrcsw(): should not be called when the thread is owning interlock or on sleepq The issue is that for ktrcsw() we lock the ktrace_mtx mutex while owning the interlock from a subsystem that called msleep(). In particular, the VM subsystem might call msleep() if page allocation failed. This establishes order VM locks (e.g. domain free queue lock) -> ktrace_mtx. Calling free() while owning ktrace_mtx gives the reverse order. Worse, msleep_spin_sbt() call s ktrcsw() while the thread is put on sleep queue. Then, since the mutex might be contested, the thread needs to be put on turnstil, which cannot work. Move the ktrcsw() call for switch-out after the wakeup, when the thread does not yet re-obtained any locks. From there, we call a special version of ktrcsw(), which is passed the actual time when the context switch occured. The drawback is that the switch-out record is only written in the ktrace.out file after the switch-in occurred, but this is probably not too serious. Reported and tested by: pho Reviewed by: markj Sponsored by: The FreeBSD Foundation MFC after: 1 week Differential revision: https://reviews.freebsd.org/D54831 --- sys/kern/kern_ktrace.c | 18 ++++++++++++++++-- sys/kern/kern_synch.c | 26 +++++++++++--------------- sys/sys/ktrace.h | 1 + 3 files changed, 28 insertions(+), 17 deletions(-) diff --git a/sys/kern/kern_ktrace.c b/sys/kern/kern_ktrace.c index b58e69a3f38e..f3ee1c53fafd 100644 --- a/sys/kern/kern_ktrace.c +++ b/sys/kern/kern_ktrace.c @@ -838,8 +838,8 @@ ktrpsig(int sig, sig_t action, sigset_t *mask, int code) ktrace_exit(td); } -void -ktrcsw(int out, int user, const char *wmesg) +static void +ktrcsw_impl(int out, int user, const char *wmesg, const struct timespec *tv) { struct thread *td = curthread; struct ktr_request *req; @@ -854,6 +854,8 @@ ktrcsw(int out, int user, const char *wmesg) kc = &req->ktr_data.ktr_csw; kc->out = out; kc->user = user; + if (tv != NULL) + req->ktr_header.ktr_time = *tv; if (wmesg != NULL) strlcpy(kc->wmesg, wmesg, sizeof(kc->wmesg)); else @@ -862,6 +864,18 @@ ktrcsw(int out, int user, const char *wmesg) ktrace_exit(td); } +void +ktrcsw(int out, int user, const char *wmesg) +{ + ktrcsw_impl(out, user, wmesg, NULL); +} + +void +ktrcsw_out(const struct timespec *tv, const char *wmesg) +{ + ktrcsw_impl(1, 0, wmesg, tv); +} + void ktrstruct(const char *name, const void *data, size_t datalen) { diff --git a/sys/kern/kern_synch.c b/sys/kern/kern_synch.c index fc6c9857463c..22628c78d8a5 100644 --- a/sys/kern/kern_synch.c +++ b/sys/kern/kern_synch.c @@ -133,6 +133,7 @@ _sleep(const void *ident, struct lock_object *lock, int priority, { struct thread *td __ktrace_used; struct lock_class *class; + struct timespec sw_out_tv __ktrace_used; uintptr_t lock_state; int catch, pri, rval, sleepq_flags; WITNESS_SAVE_DECL(lock_witness); @@ -141,7 +142,7 @@ _sleep(const void *ident, struct lock_object *lock, int priority, td = curthread; #ifdef KTRACE if (KTRPOINT(td, KTR_CSW)) - ktrcsw(1, 0, wmesg); + nanotime(&sw_out_tv); #endif WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, lock, "Sleeping on \"%s\"", wmesg); @@ -222,8 +223,10 @@ _sleep(const void *ident, struct lock_object *lock, int priority, rval = 0; } #ifdef KTRACE - if (KTRPOINT(td, KTR_CSW)) + if (KTRPOINT(td, KTR_CSW)) { + ktrcsw_out(&sw_out_tv, wmesg); ktrcsw(0, 0, wmesg); + } #endif PICKUP_GIANT(); if (lock != NULL && lock != &Giant.lock_object && !(priority & PDROP)) { @@ -239,6 +242,7 @@ msleep_spin_sbt(const void *ident, struct mtx *mtx, const char *wmesg, sbintime_t sbt, sbintime_t pr, int flags) { struct thread *td __ktrace_used; + struct timespec sw_out_tv __ktrace_used; int rval; WITNESS_SAVE_DECL(mtx); @@ -266,19 +270,9 @@ msleep_spin_sbt(const void *ident, struct mtx *mtx, const char *wmesg, if (sbt != 0) sleepq_set_timeout_sbt(ident, sbt, pr, flags); - /* - * Can't call ktrace with any spin locks held so it can lock the - * ktrace_mtx lock, and WITNESS_WARN considers it an error to hold - * any spin lock. Thus, we have to drop the sleepq spin lock while - * we handle those requests. This is safe since we have placed our - * thread on the sleep queue already. - */ #ifdef KTRACE - if (KTRPOINT(td, KTR_CSW)) { - sleepq_release(ident); - ktrcsw(1, 0, wmesg); - sleepq_lock(ident); - } + if (KTRPOINT(td, KTR_CSW)) + nanotime(&sw_out_tv); #endif #ifdef WITNESS sleepq_release(ident); @@ -293,8 +287,10 @@ msleep_spin_sbt(const void *ident, struct mtx *mtx, const char *wmesg, rval = 0; } #ifdef KTRACE - if (KTRPOINT(td, KTR_CSW)) + if (KTRPOINT(td, KTR_CSW)) { + ktrcsw_out(&sw_out_tv, wmesg); ktrcsw(0, 0, wmesg); + } #endif PICKUP_GIANT(); mtx_lock_spin(mtx); diff --git a/sys/sys/ktrace.h b/sys/sys/ktrace.h index 822822a6ffe7..1ba5c84000b0 100644 --- a/sys/sys/ktrace.h +++ b/sys/sys/ktrace.h @@ -340,6 +340,7 @@ ktr_get_tracevp(struct proc *p, bool ref) void ktr_io_params_free(struct ktr_io_params *); void ktrnamei(const char *); void ktrcsw(int, int, const char *); +void ktrcsw_out(const struct timespec *, const char *); void ktrpsig(int, sig_t, sigset_t *, int); void ktrfault(vm_offset_t, int); void ktrfaultend(int);
