Module Name: src Committed By: riastradh Date: Wed Mar 30 10:34:14 UTC 2022
Modified Files: src/sys/kern: kern_softint.c kern_timeout.c vfs_bio.c Log Message: kern: Sprinkle biglock-slippage assertions. We seem to have a poltergeist that occasionally messes with the biglock depth, but it's very hard to reproduce and only manifests as some other CPU spinning out on the kernel lock which is no good for diagnostics. To generate a diff of this commit: cvs rdiff -u -r1.67 -r1.68 src/sys/kern/kern_softint.c cvs rdiff -u -r1.66 -r1.67 src/sys/kern/kern_timeout.c cvs rdiff -u -r1.301 -r1.302 src/sys/kern/vfs_bio.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
Modified files: Index: src/sys/kern/kern_softint.c diff -u src/sys/kern/kern_softint.c:1.67 src/sys/kern/kern_softint.c:1.68 --- src/sys/kern/kern_softint.c:1.67 Sun Dec 5 04:56:40 2021 +++ src/sys/kern/kern_softint.c Wed Mar 30 10:34:14 2022 @@ -1,4 +1,4 @@ -/* $NetBSD: kern_softint.c,v 1.67 2021/12/05 04:56:40 msaitoh Exp $ */ +/* $NetBSD: kern_softint.c,v 1.68 2022/03/30 10:34:14 riastradh Exp $ */ /*- * Copyright (c) 2007, 2008, 2019, 2020 The NetBSD Foundation, Inc. @@ -170,7 +170,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: kern_softint.c,v 1.67 2021/12/05 04:56:40 msaitoh Exp $"); +__KERNEL_RCSID(0, "$NetBSD: kern_softint.c,v 1.68 2022/03/30 10:34:14 riastradh Exp $"); #include <sys/param.h> #include <sys/proc.h> @@ -536,6 +536,7 @@ softint_execute(lwp_t *l, int s) { softint_t *si = l->l_private; softhand_t *sh; + int locks; KASSERT(si->si_lwp == curlwp); KASSERT(si->si_cpu == curcpu()); @@ -560,6 +561,9 @@ softint_execute(lwp_t *l, int s) sh->sh_flags ^= SOFTINT_PENDING; splx(s); + /* Record the kernel lock depth for diagnostics. */ + locks = curcpu()->ci_biglock_count; + /* Run the handler. */ if (__predict_true((sh->sh_flags & SOFTINT_MPSAFE) != 0)) { (*sh->sh_func)(sh->sh_arg); @@ -576,6 +580,10 @@ softint_execute(lwp_t *l, int s) /* Diagnostic: check that psrefs have not leaked. */ KASSERTMSG(l->l_psrefs == 0, "%s: l_psrefs=%d, sh_func=%p\n", __func__, l->l_psrefs, sh->sh_func); + /* Diagnostic: check that biglocks have not leaked. */ + KASSERTMSG(locks == curcpu()->ci_biglock_count, + "%s: sh_func=%p slipped %d->%d biglocks", + __func__, sh->sh_func, locks, curcpu()->ci_biglock_count); (void)splhigh(); } Index: src/sys/kern/kern_timeout.c diff -u src/sys/kern/kern_timeout.c:1.66 src/sys/kern/kern_timeout.c:1.67 --- src/sys/kern/kern_timeout.c:1.66 Sat Jun 27 01:26:32 2020 +++ src/sys/kern/kern_timeout.c Wed Mar 30 10:34:14 2022 @@ -1,4 +1,4 @@ -/* $NetBSD: kern_timeout.c,v 1.66 2020/06/27 01:26:32 rin Exp $ */ +/* $NetBSD: kern_timeout.c,v 1.67 2022/03/30 10:34:14 riastradh Exp $ */ /*- * Copyright (c) 2003, 2006, 2007, 2008, 2009, 2019 The NetBSD Foundation, Inc. @@ -59,7 +59,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: kern_timeout.c,v 1.66 2020/06/27 01:26:32 rin Exp $"); +__KERNEL_RCSID(0, "$NetBSD: kern_timeout.c,v 1.67 2022/03/30 10:34:14 riastradh Exp $"); /* * Timeouts are kept in a hierarchical timing wheel. The c_time is the @@ -740,7 +740,7 @@ callout_softclock(void *v) struct callout_cpu *cc; void (*func)(void *); void *arg; - int mpsafe, count, ticks, delta; + int mpsafe, count, ticks, delta, locks; lwp_t *l; l = curlwp; @@ -776,6 +776,7 @@ callout_softclock(void *v) cc->cc_active = c; mutex_spin_exit(cc->cc_lock); + locks = curcpu()->ci_biglock_count; KASSERT(func != NULL); if (__predict_false(!mpsafe)) { KERNEL_LOCK(1, NULL); @@ -783,6 +784,9 @@ callout_softclock(void *v) KERNEL_UNLOCK_ONE(NULL); } else (*func)(arg); + KASSERTMSG(locks == curcpu()->ci_biglock_count, + "callout %p func %p slipped %d->%d biglocks", + c, func, locks, curcpu()->ci_biglock_count); mutex_spin_enter(cc->cc_lock); /* Index: src/sys/kern/vfs_bio.c diff -u src/sys/kern/vfs_bio.c:1.301 src/sys/kern/vfs_bio.c:1.302 --- src/sys/kern/vfs_bio.c:1.301 Sun Jul 25 06:06:40 2021 +++ src/sys/kern/vfs_bio.c Wed Mar 30 10:34:14 2022 @@ -1,4 +1,4 @@ -/* $NetBSD: vfs_bio.c,v 1.301 2021/07/25 06:06:40 simonb Exp $ */ +/* $NetBSD: vfs_bio.c,v 1.302 2022/03/30 10:34:14 riastradh Exp $ */ /*- * Copyright (c) 2007, 2008, 2009, 2019, 2020 The NetBSD Foundation, Inc. @@ -123,7 +123,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: vfs_bio.c,v 1.301 2021/07/25 06:06:40 simonb Exp $"); +__KERNEL_RCSID(0, "$NetBSD: vfs_bio.c,v 1.302 2022/03/30 10:34:14 riastradh Exp $"); #ifdef _KERNEL_OPT #include "opt_bufcache.h" @@ -1673,6 +1673,7 @@ static void biodone2(buf_t *bp) { void (*callout)(buf_t *); + int locks; SDT_PROBE1(io, kernel, ,done, bp); @@ -1699,7 +1700,11 @@ biodone2(buf_t *bp) KASSERT(!cv_has_waiters(&bp->b_done)); bp->b_iodone = NULL; mutex_exit(bp->b_objlock); + locks = curcpu()->ci_biglock_count; (*callout)(bp); + KASSERTMSG(locks == curcpu()->ci_biglock_count, + "buf %p b_iodone %p slipped %d->%d biglocks", + bp, callout, locks, curcpu()->ci_biglock_count); } else if (ISSET(bp->b_flags, B_ASYNC)) { /* If async, release. */ BIOHIST_LOG(biohist, "async", 0, 0, 0, 0);