Module Name:    src
Committed By:   riastradh
Date:           Wed Mar 30 10:34:14 UTC 2022

Modified Files:
        src/sys/kern: kern_softint.c kern_timeout.c vfs_bio.c

Log Message:
kern: Sprinkle biglock-slippage assertions.

We seem to have a poltergeist that occasionally messes with the
biglock depth, but it's very hard to reproduce and only manifests as
some other CPU spinning out on the kernel lock which is no good for
diagnostics.


To generate a diff of this commit:
cvs rdiff -u -r1.67 -r1.68 src/sys/kern/kern_softint.c
cvs rdiff -u -r1.66 -r1.67 src/sys/kern/kern_timeout.c
cvs rdiff -u -r1.301 -r1.302 src/sys/kern/vfs_bio.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/kern/kern_softint.c
diff -u src/sys/kern/kern_softint.c:1.67 src/sys/kern/kern_softint.c:1.68
--- src/sys/kern/kern_softint.c:1.67	Sun Dec  5 04:56:40 2021
+++ src/sys/kern/kern_softint.c	Wed Mar 30 10:34:14 2022
@@ -1,4 +1,4 @@
-/*	$NetBSD: kern_softint.c,v 1.67 2021/12/05 04:56:40 msaitoh Exp $	*/
+/*	$NetBSD: kern_softint.c,v 1.68 2022/03/30 10:34:14 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2007, 2008, 2019, 2020 The NetBSD Foundation, Inc.
@@ -170,7 +170,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: kern_softint.c,v 1.67 2021/12/05 04:56:40 msaitoh Exp $");
+__KERNEL_RCSID(0, "$NetBSD: kern_softint.c,v 1.68 2022/03/30 10:34:14 riastradh Exp $");
 
 #include <sys/param.h>
 #include <sys/proc.h>
@@ -536,6 +536,7 @@ softint_execute(lwp_t *l, int s)
 {
 	softint_t *si = l->l_private;
 	softhand_t *sh;
+	int locks;
 
 	KASSERT(si->si_lwp == curlwp);
 	KASSERT(si->si_cpu == curcpu());
@@ -560,6 +561,9 @@ softint_execute(lwp_t *l, int s)
 		sh->sh_flags ^= SOFTINT_PENDING;
 		splx(s);
 
+		/* Record the kernel lock depth for diagnostics.  */
+		locks = curcpu()->ci_biglock_count;
+
 		/* Run the handler. */
 		if (__predict_true((sh->sh_flags & SOFTINT_MPSAFE) != 0)) {
 			(*sh->sh_func)(sh->sh_arg);
@@ -576,6 +580,10 @@ softint_execute(lwp_t *l, int s)
 		/* Diagnostic: check that psrefs have not leaked. */
 		KASSERTMSG(l->l_psrefs == 0, "%s: l_psrefs=%d, sh_func=%p\n",
 		    __func__, l->l_psrefs, sh->sh_func);
+		/* Diagnostic: check that biglocks have not leaked. */
+		KASSERTMSG(locks == curcpu()->ci_biglock_count,
+		    "%s: sh_func=%p slipped %d->%d biglocks",
+		    __func__, sh->sh_func, locks, curcpu()->ci_biglock_count);
 
 		(void)splhigh();
 	}

Index: src/sys/kern/kern_timeout.c
diff -u src/sys/kern/kern_timeout.c:1.66 src/sys/kern/kern_timeout.c:1.67
--- src/sys/kern/kern_timeout.c:1.66	Sat Jun 27 01:26:32 2020
+++ src/sys/kern/kern_timeout.c	Wed Mar 30 10:34:14 2022
@@ -1,4 +1,4 @@
-/*	$NetBSD: kern_timeout.c,v 1.66 2020/06/27 01:26:32 rin Exp $	*/
+/*	$NetBSD: kern_timeout.c,v 1.67 2022/03/30 10:34:14 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2003, 2006, 2007, 2008, 2009, 2019 The NetBSD Foundation, Inc.
@@ -59,7 +59,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: kern_timeout.c,v 1.66 2020/06/27 01:26:32 rin Exp $");
+__KERNEL_RCSID(0, "$NetBSD: kern_timeout.c,v 1.67 2022/03/30 10:34:14 riastradh Exp $");
 
 /*
  * Timeouts are kept in a hierarchical timing wheel.  The c_time is the
@@ -740,7 +740,7 @@ callout_softclock(void *v)
 	struct callout_cpu *cc;
 	void (*func)(void *);
 	void *arg;
-	int mpsafe, count, ticks, delta;
+	int mpsafe, count, ticks, delta, locks;
 	lwp_t *l;
 
 	l = curlwp;
@@ -776,6 +776,7 @@ callout_softclock(void *v)
 		cc->cc_active = c;
 
 		mutex_spin_exit(cc->cc_lock);
+		locks = curcpu()->ci_biglock_count;
 		KASSERT(func != NULL);
 		if (__predict_false(!mpsafe)) {
 			KERNEL_LOCK(1, NULL);
@@ -783,6 +784,9 @@ callout_softclock(void *v)
 			KERNEL_UNLOCK_ONE(NULL);
 		} else
 			(*func)(arg);
+		KASSERTMSG(locks == curcpu()->ci_biglock_count,
+		    "callout %p func %p slipped %d->%d biglocks",
+		    c, func, locks, curcpu()->ci_biglock_count);
 		mutex_spin_enter(cc->cc_lock);
 
 		/*

Index: src/sys/kern/vfs_bio.c
diff -u src/sys/kern/vfs_bio.c:1.301 src/sys/kern/vfs_bio.c:1.302
--- src/sys/kern/vfs_bio.c:1.301	Sun Jul 25 06:06:40 2021
+++ src/sys/kern/vfs_bio.c	Wed Mar 30 10:34:14 2022
@@ -1,4 +1,4 @@
-/*	$NetBSD: vfs_bio.c,v 1.301 2021/07/25 06:06:40 simonb Exp $	*/
+/*	$NetBSD: vfs_bio.c,v 1.302 2022/03/30 10:34:14 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2007, 2008, 2009, 2019, 2020 The NetBSD Foundation, Inc.
@@ -123,7 +123,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: vfs_bio.c,v 1.301 2021/07/25 06:06:40 simonb Exp $");
+__KERNEL_RCSID(0, "$NetBSD: vfs_bio.c,v 1.302 2022/03/30 10:34:14 riastradh Exp $");
 
 #ifdef _KERNEL_OPT
 #include "opt_bufcache.h"
@@ -1673,6 +1673,7 @@ static void
 biodone2(buf_t *bp)
 {
 	void (*callout)(buf_t *);
+	int locks;
 
 	SDT_PROBE1(io, kernel, ,done, bp);
 
@@ -1699,7 +1700,11 @@ biodone2(buf_t *bp)
 		KASSERT(!cv_has_waiters(&bp->b_done));
 		bp->b_iodone = NULL;
 		mutex_exit(bp->b_objlock);
+		locks = curcpu()->ci_biglock_count;
 		(*callout)(bp);
+		KASSERTMSG(locks == curcpu()->ci_biglock_count,
+		    "buf %p b_iodone %p slipped %d->%d biglocks",
+		    bp, callout, locks, curcpu()->ci_biglock_count);
 	} else if (ISSET(bp->b_flags, B_ASYNC)) {
 		/* If async, release. */
 		BIOHIST_LOG(biohist, "async", 0, 0, 0, 0);

Reply via email to