Module Name:    src
Committed By:   ad
Date:           Thu Nov 21 18:56:55 UTC 2019

Modified Files:
        src/sys/kern: kern_sleepq.c kern_turnstile.c
        src/sys/sys: sleepq.h

Log Message:
Sleep queues & turnstiles:

- Avoid false sharing.
- Make the turnstile hash function more suitable.
- Increase turnstile hash table size.
- Make amends by having only one set of system wide sleep queue hash locks.


To generate a diff of this commit:
cvs rdiff -u -r1.51 -r1.52 src/sys/kern/kern_sleepq.c
cvs rdiff -u -r1.32 -r1.33 src/sys/kern/kern_turnstile.c
cvs rdiff -u -r1.25 -r1.26 src/sys/sys/sleepq.h

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/kern/kern_sleepq.c
diff -u src/sys/kern/kern_sleepq.c:1.51 src/sys/kern/kern_sleepq.c:1.52
--- src/sys/kern/kern_sleepq.c:1.51	Sun Jul  3 14:24:58 2016
+++ src/sys/kern/kern_sleepq.c	Thu Nov 21 18:56:55 2019
@@ -1,7 +1,7 @@
-/*	$NetBSD: kern_sleepq.c,v 1.51 2016/07/03 14:24:58 christos Exp $	*/
+/*	$NetBSD: kern_sleepq.c,v 1.52 2019/11/21 18:56:55 ad Exp $	*/
 
 /*-
- * Copyright (c) 2006, 2007, 2008, 2009 The NetBSD Foundation, Inc.
+ * Copyright (c) 2006, 2007, 2008, 2009, 2019 The NetBSD Foundation, Inc.
  * All rights reserved.
  *
  * This code is derived from software contributed to The NetBSD Foundation
@@ -35,7 +35,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: kern_sleepq.c,v 1.51 2016/07/03 14:24:58 christos Exp $");
+__KERNEL_RCSID(0, "$NetBSD: kern_sleepq.c,v 1.52 2019/11/21 18:56:55 ad Exp $");
 
 #include <sys/param.h>
 #include <sys/kernel.h>
@@ -65,7 +65,8 @@ __KERNEL_RCSID(0, "$NetBSD: kern_sleepq.
 static int	sleepq_sigtoerror(lwp_t *, int);
 
 /* General purpose sleep table, used by mtsleep() and condition variables. */
-sleeptab_t	sleeptab	__cacheline_aligned;
+sleeptab_t	sleeptab __cacheline_aligned;
+kmutex_t	*sleepq_locks[SLEEPTAB_HASH_SIZE] __read_mostly;
 
 /*
  * sleeptab_init:
@@ -75,14 +76,11 @@ sleeptab_t	sleeptab	__cacheline_aligned;
 void
 sleeptab_init(sleeptab_t *st)
 {
-	sleepq_t *sq;
 	int i;
 
 	for (i = 0; i < SLEEPTAB_HASH_SIZE; i++) {
-		sq = &st->st_queues[i].st_queue;
-		st->st_queues[i].st_mutex =
-		    mutex_obj_alloc(MUTEX_DEFAULT, IPL_SCHED);
-		sleepq_init(sq);
+		sleepq_locks[i] = mutex_obj_alloc(MUTEX_DEFAULT, IPL_SCHED);
+		sleepq_init(&st->st_queue[i]);
 	}
 }
 
@@ -266,8 +264,12 @@ sleepq_block(int timo, bool catch_p)
 		/* The LWP and sleep queue are now unlocked. */
 		if (timo) {
 			/*
-			 * Even if the callout appears to have fired, we need to
-			 * stop it in order to synchronise with other CPUs.
+			 * Even if the callout appears to have fired, we
+			 * need to stop it in order to synchronise with
+			 * other CPUs.  It's important that we do this in
+			 * this LWP's context, and not during wakeup, in
+			 * order to keep the callout & its cache lines
+			 * co-located on the CPU with the LWP.
 			 */
 			if (callout_halt(&l->l_timeout_ch, NULL))
 				error = EWOULDBLOCK;
@@ -333,10 +335,10 @@ sleepq_wake(sleepq_t *sq, wchan_t wchan,
  *
  *	Remove an LWP from its sleep queue and set it runnable again. 
  *	sleepq_unsleep() is called with the LWP's mutex held, and will
- *	always release it.
+ *	release it if "unlock" is true.
  */
 void
-sleepq_unsleep(lwp_t *l, bool cleanup)
+sleepq_unsleep(lwp_t *l, bool unlock)
 {
 	sleepq_t *sq = l->l_sleepq;
 	kmutex_t *mp = l->l_mutex;
@@ -345,7 +347,7 @@ sleepq_unsleep(lwp_t *l, bool cleanup)
 	KASSERT(l->l_wchan != NULL);
 
 	sleepq_remove(sq, l);
-	if (cleanup) {
+	if (unlock) {
 		mutex_spin_exit(mp);
 	}
 }

Index: src/sys/kern/kern_turnstile.c
diff -u src/sys/kern/kern_turnstile.c:1.32 src/sys/kern/kern_turnstile.c:1.33
--- src/sys/kern/kern_turnstile.c:1.32	Fri Jun 15 13:51:40 2012
+++ src/sys/kern/kern_turnstile.c	Thu Nov 21 18:56:55 2019
@@ -1,7 +1,7 @@
-/*	$NetBSD: kern_turnstile.c,v 1.32 2012/06/15 13:51:40 yamt Exp $	*/
+/*	$NetBSD: kern_turnstile.c,v 1.33 2019/11/21 18:56:55 ad Exp $	*/
 
 /*-
- * Copyright (c) 2002, 2006, 2007, 2009 The NetBSD Foundation, Inc.
+ * Copyright (c) 2002, 2006, 2007, 2009, 2019 The NetBSD Foundation, Inc.
  * All rights reserved.
  *
  * This code is derived from software contributed to The NetBSD Foundation
@@ -56,11 +56,11 @@
  * grabs a free turnstile off the free list.  Otherwise, it can take back
  * the active turnstile from the lock (thus deactivating the turnstile).
  *
- * Turnstiles are the place to do priority inheritence.
+ * Turnstiles are where we do priority inheritence.
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: kern_turnstile.c,v 1.32 2012/06/15 13:51:40 yamt Exp $");
+__KERNEL_RCSID(0, "$NetBSD: kern_turnstile.c,v 1.33 2019/11/21 18:56:55 ad Exp $");
 
 #include <sys/param.h>
 #include <sys/lockdebug.h>
@@ -69,17 +69,23 @@ __KERNEL_RCSID(0, "$NetBSD: kern_turnsti
 #include <sys/sleepq.h>
 #include <sys/systm.h>
 
-#define	TS_HASH_SIZE	64
+/*
+ * Shift of 6 aligns to typical cache line size of 64 bytes;  there's no
+ * point having two turnstile locks to back two lock objects that share one
+ * cache line.
+ */
+#define	TS_HASH_SIZE	128
 #define	TS_HASH_MASK	(TS_HASH_SIZE - 1)
-#define	TS_HASH(obj)	(((uintptr_t)(obj) >> 3) & TS_HASH_MASK)
+#define	TS_HASH(obj)	(((uintptr_t)(obj) >> 6) & TS_HASH_MASK)
 
-static tschain_t	turnstile_tab[TS_HASH_SIZE]	__cacheline_aligned;
-pool_cache_t		turnstile_cache			__read_mostly;
+/* Keep the chains and mutex pointers apart to prevent false sharing. */
+static tschain_t	turnstile_chains[TS_HASH_SIZE] __cacheline_aligned;
+static kmutex_t		*turnstile_locks[TS_HASH_SIZE] __read_mostly;
+pool_cache_t		turnstile_cache __read_mostly;
+extern turnstile_t	turnstile0 __cacheline_aligned;
 
 static int		turnstile_ctor(void *, void *, int);
 
-extern turnstile_t	turnstile0;
-
 /*
  * turnstile_init:
  *
@@ -88,17 +94,15 @@ extern turnstile_t	turnstile0;
 void
 turnstile_init(void)
 {
-	tschain_t *tc;
 	int i;
 
 	for (i = 0; i < TS_HASH_SIZE; i++) {
-		tc = &turnstile_tab[i];
-		LIST_INIT(&tc->tc_chain);
-		tc->tc_mutex = mutex_obj_alloc(MUTEX_DEFAULT, IPL_SCHED);
+		LIST_INIT(&turnstile_chains[i]);
+		turnstile_locks[i] = mutex_obj_alloc(MUTEX_DEFAULT, IPL_SCHED);
 	}
 
-	turnstile_cache = pool_cache_init(sizeof(turnstile_t), 0, 0, 0,
-	    "tstilepl", NULL, IPL_NONE, turnstile_ctor, NULL, NULL);
+	turnstile_cache = pool_cache_init(sizeof(turnstile_t), coherency_unit,
+	    0, 0, "tstile", NULL, IPL_NONE, turnstile_ctor, NULL, NULL);
 	KASSERT(turnstile_cache != NULL);
 
 	(void)turnstile_ctor(NULL, &turnstile0, 0);
@@ -165,11 +169,13 @@ turnstile_lookup(wchan_t obj)
 {
 	turnstile_t *ts;
 	tschain_t *tc;
+	u_int hash;
 
-	tc = &turnstile_tab[TS_HASH(obj)];
-	mutex_spin_enter(tc->tc_mutex);
+	hash = TS_HASH(obj);
+	tc = &turnstile_chains[hash];
+	mutex_spin_enter(turnstile_locks[hash]);
 
-	LIST_FOREACH(ts, &tc->tc_chain, ts_chain)
+	LIST_FOREACH(ts, tc, ts_chain)
 		if (ts->ts_obj == obj)
 			return (ts);
 
@@ -188,10 +194,8 @@ turnstile_lookup(wchan_t obj)
 void
 turnstile_exit(wchan_t obj)
 {
-	tschain_t *tc;
 
-	tc = &turnstile_tab[TS_HASH(obj)];
-	mutex_spin_exit(tc->tc_mutex);
+	mutex_spin_exit(turnstile_locks[TS_HASH(obj)]);
 }
 
 /*
@@ -370,13 +374,17 @@ turnstile_block(turnstile_t *ts, int q, 
 	lwp_t * const l = curlwp; /* cached curlwp */
 	turnstile_t *ots;
 	tschain_t *tc;
+	kmutex_t *lock;
 	sleepq_t *sq;
 	pri_t obase;
+	u_int hash;
 
-	tc = &turnstile_tab[TS_HASH(obj)];
+	hash = TS_HASH(obj);
+	tc = &turnstile_chains[hash];
+	lock = turnstile_locks[hash];
 
 	KASSERT(q == TS_READER_Q || q == TS_WRITER_Q);
-	KASSERT(mutex_owned(tc->tc_mutex));
+	KASSERT(mutex_owned(lock));
 	KASSERT(l != NULL && l->l_ts != NULL);
 
 	if (ts == NULL) {
@@ -390,7 +398,7 @@ turnstile_block(turnstile_t *ts, int q, 
 			TAILQ_EMPTY(&ts->ts_sleepq[TS_WRITER_Q]));
 		ts->ts_obj = obj;
 		ts->ts_inheritor = NULL;
-		LIST_INSERT_HEAD(&tc->tc_chain, ts, ts_chain);
+		LIST_INSERT_HEAD(tc, ts, ts_chain);
 	} else {
 		/*
 		 * Object already has a turnstile.  Put our turnstile
@@ -411,8 +419,8 @@ turnstile_block(turnstile_t *ts, int q, 
 
 	sq = &ts->ts_sleepq[q];
 	ts->ts_waiters[q]++;
-	sleepq_enter(sq, l, tc->tc_mutex);
-	LOCKDEBUG_BARRIER(tc->tc_mutex, 1);
+	sleepq_enter(sq, l, lock);
+	LOCKDEBUG_BARRIER(lock, 1);
 	l->l_kpriority = true;
 	obase = l->l_kpribase;
 	if (obase < PRI_KTHREAD)
@@ -425,7 +433,7 @@ turnstile_block(turnstile_t *ts, int q, 
 	 * to be interrupted while in a state of flux.
 	 */
 	KPREEMPT_DISABLE(l);
-	KASSERT(tc->tc_mutex == l->l_mutex);
+	KASSERT(lock == l->l_mutex);
 	turnstile_lendpri(l);
 	sleepq_block(0, false);
 	l->l_kpribase = obase;
@@ -442,15 +450,17 @@ void
 turnstile_wakeup(turnstile_t *ts, int q, int count, lwp_t *nl)
 {
 	sleepq_t *sq;
-	tschain_t *tc;
+	kmutex_t *lock;
+	u_int hash;
 	lwp_t *l;
 
-	tc = &turnstile_tab[TS_HASH(ts->ts_obj)];
+	hash = TS_HASH(ts->ts_obj);
+	lock = turnstile_locks[hash];
 	sq = &ts->ts_sleepq[q];
 
 	KASSERT(q == TS_READER_Q || q == TS_WRITER_Q);
 	KASSERT(count > 0 && count <= TS_WAITERS(ts, q));
-	KASSERT(mutex_owned(tc->tc_mutex));
+	KASSERT(mutex_owned(lock));
 	KASSERT(ts->ts_inheritor == curlwp || ts->ts_inheritor == NULL);
 
 	/*
@@ -478,7 +488,7 @@ turnstile_wakeup(turnstile_t *ts, int q,
 			turnstile_remove(ts, l, q);
 		}
 	}
-	mutex_spin_exit(tc->tc_mutex);
+	mutex_spin_exit(lock);
 }
 
 /*
@@ -523,15 +533,19 @@ turnstile_print(volatile void *obj, void
 	turnstile_t *ts;
 	tschain_t *tc;
 	sleepq_t *rsq, *wsq;
+	kmutex_t *lock;
+	u_int hash;
 	lwp_t *l;
 
-	tc = &turnstile_tab[TS_HASH(obj)];
+	hash = TS_HASH(obj);
+	tc = &turnstile_chains[hash];
+	lock = turnstile_locks[hash];
 
-	LIST_FOREACH(ts, &tc->tc_chain, ts_chain)
+	LIST_FOREACH(ts, tc, ts_chain)
 		if (ts->ts_obj == obj)
 			break;
 
-	(*pr)("Turnstile chain at %p.\n", tc);
+	(*pr)("Turnstile chain at %p with mutex %p.\n", tc, lock);
 	if (ts == NULL) {
 		(*pr)("=> No active turnstile for this lock.\n");
 		return;

Index: src/sys/sys/sleepq.h
diff -u src/sys/sys/sleepq.h:1.25 src/sys/sys/sleepq.h:1.26
--- src/sys/sys/sleepq.h:1.25	Thu Apr 19 21:19:07 2018
+++ src/sys/sys/sleepq.h	Thu Nov 21 18:56:55 2019
@@ -1,7 +1,7 @@
-/*	$NetBSD: sleepq.h,v 1.25 2018/04/19 21:19:07 christos Exp $	*/
+/*	$NetBSD: sleepq.h,v 1.26 2019/11/21 18:56:55 ad Exp $	*/
 
 /*-
- * Copyright (c) 2002, 2006, 2007, 2008, 2009 The NetBSD Foundation, Inc.
+ * Copyright (c) 2002, 2006, 2007, 2008, 2009, 2019 The NetBSD Foundation, Inc.
  * All rights reserved.
  *
  * This code is derived from software contributed to The NetBSD Foundation
@@ -53,10 +53,7 @@ TAILQ_HEAD(sleepq, lwp);
 typedef struct sleepq sleepq_t;
 
 typedef struct sleeptab {
-	struct {
-		kmutex_t	*st_mutex;
-		sleepq_t	st_queue;
-	} st_queues[SLEEPTAB_HASH_SIZE];
+	sleepq_t	st_queue[SLEEPTAB_HASH_SIZE];
 } sleeptab_t;
 
 void	sleepq_init(sleepq_t *);
@@ -95,10 +92,13 @@ sleepq_dontsleep(lwp_t *l)
 static __inline sleepq_t *
 sleeptab_lookup(sleeptab_t *st, wchan_t wchan, kmutex_t **mp)
 {
+	extern kmutex_t *sleepq_locks[SLEEPTAB_HASH_SIZE];
 	sleepq_t *sq;
+	u_int hash;
 
-	sq = &st->st_queues[SLEEPTAB_HASH(wchan)].st_queue;
-	*mp = st->st_queues[SLEEPTAB_HASH(wchan)].st_mutex;
+	hash = SLEEPTAB_HASH(wchan);
+	sq = &st->st_queue[hash];
+	*mp = sleepq_locks[hash];
 	mutex_spin_enter(*mp);
 	return sq;
 }
@@ -106,9 +106,10 @@ sleeptab_lookup(sleeptab_t *st, wchan_t 
 static __inline kmutex_t *
 sleepq_hashlock(wchan_t wchan)
 {
+	extern kmutex_t *sleepq_locks[SLEEPTAB_HASH_SIZE];
 	kmutex_t *mp;
 
-	mp = sleeptab.st_queues[SLEEPTAB_HASH(wchan)].st_mutex;
+	mp = sleepq_locks[SLEEPTAB_HASH(wchan)];
 	mutex_spin_enter(mp);
 	return mp;
 }
@@ -148,10 +149,9 @@ typedef struct turnstile {
 	SLIST_ENTRY(turnstile)	ts_pichain;
 } turnstile_t;
 
-typedef struct tschain {
-	kmutex_t		*tc_mutex;	/* mutex on structs & queues */
-	LIST_HEAD(, turnstile)	tc_chain;	/* turnstile chain */
-} tschain_t;
+LIST_HEAD(tschain, turnstile);
+
+typedef struct tschain tschain_t;
 
 #define	TS_READER_Q	0		/* reader sleep queue */
 #define	TS_WRITER_Q	1		/* writer sleep queue */

Reply via email to the sender or the mailing list.