Module Name: src Committed By: ad Date: Thu Nov 21 18:56:55 UTC 2019
Modified Files: src/sys/kern: kern_sleepq.c kern_turnstile.c src/sys/sys: sleepq.h Log Message: Sleep queues & turnstiles: - Avoid false sharing. - Make the turnstile hash function more suitable. - Increase turnstile hash table size. - Make amends by having only one set of system wide sleep queue hash locks. To generate a diff of this commit: cvs rdiff -u -r1.51 -r1.52 src/sys/kern/kern_sleepq.c cvs rdiff -u -r1.32 -r1.33 src/sys/kern/kern_turnstile.c cvs rdiff -u -r1.25 -r1.26 src/sys/sys/sleepq.h Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
Modified files: Index: src/sys/kern/kern_sleepq.c diff -u src/sys/kern/kern_sleepq.c:1.51 src/sys/kern/kern_sleepq.c:1.52 --- src/sys/kern/kern_sleepq.c:1.51 Sun Jul 3 14:24:58 2016 +++ src/sys/kern/kern_sleepq.c Thu Nov 21 18:56:55 2019 @@ -1,7 +1,7 @@ -/* $NetBSD: kern_sleepq.c,v 1.51 2016/07/03 14:24:58 christos Exp $ */ +/* $NetBSD: kern_sleepq.c,v 1.52 2019/11/21 18:56:55 ad Exp $ */ /*- - * Copyright (c) 2006, 2007, 2008, 2009 The NetBSD Foundation, Inc. + * Copyright (c) 2006, 2007, 2008, 2009, 2019 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation @@ -35,7 +35,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: kern_sleepq.c,v 1.51 2016/07/03 14:24:58 christos Exp $"); +__KERNEL_RCSID(0, "$NetBSD: kern_sleepq.c,v 1.52 2019/11/21 18:56:55 ad Exp $"); #include <sys/param.h> #include <sys/kernel.h> @@ -65,7 +65,8 @@ __KERNEL_RCSID(0, "$NetBSD: kern_sleepq. static int sleepq_sigtoerror(lwp_t *, int); /* General purpose sleep table, used by mtsleep() and condition variables. */ -sleeptab_t sleeptab __cacheline_aligned; +sleeptab_t sleeptab __cacheline_aligned; +kmutex_t *sleepq_locks[SLEEPTAB_HASH_SIZE] __read_mostly; /* * sleeptab_init: * @@ -75,14 +76,11 @@ sleeptab_t sleeptab __cacheline_aligned; void sleeptab_init(sleeptab_t *st) { - sleepq_t *sq; int i; for (i = 0; i < SLEEPTAB_HASH_SIZE; i++) { - sq = &st->st_queues[i].st_queue; - st->st_queues[i].st_mutex = - mutex_obj_alloc(MUTEX_DEFAULT, IPL_SCHED); - sleepq_init(sq); + sleepq_locks[i] = mutex_obj_alloc(MUTEX_DEFAULT, IPL_SCHED); + sleepq_init(&st->st_queue[i]); } } @@ -266,8 +264,12 @@ sleepq_block(int timo, bool catch_p) /* The LWP and sleep queue are now unlocked. */ if (timo) { /* - * Even if the callout appears to have fired, we need to - * stop it in order to synchronise with other CPUs. + * Even if the callout appears to have fired, we + * need to stop it in order to synchronise with + * other CPUs.
It's important that we do this in + * this LWP's context, and not during wakeup, in + * order to keep the callout & its cache lines + * co-located on the CPU with the LWP. */ if (callout_halt(&l->l_timeout_ch, NULL)) error = EWOULDBLOCK; @@ -333,10 +335,10 @@ sleepq_wake(sleepq_t *sq, wchan_t wchan, * * Remove an LWP from its sleep queue and set it runnable again. * sleepq_unsleep() is called with the LWP's mutex held, and will - * always release it. + * release it if "unlock" is true. */ void -sleepq_unsleep(lwp_t *l, bool cleanup) +sleepq_unsleep(lwp_t *l, bool unlock) { sleepq_t *sq = l->l_sleepq; kmutex_t *mp = l->l_mutex; @@ -345,7 +347,7 @@ sleepq_unsleep(lwp_t *l, bool cleanup) KASSERT(l->l_wchan != NULL); sleepq_remove(sq, l); - if (cleanup) { + if (unlock) { mutex_spin_exit(mp); } } Index: src/sys/kern/kern_turnstile.c diff -u src/sys/kern/kern_turnstile.c:1.32 src/sys/kern/kern_turnstile.c:1.33 --- src/sys/kern/kern_turnstile.c:1.32 Fri Jun 15 13:51:40 2012 +++ src/sys/kern/kern_turnstile.c Thu Nov 21 18:56:55 2019 @@ -1,7 +1,7 @@ -/* $NetBSD: kern_turnstile.c,v 1.32 2012/06/15 13:51:40 yamt Exp $ */ +/* $NetBSD: kern_turnstile.c,v 1.33 2019/11/21 18:56:55 ad Exp $ */ /*- - * Copyright (c) 2002, 2006, 2007, 2009 The NetBSD Foundation, Inc. + * Copyright (c) 2002, 2006, 2007, 2009, 2019 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation @@ -56,11 +56,11 @@ * grabs a free turnstile off the free list. Otherwise, it can take back * the active turnstile from the lock (thus deactivating the turnstile). * - * Turnstiles are the place to do priority inheritence. + * Turnstiles are where we do priority inheritence.
*/ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: kern_turnstile.c,v 1.32 2012/06/15 13:51:40 yamt Exp $"); +__KERNEL_RCSID(0, "$NetBSD: kern_turnstile.c,v 1.33 2019/11/21 18:56:55 ad Exp $"); #include <sys/param.h> #include <sys/lockdebug.h> @@ -69,17 +69,23 @@ __KERNEL_RCSID(0, "$NetBSD: kern_turnsti #include <sys/sleepq.h> #include <sys/systm.h> -#define TS_HASH_SIZE 64 +/* + * Shift of 6 aligns to typical cache line size of 64 bytes; there's no + * point having two turnstile locks to back two lock objects that share one + * cache line. + */ +#define TS_HASH_SIZE 128 #define TS_HASH_MASK (TS_HASH_SIZE - 1) -#define TS_HASH(obj) (((uintptr_t)(obj) >> 3) & TS_HASH_MASK) +#define TS_HASH(obj) (((uintptr_t)(obj) >> 6) & TS_HASH_MASK) -static tschain_t turnstile_tab[TS_HASH_SIZE] __cacheline_aligned; -pool_cache_t turnstile_cache __read_mostly; +/* Keep the chains and mutex pointers apart to prevent false sharing. */ +static tschain_t turnstile_chains[TS_HASH_SIZE] __cacheline_aligned; +static kmutex_t *turnstile_locks[TS_HASH_SIZE] __read_mostly; +pool_cache_t turnstile_cache __read_mostly; +extern turnstile_t turnstile0 __cacheline_aligned; static int turnstile_ctor(void *, void *, int); -extern turnstile_t turnstile0; - /* * turnstile_init: * @@ -88,17 +94,15 @@ extern turnstile_t turnstile0; void turnstile_init(void) { - tschain_t *tc; int i; for (i = 0; i < TS_HASH_SIZE; i++) { - tc = &turnstile_tab[i]; - LIST_INIT(&tc->tc_chain); - tc->tc_mutex = mutex_obj_alloc(MUTEX_DEFAULT, IPL_SCHED); + LIST_INIT(&turnstile_chains[i]); + turnstile_locks[i] = mutex_obj_alloc(MUTEX_DEFAULT, IPL_SCHED); } - turnstile_cache = pool_cache_init(sizeof(turnstile_t), 0, 0, 0, - "tstilepl", NULL, IPL_NONE, turnstile_ctor, NULL, NULL); + turnstile_cache = pool_cache_init(sizeof(turnstile_t), coherency_unit, + 0, 0, "tstile", NULL, IPL_NONE, turnstile_ctor, NULL, NULL); KASSERT(turnstile_cache != NULL); (void)turnstile_ctor(NULL, &turnstile0, 0); @@ -165,11 +169,13 @@
turnstile_lookup(wchan_t obj) { turnstile_t *ts; tschain_t *tc; + u_int hash; - tc = &turnstile_tab[TS_HASH(obj)]; - mutex_spin_enter(tc->tc_mutex); + hash = TS_HASH(obj); + tc = &turnstile_chains[hash]; + mutex_spin_enter(turnstile_locks[hash]); - LIST_FOREACH(ts, &tc->tc_chain, ts_chain) + LIST_FOREACH(ts, tc, ts_chain) if (ts->ts_obj == obj) return (ts); @@ -188,10 +194,8 @@ turnstile_lookup(wchan_t obj) void turnstile_exit(wchan_t obj) { - tschain_t *tc; - tc = &turnstile_tab[TS_HASH(obj)]; - mutex_spin_exit(tc->tc_mutex); + mutex_spin_exit(turnstile_locks[TS_HASH(obj)]); } /* @@ -370,13 +374,17 @@ turnstile_block(turnstile_t *ts, int q, lwp_t * const l = curlwp; /* cached curlwp */ turnstile_t *ots; tschain_t *tc; + kmutex_t *lock; sleepq_t *sq; pri_t obase; + u_int hash; - tc = &turnstile_tab[TS_HASH(obj)]; + hash = TS_HASH(obj); + tc = &turnstile_chains[hash]; + lock = turnstile_locks[hash]; KASSERT(q == TS_READER_Q || q == TS_WRITER_Q); - KASSERT(mutex_owned(tc->tc_mutex)); + KASSERT(mutex_owned(lock)); KASSERT(l != NULL && l->l_ts != NULL); if (ts == NULL) { @@ -390,7 +398,7 @@ turnstile_block(turnstile_t *ts, int q, TAILQ_EMPTY(&ts->ts_sleepq[TS_WRITER_Q])); ts->ts_obj = obj; ts->ts_inheritor = NULL; - LIST_INSERT_HEAD(&tc->tc_chain, ts, ts_chain); + LIST_INSERT_HEAD(tc, ts, ts_chain); } else { /* * Object already has a turnstile. Put our turnstile @@ -411,8 +419,8 @@ turnstile_block(turnstile_t *ts, int q, sq = &ts->ts_sleepq[q]; ts->ts_waiters[q]++; - sleepq_enter(sq, l, tc->tc_mutex); - LOCKDEBUG_BARRIER(tc->tc_mutex, 1); + sleepq_enter(sq, l, lock); + LOCKDEBUG_BARRIER(lock, 1); l->l_kpriority = true; obase = l->l_kpribase; if (obase < PRI_KTHREAD) @@ -425,7 +433,7 @@ turnstile_block(turnstile_t *ts, int q, * to be interrupted while in a state of flux.
*/ KPREEMPT_DISABLE(l); - KASSERT(tc->tc_mutex == l->l_mutex); + KASSERT(lock == l->l_mutex); turnstile_lendpri(l); sleepq_block(0, false); l->l_kpribase = obase; @@ -442,15 +450,17 @@ void turnstile_wakeup(turnstile_t *ts, int q, int count, lwp_t *nl) { sleepq_t *sq; - tschain_t *tc; + kmutex_t *lock; + u_int hash; lwp_t *l; - tc = &turnstile_tab[TS_HASH(ts->ts_obj)]; + hash = TS_HASH(ts->ts_obj); + lock = turnstile_locks[hash]; sq = &ts->ts_sleepq[q]; KASSERT(q == TS_READER_Q || q == TS_WRITER_Q); KASSERT(count > 0 && count <= TS_WAITERS(ts, q)); - KASSERT(mutex_owned(tc->tc_mutex)); + KASSERT(mutex_owned(lock)); KASSERT(ts->ts_inheritor == curlwp || ts->ts_inheritor == NULL); /* @@ -478,7 +488,7 @@ turnstile_wakeup(turnstile_t *ts, int q, turnstile_remove(ts, l, q); } } - mutex_spin_exit(tc->tc_mutex); + mutex_spin_exit(lock); } /* @@ -523,15 +533,19 @@ turnstile_print(volatile void *obj, void turnstile_t *ts; tschain_t *tc; sleepq_t *rsq, *wsq; + kmutex_t *lock; + u_int hash; lwp_t *l; - tc = &turnstile_tab[TS_HASH(obj)]; + hash = TS_HASH(obj); + tc = &turnstile_chains[hash]; + lock = turnstile_locks[hash]; - LIST_FOREACH(ts, &tc->tc_chain, ts_chain) + LIST_FOREACH(ts, tc, ts_chain) if (ts->ts_obj == obj) break; - (*pr)("Turnstile chain at %p.\n", tc); + (*pr)("Turnstile chain at %p with mutex %p.\n", tc, lock); if (ts == NULL) { (*pr)("=> No active turnstile for this lock.\n"); return; Index: src/sys/sys/sleepq.h diff -u src/sys/sys/sleepq.h:1.25 src/sys/sys/sleepq.h:1.26 --- src/sys/sys/sleepq.h:1.25 Thu Apr 19 21:19:07 2018 +++ src/sys/sys/sleepq.h Thu Nov 21 18:56:55 2019 @@ -1,7 +1,7 @@ -/* $NetBSD: sleepq.h,v 1.25 2018/04/19 21:19:07 christos Exp $ */ +/* $NetBSD: sleepq.h,v 1.26 2019/11/21 18:56:55 ad Exp $ */ /*- - * Copyright (c) 2002, 2006, 2007, 2008, 2009 The NetBSD Foundation, Inc. + * Copyright (c) 2002, 2006, 2007, 2008, 2009, 2019 The NetBSD Foundation, Inc. * All rights reserved. 
* * This code is derived from software contributed to The NetBSD Foundation @@ -53,10 +53,7 @@ TAILQ_HEAD(sleepq, lwp); typedef struct sleepq sleepq_t; typedef struct sleeptab { - struct { - kmutex_t *st_mutex; - sleepq_t st_queue; - } st_queues[SLEEPTAB_HASH_SIZE]; + sleepq_t st_queue[SLEEPTAB_HASH_SIZE]; } sleeptab_t; void sleepq_init(sleepq_t *); @@ -95,10 +92,13 @@ sleepq_dontsleep(lwp_t *l) static __inline sleepq_t * sleeptab_lookup(sleeptab_t *st, wchan_t wchan, kmutex_t **mp) { + extern kmutex_t *sleepq_locks[SLEEPTAB_HASH_SIZE]; sleepq_t *sq; + u_int hash; - sq = &st->st_queues[SLEEPTAB_HASH(wchan)].st_queue; - *mp = st->st_queues[SLEEPTAB_HASH(wchan)].st_mutex; + hash = SLEEPTAB_HASH(wchan); + sq = &st->st_queue[hash]; + *mp = sleepq_locks[hash]; mutex_spin_enter(*mp); return sq; } @@ -106,9 +106,10 @@ sleeptab_lookup(sleeptab_t *st, wchan_t static __inline kmutex_t * sleepq_hashlock(wchan_t wchan) { + extern kmutex_t *sleepq_locks[SLEEPTAB_HASH_SIZE]; kmutex_t *mp; - mp = sleeptab.st_queues[SLEEPTAB_HASH(wchan)].st_mutex; + mp = sleepq_locks[SLEEPTAB_HASH(wchan)]; mutex_spin_enter(mp); return mp; } @@ -148,10 +149,9 @@ typedef struct turnstile { SLIST_ENTRY(turnstile) ts_pichain; } turnstile_t; -typedef struct tschain { - kmutex_t *tc_mutex; /* mutex on structs & queues */ - LIST_HEAD(, turnstile) tc_chain; /* turnstile chain */ -} tschain_t; +LIST_HEAD(tschain, turnstile); + +typedef struct tschain tschain_t; #define TS_READER_Q 0 /* reader sleep queue */ #define TS_WRITER_Q 1 /* writer sleep queue */