Module Name:    src
Committed By:   pooka
Date:           Mon Mar 11 21:37:54 UTC 2013

Modified Files:
        src/sys/kern: subr_pool.c

Log Message:
In pool_cache_put_slow(), pool_get() can block (it does mutex_enter()),
so we need to retry if curlwp took a context switch during the call.
Otherwise, CPU-local invariants can get screwed up:

    panic: kernel diagnostic assertion "cur->pcg_avail == cur->pcg_size" failed

This is (was) very easy to reproduce by just running:

  while : ; do RUMP_NCPU=32 ./a.out ; done

where a.out only calls rump_init().  But any situation where there's
contention and a pool has no empty groups would do.
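
For reference, the fix reuses the l_ncsw snapshot-and-compare trick that the
function already used around the contended mutex_enter(): record the LWP's
context-switch counter before a call that may block, and retry if it changed,
since any per-CPU state examined before the call may be stale.  A minimal
standalone sketch of the pattern (hypothetical function name; the real code
is in the diff below):

  static bool
  example_put_slow(pool_cache_t pc)
  {
          struct lwp *l = curlwp;
          uint64_t ncsw = l->l_ncsw;      /* snapshot before possibly blocking */
          pcg_t *pcg;

          /* may mutex_enter() and hence sleep / context switch */
          pcg = pool_get(pc->pc_pcgpool, PR_NOWAIT);

          if (__predict_false(l->l_ncsw != ncsw)) {
                  /* preempted: per-CPU view is stale, undo and ask caller to retry */
                  if (pcg != NULL)
                          pool_put(pc->pc_pcgpool, pcg);
                  return true;
          }
          /* ... safe to keep using the per-CPU cache group here ... */
          return false;
  }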


To generate a diff of this commit:
cvs rdiff -u -r1.199 -r1.200 src/sys/kern/subr_pool.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/kern/subr_pool.c
diff -u src/sys/kern/subr_pool.c:1.199 src/sys/kern/subr_pool.c:1.200
--- src/sys/kern/subr_pool.c:1.199	Sat Feb  9 00:31:21 2013
+++ src/sys/kern/subr_pool.c	Mon Mar 11 21:37:54 2013
@@ -1,4 +1,4 @@
-/*	$NetBSD: subr_pool.c,v 1.199 2013/02/09 00:31:21 christos Exp $	*/
+/*	$NetBSD: subr_pool.c,v 1.200 2013/03/11 21:37:54 pooka Exp $	*/
 
 /*-
  * Copyright (c) 1997, 1999, 2000, 2002, 2007, 2008, 2010
@@ -32,7 +32,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: subr_pool.c,v 1.199 2013/02/09 00:31:21 christos Exp $");
+__KERNEL_RCSID(0, "$NetBSD: subr_pool.c,v 1.200 2013/03/11 21:37:54 pooka Exp $");
 
 #include "opt_ddb.h"
 #include "opt_lockdebug.h"
@@ -2261,6 +2261,7 @@ pool_cache_get_paddr(pool_cache_t pc, in
 static bool __noinline
 pool_cache_put_slow(pool_cache_cpu_t *cc, int s, void *object)
 {
+	struct lwp *l = curlwp;
 	pcg_t *pcg, *cur;
 	uint64_t ncsw;
 	pool_cache_t pc;
@@ -2271,6 +2272,7 @@ pool_cache_put_slow(pool_cache_cpu_t *cc
 	pc = cc->cc_cache;
 	pcg = NULL;
 	cc->cc_misses++;
+	ncsw = l->l_ncsw;
 
 	/*
 	 * If there are no empty groups in the cache then allocate one
@@ -2280,6 +2282,16 @@ pool_cache_put_slow(pool_cache_cpu_t *cc
 		if (__predict_true(!pool_cache_disable)) {
 			pcg = pool_get(pc->pc_pcgpool, PR_NOWAIT);
 		}
+		/*
+		 * If pool_get() blocked, then our view of
+		 * the per-CPU data is invalid: retry.
+		 */
+		if (__predict_false(l->l_ncsw != ncsw)) {
+			if (pcg != NULL) {
+				pool_put(pc->pc_pcgpool, pcg);
+			}
+			return true;
+		}
 		if (__predict_true(pcg != NULL)) {
 			pcg->pcg_avail = 0;
 			pcg->pcg_size = pc->pc_pcgsize;
@@ -2288,7 +2300,6 @@ pool_cache_put_slow(pool_cache_cpu_t *cc
 
 	/* Lock the cache. */
 	if (__predict_false(!mutex_tryenter(&pc->pc_lock))) {
-		ncsw = curlwp->l_ncsw;
 		mutex_enter(&pc->pc_lock);
 		pc->pc_contended++;
 
@@ -2296,7 +2307,7 @@ pool_cache_put_slow(pool_cache_cpu_t *cc
 		 * If we context switched while locking, then our view of
 		 * the per-CPU data is invalid: retry.
 		 */
-		if (__predict_false(curlwp->l_ncsw != ncsw)) {
+		if (__predict_false(l->l_ncsw != ncsw)) {
 			mutex_exit(&pc->pc_lock);
 			if (pcg != NULL) {
 				pool_put(pc->pc_pcgpool, pcg);
