Module Name: src
Committed By: riz
Date: Mon Aug 8 19:45:57 UTC 2011
Modified Files:
src/sys/kern [netbsd-5]: init_main.c uipc_socket.c
src/sys/sys [netbsd-5]: socketvar.h
Log Message:
Pull up following revision(s) (requested by bouyer in ticket #1644):
sys/sys/socketvar.h: revision 1.126
sys/kern/init_main.c: revision 1.433
sys/kern/uipc_socket.c: revision 1.205
Fix kern/45093 as discussed on tech-kern@:
http://mail-index.netbsd.org/tech-kern/2011/06/17/msg010734.html
The cause of the problem is that the so_pendfree list is processed with
the softnet_lock held at one point, and processing the list
calls sodoloanfree() which may kpause(). As the thread sleeps with
softnet_lock held, it ultimately causes a deadlock (see the PR or tech-kern
thread for details).
Although it should be possible to call sodopendfree() after releasing
the socket lock, it's not so easy to know where the socket lock is held and
where it's not, so we may hit the issue again later.
Add a kernel thread to handle the so_pendfree list, and wake up this
thread when adding mbufs to this list. Get rid of the various sodopendfree()
calls, hopefully fixing the problem definitively.
To generate a diff of this commit:
cvs rdiff -u -r1.371.2.5 -r1.371.2.6 src/sys/kern/init_main.c
cvs rdiff -u -r1.177.4.3 -r1.177.4.4 src/sys/kern/uipc_socket.c
cvs rdiff -u -r1.116.4.2 -r1.116.4.3 src/sys/sys/socketvar.h
Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
Modified files:
Index: src/sys/kern/init_main.c
diff -u src/sys/kern/init_main.c:1.371.2.5 src/sys/kern/init_main.c:1.371.2.6
--- src/sys/kern/init_main.c:1.371.2.5 Tue Dec 21 22:21:42 2010
+++ src/sys/kern/init_main.c Mon Aug 8 19:45:57 2011
@@ -1,4 +1,4 @@
-/* $NetBSD: init_main.c,v 1.371.2.5 2010/12/21 22:21:42 riz Exp $ */
+/* $NetBSD: init_main.c,v 1.371.2.6 2011/08/08 19:45:57 riz Exp $ */
/*-
* Copyright (c) 2008, 2009 The NetBSD Foundation, Inc.
@@ -97,7 +97,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: init_main.c,v 1.371.2.5 2010/12/21 22:21:42 riz Exp $");
+__KERNEL_RCSID(0, "$NetBSD: init_main.c,v 1.371.2.6 2011/08/08 19:45:57 riz Exp $");
#include "opt_ddb.h"
#include "opt_ipsec.h"
@@ -463,6 +463,10 @@
/* Initialize interfaces. */
ifinit1();
+ /* Initialize sockets thread(s) */
+ soinit1();
+
+
/* Configure the system hardware. This will enable interrupts. */
configure();
Index: src/sys/kern/uipc_socket.c
diff -u src/sys/kern/uipc_socket.c:1.177.4.3 src/sys/kern/uipc_socket.c:1.177.4.4
--- src/sys/kern/uipc_socket.c:1.177.4.3 Sun May 3 13:18:55 2009
+++ src/sys/kern/uipc_socket.c Mon Aug 8 19:45:57 2011
@@ -1,4 +1,4 @@
-/* $NetBSD: uipc_socket.c,v 1.177.4.3 2009/05/03 13:18:55 bouyer Exp $ */
+/* $NetBSD: uipc_socket.c,v 1.177.4.4 2011/08/08 19:45:57 riz Exp $ */
/*-
* Copyright (c) 2002, 2007, 2008, 2009 The NetBSD Foundation, Inc.
@@ -63,7 +63,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: uipc_socket.c,v 1.177.4.3 2009/05/03 13:18:55 bouyer Exp $");
+__KERNEL_RCSID(0, "$NetBSD: uipc_socket.c,v 1.177.4.4 2011/08/08 19:45:57 riz Exp $");
#include "opt_sock_counters.h"
#include "opt_sosend_loan.h"
@@ -91,6 +91,7 @@
#include <sys/kauth.h>
#include <sys/mutex.h>
#include <sys/condvar.h>
+#include <sys/kthread.h>
#include <uvm/uvm.h>
@@ -136,7 +137,7 @@
#endif
static kmutex_t so_pendfree_lock;
-static struct mbuf *so_pendfree;
+static struct mbuf *so_pendfree = NULL;
#ifndef SOMAXKVA
#define SOMAXKVA (16 * 1024 * 1024)
@@ -147,8 +148,9 @@
#define SOCK_LOAN_CHUNK 65536
-static size_t sodopendfree(void);
-static size_t sodopendfreel(void);
+static void sopendfree_thread(void *);
+static kcondvar_t pendfree_thread_cv;
+static lwp_t *sopendfree_lwp;
static vsize_t
sokvareserve(struct socket *so, vsize_t len)
@@ -157,21 +159,6 @@
mutex_enter(&so_pendfree_lock);
while (socurkva + len > somaxkva) {
- size_t freed;
-
- /*
- * try to do pendfree.
- */
-
- freed = sodopendfreel();
-
- /*
- * if some kva was freed, try again.
- */
-
- if (freed)
- continue;
-
SOSEND_COUNTER_INCR(&sosend_kvalimit);
error = cv_wait_sig(&socurkva_cv, &so_pendfree_lock);
if (error) {
@@ -264,56 +251,45 @@
sokvafree(sva, len);
}
-static size_t
-sodopendfree(void)
-{
- size_t rv;
-
- if (__predict_true(so_pendfree == NULL))
- return 0;
-
- mutex_enter(&so_pendfree_lock);
- rv = sodopendfreel();
- mutex_exit(&so_pendfree_lock);
-
- return rv;
-}
-
/*
- * sodopendfreel: free mbufs on "pendfree" list.
+ * sopendfree_thread: free mbufs on "pendfree" list.
* unlock and relock so_pendfree_lock when freeing mbufs.
- *
- * => called with so_pendfree_lock held.
*/
-static size_t
-sodopendfreel(void)
+static void
+sopendfree_thread(void *v)
{
struct mbuf *m, *next;
- size_t rv = 0;
-
- KASSERT(mutex_owned(&so_pendfree_lock));
+ size_t rv;
- while (so_pendfree != NULL) {
- m = so_pendfree;
- so_pendfree = NULL;
- mutex_exit(&so_pendfree_lock);
+ mutex_enter(&so_pendfree_lock);
- for (; m != NULL; m = next) {
- next = m->m_next;
- KASSERT((~m->m_flags & (M_EXT|M_EXT_PAGES)) == 0);
- KASSERT(m->m_ext.ext_refcnt == 0);
+ for (;;) {
+ rv = 0;
+ while (so_pendfree != NULL) {
+ m = so_pendfree;
+ so_pendfree = NULL;
+ mutex_exit(&so_pendfree_lock);
+
+ for (; m != NULL; m = next) {
+ next = m->m_next;
+ KASSERT((~m->m_flags & (M_EXT|M_EXT_PAGES)) == 0);
+ KASSERT(m->m_ext.ext_refcnt == 0);
+
+ rv += m->m_ext.ext_size;
+ sodoloanfree(m->m_ext.ext_pgs, m->m_ext.ext_buf,
+ m->m_ext.ext_size);
+ pool_cache_put(mb_cache, m);
+ }
- rv += m->m_ext.ext_size;
- sodoloanfree(m->m_ext.ext_pgs, m->m_ext.ext_buf,
- m->m_ext.ext_size);
- pool_cache_put(mb_cache, m);
+ mutex_enter(&so_pendfree_lock);
}
-
- mutex_enter(&so_pendfree_lock);
+ if (rv)
+ cv_broadcast(&socurkva_cv);
+ cv_wait(&pendfree_thread_cv, &so_pendfree_lock);
}
-
- return (rv);
+ panic("sopendfree_thread");
+ /* NOTREACHED */
}
void
@@ -332,7 +308,7 @@
mutex_enter(&so_pendfree_lock);
m->m_next = so_pendfree;
so_pendfree = m;
- cv_broadcast(&socurkva_cv);
+ cv_signal(&pendfree_thread_cv);
mutex_exit(&so_pendfree_lock);
}
@@ -402,7 +378,6 @@
KASSERT(ce == &sokva_reclaimerentry);
KASSERT(obj == NULL);
- sodopendfree();
if (!vm_map_starved_p(kernel_map)) {
return CALLBACK_CHAIN_ABORT;
}
@@ -420,14 +395,15 @@
}
void
-soinit(void)
+soinit()
{
-
mutex_init(&so_pendfree_lock, MUTEX_DEFAULT, IPL_VM);
softnet_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
cv_init(&socurkva_cv, "sokva");
+ cv_init(&pendfree_thread_cv, "sopendfr");
soinit2();
+
/* Set the initial adjusted socket buffer size. */
if (sb_max_set(sb_max))
panic("bad initial sb_max value: %lu", sb_max);
@@ -436,6 +412,15 @@
&sokva_reclaimerentry, NULL, sokva_reclaim_callback);
}
+void
+soinit1(void)
+{
+ int error = kthread_create(PRI_NONE, KTHREAD_MPSAFE, NULL,
+ sopendfree_thread, NULL, &sopendfree_lwp, "sopendfree");
+ if (error)
+ panic("soinit1 %d", error);
+}
+
/*
* Socket operation routines.
* These routines are called by the routines in
@@ -787,7 +772,6 @@
error = (*so->so_proto->pr_usrreq)(so, PRU_DISCONNECT,
NULL, NULL, NULL, NULL);
}
- sodopendfree();
return (error);
}
@@ -819,7 +803,6 @@
int error, s, dontroute, atomic;
p = l->l_proc;
- sodopendfree();
clen = 0;
/*
@@ -1088,9 +1071,6 @@
else
flags = 0;
- if ((flags & MSG_DONTWAIT) == 0)
- sodopendfree();
-
if (flags & MSG_OOB) {
m = m_get(M_WAIT, MT_DATA);
solock(so);
Index: src/sys/sys/socketvar.h
diff -u src/sys/sys/socketvar.h:1.116.4.2 src/sys/sys/socketvar.h:1.116.4.3
--- src/sys/sys/socketvar.h:1.116.4.2 Sat Apr 4 23:36:28 2009
+++ src/sys/sys/socketvar.h Mon Aug 8 19:45:57 2011
@@ -1,4 +1,4 @@
-/* $NetBSD: socketvar.h,v 1.116.4.2 2009/04/04 23:36:28 snj Exp $ */
+/* $NetBSD: socketvar.h,v 1.116.4.3 2011/08/08 19:45:57 riz Exp $ */
/*-
* Copyright (c) 2008, 2009 The NetBSD Foundation, Inc.
@@ -281,6 +281,7 @@
int sbwait(struct sockbuf *);
int sb_max_set(u_long);
void soinit(void);
+void soinit1(void);
void soinit2(void);
int soabort(struct socket *);
int soaccept(struct socket *, struct mbuf *);