Diff below adds a 'struct socket *' argument to sounlock() in order to
prepare the stack for per-socket locks.
That means sofree() will now unlock a given socket before freeing it.
But since we do not want to release the NET_LOCK() when processing
incoming TCP packets, in_pcbdetach() needs special treatment. That's
also true for unp_drop() as long as Unix sockets require the
KERNEL_LOCK().
This is on top of my previous diff to reduce the number of sofree() calls.
Comments? Oks?
diff --git sys/kern/sys_socket.c sys/kern/sys_socket.c
index 916c33a0c1a..a754a7b2698 100644
--- sys/kern/sys_socket.c
+++ sys/kern/sys_socket.c
@@ -88,7 +88,7 @@ soo_ioctl(struct file *fp, u_long cmd, caddr_t data, struct
proc *p)
so->so_state |= SS_NBIO;
else
so->so_state &= ~SS_NBIO;
- sounlock(s);
+ sounlock(so, s);
break;
case FIOASYNC:
@@ -102,7 +102,7 @@ soo_ioctl(struct file *fp, u_long cmd, caddr_t data, struct
proc *p)
so->so_rcv.sb_flags &= ~SB_ASYNC;
so->so_snd.sb_flags &= ~SB_ASYNC;
}
- sounlock(s);
+ sounlock(so, s);
break;
case FIONREAD:
@@ -176,7 +176,7 @@ soo_poll(struct file *fp, int events, struct proc *p)
so->so_snd.sb_flags |= SB_SEL;
}
}
- sounlock(s);
+ sounlock(so, s);
return (revents);
}
@@ -197,7 +197,7 @@ soo_stat(struct file *fp, struct stat *ub, struct proc *p)
ub->st_gid = so->so_egid;
(void) ((*so->so_proto->pr_usrreq)(so, PRU_SENSE,
(struct mbuf *)ub, NULL, NULL, p));
- sounlock(s);
+ sounlock(so, s);
return (0);
}
diff --git sys/kern/uipc_socket.c sys/kern/uipc_socket.c
index 211966c79c8..aa789d403cc 100644
--- sys/kern/uipc_socket.c
+++ sys/kern/uipc_socket.c
@@ -142,11 +142,11 @@ socreate(int dom, struct socket **aso, int type, int
proto)
error = (*prp->pr_attach)(so, proto);
if (error) {
so->so_state |= SS_NOFDREF;
- sofree(so);
- sounlock(s);
+ /* sofree() calls sounlock(). */
+ sofree(so, s);
return (error);
}
- sounlock(s);
+ sounlock(so, s);
*aso = so;
return (0);
}
@@ -177,7 +177,7 @@ solisten(struct socket *so, int backlog)
error = (*so->so_proto->pr_usrreq)(so, PRU_LISTEN, NULL, NULL, NULL,
curproc);
if (error) {
- sounlock(s);
+ sounlock(so, s);
return (error);
}
if (TAILQ_FIRST(&so->so_q) == NULL)
@@ -187,25 +187,29 @@ solisten(struct socket *so, int backlog)
if (backlog < sominconn)
backlog = sominconn;
so->so_qlimit = backlog;
- sounlock(s);
+ sounlock(so, s);
return (0);
}
void
-sofree(struct socket *so)
+sofree(struct socket *so, int s)
{
soassertlocked(so);
- if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0)
+ if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0) {
+ sounlock(so, s);
return;
+ }
if (so->so_head) {
/*
* We must not decommission a socket that's on the accept(2)
* queue. If we do, then accept(2) may hang after select(2)
* indicated that the listening socket was ready.
*/
- if (!soqremque(so, 0))
+ if (!soqremque(so, 0)) {
+ sounlock(so, s);
return;
+ }
}
#ifdef SOCKET_SPLICE
if (so->so_sp) {
@@ -218,6 +222,7 @@ sofree(struct socket *so)
#endif /* SOCKET_SPLICE */
sbrelease(so, &so->so_snd);
sorflush(so);
+ sounlock(so, s);
#ifdef SOCKET_SPLICE
if (so->so_sp) {
/* Reuse splice idle, sounsplice() has been called before. */
@@ -284,8 +289,8 @@ drop:
discard:
KASSERT((so->so_state & SS_NOFDREF) == 0);
so->so_state |= SS_NOFDREF;
- sofree(so);
- sounlock(s);
+ /* sofree() calls sounlock(). */
+ sofree(so, s);
return (error);
}
@@ -349,7 +354,7 @@ soconnect2(struct socket *so1, struct socket *so2)
s = solock(so1);
error = (*so1->so_proto->pr_usrreq)(so1, PRU_CONNECT2, NULL,
(struct mbuf *)so2, NULL, curproc);
- sounlock(s);
+ sounlock(so1, s);
return (error);
}
@@ -478,7 +483,7 @@ restart:
if (flags & MSG_EOR)
top->m_flags |= M_EOR;
} else {
- sounlock(s);
+ sounlock(so, s);
error = m_getuio(&top, atomic, space, uio);
s = solock(so);
if (error)
@@ -507,7 +512,7 @@ release:
so->so_state &= ~SS_ISSENDING;
sbunlock(so, &so->so_snd);
out:
- sounlock(s);
+ sounlock(so, s);
m_freem(top);
m_freem(control);
return (error);
@@ -661,7 +666,7 @@ soreceive(struct socket *so, struct mbuf **paddr, struct
uio *uio,
s = solock(so);
error = (*pr->pr_usrreq)(so, PRU_RCVOOB, m,
(struct mbuf *)(long)(flags & MSG_PEEK), NULL, curproc);
- sounlock(s);
+ sounlock(so, s);
if (error)
goto bad;
do {
@@ -679,7 +684,7 @@ bad:
s = solock(so);
restart:
if ((error = sblock(so, &so->so_rcv, SBLOCKWAIT(flags))) != 0) {
- sounlock(s);
+ sounlock(so, s);
return (error);
}
@@ -747,7 +752,7 @@ restart:
sbunlock(so, &so->so_rcv);
error = sbwait(so, &so->so_rcv);
if (error) {
- sounlock(s);
+ sounlock(so, s);
return (error);
}
goto restart;
@@ -883,7 +888,7 @@ dontblock:
SBLASTRECORDCHK(&so->so_rcv, "soreceive uiomove");
SBLASTMBUFCHK(&so->so_rcv, "soreceive uiomove");
resid = uio->uio_resid;
- sounlock(s);
+ sounlock(so, s);
uio_error = uiomove(mtod(m, caddr_t) + moff, len, uio);
s = solock(so);
if (uio_error)
@@ -967,7 +972,7 @@ dontblock:
error = sbwait(so, &so->so_rcv);
if (error) {
sbunlock(so, &so->so_rcv);
- sounlock(s);
+ sounlock(so, s);
return (0);
}
if ((m = so->so_rcv.sb_mb) != NULL)
@@ -1013,7 +1018,7 @@ dontblock:
*flagsp |= flags;
release:
sbunlock(so, &so->so_rcv);
- sounlock(s);
+ sounlock(so, s);
return (error);
}
@@ -1039,7 +1044,7 @@ soshutdown(struct socket *so, int how)
error = EINVAL;
break;
}
- sounlock(s);
+ sounlock(so, s);
return (error);
}
@@ -1218,7 +1223,7 @@ soidle(void *arg)
so->so_error = ETIMEDOUT;
sounsplice(so, so->so_sp->ssp_socket, 1);
}
- sounlock(s);
+ sounlock(so, s);
}
void
@@ -1236,7 +1241,7 @@ sotask(void *arg)
*/
somove(so, M_DONTWAIT);
}
- sounlock(s);
+ sounlock(so, s);
/* Avoid user land starvation. */
yield();
diff --git sys/kern/uipc_socket2.c sys/kern/uipc_socket2.c
index 8bb11fd97a1..0cb8b6dc98f 100644
--- sys/kern/uipc_socket2.c
+++ sys/kern/uipc_socket2.c
@@ -277,25 +277,38 @@ solock(struct socket *so)
{
int s = 0;
- if ((so->so_proto->pr_domain->dom_family != PF_UNIX) &&
- (so->so_proto->pr_domain->dom_family != PF_ROUTE) &&
- (so->so_proto->pr_domain->dom_family != PF_KEY))
+ switch (so->so_proto->pr_domain->dom_family) {
+ case PF_INET:
+ case PF_INET6:
+ s = -42;
NET_LOCK();
- else {
+ break;
+ case PF_UNIX:
+ case PF_ROUTE:
+ case PF_KEY:
+ default:
KERNEL_LOCK();
- s = -42;
+ break;
}
return (s);
}
void
-sounlock(int s)
+sounlock(struct socket *so, int s)
{
- if (s != -42)
- NET_UNLOCK();
- else {
+ switch (so->so_proto->pr_domain->dom_family) {
+ case PF_INET:
+ case PF_INET6:
+ if (s == -42)
+ NET_UNLOCK();
+ break;
+ case PF_UNIX:
+ case PF_ROUTE:
+ case PF_KEY:
+ default:
KERNEL_UNLOCK();
+ break;
}
}
diff --git sys/kern/uipc_syscalls.c sys/kern/uipc_syscalls.c
index 1c23bb59091..a6a6aee173d 100644
--- sys/kern/uipc_syscalls.c
+++ sys/kern/uipc_syscalls.c
@@ -209,7 +209,7 @@ sys_bind(struct proc *p, void *v, register_t *retval)
#endif
s = solock(so);
error = sobind(so, nam, p);
- sounlock(s);
+ sounlock(so, s);
m_freem(nam);
out:
FRELE(fp, p);
@@ -351,7 +351,7 @@ out:
so->so_state |= SS_NBIO;
else
so->so_state &= ~SS_NBIO;
- sounlock(s);
+ sounlock(head, s);
fdplock(fdp);
fp->f_data = so;
fdinsert(fdp, tmpfd, cloexec, fp);
@@ -359,7 +359,7 @@ out:
FRELE(fp, p);
*retval = tmpfd;
} else {
- sounlock(s);
+ sounlock(head, s);
fdplock(fdp);
fdremove(fdp, tmpfd);
closef(fp, p);
@@ -437,7 +437,7 @@ bad:
if (!interrupted)
so->so_state &= ~SS_ISCONNECTING;
out:
- sounlock(s);
+ sounlock(so, s);
FRELE(fp, p);
m_freem(nam);
if (error == ERESTART)
@@ -1000,7 +1000,7 @@ sys_setsockopt(struct proc *p, void *v, register_t
*retval)
so = fp->f_data;
s = solock(so);
error = sosetopt(so, SCARG(uap, level), SCARG(uap, name), m);
- sounlock(s);
+ sounlock(so, s);
bad:
m_freem(m);
FRELE(fp, p);
@@ -1039,7 +1039,7 @@ sys_getsockopt(struct proc *p, void *v, register_t
*retval)
so = fp->f_data;
s = solock(so);
error = sogetopt(so, SCARG(uap, level), SCARG(uap, name), m);
- sounlock(s);
+ sounlock(so, s);
if (error == 0 && SCARG(uap, val) && valsize && m != NULL) {
if (valsize > m->m_len)
valsize = m->m_len;
@@ -1083,7 +1083,7 @@ sys_getsockname(struct proc *p, void *v, register_t
*retval)
m = m_getclr(M_WAIT, MT_SONAME);
s = solock(so);
error = (*so->so_proto->pr_usrreq)(so, PRU_SOCKADDR, 0, m, 0, p);
- sounlock(s);
+ sounlock(so, s);
if (error)
goto bad;
error = copyaddrout(p, m, SCARG(uap, asa), len, SCARG(uap, alen));
@@ -1126,7 +1126,7 @@ sys_getpeername(struct proc *p, void *v, register_t
*retval)
m = m_getclr(M_WAIT, MT_SONAME);
s = solock(so);
error = (*so->so_proto->pr_usrreq)(so, PRU_PEERADDR, 0, m, 0, p);
- sounlock(s);
+ sounlock(so, s);
if (error)
goto bad;
error = copyaddrout(p, m, SCARG(uap, asa), len, SCARG(uap, alen));
diff --git sys/kern/uipc_usrreq.c sys/kern/uipc_usrreq.c
index 5d95208adc6..81a64a8554a 100644
--- sys/kern/uipc_usrreq.c
+++ sys/kern/uipc_usrreq.c
@@ -612,11 +612,19 @@ unp_drop(struct unpcb *unp, int errno)
{
struct socket *so = unp->unp_socket;
+ KERNEL_ASSERT_LOCKED();
+
so->so_error = errno;
unp_disconnect(unp);
if (so->so_head) {
so->so_pcb = NULL;
- sofree(so);
+ /*
+ * sofree() releases the socket lock, so we need to
+ * grab it beforehand as long as Unix sockets rely on
+ * the KERNEL_LOCK();
+ */
+ KERNEL_LOCK();
+ sofree(so, 0);
m_freem(unp->unp_addr);
free(unp, M_PCB, sizeof *unp);
}
diff --git sys/miscfs/fifofs/fifo_vnops.c sys/miscfs/fifofs/fifo_vnops.c
index 03a5677a05d..472bfd408a0 100644
--- sys/miscfs/fifofs/fifo_vnops.c
+++ sys/miscfs/fifofs/fifo_vnops.c
@@ -170,7 +170,7 @@ fifo_open(void *v)
fip->fi_writers++;
if ((ap->a_mode & O_NONBLOCK) && fip->fi_readers == 0) {
error = ENXIO;
- sounlock(s);
+ sounlock(wso, s);
goto bad;
}
if (fip->fi_writers == 1) {
@@ -179,7 +179,7 @@ fifo_open(void *v)
wakeup(&fip->fi_readers);
}
}
- sounlock(s);
+ sounlock(wso, s);
if ((ap->a_mode & O_NONBLOCK) == 0) {
if ((ap->a_mode & FREAD) && fip->fi_writers == 0) {
VOP_UNLOCK(vp);
@@ -334,7 +334,7 @@ fifo_poll(void *v)
wso->so_snd.sb_flags |= SB_SEL;
}
}
- sounlock(s);
+ sounlock(rso, s);
return (revents);
}
@@ -369,7 +369,7 @@ fifo_close(void *v)
s = solock(wso);
socantsendmore(wso);
- sounlock(s);
+ sounlock(wso, s);
}
}
if (ap->a_fflag & FWRITE) {
@@ -380,7 +380,7 @@ fifo_close(void *v)
/* SS_ISDISCONNECTED will result in POLLHUP */
rso->so_state |= SS_ISDISCONNECTED;
socantrcvmore(rso);
- sounlock(s);
+ sounlock(rso, s);
}
}
if (fip->fi_readers == 0 && fip->fi_writers == 0) {
diff --git sys/net/bfd.c sys/net/bfd.c
index e3e557e0d37..8bcfb305c99 100644
--- sys/net/bfd.c
+++ sys/net/bfd.c
@@ -611,7 +611,7 @@ bfd_sender(struct bfd_config *bfd, unsigned int port)
s = solock(so);
error = soconnect(so, m);
- sounlock(s);
+ sounlock(so, s);
if (error && error != ECONNREFUSED) {
printf("%s: soconnect error %d\n",
__func__, error);
diff --git sys/net/if_pflow.c sys/net/if_pflow.c
index 9ba382d5069..1b1c2a4e33e 100644
--- sys/net/if_pflow.c
+++ sys/net/if_pflow.c
@@ -442,7 +442,7 @@ pflow_set(struct pflow_softc *sc, struct pflowreq *pflowr)
s = solock(so);
error = sobind(so, m, p);
- sounlock(s);
+ sounlock(so, s);
m_freem(m);
if (error) {
soclose(so);
diff --git sys/netinet/in_pcb.c sys/netinet/in_pcb.c
index 413f9de9df5..62bb9af000c 100644
--- sys/netinet/in_pcb.c
+++ sys/netinet/in_pcb.c
@@ -584,8 +584,13 @@ in_pcbdetach(struct inpcb *inp)
NET_ASSERT_LOCKED();
- so->so_pcb = 0;
- sofree(so);
+ so->so_pcb = NULL;
+ /*
+ * As long as the NET_LOCK() is the default lock for Internet
+ * sockets, do not release it to not introduce new sleeping
+ * points.
+ */
+ sofree(so, 0);
m_freem(inp->inp_options);
if (inp->inp_route.ro_rt) {
rtfree(inp->inp_route.ro_rt);
diff --git sys/nfs/krpc_subr.c sys/nfs/krpc_subr.c
index 346ff9ec989..e487867db1b 100644
--- sys/nfs/krpc_subr.c
+++ sys/nfs/krpc_subr.c
@@ -241,7 +241,7 @@ krpc_call(struct sockaddr_in *sa, u_int prog, u_int vers,
u_int func,
m->m_len = sizeof(tv);
s = solock(so);
error = sosetopt(so, SOL_SOCKET, SO_RCVTIMEO, m);
- sounlock(s);
+ sounlock(so, s);
m_freem(m);
if (error)
goto out;
@@ -257,7 +257,7 @@ krpc_call(struct sockaddr_in *sa, u_int prog, u_int vers,
u_int func,
*on = 1;
s = solock(so);
error = sosetopt(so, SOL_SOCKET, SO_BROADCAST, m);
- sounlock(s);
+ sounlock(so, s);
m_freem(m);
if (error)
goto out;
@@ -274,7 +274,7 @@ krpc_call(struct sockaddr_in *sa, u_int prog, u_int vers,
u_int func,
*ip = IP_PORTRANGE_LOW;
s = solock(so);
error = sosetopt(so, IPPROTO_IP, IP_PORTRANGE, mopt);
- sounlock(s);
+ sounlock(so, s);
m_freem(mopt);
if (error)
goto out;
@@ -288,7 +288,7 @@ krpc_call(struct sockaddr_in *sa, u_int prog, u_int vers,
u_int func,
sin->sin_port = htons(0);
s = solock(so);
error = sobind(so, m, &proc0);
- sounlock(s);
+ sounlock(so, s);
m_freem(m);
if (error) {
printf("bind failed\n");
@@ -301,7 +301,7 @@ krpc_call(struct sockaddr_in *sa, u_int prog, u_int vers,
u_int func,
*ip = IP_PORTRANGE_DEFAULT;
s = solock(so);
error = sosetopt(so, IPPROTO_IP, IP_PORTRANGE, mopt);
- sounlock(s);
+ sounlock(so, s);
m_freem(mopt);
if (error)
goto out;
diff --git sys/nfs/nfs_socket.c sys/nfs/nfs_socket.c
index 383db1cb930..0119f135ef2 100644
--- sys/nfs/nfs_socket.c
+++ sys/nfs/nfs_socket.c
@@ -365,7 +365,7 @@ nfs_connect(struct nfsmount *nmp, struct nfsreq *rep)
goto bad;
so->so_rcv.sb_flags |= SB_NOINTR;
so->so_snd.sb_flags |= SB_NOINTR;
- sounlock(s);
+ sounlock(so, s);
m_freem(mopt);
m_freem(nam);
@@ -378,7 +378,7 @@ nfs_connect(struct nfsmount *nmp, struct nfsreq *rep)
return (0);
bad:
- sounlock(s);
+ sounlock(so, s);
m_freem(mopt);
m_freem(nam);
diff --git sys/nfs/nfs_syscalls.c sys/nfs/nfs_syscalls.c
index 527a61a37de..6b501cc419f 100644
--- sys/nfs/nfs_syscalls.c
+++ sys/nfs/nfs_syscalls.c
@@ -250,7 +250,7 @@ nfssvc_addsock(struct file *fp, struct mbuf *mynam)
s = solock(so);
error = soreserve(so, siz, siz);
if (error) {
- sounlock(s);
+ sounlock(so, s);
m_freem(mynam);
return (error);
}
@@ -279,7 +279,7 @@ nfssvc_addsock(struct file *fp, struct mbuf *mynam)
so->so_rcv.sb_timeo = 0;
so->so_snd.sb_flags &= ~SB_NOINTR;
so->so_snd.sb_timeo = 0;
- sounlock(s);
+ sounlock(so, s);
if (tslp)
slp = tslp;
else {
diff --git sys/sys/socketvar.h sys/sys/socketvar.h
index 097ae3a4ab9..ce8bbdf9f2d 100644
--- sys/sys/socketvar.h
+++ sys/sys/socketvar.h
@@ -311,7 +311,7 @@ int soconnect(struct socket *so, struct mbuf *nam);
int soconnect2(struct socket *so1, struct socket *so2);
int socreate(int dom, struct socket **aso, int type, int proto);
int sodisconnect(struct socket *so);
-void sofree(struct socket *so);
+void sofree(struct socket *so, int);
int sogetopt(struct socket *so, int level, int optname, struct mbuf *m);
void sohasoutofband(struct socket *so);
void soisconnected(struct socket *so);
@@ -338,7 +338,7 @@ int sockargs(struct mbuf **, const void *, size_t, int);
int sosleep(struct socket *, void *, int, const char *, int);
int solock(struct socket *);
-void sounlock(int);
+void sounlock(struct socket *, int);
int sendit(struct proc *, int, struct msghdr *, int, register_t *);
int recvit(struct proc *, int, struct msghdr *, caddr_t,