The next diff before introduce rwlock(9) for UNIX sockets garbage
collector data.
Release `unp_lock' before call unp_internalize() and take it within when
access garbage collector data such as `unp_rights', `unp_msgcount' and
`unp_file'. The garbage collector rwlock(9) will be introduced with the
following diff, but right now M_WAITOK/M_WAIT allocations moved outside
rwlock(9) held.
The lock order between fdplock() and `unp_lock' has changed to fdplock()
-> `unp_lock'. unp_internalize() is the only place where these locks
simultaneously held together so this order doesn't matter. This shoud be
helpful to mpi@'s knote(9) work.
sosend() releases solock() before m_getuio() call, so this socket could
be disconnected before uipc_usrreq() called. uipc_usrreq() checks
connection oriented sockets state or tries to connect datagram sockets
after successful unp_internalize() call, so this introduced solock()
release doesn't modify existing behavior.
Index: sys/kern/uipc_usrreq.c
===================================================================
RCS file: /cvs/src/sys/kern/uipc_usrreq.c,v
retrieving revision 1.149
diff -u -p -r1.149 uipc_usrreq.c
--- sys/kern/uipc_usrreq.c 14 Oct 2021 23:05:10 -0000 1.149
+++ sys/kern/uipc_usrreq.c 15 Oct 2021 11:03:16 -0000
@@ -219,8 +219,13 @@ uipc_usrreq(struct socket *so, int req,
break;
case PRU_SEND:
- if (control && (error = unp_internalize(control, p)))
- break;
+ if (control) {
+ sounlock(so, SL_LOCKED);
+ error = unp_internalize(control, p);
+ solock(so);
+ if (error)
+ break;
+ }
switch (so->so_type) {
case SOCK_DGRAM: {
@@ -973,8 +978,6 @@ unp_internalize(struct mbuf *control, st
int i, error;
int nfds, *ip, fd, neededspace;
- rw_assert_wrlock(&unp_lock);
-
/*
* Check for two potential msg_controllen values because
* IETF stuck their nose in a place it does not belong.
@@ -987,8 +990,13 @@ unp_internalize(struct mbuf *control, st
return (EINVAL);
nfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) / sizeof (int);
- if (unp_rights + nfds > maxfiles / 10)
+ rw_enter_write(&unp_lock);
+ if (unp_rights + nfds > maxfiles / 10) {
+ rw_exit_write(&unp_lock);
return (EMFILE);
+ }
+ unp_rights += nfds;
+ rw_exit_write(&unp_lock);
/* Make sure we have room for the struct file pointers */
morespace:
@@ -997,8 +1005,10 @@ morespace:
if (neededspace > m_trailingspace(control)) {
char *tmp;
/* if we already have a cluster, the message is just too big */
- if (control->m_flags & M_EXT)
- return (E2BIG);
+ if (control->m_flags & M_EXT) {
+ error = E2BIG;
+ goto nospace;
+ }
/* copy cmsg data temporarily out of the mbuf */
tmp = malloc(control->m_len, M_TEMP, M_WAITOK);
@@ -1008,7 +1018,8 @@ morespace:
MCLGET(control, M_WAIT);
if ((control->m_flags & M_EXT) == 0) {
free(tmp, M_TEMP, control->m_len);
- return (ENOBUFS); /* allocation failed */
+ error = ENOBUFS; /* allocation failed */
+ goto nospace;
}
/* copy the data back into the cluster */
@@ -1049,10 +1060,11 @@ morespace:
rp->flags = fdp->fd_ofileflags[fd] & UF_PLEDGED;
rp--;
if ((unp = fptounp(fp)) != NULL) {
+ rw_enter_write(&unp_lock);
unp->unp_file = fp;
unp->unp_msgcount++;
+ rw_exit_write(&unp_lock);
}
- unp_rights++;
}
fdpunlock(fdp);
return (0);
@@ -1064,11 +1076,18 @@ fail:
for ( ; i > 0; i--) {
rp++;
fp = rp->fp;
- if ((unp = fptounp(fp)) != NULL)
+ if ((unp = fptounp(fp)) != NULL) {
+ rw_enter_write(&unp_lock);
unp->unp_msgcount--;
+ rw_exit_write(&unp_lock);
+ }
FRELE(fp, p);
- unp_rights--;
}
+
+nospace:
+ rw_enter_write(&unp_lock);
+ unp_rights -= nfds;
+ rw_exit_write(&unp_lock);
return (error);
}