On Wed, Mar 27, 2024 at 08:17:33AM +0100, Anton Lindqvist wrote:
>
> Observing two regress hangs in the kernel on netio. Both seems make use
> of unix sockets. Could this be the culprit?
>
> regress/lib/libc/fread
> regress/usr.bin/ssh (scp.sh)
Sorry for delay.
It was exposed that `sb_mtx' should not be released between `so_rcv'
usage check and corresponding sbwait() sleep. Otherwise wakeup() could
be lost sometimes.
This diff fixed regress tests. It introduces sbunlock_locked() and
sbwait_locked() to perform with `sb_mtx' held.
Index: sys/kern/uipc_socket.c
===================================================================
RCS file: /cvs/src/sys/kern/uipc_socket.c,v
retrieving revision 1.322
diff -u -p -r1.322 uipc_socket.c
--- sys/kern/uipc_socket.c 26 Mar 2024 09:46:47 -0000 1.322
+++ sys/kern/uipc_socket.c 27 Mar 2024 19:17:52 -0000
@@ -834,6 +834,7 @@ bad:
if (mp)
*mp = NULL;
+restart_unlocked:
solock_shared(so);
restart:
if ((error = sblock(so, &so->so_rcv, SBLOCKWAIT(flags))) != 0) {
@@ -903,12 +904,23 @@ restart:
}
SBLASTRECORDCHK(&so->so_rcv, "soreceive sbwait 1");
SBLASTMBUFCHK(&so->so_rcv, "soreceive sbwait 1");
- sb_mtx_unlock(&so->so_rcv);
- sbunlock(so, &so->so_rcv);
- error = sbwait(so, &so->so_rcv);
- if (error) {
+
+ if (so->so_proto->pr_protocol == AF_UNIX) {
+ sbunlock_locked(so, &so->so_rcv);
sounlock_shared(so);
- return (error);
+ error = sbwait_locked(so, &so->so_rcv);
+ sb_mtx_unlock(&so->so_rcv);
+ if (error)
+ return (error);
+ goto restart_unlocked;
+ } else {
+ sb_mtx_unlock(&so->so_rcv);
+ sbunlock(so, &so->so_rcv);
+ error = sbwait(so, &so->so_rcv);
+ if (error) {
+ sounlock_shared(so);
+ return (error);
+ }
}
goto restart;
}
Index: sys/kern/uipc_socket2.c
===================================================================
RCS file: /cvs/src/sys/kern/uipc_socket2.c,v
retrieving revision 1.145
diff -u -p -r1.145 uipc_socket2.c
--- sys/kern/uipc_socket2.c 26 Mar 2024 09:46:47 -0000 1.145
+++ sys/kern/uipc_socket2.c 27 Mar 2024 19:17:52 -0000
@@ -523,6 +523,18 @@ sbmtxassertlocked(struct socket *so, str
* Wait for data to arrive at/drain from a socket buffer.
*/
int
+sbwait_locked(struct socket *so, struct sockbuf *sb)
+{
+ int prio = (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH;
+
+ MUTEX_ASSERT_LOCKED(&sb->sb_mtx);
+
+ sb->sb_flags |= SB_WAIT;
+ return msleep_nsec(&sb->sb_cc, &sb->sb_mtx, prio, "sbwait",
+ sb->sb_timeo_nsecs);
+}
+
+int
sbwait(struct socket *so, struct sockbuf *sb)
{
uint64_t timeo_nsecs;
@@ -573,20 +585,23 @@ out:
}
void
-sbunlock(struct socket *so, struct sockbuf *sb)
+sbunlock_locked(struct socket *so, struct sockbuf *sb)
{
- int dowakeup = 0;
+ MUTEX_ASSERT_LOCKED(&sb->sb_mtx);
- mtx_enter(&sb->sb_mtx);
sb->sb_flags &= ~SB_LOCK;
if (sb->sb_flags & SB_WANT) {
sb->sb_flags &= ~SB_WANT;
- dowakeup = 1;
+ wakeup(&sb->sb_flags);
}
- mtx_leave(&sb->sb_mtx);
+}
- if (dowakeup)
- wakeup(&sb->sb_flags);
+void
+sbunlock(struct socket *so, struct sockbuf *sb)
+{
+ mtx_enter(&sb->sb_mtx);
+ sbunlock_locked(so, sb);
+ mtx_leave(&sb->sb_mtx);
}
/*
Index: sys/sys/socketvar.h
===================================================================
RCS file: /cvs/src/sys/sys/socketvar.h,v
retrieving revision 1.126
diff -u -p -r1.126 socketvar.h
--- sys/sys/socketvar.h 26 Mar 2024 09:46:47 -0000 1.126
+++ sys/sys/socketvar.h 27 Mar 2024 19:17:53 -0000
@@ -320,6 +320,7 @@ int sblock(struct socket *, struct sockb
/* release lock on sockbuf sb */
void sbunlock(struct socket *, struct sockbuf *);
+void sbunlock_locked(struct socket *, struct sockbuf *);
#define SB_EMPTY_FIXUP(sb) do {
\
if ((sb)->sb_mb == NULL) { \
@@ -367,6 +368,7 @@ int sbcheckreserve(u_long, u_long);
int sbchecklowmem(void);
int sbreserve(struct socket *, struct sockbuf *, u_long);
int sbwait(struct socket *, struct sockbuf *);
+int sbwait_locked(struct socket *, struct sockbuf *);
void soinit(void);
void soabort(struct socket *);
int soaccept(struct socket *, struct mbuf *);