As recently profiled, the softnet thread which runs mostly without
KERNEL_LOCK() is still somehow serialized with the rest of the kernel.
This is because the various subsystems to notify userland, either via
poll(2)/select(2), kqueue(2) or pgsignal(9) still require this lock.
We worked around this in bpf(4) by deferring the notification. However
such approach isn't trivially applied everywhere and doesn't help with
getting the subsystems out of the KERNEL_LOCK().
Thanks to many people working on MP, the kernel now has multiple consumers
of the notification hooks that can be used to test & expose possible bugs
related to the removal of the KERNEL_LOCK() in these areas.
Diff below takes the first step of pushing the KERNEL_LOCK() in the
subsystems. That means the lock might be taken & released twice if an
application asked for being notified via signal and via poll/kevent.
It is unclear to me if this change of behavior degrades latency in a
noticeable way. If it does and people report it, we should consider
keeping a single KERNEL_LOCK/UNLOCK() dance in the affected subsystem.
Comments? Oks?
Index: kern/uipc_socket.c
===================================================================
RCS file: /cvs/src/sys/kern/uipc_socket.c,v
retrieving revision 1.239
diff -u -p -r1.239 uipc_socket.c
--- kern/uipc_socket.c 15 Jan 2020 13:17:35 -0000 1.239
+++ kern/uipc_socket.c 4 Feb 2020 12:21:30 -0000
@@ -1988,10 +1988,8 @@ sogetopt(struct socket *so, int level, i
void
sohasoutofband(struct socket *so)
{
- KERNEL_LOCK();
pgsigio(&so->so_sigio, SIGURG, 0);
selwakeup(&so->so_rcv.sb_sel);
- KERNEL_UNLOCK();
}
int
Index: kern/kern_sig.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_sig.c,v
retrieving revision 1.245
diff -u -p -r1.245 kern_sig.c
--- kern/kern_sig.c 1 Feb 2020 15:52:34 -0000 1.245
+++ kern/kern_sig.c 4 Feb 2020 12:20:34 -0000
@@ -740,6 +740,7 @@ pgsigio(struct sigio_ref *sir, int sig,
if (sir->sir_sigio == NULL)
return;
+ KERNEL_LOCK();
mtx_enter(&sigio_lock);
sigio = sir->sir_sigio;
if (sigio == NULL)
@@ -756,6 +757,7 @@ pgsigio(struct sigio_ref *sir, int sig,
}
out:
mtx_leave(&sigio_lock);
+ KERNEL_UNLOCK();
}
/*
Index: kern/sys_pipe.c
===================================================================
RCS file: /cvs/src/sys/kern/sys_pipe.c,v
retrieving revision 1.115
diff -u -p -r1.115 sys_pipe.c
--- kern/sys_pipe.c 1 Feb 2020 15:52:34 -0000 1.115
+++ kern/sys_pipe.c 4 Feb 2020 12:24:29 -0000
@@ -372,20 +372,17 @@ pipeselwakeup(struct pipe *cpipe)
{
rw_assert_wrlock(cpipe->pipe_lock);
- KERNEL_LOCK();
-
- /* Kernel lock needed in order to prevent race with kevent. */
if (cpipe->pipe_state & PIPE_SEL) {
cpipe->pipe_state &= ~PIPE_SEL;
selwakeup(&cpipe->pipe_sel);
- } else
+ } else {
+ KERNEL_LOCK();
KNOTE(&cpipe->pipe_sel.si_note, NOTE_SUBMIT);
+ KERNEL_UNLOCK();
+ }
- /* Kernel lock needed since pgsigio() calls ptsignal(). */
if (cpipe->pipe_state & PIPE_ASYNC)
pgsigio(&cpipe->pipe_sigio, SIGIO, 0);
-
- KERNEL_UNLOCK();
}
int
Index: kern/uipc_socket2.c
===================================================================
RCS file: /cvs/src/sys/kern/uipc_socket2.c,v
retrieving revision 1.102
diff -u -p -r1.102 uipc_socket2.c
--- kern/uipc_socket2.c 15 Jan 2020 13:17:35 -0000 1.102
+++ kern/uipc_socket2.c 4 Feb 2020 12:20:54 -0000
@@ -408,11 +408,9 @@ sowakeup(struct socket *so, struct sockb
sb->sb_flags &= ~SB_WAIT;
wakeup(&sb->sb_cc);
}
- KERNEL_LOCK();
if (so->so_state & SS_ASYNC)
pgsigio(&so->so_sigio, SIGIO, 0);
selwakeup(&sb->sb_sel);
- KERNEL_UNLOCK();
}
/*
Index: kern/sys_generic.c
===================================================================
RCS file: /cvs/src/sys/kern/sys_generic.c,v
retrieving revision 1.129
diff -u -p -r1.129 sys_generic.c
--- kern/sys_generic.c 1 Feb 2020 08:57:27 -0000 1.129
+++ kern/sys_generic.c 4 Feb 2020 12:49:17 -0000
@@ -73,6 +73,7 @@ int dopselect(struct proc *, int, fd_set
struct timespec *, const sigset_t *, register_t *);
int doppoll(struct proc *, struct pollfd *, u_int, struct timespec *,
const sigset_t *, register_t *);
+void doselwakeup(struct selinfo *);
int
iovec_copyin(const struct iovec *uiov, struct iovec **iovp, struct iovec *aiov,
@@ -750,6 +751,8 @@ selrecord(struct proc *selector, struct
struct proc *p;
pid_t mytid;
+ KERNEL_ASSERT_LOCKED();
+
mytid = selector->p_tid;
if (sip->si_seltid == mytid)
return;
@@ -766,9 +769,19 @@ selrecord(struct proc *selector, struct
void
selwakeup(struct selinfo *sip)
{
+ KERNEL_LOCK();
+ KNOTE(&sip->si_note, NOTE_SUBMIT);
+ doselwakeup(sip);
+ KERNEL_UNLOCK();
+}
+
+void
+doselwakeup(struct selinfo *sip)
+{
struct proc *p;
- KNOTE(&sip->si_note, NOTE_SUBMIT);
+ KERNEL_ASSERT_LOCKED();
+
if (sip->si_seltid == 0)
return;
if (sip->si_flags & SI_COLL) {
Index: net/bpf.c
===================================================================
RCS file: /cvs/src/sys/net/bpf.c,v
retrieving revision 1.186
diff -u -p -r1.186 bpf.c
--- net/bpf.c 27 Jan 2020 19:16:43 -0000 1.186
+++ net/bpf.c 4 Feb 2020 12:56:04 -0000
@@ -572,8 +572,6 @@ bpf_wakeup_cb(void *xd)
{
struct bpf_d *d = xd;
- KERNEL_ASSERT_LOCKED();
-
wakeup(d);
if (d->bd_async && d->bd_sig)
pgsigio(&d->bd_sigio, d->bd_sig, 0);
Index: net/if_tun.c
===================================================================
RCS file: /cvs/src/sys/net/if_tun.c,v
retrieving revision 1.217
diff -u -p -r1.217 if_tun.c
--- net/if_tun.c 31 Jan 2020 02:58:28 -0000 1.217
+++ net/if_tun.c 4 Feb 2020 12:56:02 -0000
@@ -625,11 +625,9 @@ tun_wakeup(struct tun_softc *sc)
if (sc->sc_reading)
wakeup(&sc->sc_if.if_snd);
- KERNEL_LOCK();
selwakeup(&sc->sc_rsel);
if (sc->sc_flags & TUN_ASYNC)
pgsigio(&sc->sc_sigio, SIGIO, 0);
- KERNEL_UNLOCK();
}
/*