My previous attempt to grab the NET_LOCK(), thus potentially sleeping,
inside kqueue_scan() resulting in NULL dereferences:
https://marc.info/?l=openbsd-bugs&m=149935139022501&w=2
The problem is that the loop isn't ready to be consulted by multiple
threads at the same time. By "at the same time", I mean that when a
thread sleeps it can be consulted by another one.
The diff below addresses that by correcting kqueue's refcount and by
using a per-threada marker. I took this idea from Dragonfly because
I believe that we can extend it to make kevent(2) MPSAFE later.
Diff below also includes socket filter modifications grabbing the
NET_LOCK() as well. juanfra@ told me he couldn't reproduce the
previous crash with this diff, however I'm looking for more testers.
Test reports? Comments?
Index: kern/kern_event.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_event.c,v
retrieving revision 1.79
diff -u -p -r1.79 kern_event.c
--- kern/kern_event.c 31 May 2017 14:52:05 -0000 1.79
+++ kern/kern_event.c 5 Sep 2017 14:24:50 -0000
@@ -757,25 +757,40 @@ start:
goto done;
}
- TAILQ_INSERT_TAIL(&kq->kq_head, &marker, kn_tqe);
+ marker.kn_filter = EVFILT_MARKER;
+ TAILQ_INSERT_HEAD(&kq->kq_head, &marker, kn_tqe);
while (count) {
- kn = TAILQ_FIRST(&kq->kq_head);
- TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe);
- if (kn == &marker) {
+ kn = TAILQ_NEXT(&marker, kn_tqe);
+ if (kn == NULL) {
+ TAILQ_REMOVE(&kq->kq_head, &marker, kn_tqe);
splx(s);
if (count == maxevents)
goto retry;
goto done;
}
+ if (kn->kn_filter == EVFILT_MARKER) {
+ kn = TAILQ_NEXT(kn, kn_tqe);
+ if (kn == NULL) {
+ TAILQ_REMOVE(&kq->kq_head, &marker, kn_tqe);
+ splx(s);
+ if (count == maxevents)
+ goto retry;
+ goto done;
+ }
+ /* Move marker past some other threads marker */
+ TAILQ_REMOVE(&kq->kq_head, &marker, kn_tqe);
+ TAILQ_INSERT_BEFORE(kn, &marker, kn_tqe);
+ continue;
+ }
+ TAILQ_REMOVE(&kq->kq_head, kn, kn_tqe);
+ kq->kq_count--;
if (kn->kn_status & KN_DISABLED) {
kn->kn_status &= ~KN_QUEUED;
- kq->kq_count--;
continue;
}
if ((kn->kn_flags & EV_ONESHOT) == 0 &&
kn->kn_fop->f_event(kn, 0) == 0) {
kn->kn_status &= ~(KN_QUEUED | KN_ACTIVE);
- kq->kq_count--;
continue;
}
*kevp = kn->kn_kevent;
@@ -783,7 +798,6 @@ start:
nkev++;
if (kn->kn_flags & EV_ONESHOT) {
kn->kn_status &= ~KN_QUEUED;
- kq->kq_count--;
splx(s);
kn->kn_fop->f_detach(kn);
knote_drop(kn, p, p->p_fd);
@@ -796,8 +810,8 @@ start:
if (kn->kn_flags & EV_DISPATCH)
kn->kn_status |= KN_DISABLED;
kn->kn_status &= ~(KN_QUEUED | KN_ACTIVE);
- kq->kq_count--;
} else {
+ kq->kq_count++;
TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe);
}
count--;
Index: kern/uipc_socket.c
===================================================================
RCS file: /cvs/src/sys/kern/uipc_socket.c,v
retrieving revision 1.204
diff -u -p -r1.204 uipc_socket.c
--- kern/uipc_socket.c 11 Sep 2017 11:15:52 -0000 1.204
+++ kern/uipc_socket.c 12 Sep 2017 07:46:54 -0000
@@ -1960,8 +1960,10 @@ int
filt_sowrite(struct knote *kn, long hint)
{
struct socket *so = kn->kn_fp->f_data;
- int rv;
+ int s, rv;
+ if (!(hint & NOTE_SUBMIT))
+ s = solock(so);
kn->kn_data = sbspace(so, &so->so_snd);
if (so->so_state & SS_CANTSENDMORE) {
kn->kn_flags |= EV_EOF;
@@ -1977,6 +1979,8 @@ filt_sowrite(struct knote *kn, long hint
} else {
rv = (kn->kn_data >= so->so_snd.sb_lowat);
}
+ if (!(hint & NOTE_SUBMIT))
+ sounlock(s);
return (rv);
}
@@ -1985,8 +1989,13 @@ int
filt_solisten(struct knote *kn, long hint)
{
struct socket *so = kn->kn_fp->f_data;
+ int s;
+ if (!(hint & NOTE_SUBMIT))
+ s = solock(so);
kn->kn_data = so->so_qlen;
+ if (!(hint & NOTE_SUBMIT))
+ sounlock(s);
return (kn->kn_data != 0);
}
Index: miscfs/fifofs/fifo_vnops.c
===================================================================
RCS file: /cvs/src/sys/miscfs/fifofs/fifo_vnops.c,v
retrieving revision 1.58
diff -u -p -r1.58 fifo_vnops.c
--- miscfs/fifofs/fifo_vnops.c 24 Jul 2017 15:07:39 -0000 1.58
+++ miscfs/fifofs/fifo_vnops.c 5 Sep 2017 14:25:59 -0000
@@ -545,8 +545,10 @@ int
filt_fiforead(struct knote *kn, long hint)
{
struct socket *so = (struct socket *)kn->kn_hook;
- int rv;
+ int s, rv;
+ if (!(hint & NOTE_SUBMIT))
+ s = solock(so);
kn->kn_data = so->so_rcv.sb_cc;
if (so->so_state & SS_CANTRCVMORE) {
kn->kn_flags |= EV_EOF;
@@ -555,6 +557,8 @@ filt_fiforead(struct knote *kn, long hin
kn->kn_flags &= ~EV_EOF;
rv = (kn->kn_data > 0);
}
+ if (!(hint & NOTE_SUBMIT))
+ sounlock(s);
return (rv);
}
@@ -573,8 +577,10 @@ int
filt_fifowrite(struct knote *kn, long hint)
{
struct socket *so = (struct socket *)kn->kn_hook;
- int rv;
+ int s, rv;
+ if (!(hint & NOTE_SUBMIT))
+ s = solock(so);
kn->kn_data = sbspace(so, &so->so_snd);
if (so->so_state & SS_CANTSENDMORE) {
kn->kn_flags |= EV_EOF;
@@ -583,6 +589,8 @@ filt_fifowrite(struct knote *kn, long hi
kn->kn_flags &= ~EV_EOF;
rv = (kn->kn_data >= so->so_snd.sb_lowat);
}
+ if (!(hint & NOTE_SUBMIT))
+ sounlock(s);
return (rv);
}
Index: sys/event.h
===================================================================
RCS file: /cvs/src/sys/sys/event.h,v
retrieving revision 1.26
diff -u -p -r1.26 event.h
--- sys/event.h 26 Jun 2017 09:32:32 -0000 1.26
+++ sys/event.h 5 Sep 2017 13:36:29 -0000
@@ -80,13 +80,6 @@ struct kevent {
#define EV_ERROR 0x4000 /* error, data contains errno */
/*
- * hint flag for in-kernel use - must not equal any existing note
- */
-#ifdef _KERNEL
-#define NOTE_SUBMIT 0x01000000 /* initial knote submission */
-#endif
-
-/*
* data/hint flags for EVFILT_{READ|WRITE}, shared with userspace
*/
#define NOTE_LOWAT 0x0001 /* low water mark */
@@ -127,6 +120,13 @@ struct knote;
SLIST_HEAD(klist, knote);
#ifdef _KERNEL
+
+#define EVFILT_MARKER 0xF /* placemarker for tailq */
+
+/*
+ * hint flag for in-kernel use - must not equal any existing note
+ */
+#define NOTE_SUBMIT 0x01000000 /* initial knote submission */
#define KNOTE(list_, hint) do { \
struct klist *list = (list_); \
Index: sys/socketvar.h
===================================================================
RCS file: /cvs/src/sys/sys/socketvar.h,v
retrieving revision 1.76
diff -u -p -r1.76 socketvar.h
--- sys/socketvar.h 1 Sep 2017 15:05:31 -0000 1.76
+++ sys/socketvar.h 5 Sep 2017 14:18:48 -0000
@@ -186,10 +186,7 @@ static inline long
sbspace(struct socket *so, struct sockbuf *sb)
{
KASSERT(sb == &so->so_rcv || sb == &so->so_snd);
-#if 0
- /* XXXSMP kqueue_scan() calling filt_sowrite() cannot sleep. */
soassertlocked(so);
-#endif
return lmin(sb->sb_hiwat - sb->sb_cc, sb->sb_mbmax - sb->sb_mbcnt);
}