On Sun, Nov 14, 2021 at 11:35:26PM +0100, Alexander Bluhm wrote:
> Hi,
> 
> on my arm64 regress machine I saw this panic during syslogd regress:
> 
> OpenBSD 7.0-current (GENERIC.MP) #1391: Fri Nov 12 21:55:27 MST 2021
>     [email protected]:/usr/src/sys/arch/arm64/compile/GENERIC.MP
> 
> panic: kernel diagnostic assertion "TAILQ_EMPTY(&kq->kq_head)" failed: file 
> "/usr/src/sys/kern/kern_event.c", line 224
> Stopped at      panic+0x160:    cmp     w21, #0x0
>     TID    PID    UID     PRFLAGS     PFLAGS  CPU  COMMAND
>  242487  43389      0         0x2          0    2  rsyslogd
>  153253   7135      0         0x2          0    3  perl
> db_enter() at panic+0x15c
> panic() at __assert+0x24
> panic() at KQRELE+0x178
> KQRELE() at kqpoll_exit+0x54
> kqpoll_exit() at exit1+0x204
> exit1() at sys___threxit+0x48
> process_untrace() at svc_handler+0x30c

Thank you for reporting this.

I think the panic can happen when a thread has polled a file descriptor
and is exiting while another thread in the process closes that fd. With
poll(2) and select(2), knote_remove() sets up knotes that convey EBADF
error to the system call frontend. Unfortunately, the function has to
release kq_lock at times, which lets other threads change the kqueue.
In particular, another thread can start closing the poll/select kqueue.
It is not safe to enqueue EBADF knotes after closing has started.

Alexander, is the panic easy to reproduce on your setup? It would be
interesting to know if the following patch helps.

Index: kern/kern_event.c
===================================================================
RCS file: src/sys/kern/kern_event.c,v
retrieving revision 1.173
diff -u -p -r1.173 kern_event.c
--- kern/kern_event.c   15 Nov 2021 15:48:54 -0000      1.173
+++ kern/kern_event.c   15 Nov 2021 15:54:24 -0000
@@ -73,7 +73,7 @@ void  kqueue_terminate(struct proc *p, st
 void   KQREF(struct kqueue *);
 void   KQRELE(struct kqueue *);
 
-void   kqueue_purge(struct proc *, struct kqueue *);
+void   kqueue_purge(struct proc *, struct kqueue *, int);
 int    kqueue_sleep(struct kqueue *, struct timespec *);
 
 int    kqueue_read(struct file *, struct uio *, int);
@@ -787,7 +787,7 @@ kqpoll_init(unsigned int num)
 
        if (p->p_kq_serial + num < p->p_kq_serial) {
                /* Serial is about to wrap. Clear all attached knotes. */
-               kqueue_purge(p, p->p_kq);
+               kqueue_purge(p, p->p_kq, 0);
                p->p_kq_serial = 0;
        }
 
@@ -813,9 +813,6 @@ kqpoll_done(unsigned int num)
        KASSERT(p->p_kq_serial + num >= p->p_kq_serial);
 
        p->p_kq_serial += num;
-
-       /* XXX Work around a race condition. */
-       kqueue_purge(p, p->p_kq);
 }
 
 void
@@ -826,7 +823,7 @@ kqpoll_exit(void)
        if (p->p_kq == NULL)
                return;
 
-       kqueue_purge(p, p->p_kq);
+       kqueue_purge(p, p->p_kq, 1);
        /* Clear any detached knotes that remain in the queue. */
        kqpoll_dequeue(p, 1);
        kqueue_terminate(p, p->p_kq);
@@ -1559,11 +1556,16 @@ kqueue_stat(struct file *fp, struct stat
 }
 
 void
-kqueue_purge(struct proc *p, struct kqueue *kq)
+kqueue_purge(struct proc *p, struct kqueue *kq, int dying)
 {
        int i;
 
        mtx_enter(&kq->kq_lock);
+       if (dying) {
+               kq->kq_state |= KQ_DYING;
+               kqueue_wakeup(kq);
+       }
+
        for (i = 0; i < kq->kq_knlistsize; i++)
                knote_remove(p, kq, &kq->kq_knlist[i], 1);
        if (kq->kq_knhashmask != 0) {
@@ -1588,8 +1590,6 @@ kqueue_terminate(struct proc *p, struct 
        TAILQ_FOREACH(kn, &kq->kq_head, kn_tqe)
                KASSERT(kn->kn_filter == EVFILT_MARKER);
 
-       kq->kq_state |= KQ_DYING;
-       kqueue_wakeup(kq);
        mtx_leave(&kq->kq_lock);
 
        KASSERT(klist_empty(&kq->kq_sel.si_note));
@@ -1604,7 +1604,7 @@ kqueue_close(struct file *fp, struct pro
 
        fp->f_data = NULL;
 
-       kqueue_purge(p, kq);
+       kqueue_purge(p, kq, 1);
        kqueue_terminate(p, kq);
 
        KQRELE(kq);
@@ -1840,9 +1840,12 @@ knote_remove(struct proc *p, struct kque
                        kn->kn_fop = &badfd_filtops;
                        filter_event(kn, 0);
                        mtx_enter(&kq->kq_lock);
-                       knote_activate(kn);
-                       knote_release(kn);
-                       continue;
+                       if ((kq->kq_state & KQ_DYING) == 0) {
+                               knote_activate(kn);
+                               knote_release(kn);
+                               continue;
+                       }
+                       mtx_leave(&kq->kq_lock);
                }
 
                knote_drop(kn, p);

Reply via email to