On Mon, Nov 15, 2021 at 04:21:39PM +0000, Visa Hankala wrote:
> Alexander, is the panic easy to reproduce on your setup?
My syslogd regress starts rsyslogd to test compatibility. The
rsyslogd sometimes causes a panic. Running the test in a loop may
crash the kernel within two hours.
> It would be interesting to know if the following patch helps.
According to my console log it was running for 5 hours without a crash.
I will continue testing today.
It seems sthen@ has another test case. With some poll diffs for ssh
in snapshots he did see problems with rsync over ssh. At that time
lazy kqueue was committed.
Maybe he has a setup to help testing this diff.
bluhm
> Index: kern/kern_event.c
> ===================================================================
> RCS file: src/sys/kern/kern_event.c,v
> retrieving revision 1.173
> diff -u -p -r1.173 kern_event.c
> --- kern/kern_event.c 15 Nov 2021 15:48:54 -0000 1.173
> +++ kern/kern_event.c 15 Nov 2021 15:54:24 -0000
> @@ -73,7 +73,7 @@ void kqueue_terminate(struct proc *p, st
> void KQREF(struct kqueue *);
> void KQRELE(struct kqueue *);
>
> -void kqueue_purge(struct proc *, struct kqueue *);
> +void kqueue_purge(struct proc *, struct kqueue *, int);
> int kqueue_sleep(struct kqueue *, struct timespec *);
>
> int kqueue_read(struct file *, struct uio *, int);
> @@ -787,7 +787,7 @@ kqpoll_init(unsigned int num)
>
> if (p->p_kq_serial + num < p->p_kq_serial) {
> /* Serial is about to wrap. Clear all attached knotes. */
> - kqueue_purge(p, p->p_kq);
> + kqueue_purge(p, p->p_kq, 0);
> p->p_kq_serial = 0;
> }
>
> @@ -813,9 +813,6 @@ kqpoll_done(unsigned int num)
> KASSERT(p->p_kq_serial + num >= p->p_kq_serial);
>
> p->p_kq_serial += num;
> -
> - /* XXX Work around a race condition. */
> - kqueue_purge(p, p->p_kq);
> }
>
> void
> @@ -826,7 +823,7 @@ kqpoll_exit(void)
> if (p->p_kq == NULL)
> return;
>
> - kqueue_purge(p, p->p_kq);
> + kqueue_purge(p, p->p_kq, 1);
> /* Clear any detached knotes that remain in the queue. */
> kqpoll_dequeue(p, 1);
> kqueue_terminate(p, p->p_kq);
> @@ -1559,11 +1556,16 @@ kqueue_stat(struct file *fp, struct stat
> }
>
> void
> -kqueue_purge(struct proc *p, struct kqueue *kq)
> +kqueue_purge(struct proc *p, struct kqueue *kq, int dying)
> {
> int i;
>
> mtx_enter(&kq->kq_lock);
> + if (dying) {
> + kq->kq_state |= KQ_DYING;
> + kqueue_wakeup(kq);
> + }
> +
> for (i = 0; i < kq->kq_knlistsize; i++)
> knote_remove(p, kq, &kq->kq_knlist[i], 1);
> if (kq->kq_knhashmask != 0) {
> @@ -1588,8 +1590,6 @@ kqueue_terminate(struct proc *p, struct
> TAILQ_FOREACH(kn, &kq->kq_head, kn_tqe)
> KASSERT(kn->kn_filter == EVFILT_MARKER);
>
> - kq->kq_state |= KQ_DYING;
> - kqueue_wakeup(kq);
> mtx_leave(&kq->kq_lock);
>
> KASSERT(klist_empty(&kq->kq_sel.si_note));
> @@ -1604,7 +1604,7 @@ kqueue_close(struct file *fp, struct pro
>
> fp->f_data = NULL;
>
> - kqueue_purge(p, kq);
> + kqueue_purge(p, kq, 1);
> kqueue_terminate(p, kq);
>
> KQRELE(kq);
> @@ -1840,9 +1840,12 @@ knote_remove(struct proc *p, struct kque
> kn->kn_fop = &badfd_filtops;
> filter_event(kn, 0);
> mtx_enter(&kq->kq_lock);
> - knote_activate(kn);
> - knote_release(kn);
> - continue;
> + if ((kq->kq_state & KQ_DYING) == 0) {
> + knote_activate(kn);
> + knote_release(kn);
> + continue;
> + }
> + mtx_leave(&kq->kq_lock);
> }
>
> knote_drop(kn, p);