On Mon, Nov 15, 2021 at 04:21:39PM +0000, Visa Hankala wrote:
> Alexander, is the panic easy to reproduce on your setup?

My syslogd regress starts rsyslogd to test compatibility.  The
rsyslogd sometimes causes a panic.  Running the test in a loop may
crash the kernel within two hours.

> It would be interesting to know if the following patch helps.

According to my console log it was running for 5 hours without a crash.
I will continue testing today.

It seems sthen@ has another test case.  With some poll diffs for ssh
in snapshots he did see problems with rsync over ssh.  At that time
lazy kqueue was committed.

Maybe he has a setup to help testing this diff.

bluhm

> Index: kern/kern_event.c
> ===================================================================
> RCS file: src/sys/kern/kern_event.c,v
> retrieving revision 1.173
> diff -u -p -r1.173 kern_event.c
> --- kern/kern_event.c 15 Nov 2021 15:48:54 -0000      1.173
> +++ kern/kern_event.c 15 Nov 2021 15:54:24 -0000
> @@ -73,7 +73,7 @@ void        kqueue_terminate(struct proc *p, st
>  void KQREF(struct kqueue *);
>  void KQRELE(struct kqueue *);
>  
> -void kqueue_purge(struct proc *, struct kqueue *);
> +void kqueue_purge(struct proc *, struct kqueue *, int);
>  int  kqueue_sleep(struct kqueue *, struct timespec *);
>  
>  int  kqueue_read(struct file *, struct uio *, int);
> @@ -787,7 +787,7 @@ kqpoll_init(unsigned int num)
>  
>       if (p->p_kq_serial + num < p->p_kq_serial) {
>               /* Serial is about to wrap. Clear all attached knotes. */
> -             kqueue_purge(p, p->p_kq);
> +             kqueue_purge(p, p->p_kq, 0);
>               p->p_kq_serial = 0;
>       }
>  
> @@ -813,9 +813,6 @@ kqpoll_done(unsigned int num)
>       KASSERT(p->p_kq_serial + num >= p->p_kq_serial);
>  
>       p->p_kq_serial += num;
> -
> -     /* XXX Work around a race condition. */
> -     kqueue_purge(p, p->p_kq);
>  }
>  
>  void
> @@ -826,7 +823,7 @@ kqpoll_exit(void)
>       if (p->p_kq == NULL)
>               return;
>  
> -     kqueue_purge(p, p->p_kq);
> +     kqueue_purge(p, p->p_kq, 1);
>       /* Clear any detached knotes that remain in the queue. */
>       kqpoll_dequeue(p, 1);
>       kqueue_terminate(p, p->p_kq);
> @@ -1559,11 +1556,16 @@ kqueue_stat(struct file *fp, struct stat
>  }
>  
>  void
> -kqueue_purge(struct proc *p, struct kqueue *kq)
> +kqueue_purge(struct proc *p, struct kqueue *kq, int dying)
>  {
>       int i;
>  
>       mtx_enter(&kq->kq_lock);
> +     if (dying) {
> +             kq->kq_state |= KQ_DYING;
> +             kqueue_wakeup(kq);
> +     }
> +
>       for (i = 0; i < kq->kq_knlistsize; i++)
>               knote_remove(p, kq, &kq->kq_knlist[i], 1);
>       if (kq->kq_knhashmask != 0) {
> @@ -1588,8 +1590,6 @@ kqueue_terminate(struct proc *p, struct 
>       TAILQ_FOREACH(kn, &kq->kq_head, kn_tqe)
>               KASSERT(kn->kn_filter == EVFILT_MARKER);
>  
> -     kq->kq_state |= KQ_DYING;
> -     kqueue_wakeup(kq);
>       mtx_leave(&kq->kq_lock);
>  
>       KASSERT(klist_empty(&kq->kq_sel.si_note));
> @@ -1604,7 +1604,7 @@ kqueue_close(struct file *fp, struct pro
>  
>       fp->f_data = NULL;
>  
> -     kqueue_purge(p, kq);
> +     kqueue_purge(p, kq, 1);
>       kqueue_terminate(p, kq);
>  
>       KQRELE(kq);
> @@ -1840,9 +1840,12 @@ knote_remove(struct proc *p, struct kque
>                       kn->kn_fop = &badfd_filtops;
>                       filter_event(kn, 0);
>                       mtx_enter(&kq->kq_lock);
> -                     knote_activate(kn);
> -                     knote_release(kn);
> -                     continue;
> +                     if ((kq->kq_state & KQ_DYING) == 0) {
> +                             knote_activate(kn);
> +                             knote_release(kn);
> +                             continue;
> +                     }
> +                     mtx_leave(&kq->kq_lock);
>               }
>  
>               knote_drop(kn, p);

Reply via email to