On 06/11/21(Sat) 15:53, Visa Hankala wrote:
> On Fri, Nov 05, 2021 at 10:04:50AM +0100, Martin Pieuchot wrote:
> > The new poll/select(2) implementation converts 'struct pollfd' and 'fdset' to
> > knotes (kqueue event descriptors) and then passes them to the kqueue subsystem.
> > A knote is allocated, with kqueue_register(), for every read, write and
> > except condition watched on a given FD.  That means at most 3 allocations
> > might be necessary per FD.
> > 
> > The diff below reduces the overhead of the per-syscall allocation/free of
> > those descriptors by leaving those which didn't trigger on the kqueue across
> > syscalls.  Leaving knotes on the kqueue allows kqueue_register() to re-use
> > an existing descriptor instead of re-allocating a new one.
> > 
> > With this, knotes are now lazily removed.  The mechanism uses a serial
> > number, incremented on every syscall, that indicates whether a knote
> > sitting in the kqueue is still valid or should be freed.
> > 
> > Note that performance improvements might not be visible with this diff
> > alone because kqueue_register() still pre-allocates a descriptor and then
> > drops it.
> > 
> > visa@ already pointed out that the lazy removal logic could be integrated
> > in kqueue_scan() which would reduce the complexity of those two syscalls.
> > I'm arguing for doing this in a next step in-tree.
> 
> I think it would make more sense to add the removal logic to the scan
> function first as doing so would keep the code modifications more
> logical and simpler. This would also avoid the need to go through
> a temporary removal approach.

I totally support your effort and your design; however, I don't have the
time to do another round of testing/debugging.  So please, can you take
care of doing these cleanups afterward?  If not, please send a full diff
and take over this feature; it's too much effort for me to work out of
tree.

> Index: kern/kern_event.c
> ===================================================================
> RCS file: src/sys/kern/kern_event.c,v
> retrieving revision 1.170
> diff -u -p -r1.170 kern_event.c
> --- kern/kern_event.c 6 Nov 2021 05:48:47 -0000       1.170
> +++ kern/kern_event.c 6 Nov 2021 15:31:04 -0000
> @@ -73,6 +73,7 @@ void        kqueue_terminate(struct proc *p, st
>  void KQREF(struct kqueue *);
>  void KQRELE(struct kqueue *);
>  
> +void kqueue_purge(struct proc *, struct kqueue *);
>  int  kqueue_sleep(struct kqueue *, struct timespec *);
>  
>  int  kqueue_read(struct file *, struct uio *, int);
> @@ -806,6 +807,22 @@ kqpoll_exit(void)
>  }
>  
>  void
> +kqpoll_done(unsigned int num)
> +{
> +     struct proc *p = curproc;
> +
> +     KASSERT(p->p_kq != NULL);
> +
> +     if (p->p_kq_serial + num >= p->p_kq_serial) {
> +             p->p_kq_serial += num;
> +     } else {
> +             /* Clear all knotes after serial wraparound. */
> +             kqueue_purge(p, p->p_kq);
> +             p->p_kq_serial = 1;
> +     }
> +}
> +
> +void
>  kqpoll_dequeue(struct proc *p, int all)
>  {
>       struct knote marker;
> @@ -1383,6 +1400,15 @@ retry:
>  
>               mtx_leave(&kq->kq_lock);
>  
> +             /* Drop spurious events. */
> +             if (p->p_kq == kq &&
> +                 p->p_kq_serial > (unsigned long)kn->kn_udata) {
> +                     filter_detach(kn);
> +                     knote_drop(kn, p);
> +                     mtx_enter(&kq->kq_lock);
> +                     continue;
> +             }
> +
>               memset(kevp, 0, sizeof(*kevp));
>               if (filter_process(kn, kevp) == 0) {
>                       mtx_enter(&kq->kq_lock);
> Index: kern/sys_generic.c
> ===================================================================
> RCS file: src/sys/kern/sys_generic.c,v
> retrieving revision 1.139
> diff -u -p -r1.139 sys_generic.c
> --- kern/sys_generic.c        29 Oct 2021 15:52:44 -0000      1.139
> +++ kern/sys_generic.c        6 Nov 2021 15:31:04 -0000
> @@ -730,8 +730,7 @@ done:
>       if (pibits[0] != (fd_set *)&bits[0])
>               free(pibits[0], M_TEMP, 6 * ni);
>  
> -     kqueue_purge(p, p->p_kq);
> -     p->p_kq_serial += nd;
> +     kqpoll_done(nd);
>  
>       return (error);
>  }
> @@ -1230,8 +1229,7 @@ bad:
>       if (pl != pfds)
>               free(pl, M_TEMP, sz);
>  
> -     kqueue_purge(p, p->p_kq);
> -     p->p_kq_serial += nfds;
> +     kqpoll_done(nfds);
>  
>       return (error);
>  }
> @@ -1251,8 +1249,7 @@ ppollcollect(struct proc *p, struct keve
>       /*
>        * Lazily delete spurious events.
>        *
> -      * This should not happen as long as kqueue_purge() is called
> -      * at the end of every syscall.  It migh be interesting to do
> +      * This should not happen.  It might be interesting to do
>        * like DragonFlyBSD and not always allocated a new knote in
>        * kqueue_register() with that lazy removal makes sense.
>        */
> Index: sys/event.h
> ===================================================================
> RCS file: src/sys/sys/event.h,v
> retrieving revision 1.57
> diff -u -p -r1.57 event.h
> --- sys/event.h       24 Oct 2021 07:02:47 -0000      1.57
> +++ sys/event.h       6 Nov 2021 15:31:04 -0000
> @@ -289,6 +289,7 @@ extern const struct klistops socket_klis
>  
>  extern void  kqpoll_init(void);
>  extern void  kqpoll_exit(void);
> +extern void  kqpoll_done(unsigned int);
>  extern void  knote(struct klist *list, long hint);
>  extern void  knote_fdclose(struct proc *p, int fd);
>  extern void  knote_processexit(struct proc *);
> @@ -302,7 +303,6 @@ extern int        kqueue_scan(struct kqueue_sca
>                   struct timespec *, struct proc *, int *);
>  extern void  kqueue_scan_setup(struct kqueue_scan_state *, struct kqueue *);
>  extern void  kqueue_scan_finish(struct kqueue_scan_state *);
> -extern void  kqueue_purge(struct proc *, struct kqueue *);
>  extern int   filt_seltrue(struct knote *kn, long hint);
>  extern int   seltrue_kqfilter(dev_t, struct knote *);
>  extern void  klist_init(struct klist *, const struct klistops *, void *);

Reply via email to