Re: kqueue_scan_setup/finish

2020-08-15 Thread Visa Hankala
On Fri, Aug 14, 2020 at 12:31:33PM +0200, Martin Pieuchot wrote:
> The previous change, which introduced the kqueue_scan_setup()/finish()
> API required for switching poll(2) internals to the kqueue mechanism,
> has been backed out.  The reason for the regression is still unknown,
> so let's take a baby-step approach.
> 
> Diff below introduces the new API with only minimal changes.  It should
> not introduce any change in behavior.

There is a subtle change in behaviour: the markers' kn_filter and
kn_status are now initialized only once, at the start of the scan.
Previously, they were also set at the start of each retry.
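
To make the difference concrete, here is a minimal sketch of the new
setup step with the markers initialized once.  The kqs_* field names
come from Martin's diff; the rest of the struct layout and the KQREF()
placement are my assumptions, not the actual tree code:

	struct kqueue_scan_state {
		struct kqueue	*kqs_kq;	/* kqueue under scan */
		struct knote	 kqs_start;	/* start marker knote */
		struct knote	 kqs_end;	/* end marker knote */
	};

	void
	kqueue_scan_setup(struct kqueue_scan_state *scan, struct kqueue *kq)
	{
		memset(scan, 0, sizeof(*scan));
		/* Assumed: take the reference sys_kevent() used to take. */
		KQREF(kq);
		scan->kqs_kq = kq;
		/* Initialized once per scan, no longer on every retry. */
		scan->kqs_start.kn_filter = EVFILT_MARKER;
		scan->kqs_start.kn_status = KN_PROCESSING;
		scan->kqs_end.kn_filter = EVFILT_MARKER;
		scan->kqs_end.kn_status = KN_PROCESSING;
	}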

This diff should be tested at least by those who had problems with
the original kqueue_scan_state patch. The original patch could cause
data loss, and the exact cause is still unknown. Hence extra caution is
warranted.


kqueue_scan_setup/finish

2020-08-14 Thread Martin Pieuchot
The previous change, which introduced the kqueue_scan_setup()/finish()
API required for switching poll(2) internals to the kqueue mechanism,
has been backed out.  The reason for the regression is still unknown,
so let's take a baby-step approach.

Diff below introduces the new API with only minimal changes.  It should
not introduce any change in behavior.
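
In outline, callers move from a bare kqueue_scan() call to a
setup/scan/finish sequence.  Condensed from the sys_kevent() hunk in
the diff below (argument copyin and error paths omitted):

	struct kqueue_scan_state scan;

	kqueue_scan_setup(&scan, kq);	/* replaces KQREF(kq) */
	error = kqueue_scan(&scan, nevents, eventlist, tsp, kev, p, &n);
	kqueue_scan_finish(&scan);	/* replaces KQRELE(kq) */
	*retval = n;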

Comments?  Oks?

Index: kern/kern_event.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_event.c,v
retrieving revision 1.142
diff -u -p -r1.142 kern_event.c
--- kern/kern_event.c	12 Aug 2020 13:49:24 -0000	1.142
+++ kern/kern_event.c	14 Aug 2020 10:13:38 -0000
@@ -64,9 +64,6 @@ void	KQREF(struct kqueue *);
 void	KQRELE(struct kqueue *);
 
 int	kqueue_sleep(struct kqueue *, struct timespec *);
-int	kqueue_scan(struct kqueue *kq, int maxevents,
-	    struct kevent *ulistp, struct timespec *timeout,
-	    struct kevent *kev, struct proc *p, int *retval);
 
 int	kqueue_read(struct file *, struct uio *, int);
 int	kqueue_write(struct file *, struct uio *, int);
@@ -554,6 +551,7 @@ out:
 int
 sys_kevent(struct proc *p, void *v, register_t *retval)
 {
+	struct kqueue_scan_state scan;
 	struct filedesc* fdp = p->p_fd;
 	struct sys_kevent_args /* {
 		syscallarg(int)	fd;
@@ -635,11 +633,12 @@ sys_kevent(struct proc *p, void *v, regi
 		goto done;
 	}
 
-	KQREF(kq);
+	kqueue_scan_setup(&scan, kq);
 	FRELE(fp, p);
-	error = kqueue_scan(kq, SCARG(uap, nevents), SCARG(uap, eventlist),
+	error = kqueue_scan(&scan, SCARG(uap, nevents), SCARG(uap, eventlist),
 	    tsp, kev, p, &n);
-	KQRELE(kq);
+	kqueue_scan_finish(&scan);
+
 	*retval = n;
 	return (error);
 
@@ -895,11 +894,13 @@ kqueue_sleep(struct kqueue *kq, struct t
 }
 
 int
-kqueue_scan(struct kqueue *kq, int maxevents, struct kevent *ulistp,
-    struct timespec *tsp, struct kevent *kev, struct proc *p, int *retval)
+kqueue_scan(struct kqueue_scan_state *scan, int maxevents,
+    struct kevent *ulistp, struct timespec *tsp, struct kevent *kev,
+    struct proc *p, int *retval)
 {
+	struct kqueue *kq = scan->kqs_kq;
 	struct kevent *kevp;
-	struct knote mend, mstart, *kn;
+	struct knote *kn;
 	int s, count, nkev, error = 0;
 
 	nkev = 0;
@@ -909,9 +910,6 @@ kqueue_scan(struct kqueue *kq, int maxev
 	if (count == 0)
 		goto done;
 
-	memset(&mstart, 0, sizeof(mstart));
-	memset(&mend, 0, sizeof(mend));
-
 retry:
 	KASSERT(count == maxevents);
 	KASSERT(nkev == 0);
@@ -939,18 +937,16 @@ retry:
 		goto done;
 	}
 
-	mstart.kn_filter = EVFILT_MARKER;
-	mstart.kn_status = KN_PROCESSING;
-	TAILQ_INSERT_HEAD(&kq->kq_head, &mstart, kn_tqe);
-	mend.kn_filter = EVFILT_MARKER;
-	mend.kn_status = KN_PROCESSING;
-	TAILQ_INSERT_TAIL(&kq->kq_head, &mend, kn_tqe);
+	TAILQ_INSERT_TAIL(&kq->kq_head, &scan->kqs_end, kn_tqe);
+	TAILQ_INSERT_HEAD(&kq->kq_head, &scan->kqs_start, kn_tqe);
 	while (count) {
-		kn = TAILQ_NEXT(&mstart, kn_tqe);
+		kn = TAILQ_NEXT(&scan->kqs_start, kn_tqe);
 		if (kn->kn_filter == EVFILT_MARKER) {
-			if (kn == &mend) {
-				TAILQ_REMOVE(&kq->kq_head, &mend, kn_tqe);
-				TAILQ_REMOVE(&kq->kq_head, &mstart, kn_tqe);
+			if (kn == &scan->kqs_end) {
+				TAILQ_REMOVE(&kq->kq_head, &scan->kqs_end,
+				    kn_tqe);
+				TAILQ_REMOVE(&kq->kq_head, &scan->kqs_start,
+				    kn_tqe);
 				splx(s);
 				if (count == maxevents)
 					goto retry;
@@ -958,8 +954,9 @@ retry:
 			}
 
 			/* Move start marker past another thread's marker. */
-			TAILQ_REMOVE(&kq->kq_head, &mstart, kn_tqe);
-			TAILQ_INSERT_AFTER(&kq->kq_head, kn, &mstart, kn_tqe);
+			TAILQ_REMOVE(&kq->kq_head, &scan->kqs_start, kn_tqe);
+			TAILQ_INSERT_AFTER(&kq->kq_head, kn, &scan->kqs_start,
+			    kn_tqe);
 			continue;
 		}
 
@@ -1029,8 +1026,8 @@ retry:
 				break;
 		}
 	}
-	TAILQ_REMOVE(&kq->kq_head, &mend, kn_tqe);
-	TAILQ_REMOVE(&kq->kq_head, &mstart, kn_tqe);
+	TAILQ_REMOVE(&kq->kq_head, &scan->kqs_end, kn_tqe);
+	TAILQ_REMOVE(&kq->kq_head, &scan->kqs_start, kn_tqe);
 	splx(s);
 done:
 	if (nkev != 0) {
@@ -1044,6 +1041,33 @@ done:
 	*retval = maxevents - count;
 	return (error);
 }
+
+void
+kqueue_scan_setup(struct kqueue_scan_state *scan, struct kqueue *kq)
+{
+	memset(scan, 0, sizeof(*scan));
+
+