On 10/04/20(Fri) 14:43, Martin Pieuchot wrote:
> Diff below reduces kqueue_scan() to the collect of events on a given
> kqueue and let its caller, sys_kevent(), responsible for the copyout(9).
>
> Apart from the code simplification, this refactoring clearly separates
> kqueue_scan() from the syscall logic. That should allow us to re-use
> the function in a different context and to address its need for locking
> independently.
>
> Since the number of events that are ready to be collected can be bigger
> than the size of the array, the pair kqueue_scan()/copyout() may be
> called multiple times. In that case, successive calls should no longer
> block, this is performed by using a zero, but not NULL, timeout which
> correspond to a non-blocking scan.
>
> This is the next piece of the ongoing work to move select/poll/kqueue.
> I'd like to be sure it doesn't introduce any regression.
Updated diff based on recent changes and incorporating feedbacks from
visa@.
Comments? Oks?
Index: kern/kern_event.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_event.c,v
retrieving revision 1.132
diff -u -p -r1.132 kern_event.c
--- kern/kern_event.c 17 May 2020 10:53:14 -0000 1.132
+++ kern/kern_event.c 25 May 2020 11:54:08 -0000
@@ -62,9 +62,6 @@ void KQREF(struct kqueue *);
void KQRELE(struct kqueue *);
int kqueue_sleep(struct kqueue *, struct timespec *);
-int kqueue_scan(struct kqueue_scan_state *scan, int maxevents,
- struct kevent *ulistp, struct timespec *timeout,
- struct proc *p, int *retval);
int kqueue_read(struct file *, struct uio *, int);
int kqueue_write(struct file *, struct uio *, int);
@@ -545,6 +542,7 @@ sys_kevent(struct proc *p, void *v, regi
struct timespec ts;
struct timespec *tsp = NULL;
int i, n, nerrors, error;
+ int ready, total;
struct kevent kev[KQ_NEVENTS];
if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL)
@@ -573,9 +571,9 @@ sys_kevent(struct proc *p, void *v, regi
kq = fp->f_data;
nerrors = 0;
- while (SCARG(uap, nchanges) > 0) {
- n = SCARG(uap, nchanges) > KQ_NEVENTS ?
- KQ_NEVENTS : SCARG(uap, nchanges);
+ while ((n = SCARG(uap, nchanges)) > 0) {
+ if (n > nitems(kev))
+ n = nitems(kev);
error = copyin(SCARG(uap, changelist), kev,
n * sizeof(struct kevent));
if (error)
@@ -611,14 +609,41 @@ sys_kevent(struct proc *p, void *v, regi
goto done;
}
+
KQREF(kq);
FRELE(fp, p);
+ /*
+ * Collect as many events as we can. The timeout on successive
+ * loops is disabled (kqueue_scan() becomes non-blocking).
+ */
+ total = 0;
+ error = 0;
kqueue_scan_setup(&scan, kq);
- error = kqueue_scan(&scan, SCARG(uap, nevents), SCARG(uap, eventlist),
- tsp, p, &n);
+ while ((n = SCARG(uap, nevents) - total) > 0) {
+ if (n > nitems(kev))
+ n = nitems(kev);
+ ready = kqueue_scan(&scan, n, kev, tsp, p, &error);
+ if (ready == 0)
+ break;
+ error = copyout(kev, SCARG(uap, eventlist) + total,
+ sizeof(struct kevent) * ready);
+#ifdef KTRACE
+ if (KTRPOINT(p, KTR_STRUCT))
+ ktrevent(p, kev, ready);
+#endif
+ total += ready;
+ /*
+ * Stop if there was an error or if we had enough
+ * place to collect all events that were ready.
+ */
+ if (error || ready < n)
+ break;
+ tsp = &ts; /* successive loops non-blocking */
+ timespecclear(tsp);
+ }
kqueue_scan_finish(&scan);
KQRELE(kq);
- *retval = n;
+ *retval = total;
return (error);
done:
@@ -872,15 +897,18 @@ kqueue_sleep(struct kqueue *kq, struct t
return (error);
}
+/*
+ * Scan the kqueue, blocking if necessary until the target time is reached.
+ * If tsp is NULL we block indefinitely. If tsp->ts_secs/nsecs are both
+ * 0 we do not block at all.
+ */
int
kqueue_scan(struct kqueue_scan_state *scan, int maxevents,
- struct kevent *ulistp, struct timespec *tsp, struct proc *p, int *retval)
+ struct kevent *kevp, struct timespec *tsp, struct proc *p, int *errorp)
{
- struct kevent *kevp;
struct knote *kn;
struct kqueue *kq = scan->kqs_kq;
int s, count, nkev = 0, error = 0;
- struct kevent kev[KQ_NEVENTS];
count = maxevents;
if (count == 0)
@@ -892,7 +920,6 @@ retry:
goto done;
}
- kevp = &kev[0];
s = splhigh();
if (kq->kq_count == 0) {
if ((tsp != NULL && !timespecisset(tsp)) ||
@@ -933,14 +960,8 @@ retry:
while (count) {
kn = TAILQ_NEXT(&scan->kqs_start, kn_tqe);
if (kn->kn_filter == EVFILT_MARKER) {
- if (kn == &scan->kqs_end) {
- TAILQ_REMOVE(&kq->kq_head, &scan->kqs_start,
- kn_tqe);
- splx(s);
- if (scan->kqs_nevent == 0)
- goto retry;
- goto done;
- }
+ if (kn == &scan->kqs_end)
+ break;
/* Move start marker past another thread's marker. */
TAILQ_REMOVE(&kq->kq_head, &scan->kqs_start, kn_tqe);
@@ -976,6 +997,9 @@ retry:
count--;
scan->kqs_nevent++;
+ /*
+ * Post-event action on the note
+ */
if (kn->kn_flags & EV_ONESHOT) {
splx(s);
kn->kn_fop->f_detach(kn);
@@ -1001,35 +1025,14 @@ retry:
knote_release(kn);
}
kqueue_check(kq);
- if (nkev == KQ_NEVENTS) {
- splx(s);
-#ifdef KTRACE
- if (KTRPOINT(p, KTR_STRUCT))
- ktrevent(p, kev, nkev);
-#endif
- error = copyout(kev, ulistp,
- sizeof(struct kevent) * nkev);
- ulistp += nkev;
- nkev = 0;
- kevp = &kev[0];
- s = splhigh();
- if (error)
- break;
- }
}
TAILQ_REMOVE(&kq->kq_head, &scan->kqs_start, kn_tqe);
splx(s);
+ if (scan->kqs_nevent == 0)
+ goto retry;
done:
- if (nkev != 0) {
-#ifdef KTRACE
- if (KTRPOINT(p, KTR_STRUCT))
- ktrevent(p, kev, nkev);
-#endif
- error = copyout(kev, ulistp,
- sizeof(struct kevent) * nkev);
- }
- *retval = maxevents - count;
- return (error);
+ *errorp = error;
+ return (nkev);
}
void
Index: sys/event.h
===================================================================
RCS file: /cvs/src/sys/sys/event.h,v
retrieving revision 1.37
diff -u -p -r1.37 event.h
--- sys/event.h 17 May 2020 10:53:14 -0000 1.37
+++ sys/event.h 25 May 2020 11:34:34 -0000
@@ -211,6 +211,8 @@ extern int kqueue_register(struct kqueue
struct kevent *kev, struct proc *p);
extern void kqueue_scan_setup(struct kqueue_scan_state *, struct kqueue *);
extern void kqueue_scan_finish(struct kqueue_scan_state *);
+extern int kqueue_scan(struct kqueue_scan_state *, int, struct kevent *,
+ struct timespec *, struct proc *, int *);
extern int filt_seltrue(struct knote *kn, long hint);
extern int seltrue_kqfilter(dev_t, struct knote *);
extern void klist_insert(struct klist *, struct knote *);