On 18/06/20(Thu) 09:03, Martin Pieuchot wrote:
> On 17/06/20(Wed) 11:50, Martin Pieuchot wrote:
> > On 16/06/20(Tue) 06:18, Todd C. Miller wrote:
> > > On Tue, 16 Jun 2020 12:48:58 +0200, Martin Pieuchot wrote:
> > >
> > > > The diff below implements DragonFly's approach of adding a new kind of
> > > > filter, EVFILT_EXCEPT, to report such conditions. This extends the
> > > > existing kqueue interface which is questionable. On the one hand this
> > > > allows userland programs to use kevent(2) to check for these conditions.
> > > > On the other hand this is not supported by any other BSD and thus
> > > > non-standard.
> > >
> > > Actually, it looks like macOS uses EVFILT_EXCEPT too. They were
> > > the first OS to implement poll in terms of kqueue as far as I know.
> > > I don't think there is a problem extending kqueue with EVFILT_EXCEPT.
> > >
> > > > In the tree there are two poll handlers that set the POLLPRI & POLLRDBAND
> > > > bits as illustrated by the diff below.
> > > >
> > > > Do we see value in this new type of filter? Should I document it and
> > > > put it in? Or should I restrict it to the __EV_POLL for now? In the
> > > > latter case should we pick a different name and/or prefix it?
> > >
> > > I think EVFILT_EXCEPT should be exposed to userland. It is not our
> > > own invention and two other OSes support it.
> >
> > Updated diff below addresses multiple comments and implements the
> > common functionalities of EVFILT_EXCEPT supported by both DragonFly
> > and xnu. Changes compared to the previous version include:
> >
> > - Introduction of NOTE_OOB which should be set in `fflags' according to
> > other implementations
> > - Set `kn_data' to the value of `so_oobmark' for sockets. This matches
> > xnu behavior, the arbitrary value was also questioned by visa@.
> > - Adds a missing break pointed out by anton@
> > - Set NOTE_OOB for pty as well, this isn't supported by DragonFly or
> > xnu but I don't see the point of introducing something different.
> > - Document the new filter
> >
> > Note that the latest version of XNU available on GitHub, from
> > late 2018, has the following behavior which this diff doesn't implement:
> >
> > If the read direction of the socket has shutdown, then
> > the filter also sets EV_EOF in flags, and returns the
> > socket error (if any) in fflags
>
> Simpler version that reuses the read filters. This one checks for NOTE_OOB
> in `kn_sfflags' like it is done for all other flags (pointed out by visa@).
New version that properly handles the case where NOTE_OOB is not set,
pointed out by visa@.
Index: sys/kern/kern_event.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_event.c,v
retrieving revision 1.139
diff -u -p -r1.139 kern_event.c
--- sys/kern/kern_event.c 15 Jun 2020 15:42:11 -0000 1.139
+++ sys/kern/kern_event.c 17 Jun 2020 09:35:42 -0000
@@ -158,6 +158,7 @@ const struct filterops *const sysfilt_op
&sig_filtops, /* EVFILT_SIGNAL */
&timer_filtops, /* EVFILT_TIMER */
&file_filtops, /* EVFILT_DEVICE */
+ &file_filtops, /* EVFILT_EXCEPT */
};
void
Index: sys/kern/tty_pty.c
===================================================================
RCS file: /cvs/src/sys/kern/tty_pty.c,v
retrieving revision 1.100
diff -u -p -r1.100 tty_pty.c
--- sys/kern/tty_pty.c 15 Jun 2020 15:29:40 -0000 1.100
+++ sys/kern/tty_pty.c 18 Jun 2020 14:42:33 -0000
@@ -668,6 +668,16 @@ filt_ptcread(struct knote *kn, long hint
tp = pti->pt_tty;
kn->kn_data = 0;
+ if (kn->kn_sfflags & NOTE_OOB) {
+ /* If in packet or user control mode, check for data. */
+ if (((pti->pt_flags & PF_PKT) && pti->pt_send) ||
+ ((pti->pt_flags & PF_UCNTL) && pti->pt_ucntl)) {
+ kn->kn_fflags |= NOTE_OOB;
+ kn->kn_data = 1;
+ return (1);
+ }
+ return (0);
+ }
if (ISSET(tp->t_state, TS_ISOPEN)) {
if (!ISSET(tp->t_state, TS_TTSTOP))
kn->kn_data = tp->t_outq.c_cc;
@@ -733,6 +743,13 @@ const struct filterops ptcwrite_filtops
.f_event = filt_ptcwrite,
};
+const struct filterops ptcexcept_filtops = {
+ .f_flags = FILTEROP_ISFD,
+ .f_attach = NULL,
+ .f_detach = filt_ptcrdetach,
+ .f_event = filt_ptcread,
+};
+
int
ptckqfilter(dev_t dev, struct knote *kn)
{
@@ -748,6 +765,10 @@ ptckqfilter(dev_t dev, struct knote *kn)
case EVFILT_WRITE:
klist = &pti->pt_selw.si_note;
kn->kn_fop = &ptcwrite_filtops;
+ break;
+ case EVFILT_EXCEPT:
+ klist = &pti->pt_selr.si_note;
+ kn->kn_fop = &ptcexcept_filtops;
break;
default:
return (EINVAL);
Index: sys/kern/uipc_socket.c
===================================================================
RCS file: /cvs/src/sys/kern/uipc_socket.c,v
retrieving revision 1.245
diff -u -p -r1.245 uipc_socket.c
--- sys/kern/uipc_socket.c 15 Jun 2020 15:29:40 -0000 1.245
+++ sys/kern/uipc_socket.c 18 Jun 2020 14:42:17 -0000
@@ -93,6 +93,12 @@ const struct filterops sowrite_filtops =
.f_event = filt_sowrite,
};
+const struct filterops soexcept_filtops = {
+ .f_flags = FILTEROP_ISFD,
+ .f_attach = NULL,
+ .f_detach = filt_sordetach,
+ .f_event = filt_soread,
+};
#ifndef SOMINCONN
#define SOMINCONN 80
@@ -2026,6 +2032,10 @@ soo_kqfilter(struct file *fp, struct kno
kn->kn_fop = &sowrite_filtops;
sb = &so->so_snd;
break;
+ case EVFILT_EXCEPT:
+ kn->kn_fop = &soexcept_filtops;
+ sb = &so->so_rcv;
+ break;
default:
return (EINVAL);
}
@@ -2052,7 +2062,7 @@ int
filt_soread(struct knote *kn, long hint)
{
struct socket *so = kn->kn_fp->f_data;
- int s, rv;
+ int s, rv = 0;
if ((hint & NOTE_SUBMIT) == 0)
s = solock(so);
@@ -2062,7 +2072,13 @@ filt_soread(struct knote *kn, long hint)
rv = 0;
} else
#endif /* SOCKET_SPLICE */
- if (so->so_state & SS_CANTRCVMORE) {
+ if (kn->kn_sfflags & NOTE_OOB) {
+ if (so->so_oobmark || (so->so_state & SS_RCVATMARK)) {
+ kn->kn_fflags |= NOTE_OOB;
+ kn->kn_data -= so->so_oobmark;
+ rv = 1;
+ }
+ } else if (so->so_state & SS_CANTRCVMORE) {
kn->kn_flags |= EV_EOF;
if (kn->kn_flags & __EV_POLL) {
if (so->so_state & SS_ISDISCONNECTED)
Index: sys/sys/event.h
===================================================================
RCS file: /cvs/src/sys/sys/event.h,v
retrieving revision 1.43
diff -u -p -r1.43 event.h
--- sys/sys/event.h 15 Jun 2020 15:42:11 -0000 1.43
+++ sys/sys/event.h 17 Jun 2020 09:29:39 -0000
@@ -39,6 +39,7 @@
#define EVFILT_SIGNAL (-6) /* attached to struct process */
#define EVFILT_TIMER (-7) /* timers */
#define EVFILT_DEVICE (-8) /* devices */
+#define EVFILT_EXCEPT (-9) /* exceptional conditions */
#define EVFILT_SYSCOUNT 8
@@ -85,6 +86,12 @@ struct kevent {
*/
#define NOTE_LOWAT 0x0001 /* low water mark */
#define NOTE_EOF 0x0002 /* return on EOF */
+
+/*
+ * data/hint flags for EVFILT_EXCEPT, shared with userspace and with
+ * EVFILT_{READ|WRITE}
+ */
+#define NOTE_OOB 0x0004 /* OOB data on a socket */
/*
* data/hint flags for EVFILT_VNODE, shared with userspace
Index: lib/libc/sys/kqueue.2
===================================================================
RCS file: /cvs/src/lib/libc/sys/kqueue.2,v
retrieving revision 1.40
diff -u -p -r1.40 kqueue.2
--- lib/libc/sys/kqueue.2 31 May 2020 03:49:44 -0000 1.40
+++ lib/libc/sys/kqueue.2 17 Jun 2020 09:26:15 -0000
@@ -310,6 +310,13 @@ enabled and there is any data to read;
.Fa data
contains the number of bytes available.
.El
+.It Dv EVFILT_EXCEPT
+Takes a descriptor as the identifier, and returns whenever one of the
+specified exceptional conditions has occurred on the descriptor.
+Conditions are specified in
+.Fa fflags .
+Currently, a filter can monitor the reception of out-of-band data with
+.Dv NOTE_OOB .
.It Dv EVFILT_WRITE
Takes a descriptor as the identifier, and returns whenever
it is possible to write to the descriptor.