On Tue, Dec 07, 2021 at 08:40:53AM -0700, Todd C. Miller wrote:
> On Mon, 06 Dec 2021 17:05:39 +0000, Visa Hankala wrote:
>
> > This adds EVFILT_EXCEPT handler for pipes. It is a subset of
> > EVFILT_READ; it triggers on EOF only. This captures the POLLHUP
> > condition of pipe_poll(), used with select(2)'s exceptfds and when
> > poll(2) has events without (POLLIN | POLLRDNORM | POLLOUT | POLLWRNORM).
>
> This looks wrong. We should only be setting exceptfds for out of
> band data on sockets and ptys.
>
> Translating POLLHUP to exceptfds was a bug introduced by the previous
> select -> poll conversion in the kernel. I'm probably responsible
> for that, sorry. This non-standard behavior has resulted in at
> least one bug in the tree with ssh-keyscan being subtly broken on
> other systems.
Very well. Let's adjust the code already now, then.
In simple cases that do not have out-of-band data, the system can skip
registering the EVFILT_EXCEPT filter. A new flag could indicate when
the registration is done for select(2). The patch below does the
tweaking for FIFOs and pipes, to demonstrate the idea. Ptys and sockets
need to be adjusted as well.
The flags should probably be refactored so that __EV_POLL would mean
poll(2) only. However, that results in some mechanical code churn that
I would do separately if the use of __EV_SELECT appears viable in the
first place.
Index: kern/sys_generic.c
===================================================================
RCS file: src/sys/kern/sys_generic.c,v
retrieving revision 1.144
diff -u -p -r1.144 sys_generic.c
--- kern/sys_generic.c 30 Nov 2021 02:58:33 -0000 1.144
+++ kern/sys_generic.c 7 Dec 2021 15:57:45 -0000
@@ -762,7 +762,7 @@ pselregister(struct proc *p, fd_set *pib
DPRINTFN(2, "select fd %d mask %d serial %lu\n",
fd, msk, p->p_kq_serial);
EV_SET(&kev, fd, evf[msk],
- EV_ADD|EV_ENABLE|__EV_POLL,
+ EV_ADD|EV_ENABLE|__EV_POLL|__EV_SELECT,
evff[msk], 0, (void *)(p->p_kq_serial));
#ifdef KTRACE
if (KTRPOINT(p, KTR_STRUCT))
@@ -775,7 +775,8 @@ pselregister(struct proc *p, fd_set *pib
/* FALLTHROUGH */
case EOPNOTSUPP:/* No underlying kqfilter */
case EINVAL: /* Unimplemented filter */
- case EPERM: /* Specific to FIFO */
+ case EPERM: /* Specific to FIFO and
+ * __EV_SELECT */
error = 0;
break;
case EPIPE: /* Specific to pipes */
Index: kern/sys_pipe.c
===================================================================
RCS file: src/sys/kern/sys_pipe.c,v
retrieving revision 1.130
diff -u -p -r1.130 sys_pipe.c
--- kern/sys_pipe.c 7 Dec 2021 14:06:16 -0000 1.130
+++ kern/sys_pipe.c 7 Dec 2021 15:57:45 -0000
@@ -922,6 +922,11 @@ pipe_kqfilter(struct file *fp, struct kn
klist_insert_locked(&wpipe->pipe_sel.si_note, kn);
break;
case EVFILT_EXCEPT:
+ if (kn->kn_flags & __EV_SELECT) {
+ /* Prevent triggering exceptfds. */
+ error = EPERM;
+ break;
+ }
kn->kn_fop = &pipe_efiltops;
kn->kn_hook = rpipe;
klist_insert_locked(&rpipe->pipe_sel.si_note, kn);
Index: miscfs/fifofs/fifo_vnops.c
===================================================================
RCS file: src/sys/miscfs/fifofs/fifo_vnops.c,v
retrieving revision 1.85
diff -u -p -r1.85 fifo_vnops.c
--- miscfs/fifofs/fifo_vnops.c 24 Oct 2021 11:23:22 -0000 1.85
+++ miscfs/fifofs/fifo_vnops.c 7 Dec 2021 15:57:45 -0000
@@ -540,6 +540,10 @@ fifo_kqfilter(void *v)
sb = &so->so_snd;
break;
case EVFILT_EXCEPT:
+ if (ap->a_kn->kn_flags & __EV_SELECT) {
+ /* Prevent triggering exceptfds. */
+ return (EPERM);
+ }
ap->a_kn->kn_fop = &fifoexcept_filtops;
so = fip->fi_readsock;
sb = &so->so_rcv;
Index: sys/event.h
===================================================================
RCS file: src/sys/sys/event.h,v
retrieving revision 1.59
diff -u -p -r1.59 event.h
--- sys/event.h 29 Nov 2021 15:54:04 -0000 1.59
+++ sys/event.h 7 Dec 2021 15:57:45 -0000
@@ -74,7 +74,7 @@ struct kevent {
#define EV_RECEIPT 0x0040 /* force EV_ERROR on success, data=0 */
#define EV_DISPATCH 0x0080 /* disable event after reporting */
-#define EV_SYSFLAGS 0xF000 /* reserved by system */
+#define EV_SYSFLAGS 0xf800 /* reserved by system */
#define EV_FLAG1 0x2000 /* filter-specific flag */
/* returned values */
@@ -141,6 +141,7 @@ struct klist {
#ifdef _KERNEL
/* kernel-only flags */
+#define __EV_SELECT 0x0800 /* match behavior of select */
#define __EV_POLL 0x1000 /* match behavior of poll & select */
#define __EV_HUP EV_FLAG1 /* device or socket disconnected */