Re: NFS kqueue handlers & poll(2)/select(2) compatibility

2020-06-03 Thread Martin Pieuchot
On 01/06/20(Mon) 15:41, Visa Hankala wrote:
> On Sun, May 31, 2020 at 10:48:52AM +0200, Martin Pieuchot wrote:
> > NFS poll(2)/select(2) and kqueue(2) behaviors are incoherent.  Diff
> > below uses the kernel-only NOTE_IMM hint to make the kqueue handlers
> > behave like the current poll handler: the poller is bypassed.
> > 
> > The new EVFILT_WRITE handler doesn't check for NOTE_IMM because it is
> > unlikely to introduce regression.
> > 
> > Is this a preferred approach?  Ok?
> 
> Yes, this does not undermine the functionality.

Thanks for the feedbacks.

> [...] 
> Ah, here it does make a difference that the userland can manipulate
> kn_sfflags. The flag should be immutable (or the need of unwatching
> managed inside nfs_kq.c somehow).

Would the approach below using the last bit masked by EV_SYSFLAGS would
be more appropriate?

I renamed the define to "OLDAPI" matching DragonFly's name because we're
now using it for more than immediate read behavior.

Index: isofs/cd9660/cd9660_vnops.c
===
RCS file: /cvs/src/sys/isofs/cd9660/cd9660_vnops.c,v
retrieving revision 1.83
diff -u -p -r1.83 cd9660_vnops.c
--- isofs/cd9660/cd9660_vnops.c 7 Apr 2020 13:27:51 -   1.83
+++ isofs/cd9660/cd9660_vnops.c 3 Jun 2020 09:14:15 -
@@ -1036,6 +1036,9 @@ filt_cd9660read(struct knote *kn, long h
return (1);
}
 
+   if (kn->kn_flags & EV_OLDAPI)
+   return (1);
+
return (kn->kn_data != 0);
 }
 
Index: miscfs/fuse/fuse_vnops.c
===
RCS file: /cvs/src/sys/miscfs/fuse/fuse_vnops.c,v
retrieving revision 1.59
diff -u -p -r1.59 fuse_vnops.c
--- miscfs/fuse/fuse_vnops.c7 Apr 2020 13:27:51 -   1.59
+++ miscfs/fuse/fuse_vnops.c3 Jun 2020 09:14:15 -
@@ -188,6 +188,9 @@ filt_fusefsread(struct knote *kn, long h
return (1);
}
 
+   if (kn->kn_flags & EV_OLDAPI)
+   return (1);
+
return (kn->kn_data != 0);
 }
 
Index: msdosfs/msdosfs_vnops.c
===
RCS file: /cvs/src/sys/msdosfs/msdosfs_vnops.c,v
retrieving revision 1.132
diff -u -p -r1.132 msdosfs_vnops.c
--- msdosfs/msdosfs_vnops.c 7 Apr 2020 13:27:52 -   1.132
+++ msdosfs/msdosfs_vnops.c 3 Jun 2020 09:14:15 -
@@ -2013,6 +2013,10 @@ filt_msdosfsread(struct knote *kn, long 
kn->kn_fflags |= NOTE_EOF;
return (1);
}
+
+   if (kn->kn_flags & EV_OLDAPI)
+   return (1);
+
return (kn->kn_data != 0);
 }
 
Index: nfs/nfs_kq.c
===
RCS file: /cvs/src/sys/nfs/nfs_kq.c,v
retrieving revision 1.30
diff -u -p -r1.30 nfs_kq.c
--- nfs/nfs_kq.c7 Apr 2020 13:27:52 -   1.30
+++ nfs/nfs_kq.c3 Jun 2020 09:14:15 -
@@ -50,9 +50,12 @@
 #include 
 
 void   nfs_kqpoll(void *);
+intnfs_kqwatch(struct vnode *);
+void   nfs_kqunwatch(struct vnode *);
 
 void   filt_nfsdetach(struct knote *);
 intfilt_nfsread(struct knote *, long);
+intfilt_nfswrite(struct knote *, long);
 intfilt_nfsvnode(struct knote *, long);
 
 struct kevq {
@@ -182,11 +185,19 @@ void
 filt_nfsdetach(struct knote *kn)
 {
struct vnode *vp = (struct vnode *)kn->kn_hook;
-   struct kevq *ke;
 
klist_remove(&vp->v_selectinfo.si_note, kn);
 
/* Remove the vnode from watch list */
+   if ((kn->kn_flags & EV_OLDAPI) == 0)
+   nfs_kqunwatch(vp);
+}
+
+void
+nfs_kqunwatch(struct vnode *vp)
+{
+   struct kevq *ke;
+
rw_enter_write(&nfskevq_lock);
SLIST_FOREACH(ke, &kevlist, kev_link) {
if (ke->vp == vp) {
@@ -234,10 +245,30 @@ filt_nfsread(struct knote *kn, long hint
kn->kn_fflags |= NOTE_EOF;
return (1);
}
+
+   if (kn->kn_flags & EV_OLDAPI)
+   return (1);
+
 return (kn->kn_data != 0);
 }
 
 int
+filt_nfswrite(struct knote *kn, long hint)
+{
+   /*
+* filesystem is gone, so set the EOF flag and schedule
+* the knote for deletion.
+*/
+   if (hint == NOTE_REVOKE) {
+   kn->kn_flags |= (EV_EOF | EV_ONESHOT);
+   return (1);
+   }
+
+   kn->kn_data = 0;
+   return (1);
+}
+
+int
 filt_nfsvnode(struct knote *kn, long hint)
 {
if (kn->kn_sfflags & hint)
@@ -256,6 +287,13 @@ static const struct filterops nfsread_fi
.f_event= filt_nfsread,
 };
 
+const struct filterops nfswrite_filtops = {
+   .f_flags= FILTEROP_ISFD,
+   .f_attach   = NULL,
+   .f_detach   = filt_nfsdetach,
+   .f_event= filt_nfswrite,
+};
+
 static const struct filterops nfsvnode_filtops = {
.f_flags= FILTEROP_ISFD,
.f_attach   = NULL,
@@ -269,10 +307,6 @@ nfs_kqfilter(void *v)
struct vop_k

NFS kqueue handlers & poll(2)/select(2) compatibility

2020-05-31 Thread Martin Pieuchot
NFS poll(2)/select(2) and kqueue(2) behaviors are incoherent.  Diff
below uses the kernel-only NOTE_IMM hint to make the kqueue handlers
behave like the current poll handler: the poller is bypassed.

The new EVFILT_WRITE handler doesn't check for NOTE_IMM because it is
unlikely to introduce regression.

Is this a preferred approach?  Ok?

Index: nfs/nfs_kq.c
===
RCS file: /cvs/src/sys/nfs/nfs_kq.c,v
retrieving revision 1.30
diff -u -p -r1.30 nfs_kq.c
--- nfs/nfs_kq.c7 Apr 2020 13:27:52 -   1.30
+++ nfs/nfs_kq.c31 May 2020 08:43:36 -
@@ -50,9 +50,12 @@
 #include 
 
 void   nfs_kqpoll(void *);
+intnfs_kqwatch(struct vnode *);
+void   nfs_kqunwatch(struct vnode *);
 
 void   filt_nfsdetach(struct knote *);
 intfilt_nfsread(struct knote *, long);
+intfilt_nfswrite(struct knote *, long);
 intfilt_nfsvnode(struct knote *, long);
 
 struct kevq {
@@ -182,11 +185,19 @@ void
 filt_nfsdetach(struct knote *kn)
 {
struct vnode *vp = (struct vnode *)kn->kn_hook;
-   struct kevq *ke;
 
klist_remove(&vp->v_selectinfo.si_note, kn);
 
/* Remove the vnode from watch list */
+   if ((kn->kn_sfflags & NOTE_IMM) == 0)
+   nfs_kqunwatch(vp);
+}
+
+void
+nfs_kqunwatch(struct vnode *vp)
+{
+   struct kevq *ke;
+
rw_enter_write(&nfskevq_lock);
SLIST_FOREACH(ke, &kevlist, kev_link) {
if (ke->vp == vp) {
@@ -238,6 +249,22 @@ filt_nfsread(struct knote *kn, long hint
 }
 
 int
+filt_nfswrite(struct knote *kn, long hint)
+{
+   /*
+* filesystem is gone, so set the EOF flag and schedule
+* the knote for deletion.
+*/
+   if (hint == NOTE_REVOKE) {
+   kn->kn_flags |= (EV_EOF | EV_ONESHOT);
+   return (1);
+   }
+
+   kn->kn_data = 0;
+   return (1);
+}
+
+int
 filt_nfsvnode(struct knote *kn, long hint)
 {
if (kn->kn_sfflags & hint)
@@ -256,6 +283,13 @@ static const struct filterops nfsread_fi
.f_event= filt_nfsread,
 };
 
+const struct filterops nfswrite_filtops = {
+   .f_flags= FILTEROP_ISFD,
+   .f_attach   = NULL,
+   .f_detach   = filt_nfsdetach,
+   .f_event= filt_nfswrite,
+};
+
 static const struct filterops nfsvnode_filtops = {
.f_flags= FILTEROP_ISFD,
.f_attach   = NULL,
@@ -269,10 +303,6 @@ nfs_kqfilter(void *v)
struct vop_kqfilter_args *ap = v;
struct vnode *vp;
struct knote *kn;
-   struct kevq *ke;
-   int error = 0;
-   struct vattr attr;
-   struct proc *p = curproc;   /* XXX */
 
vp = ap->a_vp;
kn = ap->a_kn;
@@ -286,6 +316,9 @@ nfs_kqfilter(void *v)
case EVFILT_READ:
kn->kn_fop = &nfsread_filtops;
break;
+   case EVFILT_WRITE:
+   kn->kn_fop = &nfswrite_filtops;
+   break;
case EVFILT_VNODE:
kn->kn_fop = &nfsvnode_filtops;
break;
@@ -298,7 +331,27 @@ nfs_kqfilter(void *v)
/*
 * Put the vnode to watched list.
 */
-   
+   if ((kn->kn_sfflags & NOTE_IMM) == 0) {
+   int error;
+
+   error = nfs_kqwatch(vp);
+   if (error)
+   return (error);
+   }
+
+   klist_insert(&vp->v_selectinfo.si_note, kn);
+
+   return (0);
+}
+
+int
+nfs_kqwatch(struct vnode *vp)
+{
+   struct proc *p = curproc;   /* XXX */
+   struct vattr attr;
+   struct kevq *ke;
+   int error = 0;
+
/*
 * Fetch current attributes. It's only needed when the vnode
 * is not watched yet, but we need to do this without lock
@@ -338,8 +391,6 @@ nfs_kqfilter(void *v)
 
/* kick the poller */
wakeup(pnfskq);
-
-   klist_insert(&vp->v_selectinfo.si_note, kn);
 
 out:
rw_exit_write(&nfskevq_lock);