Diff below gets rid of the (un)famous XXX in NFS by implementing proper
locking for NFS nodes, similar to the inode one.

It needs *a lot* of tests.  If you run into a problem, try to reproduce
it by defining VFSLCKDEBUG and WITNESS in your kernel config file.  If
that's not enough, you can toggle the "#if 0" in the diff below.

Having proper locking is a requirement to fix more NFS races and
presumably VFS issues as well.  So any help would benefit a lot future
improvements in this area.

Comments?

Index: nfs/nfs_node.c
===================================================================
RCS file: /cvs/src/sys/nfs/nfs_node.c,v
retrieving revision 1.67
diff -u -p -r1.67 nfs_node.c
--- nfs/nfs_node.c      9 Apr 2018 09:39:53 -0000       1.67
+++ nfs/nfs_node.c      17 Apr 2018 14:15:23 -0000
@@ -134,6 +134,10 @@ loop:
        }
 
        vp = nvp;
+#ifdef VFSLCKDEBUG
+       vp->v_flag |= VLOCKSWORK;
+#endif
+       rrw_init_flags(&np->n_lock, "nfsnode", RWL_DUPOK | RWL_IS_VNODE);
        vp->v_data = np;
        /* we now have an nfsnode on this vnode */
        vp->v_flag &= ~VLARVAL;
@@ -142,6 +146,8 @@ loop:
        np->n_fhp = &np->n_fh;
        bcopy(fh, np->n_fhp, fhsize);
        np->n_fhsize = fhsize;
+       /* lock the nfsnode, then put it on the rbtree */
+       rrw_enter(&np->n_lock, RW_WRITE);
        np2 = RBT_INSERT(nfs_nodetree, &nmp->nm_ntree, np);
        KASSERT(np2 == NULL);
        np->n_accstamp = -1;
@@ -183,9 +189,10 @@ nfs_inactive(void *v)
                 * Remove the silly file that was rename'd earlier
                 */
                nfs_vinvalbuf(ap->a_vp, 0, sp->s_cred, curproc);
+               vn_lock(sp->s_dvp, LK_EXCLUSIVE | LK_RETRY, curproc);
                nfs_removeit(sp);
                crfree(sp->s_cred);
-               vrele(sp->s_dvp);
+               vput(sp->s_dvp);
                free(sp, M_NFSREQ, sizeof(*sp));
        }
        np->n_flag &= (NMODIFIED | NFLUSHINPROG | NFLUSHWANT);
Index: nfs/nfs_vnops.c
===================================================================
RCS file: /cvs/src/sys/nfs/nfs_vnops.c,v
retrieving revision 1.172
diff -u -p -r1.172 nfs_vnops.c
--- nfs/nfs_vnops.c     17 Apr 2018 07:45:24 -0000      1.172
+++ nfs/nfs_vnops.c     17 Apr 2018 14:15:23 -0000
@@ -88,7 +88,9 @@ int nfs_flush(struct vnode *, struct ucr
 int nfs_fsync(void *);
 int nfs_getattr(void *);
 int nfs_getreq(struct nfsrv_descript *, struct nfsd *, int);
+int nfs_islocked(void *);
 int nfs_link(void *);
+int nfs_lock(void *);
 int nfs_lookitup(struct vnode *, char *, int, struct ucred *, struct proc *,
        struct nfsnode **);
 int nfs_lookup(void *);
@@ -120,6 +122,7 @@ int nfs_sillyrename(struct vnode *, stru
                         struct componentname *);
 int nfs_strategy(void *);
 int nfs_symlink(void *);
+int nfs_unlock(void *);
 
 void nfs_cache_enter(struct vnode *, struct vnode *, struct componentname *);
 
@@ -161,12 +164,12 @@ struct vops nfs_vops = {
        .vop_abortop    = vop_generic_abortop,
        .vop_inactive   = nfs_inactive,
        .vop_reclaim    = nfs_reclaim,
-       .vop_lock       = vop_generic_lock,     /* XXX: beck@ must fix this. */
-       .vop_unlock     = vop_generic_unlock,
+       .vop_lock       = nfs_lock,
+       .vop_unlock     = nfs_unlock,
        .vop_bmap       = nfs_bmap,
        .vop_strategy   = nfs_strategy,
        .vop_print      = nfs_print,
-       .vop_islocked   = vop_generic_islocked,
+       .vop_islocked   = nfs_islocked,
        .vop_pathconf   = nfs_pathconf,
        .vop_advlock    = nfs_advlock,
        .vop_bwrite     = nfs_bwrite
@@ -183,10 +186,10 @@ struct vops nfs_specvops = {
        .vop_fsync      = nfs_fsync,
        .vop_inactive   = nfs_inactive,
        .vop_reclaim    = nfs_reclaim,
-       .vop_lock       = vop_generic_lock,
-       .vop_unlock     = vop_generic_unlock,
+       .vop_lock       = nfs_lock,
+       .vop_unlock     = nfs_unlock,
        .vop_print      = nfs_print,
-       .vop_islocked   = vop_generic_islocked,
+       .vop_islocked   = nfs_islocked,
 
        /* XXX: Keep in sync with spec_vops. */
        .vop_lookup     = vop_generic_lookup,
@@ -224,10 +227,10 @@ struct vops nfs_fifovops = {
        .vop_fsync      = nfs_fsync,
        .vop_inactive   = nfs_inactive,
        .vop_reclaim    = nfsfifo_reclaim,
-       .vop_lock       = vop_generic_lock,
-       .vop_unlock     = vop_generic_unlock,
+       .vop_lock       = nfs_lock,
+       .vop_unlock     = nfs_unlock,
        .vop_print      = nfs_print,
-       .vop_islocked   = vop_generic_islocked,
+       .vop_islocked   = nfs_islocked,
        .vop_bwrite     = vop_generic_bwrite,
 
        /* XXX: Keep in sync with fifo_vops. */
@@ -1033,6 +1036,54 @@ nfs_readlink(void *v)
        return (nfs_bioread(vp, ap->a_uio, 0, ap->a_cred));
 }
 
+#ifdef DDB
+#include <ddb/db_output.h>
+#endif
+
+/*
+ * Lock an inode.
+ */
+int
+nfs_lock(void *v)
+{
+       struct vop_lock_args *ap = v;
+       struct vnode *vp = ap->a_vp;
+
+#if 0 //def DDB
+       db_stack_dump();
+#endif
+
+       return rrw_enter(&VTONFS(vp)->n_lock, ap->a_flags & LK_RWFLAGS);
+}
+
+/*
+ * Unlock an inode.
+ */
+int
+nfs_unlock(void *v)
+{
+       struct vop_unlock_args *ap = v;
+       struct vnode *vp = ap->a_vp;
+
+#if 0 //def DDB
+       db_stack_dump();
+#endif
+
+       rrw_exit(&VTONFS(vp)->n_lock);
+       return 0;
+}
+
+/*
+ * Check for a locked inode.
+ */
+int
+nfs_islocked(void *v)
+{
+       struct vop_islocked_args *ap = v;
+
+       return rrw_status(&VTONFS(ap->a_vp)->n_lock);
+}
+
 /*
  * Do a readlink rpc.
  * Called by nfs_doio() from below the buffer cache.
@@ -1545,9 +1596,9 @@ nfs_remove(void *v)
 int
 nfs_removeit(struct sillyrename *sp)
 {
+       KASSERT(VOP_ISLOCKED(sp->s_dvp));
        /*
         * Make sure that the directory vnode is still valid.
-        * XXX we should lock sp->s_dvp here.
         *
         * NFS can potentially try to nuke a silly *after* the directory
         * has already been pushed out on a forced unmount. Since the silly
@@ -1725,6 +1776,7 @@ nfs_link(void *v)
        struct vnode *vp = ap->a_vp;
        struct vnode *dvp = ap->a_dvp;
        struct componentname *cnp = ap->a_cnp;
+       struct proc *p = cnp->cn_proc;
        struct nfsm_info        info;
        u_int32_t *tl;
        int32_t t1;
@@ -1738,6 +1790,12 @@ nfs_link(void *v)
                vput(dvp);
                return (EXDEV);
        }
+       error = vn_lock(vp, LK_EXCLUSIVE, p);
+       if (error != 0) {
+               VOP_ABORTOP(dvp, cnp);
+               vput(dvp);
+               return (error);
+       }
 
        /*
         * Push all writes to the server, so that the attribute cache
@@ -1771,6 +1829,7 @@ nfsmout: 
 
        VN_KNOTE(vp, NOTE_LINK);
        VN_KNOTE(dvp, NOTE_WRITE);
+       VOP_UNLOCK(vp, p);
        vput(dvp);
        return (error);
 }
Index: nfs/nfsnode.h
===================================================================
RCS file: /cvs/src/sys/nfs/nfsnode.h,v
retrieving revision 1.39
diff -u -p -r1.39 nfsnode.h
--- nfs/nfsnode.h       15 Dec 2009 15:53:48 -0000      1.39
+++ nfs/nfsnode.h       17 Apr 2018 14:15:23 -0000
@@ -43,7 +43,7 @@
 #include <nfs/nfs.h>
 #endif
 
-#include <sys/rwlock.h>
+#include <sys/lock.h>
 
 /*
  * Silly rename structure that hangs off the nfsnode until the name
@@ -79,6 +79,7 @@ struct nfsnode {
        nfsfh_t                 *n_fhp;         /* NFS File Handle */
        struct vnode            *n_vnode;       /* associated vnode */
        struct lockf            *n_lockf;       /* Locking record of file */
+       struct rrwlock          n_lock;         /* NFSnode lock */
        int                     n_error;        /* Save write error value */
        union {
                struct timespec nf_atim;        /* Special file times */

Reply via email to