Diff below gets rid of the (un)famous XXX in NFS by implementing proper locking for NFS nodes, similar to the inode one.
It needs *a lot* of tests. If you run into a problem, try to reproduce it by defining VFSLCKDEBUG and WITNESS in your kernel config file. If that's not enough, you can toggle the "#if 0" in the diff below. Having proper locking is a requirement to fix more NFS races and presumably VFS issues as well. So any help would benefit a lot future improvements in this area. Comments? Index: nfs/nfs_node.c =================================================================== RCS file: /cvs/src/sys/nfs/nfs_node.c,v retrieving revision 1.67 diff -u -p -r1.67 nfs_node.c --- nfs/nfs_node.c 9 Apr 2018 09:39:53 -0000 1.67 +++ nfs/nfs_node.c 17 Apr 2018 14:15:23 -0000 @@ -134,6 +134,10 @@ loop: } vp = nvp; +#ifdef VFSLCKDEBUG + vp->v_flag |= VLOCKSWORK; +#endif + rrw_init_flags(&np->n_lock, "nfsnode", RWL_DUPOK | RWL_IS_VNODE); vp->v_data = np; /* we now have an nfsnode on this vnode */ vp->v_flag &= ~VLARVAL; @@ -142,6 +146,8 @@ loop: np->n_fhp = &np->n_fh; bcopy(fh, np->n_fhp, fhsize); np->n_fhsize = fhsize; + /* lock the nfsnode, then put it on the rbtree */ + rrw_enter(&np->n_lock, RW_WRITE); np2 = RBT_INSERT(nfs_nodetree, &nmp->nm_ntree, np); KASSERT(np2 == NULL); np->n_accstamp = -1; @@ -183,9 +189,10 @@ nfs_inactive(void *v) * Remove the silly file that was rename'd earlier */ nfs_vinvalbuf(ap->a_vp, 0, sp->s_cred, curproc); + vn_lock(sp->s_dvp, LK_EXCLUSIVE | LK_RETRY, curproc); nfs_removeit(sp); crfree(sp->s_cred); - vrele(sp->s_dvp); + vput(sp->s_dvp); free(sp, M_NFSREQ, sizeof(*sp)); } np->n_flag &= (NMODIFIED | NFLUSHINPROG | NFLUSHWANT); Index: nfs/nfs_vnops.c =================================================================== RCS file: /cvs/src/sys/nfs/nfs_vnops.c,v retrieving revision 1.172 diff -u -p -r1.172 nfs_vnops.c --- nfs/nfs_vnops.c 17 Apr 2018 07:45:24 -0000 1.172 +++ nfs/nfs_vnops.c 17 Apr 2018 14:15:23 -0000 @@ -88,7 +88,9 @@ int nfs_flush(struct vnode *, struct ucr int nfs_fsync(void *); int nfs_getattr(void *); int nfs_getreq(struct nfsrv_descript *, struct nfsd *, int); +int nfs_islocked(void *); int nfs_link(void *); +int nfs_lock(void *); int nfs_lookitup(struct vnode *, char *, int, struct ucred *, struct proc *, struct nfsnode **); int nfs_lookup(void *); @@ -120,6 +122,7 @@ int nfs_sillyrename(struct vnode *, stru struct componentname *); int nfs_strategy(void *); int nfs_symlink(void *); +int nfs_unlock(void *); void nfs_cache_enter(struct vnode *, struct vnode *, struct componentname *); @@ -161,12 +164,12 @@ struct vops nfs_vops = { .vop_abortop = vop_generic_abortop, .vop_inactive = nfs_inactive, .vop_reclaim = nfs_reclaim, - .vop_lock = vop_generic_lock, /* XXX: beck@ must fix this. */ - .vop_unlock = vop_generic_unlock, + .vop_lock = nfs_lock, + .vop_unlock = nfs_unlock, .vop_bmap = nfs_bmap, .vop_strategy = nfs_strategy, .vop_print = nfs_print, - .vop_islocked = vop_generic_islocked, + .vop_islocked = nfs_islocked, .vop_pathconf = nfs_pathconf, .vop_advlock = nfs_advlock, .vop_bwrite = nfs_bwrite @@ -183,10 +186,10 @@ struct vops nfs_specvops = { .vop_fsync = nfs_fsync, .vop_inactive = nfs_inactive, .vop_reclaim = nfs_reclaim, - .vop_lock = vop_generic_lock, - .vop_unlock = vop_generic_unlock, + .vop_lock = nfs_lock, + .vop_unlock = nfs_unlock, .vop_print = nfs_print, - .vop_islocked = vop_generic_islocked, + .vop_islocked = nfs_islocked, /* XXX: Keep in sync with spec_vops. */ .vop_lookup = vop_generic_lookup, @@ -224,10 +227,10 @@ struct vops nfs_fifovops = { .vop_fsync = nfs_fsync, .vop_inactive = nfs_inactive, .vop_reclaim = nfsfifo_reclaim, - .vop_lock = vop_generic_lock, - .vop_unlock = vop_generic_unlock, + .vop_lock = nfs_lock, + .vop_unlock = nfs_unlock, .vop_print = nfs_print, - .vop_islocked = vop_generic_islocked, + .vop_islocked = nfs_islocked, .vop_bwrite = vop_generic_bwrite, /* XXX: Keep in sync with fifo_vops. */ @@ -1033,6 +1036,54 @@ nfs_readlink(void *v) return (nfs_bioread(vp, ap->a_uio, 0, ap->a_cred)); } +#ifdef DDB +#include <ddb/db_output.h> +#endif + +/* + * Lock an inode. + */ +int +nfs_lock(void *v) +{ + struct vop_lock_args *ap = v; + struct vnode *vp = ap->a_vp; + +#if 0 //def DDB + db_stack_dump(); +#endif + + return rrw_enter(&VTONFS(vp)->n_lock, ap->a_flags & LK_RWFLAGS); +} + +/* + * Unlock an inode. + */ +int +nfs_unlock(void *v) +{ + struct vop_unlock_args *ap = v; + struct vnode *vp = ap->a_vp; + +#if 0 //def DDB + db_stack_dump(); +#endif + + rrw_exit(&VTONFS(vp)->n_lock); + return 0; +} + +/* + * Check for a locked inode. + */ +int +nfs_islocked(void *v) +{ + struct vop_islocked_args *ap = v; + + return rrw_status(&VTONFS(ap->a_vp)->n_lock); +} + /* * Do a readlink rpc. * Called by nfs_doio() from below the buffer cache. @@ -1545,9 +1596,9 @@ nfs_remove(void *v) int nfs_removeit(struct sillyrename *sp) { + KASSERT(VOP_ISLOCKED(sp->s_dvp)); /* * Make sure that the directory vnode is still valid. - * XXX we should lock sp->s_dvp here. * * NFS can potentially try to nuke a silly *after* the directory * has already been pushed out on a forced unmount. Since the silly @@ -1725,6 +1776,7 @@ nfs_link(void *v) struct vnode *vp = ap->a_vp; struct vnode *dvp = ap->a_dvp; struct componentname *cnp = ap->a_cnp; + struct proc *p = cnp->cn_proc; struct nfsm_info info; u_int32_t *tl; int32_t t1; @@ -1738,6 +1790,12 @@ nfs_link(void *v) vput(dvp); return (EXDEV); } + error = vn_lock(vp, LK_EXCLUSIVE, p); + if (error != 0) { + VOP_ABORTOP(dvp, cnp); + vput(dvp); + return (error); + } /* * Push all writes to the server, so that the attribute cache @@ -1771,6 +1829,7 @@ nfsmout: VN_KNOTE(vp, NOTE_LINK); VN_KNOTE(dvp, NOTE_WRITE); + VOP_UNLOCK(vp, p); vput(dvp); return (error); } Index: nfs/nfsnode.h =================================================================== RCS file: /cvs/src/sys/nfs/nfsnode.h,v retrieving revision 1.39 diff -u -p -r1.39 nfsnode.h --- nfs/nfsnode.h 15 Dec 2009 15:53:48 -0000 1.39 +++ nfs/nfsnode.h 17 Apr 2018 14:15:23 -0000 @@ -43,7 +43,7 @@ #include <nfs/nfs.h> #endif -#include <sys/rwlock.h> +#include <sys/lock.h> /* * Silly rename structure that hangs off the nfsnode until the name @@ -79,6 +79,7 @@ struct nfsnode { nfsfh_t *n_fhp; /* NFS File Handle */ struct vnode *n_vnode; /* associated vnode */ struct lockf *n_lockf; /* Locking record of file */ + struct rrwlock n_lock; /* NFSnode lock */ int n_error; /* Save write error value */ union { struct timespec nf_atim; /* Special file times */