Diff below changes the representation of a NFS mount point. It is
adapted from NetBSD with some tweaks of mine and is required for
properly locking NFSnodes.
The idea is to keep a reference to the root vnode in 'struct nfsmount'
instead of doing a lookup every time VFS_ROOT() is called. Calls to
nfs_root() in the diskless path also become useless and we don't have
to play with locking there. Finally we look at the number of refcounts
when unmounting a filesystem, this is useful when debugging async races
as reported by bluhm@ on bugs@.
Note that nfs_root() already returns a "locked" node, that's why I'm
using vput(9) and not vrele(9). However in the current context of NFS
these functions are identical since VOP_LOCK/UNLOCK are noop.
I have tested this on diskless & non-diskless, but I'm looking for more
tests 'cause this area is critical.
Comments? Oks?
Index: nfs/nfs_node.c
===================================================================
RCS file: /cvs/src/sys/nfs/nfs_node.c,v
retrieving revision 1.66
diff -u -p -r1.66 nfs_node.c
--- nfs/nfs_node.c 28 Mar 2018 09:40:26 -0000 1.66
+++ nfs/nfs_node.c 3 Apr 2018 12:22:12 -0000
@@ -138,19 +138,7 @@ loop:
/* we now have an nfsnode on this vnode */
vp->v_flag &= ~VLARVAL;
np->n_vnode = vp;
-
rw_init(&np->n_commitlock, "nfs_commitlk");
-
- /*
- * Are we getting the root? If so, make sure the vnode flags
- * are correct
- */
- if ((fhsize == nmp->nm_fhsize) && !bcmp(fh, nmp->nm_fh, fhsize)) {
- if (vp->v_type == VNON)
- vp->v_type = VDIR;
- vp->v_flag |= VROOT;
- }
-
np->n_fhp = &np->n_fh;
bcopy(fh, np->n_fhp, fhsize);
np->n_fhsize = fhsize;
Index: nfs/nfs_vfsops.c
===================================================================
RCS file: /cvs/src/sys/nfs/nfs_vfsops.c,v
retrieving revision 1.116
diff -u -p -r1.116 nfs_vfsops.c
--- nfs/nfs_vfsops.c 10 Feb 2018 05:24:23 -0000 1.116
+++ nfs/nfs_vfsops.c 3 Apr 2018 13:14:40 -0000
@@ -71,11 +71,13 @@ extern struct nfsstats nfsstats;
extern int nfs_ticks;
extern u_int32_t nfs_procids[NFS_NPROCS];
-int nfs_sysctl(int *, u_int, void *, size_t *, void *, size_t,
struct proc *);
-int nfs_checkexp(struct mount *, struct mbuf *, int *, struct ucred
**);
-struct mount *nfs_mount_diskless(struct nfs_dlmount *, char *, int);
+int nfs_sysctl(int *, u_int, void *, size_t *, void *, size_t,
+ struct proc *);
+int nfs_checkexp(struct mount *, struct mbuf *, int *, struct ucred **);
+struct mount *nfs_mount_diskless(struct nfs_dlmount *, char *, int,
+ struct vnode **, struct proc *p);
int mountnfs(struct nfs_args *, struct mount *, struct mbuf *,
- const char *, char *);
+ const char *, char *, struct vnode **, struct proc *p);
int nfs_quotactl(struct mount *, int, uid_t, caddr_t, struct proc *);
int nfs_root(struct mount *, struct vnode **);
int nfs_start(struct mount *, int, struct proc *);
@@ -123,15 +125,13 @@ nfs_statfs(struct mount *mp, struct stat
struct nfsmount *nmp = VFSTONFS(mp);
int error = 0, retattr;
struct ucred *cred;
- struct nfsnode *np;
u_quad_t tquad;
info.nmi_v3 = (nmp->nm_flag & NFSMNT_NFSV3);
- error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np);
+ error = nfs_root(mp, &vp);
if (error)
return (error);
- vp = NFSTOV(np);
cred = crget();
cred->cr_ngroups = 0;
if (info.nmi_v3 && (nmp->nm_flag & NFSMNT_GOTFSINFO) == 0)
@@ -178,7 +178,7 @@ nfs_statfs(struct mount *mp, struct stat
copy_statfs_info(sbp, mp);
m_freem(info.nmi_mrep);
nfsmout:
- vrele(vp);
+ vput(vp);
crfree(cred);
return (error);
}
@@ -281,14 +281,14 @@ nfs_mountroot(void)
*/
if (nfs_boot_getfh(&nfs_diskless.nd_boot, "root",
&nfs_diskless.nd_root, -1))
panic("nfs_mountroot: root");
- mp = nfs_mount_diskless(&nfs_diskless.nd_root, "/", 0);
- nfs_root(mp, &rootvp);
+ mp = nfs_mount_diskless(&nfs_diskless.nd_root, "/", 0, &vp, procp);
printf("root on %s\n", nfs_diskless.nd_root.ndm_host);
/*
* Link it into the mount list.
*/
TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
+ rootvp = vp;
vfs_unbusy(mp);
/* Get root attributes (for the time). */
@@ -333,8 +333,8 @@ nfs_mountroot(void)
*/
error = nfs_boot_getfh(&nfs_diskless.nd_boot, "swap",
&nfs_diskless.nd_swap, 5);
if (!error) {
- mp = nfs_mount_diskless(&nfs_diskless.nd_swap, "/swap", 0);
- nfs_root(mp, &vp);
+ mp = nfs_mount_diskless(&nfs_diskless.nd_swap, "/swap", 0, &vp,
+ procp);
vfs_unbusy(mp);
/*
@@ -376,7 +376,8 @@ nfs_mountroot(void)
* Internal version of mount system call for diskless setup.
*/
struct mount *
-nfs_mount_diskless(struct nfs_dlmount *ndmntp, char *mntname, int mntflag)
+nfs_mount_diskless(struct nfs_dlmount *ndmntp, char *mntname, int mntflag,
+ struct vnode **vpp, struct proc *p)
{
struct mount *mp;
struct mbuf *m;
@@ -392,7 +393,7 @@ nfs_mount_diskless(struct nfs_dlmount *n
(m->m_len = ndmntp->ndm_args.addr->sa_len));
error = mountnfs(&ndmntp->ndm_args, mp, m, mntname,
- ndmntp->ndm_args.hostname);
+ ndmntp->ndm_args.hostname, vpp, p);
if (error)
panic("nfs_mountroot: mount %s failed: %d", mntname, error);
@@ -556,6 +557,7 @@ nfs_mount(struct mount *mp, const char *
int error;
struct nfs_args *args = data;
struct mbuf *nam;
+ struct vnode *vp;
char hst[MNAMELEN];
size_t len;
u_char nfh[NFSX_V3FHMAX];
@@ -599,7 +601,7 @@ nfs_mount(struct mount *mp, const char *
if (error)
return (error);
args->fh = nfh;
- error = mountnfs(args, mp, nam, path, hst);
+ error = mountnfs(args, mp, nam, path, hst, &vp, p);
return (error);
}
@@ -608,9 +610,12 @@ nfs_mount(struct mount *mp, const char *
*/
int
mountnfs(struct nfs_args *argp, struct mount *mp, struct mbuf *nam,
- const char *pth, char *hst)
+ const char *pth, char *hst, struct vnode **vpp, struct proc *p)
{
struct nfsmount *nmp;
+ struct nfsnode *np;
+ struct vnode *vp;
+ struct vattr attr;
int error;
if (mp->mnt_flag & MNT_UPDATE) {
@@ -633,12 +638,10 @@ mountnfs(struct nfs_args *argp, struct m
nmp->nm_readdirsize = NFS_READDIRSIZE;
nmp->nm_numgrps = NFS_MAXGRPS;
nmp->nm_readahead = NFS_DEFRAHEAD;
- nmp->nm_fhsize = argp->fhsize;
nmp->nm_acregmin = NFS_MINATTRTIMO;
nmp->nm_acregmax = NFS_MAXATTRTIMO;
nmp->nm_acdirmin = NFS_MINATTRTIMO;
nmp->nm_acdirmax = NFS_MAXATTRTIMO;
- bcopy(argp->fh, nmp->nm_fh, argp->fhsize);
mp->mnt_stat.f_namemax = MAXNAMLEN;
memset(mp->mnt_stat.f_mntonname, 0, MNAMELEN);
strlcpy(mp->mnt_stat.f_mntonname, pth, MNAMELEN);
@@ -673,6 +676,30 @@ mountnfs(struct nfs_args *argp, struct m
* point.
*/
mp->mnt_stat.f_iosize = NFS_MAXDGRAMDATA;
+ error = nfs_nget(mp, (nfsfh_t *)argp->fh, argp->fhsize, &np);
+ if (error)
+ goto bad;
+ vp = NFSTOV(np);
+ error = VOP_GETATTR(vp, &attr, p->p_ucred, p);
+ if (error) {
+ vput(vp);
+ goto bad;
+ }
+
+ /*
+ * A reference count is needed on the nfsnode representing the
+ * remote root. If this object is not persistent, then backward
+ * traversals of the mount point (i.e. "..") will not work if
+ * the nfsnode gets flushed out of the cache. Ufs does not have
+ * this problem, because one can identify root inodes by their
+ * number == ROOTINO (2). So, just unlock, but no rele.
+ */
+ nmp->nm_vnode = vp;
+ if (vp->v_type == VNON)
+ vp->v_type = VDIR;
+ vp->v_flag = VROOT;
+ VOP_UNLOCK(vp, curproc);
+ *vpp = vp;
return (0);
bad:
@@ -687,18 +714,35 @@ int
nfs_unmount(struct mount *mp, int mntflags, struct proc *p)
{
struct nfsmount *nmp;
- int error, flags;
+ struct vnode *vp;
+ int error, flags = 0;
nmp = VFSTONFS(mp);
- flags = 0;
+ error = nfs_root(mp, &vp);
+ if (error)
+ return (error);
+
+ if ((mntflags & MNT_FORCE) == 0 && vp->v_usecount > 2) {
+ vput(vp);
+ return (EBUSY);
+ }
if (mntflags & MNT_FORCE)
flags |= FORCECLOSE;
- error = vflush(mp, NULL, flags);
- if (error)
+ error = vflush(mp, vp, flags);
+ if (error) {
+ vput(vp);
return (error);
+ }
+ /*
+ * There are two references count to get rid of here: one
+ * from mountnfs() and one from nfs_root() above.
+ */
+ vrele(vp);
+ vput(vp);
+ vgone(vp);
nfs_disconnect(nmp);
m_freem(nmp->nm_nam);
timeout_del(&nmp->nm_rtimeout);
@@ -713,15 +757,19 @@ nfs_unmount(struct mount *mp, int mntfla
int
nfs_root(struct mount *mp, struct vnode **vpp)
{
+ struct vnode *vp;
struct nfsmount *nmp;
- struct nfsnode *np;
int error;
nmp = VFSTONFS(mp);
- error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np);
- if (error)
+ vp = nmp->nm_vnode;
+ vref(vp);
+ error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, curproc);
+ if (error) {
+ vrele(vp);
return (error);
- *vpp = NFSTOV(np);
+ }
+ *vpp = vp;
return (0);
}
Index: nfs/nfsmount.h
===================================================================
RCS file: /cvs/src/sys/nfs/nfsmount.h,v
retrieving revision 1.27
diff -u -p -r1.27 nfsmount.h
--- nfs/nfsmount.h 22 Feb 2017 11:42:46 -0000 1.27
+++ nfs/nfsmount.h 3 Apr 2018 12:22:12 -0000
@@ -49,12 +49,11 @@ struct nfsmount {
nm_ntree; /* filehandle/node tree */
TAILQ_HEAD(reqs, nfsreq)
nm_reqsq; /* request queue for this mount. */
- struct timeout nm_rtimeout; /* timeout (scans/resends nm_reqsq). */
- int nm_flag; /* Flags for soft/hard... */
+ struct timeout nm_rtimeout; /* timeout (scans/resends nm_reqsq). */
struct mount *nm_mountp; /* Vfs structure for this filesystem */
+ struct vnode *nm_vnode; /* vnode of root dir */
+ int nm_flag; /* Flags for soft/hard... */
int nm_numgrps; /* Max. size of groupslist */
- u_char nm_fh[NFSX_V3FHMAX]; /* File handle of root dir */
- int nm_fhsize; /* Size of root file handle */
struct socket *nm_so; /* Rpc socket */
int nm_sotype; /* Type of socket */
int nm_soproto; /* and protocol */