Module Name:    src
Committed By:   ad
Date:           Sat Apr 4 20:49:31 UTC 2020

Modified Files:
    src/sys/fs/cd9660: cd9660_lookup.c cd9660_vfsops.c
    src/sys/fs/msdosfs: msdosfs_lookup.c msdosfs_vfsops.c
    src/sys/fs/tmpfs: tmpfs_subr.c tmpfs_vfsops.c
    src/sys/kern: vfs_cache.c vfs_getcwd.c vfs_lookup.c vfs_syscalls.c vfs_vnode.c
    src/sys/miscfs/genfs: layer_vnops.c
    src/sys/miscfs/nullfs: null_vfsops.c
    src/sys/miscfs/procfs: procfs_vfsops.c
    src/sys/sys: fstypes.h namei.src
    src/sys/ufs/chfs: chfs_vnops.c
    src/sys/ufs/ext2fs: ext2fs_lookup.c ext2fs_vfsops.c
    src/sys/ufs/ffs: ffs_vfsops.c
    src/sys/ufs/lfs: lfs_vfsops.c ulfs_lookup.c
    src/sys/ufs/ufs: ufs_lookup.c ufs_vnops.c

Log Message:
Merge the remaining changes from the ad-namecache branch, affecting namei()
and getcwd():

- push vnode locking back as far as possible.
- do most lookups directly in the namecache, avoiding vnode locks & refs.
- don't block new refs to vnodes across VOP_INACTIVE().
- get shared locks for VOP_LOOKUP() if the file system supports it.
- correct lock types for VOP_ACCESS() / VOP_GETATTR() in a few places.

Possible future enhancements:

- make the lookups lockless.
- support dotdot lookups by being lockless and inferring absence of chroot.
- maybe make it work for layered file systems.
- avoid vnode references at the root & cwd.

To generate a diff of this commit:
cvs rdiff -u -r1.30 -r1.31 src/sys/fs/cd9660/cd9660_lookup.c
cvs rdiff -u -r1.95 -r1.96 src/sys/fs/cd9660/cd9660_vfsops.c
cvs rdiff -u -r1.35 -r1.36 src/sys/fs/msdosfs/msdosfs_lookup.c
cvs rdiff -u -r1.133 -r1.134 src/sys/fs/msdosfs/msdosfs_vfsops.c
cvs rdiff -u -r1.107 -r1.108 src/sys/fs/tmpfs/tmpfs_subr.c
cvs rdiff -u -r1.76 -r1.77 src/sys/fs/tmpfs/tmpfs_vfsops.c
cvs rdiff -u -r1.136 -r1.137 src/sys/kern/vfs_cache.c
cvs rdiff -u -r1.56 -r1.57 src/sys/kern/vfs_getcwd.c
cvs rdiff -u -r1.214 -r1.215 src/sys/kern/vfs_lookup.c
cvs rdiff -u -r1.544 -r1.545 src/sys/kern/vfs_syscalls.c
cvs rdiff -u -r1.116 -r1.117 src/sys/kern/vfs_vnode.c
cvs rdiff -u -r1.68 -r1.69 src/sys/miscfs/genfs/layer_vnops.c
cvs rdiff -u -r1.97 -r1.98 src/sys/miscfs/nullfs/null_vfsops.c
cvs rdiff -u -r1.103 -r1.104 src/sys/miscfs/procfs/procfs_vfsops.c
cvs rdiff -u -r1.37 -r1.38 src/sys/sys/fstypes.h
cvs rdiff -u -r1.52 -r1.53 src/sys/sys/namei.src
cvs rdiff -u -r1.36 -r1.37 src/sys/ufs/chfs/chfs_vnops.c
cvs rdiff -u -r1.89 -r1.90 src/sys/ufs/ext2fs/ext2fs_lookup.c
cvs rdiff -u -r1.217 -r1.218 src/sys/ufs/ext2fs/ext2fs_vfsops.c
cvs rdiff -u -r1.366 -r1.367 src/sys/ufs/ffs/ffs_vfsops.c
cvs rdiff -u -r1.377 -r1.378 src/sys/ufs/lfs/lfs_vfsops.c
cvs rdiff -u -r1.42 -r1.43 src/sys/ufs/lfs/ulfs_lookup.c
cvs rdiff -u -r1.151 -r1.152 src/sys/ufs/ufs/ufs_lookup.c
cvs rdiff -u -r1.249 -r1.250 src/sys/ufs/ufs/ufs_vnops.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
Modified files: Index: src/sys/fs/cd9660/cd9660_lookup.c diff -u src/sys/fs/cd9660/cd9660_lookup.c:1.30 src/sys/fs/cd9660/cd9660_lookup.c:1.31 --- src/sys/fs/cd9660/cd9660_lookup.c:1.30 Sat Mar 28 19:24:05 2015 +++ src/sys/fs/cd9660/cd9660_lookup.c Sat Apr 4 20:49:30 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: cd9660_lookup.c,v 1.30 2015/03/28 19:24:05 maxv Exp $ */ +/* $NetBSD: cd9660_lookup.c,v 1.31 2020/04/04 20:49:30 ad Exp $ */ /*- * Copyright (c) 1989, 1993, 1994 @@ -39,7 +39,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: cd9660_lookup.c,v 1.30 2015/03/28 19:24:05 maxv Exp $"); +__KERNEL_RCSID(0, "$NetBSD: cd9660_lookup.c,v 1.31 2020/04/04 20:49:30 ad Exp $"); #include <sys/param.h> #include <sys/namei.h> @@ -152,6 +152,9 @@ cd9660_lookup(void *v) cnp->cn_nameiop, cnp->cn_flags, NULL, vpp)) { return *vpp == NULLVP ? ENOENT : 0; } + /* May need to restart the lookup with an exclusive lock. */ + if (VOP_ISLOCKED(vdp) != LK_EXCLUSIVE) + return ENOLCK; len = cnp->cn_namelen; name = cnp->cn_nameptr; Index: src/sys/fs/cd9660/cd9660_vfsops.c diff -u src/sys/fs/cd9660/cd9660_vfsops.c:1.95 src/sys/fs/cd9660/cd9660_vfsops.c:1.96 --- src/sys/fs/cd9660/cd9660_vfsops.c:1.95 Mon Mar 16 21:20:09 2020 +++ src/sys/fs/cd9660/cd9660_vfsops.c Sat Apr 4 20:49:30 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: cd9660_vfsops.c,v 1.95 2020/03/16 21:20:09 pgoyette Exp $ */ +/* $NetBSD: cd9660_vfsops.c,v 1.96 2020/04/04 20:49:30 ad Exp $ */ /*- * Copyright (c) 1994 @@ -37,7 +37,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: cd9660_vfsops.c,v 1.95 2020/03/16 21:20:09 pgoyette Exp $"); +__KERNEL_RCSID(0, "$NetBSD: cd9660_vfsops.c,v 1.96 2020/04/04 20:49:30 ad Exp $"); #if defined(_KERNEL_OPT) #include "opt_compat_netbsd.h" @@ -446,7 +446,7 @@ iso_mountfs(struct vnode *devvp, struct mp->mnt_stat.f_fsid = mp->mnt_stat.f_fsidx.__fsid_val[0]; mp->mnt_stat.f_namemax = ISO_MAXNAMLEN; mp->mnt_flag |= MNT_LOCAL; - mp->mnt_iflag |= IMNT_MPSAFE; + mp->mnt_iflag |= IMNT_MPSAFE | IMNT_SHRLOOKUP; 
mp->mnt_dev_bshift = iso_bsize; mp->mnt_fs_bshift = isomp->im_bshift; isomp->im_mountp = mp; Index: src/sys/fs/msdosfs/msdosfs_lookup.c diff -u src/sys/fs/msdosfs/msdosfs_lookup.c:1.35 src/sys/fs/msdosfs/msdosfs_lookup.c:1.36 --- src/sys/fs/msdosfs/msdosfs_lookup.c:1.35 Sat Jan 30 09:59:27 2016 +++ src/sys/fs/msdosfs/msdosfs_lookup.c Sat Apr 4 20:49:30 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: msdosfs_lookup.c,v 1.35 2016/01/30 09:59:27 mlelstv Exp $ */ +/* $NetBSD: msdosfs_lookup.c,v 1.36 2020/04/04 20:49:30 ad Exp $ */ /*- * Copyright (C) 1994, 1995, 1997 Wolfgang Solfrank. @@ -52,7 +52,7 @@ #endif #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: msdosfs_lookup.c,v 1.35 2016/01/30 09:59:27 mlelstv Exp $"); +__KERNEL_RCSID(0, "$NetBSD: msdosfs_lookup.c,v 1.36 2020/04/04 20:49:30 ad Exp $"); #include <sys/param.h> @@ -161,6 +161,10 @@ msdosfs_lookup(void *v) return *vpp == NULLVP ? ENOENT: 0; } + /* May need to restart the lookup with an exclusive lock. */ + if (VOP_ISLOCKED(vdp) != LK_EXCLUSIVE) + return ENOLCK; + /* * If they are going after the . or .. entry in the root directory, * they won't find it. DOS filesystems don't have them in the root Index: src/sys/fs/msdosfs/msdosfs_vfsops.c diff -u src/sys/fs/msdosfs/msdosfs_vfsops.c:1.133 src/sys/fs/msdosfs/msdosfs_vfsops.c:1.134 --- src/sys/fs/msdosfs/msdosfs_vfsops.c:1.133 Mon Mar 16 21:20:10 2020 +++ src/sys/fs/msdosfs/msdosfs_vfsops.c Sat Apr 4 20:49:30 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: msdosfs_vfsops.c,v 1.133 2020/03/16 21:20:10 pgoyette Exp $ */ +/* $NetBSD: msdosfs_vfsops.c,v 1.134 2020/04/04 20:49:30 ad Exp $ */ /*- * Copyright (C) 1994, 1995, 1997 Wolfgang Solfrank. 
@@ -48,7 +48,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: msdosfs_vfsops.c,v 1.133 2020/03/16 21:20:10 pgoyette Exp $"); +__KERNEL_RCSID(0, "$NetBSD: msdosfs_vfsops.c,v 1.134 2020/04/04 20:49:30 ad Exp $"); #if defined(_KERNEL_OPT) #include "opt_compat_netbsd.h" @@ -868,6 +868,7 @@ msdosfs_mountfs(struct vnode *devvp, str mp->mnt_stat.f_fsid = mp->mnt_stat.f_fsidx.__fsid_val[0]; mp->mnt_stat.f_namemax = MSDOSFS_NAMEMAX(pmp); mp->mnt_flag |= MNT_LOCAL; + mp->mnt_iflag |= IMNT_SHRLOOKUP; mp->mnt_dev_bshift = pmp->pm_bnshift; mp->mnt_fs_bshift = pmp->pm_cnshift; Index: src/sys/fs/tmpfs/tmpfs_subr.c diff -u src/sys/fs/tmpfs/tmpfs_subr.c:1.107 src/sys/fs/tmpfs/tmpfs_subr.c:1.108 --- src/sys/fs/tmpfs/tmpfs_subr.c:1.107 Sat Mar 14 13:37:49 2020 +++ src/sys/fs/tmpfs/tmpfs_subr.c Sat Apr 4 20:49:30 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: tmpfs_subr.c,v 1.107 2020/03/14 13:37:49 ad Exp $ */ +/* $NetBSD: tmpfs_subr.c,v 1.108 2020/04/04 20:49:30 ad Exp $ */ /* * Copyright (c) 2005-2013 The NetBSD Foundation, Inc. 
@@ -73,7 +73,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: tmpfs_subr.c,v 1.107 2020/03/14 13:37:49 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: tmpfs_subr.c,v 1.108 2020/04/04 20:49:30 ad Exp $"); #include <sys/param.h> #include <sys/cprng.h> @@ -147,6 +147,8 @@ tmpfs_init_vnode(struct vnode *vp, tmpfs vp->v_data = node; node->tn_vnode = vp; uvm_vnp_setsize(vp, node->tn_size); + KASSERT(node->tn_mode != VNOVAL); + cache_enter_id(vp, node->tn_mode, node->tn_uid, node->tn_gid); } /* @@ -1039,6 +1041,7 @@ tmpfs_chmod(vnode_t *vp, mode_t mode, ka node->tn_mode = (mode & ALLPERMS); tmpfs_update(vp, TMPFS_UPDATE_CTIME); VN_KNOTE(vp, NOTE_ATTRIB); + cache_enter_id(vp, node->tn_mode, node->tn_uid, node->tn_gid); return 0; } @@ -1083,6 +1086,7 @@ tmpfs_chown(vnode_t *vp, uid_t uid, gid_ node->tn_gid = gid; tmpfs_update(vp, TMPFS_UPDATE_CTIME); VN_KNOTE(vp, NOTE_ATTRIB); + cache_enter_id(vp, node->tn_mode, node->tn_uid, node->tn_gid); return 0; } Index: src/sys/fs/tmpfs/tmpfs_vfsops.c diff -u src/sys/fs/tmpfs/tmpfs_vfsops.c:1.76 src/sys/fs/tmpfs/tmpfs_vfsops.c:1.77 --- src/sys/fs/tmpfs/tmpfs_vfsops.c:1.76 Fri Jan 17 20:08:08 2020 +++ src/sys/fs/tmpfs/tmpfs_vfsops.c Sat Apr 4 20:49:30 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: tmpfs_vfsops.c,v 1.76 2020/01/17 20:08:08 ad Exp $ */ +/* $NetBSD: tmpfs_vfsops.c,v 1.77 2020/04/04 20:49:30 ad Exp $ */ /* * Copyright (c) 2005, 2006, 2007 The NetBSD Foundation, Inc. 
@@ -42,7 +42,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: tmpfs_vfsops.c,v 1.76 2020/01/17 20:08:08 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: tmpfs_vfsops.c,v 1.77 2020/04/04 20:49:30 ad Exp $"); #include <sys/param.h> #include <sys/atomic.h> @@ -182,7 +182,8 @@ tmpfs_mount(struct mount *mp, const char mp->mnt_stat.f_namemax = TMPFS_MAXNAMLEN; mp->mnt_fs_bshift = PAGE_SHIFT; mp->mnt_dev_bshift = DEV_BSHIFT; - mp->mnt_iflag |= IMNT_MPSAFE | IMNT_CAN_RWTORO; + mp->mnt_iflag |= IMNT_MPSAFE | IMNT_CAN_RWTORO | IMNT_SHRLOOKUP | + IMNT_NCLOOKUP; vfs_getnewfsid(mp); /* Allocate the tmpfs mount structure and fill it. */ Index: src/sys/kern/vfs_cache.c diff -u src/sys/kern/vfs_cache.c:1.136 src/sys/kern/vfs_cache.c:1.137 --- src/sys/kern/vfs_cache.c:1.136 Mon Mar 30 19:15:28 2020 +++ src/sys/kern/vfs_cache.c Sat Apr 4 20:49:30 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: vfs_cache.c,v 1.136 2020/03/30 19:15:28 ad Exp $ */ +/* $NetBSD: vfs_cache.c,v 1.137 2020/04/04 20:49:30 ad Exp $ */ /*- * Copyright (c) 2008, 2019, 2020 The NetBSD Foundation, Inc. @@ -172,7 +172,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: vfs_cache.c,v 1.136 2020/03/30 19:15:28 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: vfs_cache.c,v 1.137 2020/04/04 20:49:30 ad Exp $"); #define __NAMECACHE_PRIVATE #ifdef _KERNEL_OPT @@ -624,7 +624,6 @@ cache_lookup_raw(struct vnode *dvp, cons * names in the cache. The node locks are chained along the way: a parent's * lock is not dropped until the child's is acquired. */ -#ifdef notyet bool cache_lookup_linked(struct vnode *dvp, const char *name, size_t namelen, struct vnode **vn_ret, krwlock_t **plock, @@ -721,7 +720,6 @@ cache_lookup_linked(struct vnode *dvp, c *vn_ret = ncp->nc_vp; return true; } -#endif /* notyet */ /* * Scan cache looking for name of directory entry pointing at vp. @@ -967,7 +965,6 @@ cache_enter_id(struct vnode *vp, mode_t * information, missing some updates, so always check the mount flag * instead of looking for !VNOVAL. 
*/ -#ifdef notyet bool cache_have_id(struct vnode *vp) { @@ -982,7 +979,6 @@ cache_have_id(struct vnode *vp) return false; } } -#endif /* notyet */ /* * Name cache initialization, from vfs_init() when the system is booting. Index: src/sys/kern/vfs_getcwd.c diff -u src/sys/kern/vfs_getcwd.c:1.56 src/sys/kern/vfs_getcwd.c:1.57 --- src/sys/kern/vfs_getcwd.c:1.56 Sun Mar 22 14:38:37 2020 +++ src/sys/kern/vfs_getcwd.c Sat Apr 4 20:49:30 2020 @@ -1,7 +1,7 @@ -/* $NetBSD: vfs_getcwd.c,v 1.56 2020/03/22 14:38:37 ad Exp $ */ +/* $NetBSD: vfs_getcwd.c,v 1.57 2020/04/04 20:49:30 ad Exp $ */ /*- - * Copyright (c) 1999 The NetBSD Foundation, Inc. + * Copyright (c) 1999, 2020 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation @@ -30,7 +30,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: vfs_getcwd.c,v 1.56 2020/03/22 14:38:37 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: vfs_getcwd.c,v 1.57 2020/04/04 20:49:30 ad Exp $"); #include <sys/param.h> #include <sys/systm.h> @@ -87,7 +87,7 @@ __KERNEL_RCSID(0, "$NetBSD: vfs_getcwd.c * On exit, *uvpp is either NULL or is a locked vnode reference. */ static int -getcwd_scandir(struct vnode **lvpp, struct vnode **uvpp, char **bpp, +getcwd_scandir(struct vnode *lvp, struct vnode **uvpp, char **bpp, char *bufp, struct lwp *l) { int error = 0; @@ -101,12 +101,14 @@ getcwd_scandir(struct vnode **lvpp, stru ino_t fileno; struct vattr va; struct vnode *uvp = NULL; - struct vnode *lvp = *lvpp; kauth_cred_t cred = l->l_cred; struct componentname cn; int len, reclen; tries = 0; + /* Need exclusive for UFS VOP_GETATTR (itimes) & VOP_LOOKUP. */ + KASSERT(VOP_ISLOCKED(lvp) == LK_EXCLUSIVE); + /* * If we want the filename, get some info we need while the * current directory is still locked. 
@@ -114,8 +116,7 @@ getcwd_scandir(struct vnode **lvpp, stru if (bufp != NULL) { error = VOP_GETATTR(lvp, &va, cred); if (error) { - vput(lvp); - *lvpp = NULL; + VOP_UNLOCK(lvp); *uvpp = NULL; return error; } @@ -134,24 +135,14 @@ getcwd_scandir(struct vnode **lvpp, stru /* At this point, lvp is locked */ error = VOP_LOOKUP(lvp, uvpp, &cn); - vput(lvp); + VOP_UNLOCK(lvp); if (error) { - *lvpp = NULL; *uvpp = NULL; return error; } uvp = *uvpp; - /* Now lvp is unlocked, try to lock uvp */ - error = vn_lock(uvp, LK_EXCLUSIVE); - if (error) { - *lvpp = NULL; - *uvpp = NULL; - return error; - } - /* If we don't care about the pathname, we're done */ if (bufp == NULL) { - *lvpp = NULL; return 0; } @@ -163,6 +154,14 @@ getcwd_scandir(struct vnode **lvpp, stru dirbuflen = va.va_blocksize; dirbuf = kmem_alloc(dirbuflen, KM_SLEEP); + /* Now lvp is unlocked, try to lock uvp */ + error = vn_lock(uvp, LK_SHARED); + if (error) { + vrele(uvp); + *uvpp = NULL; + return error; + } + #if 0 unionread: #endif @@ -254,73 +253,21 @@ unionread: vput(tvp); vref(uvp); *uvpp = uvp; - vn_lock(uvp, LK_EXCLUSIVE | LK_RETRY); + vn_lock(uvp, LK_SHARED | LK_RETRY); goto unionread; } #endif error = ENOENT; out: - *lvpp = NULL; + VOP_UNLOCK(uvp); kmem_free(dirbuf, dirbuflen); return error; } /* - * Look in the vnode-to-name reverse cache to see if - * we can find things the easy way. - * - * XXX vget failure path is untested. - * - * On entry, *lvpp is a locked vnode reference. - * On exit, one of the following is the case: - * 0) Both *lvpp and *uvpp are NULL and failure is returned. 
- * 1) *uvpp is NULL, *lvpp remains locked and -1 is returned (cache miss) - * 2) *uvpp is a locked vnode reference, *lvpp is vput and NULL'ed - * and 0 is returned (cache hit) - */ - -static int -getcwd_getcache(struct vnode **lvpp, struct vnode **uvpp, char **bpp, - char *bufp) -{ - struct vnode *lvp, *uvp = NULL; - int error; - - lvp = *lvpp; - - /* - * This returns 0 on a cache hit, -1 on a clean cache miss, - * or an errno on other failure. - */ - error = cache_revlookup(lvp, uvpp, bpp, bufp, 0, 0); - if (error) { - if (error != -1) { - vput(lvp); - *lvpp = NULL; - *uvpp = NULL; - } - return error; - } - uvp = *uvpp; - - /* - * Since we're going up, we have to release the current lock - * before we take the parent lock. - */ - - VOP_UNLOCK(lvp); - vn_lock(uvp, LK_EXCLUSIVE | LK_RETRY); - vrele(lvp); - *lvpp = NULL; - - return error; -} - -/* * common routine shared by sys___getcwd() and vn_isunder() */ - int getcwd_common(struct vnode *lvp, struct vnode *rvp, char **bpp, char *bufp, int limit, int flags, struct lwp *l) @@ -345,11 +292,10 @@ getcwd_common(struct vnode *lvp, struct /* * Error handling invariant: * Before a `goto out': - * lvp is either NULL, or locked and held. - * uvp is either NULL, or locked and held. + * lvp is either NULL, or held. + * uvp is either NULL, or held. */ - vn_lock(lvp, LK_EXCLUSIVE | LK_RETRY); if (bufp) bp = *bpp; @@ -369,63 +315,93 @@ getcwd_common(struct vnode *lvp, struct * access check here is optional, depending on * whether or not caller cares. */ - if (flags & GETCWD_CHECK_ACCESS) { - error = VOP_ACCESS(lvp, perms, cred); - if (error) - goto out; - perms = VEXEC|VREAD; - } + int chkaccess = (flags & GETCWD_CHECK_ACCESS); + bool locked = false; /* * step up if we're a covered vnode.. + * check access on the first vnode only. 
*/ - while (lvp->v_vflag & VV_ROOT) { - struct vnode *tvp; + if (lvp->v_vflag & VV_ROOT) { + vn_lock(lvp, LK_SHARED | LK_RETRY); + if (chkaccess) { + error = VOP_ACCESS(lvp, perms, cred); + if (error) { + VOP_UNLOCK(lvp); + goto out; + } + chkaccess = 0; + } + while (lvp->v_vflag & VV_ROOT) { + struct vnode *tvp; - if (lvp == rvp) - goto out; + if (lvp == rvp) { + VOP_UNLOCK(lvp); + goto out; + } - tvp = lvp; - lvp = lvp->v_mount->mnt_vnodecovered; - vput(tvp); - /* - * hodie natus est radici frater - */ - if (lvp == NULL) { - error = ENOENT; - goto out; + tvp = lvp->v_mount->mnt_vnodecovered; + /* + * hodie natus est radici frater + */ + if (tvp == NULL) { + VOP_UNLOCK(lvp); + error = ENOENT; + goto out; + } + vref(tvp); + vput(lvp); + lvp = tvp; + if (lvp->v_vflag & VV_ROOT) + vn_lock(lvp, LK_SHARED | LK_RETRY); } - vref(lvp); - error = vn_lock(lvp, LK_EXCLUSIVE | LK_RETRY); - if (error != 0) { - vrele(lvp); - lvp = NULL; + } + + /* Do we need to check access to the directory? */ + if (chkaccess && !cache_have_id(lvp)) { + /* Need exclusive for UFS VOP_GETATTR (itimes) & VOP_LOOKUP. */ + vn_lock(lvp, LK_EXCLUSIVE | LK_RETRY); + error = VOP_ACCESS(lvp, perms, cred); + if (error) { + VOP_UNLOCK(lvp); goto out; } + chkaccess = 0; + locked = true; } + /* * Look in the name cache; if that fails, look in the * directory.. 
*/ - error = getcwd_getcache(&lvp, &uvp, &bp, bufp); + error = cache_revlookup(lvp, &uvp, &bp, bufp, chkaccess, + perms); if (error == -1) { + if (!locked) { + locked = true; + vn_lock(lvp, LK_EXCLUSIVE | LK_RETRY); + } if (lvp->v_type != VDIR) { + VOP_UNLOCK(lvp); error = ENOTDIR; goto out; } - error = getcwd_scandir(&lvp, &uvp, &bp, bufp, l); + error = getcwd_scandir(lvp, &uvp, &bp, bufp, l); + /* lvp now unlocked */ + } else if (locked) { + VOP_UNLOCK(lvp); } if (error) goto out; #if DIAGNOSTIC - if (lvp != NULL) - panic("getcwd: oops, forgot to null lvp"); if (bufp && (bp <= bufp)) { panic("getcwd: oops, went back too far"); } #endif + perms = VEXEC | VREAD; if (bp) *(--bp) = '/'; + vrele(lvp); lvp = uvp; uvp = NULL; limit--; @@ -435,9 +411,9 @@ out: if (bpp) *bpp = bp; if (uvp) - vput(uvp); + vrele(uvp); if (lvp) - vput(lvp); + vrele(lvp); vrele(rvp); return error; } @@ -556,11 +532,7 @@ vnode_to_path(char *path, size_t len, st bp = bend = &path[len]; *(--bp) = '\0'; - error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); - if (error != 0) - return error; - error = cache_revlookup(vp, &dvp, &bp, path, 0, 0); - VOP_UNLOCK(vp); + error = cache_revlookup(vp, &dvp, &bp, path, false, 0); if (error != 0) return (error == -1 ? 
ENOENT : error); Index: src/sys/kern/vfs_lookup.c diff -u src/sys/kern/vfs_lookup.c:1.214 src/sys/kern/vfs_lookup.c:1.215 --- src/sys/kern/vfs_lookup.c:1.214 Sun Feb 23 22:14:03 2020 +++ src/sys/kern/vfs_lookup.c Sat Apr 4 20:49:30 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: vfs_lookup.c,v 1.214 2020/02/23 22:14:03 ad Exp $ */ +/* $NetBSD: vfs_lookup.c,v 1.215 2020/04/04 20:49:30 ad Exp $ */ /* * Copyright (c) 1982, 1986, 1989, 1993 @@ -37,7 +37,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: vfs_lookup.c,v 1.214 2020/02/23 22:14:03 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: vfs_lookup.c,v 1.215 2020/04/04 20:49:30 ad Exp $"); #ifdef _KERNEL_OPT #include "opt_magiclinks.h" @@ -50,6 +50,7 @@ __KERNEL_RCSID(0, "$NetBSD: vfs_lookup.c #include <sys/time.h> #include <sys/namei.h> #include <sys/vnode.h> +#include <sys/vnode_impl.h> #include <sys/mount.h> #include <sys/errno.h> #include <sys/filedesc.h> @@ -709,8 +710,6 @@ namei_start(struct namei_state *state, i return ENOTDIR; } - vn_lock(startdir, LK_EXCLUSIVE | LK_RETRY); - *startdir_ret = startdir; return 0; } @@ -748,15 +747,17 @@ namei_follow(struct namei_state *state, size_t linklen; int error; - KASSERT(VOP_ISLOCKED(searchdir) == LK_EXCLUSIVE); - KASSERT(VOP_ISLOCKED(foundobj) == LK_EXCLUSIVE); if (ndp->ni_loopcnt++ >= MAXSYMLINKS) { return ELOOP; } + + vn_lock(foundobj, LK_EXCLUSIVE | LK_RETRY); if (foundobj->v_mount->mnt_flag & MNT_SYMPERM) { error = VOP_ACCESS(foundobj, VEXEC, cnp->cn_cred); - if (error != 0) + if (error != 0) { + VOP_UNLOCK(foundobj); return error; + } } /* FUTURE: fix this to not use a second buffer */ @@ -770,6 +771,7 @@ namei_follow(struct namei_state *state, auio.uio_resid = MAXPATHLEN; UIO_SETUP_SYSSPACE(&auio); error = VOP_READLINK(foundobj, &auio, cnp->cn_cred); + VOP_UNLOCK(foundobj); if (error) { PNBUF_PUT(cp); return error; @@ -806,14 +808,11 @@ namei_follow(struct namei_state *state, /* we're now starting from the beginning of the buffer again */ cnp->cn_nameptr = ndp->ni_pnbuf; - 
/* must unlock this before relocking searchdir */ - VOP_UNLOCK(foundobj); - /* * Check if root directory should replace current directory. */ if (ndp->ni_pnbuf[0] == '/') { - vput(searchdir); + vrele(searchdir); /* Keep absolute symbolic links inside emulation root */ searchdir = ndp->ni_erootdir; if (searchdir == NULL || @@ -824,7 +823,6 @@ namei_follow(struct namei_state *state, searchdir = ndp->ni_rootdir; } vref(searchdir); - vn_lock(searchdir, LK_EXCLUSIVE | LK_RETRY); while (cnp->cn_nameptr[0] == '/') { cnp->cn_nameptr++; ndp->ni_pathlen--; @@ -832,7 +830,6 @@ namei_follow(struct namei_state *state, } *newsearchdir_ret = searchdir; - KASSERT(VOP_ISLOCKED(searchdir) == LK_EXCLUSIVE); return 0; } @@ -860,7 +857,7 @@ lookup_parsepath(struct namei_state *sta * responsibility for freeing the pathname buffer. * * At this point, our only vnode state is that the search dir - * is held and locked. + * is held. */ cnp->cn_consume = 0; cnp->cn_namelen = namei_getcomponent(cnp->cn_nameptr); @@ -917,6 +914,111 @@ lookup_parsepath(struct namei_state *sta } /* + * Take care of crossing a mounted-on vnode. On error, foundobj_ret will be + * vrele'd, but searchdir is left alone. + */ +static int +lookup_crossmount(struct namei_state *state, + struct vnode **searchdir_ret, + struct vnode **foundobj_ret, + bool *searchdir_locked) +{ + struct componentname *cnp = state->cnp; + struct vnode *foundobj; + struct vnode *searchdir; + struct mount *mp; + int error, lktype; + + searchdir = *searchdir_ret; + foundobj = *foundobj_ret; + error = 0; + + KASSERT((cnp->cn_flags & NOCROSSMOUNT) == 0); + KASSERT(searchdir != NULL); + + /* First, unlock searchdir (oof). */ + if (*searchdir_locked) { + lktype = VOP_ISLOCKED(searchdir); + VOP_UNLOCK(searchdir); + *searchdir_locked = false; + } else { + lktype = LK_NONE; + } + + /* + * Do an unlocked check to see if the vnode has been mounted on; if + * so find the root of the mounted file system. 
+ */ + while (foundobj->v_type == VDIR && + (mp = foundobj->v_mountedhere) != NULL && + (cnp->cn_flags & NOCROSSMOUNT) == 0) { + KASSERTMSG(searchdir != foundobj, "same vn %p", searchdir); + /* + * First get the vnode stable. LK_SHARED works brilliantly + * here because almost nothing else wants to lock the + * covered vnode. + */ + error = vn_lock(foundobj, LK_SHARED); + if (error != 0) { + vrele(foundobj); + *foundobj_ret = NULL; + break; + } + + /* Then check to see if something is still mounted on it. */ + if ((mp = foundobj->v_mountedhere) == NULL) { + VOP_UNLOCK(foundobj); + break; + } + + /* Get a reference to the mountpoint, and ditch foundobj. */ + error = vfs_busy(mp); + vput(foundobj); + if (error != 0) { + *foundobj_ret = NULL; + break; + } + + /* Now get a reference on the root vnode, and drop mount. */ + error = VFS_ROOT(mp, LK_NONE, &foundobj); + vfs_unbusy(mp); + if (error) { + *foundobj_ret = NULL; + break; + } + + /* + * Avoid locking vnodes from two filesystems because + * it's prone to deadlock, e.g. when using puffs. + * Also, it isn't a good idea to propagate slowness of + * a filesystem up to the root directory. For now, + * only handle the common case, where foundobj is + * VDIR. + * + * In this case set searchdir to null to avoid using + * it again. It is not correct to set searchdir == + * foundobj here as that will confuse the caller. + * (See PR 40740.) + */ + if (searchdir == NULL) { + /* already been here once; do nothing further */ + } else if (foundobj->v_type == VDIR) { + vrele(searchdir); + *searchdir_ret = searchdir = NULL; + *foundobj_ret = foundobj; + lktype = LK_NONE; + } + } + + /* If searchdir is still around, re-lock it. 
*/ + if (error == 0 && lktype != LK_NONE) { + vn_lock(searchdir, lktype | LK_RETRY); + *searchdir_locked = true; + } + return error; +} + +/* * Call VOP_LOOKUP for a single lookup; return a new search directory * (used when crossing mountpoints up or searching union mounts down) and * the found object, which for create operations may be NULL on success. @@ -932,19 +1034,19 @@ static int lookup_once(struct namei_state *state, struct vnode *searchdir, struct vnode **newsearchdir_ret, - struct vnode **foundobj_ret) + struct vnode **foundobj_ret, + bool *newsearchdir_locked_ret) { struct vnode *tmpvn; /* scratch vnode */ struct vnode *foundobj; /* result */ - struct mount *mp; /* mount table entry */ struct lwp *l = curlwp; - int error; + bool searchdir_locked = false; + int error, lktype; struct componentname *cnp = state->cnp; struct nameidata *ndp = state->ndp; KASSERT(cnp == &ndp->ni_cnd); - KASSERT(VOP_ISLOCKED(searchdir) == LK_EXCLUSIVE); *newsearchdir_ret = searchdir; /* @@ -976,9 +1078,7 @@ lookup_once(struct namei_state *state, if (ndp->ni_rootdir != rootvnode) { int retval; - VOP_UNLOCK(searchdir); retval = vn_isunder(searchdir, ndp->ni_rootdir, l); - vn_lock(searchdir, LK_EXCLUSIVE | LK_RETRY); if (!retval) { /* Oops! We got out of jail! */ log(LOG_WARNING, @@ -987,12 +1087,11 @@ lookup_once(struct namei_state *state, p->p_pid, kauth_cred_geteuid(l->l_cred), p->p_comm); /* Put us at the jail root. 
*/ - vput(searchdir); + vrele(searchdir); searchdir = NULL; foundobj = ndp->ni_rootdir; vref(foundobj); vref(foundobj); - vn_lock(foundobj, LK_EXCLUSIVE | LK_RETRY); *newsearchdir_ret = foundobj; *foundobj_ret = foundobj; error = 0; @@ -1005,18 +1104,35 @@ lookup_once(struct namei_state *state, tmpvn = searchdir; searchdir = searchdir->v_mount->mnt_vnodecovered; vref(searchdir); - vput(tmpvn); - vn_lock(searchdir, LK_EXCLUSIVE | LK_RETRY); + vrele(tmpvn); *newsearchdir_ret = searchdir; } } /* + * If the file system supports VOP_LOOKUP() with a shared lock, and + * we are not making any modifications (nameiop LOOKUP) or this is + * not the last component then get a shared lock. Where we can't do + * fast-forwarded lookups (for example with layered file systems) + * then this is the fallback for reducing lock contention. + */ + if ((searchdir->v_mount->mnt_iflag & IMNT_SHRLOOKUP) != 0 && + (cnp->cn_nameiop == LOOKUP || (cnp->cn_flags & ISLASTCN) == 0)) { + lktype = LK_SHARED; + } else { + lktype = LK_EXCLUSIVE; + } + + /* * We now have a segment name to search for, and a directory to search. - * Our vnode state here is that "searchdir" is held and locked. + * Our vnode state here is that "searchdir" is held. */ unionlookup: foundobj = NULL; + if (!searchdir_locked) { + vn_lock(searchdir, lktype | LK_RETRY); + searchdir_locked = true; + } error = VOP_LOOKUP(searchdir, &foundobj, cnp); if (error != 0) { @@ -1026,6 +1142,23 @@ unionlookup: #ifdef NAMEI_DIAGNOSTIC printf("not found\n"); #endif /* NAMEI_DIAGNOSTIC */ + + /* + * If ENOLCK, the file system needs us to retry the lookup + * with an exclusive lock. It's likely nothing was found in + * cache and/or modifications need to be made. 
+ */ + if (error == ENOLCK) { + KASSERT(VOP_ISLOCKED(searchdir) == LK_SHARED); + KASSERT(searchdir_locked); + if (vn_lock(searchdir, LK_UPGRADE | LK_NOWAIT)) { + VOP_UNLOCK(searchdir); + searchdir_locked = false; + } + lktype = LK_EXCLUSIVE; + goto unionlookup; + } + if ((error == ENOENT) && (searchdir->v_vflag & VV_ROOT) && (searchdir->v_mount->mnt_flag & MNT_UNION)) { @@ -1033,7 +1166,7 @@ unionlookup: searchdir = searchdir->v_mount->mnt_vnodecovered; vref(searchdir); vput(tmpvn); - vn_lock(searchdir, LK_EXCLUSIVE | LK_RETRY); + searchdir_locked = false; *newsearchdir_ret = searchdir; goto unionlookup; } @@ -1087,85 +1220,187 @@ unionlookup: cnp->cn_flags |= ISLASTCN; } - /* - * "searchdir" is locked and held, "foundobj" is held, - * they may be the same vnode. - */ - if (searchdir != foundobj) { - if (cnp->cn_flags & ISDOTDOT) - VOP_UNLOCK(searchdir); - error = vn_lock(foundobj, LK_EXCLUSIVE); - if (cnp->cn_flags & ISDOTDOT) - vn_lock(searchdir, LK_EXCLUSIVE | LK_RETRY); - if (error != 0) { - vrele(foundobj); - goto done; + /* Unlock, unless the caller needs the parent locked. */ + if (searchdir != NULL) { + KASSERT(searchdir_locked); + if ((cnp->cn_flags & (ISLASTCN | LOCKPARENT)) != + (ISLASTCN | LOCKPARENT)) { + VOP_UNLOCK(searchdir); + searchdir_locked = false; } + } else { + KASSERT(!searchdir_locked); } - /* - * Check to see if the vnode has been mounted on; - * if so find the root of the mounted file system. - */ - KASSERT(searchdir != NULL); - while (foundobj->v_type == VDIR && - (mp = foundobj->v_mountedhere) != NULL && - (cnp->cn_flags & NOCROSSMOUNT) == 0) { + *foundobj_ret = foundobj; + error = 0; +done: + *newsearchdir_locked_ret = searchdir_locked; + return error; +} - KASSERT(searchdir != foundobj); +/* + * Parse out the first path name component that we need to to consider. + * + * While doing this, attempt to use the name cache to fast-forward through + * as many "easy" to find components of the path as possible. 
+ * + * We use the namecache's node locks to form a chain, and avoid as many + * vnode references and locks as possible. In the ideal case, only the + * final vnode will have its reference count adjusted and lock taken. + */ +static int +lookup_fastforward(struct namei_state *state, struct vnode **searchdir_ret, + struct vnode **foundobj_ret) +{ + struct componentname *cnp = state->cnp; + struct nameidata *ndp = state->ndp; + krwlock_t *plock; + struct vnode *foundobj, *searchdir; + int error, error2; + size_t oldpathlen; + const char *oldnameptr; - error = vfs_busy(mp); - if (error != 0) { - vput(foundobj); - goto done; + /* + * Eat as many path name components as possible before giving up and + * letting lookup_once() handle it. Remember the starting point in + * case we can't get vnode references and need to roll back. + */ + plock = NULL; + searchdir = *searchdir_ret; + oldnameptr = cnp->cn_nameptr; + oldpathlen = ndp->ni_pathlen; + for (;;) { + foundobj = NULL; + + /* + * Get the next component name. There should be no slashes + * here, and we shouldn't have looped around if we were + * done. + */ + KASSERT(cnp->cn_nameptr[0] != '/'); + KASSERT(cnp->cn_nameptr[0] != '\0'); + if ((error = lookup_parsepath(state)) != 0) { + break; } - if (searchdir != NULL) { - VOP_UNLOCK(searchdir); + + /* + * Can't deal with dotdot lookups, because it means lock + * order reversal, and there are checks in lookup_once() + * that need to be made. Also check for missing mountpoints. + */ + if ((cnp->cn_flags & ISDOTDOT) != 0 || + searchdir->v_mount == NULL) { + error = EOPNOTSUPP; + break; } - vput(foundobj); - error = VFS_ROOT(mp, LK_EXCLUSIVE, &foundobj); - vfs_unbusy(mp); - if (error) { - if (searchdir != NULL) { - vn_lock(searchdir, LK_EXCLUSIVE | LK_RETRY); + + /* + * Can't deal with last component when modifying; this needs + * searchdir locked and VOP_LOOKUP() called (which can and + * does modify state, despite the name). 
+ */ + if ((cnp->cn_flags & ISLASTCN) != 0) { + if (cnp->cn_nameiop != LOOKUP || + (cnp->cn_flags & LOCKPARENT) != 0) { + error = EOPNOTSUPP; + break; } - goto done; } + + /* Can't deal with -o union lookups. */ + if ((searchdir->v_vflag & VV_ROOT) != 0 && + (searchdir->v_mount->mnt_flag & MNT_UNION) != 0) { + error = EOPNOTSUPP; + break; + } + /* - * Avoid locking vnodes from two filesystems because - * it's prone to deadlock, e.g. when using puffs. - * Also, it isn't a good idea to propagate slowness of - * a filesystem up to the root directory. For now, - * only handle the common case, where foundobj is - * VDIR. + * Good, now look for it in cache. cache_lookup_linked() + * will fail if there's nothing there, or if there's no + * ownership info for the directory, or if the user doesn't + * have permission to look up files in this directory. + */ + if (!cache_lookup_linked(searchdir, cnp->cn_nameptr, + cnp->cn_namelen, &foundobj, &plock, cnp->cn_cred)) { + error = EOPNOTSUPP; + break; + } + KASSERT(plock != NULL && rw_lock_held(plock)); + + /* Scored a hit. Negative is good too (ENOENT). */ + if (foundobj == NULL) { + error = ENOENT; + break; + } + + /* + * Stop and get a hold on the vnode if there's something + * that can't be handled here: * - * In this case set searchdir to null to avoid using - * it again. It is not correct to set searchdir == - * foundobj here as that will confuse the caller. - * (See PR 40740.) + * - we've reached the last component. + * - or encountered a mount point that needs to be crossed. + * - or encountered something other than a directory. 
*/ - if (searchdir == NULL) { - /* already been here once; do nothing further */ - } else if (foundobj->v_type == VDIR) { - vrele(searchdir); - *newsearchdir_ret = searchdir = NULL; + if ((cnp->cn_flags & ISLASTCN) != 0 || + foundobj->v_type != VDIR || + (foundobj->v_type == VDIR && + foundobj->v_mountedhere != NULL)) { + mutex_enter(foundobj->v_interlock); + error = vcache_tryvget(foundobj); + /* v_interlock now unheld */ + if (error != 0) { + foundobj = NULL; + } + break; + } + + /* + * Otherwise, we're still in business. Set the found VDIR + * vnode as the search dir for the next component and + * continue on to it. + */ + cnp->cn_nameptr = ndp->ni_next; + searchdir = foundobj; + } + + /* + * If we ended up with a new search dir, ref it before dropping the + * namecache's lock. The lock prevents both searchdir and foundobj + * from disappearing. If we can't ref the new searchdir, we have a + * bit of a problem. Roll back the fastforward to the beginning and + * let lookup_once() take care of it. + */ + if (searchdir != *searchdir_ret) { + mutex_enter(searchdir->v_interlock); + error2 = vcache_tryvget(searchdir); + /* v_interlock now unheld */ + KASSERT(plock != NULL); + rw_exit(plock); + if (__predict_true(error2 == 0)) { + /* Returning new searchdir, and maybe new foundobj. */ + vrele(*searchdir_ret); + *searchdir_ret = searchdir; } else { - VOP_UNLOCK(foundobj); - vn_lock(searchdir, LK_EXCLUSIVE | LK_RETRY); - vn_lock(foundobj, LK_EXCLUSIVE | LK_RETRY); + /* Returning nothing. */ + if (foundobj != NULL) { + vrele(foundobj); + foundobj = NULL; + } + cnp->cn_nameptr = oldnameptr; + ndp->ni_pathlen = oldpathlen; + error = lookup_parsepath(state); + if (error == 0) { + error = EOPNOTSUPP; + } } + } else if (plock != NULL) { + /* Drop any namecache lock still held. */ + rw_exit(plock); } + KASSERT(error == 0 ? 
foundobj != NULL : foundobj == NULL); *foundobj_ret = foundobj; - error = 0; -done: - KASSERT(*newsearchdir_ret == NULL || - VOP_ISLOCKED(*newsearchdir_ret) == LK_EXCLUSIVE); - /* - * *foundobj_ret is valid only if error == 0. - */ - KASSERT(error != 0 || *foundobj_ret == NULL || - VOP_ISLOCKED(*foundobj_ret) == LK_EXCLUSIVE); return error; } @@ -1182,6 +1417,7 @@ namei_oneroot(struct namei_state *state, struct nameidata *ndp = state->ndp; struct componentname *cnp = state->cnp; struct vnode *searchdir, *foundobj; + bool searchdir_locked = false; int error; error = namei_start(state, isnfsd, &searchdir); @@ -1222,44 +1458,47 @@ namei_oneroot(struct namei_state *state, for (;;) { KASSERT(searchdir != NULL); - KASSERT(VOP_ISLOCKED(searchdir) == LK_EXCLUSIVE); + KASSERT(!searchdir_locked); /* - * If the directory we're on is unmounted, bail out. - * XXX: should this also check if it's unlinked? - * XXX: yes it should... but how? + * Parse out the first path name component that we need + * to consider. While doing this, attempt to use the name + * cache to fast-forward through as many "easy" to find + * components of the path as possible. */ - if (searchdir->v_mount == NULL) { - vput(searchdir); - ndp->ni_dvp = NULL; - ndp->ni_vp = NULL; - return (ENOENT); - } + error = lookup_fastforward(state, &searchdir, &foundobj); /* - * Look up the next path component. - * (currently, this may consume more than one) + * If we didn't get a good answer from the namecache, then + * go directly to the file system. */ + if (error != 0 && error != ENOENT) { + error = lookup_once(state, searchdir, &searchdir, + &foundobj, &searchdir_locked); + } - /* There should be no slashes here. 
*/ - KASSERT(cnp->cn_nameptr[0] != '/'); - - /* and we shouldn't have looped around if we were done */ - KASSERT(cnp->cn_nameptr[0] != '\0'); - - error = lookup_parsepath(state); - if (error) { - vput(searchdir); - ndp->ni_dvp = NULL; - ndp->ni_vp = NULL; - state->attempt_retry = 1; - return (error); + /* + * If the vnode we found is mounted on, then cross the mount + * and get the root vnode in foundobj. If this encounters + * an error, it will dispose of foundobj, but searchdir is + * untouched. + */ + if (error == 0 && foundobj != NULL && + foundobj->v_type == VDIR && + foundobj->v_mountedhere != NULL && + (cnp->cn_flags & NOCROSSMOUNT) == 0) { + error = lookup_crossmount(state, &searchdir, + &foundobj, &searchdir_locked); } - error = lookup_once(state, searchdir, &searchdir, &foundobj); if (error) { if (searchdir != NULL) { - vput(searchdir); + if (searchdir_locked) { + searchdir_locked = false; + vput(searchdir); + } else { + vrele(searchdir); + } } ndp->ni_dvp = NULL; ndp->ni_vp = NULL; @@ -1296,6 +1535,11 @@ namei_oneroot(struct namei_state *state, * them again. */ if (namei_atsymlink(state, foundobj)) { + /* Don't need searchdir locked any more. 
*/ + if (searchdir_locked) { + searchdir_locked = false; + VOP_UNLOCK(searchdir); + } ndp->ni_pathlen += state->slashes; ndp->ni_next -= state->slashes; if (neverfollow) { @@ -1337,14 +1581,13 @@ namei_oneroot(struct namei_state *state, if (error) { KASSERT(searchdir != foundobj); if (searchdir != NULL) { - vput(searchdir); + vrele(searchdir); } - vput(foundobj); + vrele(foundobj); ndp->ni_dvp = NULL; ndp->ni_vp = NULL; return error; } - /* namei_follow unlocks it (ugh) so rele, not put */ vrele(foundobj); foundobj = NULL; @@ -1375,9 +1618,16 @@ namei_oneroot(struct namei_state *state, (cnp->cn_flags & REQUIREDIR)) { KASSERT(foundobj != searchdir); if (searchdir) { - vput(searchdir); + if (searchdir_locked) { + searchdir_locked = false; + vput(searchdir); + } else { + vrele(searchdir); + } + } else { + KASSERT(!searchdir_locked); } - vput(foundobj); + vrele(foundobj); ndp->ni_dvp = NULL; ndp->ni_vp = NULL; state->attempt_retry = 1; @@ -1395,15 +1645,21 @@ namei_oneroot(struct namei_state *state, * Continue with the next component. */ cnp->cn_nameptr = ndp->ni_next; - if (searchdir == foundobj) { - vrele(searchdir); - } else if (searchdir != NULL) { - vput(searchdir); + if (searchdir != NULL) { + if (searchdir_locked) { + searchdir_locked = false; + vput(searchdir); + } else { + vrele(searchdir); + } } searchdir = foundobj; foundobj = NULL; } + KASSERT((cnp->cn_flags & LOCKPARENT) == 0 || searchdir == NULL || + VOP_ISLOCKED(searchdir) == LK_EXCLUSIVE); + skiploop: if (foundobj != NULL) { @@ -1416,16 +1672,17 @@ namei_oneroot(struct namei_state *state, * forever. So convert it to the real root. 
*/ if (searchdir != NULL) { - if (searchdir == foundobj) - vrele(searchdir); - else + if (searchdir_locked) { vput(searchdir); + searchdir_locked = false; + } else { + vrele(searchdir); + } searchdir = NULL; } - vput(foundobj); + vrele(foundobj); foundobj = ndp->ni_rootdir; vref(foundobj); - vn_lock(foundobj, LK_EXCLUSIVE | LK_RETRY); } /* @@ -1438,9 +1695,15 @@ namei_oneroot(struct namei_state *state, (searchdir == NULL || searchdir->v_mount != foundobj->v_mount)) { if (searchdir) { - vput(searchdir); + if (searchdir_locked) { + vput(searchdir); + searchdir_locked = false; + } else { + vrele(searchdir); + } + searchdir = NULL; } - vput(foundobj); + vrele(foundobj); foundobj = NULL; ndp->ni_dvp = NULL; ndp->ni_vp = NULL; @@ -1465,21 +1728,25 @@ namei_oneroot(struct namei_state *state, if (state->rdonly && (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) { if (searchdir) { - if (foundobj != searchdir) { + if (searchdir_locked) { vput(searchdir); + searchdir_locked = false; } else { vrele(searchdir); } searchdir = NULL; } - vput(foundobj); + vrele(foundobj); foundobj = NULL; ndp->ni_dvp = NULL; ndp->ni_vp = NULL; state->attempt_retry = 1; return EROFS; } - if ((cnp->cn_flags & LOCKLEAF) == 0) { + + /* Lock the leaf node if requested. */ + if ((cnp->cn_flags & (LOCKLEAF | LOCKPARENT)) == LOCKPARENT && + searchdir == foundobj) { /* * Note: if LOCKPARENT but not LOCKLEAF is * set, and searchdir == foundobj, this code @@ -1491,7 +1758,15 @@ namei_oneroot(struct namei_state *state, * that uses this combination "knows" this, so * it can't be safely changed. Feh. XXX */ - VOP_UNLOCK(foundobj); + KASSERT(searchdir_locked); + VOP_UNLOCK(searchdir); + searchdir_locked = false; + } else if ((cnp->cn_flags & LOCKLEAF) != 0 && + (searchdir != foundobj || + (cnp->cn_flags & LOCKPARENT) == 0)) { + const int lktype = (cnp->cn_flags & LOCKSHARED) != 0 ? 
+ LK_SHARED : LK_EXCLUSIVE; + vn_lock(foundobj, lktype | LK_RETRY); } } @@ -1503,11 +1778,7 @@ namei_oneroot(struct namei_state *state, * If LOCKPARENT is not set, the parent directory isn't returned. */ if ((cnp->cn_flags & LOCKPARENT) == 0 && searchdir != NULL) { - if (searchdir == foundobj) { - vrele(searchdir); - } else { - vput(searchdir); - } + vrele(searchdir); searchdir = NULL; } @@ -1649,6 +1920,7 @@ do_lookup_for_nfsd_index(struct namei_st struct nameidata *ndp = state->ndp; struct vnode *startdir; struct vnode *foundobj; + bool startdir_locked; const char *cp; /* pointer into pathname argument */ KASSERT(cnp == &ndp->ni_cnd); @@ -1681,30 +1953,37 @@ do_lookup_for_nfsd_index(struct namei_st * own reference to it to avoid consuming the caller's. */ vref(startdir); - vn_lock(startdir, LK_EXCLUSIVE | LK_RETRY); - error = lookup_once(state, startdir, &startdir, &foundobj); - if (error == 0 && startdir == foundobj) { - vrele(startdir); - } else if (startdir != NULL) { - vput(startdir); - } - if (error) { - goto bad; - } - ndp->ni_vp = foundobj; + error = lookup_once(state, startdir, &startdir, &foundobj, + &startdir_locked); - if (foundobj == NULL) { - return 0; + KASSERT((cnp->cn_flags & LOCKPARENT) == 0); + if (startdir_locked) { + VOP_UNLOCK(startdir); + startdir_locked = false; } - KASSERT((cnp->cn_flags & LOCKPARENT) == 0); - if ((cnp->cn_flags & LOCKLEAF) == 0) { - VOP_UNLOCK(foundobj); + /* + * If the vnode we found is mounted on, then cross the mount and get + * the root vnode in foundobj. If this encounters an error, it will + * dispose of foundobj, but searchdir is untouched. + */ + if (error == 0 && foundobj != NULL && + foundobj->v_type == VDIR && + foundobj->v_mountedhere != NULL && + (cnp->cn_flags & NOCROSSMOUNT) == 0) { + error = lookup_crossmount(state, &startdir, &foundobj, + &startdir_locked); } - return (0); -bad: - ndp->ni_vp = NULL; + /* Now toss startdir and see if we have an error. 
*/ + if (startdir != NULL) + vrele(startdir); + if (error) + foundobj = NULL; + else if (foundobj != NULL && (cnp->cn_flags & LOCKLEAF) != 0) + vn_lock(foundobj, LK_EXCLUSIVE | LK_RETRY); + + ndp->ni_vp = foundobj; return (error); } Index: src/sys/kern/vfs_syscalls.c diff -u src/sys/kern/vfs_syscalls.c:1.544 src/sys/kern/vfs_syscalls.c:1.545 --- src/sys/kern/vfs_syscalls.c:1.544 Wed Mar 25 18:08:34 2020 +++ src/sys/kern/vfs_syscalls.c Sat Apr 4 20:49:30 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: vfs_syscalls.c,v 1.544 2020/03/25 18:08:34 gdt Exp $ */ +/* $NetBSD: vfs_syscalls.c,v 1.545 2020/04/04 20:49:30 ad Exp $ */ /*- * Copyright (c) 2008, 2009, 2019, 2020 The NetBSD Foundation, Inc. @@ -70,7 +70,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: vfs_syscalls.c,v 1.544 2020/03/25 18:08:34 gdt Exp $"); +__KERNEL_RCSID(0, "$NetBSD: vfs_syscalls.c,v 1.545 2020/04/04 20:49:30 ad Exp $"); #ifdef _KERNEL_OPT #include "opt_fileassoc.h" @@ -1529,7 +1529,7 @@ chdir_lookup(const char *path, int where if (error) { return error; } - NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb); + NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | LOCKSHARED | TRYEMULROOT, pb); if ((error = namei(&nd)) != 0) { pathbuf_destroy(pb); return error; @@ -2995,7 +2995,7 @@ do_sys_accessat(struct lwp *l, int fdat, return EINVAL; } - nd_flag = FOLLOW | LOCKLEAF | TRYEMULROOT; + nd_flag = FOLLOW | LOCKLEAF | LOCKSHARED | TRYEMULROOT; if (flags & AT_SYMLINK_NOFOLLOW) nd_flag &= ~FOLLOW; @@ -3221,7 +3221,7 @@ do_sys_readlinkat(struct lwp *l, int fda if (error) { return error; } - NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | TRYEMULROOT, pb); + NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | LOCKSHARED | TRYEMULROOT, pb); if ((error = fd_nameiat(l, fdat, &nd)) != 0) { pathbuf_destroy(pb); return error; @@ -4687,7 +4687,7 @@ dorevoke(struct vnode *vp, kauth_cred_t struct vattr vattr; int error, fs_decision; - vn_lock(vp, LK_SHARED | LK_RETRY); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); error = VOP_GETATTR(vp, 
&vattr, cred); VOP_UNLOCK(vp); if (error != 0) Index: src/sys/kern/vfs_vnode.c diff -u src/sys/kern/vfs_vnode.c:1.116 src/sys/kern/vfs_vnode.c:1.117 --- src/sys/kern/vfs_vnode.c:1.116 Sun Mar 22 18:45:28 2020 +++ src/sys/kern/vfs_vnode.c Sat Apr 4 20:49:30 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: vfs_vnode.c,v 1.116 2020/03/22 18:45:28 ad Exp $ */ +/* $NetBSD: vfs_vnode.c,v 1.117 2020/04/04 20:49:30 ad Exp $ */ /*- * Copyright (c) 1997-2011, 2019, 2020 The NetBSD Foundation, Inc. @@ -155,7 +155,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: vfs_vnode.c,v 1.116 2020/03/22 18:45:28 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: vfs_vnode.c,v 1.117 2020/04/04 20:49:30 ad Exp $"); #ifdef _KERNEL_OPT #include "opt_pax.h" @@ -828,9 +828,6 @@ vrelel(vnode_t *vp, int flags, int lktyp if (VSTATE_GET(vp) == VS_RECLAIMED) { VOP_UNLOCK(vp); } else { - VSTATE_CHANGE(vp, VS_LOADED, VS_BLOCKED); - mutex_exit(vp->v_interlock); - /* * The vnode must not gain another reference while being * deactivated. If VOP_INACTIVE() indicates that @@ -839,19 +836,19 @@ vrelel(vnode_t *vp, int flags, int lktyp * * Note that VOP_INACTIVE() will not drop the vnode lock. */ + mutex_exit(vp->v_interlock); recycle = false; VOP_INACTIVE(vp, &recycle); - if (!recycle) - VOP_UNLOCK(vp); rw_enter(vp->v_uobj.vmobjlock, RW_WRITER); mutex_enter(vp->v_interlock); - VSTATE_CHANGE(vp, VS_BLOCKED, VS_LOADED); + if (vtryrele(vp)) { + VOP_UNLOCK(vp); + mutex_exit(vp->v_interlock); + rw_exit(vp->v_uobj.vmobjlock); + return; + } if (!recycle) { - if (vtryrele(vp)) { - mutex_exit(vp->v_interlock); - rw_exit(vp->v_uobj.vmobjlock); - return; - } + VOP_UNLOCK(vp); } /* Take care of space accounting. 
*/ Index: src/sys/miscfs/genfs/layer_vnops.c diff -u src/sys/miscfs/genfs/layer_vnops.c:1.68 src/sys/miscfs/genfs/layer_vnops.c:1.69 --- src/sys/miscfs/genfs/layer_vnops.c:1.68 Sun Feb 23 15:46:41 2020 +++ src/sys/miscfs/genfs/layer_vnops.c Sat Apr 4 20:49:30 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: layer_vnops.c,v 1.68 2020/02/23 15:46:41 ad Exp $ */ +/* $NetBSD: layer_vnops.c,v 1.69 2020/04/04 20:49:30 ad Exp $ */ /* * Copyright (c) 1999 National Aeronautics & Space Administration @@ -170,7 +170,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: layer_vnops.c,v 1.68 2020/02/23 15:46:41 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: layer_vnops.c,v 1.69 2020/04/04 20:49:30 ad Exp $"); #include <sys/param.h> #include <sys/systm.h> @@ -384,6 +384,7 @@ layer_lookup(void *v) vrele(lvp); } else if (lvp != NULL) { /* Note: dvp and ldvp are both locked. */ + KASSERT(error != ENOLCK); error = layer_node_create(dvp->v_mount, lvp, ap->a_vpp); if (error) { vrele(lvp); Index: src/sys/miscfs/nullfs/null_vfsops.c diff -u src/sys/miscfs/nullfs/null_vfsops.c:1.97 src/sys/miscfs/nullfs/null_vfsops.c:1.98 --- src/sys/miscfs/nullfs/null_vfsops.c:1.97 Mon Mar 16 21:20:11 2020 +++ src/sys/miscfs/nullfs/null_vfsops.c Sat Apr 4 20:49:30 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: null_vfsops.c,v 1.97 2020/03/16 21:20:11 pgoyette Exp $ */ +/* $NetBSD: null_vfsops.c,v 1.98 2020/04/04 20:49:30 ad Exp $ */ /* * Copyright (c) 1999 National Aeronautics & Space Administration @@ -76,7 +76,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: null_vfsops.c,v 1.97 2020/03/16 21:20:11 pgoyette Exp $"); +__KERNEL_RCSID(0, "$NetBSD: null_vfsops.c,v 1.98 2020/04/04 20:49:30 ad Exp $"); #include <sys/param.h> #include <sys/systm.h> @@ -139,6 +139,7 @@ nullfs_mount(struct mount *mp, const cha nmp = kmem_zalloc(sizeof(struct null_mount), KM_SLEEP); mp->mnt_data = nmp; mp->mnt_iflag |= IMNT_MPSAFE; + mp->mnt_iflag |= lowerrootvp->v_mount->mnt_iflag & IMNT_SHRLOOKUP; /* * Make sure that the mount point is 
sufficiently initialized Index: src/sys/miscfs/procfs/procfs_vfsops.c diff -u src/sys/miscfs/procfs/procfs_vfsops.c:1.103 src/sys/miscfs/procfs/procfs_vfsops.c:1.104 --- src/sys/miscfs/procfs/procfs_vfsops.c:1.103 Mon Mar 16 21:20:11 2020 +++ src/sys/miscfs/procfs/procfs_vfsops.c Sat Apr 4 20:49:30 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: procfs_vfsops.c,v 1.103 2020/03/16 21:20:11 pgoyette Exp $ */ +/* $NetBSD: procfs_vfsops.c,v 1.104 2020/04/04 20:49:30 ad Exp $ */ /* * Copyright (c) 1993 @@ -76,7 +76,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: procfs_vfsops.c,v 1.103 2020/03/16 21:20:11 pgoyette Exp $"); +__KERNEL_RCSID(0, "$NetBSD: procfs_vfsops.c,v 1.104 2020/04/04 20:49:30 ad Exp $"); #if defined(_KERNEL_OPT) #include "opt_compat_netbsd.h" @@ -171,7 +171,7 @@ procfs_mount( else pmnt->pmnt_flags = 0; - mp->mnt_iflag |= IMNT_MPSAFE; + mp->mnt_iflag |= IMNT_MPSAFE | IMNT_SHRLOOKUP; return error; } Index: src/sys/sys/fstypes.h diff -u src/sys/sys/fstypes.h:1.37 src/sys/sys/fstypes.h:1.38 --- src/sys/sys/fstypes.h:1.37 Wed Feb 20 10:07:27 2019 +++ src/sys/sys/fstypes.h Sat Apr 4 20:49:31 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: fstypes.h,v 1.37 2019/02/20 10:07:27 hannken Exp $ */ +/* $NetBSD: fstypes.h,v 1.38 2020/04/04 20:49:31 ad Exp $ */ /* * Copyright (c) 1989, 1991, 1993 @@ -220,7 +220,9 @@ typedef struct fhandle fhandle_t; #define IMNT_UNMOUNT 0x00000002 /* unmount in progress */ #define IMNT_WANTRDWR 0x00000004 /* upgrade to read/write requested */ #define IMNT_WANTRDONLY 0x00000008 /* upgrade to readonly requested */ +#define IMNT_NCLOOKUP 0x00000020 /* can do lookup direct in namecache */ #define IMNT_DTYPE 0x00000040 /* returns d_type fields */ +#define IMNT_SHRLOOKUP 0x00000080 /* can do LK_SHARED lookups */ #define IMNT_MPSAFE 0x00000100 /* file system code MP safe */ #define IMNT_CAN_RWTORO 0x00000200 /* can downgrade fs to from rw to r/o */ #define IMNT_ONWORKLIST 0x00000400 /* on syncer worklist */ @@ -271,7 +273,9 @@ typedef struct fhandle 
fhandle_t; "\13IMNT_ONWORKLIST" \ "\12IMNT_CAN_RWTORO" \ "\11IMNT_MPSAFE" \ + "\10IMNT_SHRLOOKUP" \ "\07IMNT_DTYPE" \ + "\06IMNT_NCLOOKUP" \ "\04IMNT_WANTRDONLY" \ "\03IMNT_WANTRDWR" \ "\02IMNT_UNMOUNT" \ Index: src/sys/sys/namei.src diff -u src/sys/sys/namei.src:1.52 src/sys/sys/namei.src:1.53 --- src/sys/sys/namei.src:1.52 Mon Mar 23 23:28:11 2020 +++ src/sys/sys/namei.src Sat Apr 4 20:49:31 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: namei.src,v 1.52 2020/03/23 23:28:11 ad Exp $ */ +/* $NetBSD: namei.src,v 1.53 2020/04/04 20:49:31 ad Exp $ */ /* * Copyright (c) 1985, 1989, 1991, 1993 @@ -152,13 +152,14 @@ NAMEIFL NOFOLLOW 0x00000000 /* do not fo (pseudo) */ NAMEIFL EMULROOTSET 0x00000080 /* emulation root already in ni_erootdir */ +NAMEIFL LOCKSHARED 0x00000100 /* want shared locks if possible */ NAMEIFL NOCHROOT 0x01000000 /* no chroot on abs path lookups */ -NAMEIFL MODMASK 0x010000fc /* mask of operational modifiers */ +NAMEIFL MODMASK 0x010001fc /* mask of operational modifiers */ /* * Namei parameter descriptors. */ -NAMEIFL NOCROSSMOUNT 0x0000100 /* do not cross mount points */ -NAMEIFL RDONLY 0x0000200 /* lookup with read-only semantics */ +NAMEIFL NOCROSSMOUNT 0x0000800 /* do not cross mount points */ +NAMEIFL RDONLY 0x0001000 /* lookup with read-only semantics */ NAMEIFL ISDOTDOT 0x0002000 /* current component name is .. */ NAMEIFL MAKEENTRY 0x0004000 /* entry is to be added to name cache */ NAMEIFL ISLASTCN 0x0008000 /* this is last component of pathname */ @@ -166,7 +167,7 @@ NAMEIFL ISWHITEOUT 0x0020000 /* found wh NAMEIFL DOWHITEOUT 0x0040000 /* do whiteouts */ NAMEIFL REQUIREDIR 0x0080000 /* must be a directory */ NAMEIFL CREATEDIR 0x0200000 /* trailing slashes are ok */ -NAMEIFL PARAMASK 0x02ee300 /* mask of parameter descriptors */ +NAMEIFL PARAMASK 0x02ef800 /* mask of parameter descriptors */ /* * Initialization of a nameidata structure. 
@@ -207,12 +208,11 @@ NAMEIFL PARAMASK 0x02ee300 /* mask of pa * * Field markings and their corresponding locks: * - * - stable throught the lifetime of the namecache entry + * - stable throughout the lifetime of the namecache entry * d protected by nc_dvp->vi_nc_lock * v protected by nc_vp->vi_nc_listlock * l protected by cache_lru_lock */ -struct nchnode; struct namecache { struct rb_node nc_tree; /* d red-black tree, must be first */ uint64_t nc_key; /* - hashed key value */ @@ -220,7 +220,7 @@ struct namecache { TAILQ_ENTRY(namecache) nc_lru; /* l pseudo-lru chain */ struct vnode *nc_dvp; /* - vnode of parent of name */ struct vnode *nc_vp; /* - vnode the name refers to */ - int nc_lrulist; /* l which LRU list its on */ + int nc_lrulist; /* l which LRU list it's on */ u_short nc_nlen; /* - length of the name */ char nc_whiteout; /* - true if a whiteout */ char nc_name[41]; /* - segment name */ @@ -313,18 +313,17 @@ void namecache_print(struct vnode *, voi * Stats on usefulness of namei caches. A couple of structures are * used for counting, with members having the same names but different * types. Containerize member names with the preprocessor to avoid - * cut-'n'-paste. A (U) in the comment documents values that are - * incremented unlocked; we may treat these specially. + * cut-'n'-paste. 
*/ #define _NAMEI_CACHE_STATS(type) { \ - type ncs_goodhits; /* hits that we can really use (U) */ \ + type ncs_goodhits; /* hits that we can really use */ \ type ncs_neghits; /* negative hits that we can use */ \ type ncs_badhits; /* hits we must drop */ \ - type ncs_falsehits; /* hits with id mismatch (U) */ \ + type ncs_falsehits; /* hits with id mismatch */ \ type ncs_miss; /* misses */ \ type ncs_long; /* long names that ignore cache */ \ - type ncs_pass2; /* names found with passes == 2 (U) */ \ - type ncs_2passes; /* number of times we attempt it (U) */ \ + type ncs_pass2; /* names found with passes == 2 */ \ + type ncs_2passes; /* number of times we attempt it */ \ type ncs_revhits; /* reverse-cache hits */ \ type ncs_revmiss; /* reverse-cache misses */ \ type ncs_denied; /* access denied */ \ Index: src/sys/ufs/chfs/chfs_vnops.c diff -u src/sys/ufs/chfs/chfs_vnops.c:1.36 src/sys/ufs/chfs/chfs_vnops.c:1.37 --- src/sys/ufs/chfs/chfs_vnops.c:1.36 Sun Feb 23 15:46:42 2020 +++ src/sys/ufs/chfs/chfs_vnops.c Sat Apr 4 20:49:31 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: chfs_vnops.c,v 1.36 2020/02/23 15:46:42 ad Exp $ */ +/* $NetBSD: chfs_vnops.c,v 1.37 2020/04/04 20:49:31 ad Exp $ */ /*- * Copyright (c) 2010 Department of Software Engineering, @@ -90,6 +90,10 @@ chfs_lookup(void *v) return (*vpp == NULLVP ? ENOENT : 0); } + /* May need to restart the lookup with an exclusive lock. 
*/ + if (VOP_ISLOCKED(dvp) != LK_EXCLUSIVE) + return ENOLCK; + ip = VTOI(dvp); ump = VFSTOUFS(dvp->v_mount); chmp = ump->um_chfs; Index: src/sys/ufs/ext2fs/ext2fs_lookup.c diff -u src/sys/ufs/ext2fs/ext2fs_lookup.c:1.89 src/sys/ufs/ext2fs/ext2fs_lookup.c:1.90 --- src/sys/ufs/ext2fs/ext2fs_lookup.c:1.89 Sat Mar 14 18:08:40 2020 +++ src/sys/ufs/ext2fs/ext2fs_lookup.c Sat Apr 4 20:49:31 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: ext2fs_lookup.c,v 1.89 2020/03/14 18:08:40 ad Exp $ */ +/* $NetBSD: ext2fs_lookup.c,v 1.90 2020/04/04 20:49:31 ad Exp $ */ /* * Modified for NetBSD 1.2E @@ -48,7 +48,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: ext2fs_lookup.c,v 1.89 2020/03/14 18:08:40 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: ext2fs_lookup.c,v 1.90 2020/04/04 20:49:31 ad Exp $"); #include <sys/param.h> #include <sys/systm.h> @@ -313,14 +313,6 @@ ext2fs_lookup(void *v) *vpp = NULL; /* - * Produce the auxiliary lookup results into i_crap. Increment - * its serial number so elsewhere we can tell if we're using - * stale results. This should not be done this way. XXX. - */ - results = &dp->i_crap; - dp->i_crapcounter++; - - /* * Check accessiblity of directory. */ if ((error = VOP_ACCESS(vdp, VEXEC, cred)) != 0) @@ -342,6 +334,18 @@ ext2fs_lookup(void *v) return *vpp == NULLVP ? ENOENT : 0; } + /* May need to restart the lookup with an exclusive lock. */ + if (VOP_ISLOCKED(vdp) != LK_EXCLUSIVE) + return ENOLCK; + + /* + * Produce the auxiliary lookup results into i_crap. Increment + * its serial number so elsewhere we can tell if we're using + * stale results. This should not be done this way. XXX. 
+ */ + results = &dp->i_crap; + dp->i_crapcounter++; + /* * Suppress search for slots unless creating * file and at end of pathname, in which case Index: src/sys/ufs/ext2fs/ext2fs_vfsops.c diff -u src/sys/ufs/ext2fs/ext2fs_vfsops.c:1.217 src/sys/ufs/ext2fs/ext2fs_vfsops.c:1.218 --- src/sys/ufs/ext2fs/ext2fs_vfsops.c:1.217 Mon Mar 16 21:20:12 2020 +++ src/sys/ufs/ext2fs/ext2fs_vfsops.c Sat Apr 4 20:49:31 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: ext2fs_vfsops.c,v 1.217 2020/03/16 21:20:12 pgoyette Exp $ */ +/* $NetBSD: ext2fs_vfsops.c,v 1.218 2020/04/04 20:49:31 ad Exp $ */ /* * Copyright (c) 1989, 1991, 1993, 1994 @@ -60,7 +60,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: ext2fs_vfsops.c,v 1.217 2020/03/16 21:20:12 pgoyette Exp $"); +__KERNEL_RCSID(0, "$NetBSD: ext2fs_vfsops.c,v 1.218 2020/04/04 20:49:31 ad Exp $"); #if defined(_KERNEL_OPT) #include "opt_compat_netbsd.h" @@ -738,7 +738,7 @@ ext2fs_mountfs(struct vnode *devvp, stru mp->mnt_flag |= MNT_LOCAL; mp->mnt_dev_bshift = DEV_BSHIFT; /* XXX */ mp->mnt_fs_bshift = m_fs->e2fs_bshift; - mp->mnt_iflag |= IMNT_DTYPE; + mp->mnt_iflag |= IMNT_DTYPE | IMNT_SHRLOOKUP; ump->um_flags = 0; ump->um_mountp = mp; ump->um_dev = dev; Index: src/sys/ufs/ffs/ffs_vfsops.c diff -u src/sys/ufs/ffs/ffs_vfsops.c:1.366 src/sys/ufs/ffs/ffs_vfsops.c:1.367 --- src/sys/ufs/ffs/ffs_vfsops.c:1.366 Mon Mar 16 21:20:12 2020 +++ src/sys/ufs/ffs/ffs_vfsops.c Sat Apr 4 20:49:31 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: ffs_vfsops.c,v 1.366 2020/03/16 21:20:12 pgoyette Exp $ */ +/* $NetBSD: ffs_vfsops.c,v 1.367 2020/04/04 20:49:31 ad Exp $ */ /*- * Copyright (c) 2008, 2009 The NetBSD Foundation, Inc. 
@@ -61,7 +61,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: ffs_vfsops.c,v 1.366 2020/03/16 21:20:12 pgoyette Exp $"); +__KERNEL_RCSID(0, "$NetBSD: ffs_vfsops.c,v 1.367 2020/04/04 20:49:31 ad Exp $"); #if defined(_KERNEL_OPT) #include "opt_ffs.h" @@ -1454,7 +1454,8 @@ ffs_mountfs(struct vnode *devvp, struct mp->mnt_fs_bshift = fs->fs_bshift; mp->mnt_dev_bshift = DEV_BSHIFT; /* XXX */ mp->mnt_flag |= MNT_LOCAL; - mp->mnt_iflag |= IMNT_MPSAFE | IMNT_CAN_RWTORO; + mp->mnt_iflag |= IMNT_MPSAFE | IMNT_CAN_RWTORO | IMNT_SHRLOOKUP | + IMNT_NCLOOKUP; #ifdef FFS_EI if (needswap) ump->um_flags |= UFS_NEEDSWAP; @@ -2083,6 +2084,7 @@ ffs_loadvnode(struct mount *mp, struct v ip->i_gid = ip->i_ffs1_ogid; /* XXX */ } /* XXX */ uvm_vnp_setsize(vp, ip->i_size); + cache_enter_id(vp, ip->i_mode, ip->i_uid, ip->i_gid); *new_key = &ip->i_number; return 0; } @@ -2204,6 +2206,7 @@ ffs_newvnode(struct mount *mp, struct vn } uvm_vnp_setsize(vp, ip->i_size); + cache_enter_id(vp, ip->i_mode, ip->i_uid, ip->i_gid); *new_key = &ip->i_number; return 0; } Index: src/sys/ufs/lfs/lfs_vfsops.c diff -u src/sys/ufs/lfs/lfs_vfsops.c:1.377 src/sys/ufs/lfs/lfs_vfsops.c:1.378 --- src/sys/ufs/lfs/lfs_vfsops.c:1.377 Mon Mar 16 21:20:13 2020 +++ src/sys/ufs/lfs/lfs_vfsops.c Sat Apr 4 20:49:31 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: lfs_vfsops.c,v 1.377 2020/03/16 21:20:13 pgoyette Exp $ */ +/* $NetBSD: lfs_vfsops.c,v 1.378 2020/04/04 20:49:31 ad Exp $ */ /*- * Copyright (c) 1999, 2000, 2001, 2002, 2003, 2007, 2007 @@ -61,7 +61,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: lfs_vfsops.c,v 1.377 2020/03/16 21:20:13 pgoyette Exp $"); +__KERNEL_RCSID(0, "$NetBSD: lfs_vfsops.c,v 1.378 2020/04/04 20:49:31 ad Exp $"); #if defined(_KERNEL_OPT) #include "opt_lfs.h" @@ -1125,6 +1125,7 @@ lfs_mountfs(struct vnode *devvp, struct mp->mnt_stat.f_namemax = LFS_MAXNAMLEN; mp->mnt_stat.f_iosize = lfs_sb_getbsize(fs); mp->mnt_flag |= MNT_LOCAL; + mp->mnt_iflag |= IMNT_SHRLOOKUP; mp->mnt_fs_bshift = 
lfs_sb_getbshift(fs); mp->mnt_iflag |= IMNT_CAN_RWTORO; if (fs->um_maxsymlinklen > 0) Index: src/sys/ufs/lfs/ulfs_lookup.c diff -u src/sys/ufs/lfs/ulfs_lookup.c:1.42 src/sys/ufs/lfs/ulfs_lookup.c:1.43 --- src/sys/ufs/lfs/ulfs_lookup.c:1.42 Sat Mar 14 18:08:40 2020 +++ src/sys/ufs/lfs/ulfs_lookup.c Sat Apr 4 20:49:31 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: ulfs_lookup.c,v 1.42 2020/03/14 18:08:40 ad Exp $ */ +/* $NetBSD: ulfs_lookup.c,v 1.43 2020/04/04 20:49:31 ad Exp $ */ /* from NetBSD: ufs_lookup.c,v 1.135 2015/07/11 11:04:48 mlelstv */ /* @@ -38,7 +38,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: ulfs_lookup.c,v 1.42 2020/03/14 18:08:40 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: ulfs_lookup.c,v 1.43 2020/04/04 20:49:31 ad Exp $"); #ifdef _KERNEL_OPT #include "opt_lfs.h" @@ -162,14 +162,6 @@ ulfs_lookup(void *v) endsearch = 0; /* silence compiler warning */ /* - * Produce the auxiliary lookup results into i_crap. Increment - * its serial number so elsewhere we can tell if we're using - * stale results. This should not be done this way. XXX. - */ - results = &dp->i_crap; - dp->i_crapcounter++; - - /* * Check accessiblity of directory. */ if ((error = VOP_ACCESS(vdp, VEXEC, cred)) != 0) @@ -193,6 +185,19 @@ ulfs_lookup(void *v) } return *vpp == NULLVP ? ENOENT : 0; } + + /* May need to restart the lookup with an exclusive lock. */ + if (VOP_ISLOCKED(vdp) != LK_EXCLUSIVE) + return ENOLCK; + + /* + * Produce the auxiliary lookup results into i_crap. Increment + * its serial number so elsewhere we can tell if we're using + * stale results. This should not be done this way. XXX. 
+ */ + results = &dp->i_crap; + dp->i_crapcounter++; + if (iswhiteout) { /* * The namecache set iswhiteout without finding a Index: src/sys/ufs/ufs/ufs_lookup.c diff -u src/sys/ufs/ufs/ufs_lookup.c:1.151 src/sys/ufs/ufs/ufs_lookup.c:1.152 --- src/sys/ufs/ufs/ufs_lookup.c:1.151 Sat Mar 14 18:08:40 2020 +++ src/sys/ufs/ufs/ufs_lookup.c Sat Apr 4 20:49:31 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: ufs_lookup.c,v 1.151 2020/03/14 18:08:40 ad Exp $ */ +/* $NetBSD: ufs_lookup.c,v 1.152 2020/04/04 20:49:31 ad Exp $ */ /* * Copyright (c) 1989, 1993 @@ -37,7 +37,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: ufs_lookup.c,v 1.151 2020/03/14 18:08:40 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: ufs_lookup.c,v 1.152 2020/04/04 20:49:31 ad Exp $"); #ifdef _KERNEL_OPT #include "opt_ffs.h" @@ -330,14 +330,6 @@ ufs_lookup(void *v) endsearch = 0; /* silence compiler warning */ /* - * Produce the auxiliary lookup results into i_crap. Increment - * its serial number so elsewhere we can tell if we're using - * stale results. This should not be done this way. XXX. - */ - results = &dp->i_crap; - dp->i_crapcounter++; - - /* * Check accessiblity of directory. */ if ((error = VOP_ACCESS(vdp, VEXEC, cred)) != 0) @@ -361,6 +353,20 @@ ufs_lookup(void *v) } return *vpp == NULLVP ? ENOENT : 0; } + + /* May need to restart the lookup with an exclusive lock. */ + if (VOP_ISLOCKED(vdp) != LK_EXCLUSIVE) { + return ENOLCK; + } + + /* + * Produce the auxiliary lookup results into i_crap. Increment + * its serial number so elsewhere we can tell if we're using + * stale results. This should not be done this way. XXX. 
+ */ + results = &dp->i_crap; + dp->i_crapcounter++; + if (iswhiteout) { /* * The namecache set iswhiteout without finding a Index: src/sys/ufs/ufs/ufs_vnops.c diff -u src/sys/ufs/ufs/ufs_vnops.c:1.249 src/sys/ufs/ufs/ufs_vnops.c:1.250 --- src/sys/ufs/ufs/ufs_vnops.c:1.249 Wed Feb 26 18:00:12 2020 +++ src/sys/ufs/ufs/ufs_vnops.c Sat Apr 4 20:49:31 2020 @@ -1,7 +1,7 @@ -/* $NetBSD: ufs_vnops.c,v 1.249 2020/02/26 18:00:12 maxv Exp $ */ +/* $NetBSD: ufs_vnops.c,v 1.250 2020/04/04 20:49:31 ad Exp $ */ /*- - * Copyright (c) 2008 The NetBSD Foundation, Inc. + * Copyright (c) 2008, 2020 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation @@ -66,7 +66,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: ufs_vnops.c,v 1.249 2020/02/26 18:00:12 maxv Exp $"); +__KERNEL_RCSID(0, "$NetBSD: ufs_vnops.c,v 1.250 2020/04/04 20:49:31 ad Exp $"); #if defined(_KERNEL_OPT) #include "opt_ffs.h" @@ -621,6 +621,7 @@ ufs_setattr(void *v) } VN_KNOTE(vp, NOTE_ATTRIB); out: + cache_enter_id(vp, ip->i_mode, ip->i_uid, ip->i_gid); return (error); } @@ -648,6 +649,7 @@ ufs_chmod(struct vnode *vp, int mode, ka ip->i_flag |= IN_CHANGE; DIP_ASSIGN(ip, mode, ip->i_mode); UFS_WAPBL_UPDATE(vp, NULL, NULL, 0); + cache_enter_id(vp, ip->i_mode, ip->i_uid, ip->i_gid); return (0); } @@ -708,6 +710,7 @@ ufs_chown(struct vnode *vp, uid_t uid, g #endif /* QUOTA || QUOTA2 */ ip->i_flag |= IN_CHANGE; UFS_WAPBL_UPDATE(vp, NULL, NULL, 0); + cache_enter_id(vp, ip->i_mode, ip->i_uid, ip->i_gid); return (0); }