Module Name: src Committed By: riastradh Date: Mon Oct 15 23:08:20 UTC 2012
Modified Files: src/external/cddl/osnet/dist/uts/common/fs/zfs: zfs_vfsops.c zfs_vnops.c zfs_znode.c Log Message: Fix various issues in zfs life cycle, locking, and vop protocol. - Restore some zfs locking and unlocking that got lost randomly. - Enable use of the BSD vnode lock. Lock order: all BSD vnode locks are taken before all zfs internal locks. There remains an issue with O_EXCL, to be solved later (famous last words). KASSERT the locking scheme up the wazoo. - Take our cruft out of zfs_lookup and move it to zfs_netbsd_lookup. Restore much of the way zfs_lookup looked to make merging future versions easier. Disable use of the namecache for now because its locking dance is too scary to contemplate. - Implement BSD semantics for rename, to appease our tests. This is a provisional kludge; eventually we need VOP_RENAME to take a flag specifying whether to use BSD semantics or POSIX semantics. - Simplify zfs_netbsd_reclaim and make it work. Now that getnewvnode never tries to vclean anything itself, we need not worry about recursion of ZFS_OBJ_MUTEX locks. - Clarify and fix genfs node initialization and destruction. zfs passes most of our atf vfs tests now, including the rename races. Still to do: - fix the impedance mismatch between our permissions model and zfs's; - fix O_EXCL (nontrivial); - throw dirconc at it and see how badly it explodes; - find why zpool sometimes wedges itself during mkfs; and - find why pool caches sometimes seem to get corrupted. To generate a diff of this commit: cvs rdiff -u -r1.7 -r1.8 \ src/external/cddl/osnet/dist/uts/common/fs/zfs/zfs_vfsops.c cvs rdiff -u -r1.11 -r1.12 \ src/external/cddl/osnet/dist/uts/common/fs/zfs/zfs_vnops.c cvs rdiff -u -r1.12 -r1.13 \ src/external/cddl/osnet/dist/uts/common/fs/zfs/zfs_znode.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
Modified files: Index: src/external/cddl/osnet/dist/uts/common/fs/zfs/zfs_vfsops.c diff -u src/external/cddl/osnet/dist/uts/common/fs/zfs/zfs_vfsops.c:1.7 src/external/cddl/osnet/dist/uts/common/fs/zfs/zfs_vfsops.c:1.8 --- src/external/cddl/osnet/dist/uts/common/fs/zfs/zfs_vfsops.c:1.7 Sun Nov 20 02:54:25 2011 +++ src/external/cddl/osnet/dist/uts/common/fs/zfs/zfs_vfsops.c Mon Oct 15 23:08:19 2012 @@ -1811,6 +1811,10 @@ zfs_root(vfs_t *vfsp, vnode_t **vpp) *vpp = ZTOV(rootzp); dprintf("vpp -> %d, error %d -- %p\n", (*vpp)->v_type, error, *vpp); ZFS_EXIT(zfsvfs); + if (error == 0) + vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY); + KASSERT((error != 0) || (*vpp != NULL)); + KASSERT((error != 0) || (VOP_ISLOCKED(*vpp) == LK_EXCLUSIVE)); return (error); } Index: src/external/cddl/osnet/dist/uts/common/fs/zfs/zfs_vnops.c diff -u src/external/cddl/osnet/dist/uts/common/fs/zfs/zfs_vnops.c:1.11 src/external/cddl/osnet/dist/uts/common/fs/zfs/zfs_vnops.c:1.12 --- src/external/cddl/osnet/dist/uts/common/fs/zfs/zfs_vnops.c:1.11 Mon Oct 1 18:19:18 2012 +++ src/external/cddl/osnet/dist/uts/common/fs/zfs/zfs_vnops.c Mon Oct 15 23:08:19 2012 @@ -1197,13 +1197,12 @@ specvp_check(vnode_t **vpp, cred_t *cr) */ /* ARGSUSED */ static int -zfs_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, struct componentname *cnp, - int nameiop, cred_t *cr, int flags) +zfs_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, struct pathname *pnp, + int flags, vnode_t *rdir, cred_t *cr, caller_context_t *ct, + int *direntflags, pathname_t *realpnp) { znode_t *zdp = VTOZ(dvp); zfsvfs_t *zfsvfs = zdp->z_zfsvfs; - int *direntflags = NULL; - void *realpnp = NULL; int error = 0; /* fast path */ @@ -1249,7 +1248,7 @@ zfs_lookup(vnode_t *dvp, char *nm, vnode ZFS_VERIFY_ZP(zdp); *vpp = NULL; - dprintf("zfs_lookup called %s\n", nm); + if (flags & LOOKUP_XATTR) { #ifdef TODO /* @@ -1301,17 +1300,6 @@ zfs_lookup(vnode_t *dvp, char *nm, vnode return (error); } - /* - * Before tediously performing a linear scan of the directory, - * check the name cache to see if the directory/name pair - * we are looking for is known already. - */ - - if ((error = cache_lookup(dvp, vpp, cnp)) >= 0) { - ZFS_EXIT(zfsvfs); - return (error); - } - if (zfsvfs->z_utf8 && u8_validate(nm, strlen(nm), NULL, U8_VALIDATE_ENTIRE, &error) < 0) { ZFS_EXIT(zfsvfs); @@ -1323,56 +1311,6 @@ zfs_lookup(vnode_t *dvp, char *nm, vnode error = specvp_check(vpp, cr); ZFS_EXIT(zfsvfs); - - /* Translate errors and add SAVENAME when needed. */ - if (cnp->cn_flags & ISLASTCN) { - switch (nameiop) { - case CREATE: - case RENAME: - if (error == ENOENT) { - error = EJUSTRETURN; - break; - } - /* FALLTHROUGH */ - case DELETE: - break; - } - } - - if (error == 0 && (nm[0] != '.' || nm[1] != '\0')) { - int ltype = 0; - - if (cnp->cn_flags & ISDOTDOT) { - ltype = VOP_ISLOCKED(dvp); - VOP_UNLOCK(dvp); - } - error = vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY); - if (cnp->cn_flags & ISDOTDOT) - vn_lock(dvp, ltype | LK_RETRY); - if (error != 0) { - VN_RELE(*vpp); - *vpp = NULL; - return (error); - } - } - - /* - * Insert name into cache if appropriate. - */ - if ((cnp->cn_flags & MAKEENTRY) == 0){ - return (error); - } - switch (error) { - case 0: - cache_enter(dvp, *vpp, cnp); - break; - case ENOENT: - if (nameiop != CREATE) - cache_enter(dvp, *vpp, cnp); - break; - default: - break; - } return (error); } @@ -2033,6 +1971,12 @@ top: vnevent_rmdir(vp, dvp, name, ct); /* + * Grab a lock on the directory to make sure that noone is + * trying to add (or lookup) entries while we are removing it. + */ + rw_enter(&zp->z_name_lock, RW_WRITER); + + /* * Grab a lock on the parent pointer to make sure we play well * with the treewalk and directory rename code. */ @@ -3571,10 +3515,20 @@ top: * entries refer to the same file object, rename * must do nothing and exit without error. */ +#ifndef __NetBSD__ + /* + * But on NetBSD we have a different system call to do + * this, posix_rename, which sorta kinda handles this + * case (modulo races), and our tests expect BSD + * semantics for rename, so we'll do that until we can + * push the choice between BSD and POSIX semantics into + * the VOP_RENAME protocol as a flag. + */ if (szp->z_id == tzp->z_id) { error = 0; goto out; } +#endif } vnevent_rename_src(ZTOV(szp), sdvp, snm, ct); @@ -3623,15 +3577,20 @@ top: return (error); } - if (tzp) /* Attempt to remove the existing target */ + if (tzp && (tzp->z_id != szp->z_id)) + /* Attempt to remove the existing target */ error = zfs_link_destroy(tdl, tzp, tx, zflg, NULL); if (error == 0) { - error = zfs_link_create(tdl, szp, tx, ZRENAMING); + if (!tzp || (tzp->z_id != szp->z_id)) + error = zfs_link_create(tdl, szp, tx, ZRENAMING); if (error == 0) { szp->z_phys->zp_flags |= ZFS_AV_MODIFIED; - error = zfs_link_destroy(sdl, szp, tx, ZRENAMING, NULL); + error = zfs_link_destroy(sdl, szp, tx, + /* Kludge for BSD rename semantics. */ + ((tzp && (tzp->z_id == szp->z_id)) ? + zflg : ZRENAMING), NULL); ASSERT(error == 0); zfs_log_rename(zilog, tx, @@ -4779,13 +4738,8 @@ static int zfs_netbsd_open(void *v) { struct vop_open_args *ap = v; - vnode_t *vp = ap->a_vp; - znode_t *zp = VTOZ(vp); - int error; - error = zfs_open(&vp, ap->a_mode, ap->a_cred, NULL); - - return (error); + return (zfs_open(&ap->a_vp, ap->a_mode, ap->a_cred, NULL)); } static int @@ -4846,63 +4800,346 @@ zfs_netbsd_access(void *v) static int zfs_netbsd_lookup(void *v) { - struct vop_lookup_args *ap = v; + struct vop_lookup_args /* { + struct vnode *a_dvp; + struct vnode **a_vpp; + struct componentname *a_cnp; + } */ *ap = v; + struct vnode *dvp = ap->a_dvp; + struct vnode **vpp = ap->a_vpp; struct componentname *cnp = ap->a_cnp; char nm[NAME_MAX + 1]; - int err; - - ASSERT(cnp->cn_namelen < sizeof(nm)); - strlcpy(nm, cnp->cn_nameptr, MIN(cnp->cn_namelen + 1, sizeof(nm))); + int error; - err = zfs_lookup(ap->a_dvp, nm, ap->a_vpp, cnp, cnp->cn_nameiop, - cnp->cn_cred, 0); - - return err; + KASSERT(dvp != NULL); + KASSERT(vpp != NULL); + KASSERT(cnp != NULL); + KASSERT(cnp->cn_nameptr != NULL); + KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE); + KASSERT(cnp->cn_namelen < sizeof nm); + +#if 0 /* Namecache too scary to contemplate. */ + *vpp = NULL; + + /* + * Do an access check before the cache lookup. zfs_lookup does + * an access check too, but it's too scary to contemplate + * injecting our namecache stuff into zfs internals. + * + * XXX Is this the correct access check? + */ + if ((error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred)) != 0) + goto out; + + /* + * Check the namecache before entering zfs_lookup. + * cache_lookup does the locking dance for us. + */ + if ((error = cache_lookup(dvp, vpp, cnp)) >= 0) + goto out; +#endif + + /* + * zfs_lookup wants a null-terminated component name, but namei + * gives us a pointer into the full pathname. + */ + (void)strlcpy(nm, cnp->cn_nameptr, cnp->cn_namelen + 1); + + error = zfs_lookup(dvp, nm, vpp, NULL, 0, NULL, cnp->cn_cred, NULL, + NULL, NULL); + + /* + * Translate errors to match our namei insanity. + */ + if (cnp->cn_flags & ISLASTCN) { + switch (cnp->cn_nameiop) { + case CREATE: + case RENAME: + if (error == ENOENT) { + error = EJUSTRETURN; + break; + } + /* FALLTHROUGH */ + case DELETE: + break; + } + } + + if (error) { + KASSERT(*vpp == NULL); + goto out; + } + KASSERT(*vpp != NULL); /* XXX Correct? */ + + /* + * Do a locking dance in conformance to the VOP_LOOKUP protocol. + */ + if ((cnp->cn_namelen == 1) && (cnp->cn_nameptr[0] == '.')) { + KASSERT(!(cnp->cn_flags & ISDOTDOT)); + KASSERT(dvp == *vpp); + } else if ((cnp->cn_namelen == 2) && + (cnp->cn_nameptr[0] == '.') && + (cnp->cn_nameptr[1] == '.')) { + KASSERT(cnp->cn_flags & ISDOTDOT); + VOP_UNLOCK(dvp); + vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY); + vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY); + } else { + KASSERT(!(cnp->cn_flags & ISDOTDOT)); + vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY); + } + +out: + KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE); + KASSERT((*vpp == NULL) || (VOP_ISLOCKED(*vpp) == LK_EXCLUSIVE)); + +#if 0 /* Namecache too scary to contemplate. */ + /* + * Insert name into cache if appropriate. + * + * XXX This seems like a lot of code. Is it all necessary? + * Can we just do a single cache_enter call? + */ +#if 0 + cache_enter(dvp, *vpp, cnp); +#endif + + if ((cnp->cn_flags & MAKEENTRY) == 0) + return (error); + + switch (error) { + case 0: + cache_enter(dvp, *vpp, cnp); + break; + case ENOENT: + KASSERT(*vpp == NULL); + if (cnp->cn_nameiop != CREATE) + cache_enter(dvp, NULL, cnp); + break; + default: + break; + } +#endif + + return (error); } static int zfs_netbsd_create(void *v) { - struct vop_create_args *ap = v; + struct vop_create_args /* { + struct vnode *a_dvp; + struct vnode **a_vpp; + struct componentname *a_cnp; + struct vattr *a_vap; + } */ *ap = v; + struct vnode *dvp = ap->a_dvp; + struct vnode **vpp = ap->a_vpp; struct componentname *cnp = ap->a_cnp; - vattr_t *vap = ap->a_vap; + struct vattr *vap = ap->a_vap; int mode; + int error; + + KASSERT(dvp != NULL); + KASSERT(vpp != NULL); + KASSERT(cnp != NULL); + KASSERT(vap != NULL); + KASSERT(cnp->cn_nameptr != NULL); + KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE); + +#if 0 + /* + * ZFS doesn't require dvp to be BSD-locked, and the caller + * expects us to drop this lock anyway, so we might as well + * drop it early to encourage concurrency. + */ + VOP_UNLOCK(dvp); +#endif vattr_init_mask(vap); mode = vap->va_mode & ALLPERMS; - return (zfs_create(ap->a_dvp, (char *)cnp->cn_nameptr, vap, !EXCL, mode, - ap->a_vpp, cnp->cn_cred)); + /* XXX !EXCL is wrong here... */ + error = zfs_create(dvp, __UNCONST(cnp->cn_nameptr), vap, !EXCL, mode, + vpp, cnp->cn_cred); + if (error) { + KASSERT(*vpp == NULL); + goto out; + } + KASSERT(*vpp != NULL); + + /* + * Lock *vpp in conformance to the VOP_CREATE protocol. + */ + vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY); + +out: + KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE); + KASSERT((*vpp == NULL) || (VOP_ISLOCKED(*vpp) == LK_EXCLUSIVE)); + + /* + * Unlock and release dvp because the VOP_CREATE protocol is insane. + */ + VOP_UNLOCK(dvp); + VN_RELE(dvp); + + return (error); } static int zfs_netbsd_remove(void *v) { - struct vop_remove_args *ap = v; + struct vop_remove_args /* { + struct vnode *a_dvp; + struct vnode *a_vp; + struct componentname *a_cnp; + } */ *ap = v; + struct vnode *dvp = ap->a_dvp; + struct vnode *vp = ap->a_vp; + struct componentname *cnp = ap->a_cnp; + int error; + + KASSERT(dvp != NULL); + KASSERT(vp != NULL); + KASSERT(cnp != NULL); + KASSERT(cnp->cn_nameptr != NULL); + KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE); + KASSERT(VOP_ISLOCKED(vp) == LK_EXCLUSIVE); + +#if 0 + /* + * ZFS doesn't require dvp to be BSD-locked, and the caller + * expects us to drop this lock anyway, so we might as well + * drop it early to encourage concurrency. + */ + VOP_UNLOCK(dvp); +#endif + + /* + * zfs_remove will look up the entry again itself, so discard vp. + */ + VOP_UNLOCK(vp); + VN_RELE(vp); + + error = zfs_remove(dvp, __UNCONST(cnp->cn_nameptr), cnp->cn_cred, NULL, + 0); + + KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE); - return (zfs_remove(ap->a_dvp, (char *)ap->a_cnp->cn_nameptr, - ap->a_cnp->cn_cred, NULL, 0)); + /* + * Unlock and release dvp because the VOP_REMOVE protocol is insane. + */ + VOP_UNLOCK(dvp); + VN_RELE(dvp); + + return (error); } static int zfs_netbsd_mkdir(void *v) { - struct vop_mkdir_args *ap = v; - vattr_t *vap = ap->a_vap; + struct vop_mkdir_args /* { + struct vnode *a_dvp; + struct vnode **a_vpp; + struct componentname *a_cnp; + struct vattr *a_vap; + } */ *ap = v; + struct vnode *dvp = ap->a_dvp; + struct vnode **vpp = ap->a_vpp; + struct componentname *cnp = ap->a_cnp; + struct vattr *vap = ap->a_vap; + int error; + + KASSERT(dvp != NULL); + KASSERT(vpp != NULL); + KASSERT(cnp != NULL); + KASSERT(vap != NULL); + KASSERT(cnp->cn_nameptr != NULL); + KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE); + +#if 0 + /* + * ZFS doesn't require dvp to be BSD-locked, and the caller + * expects us to drop this lock anyway, so we might as well + * drop it early to encourage concurrency. + */ + VOP_UNLOCK(dvp); +#endif vattr_init_mask(vap); - return (zfs_mkdir(ap->a_dvp, (char *)ap->a_cnp->cn_nameptr, vap, ap->a_vpp, - ap->a_cnp->cn_cred, NULL, 0, NULL)); + error = zfs_mkdir(dvp, __UNCONST(cnp->cn_nameptr), vap, vpp, + cnp->cn_cred, NULL, 0, NULL); + if (error) { + KASSERT(*vpp == NULL); + goto out; + } + KASSERT(*vpp != NULL); + + /* + * Lock *vpp in conformance to the VOP_MKDIR protocol. + */ + vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY); + +out: + KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE); + KASSERT((*vpp == NULL) || (VOP_ISLOCKED(*vpp) == LK_EXCLUSIVE)); + + /* + * Unlock and release dvp because the VOP_MKDIR protocol is insane. + */ + VOP_UNLOCK(dvp); + VN_RELE(dvp); + + return (error); } static int zfs_netbsd_rmdir(void *v) { - struct vop_rmdir_args *ap = v; + struct vop_rmdir_args /* { + struct vnode *a_dvp; + struct vnode *a_vp; + struct componentname *a_cnp; + } */ *ap = v; + struct vnode *dvp = ap->a_dvp; + struct vnode *vp = ap->a_vp; struct componentname *cnp = ap->a_cnp; + int error; + + KASSERT(dvp != NULL); + KASSERT(vp != NULL); + KASSERT(cnp != NULL); + KASSERT(cnp->cn_nameptr != NULL); + KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE); + KASSERT(VOP_ISLOCKED(vp) == LK_EXCLUSIVE); + +#if 0 + /* + * ZFS doesn't require dvp to be BSD-locked, and the caller + * expects us to drop this lock anyway, so we might as well + * drop it early to encourage concurrency. + */ + VOP_UNLOCK(dvp); +#endif + + /* + * zfs_rmdir will look up the entry again itself, so discard vp. + */ + VOP_UNLOCK(vp); + VN_RELE(vp); - return (zfs_rmdir(ap->a_dvp, (char *)cnp->cn_nameptr, NULL, cnp->cn_cred, NULL, 0)); + error = zfs_rmdir(dvp, __UNCONST(cnp->cn_nameptr), NULL, cnp->cn_cred, + NULL, 0); + + KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE); + + /* + * Unlock and release dvp because the VOP_RMDIR protocol is insane. + */ + VOP_UNLOCK(dvp); + VN_RELE(dvp); + return error; } static int @@ -5053,21 +5290,63 @@ zfs_netbsd_rename(void *v) } */ *ap = v; vnode_t *fdvp = ap->a_fdvp; vnode_t *fvp = ap->a_fvp; + struct componentname *fcnp = ap->a_fcnp; vnode_t *tdvp = ap->a_tdvp; vnode_t *tvp = ap->a_tvp; + struct componentname *tcnp = ap->a_tcnp; + kauth_cred_t cred; int error; - error = zfs_rename(fdvp, (char *)ap->a_fcnp->cn_nameptr, tdvp, - (char *)ap->a_tcnp->cn_nameptr, ap->a_fcnp->cn_cred, NULL, 0); + KASSERT(fdvp != NULL); + KASSERT(fvp != NULL); + KASSERT(fcnp != NULL); + KASSERT(fcnp->cn_nameptr != NULL); + KASSERT(tdvp != NULL); + KASSERT(tcnp != NULL); + KASSERT(fcnp->cn_nameptr != NULL); + /* KASSERT(VOP_ISLOCKED(fdvp) != LK_EXCLUSIVE); */ + /* KASSERT(VOP_ISLOCKED(fvp) != LK_EXCLUSIVE); */ + KASSERT(VOP_ISLOCKED(tdvp) == LK_EXCLUSIVE); + KASSERT((tvp == NULL) || (VOP_ISLOCKED(tvp) == LK_EXCLUSIVE)); + KASSERT(fdvp->v_type == VDIR); + KASSERT(tdvp->v_type == VDIR); - if (tdvp == tvp) - VN_RELE(tdvp); - else - VN_URELE(tdvp); - if (tvp) - VN_URELE(tvp); - VN_RELE(fdvp); + cred = fcnp->cn_cred; + + /* + * XXX Want a better equality test. `tcnp->cn_cred == cred' + * hoses p2k because puffs transmits the creds separately and + * allocates distinct but equivalent structures for them. + */ + KASSERT(kauth_cred_uidmatch(cred, tcnp->cn_cred)); + + /* + * Drop the insane locks. + */ + VOP_UNLOCK(tdvp); + if ((tvp != NULL) && (tvp != tdvp)) + VOP_UNLOCK(tvp); + + /* + * Release the source and target nodes; zfs_rename will look + * them up again once the locking situation is sane. + */ VN_RELE(fvp); + if (tvp != NULL) + VN_RELE(tvp); + + /* + * Do the rename ZFSly. + */ + error = zfs_rename(fdvp, __UNCONST(fcnp->cn_nameptr), tdvp, + __UNCONST(tcnp->cn_nameptr), cred, NULL, 0); + + /* + * Release the directories now too, because the VOP_RENAME + * protocol is insane. + */ + VN_RELE(fdvp); + VN_RELE(tdvp); return (error); } @@ -5075,15 +5354,65 @@ zfs_netbsd_rename(void *v) static int zfs_netbsd_symlink(void *v) { - struct vop_symlink_args *ap = v; + struct vop_symlink_args /* { + struct vnode *a_dvp; + struct vnode **a_vpp; + struct componentname *a_cnp; + struct vattr *a_vap; + char *a_target; + } */ *ap = v; + struct vnode *dvp = ap->a_dvp; + struct vnode **vpp = ap->a_vpp; struct componentname *cnp = ap->a_cnp; - vattr_t *vap = ap->a_vap; + struct vattr *vap = ap->a_vap; + char *target = ap->a_target; + int error; + + KASSERT(dvp != NULL); + KASSERT(vpp != NULL); + KASSERT(cnp != NULL); + KASSERT(vap != NULL); + KASSERT(target != NULL); + KASSERT(cnp->cn_nameptr != NULL); + KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE); + +#if 0 + /* + * ZFS doesn't require dvp to be BSD-locked, and the caller + * expects us to drop this lock anyway, so we might as well + * drop it early to encourage concurrency. + */ + VOP_UNLOCK(dvp); +#endif vap->va_type = VLNK; /* Netbsd: Syscall only sets va_mode. */ vattr_init_mask(vap); - return (zfs_symlink(ap->a_dvp, ap->a_vpp, (char *)cnp->cn_nameptr, vap, - ap->a_target, cnp->cn_cred, 0)); + error = zfs_symlink(dvp, vpp, __UNCONST(cnp->cn_nameptr), vap, target, + cnp->cn_cred, 0); + if (error) { + KASSERT(*vpp == NULL); + goto out; + } + KASSERT(*vpp != NULL); + + + /* + * Lock *vpp in conformance to the VOP_SYMLINK protocol. + */ + vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY); + +out: + KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE); + KASSERT((*vpp == NULL) || (VOP_ISLOCKED(*vpp) == LK_EXCLUSIVE)); + + /* + * Unlock and release dvp because the VOP_SYMLINK protocol is insane. + */ + VOP_UNLOCK(dvp); + VN_RELE(dvp); + + return (error); } #ifdef PORT_SOLARIS @@ -5263,10 +5592,41 @@ zfs_netbsd_readlink(void *v) static int zfs_netbsd_link(void *v) { - struct vop_link_args *ap = v; + struct vop_link_args /* { + struct vnode *a_dvp; + struct vnode *a_vp; + struct componentname *a_cnp; + } */ *ap = v; + struct vnode *dvp = ap->a_dvp; + struct vnode *vp = ap->a_vp; struct componentname *cnp = ap->a_cnp; + int error; + + KASSERT(dvp != NULL); + KASSERT(vp != NULL); + KASSERT(cnp != NULL); + KASSERT(cnp->cn_nameptr != NULL); + KASSERT(VOP_ISLOCKED(dvp) == LK_EXCLUSIVE); - return (zfs_link(ap->a_dvp, ap->a_vp, (char *)cnp->cn_nameptr, cnp->cn_cred, NULL, 0)); +#if 0 + /* + * ZFS doesn't require dvp to be BSD-locked, and the caller + * expects us to drop this lock anyway, so we might as well + * drop it early to encourage concurrency. + */ + VOP_UNLOCK(dvp); +#endif + + error = zfs_link(dvp, vp, __UNCONST(cnp->cn_nameptr), cnp->cn_cred, + NULL, 0); + + /* + * Unlock and release dvp because the VOP_LINK protocol is insane. + */ + VOP_UNLOCK(dvp); + VN_RELE(dvp); + + return (error); } static int @@ -5286,91 +5646,67 @@ zfs_netbsd_inactive(void *v) return (0); } -/* - * Destroy znode from taskq thread without ZFS_OBJ_MUTEX held. - */ -static void -zfs_reclaim_deferred(void *arg) -{ - znode_t *zp = arg; - zfsvfs_t *zfsvfs = zp->z_zfsvfs; - uint64_t z_id = zp->z_id; - - /* - * Don't allow a zfs_zget() while were trying to release this znode - */ - ZFS_OBJ_HOLD_ENTER(zfsvfs, z_id); - - /* Don't need to call ZFS_OBJ_HOLD_EXIT zfs_inactive did thatfor us. */ - zfs_zinactive(zp); - -} - static int zfs_netbsd_reclaim(void *v) { - struct vop_reclaim_args *ap = v; - vnode_t *vp = ap->a_vp; - znode_t *zp = VTOZ(vp); + struct vop_reclaim_args /* { + struct vnode *a_vp; + } */ *ap = v; + struct vnode *vp = ap->a_vp; + znode_t *zp; zfsvfs_t *zfsvfs; - int locked; - - locked = 0; - - ASSERT(zp != NULL); - KASSERT(!vn_has_cached_data(vp)); + int error; + KASSERT(vp != NULL); + zp = VTOZ(vp); + KASSERT(zp != NULL); zfsvfs = zp->z_zfsvfs; - - mutex_enter(&zp->z_lock); - ASSERT(zp->z_phys); + KASSERT(zfsvfs != NULL); -// dprintf("destroying znode %p -- vnode %p -- zp->z_buf = %p\n", zp, ZTOV(zp), zp->z_dbuf); -// rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_READER); - genfs_node_destroy(vp); - cache_purge(vp); + /* Not until we get uvm and zfs talking to one another. */ + KASSERT(!vn_has_cached_data(vp)); + rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_READER); if (zp->z_dbuf == NULL) { /* * The fs has been unmounted, or we did a * suspend/resume and this file no longer exists. */ + if (vn_has_cached_data(vp)) + /* zfs and uvm are hosed. Should not happen. */ + panic("zfs vnode has cached data (0): %p", vp); rw_exit(&zfsvfs->z_teardown_inactive_lock); - mutex_exit(&zp->z_lock); zfs_znode_free(zp); return (0); } - mutex_exit(&zp->z_lock); - mutex_enter(&zp->z_lock); - if (!zp->z_unlinked) { - /* - * XXX Hack because ZFS_OBJ_MUTEX is held we can't call zfs_zinactive - * now. I need to defer zfs_zinactive to another thread which doesn't hold this mutex. - */ - locked = MUTEX_HELD(ZFS_OBJ_MUTEX(zfsvfs, zp->z_id)) ? 2 : - ZFS_OBJ_HOLD_TRYENTER(zfsvfs, zp->z_id); - if (locked == 0) { - /* - * Lock can't be obtained due to deadlock possibility, - * so defer znode destruction. - */ - taskq_dispatch(system_taskq, zfs_reclaim_deferred, zp, 0); + /* + * Attempt to push any data in the page cache. If this fails + * we will get kicked out later in zfs_zinactive(). + */ + if (vn_has_cached_data(vp)) + /* zfs and uvm are hosed. Should not happen. */ + panic("zfs vnode has cached data (1): %p", vp); + + if (zp->z_atime_dirty && zp->z_unlinked == 0) { + dmu_tx_t *tx = dmu_tx_create(zfsvfs->z_os); + + dmu_tx_hold_bonus(tx, zp->z_id); + error = dmu_tx_assign(tx, TXG_WAIT); + if (error) { + dmu_tx_abort(tx); } else { - zfs_znode_dmu_fini(zp); - /* Our LWP is holding ZFS_OBJ_HELD mutex but it was locked before - zfs_zinactive was called therefore we can't release it. */ - if (locked == 1) - ZFS_OBJ_HOLD_EXIT(zfsvfs, zp->z_id); - zfs_znode_free(zp); + dmu_buf_will_dirty(zp->z_dbuf, tx); + mutex_enter(&zp->z_lock); + zp->z_atime_dirty = 0; + mutex_exit(&zp->z_lock); + dmu_tx_commit(tx); } - } else - mutex_exit(&zp->z_lock); + } - ZTOV(zp) = NULL; - vp->v_data = NULL; /* v_data must be NULL for a cleaned vnode. */ - - return (0); + zfs_zinactive(zp); + rw_exit(&zfsvfs->z_teardown_inactive_lock); + return 0; } static int @@ -5414,9 +5750,6 @@ zfs_netbsd_pathconf(void *v) case _PC_CHOWN_RESTRICTED: *ap->a_retval = 1; return (0); - case _PC_NO_TRUNC: - *ap->a_retval = 1; - return (0); case _PC_VDISABLE: *ap->a_retval = _POSIX_VDISABLE; return (0); @@ -5424,10 +5757,15 @@ zfs_netbsd_pathconf(void *v) return (EINVAL); } /* NOTREACHED */ - } + } return (error); } +#if 1 +# define zfs_netbsd_lock genfs_lock +# define zfs_netbsd_unlock genfs_unlock +# define zfs_netbsd_islocked genfs_islocked +#else int zfs_netbsd_lock(void *v) { @@ -5449,6 +5787,7 @@ zfs_netbsd_islocked(void *v) return LK_EXCLUSIVE; } +#endif /* int Index: src/external/cddl/osnet/dist/uts/common/fs/zfs/zfs_znode.c diff -u src/external/cddl/osnet/dist/uts/common/fs/zfs/zfs_znode.c:1.12 src/external/cddl/osnet/dist/uts/common/fs/zfs/zfs_znode.c:1.13 --- src/external/cddl/osnet/dist/uts/common/fs/zfs/zfs_znode.c:1.12 Sun Nov 20 02:54:25 2011 +++ src/external/cddl/osnet/dist/uts/common/fs/zfs/zfs_znode.c Mon Oct 15 23:08:19 2012 @@ -618,8 +618,8 @@ zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_bu int error; zp = kmem_cache_alloc(znode_cache, KM_SLEEP); - for (;;) { + for (;;) { error = getnewvnode(VT_ZFS, zfsvfs->z_parent->z_vfs, zfs_vnodeop_p, NULL, &zp->z_vnode); if (__predict_true(error == 0)) @@ -628,7 +628,7 @@ zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_bu "error=%d\n", error); (void)kpause("zfsnewvn", false, hz, NULL); } - + ASSERT(zp->z_dirlocks == NULL); ASSERT(zp->z_dbuf == NULL); ASSERT(!POINTER_IS_VALID(zp->z_zfsvfs)); @@ -656,6 +656,7 @@ zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_bu vp->v_vfsp = zfsvfs->z_parent->z_vfs; vp->v_type = IFTOVT((mode_t)zp->z_phys->zp_mode); vp->v_data = zp; + genfs_node_init(vp, &zfs_genfsops); switch (vp->v_type) { case VDIR: zp->z_zn_prefetch = B_TRUE; /* z_prefetch default is enabled */ @@ -686,6 +687,7 @@ zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_bu zp->z_zfsvfs = zfsvfs; mutex_exit(&zfsvfs->z_znodes_lock); + VFS_HOLD(zfsvfs->z_vfs); return (zp); } @@ -998,11 +1000,6 @@ again: if (((znode_phys_t *)db->db_data)->zp_gen != 0) { zp = zfs_znode_alloc(zfsvfs, db, doi.doi_data_block_size); *zpp = zp; - - vp = ZTOV(zp); - genfs_node_init(vp, &zfs_genfsops); - VOP_UNLOCK(vp); - err = 0; } else { dmu_buf_rele(db, NULL); @@ -1076,17 +1073,13 @@ zfs_znode_delete(znode_t *zp, dmu_tx_t * zfs_znode_free(zp); } -/* - * zfs_zinactive must be called with ZFS_OBJ_HOLD_ENTER held. And this lock - * will be released in zfs_zinactive. - */ void zfs_zinactive(znode_t *zp) { vnode_t *vp = ZTOV(zp); zfsvfs_t *zfsvfs = zp->z_zfsvfs; uint64_t z_id = zp->z_id; - + ASSERT(zp->z_dbuf && zp->z_phys); /* @@ -1107,7 +1100,7 @@ zfs_zinactive(znode_t *zp) } mutex_exit(&zp->z_lock); - /* XXX why disabled zfs_znode_dmu_fini(zp); */ + zfs_znode_dmu_fini(zp); ZFS_OBJ_HOLD_EXIT(zfsvfs, z_id); zfs_znode_free(zp); } @@ -1116,7 +1109,14 @@ void zfs_znode_free(znode_t *zp) { zfsvfs_t *zfsvfs = zp->z_zfsvfs; - ASSERT(ZTOV(zp) == NULL); + struct vnode *vp = ZTOV(zp); + + /* XXX Not all callers are from VOP_RECLAIM. What to do? */ + KASSERT(vp != NULL); + mutex_enter(vp->v_interlock); /* XXX Necessary? */ + genfs_node_destroy(vp); + vp->v_data = NULL; + mutex_exit(vp->v_interlock); dprintf("destroying znode %p\n", zp); //cpu_Debugger(); @@ -1403,6 +1403,8 @@ top: dmu_tx_commit(tx); + zfs_range_unlock(rl); + /* * Clear any mapped pages in the truncated region. This has to * happen outside of the transaction to avoid the possibility of