Author: mjg
Date: Sat Jul 25 10:38:05 2020
New Revision: 363520
URL: https://svnweb.freebsd.org/changeset/base/363520

Log:
  ufs: add support for lockless lookup
  
  ACLs are not supported, meaning their presence will force the use of the old 
lookup.
  
  Reviewed by:    kib
  Tested by:      pho (in a patchset)
  Differential Revision:        https://reviews.freebsd.org/D25579

Modified:
  head/sys/ufs/ffs/ffs_vfsops.c
  head/sys/ufs/ffs/ffs_vnops.c
  head/sys/ufs/ufs/inode.h
  head/sys/ufs/ufs/ufs_acl.c
  head/sys/ufs/ufs/ufs_vnops.c

Modified: head/sys/ufs/ffs/ffs_vfsops.c
==============================================================================
--- head/sys/ufs/ffs/ffs_vfsops.c       Sat Jul 25 10:37:15 2020        
(r363519)
+++ head/sys/ufs/ffs/ffs_vfsops.c       Sat Jul 25 10:38:05 2020        
(r363520)
@@ -84,6 +84,7 @@ __FBSDID("$FreeBSD$");
 #include <ddb/ddb.h>
 
 static uma_zone_t uma_inode, uma_ufs1, uma_ufs2;
+VFS_SMR_DECLARE;
 
 static int     ffs_mountfs(struct vnode *, struct mount *, struct thread *);
 static void    ffs_oldfscompat_read(struct fs *, struct ufsmount *,
@@ -393,6 +394,7 @@ ffs_mount(struct mount *mp)
                uma_ufs2 = uma_zcreate("FFS2 dinode",
                    sizeof(struct ufs2_dinode), NULL, NULL, NULL, NULL,
                    UMA_ALIGN_PTR, 0);
+               VFS_SMR_ZONE_SET(uma_inode);
        }
 
        vfs_deleteopt(mp->mnt_optnew, "groupquota");
@@ -455,6 +457,7 @@ ffs_mount(struct mount *mp)
        }
 
        MNT_ILOCK(mp);
+       mp->mnt_kern_flag &= ~MNTK_FPLOOKUP;
        mp->mnt_flag |= mntorflags;
        MNT_IUNLOCK(mp);
        /*
@@ -795,6 +798,17 @@ ffs_mount(struct mount *mp)
                        }
                }
        }
+
+       MNT_ILOCK(mp);
+       /*
+        * This is racy versus lookup, see ufs_fplookup_vexec for details.
+        */
+       if ((mp->mnt_kern_flag & MNTK_FPLOOKUP) != 0)
+               panic("MNTK_FPLOOKUP set on mount %p when it should not be", 
mp);
+       if ((mp->mnt_flag & (MNT_ACLS | MNT_NFS4ACLS)) == 0)
+               mp->mnt_kern_flag |= MNTK_FPLOOKUP;
+       MNT_IUNLOCK(mp);
+
        vfs_mountedfrom(mp, fspec);
        return (0);
 }
@@ -1968,14 +1982,14 @@ ffs_vgetf(mp, ino, flags, vpp, ffs_flags)
 
        ump = VFSTOUFS(mp);
        fs = ump->um_fs;
-       ip = uma_zalloc(uma_inode, M_WAITOK | M_ZERO);
+       ip = uma_zalloc_smr(uma_inode, M_WAITOK | M_ZERO);
 
        /* Allocate a new vnode/inode. */
        error = getnewvnode("ufs", mp, fs->fs_magic == FS_UFS1_MAGIC ?
            &ffs_vnodeops1 : &ffs_vnodeops2, &vp);
        if (error) {
                *vpp = NULL;
-               uma_zfree(uma_inode, ip);
+               uma_zfree_smr(uma_inode, ip);
                return (error);
        }
        /*
@@ -2004,7 +2018,7 @@ ffs_vgetf(mp, ino, flags, vpp, ffs_flags)
                vp->v_vflag |= VV_FORCEINSMQ;
        error = insmntque(vp, mp);
        if (error != 0) {
-               uma_zfree(uma_inode, ip);
+               uma_zfree_smr(uma_inode, ip);
                *vpp = NULL;
                return (error);
        }
@@ -2327,7 +2341,7 @@ ffs_ifree(struct ufsmount *ump, struct inode *ip)
                uma_zfree(uma_ufs1, ip->i_din1);
        else if (ip->i_din2 != NULL)
                uma_zfree(uma_ufs2, ip->i_din2);
-       uma_zfree(uma_inode, ip);
+       uma_zfree_smr(uma_inode, ip);
 }
 
 static int dobkgrdwrite = 1;

Modified: head/sys/ufs/ffs/ffs_vnops.c
==============================================================================
--- head/sys/ufs/ffs/ffs_vnops.c        Sat Jul 25 10:37:15 2020        
(r363519)
+++ head/sys/ufs/ffs/ffs_vnops.c        Sat Jul 25 10:38:05 2020        
(r363520)
@@ -905,8 +905,10 @@ ffs_write(ap)
        if ((ip->i_mode & (ISUID | ISGID)) && resid > uio->uio_resid &&
            ap->a_cred) {
                if (priv_check_cred(ap->a_cred, PRIV_VFS_RETAINSUGID)) {
-                       ip->i_mode &= ~(ISUID | ISGID);
+                       vn_seqc_write_begin(vp);
+                       UFS_INODE_SET_MODE(ip, ip->i_mode & ~(ISUID | ISGID));
                        DIP_SET(ip, i_mode, ip->i_mode);
+                       vn_seqc_write_end(vp);
                }
        }
        if (error) {
@@ -1152,8 +1154,10 @@ ffs_extwrite(struct vnode *vp, struct uio *uio, int io
         */
        if ((ip->i_mode & (ISUID | ISGID)) && resid > uio->uio_resid && ucred) {
                if (priv_check_cred(ucred, PRIV_VFS_RETAINSUGID)) {
-                       ip->i_mode &= ~(ISUID | ISGID);
+                       vn_seqc_write_begin(vp);
+                       UFS_INODE_SET_MODE(ip, ip->i_mode & ~(ISUID | ISGID));
                        dp->di_mode = ip->i_mode;
+                       vn_seqc_write_end(vp);
                }
        }
        if (error) {

Modified: head/sys/ufs/ufs/inode.h
==============================================================================
--- head/sys/ufs/ufs/inode.h    Sat Jul 25 10:37:15 2020        (r363519)
+++ head/sys/ufs/ufs/inode.h    Sat Jul 25 10:38:05 2020        (r363520)
@@ -43,6 +43,7 @@
 #include <sys/lock.h>
 #include <sys/queue.h>
 #include <ufs/ufs/dinode.h>
+#include <sys/seqc.h>
 
 /*
  * This must agree with the definition in <ufs/ufs/dir.h>.
@@ -149,6 +150,14 @@ struct inode {
 #define UFS_INODE_FLAG_LAZY_MASK_ASSERTABLE \
        (UFS_INODE_FLAG_LAZY_MASK & ~(IN_LAZYMOD | IN_LAZYACCESS))
 
+#define UFS_INODE_SET_MODE(ip, mode) do {                      \
+       struct inode *_ip = (ip);                               \
+       int _mode = (mode);                                     \
+                                                               \
+       ASSERT_VOP_IN_SEQC(ITOV(_ip));                          \
+       atomic_store_short(&(_ip)->i_mode, _mode);              \
+} while (0)
+
 #define UFS_INODE_SET_FLAG(ip, flags) do {                     \
        struct inode *_ip = (ip);                               \
        struct vnode *_vp = ITOV(_ip);                          \
@@ -229,6 +238,7 @@ struct indir {
 
 /* Convert between inode pointers and vnode pointers. */
 #define        VTOI(vp)        ((struct inode *)(vp)->v_data)
+#define        VTOI_SMR(vp)    ((struct inode *)vn_load_v_data_smr(vp))
 #define        ITOV(ip)        ((ip)->i_vnode)
 
 /* Determine if soft dependencies are being done */

Modified: head/sys/ufs/ufs/ufs_acl.c
==============================================================================
--- head/sys/ufs/ufs/ufs_acl.c  Sat Jul 25 10:37:15 2020        (r363519)
+++ head/sys/ufs/ufs/ufs_acl.c  Sat Jul 25 10:38:05 2020        (r363520)
@@ -139,9 +139,11 @@ ufs_sync_acl_from_inode(struct inode *ip, struct acl *
 void
 ufs_sync_inode_from_acl(struct acl *acl, struct inode *ip)
 {
+       int newmode;
 
-       ip->i_mode &= ACL_PRESERVE_MASK;
-       ip->i_mode |= acl_posix1e_acl_to_mode(acl);
+       newmode = ip->i_mode & ACL_PRESERVE_MASK;
+       newmode |= acl_posix1e_acl_to_mode(acl);
+       UFS_INODE_SET_MODE(ip, newmode);
        DIP_SET(ip, i_mode, ip->i_mode);
 }
 
@@ -381,7 +383,7 @@ int
 ufs_setacl_nfs4_internal(struct vnode *vp, struct acl *aclp, struct thread *td)
 {
        int error;
-       mode_t mode;
+       mode_t mode, newmode;
        struct inode *ip = VTOI(vp);
 
        KASSERT(acl_nfs4_check(aclp, vp->v_type == VDIR) == 0,
@@ -418,8 +420,9 @@ ufs_setacl_nfs4_internal(struct vnode *vp, struct acl 
 
        acl_nfs4_sync_mode_from_acl(&mode, aclp);
 
-       ip->i_mode &= ACL_PRESERVE_MASK;
-       ip->i_mode |= mode;
+       newmode = ip->i_mode & ACL_PRESERVE_MASK;
+       newmode |= mode;
+       UFS_INODE_SET_MODE(ip, newmode);
        DIP_SET(ip, i_mode, ip->i_mode);
        UFS_INODE_SET_FLAG(ip, IN_CHANGE);
 

Modified: head/sys/ufs/ufs/ufs_vnops.c
==============================================================================
--- head/sys/ufs/ufs/ufs_vnops.c        Sat Jul 25 10:37:15 2020        
(r363519)
+++ head/sys/ufs/ufs/ufs_vnops.c        Sat Jul 25 10:38:05 2020        
(r363520)
@@ -63,6 +63,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/lockf.h>
 #include <sys/conf.h>
 #include <sys/acl.h>
+#include <sys/smr.h>
 
 #include <security/mac/mac_framework.h>
 
@@ -96,10 +97,12 @@ FEATURE(suiddir,
     "Give all new files in directory the same ownership as the directory");
 #endif
 
+VFS_SMR_DECLARE;
 
 #include <ufs/ffs/ffs_extern.h>
 
 static vop_accessx_t   ufs_accessx;
+static vop_fplookup_vexec_t ufs_fplookup_vexec;
 static int ufs_chmod(struct vnode *, int, struct ucred *, struct thread *);
 static int ufs_chown(struct vnode *, uid_t, gid_t, struct ucred *, struct 
thread *);
 static vop_close_t     ufs_close;
@@ -422,6 +425,46 @@ ufs_accessx(ap)
        return (error);
 }
 
+/*
+ * VOP_FPLOOKUP_VEXEC routines are subject to special circumstances, see
+ * the comment above cache_fplookup for details.
+ */
+static int
+ufs_fplookup_vexec(ap)
+       struct vop_fplookup_vexec_args /* {
+               struct vnode *a_vp;
+               struct ucred *a_cred;
+               struct thread *a_td;
+       } */ *ap;
+{
+       struct vnode *vp;
+       struct inode *ip;
+       struct ucred *cred;
+       mode_t all_x, mode;
+
+       vp = ap->a_vp;
+       ip = VTOI_SMR(vp);
+       if (__predict_false(ip == NULL))
+               return (EAGAIN);
+
+       /*
+        * XXX ACL race
+        *
+        * ACLs are not supported and UFS clears/sets this flag on mount and
+        * remount. However, we may still be racing with seeing them and there
+        * is no provision to make sure they were accounted for. This matches
+        * the behavior of the locked case, since the lookup there is also
+        * racy: mount takes no measures to block anyone from progressing.
+        */
+       all_x = S_IXUSR | S_IXGRP | S_IXOTH;
+       mode = atomic_load_short(&ip->i_mode);
+       if (__predict_true((mode & all_x) == all_x))
+               return (0);
+
+       cred = ap->a_cred;
+       return (vaccess_vexec_smr(mode, ip->i_uid, ip->i_gid, cred));
+}
+
 /* ARGSUSED */
 static int
 ufs_getattr(ap)
@@ -711,7 +754,7 @@ ufs_chmod(vp, mode, cred, td)
        struct thread *td;
 {
        struct inode *ip = VTOI(vp);
-       int error;
+       int newmode, error;
 
        /*
         * To modify the permissions on a file, must possess VADMIN
@@ -744,8 +787,9 @@ ufs_chmod(vp, mode, cred, td)
                        return (error);
        }
 
-       ip->i_mode &= ~ALLPERMS;
-       ip->i_mode |= (mode & ALLPERMS);
+       newmode = ip->i_mode & ~ALLPERMS;
+       newmode |= (mode & ALLPERMS);
+       UFS_INODE_SET_MODE(ip, newmode);
        DIP_SET(ip, i_mode, ip->i_mode);
        UFS_INODE_SET_FLAG(ip, IN_CHANGE);
 #ifdef UFS_ACL
@@ -869,7 +913,7 @@ good:
        UFS_INODE_SET_FLAG(ip, IN_CHANGE);
        if ((ip->i_mode & (ISUID | ISGID)) && (ouid != uid || ogid != gid)) {
                if (priv_check_cred(cred, PRIV_VFS_RETAINSUGID)) {
-                       ip->i_mode &= ~(ISUID | ISGID);
+                       UFS_INODE_SET_MODE(ip, ip->i_mode & ~(ISUID | ISGID));
                        DIP_SET(ip, i_mode, ip->i_mode);
                }
        }
@@ -1111,7 +1155,10 @@ ufs_rename(ap)
        int error = 0;
        struct mount *mp;
        ino_t ino;
+       bool want_seqc_end;
 
+       want_seqc_end = false;
+
 #ifdef INVARIANTS
        if ((tcnp->cn_flags & HASBUF) == 0 ||
            (fcnp->cn_flags & HASBUF) == 0)
@@ -1315,6 +1362,13 @@ relock:
            tdp->i_effnlink == 0)
                panic("Bad effnlink fip %p, fdp %p, tdp %p", fip, fdp, tdp);
 
+       if (tvp != NULL)
+               vn_seqc_write_begin(tvp);
+       vn_seqc_write_begin(tdvp);
+       vn_seqc_write_begin(fvp);
+       vn_seqc_write_begin(fdvp);
+       want_seqc_end = true;
+
        /*
         * 1) Bump link count while we're moving stuff
         *    around.  If we crash somewhere before
@@ -1513,6 +1567,14 @@ relock:
        cache_purge_negative(tdvp);
 
 unlockout:
+       if (want_seqc_end) {
+               if (tvp != NULL)
+                       vn_seqc_write_end(tvp);
+               vn_seqc_write_end(tdvp);
+               vn_seqc_write_end(fvp);
+               vn_seqc_write_end(fdvp);
+       }
+
        vput(fdvp);
        vput(fvp);
        if (tvp)
@@ -1556,6 +1618,14 @@ bad:
        goto unlockout;
 
 releout:
+       if (want_seqc_end) {
+               if (tvp != NULL)
+                       vn_seqc_write_end(tvp);
+               vn_seqc_write_end(tdvp);
+               vn_seqc_write_end(fvp);
+               vn_seqc_write_end(fdvp);
+       }
+
        vrele(fdvp);
        vrele(fvp);
        vrele(tdvp);
@@ -1590,7 +1660,7 @@ ufs_do_posix1e_acl_inheritance_dir(struct vnode *dvp, 
                 */
                if (acl->acl_cnt != 0) {
                        dmode = acl_posix1e_newfilemode(dmode, acl);
-                       ip->i_mode = dmode;
+                       UFS_INODE_SET_MODE(ip, dmode);
                        DIP_SET(ip, i_mode, dmode);
                        *dacl = *acl;
                        ufs_sync_acl_from_inode(ip, acl);
@@ -1602,7 +1672,7 @@ ufs_do_posix1e_acl_inheritance_dir(struct vnode *dvp, 
                /*
                 * Just use the mode as-is.
                 */
-               ip->i_mode = dmode;
+               UFS_INODE_SET_MODE(ip, dmode);
                DIP_SET(ip, i_mode, dmode);
                error = 0;
                goto out;
@@ -1673,7 +1743,7 @@ ufs_do_posix1e_acl_inheritance_file(struct vnode *dvp,
                         * the it's not defined case.
                         */
                        mode = acl_posix1e_newfilemode(mode, acl);
-                       ip->i_mode = mode;
+                       UFS_INODE_SET_MODE(ip, mode);
                        DIP_SET(ip, i_mode, mode);
                        ufs_sync_acl_from_inode(ip, acl);
                        break;
@@ -1684,7 +1754,7 @@ ufs_do_posix1e_acl_inheritance_file(struct vnode *dvp,
                /*
                 * Just use the mode as-is.
                 */
-               ip->i_mode = mode;
+               UFS_INODE_SET_MODE(ip, mode);
                DIP_SET(ip, i_mode, mode);
                error = 0;
                goto out;
@@ -1796,6 +1866,7 @@ ufs_mkdir(ap)
        error = UFS_VALLOC(dvp, dmode, cnp->cn_cred, &tvp);
        if (error)
                goto out;
+       vn_seqc_write_begin(tvp);
        ip = VTOI(tvp);
        ip->i_gid = dp->i_gid;
        DIP_SET(ip, i_gid, dp->i_gid);
@@ -1846,6 +1917,7 @@ ufs_mkdir(ap)
                        if (DOINGSOFTDEP(tvp))
                                softdep_revert_link(dp, ip);
                        UFS_VFREE(tvp, ip->i_number, dmode);
+                       vn_seqc_write_end(tvp);
                        vgone(tvp);
                        vput(tvp);
                        return (error);
@@ -1861,6 +1933,7 @@ ufs_mkdir(ap)
                if (DOINGSOFTDEP(tvp))
                        softdep_revert_link(dp, ip);
                UFS_VFREE(tvp, ip->i_number, dmode);
+               vn_seqc_write_end(tvp);
                vgone(tvp);
                vput(tvp);
                return (error);
@@ -1868,7 +1941,7 @@ ufs_mkdir(ap)
 #endif
 #endif /* !SUIDDIR */
        UFS_INODE_SET_FLAG(ip, IN_ACCESS | IN_CHANGE | IN_UPDATE);
-       ip->i_mode = dmode;
+       UFS_INODE_SET_MODE(ip, dmode);
        DIP_SET(ip, i_mode, dmode);
        tvp->v_type = VDIR;     /* Rest init'd in getnewvnode(). */
        ip->i_effnlink = 2;
@@ -1974,6 +2047,7 @@ ufs_mkdir(ap)
 bad:
        if (error == 0) {
                *ap->a_vpp = tvp;
+               vn_seqc_write_end(tvp);
        } else {
                dp->i_effnlink--;
                dp->i_nlink--;
@@ -1989,6 +2063,7 @@ bad:
                UFS_INODE_SET_FLAG(ip, IN_CHANGE);
                if (DOINGSOFTDEP(tvp))
                        softdep_revert_mkdir(dp, ip);
+               vn_seqc_write_end(tvp);
                vgone(tvp);
                vput(tvp);
        }
@@ -2637,8 +2712,9 @@ ufs_makeinode(mode, dvp, vpp, cnp, callfunc)
        }
 #endif
 #endif /* !SUIDDIR */
+       vn_seqc_write_begin(tvp); /* Mostly to cover asserts */
        UFS_INODE_SET_FLAG(ip, IN_ACCESS | IN_CHANGE | IN_UPDATE);
-       ip->i_mode = mode;
+       UFS_INODE_SET_MODE(ip, mode);
        DIP_SET(ip, i_mode, mode);
        tvp->v_type = IFTOVT(mode);     /* Rest init'd in getnewvnode(). */
        ip->i_effnlink = 1;
@@ -2648,7 +2724,7 @@ ufs_makeinode(mode, dvp, vpp, cnp, callfunc)
                softdep_setup_create(VTOI(dvp), ip);
        if ((ip->i_mode & ISGID) && !groupmember(ip->i_gid, cnp->cn_cred) &&
            priv_check_cred(cnp->cn_cred, PRIV_VFS_SETGID)) {
-               ip->i_mode &= ~ISGID;
+               UFS_INODE_SET_MODE(ip, ip->i_mode & ~ISGID);
                DIP_SET(ip, i_mode, ip->i_mode);
        }
 
@@ -2688,6 +2764,7 @@ ufs_makeinode(mode, dvp, vpp, cnp, callfunc)
        error = ufs_direnter(dvp, tvp, &newdir, cnp, NULL, 0);
        if (error)
                goto bad;
+       vn_seqc_write_end(tvp);
        *vpp = tvp;
        return (0);
 
@@ -2702,6 +2779,7 @@ bad:
        UFS_INODE_SET_FLAG(ip, IN_CHANGE);
        if (DOINGSOFTDEP(tvp))
                softdep_revert_create(VTOI(dvp), ip);
+       vn_seqc_write_end(tvp);
        vgone(tvp);
        vput(tvp);
        return (error);
@@ -2740,6 +2818,7 @@ struct vop_vector ufs_vnodeops = {
        .vop_write =            VOP_PANIC,
        .vop_accessx =          ufs_accessx,
        .vop_bmap =             ufs_bmap,
+       .vop_fplookup_vexec =   ufs_fplookup_vexec,
        .vop_cachedlookup =     ufs_lookup,
        .vop_close =            ufs_close,
        .vop_create =           ufs_create,
_______________________________________________
[email protected] mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "[email protected]"

Reply via email to