Author: mjg
Date: Sat Jul 25 10:32:45 2020
New Revision: 363518
URL: https://svnweb.freebsd.org/changeset/base/363518

Log:
  vfs: add the infrastructure for lockless lookup
  
  Reviewed by:    kib
  Tested by:      pho (in a patchset)
  Differential Revision:        https://reviews.freebsd.org/D25577

Modified:
  head/sys/kern/kern_descrip.c
  head/sys/kern/vfs_subr.c
  head/sys/kern/vnode_if.src
  head/sys/security/mac/mac_framework.h
  head/sys/sys/filedesc.h
  head/sys/sys/mount.h
  head/sys/sys/vnode.h

Modified: head/sys/kern/kern_descrip.c
==============================================================================
--- head/sys/kern/kern_descrip.c        Sat Jul 25 10:31:52 2020        (r363517)
+++ head/sys/kern/kern_descrip.c        Sat Jul 25 10:32:45 2020        (r363518)
@@ -102,8 +102,8 @@ MALLOC_DECLARE(M_FADVISE);
 
 static __read_mostly uma_zone_t file_zone;
 static __read_mostly uma_zone_t filedesc0_zone;
-static __read_mostly uma_zone_t pwd_zone;
-static __read_mostly smr_t pwd_smr;
+__read_mostly uma_zone_t pwd_zone;
+VFS_SMR_DECLARE;
 
 static int     closefp(struct filedesc *fdp, int fd, struct file *fp,
                    struct thread *td, int holdleaders);
@@ -3343,21 +3343,30 @@ pwd_hold(struct thread *td)
 
        fdp = td->td_proc->p_fd;
 
-       smr_enter(pwd_smr);
-       pwd = smr_entered_load(&fdp->fd_pwd, pwd_smr);
+       vfs_smr_enter();
+       pwd = vfs_smr_entered_load(&fdp->fd_pwd);
        MPASS(pwd != NULL);
        if (__predict_true(refcount_acquire_if_not_zero(&pwd->pwd_refcount))) {
-               smr_exit(pwd_smr);
+               vfs_smr_exit();
                return (pwd);
        }
-       smr_exit(pwd_smr);
+       vfs_smr_exit();
        FILEDESC_SLOCK(fdp);
        pwd = pwd_hold_filedesc(fdp);
        MPASS(pwd != NULL);
-       FILEDESC_SUNLOCK(fdp);
        return (pwd);
 }
 
+struct pwd *
+pwd_get_smr(void)
+{
+       struct pwd *pwd;
+
+       pwd = vfs_smr_entered_load(&curproc->p_fd->fd_pwd);
+       MPASS(pwd != NULL);
+       return (pwd);
+}
+
 static struct pwd *
 pwd_alloc(void)
 {
@@ -4368,7 +4377,11 @@ filelistinit(void *dummy)
            NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
        pwd_zone = uma_zcreate("PWD", sizeof(struct pwd), NULL, NULL,
            NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_SMR);
-       pwd_smr = uma_zone_get_smr(pwd_zone);
+       /*
+        * XXXMJG this is a temporary hack due to boot ordering issues against
+        * the vnode zone.
+        */
+       vfs_smr = uma_zone_get_smr(pwd_zone);
        mtx_init(&sigio_lock, "sigio lock", NULL, MTX_DEF);
 }
 SYSINIT(select, SI_SUB_LOCK, SI_ORDER_FIRST, filelistinit, NULL);

Modified: head/sys/kern/vfs_subr.c
==============================================================================
--- head/sys/kern/vfs_subr.c    Sat Jul 25 10:31:52 2020        (r363517)
+++ head/sys/kern/vfs_subr.c    Sat Jul 25 10:32:45 2020        (r363518)
@@ -664,8 +664,8 @@ vntblinit(void *dummy __unused)
        vnode_list_reclaim_marker = vn_alloc_marker(NULL);
        TAILQ_INSERT_HEAD(&vnode_list, vnode_list_reclaim_marker, v_vnodelist);
        vnode_zone = uma_zcreate("VNODE", sizeof (struct vnode), NULL, NULL,
-           vnode_init, vnode_fini, UMA_ALIGN_PTR, UMA_ZONE_SMR);
-       vfs_smr = uma_zone_get_smr(vnode_zone);
+           vnode_init, vnode_fini, UMA_ALIGN_PTR, 0);
+       uma_zone_set_smr(vnode_zone, vfs_smr);
        vnodepoll_zone = uma_zcreate("VNODEPOLL", sizeof (struct vpollinfo),
            NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
        /*
@@ -2914,6 +2914,22 @@ vget_prep(struct vnode *vp)
        return (vs);
 }
 
+void
+vget_abort(struct vnode *vp, enum vgetstate vs)
+{
+
+       switch (vs) {
+       case VGET_USECOUNT:
+               vrele(vp);
+               break;
+       case VGET_HOLDCNT:
+               vdrop(vp);
+               break;
+       default:
+               __assert_unreachable();
+       }
+}
+
 int
 vget(struct vnode *vp, int flags, struct thread *td)
 {
@@ -2925,7 +2941,7 @@ vget(struct vnode *vp, int flags, struct thread *td)
        return (vget_finish(vp, flags, vs));
 }
 
-static int __noinline
+static void __noinline
 vget_finish_vchr(struct vnode *vp)
 {
 
@@ -2941,7 +2957,7 @@ vget_finish_vchr(struct vnode *vp)
 #else
                refcount_release(&vp->v_holdcnt);
 #endif
-               return (0);
+               return;
        }
 
        VI_LOCK(vp);
@@ -2953,18 +2969,17 @@ vget_finish_vchr(struct vnode *vp)
                refcount_release(&vp->v_holdcnt);
 #endif
                VI_UNLOCK(vp);
-               return (0);
+               return;
        }
        v_incr_devcount(vp);
        refcount_acquire(&vp->v_usecount);
        VI_UNLOCK(vp);
-       return (0);
 }
 
 int
 vget_finish(struct vnode *vp, int flags, enum vgetstate vs)
 {
-       int error, old;
+       int error;
 
        if ((flags & LK_INTERLOCK) != 0)
                ASSERT_VI_LOCKED(vp, __func__);
@@ -2976,20 +2991,32 @@ vget_finish(struct vnode *vp, int flags, enum vgetstat
 
        error = vn_lock(vp, flags);
        if (__predict_false(error != 0)) {
-               if (vs == VGET_USECOUNT)
-                       vrele(vp);
-               else
-                       vdrop(vp);
+               vget_abort(vp, vs);
                CTR2(KTR_VFS, "%s: impossible to lock vnode %p", __func__,
                    vp);
                return (error);
        }
 
+       vget_finish_ref(vp, vs);
+       return (0);
+}
+
+void
+vget_finish_ref(struct vnode *vp, enum vgetstate vs)
+{
+       int old;
+
+       VNPASS(vs == VGET_HOLDCNT || vs == VGET_USECOUNT, vp);
+       VNPASS(vp->v_holdcnt > 0, vp);
+       VNPASS(vs == VGET_HOLDCNT || vp->v_usecount > 0, vp);
+
        if (vs == VGET_USECOUNT)
-               return (0);
+               return;
 
-       if (__predict_false(vp->v_type == VCHR))
-               return (vget_finish_vchr(vp));
+       if (__predict_false(vp->v_type == VCHR)) {
+               vget_finish_vchr(vp);
+               return;
+       }
 
        /*
         * We hold the vnode. If the usecount is 0 it will be utilized to keep
@@ -3006,7 +3033,6 @@ vget_finish(struct vnode *vp, int flags, enum vgetstat
                refcount_release(&vp->v_holdcnt);
 #endif
        }
-       return (0);
 }
 
 /*
@@ -4424,6 +4450,7 @@ DB_SHOW_COMMAND(mount, db_show_mount)
        MNT_KERN_FLAG(MNTK_LOOKUP_EXCL_DOTDOT);
        MNT_KERN_FLAG(MNTK_MARKER);
        MNT_KERN_FLAG(MNTK_USES_BCACHE);
+       MNT_KERN_FLAG(MNTK_FPLOOKUP);
        MNT_KERN_FLAG(MNTK_NOASYNC);
        MNT_KERN_FLAG(MNTK_UNMOUNT);
        MNT_KERN_FLAG(MNTK_MWAIT);
@@ -5240,6 +5267,38 @@ out:
 }
 
 /*
+ * VOP_FPLOOKUP_VEXEC routines are subject to special circumstances, see
+ * the comment above cache_fplookup for details.
+ *
+ * We never deny as priv_check_cred calls are not yet supported, see vaccess.
+ */
+int
+vaccess_vexec_smr(mode_t file_mode, uid_t file_uid, gid_t file_gid, struct ucred *cred)
+{
+
+       VFS_SMR_ASSERT_ENTERED();
+
+       /* Check the owner. */
+       if (cred->cr_uid == file_uid) {
+               if (file_mode & S_IXUSR)
+                       return (0);
+               return (EAGAIN);
+       }
+
+       /* Otherwise, check the groups (first match) */
+       if (groupmember(file_gid, cred)) {
+               if (file_mode & S_IXGRP)
+                       return (0);
+               return (EAGAIN);
+       }
+
+       /* Otherwise, check everyone else. */
+       if (file_mode & S_IXOTH)
+               return (0);
+       return (EAGAIN);
+}
+
+/*
  * Common filesystem object access control check routine.  Accepts a
  * vnode's type, "mode", uid and gid, requested access mode, credentials,
  * and optional call-by-reference privused argument allowing vaccess()
@@ -5537,6 +5596,20 @@ vop_rename_pre(void *ap)
 }
 
 #ifdef DEBUG_VFS_LOCKS
+void
+vop_fplookup_vexec_pre(void *ap __unused)
+{
+
+       VFS_SMR_ASSERT_ENTERED();
+}
+
+void
+vop_fplookup_vexec_post(void *ap __unused, int rc __unused)
+{
+
+       VFS_SMR_ASSERT_ENTERED();
+}
+
 void
 vop_strategy_pre(void *ap)
 {

Modified: head/sys/kern/vnode_if.src
==============================================================================
--- head/sys/kern/vnode_if.src  Sat Jul 25 10:31:52 2020        (r363517)
+++ head/sys/kern/vnode_if.src  Sat Jul 25 10:32:45 2020        (r363518)
@@ -146,6 +146,17 @@ vop_close {
 };
 
 
+%% fplookup_vexec      vp      - - -
+%! fplookup_vexec      pre     vop_fplookup_vexec_pre
+%! fplookup_vexec      post    vop_fplookup_vexec_post
+
+vop_fplookup_vexec {
+       IN struct vnode *vp;
+       IN struct ucred *cred;
+       IN struct thread *td;
+};
+
+
 %% access      vp      L L L
 
 vop_access {

Modified: head/sys/security/mac/mac_framework.h
==============================================================================
--- head/sys/security/mac/mac_framework.h       Sat Jul 25 10:31:52 2020        (r363517)
+++ head/sys/security/mac/mac_framework.h       Sat Jul 25 10:32:45 2020        (r363518)
@@ -422,13 +422,14 @@ int       mac_vnode_check_listextattr(struct ucred *cred, st
 int    mac_vnode_check_lookup_impl(struct ucred *cred, struct vnode *dvp,
            struct componentname *cnp);
 extern bool mac_vnode_check_lookup_fp_flag;
+#define mac_vnode_check_lookup_enabled() __predict_false(mac_vnode_check_lookup_fp_flag)
 static inline int
 mac_vnode_check_lookup(struct ucred *cred, struct vnode *dvp,
     struct componentname *cnp)
 {
 
        mac_vnode_assert_locked(dvp, "mac_vnode_check_lookup");
-       if (__predict_false(mac_vnode_check_lookup_fp_flag))
+       if (mac_vnode_check_lookup_enabled())
                 return (mac_vnode_check_lookup_impl(cred, dvp, cnp));
        return (0);
 }

Modified: head/sys/sys/filedesc.h
==============================================================================
--- head/sys/sys/filedesc.h     Sat Jul 25 10:31:52 2020        (r363517)
+++ head/sys/sys/filedesc.h     Sat Jul 25 10:32:45 2020        (r363518)
@@ -311,6 +311,7 @@ pwd_set(struct filedesc *fdp, struct pwd *newpwd)
        smr_serialized_store(&fdp->fd_pwd, newpwd,
            (FILEDESC_XLOCK_ASSERT(fdp), true));
 }
+struct pwd *pwd_get_smr(void);
 
 #endif /* _KERNEL */
 

Modified: head/sys/sys/mount.h
==============================================================================
--- head/sys/sys/mount.h        Sat Jul 25 10:31:52 2020        (r363517)
+++ head/sys/sys/mount.h        Sat Jul 25 10:32:45 2020        (r363518)
@@ -420,6 +420,7 @@ void          __mnt_vnode_markerfree_lazy(struct vnode
 #define        MNTK_TEXT_REFS          0x00008000 /* Keep use ref for text */
 #define        MNTK_VMSETSIZE_BUG      0x00010000
 #define        MNTK_UNIONFS    0x00020000      /* A hack for F_ISUNIONSTACK */
+#define        MNTK_FPLOOKUP   0x00040000      /* fast path lookup is supported */
 #define MNTK_NOASYNC   0x00800000      /* disable async */
 #define MNTK_UNMOUNT   0x01000000      /* unmount in progress */
 #define        MNTK_MWAIT      0x02000000      /* waiting for unmount to finish */

Modified: head/sys/sys/vnode.h
==============================================================================
--- head/sys/sys/vnode.h        Sat Jul 25 10:31:52 2020        (r363517)
+++ head/sys/sys/vnode.h        Sat Jul 25 10:32:45 2020        (r363518)
@@ -666,6 +666,8 @@ int vn_path_to_global_path(struct thread *td, struct v
 int    vaccess(enum vtype type, mode_t file_mode, uid_t file_uid,
            gid_t file_gid, accmode_t accmode, struct ucred *cred,
            int *privused);
+int    vaccess_vexec_smr(mode_t file_mode, uid_t file_uid, gid_t file_gid,
+           struct ucred *cred);
 int    vaccess_acl_nfs4(enum vtype type, uid_t file_uid, gid_t file_gid,
            struct acl *aclp, accmode_t accmode, struct ucred *cred,
            int *privused);
@@ -682,6 +684,8 @@ int vget(struct vnode *vp, int flags, struct thread *t
 enum vgetstate vget_prep_smr(struct vnode *vp);
 enum vgetstate vget_prep(struct vnode *vp);
 int    vget_finish(struct vnode *vp, int flags, enum vgetstate vs);
+void   vget_finish_ref(struct vnode *vp, enum vgetstate vs);
+void   vget_abort(struct vnode *vp, enum vgetstate vs);
 void   vgone(struct vnode *vp);
 void   vhold(struct vnode *);
 void   vholdl(struct vnode *);
@@ -865,6 +869,8 @@ void        vop_symlink_post(void *a, int rc);
 int    vop_sigdefer(struct vop_vector *vop, struct vop_generic_args *a);
 
 #ifdef DEBUG_VFS_LOCKS
+void   vop_fplookup_vexec_pre(void *a);
+void   vop_fplookup_vexec_post(void *a, int rc);
 void   vop_strategy_pre(void *a);
 void   vop_lock_pre(void *a);
 void   vop_lock_post(void *a, int rc);
@@ -872,6 +878,8 @@ void        vop_unlock_pre(void *a);
 void   vop_need_inactive_pre(void *a);
 void   vop_need_inactive_post(void *a, int rc);
 #else
+#define        vop_fplookup_vexec_pre(x)       do { } while (0)
+#define        vop_fplookup_vexec_post(x, y)   do { } while (0)
 #define        vop_strategy_pre(x)     do { } while (0)
 #define        vop_lock_pre(x)         do { } while (0)
 #define        vop_lock_post(x, y)     do { } while (0)
@@ -1025,9 +1033,17 @@ int vn_dir_check_exec(struct vnode *vp, struct compone
 #define VFS_SMR()      vfs_smr
 #define vfs_smr_enter()        smr_enter(VFS_SMR())
 #define vfs_smr_exit() smr_exit(VFS_SMR())
+#define vfs_smr_entered_load(ptr)      smr_entered_load((ptr), VFS_SMR())
 #define VFS_SMR_ASSERT_ENTERED()       SMR_ASSERT_ENTERED(VFS_SMR())
 #define VFS_SMR_ASSERT_NOT_ENTERED()   SMR_ASSERT_NOT_ENTERED(VFS_SMR())
 #define VFS_SMR_ZONE_SET(zone) uma_zone_set_smr((zone), VFS_SMR())
+
+#define vn_load_v_data_smr(vp) ({              \
+       struct vnode *_vp = (vp);               \
+                                               \
+       VFS_SMR_ASSERT_ENTERED();               \
+       atomic_load_ptr(&(_vp)->v_data);        \
+})
 
 #endif /* _KERNEL */
 
_______________________________________________
[email protected] mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "[email protected]"

Reply via email to