The branch main has been updated by mjg:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=aeabf8d4b9727201648887f8aa7dd1930cf0e154

commit aeabf8d4b9727201648887f8aa7dd1930cf0e154
Author:     Mateusz Guzik <[email protected]>
AuthorDate: 2022-03-07 11:43:43 +0000
Commit:     Mateusz Guzik <[email protected]>
CommitDate: 2022-03-19 10:47:10 +0000

    nullfs: hash insertion without vnode lock upgrade
    
    Use the hash lock to serialize instead.
    
    This enables shared-locked ".." lookups.
    
    Reviewed by:    markj
    Tested by:      pho (previous version)
    Differential Revision:  https://reviews.freebsd.org/D34466
---
 sys/fs/nullfs/null_subr.c   | 114 ++++++++++++++++++++------------------------
 sys/fs/nullfs/null_vfsops.c |   2 +-
 2 files changed, 53 insertions(+), 63 deletions(-)

diff --git a/sys/fs/nullfs/null_subr.c b/sys/fs/nullfs/null_subr.c
index 6b422410b9ec..b2b6e6837b2d 100644
--- a/sys/fs/nullfs/null_subr.c
+++ b/sys/fs/nullfs/null_subr.c
@@ -65,7 +65,7 @@ static u_long null_hash_mask;
 static MALLOC_DEFINE(M_NULLFSHASH, "nullfs_hash", "NULLFS hash table");
 MALLOC_DEFINE(M_NULLFSNODE, "nullfs_node", "NULLFS vnode private part");
 
-static struct vnode * null_hashins(struct mount *, struct null_node *);
+static void null_hashins(struct mount *, struct null_node *);
 
 /*
  * Initialise cache headers
@@ -93,14 +93,15 @@ nullfs_uninit(struct vfsconf *vfsp)
  * Return a VREF'ed alias for lower vnode if already exists, else 0.
  * Lower vnode should be locked on entry and will be left locked on exit.
  */
-struct vnode *
-null_hashget(struct mount *mp, struct vnode *lowervp)
+static struct vnode *
+null_hashget_locked(struct mount *mp, struct vnode *lowervp)
 {
        struct null_node_hashhead *hd;
        struct null_node *a;
        struct vnode *vp;
 
        ASSERT_VOP_LOCKED(lowervp, "null_hashget");
+       rw_assert(&null_hash_lock, RA_LOCKED);
 
        /*
         * Find hash base, and then search the (two-way) linked
@@ -109,9 +110,6 @@ null_hashget(struct mount *mp, struct vnode *lowervp)
         * reference count (but NOT the lower vnode's VREF counter).
         */
        hd = NULL_NHASH(lowervp);
-       if (LIST_EMPTY(hd))
-               return (NULLVP);
-       rw_rlock(&null_hash_lock);
        LIST_FOREACH(a, hd, null_hash) {
                if (a->null_lowervp == lowervp && NULLTOV(a)->v_mount == mp) {
                        /*
@@ -122,43 +120,50 @@ null_hashget(struct mount *mp, struct vnode *lowervp)
                         */
                        vp = NULLTOV(a);
                        vref(vp);
-                       rw_runlock(&null_hash_lock);
                        return (vp);
                }
        }
-       rw_runlock(&null_hash_lock);
        return (NULLVP);
 }
 
-/*
- * Act like null_hashget, but add passed null_node to hash if no existing
- * node found.
- */
-static struct vnode *
+struct vnode *
+null_hashget(struct mount *mp, struct vnode *lowervp)
+{
+       struct null_node_hashhead *hd;
+       struct vnode *vp;
+
+       hd = NULL_NHASH(lowervp);
+       if (LIST_EMPTY(hd))
+               return (NULLVP);
+
+       rw_rlock(&null_hash_lock);
+       vp = null_hashget_locked(mp, lowervp);
+       rw_runlock(&null_hash_lock);
+
+       return (vp);
+}
+
+static void
 null_hashins(struct mount *mp, struct null_node *xp)
 {
        struct null_node_hashhead *hd;
+#ifdef INVARIANTS
        struct null_node *oxp;
-       struct vnode *ovp;
+#endif
+
+       rw_assert(&null_hash_lock, RA_WLOCKED);
 
        hd = NULL_NHASH(xp->null_lowervp);
-       rw_wlock(&null_hash_lock);
+#ifdef INVARIANTS
        LIST_FOREACH(oxp, hd, null_hash) {
                if (oxp->null_lowervp == xp->null_lowervp &&
                    NULLTOV(oxp)->v_mount == mp) {
-                       /*
-                        * See null_hashget for a description of this
-                        * operation.
-                        */
-                       ovp = NULLTOV(oxp);
-                       vref(ovp);
-                       rw_wunlock(&null_hash_lock);
-                       return (ovp);
+                       VNASSERT(0, NULLTOV(oxp),
+                           ("vnode already in hash"));
                }
        }
+#endif
        LIST_INSERT_HEAD(hd, xp, null_hash);
-       rw_wunlock(&null_hash_lock);
-       return (NULLVP);
 }
 
 static void
@@ -201,19 +206,6 @@ null_nodeget(struct mount *mp, struct vnode *lowervp, struct vnode **vpp)
                return (0);
        }
 
-       /*
-        * The insmntque1() call below requires the exclusive lock on
-        * the nullfs vnode.  Upgrade the lock now if hash failed to
-        * provide ready to use vnode.
-        */
-       if (VOP_ISLOCKED(lowervp) != LK_EXCLUSIVE) {
-               vn_lock(lowervp, LK_UPGRADE | LK_RETRY);
-               if (VN_IS_DOOMED(lowervp)) {
-                       vput(lowervp);
-                       return (ENOENT);
-               }
-       }
-
        /*
         * We do not serialize vnode creation, instead we will check for
         * duplicates later, when adding new vnode to hash.
@@ -229,20 +221,23 @@ null_nodeget(struct mount *mp, struct vnode *lowervp, struct vnode **vpp)
                return (error);
        }
 
+       VNPASS(vp->v_object == NULL, vp);
+       VNPASS((vn_irflag_read(vp) & VIRF_PGREAD) == 0, vp);
+
+       rw_wlock(&null_hash_lock);
        xp->null_vnode = vp;
        xp->null_lowervp = lowervp;
        xp->null_flags = 0;
        vp->v_type = lowervp->v_type;
        vp->v_data = xp;
        vp->v_vnlock = lowervp->v_vnlock;
-       error = insmntque1(vp, mp);
-       if (error != 0) {
-               vput(lowervp);
+       *vpp = null_hashget_locked(mp, lowervp);
+       if (*vpp != NULL) {
+               rw_wunlock(&null_hash_lock);
+               vrele(lowervp);
                null_destroy_proto(vp, xp);
-               return (error);
+               return (0);
        }
-       if (lowervp == MOUNTTONULLMOUNT(mp)->nullm_lowerrootvp)
-               vp->v_vflag |= VV_ROOT;
 
        /*
         * We might miss the case where lower vnode sets VIRF_PGREAD
@@ -251,28 +246,23 @@ null_nodeget(struct mount *mp, struct vnode *lowervp, struct vnode **vpp)
         */
        if ((vn_irflag_read(lowervp) & VIRF_PGREAD) != 0) {
                MPASS(lowervp->v_object != NULL);
-               if ((vn_irflag_read(vp) & VIRF_PGREAD) == 0) {
-                       if (vp->v_object == NULL)
-                               vp->v_object = lowervp->v_object;
-                       else
-                               MPASS(vp->v_object == lowervp->v_object);
-                       vn_irflag_set_cond(vp, VIRF_PGREAD);
-               } else {
-                       MPASS(vp->v_object != NULL);
-               }
+               vp->v_object = lowervp->v_object;
+               vn_irflag_set(vp, VIRF_PGREAD);
        }
+       if (lowervp == MOUNTTONULLMOUNT(mp)->nullm_lowerrootvp)
+               vp->v_vflag |= VV_ROOT;
 
-       /*
-        * Atomically insert our new node into the hash or vget existing 
-        * if someone else has beaten us to it.
-        */
-       *vpp = null_hashins(mp, xp);
-       if (*vpp != NULL) {
-               vrele(lowervp);
-               vp->v_object = NULL;    /* in case VIRF_PGREAD set it */
+       error = insmntque1(vp, mp);
+       if (error != 0) {
+               rw_wunlock(&null_hash_lock);
+               vput(lowervp);
+               vp->v_object = NULL;
                null_destroy_proto(vp, xp);
-               return (0);
+               return (error);
        }
+
+       null_hashins(mp, xp);
+       rw_wunlock(&null_hash_lock);
        *vpp = vp;
 
        return (0);
diff --git a/sys/fs/nullfs/null_vfsops.c b/sys/fs/nullfs/null_vfsops.c
index 1dba5f51d619..21a28dd45570 100644
--- a/sys/fs/nullfs/null_vfsops.c
+++ b/sys/fs/nullfs/null_vfsops.c
@@ -207,7 +207,7 @@ nullfs_mount(struct mount *mp)
                    (MNTK_SHARED_WRITES | MNTK_LOOKUP_SHARED |
                    MNTK_EXTENDED_SHARED);
        }
-       mp->mnt_kern_flag |= MNTK_LOOKUP_EXCL_DOTDOT | MNTK_NOMSYNC;
+       mp->mnt_kern_flag |= MNTK_NOMSYNC | MNTK_UNLOCKED_INSMNTQUE;
        mp->mnt_kern_flag |= lowerrootvp->v_mount->mnt_kern_flag &
            (MNTK_USES_BCACHE | MNTK_NO_IOPF | MNTK_UNMAPPED_BUFS);
        MNT_IUNLOCK(mp);

Reply via email to