The branch main has been updated by jah:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=59409cb90fc079bd388d8f7404679193e4d34889

commit 59409cb90fc079bd388d8f7404679193e4d34889
Author:     Jason A. Harmening <[email protected]>
AuthorDate: 2021-05-17 22:47:27 +0000
Commit:     Jason A. Harmening <[email protected]>
CommitDate: 2021-06-06 01:20:36 +0000

    Add a generic mechanism for preventing forced unmount
    
    This is aimed at preventing stacked filesystems like nullfs and unionfs
    from "losing" their lower mounts due to forced unmount.  Otherwise,
    VFS operations that are passed through to the lower filesystem(s) may
    crash or otherwise cause unpredictable behavior.
    
    Introduce two new functions: vfs_pin_from_vp() and vfs_unpin().
    which are intended to be called on the lower mount(s) when the stacked
    filesystem is mounted and unmounted, respectively.
    Much as registration in the mnt_uppers list previously did, pinning
    will prevent even forced unmount of the lower FS and will allow the
    stacked FS to freely operate on the lower mount either by direct
    use of the struct mount* or indirect use through a properly-referenced
    vnode's v_mount field.
    
    vfs_pin_from_vp() is modeled after vfs_ref_from_vp() in that it uses
    the mount interlock coupled with re-checking vp->v_mount to ensure
    that it will fail in the face of a pending unmount request, even if
    the concurrent unmount fully completes.
    
    Adopt these new functions in both nullfs and unionfs.
    
    Reviewed By:    kib, markj
    Differential Revision: https://reviews.freebsd.org/D30401
---
 sys/fs/nullfs/null_vfsops.c   |  9 ++++++++-
 sys/fs/unionfs/union_vfsops.c | 22 +++++++++++++++++++---
 sys/kern/vfs_mount.c          | 39 ++++++++++++++++++++++++++++++++++++++-
 sys/kern/vfs_subr.c           |  3 +++
 sys/sys/mount.h               |  3 +++
 5 files changed, 71 insertions(+), 5 deletions(-)

diff --git a/sys/fs/nullfs/null_vfsops.c b/sys/fs/nullfs/null_vfsops.c
index 0ad2385116a9..4914e5fc2dbf 100644
--- a/sys/fs/nullfs/null_vfsops.c
+++ b/sys/fs/nullfs/null_vfsops.c
@@ -163,7 +163,12 @@ nullfs_mount(struct mount *mp)
         * Save pointer to underlying FS and the reference to the
         * lower root vnode.
         */
-       xmp->nullm_vfs = lowerrootvp->v_mount;
+       xmp->nullm_vfs = vfs_pin_from_vp(lowerrootvp);
+       if (xmp->nullm_vfs == NULL) {
+               vput(lowerrootvp);
+               free(xmp, M_NULLFSMNT);
+               return (ENOENT);
+       }
        vref(lowerrootvp);
        xmp->nullm_lowerrootvp = lowerrootvp;
        mp->mnt_data = xmp;
@@ -173,6 +178,7 @@ nullfs_mount(struct mount *mp)
         */
        error = null_nodeget(mp, lowerrootvp, &nullm_rootvp);
        if (error != 0) {
+               vfs_unpin(xmp->nullm_vfs);
                vrele(lowerrootvp);
                free(xmp, M_NULLFSMNT);
                return (error);
@@ -263,6 +269,7 @@ nullfs_unmount(mp, mntflags)
                TAILQ_REMOVE(&ump->mnt_uppers, mp, mnt_upper_link);
                MNT_IUNLOCK(ump);
        }
+       vfs_unpin(ump);
        vrele(mntdata->nullm_lowerrootvp);
        mp->mnt_data = NULL;
        free(mntdata, M_NULLFSMNT);
diff --git a/sys/fs/unionfs/union_vfsops.c b/sys/fs/unionfs/union_vfsops.c
index bd264c7bcdb5..96a30f0ae8b5 100644
--- a/sys/fs/unionfs/union_vfsops.c
+++ b/sys/fs/unionfs/union_vfsops.c
@@ -75,6 +75,7 @@ static int
 unionfs_domount(struct mount *mp)
 {
        int             error;
+       struct mount   *lowermp, *uppermp;
        struct vnode   *lowerrootvp;
        struct vnode   *upperrootvp;
        struct unionfs_mount *ump;
@@ -285,15 +286,28 @@ unionfs_domount(struct mount *mp)
        error = unionfs_nodeget(mp, ump->um_uppervp, ump->um_lowervp,
            NULLVP, &(ump->um_rootvp), NULL, td);
        vrele(upperrootvp);
-       if (error) {
+       if (error != 0) {
                free(ump, M_UNIONFSMNT);
                mp->mnt_data = NULL;
                return (error);
        }
 
+       lowermp = vfs_pin_from_vp(ump->um_lowervp);
+       uppermp = vfs_pin_from_vp(ump->um_uppervp);
+
+       if (lowermp == NULL || uppermp == NULL) {
+               if (lowermp != NULL)
+                       vfs_unpin(lowermp);
+               if (uppermp != NULL)
+                       vfs_unpin(uppermp);
+               free(ump, M_UNIONFSMNT);
+               mp->mnt_data = NULL;
+               return (ENOENT);
+       }
+
        MNT_ILOCK(mp);
-       if ((ump->um_lowervp->v_mount->mnt_flag & MNT_LOCAL) &&
-           (ump->um_uppervp->v_mount->mnt_flag & MNT_LOCAL))
+       if ((lowermp->mnt_flag & MNT_LOCAL) != 0 &&
+           (uppermp->mnt_flag & MNT_LOCAL) != 0)
                mp->mnt_flag |= MNT_LOCAL;
        mp->mnt_kern_flag |= MNTK_NOMSYNC | MNTK_UNIONFS;
        MNT_IUNLOCK(mp);
@@ -343,6 +357,8 @@ unionfs_unmount(struct mount *mp, int mntflags)
        if (error)
                return (error);
 
+       vfs_unpin(ump->um_lowervp->v_mount);
+       vfs_unpin(ump->um_uppervp->v_mount);
        free(ump, M_UNIONFSMNT);
        mp->mnt_data = NULL;
 
diff --git a/sys/kern/vfs_mount.c b/sys/kern/vfs_mount.c
index 7dc6b795eefd..8b4426209818 100644
--- a/sys/kern/vfs_mount.c
+++ b/sys/kern/vfs_mount.c
@@ -49,6 +49,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/kernel.h>
 #include <sys/ktr.h>
 #include <sys/libkern.h>
+#include <sys/limits.h>
 #include <sys/malloc.h>
 #include <sys/mount.h>
 #include <sys/mutex.h>
@@ -504,6 +505,39 @@ vfs_ref(struct mount *mp)
        MNT_IUNLOCK(mp);
 }
 
+struct mount *
+vfs_pin_from_vp(struct vnode *vp)
+{
+       struct mount *mp;
+
+       mp = atomic_load_ptr(&vp->v_mount);
+       if (mp == NULL)
+               return (NULL);
+       MNT_ILOCK(mp);
+       if (mp != vp->v_mount || (mp->mnt_kern_flag & MNTK_UNMOUNT) != 0) {
+               MNT_IUNLOCK(mp);
+               return (NULL);
+       }
+       MNT_REF(mp);
+       KASSERT(mp->mnt_pinned_count < INT_MAX,
+           ("mount pinned count overflow"));
+       ++mp->mnt_pinned_count;
+       MNT_IUNLOCK(mp);
+       return (mp);
+}
+
+void
+vfs_unpin(struct mount *mp)
+{
+       MNT_ILOCK(mp);
+       KASSERT(mp->mnt_pinned_count > 0, ("mount pinned count underflow"));
+       KASSERT((mp->mnt_kern_flag & MNTK_UNMOUNT) == 0,
+           ("mount pinned with pending unmount"));
+       --mp->mnt_pinned_count;
+       MNT_REL(mp);
+       MNT_IUNLOCK(mp);
+}
+
 void
 vfs_rel(struct mount *mp)
 {
@@ -567,6 +601,7 @@ vfs_mount_alloc(struct vnode *vp, struct vfsconf *vfsp, 
const char *fspath,
 #endif
        arc4rand(&mp->mnt_hashseed, sizeof mp->mnt_hashseed, 0);
        TAILQ_INIT(&mp->mnt_uppers);
+       mp->mnt_pinned_count = 0;
        return (mp);
 }
 
@@ -605,6 +640,8 @@ vfs_mount_destroy(struct mount *mp)
                        vn_printf(vp, "dangling vnode ");
                panic("unmount: dangling vnode");
        }
+       KASSERT(mp->mnt_pinned_count == 0,
+          ("mnt_pinned_count = %d", mp->mnt_pinned_count));
        KASSERT(TAILQ_EMPTY(&mp->mnt_uppers), ("mnt_uppers"));
        if (mp->mnt_nvnodelistsize != 0)
                panic("vfs_mount_destroy: nonzero nvnodelistsize");
@@ -1811,7 +1848,7 @@ dounmount(struct mount *mp, int flags, struct thread *td)
        MNT_ILOCK(mp);
        if ((mp->mnt_kern_flag & MNTK_UNMOUNT) != 0 ||
            (mp->mnt_flag & MNT_UPDATE) != 0 ||
-           !TAILQ_EMPTY(&mp->mnt_uppers)) {
+           mp->mnt_pinned_count != 0) {
                dounmount_cleanup(mp, coveredvp, 0);
                return (EBUSY);
        }
diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c
index ad0f1c2a0c7c..a2f25bf78495 100644
--- a/sys/kern/vfs_subr.c
+++ b/sys/kern/vfs_subr.c
@@ -831,6 +831,9 @@ vfs_busy(struct mount *mp, int flags)
         * valid.
         */
        while (mp->mnt_kern_flag & MNTK_UNMOUNT) {
+               KASSERT(mp->mnt_pinned_count == 0,
+                   ("%s: non-zero pinned count %d with pending unmount",
+                   __func__, mp->mnt_pinned_count));
                if (flags & MBF_NOWAIT || mp->mnt_kern_flag & MNTK_REFEXPIRE) {
                        MNT_REL(mp);
                        MNT_IUNLOCK(mp);
diff --git a/sys/sys/mount.h b/sys/sys/mount.h
index 6b2e6e7f7f13..693293b12370 100644
--- a/sys/sys/mount.h
+++ b/sys/sys/mount.h
@@ -242,6 +242,7 @@ struct mount {
        struct mtx      mnt_listmtx;
        struct vnodelst mnt_lazyvnodelist;      /* (l) list of lazy vnodes */
        int             mnt_lazyvnodelistsize;  /* (l) # of lazy vnodes */
+       int             mnt_pinned_count;       /* (i) unmount prevented */
        struct lock     mnt_explock;            /* vfs_export walkers lock */
        TAILQ_ENTRY(mount) mnt_upper_link;      /* (i*) we in the all uppers */
        TAILQ_HEAD(, mount) mnt_uppers;         /* (i) upper mounts over us */
@@ -1011,6 +1012,8 @@ struct mount *vfs_mount_alloc(struct vnode *, struct 
vfsconf *, const char *,
 int    vfs_suser(struct mount *, struct thread *);
 void   vfs_unbusy(struct mount *);
 void   vfs_unmountall(void);
+struct mount *vfs_pin_from_vp(struct vnode *);
+void   vfs_unpin(struct mount *);
 extern TAILQ_HEAD(mntlist, mount) mountlist;   /* mounted filesystem list */
 extern struct mtx_padalign mountlist_mtx;
 extern struct nfs_public nfs_pub;
_______________________________________________
[email protected] mailing list
https://lists.freebsd.org/mailman/listinfo/dev-commits-src-main
To unsubscribe, send any mail to "[email protected]"

Reply via email to