The branch main has been updated by mjg:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=e9272225e6bed840b00eef1c817b188c172338ee

commit e9272225e6bed840b00eef1c817b188c172338ee
Author:     Mateusz Guzik <[email protected]>
AuthorDate: 2021-03-17 21:33:47 +0000
Commit:     Mateusz Guzik <[email protected]>
CommitDate: 2021-03-18 14:59:03 +0000

    vfs: fix vnlru marker handling for filtered/unfiltered cases
    
    The global list has a marker with an invariant that free vnodes are
    placed somewhere past that. A caller which performs filtering (like ZFS)
    can move said marker all the way to the end, across free vnodes which
    don't match. Then a caller which does not perform filtering will fail to
    find them. This makes vn_alloc_hard sleep for 1 second instead of
    reclaiming, resulting in significant stalls.
    
    Fix the problem by requiring an explicit marker by callers which do
    filtering.
    
    As a temporary measure extend vnlru_free to restart if it fails to
    reclaim anything.
    
    Big thanks go to the reporter for testing several iterations of the
    patch.
    
    Reported by:    Yamagi <lists yamagi.org>
    Tested by:      Yamagi <lists yamagi.org>
    Reviewed by:    kib
    Differential Revision:  https://reviews.freebsd.org/D29324
---
 sys/contrib/openzfs/module/os/freebsd/zfs/arc_os.c | 14 ++++-
 sys/kern/vfs_subr.c                                | 72 +++++++++++++++++++---
 sys/sys/vnode.h                                    |  3 +
 3 files changed, 80 insertions(+), 9 deletions(-)

diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/arc_os.c 
b/sys/contrib/openzfs/module/os/freebsd/zfs/arc_os.c
index 4fc7468bfa47..0d5cffbe8d1e 100644
--- a/sys/contrib/openzfs/module/os/freebsd/zfs/arc_os.c
+++ b/sys/contrib/openzfs/module/os/freebsd/zfs/arc_os.c
@@ -51,6 +51,9 @@
 #include <sys/vm.h>
 #include <sys/vmmeter.h>
 
+static struct sx arc_vnlru_lock;
+static struct vnode *arc_vnlru_marker;
+
 extern struct vfsops zfs_vfsops;
 
 uint_t zfs_arc_free_target = 0;
@@ -157,7 +160,9 @@ arc_prune_task(void *arg)
 
        arc_reduce_target_size(ptob(nr_scan));
        free(arg, M_TEMP);
-       vnlru_free(nr_scan, &zfs_vfsops);
+       sx_xlock(&arc_vnlru_lock);
+       vnlru_free_vfsops(nr_scan, &zfs_vfsops, arc_vnlru_marker);
+       sx_xunlock(&arc_vnlru_lock);
 }
 
 /*
@@ -234,7 +239,8 @@ arc_lowmem_init(void)
 {
        arc_event_lowmem = EVENTHANDLER_REGISTER(vm_lowmem, arc_lowmem, NULL,
            EVENTHANDLER_PRI_FIRST);
-
+       arc_vnlru_marker = vnlru_alloc_marker();
+       sx_init(&arc_vnlru_lock, "arc vnlru lock");
 }
 
 void
@@ -242,6 +248,10 @@ arc_lowmem_fini(void)
 {
        if (arc_event_lowmem != NULL)
                EVENTHANDLER_DEREGISTER(vm_lowmem, arc_event_lowmem);
+       if (arc_vnlru_marker != NULL) {
+               vnlru_free_marker(arc_vnlru_marker);
+               sx_destroy(&arc_vnlru_lock);
+       }
 }
 
 void
diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c
index 37e713ee48ea..af12252e8a88 100644
--- a/sys/kern/vfs_subr.c
+++ b/sys/kern/vfs_subr.c
@@ -1216,9 +1216,9 @@ SYSCTL_INT(_debug, OID_AUTO, max_vnlru_free, CTLFLAG_RW, 
&max_vnlru_free,
  * Attempt to reduce the free list by the requested amount.
  */
 static int
-vnlru_free_locked(int count, struct vfsops *mnt_op)
+vnlru_free_impl(int count, struct vfsops *mnt_op, struct vnode *mvp)
 {
-       struct vnode *vp, *mvp;
+       struct vnode *vp;
        struct mount *mp;
        int ocount;
 
@@ -1226,7 +1226,6 @@ vnlru_free_locked(int count, struct vfsops *mnt_op)
        if (count > max_vnlru_free)
                count = max_vnlru_free;
        ocount = count;
-       mvp = vnode_list_free_marker;
        vp = mvp;
        for (;;) {
                if (count == 0) {
@@ -1268,13 +1267,72 @@ vnlru_free_locked(int count, struct vfsops *mnt_op)
        return (ocount - count);
 }
 
+static int
+vnlru_free_locked(int count)
+{
+
+       mtx_assert(&vnode_list_mtx, MA_OWNED);
+       return (vnlru_free_impl(count, NULL, vnode_list_free_marker));
+}
+
+void
+vnlru_free_vfsops(int count, struct vfsops *mnt_op, struct vnode *mvp)
+{
+
+       MPASS(mnt_op != NULL);
+       MPASS(mvp != NULL);
+       VNPASS(mvp->v_type == VMARKER, mvp);
+       mtx_lock(&vnode_list_mtx);
+       vnlru_free_impl(count, mnt_op, mvp);
+       mtx_unlock(&vnode_list_mtx);
+}
+
+/*
+ * Temporary binary compat, don't use. Call vnlru_free_vfsops instead.
+ */
 void
 vnlru_free(int count, struct vfsops *mnt_op)
 {
+       struct vnode *mvp;
+
+       if (count == 0)
+               return;
+       mtx_lock(&vnode_list_mtx);
+       mvp = vnode_list_free_marker;
+       if (vnlru_free_impl(count, mnt_op, mvp) == 0) {
+               /*
+                * It is possible the marker was moved over eligible vnodes by
+                * callers which filtered by different ops. If so, start from
+                * scratch.
+                */
+               if (vnlru_read_freevnodes() > 0) {
+                       TAILQ_REMOVE(&vnode_list, mvp, v_vnodelist);
+                       TAILQ_INSERT_HEAD(&vnode_list, mvp, v_vnodelist);
+               }
+               vnlru_free_impl(count, mnt_op, mvp);
+       }
+       mtx_unlock(&vnode_list_mtx);
+}
+
+struct vnode *
+vnlru_alloc_marker(void)
+{
+       struct vnode *mvp;
 
+       mvp = vn_alloc_marker(NULL);
+       mtx_lock(&vnode_list_mtx);
+       TAILQ_INSERT_BEFORE(vnode_list_free_marker, mvp, v_vnodelist);
+       mtx_unlock(&vnode_list_mtx);
+       return (mvp);
+}
+
+void
+vnlru_free_marker(struct vnode *mvp)
+{
        mtx_lock(&vnode_list_mtx);
-       vnlru_free_locked(count, mnt_op);
+       TAILQ_REMOVE(&vnode_list, mvp, v_vnodelist);
        mtx_unlock(&vnode_list_mtx);
+       vn_free_marker(mvp);
 }
 
 static void
@@ -1423,7 +1481,7 @@ vnlru_proc(void)
                 * try to reduce it by discarding from the free list.
                 */
                if (rnumvnodes > desiredvnodes) {
-                       vnlru_free_locked(rnumvnodes - desiredvnodes, NULL);
+                       vnlru_free_locked(rnumvnodes - desiredvnodes);
                        rnumvnodes = atomic_load_long(&numvnodes);
                }
                /*
@@ -1615,7 +1673,7 @@ vn_alloc_hard(struct mount *mp)
         * should be chosen so that we never wait or even reclaim from
         * the free list to below its target minimum.
         */
-       if (vnlru_free_locked(1, NULL) > 0)
+       if (vnlru_free_locked(1) > 0)
                goto alloc;
        if (mp == NULL || (mp->mnt_kern_flag & MNTK_SUSPEND) == 0) {
                /*
@@ -1625,7 +1683,7 @@ vn_alloc_hard(struct mount *mp)
                msleep(&vnlruproc_sig, &vnode_list_mtx, PVFS, "vlruwk", hz);
                if (atomic_load_long(&numvnodes) + 1 > desiredvnodes &&
                    vnlru_read_freevnodes() > 1)
-                       vnlru_free_locked(1, NULL);
+                       vnlru_free_locked(1);
        }
 alloc:
        rnumvnodes = atomic_fetchadd_long(&numvnodes, 1) + 1;
diff --git a/sys/sys/vnode.h b/sys/sys/vnode.h
index 0e46bea14b64..f05b03c74c82 100644
--- a/sys/sys/vnode.h
+++ b/sys/sys/vnode.h
@@ -826,7 +826,10 @@ void       vfs_timestamp(struct timespec *);
 void   vfs_write_resume(struct mount *mp, int flags);
 int    vfs_write_suspend(struct mount *mp, int flags);
 int    vfs_write_suspend_umnt(struct mount *mp);
+struct vnode *vnlru_alloc_marker(void);
+void   vnlru_free_marker(struct vnode *);
 void   vnlru_free(int, struct vfsops *);
+void   vnlru_free_vfsops(int, struct vfsops *, struct vnode *);
 int    vop_stdbmap(struct vop_bmap_args *);
 int    vop_stdfdatasync_buf(struct vop_fdatasync_args *);
 int    vop_stdfsync(struct vop_fsync_args *);
_______________________________________________
[email protected] mailing list
https://lists.freebsd.org/mailman/listinfo/dev-commits-src-all
To unsubscribe, send any mail to "[email protected]"

Reply via email to