The branch main has been updated by mjg:

URL: https://cgit.FreeBSD.org/src/commit/?id=0a9aa6fdf58468945240e86bf16c268acc8c1776

commit 0a9aa6fdf58468945240e86bf16c268acc8c1776
Author:     Mateusz Guzik <[email protected]>
AuthorDate: 2024-07-08 12:24:41 +0000
Commit:     Mateusz Guzik <[email protected]>
CommitDate: 2024-07-08 12:40:20 +0000

    vfs: make skipping LRU requeue optional
    
    As explained in the comment in the code, the LRU requeue is a
    bottleneck in certain workloads. On the other hand, it does not need
    to be skipped in most cases, while transiently running into the lock
    being contended happens a lot.
---
 sys/kern/vfs_subr.c | 54 +++++++++++++++++++++++++++++++++--------------------
 1 file changed, 34 insertions(+), 20 deletions(-)

diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c
index 52712b99abac..8012fab29081 100644
--- a/sys/kern/vfs_subr.c
+++ b/sys/kern/vfs_subr.c
@@ -222,6 +222,10 @@ static counter_u64_t vnode_skipped_requeues;
 SYSCTL_COUNTER_U64(_vfs_vnode_stats, OID_AUTO, skipped_requeues, CTLFLAG_RD, &vnode_skipped_requeues,
     "Number of times LRU requeue was skipped due to lock contention");
 
+static __read_mostly bool vnode_can_skip_requeue;
+SYSCTL_BOOL(_vfs_vnode_param, OID_AUTO, can_skip_requeue, CTLFLAG_RW,
+    &vnode_can_skip_requeue, 0, "Is LRU requeue skippable");
+
 static u_long deferred_inact;
 SYSCTL_ULONG(_vfs, OID_AUTO, deferred_inact, CTLFLAG_RD,
     &deferred_inact, 0, "Number of times inactive processing was deferred");
@@ -3835,31 +3839,41 @@ vdbatch_process(struct vdbatch *vd)
         * lock contention, where vnode_list_mtx becomes the primary bottleneck
         * if multiple CPUs get here (one real-world example is highly parallel
         * do-nothing make , which will stat *tons* of vnodes). Since it is
-        * quasi-LRU (read: not that great even if fully honoured) just dodge
-        * the problem. Parties which don't like it are welcome to implement
-        * something better.
+        * quasi-LRU (read: not that great even if fully honoured) provide an
+        * option to just dodge the problem. Parties which don't like it are
+        * welcome to implement something better.
         */
-       critical_enter();
-       if (mtx_trylock(&vnode_list_mtx)) {
-               for (i = 0; i < VDBATCH_SIZE; i++) {
-                       vp = vd->tab[i];
-                       vd->tab[i] = NULL;
-                       TAILQ_REMOVE(&vnode_list, vp, v_vnodelist);
-                       TAILQ_INSERT_TAIL(&vnode_list, vp, v_vnodelist);
-                       MPASS(vp->v_dbatchcpu != NOCPU);
-                       vp->v_dbatchcpu = NOCPU;
+       if (vnode_can_skip_requeue) {
+               if (!mtx_trylock(&vnode_list_mtx)) {
+                       counter_u64_add(vnode_skipped_requeues, 1);
+                       critical_enter();
+                       for (i = 0; i < VDBATCH_SIZE; i++) {
+                               vp = vd->tab[i];
+                               vd->tab[i] = NULL;
+                               MPASS(vp->v_dbatchcpu != NOCPU);
+                               vp->v_dbatchcpu = NOCPU;
+                       }
+                       vd->index = 0;
+                       critical_exit();
+                       return;
+
                }
-               mtx_unlock(&vnode_list_mtx);
+               /* fallthrough to locked processing */
        } else {
-               counter_u64_add(vnode_skipped_requeues, 1);
+               mtx_lock(&vnode_list_mtx);
+       }
 
-               for (i = 0; i < VDBATCH_SIZE; i++) {
-                       vp = vd->tab[i];
-                       vd->tab[i] = NULL;
-                       MPASS(vp->v_dbatchcpu != NOCPU);
-                       vp->v_dbatchcpu = NOCPU;
-               }
+       mtx_assert(&vnode_list_mtx, MA_OWNED);
+       critical_enter();
+       for (i = 0; i < VDBATCH_SIZE; i++) {
+               vp = vd->tab[i];
+               vd->tab[i] = NULL;
+               TAILQ_REMOVE(&vnode_list, vp, v_vnodelist);
+               TAILQ_INSERT_TAIL(&vnode_list, vp, v_vnodelist);
+               MPASS(vp->v_dbatchcpu != NOCPU);
+               vp->v_dbatchcpu = NOCPU;
        }
+       mtx_unlock(&vnode_list_mtx);
        vd->index = 0;
        critical_exit();
 }

Reply via email to