The branch main has been updated by kib:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=45117ffcd533ddf995f654db60b10899ae8370ec

commit 45117ffcd533ddf995f654db60b10899ae8370ec
Author:     Konstantin Belousov <[email protected]>
AuthorDate: 2026-02-28 16:11:58 +0000
Commit:     Konstantin Belousov <[email protected]>
CommitDate: 2026-03-05 23:46:54 +0000

    vfs: add VOP_DELAYED_SETSIZE() and related infrastructure
    
    The change generalizes code that was initially developed for nfs client
    to handle filesystems that need to call vnode_pager_setsize() while
    only owning the vnode lock shared.  Since vnode pager might need to trim
    or extend the vnode vm_object's page queue, the vnode lock for the call
    must be owned exclusive.  This is typical for filesystems with remote
    authoritative source of file attributes, like nfs/p9/fuse.
    
    Handle the conflict by delaying the vnode_pager_setsize() to the next
    vnode locking to avoid relock.  But if the next locking request is in
    shared mode, lock it exclusively instead, perform the delayed
    vnode_pager_setsize() call by doing VOP_DELAYED_SETSIZE(), and then
    downgrade to shared.
    
    Filesystems that opt into the feature must provide the implementation of
    VOP_DELAYED_SETSIZE() that actually calls vnode_pager_setsize(), and use
    vn_delay_setsize() helper to mark the vnode as requiring the delay call.
    
    Reviewed by:    rmacklem
    Tested by:      pho
    Sponsored by:   The FreeBSD Foundation
    MFC after:      1 week
    Differential revision:  https://reviews.freebsd.org/D55595
---
 sys/fs/deadfs/dead_vnops.c |  1 +
 sys/kern/vfs_default.c     |  1 +
 sys/kern/vfs_vnops.c       | 74 +++++++++++++++++++++++++++++++++++++++++++++-
 sys/kern/vnode_if.src      |  8 +++++
 sys/sys/vnode.h            | 31 +++++++++++++++++++
 5 files changed, 114 insertions(+), 1 deletion(-)

diff --git a/sys/fs/deadfs/dead_vnops.c b/sys/fs/deadfs/dead_vnops.c
index 137c86b65058..b6d6fa55d221 100644
--- a/sys/fs/deadfs/dead_vnops.c
+++ b/sys/fs/deadfs/dead_vnops.c
@@ -80,6 +80,7 @@ struct vop_vector dead_vnodeops = {
        .vop_write =            dead_write,
        .vop_fplookup_vexec =   VOP_EOPNOTSUPP,
        .vop_fplookup_symlink = VOP_EOPNOTSUPP,
+       .vop_delayed_setsize =  VOP_NULL,
 };
 VFS_VOP_VECTOR_REGISTER(dead_vnodeops);
 
diff --git a/sys/kern/vfs_default.c b/sys/kern/vfs_default.c
index 468d5d18b02b..3151c69d1912 100644
--- a/sys/kern/vfs_default.c
+++ b/sys/kern/vfs_default.c
@@ -147,6 +147,7 @@ struct vop_vector default_vnodeops = {
        .vop_add_writecount =   vop_stdadd_writecount,
        .vop_copy_file_range =  vop_stdcopy_file_range,
        .vop_vput_pair =        vop_stdvput_pair,
+       .vop_delayed_setsize =  VOP_PANIC,
 };
 VFS_VOP_VECTOR_REGISTER(default_vnodeops);
 
diff --git a/sys/kern/vfs_vnops.c b/sys/kern/vfs_vnops.c
index a53df50c06bd..24efdf4ac0d5 100644
--- a/sys/kern/vfs_vnops.c
+++ b/sys/kern/vfs_vnops.c
@@ -1959,9 +1959,74 @@ _vn_lock_fallback(struct vnode *vp, int flags, const char *file, int line,
        return (0);
 }
 
+static int
+vn_lock_delayed_setsize(struct vop_lock1_args *ap)
+{
+       struct vnode *vp;
+       int error, lktype;
+       bool onfault;
+
+       vp = ap->a_vp;
+       lktype = ap->a_flags & LK_TYPE_MASK;
+       if (vp->v_op == &dead_vnodeops)
+               return (0);
+       VI_LOCK(vp);
+       if ((vp->v_iflag & VI_DELAYEDSSZ) == 0 || (lktype != LK_SHARED &&
+           lktype != LK_EXCLUSIVE && lktype != LK_UPGRADE &&
+           lktype != LK_TRYUPGRADE)) {
+               VI_UNLOCK(vp);
+               return (0);
+       }
+       onfault = (ap->a_flags & LK_EATTR_MASK) == LK_NOWAIT &&
+           (ap->a_flags & LK_INIT_MASK) == LK_CANRECURSE &&
+           (lktype == LK_SHARED || lktype == LK_EXCLUSIVE);
+       if (onfault && vp->v_vnlock->lk_recurse == 0) {
+               /*
+                * Force retry in vm_fault(), to make the lock request
+                * sleepable, which allows us to piggy-back the
+                * sleepable call to vnode_pager_setsize().
+                */
+               VI_UNLOCK(vp);
+               VOP_UNLOCK(vp);
+               return (EBUSY);
+       }
+       if ((ap->a_flags & LK_NOWAIT) != 0 ||
+           (lktype == LK_SHARED && vp->v_vnlock->lk_recurse > 0)) {
+               VI_UNLOCK(vp);
+               return (0);
+       }
+       if (lktype == LK_SHARED) {
+               VOP_UNLOCK(vp);
+               ap->a_flags &= ~LK_TYPE_MASK;
+               ap->a_flags |= LK_EXCLUSIVE | LK_INTERLOCK;
+               error = VOP_LOCK1_APV(&default_vnodeops, ap);
+               if (error != 0 || vp->v_op == &dead_vnodeops)
+                       return (error);
+               if (vp->v_data == NULL)
+                       goto downgrade;
+               MPASS(vp->v_data != NULL);
+               VI_LOCK(vp);
+               if ((vp->v_iflag & VI_DELAYEDSSZ) == 0) {
+                       VI_UNLOCK(vp);
+                       goto downgrade;
+               }
+       }
+       vp->v_iflag &= ~VI_DELAYEDSSZ;
+       VI_UNLOCK(vp);
+       VOP_DELAYED_SETSIZE(vp);
+downgrade:
+       if (lktype == LK_SHARED) {
+               ap->a_flags &= ~(LK_TYPE_MASK | LK_INTERLOCK);
+               ap->a_flags |= LK_DOWNGRADE;
+               (void)VOP_LOCK1_APV(&default_vnodeops, ap);
+       }
+       return (0);
+}
+
 int
 _vn_lock(struct vnode *vp, int flags, const char *file, int line)
 {
+       struct vop_lock1_args ap;
        int error;
 
        VNASSERT((flags & LK_TYPE_MASK) != 0, vp,
@@ -1970,7 +2035,14 @@ _vn_lock(struct vnode *vp, int flags, const char *file, int line)
        error = VOP_LOCK1(vp, flags, file, line);
        if (__predict_false(error != 0 || VN_IS_DOOMED(vp)))
                return (_vn_lock_fallback(vp, flags, file, line, error));
-       return (0);
+       if (__predict_false((vp->v_iflag & VI_DELAYEDSSZ) == 0))
+               return (0);
+       ap.a_gen.a_desc = &vop_lock1_desc;
+       ap.a_vp = vp;
+       ap.a_flags = flags;
+       ap.a_file = file;
+       ap.a_line = line;
+       return (vn_lock_delayed_setsize(&ap));
 }
 
 /*
diff --git a/sys/kern/vnode_if.src b/sys/kern/vnode_if.src
index 6b7448d9f1df..78ba1aa7afda 100644
--- a/sys/kern/vnode_if.src
+++ b/sys/kern/vnode_if.src
@@ -847,6 +847,14 @@ vop_inotify_add_watch {
        IN struct thread *td;
 };
 
+
+%% delayed_setsize     vp      E E E
+
+vop_delayed_setsize {
+       IN struct vnode *vp;
+};
+
+
 # The VOPs below are spares at the end of the table to allow new VOPs to be
 # added in stable branches without breaking the KBI.  New VOPs in HEAD should
 # be added above these spares.  When merging a new VOP to a stable branch,
diff --git a/sys/sys/vnode.h b/sys/sys/vnode.h
index 1a267e0e272c..36e10fd8d8b7 100644
--- a/sys/sys/vnode.h
+++ b/sys/sys/vnode.h
@@ -268,6 +268,7 @@ _Static_assert(sizeof(struct vnode) <= 448, "vnode size crosses 448 bytes");
 #define        VI_DEFINACT     0x0010  /* deferred inactive */
 #define        VI_FOPENING     0x0020  /* In open, with opening process having the
                                   first right to advlock file */
+#define        VI_DELAYEDSSZ   0x0040  /* Delayed setsize */
 
 #define        VV_ROOT         0x0001  /* root of its filesystem */
 #define        VV_ISTTY        0x0002  /* vnode represents a tty */
@@ -1251,6 +1252,36 @@ vn_get_state(struct vnode *vp)
        atomic_load_consume_ptr(&(_vp)->v_data);\
 })
 
+static inline void
+vn_delay_setsize_locked(struct vnode *vp)
+{
+       ASSERT_VI_LOCKED(vp, "delayed_setsize");
+       vp->v_iflag |= VI_DELAYEDSSZ;
+}
+
+static inline void
+vn_delay_setsize(struct vnode *vp)
+{
+       VI_LOCK(vp);
+       vn_delay_setsize_locked(vp);
+       VI_UNLOCK(vp);
+}
+
+static inline void
+vn_clear_delayed_setsize_locked(struct vnode *vp)
+{
+       ASSERT_VI_LOCKED(vp, "delayed_setsize");
+       vp->v_iflag &= ~VI_DELAYEDSSZ;
+}
+
+static inline void
+vn_clear_delayed_setsize(struct vnode *vp)
+{
+       VI_LOCK(vp);
+       vn_clear_delayed_setsize_locked(vp);
+       VI_UNLOCK(vp);
+}
+
 #endif /* _KERNEL */
 
 #endif /* !_SYS_VNODE_H_ */

Reply via email to