The branch main has been updated by asomers:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=7e68af7ce2c1b892954df415774fe59fd2f1b62f

commit 7e68af7ce2c1b892954df415774fe59fd2f1b62f
Author:     Alan Somers <[email protected]>
AuthorDate: 2026-01-23 21:23:51 +0000
Commit:     Alan Somers <[email protected]>
CommitDate: 2026-03-12 16:11:25 +0000

    fusefs: redo vnode attribute locking
    
    Previously most fields in fuse_vnode_data were protected by the vnode
    lock.  But because DEBUG_VFS_LOCKS was never enabled by default until
    stable/15 the assertions were never checked, and many were wrong.
    Others were missing.  This led to panics in stable/15 and 16.0-CURRENT,
    when a vnode was expected to be exclusively locked but wasn't, for fuse
    file systems that mount with "-o async".
    
    In some places it isn't possible to exclusively lock the vnode when
    accessing these fields.  So protect them with a new mutex instead.  This
    fixes panics and unprotected field accesses in VOP_READ,
    VOP_COPY_FILE_RANGE, VOP_GETATTR, VOP_BMAP, and FUSE_NOTIFY_INVAL_ENTRY.
    Add assertions everywhere the protected fields are accessed.
    
    Lock the vnode exclusively when handling FUSE_NOTIFY_INVAL_INODE.
    
    During fuse_vnode_setsize, if the vnode isn't already exclusively
    locked, use the vn_delayed_setsize mechanism.  This fixes panics during
    VOP_READ or VOP_GETATTR.
    
    Also, ensure that fuse_vnop_rename locks the "from" vnode.
    
    Finally, reorder elements in struct fuse_vnode_data to reduce the
    structure size.
    
    Fixes:          283391
    Reported by:    kargl, markj, vishwin, Abdelkader Boudih, [email protected]
    MFC after:      2 weeks
    Sponsored by:   ConnectWise
    Reviewed by:    kib
    Differential Revision: https://reviews.freebsd.org/D55230
---
 sys/fs/fuse/fuse_file.c       |   8 +-
 sys/fs/fuse/fuse_internal.c   |  42 +++++----
 sys/fs/fuse/fuse_io.c         |  21 ++++-
 sys/fs/fuse/fuse_node.c       |  89 ++++++++++++++++----
 sys/fs/fuse/fuse_node.h       |  91 +++++++++++++++++---
 sys/fs/fuse/fuse_vfsops.c     |   2 +
 sys/fs/fuse/fuse_vnops.c      |  81 ++++++++++++++++--
 tests/sys/fs/fusefs/bmap.cc   |  34 +++++---
 tests/sys/fs/fusefs/notify.cc |  38 ++++++---
 tests/sys/fs/fusefs/read.cc   | 192 ++++++++++++++++++++++++++++++++++++++++++
 tests/sys/fs/fusefs/rename.cc |  90 ++++++++++++++++++++
 11 files changed, 609 insertions(+), 79 deletions(-)

diff --git a/sys/fs/fuse/fuse_file.c b/sys/fs/fuse/fuse_file.c
index 5f5819c2ccae..2cb8ef84e511 100644
--- a/sys/fs/fuse/fuse_file.c
+++ b/sys/fs/fuse/fuse_file.c
@@ -194,6 +194,8 @@ fuse_filehandle_close(struct vnode *vp, struct 
fuse_filehandle *fufh,
        int err = 0;
        int op = FUSE_RELEASE;
 
+       ASSERT_VOP_ELOCKED(vp, __func__);
+
        if (fuse_isdeadfs(vp)) {
                goto out;
        }
@@ -381,7 +383,11 @@ fuse_filehandle_init(struct vnode *vp, fufh_type_t 
fufh_type,
        } else {
                if ((foo->open_flags & FOPEN_KEEP_CACHE) == 0)
                        fuse_io_invalbuf(vp, td);
-               VTOFUD(vp)->flag &= ~FN_DIRECTIO;
+               /*
+                * XXX Update the flag without the lock for now.  See
+                * https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=293088
+                */
+               VTOFUD(vp)->flag &= ~FN_DIRECTIO;
        }
 
 }
diff --git a/sys/fs/fuse/fuse_internal.c b/sys/fs/fuse/fuse_internal.c
index a3590060f44a..914deb416c08 100644
--- a/sys/fs/fuse/fuse_internal.c
+++ b/sys/fs/fuse/fuse_internal.c
@@ -262,7 +262,7 @@ fuse_internal_cache_attrs(struct vnode *vp, struct 
fuse_attr *attr,
        fvdat = VTOFUD(vp);
        data = fuse_get_mpdata(mp);
 
-       ASSERT_VOP_ELOCKED(vp, "fuse_internal_cache_attrs");
+       ASSERT_CACHED_ATTRS_LOCKED(vp);
 
        fuse_validity_2_bintime(attr_valid, attr_valid_nsec,
                &fvdat->attr_cache_timeout);
@@ -478,7 +478,9 @@ fuse_internal_invalidate_entry(struct mount *mp, struct uio 
*uio)
        cn.cn_namelen = fnieo.namelen;
        err = cache_lookup(dvp, &vp, &cn, NULL, NULL);
        MPASS(err == 0);
+       CACHED_ATTR_LOCK(dvp);
        fuse_vnode_clear_attr_cache(dvp);
+       CACHED_ATTR_UNLOCK(dvp);
        vput(dvp);
        return (0);
 }
@@ -498,8 +500,8 @@ fuse_internal_invalidate_inode(struct mount *mp, struct uio 
*uio)
        if (fniio.ino == FUSE_ROOT_ID)
                err = VFS_ROOT(mp, LK_EXCLUSIVE, &vp);
        else
-               err = fuse_internal_get_cached_vnode(mp, fniio.ino, LK_SHARED,
-                       &vp);
+               err = fuse_internal_get_cached_vnode(mp, fniio.ino,
+                   LK_EXCLUSIVE, &vp);
        SDT_PROBE2(fusefs, , internal, invalidate_inode, vp, &fniio);
        if (err != 0 || vp == NULL)
                return (err);
@@ -694,6 +696,8 @@ fuse_internal_remove(struct vnode *dvp,
        nlink_t nlink;
        int err = 0;
 
+       ASSERT_CACHED_ATTRS_LOCKED(vp);
+
        fdisp_init(&fdi, cnp->cn_namelen + 1);
        fdisp_make_vp(&fdi, op, dvp, curthread, cnp->cn_cred);
 
@@ -891,15 +895,9 @@ fuse_internal_do_getattr(struct vnode *vp, struct vattr 
*vap,
        struct fuse_vnode_data *fvdat = VTOFUD(vp);
        struct fuse_getattr_in *fgai;
        struct fuse_attr_out *fao;
-       off_t old_filesize = fvdat->cached_attrs.va_size;
-       struct timespec old_atime = fvdat->cached_attrs.va_atime;
-       struct timespec old_ctime = fvdat->cached_attrs.va_ctime;
-       struct timespec old_mtime = fvdat->cached_attrs.va_mtime;
        __enum_uint8(vtype) vtyp;
        int err;
 
-       ASSERT_VOP_LOCKED(vp, __func__);
-
        fdisp_init(&fdi, sizeof(*fgai));
        fdisp_make_vp(&fdi, FUSE_GETATTR, vp, td, cred);
        fgai = fdi.indata;
@@ -917,22 +915,27 @@ fuse_internal_do_getattr(struct vnode *vp, struct vattr 
*vap,
 
        fao = (struct fuse_attr_out *)fdi.answ;
        vtyp = IFTOVT(fao->attr.mode);
+
+       CACHED_ATTR_LOCK(vp);
        if (fvdat->flag & FN_SIZECHANGE)
-               fao->attr.size = old_filesize;
+               fao->attr.size = fvdat->cached_attrs.va_size;
        if (fvdat->flag & FN_ATIMECHANGE) {
-               fao->attr.atime = old_atime.tv_sec;
-               fao->attr.atimensec = old_atime.tv_nsec;
+               fao->attr.atime = fvdat->cached_attrs.va_atime.tv_sec;
+               fao->attr.atimensec = fvdat->cached_attrs.va_atime.tv_nsec;
        }
        if (fvdat->flag & FN_CTIMECHANGE) {
-               fao->attr.ctime = old_ctime.tv_sec;
-               fao->attr.ctimensec = old_ctime.tv_nsec;
+               fao->attr.ctime = fvdat->cached_attrs.va_ctime.tv_sec;
+               fao->attr.ctimensec = fvdat->cached_attrs.va_ctime.tv_nsec;
        }
        if (fvdat->flag & FN_MTIMECHANGE) {
-               fao->attr.mtime = old_mtime.tv_sec;
-               fao->attr.mtimensec = old_mtime.tv_nsec;
+               fao->attr.mtime = fvdat->cached_attrs.va_mtime.tv_sec;
+               fao->attr.mtimensec = fvdat->cached_attrs.va_mtime.tv_nsec;
        }
+
        fuse_internal_cache_attrs(vp, &fao->attr, fao->attr_valid,
                fao->attr_valid_nsec, vap, true);
+
+       CACHED_ATTR_UNLOCK(vp);
        if (vtyp != vnode_vtype(vp)) {
                fuse_internal_vnode_disappear(vp);
                err = ENOENT;
@@ -950,10 +953,13 @@ fuse_internal_getattr(struct vnode *vp, struct vattr 
*vap, struct ucred *cred,
 {
        struct vattr *attrs;
 
+       CACHED_ATTR_LOCK(vp);
        if ((attrs = VTOVA(vp)) != NULL) {
                *vap = *attrs;  /* struct copy */
+               CACHED_ATTR_UNLOCK(vp);
                return 0;
-       }
+       } else
+               CACHED_ATTR_UNLOCK(vp);
 
        return fuse_internal_do_getattr(vp, vap, cred, td);
 }
@@ -1140,7 +1146,7 @@ int fuse_internal_setattr(struct vnode *vp, struct vattr 
*vap,
        int err = 0;
        __enum_uint8(vtype) vtyp;
 
-       ASSERT_VOP_ELOCKED(vp, __func__);
+       ASSERT_CACHED_ATTRS_LOCKED(vp);
 
        mp = vnode_mount(vp);
        fvdat = VTOFUD(vp);
diff --git a/sys/fs/fuse/fuse_io.c b/sys/fs/fuse/fuse_io.c
index 0760d7641c7d..9f864e48effc 100644
--- a/sys/fs/fuse/fuse_io.c
+++ b/sys/fs/fuse/fuse_io.c
@@ -401,6 +401,7 @@ retry:
                        fuse_warn(data, FSESS_WARN_WROTE_LONG,
                                "wrote more data than we provided it.");
                        /* This is bonkers.  Clear attr cache. */
+                       ASSERT_CACHED_ATTRS_LOCKED(vp);
                        fvdat->flag &= ~FN_SIZECHANGE;
                        fuse_vnode_clear_attr_cache(vp);
                        err = EINVAL;
@@ -416,8 +417,10 @@ retry:
                        fuse_vnode_setsize(vp, as_written_offset, false);
                        getnanouptime(&fvdat->last_local_modify);
                }
-               if (as_written_offset - diff >= filesize)
+               if (as_written_offset - diff >= filesize) {
+                       ASSERT_CACHED_ATTRS_LOCKED(vp);
                        fvdat->flag &= ~FN_SIZECHANGE;
+               }
 
                if (diff > 0) {
                        /* Short write */
@@ -454,8 +457,11 @@ retry:
 
        fdisp_destroy(&fdi);
 
-       if (wrote_anything)
+       if (wrote_anything) {
+               CACHED_ATTR_LOCK(vp);
                fuse_vnode_undirty_cached_timestamps(vp, false);
+               CACHED_ATTR_UNLOCK(vp);
+       }
 
        vn_rlimit_fsizex_res(uio, r);
        return (err);
@@ -556,6 +562,7 @@ again:
                        err = fuse_vnode_setsize(vp, uio->uio_offset + n, 
false);
                        filesize = uio->uio_offset + n;
                        getnanouptime(&fvdat->last_local_modify);
+                       ASSERT_CACHED_ATTRS_LOCKED(vp);
                        fvdat->flag |= FN_SIZECHANGE;
                        if (err) {
                                brelse(bp);
@@ -806,6 +813,7 @@ fuse_io_strategy(struct vnode *vp, struct buf *bp)
                        left = uiop->uio_resid;
                        bzero((char *)bp->b_data + nread, left);
 
+                       CACHED_ATTR_LOCK(vp);
                        if ((fvdat->flag & FN_SIZECHANGE) == 0) {
                                /*
                                 * A short read with no error, when not using
@@ -838,6 +846,7 @@ fuse_io_strategy(struct vnode *vp, struct buf *bp)
                                        "Short read of a dirty file");
                                uiop->uio_resid = 0;
                        }
+                       CACHED_ATTR_UNLOCK(vp);
                }
                if (error) {
                        bp->b_ioflags |= BIO_ERROR;
@@ -855,10 +864,18 @@ fuse_io_strategy(struct vnode *vp, struct buf *bp)
                 * anything about it.  In particular, we can't invalidate any
                 * part of the file's buffers because VOP_STRATEGY is called
                 * with them already locked.
+                *
+                * Normally the vnode should be exclusively locked at this
+                * point.  However, if clustered reads are in use, then in a
+                * mixed read-write workload getblkx may need to flush a
+                * partially written buffer to disk during a read.  In such a
+                * case, the vnode may only have a shared lock at this point.
                 */
+               CACHED_ATTR_LOCK(vp);
                filesize = fvdat->cached_attrs.va_size;
                /* filesize must've been cached by fuse_vnop_open.  */
                KASSERT(filesize != VNOVAL, ("filesize should've been cached"));
+               CACHED_ATTR_UNLOCK(vp);
 
                if ((off_t)bp->b_lblkno * biosize + bp->b_dirtyend > filesize)
                        bp->b_dirtyend = filesize - 
diff --git a/sys/fs/fuse/fuse_node.c b/sys/fs/fuse/fuse_node.c
index 742dc66bcafc..f4fb993a7ca1 100644
--- a/sys/fs/fuse/fuse_node.c
+++ b/sys/fs/fuse/fuse_node.c
@@ -157,6 +157,8 @@ fuse_vnode_init(struct vnode *vp, struct fuse_vnode_data 
*fvdat,
        fvdat->nid = nodeid;
        LIST_INIT(&fvdat->handles);
 
+       mtx_init(&fvdat->cached_attr_mtx, "fuse attr cache mutex", NULL,
+           MTX_DEF);
        vattr_null(&fvdat->cached_attrs);
        fvdat->cached_attrs.va_birthtime.tv_sec = -1;
        fvdat->cached_attrs.va_birthtime.tv_nsec = 0;
@@ -181,6 +183,7 @@ fuse_vnode_destroy(struct vnode *vp)
        struct fuse_vnode_data *fvdat = vp->v_data;
 
        vp->v_data = NULL;
+       mtx_destroy(&fvdat->cached_attr_mtx);
        KASSERT(LIST_EMPTY(&fvdat->handles),
                ("Destroying fuse vnode with open files!"));
        free(fvdat, M_FUSEVN);
@@ -386,7 +389,8 @@ fuse_vnode_savesize(struct vnode *vp, struct ucred *cred, 
pid_t pid)
        struct fuse_setattr_in *fsai;
        int err = 0;
 
-       ASSERT_VOP_ELOCKED(vp, "fuse_io_extend");
+       ASSERT_VOP_ELOCKED(vp, __func__); /* For flag and last_local_modify */
+       ASSERT_CACHED_ATTRS_LOCKED(vp);
 
        if (fuse_isdeadfs(vp)) {
                return EBADF;
@@ -439,10 +443,10 @@ fuse_vnode_setsize(struct vnode *vp, off_t newsize, bool 
from_server)
        struct vattr *attrs;
        off_t oldsize;
        size_t iosize;
-       struct buf *bp = NULL;
        int err = 0;
 
-       ASSERT_VOP_ELOCKED(vp, "fuse_vnode_setsize");
+       ASSERT_VOP_LOCKED(vp, __func__);
+       ASSERT_CACHED_ATTRS_LOCKED(vp);
 
        iosize = fuse_iosize(vp);
        oldsize = fvdat->cached_attrs.va_size;
@@ -450,7 +454,45 @@ fuse_vnode_setsize(struct vnode *vp, off_t newsize, bool 
from_server)
        if ((attrs = VTOVA(vp)) != NULL)
                attrs->va_size = newsize;
 
-       if (newsize < oldsize) {
+       if (from_server && newsize > oldsize && oldsize != VNOVAL) {
+               /*
+                * The FUSE server changed the file size behind our back.  We
+                * should invalidate the entire cache.
+                */
+               daddr_t end_lbn;
+
+               end_lbn = howmany(newsize, iosize);
+               v_inval_buf_range(vp, 0, end_lbn, iosize);
+       }
+
+       if (VOP_ISLOCKED(vp) == LK_EXCLUSIVE) {
+               err = fuse_vnode_setsize_immediate(vp, newsize < oldsize);
+       } else {
+               /* Without an exclusive vnode lock, we must defer the operation 
*/
+               fvdat->flag |= FN_DELAYED_TRUNCATE;
+               vn_delayed_setsize(vp);
+       }
+
+       return err;
+}
+
+/* Immediately set the vnode's size in the pager */
+int
+fuse_vnode_setsize_immediate(struct vnode *vp, bool shrink)
+{
+       struct fuse_vnode_data *fvdat = VTOFUD(vp);
+       struct buf *bp = NULL;
+       size_t iosize;
+       off_t newsize;
+       int err = 0;
+
+       MPASS(fvdat);
+       ASSERT_VOP_ELOCKED(vp, __func__);
+
+       iosize = fuse_iosize(vp);
+       newsize = fvdat->cached_attrs.va_size;
+
+       if (shrink) {
                daddr_t lbn;
 
                err = vtruncbuf(vp, newsize, fuse_iosize(vp));
@@ -474,21 +516,13 @@ fuse_vnode_setsize(struct vnode *vp, off_t newsize, bool 
from_server)
                MPASS(bp->b_flags & B_VMIO);
                vfs_bio_clrbuf(bp);
                bp->b_dirtyend = MIN(bp->b_dirtyend, newsize - lbn * iosize);
-       } else if (from_server && newsize > oldsize && oldsize != VNOVAL) {
-               /*
-                * The FUSE server changed the file size behind our back.  We
-                * should invalidate the entire cache.
-                */
-               daddr_t end_lbn;
-
-               end_lbn = howmany(newsize, iosize);
-               v_inval_buf_range(vp, 0, end_lbn, iosize);
        }
 out:
        if (bp)
                brelse(bp);
        vnode_pager_setsize(vp, newsize);
-       return err;
+
+       return (err);
 }
 
 /* Get the current, possibly dirty, size of the file */
@@ -497,15 +531,28 @@ fuse_vnode_size(struct vnode *vp, off_t *filesize, struct 
ucred *cred,
        struct thread *td)
 {
        struct fuse_vnode_data *fvdat = VTOFUD(vp);
+       struct vattr va;
        int error = 0;
 
+       ASSERT_VOP_LOCKED(vp, __func__);
+
+       CACHED_ATTR_LOCK(vp);
        if (!(fvdat->flag & FN_SIZECHANGE) &&
                (!fuse_vnode_attr_cache_valid(vp) ||
-                 fvdat->cached_attrs.va_size == VNOVAL)) 
-               error = fuse_internal_do_getattr(vp, NULL, cred, td);
-
-       if (!error)
+                 fvdat->cached_attrs.va_size == VNOVAL)) {
+               CACHED_ATTR_UNLOCK(vp);
+               /*
+                * It incurs a large struct copy, but we supply &va so we don't
+                * have to acquire the lock a second time after
+                * fuse_internal_do_getattr returns.
+                */
+               error = fuse_internal_do_getattr(vp, &va, cred, td);
+               if (!error)
+                       *filesize = va.va_size;
+       } else {
                *filesize = fvdat->cached_attrs.va_size;
+               CACHED_ATTR_UNLOCK(vp);
+       }
 
        return error;
 }
@@ -515,6 +562,8 @@ fuse_vnode_undirty_cached_timestamps(struct vnode *vp, bool 
atime)
 {
        struct fuse_vnode_data *fvdat = VTOFUD(vp);
 
+       ASSERT_CACHED_ATTRS_LOCKED(vp);
+
        fvdat->flag &= ~(FN_MTIMECHANGE | FN_CTIMECHANGE);
        if (atime)
                fvdat->flag &= ~FN_ATIMECHANGE;
@@ -537,6 +586,8 @@ fuse_vnode_update(struct vnode *vp, int flags)
        if (mp->mnt_flag & MNT_NOATIME)
                flags &= ~FN_ATIMECHANGE;
 
+       CACHED_ATTR_LOCK(vp);
+
        if (flags & FN_ATIMECHANGE)
                fvdat->cached_attrs.va_atime = ts;
        if (flags & FN_MTIMECHANGE)
@@ -545,6 +596,8 @@ fuse_vnode_update(struct vnode *vp, int flags)
                fvdat->cached_attrs.va_ctime = ts;
 
        fvdat->flag |= flags;
+
+       CACHED_ATTR_UNLOCK(vp);
 }
 
 void
diff --git a/sys/fs/fuse/fuse_node.h b/sys/fs/fuse/fuse_node.h
index 97774de9eeb5..b6e388d01702 100644
--- a/sys/fs/fuse/fuse_node.h
+++ b/sys/fs/fuse/fuse_node.h
@@ -79,6 +79,11 @@
  * cache_attrs.va_size field does not time out.
  */
 #define        FN_SIZECHANGE           0x00000100
+/*
+ * Whether I/O to this vnode should bypass the cache.
+ * XXX BUG: this should be part of the file handle, not the vnode data.
+ * https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=293088
+ */
 #define        FN_DIRECTIO             0x00000200
 /* Indicates that parent_nid is valid */
 #define        FN_PARENT_NID           0x00000400
@@ -92,38 +97,81 @@
 #define        FN_CTIMECHANGE          0x00001000
 #define        FN_ATIMECHANGE          0x00002000
 
+/* vop_delayed_setsize should truncate the file */
+#define FN_DELAYED_TRUNCATE    0x00004000
+
+#define CACHED_ATTR_LOCK(vp)                           \
+do {                                                   \
+       ASSERT_VOP_LOCKED(vp, __func__);                \
+       if (VOP_ISLOCKED(vp) != LK_EXCLUSIVE)           \
+               mtx_lock(&VTOFUD(vp)->cached_attr_mtx); \
+} while(0)
+
+#define CACHED_ATTR_UNLOCK(vp)                                 \
+do {                                                           \
+       ASSERT_VOP_LOCKED(vp, __func__);                        \
+       if (VOP_ISLOCKED(vp) != LK_EXCLUSIVE)                   \
+               mtx_unlock(&VTOFUD(vp)->cached_attr_mtx);       \
+} while(0)
+
 struct fuse_vnode_data {
-       /** self **/
+       /* self's node id, similar to an inode number. Immutable. */
        uint64_t        nid;
+       /*
+        * Generation number.  Distinguishes files with same nid but that don't
+        * overlap in time.  Immutable.
+        */
        uint64_t        generation;
 
-       /** parent **/
+       /* parent's node id.  Protected by the vnode lock. */
        uint64_t        parent_nid;
 
        /** I/O **/
-       /* List of file handles for all of the vnode's open file descriptors */
+       /*
+        * List of file handles for all of the vnode's open file descriptors.
+        * Protected by the vnode lock.
+        */
        LIST_HEAD(, fuse_filehandle)    handles;
 
-       /** flags **/
-       uint32_t        flag;
+       /* Protects flag, attr_cache_timeout and cached_attrs */
+       struct mtx      cached_attr_mtx;
 
-       /** meta **/
-       /* The monotonic time after which the attr cache is invalid */
+       /*
+        * The monotonic time after which the attr cache is invalid
+        * Protected by an exclusive vnode lock or the cached_attr_mtx
+        */
        struct bintime  attr_cache_timeout;
-       /* 
+
+       /*
         * Monotonic time after which the entry is invalid.  Used for lookups
-        * by nodeid instead of pathname.
+        * by nodeid instead of pathname.  Protected by the vnode lock.
         */
        struct bintime  entry_cache_timeout;
+
        /*
         * Monotonic time of the last FUSE operation that modified the file
         * size.  Used to avoid races between mutator ops like VOP_SETATTR and
-        * unlocked accessor ops like VOP_LOOKUP.
+        * unlocked accessor ops like VOP_LOOKUP.  Protected by the vnode lock.
         */
        struct timespec last_local_modify;
+
+       /* Protected by an exclusive vnode lock or the cached_attr_mtx */
        struct vattr    cached_attrs;
+
+       /* Number of FUSE_LOOKUPs minus FUSE_FORGETs. Protected by vnode lock */
        uint64_t        nlookup;
+
+       /*
+        * Misc flags.  Protected by an exclusive vnode lock or the
+        * cached_attr_mtx, because some of the flags reflect the contents of
+        * cached_attrs.
+        */
+       uint32_t        flag;
+
+       /* Vnode type.  Immutable */
        __enum_uint8(vtype)     vtype;
+
+       /* State for clustered writes.  Protected by vnode lock */
        struct vn_clusterw clusterw;
 };
 
@@ -141,18 +189,32 @@ struct fuse_fid {
 #define VTOFUD(vp) \
        ((struct fuse_vnode_data *)((vp)->v_data))
 #define VTOI(vp)    (VTOFUD(vp)->nid)
+
+#define ASSERT_CACHED_ATTRS_LOCKED(vp)                         \
+do {                                                           \
+       ASSERT_VOP_LOCKED(vp, __func__);                        \
+       VNASSERT(VOP_ISLOCKED(vp) == LK_EXCLUSIVE ||            \
+               mtx_owned(&VTOFUD(vp)->cached_attr_mtx), vp,    \
+               ("cached attrs not locked"));                   \
+} while(0)
+
 static inline bool
 fuse_vnode_attr_cache_valid(struct vnode *vp)
 {
+       struct fuse_vnode_data *fvdat = VTOFUD(vp);
        struct bintime now;
 
+       ASSERT_CACHED_ATTRS_LOCKED(vp);
+
        getbinuptime(&now);
-       return (bintime_cmp(&(VTOFUD(vp)->attr_cache_timeout), &now, >));
+       return (bintime_cmp(&fvdat->attr_cache_timeout, &now, >));
 }
 
 static inline struct vattr*
 VTOVA(struct vnode *vp)
 {
+       ASSERT_CACHED_ATTRS_LOCKED(vp);
+
        if (fuse_vnode_attr_cache_valid(vp))
                return &(VTOFUD(vp)->cached_attrs);
        else
@@ -162,6 +224,8 @@ VTOVA(struct vnode *vp)
 static inline void
 fuse_vnode_clear_attr_cache(struct vnode *vp)
 {
+       ASSERT_CACHED_ATTRS_LOCKED(vp);
+
        bintime_clear(&VTOFUD(vp)->attr_cache_timeout);
 }
 
@@ -184,10 +248,14 @@ static inline void
 fuse_vnode_setparent(struct vnode *vp, struct vnode *dvp)
 {
        if (dvp != NULL && vp->v_type == VDIR) {
+               ASSERT_VOP_ELOCKED(vp, __func__); /* for parent_nid */
+
                MPASS(dvp->v_type == VDIR);
                VTOFUD(vp)->parent_nid = VTOI(dvp);
                VTOFUD(vp)->flag |= FN_PARENT_NID;
        } else {
+               ASSERT_CACHED_ATTRS_LOCKED(vp);
+
                VTOFUD(vp)->flag &= ~FN_PARENT_NID;
        }
 }
@@ -207,6 +275,7 @@ void fuse_vnode_open(struct vnode *vp, int32_t 
fuse_open_flags,
 int fuse_vnode_savesize(struct vnode *vp, struct ucred *cred, pid_t pid);
 
 int fuse_vnode_setsize(struct vnode *vp, off_t newsize, bool from_server);
+int fuse_vnode_setsize_immediate(struct vnode *vp, bool shrink);
 
 void fuse_vnode_undirty_cached_timestamps(struct vnode *vp, bool atime);
 
diff --git a/sys/fs/fuse/fuse_vfsops.c b/sys/fs/fuse/fuse_vfsops.c
index a5118aa7675f..7b6ad86e4b2b 100644
--- a/sys/fs/fuse/fuse_vfsops.c
+++ b/sys/fs/fuse/fuse_vfsops.c
@@ -601,6 +601,8 @@ fuse_vfsop_vget(struct mount *mp, ino_t ino, int flags, 
struct vnode **vpp)
        error = fuse_vnode_get(mp, feo, nodeid, NULL, vpp, NULL, vtyp);
        if (error)
                goto out;
+       /* for last_local_modify and fuse_internal_cache_attrs */
+       ASSERT_VOP_ELOCKED(*vpp, __func__);
        fvdat = VTOFUD(*vpp);
 
        if (timespeccmp(&now, &fvdat->last_local_modify, >)) {
diff --git a/sys/fs/fuse/fuse_vnops.c b/sys/fs/fuse/fuse_vnops.c
index c66d3bcf01e0..ad7a2dcf84ef 100644
--- a/sys/fs/fuse/fuse_vnops.c
+++ b/sys/fs/fuse/fuse_vnops.c
@@ -134,6 +134,7 @@ static vop_close_t fuse_vnop_close;
 static vop_copy_file_range_t fuse_vnop_copy_file_range;
 static vop_create_t fuse_vnop_create;
 static vop_deallocate_t fuse_vnop_deallocate;
+static vop_delayed_setsize_t fuse_vnop_delayed_setsize;
 static vop_deleteextattr_t fuse_vnop_deleteextattr;
 static vop_fdatasync_t fuse_vnop_fdatasync;
 static vop_fsync_t fuse_vnop_fsync;
@@ -191,6 +192,7 @@ struct vop_vector fuse_vnops = {
        .vop_copy_file_range = fuse_vnop_copy_file_range,
        .vop_create = fuse_vnop_create,
        .vop_deallocate = fuse_vnop_deallocate,
+       .vop_delayed_setsize = fuse_vnop_delayed_setsize,
        .vop_deleteextattr = fuse_vnop_deleteextattr,
        .vop_fsync = fuse_vnop_fsync,
        .vop_fdatasync = fuse_vnop_fdatasync,
@@ -707,6 +709,8 @@ fuse_vnop_allocate(struct vop_allocate_args *ap)
                return (EXTERROR(EOPNOTSUPP, "This server does not implement "
                    "FUSE_FALLOCATE"));
 
+       ASSERT_CACHED_ATTRS_LOCKED(vp);
+
        io.uio_offset = *offset;
        io.uio_resid = *len;
        err = vn_rlimit_fsize(vp, &io, curthread);
@@ -779,7 +783,7 @@ fuse_vnop_bmap(struct vop_bmap_args *ap)
        struct fuse_data *data;
        struct fuse_vnode_data *fvdat = VTOFUD(vp);
        uint64_t biosize;
-       off_t fsize;
+       off_t fsize = VNOVAL;
        daddr_t lbn = ap->a_bn;
        daddr_t *pbn = ap->a_bnp;
        int *runp = ap->a_runp;
@@ -822,9 +826,10 @@ fuse_vnop_bmap(struct vop_bmap_args *ap)
                 * and the risk of getting it wrong is not worth the cost of
                 * another upcall.
                 */
-               if (fvdat->cached_attrs.va_size != VNOVAL)
-                       fsize = fvdat->cached_attrs.va_size;
-               else
+               CACHED_ATTR_LOCK(vp);
+               fsize = fvdat->cached_attrs.va_size;
+               CACHED_ATTR_UNLOCK(vp);
+               if (fsize == VNOVAL)
                        error = fuse_vnode_size(vp, &fsize, td->td_ucred, td);
                if (error == 0)
                        *runp = MIN(MAX(0, fsize / (off_t)biosize - lbn - 1),
@@ -894,6 +899,7 @@ fuse_vnop_close(struct vop_close_args *ap)
                cred = td->td_ucred;
 
        err = fuse_flush(vp, cred, pid, fflag);
+       ASSERT_CACHED_ATTRS_LOCKED(vp); /* For fvdat->flag */
        if (err == 0 && (fvdat->flag & FN_ATIMECHANGE) && !vfs_isrdonly(mp)) {
                struct vattr vap;
                struct fuse_data *data;
@@ -911,6 +917,7 @@ fuse_vnop_close(struct vop_close_args *ap)
                }
                if (access_e == 0) {
                        VATTR_NULL(&vap);
+                       ASSERT_CACHED_ATTRS_LOCKED(vp);
                        vap.va_atime = fvdat->cached_attrs.va_atime;
                        /*
                         * Ignore errors setting when setting atime.  That
@@ -1035,6 +1042,7 @@ fuse_vnop_copy_file_range(struct vop_copy_file_range_args 
*ap)
                *ap->a_inoffp += fwo->size;
                *ap->a_outoffp += fwo->size;
                fuse_internal_clear_suid_on_write(outvp, outcred, td);
+               ASSERT_CACHED_ATTRS_LOCKED(outvp);
                if (*ap->a_outoffp > outfvdat->cached_attrs.va_size) {
                        fuse_vnode_setsize(outvp, *ap->a_outoffp, false);
                        getnanouptime(&outfvdat->last_local_modify);
@@ -1337,6 +1345,7 @@ fuse_vnop_inactive(struct vop_inactive_args *ap)
 
        int need_flush = 1;
 
+       ASSERT_CACHED_ATTRS_LOCKED(vp); /* For fvdat->flag */
        LIST_FOREACH_SAFE(fufh, &fvdat->handles, next, fufh_tmp) {
                if (need_flush && vp->v_type == VREG) {
                        if ((VTOFUD(vp)->flag & FN_SIZECHANGE) != 0) {
@@ -1557,6 +1566,7 @@ fuse_vnop_lookup(struct vop_lookup_args *ap)
        else if ((err = fuse_internal_access(dvp, VEXEC, td, cred)))
                return err;
 
+       ASSERT_CACHED_ATTRS_LOCKED(dvp);        /* For flag */
        is_dot = cnp->cn_namelen == 1 && *(cnp->cn_nameptr) == '.';
        if (isdotdot && !(data->dataflags & FSESS_EXPORT_SUPPORT)) {
                if (!(VTOFUD(dvp)->flag & FN_PARENT_NID)) {
@@ -1960,6 +1970,10 @@ fuse_vnop_read(struct vop_read_args *ap)
                    "to be closed"));
        }
 
+       /*
+        * XXX Check this flag without the lock.  See
+        * https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=293088
+        */
        if (VTOFUD(vp)->flag & FN_DIRECTIO) {
                ioflag |= IO_DIRECT;
        }
@@ -2220,6 +2234,8 @@ fuse_vnop_remove(struct vop_remove_args *ap)
        return err;
 }
 
+SDT_PROBE_DEFINE4(fusefs, , vnops, erelookup, "struct vnode*",
+       "struct vnode*", "struct vnode*", "struct vnode*");
 /*
     struct vnop_rename_args {
        struct vnode *a_fdvp;
@@ -2242,6 +2258,7 @@ fuse_vnop_rename(struct vop_rename_args *ap)
        struct fuse_data *data;
        bool newparent = fdvp != tdvp;
        bool isdir = fvp->v_type == VDIR;
+       int locktype;
        int err = 0;
 
        if (fuse_isdeadfs(fdvp)) {
@@ -2270,11 +2287,32 @@ fuse_vnop_rename(struct vop_rename_args *ap)
         * have write permission to it, so ".." can be modified.
         */
        data = fuse_get_mpdata(vnode_mount(tdvp));
+
+       if (tdvp != fdvp)
+               locktype = LK_EXCLUSIVE; /* for fuse_vnode_setparent */
+       else
+               locktype = LK_SHARED;
+
+       /*
+        * Must use LK_NOWAIT to prevent LORs between fvp and tdvp or
+        * tvp
+        */
+       if (vn_lock(fvp, locktype | LK_NOWAIT) != 0) {
+               /*
+                * Can't release tdvp or tvp to try avoiding the LOR.
+                * Must return instead.
+                */
+               SDT_PROBE4(fusefs, , vnops, erelookup, fdvp, fvp, tdvp,
+                       tvp);
+               err = ERELOOKUP;
+               goto out;
+       }
+
        if (data->dataflags & FSESS_DEFAULT_PERMISSIONS && isdir && newparent) {
                err = fuse_internal_access(fvp, VWRITE,
                        curthread, tcnp->cn_cred);
                if (err)
-                       goto out;
+                       goto unlock;
        }
        err = fuse_internal_rename(fdvp, fcnp, tdvp, tcnp);
        if (err == 0) {
@@ -2293,6 +2331,8 @@ fuse_vnop_rename(struct vop_rename_args *ap)
                }
                cache_purge(fdvp);
        }
+unlock:
+       VOP_UNLOCK(fvp);
 out:
        if (tdvp == tvp) {
                vrele(tdvp);
@@ -2568,6 +2608,7 @@ static int
 fuse_vnop_write(struct vop_write_args *ap)
 {
        struct vnode *vp = ap->a_vp;
+       struct fuse_vnode_data *fvdat = VTOFUD(vp);
        struct uio *uio = ap->a_uio;
        int ioflag = ap->a_ioflag;
        struct ucred *cred = ap->a_cred;
@@ -2583,9 +2624,12 @@ fuse_vnop_write(struct vop_write_args *ap)
                    "to be closed"));
        }
 
-       if (VTOFUD(vp)->flag & FN_DIRECTIO) {
+       /*
+        * XXX Check this flag without the lock.  See
+        * https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=293088
+        */
+       if (fvdat->flag & FN_DIRECTIO)
                ioflag |= IO_DIRECT;
-       }
 
        err = fuse_filehandle_getrw(vp, FWRITE, &fufh, cred, pid);
        if (err == EBADF && vnode_mount(vp)->mnt_flag & MNT_EXPORTED) {
@@ -3219,6 +3263,29 @@ fallback:
        return (vop_stddeallocate(ap));
 }
 
+/*
+   struct vop_delayed_setsize_args {
+       struct vop_generic_args a_gen;
+       struct vnode *a_vp;
+  };
+ */
+static int
+fuse_vnop_delayed_setsize(struct vop_delayed_setsize_args *ap)
+{
+       struct vnode *vp = ap->a_vp;
+       struct fuse_vnode_data *fvdat = VTOFUD(ap->a_vp);
+       bool shrink = (fvdat->flag & FN_DELAYED_TRUNCATE) != 0;
+       int err;
+
+       if (!fvdat)
+               return (0);
+
+       err = fuse_vnode_setsize_immediate(vp, shrink);
+       fvdat->flag &= ~FN_DELAYED_TRUNCATE;
+
+       return (err);
+}
+
 /*
     struct vop_deleteextattr_args {
        struct vop_generic_args a_gen;
diff --git a/tests/sys/fs/fusefs/bmap.cc b/tests/sys/fs/fusefs/bmap.cc
index e61dadb6d79e..622a3c3debcc 100644
--- a/tests/sys/fs/fusefs/bmap.cc
+++ b/tests/sys/fs/fusefs/bmap.cc
@@ -44,10 +44,13 @@ using namespace testing;
 const static char FULLPATH[] = "mountpoint/foo";
 const static char RELPATH[] = "foo";
 
-class Bmap: public FuseTest {
+class Bmap: public FuseTest,
+           public WithParamInterface<tuple<int, int>>
+{
 public:
 virtual void SetUp() {
        m_maxreadahead = UINT32_MAX;
+       m_init_flags |= get<0>(GetParam());
        FuseTest::SetUp();
 }
 void expect_bmap(uint64_t ino, uint64_t lbn, uint32_t blocksize, uint64_t pbn)
@@ -73,12 +76,12 @@ void expect_lookup(const char *relpath, uint64_t ino, off_t 
size)
 }
 };
 
-class BmapEof: public Bmap, public WithParamInterface<int> {};
+class BmapEof: public Bmap {};
 
 /*
  * Test FUSE_BMAP
  */
-TEST_F(Bmap, bmap)
+TEST_P(Bmap, bmap)
 {
        struct fiobmap2_arg arg;
        /*
@@ -124,7 +127,7 @@ TEST_F(Bmap, bmap)
  * If the daemon does not implement VOP_BMAP, fusefs should return sensible
  * defaults.
  */
-TEST_F(Bmap, default_)
+TEST_P(Bmap, default_)
 {
        struct fiobmap2_arg arg;
        const off_t filesize = 1 << 30;
@@ -181,7 +184,7 @@ TEST_F(Bmap, default_)
  * The server returns an error for some reason for FUSE_BMAP.  fusefs should
  * faithfully report that error up to the caller.
  */
-TEST_F(Bmap, einval)
+TEST_P(Bmap, einval)
 {
        struct fiobmap2_arg arg;
        const off_t filesize = 1 << 30;
@@ -217,7 +220,7 @@ TEST_F(Bmap, einval)
  * https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=264196 .  The bug did not
  * lie in fusefs, but this is a convenient place for a regression test.
  */
-TEST_F(Bmap, spurious_einval)
+TEST_P(Bmap, spurious_einval)
 {
        const off_t filesize = 4ull << 30;
        const ino_t ino = 42;
@@ -288,7 +291,7 @@ TEST_P(BmapEof, eof)
        int fd;
        int ngetattrs;
 
-       ngetattrs = GetParam();
+       ngetattrs = get<1>(GetParam());
        FuseTest::expect_lookup(RELPATH, ino, mode, filesize, 1, 0);
        expect_open(ino, 0, 1);
        // Depending on ngetattrs, FUSE_READ could be called with either
@@ -348,6 +351,17 @@ TEST_P(BmapEof, eof)
        leak(fd);
 }
 
-INSTANTIATE_TEST_SUITE_P(BE, BmapEof,
-       Values(1, 2, 3)
-);
+/*
+ * Try with and without async reads, because it affects the type of vnode lock
+ * on entry to fuse_vnop_bmap.
+ */
+INSTANTIATE_TEST_SUITE_P(B, Bmap, Values(
+       tuple(0, 0),
+       tuple(FUSE_ASYNC_READ, 0)
+));
+
+INSTANTIATE_TEST_SUITE_P(BE, BmapEof, Values(
+       tuple(0, 1),
+       tuple(0, 2),
+       tuple(0, 3)
+));
diff --git a/tests/sys/fs/fusefs/notify.cc b/tests/sys/fs/fusefs/notify.cc
index d370a1e6e706..69742fb2a54b 100644
--- a/tests/sys/fs/fusefs/notify.cc
+++ b/tests/sys/fs/fusefs/notify.cc
@@ -47,8 +47,15 @@ using namespace testing;
  * invalidation.  This file tests our client's handling of those messages.
  */
 
-class Notify: public FuseTest {
+class Notify: public FuseTest,
+             public WithParamInterface<int>
+{
 public:
+virtual void SetUp() {
+       m_init_flags |= GetParam();
+       FuseTest::SetUp();
+}
+
 /* Ignore an optional FUSE_FSYNC */
 void maybe_expect_fsync(uint64_t ino)
 {
@@ -154,7 +161,7 @@ static void* store(void* arg) {
 }
 
 /* Invalidate a nonexistent entry */
-TEST_F(Notify, inval_entry_nonexistent)
+TEST_P(Notify, inval_entry_nonexistent)
 {
        const static char *name = "foo";
        struct inval_entry_args iea;
@@ -173,7 +180,7 @@ TEST_F(Notify, inval_entry_nonexistent)
 }
 
 /* Invalidate a cached entry */
-TEST_F(Notify, inval_entry)
+TEST_P(Notify, inval_entry)
 {
        const static char FULLPATH[] = "mountpoint/foo";
*** 404 LINES SKIPPED ***

Reply via email to