The branch main has been updated by kib:

URL: https://cgit.FreeBSD.org/src/commit/?id=3b6056204dd80cc866b7998ef0776247ebc42ce4

commit 3b6056204dd80cc866b7998ef0776247ebc42ce4
Author:     Konstantin Belousov <[email protected]>
AuthorDate: 2023-02-04 01:20:19 +0000
Commit:     Konstantin Belousov <[email protected]>
CommitDate: 2023-02-04 18:32:07 +0000

    FIOSEEKHOLE/FIOSEEKDATA: correct consistency for bmap-based implementation
    
    Writes on UFS through a mapped region do not allocate disk blocks in
    holes immediately. The blocks are allocated when the pages are paged out
    for the first time.
    
    This breaks the algorithm in vn_bmap_seekhole() and ufs_bmap_seekdata(),
    because VOP_BMAP() reports a hole for a location that already contains
    valid data.
    
    Clean the pages before doing VOP_BMAP() in the affected functions.  In
    principle, we could clean less by requesting cleaning only from the
    offset onward, but it is probably not very important.
    
    PR:     269261
    Reported by:    asomers
    Reviewed by:    asomers, markj
    Sponsored by:   The FreeBSD Foundation
    MFC after:      1 week
    Differential revision:  https://reviews.freebsd.org/D38379
---
 sys/kern/vfs_vnops.c    | 14 ++++++++++++--
 sys/ufs/ufs/ufs_bmap.c  | 18 ++++++++++++++++++
 sys/ufs/ufs/ufs_vnops.c |  2 +-
 3 files changed, 31 insertions(+), 3 deletions(-)

diff --git a/sys/kern/vfs_vnops.c b/sys/kern/vfs_vnops.c
index a49070b6060d..1d90f76f9cd6 100644
--- a/sys/kern/vfs_vnops.c
+++ b/sys/kern/vfs_vnops.c
@@ -2556,6 +2556,7 @@ int
 vn_bmap_seekhole_locked(struct vnode *vp, u_long cmd, off_t *off,
     struct ucred *cred)
 {
+       vm_object_t obj;
        off_t size;
        daddr_t bn, bnp;
        uint64_t bsize;
@@ -2564,7 +2565,7 @@ vn_bmap_seekhole_locked(struct vnode *vp, u_long cmd, off_t *off,
 
        KASSERT(cmd == FIOSEEKHOLE || cmd == FIOSEEKDATA,
            ("%s: Wrong command %lu", __func__, cmd));
-       ASSERT_VOP_LOCKED(vp, "vn_bmap_seekhole_locked");
+       ASSERT_VOP_ELOCKED(vp, "vn_bmap_seekhole_locked");
 
        if (vp->v_type != VREG) {
                error = ENOTTY;
@@ -2578,6 +2579,15 @@ vn_bmap_seekhole_locked(struct vnode *vp, u_long cmd, off_t *off,
                error = ENXIO;
                goto out;
        }
+
+       /* See the comment in ufs_bmap_seekdata(). */
+       obj = vp->v_object;
+       if (obj != NULL) {
+               VM_OBJECT_WLOCK(obj);
+               vm_object_page_clean(obj, 0, 0, OBJPC_SYNC);
+               VM_OBJECT_WUNLOCK(obj);
+       }
+
        bsize = vp->v_mount->mnt_stat.f_iosize;
        for (bn = noff / bsize; noff < size; bn++, noff += bsize -
            noff % bsize) {
@@ -2613,7 +2623,7 @@ vn_bmap_seekhole(struct vnode *vp, u_long cmd, off_t *off, struct ucred *cred)
        KASSERT(cmd == FIOSEEKHOLE || cmd == FIOSEEKDATA,
            ("%s: Wrong command %lu", __func__, cmd));
 
-       if (vn_lock(vp, LK_SHARED) != 0)
+       if (vn_lock(vp, LK_EXCLUSIVE) != 0)
                return (EBADF);
        error = vn_bmap_seekhole_locked(vp, cmd, off, cred);
        VOP_UNLOCK(vp);
diff --git a/sys/ufs/ufs/ufs_bmap.c b/sys/ufs/ufs/ufs_bmap.c
index 4ac8ca149279..acdd334f6c7b 100644
--- a/sys/ufs/ufs/ufs_bmap.c
+++ b/sys/ufs/ufs/ufs_bmap.c
@@ -44,12 +44,16 @@ __FBSDID("$FreeBSD$");
 #include <sys/bio.h>
 #include <sys/buf.h>
 #include <sys/proc.h>
+#include <sys/rwlock.h>
 #include <sys/vnode.h>
 #include <sys/mount.h>
 #include <sys/racct.h>
 #include <sys/resourcevar.h>
 #include <sys/stat.h>
 
+#include <vm/vm.h>
+#include <vm/vm_object.h>
+
 #include <ufs/ufs/extattr.h>
 #include <ufs/ufs/quota.h>
 #include <ufs/ufs/inode.h>
@@ -348,6 +352,7 @@ ufs_bmap_seekdata(struct vnode *vp, off_t *offp)
        struct inode *ip;
        struct mount *mp;
        struct ufsmount *ump;
+       vm_object_t obj;
        ufs2_daddr_t bn, daddr, nextbn;
        uint64_t bsize;
        off_t numblks;
@@ -364,6 +369,19 @@ ufs_bmap_seekdata(struct vnode *vp, off_t *offp)
        if (*offp < 0 || *offp >= ip->i_size)
                return (ENXIO);
 
+       /*
+        * We could have pages on the vnode's object queue which still
+        * do not have the data blocks allocated.  Convert all dirty
+        * pages into buffer writes to ensure that we see all
+        * allocated data.
+        */
+       obj = vp->v_object;
+       if (obj != NULL) {
+               VM_OBJECT_WLOCK(obj);
+               vm_object_page_clean(obj, 0, 0, OBJPC_SYNC);
+               VM_OBJECT_WUNLOCK(obj);
+       }
+
        bsize = mp->mnt_stat.f_iosize;
        for (bn = *offp / bsize, numblks = howmany(ip->i_size, bsize);
            bn < numblks; bn = nextbn) {
diff --git a/sys/ufs/ufs/ufs_vnops.c b/sys/ufs/ufs/ufs_vnops.c
index 93a5b173b785..ae6d963920f3 100644
--- a/sys/ufs/ufs/ufs_vnops.c
+++ b/sys/ufs/ufs/ufs_vnops.c
@@ -2944,7 +2944,7 @@ ufs_ioctl(struct vop_ioctl_args *ap)
        vp = ap->a_vp;
        switch (ap->a_command) {
        case FIOSEEKDATA:
-               error = vn_lock(vp, LK_SHARED);
+               error = vn_lock(vp, LK_EXCLUSIVE);
                if (error == 0) {
                        error = ufs_bmap_seekdata(vp, (off_t *)ap->a_data);
                        VOP_UNLOCK(vp);

Reply via email to