The branch main has been updated by kib:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=37aea2649ff707f23d35309d882b38e9ac818e42

commit 37aea2649ff707f23d35309d882b38e9ac818e42
Author:     Konstantin Belousov <[email protected]>
AuthorDate: 2022-10-20 13:17:43 +0000
Commit:     Konstantin Belousov <[email protected]>
CommitDate: 2022-12-09 12:17:12 +0000

    tmpfs: for used pages, account really allocated pages, instead of file sizes
    
    This makes tmpfs size accounting correct for the sparce files. Also
    correct report st_blocks/va_bytes. Previously the reported value did not
    accounted for the swapped out pages.
    
    PR:     223015
    Reviewed by:    markj
    Tested by:      pho
    Sponsored by:   The FreeBSD Foundation
    MFC after:      1 week
    Differential revision:  https://reviews.freebsd.org/D37097
---
 sys/fs/tmpfs/tmpfs.h        |  18 ++++++-
 sys/fs/tmpfs/tmpfs_subr.c   | 118 ++++++++++++++++++++++++++++++++++++++++----
 sys/fs/tmpfs/tmpfs_vfsops.c |   6 ++-
 sys/fs/tmpfs/tmpfs_vnops.c  |  17 +++++--
 sys/kern/uipc_shm.c         |   2 +-
 5 files changed, 144 insertions(+), 17 deletions(-)

diff --git a/sys/fs/tmpfs/tmpfs.h b/sys/fs/tmpfs/tmpfs.h
index ad0354f2d474..e9964d7d48af 100644
--- a/sys/fs/tmpfs/tmpfs.h
+++ b/sys/fs/tmpfs/tmpfs.h
@@ -148,6 +148,7 @@ RB_HEAD(tmpfs_dir, tmpfs_dirent);
  * (i)  tn_interlock
  * (m)  tmpfs_mount tm_allnode_lock
  * (c)  stable after creation
+ * (v)  tn_reg.tn_aobj vm_object lock
  */
 struct tmpfs_node {
        /*
@@ -299,6 +300,7 @@ struct tmpfs_node {
                         */
                        vm_object_t             tn_aobj;        /* (c) */
                        struct tmpfs_mount      *tn_tmp;        /* (c) */
+                       vm_pindex_t             tn_pages;       /* (v) */
                } tn_reg;
        } tn_spec;      /* (v) */
 };
@@ -533,7 +535,8 @@ VM_TO_TMPFS_VP(vm_object_t obj)
 {
        struct tmpfs_node *node;
 
-       MPASS((obj->flags & OBJ_TMPFS) != 0);
+       if ((obj->flags & OBJ_TMPFS) == 0)
+               return (NULL);
 
        /*
         * swp_priv is the back-pointer to the tmpfs node, if any,
@@ -545,6 +548,19 @@ VM_TO_TMPFS_VP(vm_object_t obj)
        return (node->tn_vnode);
 }
 
+static inline struct tmpfs_mount *
+VM_TO_TMPFS_MP(vm_object_t obj)
+{
+       struct tmpfs_node *node;
+
+       if ((obj->flags & OBJ_TMPFS) == 0)
+               return (NULL);
+
+       node = obj->un_pager.swp.swp_priv;
+       MPASS(node->tn_type == VREG);
+       return (node->tn_reg.tn_tmp);
+}
+
 static inline struct tmpfs_mount *
 VFS_TO_TMPFS(struct mount *mp)
 {
diff --git a/sys/fs/tmpfs/tmpfs_subr.c b/sys/fs/tmpfs/tmpfs_subr.c
index 0a7f255a4f74..ef558c424914 100644
--- a/sys/fs/tmpfs/tmpfs_subr.c
+++ b/sys/fs/tmpfs/tmpfs_subr.c
@@ -214,6 +214,87 @@ tmpfs_pager_getvp(vm_object_t object, struct vnode **vpp, 
bool *vp_heldp)
                VM_OBJECT_RUNLOCK(object);
 }
 
+static void
+tmpfs_pager_freespace(vm_object_t obj, vm_pindex_t start, vm_size_t size)
+{
+       struct tmpfs_node *node;
+       struct tmpfs_mount *tm;
+       vm_size_t c;
+
+       swap_pager_freespace(obj, start, size, &c);
+       if ((obj->flags & OBJ_TMPFS) == 0 || c == 0)
+               return;
+
+       node = obj->un_pager.swp.swp_priv;
+       MPASS(node->tn_type == VREG);
+       tm = node->tn_reg.tn_tmp;
+
+       KASSERT(tm->tm_pages_used >= c,
+           ("tmpfs tm %p pages %jd free %jd", tm,
+           (uintmax_t)tm->tm_pages_used, (uintmax_t)c));
+       atomic_add_long(&tm->tm_pages_used, -c);
+       KASSERT(node->tn_reg.tn_pages >= c,
+           ("tmpfs node %p pages %jd free %jd", node,
+           (uintmax_t)node->tn_reg.tn_pages, (uintmax_t)c));
+       node->tn_reg.tn_pages -= c;
+}
+
+static void
+tmpfs_page_inserted(vm_object_t obj, vm_page_t m)
+{
+       struct tmpfs_node *node;
+       struct tmpfs_mount *tm;
+
+       if ((obj->flags & OBJ_TMPFS) == 0)
+               return;
+
+       node = obj->un_pager.swp.swp_priv;
+       MPASS(node->tn_type == VREG);
+       tm = node->tn_reg.tn_tmp;
+
+       if (!vm_pager_has_page(obj, m->pindex, NULL, NULL)) {
+               atomic_add_long(&tm->tm_pages_used, 1);
+               node->tn_reg.tn_pages += 1;
+       }
+}
+
+static void
+tmpfs_page_removed(vm_object_t obj, vm_page_t m)
+{
+       struct tmpfs_node *node;
+       struct tmpfs_mount *tm;
+
+       if ((obj->flags & OBJ_TMPFS) == 0)
+               return;
+
+       node = obj->un_pager.swp.swp_priv;
+       MPASS(node->tn_type == VREG);
+       tm = node->tn_reg.tn_tmp;
+
+       if (!vm_pager_has_page(obj, m->pindex, NULL, NULL)) {
+               KASSERT(tm->tm_pages_used >= 1,
+                   ("tmpfs tm %p pages %jd free 1", tm,
+                   (uintmax_t)tm->tm_pages_used));
+               atomic_add_long(&tm->tm_pages_used, -1);
+               KASSERT(node->tn_reg.tn_pages >= 1,
+                   ("tmpfs node %p pages %jd free 1", node,
+                   (uintmax_t)node->tn_reg.tn_pages));
+               node->tn_reg.tn_pages -= 1;
+       }
+}
+
+static boolean_t
+tmpfs_can_alloc_page(vm_object_t obj, vm_pindex_t pindex)
+{
+       struct tmpfs_mount *tm;
+
+       tm = VM_TO_TMPFS_MP(obj);
+       if (tm == NULL || vm_pager_has_page(obj, pindex, NULL, NULL) ||
+           tm->tm_pages_max == 0)
+               return (true);
+       return (tm->tm_pages_max > atomic_load_long(&tm->tm_pages_used));
+}
+
 struct pagerops tmpfs_pager_ops = {
        .pgo_kvme_type = KVME_TYPE_VNODE,
        .pgo_alloc = tmpfs_pager_alloc,
@@ -222,6 +303,10 @@ struct pagerops tmpfs_pager_ops = {
        .pgo_release_writecount = tmpfs_pager_release_writecount,
        .pgo_mightbedirty = vm_object_mightbedirty_,
        .pgo_getvp = tmpfs_pager_getvp,
+       .pgo_freespace = tmpfs_pager_freespace,
+       .pgo_page_inserted = tmpfs_page_inserted,
+       .pgo_page_removed = tmpfs_page_removed,
+       .pgo_can_alloc_page = tmpfs_can_alloc_page,
 };
 
 static int
@@ -576,6 +661,7 @@ tmpfs_alloc_node(struct mount *mp, struct tmpfs_mount *tmp, 
enum vtype type,
                nnode->tn_reg.tn_aobj->un_pager.swp.swp_priv = nnode;
                vm_object_set_flag(nnode->tn_reg.tn_aobj, OBJ_TMPFS);
                nnode->tn_reg.tn_tmp = tmp;
+               nnode->tn_reg.tn_pages = 0;
                break;
 
        default:
@@ -665,11 +751,31 @@ tmpfs_free_node_locked(struct tmpfs_mount *tmp, struct 
tmpfs_node *node,
        switch (node->tn_type) {
        case VREG:
                uobj = node->tn_reg.tn_aobj;
-               if (uobj != NULL && uobj->size != 0)
-                       atomic_subtract_long(&tmp->tm_pages_used, uobj->size);
+               node->tn_reg.tn_aobj = NULL;
+               if (uobj != NULL) {
+                       VM_OBJECT_WLOCK(uobj);
+                       KASSERT((uobj->flags & OBJ_TMPFS) != 0,
+                           ("tmpfs node %p uobj %p not tmpfs", node, uobj));
+                       vm_object_clear_flag(uobj, OBJ_TMPFS);
+                       KASSERT(tmp->tm_pages_used >= node->tn_reg.tn_pages,
+                           ("tmpfs tmp %p node %p pages %jd free %jd", tmp,
+                           node, (uintmax_t)tmp->tm_pages_used,
+                           (uintmax_t)node->tn_reg.tn_pages));
+                       atomic_add_long(&tmp->tm_pages_used,
+                           -node->tn_reg.tn_pages);
+                       VM_OBJECT_WUNLOCK(uobj);
+               }
                tmpfs_free_tmp(tmp);
+
+               /*
+                * vm_object_deallocate() must not be called while
+                * owning tm_allnode_lock, because deallocate might
+                * sleep.  Call it after tmpfs_free_tmp() does the
+                * unlock.
+                */
                if (uobj != NULL)
                        vm_object_deallocate(uobj);
+
                break;
        case VLNK:
                tmpfs_free_tmp(tmp);
@@ -1697,7 +1803,6 @@ tmpfs_dir_whiteout_remove(struct vnode *dvp, struct 
componentname *cnp)
 int
 tmpfs_reg_resize(struct vnode *vp, off_t newsize, boolean_t ignerr)
 {
-       struct tmpfs_mount *tmp;
        struct tmpfs_node *node;
        vm_object_t uobj;
        vm_pindex_t idx, newpages, oldpages;
@@ -1709,7 +1814,6 @@ tmpfs_reg_resize(struct vnode *vp, off_t newsize, 
boolean_t ignerr)
 
        node = VP_TO_TMPFS_NODE(vp);
        uobj = node->tn_reg.tn_aobj;
-       tmp = VFS_TO_TMPFS(vp->v_mount);
 
        /*
         * Convert the old and new sizes to the number of pages needed to
@@ -1727,10 +1831,6 @@ tmpfs_reg_resize(struct vnode *vp, off_t newsize, 
boolean_t ignerr)
                return (0);
        }
 
-       if (newpages > oldpages &&
-           !tmpfs_pages_check_avail(tmp, newpages - oldpages))
-               return (ENOSPC);
-
        VM_OBJECT_WLOCK(uobj);
        if (newsize < oldsize) {
                /*
@@ -1756,8 +1856,6 @@ tmpfs_reg_resize(struct vnode *vp, off_t newsize, 
boolean_t ignerr)
        uobj->size = newpages;
        VM_OBJECT_WUNLOCK(uobj);
 
-       atomic_add_long(&tmp->tm_pages_used, newpages - oldpages);
-
        node->tn_size = newsize;
        return (0);
 }
diff --git a/sys/fs/tmpfs/tmpfs_vfsops.c b/sys/fs/tmpfs/tmpfs_vfsops.c
index ec6120ba72df..de207242b574 100644
--- a/sys/fs/tmpfs/tmpfs_vfsops.c
+++ b/sys/fs/tmpfs/tmpfs_vfsops.c
@@ -557,7 +557,11 @@ tmpfs_free_tmp(struct tmpfs_mount *tmp)
        TMPFS_UNLOCK(tmp);
 
        mtx_destroy(&tmp->tm_allnode_lock);
-       MPASS(tmp->tm_pages_used == 0);
+       /*
+        * We cannot assert that tmp->tm_pages_used == 0 there,
+        * because tmpfs vm_objects might be still mapped by some
+        * process and outlive the mount due to reference counting.
+        */
        MPASS(tmp->tm_nodes_inuse == 0);
 
        free(tmp, M_TMPFSMNT);
diff --git a/sys/fs/tmpfs/tmpfs_vnops.c b/sys/fs/tmpfs/tmpfs_vnops.c
index 6b53f002c8ac..4d95441c05e4 100644
--- a/sys/fs/tmpfs/tmpfs_vnops.c
+++ b/sys/fs/tmpfs/tmpfs_vnops.c
@@ -437,7 +437,6 @@ tmpfs_stat(struct vop_stat_args *v)
 {
        struct vnode *vp = v->a_vp;
        struct stat *sb = v->a_sb;
-       vm_object_t obj;
        struct tmpfs_node *node;
        int error;
 
@@ -470,8 +469,16 @@ tmpfs_stat(struct vop_stat_args *v)
        sb->st_flags = node->tn_flags;
        sb->st_gen = node->tn_gen;
        if (vp->v_type == VREG) {
-               obj = node->tn_reg.tn_aobj;
-               sb->st_blocks = (u_quad_t)obj->resident_page_count * PAGE_SIZE;
+#ifdef __ILP32__
+               vm_object_t obj = node->tn_reg.tn_aobj;
+
+               /* Handle torn read */
+               VM_OBJECT_RLOCK(obj);
+#endif
+               sb->st_blocks = ptoa(node->tn_reg.tn_pages);
+#ifdef __ILP32__
+               VM_OBJECT_RUNLOCK(obj);
+#endif
        } else {
                sb->st_blocks = node->tn_size;
        }
@@ -510,7 +517,9 @@ tmpfs_getattr(struct vop_getattr_args *v)
            node->tn_rdev : NODEV;
        if (vp->v_type == VREG) {
                obj = node->tn_reg.tn_aobj;
-               vap->va_bytes = (u_quad_t)obj->resident_page_count * PAGE_SIZE;
+               VM_OBJECT_RLOCK(obj);
+               vap->va_bytes = ptoa(node->tn_reg.tn_pages);
+               VM_OBJECT_RUNLOCK(obj);
        } else {
                vap->va_bytes = node->tn_size;
        }
diff --git a/sys/kern/uipc_shm.c b/sys/kern/uipc_shm.c
index 07fbc4caa14f..923930a708a3 100644
--- a/sys/kern/uipc_shm.c
+++ b/sys/kern/uipc_shm.c
@@ -229,7 +229,7 @@ uiomove_object_page(vm_object_t obj, size_t len, struct uio 
*uio)
                        printf("uiomove_object: vm_obj %p idx %jd "
                            "pager error %d\n", obj, idx, rv);
                }
-               return (EIO);
+               return (rv == VM_PAGER_AGAIN ? ENOSPC : EIO);
        }
        VM_OBJECT_WUNLOCK(obj);
 

Reply via email to