On 12/9/22, Konstantin Belousov <[email protected]> wrote:
> The branch main has been updated by kib:
>
> URL:
> https://cgit.FreeBSD.org/src/commit/?id=37aea2649ff707f23d35309d882b38e9ac818e42
>
> commit 37aea2649ff707f23d35309d882b38e9ac818e42
> Author:     Konstantin Belousov <[email protected]>
> AuthorDate: 2022-10-20 13:17:43 +0000
> Commit:     Konstantin Belousov <[email protected]>
> CommitDate: 2022-12-09 12:17:12 +0000
>
>     tmpfs: for used pages, account really allocated pages, instead of file
> sizes
>
>     This makes tmpfs size accounting correct for the sparce files. Also
>     correct report st_blocks/va_bytes. Previously the reported value did
> not
>     accounted for the swapped out pages.
>
>     PR:     223015
>     Reviewed by:    markj
>     Tested by:      pho
>     Sponsored by:   The FreeBSD Foundation
>     MFC after:      1 week
>     Differential revision:  https://reviews.freebsd.org/D37097
> ---
>  sys/fs/tmpfs/tmpfs.h        |  18 ++++++-
>  sys/fs/tmpfs/tmpfs_subr.c   | 118
> ++++++++++++++++++++++++++++++++++++++++----
>  sys/fs/tmpfs/tmpfs_vfsops.c |   6 ++-
>  sys/fs/tmpfs/tmpfs_vnops.c  |  17 +++++--
>  sys/kern/uipc_shm.c         |   2 +-
>  5 files changed, 144 insertions(+), 17 deletions(-)
>
> diff --git a/sys/fs/tmpfs/tmpfs.h b/sys/fs/tmpfs/tmpfs.h
> index ad0354f2d474..e9964d7d48af 100644
> --- a/sys/fs/tmpfs/tmpfs.h
> +++ b/sys/fs/tmpfs/tmpfs.h
> @@ -148,6 +148,7 @@ RB_HEAD(tmpfs_dir, tmpfs_dirent);
>   * (i)  tn_interlock
>   * (m)  tmpfs_mount tm_allnode_lock
>   * (c)  stable after creation
> + * (v)  tn_reg.tn_aobj vm_object lock
>   */
>  struct tmpfs_node {
>       /*
> @@ -299,6 +300,7 @@ struct tmpfs_node {
>                        */
>                       vm_object_t             tn_aobj;        /* (c) */
>                       struct tmpfs_mount      *tn_tmp;        /* (c) */
> +                     vm_pindex_t             tn_pages;       /* (v) */
>               } tn_reg;
>       } tn_spec;      /* (v) */
>  };
> @@ -533,7 +535,8 @@ VM_TO_TMPFS_VP(vm_object_t obj)
>  {
>       struct tmpfs_node *node;
>
> -     MPASS((obj->flags & OBJ_TMPFS) != 0);
> +     if ((obj->flags & OBJ_TMPFS) == 0)
> +             return (NULL);
>
>       /*
>        * swp_priv is the back-pointer to the tmpfs node, if any,
> @@ -545,6 +548,19 @@ VM_TO_TMPFS_VP(vm_object_t obj)
>       return (node->tn_vnode);
>  }
>
> +static inline struct tmpfs_mount *
> +VM_TO_TMPFS_MP(vm_object_t obj)
> +{
> +     struct tmpfs_node *node;
> +
> +     if ((obj->flags & OBJ_TMPFS) == 0)
> +             return (NULL);
> +
> +     node = obj->un_pager.swp.swp_priv;
> +     MPASS(node->tn_type == VREG);
> +     return (node->tn_reg.tn_tmp);
> +}
> +
>  static inline struct tmpfs_mount *
>  VFS_TO_TMPFS(struct mount *mp)
>  {
> diff --git a/sys/fs/tmpfs/tmpfs_subr.c b/sys/fs/tmpfs/tmpfs_subr.c
> index 0a7f255a4f74..ef558c424914 100644
> --- a/sys/fs/tmpfs/tmpfs_subr.c
> +++ b/sys/fs/tmpfs/tmpfs_subr.c
> @@ -214,6 +214,87 @@ tmpfs_pager_getvp(vm_object_t object, struct vnode
> **vpp, bool *vp_heldp)
>               VM_OBJECT_RUNLOCK(object);
>  }
>
> +static void
> +tmpfs_pager_freespace(vm_object_t obj, vm_pindex_t start, vm_size_t size)
> +{
> +     struct tmpfs_node *node;
> +     struct tmpfs_mount *tm;
> +     vm_size_t c;
> +
> +     swap_pager_freespace(obj, start, size, &c);
> +     if ((obj->flags & OBJ_TMPFS) == 0 || c == 0)
> +             return;
> +
> +     node = obj->un_pager.swp.swp_priv;
> +     MPASS(node->tn_type == VREG);
> +     tm = node->tn_reg.tn_tmp;
> +
> +     KASSERT(tm->tm_pages_used >= c,
> +         ("tmpfs tm %p pages %jd free %jd", tm,
> +         (uintmax_t)tm->tm_pages_used, (uintmax_t)c));
> +     atomic_add_long(&tm->tm_pages_used, -c);
> +     KASSERT(node->tn_reg.tn_pages >= c,
> +         ("tmpfs node %p pages %jd free %jd", node,
> +         (uintmax_t)node->tn_reg.tn_pages, (uintmax_t)c));
> +     node->tn_reg.tn_pages -= c;
> +}
> +
> +static void
> +tmpfs_page_inserted(vm_object_t obj, vm_page_t m)
> +{
> +     struct tmpfs_node *node;
> +     struct tmpfs_mount *tm;
> +
> +     if ((obj->flags & OBJ_TMPFS) == 0)
> +             return;
> +
> +     node = obj->un_pager.swp.swp_priv;
> +     MPASS(node->tn_type == VREG);
> +     tm = node->tn_reg.tn_tmp;
> +
> +     if (!vm_pager_has_page(obj, m->pindex, NULL, NULL)) {
> +             atomic_add_long(&tm->tm_pages_used, 1);
> +             node->tn_reg.tn_pages += 1;
> +     }
> +}
> +
> +static void
> +tmpfs_page_removed(vm_object_t obj, vm_page_t m)
> +{
> +     struct tmpfs_node *node;
> +     struct tmpfs_mount *tm;
> +
> +     if ((obj->flags & OBJ_TMPFS) == 0)
> +             return;
> +
> +     node = obj->un_pager.swp.swp_priv;
> +     MPASS(node->tn_type == VREG);
> +     tm = node->tn_reg.tn_tmp;
> +
> +     if (!vm_pager_has_page(obj, m->pindex, NULL, NULL)) {
> +             KASSERT(tm->tm_pages_used >= 1,
> +                 ("tmpfs tm %p pages %jd free 1", tm,
> +                 (uintmax_t)tm->tm_pages_used));
> +             atomic_add_long(&tm->tm_pages_used, -1);
> +             KASSERT(node->tn_reg.tn_pages >= 1,
> +                 ("tmpfs node %p pages %jd free 1", node,
> +                 (uintmax_t)node->tn_reg.tn_pages));
> +             node->tn_reg.tn_pages -= 1;
> +     }
> +}
> +
> +static boolean_t
> +tmpfs_can_alloc_page(vm_object_t obj, vm_pindex_t pindex)
> +{
> +     struct tmpfs_mount *tm;
> +
> +     tm = VM_TO_TMPFS_MP(obj);
> +     if (tm == NULL || vm_pager_has_page(obj, pindex, NULL, NULL) ||
> +         tm->tm_pages_max == 0)
> +             return (true);
> +     return (tm->tm_pages_max > atomic_load_long(&tm->tm_pages_used));
> +}
> +
>  struct pagerops tmpfs_pager_ops = {
>       .pgo_kvme_type = KVME_TYPE_VNODE,
>       .pgo_alloc = tmpfs_pager_alloc,
> @@ -222,6 +303,10 @@ struct pagerops tmpfs_pager_ops = {
>       .pgo_release_writecount = tmpfs_pager_release_writecount,
>       .pgo_mightbedirty = vm_object_mightbedirty_,
>       .pgo_getvp = tmpfs_pager_getvp,
> +     .pgo_freespace = tmpfs_pager_freespace,
> +     .pgo_page_inserted = tmpfs_page_inserted,
> +     .pgo_page_removed = tmpfs_page_removed,
> +     .pgo_can_alloc_page = tmpfs_can_alloc_page,
>  };
>
>  static int
> @@ -576,6 +661,7 @@ tmpfs_alloc_node(struct mount *mp, struct tmpfs_mount
> *tmp, enum vtype type,
>               nnode->tn_reg.tn_aobj->un_pager.swp.swp_priv = nnode;
>               vm_object_set_flag(nnode->tn_reg.tn_aobj, OBJ_TMPFS);
>               nnode->tn_reg.tn_tmp = tmp;
> +             nnode->tn_reg.tn_pages = 0;
>               break;
>
>       default:
> @@ -665,11 +751,31 @@ tmpfs_free_node_locked(struct tmpfs_mount *tmp, struct
> tmpfs_node *node,
>       switch (node->tn_type) {
>       case VREG:
>               uobj = node->tn_reg.tn_aobj;
> -             if (uobj != NULL && uobj->size != 0)
> -                     atomic_subtract_long(&tmp->tm_pages_used, uobj->size);
> +             node->tn_reg.tn_aobj = NULL;
> +             if (uobj != NULL) {
> +                     VM_OBJECT_WLOCK(uobj);
> +                     KASSERT((uobj->flags & OBJ_TMPFS) != 0,
> +                         ("tmpfs node %p uobj %p not tmpfs", node, uobj));
> +                     vm_object_clear_flag(uobj, OBJ_TMPFS);
> +                     KASSERT(tmp->tm_pages_used >= node->tn_reg.tn_pages,
> +                         ("tmpfs tmp %p node %p pages %jd free %jd", tmp,
> +                         node, (uintmax_t)tmp->tm_pages_used,
> +                         (uintmax_t)node->tn_reg.tn_pages));
> +                     atomic_add_long(&tmp->tm_pages_used,
> +                         -node->tn_reg.tn_pages);
> +                     VM_OBJECT_WUNLOCK(uobj);
> +             }
>               tmpfs_free_tmp(tmp);
> +
> +             /*
> +              * vm_object_deallocate() must not be called while
> +              * owning tm_allnode_lock, because deallocate might
> +              * sleep.  Call it after tmpfs_free_tmp() does the
> +              * unlock.
> +              */
>               if (uobj != NULL)
>                       vm_object_deallocate(uobj);
> +
>               break;
>       case VLNK:
>               tmpfs_free_tmp(tmp);
> @@ -1697,7 +1803,6 @@ tmpfs_dir_whiteout_remove(struct vnode *dvp, struct
> componentname *cnp)
>  int
>  tmpfs_reg_resize(struct vnode *vp, off_t newsize, boolean_t ignerr)
>  {
> -     struct tmpfs_mount *tmp;
>       struct tmpfs_node *node;
>       vm_object_t uobj;
>       vm_pindex_t idx, newpages, oldpages;
> @@ -1709,7 +1814,6 @@ tmpfs_reg_resize(struct vnode *vp, off_t newsize,
> boolean_t ignerr)
>
>       node = VP_TO_TMPFS_NODE(vp);
>       uobj = node->tn_reg.tn_aobj;
> -     tmp = VFS_TO_TMPFS(vp->v_mount);
>
>       /*
>        * Convert the old and new sizes to the number of pages needed to
> @@ -1727,10 +1831,6 @@ tmpfs_reg_resize(struct vnode *vp, off_t newsize,
> boolean_t ignerr)
>               return (0);
>       }
>
> -     if (newpages > oldpages &&
> -         !tmpfs_pages_check_avail(tmp, newpages - oldpages))
> -             return (ENOSPC);
> -
>       VM_OBJECT_WLOCK(uobj);
>       if (newsize < oldsize) {
>               /*
> @@ -1756,8 +1856,6 @@ tmpfs_reg_resize(struct vnode *vp, off_t newsize,
> boolean_t ignerr)
>       uobj->size = newpages;
>       VM_OBJECT_WUNLOCK(uobj);
>
> -     atomic_add_long(&tmp->tm_pages_used, newpages - oldpages);
> -
>       node->tn_size = newsize;
>       return (0);
>  }
> diff --git a/sys/fs/tmpfs/tmpfs_vfsops.c b/sys/fs/tmpfs/tmpfs_vfsops.c
> index ec6120ba72df..de207242b574 100644
> --- a/sys/fs/tmpfs/tmpfs_vfsops.c
> +++ b/sys/fs/tmpfs/tmpfs_vfsops.c
> @@ -557,7 +557,11 @@ tmpfs_free_tmp(struct tmpfs_mount *tmp)
>       TMPFS_UNLOCK(tmp);
>
>       mtx_destroy(&tmp->tm_allnode_lock);
> -     MPASS(tmp->tm_pages_used == 0);
> +     /*
> +      * We cannot assert that tmp->tm_pages_used == 0 there,
> +      * because tmpfs vm_objects might be still mapped by some
> +      * process and outlive the mount due to reference counting.
> +      */
>       MPASS(tmp->tm_nodes_inuse == 0);
>
>       free(tmp, M_TMPFSMNT);
> diff --git a/sys/fs/tmpfs/tmpfs_vnops.c b/sys/fs/tmpfs/tmpfs_vnops.c
> index 6b53f002c8ac..4d95441c05e4 100644
> --- a/sys/fs/tmpfs/tmpfs_vnops.c
> +++ b/sys/fs/tmpfs/tmpfs_vnops.c
> @@ -437,7 +437,6 @@ tmpfs_stat(struct vop_stat_args *v)
>  {
>       struct vnode *vp = v->a_vp;
>       struct stat *sb = v->a_sb;
> -     vm_object_t obj;
>       struct tmpfs_node *node;
>       int error;
>
> @@ -470,8 +469,16 @@ tmpfs_stat(struct vop_stat_args *v)
>       sb->st_flags = node->tn_flags;
>       sb->st_gen = node->tn_gen;
>       if (vp->v_type == VREG) {
> -             obj = node->tn_reg.tn_aobj;
> -             sb->st_blocks = (u_quad_t)obj->resident_page_count * PAGE_SIZE;
> +#ifdef __ILP32__
> +             vm_object_t obj = node->tn_reg.tn_aobj;
> +
> +             /* Handle torn read */
> +             VM_OBJECT_RLOCK(obj);
> +#endif
> +             sb->st_blocks = ptoa(node->tn_reg.tn_pages);
> +#ifdef __ILP32__
> +             VM_OBJECT_RUNLOCK(obj);
> +#endif
>       } else {
>               sb->st_blocks = node->tn_size;
>       }
> @@ -510,7 +517,9 @@ tmpfs_getattr(struct vop_getattr_args *v)
>           node->tn_rdev : NODEV;
>       if (vp->v_type == VREG) {
>               obj = node->tn_reg.tn_aobj;
> -             vap->va_bytes = (u_quad_t)obj->resident_page_count * PAGE_SIZE;
> +             VM_OBJECT_RLOCK(obj);
> +             vap->va_bytes = ptoa(node->tn_reg.tn_pages);
> +             VM_OBJECT_RUNLOCK(obj);

why is the lock taken here? it is a significant pessimization of the
routine and tmpfs_stat equivalent only does it for LP32?

>       } else {
>               vap->va_bytes = node->tn_size;
>       }
> diff --git a/sys/kern/uipc_shm.c b/sys/kern/uipc_shm.c
> index 07fbc4caa14f..923930a708a3 100644
> --- a/sys/kern/uipc_shm.c
> +++ b/sys/kern/uipc_shm.c
> @@ -229,7 +229,7 @@ uiomove_object_page(vm_object_t obj, size_t len, struct
> uio *uio)
>                       printf("uiomove_object: vm_obj %p idx %jd "
>                           "pager error %d\n", obj, idx, rv);
>               }
> -             return (EIO);
> +             return (rv == VM_PAGER_AGAIN ? ENOSPC : EIO);
>       }
>       VM_OBJECT_WUNLOCK(obj);
>
>


-- 
Mateusz Guzik <mjguzik gmail.com>

Reply via email to