On Wed, Jun 05, 2019 at 11:02:33AM +0800, Peng Tao wrote:
> Enhance FUSE_REMOVEMAPPING wire protocol to remove multiple mappings
> in one call. So that fuse_removemapping() can send just one fuse request
> for many dmap entries.
>
> Signed-off-by: Peng Tao <[email protected]>
> ---
> fs/fuse/file.c | 205 +++++++++++++++++++++++++++-----------
> fs/fuse/fuse_i.h | 6 +-
> fs/fuse/inode.c | 2 +-
> include/uapi/linux/fuse.h | 7 ++
> 4 files changed, 160 insertions(+), 60 deletions(-)
>
> diff --git a/fs/fuse/file.c b/fs/fuse/file.c
> index d0979dc32f08..5a0b80719815 100644
> --- a/fs/fuse/file.c
> +++ b/fs/fuse/file.c
> @@ -27,6 +27,9 @@ INTERVAL_TREE_DEFINE(struct fuse_dax_mapping, rb, __u64,
> __subtree_last,
>
> static struct fuse_dax_mapping *alloc_dax_mapping_reclaim(struct fuse_conn
> *fc,
> struct inode *inode);
> +static void fuse_dax_free_mappings_range(struct fuse_conn *fc,
> + struct inode *inode, loff_t start, loff_t end);
> +
> static int fuse_send_open(struct fuse_conn *fc, u64 nodeid, struct file
> *file,
> int opcode, struct fuse_open_out *outargp)
> {
> @@ -319,75 +322,87 @@ static int fuse_setup_one_mapping(struct inode *inode,
> return 0;
> }
>
> -static int fuse_removemapping_one(struct inode *inode,
> - struct fuse_dax_mapping *dmap)
> +static int
> +fuse_send_removemapping(struct inode *inode,
> + struct fuse_removemapping_in *inargp,
> + struct fuse_removemapping_one *remove_one)
> {
> struct fuse_inode *fi = get_fuse_inode(inode);
> struct fuse_conn *fc = get_fuse_conn(inode);
> - struct fuse_removemapping_in inarg;
> FUSE_ARGS(args);
>
> - memset(&inarg, 0, sizeof(inarg));
> - inarg.moffset = dmap->window_offset;
> - inarg.len = dmap->length;
> args.in.h.opcode = FUSE_REMOVEMAPPING;
> args.in.h.nodeid = fi->nodeid;
> - args.in.numargs = 1;
> - args.in.args[0].size = sizeof(inarg);
> - args.in.args[0].value = &inarg;
> + args.in.numargs = 2;
> + args.in.args[0].size = sizeof(*inargp);
> + args.in.args[0].value = inargp;
> + args.in.args[1].size = inargp->count * sizeof(*remove_one);
> + args.in.args[1].value = remove_one;
> return fuse_simple_request(fc, &args);
> }
>
> -/*
> - * It is called from evict_inode() and by that time inode is going away. So
> - * this function does not take any locks like fi->i_dmap_sem for traversing
> - * that fuse inode interval tree. If that lock is taken then lock validator
> - * complains of deadlock situation w.r.t fs_reclaim lock.
> - */
> -void fuse_removemapping(struct inode *inode)
> +static int dmap_list_send_removemappings(struct inode *inode, unsigned num,
> + struct list_head *to_remove)
> {
> - struct fuse_conn *fc = get_fuse_conn(inode);
> - struct fuse_inode *fi = get_fuse_inode(inode);
> - ssize_t err;
> + struct fuse_removemapping_one *remove_one, *ptr;
> + struct fuse_removemapping_in inarg;
> struct fuse_dax_mapping *dmap;
> + int ret, i = 0, nr_alloc;
>
> - /* Clear the mappings list */
> - while (true) {
> - WARN_ON(fi->nr_dmaps < 0);
> + nr_alloc = min_t(unsigned int, num, FUSE_REMOVEMAPPING_MAX_ENTRY);
> + remove_one = kmalloc_array(nr_alloc, sizeof(*remove_one), GFP_NOIO);
GFP_NOIO's comments implies that memalloc_noio_{save,restore} are preferred,
also would GFP_NOFS be better?
> + if (!remove_one)
> + return -ENOMEM;
>
> - dmap = fuse_dax_interval_tree_iter_first(&fi->dmap_tree, 0,
> - -1);
> - if (dmap) {
> - fuse_dax_interval_tree_remove(dmap, &fi->dmap_tree);
> - fi->nr_dmaps--;
> - dmap_remove_busy_list(fc, dmap);
> + ptr = remove_one;
> + list_for_each_entry(dmap, to_remove, list) {
> + ptr->moffset = dmap->window_offset;
> + ptr->len = dmap->length;
> + ptr++;
> + i++;
> + num--;
> + if (i >= nr_alloc || num == 0) {
> + memset(&inarg, 0, sizeof(inarg));
> + inarg.count = i;
> + ret = fuse_send_removemapping(inode, &inarg,
> + remove_one);
> + if (ret)
> + goto out;
> + ptr = remove_one;
> + i = 0;
> }
> + }
> +out:
> + kfree(remove_one);
> + return ret;
> +}
>
> - if (!dmap)
> - break;
> +static int fuse_removemapping_one(struct inode *inode,
> + struct fuse_dax_mapping *dmap)
> +{
> + struct fuse_removemapping_in inarg;
> + struct fuse_removemapping_one remove_one;
>
> - /*
> - * During umount/shutdown, fuse connection is dropped first
> - * and later evict_inode() is called later. That means any
> - * removemapping messages are going to fail. Send messages
> - * only if connection is up. Otherwise fuse daemon is
> - * responsible for cleaning up any leftover references and
> - * mappings.
> - */
> - if (fc->connected) {
> - err = fuse_removemapping_one(inode, dmap);
> - if (err) {
> - pr_warn("Failed to removemapping. offset=0x%llx"
> - " len=0x%llx\n", dmap->window_offset,
> - dmap->length);
> - }
> - }
> + memset(&inarg, 0, sizeof(inarg));
> + /* TODO: fill in inarg.fh when available */
> + inarg.count = 1;
> + memset(&remove_one, 0, sizeof(remove_one));
> + remove_one.moffset = dmap->window_offset;
> + remove_one.len = dmap->length;
>
> - dmap->inode = NULL;
> + return fuse_send_removemapping(inode, &inarg, &remove_one);
> +}
>
> - /* Add it back to free ranges list */
> - free_dax_mapping(fc, dmap);
> - }
> +/*
> + * It is called from evict_inode() and by that time inode is going away. So
> + * this function does not take any locks like fi->i_dmap_sem for traversing
> + * that fuse inode interval tree. If that lock is taken then lock validator
> + * complains of deadlock situation w.r.t fs_reclaim lock.
> + */
> +void fuse_cleanup_inode_mappings(struct inode *inode)
> +{
> + struct fuse_conn *fc = get_fuse_conn(inode);
> + fuse_dax_free_mappings_range(fc, inode, 0, -1);
> }
>
> void fuse_finish_open(struct inode *inode, struct file *file)
> @@ -3980,6 +3995,88 @@ static struct fuse_dax_mapping
> *alloc_dax_mapping_reclaim(struct fuse_conn *fc,
> }
> }
>
> +/*
> + * Cleanup dmap entry and add back to free list. This should be called with
> + * fc->lock held.
> + */
> +static void fuse_dax_do_free_mapping_locked(struct fuse_conn *fc,
> + struct fuse_dax_mapping *dmap)
> +{
> + __dmap_remove_busy_list(fc, dmap);
> + dmap->inode = NULL;
> + dmap->start = dmap->end = 0;
> + __free_dax_mapping(fc, dmap);
> + pr_debug("fuse: freed memory range start=0x%llx end=0x%llx "
> + "window_offset=0x%llx length=0x%llx\n", dmap->start,
> + dmap->end, dmap->window_offset, dmap->length);
pr_debug() needs to be placed at the beginning as dmap->start & end have been
zero'd.
Other parts look good to me.
Reviewed-by: Liu Bo <[email protected]>
thanks,
-liubo
> +}
> +
> +/*
> + * Free inode dmap entries whose range falls entirely inside [start, end].
> + * Does not take any locks. Caller must take care of any lock requirements.
> + * Lock ordering follows fuse_dax_free_one_mapping().
> + * inode->i_rwsem, fuse_inode->i_mmap_sem and fuse_inode->i_dmap_sem must be
> + * held exclusively, unless it is called from evict_inode() where no one else
> + * is accessing the inode.
> + */
> +static void fuse_dax_free_mappings_range(struct fuse_conn *fc,
> + struct inode *inode, loff_t start,
> + loff_t end)
> +{
> + struct fuse_inode *fi = get_fuse_inode(inode);
> + struct fuse_dax_mapping *dmap, *n;
> + int err, num = 0;
> + LIST_HEAD(to_remove);
> +
> + pr_debug("fuse: %s: start=0x%llx, end=0x%llx\n", __func__, start, end);
> +
> + /*
> + * Interval tree search matches intersecting entries. Adjust the range
> + * to avoid dropping partial valid entries.
> + */
> + start = ALIGN(start, FUSE_DAX_MEM_RANGE_SZ);
> + end = ALIGN_DOWN(end, FUSE_DAX_MEM_RANGE_SZ);
> +
> + while (1) {
> + dmap = fuse_dax_interval_tree_iter_first(&fi->dmap_tree, start,
> + end);
> + if (!dmap)
> + break;
> + fuse_dax_interval_tree_remove(dmap, &fi->dmap_tree);
> + num++;
> + WARN_ON(!list_empty(&dmap->list));
> + list_add_tail(&dmap->list, &to_remove);
> + }
> +
> + /* Nothing to remove */
> + if (list_empty(&to_remove))
> + return;
> +
> + WARN_ON(fi->nr_dmaps < num);
> + fi->nr_dmaps -= num;
> + /*
> + * During umount/shutdown, fuse connection is dropped first
> + * and evict_inode() is called later. That means any
> + * removemapping messages are going to fail. Send messages
> + * only if connection is up. Otherwise fuse daemon is
> + * responsible for cleaning up any leftover references and
> + * mappings.
> + */
> + if (fc->connected) {
> + err = dmap_list_send_removemappings(inode, num, &to_remove);
> + if (err) {
> + pr_warn("Failed to removemappings. start=0x%llx"
> + " end=0x%llx\n", start, end);
> + }
> + }
> + spin_lock(&fc->lock);
> + list_for_each_entry_safe(dmap, n, &to_remove, list) {
> + list_del(&dmap->list);
> + fuse_dax_do_free_mapping_locked(fc, dmap);
> + }
> + spin_unlock(&fc->lock);
> +}
> +
> int fuse_dax_free_one_mapping_locked(struct fuse_conn *fc, struct inode
> *inode,
> u64 dmap_start)
> {
> @@ -4003,15 +4100,9 @@ int fuse_dax_free_one_mapping_locked(struct fuse_conn
> *fc, struct inode *inode,
>
> /* Cleanup dmap entry and add back to free list */
> spin_lock(&fc->lock);
> - __dmap_remove_busy_list(fc, dmap);
> - dmap->inode = NULL;
> - dmap->start = dmap->end = 0;
> - __free_dax_mapping(fc, dmap);
> + fuse_dax_do_free_mapping_locked(fc, dmap);
> spin_unlock(&fc->lock);
>
> - pr_debug("fuse: freed memory range window_offset=0x%llx,"
> - " length=0x%llx\n", dmap->window_offset,
> - dmap->length);
> return ret;
> }
>
> diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
> index 7ac7f9a0b81b..17784dbd49a7 100644
> --- a/fs/fuse/fuse_i.h
> +++ b/fs/fuse/fuse_i.h
> @@ -117,7 +117,9 @@ struct fuse_dax_mapping {
> /* Pointer to inode where this memory range is mapped */
> struct inode *inode;
>
> - /* Will connect in fc->free_ranges to keep track of free memory */
> + /* Will connect in fc->free_ranges to keep track of free memory.
> + * When the mapping is in fc->busy_ranges, the field can also be
> + * used by temporary lists */
> struct list_head list;
>
> /* For interval tree in file/inode */
> @@ -1286,6 +1288,6 @@ unsigned fuse_len_args(unsigned numargs, struct
> fuse_arg *args);
> */
> u64 fuse_get_unique(struct fuse_iqueue *fiq);
> void fuse_dax_free_mem_worker(struct work_struct *work);
> -void fuse_removemapping(struct inode *inode);
> +void fuse_cleanup_inode_mappings(struct inode *inode);
>
> #endif /* _FS_FUSE_I_H */
> diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
> index 302f7e04b645..4277324a7c3b 100644
> --- a/fs/fuse/inode.c
> +++ b/fs/fuse/inode.c
> @@ -123,7 +123,7 @@ static void fuse_evict_inode(struct inode *inode)
> struct fuse_conn *fc = get_fuse_conn(inode);
> struct fuse_inode *fi = get_fuse_inode(inode);
> if (IS_DAX(inode)) {
> - fuse_removemapping(inode);
> + fuse_cleanup_inode_mappings(inode);
> WARN_ON(fi->nr_dmaps);
> }
> fuse_queue_forget(fc, fi->forget, fi->nodeid, fi->nlookup);
> diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h
> index 5042e227e8a8..b588a028f099 100644
> --- a/include/uapi/linux/fuse.h
> +++ b/include/uapi/linux/fuse.h
> @@ -850,10 +850,17 @@ struct fuse_setupmapping_out {
> struct fuse_removemapping_in {
> /* An already open handle */
> uint64_t fh;
> + /* number of fuse_removemapping_one follows */
> + uint32_t count;
> +};
> +
> +struct fuse_removemapping_one {
> /* Offset into the dax window start the unmapping */
> uint64_t moffset;
> /* Length of mapping required */
> uint64_t len;
> };
> +#define FUSE_REMOVEMAPPING_MAX_ENTRY \
> + (PAGE_SIZE / sizeof(struct fuse_removemapping_one))
>
> #endif /* _LINUX_FUSE_H */
> --
> 2.17.1
>
> _______________________________________________
> Virtio-fs mailing list
> [email protected]
> https://www.redhat.com/mailman/listinfo/virtio-fs