On Mon, Jun 03, 2019 at 10:46:51AM +0800, Peng Tao wrote: > Enhance FUSE_REMOVEMAPPING wire protocol to remove multiple mappings > in one call. So that fuse_removemapping() can send just one fuse request > for many dmap entries. > > Signed-off-by: Peng Tao <[email protected]> > --- > v1-v2: make fuse_removemapping_in count fuse_removemapping_one > > This works with corresponding qemu virtiofsd patch: > "[PATCH-v3] virtiofsd: make FUSE_REMOVEMAPPING support multiple entries". > > fs/fuse/file.c | 195 +++++++++++++++++++++++++++----------- > include/uapi/linux/fuse.h | 7 ++ > 2 files changed, 145 insertions(+), 57 deletions(-) >
Hi Tao Peng, This patch looks more or less good to me. I have made quite a few cosmetic changes to it. Please have a look. I have done some basic testing with it. Will stress it little bit more tomorrow. If nothing breaks, will merge it. Thanks Vivek Enhance FUSE_REMOVEMAPPING wire protocol to remove multiple mappings in one call. So that fuse_removemapping() can send just one fuse request for many dmap entries. Signed-off-by: Peng Tao <[email protected]> --- v1-v2: make fuse_removemapping_in count fuse_removemapping_one This works with corresponding qemu virtiofsd patch: "[PATCH-v3] virtiofsd: make FUSE_REMOVEMAPPING support multiple entries". fs/fuse/file.c | 203 +++++++++++++++++++++++++++++++++------------- fs/fuse/fuse_i.h | 2 fs/fuse/inode.c | 2 include/uapi/linux/fuse.h | 7 + 4 files changed, 155 insertions(+), 59 deletions(-) Index: rhvgoyal-linux-fuse/fs/fuse/file.c =================================================================== --- rhvgoyal-linux-fuse.orig/fs/fuse/file.c 2019-06-03 15:31:47.754645455 -0400 +++ rhvgoyal-linux-fuse/fs/fuse/file.c 2019-06-03 17:16:35.930742301 -0400 @@ -27,6 +27,9 @@ INTERVAL_TREE_DEFINE(struct fuse_dax_map static struct fuse_dax_mapping *alloc_dax_mapping_reclaim(struct fuse_conn *fc, struct inode *inode); +static void fuse_dax_free_mappings_range(struct fuse_conn *fc, + struct inode *inode, loff_t start, loff_t end); + static int fuse_send_open(struct fuse_conn *fc, u64 nodeid, struct file *file, int opcode, struct fuse_open_out *outargp) { @@ -319,75 +322,87 @@ static int fuse_setup_one_mapping(struct return 0; } -static int fuse_removemapping_one(struct inode *inode, - struct fuse_dax_mapping *dmap) +static int +fuse_send_removemapping(struct inode *inode, + struct fuse_removemapping_in *inargp, + struct fuse_removemapping_one *forget_one) { struct fuse_inode *fi = get_fuse_inode(inode); struct fuse_conn *fc = get_fuse_conn(inode); - struct fuse_removemapping_in inarg; FUSE_ARGS(args); - memset(&inarg, 0, sizeof(inarg)); - inarg.moffset = dmap->window_offset; - inarg.len = dmap->length; args.in.h.opcode = FUSE_REMOVEMAPPING; args.in.h.nodeid = fi->nodeid; - args.in.numargs = 1; - args.in.args[0].size = sizeof(inarg); - args.in.args[0].value = &inarg; + args.in.numargs = 2; + args.in.args[0].size = sizeof(*inargp); + args.in.args[0].value = inargp; + args.in.args[1].size = inargp->count * sizeof(*forget_one); + args.in.args[1].value = forget_one; return fuse_simple_request(fc, &args); } -/* - * It is called from evict_inode() and by that time inode is going away. So - * this function does not take any locks like fi->i_dmap_sem for traversing - * that fuse inode interval tree. If that lock is taken then lock validator - * complains of deadlock situation w.r.t fs_reclaim lock. - */ -void fuse_removemapping(struct inode *inode) +static int dmap_list_send_removemappings(struct inode *inode, unsigned num, + struct list_head *to_remove) { - struct fuse_conn *fc = get_fuse_conn(inode); - struct fuse_inode *fi = get_fuse_inode(inode); - ssize_t err; + struct fuse_removemapping_one *remove_one, *ptr; + struct fuse_removemapping_in inarg; struct fuse_dax_mapping *dmap; + int ret, i = 0, nr_alloc; - /* Clear the mappings list */ - while (true) { - WARN_ON(fi->nr_dmaps < 0); + nr_alloc = min_t(unsigned int, num, FUSE_REMOVEMAPPING_MAX_ENTRY); + remove_one = kmalloc_array(nr_alloc, sizeof(*remove_one), GFP_NOIO); + if (!remove_one) + return -ENOMEM; - dmap = fuse_dax_interval_tree_iter_first(&fi->dmap_tree, 0, - -1); - if (dmap) { - fuse_dax_interval_tree_remove(dmap, &fi->dmap_tree); - fi->nr_dmaps--; - dmap_remove_busy_list(fc, dmap); + ptr = remove_one; + list_for_each_entry(dmap, to_remove, list) { + ptr->moffset = dmap->window_offset; + ptr->len = dmap->length; + ptr++; + i++; + num--; + if (i >= nr_alloc || num == 0) { + memset(&inarg, 0, sizeof(inarg)); + inarg.count = i; + ret = fuse_send_removemapping(inode, &inarg, + remove_one); + if (ret) + goto out; + ptr = remove_one; + i = 0; } + } +out: + kfree(remove_one); + return ret; +} - if (!dmap) - break; +static int fuse_removemapping_one(struct inode *inode, + struct fuse_dax_mapping *dmap) +{ + struct fuse_removemapping_one forget_one; + struct fuse_removemapping_in inarg; - /* - * During umount/shutdown, fuse connection is dropped first - * and later evict_inode() is called later. That means any - * removemapping messages are going to fail. Send messages - * only if connection is up. Otherwise fuse daemon is - * responsible for cleaning up any leftover references and - * mappings. - */ - if (fc->connected) { - err = fuse_removemapping_one(inode, dmap); - if (err) { - pr_warn("Failed to removemapping. offset=0x%llx" - " len=0x%llx\n", dmap->window_offset, - dmap->length); - } - } + memset(&inarg, 0, sizeof(inarg)); + /* TODO: fill in inarg.fh when available */ + inarg.count = 1; + memset(&forget_one, 0, sizeof(forget_one)); + forget_one.moffset = dmap->window_offset; + forget_one.len = dmap->length; - dmap->inode = NULL; + return fuse_send_removemapping(inode, &inarg, &forget_one); +} - /* Add it back to free ranges list */ - free_dax_mapping(fc, dmap); - } +/* + * It is called from evict_inode() and by that time inode is going away. So + * this function does not take any locks like fi->i_dmap_sem for traversing + * that fuse inode interval tree. If that lock is taken then lock validator + * complains of deadlock situation w.r.t fs_reclaim lock. + */ +void fuse_cleanup_inode_mappings(struct inode *inode) +{ + struct fuse_conn *fc = get_fuse_conn(inode); + fuse_dax_free_mappings_range(fc, inode, 0, -1); } void fuse_finish_open(struct inode *inode, struct file *file) @@ -3980,6 +3995,86 @@ static struct fuse_dax_mapping *alloc_da } } +/* + * Cleanup dmap entry and add back to free list. This should be called with + * fc->lock held. + */ +static void fuse_dax_do_free_mapping_locked(struct fuse_conn *fc, + struct fuse_dax_mapping *dmap) +{ + __dmap_remove_busy_list(fc, dmap); + dmap->inode = NULL; + dmap->start = dmap->end = 0; + __free_dax_mapping(fc, dmap); + pr_debug("fuse: freed memory range start=0x%llx end=0x%llx " + "window_offset=0x%llx length=0x%llx\n", dmap->start, + dmap->end, dmap->window_offset, dmap->length); +} + +/* + * Free inode dmap entries whose range falls entirely inside [start, end]. + * Does not take any locks. Caller must take care of any lock requirements. + * Lock ordering follows fuse_dax_free_one_mapping(). + * inode->i_rwsem, fuse_inode->i_mmap_sem and fuse_inode->i_dmap_sem must be + * held exclusively, unless it is called from evict_inode() where no one else + * is accessing the inode. + */ +static void fuse_dax_free_mappings_range(struct fuse_conn *fc, + struct inode *inode, loff_t start, + loff_t end) +{ + struct fuse_inode *fi = get_fuse_inode(inode); + struct fuse_dax_mapping *dmap, *n; + int err, num = 0; + LIST_HEAD(to_remove); + + pr_debug("fuse: %s: start=0x%llx, end=0x%llx\n", __func__, start, end); + + /* + * Interval tree search matches intersecting entries. Adjust the range + * to avoid dropping partial valid entries. + */ + start = ALIGN(start, FUSE_DAX_MEM_RANGE_SZ); + end = ALIGN_DOWN(end, FUSE_DAX_MEM_RANGE_SZ); + + while (1) { + dmap = fuse_dax_interval_tree_iter_first(&fi->dmap_tree, start, + end); + if (!dmap) + break; + fuse_dax_interval_tree_remove(dmap, &fi->dmap_tree); + num++; + list_add(&dmap->list, &to_remove); + } + + /* Nothing to remove */ + if (list_empty(&to_remove)) + return; + + WARN_ON(fi->nr_dmaps < num); + fi->nr_dmaps -= num; + /* + * During umount/shutdown, fuse connection is dropped first + * and evict_inode() is called later. That means any + * removemapping messages are going to fail. Send messages + * only if connection is up. Otherwise fuse daemon is + * responsible for cleaning up any leftover references and + * mappings. + */ + if (fc->connected) { + err = dmap_list_send_removemappings(inode, num, &to_remove); + if (err) { + pr_warn("Failed to removemappings. start=0x%llx" + " end=0x%llx\n", start, end); + } + } + spin_lock(&fc->lock); + list_for_each_entry_safe(dmap, n, &to_remove, list) { + fuse_dax_do_free_mapping_locked(fc, dmap); + } + spin_unlock(&fc->lock); +} + int fuse_dax_free_one_mapping_locked(struct fuse_conn *fc, struct inode *inode, u64 dmap_start) { @@ -4003,15 +4098,9 @@ int fuse_dax_free_one_mapping_locked(str /* Cleanup dmap entry and add back to free list */ spin_lock(&fc->lock); - __dmap_remove_busy_list(fc, dmap); - dmap->inode = NULL; - dmap->start = dmap->end = 0; - __free_dax_mapping(fc, dmap); + fuse_dax_do_free_mapping_locked(fc, dmap); spin_unlock(&fc->lock); - pr_debug("fuse: freed memory range window_offset=0x%llx," - " length=0x%llx\n", dmap->window_offset, - dmap->length); return ret; } Index: rhvgoyal-linux-fuse/include/uapi/linux/fuse.h =================================================================== --- rhvgoyal-linux-fuse.orig/include/uapi/linux/fuse.h 2019-06-03 15:31:47.754645455 -0400 +++ rhvgoyal-linux-fuse/include/uapi/linux/fuse.h 2019-06-03 16:01:48.268645455 -0400 @@ -850,10 +850,17 @@ struct fuse_setupmapping_out { struct fuse_removemapping_in { /* An already open handle */ uint64_t fh; + /* number of fuse_removemapping_one follows */ + uint32_t count; +}; + +struct fuse_removemapping_one { /* Offset into the dax window start the unmapping */ uint64_t moffset; /* Length of mapping required */ uint64_t len; }; +#define FUSE_REMOVEMAPPING_MAX_ENTRY \ + (PAGE_SIZE / sizeof(struct fuse_removemapping_one)) #endif /* _LINUX_FUSE_H */ Index: rhvgoyal-linux-fuse/fs/fuse/fuse_i.h =================================================================== --- rhvgoyal-linux-fuse.orig/fs/fuse/fuse_i.h 2019-06-03 17:16:29.372742301 -0400 +++ rhvgoyal-linux-fuse/fs/fuse/fuse_i.h 2019-06-03 17:16:35.937742301 -0400 @@ -1289,6 +1289,6 @@ unsigned fuse_len_args(unsigned numargs, */ u64 fuse_get_unique(struct fuse_iqueue *fiq); void fuse_dax_free_mem_worker(struct work_struct *work); -void fuse_removemapping(struct inode *inode); +void fuse_cleanup_inode_mappings(struct inode *inode); #endif /* _FS_FUSE_I_H */ Index: rhvgoyal-linux-fuse/fs/fuse/inode.c =================================================================== --- rhvgoyal-linux-fuse.orig/fs/fuse/inode.c 2019-06-03 17:16:29.372742301 -0400 +++ rhvgoyal-linux-fuse/fs/fuse/inode.c 2019-06-03 17:16:35.935742301 -0400 @@ -123,7 +123,7 @@ static void fuse_evict_inode(struct inod struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_inode *fi = get_fuse_inode(inode); if (IS_DAX(inode)) { - fuse_removemapping(inode); + fuse_cleanup_inode_mappings(inode); WARN_ON(fi->nr_dmaps); } fuse_queue_forget(fc, fi->forget, fi->nodeid, fi->nlookup);
