Enhance FUSE_REMOVEMAPPING wire protocol to remove multiple mappings
in one call. So that fuse_removemapping() does not need to send
one fuse request for each dmap entry.

Signed-off-by: Peng Tao <[email protected]>
---
This works with corresponding virtiofsd patch
"[PATCH-v2] virtiofsd: make FUSE_REMOVEMAPPING support multiple entries".

 fs/fuse/file.c            | 195 +++++++++++++++++++++++++++-----------
 include/uapi/linux/fuse.h |   9 +-
 2 files changed, 146 insertions(+), 58 deletions(-)

diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index d0979dc32f08..a6a34249c598 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -27,6 +27,9 @@ INTERVAL_TREE_DEFINE(struct fuse_dax_mapping, rb, __u64, 
__subtree_last,

 static struct fuse_dax_mapping *alloc_dax_mapping_reclaim(struct fuse_conn *fc,
                                struct inode *inode);
+static void __fuse_dax_free_mappings_range(struct fuse_conn *fc,
+                               struct inode *inode, loff_t start, loff_t end);
+
 static int fuse_send_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
                          int opcode, struct fuse_open_out *outargp)
 {
@@ -319,75 +322,90 @@ static int fuse_setup_one_mapping(struct inode *inode,
        return 0;
 }

-static int fuse_removemapping_one(struct inode *inode,
-                                       struct fuse_dax_mapping *dmap)
+static int fuse_send_removemapping_request(struct inode *inode,
+               struct fuse_removemapping_in_header *header,
+               struct fuse_removemapping_in *inargp)
 {
        struct fuse_inode *fi = get_fuse_inode(inode);
        struct fuse_conn *fc = get_fuse_conn(inode);
-       struct fuse_removemapping_in inarg;
        FUSE_ARGS(args);

-       memset(&inarg, 0, sizeof(inarg));
-       inarg.moffset = dmap->window_offset;
-       inarg.len = dmap->length;
        args.in.h.opcode = FUSE_REMOVEMAPPING;
        args.in.h.nodeid = fi->nodeid;
-       args.in.numargs = 1;
-       args.in.args[0].size = sizeof(inarg);
-       args.in.args[0].value = &inarg;
+       args.in.numargs = 2;
+       args.in.args[0].size = sizeof(*header);
+       args.in.args[0].value = header;
+       args.in.args[1].size = header->count * sizeof(*inargp);
+       args.in.args[1].value = inargp;
        return fuse_simple_request(fc, &args);
 }

-/*
- * It is called from evict_inode() and by that time inode is going away. So
- * this function does not take any locks like fi->i_dmap_sem for traversing
- * that fuse inode interval tree. If that lock is taken then lock validator
- * complains of deadlock situation w.r.t fs_reclaim lock.
- */
-void fuse_removemapping(struct inode *inode)
+static int fuse_do_removemappings(struct inode *inode, unsigned num,
+                                 struct list_head *to_remove)
 {
-       struct fuse_conn *fc = get_fuse_conn(inode);
-       struct fuse_inode *fi = get_fuse_inode(inode);
-       ssize_t err;
+       struct fuse_removemapping_in *inargp, *ptr;
+       struct fuse_removemapping_in_header header;
        struct fuse_dax_mapping *dmap;
+       int ret, i = 0;

-       /* Clear the mappings list */
-       while (true) {
-               WARN_ON(fi->nr_dmaps < 0);
+       if (num <= FUSE_REMOVEMAPPING_MAX_ENTRY) {
+               inargp = kmalloc_array(num, sizeof(*inargp), GFP_NOIO);
+       } else {
+               inargp = kmalloc_array(FUSE_REMOVEMAPPING_MAX_ENTRY,
+                                       sizeof(*inargp), GFP_NOIO);
+       }
+       if (inargp == NULL)
+               return -ENOMEM;

-               dmap = fuse_dax_interval_tree_iter_first(&fi->dmap_tree, 0,
-                                                               -1);
-               if (dmap) {
-                       fuse_dax_interval_tree_remove(dmap, &fi->dmap_tree);
-                       fi->nr_dmaps--;
-                       dmap_remove_busy_list(fc, dmap);
+       ptr = inargp;
+       list_for_each_entry(dmap, to_remove, list) {
+               ptr->moffset = dmap->window_offset;
+               ptr->len = dmap->length;
+               ptr++;
+               i++;
+               num--;
+               if (i >= FUSE_REMOVEMAPPING_MAX_ENTRY || num == 0) {
+                       memset(&header, 0, sizeof(header));
+                       header.count = i;
+                       ret = fuse_send_removemapping_request(inode, &header, 
inargp);
+                       if (ret)
+                               goto out;
+                       ptr = inargp;
+                       i = 0;
                }
+       }

-               if (!dmap)
-                       break;
+out:
+       kfree(inargp);
+       return ret;
+}

-               /*
-                * During umount/shutdown, fuse connection is dropped first
-                * and later evict_inode() is called later. That means any
-                * removemapping messages are going to fail. Send messages
-                * only if connection is up. Otherwise fuse daemon is
-                * responsible for cleaning up any leftover references and
-                * mappings.
-                */
-               if (fc->connected) {
-                       err = fuse_removemapping_one(inode, dmap);
-                       if (err) {
-                               pr_warn("Failed to removemapping. offset=0x%llx"
-                                       " len=0x%llx\n", dmap->window_offset,
-                                       dmap->length);
-                       }
-               }
+static int fuse_removemapping_one(struct inode *inode,
+                                       struct fuse_dax_mapping *dmap)
+{
+       struct fuse_removemapping_in inarg;
+       struct fuse_removemapping_in_header header;

-               dmap->inode = NULL;
+       memset(&header, 0, sizeof(header));
+       /* TODO: fill in header.fh when available */
+       header.count = 1;
+       memset(&inarg, 0, sizeof(inarg));
+       inarg.moffset = dmap->window_offset;
+       inarg.len = dmap->length;

-               /* Add it back to free ranges list */
-               free_dax_mapping(fc, dmap);
-       }
+       return fuse_send_removemapping_request(inode, &header, &inarg);
+}
+
+/*
+ * It is called from evict_inode() and by that time inode is going away. So
+ * this function does not take any locks like fi->i_dmap_sem for traversing
+ * that fuse inode interval tree. If that lock is taken then lock validator
+ * complains of deadlock situation w.r.t fs_reclaim lock.
+ */
+void fuse_removemapping(struct inode *inode)
+{
+       struct fuse_conn *fc = get_fuse_conn(inode);
+       __fuse_dax_free_mappings_range(fc, inode, 0, -1);
 }

 void fuse_finish_open(struct inode *inode, struct file *file)
@@ -3980,6 +3998,75 @@ static struct fuse_dax_mapping 
*alloc_dax_mapping_reclaim(struct fuse_conn *fc,
        }
 }

+/* Cleanup dmap entry and add back to free list */
+static void fuse_dax_do_free_mapping_locked(struct fuse_conn *fc, struct 
fuse_dax_mapping *dmap)
+{
+       pr_debug("fuse: freed memory range start=0x%llx end=0x%llx "
+               "window_offset=0x%llx length=0x%llx\n", dmap->start,
+               dmap->end, dmap->window_offset, dmap->length);
+       __dmap_remove_busy_list(fc, dmap);
+       dmap->inode = NULL;
+       dmap->start = dmap->end = 0;
+       __free_dax_mapping(fc, dmap);
+}
+
+/*
+ * Free inode dmap entries whose range falls entirely inside [start, end].
+ * Does not take any locks. Caller must take care of any lock requirements.
+ * Lock ordering follows fuse_dax_free_one_mapping().
+ * inode->i_rwsem, fuse_inode->i_mmap_sem and fuse_inode->i_dmap_sem must be
+ * held unless it is called from evict_inode() where no one else is accessing
+ * the inode.
+ */
+static void __fuse_dax_free_mappings_range(struct fuse_conn *fc,
+                               struct inode *inode, loff_t start, loff_t end)
+{
+       struct fuse_inode *fi = get_fuse_inode(inode);
+       struct fuse_dax_mapping *dmap, *n;
+       int err, num = 0;
+       LIST_HEAD(to_remove);
+
+       pr_debug("fuse: __fuse_dax_free_mappings_range "
+                "start=0x%llx, end=0x%llx\n", start, end);
+       /* interval tree search matches intersecting entries.
+        * Adjust the range to avoid dropping partial valid entries. */
+       start = ALIGN(start, FUSE_DAX_MEM_RANGE_SZ);
+       end = ALIGN_DOWN(end, FUSE_DAX_MEM_RANGE_SZ);
+
+       while ((dmap = fuse_dax_interval_tree_iter_first(&fi->dmap_tree, start, 
end))) {
+               fuse_dax_interval_tree_remove(dmap, &fi->dmap_tree);
+               num++;
+               list_add(&dmap->list, &to_remove);
+       }
+
+       /* Nothing to remove */
+       if (list_empty(&to_remove))
+               return;
+
+       WARN_ON(fi->nr_dmaps < num);
+       fi->nr_dmaps -= num;
+       /*
+        * During umount/shutdown, fuse connection is dropped first
+        * and later evict_inode() is called later. That means any
+        * removemapping messages are going to fail. Send messages
+        * only if connection is up. Otherwise fuse daemon is
+        * responsible for cleaning up any leftover references and
+        * mappings.
+        */
+       if (fc->connected) {
+               err = fuse_do_removemappings(inode, num, &to_remove);
+               if (err) {
+                       pr_warn("Failed to removemappings. start=0x%llx"
+                               " end=0x%llx\n", start, end);
+               }
+       }
+       spin_lock(&fc->lock);
+       list_for_each_entry_safe(dmap, n, &to_remove, list) {
+               fuse_dax_do_free_mapping_locked(fc, dmap);
+       }
+       spin_unlock(&fc->lock);
+}
+
 int fuse_dax_free_one_mapping_locked(struct fuse_conn *fc, struct inode *inode,
                                u64 dmap_start)
 {
@@ -4003,15 +4090,9 @@ int fuse_dax_free_one_mapping_locked(struct fuse_conn 
*fc, struct inode *inode,

        /* Cleanup dmap entry and add back to free list */
        spin_lock(&fc->lock);
-       __dmap_remove_busy_list(fc, dmap);
-       dmap->inode = NULL;
-       dmap->start = dmap->end = 0;
-       __free_dax_mapping(fc, dmap);
+       fuse_dax_do_free_mapping_locked(fc, dmap);
        spin_unlock(&fc->lock);

-       pr_debug("fuse: freed memory range window_offset=0x%llx,"
-                               " length=0x%llx\n", dmap->window_offset,
-                               dmap->length);
        return ret;
 }

diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h
index 5042e227e8a8..0dccfff50f91 100644
--- a/include/uapi/linux/fuse.h
+++ b/include/uapi/linux/fuse.h
@@ -847,13 +847,20 @@ struct fuse_setupmapping_out {
         uint64_t       len[FUSE_SETUPMAPPING_ENTRIES];
 };

-struct fuse_removemapping_in {
+struct fuse_removemapping_in_header {
         /* An already open handle */
         uint64_t       fh;
+       /* number of fuse_removemapping_in follows */
+       uint32_t        count;
+};
+
+struct fuse_removemapping_in {
        /* Offset into the dax window start the unmapping */
        uint64_t        moffset;
         /* Length of mapping required */
         uint64_t       len;
 };
+#define FUSE_REMOVEMAPPING_MAX_ENTRY   \
+               (PAGE_SIZE / sizeof(struct fuse_removemapping_in))

 #endif /* _LINUX_FUSE_H */
-- 
2.17.1

Reply via email to