Do not always map a dax mapping read-write. There are use cases like executing a file where we want to map file read-only. virtio-fs dir on host might be backed by overlayfs. We don't want to open file read-write on overlayfs otherwise it will unnecessariliy be copied-up nullyifying the advantages of sharing page cache between vms for unmodified files.
Hence, create a read-only mapping if user did not ask for writable mapping. Later upgrade it to read-write mapping when users requests it. Signed-off-by: Vivek Goyal <[email protected]> --- fs/fuse/file.c | 114 ++++++++++++++++++++++++++++++++++++----------- fs/fuse/fuse_i.h | 3 ++ 2 files changed, 91 insertions(+), 26 deletions(-) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index a2c19e4a28b5..bcd8ddcc0fdf 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -265,7 +265,8 @@ static void dmap_add_to_free_pool(struct fuse_conn *fc, /* offset passed in should be aligned to FUSE_DAX_MEM_RANGE_SZ */ static int fuse_setup_one_mapping(struct inode *inode, loff_t offset, - struct fuse_dax_mapping *dmap) + struct fuse_dax_mapping *dmap, bool writable, + bool upgrade) { struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_inode *fi = get_fuse_inode(inode); @@ -283,7 +284,8 @@ static int fuse_setup_one_mapping(struct inode *inode, loff_t offset, inarg.moffset = dmap->window_offset; inarg.len = FUSE_DAX_MEM_RANGE_SZ; inarg.flags |= FUSE_SETUPMAPPING_FLAG_READ; - inarg.flags |= FUSE_SETUPMAPPING_FLAG_WRITE; + if (writable) + inarg.flags |= FUSE_SETUPMAPPING_FLAG_WRITE; args.in.h.opcode = FUSE_SETUPMAPPING; args.in.h.nodeid = fi->nodeid; args.in.numargs = 1; @@ -296,26 +298,30 @@ static int fuse_setup_one_mapping(struct inode *inode, loff_t offset, return err; } - pr_debug("fuse_setup_one_mapping() succeeded. offset=0x%llx err=%zd\n", offset, err); + pr_debug("fuse_setup_one_mapping() succeeded. offset=0x%llx writable=%d" + " err=%zd\n", offset, writable, err); - /* - * We don't take a refernce on inode. inode is valid right now and - * when inode is going away, cleanup logic should first cleanup - * dmap entries. - * - * TODO: Do we need to ensure that we are holding inode lock - * as well. - */ - dmap->inode = inode; - dmap->start = offset; - dmap->end = offset + FUSE_DAX_MEM_RANGE_SZ - 1; - /* Protected by fi->i_dmap_sem */ - fuse_dax_interval_tree_insert(dmap, &fi->dmap_tree); - fi->nr_dmaps++; - spin_lock(&fc->lock); - list_add_tail(&dmap->busy_list, &fc->busy_ranges); - fc->nr_busy_ranges++; - spin_unlock(&fc->lock); + dmap->writable = writable; + if (!upgrade) { + /* + * We don't take a refernce on inode. inode is valid right now + * and when inode is going away, cleanup logic should first + * cleanup dmap entries. + * + * TODO: Do we need to ensure that we are holding inode lock + * as well. + */ + dmap->inode = inode; + dmap->start = offset; + dmap->end = offset + FUSE_DAX_MEM_RANGE_SZ - 1; + /* Protected by fi->i_dmap_sem */ + fuse_dax_interval_tree_insert(dmap, &fi->dmap_tree); + fi->nr_dmaps++; + spin_lock(&fc->lock); + list_add_tail(&dmap->busy_list, &fc->busy_ranges); + fc->nr_busy_ranges++; + spin_unlock(&fc->lock); + } return 0; } @@ -1890,6 +1896,7 @@ static int iomap_begin_setup_new_mapping(struct inode *inode, loff_t pos, struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_dax_mapping *dmap, *alloc_dmap = NULL; int ret; + bool writable = flags & IOMAP_WRITE; /* Can't do reclaim in fault path yet due to lock ordering. * Read path takes shared inode lock and that's not sufficient @@ -1930,10 +1937,11 @@ static int iomap_begin_setup_new_mapping(struct inode *inode, loff_t pos, /* Setup one mapping */ ret = fuse_setup_one_mapping(inode, - ALIGN_DOWN(pos, FUSE_DAX_MEM_RANGE_SZ), alloc_dmap); + ALIGN_DOWN(pos, FUSE_DAX_MEM_RANGE_SZ), + alloc_dmap, writable, false); if (ret < 0) { printk("fuse_setup_one_mapping() failed. err=%d" - " pos=0x%llx\n", ret, pos); + " pos=0x%llx, writable=%d\n", ret, pos, writable); dmap_add_to_free_pool(fc, alloc_dmap); up_write(&fi->i_dmap_sem); return ret; @@ -1943,6 +1951,45 @@ static int iomap_begin_setup_new_mapping(struct inode *inode, loff_t pos, return 0; } +static int iomap_begin_upgrade_mapping(struct inode *inode, loff_t pos, + loff_t length, unsigned flags, + struct iomap *iomap) +{ + struct fuse_inode *fi = get_fuse_inode(inode); + struct fuse_dax_mapping *dmap; + int ret; + + /* + * Take exclusive lock so that only one caller can try to setup + * mapping and others wait. + */ + down_write(&fi->i_dmap_sem); + dmap = fuse_dax_interval_tree_iter_first(&fi->dmap_tree, pos, pos); + + /* We are holding either inode lock or i_mmap_sem, and that should + * ensure that dmap can't reclaimed or truncated and it should still + * be there in tree despite the fact we dropped and re-acquired the + * lock. + */ + if (WARN_ON(!dmap)) + return -EIO; + + WARN_ON(dmap->writable); + ret = fuse_setup_one_mapping(inode, + ALIGN_DOWN(pos, FUSE_DAX_MEM_RANGE_SZ), + dmap, true, true); + if (ret < 0) { + printk("fuse_setup_one_mapping() failed. err=%d pos=0x%llx\n", + ret, pos); + up_write(&fi->i_dmap_sem); + return ret; + } + + fuse_fill_iomap(inode, pos, length, iomap, dmap, flags); + up_write(&fi->i_dmap_sem); + return 0; +} + /* This is just for DAX and the mapping is ephemeral, do not use it for other * purposes since there is no block device with a permanent mapping. */ @@ -1952,6 +1999,7 @@ static int fuse_iomap_begin(struct inode *inode, loff_t pos, loff_t length, struct fuse_inode *fi = get_fuse_inode(inode); struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_dax_mapping *dmap; + bool writable = flags & IOMAP_WRITE; /* We don't support FIEMAP */ BUG_ON(flags & IOMAP_REPORT); @@ -1982,9 +2030,23 @@ static int fuse_iomap_begin(struct inode *inode, loff_t pos, loff_t length, dmap = fuse_dax_interval_tree_iter_first(&fi->dmap_tree, pos, pos); if (dmap) { - fuse_fill_iomap(inode, pos, length, iomap, dmap, flags); - up_read(&fi->i_dmap_sem); - return 0; + if (writable && !dmap->writable) { + /* Upgrade read-only mapping to read-write. This will + * require exclusive i_dmap_sem lock as we don't want + * two threads to be trying to this simultaneously + * for same dmap. So drop shared lock and acquire + * exclusive lock. + */ + up_read(&fi->i_dmap_sem); + pr_debug("%s: Upgrading mapping at offset 0x%llx" + " length 0x%llx\n", __func__, pos, length); + return iomap_begin_upgrade_mapping(inode, pos, length, + flags, iomap); + } else { + fuse_fill_iomap(inode, pos, length, iomap, dmap, flags); + up_read(&fi->i_dmap_sem); + return 0; + } } else { up_read(&fi->i_dmap_sem); pr_debug("%s: no mapping at offset 0x%llx length 0x%llx\n", diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index e899a06e29d7..34ca8b90a1e1 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -134,6 +134,9 @@ struct fuse_dax_mapping { /** Length of mapping, in bytes */ loff_t length; + + /* Is this mapping read-only or read-write */ + bool writable; }; /** FUSE inode */ -- 2.17.2
