This integrates the dax window manager with the virtio-fs file system operations:
- Removes the lock from virtio::fs::dax_window, since this is now handled by the window manager. - A single manager instance is maintained for each virtio-fs device having at least a singe mount. Multiple, concurrent mounts of the same device share a common dax_manager. - The FUSE_READ fallback as well as the logic of using it when the DAX window is not available or a read using it fails are untouched. Signed-off-by: Fotis Xenakis <[email protected]> --- drivers/virtio-fs.hh | 11 ++- fs/virtiofs/virtiofs.hh | 21 ++++- fs/virtiofs/virtiofs_i.hh | 3 - fs/virtiofs/virtiofs_vfsops.cc | 53 ++++++++++-- fs/virtiofs/virtiofs_vnops.cc | 151 ++++++--------------------------- 5 files changed, 100 insertions(+), 139 deletions(-) diff --git a/drivers/virtio-fs.hh b/drivers/virtio-fs.hh index d78d7962..88a16939 100644 --- a/drivers/virtio-fs.hh +++ b/drivers/virtio-fs.hh @@ -8,8 +8,11 @@ #ifndef VIRTIO_FS_DRIVER_H #define VIRTIO_FS_DRIVER_H +#include <functional> + #include <osv/mutex.h> #include <osv/waitqueue.hh> + #include "drivers/virtio.hh" #include "drivers/virtio-device.hh" #include "fs/virtiofs/fuse_kernel.h" @@ -49,7 +52,13 @@ public: struct dax_window { mmioaddr_t addr; u64 len; - mutex lock; + }; + + // Helper enabling the use of fs* as key type in an unordered_* container. + struct hasher { + size_t operator()(fs* const _fs) const noexcept { + return std::hash<int>{}(_fs->_id); + } }; explicit fs(virtio_device& dev); diff --git a/fs/virtiofs/virtiofs.hh b/fs/virtiofs/virtiofs.hh index 475d5eba..8a3b06f2 100644 --- a/fs/virtiofs/virtiofs.hh +++ b/fs/virtiofs/virtiofs.hh @@ -8,11 +8,13 @@ #ifndef __INCLUDE_VIRTIOFS_H__ #define __INCLUDE_VIRTIOFS_H__ -#include <osv/vnode.h> +#include <memory> + +#include <osv/debug.h> #include <osv/mount.h> -#include <osv/dentry.h> -#include <osv/prex.h> -#include <osv/buf.h> +#include <osv/vnode.h> + +#include "drivers/virtio-fs.hh" #include "fuse_kernel.h" #define VIRTIOFS_DEBUG_ENABLED 1 @@ -23,6 +25,17 @@ #define virtiofs_debug(...) #endif +// Necessary pre-declaration because virtiofs::dax depends on virtiofs_inode, +// declared below. +namespace virtiofs { +class dax_manager; +} + +struct virtiofs_mount_data { + virtio::fs* drv; + std::shared_ptr<virtiofs::dax_manager> dax_mgr; +}; + struct virtiofs_inode { uint64_t nodeid; struct fuse_attr attr; diff --git a/fs/virtiofs/virtiofs_i.hh b/fs/virtiofs/virtiofs_i.hh index 76533d74..e879546d 100644 --- a/fs/virtiofs/virtiofs_i.hh +++ b/fs/virtiofs/virtiofs_i.hh @@ -8,9 +8,6 @@ #ifndef VIRTIOFS_IO_H #define VIRTIOFS_IO_H -#include "fuse_kernel.h" -#include <osv/mutex.h> -#include <osv/waitqueue.hh> #include "drivers/virtio-fs.hh" int fuse_req_send_and_receive_reply(virtio::fs* drv, uint32_t opcode, diff --git a/fs/virtiofs/virtiofs_vfsops.cc b/fs/virtiofs/virtiofs_vfsops.cc index 30daa42b..b60a766b 100644 --- a/fs/virtiofs/virtiofs_vfsops.cc +++ b/fs/virtiofs/virtiofs_vfsops.cc @@ -5,19 +5,32 @@ * BSD license as described in the LICENSE file in the top-level directory. */ +#include <atomic> +#include <memory> +#include <mutex> +#include <new> #include <sys/types.h> -#include <osv/device.h> + +#include <api/assert.h> #include <osv/debug.h> -#include <iomanip> -#include <iostream> +#include <osv/device.h> +#include <osv/mutex.h> + +#include "drivers/virtio-fs.hh" #include "virtiofs.hh" +#include "virtiofs_dax.hh" #include "virtiofs_i.hh" -#include "drivers/virtio-fs.hh" using fuse_request = virtio::fs::fuse_request; static std::atomic<uint64_t> fuse_unique_id(1); +static struct { + std::unordered_map<virtio::fs*, std::shared_ptr<virtiofs::dax_manager>, + virtio::fs::hasher> mgrs; + mutex lock; +} dax_managers; + int fuse_req_send_and_receive_reply(virtio::fs* drv, uint32_t opcode, uint64_t nodeid, void* input_args_data, size_t input_args_size, void* output_args_data, size_t output_args_size) @@ -115,7 +128,28 @@ static int virtiofs_mount(struct mount* mp, const char* dev, int flags, virtiofs_set_vnode(mp->m_root->d_vnode, root_node); - mp->m_data = drv; + auto* m_data = new (std::nothrow) virtiofs_mount_data; + if (!m_data) { + return ENOMEM; + } + m_data->drv = drv; + if (drv->get_dax()) { + // The device supports the DAX window + std::lock_guard<mutex> guard {dax_managers.lock}; + auto found = dax_managers.mgrs.find(drv); + if (found != dax_managers.mgrs.end()) { + // There is a dax_manager already associated with this device (the + // device is already mounted) + m_data->dax_mgr = found->second; + } else { + m_data->dax_mgr = std::make_shared<virtiofs::dax_manager>(*drv); + if (!m_data->dax_mgr) { + return ENOMEM; + } + } + } + + mp->m_data = m_data; mp->m_dev = device; return 0; @@ -141,6 +175,15 @@ static int virtiofs_statfs(struct mount* mp, struct statfs* statp) static int virtiofs_unmount(struct mount* mp, int flags) { + auto* m_data = static_cast<virtiofs_mount_data*>(mp->m_data); + std::lock_guard<mutex> guard {dax_managers.lock}; + if (m_data->dax_mgr && m_data->dax_mgr.use_count() == 2) { + // This was the last mount of this device. It's safe to delete the + // window manager. + dax_managers.mgrs.erase(m_data->drv); + } + delete m_data; + struct device* dev = mp->m_dev; return device_close(dev); } diff --git a/fs/virtiofs/virtiofs_vnops.cc b/fs/virtiofs/virtiofs_vnops.cc index 38c000ec..51be3ee4 100644 --- a/fs/virtiofs/virtiofs_vnops.cc +++ b/fs/virtiofs/virtiofs_vnops.cc @@ -5,28 +5,27 @@ * BSD license as described in the LICENSE file in the top-level directory. */ -#include <sys/stat.h> +#include <cstdlib> +#include <cstring> #include <dirent.h> -#include <sys/param.h> - #include <errno.h> -#include <string.h> -#include <stdlib.h> #include <fcntl.h> +#include <sys/param.h> +#include <sys/stat.h> +#include <sys/types.h> -#include <osv/prex.h> -#include <osv/vnode.h> -#include <osv/file.h> -#include <osv/mount.h> +#include <osv/contiguous_alloc.hh> #include <osv/debug.h> - -#include <sys/types.h> #include <osv/device.h> -#include <osv/sched.hh> +#include <osv/file.h> #include <osv/mmio.hh> -#include <osv/contiguous_alloc.hh> +#include <osv/mount.h> +#include <osv/prex.h> +#include <osv/sched.hh> +#include <osv/vnode.h> #include "virtiofs.hh" +#include "virtiofs_dax.hh" #include "virtiofs_i.hh" static constexpr uint32_t OPEN_FLAGS = O_RDONLY; @@ -59,7 +58,8 @@ static int virtiofs_lookup(struct vnode* vnode, char* name, struct vnode** vpp) } strcpy(in_args.get(), name); - auto* drv = static_cast<virtio::fs*>(vnode->v_mount->m_data); + auto* m_data = static_cast<virtiofs_mount_data*>(vnode->v_mount->m_data); + auto* drv = m_data->drv; auto error = fuse_req_send_and_receive_reply(drv, FUSE_LOOKUP, inode->nodeid, in_args.get(), in_args_len, out_args.get(), sizeof(*out_args)); @@ -110,7 +110,8 @@ static int virtiofs_open(struct file* fp) } in_args->flags = OPEN_FLAGS; - auto* drv = static_cast<virtio::fs*>(vnode->v_mount->m_data); + auto* m_data = static_cast<virtiofs_mount_data*>(vnode->v_mount->m_data); + auto* drv = m_data->drv; auto error = fuse_req_send_and_receive_reply(drv, FUSE_OPEN, inode->nodeid, in_args.get(), sizeof(*in_args), out_args.get(), sizeof(*out_args)); @@ -145,7 +146,8 @@ static int virtiofs_close(struct vnode* vnode, struct file* fp) in_args->fh = f_data->file_handle; in_args->flags = OPEN_FLAGS; // need to be same as in FUSE_OPEN - auto* drv = static_cast<virtio::fs*>(vnode->v_mount->m_data); + auto* m_data = static_cast<virtiofs_mount_data*>(vnode->v_mount->m_data); + auto* drv = m_data->drv; auto error = fuse_req_send_and_receive_reply(drv, FUSE_RELEASE, inode->nodeid, in_args.get(), sizeof(*in_args), nullptr, 0); if (error) { @@ -172,7 +174,8 @@ static int virtiofs_readlink(struct vnode* vnode, struct uio* uio) return ENOMEM; } - auto* drv = static_cast<virtio::fs*>(vnode->v_mount->m_data); + auto* m_data = static_cast<virtiofs_mount_data*>(vnode->v_mount->m_data); + auto* drv = m_data->drv; auto error = fuse_req_send_and_receive_reply(drv, FUSE_READLINK, inode->nodeid, nullptr, 0, link_path.get(), PATH_MAX); if (error) { @@ -185,107 +188,6 @@ static int virtiofs_readlink(struct vnode* vnode, struct uio* uio) return uiomove(link_path.get(), strlen(link_path.get()), uio); } -// Read @read_amt bytes from @inode, using the DAX window. -static int virtiofs_read_direct(virtiofs_inode& inode, u64 file_handle, - u64 read_amt, virtio::fs& drv, struct uio& uio) -{ - auto* dax = drv.get_dax(); - // Enter the critical path: setup mapping -> read -> remove mapping - std::lock_guard<mutex> guard {dax->lock}; - - // Setup mapping - // NOTE: There are restrictions on the arguments to FUSE_SETUPMAPPING, from - // the spec: "Alignment constraints for FUSE_SETUPMAPPING and - // FUSE_REMOVEMAPPING requests are communicated during FUSE_INIT - // negotiation"): - // - foffset: multiple of map_alignment from FUSE_INIT - // - len: not larger than remaining file? - // - moffset: multiple of map_alignment from FUSE_INIT - // In practice, map_alignment is the host's page size, because foffset and - // moffset are passed to mmap() on the host. - std::unique_ptr<fuse_setupmapping_in> in_args { - new (std::nothrow) fuse_setupmapping_in()}; - if (!in_args) { - return ENOMEM; - } - in_args->fh = file_handle; - in_args->flags = 0; - uint64_t moffset = 0; - in_args->moffset = moffset; - - auto map_align = drv.get_map_alignment(); - if (map_align < 0) { - kprintf("[virtiofs] inode %lld, map alignment not set\n", inode.nodeid); - return ENOTSUP; - } - uint64_t alignment = 1ul << map_align; - auto foffset = align_down(static_cast<uint64_t>(uio.uio_offset), alignment); - in_args->foffset = foffset; - - // The possible excess part of the file mapped due to alignment constraints - // NOTE: map_excess <= alignemnt - auto map_excess = uio.uio_offset - foffset; - if (moffset + map_excess >= dax->len) { - // No usable room in DAX window due to map_excess - return ENOBUFS; - } - // Actual read amount is read_amt, or what fits in the DAX window - auto read_amt_act = std::min<uint64_t>(read_amt, - dax->len - moffset - map_excess); - in_args->len = read_amt_act + map_excess; - - virtiofs_debug("inode %lld, setting up mapping (foffset=%lld, len=%lld, " - "moffset=%lld)\n", inode.nodeid, in_args->foffset, - in_args->len, in_args->moffset); - auto error = fuse_req_send_and_receive_reply(&drv, FUSE_SETUPMAPPING, - inode.nodeid, in_args.get(), sizeof(*in_args), nullptr, 0); - if (error) { - kprintf("[virtiofs] inode %lld, mapping setup failed\n", inode.nodeid); - return error; - } - - // Read from the DAX window - // NOTE: It shouldn't be necessary to use the mmio* interface (i.e. volatile - // accesses). From the spec: "Drivers map this shared memory region with - // writeback caching as if it were regular RAM." - // The location of the requested data in the DAX window - auto req_data = dax->addr + moffset + map_excess; - error = uiomove(const_cast<void*>(req_data), read_amt_act, &uio); - if (error) { - kprintf("[virtiofs] inode %lld, uiomove failed\n", inode.nodeid); - return error; - } - - // Remove mapping - // NOTE: This is only necessary when FUSE_SETUPMAPPING fails. From the spec: - // "If the device runs out of resources the FUSE_SETUPMAPPING request fails - // until resources are available again following FUSE_REMOVEMAPPING." - auto r_in_args_size = sizeof(fuse_removemapping_in) + - sizeof(fuse_removemapping_one); - std::unique_ptr<u8> r_in_args {new (std::nothrow) u8[r_in_args_size]}; - if (!r_in_args) { - return ENOMEM; - } - auto r_in = new (r_in_args.get()) fuse_removemapping_in(); - auto r_one = new (r_in_args.get() + sizeof(fuse_removemapping_in)) - fuse_removemapping_one(); - r_in->count = 1; - r_one->moffset = in_args->moffset; - r_one->len = in_args->len; - - virtiofs_debug("inode %lld, removing mapping (moffset=%lld, len=%lld)\n", - inode.nodeid, r_one->moffset, r_one->len); - error = fuse_req_send_and_receive_reply(&drv, FUSE_REMOVEMAPPING, - inode.nodeid, r_in_args.get(), r_in_args_size, nullptr, 0); - if (error) { - kprintf("[virtiofs] inode %lld, mapping removal failed\n", - inode.nodeid); - return error; - } - - return 0; -} - // Read @read_amt bytes from @inode, using the fallback FUSE_READ mechanism. static int virtiofs_read_fallback(virtiofs_inode& inode, u64 file_handle, u32 read_amt, u32 flags, virtio::fs& drv, struct uio& uio) @@ -314,9 +216,6 @@ static int virtiofs_read_fallback(virtiofs_inode& inode, u64 file_handle, return uiomove(buf.get(), read_amt, &uio); } -// TODO: Optimize it to reduce number of exits to host (each -// fuse_req_send_and_receive_reply()) by reading eagerly "ahead/around" just -// like ROFS does and caching it static int virtiofs_read(struct vnode* vnode, struct file* fp, struct uio* uio, int ioflag) { @@ -343,17 +242,17 @@ static int virtiofs_read(struct vnode* vnode, struct file* fp, struct uio* uio, auto* inode = static_cast<virtiofs_inode*>(vnode->v_data); auto* file_data = static_cast<virtiofs_file_data*>(fp->f_data); - auto* drv = static_cast<virtio::fs*>(vnode->v_mount->m_data); + auto* m_data = static_cast<virtiofs_mount_data*>(vnode->v_mount->m_data); + auto* drv = m_data->drv; + auto dax_mgr = m_data->dax_mgr; // Total read amount is what they requested, or what is left auto read_amt = std::min<uint64_t>(uio->uio_resid, inode->attr.size - uio->uio_offset); - if (drv->get_dax()) { + if (dax_mgr) { // Try to read from DAX - if (!virtiofs_read_direct(*inode, file_data->file_handle, read_amt, - *drv, *uio)) { - + if (!dax_mgr->read(*inode, file_data->file_handle, read_amt, *uio)) { return 0; } } -- 2.27.0 -- You received this message because you are subscribed to the Google Groups "OSv Development" group. To unsubscribe from this group and stop receiving emails from it, send an email to [email protected]. To view this discussion on the web visit https://groups.google.com/d/msgid/osv-dev/AM0PR03MB629260F9F8CC3272E9CF435AA6960%40AM0PR03MB6292.eurprd03.prod.outlook.com.
