Hi, Very impressive work!
I cannot really say I fully appreciate all details nor know this part of the spec but your code looks good and makes sense. How did you actually test it especially the edge conditions - window getting full, etc? Waldek On Sun, Jun 21, 2020 at 5:09 AM Fotis Xenakis <[email protected]> wrote: > This integrates the dax window manager with the virtio-fs file system > operations: > > - Removes the lock from virtio::fs::dax_window, since this is now > handled by the window manager. > - A single manager instance is maintained for each virtio-fs device > having at least a singe mount. Multiple, concurrent mounts of the same > device share a common dax_manager. > - The FUSE_READ fallback as well as the logic of using it when the DAX > window is not available or a read using it fails are untouched. > > Signed-off-by: Fotis Xenakis <[email protected]> > --- > drivers/virtio-fs.hh | 11 ++- > fs/virtiofs/virtiofs.hh | 21 ++++- > fs/virtiofs/virtiofs_i.hh | 3 - > fs/virtiofs/virtiofs_vfsops.cc | 53 ++++++++++-- > fs/virtiofs/virtiofs_vnops.cc | 151 ++++++--------------------------- > 5 files changed, 100 insertions(+), 139 deletions(-) > > diff --git a/drivers/virtio-fs.hh b/drivers/virtio-fs.hh > index d78d7962..88a16939 100644 > --- a/drivers/virtio-fs.hh > +++ b/drivers/virtio-fs.hh > @@ -8,8 +8,11 @@ > #ifndef VIRTIO_FS_DRIVER_H > #define VIRTIO_FS_DRIVER_H > > +#include <functional> > + > #include <osv/mutex.h> > #include <osv/waitqueue.hh> > + > #include "drivers/virtio.hh" > #include "drivers/virtio-device.hh" > #include "fs/virtiofs/fuse_kernel.h" > @@ -49,7 +52,13 @@ public: > struct dax_window { > mmioaddr_t addr; > u64 len; > - mutex lock; > + }; > + > + // Helper enabling the use of fs* as key type in an unordered_* > container. > + struct hasher { > + size_t operator()(fs* const _fs) const noexcept { > + return std::hash<int>{}(_fs->_id); > + } > }; > > explicit fs(virtio_device& dev); > diff --git a/fs/virtiofs/virtiofs.hh b/fs/virtiofs/virtiofs.hh > index 475d5eba..8a3b06f2 100644 > --- a/fs/virtiofs/virtiofs.hh > +++ b/fs/virtiofs/virtiofs.hh > @@ -8,11 +8,13 @@ > #ifndef __INCLUDE_VIRTIOFS_H__ > #define __INCLUDE_VIRTIOFS_H__ > > -#include <osv/vnode.h> > +#include <memory> > + > +#include <osv/debug.h> > #include <osv/mount.h> > -#include <osv/dentry.h> > -#include <osv/prex.h> > -#include <osv/buf.h> > +#include <osv/vnode.h> > + > +#include "drivers/virtio-fs.hh" > #include "fuse_kernel.h" > > #define VIRTIOFS_DEBUG_ENABLED 1 > @@ -23,6 +25,17 @@ > #define virtiofs_debug(...) > #endif > > +// Necessary pre-declaration because virtiofs::dax depends on > virtiofs_inode, > +// declared below. > +namespace virtiofs { > +class dax_manager; > +} > + > +struct virtiofs_mount_data { > + virtio::fs* drv; > + std::shared_ptr<virtiofs::dax_manager> dax_mgr; > +}; > + > struct virtiofs_inode { > uint64_t nodeid; > struct fuse_attr attr; > diff --git a/fs/virtiofs/virtiofs_i.hh b/fs/virtiofs/virtiofs_i.hh > index 76533d74..e879546d 100644 > --- a/fs/virtiofs/virtiofs_i.hh > +++ b/fs/virtiofs/virtiofs_i.hh > @@ -8,9 +8,6 @@ > #ifndef VIRTIOFS_IO_H > #define VIRTIOFS_IO_H > > -#include "fuse_kernel.h" > -#include <osv/mutex.h> > -#include <osv/waitqueue.hh> > #include "drivers/virtio-fs.hh" > > int fuse_req_send_and_receive_reply(virtio::fs* drv, uint32_t opcode, > diff --git a/fs/virtiofs/virtiofs_vfsops.cc > b/fs/virtiofs/virtiofs_vfsops.cc > index 30daa42b..b60a766b 100644 > --- a/fs/virtiofs/virtiofs_vfsops.cc > +++ b/fs/virtiofs/virtiofs_vfsops.cc > @@ -5,19 +5,32 @@ > * BSD license as described in the LICENSE file in the top-level > directory. > */ > > +#include <atomic> > +#include <memory> > +#include <mutex> > +#include <new> > #include <sys/types.h> > -#include <osv/device.h> > + > +#include <api/assert.h> > #include <osv/debug.h> > -#include <iomanip> > -#include <iostream> > +#include <osv/device.h> > +#include <osv/mutex.h> > + > +#include "drivers/virtio-fs.hh" > #include "virtiofs.hh" > +#include "virtiofs_dax.hh" > #include "virtiofs_i.hh" > -#include "drivers/virtio-fs.hh" > > using fuse_request = virtio::fs::fuse_request; > > static std::atomic<uint64_t> fuse_unique_id(1); > > +static struct { > + std::unordered_map<virtio::fs*, > std::shared_ptr<virtiofs::dax_manager>, > + virtio::fs::hasher> mgrs; > + mutex lock; > +} dax_managers; > + > int fuse_req_send_and_receive_reply(virtio::fs* drv, uint32_t opcode, > uint64_t nodeid, void* input_args_data, size_t input_args_size, > void* output_args_data, size_t output_args_size) > @@ -115,7 +128,28 @@ static int virtiofs_mount(struct mount* mp, const > char* dev, int flags, > > virtiofs_set_vnode(mp->m_root->d_vnode, root_node); > > - mp->m_data = drv; > + auto* m_data = new (std::nothrow) virtiofs_mount_data; > + if (!m_data) { > + return ENOMEM; > + } > + m_data->drv = drv; > + if (drv->get_dax()) { > + // The device supports the DAX window > + std::lock_guard<mutex> guard {dax_managers.lock}; > + auto found = dax_managers.mgrs.find(drv); > + if (found != dax_managers.mgrs.end()) { > + // There is a dax_manager already associated with this device > (the > + // device is already mounted) > + m_data->dax_mgr = found->second; > + } else { > + m_data->dax_mgr = > std::make_shared<virtiofs::dax_manager>(*drv); > + if (!m_data->dax_mgr) { > + return ENOMEM; > + } > + } > + } > + > + mp->m_data = m_data; > mp->m_dev = device; > > return 0; > @@ -141,6 +175,15 @@ static int virtiofs_statfs(struct mount* mp, struct > statfs* statp) > > static int virtiofs_unmount(struct mount* mp, int flags) > { > + auto* m_data = static_cast<virtiofs_mount_data*>(mp->m_data); > + std::lock_guard<mutex> guard {dax_managers.lock}; > + if (m_data->dax_mgr && m_data->dax_mgr.use_count() == 2) { > + // This was the last mount of this device. It's safe to delete the > + // window manager. > + dax_managers.mgrs.erase(m_data->drv); > + } > + delete m_data; > + > struct device* dev = mp->m_dev; > return device_close(dev); > } > diff --git a/fs/virtiofs/virtiofs_vnops.cc b/fs/virtiofs/virtiofs_vnops.cc > index 38c000ec..51be3ee4 100644 > --- a/fs/virtiofs/virtiofs_vnops.cc > +++ b/fs/virtiofs/virtiofs_vnops.cc > @@ -5,28 +5,27 @@ > * BSD license as described in the LICENSE file in the top-level > directory. > */ > > -#include <sys/stat.h> > +#include <cstdlib> > +#include <cstring> > #include <dirent.h> > -#include <sys/param.h> > - > #include <errno.h> > -#include <string.h> > -#include <stdlib.h> > #include <fcntl.h> > +#include <sys/param.h> > +#include <sys/stat.h> > +#include <sys/types.h> > > -#include <osv/prex.h> > -#include <osv/vnode.h> > -#include <osv/file.h> > -#include <osv/mount.h> > +#include <osv/contiguous_alloc.hh> > #include <osv/debug.h> > - > -#include <sys/types.h> > #include <osv/device.h> > -#include <osv/sched.hh> > +#include <osv/file.h> > #include <osv/mmio.hh> > -#include <osv/contiguous_alloc.hh> > +#include <osv/mount.h> > +#include <osv/prex.h> > +#include <osv/sched.hh> > +#include <osv/vnode.h> > > #include "virtiofs.hh" > +#include "virtiofs_dax.hh" > #include "virtiofs_i.hh" > > static constexpr uint32_t OPEN_FLAGS = O_RDONLY; > @@ -59,7 +58,8 @@ static int virtiofs_lookup(struct vnode* vnode, char* > name, struct vnode** vpp) > } > strcpy(in_args.get(), name); > > - auto* drv = static_cast<virtio::fs*>(vnode->v_mount->m_data); > + auto* m_data = > static_cast<virtiofs_mount_data*>(vnode->v_mount->m_data); > + auto* drv = m_data->drv; > auto error = fuse_req_send_and_receive_reply(drv, FUSE_LOOKUP, > inode->nodeid, in_args.get(), in_args_len, out_args.get(), > sizeof(*out_args)); > @@ -110,7 +110,8 @@ static int virtiofs_open(struct file* fp) > } > in_args->flags = OPEN_FLAGS; > > - auto* drv = static_cast<virtio::fs*>(vnode->v_mount->m_data); > + auto* m_data = > static_cast<virtiofs_mount_data*>(vnode->v_mount->m_data); > + auto* drv = m_data->drv; > auto error = fuse_req_send_and_receive_reply(drv, FUSE_OPEN, > inode->nodeid, in_args.get(), sizeof(*in_args), out_args.get(), > sizeof(*out_args)); > @@ -145,7 +146,8 @@ static int virtiofs_close(struct vnode* vnode, struct > file* fp) > in_args->fh = f_data->file_handle; > in_args->flags = OPEN_FLAGS; // need to be same as in FUSE_OPEN > > - auto* drv = static_cast<virtio::fs*>(vnode->v_mount->m_data); > + auto* m_data = > static_cast<virtiofs_mount_data*>(vnode->v_mount->m_data); > + auto* drv = m_data->drv; > auto error = fuse_req_send_and_receive_reply(drv, FUSE_RELEASE, > inode->nodeid, in_args.get(), sizeof(*in_args), nullptr, 0); > if (error) { > @@ -172,7 +174,8 @@ static int virtiofs_readlink(struct vnode* vnode, > struct uio* uio) > return ENOMEM; > } > > - auto* drv = static_cast<virtio::fs*>(vnode->v_mount->m_data); > + auto* m_data = > static_cast<virtiofs_mount_data*>(vnode->v_mount->m_data); > + auto* drv = m_data->drv; > auto error = fuse_req_send_and_receive_reply(drv, FUSE_READLINK, > inode->nodeid, nullptr, 0, link_path.get(), PATH_MAX); > if (error) { > @@ -185,107 +188,6 @@ static int virtiofs_readlink(struct vnode* vnode, > struct uio* uio) > return uiomove(link_path.get(), strlen(link_path.get()), uio); > } > > -// Read @read_amt bytes from @inode, using the DAX window. > -static int virtiofs_read_direct(virtiofs_inode& inode, u64 file_handle, > - u64 read_amt, virtio::fs& drv, struct uio& uio) > -{ > - auto* dax = drv.get_dax(); > - // Enter the critical path: setup mapping -> read -> remove mapping > - std::lock_guard<mutex> guard {dax->lock}; > - > - // Setup mapping > - // NOTE: There are restrictions on the arguments to > FUSE_SETUPMAPPING, from > - // the spec: "Alignment constraints for FUSE_SETUPMAPPING and > - // FUSE_REMOVEMAPPING requests are communicated during FUSE_INIT > - // negotiation"): > - // - foffset: multiple of map_alignment from FUSE_INIT > - // - len: not larger than remaining file? > - // - moffset: multiple of map_alignment from FUSE_INIT > - // In practice, map_alignment is the host's page size, because > foffset and > - // moffset are passed to mmap() on the host. > - std::unique_ptr<fuse_setupmapping_in> in_args { > - new (std::nothrow) fuse_setupmapping_in()}; > - if (!in_args) { > - return ENOMEM; > - } > - in_args->fh = file_handle; > - in_args->flags = 0; > - uint64_t moffset = 0; > - in_args->moffset = moffset; > - > - auto map_align = drv.get_map_alignment(); > - if (map_align < 0) { > - kprintf("[virtiofs] inode %lld, map alignment not set\n", > inode.nodeid); > - return ENOTSUP; > - } > - uint64_t alignment = 1ul << map_align; > - auto foffset = align_down(static_cast<uint64_t>(uio.uio_offset), > alignment); > - in_args->foffset = foffset; > - > - // The possible excess part of the file mapped due to alignment > constraints > - // NOTE: map_excess <= alignemnt > - auto map_excess = uio.uio_offset - foffset; > - if (moffset + map_excess >= dax->len) { > - // No usable room in DAX window due to map_excess > - return ENOBUFS; > - } > - // Actual read amount is read_amt, or what fits in the DAX window > - auto read_amt_act = std::min<uint64_t>(read_amt, > - dax->len - moffset - map_excess); > - in_args->len = read_amt_act + map_excess; > - > - virtiofs_debug("inode %lld, setting up mapping (foffset=%lld, > len=%lld, " > - "moffset=%lld)\n", inode.nodeid, in_args->foffset, > - in_args->len, in_args->moffset); > - auto error = fuse_req_send_and_receive_reply(&drv, FUSE_SETUPMAPPING, > - inode.nodeid, in_args.get(), sizeof(*in_args), nullptr, 0); > - if (error) { > - kprintf("[virtiofs] inode %lld, mapping setup failed\n", > inode.nodeid); > - return error; > - } > - > - // Read from the DAX window > - // NOTE: It shouldn't be necessary to use the mmio* interface (i.e. > volatile > - // accesses). From the spec: "Drivers map this shared memory region > with > - // writeback caching as if it were regular RAM." > - // The location of the requested data in the DAX window > - auto req_data = dax->addr + moffset + map_excess; > - error = uiomove(const_cast<void*>(req_data), read_amt_act, &uio); > - if (error) { > - kprintf("[virtiofs] inode %lld, uiomove failed\n", inode.nodeid); > - return error; > - } > - > - // Remove mapping > - // NOTE: This is only necessary when FUSE_SETUPMAPPING fails. From > the spec: > - // "If the device runs out of resources the FUSE_SETUPMAPPING request > fails > - // until resources are available again following FUSE_REMOVEMAPPING." > - auto r_in_args_size = sizeof(fuse_removemapping_in) + > - sizeof(fuse_removemapping_one); > - std::unique_ptr<u8> r_in_args {new (std::nothrow) u8[r_in_args_size]}; > - if (!r_in_args) { > - return ENOMEM; > - } > - auto r_in = new (r_in_args.get()) fuse_removemapping_in(); > - auto r_one = new (r_in_args.get() + sizeof(fuse_removemapping_in)) > - fuse_removemapping_one(); > - r_in->count = 1; > - r_one->moffset = in_args->moffset; > - r_one->len = in_args->len; > - > - virtiofs_debug("inode %lld, removing mapping (moffset=%lld, > len=%lld)\n", > - inode.nodeid, r_one->moffset, r_one->len); > - error = fuse_req_send_and_receive_reply(&drv, FUSE_REMOVEMAPPING, > - inode.nodeid, r_in_args.get(), r_in_args_size, nullptr, 0); > - if (error) { > - kprintf("[virtiofs] inode %lld, mapping removal failed\n", > - inode.nodeid); > - return error; > - } > - > - return 0; > -} > - > // Read @read_amt bytes from @inode, using the fallback FUSE_READ > mechanism. > static int virtiofs_read_fallback(virtiofs_inode& inode, u64 file_handle, > u32 read_amt, u32 flags, virtio::fs& drv, struct uio& uio) > @@ -314,9 +216,6 @@ static int virtiofs_read_fallback(virtiofs_inode& > inode, u64 file_handle, > return uiomove(buf.get(), read_amt, &uio); > } > > -// TODO: Optimize it to reduce number of exits to host (each > -// fuse_req_send_and_receive_reply()) by reading eagerly "ahead/around" > just > -// like ROFS does and caching it > static int virtiofs_read(struct vnode* vnode, struct file* fp, struct > uio* uio, > int ioflag) > { > @@ -343,17 +242,17 @@ static int virtiofs_read(struct vnode* vnode, struct > file* fp, struct uio* uio, > > auto* inode = static_cast<virtiofs_inode*>(vnode->v_data); > auto* file_data = static_cast<virtiofs_file_data*>(fp->f_data); > - auto* drv = static_cast<virtio::fs*>(vnode->v_mount->m_data); > + auto* m_data = > static_cast<virtiofs_mount_data*>(vnode->v_mount->m_data); > + auto* drv = m_data->drv; > + auto dax_mgr = m_data->dax_mgr; > > // Total read amount is what they requested, or what is left > auto read_amt = std::min<uint64_t>(uio->uio_resid, > inode->attr.size - uio->uio_offset); > > - if (drv->get_dax()) { > + if (dax_mgr) { > // Try to read from DAX > - if (!virtiofs_read_direct(*inode, file_data->file_handle, > read_amt, > - *drv, *uio)) { > - > + if (!dax_mgr->read(*inode, file_data->file_handle, read_amt, > *uio)) { > return 0; > } > } > -- > 2.27.0 > > -- > You received this message because you are subscribed to the Google Groups > "OSv Development" group. > To unsubscribe from this group and stop receiving emails from it, send an > email to [email protected]. > To view this discussion on the web visit > https://groups.google.com/d/msgid/osv-dev/AM0PR03MB629260F9F8CC3272E9CF435AA6960%40AM0PR03MB6292.eurprd03.prod.outlook.com > . > -- You received this message because you are subscribed to the Google Groups "OSv Development" group. To unsubscribe from this group and stop receiving emails from it, send an email to [email protected]. To view this discussion on the web visit https://groups.google.com/d/msgid/osv-dev/CAL9cFfO%3DcrNf_Rg72Zru8vviDzZG%3DRaVc-BsBfKP9v-Rz6mDUQ%40mail.gmail.com.
