Re: [PATCH v5 08/11] vduse: Implement an MMU-based IOMMU driver
在 2021/3/25 下午3:38, Yongji Xie 写道: On Thu, Mar 25, 2021 at 12:53 PM Jason Wang wrote: 在 2021/3/24 下午3:39, Yongji Xie 写道: On Wed, Mar 24, 2021 at 11:54 AM Jason Wang wrote: 在 2021/3/15 下午1:37, Xie Yongji 写道: This implements an MMU-based IOMMU driver to support mapping kernel dma buffer into userspace. The basic idea behind it is treating MMU (VA->PA) as IOMMU (IOVA->PA). The driver will set up MMU mapping instead of IOMMU mapping for the DMA transfer so that the userspace process is able to use its virtual address to access the dma buffer in kernel. And to avoid security issue, a bounce-buffering mechanism is introduced to prevent userspace accessing the original buffer directly. Signed-off-by: Xie Yongji --- drivers/vdpa/vdpa_user/iova_domain.c | 535 +++ drivers/vdpa/vdpa_user/iova_domain.h | 75 + 2 files changed, 610 insertions(+) create mode 100644 drivers/vdpa/vdpa_user/iova_domain.c create mode 100644 drivers/vdpa/vdpa_user/iova_domain.h diff --git a/drivers/vdpa/vdpa_user/iova_domain.c b/drivers/vdpa/vdpa_user/iova_domain.c new file mode 100644 index ..83de216b0e51 --- /dev/null +++ b/drivers/vdpa/vdpa_user/iova_domain.c @@ -0,0 +1,535 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * MMU-based IOMMU implementation + * + * Copyright (C) 2020 Bytedance Inc. and/or its affiliates. All rights reserved. 2021 as well. Sure. + * + * Author: Xie Yongji + * + */ + +#include +#include +#include +#include +#include +#include + +#include "iova_domain.h" + +static int vduse_iotlb_add_range(struct vduse_iova_domain *domain, + u64 start, u64 last, + u64 addr, unsigned int perm, + struct file *file, u64 offset) +{ + struct vdpa_map_file *map_file; + int ret; + + map_file = kmalloc(sizeof(*map_file), GFP_ATOMIC); + if (!map_file) + return -ENOMEM; + + map_file->file = get_file(file); + map_file->offset = offset; + + ret = vhost_iotlb_add_range_ctx(domain->iotlb, start, last, + addr, perm, map_file); + if (ret) { + fput(map_file->file); + kfree(map_file); + return ret; + } + return 0; +} + +static void vduse_iotlb_del_range(struct vduse_iova_domain *domain, + u64 start, u64 last) +{ + struct vdpa_map_file *map_file; + struct vhost_iotlb_map *map; + + while ((map = vhost_iotlb_itree_first(domain->iotlb, start, last))) { + map_file = (struct vdpa_map_file *)map->opaque; + fput(map_file->file); + kfree(map_file); + vhost_iotlb_map_free(domain->iotlb, map); + } +} + +int vduse_domain_set_map(struct vduse_iova_domain *domain, + struct vhost_iotlb *iotlb) +{ + struct vdpa_map_file *map_file; + struct vhost_iotlb_map *map; + u64 start = 0ULL, last = ULLONG_MAX; + int ret; + + spin_lock(>iotlb_lock); + vduse_iotlb_del_range(domain, start, last); + + for (map = vhost_iotlb_itree_first(iotlb, start, last); map; + map = vhost_iotlb_itree_next(map, start, last)) { + map_file = (struct vdpa_map_file *)map->opaque; + ret = vduse_iotlb_add_range(domain, map->start, map->last, + map->addr, map->perm, + map_file->file, + map_file->offset); + if (ret) + goto err; + } + spin_unlock(>iotlb_lock); + + return 0; +err: + vduse_iotlb_del_range(domain, start, last); + spin_unlock(>iotlb_lock); + return ret; +} + +static void vduse_domain_map_bounce_page(struct vduse_iova_domain *domain, + u64 iova, u64 size, u64 paddr) +{ + struct vduse_bounce_map *map; + unsigned int index; + u64 last = iova + size - 1; + + while (iova < last) { + map = >bounce_maps[iova >> PAGE_SHIFT]; + index = offset_in_page(iova) >> IOVA_ALLOC_ORDER; + map->orig_phys[index] = paddr; + paddr += IOVA_ALLOC_SIZE; + iova += IOVA_ALLOC_SIZE; + } +} + +static void vduse_domain_unmap_bounce_page(struct vduse_iova_domain *domain, +u64 iova, u64 size) +{ + struct vduse_bounce_map *map; + unsigned int index; + u64 last = iova + size - 1; + + while (iova < last) { + map = >bounce_maps[iova >> PAGE_SHIFT]; + index = offset_in_page(iova) >> IOVA_ALLOC_ORDER; + map->orig_phys[index] = INVALID_PHYS_ADDR; + iova += IOVA_ALLOC_SIZE; + } +} + +static void do_bounce(phys_addr_t orig, void *addr, size_t size, + enum dma_data_direction dir) +{ + unsigned long pfn = PFN_DOWN(orig); + + if
Re: drm/ttm: switch to per device LRU lock
Thanks! Just a copy issue. Patch to fix this is on the mailing list. Christian. Am 25.03.21 um 16:00 schrieb Colin Ian King: Hi, Static analysis with Coverity in linux-next has detected an issue in drivers/gpu/drm/ttm/ttm_bo.c with the follow commit: commit a1f091f8ef2b680a5184db065527612247cb4cae Author: Christian König Date: Tue Oct 6 17:26:42 2020 +0200 drm/ttm: switch to per device LRU lock Instead of having a global lock for potentially less contention. The analysis is as follows: 617 int ttm_mem_evict_first(struct ttm_device *bdev, 618struct ttm_resource_manager *man, 619const struct ttm_place *place, 620struct ttm_operation_ctx *ctx, 621struct ww_acquire_ctx *ticket) 622 { 1. assign_zero: Assigning: bo = NULL. 623struct ttm_buffer_object *bo = NULL, *busy_bo = NULL; 624bool locked = false; 625unsigned i; 626int ret; 627 Explicit null dereferenced (FORWARD_NULL)2. var_deref_op: Dereferencing null pointer bo. 628spin_lock(>bdev->lru_lock); 629for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) { The spin_lock on bo is dereferencing a null bo pointer. Colin ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
re: drm/ttm: switch to per device LRU lock
Hi, Static analysis with Coverity in linux-next has detected an issue in drivers/gpu/drm/ttm/ttm_bo.c with the follow commit: commit a1f091f8ef2b680a5184db065527612247cb4cae Author: Christian König Date: Tue Oct 6 17:26:42 2020 +0200 drm/ttm: switch to per device LRU lock Instead of having a global lock for potentially less contention. The analysis is as follows: 617 int ttm_mem_evict_first(struct ttm_device *bdev, 618struct ttm_resource_manager *man, 619const struct ttm_place *place, 620struct ttm_operation_ctx *ctx, 621struct ww_acquire_ctx *ticket) 622 { 1. assign_zero: Assigning: bo = NULL. 623struct ttm_buffer_object *bo = NULL, *busy_bo = NULL; 624bool locked = false; 625unsigned i; 626int ret; 627 Explicit null dereferenced (FORWARD_NULL)2. var_deref_op: Dereferencing null pointer bo. 628spin_lock(>bdev->lru_lock); 629for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) { The spin_lock on bo is dereferencing a null bo pointer. Colin ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
Re: [PATCH v2 00/10] drm: Support simple-framebuffer devices and firmware fbs
Hi, On 3/18/21 11:29 AM, Thomas Zimmermann wrote: > This patchset adds support for simple-framebuffer platform devices and > a handover mechanism for native drivers to take-over control of the > hardware. > > The new driver, called simpledrm, binds to a simple-frambuffer platform > device. The kernel's boot code creates such devices for firmware-provided > framebuffers, such as EFI-GOP or VESA. Typically the BIOS, UEFI or boot > loader sets up the framebuffers. Description via device tree is also an > option. > > Simpledrm is small enough to be linked into the kernel. The driver's main > purpose is to provide graphical output during the early phases of the boot > process, before the native DRM drivers are available. Native drivers are > typically loaded from an initrd ram disk. Occationally simpledrm can also > serve as interim solution on graphics hardware without native DRM driver. > > So far distributions rely on fbdev drivers, such as efifb, vesafb or > simplefb, for early-boot graphical output. However fbdev is deprecated and > the drivers do not provide DRM interfaces for modern userspace. > > Patches 1 and 2 prepare the DRM format helpers for simpledrm. > > Patches 3 and 4 add a hand-over mechanism. Simpledrm acquires it's > framebuffer's I/O-memory range and provides a callback function to be > removed by a native driver. The native driver will remove simpledrm before > taking over the hardware. The removal is integrated into existing helpers, > so drivers use it automatically. > > Patches 5 to 10 add the simpledrm driver. It's build on simple DRM helpers > and SHMEM. It supports 16-bit, 24-bit and 32-bit RGB framebuffers. During > pageflips, SHMEM buffers are copied into the framebuffer memory, similar > to cirrus or mgag200. The code in patches 8 and 9 handles clocks and > regulators. It's based on the simplefb drivers, but has been modified for > DRM. Thank you for your work on this, this is very interesting. > I've also been working on fastboot support (i.e., flicker-free booting). > This requires state-readout from simpledrm via generic interfaces, as > outlined in [1]. I do have some prototype code, but it will take a while > to get this ready. Simpledrm will then support it. > > I've tested simpledrm with x86 EFI and VESA framebuffers, which both work > reliably. The fbdev console and Weston work automatically. Xorg requires > manual configuration of the device. Xorgs current modesetting driver does > not work with both, platform and PCI device, for the same physical > hardware. Once configured, X11 works. I looked into X11, but couldn't see > an easy way of fixing the problem. With the push towards Wayland+Xwayland > I expect the problem to become a non-issue soon. Additional testing has > been reported at [2]. > > One cosmetical issue is that simpledrm's device file is card0 and the > native driver's device file is card1. After simpledrm has been kicked out, > only card1 is left. This does not seem to be a practical problem however. > > TODO/IDEAS: > > * provide deferred takeover I'm not sure what you mean with this ? Currently deferred-takeover is handled in the fbcon code. Current flickerfree boot works like this (assuming a single LCD panel in a laptop): 1. EFI/GOP sets up the framebuffer, draws a vendor logo 2. The bootloader runs in silent mode and does not touch anything gfx related 3. kernel boots, with a loglevel of 3 so only CRIT/EMERG messages are shown 2. efifb loads; and tells fbcon that a framebuffer is now available for it to "bind" to. Since CONFIG_FRAMEBUFFER_CONSOLE_DEFERRED_TAKEOVER=y fbcon defers taking over the console and leaves the dummy-console driver in place (unless there have already been kernel messages logged, which there shouldn't because loglevel=3) 3. i915 loads, reads out the hw state compares this to the preferred-mode for the panel which it would set, they match, nothing happens. i915 takes ownership of the scanout-buffer set up by the GOP, but leaves it in place. i915 also removes the efifb /dev/fb0 and installs its own /dev/fb0 fbdev compat device, fbcon is notified of this, but is still deferred and leaves the dummy console driver in place as console driver. 4. Plymouth loads, allocates a new scan-out buffer at the panel's preferred resolution, plymouth reads the vendor-logo through the BGRT ACPI interface and fills the scanout-buffer with the vendor-logo + a spinner. Then plymouth installs the new scanout-buffer on the crtc, this is done atomically during vsync, so the user sees no changes, other then the spinner appearing (note the active VT is now in graphical mode) 5. From here on not flickering is a userspace problem AFAICT this should work fine with simplekms too, unless it clears the screen to black when it binds. An addition to the above sequence, if at any time either the kernel or userspace prints a message to the console; and at that time a fbdev is registered then fbcon
[PATCH AUTOSEL 4.9 07/13] vhost: Fix vhost_vq_reset()
From: Laurent Vivier [ Upstream commit beb691e69f4dec7bfe8b81b509848acfd1f0dbf9 ] vhost_reset_is_le() is vhost_init_is_le(), and in the case of cross-endian legacy, vhost_init_is_le() depends on vq->user_be. vq->user_be is set by vhost_disable_cross_endian(). But in vhost_vq_reset(), we have: vhost_reset_is_le(vq); vhost_disable_cross_endian(vq); And so user_be is used before being set. To fix that, reverse the lines order as there is no other dependency between them. Signed-off-by: Laurent Vivier Link: https://lore.kernel.org/r/20210312140913.788592-1-lviv...@redhat.com Signed-off-by: Michael S. Tsirkin Signed-off-by: Sasha Levin --- drivers/vhost/vhost.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index b14e62f11075..d2431afeda84 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -306,8 +306,8 @@ static void vhost_vq_reset(struct vhost_dev *dev, vq->call_ctx = NULL; vq->call = NULL; vq->log_ctx = NULL; - vhost_reset_is_le(vq); vhost_disable_cross_endian(vq); + vhost_reset_is_le(vq); vq->busyloop_timeout = 0; vq->umem = NULL; vq->iotlb = NULL; -- 2.30.1 ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
[PATCH AUTOSEL 4.14 10/16] vhost: Fix vhost_vq_reset()
From: Laurent Vivier [ Upstream commit beb691e69f4dec7bfe8b81b509848acfd1f0dbf9 ] vhost_reset_is_le() is vhost_init_is_le(), and in the case of cross-endian legacy, vhost_init_is_le() depends on vq->user_be. vq->user_be is set by vhost_disable_cross_endian(). But in vhost_vq_reset(), we have: vhost_reset_is_le(vq); vhost_disable_cross_endian(vq); And so user_be is used before being set. To fix that, reverse the lines order as there is no other dependency between them. Signed-off-by: Laurent Vivier Link: https://lore.kernel.org/r/20210312140913.788592-1-lviv...@redhat.com Signed-off-by: Michael S. Tsirkin Signed-off-by: Sasha Levin --- drivers/vhost/vhost.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 3d7bea15c57b..4b5590f4e98b 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -324,8 +324,8 @@ static void vhost_vq_reset(struct vhost_dev *dev, vq->call_ctx = NULL; vq->call = NULL; vq->log_ctx = NULL; - vhost_reset_is_le(vq); vhost_disable_cross_endian(vq); + vhost_reset_is_le(vq); vq->busyloop_timeout = 0; vq->umem = NULL; vq->iotlb = NULL; -- 2.30.1 ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
[PATCH AUTOSEL 4.19 12/20] vhost: Fix vhost_vq_reset()
From: Laurent Vivier [ Upstream commit beb691e69f4dec7bfe8b81b509848acfd1f0dbf9 ] vhost_reset_is_le() is vhost_init_is_le(), and in the case of cross-endian legacy, vhost_init_is_le() depends on vq->user_be. vq->user_be is set by vhost_disable_cross_endian(). But in vhost_vq_reset(), we have: vhost_reset_is_le(vq); vhost_disable_cross_endian(vq); And so user_be is used before being set. To fix that, reverse the lines order as there is no other dependency between them. Signed-off-by: Laurent Vivier Link: https://lore.kernel.org/r/20210312140913.788592-1-lviv...@redhat.com Signed-off-by: Michael S. Tsirkin Signed-off-by: Sasha Levin --- drivers/vhost/vhost.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 98b6eb902df9..732327756ee1 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -322,8 +322,8 @@ static void vhost_vq_reset(struct vhost_dev *dev, vq->kick = NULL; vq->call_ctx = NULL; vq->log_ctx = NULL; - vhost_reset_is_le(vq); vhost_disable_cross_endian(vq); + vhost_reset_is_le(vq); vq->busyloop_timeout = 0; vq->umem = NULL; vq->iotlb = NULL; -- 2.30.1 ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
[PATCH AUTOSEL 5.4 16/24] vhost: Fix vhost_vq_reset()
From: Laurent Vivier [ Upstream commit beb691e69f4dec7bfe8b81b509848acfd1f0dbf9 ] vhost_reset_is_le() is vhost_init_is_le(), and in the case of cross-endian legacy, vhost_init_is_le() depends on vq->user_be. vq->user_be is set by vhost_disable_cross_endian(). But in vhost_vq_reset(), we have: vhost_reset_is_le(vq); vhost_disable_cross_endian(vq); And so user_be is used before being set. To fix that, reverse the lines order as there is no other dependency between them. Signed-off-by: Laurent Vivier Link: https://lore.kernel.org/r/20210312140913.788592-1-lviv...@redhat.com Signed-off-by: Michael S. Tsirkin Signed-off-by: Sasha Levin --- drivers/vhost/vhost.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 57ab79fbcee9..a279ecacbf60 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -320,8 +320,8 @@ static void vhost_vq_reset(struct vhost_dev *dev, vq->kick = NULL; vq->call_ctx = NULL; vq->log_ctx = NULL; - vhost_reset_is_le(vq); vhost_disable_cross_endian(vq); + vhost_reset_is_le(vq); vq->busyloop_timeout = 0; vq->umem = NULL; vq->iotlb = NULL; -- 2.30.1 ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
[PATCH AUTOSEL 5.10 22/39] vhost: Fix vhost_vq_reset()
From: Laurent Vivier [ Upstream commit beb691e69f4dec7bfe8b81b509848acfd1f0dbf9 ] vhost_reset_is_le() is vhost_init_is_le(), and in the case of cross-endian legacy, vhost_init_is_le() depends on vq->user_be. vq->user_be is set by vhost_disable_cross_endian(). But in vhost_vq_reset(), we have: vhost_reset_is_le(vq); vhost_disable_cross_endian(vq); And so user_be is used before being set. To fix that, reverse the lines order as there is no other dependency between them. Signed-off-by: Laurent Vivier Link: https://lore.kernel.org/r/20210312140913.788592-1-lviv...@redhat.com Signed-off-by: Michael S. Tsirkin Signed-off-by: Sasha Levin --- drivers/vhost/vhost.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index a262e12c6dc2..5ccb0705beae 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -332,8 +332,8 @@ static void vhost_vq_reset(struct vhost_dev *dev, vq->error_ctx = NULL; vq->kick = NULL; vq->log_ctx = NULL; - vhost_reset_is_le(vq); vhost_disable_cross_endian(vq); + vhost_reset_is_le(vq); vq->busyloop_timeout = 0; vq->umem = NULL; vq->iotlb = NULL; -- 2.30.1 ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
[PATCH AUTOSEL 5.10 01/39] virtiofs: Fail dax mount if device does not support it
From: Vivek Goyal [ Upstream commit 3f9b9efd82a84f27e95d0414f852caf1fa839e83 ] Right now "mount -t virtiofs -o dax myfs /mnt/virtiofs" succeeds even if filesystem deivce does not have a cache window and hence DAX can't be supported. This gives a false sense to user that they are using DAX with virtiofs but fact of the matter is that they are not. Fix this by returning error if dax can't be supported and user has asked for it. Signed-off-by: Vivek Goyal Reviewed-by: Stefan Hajnoczi Signed-off-by: Miklos Szeredi Signed-off-by: Sasha Levin --- fs/fuse/virtio_fs.c | 9 - 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c index d2c0e58c6416..3d83c9e12848 100644 --- a/fs/fuse/virtio_fs.c +++ b/fs/fuse/virtio_fs.c @@ -1324,8 +1324,15 @@ static int virtio_fs_fill_super(struct super_block *sb, struct fs_context *fsc) /* virtiofs allocates and installs its own fuse devices */ ctx->fudptr = NULL; - if (ctx->dax) + if (ctx->dax) { + if (!fs->dax_dev) { + err = -EINVAL; + pr_err("virtio-fs: dax can't be enabled as filesystem" + " device does not support it.\n"); + goto err_free_fuse_devs; + } ctx->dax_dev = fs->dax_dev; + } err = fuse_fill_super_common(sb, ctx); if (err < 0) goto err_free_fuse_devs; -- 2.30.1 ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
[PATCH AUTOSEL 5.11 23/44] vhost: Fix vhost_vq_reset()
From: Laurent Vivier [ Upstream commit beb691e69f4dec7bfe8b81b509848acfd1f0dbf9 ] vhost_reset_is_le() is vhost_init_is_le(), and in the case of cross-endian legacy, vhost_init_is_le() depends on vq->user_be. vq->user_be is set by vhost_disable_cross_endian(). But in vhost_vq_reset(), we have: vhost_reset_is_le(vq); vhost_disable_cross_endian(vq); And so user_be is used before being set. To fix that, reverse the lines order as there is no other dependency between them. Signed-off-by: Laurent Vivier Link: https://lore.kernel.org/r/20210312140913.788592-1-lviv...@redhat.com Signed-off-by: Michael S. Tsirkin Signed-off-by: Sasha Levin --- drivers/vhost/vhost.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index a262e12c6dc2..5ccb0705beae 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -332,8 +332,8 @@ static void vhost_vq_reset(struct vhost_dev *dev, vq->error_ctx = NULL; vq->kick = NULL; vq->log_ctx = NULL; - vhost_reset_is_le(vq); vhost_disable_cross_endian(vq); + vhost_reset_is_le(vq); vq->busyloop_timeout = 0; vq->umem = NULL; vq->iotlb = NULL; -- 2.30.1 ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
[PATCH AUTOSEL 5.11 01/44] virtiofs: Fail dax mount if device does not support it
From: Vivek Goyal [ Upstream commit 3f9b9efd82a84f27e95d0414f852caf1fa839e83 ] Right now "mount -t virtiofs -o dax myfs /mnt/virtiofs" succeeds even if filesystem deivce does not have a cache window and hence DAX can't be supported. This gives a false sense to user that they are using DAX with virtiofs but fact of the matter is that they are not. Fix this by returning error if dax can't be supported and user has asked for it. Signed-off-by: Vivek Goyal Reviewed-by: Stefan Hajnoczi Signed-off-by: Miklos Szeredi Signed-off-by: Sasha Levin --- fs/fuse/virtio_fs.c | 9 - 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c index 8868ac31a3c0..4ee6f734ba83 100644 --- a/fs/fuse/virtio_fs.c +++ b/fs/fuse/virtio_fs.c @@ -1324,8 +1324,15 @@ static int virtio_fs_fill_super(struct super_block *sb, struct fs_context *fsc) /* virtiofs allocates and installs its own fuse devices */ ctx->fudptr = NULL; - if (ctx->dax) + if (ctx->dax) { + if (!fs->dax_dev) { + err = -EINVAL; + pr_err("virtio-fs: dax can't be enabled as filesystem" + " device does not support it.\n"); + goto err_free_fuse_devs; + } ctx->dax_dev = fs->dax_dev; + } err = fuse_fill_super_common(sb, ctx); if (err < 0) goto err_free_fuse_devs; -- 2.30.1 ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
Re: [PATCH v5 03/11] vhost-vdpa: protect concurrent access to vhost device iotlb
On Mon, Mar 15, 2021 at 01:37:13PM +0800, Xie Yongji wrote: Use vhost_dev->mutex to protect vhost device iotlb from concurrent access. Fixes: 4c8cf318("vhost: introduce vDPA-based backend") Signed-off-by: Xie Yongji --- drivers/vhost/vdpa.c | 6 +- 1 file changed, 5 insertions(+), 1 deletion(-) Reviewed-by: Stefano Garzarella ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
Re: [RFC PATCH v7 00/22] virtio/vsock: introduce SOCK_SEQPACKET support
Hi Arseny, On Tue, Mar 23, 2021 at 04:07:13PM +0300, Arseny Krasnov wrote: This patchset implements support of SOCK_SEQPACKET for virtio transport. As SOCK_SEQPACKET guarantees to save record boundaries, so to do it, two new packet operations were added: first for start of record and second to mark end of record(SEQ_BEGIN and SEQ_END later). Also, both operations carries metadata - to maintain boundaries and payload integrity. Metadata is introduced by adding special header with two fields - message id and message length: struct virtio_vsock_seq_hdr { __le32 msg_id; __le32 msg_len; } __attribute__((packed)); This header is transmitted as payload of SEQ_BEGIN and SEQ_END packets(buffer of second virtio descriptor in chain) in the same way as data transmitted in RW packets. Payload was chosen as buffer for this header to avoid touching first virtio buffer which carries header of packet, because someone could check that size of this buffer is equal to size of packet header. To send record, packet with start marker is sent first(it's header carries length of record and id),then all data is sent as usual 'RW' packets and finally SEQ_END is sent(it carries id of message, which is equal to id of SEQ_BEGIN), also after sending SEQ_END id is incremented. On receiver's side,size of record is known from packet with start record marker. To check that no packets were dropped by transport, 'msg_id's of two sequential SEQ_BEGIN and SEQ_END are checked to be equal and length of data between two markers is compared to then length in SEQ_BEGIN header. Now as packets of one socket are not reordered neither on vsock nor on vhost transport layers, such markers allows to restore original record on receiver's side. If user's buffer is smaller that record length, when all out of size data is dropped. Maximum length of datagram is not limited as in stream socket, because same credit logic is used. Difference with stream socket is that user is not woken up until whole record is received or error occurred. Implementation also supports 'MSG_EOR' and 'MSG_TRUNC' flags. Tests also implemented. Thanks to st...@yandex.ru for encouragements and initial design recommendations. Arseny Krasnov (22): af_vsock: update functions for connectible socket af_vsock: separate wait data loop af_vsock: separate receive data loop af_vsock: implement SEQPACKET receive loop af_vsock: separate wait space loop af_vsock: implement send logic for SEQPACKET af_vsock: rest of SEQPACKET support af_vsock: update comments for stream sockets virtio/vsock: set packet's type in virtio_transport_send_pkt_info() virtio/vsock: simplify credit update function API virtio/vsock: dequeue callback for SOCK_SEQPACKET virtio/vsock: fetch length for SEQPACKET record virtio/vsock: add SEQPACKET receive logic virtio/vsock: rest of SOCK_SEQPACKET support virtio/vsock: SEQPACKET support feature bit virtio/vsock: setup SEQPACKET ops for transport vhost/vsock: setup SEQPACKET ops for transport vsock/loopback: setup SEQPACKET ops for transport vhost/vsock: SEQPACKET feature bit support virtio/vsock: SEQPACKET feature bit support vsock_test: add SOCK_SEQPACKET tests virtio/vsock: update trace event for SEQPACKET drivers/vhost/vsock.c| 21 +- include/linux/virtio_vsock.h | 21 + include/net/af_vsock.h | 9 + .../events/vsock_virtio_transport_common.h | 48 +- include/uapi/linux/virtio_vsock.h| 19 + net/vmw_vsock/af_vsock.c | 581 +++-- net/vmw_vsock/virtio_transport.c | 17 + net/vmw_vsock/virtio_transport_common.c | 379 +-- net/vmw_vsock/vsock_loopback.c | 12 + tools/testing/vsock/util.c | 32 +- tools/testing/vsock/util.h | 3 + tools/testing/vsock/vsock_test.c | 126 12 files changed, 1015 insertions(+), 253 deletions(-) v6 -> v7: General changelog: - virtio transport callback for message length now removed from transport. Length of record is returned by dequeue callback. - function which tries to get message length now returns 0 when rx queue is empty. Also length of current message in progress is set to 0, when message processed or error happens. - patches for virtio feature bit moved after patches with transport ops. Per patch changelog: see every patch after '---' line. I reviewed the series and I left some comments, I think we are at a good point, but we should have the specification accepted before merging this series to avoid having to change the implementation later. What do you think? Thanks, Stefano ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
Re: [RFC PATCH v7 18/22] vsock/loopback: setup SEQPACKET ops for transport
On Tue, Mar 23, 2021 at 04:14:33PM +0300, Arseny Krasnov wrote: This adds SEQPACKET ops for loopback transport and 'seqpacket_allow()' callback. Signed-off-by: Arseny Krasnov --- net/vmw_vsock/vsock_loopback.c | 12 1 file changed, 12 insertions(+) Reviewed-by: Stefano Garzarella ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
Re: [RFC PATCH v7 17/22] vhost/vsock: setup SEQPACKET ops for transport
On Tue, Mar 23, 2021 at 04:14:18PM +0300, Arseny Krasnov wrote: This also removes ignore of non-stream type of packets and adds 'seqpacket_allow()' callback. Signed-off-by: Arseny Krasnov --- drivers/vhost/vsock.c | 15 +-- 1 file changed, 13 insertions(+), 2 deletions(-) Same thing for this transporter too, maybe we can merge with the patch "vhost/vsock: SEQPACKET feature bit support". Stefano diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c index 5e78fb719602..5af141772068 100644 --- a/drivers/vhost/vsock.c +++ b/drivers/vhost/vsock.c @@ -354,8 +354,7 @@ vhost_vsock_alloc_pkt(struct vhost_virtqueue *vq, return NULL; } - if (le16_to_cpu(pkt->hdr.type) == VIRTIO_VSOCK_TYPE_STREAM) - pkt->len = le32_to_cpu(pkt->hdr.len); + pkt->len = le32_to_cpu(pkt->hdr.len); /* No payload */ if (!pkt->len) @@ -398,6 +397,8 @@ static bool vhost_vsock_more_replies(struct vhost_vsock *vsock) return val < vq->num; } +static bool vhost_transport_seqpacket_allow(void); + static struct virtio_transport vhost_transport = { .transport = { .module = THIS_MODULE, @@ -424,6 +425,10 @@ static struct virtio_transport vhost_transport = { .stream_is_active = virtio_transport_stream_is_active, .stream_allow = virtio_transport_stream_allow, + .seqpacket_dequeue= virtio_transport_seqpacket_dequeue, + .seqpacket_enqueue= virtio_transport_seqpacket_enqueue, + .seqpacket_allow = vhost_transport_seqpacket_allow, + .notify_poll_in = virtio_transport_notify_poll_in, .notify_poll_out = virtio_transport_notify_poll_out, .notify_recv_init = virtio_transport_notify_recv_init, @@ -439,8 +444,14 @@ static struct virtio_transport vhost_transport = { }, .send_pkt = vhost_transport_send_pkt, + .seqpacket_allow = false }; +static bool vhost_transport_seqpacket_allow(void) +{ + return vhost_transport.seqpacket_allow; +} + static void vhost_vsock_handle_tx_kick(struct vhost_work *work) { struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue, -- 2.25.1 ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
Re: [RFC PATCH v7 16/22] virtio/vsock: setup SEQPACKET ops for transport
On Tue, Mar 23, 2021 at 04:14:03PM +0300, Arseny Krasnov wrote: This adds SEQPACKET ops for virtio transport and 'seqpacket_allow()' callback. Signed-off-by: Arseny Krasnov --- net/vmw_vsock/virtio_transport.c | 12 1 file changed, 12 insertions(+) Sorry for not mentioning this in the previous review, but maybe we can merge this patch with "virtio/vsock: SEQPACKET feature bit support", so we have a single patch when we fully enable the SEQPACKET support in this transport. Anyway, I don't have a strong opinion on that. What do you think? Stefano diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c index 2700a63ab095..83ae2078c847 100644 --- a/net/vmw_vsock/virtio_transport.c +++ b/net/vmw_vsock/virtio_transport.c @@ -443,6 +443,8 @@ static void virtio_vsock_rx_done(struct virtqueue *vq) queue_work(virtio_vsock_workqueue, >rx_work); } +static bool virtio_transport_seqpacket_allow(void); + static struct virtio_transport virtio_transport = { .transport = { .module = THIS_MODULE, @@ -469,6 +471,10 @@ static struct virtio_transport virtio_transport = { .stream_is_active = virtio_transport_stream_is_active, .stream_allow = virtio_transport_stream_allow, + .seqpacket_dequeue= virtio_transport_seqpacket_dequeue, + .seqpacket_enqueue= virtio_transport_seqpacket_enqueue, + .seqpacket_allow = virtio_transport_seqpacket_allow, + .notify_poll_in = virtio_transport_notify_poll_in, .notify_poll_out = virtio_transport_notify_poll_out, .notify_recv_init = virtio_transport_notify_recv_init, @@ -483,8 +489,14 @@ static struct virtio_transport virtio_transport = { }, .send_pkt = virtio_transport_send_pkt, + .seqpacket_allow = false }; +static bool virtio_transport_seqpacket_allow(void) +{ + return virtio_transport.seqpacket_allow; +} + static void virtio_transport_rx_work(struct work_struct *work) { struct virtio_vsock *vsock = -- 2.25.1 ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
Re: [RFC PATCH v7 15/22] virtio/vsock: SEQPACKET support feature bit
On Tue, Mar 23, 2021 at 04:13:49PM +0300, Arseny Krasnov wrote: This adds new virtio vsock specific feature bit which means SOCK_SEQPACKET support. Guest negotiates this bit with vhost, thus checking that vhost side supports SEQPACKET. Signed-off-by: Arseny Krasnov --- include/uapi/linux/virtio_vsock.h | 3 +++ 1 file changed, 3 insertions(+) Since you have this patch, I think you can generalize the title, update the description, and merge here the changes I mentioned in patch 11/22 about changes of include/uapi/linux/virtio_vsock.h. So you can have a single patch with the new virtio-spec defines and structs related to SEQPACKET, of course then we move it before patch 11. What do you think? Stefano diff --git a/include/uapi/linux/virtio_vsock.h b/include/uapi/linux/virtio_vsock.h index 692f8078cced..619aaebb355a 100644 --- a/include/uapi/linux/virtio_vsock.h +++ b/include/uapi/linux/virtio_vsock.h @@ -38,6 +38,9 @@ #include #include +/* The feature bitmap for virtio vsock */ +#define VIRTIO_VSOCK_F_SEQPACKET 0 /* SOCK_SEQPACKET supported */ + struct virtio_vsock_config { __le64 guest_cid; } __attribute__((packed)); -- 2.25.1 ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
Re: [RFC PATCH v7 14/22] virtio/vsock: rest of SOCK_SEQPACKET support
On Tue, Mar 23, 2021 at 04:13:29PM +0300, Arseny Krasnov wrote: This adds rest of logic for SEQPACKET: 1) SEQPACKET specific functions which send SEQ_BEGIN/SEQ_END. Note that both functions may sleep to wait enough space for SEQPACKET header. 2) SEQ_BEGIN/SEQ_END in TAP packet capture. 3) Send SHUTDOWN on socket close for SEQPACKET type. 4) Set SEQPACKET packet type during send. 5) Set MSG_EOR in flags for SEQPACKET during send. 6) 'seqpacket_allow' flag to virtio transport. Signed-off-by: Arseny Krasnov --- v6 -> v7: In 'virtio_transport_seqpacket_enqueue()', 'next_tx_msg_id' is updated in both cases when message send successfully or error occured. include/linux/virtio_vsock.h| 7 ++ net/vmw_vsock/virtio_transport_common.c | 88 - 2 files changed, 93 insertions(+), 2 deletions(-) diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h index 0e3aa395c07c..ab5f56fd7251 100644 --- a/include/linux/virtio_vsock.h +++ b/include/linux/virtio_vsock.h @@ -22,6 +22,7 @@ struct virtio_vsock_seq_state { u32 user_read_seq_len; u32 user_read_copied; u32 curr_rx_msg_id; + u32 next_tx_msg_id; }; /* Per-socket state (accessed via vsk->trans) */ @@ -76,6 +77,8 @@ struct virtio_transport { /* Takes ownership of the packet */ int (*send_pkt)(struct virtio_vsock_pkt *pkt); + + bool seqpacket_allow; }; ssize_t @@ -89,6 +92,10 @@ virtio_transport_dgram_dequeue(struct vsock_sock *vsk, size_t len, int flags); int +virtio_transport_seqpacket_enqueue(struct vsock_sock *vsk, + struct msghdr *msg, + size_t len); +int virtio_transport_seqpacket_dequeue(struct vsock_sock *vsk, struct msghdr *msg, int flags, diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index bfe0d7026bf8..01a56c7da8bd 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -139,6 +139,8 @@ static struct sk_buff *virtio_transport_build_skb(void *opaque) break; case VIRTIO_VSOCK_OP_CREDIT_UPDATE: case VIRTIO_VSOCK_OP_CREDIT_REQUEST: + case VIRTIO_VSOCK_OP_SEQ_BEGIN: + case VIRTIO_VSOCK_OP_SEQ_END: hdr->op = cpu_to_le16(AF_VSOCK_OP_CONTROL); break; default: @@ -187,7 +189,12 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk, struct virtio_vsock_pkt *pkt; u32 pkt_len = info->pkt_len; - info->type = VIRTIO_VSOCK_TYPE_STREAM; + info->type = virtio_transport_get_type(sk_vsock(vsk)); + + if (info->type == VIRTIO_VSOCK_TYPE_SEQPACKET && + info->msg && + info->msg->msg_flags & MSG_EOR) + info->flags |= VIRTIO_VSOCK_RW_EOR; t_ops = virtio_transport_get_ops(vsk); if (unlikely(!t_ops)) @@ -401,6 +408,43 @@ virtio_transport_stream_do_dequeue(struct vsock_sock *vsk, return err; } +static int virtio_transport_seqpacket_send_ctrl(struct vsock_sock *vsk, + int type, + size_t len, + int flags) +{ + struct virtio_vsock_sock *vvs = vsk->trans; + struct virtio_vsock_pkt_info info = { + .op = type, + .vsk = vsk, + .pkt_len = sizeof(struct virtio_vsock_seq_hdr) + }; + + struct virtio_vsock_seq_hdr seq_hdr = { + .msg_id = cpu_to_le32(vvs->seq_state.next_tx_msg_id), + .msg_len = cpu_to_le32(len) + }; + + struct kvec seq_hdr_kiov = { + .iov_base = (void *)_hdr, + .iov_len = sizeof(struct virtio_vsock_seq_hdr) + }; + + struct msghdr msg = {0}; + + //XXX: do we need 'vsock_transport_send_notify_data' pointer? + if (vsock_wait_space(sk_vsock(vsk), +sizeof(struct virtio_vsock_seq_hdr), +flags, NULL)) + return -1; + + iov_iter_kvec(_iter, WRITE, _hdr_kiov, 1, sizeof(seq_hdr)); + + info.msg = + + return virtio_transport_send_pkt_info(vsk, ); +} + static inline void virtio_transport_remove_pkt(struct virtio_vsock_pkt *pkt) { list_del(>list); @@ -595,6 +639,46 @@ virtio_transport_seqpacket_dequeue(struct vsock_sock *vsk, } EXPORT_SYMBOL_GPL(virtio_transport_seqpacket_dequeue); +int +virtio_transport_seqpacket_enqueue(struct vsock_sock *vsk, + struct msghdr *msg, + size_t len) +{ + int written = -1; + + if (msg->msg_iter.iov_offset == 0) { + /* Send SEQBEGIN. */ + if (virtio_transport_seqpacket_send_ctrl(vsk, +
Re: [RFC PATCH v7 13/22] virtio/vsock: add SEQPACKET receive logic
On Tue, Mar 23, 2021 at 04:13:13PM +0300, Arseny Krasnov wrote: This modifies current receive logic for SEQPACKET support: 1) Inserts 'SEQ_BEGIN' packet to socket's rx queue. 2) Inserts 'RW' packet to socket's rx queue, but without merging with buffer of last packet in queue. 3) Performs check for packet and socket types on receive(if mismatch, then reset connection). Signed-off-by: Arseny Krasnov --- v6 -> v7: In 'virtio_transport_recv_pkt()', 'sock_put()' is added, when type of received packet does not match to the type of socket. net/vmw_vsock/virtio_transport_common.c | 64 + 1 file changed, 45 insertions(+), 19 deletions(-) Reviewed-by: Stefano Garzarella ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
Re: [RFC PATCH v7 12/22] virtio/vsock: fetch length for SEQPACKET record
On Tue, Mar 23, 2021 at 04:12:55PM +0300, Arseny Krasnov wrote: This adds transport callback which tries to fetch record begin marker from socket's rx queue. It is called from af_vsock.c before reading data packets of record. Signed-off-by: Arseny Krasnov --- v6 -> v7: 1) Now 'virtio_transport_seqpacket_seq_get_len()' returns 0, if rx queue of socket is empty. Else it returns length of current message to handle. 2) If dequeue callback is called, but there is no detected length of message to dequeue, EAGAIN is returned, and outer loop restarts receiving. net/vmw_vsock/virtio_transport_common.c | 61 + 1 file changed, 61 insertions(+) diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index a8f4326e45e8..41f05034593e 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -399,6 +399,62 @@ static inline void virtio_transport_remove_pkt(struct virtio_vsock_pkt *pkt) virtio_transport_free_pkt(pkt); } +static size_t virtio_transport_drop_until_seq_begin(struct virtio_vsock_sock *vvs) +{ + struct virtio_vsock_pkt *pkt, *n; + size_t bytes_dropped = 0; + + list_for_each_entry_safe(pkt, n, >rx_queue, list) { + if (le16_to_cpu(pkt->hdr.op) == VIRTIO_VSOCK_OP_SEQ_BEGIN) + break; + + bytes_dropped += le32_to_cpu(pkt->hdr.len); + virtio_transport_dec_rx_pkt(vvs, pkt); + virtio_transport_remove_pkt(pkt); + } + + return bytes_dropped; +} + +static size_t virtio_transport_seqpacket_seq_get_len(struct vsock_sock *vsk) +{ + struct virtio_vsock_seq_hdr *seq_hdr; + struct virtio_vsock_sock *vvs; + struct virtio_vsock_pkt *pkt; + size_t bytes_dropped = 0; + + vvs = vsk->trans; + + spin_lock_bh(>rx_lock); + + /* Have some record to process, return it's length. */ + if (vvs->seq_state.user_read_seq_len) + goto out; + + /* Fetch all orphaned 'RW' packets and send credit update. */ + bytes_dropped = virtio_transport_drop_until_seq_begin(vvs); + + if (list_empty(>rx_queue)) + goto out; + + pkt = list_first_entry(>rx_queue, struct virtio_vsock_pkt, list); + + vvs->seq_state.user_read_copied = 0; + + seq_hdr = (struct virtio_vsock_seq_hdr *)pkt->buf; + vvs->seq_state.user_read_seq_len = le32_to_cpu(seq_hdr->msg_len); + vvs->seq_state.curr_rx_msg_id = le32_to_cpu(seq_hdr->msg_id); + virtio_transport_dec_rx_pkt(vvs, pkt); + virtio_transport_remove_pkt(pkt); +out: + spin_unlock_bh(>rx_lock); + + if (bytes_dropped) + virtio_transport_send_credit_update(vsk); + + return vvs->seq_state.user_read_seq_len; +} + static int virtio_transport_seqpacket_do_dequeue(struct vsock_sock *vsk, struct msghdr *msg, bool *msg_ready) @@ -522,6 +578,11 @@ virtio_transport_seqpacket_dequeue(struct vsock_sock *vsk, if (flags & MSG_PEEK) return -EOPNOTSUPP; + *msg_len = virtio_transport_seqpacket_seq_get_len(vsk); + + if (*msg_len == 0) + return -EAGAIN; + Okay, I see now, I think you can move this patch before the previous one or merge them in a single patch, it is better to review and to bisect. As mentioned, I think we can return msg_len if virtio_transport_seqpacket_do_dequeue() does not fail, otherwise the error. I mean something like this: static ssize_t virtio_transport_seqpacket_do_dequeue(...) { size_t msg_len; ssize_t ret; msg_len = virtio_transport_seqpacket_seq_get_len(vsk); if (msg_len == 0) return -EAGAIN; ret = virtio_transport_seqpacket_do_dequeue(vsk, msg, msg_ready); if (ret < 0) return ret; return msg_len; } return virtio_transport_seqpacket_do_dequeue(vsk, msg, msg_ready); } EXPORT_SYMBOL_GPL(virtio_transport_seqpacket_dequeue); -- 2.25.1 ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
Re: [RFC PATCH v7 11/22] virtio/vsock: dequeue callback for SOCK_SEQPACKET
On Tue, Mar 23, 2021 at 04:12:41PM +0300, Arseny Krasnov wrote: This adds transport callback and it's logic for SEQPACKET dequeue. Callback fetches RW packets from rx queue of socket until whole record is copied(if user's buffer is full, user is not woken up). This is done to not stall sender, because if we wake up user and it leaves syscall, nobody will send credit update for rest of record, and sender will wait for next enter of read syscall at receiver's side. So if user buffer is full, we just send credit update and drop data. If during copy SEQ_BEGIN was found(and not all data was copied), copying is restarted by reset user's iov iterator(previous unfinished data is dropped). Signed-off-by: Arseny Krasnov --- v6 -> v7: 1) 'struct virtio_vsock_seqpacket_state' now renamed to 'struct virtio_vsock_seq_state'. 2) Field 'seqpacket_state' of 'struct virtio_vsock_sock' now renamed to 'seq_state'. 3) Current message length to process('user_read_seq_len') is set to 0 on error or message dequeue completed sucecssfully. include/linux/virtio_vsock.h| 14 +++ include/uapi/linux/virtio_vsock.h | 16 net/vmw_vsock/virtio_transport_common.c | 121 3 files changed, 151 insertions(+) diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h index dc636b727179..0e3aa395c07c 100644 --- a/include/linux/virtio_vsock.h +++ b/include/linux/virtio_vsock.h @@ -18,6 +18,12 @@ enum { VSOCK_VQ_MAX= 3, }; +struct virtio_vsock_seq_state { + u32 user_read_seq_len; + u32 user_read_copied; + u32 curr_rx_msg_id; +}; + /* Per-socket state (accessed via vsk->trans) */ struct virtio_vsock_sock { struct vsock_sock *vsk; @@ -36,6 +42,8 @@ struct virtio_vsock_sock { u32 rx_bytes; u32 buf_alloc; struct list_head rx_queue; + + struct virtio_vsock_seq_state seq_state; }; struct virtio_vsock_pkt { @@ -80,6 +88,12 @@ virtio_transport_dgram_dequeue(struct vsock_sock *vsk, struct msghdr *msg, size_t len, int flags); +int +virtio_transport_seqpacket_dequeue(struct vsock_sock *vsk, + struct msghdr *msg, + int flags, + bool *msg_ready, + size_t *msg_len); s64 virtio_transport_stream_has_data(struct vsock_sock *vsk); s64 virtio_transport_stream_has_space(struct vsock_sock *vsk); diff --git a/include/uapi/linux/virtio_vsock.h b/include/uapi/linux/virtio_vsock.h index 1d57ed3d84d2..692f8078cced 100644 --- a/include/uapi/linux/virtio_vsock.h +++ b/include/uapi/linux/virtio_vsock.h Maybe we can move the following changes to this file (that should match the virtio-spec discussed in the separate thread) in a separate patch with a reference to the spec. You can include this in the series before this patch. @@ -63,8 +63,14 @@ struct virtio_vsock_hdr { __le32 fwd_cnt; } __attribute__((packed)); +struct virtio_vsock_seq_hdr { + __le32 msg_id; + __le32 msg_len; +} __attribute__((packed)); + enum virtio_vsock_type { VIRTIO_VSOCK_TYPE_STREAM = 1, + VIRTIO_VSOCK_TYPE_SEQPACKET = 2, }; enum virtio_vsock_op { @@ -83,6 +89,11 @@ enum virtio_vsock_op { VIRTIO_VSOCK_OP_CREDIT_UPDATE = 6, /* Request the peer to send the credit info to us */ VIRTIO_VSOCK_OP_CREDIT_REQUEST = 7, + + /* Record begin for SOCK_SEQPACKET */ + VIRTIO_VSOCK_OP_SEQ_BEGIN = 8, + /* Record end for SOCK_SEQPACKET */ + VIRTIO_VSOCK_OP_SEQ_END = 9, }; /* VIRTIO_VSOCK_OP_SHUTDOWN flags values */ @@ -91,4 +102,9 @@ enum virtio_vsock_shutdown { VIRTIO_VSOCK_SHUTDOWN_SEND = 2, }; +/* VIRTIO_VSOCK_OP_RW flags values */ +enum virtio_vsock_rw { + VIRTIO_VSOCK_RW_EOR = 1, +}; + #endif /* _UAPI_LINUX_VIRTIO_VSOCK_H */ diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index 833104b71a1c..a8f4326e45e8 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -393,6 +393,114 @@ virtio_transport_stream_do_dequeue(struct vsock_sock *vsk, return err; } +static inline void virtio_transport_remove_pkt(struct virtio_vsock_pkt *pkt) +{ + list_del(>list); + virtio_transport_free_pkt(pkt); +} + +static int virtio_transport_seqpacket_do_dequeue(struct vsock_sock *vsk, +struct msghdr *msg, +bool *msg_ready) +{ + struct virtio_vsock_sock *vvs = vsk->trans; + struct virtio_vsock_pkt *pkt; + int err = 0; + size_t user_buf_len = msg->msg_iter.count; + + *msg_ready = false; + spin_lock_bh(>rx_lock); + + while (!*msg_ready && !list_empty(>rx_queue) && !err) { + pkt = list_first_entry(>rx_queue, struct virtio_vsock_pkt, list);
Re: [RFC PATCH v7 06/22] af_vsock: implement send logic for SEQPACKET
On Tue, Mar 23, 2021 at 04:10:42PM +0300, Arseny Krasnov wrote: This adds some logic to current stream enqueue function for SEQPACKET support: 1) Use transport's seqpacket enqueue callback. 2) Return value from enqueue function is whole record length or error for SOCK_SEQPACKET. Signed-off-by: Arseny Krasnov --- v6 -> v7: 'seqpacket_enqueue' callback interface changed, 'flags' argument was removed, because it was 'msg_flags' field of 'msg' argument which is already exists. include/net/af_vsock.h | 2 ++ net/vmw_vsock/af_vsock.c | 21 +++-- 2 files changed, 17 insertions(+), 6 deletions(-) Reviewed-by: Stefano Garzarella ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
Re: [RFC PATCH v7 05/22] af_vsock: separate wait space loop
On Tue, Mar 23, 2021 at 04:10:23PM +0300, Arseny Krasnov wrote: This moves loop that waits for space on send to separate function, because it will be used for SEQ_BEGIN/SEQ_END sending before and after data transmission. Waiting for SEQ_BEGIN/SEQ_END is needed because such packets carries SEQPACKET header that couldn't be fragmented by credit mechanism, so to avoid it, sender waits until enough space will be ready. Signed-off-by: Arseny Krasnov --- include/net/af_vsock.h | 2 + net/vmw_vsock/af_vsock.c | 99 +--- 2 files changed, 63 insertions(+), 38 deletions(-) I had already reviewed this one as well and it doesn't seem to have changed :-) Reviewed-by: Stefano Garzarella diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h index 74ac8a4c4168..7232f6c42a36 100644 --- a/include/net/af_vsock.h +++ b/include/net/af_vsock.h @@ -204,6 +204,8 @@ void vsock_remove_sock(struct vsock_sock *vsk); void vsock_for_each_connected_socket(void (*fn)(struct sock *sk)); int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk); bool vsock_find_cid(unsigned int cid); +int vsock_wait_space(struct sock *sk, size_t space, int flags, +struct vsock_transport_send_notify_data *send_data); / TAP / diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index fa0c37f97330..617ffe42693d 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -1692,6 +1692,65 @@ static int vsock_connectible_getsockopt(struct socket *sock, return 0; } +int vsock_wait_space(struct sock *sk, size_t space, int flags, +struct vsock_transport_send_notify_data *send_data) +{ + const struct vsock_transport *transport; + struct vsock_sock *vsk; + long timeout; + int err; + + DEFINE_WAIT_FUNC(wait, woken_wake_function); + + vsk = vsock_sk(sk); + transport = vsk->transport; + timeout = sock_sndtimeo(sk, flags & MSG_DONTWAIT); + err = 0; + + add_wait_queue(sk_sleep(sk), ); + + while (vsock_stream_has_space(vsk) < space && + sk->sk_err == 0 && + !(sk->sk_shutdown & SEND_SHUTDOWN) && + !(vsk->peer_shutdown & RCV_SHUTDOWN)) { + + /* Don't wait for non-blocking sockets. */ + if (timeout == 0) { + err = -EAGAIN; + goto out_err; + } + + if (send_data) { + err = transport->notify_send_pre_block(vsk, send_data); + if (err < 0) + goto out_err; + } + + release_sock(sk); + timeout = wait_woken(, TASK_INTERRUPTIBLE, timeout); + lock_sock(sk); + if (signal_pending(current)) { + err = sock_intr_errno(timeout); + goto out_err; + } else if (timeout == 0) { + err = -EAGAIN; + goto out_err; + } + } + + if (sk->sk_err) { + err = -sk->sk_err; + } else if ((sk->sk_shutdown & SEND_SHUTDOWN) || + (vsk->peer_shutdown & RCV_SHUTDOWN)) { + err = -EPIPE; + } + +out_err: + remove_wait_queue(sk_sleep(sk), ); + return err; +} +EXPORT_SYMBOL_GPL(vsock_wait_space); + static int vsock_connectible_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { @@ -1699,10 +1758,8 @@ static int vsock_connectible_sendmsg(struct socket *sock, struct msghdr *msg, struct vsock_sock *vsk; const struct vsock_transport *transport; ssize_t total_written; - long timeout; int err; struct vsock_transport_send_notify_data send_data; - DEFINE_WAIT_FUNC(wait, woken_wake_function); sk = sock->sk; vsk = vsock_sk(sk); @@ -1740,9 +1797,6 @@ static int vsock_connectible_sendmsg(struct socket *sock, struct msghdr *msg, goto out; } - /* Wait for room in the produce queue to enqueue our user's data. */ - timeout = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); - err = transport->notify_send_init(vsk, _data); if (err < 0) goto out; @@ -1750,39 +1804,8 @@ static int vsock_connectible_sendmsg(struct socket *sock, struct msghdr *msg, while (total_written < len) { ssize_t written; - add_wait_queue(sk_sleep(sk), ); - while (vsock_stream_has_space(vsk) == 0 && - sk->sk_err == 0 && - !(sk->sk_shutdown & SEND_SHUTDOWN) && - !(vsk->peer_shutdown & RCV_SHUTDOWN)) { - - /* Don't wait for non-blocking sockets. */ - if (timeout == 0) { - err = -EAGAIN; -
Re: [RFC PATCH v7 04/22] af_vsock: implement SEQPACKET receive loop
On Tue, Mar 23, 2021 at 04:10:03PM +0300, Arseny Krasnov wrote: This adds receive loop for SEQPACKET. It looks like receive loop for STREAM, but there is a little bit difference: 1) It doesn't call notify callbacks. 2) It doesn't care about 'SO_SNDLOWAT' and 'SO_RCVLOWAT' values, because there is no sense for these values in SEQPACKET case. 3) It waits until whole record is received or error is found during receiving. 4) It processes and sets 'MSG_TRUNC' flag. So to avoid extra conditions for two types of socket inside one loop, two independent functions were created. Signed-off-by: Arseny Krasnov --- v6 -> v7: 'seqpacket_get_len' callback now removed, length of message is returned by 'seqpacket_dequeue' callback. include/net/af_vsock.h | 4 ++ net/vmw_vsock/af_vsock.c | 88 +++- 2 files changed, 91 insertions(+), 1 deletion(-) diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h index b1c717286993..74ac8a4c4168 100644 --- a/include/net/af_vsock.h +++ b/include/net/af_vsock.h @@ -135,6 +135,10 @@ struct vsock_transport { bool (*stream_is_active)(struct vsock_sock *); bool (*stream_allow)(u32 cid, u32 port); + /* SEQ_PACKET. */ + int (*seqpacket_dequeue)(struct vsock_sock *vsk, struct msghdr *msg, +int flags, bool *msg_ready, size_t *record_len); + Why not using ssize_t as return value and return the length or a negative value in case of error? In this way we can remove the 'record_len' parameter. /* Notification. */ int (*notify_poll_in)(struct vsock_sock *, size_t, bool *); int (*notify_poll_out)(struct vsock_sock *, size_t, bool *); diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 0bc661e54262..fa0c37f97330 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -1973,6 +1973,89 @@ static int __vsock_stream_recvmsg(struct sock *sk, struct msghdr *msg, return err; } +static int __vsock_seqpacket_recvmsg(struct sock *sk, struct msghdr *msg, +size_t len, int flags) +{ + const struct vsock_transport *transport; + const struct iovec *orig_iov; + unsigned long orig_nr_segs; + bool msg_ready; + struct vsock_sock *vsk; + size_t record_len; + long timeout; + int err = 0; + DEFINE_WAIT(wait); + + vsk = vsock_sk(sk); + transport = vsk->transport; + + timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); + orig_nr_segs = msg->msg_iter.nr_segs; + orig_iov = msg->msg_iter.iov; + msg_ready = false; + record_len = 0; + + while (1) { + err = vsock_wait_data(sk, , timeout, NULL, 0); + + if (err <= 0) { + /* In case of any loop break(timeout, signal +* interrupt or shutdown), we report user that +* nothing was copied. +*/ + err = 0; + break; + } + + err = transport->seqpacket_dequeue(vsk, msg, flags, _ready, _len); + + if (err < 0) { + if (err == -EAGAIN) { + iov_iter_init(>msg_iter, READ, + orig_iov, orig_nr_segs, + len); + /* Clear 'MSG_EOR' here, because dequeue +* callback above set it again if it was +* set by sender. This 'MSG_EOR' is from +* dropped record. +*/ + msg->msg_flags &= ~MSG_EOR; + record_len = 0; + continue; + } + + err = -ENOMEM; + break; + } + + if (msg_ready) + break; + } + + if (sk->sk_err) + err = -sk->sk_err; + else if (sk->sk_shutdown & RCV_SHUTDOWN) + err = 0; + + if (msg_ready) { If 'err' is not 0, should we skip this branch? + /* User sets MSG_TRUNC, so return real length of +* packet. +*/ + if (flags & MSG_TRUNC) + err = record_len; + else + err = len - msg->msg_iter.count; + + /* Always set MSG_TRUNC if real length of packet is +* bigger than user's buffer. +*/ + if (record_len > len) + msg->msg_flags |= MSG_TRUNC; + } + + return err; +} + static int vsock_connectible_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, int flags) @@ -2028,7 +2111,10 @@
Re: [RFC PATCH v7 03/22] af_vsock: separate receive data loop
On Tue, Mar 23, 2021 at 04:09:36PM +0300, Arseny Krasnov wrote: Move STREAM specific data receive logic to '__vsock_stream_recvmsg()' dedicated function, while checks, that will be same for both STREAM and SEQPACKET sockets, stays in 'vsock_connectible_recvmsg()' shared functions. Signed-off-by: Arseny Krasnov --- net/vmw_vsock/af_vsock.c | 116 ++- 1 file changed, 67 insertions(+), 49 deletions(-) I had already reviewed this in v5 and in v6 you reported the R-b tag. Usually the tag gets removed if you make changes to the patch or the reviewer is no longer happy. But this doesn't seem to be the case. So please keep the tags between versions :-) Reviewed-by: Stefano Garzarella diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 421c0303b26f..0bc661e54262 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -1895,65 +1895,22 @@ static int vsock_wait_data(struct sock *sk, struct wait_queue_entry *wait, return data; } -static int -vsock_connectible_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, - int flags) +static int __vsock_stream_recvmsg(struct sock *sk, struct msghdr *msg, + size_t len, int flags) { - struct sock *sk; - struct vsock_sock *vsk; + struct vsock_transport_recv_notify_data recv_data; const struct vsock_transport *transport; - int err; - size_t target; + struct vsock_sock *vsk; ssize_t copied; + size_t target; long timeout; - struct vsock_transport_recv_notify_data recv_data; + int err; DEFINE_WAIT(wait); - sk = sock->sk; vsk = vsock_sk(sk); - err = 0; - - lock_sock(sk); - transport = vsk->transport; - if (!transport || sk->sk_state != TCP_ESTABLISHED) { - /* Recvmsg is supposed to return 0 if a peer performs an -* orderly shutdown. Differentiate between that case and when a -* peer has not connected or a local shutdown occured with the -* SOCK_DONE flag. -*/ - if (sock_flag(sk, SOCK_DONE)) - err = 0; - else - err = -ENOTCONN; - - goto out; - } - - if (flags & MSG_OOB) { - err = -EOPNOTSUPP; - goto out; - } - - /* We don't check peer_shutdown flag here since peer may actually shut -* down, but there can be data in the queue that a local socket can -* receive. -*/ - if (sk->sk_shutdown & RCV_SHUTDOWN) { - err = 0; - goto out; - } - - /* It is valid on Linux to pass in a zero-length receive buffer. This -* is not an error. We may as well bail out now. -*/ - if (!len) { - err = 0; - goto out; - } - /* We must not copy less than target bytes into the user's buffer * before returning successfully, so we wait for the consume queue to * have that much data to consume before dequeueing. Note that this @@ -2012,6 +1969,67 @@ vsock_connectible_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, if (copied > 0) err = copied; +out: + return err; +} + +static int +vsock_connectible_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, + int flags) +{ + struct sock *sk; + struct vsock_sock *vsk; + const struct vsock_transport *transport; + int err; + + DEFINE_WAIT(wait); + + sk = sock->sk; + vsk = vsock_sk(sk); + err = 0; + + lock_sock(sk); + + transport = vsk->transport; + + if (!transport || sk->sk_state != TCP_ESTABLISHED) { + /* Recvmsg is supposed to return 0 if a peer performs an +* orderly shutdown. Differentiate between that case and when a +* peer has not connected or a local shutdown occurred with the +* SOCK_DONE flag. +*/ + if (sock_flag(sk, SOCK_DONE)) + err = 0; + else + err = -ENOTCONN; + + goto out; + } + + if (flags & MSG_OOB) { + err = -EOPNOTSUPP; + goto out; + } + + /* We don't check peer_shutdown flag here since peer may actually shut +* down, but there can be data in the queue that a local socket can +* receive. +*/ + if (sk->sk_shutdown & RCV_SHUTDOWN) { + err = 0; + goto out; + } + + /* It is valid on Linux to pass in a zero-length receive buffer. This +* is not an error. We may as well bail out now. +*/ + if (!len) { + err = 0; + goto out; + } + +
Re: [PATCH v5 01/11] file: Export __receive_fd() to modules
On Mon, Mar 15, 2021 at 05:46:43PM +0800, Yongji Xie wrote: > On Mon, Mar 15, 2021 at 5:08 PM Christoph Hellwig wrote: > > > > On Mon, Mar 15, 2021 at 01:37:11PM +0800, Xie Yongji wrote: > > > Export __receive_fd() so that some modules can use > > > it to pass file descriptor between processes. > > > > I really don't think any non-core code should do that, especilly not > > modular mere driver code. > > Do you see any issue? Now I think we're able to do that with the help > of get_unused_fd_flags() and fd_install() in modules. But we may miss > some security stuff in this way. So I try to export __receive_fd() and > use it instead. The real problem is now what helper to use, but rather that random drivers should not just mess with the FD table like that. ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
Re: [PATCH 15/17] iommu: remove DOMAIN_ATTR_NESTING
On Thu, Mar 25, 2021 at 06:12:37AM +, Tian, Kevin wrote: > Agree. The vSVA series is still undergoing a refactor according to Jason's > comment thus won't be ready in short term. It's better to let this one > go in first. Would be great to get a few more reviews while we're at it :) ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization
Re: [PATCH v5 09/11] vduse: Introduce VDUSE - vDPA Device in Userspace
在 2021/3/24 下午4:55, Yongji Xie 写道: On Wed, Mar 24, 2021 at 12:43 PM Jason Wang wrote: 在 2021/3/15 下午1:37, Xie Yongji 写道: This VDUSE driver enables implementing vDPA devices in userspace. Both control path and data path of vDPA devices will be able to be handled in userspace. In the control path, the VDUSE driver will make use of message mechnism to forward the config operation from vdpa bus driver to userspace. Userspace can use read()/write() to receive/reply those control messages. In the data path, userspace can use mmap() to access vDPA device's iova regions obtained through VDUSE_IOTLB_GET_ENTRY ioctl. Besides, userspace can use ioctl() to inject interrupt and use the eventfd mechanism to receive virtqueue kicks. Signed-off-by: Xie Yongji --- Documentation/userspace-api/ioctl/ioctl-number.rst |1 + drivers/vdpa/Kconfig | 10 + drivers/vdpa/Makefile |1 + drivers/vdpa/vdpa_user/Makefile|5 + drivers/vdpa/vdpa_user/vduse_dev.c | 1281 include/uapi/linux/vduse.h | 153 +++ 6 files changed, 1451 insertions(+) create mode 100644 drivers/vdpa/vdpa_user/Makefile create mode 100644 drivers/vdpa/vdpa_user/vduse_dev.c create mode 100644 include/uapi/linux/vduse.h diff --git a/Documentation/userspace-api/ioctl/ioctl-number.rst b/Documentation/userspace-api/ioctl/ioctl-number.rst index a4c75a28c839..71722e6f8f23 100644 --- a/Documentation/userspace-api/ioctl/ioctl-number.rst +++ b/Documentation/userspace-api/ioctl/ioctl-number.rst @@ -300,6 +300,7 @@ Code Seq#Include File Comments 'z' 10-4F drivers/s390/crypto/zcrypt_api.h conflict! '|' 00-7F linux/media.h 0x80 00-1F linux/fb.h +0x81 00-1F linux/vduse.h 0x89 00-06 arch/x86/include/asm/sockios.h 0x89 0B-DF linux/sockios.h 0x89 E0-EF linux/sockios.h SIOCPROTOPRIVATE range diff --git a/drivers/vdpa/Kconfig b/drivers/vdpa/Kconfig index a245809c99d0..77a1da522c21 100644 --- a/drivers/vdpa/Kconfig +++ b/drivers/vdpa/Kconfig @@ -25,6 +25,16 @@ config VDPA_SIM_NET help vDPA networking device simulator which loops TX traffic back to RX. +config VDPA_USER + tristate "VDUSE (vDPA Device in Userspace) support" + depends on EVENTFD && MMU && HAS_DMA + select DMA_OPS + select VHOST_IOTLB + select IOMMU_IOVA + help + With VDUSE it is possible to emulate a vDPA Device + in a userspace program. + config IFCVF tristate "Intel IFC VF vDPA driver" depends on PCI_MSI diff --git a/drivers/vdpa/Makefile b/drivers/vdpa/Makefile index 67fe7f3d6943..f02ebed33f19 100644 --- a/drivers/vdpa/Makefile +++ b/drivers/vdpa/Makefile @@ -1,6 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_VDPA) += vdpa.o obj-$(CONFIG_VDPA_SIM) += vdpa_sim/ +obj-$(CONFIG_VDPA_USER) += vdpa_user/ obj-$(CONFIG_IFCVF)+= ifcvf/ obj-$(CONFIG_MLX5_VDPA) += mlx5/ obj-$(CONFIG_VP_VDPA)+= virtio_pci/ diff --git a/drivers/vdpa/vdpa_user/Makefile b/drivers/vdpa/vdpa_user/Makefile new file mode 100644 index ..260e0b26af99 --- /dev/null +++ b/drivers/vdpa/vdpa_user/Makefile @@ -0,0 +1,5 @@ +# SPDX-License-Identifier: GPL-2.0 + +vduse-y := vduse_dev.o iova_domain.o + +obj-$(CONFIG_VDPA_USER) += vduse.o diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c b/drivers/vdpa/vdpa_user/vduse_dev.c new file mode 100644 index ..07d0ae92d470 --- /dev/null +++ b/drivers/vdpa/vdpa_user/vduse_dev.c @@ -0,0 +1,1281 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * VDUSE: vDPA Device in Userspace + * + * Copyright (C) 2020 Bytedance Inc. and/or its affiliates. All rights reserved. + * + * Author: Xie Yongji + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "iova_domain.h" + +#define DRV_VERSION "1.0" +#define DRV_AUTHOR "Yongji Xie " +#define DRV_DESC "vDPA Device in Userspace" +#define DRV_LICENSE "GPL v2" + +#define VDUSE_DEV_MAX (1U << MINORBITS) + +struct vduse_virtqueue { + u16 index; + bool ready; + spinlock_t kick_lock; + spinlock_t irq_lock; + struct eventfd_ctx *kickfd; + struct vdpa_callback cb; + struct work_struct inject; +}; + +struct vduse_dev; + +struct vduse_vdpa { + struct vdpa_device vdpa; + struct vduse_dev *dev; +}; + +struct vduse_dev { + struct vduse_vdpa *vdev; + struct device dev; + struct cdev cdev; + struct vduse_virtqueue *vqs; + struct vduse_iova_domain *domain; + spinlock_t msg_lock; + atomic64_t msg_unique; + wait_queue_head_t waitq; + struct list_head send_list; + struct list_head recv_list; + struct list_head
RE: [PATCH 15/17] iommu: remove DOMAIN_ATTR_NESTING
> From: Auger Eric > Sent: Monday, March 15, 2021 3:52 PM > To: Christoph Hellwig > Cc: k...@vger.kernel.org; Will Deacon ; linuxppc- > d...@lists.ozlabs.org; dri-de...@lists.freedesktop.org; Li Yang > ; io...@lists.linux-foundation.org; > > Hi Christoph, > > On 3/14/21 4:58 PM, Christoph Hellwig wrote: > > On Sun, Mar 14, 2021 at 11:44:52AM +0100, Auger Eric wrote: > >> As mentionned by Robin, there are series planning to use > >> DOMAIN_ATTR_NESTING to get info about the nested caps of the iommu > (ARM > >> and Intel): > >> > >> [Patch v8 00/10] vfio: expose virtual Shared Virtual Addressing to VMs > >> patches 1, 2, 3 > >> > >> Is the plan to introduce a new domain_get_nesting_info ops then? > > > > The plan as usual would be to add it the series adding that support. > > Not sure what the merge plans are - if the series is ready to be > > merged I could rebase on top of it, otherwise that series will need > > to add the method. > OK I think your series may be upstreamed first. > Agree. The vSVA series is still undergoing a refactor according to Jason's comment thus won't be ready in short term. It's better to let this one go in first. Thanks Kevin ___ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization