Re: [PATCH v2 4/4] vdpa_sim: Implement stop vdpa op

2022-05-24 Thread Jason Wang
On Wed, May 25, 2022 at 1:06 AM Eugenio Pérez  wrote:
>
> Implement stop operation for vdpa_sim devices, so vhost-vdpa will offer
> that backend feature and userspace can effectively stop the device.
>
> This is a must before getting the virtqueue indexes (base) for live
> migration, since the device could modify them after userland reads them.
> There are device-specific ways to perform that action for some devices
> (VHOST_NET_SET_BACKEND, VHOST_VSOCK_SET_RUNNING, ...) but there was no
> way to perform it for any vhost device (and, in particular, vhost-vdpa).
>
> After the ioctl returns with stop != 0, the device MUST finish any
> pending operations, such as in-flight requests. It must also preserve
> all the necessary state (the virtqueue vring base plus any device
> specific state) that is required for restoring it in the future. The
> device must not change its configuration after that point.
>
> After the ioctl returns with stop == 0, the device can resume
> processing buffers as long as the usual conditions are met (the vq is
> enabled, the DRIVER_OK status bit is set, etc).
>
> In the future, we will provide features similar to
> VHOST_USER_GET_INFLIGHT_FD so the device can save pending operations.
>
> Signed-off-by: Eugenio Pérez 
> ---
>  drivers/vdpa/vdpa_sim/vdpa_sim.c | 21 +
>  drivers/vdpa/vdpa_sim/vdpa_sim.h |  1 +
>  drivers/vdpa/vdpa_sim/vdpa_sim_blk.c |  3 +++
>  drivers/vdpa/vdpa_sim/vdpa_sim_net.c |  3 +++
>  4 files changed, 28 insertions(+)
>
> diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c
> index 50d721072beb..0515cf314bed 100644
> --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c
> +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c
> @@ -107,6 +107,7 @@ static void vdpasim_do_reset(struct vdpasim *vdpasim)
> for (i = 0; i < vdpasim->dev_attr.nas; i++)
> vhost_iotlb_reset(&vdpasim->iommu[i]);
>
> +   vdpasim->running = true;
> spin_unlock(&vdpasim->iommu_lock);
>
> vdpasim->features = 0;
> @@ -505,6 +506,24 @@ static int vdpasim_reset(struct vdpa_device *vdpa)
> return 0;
>  }
>
> +static int vdpasim_stop(struct vdpa_device *vdpa, bool stop)
> +{
> +   struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
> +   int i;
> +
> +   spin_lock(&vdpasim->lock);
> +   vdpasim->running = !stop;
> +   if (vdpasim->running) {
> +   /* Check for missed buffers */
> +   for (i = 0; i < vdpasim->dev_attr.nvqs; ++i)
> +   vdpasim_kick_vq(vdpa, i);
> +
> +   }
> +   spin_unlock(&vdpasim->lock);
> +
> +   return 0;
> +}
> +
>  static size_t vdpasim_get_config_size(struct vdpa_device *vdpa)
>  {
> struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
> @@ -694,6 +713,7 @@ static const struct vdpa_config_ops vdpasim_config_ops = {
> .get_status = vdpasim_get_status,
> .set_status = vdpasim_set_status,
> .reset  = vdpasim_reset,
> +   .stop   = vdpasim_stop,
> .get_config_size= vdpasim_get_config_size,
> .get_config = vdpasim_get_config,
> .set_config = vdpasim_set_config,
> @@ -726,6 +746,7 @@ static const struct vdpa_config_ops vdpasim_batch_config_ops = {
> .get_status = vdpasim_get_status,
> .set_status = vdpasim_set_status,
> .reset  = vdpasim_reset,
> +   .stop   = vdpasim_stop,
> .get_config_size= vdpasim_get_config_size,
> .get_config = vdpasim_get_config,
> .set_config = vdpasim_set_config,
> diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.h b/drivers/vdpa/vdpa_sim/vdpa_sim.h
> index 622782e92239..061986f30911 100644
> --- a/drivers/vdpa/vdpa_sim/vdpa_sim.h
> +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.h
> @@ -66,6 +66,7 @@ struct vdpasim {
> u32 generation;
> u64 features;
> u32 groups;
> +   bool running;
> /* spinlock to synchronize iommu table */
> spinlock_t iommu_lock;
>  };
> diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim_blk.c b/drivers/vdpa/vdpa_sim/vdpa_sim_blk.c
> index 42d401d43911..bcdb1982c378 100644
> --- a/drivers/vdpa/vdpa_sim/vdpa_sim_blk.c
> +++ b/drivers/vdpa/vdpa_sim/vdpa_sim_blk.c
> @@ -204,6 +204,9 @@ static void vdpasim_blk_work(struct work_struct *work)
> if (!(vdpasim->status & VIRTIO_CONFIG_S_DRIVER_OK))
> goto out;
>
> +   if (!vdpasim->running)
> +   goto out;
> +
> for (i = 0; i < VDPASIM_BLK_VQ_NUM; i++) {
> struct vdpasim_virtqueue *vq = &vdpasim->vqs[i];
>
> diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim_net.c b/drivers/vdpa/vdpa_sim/vdpa_sim_net.c
> index 5125976a4df8..886449e88502 100644
> --- a/drivers/vdpa/vdpa_sim/vdpa_sim_net.c
> +++ b/drivers/vdpa/vdpa_sim/vdpa_sim_net.c
> @@ -154,6 +154,9 @@ static void vdpasim_net_work(struct work_struct *work)
>
> spin_lock(&vdpasim->lock);

Re: [PATCH v2 3/4] vhost-vdpa: uAPI to stop the device

2022-05-24 Thread Jason Wang
On Wed, May 25, 2022 at 1:06 AM Eugenio Pérez  wrote:
>
> The ioctl adds support for stopping the device from userspace.
>
> Signed-off-by: Eugenio Pérez 
> ---
>  drivers/vhost/vdpa.c   | 18 ++
>  include/uapi/linux/vhost.h |  3 +++
>  2 files changed, 21 insertions(+)
>
> diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
> index 32713db5831d..a5d33bad92f9 100644
> --- a/drivers/vhost/vdpa.c
> +++ b/drivers/vhost/vdpa.c
>  static long vhost_vdpa_get_vqs_count(struct vhost_vdpa *v, u32 __user *argp)
> return 0;
>  }
>
> +static long vhost_vdpa_stop(struct vhost_vdpa *v, u32 __user *argp)
> +{
> +   struct vdpa_device *vdpa = v->vdpa;
> +   const struct vdpa_config_ops *ops = vdpa->config;
> +   int stop;
> +
> +   if (!ops->stop)
> +   return -EOPNOTSUPP;
> +
> +   if (copy_from_user(&stop, argp, sizeof(stop)))
> +   return -EFAULT;
> +
> +   return ops->stop(vdpa, stop);
> +}
> +
>  static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
>void __user *argp)
>  {
> @@ -650,6 +665,9 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep,
> case VHOST_VDPA_GET_VQS_COUNT:
> r = vhost_vdpa_get_vqs_count(v, argp);
> break;
> +   case VHOST_STOP:
> +   r = vhost_vdpa_stop(v, argp);
> +   break;
> default:
> r = vhost_dev_ioctl(&v->vdev, cmd, argp);
> if (r == -ENOIOCTLCMD)
> diff --git a/include/uapi/linux/vhost.h b/include/uapi/linux/vhost.h
> index cab645d4a645..e7526968ab0c 100644
> --- a/include/uapi/linux/vhost.h
> +++ b/include/uapi/linux/vhost.h
> @@ -171,4 +171,7 @@
>  #define VHOST_VDPA_SET_GROUP_ASID  _IOW(VHOST_VIRTIO, 0x7C, \
>  struct vhost_vring_state)
>
> +/* Stop or resume a device so it does not process virtqueue requests anymore */
> +#define VHOST_STOP _IOW(VHOST_VIRTIO, 0x7D, int)
> +

Unless we know it's a vhost general uAPI, let's use VHOST_VDPA_STOP here.

Thanks

>  #endif
> --
> 2.27.0
>
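For illustration, here is a minimal userspace sketch of how a VMM could
drive this ioctl on a vhost-vdpa character device. The device path is
hypothetical, and the ioctl name follows this revision of the series (it
may become VHOST_VDPA_STOP per Jason's comment above), so the fallback
definition simply mirrors the hunk quoted above:

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/vhost.h>

#ifndef VHOST_STOP
#define VHOST_STOP _IOW(VHOST_VIRTIO, 0x7D, int) /* from this series only */
#endif

/* Stop (stop=1) or resume (stop=0) a vhost-vdpa device. */
static int vdpa_set_stopped(int fd, int stop)
{
    /* The ioctl takes a plain int: nonzero stops, zero resumes. */
    if (ioctl(fd, VHOST_STOP, &stop) < 0) {
        perror("VHOST_STOP");
        return -1;
    }
    return 0;
}

int main(void)
{
    int fd = open("/dev/vhost-vdpa-0", O_RDWR); /* hypothetical node */

    if (fd < 0)
        return 1;
    vdpa_set_stopped(fd, 1);
    /* ... now it is safe to read each vring base and save device state ... */
    vdpa_set_stopped(fd, 0);
    close(fd);
    return 0;
}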


Re: [PATCH v2 0/4] Implement vdpasim stop operation

2022-05-24 Thread Jason Wang
On Wed, May 25, 2022 at 1:06 AM Eugenio Pérez  wrote:
>
> Implement stop operation for vdpa_sim devices, so vhost-vdpa will offer
> that backend feature and userspace can effectively stop the device.
>
> This is a must before getting the virtqueue indexes (base) for live
> migration, since the device could modify them after userland reads them.
> There are device-specific ways to perform that action for some devices
> (VHOST_NET_SET_BACKEND, VHOST_VSOCK_SET_RUNNING, ...) but there was no
> way to perform it for any vhost device (and, in particular, vhost-vdpa).
>
> After the ioctl returns with stop != 0, the device MUST finish any
> pending operations, such as in-flight requests. It must also preserve
> all the necessary state (the virtqueue vring base plus any device
> specific state) that is required for restoring it in the future. The
> device must not change its configuration after that point.

I'd suggest documenting this in the code, maybe around ops->stop()?

Thanks

>
> After the ioctl returns with stop == 0, the device can resume
> processing buffers as long as the usual conditions are met (the vq is
> enabled, the DRIVER_OK status bit is set, etc).
>
> In the future, we will provide features similar to VHOST_USER_GET_INFLIGHT_FD
> so the device can save pending operations.
>
> Comments are welcome.
>
> v2:
> * Replace raw _F_STOP with BIT_ULL(_F_STOP).
> * Fix obtaining of stop ioctl arg (it was not obtained but written).
> * Add stop to vdpa_sim_blk.
>
> Eugenio Pérez (4):
>   vdpa: Add stop operation
>   vhost-vdpa: introduce STOP backend feature bit
>   vhost-vdpa: uAPI to stop the device
>   vdpa_sim: Implement stop vdpa op
>
>  drivers/vdpa/vdpa_sim/vdpa_sim.c | 21 +
>  drivers/vdpa/vdpa_sim/vdpa_sim.h |  1 +
>  drivers/vdpa/vdpa_sim/vdpa_sim_blk.c |  3 +++
>  drivers/vdpa/vdpa_sim/vdpa_sim_net.c |  3 +++
>  drivers/vhost/vdpa.c | 34 +++-
>  include/linux/vdpa.h |  6 +
>  include/uapi/linux/vhost.h   |  3 +++
>  include/uapi/linux/vhost_types.h |  2 ++
>  8 files changed, 72 insertions(+), 1 deletion(-)
>
> --
> 2.27.0
>
>
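Following the suggestion above, a rough sketch of how that wording could
be documented around ops->stop() in include/linux/vdpa.h; the phrasing is
illustrative, not taken from the series itself:

/**
 * @stop: Stop or resume the device (optional, but it must be
 *        implemented if the device can be stopped)
 *        @vdev: vdpa device
 *        @stop: stop (true) or resume (false)
 *        On stop, the device must finish all pending operations
 *        (e.g. in-flight requests) and preserve every piece of state
 *        needed to resume it later: the virtqueue vring bases plus
 *        any device-specific state. The device must not change its
 *        configuration until it is resumed or reset.
 *        Returns integer: success (0) or error (< 0)
 */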


Re: [PATCH] Docs/ABI/testing: Add VDUSE sysfs interface ABI document

2022-05-24 Thread Jason Wang
On Tue, May 24, 2022 at 7:51 PM Xie Yongji  wrote:
>
> This adds missing documentation for VDUSE sysfs interface ABI
> under Documentation/ABI/testing.
>
> Signed-off-by: Xie Yongji 

Acked-by: Jason Wang 

> ---
>  Documentation/ABI/testing/sysfs-class-vduse | 33 +
>  MAINTAINERS |  1 +
>  2 files changed, 34 insertions(+)
>  create mode 100644 Documentation/ABI/testing/sysfs-class-vduse
>
> diff --git a/Documentation/ABI/testing/sysfs-class-vduse b/Documentation/ABI/testing/sysfs-class-vduse
> new file mode 100644
> index 000000000000..2f2bc5c8fc48
> --- /dev/null
> +++ b/Documentation/ABI/testing/sysfs-class-vduse
> @@ -0,0 +1,33 @@
> +What:  /sys/class/vduse/
> +Date:  Oct 2021
> +KernelVersion: 5.15
> +Contact:   Yongji Xie 
> +Description:
> +   The vduse/ class sub-directory belongs to the VDUSE
> +   framework and provides a sysfs interface for configuring
> +   VDUSE devices.
> +
> +What:  /sys/class/vduse/control/
> +Date:  Oct 2021
> +KernelVersion: 5.15
> +Contact:   Yongji Xie 
> +Description:
> +   This directory entry is created for the control device
> +   of VDUSE framework.
> +
> +What:  /sys/class/vduse/<device-name>/
> +Date:  Oct 2021
> +KernelVersion: 5.15
> +Contact:   Yongji Xie 
> +Description:
> +   This directory entry is created when a VDUSE device is
> +   created via the control device.
> +
> +What:  /sys/class/vduse/<device-name>/msg_timeout
> +Date:  Oct 2021
> +KernelVersion: 5.15
> +Contact:   Yongji Xie 
> +Description:
> +   (RW) The timeout (in seconds) for waiting for the control
> +   message's response from userspace. Default value is 30s.
> +   Writing a '0' to the file means to disable the timeout.
> diff --git a/MAINTAINERS b/MAINTAINERS
> index d6d879cb0afd..d9a423de2f4d 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -20842,6 +20842,7 @@ M:  Jason Wang 
>  L: virtualization@lists.linux-foundation.org
>  S: Maintained
>  F: Documentation/ABI/testing/sysfs-bus-vdpa
> +F: Documentation/ABI/testing/sysfs-class-vduse
>  F: Documentation/devicetree/bindings/virtio/
>  F: drivers/block/virtio_blk.c
>  F: drivers/crypto/virtio/
> --
> 2.20.1
>
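As an aside, the msg_timeout attribute described above can be queried and
tuned from userspace; a small sketch (the device name "vduse0" is a
hypothetical example, substitute the real device name):

#include <stdio.h>

int main(void)
{
    const char *path = "/sys/class/vduse/vduse0/msg_timeout";
    char buf[16];
    FILE *f = fopen(path, "r");

    if (!f)
        return 1;
    if (fgets(buf, sizeof(buf), f))
        printf("current timeout: %s", buf); /* default: 30 */
    fclose(f);

    f = fopen(path, "w");
    if (!f)
        return 1;
    fputs("0", f); /* writing '0' disables the timeout */
    fclose(f);
    return 0;
}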



Re: [PATCH V5 0/9] rework on the IRQ hardening of virtio

2022-05-24 Thread Jason Wang
On Wed, May 25, 2022 at 12:28 AM Halil Pasic  wrote:
>
> On Mon, 23 May 2022 10:53:23 +0200
> Halil Pasic  wrote:
>
> > On Wed, 18 May 2022 11:59:42 +0800
> > Jason Wang  wrote:
> >
> > > Hi All:
> >
> > Sorry for being slow on this one. I'm pretty much under water. Will try
> > to get some regression-testing done by the end of day tomorrow.
> >
>
> Did some testing with the two stage indicators disabled. Didn't see any
> significant difference in performance, and with that also no performance
> regression. IMHO we are good to go ahead!

Great!

>
> Sorry it took so long.

No worries and thanks a lot for the help.

I will repost a version with some comments tweaked, as suggested by
Cornelia Huck.

Thanks

>
> Regards,
> Halil
>



Re: Re: [PATCH 3/3] virtio_balloon: Introduce memory recover

2022-05-24 Thread zhenwei pi




On 5/25/22 03:35, Sean Christopherson wrote:

On Fri, May 20, 2022, zhenwei pi wrote:

@@ -59,6 +60,12 @@ enum virtio_balloon_config_read {
VIRTIO_BALLOON_CONFIG_READ_CMD_ID = 0,
  };
  
+/* the request body to communicate with the host side */

+struct __virtio_balloon_recover {
+   struct virtio_balloon_recover vbr;
+   __virtio32 pfns[VIRTIO_BALLOON_PAGES_PER_PAGE];


I assume this is copied from virtio_balloon.pfns, which also uses __virtio32,
but isn't that horribly broken?  PFNs are 'unsigned long', i.e. 64 bits on
64-bit kernels. x86-64 at least most definitely generates 64-bit PFNs.  Unless
there's magic I'm missing, page_to_balloon_pfn() will truncate PFNs and feed
the host bad info.



Yes, I also noticed this point. I suppose the balloon device cannot
work on a virtual machine which has physical addresses larger than 16T.

I still keep the recover VQ aligned with the inflate VQ and deflate
VQ. I prefer the recover VQ to be workable/unworkable together with the
inflate/deflate VQs. So I leave this to the virtio balloon
maintainer to decide ...



@@ -494,6 +511,198 @@ static void update_balloon_size_func(struct work_struct *work)
queue_work(system_freezable_wq, work);
  }
  
+/*
+ * virtballoon_memory_failure - notified by memory failure, try to fix the
+ *                              corrupted page.
+ * The memory failure notifier is designed to call back when the kernel handled
+ * successfully only, WARN_ON_ONCE on the unlikely condition to find out any
+ * error (memory error handling is a best effort, not 100% covered).
+ */
+static int virtballoon_memory_failure(struct notifier_block *notifier,
+ unsigned long pfn, void *parm)
+{
+   struct virtio_balloon *vb = container_of(notifier, struct virtio_balloon,
+                                            memory_failure_nb);
+   struct page *page;
+   struct __virtio_balloon_recover *out_vbr;
+   struct scatterlist sg;
+   unsigned long flags;
+   int err;
+
+   page = pfn_to_online_page(pfn);
+   if (WARN_ON_ONCE(!page))
+   return NOTIFY_DONE;
+
+   if (PageHuge(page))
+   return NOTIFY_DONE;
+
+   if (WARN_ON_ONCE(!PageHWPoison(page)))
+   return NOTIFY_DONE;
+
+   if (WARN_ON_ONCE(page_count(page) != 1))
+   return NOTIFY_DONE;
+
+   get_page(page); /* balloon reference */
+
+   out_vbr = kzalloc(sizeof(*out_vbr), GFP_KERNEL);
+   if (WARN_ON_ONCE(!out_vbr))
+   return NOTIFY_BAD;


Not that it truly matters, but won't failure at this point leak the poisoned
page?


I'll fix this, thanks!

--
zhenwei pi


Re: [PATCH] mm: fix a potential infinite loop in start_isolate_page_range().

2022-05-24 Thread Andrew Morton
On Tue, 24 May 2022 15:47:56 -0400 Zi Yan  wrote:

> From: Zi Yan 
> 
> In isolate_single_pageblock() called by start_isolate_page_range(),
> there are some pageblock isolation issues causing a potential
> infinite loop when isolating a page range. This is reported by Qian Cai.
> 
> 1. the pageblock was isolated by just changing the pageblock migratetype
>    without checking for unmovable pages. Call set_migratetype_isolate() to
>    isolate the pageblock properly.
> 2. an off-by-one error caused pages to be migrated unnecessarily, since
>    the page is not crossing the pageblock boundary.
> 3. migrating a compound page across a pageblock boundary and then splitting
>    the free page later has a small race window in which the free page might
>    be allocated again, so the code will try again, causing a potential
>    infinite loop. Temporarily set the to-be-migrated page's pageblock to
>    MIGRATE_ISOLATE to prevent that, and bail out early if no free page is
>    found after page migration.
> 
> An additional fix to split_free_page() aims to avoid crashing in
> __free_one_page(). When the free page is split at the specified
> split_pfn_offset, free_page_order should check both the first bit of
> free_page_pfn and the last bit of split_pfn_offset and use the smaller one.
> For example, if free_page_pfn=0x10000, split_pfn_offset=0xc000,
> free_page_order should first be 0x8000 then 0x4000, instead of 0x4000 then
> 0x8000, which the original algorithm did.
> 
> ...
>
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -1114,13 +1114,16 @@ void split_free_page(struct page *free_page,
>   unsigned long flags;
>   int free_page_order;
>  
> + if (split_pfn_offset == 0)
> + return;
> +
>   spin_lock_irqsave(>lock, flags);
>   del_page_from_free_list(free_page, zone, order);
>   for (pfn = free_page_pfn;
>pfn < free_page_pfn + (1UL << order);) {
>   int mt = get_pfnblock_migratetype(pfn_to_page(pfn), pfn);
>  
> - free_page_order = ffs(split_pfn_offset) - 1;
> + free_page_order = min(pfn ? __ffs(pfn) : order, __fls(split_pfn_offset));

Why is it testing the zeroness of `pfn' here?  Can pfn==0 even happen? 
If so, it's a legitimate value so why does it get special-cased?
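For readers following the hex example in the changelog, a standalone
sketch of the order computation (GCC builtins stand in for the kernel's
__ffs/__fls; this is illustrative only, not the kernel loop verbatim):

#include <stdio.h>

/* Userspace stand-ins for the kernel helpers. */
#define __ffs(x) ((unsigned long)__builtin_ctzl(x))  /* lowest set bit */
#define __fls(x) (63UL - __builtin_clzl(x))          /* highest set bit */

int main(void)
{
    unsigned long pfn = 0x10000, split_pfn_offset = 0xc000;

    while (split_pfn_offset) {
        /* min() of the pfn's alignment and the remaining split size */
        unsigned long order = __ffs(pfn) < __fls(split_pfn_offset) ?
                              __ffs(pfn) : __fls(split_pfn_offset);

        printf("free order %lu (0x%lx pages) at pfn 0x%lx\n",
               order, 1UL << order, pfn);
        pfn += 1UL << order;
        split_pfn_offset -= 1UL << order;
    }
    /* prints order 15 (0x8000 pages) then order 14 (0x4000 pages),
     * matching the changelog: 0x8000 first, then 0x4000 */
    return 0;
}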





Re: [PATCH 0/3] recover hardware corrupted page by virtio balloon

2022-05-24 Thread David Hildenbrand
On 20.05.22 09:06, zhenwei pi wrote:
> Hi,
> 
> I'm trying to recover hardware-corrupted pages via virtio balloon; the
> workflow of this feature is like this:
> 
> Guest        5.MF -> 6.RVQ FE      10.Unpoison page
>               /           \            /
> -------------+-------------+----------+---------------
>              |             |          |
>            4.MCE       7.RVQ BE   9.RVQ Event
>  QEMU       /               \         /
>        3.SIGBUS              8.Remap
>            /
> ----------+-------------------------------------------
>            |
>            +---2.MF
>  Host     /
>       1.HW error
> 
> 1, HardWare page error occurs randomly.
> 2, host side handles corrupted page by Memory Failure mechanism, sends
>SIGBUS to the user process if early-kill is enabled.
> 3, QEMU handles SIGBUS, if the address belongs to guest RAM, then:
> 4, QEMU tries to inject MCE into guest.
> 5, guest handles memory failure again.
> 
> 1-5 is already supported for a long time, the next steps are supported
> in this patch(also related driver patch):
> 
> 6, the guest balloon driver gets notified of the corrupted PFN, and sends a
>    request to the host side via the Recover VQ FrontEnd.
> 7, QEMU handles request from Recover VQ BackEnd, then:
> 8, QEMU remaps the corrupted HVA to fix the memory failure, then:
> 9, QEMU acks the guest side the result by Recover VQ.
> 10, the guest unpoisons the page if the corrupted page gets recovered
>     successfully.
> 
> Test:
> This patch set can be tested with QEMU(also in developing):
> https://github.com/pizhenwei/qemu/tree/balloon-recover
> 
> Emulate MCE by QEMU(guest RAM normal page only, hugepage is not supported):
> virsh qemu-monitor-command vm --hmp mce 0 9 0xbdc0 0xd 0x61646678 0x8c
> 
> The guest works fine(on Intel Platinum 8260):
>  mce: [Hardware Error]: Machine check events logged
>  Memory failure: 0x61646: recovery action for dirty LRU page: Recovered
>  virtio_balloon virtio5: recovered pfn 0x61646
>  Unpoison: Unpoisoned page 0x61646 by virtio-balloon
>  MCE: Killing stress:24502 due to hardware memory corruption fault at 7f5be2e5a010
> 
> And the 'HardwareCorrupted' in /proc/meminfo also shows 0 kB.
> 
> About the protocol of virtio balloon recover VQ, it's undefined and in
> developing currently:
> - 'struct virtio_balloon_recover' defines the structure which is used to
>   exchange message between guest and host.
> - '__le32 corrupted_pages' in struct virtio_balloon_config is used in the next
>   step:
>   1, a VM uses RAM of 2M huge pages; once an MCE occurs, the 2M becomes
>      inaccessible. Reporting 512 * 4K 'corrupted_pages' to the guest, the
>      guest has a chance to isolate the 512 pages ahead of time.
> 
>   2, after migrating to another host, the corrupted pages are actually
>      recovered; once the guest gets 'corrupted_pages' with 0, the guest can
>      unpoison all the poisoned pages recorded in the balloon driver.
> 

Hi,

I'm still on vacation this week, I'll try to have a look when I'm back
(and flushed out my overflowing inbox :D).


-- 
Thanks,

David / dhildenb



Re: [PATCH V2 5/7] dt-bindings: Add xen, dev-domid property description for xen-grant DMA ops

2022-05-24 Thread Stefano Stabellini
On Tue, 24 May 2022, Oleksandr wrote:
> > On Mon, 23 May 2022, Oleksandr wrote:
> > > > > On Thu, 19 May 2022, Oleksandr wrote:
> > > > > > > On Wed, May 18, 2022 at 5:06 PM Oleksandr 
> > > > > > > wrote:
> > > > > > > > On 18.05.22 17:32, Arnd Bergmann wrote:
> > > > > > > > > On Sat, May 7, 2022 at 7:19 PM Oleksandr Tyshchenko
> > > > > > > > >  wrote:
> > > > > > > > >      This would mean having a device
> > > > > > > > > node for the grant-table mechanism that can be referred to
> > > > > > > > > using
> > > > > > > > > the
> > > > > > > > > 'iommus'
> > > > > > > > > phandle property, with the domid as an additional argument.
> > > > > > > > I assume, you are speaking about something like the following?
> > > > > > > > 
> > > > > > > > 
> > > > > > > > xen_dummy_iommu {
> > > > > > > >    compatible = "xen,dummy-iommu";
> > > > > > > >    #iommu-cells = <1>;
> > > > > > > > };
> > > > > > > > 
> > > > > > > > virtio@3000 {
> > > > > > > >    compatible = "virtio,mmio";
> > > > > > > >    reg = <0x3000 0x100>;
> > > > > > > >    interrupts = <41>;
> > > > > > > > 
> > > > > > > >    /* The device is located in Xen domain with ID 1 */
> > > > > > > >    iommus = <&xen_dummy_iommu 1>;
> > > > > > > > };
> > > > > > > Right, that's the idea,
> > > > > > thank you for the confirmation
> > > > > > 
> > > > > > 
> > > > > > 
> > > > > > >     except I would not call it a 'dummy'.
> > > > > > >    From the perspective of the DT, this behaves just like an
> > > > > > > IOMMU,
> > > > > > > even if the exact mechanism is different from most hardware IOMMU
> > > > > > > implementations.
> > > > > > well, agree
> > > > > > 
> > > > > > 
> > > > > > > > > It does not quite fit the model that Linux currently uses for
> > > > > > > > > iommus,
> > > > > > > > > as that has an allocator for dma_addr_t space
> > > > > > > > yes (# 3/7 adds grant-table based allocator)
> > > > > > > > 
> > > > > > > > 
> > > > > > > > > , but I would think it's
> > > > > > > > > conceptually close enough that it makes sense for the binding.
> > > > > > > > Interesting idea. I am wondering, do we need an extra actions
> > > > > > > > for
> > > > > > > > this
> > > > > > > > to work in Linux guest (dummy IOMMU driver, etc)?
> > > > > > > It depends on how closely the guest implementation can be made to
> > > > > > > resemble a normal iommu. If you do allocate dma_addr_t addresses,
> > > > > > > it may actually be close enough that you can just turn the
> > > > > > > grant-table
> > > > > > > code into a normal iommu driver and change nothing else.
> > > > > > Unfortunately, I failed to find a way to use grant references at
> > > > > > the
> > > > > > iommu_ops level (I mean to fully pretend that we are an IOMMU
> > > > > > driver). I
> > > > > > am
> > > > > > not too familiar with that, so what is written below might be wrong
> > > > > > or
> > > > > > at
> > > > > > least not precise.
> > > > > > 
> > > > > > The normal IOMMU driver in Linux doesn’t allocate DMA addresses by
> > > > > > itself, it
> > > > > > just maps (IOVA-PA) what was requested to be mapped by the upper
> > > > > > layer.
> > > > > > The
> > > > > > DMA address allocation is done by the upper layer (DMA-IOMMU which
> > > > > > is
> > > > > > the glue
> > > > > > layer between DMA API and IOMMU API allocates IOVA for PA?). But,
> > > > > > all
> > > > > > what we
> > > > > > need here is just to allocate our specific grant-table based DMA
> > > > > > addresses
> > > > > > (DMA address = grant reference + offset in the page), so let’s say
> > > > > > we
> > > > > > need an
> > > > > > entity to take a physical address as parameter and return a DMA
> > > > > > address
> > > > > > (what
> > > > > > actually commit #3/7 is doing), and that’s all. So working at the
> > > > > > dma_ops
> > > > > > layer we get exactly what we need, with the minimal changes to guest
> > > > > > infrastructure. In our case Xen itself acts as an IOMMU.
> > > > > > 
> > > > > > Assuming that we want to reuse the IOMMU infrastructure somehow for
> > > > > > our
> > > > > > needs.
> > > > > > I think, in that case we will likely need to introduce a new
> > > > > > specific
> > > > > > IOVA
> > > > > > allocator (alongside with a generic one) to be hooked up by the
> > > > > > DMA-IOMMU
> > > > > > layer if we run on top of Xen. But, even having the specific IOVA
> > > > > > allocator to
> > > > > > return what we indeed need (DMA address = grant reference + offset
> > > > > > in
> > > > > > the
> > > > > > page) we will still need the specific minimal required IOMMU driver
> > > > > > to
> > > > > > be
> > > > > > present in the system anyway in order to track the mappings(?) and
> > > > > > do
> > > > > > nothing
> > > > > > with them, returning a success (this specific IOMMU driver should
> > > > > > have
> > > > > > all
> > > > > > mandatory callbacks implemented).
> > > > > > 
> > > > > > I completely agree, it would be really nice to reuse generic 
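For reference, the address encoding discussed above (DMA address = grant
reference + offset within the page) can be sketched as follows; all names
and the exact layout are illustrative, not taken from the series:

#include <stdint.h>
#include <stdio.h>

#define XEN_PAGE_SHIFT 12
#define XEN_PAGE_SIZE  (1UL << XEN_PAGE_SHIFT)

typedef uint64_t dma_addr_t;
typedef uint32_t grant_ref_t;

/* Encode a grant-based DMA address: the grant reference selects the
 * page, the low bits carry the offset within it. */
static dma_addr_t grant_to_dma(grant_ref_t ref, unsigned long offset)
{
    return ((dma_addr_t)ref << XEN_PAGE_SHIFT) |
           (offset & (XEN_PAGE_SIZE - 1));
}

/* Decode it back when the mapping is looked up or torn down. */
static grant_ref_t dma_to_grant(dma_addr_t dma)
{
    return (grant_ref_t)(dma >> XEN_PAGE_SHIFT);
}

int main(void)
{
    dma_addr_t dma = grant_to_dma(0x1234, 0x56);

    printf("dma %#llx -> grant %#x\n",
           (unsigned long long)dma, dma_to_grant(dma));
    return 0;
}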

Re: [PATCH V5 6/9] virtio-ccw: implement synchronize_cbs()

2022-05-24 Thread Halil Pasic
On Wed, 18 May 2022 11:59:48 +0800
Jason Wang  wrote:

> This patch tries to implement the synchronize_cbs() for ccw. For the
> vring_interrupt() that is called via virtio_airq_handler(), the
> synchronization is simply done via the airq_info's lock. For the
> vring_interrupt() that is called via virtio_ccw_int_handler(), a per
> device rwlock is introduced and used in the synchronization method.
> 
> Cc: Thomas Gleixner 
> Cc: Peter Zijlstra 
> Cc: "Paul E. McKenney" 
> Cc: Marc Zyngier 
> Cc: Halil Pasic 
> Cc: Cornelia Huck 
> Cc: Vineeth Vijayan 
> Cc: Peter Oberparleiter 
> Cc: linux-s...@vger.kernel.org
> Signed-off-by: Jason Wang 

Reviewed-by: Halil Pasic 


Re: [PATCH V5 0/9] rework on the IRQ hardening of virtio

2022-05-24 Thread Halil Pasic
On Mon, 23 May 2022 10:53:23 +0200
Halil Pasic  wrote:

> On Wed, 18 May 2022 11:59:42 +0800
> Jason Wang  wrote:
> 
> > Hi All:  
> 
> Sorry for being slow on this one. I'm pretty much under water. Will try
> to get some regression-testing done by the end of day tomorrow.
> 

Did some testing with the two stage indicators disabled. Didn't see any
significant difference in performance, and with that also no performance
regression. IMHO we are good to go ahead!

Sorry it took so long.

Regards,
Halil



Re: [PATCH V2 5/7] dt-bindings: Add xen,dev-domid property description for xen-grant DMA ops

2022-05-24 Thread Rob Herring
+Saravana

On Mon, May 23, 2022 at 06:58:13PM -0700, Stefano Stabellini wrote:
> On Mon, 23 May 2022, Oleksandr wrote:
> > > > On Thu, 19 May 2022, Oleksandr wrote:
> > > > > > On Wed, May 18, 2022 at 5:06 PM Oleksandr  
> > > > > > wrote:
> > > > > > > On 18.05.22 17:32, Arnd Bergmann wrote:
> > > > > > > > On Sat, May 7, 2022 at 7:19 PM Oleksandr Tyshchenko
> > > > > > > >  wrote:
> > > > > > > >     This would mean having a device
> > > > > > > > node for the grant-table mechanism that can be referred to using
> > > > > > > > the
> > > > > > > > 'iommus'
> > > > > > > > phandle property, with the domid as an additional argument.
> > > > > > > I assume, you are speaking about something like the following?
> > > > > > > 
> > > > > > > 
> > > > > > > xen_dummy_iommu {
> > > > > > >   compatible = "xen,dummy-iommu";
> > > > > > >   #iommu-cells = <1>;
> > > > > > > };
> > > > > > > 
> > > > > > > virtio@3000 {
> > > > > > >   compatible = "virtio,mmio";
> > > > > > >   reg = <0x3000 0x100>;
> > > > > > >   interrupts = <41>;
> > > > > > > 
> > > > > > >   /* The device is located in Xen domain with ID 1 */
> > > > > > >   iommus = <&xen_dummy_iommu 1>;
> > > > > > > };
> > > > > > Right, that's the idea,
> > > > > thank you for the confirmation
> > > > > 
> > > > > 
> > > > > 
> > > > > >    except I would not call it a 'dummy'.
> > > > > >   From the perspective of the DT, this behaves just like an IOMMU,
> > > > > > even if the exact mechanism is different from most hardware IOMMU
> > > > > > implementations.
> > > > > well, agree
> > > > > 
> > > > > 
> > > > > > > > It does not quite fit the model that Linux currently uses for
> > > > > > > > iommus,
> > > > > > > > as that has an allocator for dma_addr_t space
> > > > > > > yes (# 3/7 adds grant-table based allocator)
> > > > > > > 
> > > > > > > 
> > > > > > > > , but I would think it's
> > > > > > > > conceptually close enough that it makes sense for the binding.
> > > > > > > Interesting idea. I am wondering, do we need an extra actions for
> > > > > > > this
> > > > > > > to work in Linux guest (dummy IOMMU driver, etc)?
> > > > > > It depends on how closely the guest implementation can be made to
> > > > > > resemble a normal iommu. If you do allocate dma_addr_t addresses,
> > > > > > it may actually be close enough that you can just turn the 
> > > > > > grant-table
> > > > > > code into a normal iommu driver and change nothing else.
> > > > > Unfortunately, I failed to find a way to use grant references at the
> > > > > iommu_ops level (I mean to fully pretend that we are an IOMMU 
> > > > > driver). I
> > > > > am
> > > > > not too familiar with that, so what is written below might be wrong or
> > > > > at
> > > > > least not precise.
> > > > > 
> > > > > The normal IOMMU driver in Linux doesn’t allocate DMA addresses by
> > > > > itself, it
> > > > > just maps (IOVA-PA) what was requested to be mapped by the upper 
> > > > > layer.
> > > > > The
> > > > > DMA address allocation is done by the upper layer (DMA-IOMMU which is
> > > > > the glue
> > > > > layer between DMA API and IOMMU API allocates IOVA for PA?). But, all
> > > > > what we
> > > > > need here is just to allocate our specific grant-table based DMA
> > > > > addresses
> > > > > (DMA address = grant reference + offset in the page), so let’s say we
> > > > > need an
> > > > > entity to take a physical address as parameter and return a DMA 
> > > > > address
> > > > > (what
> > > > > actually commit #3/7 is doing), and that’s all. So working at the
> > > > > dma_ops
> > > > > layer we get exactly what we need, with the minimal changes to guest
> > > > > infrastructure. In our case Xen itself acts as an IOMMU.
> > > > > 
> > > > > Assuming that we want to reuse the IOMMU infrastructure somehow for 
> > > > > our
> > > > > needs.
> > > > > I think, in that case we will likely need to introduce a new specific
> > > > > IOVA
> > > > > allocator (alongside with a generic one) to be hooked up by the
> > > > > DMA-IOMMU
> > > > > layer if we run on top of Xen. But, even having the specific IOVA
> > > > > allocator to
> > > > > return what we indeed need (DMA address = grant reference + offset in
> > > > > the
> > > > > page) we will still need the specific minimal required IOMMU driver to
> > > > > be
> > > > > present in the system anyway in order to track the mappings(?) and do
> > > > > nothing
> > > > > with them, returning a success (this specific IOMMU driver should have
> > > > > all
> > > > > mandatory callbacks implemented).
> > > > > 
> > > > > I completely agree, it would be really nice to reuse generic IOMMU
> > > > > bindings
> > > > > rather than introducing Xen specific property if what we are trying to
> > > > > implement in current patch series fits in the usage of "iommus" in 
> > > > > Linux
> > > > > more-less. But, if we will have to add more complexity/more components
> > > > > to the
> > > > > code for the sake of reusing device 

Re: [RFC PATCH v2 0/5] TUN/VirtioNet USO features support.

2022-05-24 Thread Andrew Melnichenko
Hi all,

The issue is that the host segments packets sent between guests on the same host.
Tests show that it happens because of the SKB_GSO_DODGY skb offload in
virtio_net_hdr_from_skb().
To do segmentation you need to remove SKB_GSO_DODGY or add SKB_GSO_PARTIAL.
The solution with DODGY/PARTIAL offload looks like a dirty hack, so
for now, I've left it as it is for further investigation.


On Tue, May 17, 2022 at 9:32 AM Jason Wang  wrote:
>
> On Thu, May 12, 2022 at 7:33 PM Andrew Melnychenko  wrote:
> >
> > Added new offloads for TUN devices TUN_F_USO4 and TUN_F_USO6.
> > Technically they enable NETIF_F_GSO_UDP_L4
> > (and only if USO4 & USO6 are set simultaneously).
> > It allows transmission of large UDP packets.
> >
> > Different features USO4 and USO6 are required for qemu, where Windows
> > guests can enable/disable USO receive for IPv4 and IPv6 separately.
> > On the other side, Linux can't really differentiate USO4 and USO6, for now.
> > For now, to enable USO for TUN it requires enabling USO4 and USO6 together.
> > In the future, there would be a mechanism to control UDP_L4 GSO separately.
> >
> > Test it WIP Qemu https://github.com/daynix/qemu/tree/Dev_USOv2
> >
> > New types for VirtioNet already on mailing:
> > https://lists.oasis-open.org/archives/virtio-comment/202110/msg00010.html
> >
> > Also, there is a known issue with transmitting packets between two guests.
>
> Could you explain this more? It looks like a bug. (Or any pointer to
> the discussion)
>
> Thanks
>
> > Without hacks with the skb's GSO, packets are still segmented in the host's
> > postrouting.
> >
> > Andrew (5):
> >   uapi/linux/if_tun.h: Added new offload types for USO4/6.
> >   driver/net/tun: Added features for USO.
> >   uapi/linux/virtio_net.h: Added USO types.
> >   linux/virtio_net.h: Support USO offload in vnet header.
> >   drivers/net/virtio_net.c: Added USO support.
> >
> >  drivers/net/tap.c   | 10 --
> >  drivers/net/tun.c   |  8 +++-
> >  drivers/net/virtio_net.c| 19 +++
> >  include/linux/virtio_net.h  |  9 +
> >  include/uapi/linux/if_tun.h |  2 ++
> >  include/uapi/linux/virtio_net.h |  4 
> >  6 files changed, 45 insertions(+), 7 deletions(-)
> >
> > --
> > 2.35.1
> >
>
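For background, a sketch of how userspace would request the new offloads
on a TAP device once this series is applied. The TUN_F_USO4/TUN_F_USO6
values are taken from patch 1 of this series, not from released headers,
so the fallback defines below are assumptions:

#include <fcntl.h>
#include <string.h>
#include <sys/ioctl.h>
#include <net/if.h>
#include <linux/if_tun.h>

#ifndef TUN_F_USO4
#define TUN_F_USO4 0x20 /* from this series */
#endif
#ifndef TUN_F_USO6
#define TUN_F_USO6 0x40 /* from this series */
#endif

int main(void)
{
    struct ifreq ifr;
    unsigned int offloads;
    int fd = open("/dev/net/tun", O_RDWR);

    if (fd < 0)
        return 1;
    memset(&ifr, 0, sizeof(ifr));
    ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR;
    strncpy(ifr.ifr_name, "tap0", IFNAMSIZ - 1);
    if (ioctl(fd, TUNSETIFF, &ifr) < 0)
        return 1;
    /* As the cover letter notes, USO4 and USO6 must currently be
     * enabled together for UDP_L4 GSO to take effect. */
    offloads = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 |
               TUN_F_USO4 | TUN_F_USO6;
    if (ioctl(fd, TUNSETOFFLOAD, offloads) < 0)
        return 1;
    return 0;
}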


Re: [PATCH 1/4] vdpa: Add stop operation

2022-05-24 Thread Stefano Garzarella

On Tue, May 24, 2022 at 09:42:06AM +0200, Eugenio Perez Martin wrote:

On Tue, May 24, 2022 at 9:09 AM Stefano Garzarella  wrote:


On Mon, May 23, 2022 at 09:20:14PM +0200, Eugenio Perez Martin wrote:
>On Sat, May 21, 2022 at 12:13 PM Si-Wei Liu  wrote:
>>
>>
>>
>> On 5/20/2022 10:23 AM, Eugenio Pérez wrote:
>> > This operation is optional: if it's not implemented, the backend feature bit
>> > will not be exposed.
>> >
>> > Signed-off-by: Eugenio Pérez 
>> > ---
>> >   include/linux/vdpa.h | 6 ++
>> >   1 file changed, 6 insertions(+)
>> >
>> > diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h
>> > index 15af802d41c4..ddfebc4e1e01 100644
>> > --- a/include/linux/vdpa.h
>> > +++ b/include/linux/vdpa.h
>> > @@ -215,6 +215,11 @@ struct vdpa_map_file {
>> >* @reset:  Reset device
>> >*  @vdev: vdpa device
>> >*  Returns integer: success (0) or error (< 0)
>> > + * @stop: Stop or resume the device (optional, but it must
>> > + *   be implemented if device stop is required)
>> > + *   @vdev: vdpa device
>> > + *   @stop: stop (true), not stop (false)
>> > + *   Returns integer: success (0) or error (< 0)
>> Is this uAPI meant to address all use cases described in the full blown
>> _F_STOP virtio spec proposal, such as:
>>
>> --%<--
>>
>> .. the device MUST finish any in-flight
>> operations after the driver writes STOP.  Depending on the device, it can
>> do it in many ways as long as the driver can recover its normal operation
>> if it resumes the device without the need of resetting it:
>>
>> - Drain and wait for the completion of all pending requests until a
>>convenient avail descriptor. Ignore any other posterior descriptor.
>> - Return a device-specific failure for these descriptors, so the driver
>>can choose to retry or to cancel them.
>> - Mark them as done even if they are not, if the kind of device can
>>assume to lose them.
>> --%<--
>>
>
>Right, this is totally underspecified in this series.
>
>I'll expand on it in the next version, but that text proposed to
>virtio-comment was complicated and misleading. I find better to get
>the previous version description. Would the next description work?
>
>```
>After the ioctl returns, the device MUST finish any pending operations, such
>as in-flight requests. It must also preserve all the necessary state (the
>virtqueue vring base plus any device-specific state) that is required for
>restoring it in the future.

For block devices, waiting for all in-flight requests could take a long
time.

Could this be a problem if the caller gets stuck on this ioctl?

If it could be a problem, maybe we should use an eventfd to signal that
the device is successfully stopped.



For that particular problem I'd very much prefer to directly add an
ioctl to get the in-flight descriptors. We know for sure we will need
them, and it will be cleaner in the long run.


Makes sense!



As I understand the vdpa block simulator, there is no need to return
the inflight descriptors since all of the requests are processed in a
synchronous way. So, for this iteration, we could offer the stop
feature to qemu.


Right, the simulator handles everything synchronously.



Other non-simulated devices would need it. Could it be delayed to
future development?


Yep, sure, it sounds like you already have a plan, so no problem :-)

Thanks,
Stefano



Re: [RFC PATCH v1 0/8] virtio/vsock: experimental zerocopy receive

2022-05-24 Thread Stefano Garzarella

On Fri, May 20, 2022 at 11:09:11AM +, Arseniy Krasnov wrote:

Hello Stefano,

On 19.05.2022 10:42, Stefano Garzarella wrote:

On Wed, May 18, 2022 at 11:04:30AM +, Arseniy Krasnov wrote:

Hello Stefano,

On 17.05.2022 18:14, Stefano Garzarella wrote:

Hi Arseniy,

On Thu, May 12, 2022 at 05:04:11AM +, Arseniy Krasnov wrote:

 INTRODUCTION

Hello, this is an experimental implementation of virtio vsock zerocopy
receive. It was inspired by TCP zerocopy receive by Eric Dumazet. This API uses
the same idea: call 'mmap()' on the socket's descriptor, then every
'getsockopt()' will fill the provided vma area with pages of virtio RX buffers.
After the received data has been processed by the user, the pages must be freed
by an 'madvise()' call with the MADV_DONTNEED flag set (if the user doesn't
call 'madvise()', the next 'getsockopt()' will fail).


Sounds cool, but maybe we would need some socket/net experts here for review.


Yes, that would be great



Could we do something similar for the sending path as well?


Here are thoughts about zerocopy transmission:

I tried to implement this feature in the following way: the user creates
some page-aligned buffer, then during tx packet allocation, instead of
creating the data buffer with 'kmalloc()', I tried to add the user's buffer
to the virtio queue. But I found a problem: as the kernel virtio API uses
virtual addresses to add new buffers, deep inside the virtio subsystem
'virt_to_phys()' is called to get the physical address of the buffer, so the
user's virtual address won't be translated correctly to a physical address
(in theory, I could perform a page walk for such a user VA, get the physical
address and pass some "fake" virtual address to the virtio API in order to
make 'virt_to_phys()' return a valid physical address, but I think this is ugly).


And maybe we should also pin the pages to prevent them from being replaced.

I think we should do something similar to what we do in vhost-vdpa.
Take a look at vhost_vdpa_pa_map() in drivers/vhost/vdpa.c


Hm, ok. I'll read about vdpa...






If we are talking about the 'mmap()' way, I think we can do the following:
the user calls 'mmap()' on the socket, and the kernel fills the newly created
mapping with allocated pages (all pages have rw permissions). Now the user can
use the pages of this mapping (e.g. fill them with data). Finally, to start
transmission, the user calls 'getsockopt()' or some 'ioctl()' and the kernel
processes the data of this mapping. Also, as this call returns immediately
(e.g. it is asynchronous), some completion logic must be implemented. For
example, use the same way as MSG_ZEROCOPY does - poll the error queue of the
socket to get a message that the pages can be reused, or don't allow the user
to work with these pages: unmap them, perform the transmission and finally
free the pages. To start a new transmission the user needs to call 'mmap()' again.

   OR

I think there is another unusual way for zerocopy tx: let's use 'vmsplice()'
/'splice()'. In this approach, to transmit something, the user does the
following steps:
1) Creates a pipe.
2) Calls 'vmsplice(SPLICE_F_GIFT)' on this pipe, inserting data pages into it.
   SPLICE_F_GIFT allows the user to forget about the allocated pages - the
   kernel will free them.
3) Calls 'splice(SPLICE_F_MOVE)' from the pipe to the socket. SPLICE_F_MOVE
   will move pages from the pipe to the socket (e.g. in a special socket
   callback we get the set of the pipe's pages as an input argument and all
   pages will be inserted into the virtio queue).

But as SPLICE_F_MOVE support is disabled, it must be repaired first.


Splice seems interesting, but it would be nice if we did something similar to
TCP. IIUC they use a flag for send(2):

    send(fd, buf, sizeof(buf), MSG_ZEROCOPY);



Yes, but in this way I think:
1) What is 'buf'? It can't be the user's address, since this buffer must be
   inserted into the tx queue. E.g. it must be allocated by the kernel and
   then returned to the user for tx purposes. In the TCP case, 'buf' is the
   user's address (of course page-aligned) because the TCP logic uses sk_buff,
   which allows using such memory as a data buffer.


IIUC we can pin that buffer like we do in vhost-vdpa, and use it in the 
VQ.


2) To wait until the tx process is done (e.g. the pages can be used again),
   such an API (send + MSG_ZEROCOPY) uses the socket's error queue - poll for
   events that signal tx is finished. So the same mechanism must be
   implemented for virtio vsock.


Yeah, I think so.
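For reference, the TCP-style flow under discussion (MSG_ZEROCOPY send plus
an error-queue completion) looks roughly like this; whether and how it maps
onto vsock is exactly the open question above:

#include <errno.h>
#include <poll.h>
#include <sys/socket.h>

/* Send with MSG_ZEROCOPY and wait for the completion notification on
 * the socket error queue, as TCP supports today. Sketch with minimal
 * error handling; requires setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, ...)
 * beforehand. */
static int send_zerocopy(int fd, const void *buf, size_t len)
{
    char control[128];
    struct msghdr msg = { 0 };
    struct pollfd pfd = { .fd = fd, .events = 0 }; /* POLLERR is implicit */

    if (send(fd, buf, len, MSG_ZEROCOPY) < 0)
        return -errno;

    if (poll(&pfd, 1, -1) < 0 || !(pfd.revents & POLLERR))
        return -EIO;

    /* The completion arrives as a SO_EE_ORIGIN_ZEROCOPY message; read
     * it off the error queue so the pages can be reused. */
    msg.msg_control = control;
    msg.msg_controllen = sizeof(control);
    if (recvmsg(fd, &msg, MSG_ERRQUEUE) < 0)
        return -errno;
    return 0;
}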




 






    DETAILS

Here is how the mapping with mapped pages looks exactly: the first page of the
mapping contains an array of trimmed virtio vsock packet headers (each one
contains only the length of data on the corresponding page and a 'flags' field):

struct virtio_vsock_usr_hdr {
    uint32_t length;
    uint32_t flags;
};

The 'length' field lets the user know the exact size of the payload within
each sequence of pages, and 'flags' allows the user to handle SOCK_SEQPACKET
flags (such as message bounds or record bounds). All other pages are data
pages from the RX queue.

    Page 0              Page 1      Page N

[ hdr1 .. hdrN ][ data ] .. [ data ]
  |       |        ^            ^
  |       |________|            |
  |_____________________________|
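Based on the API described above, a rough userspace sketch of the receive
path; the option name, level, and exact semantics are hypothetical
placeholders for whatever this RFC defines, shown only to illustrate the
mmap/getsockopt/madvise cycle:

#include <stdint.h>
#include <sys/mman.h>
#include <sys/socket.h>

struct virtio_vsock_usr_hdr {
    uint32_t length;
    uint32_t flags;
};

#define PAGE_SIZE 4096UL
#define RX_AREA   (32 * PAGE_SIZE)  /* illustrative size */
#define SO_VSOCK_ZC_RECV 12         /* hypothetical option name */

static void zc_recv_once(int fd)
{
    /* Map the receive window over the socket. */
    void *area = mmap(NULL, RX_AREA, PROT_READ | PROT_WRITE,
                      MAP_SHARED, fd, 0);
    socklen_t len = RX_AREA;

    if (area == MAP_FAILED)
        return;

    /* Each getsockopt() fills the area: page 0 holds the headers,
     * the following pages hold RX buffer data. */
    if (getsockopt(fd, AF_VSOCK, SO_VSOCK_ZC_RECV, area, &len) == 0) {
        struct virtio_vsock_usr_hdr *hdr = area;
        char *data = (char *)area + PAGE_SIZE;

        /* assumption: a zero-length header terminates the array */
        for (; hdr->length; hdr++, data += PAGE_SIZE) {
            /* consume hdr->length bytes at data; honor
             * hdr->flags for SEQPACKET bounds */
        }
    }

    /* Pages must be returned before the next getsockopt(). */
    madvise(area, RX_AREA, MADV_DONTNEED);
    munmap(area, RX_AREA);
}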
Re: [PATCH 1/4] vdpa: Add stop operation

2022-05-24 Thread Stefano Garzarella

On Mon, May 23, 2022 at 09:20:14PM +0200, Eugenio Perez Martin wrote:

On Sat, May 21, 2022 at 12:13 PM Si-Wei Liu  wrote:




On 5/20/2022 10:23 AM, Eugenio Pérez wrote:
> This operation is optional: if it's not implemented, the backend feature bit
> will not be exposed.
>
> Signed-off-by: Eugenio Pérez 
> ---
>   include/linux/vdpa.h | 6 ++
>   1 file changed, 6 insertions(+)
>
> diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h
> index 15af802d41c4..ddfebc4e1e01 100644
> --- a/include/linux/vdpa.h
> +++ b/include/linux/vdpa.h
> @@ -215,6 +215,11 @@ struct vdpa_map_file {
>* @reset:  Reset device
>*  @vdev: vdpa device
>*  Returns integer: success (0) or error (< 0)
> + * @stop: Stop or resume the device (optional, but it must
> + *   be implemented if device stop is required)
> + *   @vdev: vdpa device
> + *   @stop: stop (true), not stop (false)
> + *   Returns integer: success (0) or error (< 0)
Is this uAPI meant to address all use cases described in the full blown
_F_STOP virtio spec proposal, such as:

--%<--

.. the device MUST finish any in-flight
operations after the driver writes STOP.  Depending on the device, it can do
it in many ways as long as the driver can recover its normal operation if it
resumes the device without the need of resetting it:

- Drain and wait for the completion of all pending requests until a
   convenient avail descriptor. Ignore any other posterior descriptor.
- Return a device-specific failure for these descriptors, so the driver
   can choose to retry or to cancel them.
- Mark them as done even if they are not, if the kind of device can
   assume to lose them.
--%<--



Right, this is totally underspecified in this series.

I'll expand on it in the next version, but that text proposed to
virtio-comment was complicated and misleading. I find better to get
the previous version description. Would the next description work?

```
After the ioctl returns, the device MUST finish any pending operations, such
as in-flight requests. It must also preserve all the necessary state (the
virtqueue vring base plus any device-specific state) that is required for
restoring it in the future.


For block devices, waiting for all in-flight requests could take a long
time.


Could this be a problem if the caller gets stuck on this ioctl?

If it could be a problem, maybe we should use an eventfd to signal that 
the device is successfully stopped.


Thanks,
Stefano
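For readers unfamiliar with the pattern suggested here, the generic eventfd
flow looks like this; it is purely illustrative - no such vdpa API exists in
this series:

#include <stdint.h>
#include <unistd.h>
#include <sys/eventfd.h>

int main(void)
{
    /* Userspace would hand this fd to the kernel (e.g. via a new,
     * hypothetical ioctl), then block until the device signals
     * completion. */
    int efd = eventfd(0, 0);
    uint64_t done;

    if (efd < 0)
        return 1;
    /* ... pass efd to the (hypothetical) stop ioctl, which would
     * return immediately while the device drains in-flight
     * requests ... */
    if (read(efd, &done, sizeof(done)) != sizeof(done)) /* blocks */
        return 1;
    close(efd);
    return 0;
}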



Re: [PATCH] virtio-crypto: Fix an error handling path in virtio_crypto_alg_skcipher_close_session()

2022-05-24 Thread Jason Wang
On Sun, May 22, 2022 at 9:07 PM Christophe JAILLET
 wrote:
>
> Now that a private buffer is allocated (see commit in the Fixes tag),
> it must be released in all error handling paths.
>
> Add the missing goto to avoid a leak in the error handling path.
>
> Fixes: 42e6ac99e417 ("virtio-crypto: use private buffer for control request")
> Signed-off-by: Christophe JAILLET 

Acked-by: Jason Wang 

> ---
>  drivers/crypto/virtio/virtio_crypto_skcipher_algs.c | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/crypto/virtio/virtio_crypto_skcipher_algs.c b/drivers/crypto/virtio/virtio_crypto_skcipher_algs.c
> index e553ccadbcbc..e5876286828b 100644
> --- a/drivers/crypto/virtio/virtio_crypto_skcipher_algs.c
> +++ b/drivers/crypto/virtio/virtio_crypto_skcipher_algs.c
> @@ -239,7 +239,8 @@ static int virtio_crypto_alg_skcipher_close_session(
> pr_err("virtio_crypto: Close session failed status: %u, 
> session_id: 0x%llx\n",
> ctrl_status->status, destroy_session->session_id);
>
> -   return -EINVAL;
> +   err = -EINVAL;
> +   goto out;
> }
>
> err = 0;
> --
> 2.34.1
>
