Re: [PATCH v2 4/4] vdpa_sim: Implement stop vdpa op

2022-05-25 Thread Stefano Garzarella

On Tue, May 24, 2022 at 07:06:10PM +0200, Eugenio Pérez wrote:

Implement stop operation for vdpa_sim devices, so vhost-vdpa will offer
that backend feature and userspace can effectively stop the device.

This is a must before get virtqueue indexes (base) for live migration,
since the device could modify them after userland gets them. There are
individual ways to perform that action for some devices
(VHOST_NET_SET_BACKEND, VHOST_VSOCK_SET_RUNNING, ...) but there was no
way to perform it for any vhost device (and, in particular, vhost-vdpa).

After the return of ioctl with stop != 0, the device MUST finish any
pending operations like in flight requests. It must also preserve all
the necessary state (the virtqueue vring base plus the possible device
specific states) that is required for restoring in the future. The
device must not change its configuration after that point.

After the return of ioctl with stop == 0, the device can continue
processing buffers as long as typical conditions are met (vq is enabled,
DRIVER_OK status bit is enabled, etc).

In the future, we will provide features similar to
VHOST_USER_GET_INFLIGHT_FD so the device can save pending operations.

Signed-off-by: Eugenio Pérez 
---
drivers/vdpa/vdpa_sim/vdpa_sim.c | 21 +
drivers/vdpa/vdpa_sim/vdpa_sim.h |  1 +
drivers/vdpa/vdpa_sim/vdpa_sim_blk.c |  3 +++
drivers/vdpa/vdpa_sim/vdpa_sim_net.c |  3 +++
4 files changed, 28 insertions(+)

diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c
index 50d721072beb..0515cf314bed 100644
--- a/drivers/vdpa/vdpa_sim/vdpa_sim.c
+++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c
@@ -107,6 +107,7 @@ static void vdpasim_do_reset(struct vdpasim *vdpasim)
for (i = 0; i < vdpasim->dev_attr.nas; i++)
vhost_iotlb_reset(>iommu[i]);

+   vdpasim->running = true;
spin_unlock(>iommu_lock);

vdpasim->features = 0;
@@ -505,6 +506,24 @@ static int vdpasim_reset(struct vdpa_device *vdpa)
return 0;
}

+static int vdpasim_stop(struct vdpa_device *vdpa, bool stop)
+{
+   struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
+   int i;
+
+   spin_lock(>lock);
+   vdpasim->running = !stop;
+   if (vdpasim->running) {
+   /* Check for missed buffers */
+   for (i = 0; i < vdpasim->dev_attr.nvqs; ++i)
+   vdpasim_kick_vq(vdpa, i);
+
+   }
+   spin_unlock(>lock);
+
+   return 0;
+}
+
static size_t vdpasim_get_config_size(struct vdpa_device *vdpa)
{
struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
@@ -694,6 +713,7 @@ static const struct vdpa_config_ops vdpasim_config_ops = {
.get_status = vdpasim_get_status,
.set_status = vdpasim_set_status,
.reset  = vdpasim_reset,
+   .stop   = vdpasim_stop,
.get_config_size= vdpasim_get_config_size,
.get_config = vdpasim_get_config,
.set_config = vdpasim_set_config,
@@ -726,6 +746,7 @@ static const struct vdpa_config_ops 
vdpasim_batch_config_ops = {
.get_status = vdpasim_get_status,
.set_status = vdpasim_set_status,
.reset  = vdpasim_reset,
+   .stop   = vdpasim_stop,
.get_config_size= vdpasim_get_config_size,
.get_config = vdpasim_get_config,
.set_config = vdpasim_set_config,
diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.h b/drivers/vdpa/vdpa_sim/vdpa_sim.h
index 622782e92239..061986f30911 100644
--- a/drivers/vdpa/vdpa_sim/vdpa_sim.h
+++ b/drivers/vdpa/vdpa_sim/vdpa_sim.h
@@ -66,6 +66,7 @@ struct vdpasim {
u32 generation;
u64 features;
u32 groups;
+   bool running;
/* spinlock to synchronize iommu table */
spinlock_t iommu_lock;
};
diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim_blk.c 
b/drivers/vdpa/vdpa_sim/vdpa_sim_blk.c
index 42d401d43911..bcdb1982c378 100644
--- a/drivers/vdpa/vdpa_sim/vdpa_sim_blk.c
+++ b/drivers/vdpa/vdpa_sim/vdpa_sim_blk.c
@@ -204,6 +204,9 @@ static void vdpasim_blk_work(struct work_struct *work)
if (!(vdpasim->status & VIRTIO_CONFIG_S_DRIVER_OK))
goto out;

+   if (!vdpasim->running)
+   goto out;
+


Not related to this series, but I think in vdpa_sim_blk.c we should 
implement something similar to what we already do in vdpa_sim_net.c and 
re-schedule the work after X requests handled, otherwise we risk never 
stopping if there are always requests to handle.


Also for supporting multiple queues, that could be a problem, but for 
now we only support one, so there should be no problem.


I have other patches to send for vdpa_sim_blk.c, so if you want I can do 
that in my series.


Thanks,
Stefano


for (i = 0; i < VDPASIM_BLK_VQ_NUM; i++) {
struct vdpasim_virtqueue *vq = >vqs[i];

diff --git 

Re: [PATCH 1/4] vdpa: Add stop operation

2022-05-24 Thread Stefano Garzarella

On Tue, May 24, 2022 at 09:42:06AM +0200, Eugenio Perez Martin wrote:

On Tue, May 24, 2022 at 9:09 AM Stefano Garzarella  wrote:


On Mon, May 23, 2022 at 09:20:14PM +0200, Eugenio Perez Martin wrote:
>On Sat, May 21, 2022 at 12:13 PM Si-Wei Liu  wrote:
>>
>>
>>
>> On 5/20/2022 10:23 AM, Eugenio Pérez wrote:
>> > This operation is optional: It it's not implemented, backend feature bit
>> > will not be exposed.
>> >
>> > Signed-off-by: Eugenio Pérez 
>> > ---
>> >   include/linux/vdpa.h | 6 ++
>> >   1 file changed, 6 insertions(+)
>> >
>> > diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h
>> > index 15af802d41c4..ddfebc4e1e01 100644
>> > --- a/include/linux/vdpa.h
>> > +++ b/include/linux/vdpa.h
>> > @@ -215,6 +215,11 @@ struct vdpa_map_file {
>> >* @reset:  Reset device
>> >*  @vdev: vdpa device
>> >*  Returns integer: success (0) or error (< 0)
>> > + * @stop:Stop or resume the device (optional, but it 
must
>> > + *   be implemented if require device stop)
>> > + *   @vdev: vdpa device
>> > + *   @stop: stop (true), not stop (false)
>> > + *   Returns integer: success (0) or error (< 0)
>> Is this uAPI meant to address all use cases described in the full blown
>> _F_STOP virtio spec proposal, such as:
>>
>> --%<--
>>
>> .. the device MUST finish any in flight
>> operations after the driver writes STOP.  Depending on the device, it
>> can do it
>> in many ways as long as the driver can recover its normal operation
>> if it
>> resumes the device without the need of resetting it:
>>
>> - Drain and wait for the completion of all pending requests until a
>>convenient avail descriptor. Ignore any other posterior descriptor.
>> - Return a device-specific failure for these descriptors, so the driver
>>can choose to retry or to cancel them.
>> - Mark them as done even if they are not, if the kind of device can
>>assume to lose them.
>> --%<--
>>
>
>Right, this is totally underspecified in this series.
>
>I'll expand on it in the next version, but that text proposed to
>virtio-comment was complicated and misleading. I find better to get
>the previous version description. Would the next description work?
>
>```
>After the return of ioctl, the device MUST finish any pending operations like
>in flight requests. It must also preserve all the necessary state (the
>virtqueue vring base plus the possible device specific states) that is required
>for restoring in the future.

For block devices wait for all in-flight requests could take several
time.

Could this be a problem if the caller gets stuck on this ioctl?

If it could be a problem, maybe we should use an eventfd to signal that
the device is successfully stopped.



For that particular problem I'd very much prefer to add directly an
ioctl to get the inflight descriptors. We know for sure we will need
them, and it will be cleaner in the long run.


Makes sense!



As I understand the vdpa block simulator, there is no need to return
the inflight descriptors since all of the requests are processed in a
synchronous way. So, for this iteration, we could offer the stop
feature to qemu.


Right, the simulator handles everything synchronously.



Other non-simulated devices would need it. Could it be delayed to
future development?


Yep, sure, it sounds like you already have a plan, so no problem :-)

Thanks,
Stefano

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [RFC PATCH v1 0/8] virtio/vsock: experimental zerocopy receive

2022-05-24 Thread Stefano Garzarella

On Fri, May 20, 2022 at 11:09:11AM +, Arseniy Krasnov wrote:

Hello Stefano,

On 19.05.2022 10:42, Stefano Garzarella wrote:

On Wed, May 18, 2022 at 11:04:30AM +, Arseniy Krasnov wrote:

Hello Stefano,

On 17.05.2022 18:14, Stefano Garzarella wrote:

Hi Arseniy,

On Thu, May 12, 2022 at 05:04:11AM +, Arseniy Krasnov wrote:

 INTRODUCTION

Hello, this is experimental implementation of virtio vsock zerocopy
receive. It was inspired by TCP zerocopy receive by Eric Dumazet. This API uses
same idea: call 'mmap()' on socket's descriptor, then every 'getsockopt()' will
fill provided vma area with pages of virtio RX buffers. After received data was
processed by user, pages must be freed by 'madvise()'  call with MADV_DONTNEED
flag set(if user won't call 'madvise()', next 'getsockopt()' will fail).


Sounds cool, but maybe we would need some socket/net experts here for review.


Yes, that would be great



Could we do something similar for the sending path as well?


Here are thoughts about zerocopy transmission:

I tried to implement this feature in the following way: user creates
some page aligned buffer, then during tx packet allocation instead of
creating data buffer with 'kmalloc()', i tried to add user's buffer
to virtio queue. But found problem: as kernel virtio API uses virtual
addresses to add new buffers, in the deep of virtio subsystem
'virt_to_phys()' is called to get physical address of buffer, so user's
virtual address won't be translated correctly to physical address(in
theory, i can perform page walk for such user's va, get physical address
and pass some "fake" virtual address to virtio API in order to make
'virt_to_phys()' return valid physical address(but i think this is ugly).


And maybe we should also pin the pages to prevent them from being replaced.

I think we should do something similar to what we do in vhost-vdpa.
Take a look at vhost_vdpa_pa_map() in drivers/vhost/vdpa.c


Hm, ok. I'll read about vdpa...






If we are talking about 'mmap()' way, i think we can do the following:
user calls 'mmap()' on socket, kernel fills newly created mapping with
allocated pages(all pages have rw permissions). Now user can use pages
of this mapping(e.g. fill it with data). Finally, to start transmission,
user calls 'getsockopt()' or some 'ioctl()' and kernel processes data of
this mapping. Also as this call will return immediately(e.g. it is
asynchronous), some completion logic must be implemented. For example
use same way as MSG_ZEROCOPY uses - poll error queue of socket to get
message that pages could be reused, or don't allow user to work with
these pages: unmap it, perform transmission and finally free pages.
To start new transmission user need to call 'mmap()' again.

   OR

I think there is another unusual way for zerocopy tx: let's use 'vmsplice()'
/'splice()'. In this approach to transmit something, user does the following
steps:
1) Creates pipe.
2) Calls 'vmsplice(SPLICE_F_GIFT)' on this pipe, insert data pages to it.
  SPLICE_F_GIFT allows user to forget about allocated pages - kernel will
  free it.
3) Calls 'splice(SPLICE_F_MOVE)' from pipe to socket. SPLICE_F_MOVE will
  move pages from pipe to socket(e.g. in special socket callback we got
  set of pipe's pages as input argument and all pages will be inserted
  to virtio queue).

But as SPLICE_F_MOVE support is disabled, it must be repaired first.


Splice seems interesting, but it would be nice If we do something similar to 
TCP. IIUC they use a flag for send(2):

    send(fd, buf, sizeof(buf), MSG_ZEROCOPY);



Yes, but in this way i think:
1) What is 'buf'? It can't be user's address, since this buffer must be 
inserted to tx queue.
  E.g. it must be allocated by kernel and then returned to user for tx 
purposes. In TCP
  case, 'buf' is user's address(of course page aligned) because TCP logic uses 
sk_buff which
  allows to use such memory as data buffer.


IIUC we can pin that buffer like we do in vhost-vdpa, and use it in the 
VQ.


2) To wait tx process is done(e.g. pages can be used again), such 
API(send + MSG_ZEROCOPY),
  uses socket's error queue - poll events that tx is finished. So same 
  way must be

  implemented for virtio vsock.


Yeah, I think so.




 






    DETAILS

Here is how mapping with mapped pages looks exactly: first page mapping
contains array of trimmed virtio vsock packet headers (in contains only length
of data on the corresponding page and 'flags' field):

struct virtio_vsock_usr_hdr {
    uint32_t length;
    uint32_t flags;
};

Field  'length' allows user to know exact size of payload within each sequence
of pages and 'flags' allows user to handle SOCK_SEQPACKET flags(such as message
bounds or record bounds). All other pages are data pages from RX queue.

    Page 0  Page 1  Page N

[ hdr1 .. hdrN ][ data ]

Re: [PATCH 1/4] vdpa: Add stop operation

2022-05-24 Thread Stefano Garzarella

On Mon, May 23, 2022 at 09:20:14PM +0200, Eugenio Perez Martin wrote:

On Sat, May 21, 2022 at 12:13 PM Si-Wei Liu  wrote:




On 5/20/2022 10:23 AM, Eugenio Pérez wrote:
> This operation is optional: It it's not implemented, backend feature bit
> will not be exposed.
>
> Signed-off-by: Eugenio Pérez 
> ---
>   include/linux/vdpa.h | 6 ++
>   1 file changed, 6 insertions(+)
>
> diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h
> index 15af802d41c4..ddfebc4e1e01 100644
> --- a/include/linux/vdpa.h
> +++ b/include/linux/vdpa.h
> @@ -215,6 +215,11 @@ struct vdpa_map_file {
>* @reset:  Reset device
>*  @vdev: vdpa device
>*  Returns integer: success (0) or error (< 0)
> + * @stop:Stop or resume the device (optional, but it must
> + *   be implemented if require device stop)
> + *   @vdev: vdpa device
> + *   @stop: stop (true), not stop (false)
> + *   Returns integer: success (0) or error (< 0)
Is this uAPI meant to address all use cases described in the full blown
_F_STOP virtio spec proposal, such as:

--%<--

.. the device MUST finish any in flight
operations after the driver writes STOP.  Depending on the device, it
can do it
in many ways as long as the driver can recover its normal operation 
if it

resumes the device without the need of resetting it:

- Drain and wait for the completion of all pending requests until a
   convenient avail descriptor. Ignore any other posterior descriptor.
- Return a device-specific failure for these descriptors, so the driver
   can choose to retry or to cancel them.
- Mark them as done even if they are not, if the kind of device can
   assume to lose them.
--%<--



Right, this is totally underspecified in this series.

I'll expand on it in the next version, but that text proposed to
virtio-comment was complicated and misleading. I find better to get
the previous version description. Would the next description work?

```
After the return of ioctl, the device MUST finish any pending operations like
in flight requests. It must also preserve all the necessary state (the
virtqueue vring base plus the possible device specific states) that is required
for restoring in the future.


For block devices wait for all in-flight requests could take several 
time.


Could this be a problem if the caller gets stuck on this ioctl?

If it could be a problem, maybe we should use an eventfd to signal that 
the device is successfully stopped.


Thanks,
Stefano

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH] vhost-vdpa: return -EFAULT on copy_to_user() failure

2022-05-23 Thread Stefano Garzarella

On Mon, May 23, 2022 at 11:33:26AM +0300, Dan Carpenter wrote:

The copy_to_user() function returns the number of bytes remaining to be
copied.  However, we need to return a negative error code, -EFAULT, to
the user.

Fixes: 87f4c217413a ("vhost-vdpa: introduce uAPI to get the number of virtqueue 
groups")
Fixes: e96ef636f154 ("vhost-vdpa: introduce uAPI to get the number of address 
spaces")
Signed-off-by: Dan Carpenter 
---
drivers/vhost/vdpa.c | 8 +---
1 file changed, 5 insertions(+), 3 deletions(-)


Reviewed-by: Stefano Garzarella 



diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
index 3e86080041fc..935a1d0ddb97 100644
--- a/drivers/vhost/vdpa.c
+++ b/drivers/vhost/vdpa.c
@@ -609,11 +609,13 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep,
r = vhost_vdpa_get_vring_num(v, argp);
break;
case VHOST_VDPA_GET_GROUP_NUM:
-   r = copy_to_user(argp, >vdpa->ngroups,
-sizeof(v->vdpa->ngroups));
+   if (copy_to_user(argp, >vdpa->ngroups,
+sizeof(v->vdpa->ngroups)))
+   r = -EFAULT;
break;
case VHOST_VDPA_GET_AS_NUM:
-   r = copy_to_user(argp, >vdpa->nas, sizeof(v->vdpa->nas));
+   if (copy_to_user(argp, >vdpa->nas, sizeof(v->vdpa->nas)))
+   r = -EFAULT;
break;
case VHOST_SET_LOG_BASE:
case VHOST_SET_LOG_FD:
--
2.35.1

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization



___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH] vhost-vdpa: Fix some error handling path in vhost_vdpa_process_iotlb_msg()

2022-05-23 Thread Stefano Garzarella

On Mon, May 23, 2022 at 12:41:03PM +0800, Jason Wang wrote:

On Sun, May 22, 2022 at 9:59 PM Christophe JAILLET
 wrote:


In the error paths introduced by the commit in the Fixes tag, a mutex may
be left locked.
Add the correct goto instead of a direct return.

Fixes: a1468175bb17 ("vhost-vdpa: support ASID based IOTLB API")
Signed-off-by: Christophe JAILLET 
---
WARNING: This patch only fixes the goto vs return mix-up in this function.
However, the 2nd hunk looks really spurious to me. I think that the:
-   return -EINVAL;
+   r = -EINVAL;
+   goto unlock;
should be done only in the 'if (!iotlb)' block.


It should be fine, the error happen if

1) the batched ASID based request is not equal (the first if)
2) there's no IOTLB for this ASID (the second if)

But I agree the code could be tweaked to use two different if instead
of using a or condition here.


Yeah, I think so!

Anyway, this patch LGTM:

Reviewed-by: Stefano Garzarella 

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [mst-vhost:vhost 26/43] drivers/vhost/vdpa.c:1003:3-9: preceding lock on line 991 (fwd)

2022-05-23 Thread Stefano Garzarella

On Fri, May 20, 2022 at 06:37:01PM +0200, Julia Lawall wrote:

Please check whether an unlock is needed before line 1003.


Yep, I think so. Same also for line 1016.

I just saw that there is already a patch posted to solve this problem:
https://lore.kernel.org/netdev/89ef0ae4c26ac3cfa440c71e97e392dcb328ac1b.1653227924.git.christophe.jail...@wanadoo.fr/

Thanks for the report,
Stefano



julia

-- Forwarded message --
Date: Fri, 20 May 2022 17:35:29 +0800
From: kernel test robot 
To: kbu...@lists.01.org
Cc: l...@intel.com, Julia Lawall 
Subject: [mst-vhost:vhost 26/43] drivers/vhost/vdpa.c:1003:3-9: preceding lock
   on line 991

CC: kbuild-...@lists.01.org
BCC: l...@intel.com
CC: k...@vger.kernel.org
CC: virtualization@lists.linux-foundation.org
CC: net...@vger.kernel.org
TO: Gautam Dawar 
CC: "Michael S. Tsirkin" 
CC: Jason Wang 

tree:   https://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost.git vhost
head:   73211bf1bc3ac0a3c544225e270401c1fe5d395d
commit: a1468175bb17ca5e477147de5d886e7a22d93527 [26/43] vhost-vdpa: support 
ASID based IOTLB API
:: branch date: 10 hours ago
:: commit date: 10 hours ago
config: arc-allmodconfig 
(https://download.01.org/0day-ci/archive/20220520/202205201721.rgqusahl-...@intel.com/config)
compiler: arceb-elf-gcc (GCC) 11.3.0

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot 
Reported-by: Julia Lawall 


cocci warnings: (new ones prefixed by >>)

drivers/vhost/vdpa.c:1003:3-9: preceding lock on line 991

  drivers/vhost/vdpa.c:1016:2-8: preceding lock on line 991

vim +1003 drivers/vhost/vdpa.c

4c8cf31885f69e Tiwei Bie2020-03-26   980
0f05062453fb51 Gautam Dawar 2022-03-30   981  static int 
vhost_vdpa_process_iotlb_msg(struct vhost_dev *dev, u32 asid,
4c8cf31885f69e Tiwei Bie2020-03-26   982
struct vhost_iotlb_msg *msg)
4c8cf31885f69e Tiwei Bie2020-03-26   983  {
4c8cf31885f69e Tiwei Bie2020-03-26   984struct vhost_vdpa *v = 
container_of(dev, struct vhost_vdpa, vdev);
25abc060d28213 Jason Wang   2020-08-04   985struct vdpa_device *vdpa = 
v->vdpa;
25abc060d28213 Jason Wang   2020-08-04   986const struct vdpa_config_ops *ops 
= vdpa->config;
a1468175bb17ca Gautam Dawar 2022-03-30   987struct vhost_iotlb *iotlb = 
NULL;
a1468175bb17ca Gautam Dawar 2022-03-30   988struct vhost_vdpa_as *as = NULL;
4c8cf31885f69e Tiwei Bie2020-03-26   989int r = 0;
4c8cf31885f69e Tiwei Bie2020-03-26   990
a9d064524fc3cf Xie Yongji   2021-04-12  @991mutex_lock(>mutex);
a9d064524fc3cf Xie Yongji   2021-04-12   992
4c8cf31885f69e Tiwei Bie2020-03-26   993r = vhost_dev_check_owner(dev);
4c8cf31885f69e Tiwei Bie2020-03-26   994if (r)
a9d064524fc3cf Xie Yongji   2021-04-12   995goto unlock;
4c8cf31885f69e Tiwei Bie2020-03-26   996
a1468175bb17ca Gautam Dawar 2022-03-30   997if (msg->type == 
VHOST_IOTLB_UPDATE ||
a1468175bb17ca Gautam Dawar 2022-03-30   998msg->type == 
VHOST_IOTLB_BATCH_BEGIN) {
a1468175bb17ca Gautam Dawar 2022-03-30   999as = 
vhost_vdpa_find_alloc_as(v, asid);
a1468175bb17ca Gautam Dawar 2022-03-30  1000if (!as) {
a1468175bb17ca Gautam Dawar 2022-03-30  1001dev_err(>dev, 
"can't find and alloc asid %d\n",
a1468175bb17ca Gautam Dawar 2022-03-30  1002asid);
a1468175bb17ca Gautam Dawar 2022-03-30 @1003return -EINVAL;
a1468175bb17ca Gautam Dawar 2022-03-30  1004}
a1468175bb17ca Gautam Dawar 2022-03-30  1005iotlb = >iotlb;
a1468175bb17ca Gautam Dawar 2022-03-30  1006} else
a1468175bb17ca Gautam Dawar 2022-03-30  1007iotlb = 
asid_to_iotlb(v, asid);
a1468175bb17ca Gautam Dawar 2022-03-30  1008
a1468175bb17ca Gautam Dawar 2022-03-30  1009if ((v->in_batch && 
v->batch_asid != asid) || !iotlb) {
a1468175bb17ca Gautam Dawar 2022-03-30  1010  		if (v->in_batch 
&& v->batch_asid != asid) {

a1468175bb17ca Gautam Dawar 2022-03-30  1011dev_info(>dev, 
"batch id %d asid %d\n",
a1468175bb17ca Gautam Dawar 2022-03-30  1012 
v->batch_asid, asid);
a1468175bb17ca Gautam Dawar 2022-03-30  1013}
a1468175bb17ca Gautam Dawar 2022-03-30  1014if (!iotlb)
a1468175bb17ca Gautam Dawar 2022-03-30  1015dev_err(>dev, "no 
iotlb for asid %d\n", asid);
a1468175bb17ca Gautam Dawar 2022-03-30  1016return -EINVAL;
a1468175bb17ca Gautam Dawar 2022-03-30  1017}
a1468175bb17ca Gautam Dawar 2022-03-30  1018
4c8cf31885f69e Tiwei Bie2020-03-26  1019switch (msg->type) {
4c8cf31885f69e Tiwei Bie2020-03-26  1020case VHOST_IOTLB_UPDATE:
3111cb7283065a Gautam Dawar 2022-03-30  1021r = 
vhost_vdpa_process_iotlb_update(v, iotlb, msg);
4c8cf31885f69e Tiwei Bie2020-03-26  1022break;
4c8cf31885f69e Tiwei Bie2020-03-26  1023case 

Re: [PATCH 4/4] vdpa_sim: Implement stop vdpa op

2022-05-23 Thread Stefano Garzarella

On Fri, May 20, 2022 at 07:23:25PM +0200, Eugenio Pérez wrote:

Implement stop operation for vdpa_sim devices, so vhost-vdpa will offer
that backend feature and userspace can effectively stop the device.

This is a must before get virtqueue indexes (base) for live migration,
since the device could modify them after userland gets them. There are
individual ways to perform that action for some devices
(VHOST_NET_SET_BACKEND, VHOST_VSOCK_SET_RUNNING, ...) but there was no
way to perform it for any vhost device (and, in particular, vhost-vdpa).

Signed-off-by: Eugenio Pérez 
---
drivers/vdpa/vdpa_sim/vdpa_sim.c | 21 +
drivers/vdpa/vdpa_sim/vdpa_sim.h |  1 +
drivers/vdpa/vdpa_sim/vdpa_sim_net.c |  3 +++
3 files changed, 25 insertions(+)

diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c
index 50d721072beb..0515cf314bed 100644
--- a/drivers/vdpa/vdpa_sim/vdpa_sim.c
+++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c
@@ -107,6 +107,7 @@ static void vdpasim_do_reset(struct vdpasim *vdpasim)
for (i = 0; i < vdpasim->dev_attr.nas; i++)
vhost_iotlb_reset(>iommu[i]);

+   vdpasim->running = true;
spin_unlock(>iommu_lock);

vdpasim->features = 0;
@@ -505,6 +506,24 @@ static int vdpasim_reset(struct vdpa_device *vdpa)
return 0;
}

+static int vdpasim_stop(struct vdpa_device *vdpa, bool stop)
+{
+   struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
+   int i;
+
+   spin_lock(>lock);
+   vdpasim->running = !stop;
+   if (vdpasim->running) {
+   /* Check for missed buffers */
+   for (i = 0; i < vdpasim->dev_attr.nvqs; ++i)
+   vdpasim_kick_vq(vdpa, i);
+
+   }
+   spin_unlock(>lock);
+
+   return 0;
+}
+
static size_t vdpasim_get_config_size(struct vdpa_device *vdpa)
{
struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
@@ -694,6 +713,7 @@ static const struct vdpa_config_ops vdpasim_config_ops = {
.get_status = vdpasim_get_status,
.set_status = vdpasim_set_status,
.reset  = vdpasim_reset,
+   .stop   = vdpasim_stop,
.get_config_size= vdpasim_get_config_size,
.get_config = vdpasim_get_config,
.set_config = vdpasim_set_config,
@@ -726,6 +746,7 @@ static const struct vdpa_config_ops 
vdpasim_batch_config_ops = {
.get_status = vdpasim_get_status,
.set_status = vdpasim_set_status,
.reset  = vdpasim_reset,
+   .stop   = vdpasim_stop,
.get_config_size= vdpasim_get_config_size,
.get_config = vdpasim_get_config,
.set_config = vdpasim_set_config,
diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.h b/drivers/vdpa/vdpa_sim/vdpa_sim.h
index 622782e92239..061986f30911 100644
--- a/drivers/vdpa/vdpa_sim/vdpa_sim.h
+++ b/drivers/vdpa/vdpa_sim/vdpa_sim.h
@@ -66,6 +66,7 @@ struct vdpasim {
u32 generation;
u64 features;
u32 groups;
+   bool running;
/* spinlock to synchronize iommu table */
spinlock_t iommu_lock;
};
diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim_net.c 
b/drivers/vdpa/vdpa_sim/vdpa_sim_net.c
index 5125976a4df8..886449e88502 100644
--- a/drivers/vdpa/vdpa_sim/vdpa_sim_net.c
+++ b/drivers/vdpa/vdpa_sim/vdpa_sim_net.c
@@ -154,6 +154,9 @@ static void vdpasim_net_work(struct work_struct *work)

spin_lock(>lock);

+   if (!vdpasim->running)
+   goto out;
+


It would be nice to do the same for vdpa_sim_blk as well.

Thanks,
Stefano

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH v2] vdpasim: allow to enable a vq repeatedly

2022-05-19 Thread Stefano Garzarella

On Thu, May 19, 2022 at 04:59:19PM +0200, Eugenio Pérez wrote:

Code must be resilient to enable a queue many times.

At the moment the queue is resetting so it's definitely not the expected
behavior.

v2: set vq->ready = 0 at disable.

Fixes: 2c53d0f64c06 ("vdpasim: vDPA device simulator")
Cc: sta...@vger.kernel.org
Signed-off-by: Eugenio Pérez 
---
drivers/vdpa/vdpa_sim/vdpa_sim.c | 5 -
1 file changed, 4 insertions(+), 1 deletion(-)


Reviewed-by: Stefano Garzarella 

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH] vdpasim: allow to enable a vq repeatedly

2022-05-19 Thread Stefano Garzarella

On Thu, May 19, 2022 at 04:31:45PM +0200, Eugenio Pérez wrote:

Code must be resilient to enable a queue many times.

At the moment the queue is resetting so it's definitely not the expected
behavior.

Fixes: 2c53d0f64c06 ("vdpasim: vDPA device simulator")
Cc: sta...@vger.kernel.org
Signed-off-by: Eugenio Pérez 
---
drivers/vdpa/vdpa_sim/vdpa_sim.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c
index ddbe142af09a..b53cd00ad161 100644
--- a/drivers/vdpa/vdpa_sim/vdpa_sim.c
+++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c
@@ -355,9 +355,10 @@ static void vdpasim_set_vq_ready(struct vdpa_device *vdpa, 
u16 idx, bool ready)
struct vdpasim_virtqueue *vq = >vqs[idx];

spin_lock(>lock);
-   vq->ready = ready;
-   if (vq->ready)
+   if (!vq->ready) {
+   vq->ready = ready;
vdpasim_queue_ready(vdpasim, idx);
+   }


But this way the first time vq->ready is set to true, then it will never 
be set back to false.


Should we leave the assignment out of the block?
Maybe after the if block to avoid the problem we are fixing.

Thanks,
Stefano

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH V5 9/9] virtio: use WARN_ON() to warn illegal status value

2022-05-19 Thread Stefano Garzarella

On Wed, May 18, 2022 at 11:59:51AM +0800, Jason Wang wrote:

We used to use BUG_ON() in virtio_device_ready() to detect illegal
status value, this seems sub-optimal since the value is under the
control of the device. Switch to use WARN_ON() instead.

Cc: Thomas Gleixner 
Cc: Peter Zijlstra 
Cc: "Paul E. McKenney" 
Cc: Marc Zyngier 
Cc: Halil Pasic 
Cc: Cornelia Huck 
Cc: Vineeth Vijayan 
Cc: Peter Oberparleiter 
Cc: linux-s...@vger.kernel.org
Signed-off-by: Jason Wang 
---
include/linux/virtio_config.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)


Reviewed-by: Stefano Garzarella 

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH V5 3/9] virtio: introduce config op to synchronize vring callbacks

2022-05-19 Thread Stefano Garzarella

On Wed, May 18, 2022 at 11:59:45AM +0800, Jason Wang wrote:

This patch introduces new virtio config op to vring
callbacks. Transport specific method is required to make sure the
write before this function is visible to the vring_interrupt() that is
called after the return of this function. For the transport that
doesn't provide synchronize_vqs(), use synchornize_rcu() which
synchronize with IRQ implicitly as a fallback.

Cc: Thomas Gleixner 
Cc: Peter Zijlstra 
Cc: "Paul E. McKenney" 
Cc: Marc Zyngier 
Cc: Halil Pasic 
Cc: Cornelia Huck 
Cc: Vineeth Vijayan 
Cc: Peter Oberparleiter 
Cc: linux-s...@vger.kernel.org
Reviewed-by: Cornelia Huck 
Signed-off-by: Jason Wang 
---
include/linux/virtio_config.h | 25 +
1 file changed, 25 insertions(+)


Reviewed-by: Stefano Garzarella 

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH V5 2/9] virtio: use virtio_reset_device() when possible

2022-05-19 Thread Stefano Garzarella

On Wed, May 18, 2022 at 11:59:44AM +0800, Jason Wang wrote:

This allows us to do common extension without duplicating code.

Cc: Thomas Gleixner 
Cc: Peter Zijlstra 
Cc: "Paul E. McKenney" 
Cc: Marc Zyngier 
Cc: Halil Pasic 
Cc: Cornelia Huck 
Cc: Vineeth Vijayan 
Cc: Peter Oberparleiter 
Cc: linux-s...@vger.kernel.org
Reviewed-by: Cornelia Huck 
Signed-off-by: Jason Wang 
---
drivers/virtio/virtio.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)


Reviewed-by: Stefano Garzarella 

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [RFC PATCH v1 0/8] virtio/vsock: experimental zerocopy receive

2022-05-19 Thread Stefano Garzarella

On Wed, May 18, 2022 at 11:04:30AM +, Arseniy Krasnov wrote:

Hello Stefano,

On 17.05.2022 18:14, Stefano Garzarella wrote:

Hi Arseniy,

On Thu, May 12, 2022 at 05:04:11AM +, Arseniy Krasnov wrote:

 INTRODUCTION

Hello, this is experimental implementation of virtio vsock zerocopy
receive. It was inspired by TCP zerocopy receive by Eric Dumazet. This API uses
same idea: call 'mmap()' on socket's descriptor, then every 'getsockopt()' will
fill provided vma area with pages of virtio RX buffers. After received data was
processed by user, pages must be freed by 'madvise()'  call with MADV_DONTNEED
flag set(if user won't call 'madvise()', next 'getsockopt()' will fail).


Sounds cool, but maybe we would need some socket/net experts here for review.


Yes, that would be great



Could we do something similar for the sending path as well?


Here are thoughts about zerocopy transmission:

I tried to implement this feature in the following way: user creates
some page aligned buffer, then during tx packet allocation instead of
creating data buffer with 'kmalloc()', i tried to add user's buffer
to virtio queue. But found problem: as kernel virtio API uses virtual
addresses to add new buffers, in the deep of virtio subsystem
'virt_to_phys()' is called to get physical address of buffer, so user's
virtual address won't be translated correctly to physical address(in
theory, i can perform page walk for such user's va, get physical address
and pass some "fake" virtual address to virtio API in order to make
'virt_to_phys()' return valid physical address(but i think this is ugly).


And maybe we should also pin the pages to prevent them from being 
replaced.


I think we should do something similar to what we do in vhost-vdpa.
Take a look at vhost_vdpa_pa_map() in drivers/vhost/vdpa.c




If we are talking about 'mmap()' way, i think we can do the following:
user calls 'mmap()' on socket, kernel fills newly created mapping with
allocated pages(all pages have rw permissions). Now user can use pages
of this mapping(e.g. fill it with data). Finally, to start transmission,
user calls 'getsockopt()' or some 'ioctl()' and kernel processes data of
this mapping. Also as this call will return immediately(e.g. it is
asynchronous), some completion logic must be implemented. For example
use same way as MSG_ZEROCOPY uses - poll error queue of socket to get
message that pages could be reused, or don't allow user to work with
these pages: unmap it, perform transmission and finally free pages.
To start new transmission user need to call 'mmap()' again.

   OR

I think there is another unusual way for zerocopy tx: let's use 'vmsplice()'
/'splice()'. In this approach to transmit something, user does the following
steps:
1) Creates pipe.
2) Calls 'vmsplice(SPLICE_F_GIFT)' on this pipe, insert data pages to it.
  SPLICE_F_GIFT allows user to forget about allocated pages - kernel will
  free it.
3) Calls 'splice(SPLICE_F_MOVE)' from pipe to socket. SPLICE_F_MOVE will
  move pages from pipe to socket(e.g. in special socket callback we got
  set of pipe's pages as input argument and all pages will be inserted
  to virtio queue).

But as SPLICE_F_MOVE support is disabled, it must be repaired first.


Splice seems interesting, but it would be nice If we do something 
similar to TCP. IIUC they use a flag for send(2):


send(fd, buf, sizeof(buf), MSG_ZEROCOPY);

 






    DETAILS

Here is how mapping with mapped pages looks exactly: first page mapping
contains array of trimmed virtio vsock packet headers (in contains only length
of data on the corresponding page and 'flags' field):

struct virtio_vsock_usr_hdr {
    uint32_t length;
    uint32_t flags;
};

Field  'length' allows user to know exact size of payload within each sequence
of pages and 'flags' allows user to handle SOCK_SEQPACKET flags(such as message
bounds or record bounds). All other pages are data pages from RX queue.

    Page 0  Page 1  Page N

[ hdr1 .. hdrN ][ data ] .. [ data ]
  |    |   ^   ^
  |    |   |   |
  |    *---*
  |    |
  |    |
  **

Of course, single header could represent array of pages (when packet's
buffer is bigger than one page).So here is example of detailed mapping layout
for some set of packages. Lets consider that we have the following sequence  of
packages: 56 bytes, 4096 bytes and 8200 bytes. All pages: 0,1,2,3,4 and 5 will
be inserted to user's vma(vma is large enough).

Page 0: [[ hdr0 ][ hdr 1 ][ hdr 2 ][ hdr 3 ] ... ]
Page 1: [ 56 ]
Page 2: [ 4096 ]
Page 3: [ 4096 ]
Page 4: [ 4096 ]
Page 5: [ 8 ]

Page 0 contains only array of headers:
'hdr0' has 56 in length field.
'hdr1' has 4096 in length field.
'hdr2' has 8200 in le

Re: [PATCH V3 4/8] vhost_test: remove vhost_test_flush_vq()

2022-05-18 Thread Stefano Garzarella

On Tue, May 17, 2022 at 01:08:46PM -0500, Mike Christie wrote:

From: Andrey Ryabinin 

vhost_test_flush_vq() just a simple wrapper around vhost_work_dev_flush()
which seems have no value. It's just easier to call vhost_work_dev_flush()
directly. Besides there is no point in obtaining vhost_dev pointer
via 'n->vqs[index].poll.dev' while we can just use >dev.
It's the same pointers, see vhost_test_open()/vhost_dev_init().

Signed-off-by: Andrey Ryabinin 
Signed-off-by: Mike Christie 
Reviewed-by: Chaitanya Kulkarni 
Acked-by: Jason Wang 
---
drivers/vhost/test.c | 11 +++
1 file changed, 3 insertions(+), 8 deletions(-)


Reviewed-by: Stefano Garzarella 

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [RFC PATCH v1 0/8] virtio/vsock: experimental zerocopy receive

2022-05-17 Thread Stefano Garzarella

Hi Arseniy,

On Thu, May 12, 2022 at 05:04:11AM +, Arseniy Krasnov wrote:

 INTRODUCTION

Hello, this is experimental implementation of virtio vsock zerocopy
receive. It was inspired by TCP zerocopy receive by Eric Dumazet. This API uses
same idea: call 'mmap()' on socket's descriptor, then every 'getsockopt()' will
fill provided vma area with pages of virtio RX buffers. After received data was
processed by user, pages must be freed by 'madvise()'  call with MADV_DONTNEED
flag set(if user won't call 'madvise()', next 'getsockopt()' will fail).


Sounds cool, but maybe we would need some socket/net experts here for 
review.


Could we do something similar for the sending path as well?



DETAILS

Here is how mapping with mapped pages looks exactly: first page mapping
contains array of trimmed virtio vsock packet headers (in contains only length
of data on the corresponding page and 'flags' field):

struct virtio_vsock_usr_hdr {
uint32_t length;
uint32_t flags;
};

Field  'length' allows user to know exact size of payload within each sequence
of pages and 'flags' allows user to handle SOCK_SEQPACKET flags(such as message
bounds or record bounds). All other pages are data pages from RX queue.

Page 0  Page 1  Page N

[ hdr1 .. hdrN ][ data ] .. [ data ]
  ||   ^   ^
  ||   |   |
  |*---*
  ||
  ||
  **

Of course, single header could represent array of pages (when packet's
buffer is bigger than one page).So here is example of detailed mapping layout
for some set of packages. Lets consider that we have the following sequence  of
packages: 56 bytes, 4096 bytes and 8200 bytes. All pages: 0,1,2,3,4 and 5 will
be inserted to user's vma(vma is large enough).

Page 0: [[ hdr0 ][ hdr 1 ][ hdr 2 ][ hdr 3 ] ... ]
Page 1: [ 56 ]
Page 2: [ 4096 ]
Page 3: [ 4096 ]
Page 4: [ 4096 ]
Page 5: [ 8 ]

Page 0 contains only array of headers:
'hdr0' has 56 in length field.
'hdr1' has 4096 in length field.
'hdr2' has 8200 in length field.
'hdr3' has 0 in length field(this is end of data marker).

Page 1 corresponds to 'hdr0' and has only 56 bytes of data.
Page 2 corresponds to 'hdr1' and filled with data.
Page 3 corresponds to 'hdr2' and filled with data.
Page 4 corresponds to 'hdr2' and filled with data.
Page 5 corresponds to 'hdr2' and has only 8 bytes of data.

This patchset also changes packets allocation way: today implementation
uses only 'kmalloc()' to create data buffer. Problem happens when we try to map
such buffers to user's vma - kernel forbids to map slab pages to user's vma(as
pages of "not large" 'kmalloc()' allocations are marked with PageSlab flag and
"not large" could be bigger than one page). So to avoid this, data buffers now
allocated using 'alloc_pages()' call.

  TESTS

This patchset updates 'vsock_test' utility: two tests for new feature
were added. First test covers invalid cases. Second checks valid transmission
case.


Thanks for adding the test!



   BENCHMARKING

For benchmakring I've added small utility 'rx_zerocopy'. It works in
client/server mode. When client connects to server, server starts sending exact
amount of data to client(amount is set as input argument).Client reads data and
waits for next portion of it. Client works in two modes: copy and zero-copy. In
copy mode client uses 'read()' call while in zerocopy mode sequence of 'mmap()'
/'getsockopt()'/'madvise()' are used. Smaller amount of time for transmission
is better. For server, we can set size of tx buffer and for client we can set
size of rx buffer or rx mapping size(in zerocopy mode). Usage of this utility
is quiet simple:

For client mode:

./rx_zerocopy --mode client [--zerocopy] [--rx]

For server mode:

./rx_zerocopy --mode server [--mb] [--tx]

[--mb] sets number of megabytes to transfer.
[--rx] sets size of receive buffer/mapping in pages.
[--tx] sets size of transmit buffer in pages.

I checked for transmission of 4000mb of data. Here are some results:

  size of rx/tx buffers in pages
  *---*
  |8   |32|64   |   256|   512|
*--**--*-*--*--*
|   zerocopy   |   24   |   10.6   |  12.2   |   23.6   |21| secs to
*--* process
| non-zerocopy |   13   |   16.4   |  24.7   |   27.2   |   23.9   | 4000 mb

Re: [PATCH V2 0/8] vhost flush cleanups

2022-05-17 Thread Stefano Garzarella

On Sun, May 15, 2022 at 03:29:14PM -0500, Mike Christie wrote:

The following patches are Andrey Ryabinin's flush cleanups and some
from me. They reduce the number of flush calls and remove some bogus
ones where we don't even have a worker running anymore or they were
based on outdated or incorrect assumptions.

Jason, for the patches you gave me an explicit acked/reviewed tag I
added it. For the replies where I answered your review questions and
you only replied with an affirimative reply I did not add a tag,
because I was not not 100% sure what you wanted to do.

These patches will be used in the vhost threading patches which I think
will make progress again now that I have Christian's email figured out
:) (he had moved to microsoft but I've been using the ubuntu address).
I think the patches are ok cleanups in general so I thought they could
get merged separately if people agree.

V2:
- Added patch to rename vhost_work_dev_flush to just vhost_dev_flush
to handle review comment from Jason about the naming not being so
great.




Thank you for this cleanup!

I think there is only a small issue to fix in vhost/test.c, otherwise it 
looks good to me :-)


Thanks,
Stefano

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH V2 8/8] vhost: rename vhost_work_dev_flush

2022-05-17 Thread Stefano Garzarella

On Sun, May 15, 2022 at 03:29:22PM -0500, Mike Christie wrote:

This patch renames vhost_work_dev_flush to just vhost_dev_flush to
relfect that it flushes everything on the device and that drivers
don't know/care that polls are based on vhost_works. Drivers just
flush the entire device and polls, and works for vhost-scsi
management TMFs and IO net virtqueues, etc all are flushed.

Signed-off-by: Mike Christie 
---
drivers/vhost/net.c   |  4 ++--
drivers/vhost/scsi.c  |  2 +-
drivers/vhost/test.c  |  2 +-
drivers/vhost/vhost.c | 10 +-
drivers/vhost/vhost.h |  2 +-
drivers/vhost/vsock.c |  2 +-
6 files changed, 11 insertions(+), 11 deletions(-)


Reviewed-by: Stefano Garzarella 

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH V2 4/8] vhost_test: remove vhost_test_flush_vq()

2022-05-17 Thread Stefano Garzarella

On Sun, May 15, 2022 at 03:29:18PM -0500, Mike Christie wrote:

From: Andrey Ryabinin 

vhost_test_flush_vq() just a simple wrapper around vhost_work_dev_flush()
which seems have no value. It's just easier to call vhost_work_dev_flush()
directly. Besides there is no point in obtaining vhost_dev pointer
via 'n->vqs[index].poll.dev' while we can just use >dev.
It's the same pointers, see vhost_test_open()/vhost_dev_init().

Signed-off-by: Andrey Ryabinin 
Signed-off-by: Mike Christie 
---
drivers/vhost/test.c | 11 +++
1 file changed, 3 insertions(+), 8 deletions(-)

diff --git a/drivers/vhost/test.c b/drivers/vhost/test.c
index f0ac9e35f5d6..837148d0a6a8 100644
--- a/drivers/vhost/test.c
+++ b/drivers/vhost/test.c
@@ -144,14 +144,9 @@ static void vhost_test_stop(struct vhost_test *n, void 
**privatep)
*privatep = vhost_test_stop_vq(n, n->vqs + VHOST_TEST_VQ);
}

-static void vhost_test_flush_vq(struct vhost_test *n, int index)
-{
-   vhost_work_dev_flush(n->vqs[index].poll.dev);
-}
-
static void vhost_test_flush(struct vhost_test *n)
{
-   vhost_test_flush_vq(n, VHOST_TEST_VQ);
+   vhost_work_dev_flush(>dev);
}

static int vhost_test_release(struct inode *inode, struct file *f)
@@ -210,7 +205,7 @@ static long vhost_test_run(struct vhost_test *n, int test)
goto err;

if (oldpriv) {
-   vhost_test_flush_vq(n, index);
+   vhost_test_flush(n, index);

^
Should we remove the `index` parameter?


}
}

@@ -303,7 +298,7 @@ static long vhost_test_set_backend(struct vhost_test *n, 
unsigned index, int fd)
mutex_unlock(>mutex);

if (enable) {
-   vhost_test_flush_vq(n, index);
+   vhost_test_flush(n);
}

mutex_unlock(>dev.mutex);
--
2.25.1



___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH V2 3/8] vhost_net: get rid of vhost_net_flush_vq() and extra flush calls

2022-05-17 Thread Stefano Garzarella

On Sun, May 15, 2022 at 03:29:17PM -0500, Mike Christie wrote:

From: Andrey Ryabinin 

vhost_net_flush_vq() calls vhost_work_dev_flush() twice passing
vhost_dev pointer obtained via 'n->poll[index].dev' and
'n->vqs[index].vq.poll.dev'. This is actually the same pointer,
initialized in vhost_net_open()/vhost_dev_init()/vhost_poll_init()

Remove vhost_net_flush_vq() and call vhost_work_dev_flush() directly.
Do the flushes only once instead of several flush calls in a row
which seems rather useless.

Signed-off-by: Andrey Ryabinin 
[drop vhost_dev forward declaration in vhost.h]
Signed-off-by: Mike Christie 
---
drivers/vhost/net.c | 11 ++-
1 file changed, 2 insertions(+), 9 deletions(-)


Reviewed-by: Stefano Garzarella 

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH V2 2/8] vhost: flush dev once during vhost_dev_stop

2022-05-17 Thread Stefano Garzarella

On Sun, May 15, 2022 at 03:29:16PM -0500, Mike Christie wrote:

When vhost_work_dev_flush returns all work queued at that time will have
completed. There is then no need to flush after every vhost_poll_stop
call, and we can move the flush call to after the loop that stops the
pollers.

Signed-off-by: Mike Christie 
---
drivers/vhost/vhost.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)


Reviewed-by: Stefano Garzarella 

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH V2 1/8] vhost: get rid of vhost_poll_flush() wrapper

2022-05-17 Thread Stefano Garzarella

On Sun, May 15, 2022 at 03:29:15PM -0500, Mike Christie wrote:

From: Andrey Ryabinin 

vhost_poll_flush() is a simple wrapper around vhost_work_dev_flush().
It gives wrong impression that we are doing some work over vhost_poll,
while in fact it flushes vhost_poll->dev.
It only complicate understanding of the code and leads to mistakes
like flushing the same vhost_dev several times in a row.

Just remove vhost_poll_flush() and call vhost_work_dev_flush() directly.

Signed-off-by: Andrey Ryabinin 
[merge vhost_poll_flush removal from Stefano Garzarella]
Signed-off-by: Mike Christie 
---
drivers/vhost/net.c   |  4 ++--
drivers/vhost/test.c  |  2 +-
drivers/vhost/vhost.c | 12 ++--
drivers/vhost/vhost.h |  1 -
drivers/vhost/vsock.c |  2 +-
5 files changed, 6 insertions(+), 15 deletions(-)


Reviewed-by: Stefano Garzarella 

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH] vdpa_sim_blk: add support for VIRTIO_BLK_T_FLUSH

2022-05-05 Thread Stefano Garzarella

On Thu, May 05, 2022 at 04:26:24PM +0800, Jason Wang wrote:

On Fri, Apr 29, 2022 at 3:14 PM Stefano Garzarella  wrote:


On Fri, Apr 29, 2022 at 10:46:40AM +0800, Jason Wang wrote:
>On Thu, Apr 28, 2022 at 11:13 PM Stefano Garzarella  
wrote:
>>
>> The simulator behaves like a ramdisk, so we don't have to do
>> anything when a VIRTIO_BLK_T_FLUSH request is received, but it
>> could be useful to test driver behavior.
>>
>> Let's expose the VIRTIO_BLK_F_FLUSH feature to inform the driver
>> that we support the flush command.
>>
>> Signed-off-by: Stefano Garzarella 
>> ---
>>  drivers/vdpa/vdpa_sim/vdpa_sim_blk.c | 12 
>>  1 file changed, 12 insertions(+)
>>
>> diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim_blk.c 
b/drivers/vdpa/vdpa_sim/vdpa_sim_blk.c
>> index 42d401d43911..a6dd1233797c 100644
>> --- a/drivers/vdpa/vdpa_sim/vdpa_sim_blk.c
>> +++ b/drivers/vdpa/vdpa_sim/vdpa_sim_blk.c
>> @@ -25,6 +25,7 @@
>>  #define DRV_LICENSE  "GPL v2"
>>
>>  #define VDPASIM_BLK_FEATURES   (VDPASIM_FEATURES | \
>> +(1ULL << VIRTIO_BLK_F_FLUSH)| \
>>  (1ULL << VIRTIO_BLK_F_SIZE_MAX) | \
>>  (1ULL << VIRTIO_BLK_F_SEG_MAX)  | \
>>  (1ULL << VIRTIO_BLK_F_BLK_SIZE) | \
>> @@ -166,6 +167,17 @@ static bool vdpasim_blk_handle_req(struct vdpasim 
*vdpasim,
>> pushed += bytes;
>> break;
>>
>> +   case VIRTIO_BLK_T_FLUSH:
>> +   if (sector != 0) {
>> +   dev_err(>vdpa.dev,
>> +   "A driver MUST set sector to 0 for a 
VIRTIO_BLK_T_FLUSH request - sector: 0x%llx\n",
>> +   sector);
>
>If this is something that could be triggered by userspace/guest, then
>we should avoid this.

It can only be triggered by an erratic driver.


Right, so guest can try to DOS the host via this.


Yes, but I don't expect the simulator to be used in the real world, but 
only for testing and development, so the user should have full control 
of the guest.






I was using the simulator to test a virtio-blk driver that I'm writing
in userspace and I forgot to set `sector` to zero, so I thought it would
be useful.

Do you mean to remove the error message?


Some like dev_warn_once() might be better here.


We also have other checks we do for each request (in and out header 
length, etc.) where we use dev_err(), should we change those too?


I don't know, from a developer's point of view I'd prefer to have them 
all printed, but actually if we have a totally wrong driver in the 
guest, we risk to hang our host to print an infinite number of messages.


Maybe we should change all the errors in the data path to 
dev_warn_once() and keep returning VIRTIO_BLK_S_IOERR to the guest which 
will surely get angry and print something.


If you agree, I'll send a patch to change all the printing and then 
repost this with your suggestion as well.


Thanks,
Stefano

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH] vdpa_sim_blk: add support for VIRTIO_BLK_T_FLUSH

2022-04-29 Thread Stefano Garzarella

On Fri, Apr 29, 2022 at 10:46:40AM +0800, Jason Wang wrote:

On Thu, Apr 28, 2022 at 11:13 PM Stefano Garzarella  wrote:


The simulator behaves like a ramdisk, so we don't have to do
anything when a VIRTIO_BLK_T_FLUSH request is received, but it
could be useful to test driver behavior.

Let's expose the VIRTIO_BLK_F_FLUSH feature to inform the driver
that we support the flush command.

Signed-off-by: Stefano Garzarella 
---
 drivers/vdpa/vdpa_sim/vdpa_sim_blk.c | 12 
 1 file changed, 12 insertions(+)

diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim_blk.c 
b/drivers/vdpa/vdpa_sim/vdpa_sim_blk.c
index 42d401d43911..a6dd1233797c 100644
--- a/drivers/vdpa/vdpa_sim/vdpa_sim_blk.c
+++ b/drivers/vdpa/vdpa_sim/vdpa_sim_blk.c
@@ -25,6 +25,7 @@
 #define DRV_LICENSE  "GPL v2"

 #define VDPASIM_BLK_FEATURES   (VDPASIM_FEATURES | \
+(1ULL << VIRTIO_BLK_F_FLUSH)| \
 (1ULL << VIRTIO_BLK_F_SIZE_MAX) | \
 (1ULL << VIRTIO_BLK_F_SEG_MAX)  | \
 (1ULL << VIRTIO_BLK_F_BLK_SIZE) | \
@@ -166,6 +167,17 @@ static bool vdpasim_blk_handle_req(struct vdpasim *vdpasim,
pushed += bytes;
break;

+   case VIRTIO_BLK_T_FLUSH:
+   if (sector != 0) {
+   dev_err(>vdpa.dev,
+   "A driver MUST set sector to 0 for a 
VIRTIO_BLK_T_FLUSH request - sector: 0x%llx\n",
+   sector);


If this is something that could be triggered by userspace/guest, then
we should avoid this.


It can only be triggered by an erratic driver.

I was using the simulator to test a virtio-blk driver that I'm writing 
in userspace and I forgot to set `sector` to zero, so I thought it would 
be useful.


Do you mean to remove the error message?

Thanks,
Stefano

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[PATCH] vdpa_sim_blk: add support for VIRTIO_BLK_T_FLUSH

2022-04-28 Thread Stefano Garzarella
The simulator behaves like a ramdisk, so we don't have to do
anything when a VIRTIO_BLK_T_FLUSH request is received, but it
could be useful to test driver behavior.

Let's expose the VIRTIO_BLK_F_FLUSH feature to inform the driver
that we support the flush command.

Signed-off-by: Stefano Garzarella 
---
 drivers/vdpa/vdpa_sim/vdpa_sim_blk.c | 12 
 1 file changed, 12 insertions(+)

diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim_blk.c 
b/drivers/vdpa/vdpa_sim/vdpa_sim_blk.c
index 42d401d43911..a6dd1233797c 100644
--- a/drivers/vdpa/vdpa_sim/vdpa_sim_blk.c
+++ b/drivers/vdpa/vdpa_sim/vdpa_sim_blk.c
@@ -25,6 +25,7 @@
 #define DRV_LICENSE  "GPL v2"
 
 #define VDPASIM_BLK_FEATURES   (VDPASIM_FEATURES | \
+(1ULL << VIRTIO_BLK_F_FLUSH)| \
 (1ULL << VIRTIO_BLK_F_SIZE_MAX) | \
 (1ULL << VIRTIO_BLK_F_SEG_MAX)  | \
 (1ULL << VIRTIO_BLK_F_BLK_SIZE) | \
@@ -166,6 +167,17 @@ static bool vdpasim_blk_handle_req(struct vdpasim *vdpasim,
pushed += bytes;
break;
 
+   case VIRTIO_BLK_T_FLUSH:
+   if (sector != 0) {
+   dev_err(>vdpa.dev,
+   "A driver MUST set sector to 0 for a 
VIRTIO_BLK_T_FLUSH request - sector: 0x%llx\n",
+   sector);
+   status = VIRTIO_BLK_S_IOERR;
+   break;
+   }
+
+   break;
+
default:
dev_warn(>vdpa.dev,
 "Unsupported request type %d\n", type);
-- 
2.35.1

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[PATCH net-next 2/2] vsock/virtio: add support for device suspend/resume

2022-04-28 Thread Stefano Garzarella
Implement .freeze and .restore callbacks of struct virtio_driver
to support device suspend/resume.

During suspension all connected sockets are reset and VQs deleted.
During resume the VQs are re-initialized.

Reported by: Vilas R K 
Signed-off-by: Stefano Garzarella 
---
 net/vmw_vsock/virtio_transport.c | 47 
 1 file changed, 47 insertions(+)

diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
index 31f4f6f40614..ad64f403536a 100644
--- a/net/vmw_vsock/virtio_transport.c
+++ b/net/vmw_vsock/virtio_transport.c
@@ -743,6 +743,49 @@ static void virtio_vsock_remove(struct virtio_device *vdev)
kfree(vsock);
 }
 
+#ifdef CONFIG_PM_SLEEP
+static int virtio_vsock_freeze(struct virtio_device *vdev)
+{
+   struct virtio_vsock *vsock = vdev->priv;
+
+   mutex_lock(_virtio_vsock_mutex);
+
+   rcu_assign_pointer(the_virtio_vsock, NULL);
+   synchronize_rcu();
+
+   virtio_vsock_vqs_del(vsock);
+
+   mutex_unlock(_virtio_vsock_mutex);
+
+   return 0;
+}
+
+static int virtio_vsock_restore(struct virtio_device *vdev)
+{
+   struct virtio_vsock *vsock = vdev->priv;
+   int ret;
+
+   mutex_lock(_virtio_vsock_mutex);
+
+   /* Only one virtio-vsock device per guest is supported */
+   if (rcu_dereference_protected(the_virtio_vsock,
+   lockdep_is_held(_virtio_vsock_mutex))) {
+   ret = -EBUSY;
+   goto out;
+   }
+
+   ret = virtio_vsock_vqs_init(vsock);
+   if (ret < 0)
+   goto out;
+
+   rcu_assign_pointer(the_virtio_vsock, vsock);
+
+out:
+   mutex_unlock(_virtio_vsock_mutex);
+   return ret;
+}
+#endif /* CONFIG_PM_SLEEP */
+
 static struct virtio_device_id id_table[] = {
{ VIRTIO_ID_VSOCK, VIRTIO_DEV_ANY_ID },
{ 0 },
@@ -760,6 +803,10 @@ static struct virtio_driver virtio_vsock_driver = {
.id_table = id_table,
.probe = virtio_vsock_probe,
.remove = virtio_vsock_remove,
+#ifdef CONFIG_PM_SLEEP
+   .freeze = virtio_vsock_freeze,
+   .restore = virtio_vsock_restore,
+#endif
 };
 
 static int __init virtio_vsock_init(void)
-- 
2.35.1

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[PATCH net-next 1/2] vsock/virtio: factor our the code to initialize and delete VQs

2022-04-28 Thread Stefano Garzarella
Add virtio_vsock_vqs_init() and virtio_vsock_vqs_del() with the code
that was in virtio_vsock_probe() and virtio_vsock_remove to initialize
and delete VQs.

These new functions will be used in the next commit to support device
suspend/resume

Signed-off-by: Stefano Garzarella 
---
 net/vmw_vsock/virtio_transport.c | 150 +--
 1 file changed, 84 insertions(+), 66 deletions(-)

diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
index ba1c8cc0c467..31f4f6f40614 100644
--- a/net/vmw_vsock/virtio_transport.c
+++ b/net/vmw_vsock/virtio_transport.c
@@ -566,67 +566,28 @@ static void virtio_transport_rx_work(struct work_struct 
*work)
mutex_unlock(>rx_lock);
 }
 
-static int virtio_vsock_probe(struct virtio_device *vdev)
+static int virtio_vsock_vqs_init(struct virtio_vsock *vsock)
 {
-   vq_callback_t *callbacks[] = {
-   virtio_vsock_rx_done,
-   virtio_vsock_tx_done,
-   virtio_vsock_event_done,
-   };
+   struct virtio_device *vdev = vsock->vdev;
static const char * const names[] = {
"rx",
"tx",
"event",
};
-   struct virtio_vsock *vsock = NULL;
+   vq_callback_t *callbacks[] = {
+   virtio_vsock_rx_done,
+   virtio_vsock_tx_done,
+   virtio_vsock_event_done,
+   };
int ret;
 
-   ret = mutex_lock_interruptible(_virtio_vsock_mutex);
-   if (ret)
-   return ret;
-
-   /* Only one virtio-vsock device per guest is supported */
-   if (rcu_dereference_protected(the_virtio_vsock,
-   lockdep_is_held(_virtio_vsock_mutex))) {
-   ret = -EBUSY;
-   goto out;
-   }
-
-   vsock = kzalloc(sizeof(*vsock), GFP_KERNEL);
-   if (!vsock) {
-   ret = -ENOMEM;
-   goto out;
-   }
-
-   vsock->vdev = vdev;
-
-   ret = virtio_find_vqs(vsock->vdev, VSOCK_VQ_MAX,
- vsock->vqs, callbacks, names,
+   ret = virtio_find_vqs(vdev, VSOCK_VQ_MAX, vsock->vqs, callbacks, names,
  NULL);
if (ret < 0)
-   goto out;
+   return ret;
 
virtio_vsock_update_guest_cid(vsock);
 
-   vsock->rx_buf_nr = 0;
-   vsock->rx_buf_max_nr = 0;
-   atomic_set(>queued_replies, 0);
-
-   mutex_init(>tx_lock);
-   mutex_init(>rx_lock);
-   mutex_init(>event_lock);
-   spin_lock_init(>send_pkt_list_lock);
-   INIT_LIST_HEAD(>send_pkt_list);
-   INIT_WORK(>rx_work, virtio_transport_rx_work);
-   INIT_WORK(>tx_work, virtio_transport_tx_work);
-   INIT_WORK(>event_work, virtio_transport_event_work);
-   INIT_WORK(>send_pkt_work, virtio_transport_send_pkt_work);
-
-   if (virtio_has_feature(vdev, VIRTIO_VSOCK_F_SEQPACKET))
-   vsock->seqpacket_allow = true;
-
-   vdev->priv = vsock;
-
virtio_device_ready(vdev);
 
mutex_lock(>tx_lock);
@@ -643,30 +604,15 @@ static int virtio_vsock_probe(struct virtio_device *vdev)
vsock->event_run = true;
mutex_unlock(>event_lock);
 
-   rcu_assign_pointer(the_virtio_vsock, vsock);
-
-   mutex_unlock(_virtio_vsock_mutex);
-
return 0;
-
-out:
-   kfree(vsock);
-   mutex_unlock(_virtio_vsock_mutex);
-   return ret;
 }
 
-static void virtio_vsock_remove(struct virtio_device *vdev)
+static void virtio_vsock_vqs_del(struct virtio_vsock *vsock)
 {
-   struct virtio_vsock *vsock = vdev->priv;
+   struct virtio_device *vdev = vsock->vdev;
struct virtio_vsock_pkt *pkt;
 
-   mutex_lock(_virtio_vsock_mutex);
-
-   vdev->priv = NULL;
-   rcu_assign_pointer(the_virtio_vsock, NULL);
-   synchronize_rcu();
-
-   /* Reset all connected sockets when the device disappear */
+   /* Reset all connected sockets when the VQs disappear */
vsock_for_each_connected_socket(_transport.transport,
virtio_vsock_reset_sock);
 
@@ -711,6 +657,78 @@ static void virtio_vsock_remove(struct virtio_device *vdev)
 
/* Delete virtqueues and flush outstanding callbacks if any */
vdev->config->del_vqs(vdev);
+}
+
+static int virtio_vsock_probe(struct virtio_device *vdev)
+{
+   struct virtio_vsock *vsock = NULL;
+   int ret;
+
+   ret = mutex_lock_interruptible(_virtio_vsock_mutex);
+   if (ret)
+   return ret;
+
+   /* Only one virtio-vsock device per guest is supported */
+   if (rcu_dereference_protected(the_virtio_vsock,
+   lockdep_is_held(_virtio_vsock_mutex))) {
+   ret = -EBUSY;
+   goto out;
+   }
+
+   vsock = kzalloc(sizeof(*vsock), GFP_KERNEL);
+   if (!vsock) {
+ 

[PATCH net-next 0/2] vsock/virtio: add support for device suspend/resume

2022-04-28 Thread Stefano Garzarella
Vilas reported that virtio-vsock no longer worked properly after
suspend/resume (echo mem >/sys/power/state).
It was impossible to connect to the host and vice versa.

Indeed, the support has never been implemented.

This series implement .freeze and .restore callbacks of struct virtio_driver
to support device suspend/resume.

The first patch factors our the code to initialize and delete VQs.
The second patch uses that code to support device suspend/resume.

Signed-off-by: Stefano Garzarella 

Stefano Garzarella (2):
  vsock/virtio: factor our the code to initialize and delete VQs
  vsock/virtio: add support for device suspend/resume

 net/vmw_vsock/virtio_transport.c | 197 ---
 1 file changed, 131 insertions(+), 66 deletions(-)

-- 
2.35.1

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH v2 3/5] hv_sock: Add validation for untrusted Hyper-V values

2022-04-27 Thread Stefano Garzarella

On Wed, Apr 27, 2022 at 03:12:23PM +0200, Andrea Parri (Microsoft) wrote:

For additional robustness in the face of Hyper-V errors or malicious
behavior, validate all values that originate from packets that Hyper-V
has sent to the guest in the host-to-guest ring buffer.  Ensure that
invalid values cannot cause data being copied out of the bounds of the
source buffer in hvs_stream_dequeue().

Signed-off-by: Andrea Parri (Microsoft) 
Reviewed-by: Michael Kelley 
---
include/linux/hyperv.h   |  5 +
net/vmw_vsock/hyperv_transport.c | 10 --
2 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index fe2e0179ed51e..55478a6810b60 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -1663,6 +1663,11 @@ static inline u32 hv_pkt_datalen(const struct 
vmpacket_descriptor *desc)
return (desc->len8 << 3) - (desc->offset8 << 3);
}

+/* Get packet length associated with descriptor */
+static inline u32 hv_pkt_len(const struct vmpacket_descriptor *desc)
+{
+   return desc->len8 << 3;
+}

struct vmpacket_descriptor *
hv_pkt_iter_first_raw(struct vmbus_channel *channel);
diff --git a/net/vmw_vsock/hyperv_transport.c b/net/vmw_vsock/hyperv_transport.c
index 8c37d07017fc4..fd98229e3db30 100644
--- a/net/vmw_vsock/hyperv_transport.c
+++ b/net/vmw_vsock/hyperv_transport.c
@@ -577,12 +577,18 @@ static bool hvs_dgram_allow(u32 cid, u32 port)
static int hvs_update_recv_data(struct hvsock *hvs)
{
struct hvs_recv_buf *recv_buf;
-   u32 payload_len;
+   u32 pkt_len, payload_len;
+
+   pkt_len = hv_pkt_len(hvs->recv_desc);
+
+   if (pkt_len < HVS_HEADER_LEN)
+   return -EIO;

recv_buf = (struct hvs_recv_buf *)(hvs->recv_desc + 1);
payload_len = recv_buf->hdr.data_size;

-   if (payload_len > HVS_MTU_SIZE)
+   if (payload_len > pkt_len - HVS_HEADER_LEN ||
+   payload_len > HVS_MTU_SIZE)
return -EIO;

if (payload_len == 0)
--
2.25.1



Reviewed-by: Stefano Garzarella 

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH v2 2/5] hv_sock: Copy packets sent by Hyper-V out of the ring buffer

2022-04-27 Thread Stefano Garzarella

On Wed, Apr 27, 2022 at 03:12:22PM +0200, Andrea Parri (Microsoft) wrote:

Pointers to VMbus packets sent by Hyper-V are used by the hv_sock driver
within the guest VM.  Hyper-V can send packets with erroneous values or
modify packet fields after they are processed by the guest.  To defend
against these scenarios, copy the incoming packet after validating its
length and offset fields using hv_pkt_iter_{first,next}().  Use
HVS_PKT_LEN(HVS_MTU_SIZE) to initialize the buffer which holds the
copies of the incoming packets.  In this way, the packet can no longer
be modified by the host.

Signed-off-by: Andrea Parri (Microsoft) 
Reviewed-by: Michael Kelley 
---
net/vmw_vsock/hyperv_transport.c | 9 +++--
1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/net/vmw_vsock/hyperv_transport.c b/net/vmw_vsock/hyperv_transport.c
index 943352530936e..8c37d07017fc4 100644
--- a/net/vmw_vsock/hyperv_transport.c
+++ b/net/vmw_vsock/hyperv_transport.c
@@ -78,6 +78,9 @@ struct hvs_send_buf {
 ALIGN((payload_len), 8) + \
 VMBUS_PKT_TRAILER_SIZE)

+/* Upper bound on the size of a VMbus packet for hv_sock */
+#define HVS_MAX_PKT_SIZE   HVS_PKT_LEN(HVS_MTU_SIZE)
+
union hvs_service_id {
guid_t  srv_id;

@@ -378,6 +381,8 @@ static void hvs_open_connection(struct vmbus_channel *chan)
rcvbuf = ALIGN(rcvbuf, HV_HYP_PAGE_SIZE);
}

+   chan->max_pkt_size = HVS_MAX_PKT_SIZE;
+
ret = vmbus_open(chan, sndbuf, rcvbuf, NULL, 0, hvs_channel_cb,
 conn_from_host ? new : sk);
if (ret != 0) {
@@ -602,7 +607,7 @@ static ssize_t hvs_stream_dequeue(struct vsock_sock *vsk, 
struct msghdr *msg,
return -EOPNOTSUPP;

if (need_refill) {
-   hvs->recv_desc = hv_pkt_iter_first_raw(hvs->chan);
+   hvs->recv_desc = hv_pkt_iter_first(hvs->chan);
if (!hvs->recv_desc)
return -ENOBUFS;
ret = hvs_update_recv_data(hvs);
@@ -618,7 +623,7 @@ static ssize_t hvs_stream_dequeue(struct vsock_sock *vsk, 
struct msghdr *msg,

hvs->recv_data_len -= to_read;
if (hvs->recv_data_len == 0) {
-   hvs->recv_desc = hv_pkt_iter_next_raw(hvs->chan, 
hvs->recv_desc);
+   hvs->recv_desc = hv_pkt_iter_next(hvs->chan, hvs->recv_desc);
if (hvs->recv_desc) {
ret = hvs_update_recv_data(hvs);
        if (ret)
--
2.25.1



Reviewed-by: Stefano Garzarella 

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH 3/5] hv_sock: Add validation for untrusted Hyper-V values

2022-04-21 Thread Stefano Garzarella
On Thu, Apr 21, 2022 at 5:30 PM Andrea Parri  wrote:
>
> > > @@ -577,12 +577,19 @@ static bool hvs_dgram_allow(u32 cid, u32 port)
> > > static int hvs_update_recv_data(struct hvsock *hvs)
> > > {
> > > struct hvs_recv_buf *recv_buf;
> > > -   u32 payload_len;
> > > +   u32 pkt_len, payload_len;
> > > +
> > > +   pkt_len = hv_pkt_len(hvs->recv_desc);
> > > +
> > > +   /* Ensure the packet is big enough to read its header */
> > > +   if (pkt_len < HVS_HEADER_LEN)
> > > +   return -EIO;
> > >
> > > recv_buf = (struct hvs_recv_buf *)(hvs->recv_desc + 1);
> > > payload_len = recv_buf->hdr.data_size;
> > >
> > > -   if (payload_len > HVS_MTU_SIZE)
> > > +   /* Ensure the packet is big enough to read its payload */
> > > +   if (payload_len > pkt_len - HVS_HEADER_LEN || payload_len > 
> > > HVS_MTU_SIZE)
> >
> > checkpatch warns that we exceed 80 characters, I do not have a strong
> > opinion on this, but if you have to resend better break the condition into 2
> > lines.
>
> Will break if preferred.  (but does it really warn??  I understand that
> the warning was deprecated and the "limit" increased to 100 chars...)

I see the warn here:
https://patchwork.kernel.org/project/netdevbpf/patch/20220420200720.434717-4-parri.and...@gmail.com/

in the kernel doc [1] we still say we prefer 80 columns, so I try to
follow, especially when it doesn't make things worse.

[1] 
https://docs.kernel.org/process/coding-style.html#breaking-long-lines-and-strings

>
>
> > Maybe even update or remove the comment? (it only describes the first
> > condition, but the conditions are pretty clear, so I don't think it adds
> > much).
>
> Works for me.  (taking it as this applies to the previous comment too.)

Yep.

Thanks,
Stefano

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH 3/5] hv_sock: Add validation for untrusted Hyper-V values

2022-04-21 Thread Stefano Garzarella

On Wed, Apr 20, 2022 at 10:07:18PM +0200, Andrea Parri (Microsoft) wrote:

For additional robustness in the face of Hyper-V errors or malicious
behavior, validate all values that originate from packets that Hyper-V
has sent to the guest in the host-to-guest ring buffer.  Ensure that
invalid values cannot cause data being copied out of the bounds of the
source buffer in hvs_stream_dequeue().

Signed-off-by: Andrea Parri (Microsoft) 
---
include/linux/hyperv.h   |  5 +
net/vmw_vsock/hyperv_transport.c | 11 +--
2 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index fe2e0179ed51e..55478a6810b60 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -1663,6 +1663,11 @@ static inline u32 hv_pkt_datalen(const struct 
vmpacket_descriptor *desc)
return (desc->len8 << 3) - (desc->offset8 << 3);
}

+/* Get packet length associated with descriptor */
+static inline u32 hv_pkt_len(const struct vmpacket_descriptor *desc)
+{
+   return desc->len8 << 3;
+}

struct vmpacket_descriptor *
hv_pkt_iter_first_raw(struct vmbus_channel *channel);
diff --git a/net/vmw_vsock/hyperv_transport.c b/net/vmw_vsock/hyperv_transport.c
index 8c37d07017fc4..092cadc2c866d 100644
--- a/net/vmw_vsock/hyperv_transport.c
+++ b/net/vmw_vsock/hyperv_transport.c
@@ -577,12 +577,19 @@ static bool hvs_dgram_allow(u32 cid, u32 port)
static int hvs_update_recv_data(struct hvsock *hvs)
{
struct hvs_recv_buf *recv_buf;
-   u32 payload_len;
+   u32 pkt_len, payload_len;
+
+   pkt_len = hv_pkt_len(hvs->recv_desc);
+
+   /* Ensure the packet is big enough to read its header */
+   if (pkt_len < HVS_HEADER_LEN)
+   return -EIO;

recv_buf = (struct hvs_recv_buf *)(hvs->recv_desc + 1);
payload_len = recv_buf->hdr.data_size;

-   if (payload_len > HVS_MTU_SIZE)
+   /* Ensure the packet is big enough to read its payload */
+   if (payload_len > pkt_len - HVS_HEADER_LEN || payload_len > 
HVS_MTU_SIZE)


checkpatch warns that we exceed 80 characters, I do not have a strong 
opinion on this, but if you have to resend better break the condition 
into 2 lines.


Maybe even update or remove the comment? (it only describes the first 
condition, but the conditions are pretty clear, so I don't think it adds 
much).


Thanks,
Stefano

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH 2/5] hv_sock: Copy packets sent by Hyper-V out of the ring buffer

2022-04-21 Thread Stefano Garzarella

On Wed, Apr 20, 2022 at 10:07:17PM +0200, Andrea Parri (Microsoft) wrote:

Pointers to VMbus packets sent by Hyper-V are used by the hv_sock driver
within the guest VM.  Hyper-V can send packets with erroneous values or
modify packet fields after they are processed by the guest.  To defend
against these scenarios, copy the incoming packet after validating its
length and offset fields using hv_pkt_iter_{first,next}().  In this way,
the packet can no longer be modified by the host.

Signed-off-by: Andrea Parri (Microsoft) 
---
net/vmw_vsock/hyperv_transport.c | 9 +++--
1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/net/vmw_vsock/hyperv_transport.c b/net/vmw_vsock/hyperv_transport.c
index 943352530936e..8c37d07017fc4 100644
--- a/net/vmw_vsock/hyperv_transport.c
+++ b/net/vmw_vsock/hyperv_transport.c
@@ -78,6 +78,9 @@ struct hvs_send_buf {
 ALIGN((payload_len), 8) + \
 VMBUS_PKT_TRAILER_SIZE)

+/* Upper bound on the size of a VMbus packet for hv_sock */
+#define HVS_MAX_PKT_SIZE   HVS_PKT_LEN(HVS_MTU_SIZE)
+
union hvs_service_id {
guid_t  srv_id;

@@ -378,6 +381,8 @@ static void hvs_open_connection(struct vmbus_channel *chan)
rcvbuf = ALIGN(rcvbuf, HV_HYP_PAGE_SIZE);
}

+   chan->max_pkt_size = HVS_MAX_PKT_SIZE;
+


premise, I don't know HyperV channels :-(

Is this change necessary to use hv_pkt_iter_first() instead of 
hv_pkt_iter_first_raw()?


If yes, then please mention that you set this value in the commit 
message, otherwise maybe better to have a separate patch.


Thanks,
Stefano


ret = vmbus_open(chan, sndbuf, rcvbuf, NULL, 0, hvs_channel_cb,
 conn_from_host ? new : sk);
if (ret != 0) {
@@ -602,7 +607,7 @@ static ssize_t hvs_stream_dequeue(struct vsock_sock *vsk, 
struct msghdr *msg,
return -EOPNOTSUPP;

if (need_refill) {
-   hvs->recv_desc = hv_pkt_iter_first_raw(hvs->chan);
+   hvs->recv_desc = hv_pkt_iter_first(hvs->chan);
if (!hvs->recv_desc)
return -ENOBUFS;
ret = hvs_update_recv_data(hvs);
@@ -618,7 +623,7 @@ static ssize_t hvs_stream_dequeue(struct vsock_sock *vsk, 
struct msghdr *msg,

hvs->recv_data_len -= to_read;
if (hvs->recv_data_len == 0) {
-   hvs->recv_desc = hv_pkt_iter_next_raw(hvs->chan, 
hvs->recv_desc);
+   hvs->recv_desc = hv_pkt_iter_next(hvs->chan, hvs->recv_desc);
if (hvs->recv_desc) {
ret = hvs_update_recv_data(hvs);
if (ret)
--
2.25.1



___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH 1/5] hv_sock: Check hv_pkt_iter_first_raw()'s return value

2022-04-21 Thread Stefano Garzarella

On Wed, Apr 20, 2022 at 10:07:16PM +0200, Andrea Parri (Microsoft) wrote:

The function returns NULL if the ring buffer doesn't contain enough
readable bytes to constitute a packet descriptor.  The ring buffer's
write_index is in memory which is shared with the Hyper-V host, an
erroneous or malicious host could thus change its value and overturn
the result of hvs_stream_has_data().

Signed-off-by: Andrea Parri (Microsoft) 
---
net/vmw_vsock/hyperv_transport.c | 2 ++
1 file changed, 2 insertions(+)

diff --git a/net/vmw_vsock/hyperv_transport.c b/net/vmw_vsock/hyperv_transport.c
index e111e13b66604..943352530936e 100644
--- a/net/vmw_vsock/hyperv_transport.c
+++ b/net/vmw_vsock/hyperv_transport.c
@@ -603,6 +603,8 @@ static ssize_t hvs_stream_dequeue(struct vsock_sock *vsk, 
struct msghdr *msg,

if (need_refill) {
hvs->recv_desc = hv_pkt_iter_first_raw(hvs->chan);
+   if (!hvs->recv_desc)
+   return -ENOBUFS;
ret = hvs_update_recv_data(hvs);
if (ret)
return ret;
--
2.25.1



Reviewed-by: Stefano Garzarella 

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH 2/2] virtio_ring: add unlikely annotation for free descs check

2022-03-29 Thread Stefano Garzarella

On Mon, Mar 28, 2022 at 06:58:17PM +0800, Xianting Tian wrote:

The 'if (vq->vq.num_free < descs_used)' check will almost always be false.

Signed-off-by: Xianting Tian 
---
drivers/virtio/virtio_ring.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)


Reviewed-by: Stefano Garzarella 



diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index d597fc0874ec..ab6d5f0cb579 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -525,7 +525,7 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
descs_used = total_sg;
}

-   if (vq->vq.num_free < descs_used) {
+   if (unlikely(vq->vq.num_free < descs_used)) {
pr_debug("Can't add buf len %i - avail = %i\n",
 descs_used, vq->vq.num_free);
/* FIXME: for historical reasons, we force a notify here if
--
2.17.1



___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH 1/2] virtio_ring: remove unnecessary to_vvq call in vring hot path

2022-03-29 Thread Stefano Garzarella

On Mon, Mar 28, 2022 at 06:58:16PM +0800, Xianting Tian wrote:

It passes '_vq' to virtqueue_use_indirect(), which still calls
to_vvq to get 'vq', let's directly pass 'vq'. It can avoid
unnecessary call of to_vvq in hot path.

Signed-off-by: Xianting Tian 
---
drivers/virtio/virtio_ring.c | 8 +++-
1 file changed, 3 insertions(+), 5 deletions(-)


Reviewed-by: Stefano Garzarella 



diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 962f1477b1fa..d597fc0874ec 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -205,11 +205,9 @@ struct vring_virtqueue {

#define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq)

-static inline bool virtqueue_use_indirect(struct virtqueue *_vq,
+static inline bool virtqueue_use_indirect(struct vring_virtqueue *vq,
  unsigned int total_sg)
{
-   struct vring_virtqueue *vq = to_vvq(_vq);
-
/*
 * If the host supports indirect descriptor tables, and we have multiple
 * buffers, then go indirect. FIXME: tune this threshold
@@ -507,7 +505,7 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,

head = vq->free_head;

-   if (virtqueue_use_indirect(_vq, total_sg))
+   if (virtqueue_use_indirect(vq, total_sg))
desc = alloc_indirect_split(_vq, total_sg, gfp);
else {
desc = NULL;
@@ -1194,7 +1192,7 @@ static inline int virtqueue_add_packed(struct virtqueue 
*_vq,

BUG_ON(total_sg == 0);

-   if (virtqueue_use_indirect(_vq, total_sg)) {
+   if (virtqueue_use_indirect(vq, total_sg)) {
err = virtqueue_add_indirect_packed(vq, sgs, total_sg, out_sgs,
in_sgs, data, gfp);
if (err != -ENOMEM) {
--
2.17.1



___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH] virtio_ring: remove unnecessary to_vvq call in vring hot path

2022-03-28 Thread Stefano Garzarella

On Thu, Mar 24, 2022 at 03:33:40PM +0800, Xianting Tian wrote:

It passes '_vq' to virtqueue_use_indirect(), which still calls
to_vvq to get 'vq', let's directly pass 'vq'. It can avoid
unnecessary call of to_vvq in hot path.


It seems reasonable to me.



Other tiny optimization:
Add unlikely to "if (vq->vq.num_free < descs_used).


Better to do this change in another patch.

Thanks,
Stefano



Signed-off-by: Xianting Tian 
---
drivers/virtio/virtio_ring.c | 10 --
1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 962f1477b1fa..ab6d5f0cb579 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -205,11 +205,9 @@ struct vring_virtqueue {

#define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq)

-static inline bool virtqueue_use_indirect(struct virtqueue *_vq,
+static inline bool virtqueue_use_indirect(struct vring_virtqueue *vq,
  unsigned int total_sg)
{
-   struct vring_virtqueue *vq = to_vvq(_vq);
-
/*
 * If the host supports indirect descriptor tables, and we have multiple
 * buffers, then go indirect. FIXME: tune this threshold
@@ -507,7 +505,7 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,

head = vq->free_head;

-   if (virtqueue_use_indirect(_vq, total_sg))
+   if (virtqueue_use_indirect(vq, total_sg))
desc = alloc_indirect_split(_vq, total_sg, gfp);
else {
desc = NULL;
@@ -527,7 +525,7 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
descs_used = total_sg;
}

-   if (vq->vq.num_free < descs_used) {
+   if (unlikely(vq->vq.num_free < descs_used)) {
pr_debug("Can't add buf len %i - avail = %i\n",
 descs_used, vq->vq.num_free);
/* FIXME: for historical reasons, we force a notify here if
@@ -1194,7 +1192,7 @@ static inline int virtqueue_add_packed(struct virtqueue 
*_vq,

BUG_ON(total_sg == 0);

-   if (virtqueue_use_indirect(_vq, total_sg)) {
+   if (virtqueue_use_indirect(vq, total_sg)) {
err = virtqueue_add_indirect_packed(vq, sgs, total_sg, out_sgs,
in_sgs, data, gfp);
if (err != -ENOMEM) {
--
2.17.1



___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH 1/3] virtio: use virtio_device_ready() in virtio_device_restore()

2022-03-24 Thread Stefano Garzarella

On Thu, Mar 24, 2022 at 07:07:09AM -0400, Michael S. Tsirkin wrote:

On Thu, Mar 24, 2022 at 12:03:07PM +0100, Stefano Garzarella wrote:

On Thu, Mar 24, 2022 at 06:48:05AM -0400, Michael S. Tsirkin wrote:
> On Thu, Mar 24, 2022 at 04:40:02PM +0800, Jason Wang wrote:
> > From: Stefano Garzarella 
> >
> > This avoids setting DRIVER_OK twice for those drivers that call
> > virtio_device_ready() in the .restore
>
> Is this trying to say it's faster?

Nope, I mean, when I wrote the original version, I meant to do the same
things that we do in virtio_dev_probe() where we called
virtio_device_ready() which not only set the state, but also called
.enable_cbs callback.

Was this a side effect and maybe more compliant with the spec?



Sorry I don't understand the question. it says "avoids setting DRIVER_OK twice" 
-
why is that advantageous and worth calling out in the commit log?


I just wanted to say that it seems strange to set DRIVER_OK twice if we 
read the spec. I don't think it's wrong, but weird.


Yes, maybe we should rewrite the commit message saying that we want to 
use virtio_device_ready() everywhere to complete the setup before 
setting DRIVER_OK so we can do all the necessary operations inside (like 
in patch 3 or call enable_cbs).


Jason rewrote the commit log, so I don't know if he agrees.

Thanks,
Stefano

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH 2/3] virtio: use virtio_reset_device() when possible

2022-03-24 Thread Stefano Garzarella

On Thu, Mar 24, 2022 at 04:40:03PM +0800, Jason Wang wrote:

This allows us to do common extension without duplicating codes.

Signed-off-by: Jason Wang 
---
drivers/virtio/virtio.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
index 75c8d560bbd3..8dde44ea044a 100644
--- a/drivers/virtio/virtio.c
+++ b/drivers/virtio/virtio.c
@@ -430,7 +430,7 @@ int register_virtio_device(struct virtio_device *dev)

/* We always start by resetting the device, in case a previous
 * driver messed it up.  This also tests that code path a little. */
-   dev->config->reset(dev);
+   virtio_reset_device(dev);

/* Acknowledge that we've seen the device. */
virtio_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE);
@@ -496,7 +496,7 @@ int virtio_device_restore(struct virtio_device *dev)

/* We always start by resetting the device, in case a previous
 * driver messed it up. */
-   dev->config->reset(dev);
+   virtio_reset_device(dev);

/* Acknowledge that we've seen the device. */
virtio_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE);
--
2.25.1



Reviewed-by: Stefano Garzarella 

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH 1/3] virtio: use virtio_device_ready() in virtio_device_restore()

2022-03-24 Thread Stefano Garzarella

On Thu, Mar 24, 2022 at 06:48:05AM -0400, Michael S. Tsirkin wrote:

On Thu, Mar 24, 2022 at 04:40:02PM +0800, Jason Wang wrote:

From: Stefano Garzarella 

This avoids setting DRIVER_OK twice for those drivers that call
virtio_device_ready() in the .restore


Is this trying to say it's faster?


Nope, I mean, when I wrote the original version, I meant to do the same 
things that we do in virtio_dev_probe() where we called 
virtio_device_ready() which not only set the state, but also called 
.enable_cbs callback.


Was this a side effect and maybe more compliant with the spec?


If yes this one looks like a red herring. Yes we skip a write but we
replace it with a read which is not better performance-wise.
If we want to optimize this, it is better to just do that inside
virtio_add_status:



diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
index 75c8d560bbd3..cd943c31bdbb 100644
--- a/drivers/virtio/virtio.c
+++ b/drivers/virtio/virtio.c
@@ -161,8 +161,14 @@ static void virtio_config_enable(struct virtio_device *dev)

void virtio_add_status(struct virtio_device *dev, unsigned int status)
{
+   unsigned int device_status;
+
might_sleep();
-   dev->config->set_status(dev, dev->config->get_status(dev) | status);
+
+   device_status = dev->config->get_status(dev);
+
+   if (status & ~device_status)
+   dev->config->set_status(dev, device_status | status);
}
EXPORT_SYMBOL_GPL(virtio_add_status);


Could there be a case where we want to set the status again even though 
the device tells us it's already set?


I think not, so I guess it's okay.





and it will allows us to do
extension on virtio_device_ready() without duplicating codes.

Signed-off-by: Stefano Garzarella 
Signed-off-by: Jason Wang 
---
 drivers/virtio/virtio.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
index 22f15f444f75..75c8d560bbd3 100644
--- a/drivers/virtio/virtio.c
+++ b/drivers/virtio/virtio.c
@@ -526,8 +526,9 @@ int virtio_device_restore(struct virtio_device *dev)
goto err;
}

-   /* Finally, tell the device we're all set */
-   virtio_add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK);
+   /* If restore didn't do it, mark device DRIVER_OK ourselves. */


I preferred the original comment, it said why we are doing this,
new one repeats what code is doing.


I agree, copy & paste from virtio_dev_probe().

Jason can you fix this patch or should I resend?

Thanks,
Stefano

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[PATCH net v3 3/3] vsock/virtio: enable VQs early on probe

2022-03-23 Thread Stefano Garzarella
virtio spec requires drivers to set DRIVER_OK before using VQs.
This is set automatically after probe returns, but virtio-vsock
driver uses VQs in the probe function to fill rx and event VQs
with new buffers.

Let's fix this, calling virtio_device_ready() before using VQs
in the probe function.

Fixes: 0ea9e1d3a9e3 ("VSOCK: Introduce virtio_transport.ko")
Signed-off-by: Stefano Garzarella 
---
 net/vmw_vsock/virtio_transport.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
index 3954d3be9083..ba1c8cc0c467 100644
--- a/net/vmw_vsock/virtio_transport.c
+++ b/net/vmw_vsock/virtio_transport.c
@@ -627,6 +627,8 @@ static int virtio_vsock_probe(struct virtio_device *vdev)
 
vdev->priv = vsock;
 
+   virtio_device_ready(vdev);
+
mutex_lock(>tx_lock);
vsock->tx_run = true;
mutex_unlock(>tx_lock);
-- 
2.35.1

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[PATCH net v3 2/3] vsock/virtio: read the negotiated features before using VQs

2022-03-23 Thread Stefano Garzarella
Complete the driver configuration, reading the negotiated features,
before using the VQs in the virtio_vsock_probe().

Fixes: 53efbba12cc7 ("virtio/vsock: enable SEQPACKET for transport")
Suggested-by: Michael S. Tsirkin 
Reviewed-by: Stefan Hajnoczi 
Signed-off-by: Stefano Garzarella 
---
 net/vmw_vsock/virtio_transport.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
index 3e5513934c9f..3954d3be9083 100644
--- a/net/vmw_vsock/virtio_transport.c
+++ b/net/vmw_vsock/virtio_transport.c
@@ -622,6 +622,9 @@ static int virtio_vsock_probe(struct virtio_device *vdev)
INIT_WORK(>event_work, virtio_transport_event_work);
INIT_WORK(>send_pkt_work, virtio_transport_send_pkt_work);
 
+   if (virtio_has_feature(vdev, VIRTIO_VSOCK_F_SEQPACKET))
+   vsock->seqpacket_allow = true;
+
vdev->priv = vsock;
 
mutex_lock(>tx_lock);
@@ -638,9 +641,6 @@ static int virtio_vsock_probe(struct virtio_device *vdev)
vsock->event_run = true;
mutex_unlock(>event_lock);
 
-   if (virtio_has_feature(vdev, VIRTIO_VSOCK_F_SEQPACKET))
-   vsock->seqpacket_allow = true;
-
rcu_assign_pointer(the_virtio_vsock, vsock);
 
mutex_unlock(_virtio_vsock_mutex);
-- 
2.35.1

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[PATCH net v3 1/3] vsock/virtio: initialize vdev->priv before using VQs

2022-03-23 Thread Stefano Garzarella
When we fill VQs with empty buffers and kick the host, it may send
an interrupt. `vdev->priv` must be initialized before this since it
is used in the virtqueue callbacks.

Fixes: 0deab087b16a ("vsock/virtio: use RCU to avoid use-after-free on 
the_virtio_vsock")
Suggested-by: Michael S. Tsirkin 
Signed-off-by: Stefano Garzarella 
---
 net/vmw_vsock/virtio_transport.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
index 5afc194a58bb..3e5513934c9f 100644
--- a/net/vmw_vsock/virtio_transport.c
+++ b/net/vmw_vsock/virtio_transport.c
@@ -622,6 +622,8 @@ static int virtio_vsock_probe(struct virtio_device *vdev)
INIT_WORK(>event_work, virtio_transport_event_work);
INIT_WORK(>send_pkt_work, virtio_transport_send_pkt_work);
 
+   vdev->priv = vsock;
+
mutex_lock(>tx_lock);
vsock->tx_run = true;
mutex_unlock(>tx_lock);
@@ -639,7 +641,6 @@ static int virtio_vsock_probe(struct virtio_device *vdev)
if (virtio_has_feature(vdev, VIRTIO_VSOCK_F_SEQPACKET))
vsock->seqpacket_allow = true;
 
-   vdev->priv = vsock;
rcu_assign_pointer(the_virtio_vsock, vsock);
 
mutex_unlock(_virtio_vsock_mutex);
-- 
2.35.1

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[PATCH net v3 0/3] vsock/virtio: enable VQs early on probe and finish the setup before using them

2022-03-23 Thread Stefano Garzarella
The first patch fixes a virtio-spec violation. The other two patches
complete the driver configuration before using the VQs in the probe.

The patch order should simplify backporting in stable branches.

v3:
- re-ordered the patch to improve bisectability [MST]

v2: https://lore.kernel.org/netdev/20220323084954.11769-1-sgarz...@redhat.com/
v1: https://lore.kernel.org/netdev/20220322103823.83411-1-sgarz...@redhat.com/

Stefano Garzarella (3):
  vsock/virtio: initialize vdev->priv before using VQs
  vsock/virtio: read the negotiated features before using VQs
  vsock/virtio: enable VQs early on probe

 net/vmw_vsock/virtio_transport.c | 11 +++
 1 file changed, 7 insertions(+), 4 deletions(-)

-- 
2.35.1

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH net v2 1/3] vsock/virtio: enable VQs early on probe

2022-03-23 Thread Stefano Garzarella

On Wed, Mar 23, 2022 at 01:44:42PM +, Stefan Hajnoczi wrote:

On Wed, Mar 23, 2022 at 09:49:52AM +0100, Stefano Garzarella wrote:

virtio spec requires drivers to set DRIVER_OK before using VQs.
This is set automatically after probe returns, but virtio-vsock
driver uses VQs in the probe function to fill rx and event VQs
with new buffers.

Let's fix this, calling virtio_device_ready() before using VQs
in the probe function.

Fixes: 0ea9e1d3a9e3 ("VSOCK: Introduce virtio_transport.ko")
Signed-off-by: Stefano Garzarella 
---
 net/vmw_vsock/virtio_transport.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
index 5afc194a58bb..b1962f8cd502 100644
--- a/net/vmw_vsock/virtio_transport.c
+++ b/net/vmw_vsock/virtio_transport.c
@@ -622,6 +622,8 @@ static int virtio_vsock_probe(struct virtio_device *vdev)
INIT_WORK(>event_work, virtio_transport_event_work);
INIT_WORK(>send_pkt_work, virtio_transport_send_pkt_work);

+   virtio_device_ready(vdev);


Can rx and event virtqueue interrupts be lost if they occur before we
assign vdev->priv later in virtio_vsock_probe()?


Yep, as Michael suggested I'll fix the patch order in the next version.

Thanks,
Stefano

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH net v2 0/3] vsock/virtio: enable VQs early on probe and finish the setup before using them

2022-03-23 Thread Stefano Garzarella

On Wed, Mar 23, 2022 at 09:22:02AM -0400, Michael S. Tsirkin wrote:

On Wed, Mar 23, 2022 at 09:49:51AM +0100, Stefano Garzarella wrote:

The first patch fixes a virtio-spec violation. The other two patches
complete the driver configuration before using the VQs in the probe.

The patch order should simplify backporting in stable branches.


Ok but I think the order is wrong. It should be 2-3-1,
otherwise bisect can pick just 1 and it will have
the issues previous reviw pointed out.


Right, I prioritized simplifying the backport, but obviously 
bisectability is priority!


I'll send v3 changing the order in 2-3-1

Thanks,
Stefano

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[PATCH net v2 3/3] vsock/virtio: read the negotiated features before using VQs

2022-03-23 Thread Stefano Garzarella
Complete the driver configuration, reading the negotiated features,
before using the VQs and tell the device that the driver is ready in
the virtio_vsock_probe().

Fixes: 53efbba12cc7 ("virtio/vsock: enable SEQPACKET for transport")
Suggested-by: Michael S. Tsirkin 
Signed-off-by: Stefano Garzarella 
---
 net/vmw_vsock/virtio_transport.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
index fff67ad39087..1244e7cf585b 100644
--- a/net/vmw_vsock/virtio_transport.c
+++ b/net/vmw_vsock/virtio_transport.c
@@ -622,6 +622,9 @@ static int virtio_vsock_probe(struct virtio_device *vdev)
INIT_WORK(>event_work, virtio_transport_event_work);
INIT_WORK(>send_pkt_work, virtio_transport_send_pkt_work);
 
+   if (virtio_has_feature(vdev, VIRTIO_VSOCK_F_SEQPACKET))
+   vsock->seqpacket_allow = true;
+
vdev->priv = vsock;
virtio_device_ready(vdev);
 
@@ -639,9 +642,6 @@ static int virtio_vsock_probe(struct virtio_device *vdev)
vsock->event_run = true;
mutex_unlock(>event_lock);
 
-   if (virtio_has_feature(vdev, VIRTIO_VSOCK_F_SEQPACKET))
-   vsock->seqpacket_allow = true;
-
rcu_assign_pointer(the_virtio_vsock, vsock);
 
mutex_unlock(_virtio_vsock_mutex);
-- 
2.35.1

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[PATCH net v2 1/3] vsock/virtio: enable VQs early on probe

2022-03-23 Thread Stefano Garzarella
virtio spec requires drivers to set DRIVER_OK before using VQs.
This is set automatically after probe returns, but virtio-vsock
driver uses VQs in the probe function to fill rx and event VQs
with new buffers.

Let's fix this, calling virtio_device_ready() before using VQs
in the probe function.

Fixes: 0ea9e1d3a9e3 ("VSOCK: Introduce virtio_transport.ko")
Signed-off-by: Stefano Garzarella 
---
 net/vmw_vsock/virtio_transport.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
index 5afc194a58bb..b1962f8cd502 100644
--- a/net/vmw_vsock/virtio_transport.c
+++ b/net/vmw_vsock/virtio_transport.c
@@ -622,6 +622,8 @@ static int virtio_vsock_probe(struct virtio_device *vdev)
INIT_WORK(>event_work, virtio_transport_event_work);
INIT_WORK(>send_pkt_work, virtio_transport_send_pkt_work);
 
+   virtio_device_ready(vdev);
+
mutex_lock(>tx_lock);
vsock->tx_run = true;
mutex_unlock(>tx_lock);
-- 
2.35.1

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[PATCH net v2 2/3] vsock/virtio: initialize vdev->priv before using VQs

2022-03-23 Thread Stefano Garzarella
When we fill VQs with empty buffers and kick the host, it may send
an interrupt. `vdev->priv` must be initialized before this since it
is used in the virtqueue callback.

Fixes: 0deab087b16a ("vsock/virtio: use RCU to avoid use-after-free on 
the_virtio_vsock")
Suggested-by: Michael S. Tsirkin 
Signed-off-by: Stefano Garzarella 
---
 net/vmw_vsock/virtio_transport.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
index b1962f8cd502..fff67ad39087 100644
--- a/net/vmw_vsock/virtio_transport.c
+++ b/net/vmw_vsock/virtio_transport.c
@@ -622,6 +622,7 @@ static int virtio_vsock_probe(struct virtio_device *vdev)
INIT_WORK(>event_work, virtio_transport_event_work);
INIT_WORK(>send_pkt_work, virtio_transport_send_pkt_work);
 
+   vdev->priv = vsock;
virtio_device_ready(vdev);
 
mutex_lock(>tx_lock);
@@ -641,7 +642,6 @@ static int virtio_vsock_probe(struct virtio_device *vdev)
if (virtio_has_feature(vdev, VIRTIO_VSOCK_F_SEQPACKET))
vsock->seqpacket_allow = true;
 
-   vdev->priv = vsock;
rcu_assign_pointer(the_virtio_vsock, vsock);
 
mutex_unlock(_virtio_vsock_mutex);
-- 
2.35.1

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[PATCH net v2 0/3] vsock/virtio: enable VQs early on probe and finish the setup before using them

2022-03-23 Thread Stefano Garzarella
The first patch fixes a virtio-spec violation. The other two patches
complete the driver configuration before using the VQs in the probe.

The patch order should simplify backporting in stable branches.

v2:
- patch 1 is not changed from v1
- added 2 patches to complete the driver configuration before using the
  VQs in the probe [MST]

v1: https://lore.kernel.org/netdev/20220322103823.83411-1-sgarz...@redhat.com/

Stefano Garzarella (3):
  vsock/virtio: enable VQs early on probe
  vsock/virtio: initialize vdev->priv before using VQs
  vsock/virtio: read the negotiated features before using VQs

 net/vmw_vsock/virtio_transport.c | 10 ++
 1 file changed, 6 insertions(+), 4 deletions(-)

-- 
2.35.1

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH] virtio: use virtio_device_ready() in virtio_device_restore()

2022-03-23 Thread Stefano Garzarella

On Wed, Mar 23, 2022 at 11:10:27AM +0800, Jason Wang wrote:

On Tue, Mar 22, 2022 at 10:07 PM Michael S. Tsirkin  wrote:


On Tue, Mar 22, 2022 at 12:43:13PM +0100, Stefano Garzarella wrote:
> After waking up a suspended VM, the kernel prints the following trace
> for virtio drivers which do not directly call virtio_device_ready() in
> the .restore:
>
> PM: suspend exit
> irq 22: nobody cared (try booting with the "irqpoll" option)
> Call Trace:
>  
>  dump_stack_lvl+0x38/0x49
>  dump_stack+0x10/0x12
>  __report_bad_irq+0x3a/0xaf
>  note_interrupt.cold+0xb/0x60
>  handle_irq_event+0x71/0x80
>  handle_fasteoi_irq+0x95/0x1e0
>  __common_interrupt+0x6b/0x110
>  common_interrupt+0x63/0xe0
>  asm_common_interrupt+0x1e/0x40
>  ? __do_softirq+0x75/0x2f3
>  irq_exit_rcu+0x93/0xe0
>  sysvec_apic_timer_interrupt+0xac/0xd0
>  
>  
>  asm_sysvec_apic_timer_interrupt+0x12/0x20
>  arch_cpu_idle+0x12/0x20
>  default_idle_call+0x39/0xf0
>  do_idle+0x1b5/0x210
>  cpu_startup_entry+0x20/0x30
>  start_secondary+0xf3/0x100
>  secondary_startup_64_no_verify+0xc3/0xcb
>  
> handlers:
> [<8f9bac49>] vp_interrupt
> [<8f9bac49>] vp_interrupt
> Disabling IRQ #22
>
> This happens because we don't invoke .enable_cbs callback in
> virtio_device_restore(). That callback is used by some transports
> (e.g. virtio-pci) to enable interrupts.
>
> Let's fix it, by calling virtio_device_ready() as we do in
> virtio_dev_probe(). This function calls .enable_cts callback and sets
> DRIVER_OK status bit.
>
> This fix also avoids setting DRIVER_OK twice for those drivers that
> call virtio_device_ready() in the .restore.
>
> Fixes: d50497eb4e55 ("virtio_config: introduce a new .enable_cbs method")
> Signed-off-by: Stefano Garzarella 
> ---
>
> I'm not sure about the fixes tag. That one is more generic, but the
> following one I think introduced the issue.
>
> Fixes: 9e35276a5344 ("virtio_pci: harden MSI-X interrupts")

Jason what should we do about this one BTW? Just revert? We have other
issues ...


Let me post a patch to revert it and give it a rework.


Thanks for reverting it.

Should we queue this patch anyway to prevent future issues and avoid 
setting DRIVER_OK twice?


Please, let me know if I have to resend it by removing the call trace 
that after the revert should no longer occur.


Thanks,
Stefano

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH net] vsock/virtio: enable VQs early on probe

2022-03-22 Thread Stefano Garzarella

On Tue, Mar 22, 2022 at 10:09:06AM -0400, Michael S. Tsirkin wrote:

On Tue, Mar 22, 2022 at 03:05:00PM +0100, Stefano Garzarella wrote:

On Tue, Mar 22, 2022 at 09:36:14AM -0400, Michael S. Tsirkin wrote:
> On Tue, Mar 22, 2022 at 11:38:23AM +0100, Stefano Garzarella wrote:
> > virtio spec requires drivers to set DRIVER_OK before using VQs.
> > This is set automatically after probe returns, but virtio-vsock
> > driver uses VQs in the probe function to fill rx and event VQs
> > with new buffers.
>
>
> So this is a spec violation. absolutely.
>
> > Let's fix this, calling virtio_device_ready() before using VQs
> > in the probe function.
> >
> > Fixes: 0ea9e1d3a9e3 ("VSOCK: Introduce virtio_transport.ko")
> > Signed-off-by: Stefano Garzarella 
> > ---
> >  net/vmw_vsock/virtio_transport.c | 2 ++
> >  1 file changed, 2 insertions(+)
> >
> > diff --git a/net/vmw_vsock/virtio_transport.c 
b/net/vmw_vsock/virtio_transport.c
> > index 5afc194a58bb..b1962f8cd502 100644
> > --- a/net/vmw_vsock/virtio_transport.c
> > +++ b/net/vmw_vsock/virtio_transport.c
> > @@ -622,6 +622,8 @@ static int virtio_vsock_probe(struct virtio_device 
*vdev)
> >   INIT_WORK(>event_work, virtio_transport_event_work);
> >   INIT_WORK(>send_pkt_work, virtio_transport_send_pkt_work);
> >
> > + virtio_device_ready(vdev);
> > +
> >   mutex_lock(>tx_lock);
> >   vsock->tx_run = true;
> >   mutex_unlock(>tx_lock);
>
> Here's the whole code snippet:
>
>
>mutex_lock(>tx_lock);
>vsock->tx_run = true;
>mutex_unlock(>tx_lock);
>
>mutex_lock(>rx_lock);
>virtio_vsock_rx_fill(vsock);
>vsock->rx_run = true;
>mutex_unlock(>rx_lock);
>
>mutex_lock(>event_lock);
>virtio_vsock_event_fill(vsock);
>vsock->event_run = true;
>mutex_unlock(>event_lock);
>
>if (virtio_has_feature(vdev, VIRTIO_VSOCK_F_SEQPACKET))
>vsock->seqpacket_allow = true;
>
>vdev->priv = vsock;
>rcu_assign_pointer(the_virtio_vsock, vsock);
>
>mutex_unlock(_virtio_vsock_mutex);
>
>
> I worry that this is not the only problem here:
> seqpacket_allow and setting of vdev->priv at least after
> device is active look suspicious.

Right, so if you agree I'll move these before virtio_device_ready().

> E.g.:
>
> static void virtio_vsock_event_done(struct virtqueue *vq)
> {
>struct virtio_vsock *vsock = vq->vdev->priv;
>
>if (!vsock)
>return;
>queue_work(virtio_vsock_workqueue, >event_work);
> }
>
> looks like it will miss events now they will be reported earlier.
> One might say that since vq has been kicked it might send
> interrupts earlier too so not a new problem, but
> there's a chance device actually waits until DRIVER_OK
> to start operating.

Yes I see, should I break into 2 patches (one where I move the code already
present and this one)?

Maybe a single patch is fine since it's the complete solution.

Thank you for the detailed explanation,
Stefano


Two I think since movement can be backported to before the hardening
effort.


Yep, maybe 3 since seqpacket was added later.

Thanks,
Stefano

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH net] vsock/virtio: enable VQs early on probe

2022-03-22 Thread Stefano Garzarella

On Tue, Mar 22, 2022 at 09:36:14AM -0400, Michael S. Tsirkin wrote:

On Tue, Mar 22, 2022 at 11:38:23AM +0100, Stefano Garzarella wrote:

virtio spec requires drivers to set DRIVER_OK before using VQs.
This is set automatically after probe returns, but virtio-vsock
driver uses VQs in the probe function to fill rx and event VQs
with new buffers.



So this is a spec violation. absolutely.


Let's fix this, calling virtio_device_ready() before using VQs
in the probe function.

Fixes: 0ea9e1d3a9e3 ("VSOCK: Introduce virtio_transport.ko")
Signed-off-by: Stefano Garzarella 
---
 net/vmw_vsock/virtio_transport.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
index 5afc194a58bb..b1962f8cd502 100644
--- a/net/vmw_vsock/virtio_transport.c
+++ b/net/vmw_vsock/virtio_transport.c
@@ -622,6 +622,8 @@ static int virtio_vsock_probe(struct virtio_device *vdev)
INIT_WORK(>event_work, virtio_transport_event_work);
INIT_WORK(>send_pkt_work, virtio_transport_send_pkt_work);

+   virtio_device_ready(vdev);
+
mutex_lock(>tx_lock);
vsock->tx_run = true;
mutex_unlock(>tx_lock);


Here's the whole code snippet:


   mutex_lock(>tx_lock);
   vsock->tx_run = true;
   mutex_unlock(>tx_lock);

   mutex_lock(>rx_lock);
   virtio_vsock_rx_fill(vsock);
   vsock->rx_run = true;
   mutex_unlock(>rx_lock);

   mutex_lock(>event_lock);
   virtio_vsock_event_fill(vsock);
   vsock->event_run = true;
   mutex_unlock(>event_lock);

   if (virtio_has_feature(vdev, VIRTIO_VSOCK_F_SEQPACKET))
   vsock->seqpacket_allow = true;

   vdev->priv = vsock;
   rcu_assign_pointer(the_virtio_vsock, vsock);

   mutex_unlock(_virtio_vsock_mutex);


I worry that this is not the only problem here:
seqpacket_allow and setting of vdev->priv at least after
device is active look suspicious.


Right, so if you agree I'll move these before virtio_device_ready().


E.g.:

static void virtio_vsock_event_done(struct virtqueue *vq)
{
   struct virtio_vsock *vsock = vq->vdev->priv;

   if (!vsock)
   return;
   queue_work(virtio_vsock_workqueue, >event_work);
}

looks like it will miss events now they will be reported earlier.
One might say that since vq has been kicked it might send
interrupts earlier too so not a new problem, but
there's a chance device actually waits until DRIVER_OK
to start operating.


Yes I see, should I break into 2 patches (one where I move the code 
already present and this one)?


Maybe a single patch is fine since it's the complete solution.

Thank you for the detailed explanation,
Stefano

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[PATCH] virtio: use virtio_device_ready() in virtio_device_restore()

2022-03-22 Thread Stefano Garzarella
After waking up a suspended VM, the kernel prints the following trace
for virtio drivers which do not directly call virtio_device_ready() in
the .restore:

PM: suspend exit
irq 22: nobody cared (try booting with the "irqpoll" option)
Call Trace:
 
 dump_stack_lvl+0x38/0x49
 dump_stack+0x10/0x12
 __report_bad_irq+0x3a/0xaf
 note_interrupt.cold+0xb/0x60
 handle_irq_event+0x71/0x80
 handle_fasteoi_irq+0x95/0x1e0
 __common_interrupt+0x6b/0x110
 common_interrupt+0x63/0xe0
 asm_common_interrupt+0x1e/0x40
 ? __do_softirq+0x75/0x2f3
 irq_exit_rcu+0x93/0xe0
 sysvec_apic_timer_interrupt+0xac/0xd0
 
 
 asm_sysvec_apic_timer_interrupt+0x12/0x20
 arch_cpu_idle+0x12/0x20
 default_idle_call+0x39/0xf0
 do_idle+0x1b5/0x210
 cpu_startup_entry+0x20/0x30
 start_secondary+0xf3/0x100
 secondary_startup_64_no_verify+0xc3/0xcb
 
handlers:
[<8f9bac49>] vp_interrupt
[<8f9bac49>] vp_interrupt
Disabling IRQ #22

This happens because we don't invoke .enable_cbs callback in
virtio_device_restore(). That callback is used by some transports
(e.g. virtio-pci) to enable interrupts.

Let's fix it, by calling virtio_device_ready() as we do in
virtio_dev_probe(). This function calls .enable_cts callback and sets
DRIVER_OK status bit.

This fix also avoids setting DRIVER_OK twice for those drivers that
call virtio_device_ready() in the .restore.

Fixes: d50497eb4e55 ("virtio_config: introduce a new .enable_cbs method")
Signed-off-by: Stefano Garzarella 
---

I'm not sure about the fixes tag. That one is more generic, but the
following one I think introduced the issue.

Fixes: 9e35276a5344 ("virtio_pci: harden MSI-X interrupts")

Thanks,
Stefano
---
 drivers/virtio/virtio.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
index 22f15f444f75..75c8d560bbd3 100644
--- a/drivers/virtio/virtio.c
+++ b/drivers/virtio/virtio.c
@@ -526,8 +526,9 @@ int virtio_device_restore(struct virtio_device *dev)
goto err;
}
 
-   /* Finally, tell the device we're all set */
-   virtio_add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK);
+   /* If restore didn't do it, mark device DRIVER_OK ourselves. */
+   if (!(dev->config->get_status(dev) & VIRTIO_CONFIG_S_DRIVER_OK))
+   virtio_device_ready(dev);
 
virtio_config_enable(dev);
 
-- 
2.35.1

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[PATCH net] vsock/virtio: enable VQs early on probe

2022-03-22 Thread Stefano Garzarella
virtio spec requires drivers to set DRIVER_OK before using VQs.
This is set automatically after probe returns, but virtio-vsock
driver uses VQs in the probe function to fill rx and event VQs
with new buffers.

Let's fix this, calling virtio_device_ready() before using VQs
in the probe function.

Fixes: 0ea9e1d3a9e3 ("VSOCK: Introduce virtio_transport.ko")
Signed-off-by: Stefano Garzarella 
---
 net/vmw_vsock/virtio_transport.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
index 5afc194a58bb..b1962f8cd502 100644
--- a/net/vmw_vsock/virtio_transport.c
+++ b/net/vmw_vsock/virtio_transport.c
@@ -622,6 +622,8 @@ static int virtio_vsock_probe(struct virtio_device *vdev)
INIT_WORK(>event_work, virtio_transport_event_work);
INIT_WORK(>send_pkt_work, virtio_transport_send_pkt_work);
 
+   virtio_device_ready(vdev);
+
mutex_lock(>tx_lock);
vsock->tx_run = true;
mutex_unlock(>tx_lock);
-- 
2.35.1

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH] vhost: handle error while adding split ranges to iotlb

2022-03-18 Thread Stefano Garzarella

On Sat, Mar 12, 2022 at 07:41:21PM +0530, Anirudh Rayabharam wrote:

vhost_iotlb_add_range_ctx() handles the range [0, ULONG_MAX] by
splitting it into two ranges and adding them separately. The return
value of adding the first range to the iotlb is currently ignored.
Check the return value and bail out in case of an error.



We could add:

Fixes: e2ae38cf3d91 ("vhost: fix hung thread due to erroneous iotlb entries")


Signed-off-by: Anirudh Rayabharam 
---
drivers/vhost/iotlb.c | 6 +-
1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/vhost/iotlb.c b/drivers/vhost/iotlb.c
index 40b098320b2a..5829cf2d0552 100644
--- a/drivers/vhost/iotlb.c
+++ b/drivers/vhost/iotlb.c
@@ -62,8 +62,12 @@ int vhost_iotlb_add_range_ctx(struct vhost_iotlb *iotlb,
 */
if (start == 0 && last == ULONG_MAX) {
u64 mid = last / 2;
+   int err = vhost_iotlb_add_range_ctx(iotlb, start, mid, addr,
+   perm, opaque);
+
+   if (err)
+   return err;

-   vhost_iotlb_add_range_ctx(iotlb, start, mid, addr, perm, 
opaque);
addr += mid + 1;
start = mid + 1;
}
--
2.35.1



Reviewed-by: Stefano Garzarella 

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH net-next v4 1/2] af_vsock: SOCK_SEQPACKET receive timeout test

2022-03-17 Thread Stefano Garzarella

On Thu, Mar 17, 2022 at 08:31:49AM +, Krasnov Arseniy Vladimirovich wrote:

Test for receive timeout check: connection is established,
receiver sets timeout, but sender does nothing. Receiver's
'read()' call must return EAGAIN.

Signed-off-by: Krasnov Arseniy Vladimirovich 
---
v3 -> v4:
1) Fix stupid bug about invalid 'if()' line.

tools/testing/vsock/vsock_test.c | 84 
1 file changed, 84 insertions(+)


Everything is okay now, tests pass and the patch looks good to me:

Reviewed-by: Stefano Garzarella 

Thanks,
Stefano

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH net-next v3 2/2] af_vsock: SOCK_SEQPACKET broken buffer test

2022-03-17 Thread Stefano Garzarella

On Thu, Mar 17, 2022 at 05:28:21AM +, Krasnov Arseniy Vladimirovich wrote:

Add test where sender sends two message, each with own
data pattern. Reader tries to read first to broken buffer:
it has three pages size, but middle page is unmapped. Then,
reader tries to read second message to valid buffer. Test
checks, that uncopied part of first message was dropped
and thus not copied as part of second message.

Signed-off-by: Krasnov Arseniy Vladimirovich 
---
v2 -> v3:
1) "got X, expected Y" -> "expected X, got Y".
2) Some checkpatch.pl fixes.

tools/testing/vsock/vsock_test.c | 131 +++
1 file changed, 131 insertions(+)


Reviewed-by: Stefano Garzarella 

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH net-next v3 1/2] af_vsock: SOCK_SEQPACKET receive timeout test

2022-03-17 Thread Stefano Garzarella

On Thu, Mar 17, 2022 at 05:26:45AM +, Krasnov Arseniy Vladimirovich wrote:

Test for receive timeout check: connection is established,
receiver sets timeout, but sender does nothing. Receiver's
'read()' call must return EAGAIN.

Signed-off-by: Krasnov Arseniy Vladimirovich 
---
v2 -> v3:
1) Use 'fprintf()' instead of 'perror()' where 'errno' variable
   is not affected.
2) Print 'read()' overhead.

tools/testing/vsock/vsock_test.c | 84 
1 file changed, 84 insertions(+)

diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c
index 2a3638c0a008..f5498de6751d 100644
--- a/tools/testing/vsock/vsock_test.c
+++ b/tools/testing/vsock/vsock_test.c
@@ -16,6 +16,7 @@
#include 
#include 
#include 
+#include 

#include "timeout.h"
#include "control.h"
@@ -391,6 +392,84 @@ static void test_seqpacket_msg_trunc_server(const struct 
test_opts *opts)
close(fd);
}

+static time_t current_nsec(void)
+{
+   struct timespec ts;
+
+   if (clock_gettime(CLOCK_REALTIME, )) {
+   perror("clock_gettime(3) failed");
+   exit(EXIT_FAILURE);
+   }
+
+   return (ts.tv_sec * 10ULL) + ts.tv_nsec;
+}
+
+#define RCVTIMEO_TIMEOUT_SEC 1
+#define READ_OVERHEAD_NSEC 25000 /* 0.25 sec */
+
+static void test_seqpacket_timeout_client(const struct test_opts *opts)
+{
+   int fd;
+   struct timeval tv;
+   char dummy;
+   time_t read_enter_ns;
+   time_t read_overhead_ns;
+
+   fd = vsock_seqpacket_connect(opts->peer_cid, 1234);
+   if (fd < 0) {
+   perror("connect");
+   exit(EXIT_FAILURE);
+   }
+
+   tv.tv_sec = RCVTIMEO_TIMEOUT_SEC;
+   tv.tv_usec = 0;
+
+   if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, (void *), sizeof(tv)) == 
-1) {
+   perror("setsockopt 'SO_RCVTIMEO'");
+   exit(EXIT_FAILURE);
+   }
+
+   read_enter_ns = current_nsec();
+
+   if (errno != EAGAIN) {
+   perror("EAGAIN expected");
+   exit(EXIT_FAILURE);
+   }


Should this check go after read()?

Indeed now the test fails on my environment with "EAGAIN expected" 
message.


The rest LGTM :-)

Stefano


+
+   if (read(fd, , sizeof(dummy)) != -1) {
+   fprintf(stderr,
+   "expected 'dummy' read(2) failure\n");
+   exit(EXIT_FAILURE);
+   }
+
+   read_overhead_ns = current_nsec() - read_enter_ns -
+   10ULL * RCVTIMEO_TIMEOUT_SEC;
+
+   if (read_overhead_ns > READ_OVERHEAD_NSEC) {
+   fprintf(stderr,
+   "too much time in read(2), %lu > %i ns\n",
+   read_overhead_ns, READ_OVERHEAD_NSEC);
+   exit(EXIT_FAILURE);
+   }
+
+   control_writeln("WAITDONE");
+   close(fd);
+}
+
+static void test_seqpacket_timeout_server(const struct test_opts *opts)
+{
+   int fd;
+
+   fd = vsock_seqpacket_accept(VMADDR_CID_ANY, 1234, NULL);
+   if (fd < 0) {
+   perror("accept");
+   exit(EXIT_FAILURE);
+   }
+
+   control_expectln("WAITDONE");
+   close(fd);
+}
+
static struct test_case test_cases[] = {
{
.name = "SOCK_STREAM connection reset",
@@ -431,6 +510,11 @@ static struct test_case test_cases[] = {
.run_client = test_seqpacket_msg_trunc_client,
.run_server = test_seqpacket_msg_trunc_server,
},
+   {
+   .name = "SOCK_SEQPACKET timeout",
+   .run_client = test_seqpacket_timeout_client,
+   .run_server = test_seqpacket_timeout_server,
+   },
{},
};

--
2.25.1


___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [RFC PATCH v2 0/2] af_vsock: add two new tests for SOCK_SEQPACKET

2022-03-16 Thread Stefano Garzarella

On Wed, Mar 16, 2022 at 07:25:07AM +, Krasnov Arseniy Vladimirovich wrote:

This adds two tests: for receive timeout and reading to invalid
buffer provided by user. I forgot to put both patches to main
patchset.

Arseniy Krasnov(2):

af_vsock: SOCK_SEQPACKET receive timeout test
af_vsock: SOCK_SEQPACKET broken buffer test

tools/testing/vsock/vsock_test.c | 211 +++
1 file changed, 211 insertions(+)


I think there are only small things to fix, so next series you can 
remove RFC (remember to use net-next).


I added the tests to my suite and everything is running correctly.

I also suggest you to solve these little issues that checkpatch has 
highlighted to have patches ready for submission :-)


Thanks,
Stefano

$ ./scripts/checkpatch.pl --strict -g  master..HEAD
-
Commit 2a1bfb93b51d ("af_vsock: SOCK_SEQPACKET receive timeout test")
-
CHECK: Unnecessary parentheses around 'errno != EAGAIN'
#70: FILE: tools/testing/vsock/vsock_test.c:434:
+   if ((read(fd, , sizeof(dummy)) != -1) ||
+  (errno != EAGAIN)) {

WARNING: From:/Signed-off-by: email name mismatch: 'From: Krasnov Arseniy Vladimirovich 
' != 'Signed-off-by: Arseniy Krasnov 
'

total: 0 errors, 1 warnings, 1 checks, 97 lines checked

NOTE: For some of the reported defects, checkpatch may be able to
  mechanically convert to the typical style using --fix or --fix-inplace.

Commit 2a1bfb93b51d ("af_vsock: SOCK_SEQPACKET receive timeout test") has style 
problems, please review.
---
Commit 9176bcabcdd7 ("af_vsock: SOCK_SEQPACKET broken buffer test")
---
CHECK: Comparison to NULL could be written "!buf1"
#51: FILE: tools/testing/vsock/vsock_test.c:486:
+   if (buf1 == NULL) {

CHECK: Comparison to NULL could be written "!buf2"
#57: FILE: tools/testing/vsock/vsock_test.c:492:
+   if (buf2 == NULL) {

CHECK: Please don't use multiple blank lines
#152: FILE: tools/testing/vsock/vsock_test.c:587:
+
+

WARNING: From:/Signed-off-by: email name mismatch: 'From: Krasnov Arseniy Vladimirovich 
' != 'Signed-off-by: Arseniy Krasnov 
'

total: 0 errors, 1 warnings, 3 checks, 150 lines checked

NOTE: For some of the reported defects, checkpatch may be able to
  mechanically convert to the typical style using --fix or --fix-inplace.

Commit 9176bcabcdd7 ("af_vsock: SOCK_SEQPACKET broken buffer test") has style 
problems, please review.

NOTE: If any of the errors are false positives, please report
  them to the maintainer, see CHECKPATCH in MAINTAINERS.




___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [RFC PATCH v2 2/2] af_vsock: SOCK_SEQPACKET broken buffer test

2022-03-16 Thread Stefano Garzarella

On Wed, Mar 16, 2022 at 07:29:28AM +, Krasnov Arseniy Vladimirovich wrote:

Add test where sender sends two message, each with own
data pattern. Reader tries to read first to broken buffer:
it has three pages size, but middle page is unmapped. Then,
reader tries to read second message to valid buffer. Test
checks, that uncopied part of first message was dropped
and thus not copied as part of second message.

Signed-off-by: Arseniy Krasnov 
---
v1 -> v2:
1) Use 'fprintf()' instead of 'perror()' where 'errno' variable
   is not affected.
2) Replace word "invalid" -> "unexpected".

tools/testing/vsock/vsock_test.c | 132 +++
1 file changed, 132 insertions(+)

diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c
index 6d7648cce5aa..1132bcd8ddb7 100644
--- a/tools/testing/vsock/vsock_test.c
+++ b/tools/testing/vsock/vsock_test.c
@@ -17,6 +17,7 @@
#include 
#include 
#include 
+#include 

#include "timeout.h"
#include "control.h"
@@ -465,6 +466,132 @@ static void test_seqpacket_timeout_server(const struct 
test_opts *opts)
close(fd);
}

+#define BUF_PATTERN_1 'a'
+#define BUF_PATTERN_2 'b'
+
+static void test_seqpacket_invalid_rec_buffer_client(const struct test_opts 
*opts)
+{
+   int fd;
+   unsigned char *buf1;
+   unsigned char *buf2;
+   int buf_size = getpagesize() * 3;
+
+   fd = vsock_seqpacket_connect(opts->peer_cid, 1234);
+   if (fd < 0) {
+   perror("connect");
+   exit(EXIT_FAILURE);
+   }
+
+   buf1 = malloc(buf_size);
+   if (buf1 == NULL) {
+   perror("'malloc()' for 'buf1'");
+   exit(EXIT_FAILURE);
+   }
+
+   buf2 = malloc(buf_size);
+   if (buf2 == NULL) {
+   perror("'malloc()' for 'buf2'");
+   exit(EXIT_FAILURE);
+   }
+
+   memset(buf1, BUF_PATTERN_1, buf_size);
+   memset(buf2, BUF_PATTERN_2, buf_size);
+
+   if (send(fd, buf1, buf_size, 0) != buf_size) {
+   perror("send failed");
+   exit(EXIT_FAILURE);
+   }
+
+   if (send(fd, buf2, buf_size, 0) != buf_size) {
+   perror("send failed");
+   exit(EXIT_FAILURE);
+   }
+
+   close(fd);
+}
+
+static void test_seqpacket_invalid_rec_buffer_server(const struct test_opts 
*opts)
+{
+   int fd;
+   unsigned char *broken_buf;
+   unsigned char *valid_buf;
+   int page_size = getpagesize();
+   int buf_size = page_size * 3;
+   ssize_t res;
+   int prot = PROT_READ | PROT_WRITE;
+   int flags = MAP_PRIVATE | MAP_ANONYMOUS;
+   int i;
+
+   fd = vsock_seqpacket_accept(VMADDR_CID_ANY, 1234, NULL);
+   if (fd < 0) {
+   perror("accept");
+   exit(EXIT_FAILURE);
+   }
+
+   /* Setup first buffer. */
+   broken_buf = mmap(NULL, buf_size, prot, flags, -1, 0);
+   if (broken_buf == MAP_FAILED) {
+   perror("mmap for 'broken_buf'");
+   exit(EXIT_FAILURE);
+   }
+
+   /* Unmap "hole" in buffer. */
+   if (munmap(broken_buf + page_size, page_size)) {
+   perror("'broken_buf' setup");
+   exit(EXIT_FAILURE);
+   }
+
+   valid_buf = mmap(NULL, buf_size, prot, flags, -1, 0);
+   if (valid_buf == MAP_FAILED) {
+   perror("mmap for 'valid_buf'");
+   exit(EXIT_FAILURE);
+   }
+
+   /* Try to fill buffer with unmapped middle. */
+   res = read(fd, broken_buf, buf_size);
+   if (res != -1) {
+   fprintf(stderr,
+   "expected 'broken_buf' read(2) failure, got %zi\n",
+   res);
+   exit(EXIT_FAILURE);
+   }
+
+   if (errno != ENOMEM) {
+   perror("unexpected errno of 'broken_buf'");
+   exit(EXIT_FAILURE);
+   }
+
+   /* Try to fill valid buffer. */
+   res = read(fd, valid_buf, buf_size);
+   if (res < 0) {
+   perror("unexpected 'valid_buf' read(2) failure");
+   exit(EXIT_FAILURE);
+   }
+
+   if (res != buf_size) {
+   fprintf(stderr,
+   "invalid 'valid_buf' read(2), got %zi, expected %i\n",
+   res, buf_size);


I would suggest to use always the same pattern in the error messages:
"expected X, got Y".

The rest LGTM.


+   exit(EXIT_FAILURE);
+   }
+
+   for (i = 0; i < buf_size; i++) {
+   if (valid_buf[i] != BUF_PATTERN_2) {
+   fprintf(stderr,
+   "invalid pattern for 'valid_buf' at %i, expected 
%hhX, got %hhX\n",
+   i, BUF_PATTERN_2, valid_buf[i]);
+   exit(EXIT_FAILURE);
+   }
+   }
+
+
+   /* Unmap buffers. */
+   munmap(broken_buf, page_size);
+   munmap(broken_buf + page_size * 2, page_size);
+   munmap(valid_buf, buf_size);
+   

Re: [RFC PATCH v2 1/2] af_vsock: SOCK_SEQPACKET receive timeout test

2022-03-16 Thread Stefano Garzarella

On Wed, Mar 16, 2022 at 07:27:45AM +, Krasnov Arseniy Vladimirovich wrote:

Test for receive timeout check: connection is established,
receiver sets timeout, but sender does nothing. Receiver's
'read()' call must return EAGAIN.

Signed-off-by: Arseniy Krasnov 
---
v1 -> v2:
1) Check amount of time spent in 'read()'.


The patch looks correct to me, but since it's an RFC and you have to 
send another version anyway, here are some minor suggestions :-)




tools/testing/vsock/vsock_test.c | 79 
1 file changed, 79 insertions(+)

diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c
index 2a3638c0a008..6d7648cce5aa 100644
--- a/tools/testing/vsock/vsock_test.c
+++ b/tools/testing/vsock/vsock_test.c
@@ -16,6 +16,7 @@
#include 
#include 
#include 
+#include 

#include "timeout.h"
#include "control.h"
@@ -391,6 +392,79 @@ static void test_seqpacket_msg_trunc_server(const struct 
test_opts *opts)
close(fd);
}

+static time_t current_nsec(void)
+{
+   struct timespec ts;
+
+   if (clock_gettime(CLOCK_REALTIME, )) {
+   perror("clock_gettime(3) failed");
+   exit(EXIT_FAILURE);
+   }
+
+   return (ts.tv_sec * 10ULL) + ts.tv_nsec;
+}
+
+#define RCVTIMEO_TIMEOUT_SEC 1
+#define READ_OVERHEAD_NSEC 25000 /* 0.25 sec */
+
+static void test_seqpacket_timeout_client(const struct test_opts *opts)
+{
+   int fd;
+   struct timeval tv;
+   char dummy;
+   time_t read_enter_ns;
+   time_t read_overhead_ns;
+
+   fd = vsock_seqpacket_connect(opts->peer_cid, 1234);
+   if (fd < 0) {
+   perror("connect");
+   exit(EXIT_FAILURE);
+   }
+
+   tv.tv_sec = RCVTIMEO_TIMEOUT_SEC;
+   tv.tv_usec = 0;
+
+   if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, (void *), sizeof(tv)) == 
-1) {
+   perror("setsockopt 'SO_RCVTIMEO'");
+   exit(EXIT_FAILURE);
+   }
+
+   read_enter_ns = current_nsec();
+
+   if ((read(fd, , sizeof(dummy)) != -1) ||
+   (errno != EAGAIN)) {


Here we can split in 2 checks like in patch 2, since if read() return 
value is >= 0, errno is not set.



+   perror("EAGAIN expected");
+   exit(EXIT_FAILURE);
+   }
+
+   read_overhead_ns = current_nsec() - read_enter_ns -
+   10ULL * RCVTIMEO_TIMEOUT_SEC;
+
+   if (read_overhead_ns > READ_OVERHEAD_NSEC) {
+   fprintf(stderr,
+   "too much time in read(2) with SO_RCVTIMEO: %lu ns\n",
+   read_overhead_ns);


What about printing also the expected overhead?


+   exit(EXIT_FAILURE);
+   }
+
+   control_writeln("WAITDONE");
+   close(fd);
+}
+
+static void test_seqpacket_timeout_server(const struct test_opts *opts)
+{
+   int fd;
+
+   fd = vsock_seqpacket_accept(VMADDR_CID_ANY, 1234, NULL);
+   if (fd < 0) {
+   perror("accept");
+   exit(EXIT_FAILURE);
+   }
+
+   control_expectln("WAITDONE");
+   close(fd);
+}
+
static struct test_case test_cases[] = {
{
.name = "SOCK_STREAM connection reset",
@@ -431,6 +505,11 @@ static struct test_case test_cases[] = {
.run_client = test_seqpacket_msg_trunc_client,
.run_server = test_seqpacket_msg_trunc_server,
},
+   {
+   .name = "SOCK_SEQPACKET timeout",
+   .run_client = test_seqpacket_timeout_client,
+   .run_server = test_seqpacket_timeout_server,
+   },
{},
};

--
2.25.1


___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [RFC PATCH v1 3/3] af_vsock: SOCK_SEQPACKET broken buffer test

2022-03-15 Thread Stefano Garzarella

On Tue, Mar 15, 2022 at 12:43:13PM +, Krasnov Arseniy Vladimirovich wrote:

On 15.03.2022 12:35, Arseniy Krasnov wrote:

On 15.03.2022 11:36, Stefano Garzarella wrote:

On Fri, Mar 11, 2022 at 10:58:32AM +, Krasnov Arseniy Vladimirovich wrote:

Add test where sender sends two message, each with own
data pattern. Reader tries to read first to broken buffer:
it has three pages size, but middle page is unmapped. Then,
reader tries to read second message to valid buffer. Test
checks, that uncopied part of first message was dropped
and thus not copied as part of second message.

Signed-off-by: Arseniy Krasnov 
---
tools/testing/vsock/vsock_test.c | 121 +++
1 file changed, 121 insertions(+)

diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c
index aa2de27d0f77..686af712b4ad 100644
--- a/tools/testing/vsock/vsock_test.c
+++ b/tools/testing/vsock/vsock_test.c
@@ -16,6 +16,7 @@
#include 
#include 
#include 
+#include 

#include "timeout.h"
#include "control.h"
@@ -435,6 +436,121 @@ static void test_seqpacket_timeout_server(const struct 
test_opts *opts)
close(fd);
}

+#define BUF_PATTERN_1 'a'
+#define BUF_PATTERN_2 'b'
+
+static void test_seqpacket_invalid_rec_buffer_client(const struct test_opts 
*opts)
+{
+    int fd;
+    unsigned char *buf1;
+    unsigned char *buf2;
+    int buf_size = getpagesize() * 3;
+
+    fd = vsock_seqpacket_connect(opts->peer_cid, 1234);
+    if (fd < 0) {
+    perror("connect");
+    exit(EXIT_FAILURE);
+    }
+
+    buf1 = malloc(buf_size);
+    if (buf1 == NULL) {
+    perror("'malloc()' for 'buf1'");
+    exit(EXIT_FAILURE);
+    }
+
+    buf2 = malloc(buf_size);
+    if (buf2 == NULL) {
+    perror("'malloc()' for 'buf2'");
+    exit(EXIT_FAILURE);
+    }
+
+    memset(buf1, BUF_PATTERN_1, buf_size);
+    memset(buf2, BUF_PATTERN_2, buf_size);
+
+    if (send(fd, buf1, buf_size, 0) != buf_size) {
+    perror("send failed");
+    exit(EXIT_FAILURE);
+    }
+
+    if (send(fd, buf2, buf_size, 0) != buf_size) {
+    perror("send failed");
+    exit(EXIT_FAILURE);
+    }
+
+    close(fd);
+}
+
+static void test_seqpacket_invalid_rec_buffer_server(const struct test_opts 
*opts)
+{
+    int fd;
+    unsigned char *broken_buf;
+    unsigned char *valid_buf;
+    int page_size = getpagesize();
+    int buf_size = page_size * 3;
+    ssize_t res;
+    int prot = PROT_READ | PROT_WRITE;
+    int flags = MAP_PRIVATE | MAP_ANONYMOUS;
+    int i;
+
+    fd = vsock_seqpacket_accept(VMADDR_CID_ANY, 1234, NULL);
+    if (fd < 0) {
+    perror("accept");
+    exit(EXIT_FAILURE);
+    }
+
+    /* Setup first buffer. */
+    broken_buf = mmap(NULL, buf_size, prot, flags, -1, 0);
+    if (broken_buf == MAP_FAILED) {
+    perror("mmap for 'broken_buf'");
+    exit(EXIT_FAILURE);
+    }
+
+    /* Unmap "hole" in buffer. */
+    if (munmap(broken_buf + page_size, page_size)) {
+    perror("'broken_buf' setup");
+    exit(EXIT_FAILURE);
+    }
+
+    valid_buf = mmap(NULL, buf_size, prot, flags, -1, 0);
+    if (valid_buf == MAP_FAILED) {
+    perror("mmap for 'valid_buf'");
+    exit(EXIT_FAILURE);
+    }
+
+    /* Try to fill buffer with unmapped middle. */
+    res = read(fd, broken_buf, buf_size);
+    if (res != -1) {
+    perror("invalid read result of 'broken_buf'");


if `res` is valid, errno is not set, better to use fprintf(stderr, ...) 
printing the expected and received result.
Take a look at test_stream_connection_reset()


Ack, fix it in v2




+    exit(EXIT_FAILURE);
+    }
+
+    if (errno != ENOMEM) {
+    perror("invalid errno of 'broken_buf'");


Instead of "invalid", I would say "unexpected".


Ack




+    exit(EXIT_FAILURE);
+    }




+
+    /* Try to fill valid buffer. */
+    res = read(fd, valid_buf, buf_size);
+    if (res != buf_size) {
+    perror("invalid read result of 'valid_buf'");


I would split in 2 checks:
- (res < 0) then use perror()
- (res != buf_size) then use fprintf(stderr, ...) printing the expected   and 
received result.


Ack




+    exit(EXIT_FAILURE);
+    }
+
+    for (i = 0; i < buf_size; i++) {
+    if (valid_buf[i] != BUF_PATTERN_2) {
+    perror("invalid pattern for valid buf");


errno is not set here, better to use fprintf(stderr, ...)


Ack




+    exit(EXIT_FAILURE);
+    }
+    }


What about replace this for with a memcmp()?


memcmp() will require special buffer with BUF_PATTERN_2 data as
second argument. I think 'if()' condition is better here, as we
compare each element of buffer with single byte. Anyway, 'memcmp()'
does the same things inside itself.


Ah, I see. Okay, I agree on for()/if(), maybe we can also print more 
info (index, expected value, 

Re: [RFC PATCH v1 3/3] af_vsock: SOCK_SEQPACKET broken buffer test

2022-03-15 Thread Stefano Garzarella
On Tue, Mar 15, 2022 at 09:34:35AM +, Krasnov Arseniy Vladimirovich 
wrote:

On 15.03.2022 11:36, Stefano Garzarella wrote:


Is this the right behavior? If read() fails because the buffer is invalid, do 
we throw out the whole packet?

I was expecting the packet not to be consumed, have you tried AF_UNIX, does it 
have the same behavior?


I've just checked AF_UNIX implementation of SEQPACKET receive in 
net/unix/af_unix.c. So, if 'skb_copy_datagram_msg()'
fails, it calls 'skb_free_datagram()'. I think this means that whole sk buff 
will be dropped, but anyway, i'll check
this behaviour in practice. See '__unix_dgram_recvmsg()' in net/unix/af_unix.c.



Yep. you are right it seems to be discarded but I don't know that
code very well, so better to test as you said ;-)

Thanks,
Stefano

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH v2 3/3] vdpa: support exposing the count of vqs to userspace

2022-03-15 Thread Stefano Garzarella

On Tue, Mar 15, 2022 at 11:25:53AM +0800, Longpeng(Mike) wrote:

From: Longpeng 

- GET_VQS_COUNT: the count of virtqueues that exposed

Signed-off-by: Longpeng 
---
drivers/vhost/vdpa.c   | 13 +
include/uapi/linux/vhost.h |  3 +++
2 files changed, 16 insertions(+)

diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
index 0c82eb5..69b3f05 100644
--- a/drivers/vhost/vdpa.c
+++ b/drivers/vhost/vdpa.c
@@ -370,6 +370,16 @@ static long vhost_vdpa_get_config_size(struct vhost_vdpa 
*v, u32 __user *argp)
return 0;
}

+static long vhost_vdpa_get_vqs_count(struct vhost_vdpa *v, u32 __user *argp)
+{
+   struct vdpa_device *vdpa = v->vdpa;
+
+   if (copy_to_user(argp, >nvqs, sizeof(vdpa->nvqs)))
+   return -EFAULT;
+
+   return 0;
+}
+
static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
   void __user *argp)
{
@@ -510,6 +520,9 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep,
case VHOST_VDPA_GET_CONFIG_SIZE:
r = vhost_vdpa_get_config_size(v, argp);
break;
+   case VHOST_VDPA_GET_VQS_COUNT:
+   r = vhost_vdpa_get_vqs_count(v, argp);
+   break;
default:
r = vhost_dev_ioctl(>vdev, cmd, argp);
if (r == -ENOIOCTLCMD)
diff --git a/include/uapi/linux/vhost.h b/include/uapi/linux/vhost.h
index bc74e95..5d99e7c 100644
--- a/include/uapi/linux/vhost.h
+++ b/include/uapi/linux/vhost.h
@@ -154,4 +154,7 @@
/* Get the config size */
#define VHOST_VDPA_GET_CONFIG_SIZE  _IOR(VHOST_VIRTIO, 0x79, __u32)

+/* Get the count of all virtqueues */
+#define VHOST_VDPA_GET_VQS_COUNT   _IOR(VHOST_VIRTIO, 0x80, __u32)


I'd prefer VHOST_VDPA_GET_NUM_QUEUES, since we use "num_queues" also in 
the spec [1].


But I'm fine also with this:

Reviewed-by: Stefano Garzarella 

[1] 
https://docs.oasis-open.org/virtio/virtio/v1.1/csprd01/virtio-v1.1-csprd01.html#x1-1120003


___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH v2 2/3] vdpa: change the type of nvqs to u32

2022-03-15 Thread Stefano Garzarella

On Tue, Mar 15, 2022 at 11:25:52AM +0800, Longpeng(Mike) wrote:

From: Longpeng 

Change vdpa_device.nvqs and vhost_vdpa.nvqs to use u32

Signed-off-by: Longpeng 
---
drivers/vdpa/vdpa.c  |  6 +++---
drivers/vhost/vdpa.c | 10 ++
include/linux/vdpa.h |  6 +++---
3 files changed, 12 insertions(+), 10 deletions(-)


Reviewed-by: Stefano Garzarella 

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH v2 1/3] vdpa: support exposing the config size to userspace

2022-03-15 Thread Stefano Garzarella

On Tue, Mar 15, 2022 at 11:25:51AM +0800, Longpeng(Mike) wrote:

From: Longpeng 

- GET_CONFIG_SIZE: return the size of the virtio config space.

The size contains the fields which are conditional on feature
bits.

Acked-by: Jason Wang 
Signed-off-by: Longpeng 
---
drivers/vhost/vdpa.c   | 17 +
include/linux/vdpa.h   |  3 ++-
include/uapi/linux/vhost.h |  4 
3 files changed, 23 insertions(+), 1 deletion(-)

diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
index ec5249e..605c7ae 100644
--- a/drivers/vhost/vdpa.c
+++ b/drivers/vhost/vdpa.c
@@ -355,6 +355,20 @@ static long vhost_vdpa_get_iova_range(struct vhost_vdpa 
*v, u32 __user *argp)
return 0;
}

+static long vhost_vdpa_get_config_size(struct vhost_vdpa *v, u32 __user *argp)
+{
+   struct vdpa_device *vdpa = v->vdpa;
+   const struct vdpa_config_ops *ops = vdpa->config;
+   u32 size;
+
+   size = ops->get_config_size(vdpa);


get_config_size() returns a size_t, perhaps we could have a comment here 
where we say we don't expect there to be an overflow.


I don't have a strong opinion on this, and I wouldn't want to get you to 
repin just for that, so:


Reviewed-by: Stefano Garzarella 

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [RFC PATCH v1 1/3] af_vsock: add two new tests for SOCK_SEQPACKET

2022-03-15 Thread Stefano Garzarella

Hi Arseniy,

On Fri, Mar 11, 2022 at 10:52:36AM +, Krasnov Arseniy Vladimirovich wrote:

This adds two tests: for receive timeout and reading to invalid
buffer provided by user. I forgot to put both patches to main
patchset.

Arseniy Krasnov(2):

af_vsock: SOCK_SEQPACKET receive timeout test
af_vsock: SOCK_SEQPACKET broken buffer test

tools/testing/vsock/vsock_test.c | 170 +++
1 file changed, 170 insertions(+)


Thank you for these tests!

I left a few comments and I'm not sure about the 'broken buffer test' 
behavior.


About the series, it sounds like something is wrong with your setup, 
usually the cover letter is "patch" 0. In this case I would have 
expected:


[0/2] af_vsock: add two new tests for SOCK_SEQPACKET
[1/2] af_vsock: SOCK_SEQPACKET receive timeout test
[2/2] af_vsock: SOCK_SEQPACKET broken buffer test

Are you using `git send-email` or `git publish`?


When you will remove the RFC, please add `net-next` label:
[PATCH net-next 0/2], etc..

Thanks,
Stefano

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [RFC PATCH v1 3/3] af_vsock: SOCK_SEQPACKET broken buffer test

2022-03-15 Thread Stefano Garzarella

On Fri, Mar 11, 2022 at 10:58:32AM +, Krasnov Arseniy Vladimirovich wrote:

Add test where sender sends two message, each with own
data pattern. Reader tries to read first to broken buffer:
it has three pages size, but middle page is unmapped. Then,
reader tries to read second message to valid buffer. Test
checks, that uncopied part of first message was dropped
and thus not copied as part of second message.

Signed-off-by: Arseniy Krasnov 
---
tools/testing/vsock/vsock_test.c | 121 +++
1 file changed, 121 insertions(+)

diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c
index aa2de27d0f77..686af712b4ad 100644
--- a/tools/testing/vsock/vsock_test.c
+++ b/tools/testing/vsock/vsock_test.c
@@ -16,6 +16,7 @@
#include 
#include 
#include 
+#include 

#include "timeout.h"
#include "control.h"
@@ -435,6 +436,121 @@ static void test_seqpacket_timeout_server(const struct 
test_opts *opts)
close(fd);
}

+#define BUF_PATTERN_1 'a'
+#define BUF_PATTERN_2 'b'
+
+static void test_seqpacket_invalid_rec_buffer_client(const struct test_opts 
*opts)
+{
+   int fd;
+   unsigned char *buf1;
+   unsigned char *buf2;
+   int buf_size = getpagesize() * 3;
+
+   fd = vsock_seqpacket_connect(opts->peer_cid, 1234);
+   if (fd < 0) {
+   perror("connect");
+   exit(EXIT_FAILURE);
+   }
+
+   buf1 = malloc(buf_size);
+   if (buf1 == NULL) {
+   perror("'malloc()' for 'buf1'");
+   exit(EXIT_FAILURE);
+   }
+
+   buf2 = malloc(buf_size);
+   if (buf2 == NULL) {
+   perror("'malloc()' for 'buf2'");
+   exit(EXIT_FAILURE);
+   }
+
+   memset(buf1, BUF_PATTERN_1, buf_size);
+   memset(buf2, BUF_PATTERN_2, buf_size);
+
+   if (send(fd, buf1, buf_size, 0) != buf_size) {
+   perror("send failed");
+   exit(EXIT_FAILURE);
+   }
+
+   if (send(fd, buf2, buf_size, 0) != buf_size) {
+   perror("send failed");
+   exit(EXIT_FAILURE);
+   }
+
+   close(fd);
+}
+
+static void test_seqpacket_invalid_rec_buffer_server(const struct test_opts 
*opts)
+{
+   int fd;
+   unsigned char *broken_buf;
+   unsigned char *valid_buf;
+   int page_size = getpagesize();
+   int buf_size = page_size * 3;
+   ssize_t res;
+   int prot = PROT_READ | PROT_WRITE;
+   int flags = MAP_PRIVATE | MAP_ANONYMOUS;
+   int i;
+
+   fd = vsock_seqpacket_accept(VMADDR_CID_ANY, 1234, NULL);
+   if (fd < 0) {
+   perror("accept");
+   exit(EXIT_FAILURE);
+   }
+
+   /* Setup first buffer. */
+   broken_buf = mmap(NULL, buf_size, prot, flags, -1, 0);
+   if (broken_buf == MAP_FAILED) {
+   perror("mmap for 'broken_buf'");
+   exit(EXIT_FAILURE);
+   }
+
+   /* Unmap "hole" in buffer. */
+   if (munmap(broken_buf + page_size, page_size)) {
+   perror("'broken_buf' setup");
+   exit(EXIT_FAILURE);
+   }
+
+   valid_buf = mmap(NULL, buf_size, prot, flags, -1, 0);
+   if (valid_buf == MAP_FAILED) {
+   perror("mmap for 'valid_buf'");
+   exit(EXIT_FAILURE);
+   }
+
+   /* Try to fill buffer with unmapped middle. */
+   res = read(fd, broken_buf, buf_size);
+   if (res != -1) {
+   perror("invalid read result of 'broken_buf'");


if `res` is valid, errno is not set, better to use fprintf(stderr, ...) 
printing the expected and received result.

Take a look at test_stream_connection_reset()


+   exit(EXIT_FAILURE);
+   }
+
+   if (errno != ENOMEM) {
+   perror("invalid errno of 'broken_buf'");


Instead of "invalid", I would say "unexpected".


+   exit(EXIT_FAILURE);
+   }




+
+   /* Try to fill valid buffer. */
+   res = read(fd, valid_buf, buf_size);
+   if (res != buf_size) {
+   perror("invalid read result of 'valid_buf'");


I would split in 2 checks:
- (res < 0) then use perror()
- (res != buf_size) then use fprintf(stderr, ...) printing the expected 
  and received result.



+   exit(EXIT_FAILURE);
+   }
+
+   for (i = 0; i < buf_size; i++) {
+   if (valid_buf[i] != BUF_PATTERN_2) {
+   perror("invalid pattern for valid buf");


errno is not set here, better to use fprintf(stderr, ...)


+   exit(EXIT_FAILURE);
+   }
+   }


What about replace this for with a memcmp()?


+
+
+   /* Unmap buffers. */
+   munmap(broken_buf, page_size);
+   munmap(broken_buf + page_size * 2, page_size);
+   munmap(valid_buf, buf_size);
+   close(fd);
+}
+
static struct test_case test_cases[] = {
{
.name = "SOCK_STREAM connection reset",
@@ -480,6 +596,11 @@ static struct test_case test_cases[] = {

Re: [RFC PATCH v1 2/3] af_vsock: SOCK_SEQPACKET receive timeout test

2022-03-15 Thread Stefano Garzarella

On Fri, Mar 11, 2022 at 10:55:42AM +, Krasnov Arseniy Vladimirovich wrote:

Test for receive timeout check: connection is established,
receiver sets timeout, but sender does nothing. Receiver's
'read()' call must return EAGAIN.

Signed-off-by: Arseniy Krasnov 
---
tools/testing/vsock/vsock_test.c | 49 
1 file changed, 49 insertions(+)

diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c
index 2a3638c0a008..aa2de27d0f77 100644
--- a/tools/testing/vsock/vsock_test.c
+++ b/tools/testing/vsock/vsock_test.c
@@ -391,6 +391,50 @@ static void test_seqpacket_msg_trunc_server(const struct 
test_opts *opts)
close(fd);
}

+static void test_seqpacket_timeout_client(const struct test_opts *opts)
+{
+   int fd;
+   struct timeval tv;
+   char dummy;
+
+   fd = vsock_seqpacket_connect(opts->peer_cid, 1234);
+   if (fd < 0) {
+   perror("connect");
+   exit(EXIT_FAILURE);
+   }
+
+   tv.tv_sec = 1;
+   tv.tv_usec = 0;
+
+   if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, (void *), sizeof(tv)) == 
-1) {
+   perror("setsockopt 'SO_RCVTIMEO'");
+   exit(EXIT_FAILURE);
+   }
+
+   if ((read(fd, , sizeof(dummy)) != -1) ||
+   (errno != EAGAIN)) {
+   perror("EAGAIN expected");
+   exit(EXIT_FAILURE);
+   }


The patch LGTM, maybe the only thing I would add here is a check on the 
time spent in the read(), to see that it is approximately the timeout we 
have set.


Thanks,
Stefano

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH v3] vsock: each transport cycles only on its own sockets

2022-03-10 Thread Stefano Garzarella

On Thu, Mar 10, 2022 at 10:50:11PM +0900, Jiyong Park wrote:

When iterating over sockets using vsock_for_each_connected_socket, make
sure that a transport filters out sockets that don't belong to the
transport.

There actually was an issue caused by this; in a nested VM
configuration, destroying the nested VM (which often involves the
closing of /dev/vhost-vsock if there was h2g connections to the nested
VM) kills not only the h2g connections, but also all existing g2h
connections to the (outmost) host which are totally unrelated.

Tested: Executed the following steps on Cuttlefish (Android running on a
VM) [1]: (1) Enter into an `adb shell` session - to have a g2h
connection inside the VM, (2) open and then close /dev/vhost-vsock by
`exec 3< /dev/vhost-vsock && exec 3<&-`, (3) observe that the adb
session is not reset.

[1] https://android.googlesource.com/device/google/cuttlefish/

Fixes: c0cfa2d8a788 ("vsock: add multi-transports support")
Signed-off-by: Jiyong Park 
---
Changes in v3:
 - Fixed the build error in vmci_transport.c
Changes in v2:
 - Squashed into a single patch

drivers/vhost/vsock.c| 3 ++-
include/net/af_vsock.h   | 3 ++-
net/vmw_vsock/af_vsock.c | 9 +++--
net/vmw_vsock/virtio_transport.c | 7 +--
net/vmw_vsock/vmci_transport.c   | 5 -
5 files changed, 20 insertions(+), 7 deletions(-)


It seems okay now, I ran my test suite and everything seems to be fine:

Reviewed-by: Stefano Garzarella 

Thanks,
Stefano

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH v2] vsock: each transport cycles only on its own sockets

2022-03-10 Thread Stefano Garzarella

On Thu, Mar 10, 2022 at 10:28:29PM +0900, Jiyong Park wrote:

When iterating over sockets using vsock_for_each_connected_socket, make
sure that a transport filters out sockets that don't belong to the
transport.

There actually was an issue caused by this; in a nested VM
configuration, destroying the nested VM (which often involves the
closing of /dev/vhost-vsock if there was h2g connections to the nested
VM) kills not only the h2g connections, but also all existing g2h
connections to the (outmost) host which are totally unrelated.

Tested: Executed the following steps on Cuttlefish (Android running on a
VM) [1]: (1) Enter into an `adb shell` session - to have a g2h
connection inside the VM, (2) open and then close /dev/vhost-vsock by
`exec 3< /dev/vhost-vsock && exec 3<&-`, (3) observe that the adb
session is not reset.

[1] https://android.googlesource.com/device/google/cuttlefish/

Fixes: c0cfa2d8a788 ("vsock: add multi-transports support")
Signed-off-by: Jiyong Park 
---
Changes in v2:
 - Squashed into a single patch

drivers/vhost/vsock.c| 3 ++-
include/net/af_vsock.h   | 3 ++-
net/vmw_vsock/af_vsock.c | 9 +++--
net/vmw_vsock/virtio_transport.c | 7 +--
net/vmw_vsock/vmci_transport.c   | 3 ++-
5 files changed, 18 insertions(+), 7 deletions(-)

diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
index 37f0b4274113..e6c9d41db1de 100644
--- a/drivers/vhost/vsock.c
+++ b/drivers/vhost/vsock.c
@@ -753,7 +753,8 @@ static int vhost_vsock_dev_release(struct inode *inode, 
struct file *file)

/* Iterating over all connections for all CIDs to find orphans is
 * inefficient.  Room for improvement here. */
-   vsock_for_each_connected_socket(vhost_vsock_reset_orphans);
+   vsock_for_each_connected_socket(_transport.transport,
+   vhost_vsock_reset_orphans);

/* Don't check the owner, because we are in the release path, so we
 * need to stop the vsock device in any case.
diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h
index ab207677e0a8..f742e50207fb 100644
--- a/include/net/af_vsock.h
+++ b/include/net/af_vsock.h
@@ -205,7 +205,8 @@ struct sock *vsock_find_bound_socket(struct sockaddr_vm 
*addr);
struct sock *vsock_find_connected_socket(struct sockaddr_vm *src,
 struct sockaddr_vm *dst);
void vsock_remove_sock(struct vsock_sock *vsk);
-void vsock_for_each_connected_socket(void (*fn)(struct sock *sk));
+void vsock_for_each_connected_socket(struct vsock_transport *transport,
+void (*fn)(struct sock *sk));
int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk);
bool vsock_find_cid(unsigned int cid);

diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 38baeb189d4e..f04abf662ec6 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -334,7 +334,8 @@ void vsock_remove_sock(struct vsock_sock *vsk)
}
EXPORT_SYMBOL_GPL(vsock_remove_sock);

-void vsock_for_each_connected_socket(void (*fn)(struct sock *sk))
+void vsock_for_each_connected_socket(struct vsock_transport *transport,
+void (*fn)(struct sock *sk))
{
int i;

@@ -343,8 +344,12 @@ void vsock_for_each_connected_socket(void (*fn)(struct 
sock *sk))
for (i = 0; i < ARRAY_SIZE(vsock_connected_table); i++) {
struct vsock_sock *vsk;
list_for_each_entry(vsk, _connected_table[i],
-   connected_table)
+   connected_table) {
+   if (vsk->transport != transport)
+   continue;
+
fn(sk_vsock(vsk));
+   }
}

spin_unlock_bh(_table_lock);
diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
index fb3302fff627..5afc194a58bb 100644
--- a/net/vmw_vsock/virtio_transport.c
+++ b/net/vmw_vsock/virtio_transport.c
@@ -24,6 +24,7 @@
static struct workqueue_struct *virtio_vsock_workqueue;
static struct virtio_vsock __rcu *the_virtio_vsock;
static DEFINE_MUTEX(the_virtio_vsock_mutex); /* protects the_virtio_vsock */
+static struct virtio_transport virtio_transport; /* forward declaration */

struct virtio_vsock {
struct virtio_device *vdev;
@@ -384,7 +385,8 @@ static void virtio_vsock_event_handle(struct virtio_vsock 
*vsock,
switch (le32_to_cpu(event->id)) {
case VIRTIO_VSOCK_EVENT_TRANSPORT_RESET:
virtio_vsock_update_guest_cid(vsock);
-   vsock_for_each_connected_socket(virtio_vsock_reset_sock);
+   vsock_for_each_connected_socket(_transport.transport,
+   virtio_vsock_reset_sock);
break;
}
}
@@ -662,7 +664,8 @@ static void virtio_vsock_remove(struct virtio_device *vdev)
synchronize_rcu();

/* Reset all connected 

Re: [PATCH 1/2] vsock: each transport cycles only on its own sockets

2022-03-10 Thread Stefano Garzarella

On Thu, Mar 10, 2022 at 08:01:53AM -0500, Michael S. Tsirkin wrote:

On Thu, Mar 10, 2022 at 09:54:24PM +0900, Jiyong Park wrote:

When iterating over sockets using vsock_for_each_connected_socket, make
sure that a transport filters out sockets that don't belong to the
transport.

There actually was an issue caused by this; in a nested VM
configuration, destroying the nested VM (which often involves the
closing of /dev/vhost-vsock if there was h2g connections to the nested
VM) kills not only the h2g connections, but also all existing g2h
connections to the (outmost) host which are totally unrelated.

Tested: Executed the following steps on Cuttlefish (Android running on a
VM) [1]: (1) Enter into an `adb shell` session - to have a g2h
connection inside the VM, (2) open and then close /dev/vhost-vsock by
`exec 3< /dev/vhost-vsock && exec 3<&-`, (3) observe that the adb
session is not reset.

[1] https://android.googlesource.com/device/google/cuttlefish/

Fixes: c0cfa2d8a788 ("vsock: add multi-transports support")
Signed-off-by: Jiyong Park 
---
 drivers/vhost/vsock.c| 4 
 net/vmw_vsock/virtio_transport.c | 7 +++
 net/vmw_vsock/vmci_transport.c   | 5 +
 3 files changed, 16 insertions(+)

diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
index 37f0b4274113..853ddac00d5b 100644
--- a/drivers/vhost/vsock.c
+++ b/drivers/vhost/vsock.c
@@ -722,6 +722,10 @@ static void vhost_vsock_reset_orphans(struct sock *sk)
 * executing.
 */

+   /* Only handle our own sockets */
+   if (vsk->transport != _transport.transport)
+   return;
+
/* If the peer is still valid, no need to reset connection */
if (vhost_vsock_get(vsk->remote_addr.svm_cid))
return;



We know this is incomplete though. So I think it's the wrong thing to do
when you backport, too. If all you worry about is breaking a binary
module interface, how about simply exporting a new function when you
backport. Thus you will have downstream both:

void vsock_for_each_connected_socket(void (*fn)(struct sock *sk));

void vsock_for_each_connected_socket_new(struct vsock_transport *transport,
   void (*fn)(struct sock *sk));


and then upstream we can squash these two patches.

Hmm?



Yep, reading more of the kernel documentation [1] it seems that upstream 
we don't worry about this.


I agree with Michael, it's better to just have the final patch upstream 
and downstream will be handled accordingly.


This should make it easier upstream to backport into stable branches 
future patches that depend on this change.


Thanks,
Stefano

[1] 
https://www.kernel.org/doc/Documentation/process/stable-api-nonsense.rst


___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH] vhost/vsock: reset only the h2g connections upon release

2022-03-10 Thread Stefano Garzarella

On Thu, Mar 10, 2022 at 07:41:54PM +0900, Jiyong Park wrote:

Hi Stefano,

On Thu, Mar 10, 2022 at 5:59 PM Stefano Garzarella  wrote:


Hi Jiyong,

On Thu, Mar 10, 2022 at 05:18:54PM +0900, Jiyong Park wrote:
>Filtering non-h2g connections out when determining orphaned connections.
>Otherwise, in a nested VM configuration, destroying the nested VM (which
>often involves the closing of /dev/vhost-vsock if there was h2g
>connections to the nested VM) kills not only the h2g connections, but
>also all existing g2h connections to the (outmost) host which are
>totally unrelated.
>
>Tested: Executed the following steps on Cuttlefish (Android running on a
>VM) [1]: (1) Enter into an `adb shell` session - to have a g2h
>connection inside the VM, (2) open and then close /dev/vhost-vsock by
>`exec 3< /dev/vhost-vsock && exec 3<&-`, (3) observe that the adb
>session is not reset.
>
>[1] https://android.googlesource.com/device/google/cuttlefish/
>
>Signed-off-by: Jiyong Park 
>---
> drivers/vhost/vsock.c | 4 
> 1 file changed, 4 insertions(+)
>
>diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
>index 37f0b4274113..2f6d5d66f5ed 100644
>--- a/drivers/vhost/vsock.c
>+++ b/drivers/vhost/vsock.c
>@@ -722,6 +722,10 @@ static void vhost_vsock_reset_orphans(struct sock *sk)
>* executing.
>*/
>
>+  /* Only the h2g connections are reset */
>+  if (vsk->transport != _transport.transport)
>+  return;
>+
>   /* If the peer is still valid, no need to reset connection */
>   if (vhost_vsock_get(vsk->remote_addr.svm_cid))
>   return;
>--
>2.35.1.723.g4982287a31-goog
>

Thanks for your patch!

Yes, I see the problem and I think I introduced it with the
multi-transports support (ooops).

So we should add this fixes tag:

Fixes: c0cfa2d8a788 ("vsock: add multi-transports support")


IIUC the problem is for all transports that should only cycle on their
own sockets. Indeed I think there is the same problem if the g2h driver
will be unloaded (or a reset event is received after a VM migration), it
will close all sockets of the nested h2g.

So I suggest a more generic solution, modifying
vsock_for_each_connected_socket() like this (not tested):

diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 38baeb189d4e..f04abf662ec6 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -334,7 +334,8 @@ void vsock_remove_sock(struct vsock_sock *vsk)
  }
  EXPORT_SYMBOL_GPL(vsock_remove_sock);

-void vsock_for_each_connected_socket(void (*fn)(struct sock *sk))
+void vsock_for_each_connected_socket(struct vsock_transport *transport,
+void (*fn)(struct sock *sk))
  {
 int i;

@@ -343,8 +344,12 @@ void vsock_for_each_connected_socket(void (*fn)(struct 
sock *sk))
 for (i = 0; i < ARRAY_SIZE(vsock_connected_table); i++) {
 struct vsock_sock *vsk;
 list_for_each_entry(vsk, _connected_table[i],
-   connected_table)
+   connected_table) {
+   if (vsk->transport != transport)
+   continue;
+
 fn(sk_vsock(vsk));
+   }
 }


And all transports that call it.

Thanks,
Stefano



Thanks for the suggestion, which looks much better. It actually worked well.


Thanks for trying this!



By the way, the suggested change will alter the kernel-module interface (KMI),
which will make it difficult to land the change on older releases where we'd
like to keep the KMI stable [1]. Would it be OK if we let the supplied function
(fn) be responsible for checking the transport? I think that there, in
the future,
might be a case where one needs to cycle over all sockets for inspection or so.
I admit that this would be prone to error, though.

Please let me know what you think. I don't have a strong preference. I will
submit a revision as you want.


I see your point, and it makes sense to keep KMI on stable branches, but 
mainline I would like to have the proposed approach since all transports 
use it to cycle on their sockets.


So we could do a series with 2 patches:
- Patch 1 fixes the problem in all transports by checking the transport
  in the callback (here we insert the fixes tag so we backport it to the
  stable branches)
- Patch 2 moves the check in vsock_for_each_connected_socket() removing
  it from callbacks so it is less prone to errors and we merge it only
  mainline

What do you think?

Thanks,
Stefano

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH] vhost/vsock: reset only the h2g connections upon release

2022-03-10 Thread Stefano Garzarella

Hi Jiyong,

On Thu, Mar 10, 2022 at 05:18:54PM +0900, Jiyong Park wrote:

Filtering non-h2g connections out when determining orphaned connections.
Otherwise, in a nested VM configuration, destroying the nested VM (which
often involves the closing of /dev/vhost-vsock if there was h2g
connections to the nested VM) kills not only the h2g connections, but
also all existing g2h connections to the (outmost) host which are
totally unrelated.

Tested: Executed the following steps on Cuttlefish (Android running on a
VM) [1]: (1) Enter into an `adb shell` session - to have a g2h
connection inside the VM, (2) open and then close /dev/vhost-vsock by
`exec 3< /dev/vhost-vsock && exec 3<&-`, (3) observe that the adb
session is not reset.

[1] https://android.googlesource.com/device/google/cuttlefish/

Signed-off-by: Jiyong Park 
---
drivers/vhost/vsock.c | 4 
1 file changed, 4 insertions(+)

diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
index 37f0b4274113..2f6d5d66f5ed 100644
--- a/drivers/vhost/vsock.c
+++ b/drivers/vhost/vsock.c
@@ -722,6 +722,10 @@ static void vhost_vsock_reset_orphans(struct sock *sk)
 * executing.
 */

+   /* Only the h2g connections are reset */
+   if (vsk->transport != _transport.transport)
+   return;
+
/* If the peer is still valid, no need to reset connection */
if (vhost_vsock_get(vsk->remote_addr.svm_cid))
return;
--
2.35.1.723.g4982287a31-goog



Thanks for your patch!

Yes, I see the problem and I think I introduced it with the 
multi-transports support (ooops).


So we should add this fixes tag:

Fixes: c0cfa2d8a788 ("vsock: add multi-transports support")


IIUC the problem is for all transports that should only cycle on their 
own sockets. Indeed I think there is the same problem if the g2h driver 
will be unloaded (or a reset event is received after a VM migration), it 
will close all sockets of the nested h2g.


So I suggest a more generic solution, modifying 
vsock_for_each_connected_socket() like this (not tested):


diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 38baeb189d4e..f04abf662ec6 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -334,7 +334,8 @@ void vsock_remove_sock(struct vsock_sock *vsk)
 }
 EXPORT_SYMBOL_GPL(vsock_remove_sock);

-void vsock_for_each_connected_socket(void (*fn)(struct sock *sk))
+void vsock_for_each_connected_socket(struct vsock_transport *transport,
+void (*fn)(struct sock *sk))
 {
int i;

@@ -343,8 +344,12 @@ void vsock_for_each_connected_socket(void (*fn)(struct 
sock *sk))
for (i = 0; i < ARRAY_SIZE(vsock_connected_table); i++) {
struct vsock_sock *vsk;
list_for_each_entry(vsk, _connected_table[i],
-   connected_table)
+   connected_table) {
+   if (vsk->transport != transport)
+   continue;
+
fn(sk_vsock(vsk));
+   }
}


And all transports that call it.

Thanks,
Stefano

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH 1/1] vhost: Provide a kernel warning if mutex is held whilst clean-up in progress

2022-03-03 Thread Stefano Garzarella

On Thu, Mar 03, 2022 at 04:01:06PM -0500, Michael S. Tsirkin wrote:

On Thu, Mar 03, 2022 at 09:14:36PM +0200, Leon Romanovsky wrote:

On Thu, Mar 03, 2022 at 03:19:29PM +, Lee Jones wrote:
> All workers/users should be halted before any clean-up should take place.
>
> Suggested-by:  Michael S. Tsirkin 
> Signed-off-by: Lee Jones 
> ---
>  drivers/vhost/vhost.c | 3 +++
>  1 file changed, 3 insertions(+)
>
> diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
> index bbaff6a5e21b8..d935d2506963f 100644
> --- a/drivers/vhost/vhost.c
> +++ b/drivers/vhost/vhost.c
> @@ -693,6 +693,9 @@ void vhost_dev_cleanup(struct vhost_dev *dev)
>int i;
>
>for (i = 0; i < dev->nvqs; ++i) {
> +  /* Ideally all workers should be stopped prior to clean-up */
> +  WARN_ON(mutex_is_locked(>vqs[i]->mutex));
> +
>mutex_lock(>vqs[i]->mutex);

I know nothing about vhost, but this construction and patch looks
strange to me.

If all workers were stopped, you won't need mutex_lock(). The mutex_lock
here suggests to me that workers can still run here.

Thanks



"Ideally" here is misleading, we need a bigger detailed comment
along the lines of:

/*
* By design, no workers can run here. But if there's a bug and the
* driver did not flush all work properly then they might, and we
* encountered such bugs in the past.  With no proper flush guest won't
* work correctly but avoiding host memory corruption in this case
* sounds like a good idea.
*/


Can we use vhost_vq_get_backend() to check this situation?

IIUC all the vhost devices clear the backend to stop the workers.
This is not racy (if we do after the mutex_lock) and should cover all 
cases.


Thanks,
Stefano

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [virtio-comment] [PATCH v5 1/2] virtio-vsock: add description for datagram type

2022-03-03 Thread Stefano Garzarella

On Thu, Mar 03, 2022 at 03:29:31AM +, Bobby Eshleman wrote:

On Wed, Mar 02, 2022 at 05:09:58PM +0100, Stefano Garzarella wrote:

Hi Bobby,
Sorry for the delay, but I saw these patches today.
Please, can you keep me in CC?



Hey Stefano, sorry about that. I'm not sure how I lost your CC on this
one. I'll make sure you are there moving forward.


No problem :-)



I want to mention that I'm taking a look at
https://gitlab.com/vsock/vsock/-/issues/1 in parallel with my dgram work
here. After sending out this series we noticed potential overlap between
the two issues. The additional dgram queues may become redundant if a
fairness mechanism that solves issue #1 above also applies to
connection-less protocols (similar to how the TC subsystem works). I've
just begun sorting out potential solutions so no hard results yet. Just
putting on your radar that the proposal here in v5 may be impacted if my
investigation into issue #1 yields something adequate.


Oh, this would be great!




On Thu, Feb 24, 2022 at 10:15:46PM +, beshleman.dev...@gmail.com wrote:
> From: Jiang Wang 
>


... snip ...


>
> virtio-vsock.tex | 63 +++-
> 1 file changed, 53 insertions(+), 10 deletions(-)
>
> diff --git a/virtio-vsock.tex b/virtio-vsock.tex
> index d79984d..1a66a1b 100644
> --- a/virtio-vsock.tex
> +++ b/virtio-vsock.tex
> @@ -9,11 +9,26 @@ \subsection{Device ID}\label{sec:Device Types / Socket 
Device / Device ID}
>
> \subsection{Virtqueues}\label{sec:Device Types / Socket Device / Virtqueues}
> \begin{description}
> -\item[0] rx
> -\item[1] tx
> +\item[0] stream rx
> +\item[1] stream tx
> +\item[2] datagram rx
> +\item[3] datagram tx
> +\item[4] event
> +\end{description}
> +The virtio socket device uses 5 queues if feature bit VIRTIO_VSOCK_F_DRGAM 
is set. Otherwise, it
> +only uses 3 queues, as the following.

We are also adding a new flag (VIRTIO_VSOCK_F_NO_IMPLIED_STREAM) to
provide the possibility to support for example only dgrams.

So I think we should consider the case where we have only DGRAM queues
(and it will be similar to the stream only case so 3 queues).

Maybe we could describe this part better and say that if we have both
STREAM (or SEQPACKET) and DGRAM set we have 5 queues, otherwise
only 3 queues.



Roger that.


> \subsubsection{Buffer Space Management}\label{sec:Device Types / Socket 
Device / Device Operation / Buffer Space Management}
> \field{buf_alloc} and \field{fwd_cnt} are used for buffer space management of
> stream sockets. The guest and the device publish how much buffer space is
> @@ -170,7 +193,7 @@ \subsubsection{Buffer Space Management}\label{sec:Device 
Types / Socket Device /
> u32 peer_free = peer_buf_alloc - (tx_cnt - peer_fwd_cnt);
> \end{lstlisting}
>
> -If there is insufficient buffer space, the sender waits until virtqueue 
buffers
> +For stream sockets, if there is insufficient buffer space, the sender waits 
until virtqueue buffers

stream and seqpacket

> are returned and checks \field{buf_alloc} and \field{fwd_cnt} again. Sending
> the VIRTIO_VSOCK_OP_CREDIT_REQUEST packet queries how much buffer space is
> available. The reply to this query is a VIRTIO_VSOCK_OP_CREDIT_UPDATE packet.
> @@ -178,22 +201,32 @@ \subsubsection{Buffer Space 
Management}\label{sec:Device Types / Socket Device /
> previously receiving a VIRTIO_VSOCK_OP_CREDIT_REQUEST packet. This allows
> communicating updates any time a change in buffer space occurs.
>
> +Unlike stream sockets, dgram sockets do not use VIRTIO_VSOCK_OP_CREDIT_UPDATE
> +or VIRTIO_VSOCK_OP_CREDIT_REQUEST packets. The dgram buffer management is 
split
> +into two parts: senders and receivers. For senders, the packet is dropped if 
the
> +virtqueue is full. For receivers, the packet is dropped if there is no space
> +in the receive buffer.
> +
> \drivernormative{\paragraph}{Device Operation: Buffer Space 
Management}{Device Types / Socket Device / Device Operation / Buffer Space 
Management}
> -VIRTIO_VSOCK_OP_RW data packets MUST only be transmitted when the peer has
> -sufficient free buffer space for the payload.
> +For stream sockets, VIRTIO_VSOCK_OP_RW data packets MUST only be transmitted 
when the peer has

stream and seqpacket

> +sufficient free buffer space for the payload. For dgram sockets, 
VIRTIO_VSOCK_OP_RW data packets
> +MAY be transmitted when the peer rx buffer is full. Then the packet will be 
dropped by the peer,
> +and driver will not get any notification.
>
> All packets associated with a stream flow MUST contain valid information in
> \field{buf_alloc} and \field{fwd_cnt} fields.
>
> \devicenormative{\paragraph}{Device Operation: Buffer Space 
Management}{Device Types / Socket Device / Device Operation / Buffer Space 
Management}
> -VIRTIO_VSOCK_OP_RW data packets MUST only be 

Re: [PATCH 1/1] vhost: Protect the virtqueue from being cleared whilst still in use

2022-03-02 Thread Stefano Garzarella

On Wed, Mar 02, 2022 at 04:49:17PM +, Lee Jones wrote:

On Wed, 02 Mar 2022, Michael S. Tsirkin wrote:


On Wed, Mar 02, 2022 at 05:28:31PM +0100, Stefano Garzarella wrote:
> On Wed, Mar 2, 2022 at 3:57 PM Lee Jones  wrote:
> >
> > On Wed, 02 Mar 2022, Michael S. Tsirkin wrote:
> >
> > > On Wed, Mar 02, 2022 at 01:56:35PM +, Lee Jones wrote:
> > > > On Wed, 02 Mar 2022, Michael S. Tsirkin wrote:
> > > >
> > > > > On Wed, Mar 02, 2022 at 07:54:21AM +, Lee Jones wrote:
> > > > > > vhost_vsock_handle_tx_kick() already holds the mutex during its call
> > > > > > to vhost_get_vq_desc().  All we have to do is take the same lock
> > > > > > during virtqueue clean-up and we mitigate the reported issues.
> > > > > >
> > > > > > Link: https://syzkaller.appspot.com/bug?extid=279432d30d825e63ba00
> > > > > >
> > > > > > Cc: 
> > > > > > Reported-by: syzbot+adc3cb32385586bec...@syzkaller.appspotmail.com
> > > > > > Signed-off-by: Lee Jones 
> > > > > > ---
> > > > > >  drivers/vhost/vhost.c | 2 ++
> > > > > >  1 file changed, 2 insertions(+)
> > > > > >
> > > > > > diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
> > > > > > index 59edb5a1ffe28..bbaff6a5e21b8 100644
> > > > > > --- a/drivers/vhost/vhost.c
> > > > > > +++ b/drivers/vhost/vhost.c
> > > > > > @@ -693,6 +693,7 @@ void vhost_dev_cleanup(struct vhost_dev *dev)
> > > > > > int i;
> > > > > >
> > > > > > for (i = 0; i < dev->nvqs; ++i) {
> > > > > > +   mutex_lock(>vqs[i]->mutex);
> > > > > > if (dev->vqs[i]->error_ctx)
> > > > > > eventfd_ctx_put(dev->vqs[i]->error_ctx);
> > > > > > if (dev->vqs[i]->kick)
> > > > > > @@ -700,6 +701,7 @@ void vhost_dev_cleanup(struct vhost_dev *dev)
> > > > > > if (dev->vqs[i]->call_ctx.ctx)
> > > > > > eventfd_ctx_put(dev->vqs[i]->call_ctx.ctx);
> > > > > > vhost_vq_reset(dev, dev->vqs[i]);
> > > > > > +   mutex_unlock(>vqs[i]->mutex);
> > > > > > }
> > > > >
> > > > > So this is a mitigation plan but the bug is still there though
> > > > > we don't know exactly what it is.  I would prefer adding something 
like
> > > > > WARN_ON(mutex_is_locked(vqs[i]->mutex) here - does this make sense?
> > > >
> > > > As a rework to this, or as a subsequent patch?
> > >
> > > Can be a separate patch.
> > >
> > > > Just before the first lock I assume?
> > >
> > > I guess so, yes.
> >
> > No problem.  Patch to follow.
> >
> > I'm also going to attempt to debug the root cause, but I'm new to this
> > subsystem to it might take a while for me to get my head around.
>
> IIUC the root cause should be the same as the one we solved here:
> 
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=a58da53ffd70294ebea8ecd0eb45fd0d74add9f9
>
> The worker was not stopped before calling vhost_dev_cleanup(). So while
> the worker was still running we were going to free memory or initialize
> fields while it was still using virtqueue.

Right, and I agree but it's not the root though, we do attempt to stop all 
workers.


Exactly.  This is what happens, but the question I'm going to attempt
to answer is *why* does this happen.


IIUC the worker was still running because the /dev/vhost-vsock file was 
not explicitly closed, so vhost_vsock_dev_release() was called in the 
do_exit() of the process.


In that case there was the issue, because vhost_dev_check_owner() 
returned false in vhost_vsock_stop() since current->mm was NULL.

So it returned earlier, without calling vhost_vq_set_backend(vq, NULL).

This did not stop the worker from continuing to run, causing the 
multiple issues we are seeing.


current->mm was NULL, because in the do_exit() the address space is 
cleaned in the exit_mm(), which is called before releasing the files 
into the exit_task_work().


This can be seen from the logs, where we see first the warnings printed 
by vhost_dev_cleanup() and then the panic in the worker (e.g. here 
https://syzkaller.appspot.com/text?tag=CrashLog=16a61fce70)


Mike also added a few more helpful details in this thread: 
https://lore.kernel.org/virtualization/20220221100500.2x3s2sddqahgdfyt@sgarzare-redhat/T/#ree61316eac63245c9ba3050b44330e4034282cc2


Thanks,
Stefano

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH 1/1] vhost: Protect the virtqueue from being cleared whilst still in use

2022-03-02 Thread Stefano Garzarella
On Wed, Mar 2, 2022 at 3:57 PM Lee Jones  wrote:
>
> On Wed, 02 Mar 2022, Michael S. Tsirkin wrote:
>
> > On Wed, Mar 02, 2022 at 01:56:35PM +, Lee Jones wrote:
> > > On Wed, 02 Mar 2022, Michael S. Tsirkin wrote:
> > >
> > > > On Wed, Mar 02, 2022 at 07:54:21AM +, Lee Jones wrote:
> > > > > vhost_vsock_handle_tx_kick() already holds the mutex during its call
> > > > > to vhost_get_vq_desc().  All we have to do is take the same lock
> > > > > during virtqueue clean-up and we mitigate the reported issues.
> > > > >
> > > > > Link: https://syzkaller.appspot.com/bug?extid=279432d30d825e63ba00
> > > > >
> > > > > Cc: 
> > > > > Reported-by: syzbot+adc3cb32385586bec...@syzkaller.appspotmail.com
> > > > > Signed-off-by: Lee Jones 
> > > > > ---
> > > > >  drivers/vhost/vhost.c | 2 ++
> > > > >  1 file changed, 2 insertions(+)
> > > > >
> > > > > diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
> > > > > index 59edb5a1ffe28..bbaff6a5e21b8 100644
> > > > > --- a/drivers/vhost/vhost.c
> > > > > +++ b/drivers/vhost/vhost.c
> > > > > @@ -693,6 +693,7 @@ void vhost_dev_cleanup(struct vhost_dev *dev)
> > > > > int i;
> > > > >
> > > > > for (i = 0; i < dev->nvqs; ++i) {
> > > > > +   mutex_lock(>vqs[i]->mutex);
> > > > > if (dev->vqs[i]->error_ctx)
> > > > > eventfd_ctx_put(dev->vqs[i]->error_ctx);
> > > > > if (dev->vqs[i]->kick)
> > > > > @@ -700,6 +701,7 @@ void vhost_dev_cleanup(struct vhost_dev *dev)
> > > > > if (dev->vqs[i]->call_ctx.ctx)
> > > > > eventfd_ctx_put(dev->vqs[i]->call_ctx.ctx);
> > > > > vhost_vq_reset(dev, dev->vqs[i]);
> > > > > +   mutex_unlock(>vqs[i]->mutex);
> > > > > }
> > > >
> > > > So this is a mitigation plan but the bug is still there though
> > > > we don't know exactly what it is.  I would prefer adding something like
> > > > WARN_ON(mutex_is_locked(vqs[i]->mutex) here - does this make sense?
> > >
> > > As a rework to this, or as a subsequent patch?
> >
> > Can be a separate patch.
> >
> > > Just before the first lock I assume?
> >
> > I guess so, yes.
>
> No problem.  Patch to follow.
>
> I'm also going to attempt to debug the root cause, but I'm new to this
> subsystem to it might take a while for me to get my head around.

IIUC the root cause should be the same as the one we solved here:
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=a58da53ffd70294ebea8ecd0eb45fd0d74add9f9

The worker was not stopped before calling vhost_dev_cleanup(). So while 
the worker was still running we were going to free memory or initialize 
fields while it was still using virtqueue.

Cheers,
Stefano

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [virtio-comment] [PATCH v5 2/2] virtio-vsock: add mergeable buffer feature bit

2022-03-02 Thread Stefano Garzarella

On Thu, Feb 24, 2022 at 10:15:47PM +, beshleman.dev...@gmail.com wrote:

From: Jiang Wang 

Add support for mergeable buffers for virtio-vsock. Mergeable buffers
allow individual large packets to be spread across multiple buffers
while still using only a single packet header. This avoids
artificially restraining packet size to the size of a single
buffer and offers a performant fragmentation/defragmentation
scheme.


We need this new functionality because we want to fragment a datagram 
packet over multiple buffers, right?


This reminded me that we don't have a maximum size for now, in this case 
what would it be?


Maybe it would be helpful to define it as Laura is planning to do here:
https://lists.oasis-open.org/archives/virtio-comment/202202/msg00053.html



Signed-off-by: Jiang Wang 
Signed-off-by: Bobby Eshleman 
---
virtio-vsock.tex | 76 
1 file changed, 76 insertions(+)

diff --git a/virtio-vsock.tex b/virtio-vsock.tex
index 1a66a1b..bf44d5d 100644
--- a/virtio-vsock.tex
+++ b/virtio-vsock.tex
@@ -39,6 +39,7 @@ \subsection{Feature bits}\label{sec:Device Types / Socket 
Device / Feature bits}
\item[VIRTIO_VSOCK_F_STREAM (0)] stream socket type is supported.
\item[VIRTIO_VSOCK_F_SEQPACKET (1)] seqpacket socket type is supported.
\item[VIRTIO_VSOCK_F_DGRAM (2)] datagram socket type is supported.
+\item[VIRTIO_VSOCK_F_MRG_RXBUF (3)] driver can merge receive buffers.
\end{description}

\subsection{Device configuration layout}\label{sec:Device Types / Socket Device 
/ Device configuration layout}
@@ -87,6 +88,8 @@ \subsection{Device Operation}\label{sec:Device Types / Socket 
Device / Device Op

Packets transmitted or received contain a header before the payload:

+If feature VIRTIO_VSOCK_F_MRG_RXBUF is not negotiated, use the following 
header.
+
\begin{lstlisting}
struct virtio_vsock_hdr {
le64 src_cid;
@@ -102,6 +105,15 @@ \subsection{Device Operation}\label{sec:Device Types / 
Socket Device / Device Op
};
\end{lstlisting}

+If feature VIRTIO_VSOCK_F_MRG_RXBUF is negotiated, use the following header.
+\begin{lstlisting}
+struct virtio_vsock_hdr_mrg_rxbuf {
+   struct virtio_vsock_hdr hdr;
+   le16 num_buffers;
+};
+\end{lstlisting}
+
+
The upper 32 bits of src_cid and dst_cid are reserved and zeroed.

Most packets simply transfer data but control packets are also used for
@@ -207,6 +219,25 @@ \subsubsection{Buffer Space Management}\label{sec:Device 
Types / Socket Device /
virtqueue is full. For receivers, the packet is dropped if there is no space
in the receive buffer.

+\drivernormative{\paragraph}{Device Operation: Buffer Space Management}{Device 
Types / Socket Device / Device Operation / Setting Up Receive Buffers}
+\begin{itemize}
+\item If VIRTIO_VSOCK_F_MRG_RXBUF is not negotiated, the driver SHOULD 
populate the datagram rx queue
+  with buffers of at least 4096 bytes.
+\item If VIRTIO_VSOCK_F_MRG_RXBUF is negotiated, each buffer MUST be at
+  least the size of the struct virtio_vsock_hdr_mgr_rxbuf.
+\end{itemize}
+
+\begin{note}
+Each buffer may be split across multiple descriptor elements.
+\end{note}
+
+\devicenormative{\paragraph}{Device Operation: Buffer Space Management}{Device 
Types / Socket Device / Device Operation / Setting Up Receive Buffers}
+The device MUST set \field{num_buffers} to the number of descriptors used when
+transmitting the packet.
+
+The device MUST use only a single descriptor if VIRTIO_VSOCK_F_MRG_RXBUF
+is not negotiated.
+
\drivernormative{\paragraph}{Device Operation: Buffer Space Management}{Device 
Types / Socket Device / Device Operation / Buffer Space Management}
For stream sockets, VIRTIO_VSOCK_OP_RW data packets MUST only be transmitted 
when the peer has
sufficient free buffer space for the payload. For dgram sockets, 
VIRTIO_VSOCK_OP_RW data packets
@@ -229,6 +260,7 @@ \subsubsection{Receive and Transmit}\label{sec:Device Types 
/ Socket Device / De
The driver queues outgoing packets on the tx virtqueue and allocates incoming 
packet
receive buffers on the rx virtqueue. Packets are of the following form:

+If VIRTIO_VSOCK_F_MRG_RXBUF is not negotiated, use the following.
\begin{lstlisting}
struct virtio_vsock_packet {
struct virtio_vsock_hdr hdr;
@@ -236,11 +268,42 @@ \subsubsection{Receive and 
Transmit}\label{sec:Device Types / Socket Device / De

};
\end{lstlisting}

+If VIRTIO_VSOCK_F_MRG_RXBUF is negotiated, use the following form:
+\begin{lstlisting}
+struct virtio_vsock_packet_mrg_rxbuf {
+struct virtio_vsock_hdr_mrg_rxbuf hdr;
+u8 data[];
+};
+\end{lstlisting}
+
+
Virtqueue buffers for outgoing packets are read-only. Virtqueue buffers for
incoming packets are write-only.

When transmitting packets to the device, \field{num_buffers} is not used.

+\begin{enumerate}
+\item \field{num_buffers} indicates how many descriptors
+  this packet is spread over (including this one).
+  This is valid only if VIRTIO_VSOCK_F_MRG_RXBUF is negotiated.
+  This 

Re: [virtio-comment] [PATCH v5 1/2] virtio-vsock: add description for datagram type

2022-03-02 Thread Stefano Garzarella

Hi Bobby,
Sorry for the delay, but I saw these patches today.
Please, can you keep me in CC?

On Thu, Feb 24, 2022 at 10:15:46PM +, beshleman.dev...@gmail.com wrote:

From: Jiang Wang 

Add supports for datagram type for virtio-vsock. Datagram
sockets are connectionless and unreliable. To avoid contention
with stream and other sockets, add two more virtqueues and
a new feature bit to identify if those two new queues exist or not.

Also add descriptions for resource management of datagram, which
does not use the existing credit update mechanism associated with
stream sockets.

Signed-off-by: Jiang Wang 
Signed-off-by: Bobby Eshleman 
---

V2: Addressed the comments for the previous version.
V3: Add description for the mergeable receive buffer.
V4: Add a feature bit for stream and reserver a bit for seqpacket.
   Fix mrg_rxbuf related sentences.
V5: Rebase onto head (change to more nicely accompany seqpacket changes).
   Remove statement about no feature bits being set (already made by seqpacket 
patches).
   Clarify \field{type} declaration.
   Use words "sender/receiver" instead of "tx/rx" for clarity, and other prose 
changes.
   Directly state that full buffers result in dropped packets.
   Change verbs to present tense.
   Clarify if-else pairs (e.g., "If XYZ is negotiated" is followed by "If XYZ
   is not negotiated" instead of "Otherwise").
   Mergeable buffer changes are split off into a separate patch.

virtio-vsock.tex | 63 +++-
1 file changed, 53 insertions(+), 10 deletions(-)

diff --git a/virtio-vsock.tex b/virtio-vsock.tex
index d79984d..1a66a1b 100644
--- a/virtio-vsock.tex
+++ b/virtio-vsock.tex
@@ -9,11 +9,26 @@ \subsection{Device ID}\label{sec:Device Types / Socket Device 
/ Device ID}

\subsection{Virtqueues}\label{sec:Device Types / Socket Device / Virtqueues}
\begin{description}
-\item[0] rx
-\item[1] tx
+\item[0] stream rx
+\item[1] stream tx
+\item[2] datagram rx
+\item[3] datagram tx
+\item[4] event
+\end{description}
+The virtio socket device uses 5 queues if feature bit VIRTIO_VSOCK_F_DRGAM is 
set. Otherwise, it
+only uses 3 queues, as the following.


We are also adding a new flag (VIRTIO_VSOCK_F_NO_IMPLIED_STREAM) to
provide the possibility to support for example only dgrams.

So I think we should consider the case where we have only DGRAM queues
(and it will be similar to the stream only case so 3 queues).

Maybe we could describe this part better and say that if we have both
STREAM (or SEQPACKET) and DGRAM set we have 5 queues, otherwise
only 3 queues.


+
+\begin{description}
+\item[0] stream rx
+\item[1] stream tx
\item[2] event
\end{description}

+When behavior differs between stream and datagram rx/tx virtqueues
+their full names are used. Common behavior is simply described in
+terms of rx/tx virtqueues and applies to both stream and datagram
+virtqueues.
+
\subsection{Feature bits}\label{sec:Device Types / Socket Device / Feature bits}

If no feature bit is set, only stream socket type is supported.
@@ -23,6 +38,7 @@ \subsection{Feature bits}\label{sec:Device Types / Socket 
Device / Feature bits}
\begin{description}
\item[VIRTIO_VSOCK_F_STREAM (0)] stream socket type is supported.
\item[VIRTIO_VSOCK_F_SEQPACKET (1)] seqpacket socket type is supported.
+\item[VIRTIO_VSOCK_F_DGRAM (2)] datagram socket type is supported.
\end{description}

\subsection{Device configuration layout}\label{sec:Device Types / Socket Device 
/ Device configuration layout}
@@ -109,6 +125,9 @@ \subsection{Device Operation}\label{sec:Device Types / 
Socket Device / Device Op

\subsubsection{Virtqueue Flow Control}\label{sec:Device Types / Socket Device / 
Device Operation / Virtqueue Flow Control}

+Flow control applies to stream sockets; datagram sockets do not have
+flow control.
+
The tx virtqueue carries packets initiated by applications and replies to
received packets.  The rx virtqueue carries packets initiated by the device and
replies to previously transmitted packets.
@@ -142,18 +161,22 @@ \subsubsection{Addressing}\label{sec:Device Types / 
Socket Device / Device Opera
consists of a (cid, port number) tuple. The header fields used for this are
\field{src_cid}, \field{src_port}, \field{dst_cid}, and \field{dst_port}.

-Currently stream and seqpacket sockets are supported. \field{type} is 1 
(VIRTIO_VSOCK_TYPE_STREAM)
-for stream socket types, and 2 (VIRTIO_VSOCK_TYPE_SEQPACKET) for seqpacket 
socket types.
+Currently stream, seqpacket, and dgram sockets are supported. \field{type} is 
1 (VIRTIO_VSOCK_TYPE_STREAM)
+for stream socket types, 2 (VIRTIO_VSOCK_TYPE_SEQPACKET) for seqpacket socket 
types, and 3 (VIRTIO_VSOCK_TYPE_DGRAM) for dgram socket types.

\begin{lstlisting}
#define VIRTIO_VSOCK_TYPE_STREAM1
#define VIRTIO_VSOCK_TYPE_SEQPACKET 2
+#define VIRTIO_VSOCK_TYPE_DGRAM 3
\end{lstlisting}

Stream sockets provide in-order, guaranteed, connection-oriented delivery
without message boundaries. Seqpacket sockets provide 

Re: [PATCH 1/1] vhost: Protect the virtqueue from being cleared whilst still in use

2022-03-02 Thread Stefano Garzarella

On Wed, Mar 02, 2022 at 09:50:38AM -0500, Michael S. Tsirkin wrote:

On Wed, Mar 02, 2022 at 03:11:21PM +0100, Stefano Garzarella wrote:

On Wed, Mar 02, 2022 at 08:35:08AM -0500, Michael S. Tsirkin wrote:
> On Wed, Mar 02, 2022 at 10:34:46AM +0100, Stefano Garzarella wrote:
> > On Wed, Mar 02, 2022 at 07:54:21AM +, Lee Jones wrote:
> > > vhost_vsock_handle_tx_kick() already holds the mutex during its call
> > > to vhost_get_vq_desc().  All we have to do is take the same lock
> > > during virtqueue clean-up and we mitigate the reported issues.
> > >
> > > Link: https://syzkaller.appspot.com/bug?extid=279432d30d825e63ba00
> >
> > This issue is similar to [1] that should be already fixed upstream by [2].
> >
> > However I think this patch would have prevented some issues, because
> > vhost_vq_reset() sets vq->private to NULL, preventing the worker from
> > running.
> >
> > Anyway I think that when we enter in vhost_dev_cleanup() the worker should
> > be already stopped, so it shouldn't be necessary to take the mutex. But in
> > order to prevent future issues maybe it's better to take them, so:
> >
> > Reviewed-by: Stefano Garzarella 
> >
> > [1]
> > 
https://syzkaller.appspot.com/bug?id=993d8b5e64393ed9e6a70f9ae4de0119c605a822
> > [2] 
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=a58da53ffd70294ebea8ecd0eb45fd0d74add9f9
>
>
> Right. I want to queue this but I would like to get a warning
> so we can detect issues like [2] before they cause more issues.

I agree, what about moving the warning that we already have higher up, right
at the beginning of the function?

I mean something like this:

diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 59edb5a1ffe2..1721ff3f18c0 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -692,6 +692,8 @@ void vhost_dev_cleanup(struct vhost_dev *dev)
 {
int i;
+   WARN_ON(!llist_empty(>work_list));
+
for (i = 0; i < dev->nvqs; ++i) {
if (dev->vqs[i]->error_ctx)
eventfd_ctx_put(dev->vqs[i]->error_ctx);
@@ -712,7 +714,6 @@ void vhost_dev_cleanup(struct vhost_dev *dev)
dev->iotlb = NULL;
vhost_clear_msg(dev);
wake_up_interruptible_poll(>wait, EPOLLIN | EPOLLRDNORM);
-   WARN_ON(!llist_empty(>work_list));
if (dev->worker) {
kthread_stop(dev->worker);
dev->worker = NULL;



Hmm I'm not sure why it matters.


Because after this new patch, putting locks in the while loop, when we 
finish the loop the workers should be stopped, because vhost_vq_reset() 
sets vq->private to NULL.


But the best thing IMHO is to check that there is no backend set for 
each vq, so the workers have been stopped correctly at this point.


Thanks,
Stefano

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH 1/1] vhost: Protect the virtqueue from being cleared whilst still in use

2022-03-02 Thread Stefano Garzarella

On Wed, Mar 02, 2022 at 08:35:08AM -0500, Michael S. Tsirkin wrote:

On Wed, Mar 02, 2022 at 10:34:46AM +0100, Stefano Garzarella wrote:

On Wed, Mar 02, 2022 at 07:54:21AM +, Lee Jones wrote:
> vhost_vsock_handle_tx_kick() already holds the mutex during its call
> to vhost_get_vq_desc().  All we have to do is take the same lock
> during virtqueue clean-up and we mitigate the reported issues.
>
> Link: https://syzkaller.appspot.com/bug?extid=279432d30d825e63ba00

This issue is similar to [1] that should be already fixed upstream by [2].

However I think this patch would have prevented some issues, because
vhost_vq_reset() sets vq->private to NULL, preventing the worker from
running.

Anyway I think that when we enter in vhost_dev_cleanup() the worker should
be already stopped, so it shouldn't be necessary to take the mutex. But in
order to prevent future issues maybe it's better to take them, so:

Reviewed-by: Stefano Garzarella 

[1]
https://syzkaller.appspot.com/bug?id=993d8b5e64393ed9e6a70f9ae4de0119c605a822
[2] 
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=a58da53ffd70294ebea8ecd0eb45fd0d74add9f9



Right. I want to queue this but I would like to get a warning
so we can detect issues like [2] before they cause more issues.


I agree, what about moving the warning that we already have higher up, 
right at the beginning of the function?


I mean something like this:

diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 59edb5a1ffe2..1721ff3f18c0 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -692,6 +692,8 @@ void vhost_dev_cleanup(struct vhost_dev *dev)
 {
int i;
 
+   WARN_ON(!llist_empty(>work_list));

+
for (i = 0; i < dev->nvqs; ++i) {
if (dev->vqs[i]->error_ctx)
eventfd_ctx_put(dev->vqs[i]->error_ctx);
@@ -712,7 +714,6 @@ void vhost_dev_cleanup(struct vhost_dev *dev)
dev->iotlb = NULL;
vhost_clear_msg(dev);
wake_up_interruptible_poll(>wait, EPOLLIN | EPOLLRDNORM);
-   WARN_ON(!llist_empty(>work_list));
if (dev->worker) {
kthread_stop(dev->worker);
dev->worker = NULL;


And maybe we can also check vq->private and warn in the loop, because 
the work_list may be empty if the device is doing nothing.


Thanks,
Stefano

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH 1/1] vhost: Protect the virtqueue from being cleared whilst still in use

2022-03-02 Thread Stefano Garzarella

On Wed, Mar 02, 2022 at 07:54:21AM +, Lee Jones wrote:

vhost_vsock_handle_tx_kick() already holds the mutex during its call
to vhost_get_vq_desc().  All we have to do is take the same lock
during virtqueue clean-up and we mitigate the reported issues.

Link: https://syzkaller.appspot.com/bug?extid=279432d30d825e63ba00


This issue is similar to [1] that should be already fixed upstream by 
[2].


However I think this patch would have prevented some issues, because 
vhost_vq_reset() sets vq->private to NULL, preventing the worker from 
running.


Anyway I think that when we enter in vhost_dev_cleanup() the worker 
should be already stopped, so it shouldn't be necessary to take the 
mutex. But in order to prevent future issues maybe it's better to take 
them, so:


Reviewed-by: Stefano Garzarella 

[1] 
https://syzkaller.appspot.com/bug?id=993d8b5e64393ed9e6a70f9ae4de0119c605a822
[2] 
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=a58da53ffd70294ebea8ecd0eb45fd0d74add9f9




Cc: 
Reported-by: syzbot+adc3cb32385586bec...@syzkaller.appspotmail.com
Signed-off-by: Lee Jones 
---
drivers/vhost/vhost.c | 2 ++
1 file changed, 2 insertions(+)

diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 59edb5a1ffe28..bbaff6a5e21b8 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -693,6 +693,7 @@ void vhost_dev_cleanup(struct vhost_dev *dev)
int i;

for (i = 0; i < dev->nvqs; ++i) {
+   mutex_lock(>vqs[i]->mutex);
if (dev->vqs[i]->error_ctx)
eventfd_ctx_put(dev->vqs[i]->error_ctx);
if (dev->vqs[i]->kick)
@@ -700,6 +701,7 @@ void vhost_dev_cleanup(struct vhost_dev *dev)
if (dev->vqs[i]->call_ctx.ctx)
eventfd_ctx_put(dev->vqs[i]->call_ctx.ctx);
vhost_vq_reset(dev, dev->vqs[i]);
+   mutex_unlock(>vqs[i]->mutex);
}
vhost_dev_free_iovecs(dev);
if (dev->log_ctx)
--
2.35.1.574.g5d30c73bfb-goog



___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [syzbot] kernel BUG in vhost_get_vq_desc

2022-03-02 Thread Stefano Garzarella

On Wed, Mar 02, 2022 at 10:18:07AM +0100, Stefano Garzarella wrote:

On Wed, Mar 02, 2022 at 08:29:41AM +, Lee Jones wrote:

On Fri, 18 Feb 2022, Michael S. Tsirkin wrote:


On Thu, Feb 17, 2022 at 05:21:20PM -0800, syzbot wrote:

syzbot has found a reproducer for the following issue on:

HEAD commit:f71077a4d84b Merge tag 'mmc-v5.17-rc1-2' of git://git.kern..
git tree:   upstream
console output: https://syzkaller.appspot.com/x/log.txt?x=104c04ca70
kernel config:  https://syzkaller.appspot.com/x/.config?x=a78b064590b9f912
dashboard link: https://syzkaller.appspot.com/bug?extid=3140b17cb44a7b174008
compiler:   gcc (Debian 10.2.1-6) 10.2.1 20210110, GNU ld (GNU Binutils for 
Debian) 2.35.2
syz repro:  https://syzkaller.appspot.com/x/repro.syz?x=1362e23270
C reproducer:   https://syzkaller.appspot.com/x/repro.c?x=11373a6c70

IMPORTANT: if you fix the issue, please add the following tag to the commit:
Reported-by: syzbot+3140b17cb44a7b174...@syzkaller.appspotmail.com

[ cut here ]
kernel BUG at drivers/vhost/vhost.c:2335!
invalid opcode:  [#1] PREEMPT SMP KASAN
CPU: 1 PID: 3597 Comm: vhost-3596 Not tainted 
5.17.0-rc4-syzkaller-00054-gf71077a4d84b #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 
01/01/2011
RIP: 0010:vhost_get_vq_desc+0x1d43/0x22c0 drivers/vhost/vhost.c:2335
Code: 00 00 00 48 c7 c6 20 2c 9d 8a 48 c7 c7 98 a6 8e 8d 48 89 ca 48 c1 e1 04 48 01 
d9 e8 b7 59 28 fd e9 74 ff ff ff e8 5d c8 a1 fa <0f> 0b e8 56 c8 a1 fa 48 8b 54 
24 18 48 b8 00 00 00 00 00 fc ff df
RSP: 0018:c90001d1fb88 EFLAGS: 00010293
RAX:  RBX: 0001 RCX: 
RDX: 8880234b RSI: 86d715c3 RDI: 0003
RBP:  R08:  R09: 0001
R10: 86d706bc R11:  R12: 888072c24d68
R13:  R14: dc00 R15: 888072c24bb0
FS:  () GS:8880b9d0() knlGS:
CS:  0010 DS:  ES:  CR0: 80050033
CR2: 0002 CR3: 7902c000 CR4: 003506e0
DR0:  DR1:  DR2: 
DR3:  DR6: fffe0ff0 DR7: 0400
Call Trace:
 
 vhost_vsock_handle_tx_kick+0x277/0xa20 drivers/vhost/vsock.c:522
 vhost_worker+0x23d/0x3d0 drivers/vhost/vhost.c:372
 kthread+0x2e9/0x3a0 kernel/kthread.c:377
 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:295


I don't see how this can trigger normally so I'm assuming
another case of use after free.


Yes, exactly.


I think this issue is related to the issue fixed by this patch merged 
some days ago upstream: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=a58da53ffd70294ebea8ecd0eb45fd0d74add9f9




I patched it.  Please see:

https://lore.kernel.org/all/20220302075421.2131221-1-lee.jo...@linaro.org/T/#t



I'm not sure that patch is avoiding the issue. I'll reply to it.


My bad, I think it should be fine, because vhost_vq_reset() set 
vq->private_data to NULL and avoids the worker to run.


Thanks,
Stefano

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [syzbot] kernel BUG in vhost_get_vq_desc

2022-03-02 Thread Stefano Garzarella

On Wed, Mar 02, 2022 at 08:29:41AM +, Lee Jones wrote:

On Fri, 18 Feb 2022, Michael S. Tsirkin wrote:


On Thu, Feb 17, 2022 at 05:21:20PM -0800, syzbot wrote:
> syzbot has found a reproducer for the following issue on:
>
> HEAD commit:f71077a4d84b Merge tag 'mmc-v5.17-rc1-2' of git://git.kern..
> git tree:   upstream
> console output: https://syzkaller.appspot.com/x/log.txt?x=104c04ca70
> kernel config:  https://syzkaller.appspot.com/x/.config?x=a78b064590b9f912
> dashboard link: https://syzkaller.appspot.com/bug?extid=3140b17cb44a7b174008
> compiler:   gcc (Debian 10.2.1-6) 10.2.1 20210110, GNU ld (GNU Binutils 
for Debian) 2.35.2
> syz repro:  https://syzkaller.appspot.com/x/repro.syz?x=1362e23270
> C reproducer:   https://syzkaller.appspot.com/x/repro.c?x=11373a6c70
>
> IMPORTANT: if you fix the issue, please add the following tag to the commit:
> Reported-by: syzbot+3140b17cb44a7b174...@syzkaller.appspotmail.com
>
> [ cut here ]
> kernel BUG at drivers/vhost/vhost.c:2335!
> invalid opcode:  [#1] PREEMPT SMP KASAN
> CPU: 1 PID: 3597 Comm: vhost-3596 Not tainted 
5.17.0-rc4-syzkaller-00054-gf71077a4d84b #0
> Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS 
Google 01/01/2011
> RIP: 0010:vhost_get_vq_desc+0x1d43/0x22c0 drivers/vhost/vhost.c:2335
> Code: 00 00 00 48 c7 c6 20 2c 9d 8a 48 c7 c7 98 a6 8e 8d 48 89 ca 48 c1 e1 04 48 01 
d9 e8 b7 59 28 fd e9 74 ff ff ff e8 5d c8 a1 fa <0f> 0b e8 56 c8 a1 fa 48 8b 54 24 
18 48 b8 00 00 00 00 00 fc ff df
> RSP: 0018:c90001d1fb88 EFLAGS: 00010293
> RAX:  RBX: 0001 RCX: 
> RDX: 8880234b RSI: 86d715c3 RDI: 0003
> RBP:  R08:  R09: 0001
> R10: 86d706bc R11:  R12: 888072c24d68
> R13:  R14: dc00 R15: 888072c24bb0
> FS:  () GS:8880b9d0() knlGS:
> CS:  0010 DS:  ES:  CR0: 80050033
> CR2: 0002 CR3: 7902c000 CR4: 003506e0
> DR0:  DR1:  DR2: 
> DR3:  DR6: fffe0ff0 DR7: 0400
> Call Trace:
>  
>  vhost_vsock_handle_tx_kick+0x277/0xa20 drivers/vhost/vsock.c:522
>  vhost_worker+0x23d/0x3d0 drivers/vhost/vhost.c:372
>  kthread+0x2e9/0x3a0 kernel/kthread.c:377
>  ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:295

I don't see how this can trigger normally so I'm assuming
another case of use after free.


Yes, exactly.


I think this issue is related to the issue fixed by this patch merged 
some days ago upstream: 
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=a58da53ffd70294ebea8ecd0eb45fd0d74add9f9




I patched it.  Please see:

https://lore.kernel.org/all/20220302075421.2131221-1-lee.jo...@linaro.org/T/#t



I'm not sure that patch is avoiding the issue. I'll reply to it.

Thanks,
Stefano

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH] vdpa: fix use-after-free on vp_vdpa_remove

2022-03-01 Thread Stefano Garzarella
On Tue, Mar 1, 2022 at 2:26 AM Yi Wang  wrote:
>
> From: Zhang Min 
>
> When vp_vdpa driver is unbind, vp_vdpa is freed in vdpa_unregister_device
> and then vp_vdpa->mdev.pci_dev is dereferenced in vp_modern_remove,
> triggering use-after-free.
>
> Call Trace of unbinding driver free vp_vdpa :
> do_syscall_64
>   vfs_write
> kernfs_fop_write_iter
>   device_release_driver_internal
> pci_device_remove
>   vp_vdpa_remove
> vdpa_unregister_device
>   kobject_release
> device_release
>   kfree
>
> Call Trace of dereference vp_vdpa->mdev.pci_dev:
> vp_modern_remove
>   pci_release_selected_regions
> pci_release_region
>   pci_resource_len
> pci_resource_end
>   (dev)->resource[(bar)].end

We can add the fixes tag:

Fixes: 64b9f64f80a6 ("vdpa: introduce virtio pci driver")

>
> Signed-off-by: Zhang Min 
> Signed-off-by: Yi Wang 
> ---
>  drivers/vdpa/virtio_pci/vp_vdpa.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/drivers/vdpa/virtio_pci/vp_vdpa.c 
> b/drivers/vdpa/virtio_pci/vp_vdpa.c
> index a57e381e830b..cce101e6a940 100644
> --- a/drivers/vdpa/virtio_pci/vp_vdpa.c
> +++ b/drivers/vdpa/virtio_pci/vp_vdpa.c
> @@ -533,8 +533,8 @@ static void vp_vdpa_remove(struct pci_dev *pdev)
>  {
> struct vp_vdpa *vp_vdpa = pci_get_drvdata(pdev);
>
> -   vdpa_unregister_device(_vdpa->vdpa);
> vp_modern_remove(_vdpa->mdev);
> +   vdpa_unregister_device(_vdpa->vdpa);
>  }
>
>  static struct pci_driver vp_vdpa_driver = {
> --
> 2.27.0
>

The patch LGTM:

Reviewed-by: Stefano Garzarella 

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


[PATCH v2] vhost/vsock: don't check owner in vhost_vsock_stop() while releasing

2022-02-22 Thread Stefano Garzarella
vhost_vsock_stop() calls vhost_dev_check_owner() to check the device
ownership. It expects current->mm to be valid.

vhost_vsock_stop() is also called by vhost_vsock_dev_release() when
the user has not done close(), so when we are in do_exit(). In this
case current->mm is invalid and we're releasing the device, so we
should clean it anyway.

Let's check the owner only when vhost_vsock_stop() is called
by an ioctl.

When invoked from release we can not fail so we don't check return
code of vhost_vsock_stop(). We need to stop vsock even if it's not
the owner.

Fixes: 433fc58e6bf2 ("VSOCK: Introduce vhost_vsock.ko")
Cc: sta...@vger.kernel.org
Reported-by: syzbot+1e3ea63db39f2b444...@syzkaller.appspotmail.com
Reported-and-tested-by: syzbot+3140b17cb44a7b174...@syzkaller.appspotmail.com
Signed-off-by: Stefano Garzarella 
---
v2:
- initialized `ret` in vhost_vsock_stop [Dan]
- added comment about vhost_vsock_stop() calling in the code and an explanation
  in the commit message [MST]

v1: 
https://lore.kernel.org/virtualization/20220221114916.107045-1-sgarz...@redhat.com
---
 drivers/vhost/vsock.c | 21 ++---
 1 file changed, 14 insertions(+), 7 deletions(-)

diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
index d6ca1c7ad513..37f0b4274113 100644
--- a/drivers/vhost/vsock.c
+++ b/drivers/vhost/vsock.c
@@ -629,16 +629,18 @@ static int vhost_vsock_start(struct vhost_vsock *vsock)
return ret;
 }
 
-static int vhost_vsock_stop(struct vhost_vsock *vsock)
+static int vhost_vsock_stop(struct vhost_vsock *vsock, bool check_owner)
 {
size_t i;
-   int ret;
+   int ret = 0;
 
mutex_lock(>dev.mutex);
 
-   ret = vhost_dev_check_owner(>dev);
-   if (ret)
-   goto err;
+   if (check_owner) {
+   ret = vhost_dev_check_owner(>dev);
+   if (ret)
+   goto err;
+   }
 
for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) {
struct vhost_virtqueue *vq = >vqs[i];
@@ -753,7 +755,12 @@ static int vhost_vsock_dev_release(struct inode *inode, 
struct file *file)
 * inefficient.  Room for improvement here. */
vsock_for_each_connected_socket(vhost_vsock_reset_orphans);
 
-   vhost_vsock_stop(vsock);
+   /* Don't check the owner, because we are in the release path, so we
+* need to stop the vsock device in any case.
+* vhost_vsock_stop() can not fail in this case, so we don't need to
+* check the return code.
+*/
+   vhost_vsock_stop(vsock, false);
vhost_vsock_flush(vsock);
vhost_dev_stop(>dev);
 
@@ -868,7 +875,7 @@ static long vhost_vsock_dev_ioctl(struct file *f, unsigned 
int ioctl,
if (start)
return vhost_vsock_start(vsock);
else
-   return vhost_vsock_stop(vsock);
+   return vhost_vsock_stop(vsock, true);
case VHOST_GET_FEATURES:
features = VHOST_VSOCK_FEATURES;
if (copy_to_user(argp, , sizeof(features)))
-- 
2.35.1

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH] vhost/vsock: don't check owner in vhost_vsock_stop() while releasing

2022-02-22 Thread Stefano Garzarella

On Tue, Feb 22, 2022 at 01:06:12AM +0530, Anirudh Rayabharam wrote:

On Mon, Feb 21, 2022 at 07:26:28PM +0100, Stefano Garzarella wrote:

On Mon, Feb 21, 2022 at 11:33:11PM +0530, Anirudh Rayabharam wrote:
> On Mon, Feb 21, 2022 at 05:44:20PM +0100, Stefano Garzarella wrote:
> > On Mon, Feb 21, 2022 at 09:44:39PM +0530, Anirudh Rayabharam wrote:
> > > On Mon, Feb 21, 2022 at 02:59:30PM +0100, Stefano Garzarella wrote:
> > > > On Mon, Feb 21, 2022 at 12:49 PM Stefano Garzarella 
 wrote:
> > > > >
> > > > > vhost_vsock_stop() calls vhost_dev_check_owner() to check the device
> > > > > ownership. It expects current->mm to be valid.
> > > > >
> > > > > vhost_vsock_stop() is also called by vhost_vsock_dev_release() when
> > > > > the user has not done close(), so when we are in do_exit(). In this
> > > > > case current->mm is invalid and we're releasing the device, so we
> > > > > should clean it anyway.
> > > > >
> > > > > Let's check the owner only when vhost_vsock_stop() is called
> > > > > by an ioctl.
> > > > >
> > > > > Fixes: 433fc58e6bf2 ("VSOCK: Introduce vhost_vsock.ko")
> > > > > Cc: sta...@vger.kernel.org
> > > > > Reported-by: syzbot+1e3ea63db39f2b444...@syzkaller.appspotmail.com
> > > > > Signed-off-by: Stefano Garzarella 
> > > > > ---
> > > > >  drivers/vhost/vsock.c | 14 --
> > > > >  1 file changed, 8 insertions(+), 6 deletions(-)
> > > >
> > > > Reported-and-tested-by: 
syzbot+0abd373e2e50d704d...@syzkaller.appspotmail.com
> > >
> > > I don't think this patch fixes "INFO: task hung in vhost_work_dev_flush"
> > > even though syzbot says so. I am able to reproduce the issue locally
> > > even with this patch applied.
> >
> > Are you using the sysbot reproducer or another test?
> > In that case, can you share it?
>
> I am using the syzbot reproducer.
>
> >
> > From the stack trace it seemed to me that the worker accesses a zone that
> > has been cleaned (iotlb), so it is invalid and fails.
>
> Would the thread hang in that case? How?

Looking at this log [1] it seems that the process is blocked on the
wait_for_completion() in vhost_work_dev_flush().

Since we're not setting the backend to NULL to stop the worker, it's likely
that the worker will keep running, preventing the flush work from
completing.


The log shows that the worker thread is stuck in iotlb_access_ok(). How
will setting the backend to NULL stop it? During my debugging I found
that the worker is stuck in this while loop:


Okay, looking at your new patch, now I see. If we enter in this loop 
before setting the backend to NULL and we have start = 0 and end = (u64) 
-1 , we should be there forever.


I'll remove that tag in v2, but the test might fail without this patch 
applied, because for now we don't stop workers correctly.


Thanks,
Stefano

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH] vhost/vsock: don't check owner in vhost_vsock_stop() while releasing

2022-02-22 Thread Stefano Garzarella

On Tue, Feb 22, 2022 at 08:30:17AM +0300, Dan Carpenter wrote:

Hi Stefano,

url:
https://github.com/0day-ci/linux/commits/Stefano-Garzarella/vhost-vsock-don-t-check-owner-in-vhost_vsock_stop-while-releasing/20220221-195038
base:   https://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost.git linux-next
config: x86_64-randconfig-m031-20220221 
(https://download.01.org/0day-ci/archive/20220222/202202220707.am3rkucp-...@intel.com/config)
compiler: gcc-9 (Debian 9.3.0-22) 9.3.0

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot 
Reported-by: Dan Carpenter 

smatch warnings:
drivers/vhost/vsock.c:655 vhost_vsock_stop() error: uninitialized symbol 'ret'.

vim +/ret +655 drivers/vhost/vsock.c

3ace84c91bfcde Stefano Garzarella 2022-02-21  632  static int 
vhost_vsock_stop(struct vhost_vsock *vsock, bool check_owner)
433fc58e6bf2c8 Asias He   2016-07-28  633  {
433fc58e6bf2c8 Asias He   2016-07-28  634   size_t i;
433fc58e6bf2c8 Asias He   2016-07-28  635   int ret;
433fc58e6bf2c8 Asias He   2016-07-28  636
433fc58e6bf2c8 Asias He   2016-07-28  637   
mutex_lock(>dev.mutex);
433fc58e6bf2c8 Asias He   2016-07-28  638
3ace84c91bfcde Stefano Garzarella 2022-02-21  639   if (check_owner) {
433fc58e6bf2c8 Asias He   2016-07-28  640   ret = 
vhost_dev_check_owner(>dev);
433fc58e6bf2c8 Asias He   2016-07-28  641   if (ret)
433fc58e6bf2c8 Asias He   2016-07-28  642   goto 
err;
3ace84c91bfcde Stefano Garzarella 2022-02-21  643   }

"ret" not initialized on else path.


Oooops, I was testing with vhost_vsock_dev_release() where we don't 
check the ret value, but of course we need to initialize it to 0 for the 
vhost_vsock_dev_ioctl() use case.


I'll fix in the v2.

Thanks for the report,
Stefano

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH] vhost/vsock: don't check owner in vhost_vsock_stop() while releasing

2022-02-21 Thread Stefano Garzarella

On Mon, Feb 21, 2022 at 11:33:11PM +0530, Anirudh Rayabharam wrote:

On Mon, Feb 21, 2022 at 05:44:20PM +0100, Stefano Garzarella wrote:

On Mon, Feb 21, 2022 at 09:44:39PM +0530, Anirudh Rayabharam wrote:
> On Mon, Feb 21, 2022 at 02:59:30PM +0100, Stefano Garzarella wrote:
> > On Mon, Feb 21, 2022 at 12:49 PM Stefano Garzarella  
wrote:
> > >
> > > vhost_vsock_stop() calls vhost_dev_check_owner() to check the device
> > > ownership. It expects current->mm to be valid.
> > >
> > > vhost_vsock_stop() is also called by vhost_vsock_dev_release() when
> > > the user has not done close(), so when we are in do_exit(). In this
> > > case current->mm is invalid and we're releasing the device, so we
> > > should clean it anyway.
> > >
> > > Let's check the owner only when vhost_vsock_stop() is called
> > > by an ioctl.
> > >
> > > Fixes: 433fc58e6bf2 ("VSOCK: Introduce vhost_vsock.ko")
> > > Cc: sta...@vger.kernel.org
> > > Reported-by: syzbot+1e3ea63db39f2b444...@syzkaller.appspotmail.com
> > > Signed-off-by: Stefano Garzarella 
> > > ---
> > >  drivers/vhost/vsock.c | 14 --
> > >  1 file changed, 8 insertions(+), 6 deletions(-)
> >
> > Reported-and-tested-by: 
syzbot+0abd373e2e50d704d...@syzkaller.appspotmail.com
>
> I don't think this patch fixes "INFO: task hung in vhost_work_dev_flush"
> even though syzbot says so. I am able to reproduce the issue locally
> even with this patch applied.

Are you using the sysbot reproducer or another test?
In that case, can you share it?


I am using the syzbot reproducer.



From the stack trace it seemed to me that the worker accesses a zone that
has been cleaned (iotlb), so it is invalid and fails.


Would the thread hang in that case? How?


Looking at this log [1] it seems that the process is blocked on the 
wait_for_completion() in vhost_work_dev_flush().


Since we're not setting the backend to NULL to stop the worker, it's 
likely that the worker will keep running, preventing the flush work from 
completing.


[1] https://syzkaller.appspot.com/text?tag=CrashLog=153f085270

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH] tools/virtio: Test virtual address range detection

2022-02-21 Thread Stefano Garzarella

On Mon, Feb 21, 2022 at 04:15:22PM +, David Woodhouse wrote:

As things stand, an application which wants to use vhost with a trivial
1:1 mapping of its virtual address space is forced to jump through hoops
to detect what the address range might be. The VHOST_SET_MEM_TABLE ioctl
helpfully doesn't fail immediately; you only get a failure *later* when
you attempt to set the backend, if the table *could* map to an address
which is out of range, even if no out-of-range address is actually
being referenced.

Since userspace is growing workarounds for this lovely kernel API, let's
ensure that we have a regression test that does things basically the same
way as https://gitlab.com/openconnect/openconnect/-/commit/443edd9d8826
does.

This is untested as I can't actually get virtio_test to work at all; it
just seems to deadlock on a spinlock. But it's getting the right answer
for the virtio range on x86_64 at least.


I had a similar issue with virtio_test and this simple patch [1] should 
fix the deadlock.


[1] 
https://lore.kernel.org/lkml/20220118150631.167015-1-sgarz...@redhat.com/


Stefano

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH] vhost: handle zero regions in vhost_set_memory

2022-02-21 Thread Stefano Garzarella

On Mon, Feb 21, 2022 at 12:58:51PM +0530, Anirudh Rayabharam wrote:

Return early when userspace sends zero regions in the VHOST_SET_MEM_TABLE
ioctl.

Otherwise, this causes an erroneous entry to be added to the iotlb. This
entry has a range size of 0 (due to u64 overflow). This then causes
iotlb_access_ok() to loop indefinitely resulting in a hung thread.
Syzbot has reported this here:

https://syzkaller.appspot.com/bug?extid=0abd373e2e50d704db87


IIUC vhost_iotlb_add_range() in the for loop is never called if 
mem.nregions is 0, so I'm not sure the problem reported by syzbot is 
related.


In any case maybe this patch is fine, but currently I think we're just 
registering an iotlb without any regions, which in theory shouldn't 
cause any problems.


Thanks,
Stefano



Reported-and-tested-by: syzbot+0abd373e2e50d704d...@syzkaller.appspotmail.com
Signed-off-by: Anirudh Rayabharam 
---
drivers/vhost/vhost.c | 2 ++
1 file changed, 2 insertions(+)

diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 59edb5a1ffe2..821aba60eac2 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -1428,6 +1428,8 @@ static long vhost_set_memory(struct vhost_dev *d, struct 
vhost_memory __user *m)
return -EFAULT;
if (mem.padding)
return -EOPNOTSUPP;
+   if (mem.nregions == 0)
+   return 0;
if (mem.nregions > max_mem_regions)
return -E2BIG;
newmem = kvzalloc(struct_size(newmem, regions, mem.nregions),
--
2.35.1



___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH] vhost/vsock: don't check owner in vhost_vsock_stop() while releasing

2022-02-21 Thread Stefano Garzarella

On Mon, Feb 21, 2022 at 09:44:39PM +0530, Anirudh Rayabharam wrote:

On Mon, Feb 21, 2022 at 02:59:30PM +0100, Stefano Garzarella wrote:

On Mon, Feb 21, 2022 at 12:49 PM Stefano Garzarella  wrote:
>
> vhost_vsock_stop() calls vhost_dev_check_owner() to check the device
> ownership. It expects current->mm to be valid.
>
> vhost_vsock_stop() is also called by vhost_vsock_dev_release() when
> the user has not done close(), so when we are in do_exit(). In this
> case current->mm is invalid and we're releasing the device, so we
> should clean it anyway.
>
> Let's check the owner only when vhost_vsock_stop() is called
> by an ioctl.
>
> Fixes: 433fc58e6bf2 ("VSOCK: Introduce vhost_vsock.ko")
> Cc: sta...@vger.kernel.org
> Reported-by: syzbot+1e3ea63db39f2b444...@syzkaller.appspotmail.com
> Signed-off-by: Stefano Garzarella 
> ---
>  drivers/vhost/vsock.c | 14 --
>  1 file changed, 8 insertions(+), 6 deletions(-)

Reported-and-tested-by: syzbot+0abd373e2e50d704d...@syzkaller.appspotmail.com


I don't think this patch fixes "INFO: task hung in vhost_work_dev_flush"
even though syzbot says so. I am able to reproduce the issue locally
even with this patch applied.


Are you using the sysbot reproducer or another test?
In that case, can you share it?

From the stack trace it seemed to me that the worker accesses a zone 
that has been cleaned (iotlb), so it is invalid and fails.
That's why I had this patch tested which should stop the worker before 
cleaning.


Thanks,
Stefano

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [syzbot] INFO: task hung in vhost_work_dev_flush

2022-02-21 Thread Stefano Garzarella

On Mon, Feb 21, 2022 at 09:23:04PM +0530, Anirudh Rayabharam wrote:

On Mon, Feb 21, 2022 at 03:12:33PM +0100, Stefano Garzarella wrote:

#syz test: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/
f71077a4d84b

Patch sent upstream:
https://lore.kernel.org/virtualization/20220221114916.107045-1-sgarz...@redhat.com/T/#u


I don't see how your patch fixes this issue. It looks unrelated. It is
surprising that syzbot is happy with it.

I have sent a patch for this issue here:
https://lore.kernel.org/lkml/20220221072852.31820-1-m...@anirudhrb.com/


It is related because the worker thread is accessing the iotlb that is 
going to be freed, so it could be corrupted/invalid.


Your patch seems right, but simply prevents iotlb from being set for the 
the specific test case, so it remains NULL and iotlb_access_ok() exits 
immediately.


Anyway, currently if nregions is 0 vhost_set_memory() sets an iotlb with 
no regions (the for loop is not executed), so I'm not sure 
iotlb_access_ok() cycles infinitely.


Stefano

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH] vhost/vsock: don't check owner in vhost_vsock_stop() while releasing

2022-02-21 Thread Stefano Garzarella

On Mon, Feb 21, 2022 at 10:03:39AM -0500, Michael S. Tsirkin wrote:

On Mon, Feb 21, 2022 at 12:49:16PM +0100, Stefano Garzarella wrote:

vhost_vsock_stop() calls vhost_dev_check_owner() to check the device
ownership. It expects current->mm to be valid.

vhost_vsock_stop() is also called by vhost_vsock_dev_release() when
the user has not done close(), so when we are in do_exit(). In this
case current->mm is invalid and we're releasing the device, so we
should clean it anyway.

Let's check the owner only when vhost_vsock_stop() is called
by an ioctl.






Fixes: 433fc58e6bf2 ("VSOCK: Introduce vhost_vsock.ko")
Cc: sta...@vger.kernel.org
Reported-by: syzbot+1e3ea63db39f2b444...@syzkaller.appspotmail.com
Signed-off-by: Stefano Garzarella 
---
 drivers/vhost/vsock.c | 14 --
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
index d6ca1c7ad513..f00d2dfd72b7 100644
--- a/drivers/vhost/vsock.c
+++ b/drivers/vhost/vsock.c
@@ -629,16 +629,18 @@ static int vhost_vsock_start(struct vhost_vsock *vsock)
return ret;
 }

-static int vhost_vsock_stop(struct vhost_vsock *vsock)
+static int vhost_vsock_stop(struct vhost_vsock *vsock, bool check_owner)



 {
size_t i;
int ret;

mutex_lock(>dev.mutex);

-   ret = vhost_dev_check_owner(>dev);
-   if (ret)
-   goto err;
+   if (check_owner) {
+   ret = vhost_dev_check_owner(>dev);
+   if (ret)
+   goto err;
+   }

for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) {
struct vhost_virtqueue *vq = >vqs[i];
@@ -753,7 +755,7 @@ static int vhost_vsock_dev_release(struct inode *inode, 
struct file *file)
 * inefficient.  Room for improvement here. */
vsock_for_each_connected_socket(vhost_vsock_reset_orphans);

-   vhost_vsock_stop(vsock);


Let's add an explanation:

When invoked from release we can not fail so we don't
check return code of vhost_vsock_stop.
We need to stop vsock even if it's not the owner.


Do you want me to send a v2 by adding this as a comment in the code?

Thanks,
Stefano

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [syzbot] INFO: task hung in vhost_work_dev_flush

2022-02-21 Thread Stefano Garzarella
; RIP: 0010:__sanitizer_cov_trace_pc+0xd/0x60 kernel/kcov.c:200
> Code: 00 00 e9 c6 41 66 02 66 0f 1f 44 00 00 48 8b be b0 01 00 00 e8 b4 ff ff 
> ff 31 c0 c3 90 65 8b 05 29 f7 89 7e 89 c1 48 8b 34 24 <81> e1 00 01 00 00 65 
> 48 8b 14 25 00 70 02 00 a9 00 01 ff 00 74 0e
> RSP: 0018:c9cd7c78 EFLAGS: 0246
> RAX: 8000 RBX: 888079ca8a80 RCX: 8000
> RDX:  RSI: 86d3f8fb RDI: 0003
> RBP:  R08:  R09: c9cd7c77
> R10: 86d3f8ed R11: 0001 R12: 
> R13:  R14: 0001 R15: 
> FS:  () GS:8880b9d0() knlGS:
> CS:  0010 DS:  ES:  CR0: 80050033
> CR2: 7ffdf716a3b8 CR3: 235b6000 CR4: 003506e0
> DR0:  DR1:  DR2: 
> DR3:  DR6: fffe0ff0 DR7: 0400
> Call Trace:
>  
>  iotlb_access_ok+0x21b/0x3e0 drivers/vhost/vhost.c:1340
>  vq_meta_prefetch+0xbc/0x280 drivers/vhost/vhost.c:1366
>  vhost_transport_do_send_pkt+0xe0/0xfd0 drivers/vhost/vsock.c:104
>  vhost_worker+0x23d/0x3d0 drivers/vhost/vhost.c:372
>  kthread+0x2e9/0x3a0 kernel/kthread.c:377
>  ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:295
>  
> 
> Code disassembly (best guess):
>0:   00 00   add%al,(%rax)
>2:   e9 c6 41 66 02  jmpq   0x26641cd
>7:   66 0f 1f 44 00 00   nopw   0x0(%rax,%rax,1)
>d:   48 8b be b0 01 00 00mov0x1b0(%rsi),%rdi
>   14:   e8 b4 ff ff ff  callq  0xffcd
>   19:   31 c0   xor%eax,%eax
>   1b:   c3  retq
>   1c:   90  nop
>   1d:   65 8b 05 29 f7 89 7emov%gs:0x7e89f729(%rip),%eax# 
> 0x7e89f74d
>   24:   89 c1   mov%eax,%ecx
>   26:   48 8b 34 24 mov(%rsp),%rsi
> * 2a:   81 e1 00 01 00 00   and$0x100,%ecx <-- trapping instruction
>   30:   65 48 8b 14 25 00 70mov%gs:0x27000,%rdx
>   37:   02 00
>   39:   a9 00 01 ff 00  test   $0xff0100,%eax
>   3e:   74 0e   je 0x4e
>
>
> ---
> This report is generated by a bot. It may contain errors.
> See https://goo.gl/tpsmEJ for more information about syzbot.
> syzbot engineers can be reached at syzkal...@googlegroups.com.
>
> syzbot will keep track of this issue. See:
> https://goo.gl/tpsmEJ#status for how to communicate with syzbot.
> syzbot can test patches for this issue, for details see:
> https://goo.gl/tpsmEJ#testing-patches
>
From 4951112bf98d3e10d3e9557986e5ca5419ca738f Mon Sep 17 00:00:00 2001
From: Stefano Garzarella 
Date: Mon, 21 Feb 2022 11:07:49 +0100
Subject: [PATCH] vhost/vsock: don't check owner in vhost_vsock_stop() while
 releasing

vhost_vsock_stop() calls vhost_dev_check_owner() to check the device
ownership. It expects current->mm to be valid.

vhost_vsock_stop() is also called by vhost_vsock_dev_release() when
the user has not done close(), so when we are in do_exit(). In this
case current->mm is invalid and we're releasing the device, so we
should clean it anyway.

Let's check the owner only when vhost_vsock_stop() is called
by an ioctl.

Fixes: 433fc58e6bf2 ("VSOCK: Introduce vhost_vsock.ko")
Cc: sta...@vger.kernel.org
Reported-by: syzbot+1e3ea63db39f2b444...@syzkaller.appspotmail.com
Signed-off-by: Stefano Garzarella 
---
 drivers/vhost/vsock.c | 14 --
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
index d6ca1c7ad513..f00d2dfd72b7 100644
--- a/drivers/vhost/vsock.c
+++ b/drivers/vhost/vsock.c
@@ -629,16 +629,18 @@ static int vhost_vsock_start(struct vhost_vsock *vsock)
 	return ret;
 }
 
-static int vhost_vsock_stop(struct vhost_vsock *vsock)
+static int vhost_vsock_stop(struct vhost_vsock *vsock, bool check_owner)
 {
 	size_t i;
 	int ret;
 
 	mutex_lock(>dev.mutex);
 
-	ret = vhost_dev_check_owner(>dev);
-	if (ret)
-		goto err;
+	if (check_owner) {
+		ret = vhost_dev_check_owner(>dev);
+		if (ret)
+			goto err;
+	}
 
 	for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) {
 		struct vhost_virtqueue *vq = >vqs[i];
@@ -753,7 +755,7 @@ static int vhost_vsock_dev_release(struct inode *inode, struct file *file)
 	 * inefficient.  Room for improvement here. */
 	vsock_for_each_connected_socket(vhost_vsock_reset_orphans);
 
-	vhost_vsock_stop(vsock);
+	vhost_vsock_stop(vsock, false);
 	vhost_vsock_flush(vsock);
 	vhost_dev_stop(>dev);
 
@@ -868,7 +870,7 @@ static long vhost_vsock_dev_ioctl(struct file *f, unsigned int ioctl,
 		if (start)
 			return vhost_vsock_start(vsock);
 		else
-			return vhost_vsock_stop(vsock);
+			return vhost_vsock_stop(vsock, true);
 	case VHOST_GET_FEATURES:
 		features = VHOST_VSOCK_FEATURES;
 		if (copy_to_user(argp, , sizeof(features)))
-- 
2.35.1

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

Re: [PATCH] vhost/vsock: don't check owner in vhost_vsock_stop() while releasing

2022-02-21 Thread Stefano Garzarella
On Mon, Feb 21, 2022 at 12:49 PM Stefano Garzarella  wrote:
>
> vhost_vsock_stop() calls vhost_dev_check_owner() to check the device
> ownership. It expects current->mm to be valid.
>
> vhost_vsock_stop() is also called by vhost_vsock_dev_release() when
> the user has not done close(), so when we are in do_exit(). In this
> case current->mm is invalid and we're releasing the device, so we
> should clean it anyway.
>
> Let's check the owner only when vhost_vsock_stop() is called
> by an ioctl.
>
> Fixes: 433fc58e6bf2 ("VSOCK: Introduce vhost_vsock.ko")
> Cc: sta...@vger.kernel.org
> Reported-by: syzbot+1e3ea63db39f2b444...@syzkaller.appspotmail.com
> Signed-off-by: Stefano Garzarella 
> ---
>  drivers/vhost/vsock.c | 14 --
>  1 file changed, 8 insertions(+), 6 deletions(-)

Reported-and-tested-by: syzbot+0abd373e2e50d704d...@syzkaller.appspotmail.com
Reported-and-tested-by: syzbot+3140b17cb44a7b174...@syzkaller.appspotmail.com

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [syzbot] general protection fault in vhost_iotlb_itree_first

2022-02-21 Thread Stefano Garzarella

#syz test: https://github.com/stefano-garzarella/linux.git vsock-fix-stop

On Sat, Feb 19, 2022 at 01:18:24AM -0800, syzbot wrote:

Hello,

syzbot found the following issue on:

HEAD commit:359303076163 tty: n_tty: do not look ahead for EOL charact..
git tree:   upstream
console output: https://syzkaller.appspot.com/x/log.txt?x=16b34b5470
kernel config:  https://syzkaller.appspot.com/x/.config?x=da674567f7b6043d
dashboard link: https://syzkaller.appspot.com/bug?extid=bbb030fc51d6f3c5d067
compiler:   gcc (Debian 10.2.1-6) 10.2.1 20210110, GNU ld (GNU Binutils for 
Debian) 2.35.2

Unfortunately, I don't have any reproducer for this issue yet.

IMPORTANT: if you fix the issue, please add the following tag to the commit:
Reported-by: syzbot+bbb030fc51d6f3c5d...@syzkaller.appspotmail.com

general protection fault, probably for non-canonical address 
0xdc00:  [#1] PREEMPT SMP KASAN
KASAN: null-ptr-deref in range [0x-0x0007]
CPU: 1 PID: 17981 Comm: vhost-17980 Not tainted 
5.17.0-rc4-syzkaller-00052-g359303076163 #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 
01/01/2011
RIP: 0010:vhost_iotlb_itree_iter_first drivers/vhost/iotlb.c:19 [inline]
RIP: 0010:vhost_iotlb_itree_first+0x29/0x280 drivers/vhost/iotlb.c:169
Code: 00 41 57 41 56 41 55 49 89 d5 41 54 55 48 89 fd 53 48 89 f3 e8 e8 eb a0 fa 48 
89 ea 48 b8 00 00 00 00 00 fc ff df 48 c1 ea 03 <80> 3c 02 00 0f 85 e8 01 00 00 
4c 8b 65 00 4d 85 e4 0f 84 b3 01 00
RSP: 0018:c90004f57ac8 EFLAGS: 00010246
RAX: dc00 RBX: 30303030320a0028 RCX: c900103dc000
RDX:  RSI: 86d72738 RDI: 
RBP:  R08:  R09: 0002
R10: 86d62d88 R11:  R12: 8880260e4d68
R13: 303030305f3a3057 R14: dc00 R15: 
FS:  () GS:8880b9d0() knlGS:
CS:  0010 DS:  ES:  CR0: 80050033
CR2: 7f2d46121901 CR3: 1d652000 CR4: 003506e0
DR0:  DR1:  DR2: 
DR3:  DR6: fffe0ff0 DR7: 0400
Call Trace:

translate_desc+0x11e/0x3e0 drivers/vhost/vhost.c:2054
vhost_get_vq_desc+0x662/0x22c0 drivers/vhost/vhost.c:2300
vhost_vsock_handle_tx_kick+0x277/0xa20 drivers/vhost/vsock.c:522
vhost_worker+0x23d/0x3d0 drivers/vhost/vhost.c:372
kthread+0x2e9/0x3a0 kernel/kthread.c:377
ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:295

Modules linked in:
---[ end trace  ]---
RIP: 0010:vhost_iotlb_itree_iter_first drivers/vhost/iotlb.c:19 [inline]
RIP: 0010:vhost_iotlb_itree_first+0x29/0x280 drivers/vhost/iotlb.c:169
Code: 00 41 57 41 56 41 55 49 89 d5 41 54 55 48 89 fd 53 48 89 f3 e8 e8 eb a0 fa 48 
89 ea 48 b8 00 00 00 00 00 fc ff df 48 c1 ea 03 <80> 3c 02 00 0f 85 e8 01 00 00 
4c 8b 65 00 4d 85 e4 0f 84 b3 01 00
RSP: 0018:c90004f57ac8 EFLAGS: 00010246
RAX: dc00 RBX: 30303030320a0028 RCX: c900103dc000
RDX:  RSI: 86d72738 RDI: 
RBP:  R08:  R09: 0002
R10: 86d62d88 R11:  R12: 8880260e4d68
R13: 303030305f3a3057 R14: dc00 R15: 
FS:  () GS:8880b9d0() knlGS:
CS:  0010 DS:  ES:  CR0: 80050033
CR2: 7f2d449f6718 CR3: 1d652000 CR4: 003506e0
DR0:  DR1:  DR2: 
DR3:  DR6: fffe0ff0 DR7: 0400

Code disassembly (best guess):
  0:00 41 57add%al,0x57(%rcx)
  3:41 56   push   %r14
  5:41 55   push   %r13
  7:49 89 d5mov%rdx,%r13
  a:41 54   push   %r12
  c:55  push   %rbp
  d:48 89 fdmov%rdi,%rbp
 10:53  push   %rbx
 11:48 89 f3mov%rsi,%rbx
 14:e8 e8 eb a0 fa  callq  0xfaa0ec01
 19:48 89 eamov%rbp,%rdx
 1c:48 b8 00 00 00 00 00movabs $0xdc00,%rax
 23:fc ff df
 26:48 c1 ea 03 shr$0x3,%rdx
* 2a:   80 3c 02 00 cmpb   $0x0,(%rdx,%rax,1) <-- trapping 
instruction
 2e:0f 85 e8 01 00 00   jne0x21c
 34:4c 8b 65 00 mov0x0(%rbp),%r12
 38:4d 85 e4test   %r12,%r12
 3b:0f  .byte 0xf
 3c:84  .byte 0x84
 3d:b3 01   mov$0x1,%bl


---
This report is generated by a bot. It may contain errors.
See https://goo.gl/tpsmEJ for more information about syzbot.
syzbot engineers can be reached at syzkal...@googlegroups.com.

syzbot will keep track of this issue. See:
https://goo.gl/tpsmEJ

[PATCH] vhost/vsock: don't check owner in vhost_vsock_stop() while releasing

2022-02-21 Thread Stefano Garzarella
vhost_vsock_stop() calls vhost_dev_check_owner() to check the device
ownership. It expects current->mm to be valid.

vhost_vsock_stop() is also called by vhost_vsock_dev_release() when
the user has not done close(), so when we are in do_exit(). In this
case current->mm is invalid and we're releasing the device, so we
should clean it anyway.

Let's check the owner only when vhost_vsock_stop() is called
by an ioctl.

Fixes: 433fc58e6bf2 ("VSOCK: Introduce vhost_vsock.ko")
Cc: sta...@vger.kernel.org
Reported-by: syzbot+1e3ea63db39f2b444...@syzkaller.appspotmail.com
Signed-off-by: Stefano Garzarella 
---
 drivers/vhost/vsock.c | 14 --
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
index d6ca1c7ad513..f00d2dfd72b7 100644
--- a/drivers/vhost/vsock.c
+++ b/drivers/vhost/vsock.c
@@ -629,16 +629,18 @@ static int vhost_vsock_start(struct vhost_vsock *vsock)
return ret;
 }
 
-static int vhost_vsock_stop(struct vhost_vsock *vsock)
+static int vhost_vsock_stop(struct vhost_vsock *vsock, bool check_owner)
 {
size_t i;
int ret;
 
mutex_lock(>dev.mutex);
 
-   ret = vhost_dev_check_owner(>dev);
-   if (ret)
-   goto err;
+   if (check_owner) {
+   ret = vhost_dev_check_owner(>dev);
+   if (ret)
+   goto err;
+   }
 
for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) {
struct vhost_virtqueue *vq = >vqs[i];
@@ -753,7 +755,7 @@ static int vhost_vsock_dev_release(struct inode *inode, 
struct file *file)
 * inefficient.  Room for improvement here. */
vsock_for_each_connected_socket(vhost_vsock_reset_orphans);
 
-   vhost_vsock_stop(vsock);
+   vhost_vsock_stop(vsock, false);
vhost_vsock_flush(vsock);
vhost_dev_stop(>dev);
 
@@ -868,7 +870,7 @@ static long vhost_vsock_dev_ioctl(struct file *f, unsigned 
int ioctl,
if (start)
return vhost_vsock_start(vsock);
else
-   return vhost_vsock_stop(vsock);
+   return vhost_vsock_stop(vsock, true);
case VHOST_GET_FEATURES:
features = VHOST_VSOCK_FEATURES;
if (copy_to_user(argp, , sizeof(features)))
-- 
2.35.1

___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization


  1   2   3   4   5   6   7   8   9   10   >