Re: [RFC v2 11/13] vhost: Shadow virtqueue buffers forwarding

2021-03-18 Thread Eugenio Perez Martin
On Thu, Mar 18, 2021 at 10:22 AM Jason Wang  wrote:
>
>
> 在 2021/3/18 下午4:06, Eugenio Perez Martin 写道:
> > On Thu, Mar 18, 2021 at 4:14 AM Jason Wang  wrote:
> >>
> >> 在 2021/3/17 下午10:38, Eugenio Perez Martin 写道:
> >>> On Wed, Mar 17, 2021 at 3:51 AM Jason Wang  wrote:
>  在 2021/3/17 上午12:05, Eugenio Perez Martin 写道:
> > On Tue, Mar 16, 2021 at 9:15 AM Jason Wang  wrote:
> >> 在 2021/3/16 上午3:48, Eugenio Pérez 写道:
> >>> Initial version of shadow virtqueue that actually forward buffers.
> >>>
> >>> It reuses the VirtQueue code for the device part. The driver part is
> >>> based on Linux's virtio_ring driver, but with stripped functionality
> >>> and optimizations so it's easier to review.
> >>>
> >>> These will be added in later commits.
> >>>
> >>> Signed-off-by: Eugenio Pérez 
> >>> ---
> >>>  hw/virtio/vhost-shadow-virtqueue.c | 212 
> >>> +++--
> >>>  hw/virtio/vhost.c  | 113 ++-
> >>>  2 files changed, 312 insertions(+), 13 deletions(-)
> >>>
> >>> diff --git a/hw/virtio/vhost-shadow-virtqueue.c 
> >>> b/hw/virtio/vhost-shadow-virtqueue.c
> >>> index 1460d1d5d1..68ed0f2740 100644
> >>> --- a/hw/virtio/vhost-shadow-virtqueue.c
> >>> +++ b/hw/virtio/vhost-shadow-virtqueue.c
> >>> @@ -9,6 +9,7 @@
> >>>
> >>>  #include "hw/virtio/vhost-shadow-virtqueue.h"
> >>>  #include "hw/virtio/vhost.h"
> >>> +#include "hw/virtio/virtio-access.h"
> >>>
> >>>  #include "standard-headers/linux/vhost_types.h"
> >>>
> >>> @@ -55,11 +56,96 @@ typedef struct VhostShadowVirtqueue {
> >>>  /* Virtio device */
> >>>  VirtIODevice *vdev;
> >>>
> >>> +/* Map for returning guest's descriptors */
> >>> +VirtQueueElement **ring_id_maps;
> >>> +
> >>> +/* Next head to expose to device */
> >>> +uint16_t avail_idx_shadow;
> >>> +
> >>> +/* Next free descriptor */
> >>> +uint16_t free_head;
> >>> +
> >>> +/* Last seen used idx */
> >>> +uint16_t shadow_used_idx;
> >>> +
> >>> +/* Next head to consume from device */
> >>> +uint16_t used_idx;
> >>> +
> >>>  /* Descriptors copied from guest */
> >>>  vring_desc_t descs[];
> >>>  } VhostShadowVirtqueue;
> >>>
> >>> -/* Forward guest notifications */
> >>> +static void vhost_vring_write_descs(VhostShadowVirtqueue *svq,
> >>> +const struct iovec *iovec,
> >>> +size_t num, bool more_descs, 
> >>> bool write)
> >>> +{
> >>> +uint16_t i = svq->free_head, last = svq->free_head;
> >>> +unsigned n;
> >>> +uint16_t flags = write ? virtio_tswap16(svq->vdev, 
> >>> VRING_DESC_F_WRITE) : 0;
> >>> +vring_desc_t *descs = svq->vring.desc;
> >>> +
> >>> +if (num == 0) {
> >>> +return;
> >>> +}
> >>> +
> >>> +for (n = 0; n < num; n++) {
> >>> +if (more_descs || (n + 1 < num)) {
> >>> +descs[i].flags = flags | virtio_tswap16(svq->vdev,
> >>> +
> >>> VRING_DESC_F_NEXT);
> >>> +} else {
> >>> +descs[i].flags = flags;
> >>> +}
> >>> +descs[i].addr = virtio_tswap64(svq->vdev, 
> >>> (hwaddr)iovec[n].iov_base);
> >> So unsing virtio_tswap() is probably not correct since we're talking
> >> with vhost backends which has its own endiness.
> >>
> > I was trying to expose the buffer with the same endianness as the
> > driver originally offered, so if guest->qemu requires a bswap, I think
> > it will always require a bswap again to expose to the device again.
>  So assumes vhost-vDPA always provide a non-transitional device[1].
> 
>  Then if Qemu present a transitional device, we need to do the endian
>  conversion when necessary, if Qemu present a non-transitional device, we
>  don't need to do that, guest driver will do that for us.
> 
>  But it looks to me the virtio_tswap() can't be used for this since it:
> 
>  static inline bool virtio_access_is_big_endian(VirtIODevice *vdev)
>  {
>  #if defined(LEGACY_VIRTIO_IS_BIENDIAN)
> return virtio_is_big_endian(vdev);
>  #elif defined(TARGET_WORDS_BIGENDIAN)
> if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
> /* Devices conforming to VIRTIO 1.0 or later are always LE. */
> return false;
> }
> return true;
>  #else
> return false;
>  #endif
>  }
> 
>  So if we present a legacy device on top of a non-transitiaonl vDPA
>  device. The VIRITIO_F_VERSION_1 check is wrong.
> 
> 
> >> For 

Re: [RFC v2 11/13] vhost: Shadow virtqueue buffers forwarding

2021-03-18 Thread Jason Wang



在 2021/3/18 下午4:06, Eugenio Perez Martin 写道:

On Thu, Mar 18, 2021 at 4:14 AM Jason Wang  wrote:


在 2021/3/17 下午10:38, Eugenio Perez Martin 写道:

On Wed, Mar 17, 2021 at 3:51 AM Jason Wang  wrote:

在 2021/3/17 上午12:05, Eugenio Perez Martin 写道:

On Tue, Mar 16, 2021 at 9:15 AM Jason Wang  wrote:

在 2021/3/16 上午3:48, Eugenio Pérez 写道:

Initial version of shadow virtqueue that actually forward buffers.

It reuses the VirtQueue code for the device part. The driver part is
based on Linux's virtio_ring driver, but with stripped functionality
and optimizations so it's easier to review.

These will be added in later commits.

Signed-off-by: Eugenio Pérez 
---
 hw/virtio/vhost-shadow-virtqueue.c | 212 +++--
 hw/virtio/vhost.c  | 113 ++-
 2 files changed, 312 insertions(+), 13 deletions(-)

diff --git a/hw/virtio/vhost-shadow-virtqueue.c 
b/hw/virtio/vhost-shadow-virtqueue.c
index 1460d1d5d1..68ed0f2740 100644
--- a/hw/virtio/vhost-shadow-virtqueue.c
+++ b/hw/virtio/vhost-shadow-virtqueue.c
@@ -9,6 +9,7 @@

 #include "hw/virtio/vhost-shadow-virtqueue.h"
 #include "hw/virtio/vhost.h"
+#include "hw/virtio/virtio-access.h"

 #include "standard-headers/linux/vhost_types.h"

@@ -55,11 +56,96 @@ typedef struct VhostShadowVirtqueue {
 /* Virtio device */
 VirtIODevice *vdev;

+/* Map for returning guest's descriptors */
+VirtQueueElement **ring_id_maps;
+
+/* Next head to expose to device */
+uint16_t avail_idx_shadow;
+
+/* Next free descriptor */
+uint16_t free_head;
+
+/* Last seen used idx */
+uint16_t shadow_used_idx;
+
+/* Next head to consume from device */
+uint16_t used_idx;
+
 /* Descriptors copied from guest */
 vring_desc_t descs[];
 } VhostShadowVirtqueue;

-/* Forward guest notifications */
+static void vhost_vring_write_descs(VhostShadowVirtqueue *svq,
+const struct iovec *iovec,
+size_t num, bool more_descs, bool write)
+{
+uint16_t i = svq->free_head, last = svq->free_head;
+unsigned n;
+uint16_t flags = write ? virtio_tswap16(svq->vdev, VRING_DESC_F_WRITE) : 0;
+vring_desc_t *descs = svq->vring.desc;
+
+if (num == 0) {
+return;
+}
+
+for (n = 0; n < num; n++) {
+if (more_descs || (n + 1 < num)) {
+descs[i].flags = flags | virtio_tswap16(svq->vdev,
+VRING_DESC_F_NEXT);
+} else {
+descs[i].flags = flags;
+}
+descs[i].addr = virtio_tswap64(svq->vdev, (hwaddr)iovec[n].iov_base);

So unsing virtio_tswap() is probably not correct since we're talking
with vhost backends which has its own endiness.


I was trying to expose the buffer with the same endianness as the
driver originally offered, so if guest->qemu requires a bswap, I think
it will always require a bswap again to expose to the device again.

So assumes vhost-vDPA always provide a non-transitional device[1].

Then if Qemu present a transitional device, we need to do the endian
conversion when necessary, if Qemu present a non-transitional device, we
don't need to do that, guest driver will do that for us.

But it looks to me the virtio_tswap() can't be used for this since it:

static inline bool virtio_access_is_big_endian(VirtIODevice *vdev)
{
#if defined(LEGACY_VIRTIO_IS_BIENDIAN)
   return virtio_is_big_endian(vdev);
#elif defined(TARGET_WORDS_BIGENDIAN)
   if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
   /* Devices conforming to VIRTIO 1.0 or later are always LE. */
   return false;
   }
   return true;
#else
   return false;
#endif
}

So if we present a legacy device on top of a non-transitiaonl vDPA
device. The VIRITIO_F_VERSION_1 check is wrong.



For vhost-vDPA, we can assume that it's a 1.0 device.

Isn't it needed if the host is big endian?

[1]

So non-transitional device always assume little endian.

For vhost-vDPA, we don't want to present transitional device which may
end up with a lot of burdens.

I suspect the legacy driver plust vhost vDPA already break, so I plan to
mandate VERSION_1 for all vDPA devices.


Right. I think it's the best then.

However, then we will need a similar method to always expose
address/length as little endian, isn't it?


Yes.



+descs[i].len = virtio_tswap32(svq->vdev, iovec[n].iov_len);
+
+last = i;
+i = virtio_tswap16(svq->vdev, descs[i].next);
+}
+
+svq->free_head = virtio_tswap16(svq->vdev, descs[last].next);
+}
+
+static unsigned vhost_shadow_vq_add_split(VhostShadowVirtqueue *svq,
+  VirtQueueElement *elem)
+{
+int head;
+unsigned avail_idx;
+vring_avail_t *avail = svq->vring.avail;
+
+head = svq->free_head;
+
+/* We need some descriptors here */
+assert(elem->out_num || elem->in_num);
+
+

Re: [RFC v2 11/13] vhost: Shadow virtqueue buffers forwarding

2021-03-18 Thread Eugenio Perez Martin
On Thu, Mar 18, 2021 at 4:14 AM Jason Wang  wrote:
>
>
> 在 2021/3/17 下午10:38, Eugenio Perez Martin 写道:
> > On Wed, Mar 17, 2021 at 3:51 AM Jason Wang  wrote:
> >>
> >> 在 2021/3/17 上午12:05, Eugenio Perez Martin 写道:
> >>> On Tue, Mar 16, 2021 at 9:15 AM Jason Wang  wrote:
>  在 2021/3/16 上午3:48, Eugenio Pérez 写道:
> > Initial version of shadow virtqueue that actually forward buffers.
> >
> > It reuses the VirtQueue code for the device part. The driver part is
> > based on Linux's virtio_ring driver, but with stripped functionality
> > and optimizations so it's easier to review.
> >
> > These will be added in later commits.
> >
> > Signed-off-by: Eugenio Pérez 
> > ---
> > hw/virtio/vhost-shadow-virtqueue.c | 212 
> > +++--
> > hw/virtio/vhost.c  | 113 ++-
> > 2 files changed, 312 insertions(+), 13 deletions(-)
> >
> > diff --git a/hw/virtio/vhost-shadow-virtqueue.c 
> > b/hw/virtio/vhost-shadow-virtqueue.c
> > index 1460d1d5d1..68ed0f2740 100644
> > --- a/hw/virtio/vhost-shadow-virtqueue.c
> > +++ b/hw/virtio/vhost-shadow-virtqueue.c
> > @@ -9,6 +9,7 @@
> >
> > #include "hw/virtio/vhost-shadow-virtqueue.h"
> > #include "hw/virtio/vhost.h"
> > +#include "hw/virtio/virtio-access.h"
> >
> > #include "standard-headers/linux/vhost_types.h"
> >
> > @@ -55,11 +56,96 @@ typedef struct VhostShadowVirtqueue {
> > /* Virtio device */
> > VirtIODevice *vdev;
> >
> > +/* Map for returning guest's descriptors */
> > +VirtQueueElement **ring_id_maps;
> > +
> > +/* Next head to expose to device */
> > +uint16_t avail_idx_shadow;
> > +
> > +/* Next free descriptor */
> > +uint16_t free_head;
> > +
> > +/* Last seen used idx */
> > +uint16_t shadow_used_idx;
> > +
> > +/* Next head to consume from device */
> > +uint16_t used_idx;
> > +
> > /* Descriptors copied from guest */
> > vring_desc_t descs[];
> > } VhostShadowVirtqueue;
> >
> > -/* Forward guest notifications */
> > +static void vhost_vring_write_descs(VhostShadowVirtqueue *svq,
> > +const struct iovec *iovec,
> > +size_t num, bool more_descs, bool 
> > write)
> > +{
> > +uint16_t i = svq->free_head, last = svq->free_head;
> > +unsigned n;
> > +uint16_t flags = write ? virtio_tswap16(svq->vdev, 
> > VRING_DESC_F_WRITE) : 0;
> > +vring_desc_t *descs = svq->vring.desc;
> > +
> > +if (num == 0) {
> > +return;
> > +}
> > +
> > +for (n = 0; n < num; n++) {
> > +if (more_descs || (n + 1 < num)) {
> > +descs[i].flags = flags | virtio_tswap16(svq->vdev,
> > +VRING_DESC_F_NEXT);
> > +} else {
> > +descs[i].flags = flags;
> > +}
> > +descs[i].addr = virtio_tswap64(svq->vdev, 
> > (hwaddr)iovec[n].iov_base);
>  So unsing virtio_tswap() is probably not correct since we're talking
>  with vhost backends which has its own endiness.
> 
> >>> I was trying to expose the buffer with the same endianness as the
> >>> driver originally offered, so if guest->qemu requires a bswap, I think
> >>> it will always require a bswap again to expose to the device again.
> >>
> >> So assumes vhost-vDPA always provide a non-transitional device[1].
> >>
> >> Then if Qemu present a transitional device, we need to do the endian
> >> conversion when necessary, if Qemu present a non-transitional device, we
> >> don't need to do that, guest driver will do that for us.
> >>
> >> But it looks to me the virtio_tswap() can't be used for this since it:
> >>
> >> static inline bool virtio_access_is_big_endian(VirtIODevice *vdev)
> >> {
> >> #if defined(LEGACY_VIRTIO_IS_BIENDIAN)
> >>   return virtio_is_big_endian(vdev);
> >> #elif defined(TARGET_WORDS_BIGENDIAN)
> >>   if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
> >>   /* Devices conforming to VIRTIO 1.0 or later are always LE. */
> >>   return false;
> >>   }
> >>   return true;
> >> #else
> >>   return false;
> >> #endif
> >> }
> >>
> >> So if we present a legacy device on top of a non-transitiaonl vDPA
> >> device. The VIRITIO_F_VERSION_1 check is wrong.
> >>
> >>
>  For vhost-vDPA, we can assume that it's a 1.0 device.
> >>> Isn't it needed if the host is big endian?
> >>
> >> [1]
> >>
> >> So non-transitional device always assume little endian.
> >>
> >> For vhost-vDPA, we don't want to present transitional device which may
> >> end up with a lot of burdens.
> >>
> >> I suspect the legacy driver plust vhost vDPA already break, so 

Re: [RFC v2 11/13] vhost: Shadow virtqueue buffers forwarding

2021-03-17 Thread Jason Wang



在 2021/3/17 下午10:38, Eugenio Perez Martin 写道:

On Wed, Mar 17, 2021 at 3:51 AM Jason Wang  wrote:


在 2021/3/17 上午12:05, Eugenio Perez Martin 写道:

On Tue, Mar 16, 2021 at 9:15 AM Jason Wang  wrote:

在 2021/3/16 上午3:48, Eugenio Pérez 写道:

Initial version of shadow virtqueue that actually forward buffers.

It reuses the VirtQueue code for the device part. The driver part is
based on Linux's virtio_ring driver, but with stripped functionality
and optimizations so it's easier to review.

These will be added in later commits.

Signed-off-by: Eugenio Pérez 
---
hw/virtio/vhost-shadow-virtqueue.c | 212 +++--
hw/virtio/vhost.c  | 113 ++-
2 files changed, 312 insertions(+), 13 deletions(-)

diff --git a/hw/virtio/vhost-shadow-virtqueue.c 
b/hw/virtio/vhost-shadow-virtqueue.c
index 1460d1d5d1..68ed0f2740 100644
--- a/hw/virtio/vhost-shadow-virtqueue.c
+++ b/hw/virtio/vhost-shadow-virtqueue.c
@@ -9,6 +9,7 @@

#include "hw/virtio/vhost-shadow-virtqueue.h"
#include "hw/virtio/vhost.h"
+#include "hw/virtio/virtio-access.h"

#include "standard-headers/linux/vhost_types.h"

@@ -55,11 +56,96 @@ typedef struct VhostShadowVirtqueue {
/* Virtio device */
VirtIODevice *vdev;

+/* Map for returning guest's descriptors */
+VirtQueueElement **ring_id_maps;
+
+/* Next head to expose to device */
+uint16_t avail_idx_shadow;
+
+/* Next free descriptor */
+uint16_t free_head;
+
+/* Last seen used idx */
+uint16_t shadow_used_idx;
+
+/* Next head to consume from device */
+uint16_t used_idx;
+
/* Descriptors copied from guest */
vring_desc_t descs[];
} VhostShadowVirtqueue;

-/* Forward guest notifications */
+static void vhost_vring_write_descs(VhostShadowVirtqueue *svq,
+const struct iovec *iovec,
+size_t num, bool more_descs, bool write)
+{
+uint16_t i = svq->free_head, last = svq->free_head;
+unsigned n;
+uint16_t flags = write ? virtio_tswap16(svq->vdev, VRING_DESC_F_WRITE) : 0;
+vring_desc_t *descs = svq->vring.desc;
+
+if (num == 0) {
+return;
+}
+
+for (n = 0; n < num; n++) {
+if (more_descs || (n + 1 < num)) {
+descs[i].flags = flags | virtio_tswap16(svq->vdev,
+VRING_DESC_F_NEXT);
+} else {
+descs[i].flags = flags;
+}
+descs[i].addr = virtio_tswap64(svq->vdev, (hwaddr)iovec[n].iov_base);

So unsing virtio_tswap() is probably not correct since we're talking
with vhost backends which has its own endiness.


I was trying to expose the buffer with the same endianness as the
driver originally offered, so if guest->qemu requires a bswap, I think
it will always require a bswap again to expose to the device again.


So assumes vhost-vDPA always provide a non-transitional device[1].

Then if Qemu present a transitional device, we need to do the endian
conversion when necessary, if Qemu present a non-transitional device, we
don't need to do that, guest driver will do that for us.

But it looks to me the virtio_tswap() can't be used for this since it:

static inline bool virtio_access_is_big_endian(VirtIODevice *vdev)
{
#if defined(LEGACY_VIRTIO_IS_BIENDIAN)
  return virtio_is_big_endian(vdev);
#elif defined(TARGET_WORDS_BIGENDIAN)
  if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
  /* Devices conforming to VIRTIO 1.0 or later are always LE. */
  return false;
  }
  return true;
#else
  return false;
#endif
}

So if we present a legacy device on top of a non-transitiaonl vDPA
device. The VIRITIO_F_VERSION_1 check is wrong.



For vhost-vDPA, we can assume that it's a 1.0 device.

Isn't it needed if the host is big endian?


[1]

So non-transitional device always assume little endian.

For vhost-vDPA, we don't want to present transitional device which may
end up with a lot of burdens.

I suspect the legacy driver plust vhost vDPA already break, so I plan to
mandate VERSION_1 for all vDPA devices.


Right. I think it's the best then.

However, then we will need a similar method to always expose
address/length as little endian, isn't it?



Yes.





+descs[i].len = virtio_tswap32(svq->vdev, iovec[n].iov_len);
+
+last = i;
+i = virtio_tswap16(svq->vdev, descs[i].next);
+}
+
+svq->free_head = virtio_tswap16(svq->vdev, descs[last].next);
+}
+
+static unsigned vhost_shadow_vq_add_split(VhostShadowVirtqueue *svq,
+  VirtQueueElement *elem)
+{
+int head;
+unsigned avail_idx;
+vring_avail_t *avail = svq->vring.avail;
+
+head = svq->free_head;
+
+/* We need some descriptors here */
+assert(elem->out_num || elem->in_num);
+
+vhost_vring_write_descs(svq, elem->out_sg, elem->out_num,
+elem->in_num > 0, false);
+   

Re: [RFC v2 11/13] vhost: Shadow virtqueue buffers forwarding

2021-03-17 Thread Eugenio Perez Martin
On Wed, Mar 17, 2021 at 3:51 AM Jason Wang  wrote:
>
>
> 在 2021/3/17 上午12:05, Eugenio Perez Martin 写道:
> > On Tue, Mar 16, 2021 at 9:15 AM Jason Wang  wrote:
> >>
> >> 在 2021/3/16 上午3:48, Eugenio Pérez 写道:
> >>> Initial version of shadow virtqueue that actually forward buffers.
> >>>
> >>> It reuses the VirtQueue code for the device part. The driver part is
> >>> based on Linux's virtio_ring driver, but with stripped functionality
> >>> and optimizations so it's easier to review.
> >>>
> >>> These will be added in later commits.
> >>>
> >>> Signed-off-by: Eugenio Pérez 
> >>> ---
> >>>hw/virtio/vhost-shadow-virtqueue.c | 212 +++--
> >>>hw/virtio/vhost.c  | 113 ++-
> >>>2 files changed, 312 insertions(+), 13 deletions(-)
> >>>
> >>> diff --git a/hw/virtio/vhost-shadow-virtqueue.c 
> >>> b/hw/virtio/vhost-shadow-virtqueue.c
> >>> index 1460d1d5d1..68ed0f2740 100644
> >>> --- a/hw/virtio/vhost-shadow-virtqueue.c
> >>> +++ b/hw/virtio/vhost-shadow-virtqueue.c
> >>> @@ -9,6 +9,7 @@
> >>>
> >>>#include "hw/virtio/vhost-shadow-virtqueue.h"
> >>>#include "hw/virtio/vhost.h"
> >>> +#include "hw/virtio/virtio-access.h"
> >>>
> >>>#include "standard-headers/linux/vhost_types.h"
> >>>
> >>> @@ -55,11 +56,96 @@ typedef struct VhostShadowVirtqueue {
> >>>/* Virtio device */
> >>>VirtIODevice *vdev;
> >>>
> >>> +/* Map for returning guest's descriptors */
> >>> +VirtQueueElement **ring_id_maps;
> >>> +
> >>> +/* Next head to expose to device */
> >>> +uint16_t avail_idx_shadow;
> >>> +
> >>> +/* Next free descriptor */
> >>> +uint16_t free_head;
> >>> +
> >>> +/* Last seen used idx */
> >>> +uint16_t shadow_used_idx;
> >>> +
> >>> +/* Next head to consume from device */
> >>> +uint16_t used_idx;
> >>> +
> >>>/* Descriptors copied from guest */
> >>>vring_desc_t descs[];
> >>>} VhostShadowVirtqueue;
> >>>
> >>> -/* Forward guest notifications */
> >>> +static void vhost_vring_write_descs(VhostShadowVirtqueue *svq,
> >>> +const struct iovec *iovec,
> >>> +size_t num, bool more_descs, bool 
> >>> write)
> >>> +{
> >>> +uint16_t i = svq->free_head, last = svq->free_head;
> >>> +unsigned n;
> >>> +uint16_t flags = write ? virtio_tswap16(svq->vdev, 
> >>> VRING_DESC_F_WRITE) : 0;
> >>> +vring_desc_t *descs = svq->vring.desc;
> >>> +
> >>> +if (num == 0) {
> >>> +return;
> >>> +}
> >>> +
> >>> +for (n = 0; n < num; n++) {
> >>> +if (more_descs || (n + 1 < num)) {
> >>> +descs[i].flags = flags | virtio_tswap16(svq->vdev,
> >>> +VRING_DESC_F_NEXT);
> >>> +} else {
> >>> +descs[i].flags = flags;
> >>> +}
> >>> +descs[i].addr = virtio_tswap64(svq->vdev, 
> >>> (hwaddr)iovec[n].iov_base);
> >>
> >> So unsing virtio_tswap() is probably not correct since we're talking
> >> with vhost backends which has its own endiness.
> >>
> > I was trying to expose the buffer with the same endianness as the
> > driver originally offered, so if guest->qemu requires a bswap, I think
> > it will always require a bswap again to expose to the device again.
>
>
> So assumes vhost-vDPA always provide a non-transitional device[1].
>
> Then if Qemu present a transitional device, we need to do the endian
> conversion when necessary, if Qemu present a non-transitional device, we
> don't need to do that, guest driver will do that for us.
>
> But it looks to me the virtio_tswap() can't be used for this since it:
>
> static inline bool virtio_access_is_big_endian(VirtIODevice *vdev)
> {
> #if defined(LEGACY_VIRTIO_IS_BIENDIAN)
>  return virtio_is_big_endian(vdev);
> #elif defined(TARGET_WORDS_BIGENDIAN)
>  if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
>  /* Devices conforming to VIRTIO 1.0 or later are always LE. */
>  return false;
>  }
>  return true;
> #else
>  return false;
> #endif
> }
>
> So if we present a legacy device on top of a non-transitiaonl vDPA
> device. The VIRITIO_F_VERSION_1 check is wrong.
>
>
> >
> >> For vhost-vDPA, we can assume that it's a 1.0 device.
> > Isn't it needed if the host is big endian?
>
>
> [1]
>
> So non-transitional device always assume little endian.
>
> For vhost-vDPA, we don't want to present transitional device which may
> end up with a lot of burdens.
>
> I suspect the legacy driver plust vhost vDPA already break, so I plan to
> mandate VERSION_1 for all vDPA devices.
>

Right. I think it's the best then.

However, then we will need a similar method to always expose
address/length as little endian, isn't it?

>
> >
> >>
> >>> +descs[i].len = virtio_tswap32(svq->vdev, iovec[n].iov_len);
> >>> +
> >>> +last = i;
> >>> +i = virtio_tswap16(svq->vdev, descs[i].next);
> >>> +}
> >>> +
> 

Re: [RFC v2 11/13] vhost: Shadow virtqueue buffers forwarding

2021-03-16 Thread Jason Wang



在 2021/3/17 上午12:05, Eugenio Perez Martin 写道:

On Tue, Mar 16, 2021 at 9:15 AM Jason Wang  wrote:


在 2021/3/16 上午3:48, Eugenio Pérez 写道:

Initial version of shadow virtqueue that actually forward buffers.

It reuses the VirtQueue code for the device part. The driver part is
based on Linux's virtio_ring driver, but with stripped functionality
and optimizations so it's easier to review.

These will be added in later commits.

Signed-off-by: Eugenio Pérez 
---
   hw/virtio/vhost-shadow-virtqueue.c | 212 +++--
   hw/virtio/vhost.c  | 113 ++-
   2 files changed, 312 insertions(+), 13 deletions(-)

diff --git a/hw/virtio/vhost-shadow-virtqueue.c 
b/hw/virtio/vhost-shadow-virtqueue.c
index 1460d1d5d1..68ed0f2740 100644
--- a/hw/virtio/vhost-shadow-virtqueue.c
+++ b/hw/virtio/vhost-shadow-virtqueue.c
@@ -9,6 +9,7 @@

   #include "hw/virtio/vhost-shadow-virtqueue.h"
   #include "hw/virtio/vhost.h"
+#include "hw/virtio/virtio-access.h"

   #include "standard-headers/linux/vhost_types.h"

@@ -55,11 +56,96 @@ typedef struct VhostShadowVirtqueue {
   /* Virtio device */
   VirtIODevice *vdev;

+/* Map for returning guest's descriptors */
+VirtQueueElement **ring_id_maps;
+
+/* Next head to expose to device */
+uint16_t avail_idx_shadow;
+
+/* Next free descriptor */
+uint16_t free_head;
+
+/* Last seen used idx */
+uint16_t shadow_used_idx;
+
+/* Next head to consume from device */
+uint16_t used_idx;
+
   /* Descriptors copied from guest */
   vring_desc_t descs[];
   } VhostShadowVirtqueue;

-/* Forward guest notifications */
+static void vhost_vring_write_descs(VhostShadowVirtqueue *svq,
+const struct iovec *iovec,
+size_t num, bool more_descs, bool write)
+{
+uint16_t i = svq->free_head, last = svq->free_head;
+unsigned n;
+uint16_t flags = write ? virtio_tswap16(svq->vdev, VRING_DESC_F_WRITE) : 0;
+vring_desc_t *descs = svq->vring.desc;
+
+if (num == 0) {
+return;
+}
+
+for (n = 0; n < num; n++) {
+if (more_descs || (n + 1 < num)) {
+descs[i].flags = flags | virtio_tswap16(svq->vdev,
+VRING_DESC_F_NEXT);
+} else {
+descs[i].flags = flags;
+}
+descs[i].addr = virtio_tswap64(svq->vdev, (hwaddr)iovec[n].iov_base);


So unsing virtio_tswap() is probably not correct since we're talking
with vhost backends which has its own endiness.


I was trying to expose the buffer with the same endianness as the
driver originally offered, so if guest->qemu requires a bswap, I think
it will always require a bswap again to expose to the device again.



So assumes vhost-vDPA always provide a non-transitional device[1].

Then if Qemu present a transitional device, we need to do the endian 
conversion when necessary, if Qemu present a non-transitional device, we 
don't need to do that, guest driver will do that for us.


But it looks to me the virtio_tswap() can't be used for this since it:

static inline bool virtio_access_is_big_endian(VirtIODevice *vdev)
{
#if defined(LEGACY_VIRTIO_IS_BIENDIAN)
    return virtio_is_big_endian(vdev);
#elif defined(TARGET_WORDS_BIGENDIAN)
    if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
    /* Devices conforming to VIRTIO 1.0 or later are always LE. */
    return false;
    }
    return true;
#else
    return false;
#endif
}

So if we present a legacy device on top of a non-transitiaonl vDPA 
device. The VIRITIO_F_VERSION_1 check is wrong.






For vhost-vDPA, we can assume that it's a 1.0 device.

Isn't it needed if the host is big endian?



[1]

So non-transitional device always assume little endian.

For vhost-vDPA, we don't want to present transitional device which may 
end up with a lot of burdens.


I suspect the legacy driver plust vhost vDPA already break, so I plan to 
mandate VERSION_1 for all vDPA devices.








+descs[i].len = virtio_tswap32(svq->vdev, iovec[n].iov_len);
+
+last = i;
+i = virtio_tswap16(svq->vdev, descs[i].next);
+}
+
+svq->free_head = virtio_tswap16(svq->vdev, descs[last].next);
+}
+
+static unsigned vhost_shadow_vq_add_split(VhostShadowVirtqueue *svq,
+  VirtQueueElement *elem)
+{
+int head;
+unsigned avail_idx;
+vring_avail_t *avail = svq->vring.avail;
+
+head = svq->free_head;
+
+/* We need some descriptors here */
+assert(elem->out_num || elem->in_num);
+
+vhost_vring_write_descs(svq, elem->out_sg, elem->out_num,
+elem->in_num > 0, false);
+vhost_vring_write_descs(svq, elem->in_sg, elem->in_num, false, true);
+
+/*
+ * Put entry in available array (but don't update avail->idx until they
+ * do sync).
+ */
+avail_idx = svq->avail_idx_shadow & (svq->vring.num - 1);
+

Re: [RFC v2 11/13] vhost: Shadow virtqueue buffers forwarding

2021-03-16 Thread Eugenio Perez Martin
On Tue, Mar 16, 2021 at 9:15 AM Jason Wang  wrote:
>
>
> 在 2021/3/16 上午3:48, Eugenio Pérez 写道:
> > Initial version of shadow virtqueue that actually forward buffers.
> >
> > It reuses the VirtQueue code for the device part. The driver part is
> > based on Linux's virtio_ring driver, but with stripped functionality
> > and optimizations so it's easier to review.
> >
> > These will be added in later commits.
> >
> > Signed-off-by: Eugenio Pérez 
> > ---
> >   hw/virtio/vhost-shadow-virtqueue.c | 212 +++--
> >   hw/virtio/vhost.c  | 113 ++-
> >   2 files changed, 312 insertions(+), 13 deletions(-)
> >
> > diff --git a/hw/virtio/vhost-shadow-virtqueue.c 
> > b/hw/virtio/vhost-shadow-virtqueue.c
> > index 1460d1d5d1..68ed0f2740 100644
> > --- a/hw/virtio/vhost-shadow-virtqueue.c
> > +++ b/hw/virtio/vhost-shadow-virtqueue.c
> > @@ -9,6 +9,7 @@
> >
> >   #include "hw/virtio/vhost-shadow-virtqueue.h"
> >   #include "hw/virtio/vhost.h"
> > +#include "hw/virtio/virtio-access.h"
> >
> >   #include "standard-headers/linux/vhost_types.h"
> >
> > @@ -55,11 +56,96 @@ typedef struct VhostShadowVirtqueue {
> >   /* Virtio device */
> >   VirtIODevice *vdev;
> >
> > +/* Map for returning guest's descriptors */
> > +VirtQueueElement **ring_id_maps;
> > +
> > +/* Next head to expose to device */
> > +uint16_t avail_idx_shadow;
> > +
> > +/* Next free descriptor */
> > +uint16_t free_head;
> > +
> > +/* Last seen used idx */
> > +uint16_t shadow_used_idx;
> > +
> > +/* Next head to consume from device */
> > +uint16_t used_idx;
> > +
> >   /* Descriptors copied from guest */
> >   vring_desc_t descs[];
> >   } VhostShadowVirtqueue;
> >
> > -/* Forward guest notifications */
> > +static void vhost_vring_write_descs(VhostShadowVirtqueue *svq,
> > +const struct iovec *iovec,
> > +size_t num, bool more_descs, bool 
> > write)
> > +{
> > +uint16_t i = svq->free_head, last = svq->free_head;
> > +unsigned n;
> > +uint16_t flags = write ? virtio_tswap16(svq->vdev, VRING_DESC_F_WRITE) 
> > : 0;
> > +vring_desc_t *descs = svq->vring.desc;
> > +
> > +if (num == 0) {
> > +return;
> > +}
> > +
> > +for (n = 0; n < num; n++) {
> > +if (more_descs || (n + 1 < num)) {
> > +descs[i].flags = flags | virtio_tswap16(svq->vdev,
> > +VRING_DESC_F_NEXT);
> > +} else {
> > +descs[i].flags = flags;
> > +}
> > +descs[i].addr = virtio_tswap64(svq->vdev, 
> > (hwaddr)iovec[n].iov_base);
>
>
> So unsing virtio_tswap() is probably not correct since we're talking
> with vhost backends which has its own endiness.
>

I was trying to expose the buffer with the same endianness as the
driver originally offered, so if guest->qemu requires a bswap, I think
it will always require a bswap again to expose to the device again.

> For vhost-vDPA, we can assume that it's a 1.0 device.

Isn't it needed if the host is big endian?

>
>
> > +descs[i].len = virtio_tswap32(svq->vdev, iovec[n].iov_len);
> > +
> > +last = i;
> > +i = virtio_tswap16(svq->vdev, descs[i].next);
> > +}
> > +
> > +svq->free_head = virtio_tswap16(svq->vdev, descs[last].next);
> > +}
> > +
> > +static unsigned vhost_shadow_vq_add_split(VhostShadowVirtqueue *svq,
> > +  VirtQueueElement *elem)
> > +{
> > +int head;
> > +unsigned avail_idx;
> > +vring_avail_t *avail = svq->vring.avail;
> > +
> > +head = svq->free_head;
> > +
> > +/* We need some descriptors here */
> > +assert(elem->out_num || elem->in_num);
> > +
> > +vhost_vring_write_descs(svq, elem->out_sg, elem->out_num,
> > +elem->in_num > 0, false);
> > +vhost_vring_write_descs(svq, elem->in_sg, elem->in_num, false, true);
> > +
> > +/*
> > + * Put entry in available array (but don't update avail->idx until they
> > + * do sync).
> > + */
> > +avail_idx = svq->avail_idx_shadow & (svq->vring.num - 1);
> > +avail->ring[avail_idx] = virtio_tswap16(svq->vdev, head);
> > +svq->avail_idx_shadow++;
> > +
> > +/* Expose descriptors to device */
> > +smp_wmb();
> > +avail->idx = virtio_tswap16(svq->vdev, svq->avail_idx_shadow);
> > +
> > +return head;
> > +
> > +}
> > +
> > +static void vhost_shadow_vq_add(VhostShadowVirtqueue *svq,
> > +VirtQueueElement *elem)
> > +{
> > +unsigned qemu_head = vhost_shadow_vq_add_split(svq, elem);
> > +
> > +svq->ring_id_maps[qemu_head] = elem;
> > +}
> > +
> > +/* Handle guest->device notifications */
> >   static void vhost_handle_guest_kick(EventNotifier *n)
> >   {
> >   VhostShadowVirtqueue *svq = container_of(n, VhostShadowVirtqueue,
> > @@ -69,7 +155,72 @@ static void 

Re: [RFC v2 11/13] vhost: Shadow virtqueue buffers forwarding

2021-03-16 Thread Jason Wang



在 2021/3/16 上午3:48, Eugenio Pérez 写道:

Initial version of shadow virtqueue that actually forward buffers.

It reuses the VirtQueue code for the device part. The driver part is
based on Linux's virtio_ring driver, but with stripped functionality
and optimizations so it's easier to review.

These will be added in later commits.

Signed-off-by: Eugenio Pérez 
---
  hw/virtio/vhost-shadow-virtqueue.c | 212 +++--
  hw/virtio/vhost.c  | 113 ++-
  2 files changed, 312 insertions(+), 13 deletions(-)

diff --git a/hw/virtio/vhost-shadow-virtqueue.c 
b/hw/virtio/vhost-shadow-virtqueue.c
index 1460d1d5d1..68ed0f2740 100644
--- a/hw/virtio/vhost-shadow-virtqueue.c
+++ b/hw/virtio/vhost-shadow-virtqueue.c
@@ -9,6 +9,7 @@
  
  #include "hw/virtio/vhost-shadow-virtqueue.h"

  #include "hw/virtio/vhost.h"
+#include "hw/virtio/virtio-access.h"
  
  #include "standard-headers/linux/vhost_types.h"
  
@@ -55,11 +56,96 @@ typedef struct VhostShadowVirtqueue {

  /* Virtio device */
  VirtIODevice *vdev;
  
+/* Map for returning guest's descriptors */

+VirtQueueElement **ring_id_maps;
+
+/* Next head to expose to device */
+uint16_t avail_idx_shadow;
+
+/* Next free descriptor */
+uint16_t free_head;
+
+/* Last seen used idx */
+uint16_t shadow_used_idx;
+
+/* Next head to consume from device */
+uint16_t used_idx;
+
  /* Descriptors copied from guest */
  vring_desc_t descs[];
  } VhostShadowVirtqueue;
  
-/* Forward guest notifications */

+static void vhost_vring_write_descs(VhostShadowVirtqueue *svq,
+const struct iovec *iovec,
+size_t num, bool more_descs, bool write)
+{
+uint16_t i = svq->free_head, last = svq->free_head;
+unsigned n;
+uint16_t flags = write ? virtio_tswap16(svq->vdev, VRING_DESC_F_WRITE) : 0;
+vring_desc_t *descs = svq->vring.desc;
+
+if (num == 0) {
+return;
+}
+
+for (n = 0; n < num; n++) {
+if (more_descs || (n + 1 < num)) {
+descs[i].flags = flags | virtio_tswap16(svq->vdev,
+VRING_DESC_F_NEXT);
+} else {
+descs[i].flags = flags;
+}
+descs[i].addr = virtio_tswap64(svq->vdev, (hwaddr)iovec[n].iov_base);



So unsing virtio_tswap() is probably not correct since we're talking 
with vhost backends which has its own endiness.


For vhost-vDPA, we can assume that it's a 1.0 device.



+descs[i].len = virtio_tswap32(svq->vdev, iovec[n].iov_len);
+
+last = i;
+i = virtio_tswap16(svq->vdev, descs[i].next);
+}
+
+svq->free_head = virtio_tswap16(svq->vdev, descs[last].next);
+}
+
+static unsigned vhost_shadow_vq_add_split(VhostShadowVirtqueue *svq,
+  VirtQueueElement *elem)
+{
+int head;
+unsigned avail_idx;
+vring_avail_t *avail = svq->vring.avail;
+
+head = svq->free_head;
+
+/* We need some descriptors here */
+assert(elem->out_num || elem->in_num);
+
+vhost_vring_write_descs(svq, elem->out_sg, elem->out_num,
+elem->in_num > 0, false);
+vhost_vring_write_descs(svq, elem->in_sg, elem->in_num, false, true);
+
+/*
+ * Put entry in available array (but don't update avail->idx until they
+ * do sync).
+ */
+avail_idx = svq->avail_idx_shadow & (svq->vring.num - 1);
+avail->ring[avail_idx] = virtio_tswap16(svq->vdev, head);
+svq->avail_idx_shadow++;
+
+/* Expose descriptors to device */
+smp_wmb();
+avail->idx = virtio_tswap16(svq->vdev, svq->avail_idx_shadow);
+
+return head;
+
+}
+
+static void vhost_shadow_vq_add(VhostShadowVirtqueue *svq,
+VirtQueueElement *elem)
+{
+unsigned qemu_head = vhost_shadow_vq_add_split(svq, elem);
+
+svq->ring_id_maps[qemu_head] = elem;
+}
+
+/* Handle guest->device notifications */
  static void vhost_handle_guest_kick(EventNotifier *n)
  {
  VhostShadowVirtqueue *svq = container_of(n, VhostShadowVirtqueue,
@@ -69,7 +155,72 @@ static void vhost_handle_guest_kick(EventNotifier *n)
  return;
  }
  
-event_notifier_set(>kick_notifier);

+/* Make available as many buffers as possible */
+do {
+if (virtio_queue_get_notification(svq->vq)) {
+/* No more notifications until process all available */
+virtio_queue_set_notification(svq->vq, false);
+}
+
+while (true) {
+VirtQueueElement *elem;
+if (virtio_queue_full(svq->vq)) {
+break;



So we've disabled guest notification. If buffer has been consumed, we 
need to retry the handle_guest_kick here. But I didn't find the code?




+}
+
+elem = virtqueue_pop(svq->vq, sizeof(*elem));
+if (!elem) {
+break;
+}
+
+

[RFC v2 11/13] vhost: Shadow virtqueue buffers forwarding

2021-03-15 Thread Eugenio Pérez
Initial version of shadow virtqueue that actually forward buffers.

It reuses the VirtQueue code for the device part. The driver part is
based on Linux's virtio_ring driver, but with stripped functionality
and optimizations so it's easier to review.

These will be added in later commits.

Signed-off-by: Eugenio Pérez 
---
 hw/virtio/vhost-shadow-virtqueue.c | 212 +++--
 hw/virtio/vhost.c  | 113 ++-
 2 files changed, 312 insertions(+), 13 deletions(-)

diff --git a/hw/virtio/vhost-shadow-virtqueue.c 
b/hw/virtio/vhost-shadow-virtqueue.c
index 1460d1d5d1..68ed0f2740 100644
--- a/hw/virtio/vhost-shadow-virtqueue.c
+++ b/hw/virtio/vhost-shadow-virtqueue.c
@@ -9,6 +9,7 @@
 
 #include "hw/virtio/vhost-shadow-virtqueue.h"
 #include "hw/virtio/vhost.h"
+#include "hw/virtio/virtio-access.h"
 
 #include "standard-headers/linux/vhost_types.h"
 
@@ -55,11 +56,96 @@ typedef struct VhostShadowVirtqueue {
 /* Virtio device */
 VirtIODevice *vdev;
 
+/* Map for returning guest's descriptors */
+VirtQueueElement **ring_id_maps;
+
+/* Next head to expose to device */
+uint16_t avail_idx_shadow;
+
+/* Next free descriptor */
+uint16_t free_head;
+
+/* Last seen used idx */
+uint16_t shadow_used_idx;
+
+/* Next head to consume from device */
+uint16_t used_idx;
+
 /* Descriptors copied from guest */
 vring_desc_t descs[];
 } VhostShadowVirtqueue;
 
-/* Forward guest notifications */
+static void vhost_vring_write_descs(VhostShadowVirtqueue *svq,
+const struct iovec *iovec,
+size_t num, bool more_descs, bool write)
+{
+uint16_t i = svq->free_head, last = svq->free_head;
+unsigned n;
+uint16_t flags = write ? virtio_tswap16(svq->vdev, VRING_DESC_F_WRITE) : 0;
+vring_desc_t *descs = svq->vring.desc;
+
+if (num == 0) {
+return;
+}
+
+for (n = 0; n < num; n++) {
+if (more_descs || (n + 1 < num)) {
+descs[i].flags = flags | virtio_tswap16(svq->vdev,
+VRING_DESC_F_NEXT);
+} else {
+descs[i].flags = flags;
+}
+descs[i].addr = virtio_tswap64(svq->vdev, (hwaddr)iovec[n].iov_base);
+descs[i].len = virtio_tswap32(svq->vdev, iovec[n].iov_len);
+
+last = i;
+i = virtio_tswap16(svq->vdev, descs[i].next);
+}
+
+svq->free_head = virtio_tswap16(svq->vdev, descs[last].next);
+}
+
+static unsigned vhost_shadow_vq_add_split(VhostShadowVirtqueue *svq,
+  VirtQueueElement *elem)
+{
+int head;
+unsigned avail_idx;
+vring_avail_t *avail = svq->vring.avail;
+
+head = svq->free_head;
+
+/* We need some descriptors here */
+assert(elem->out_num || elem->in_num);
+
+vhost_vring_write_descs(svq, elem->out_sg, elem->out_num,
+elem->in_num > 0, false);
+vhost_vring_write_descs(svq, elem->in_sg, elem->in_num, false, true);
+
+/*
+ * Put entry in available array (but don't update avail->idx until they
+ * do sync).
+ */
+avail_idx = svq->avail_idx_shadow & (svq->vring.num - 1);
+avail->ring[avail_idx] = virtio_tswap16(svq->vdev, head);
+svq->avail_idx_shadow++;
+
+/* Expose descriptors to device */
+smp_wmb();
+avail->idx = virtio_tswap16(svq->vdev, svq->avail_idx_shadow);
+
+return head;
+
+}
+
+static void vhost_shadow_vq_add(VhostShadowVirtqueue *svq,
+VirtQueueElement *elem)
+{
+unsigned qemu_head = vhost_shadow_vq_add_split(svq, elem);
+
+svq->ring_id_maps[qemu_head] = elem;
+}
+
+/* Handle guest->device notifications */
 static void vhost_handle_guest_kick(EventNotifier *n)
 {
 VhostShadowVirtqueue *svq = container_of(n, VhostShadowVirtqueue,
@@ -69,7 +155,72 @@ static void vhost_handle_guest_kick(EventNotifier *n)
 return;
 }
 
-event_notifier_set(>kick_notifier);
+/* Make available as many buffers as possible */
+do {
+if (virtio_queue_get_notification(svq->vq)) {
+/* No more notifications until process all available */
+virtio_queue_set_notification(svq->vq, false);
+}
+
+while (true) {
+VirtQueueElement *elem;
+if (virtio_queue_full(svq->vq)) {
+break;
+}
+
+elem = virtqueue_pop(svq->vq, sizeof(*elem));
+if (!elem) {
+break;
+}
+
+vhost_shadow_vq_add(svq, elem);
+event_notifier_set(>kick_notifier);
+}
+
+virtio_queue_set_notification(svq->vq, true);
+} while (!virtio_queue_empty(svq->vq));
+}
+
+static bool vhost_shadow_vq_more_used(VhostShadowVirtqueue *svq)
+{
+if (svq->used_idx != svq->shadow_used_idx) {
+return true;
+}
+
+/* Get used idx must not be reordered */
+