Re: [RFC v2 11/13] vhost: Shadow virtqueue buffers forwarding
On Thu, Mar 18, 2021 at 10:22 AM Jason Wang wrote: > > > 在 2021/3/18 下午4:06, Eugenio Perez Martin 写道: > > On Thu, Mar 18, 2021 at 4:14 AM Jason Wang wrote: > >> > >> 在 2021/3/17 下午10:38, Eugenio Perez Martin 写道: > >>> On Wed, Mar 17, 2021 at 3:51 AM Jason Wang wrote: > 在 2021/3/17 上午12:05, Eugenio Perez Martin 写道: > > On Tue, Mar 16, 2021 at 9:15 AM Jason Wang wrote: > >> 在 2021/3/16 上午3:48, Eugenio Pérez 写道: > >>> Initial version of shadow virtqueue that actually forward buffers. > >>> > >>> It reuses the VirtQueue code for the device part. The driver part is > >>> based on Linux's virtio_ring driver, but with stripped functionality > >>> and optimizations so it's easier to review. > >>> > >>> These will be added in later commits. > >>> > >>> Signed-off-by: Eugenio Pérez > >>> --- > >>> hw/virtio/vhost-shadow-virtqueue.c | 212 > >>> +++-- > >>> hw/virtio/vhost.c | 113 ++- > >>> 2 files changed, 312 insertions(+), 13 deletions(-) > >>> > >>> diff --git a/hw/virtio/vhost-shadow-virtqueue.c > >>> b/hw/virtio/vhost-shadow-virtqueue.c > >>> index 1460d1d5d1..68ed0f2740 100644 > >>> --- a/hw/virtio/vhost-shadow-virtqueue.c > >>> +++ b/hw/virtio/vhost-shadow-virtqueue.c > >>> @@ -9,6 +9,7 @@ > >>> > >>> #include "hw/virtio/vhost-shadow-virtqueue.h" > >>> #include "hw/virtio/vhost.h" > >>> +#include "hw/virtio/virtio-access.h" > >>> > >>> #include "standard-headers/linux/vhost_types.h" > >>> > >>> @@ -55,11 +56,96 @@ typedef struct VhostShadowVirtqueue { > >>> /* Virtio device */ > >>> VirtIODevice *vdev; > >>> > >>> +/* Map for returning guest's descriptors */ > >>> +VirtQueueElement **ring_id_maps; > >>> + > >>> +/* Next head to expose to device */ > >>> +uint16_t avail_idx_shadow; > >>> + > >>> +/* Next free descriptor */ > >>> +uint16_t free_head; > >>> + > >>> +/* Last seen used idx */ > >>> +uint16_t shadow_used_idx; > >>> + > >>> +/* Next head to consume from device */ > >>> +uint16_t used_idx; > >>> + > >>> /* Descriptors copied from guest */ > >>> vring_desc_t descs[]; > >>> } VhostShadowVirtqueue; > >>> > >>> -/* Forward guest notifications */ > >>> +static void vhost_vring_write_descs(VhostShadowVirtqueue *svq, > >>> +const struct iovec *iovec, > >>> +size_t num, bool more_descs, > >>> bool write) > >>> +{ > >>> +uint16_t i = svq->free_head, last = svq->free_head; > >>> +unsigned n; > >>> +uint16_t flags = write ? virtio_tswap16(svq->vdev, > >>> VRING_DESC_F_WRITE) : 0; > >>> +vring_desc_t *descs = svq->vring.desc; > >>> + > >>> +if (num == 0) { > >>> +return; > >>> +} > >>> + > >>> +for (n = 0; n < num; n++) { > >>> +if (more_descs || (n + 1 < num)) { > >>> +descs[i].flags = flags | virtio_tswap16(svq->vdev, > >>> + > >>> VRING_DESC_F_NEXT); > >>> +} else { > >>> +descs[i].flags = flags; > >>> +} > >>> +descs[i].addr = virtio_tswap64(svq->vdev, > >>> (hwaddr)iovec[n].iov_base); > >> So unsing virtio_tswap() is probably not correct since we're talking > >> with vhost backends which has its own endiness. > >> > > I was trying to expose the buffer with the same endianness as the > > driver originally offered, so if guest->qemu requires a bswap, I think > > it will always require a bswap again to expose to the device again. > So assumes vhost-vDPA always provide a non-transitional device[1]. > > Then if Qemu present a transitional device, we need to do the endian > conversion when necessary, if Qemu present a non-transitional device, we > don't need to do that, guest driver will do that for us. > > But it looks to me the virtio_tswap() can't be used for this since it: > > static inline bool virtio_access_is_big_endian(VirtIODevice *vdev) > { > #if defined(LEGACY_VIRTIO_IS_BIENDIAN) > return virtio_is_big_endian(vdev); > #elif defined(TARGET_WORDS_BIGENDIAN) > if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) { > /* Devices conforming to VIRTIO 1.0 or later are always LE. */ > return false; > } > return true; > #else > return false; > #endif > } > > So if we present a legacy device on top of a non-transitiaonl vDPA > device. The VIRITIO_F_VERSION_1 check is wrong. > > > >> For
Re: [RFC v2 11/13] vhost: Shadow virtqueue buffers forwarding
在 2021/3/18 下午4:06, Eugenio Perez Martin 写道: On Thu, Mar 18, 2021 at 4:14 AM Jason Wang wrote: 在 2021/3/17 下午10:38, Eugenio Perez Martin 写道: On Wed, Mar 17, 2021 at 3:51 AM Jason Wang wrote: 在 2021/3/17 上午12:05, Eugenio Perez Martin 写道: On Tue, Mar 16, 2021 at 9:15 AM Jason Wang wrote: 在 2021/3/16 上午3:48, Eugenio Pérez 写道: Initial version of shadow virtqueue that actually forward buffers. It reuses the VirtQueue code for the device part. The driver part is based on Linux's virtio_ring driver, but with stripped functionality and optimizations so it's easier to review. These will be added in later commits. Signed-off-by: Eugenio Pérez --- hw/virtio/vhost-shadow-virtqueue.c | 212 +++-- hw/virtio/vhost.c | 113 ++- 2 files changed, 312 insertions(+), 13 deletions(-) diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c index 1460d1d5d1..68ed0f2740 100644 --- a/hw/virtio/vhost-shadow-virtqueue.c +++ b/hw/virtio/vhost-shadow-virtqueue.c @@ -9,6 +9,7 @@ #include "hw/virtio/vhost-shadow-virtqueue.h" #include "hw/virtio/vhost.h" +#include "hw/virtio/virtio-access.h" #include "standard-headers/linux/vhost_types.h" @@ -55,11 +56,96 @@ typedef struct VhostShadowVirtqueue { /* Virtio device */ VirtIODevice *vdev; +/* Map for returning guest's descriptors */ +VirtQueueElement **ring_id_maps; + +/* Next head to expose to device */ +uint16_t avail_idx_shadow; + +/* Next free descriptor */ +uint16_t free_head; + +/* Last seen used idx */ +uint16_t shadow_used_idx; + +/* Next head to consume from device */ +uint16_t used_idx; + /* Descriptors copied from guest */ vring_desc_t descs[]; } VhostShadowVirtqueue; -/* Forward guest notifications */ +static void vhost_vring_write_descs(VhostShadowVirtqueue *svq, +const struct iovec *iovec, +size_t num, bool more_descs, bool write) +{ +uint16_t i = svq->free_head, last = svq->free_head; +unsigned n; +uint16_t flags = write ? virtio_tswap16(svq->vdev, VRING_DESC_F_WRITE) : 0; +vring_desc_t *descs = svq->vring.desc; + +if (num == 0) { +return; +} + +for (n = 0; n < num; n++) { +if (more_descs || (n + 1 < num)) { +descs[i].flags = flags | virtio_tswap16(svq->vdev, +VRING_DESC_F_NEXT); +} else { +descs[i].flags = flags; +} +descs[i].addr = virtio_tswap64(svq->vdev, (hwaddr)iovec[n].iov_base); So unsing virtio_tswap() is probably not correct since we're talking with vhost backends which has its own endiness. I was trying to expose the buffer with the same endianness as the driver originally offered, so if guest->qemu requires a bswap, I think it will always require a bswap again to expose to the device again. So assumes vhost-vDPA always provide a non-transitional device[1]. Then if Qemu present a transitional device, we need to do the endian conversion when necessary, if Qemu present a non-transitional device, we don't need to do that, guest driver will do that for us. But it looks to me the virtio_tswap() can't be used for this since it: static inline bool virtio_access_is_big_endian(VirtIODevice *vdev) { #if defined(LEGACY_VIRTIO_IS_BIENDIAN) return virtio_is_big_endian(vdev); #elif defined(TARGET_WORDS_BIGENDIAN) if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) { /* Devices conforming to VIRTIO 1.0 or later are always LE. */ return false; } return true; #else return false; #endif } So if we present a legacy device on top of a non-transitiaonl vDPA device. The VIRITIO_F_VERSION_1 check is wrong. For vhost-vDPA, we can assume that it's a 1.0 device. Isn't it needed if the host is big endian? [1] So non-transitional device always assume little endian. For vhost-vDPA, we don't want to present transitional device which may end up with a lot of burdens. I suspect the legacy driver plust vhost vDPA already break, so I plan to mandate VERSION_1 for all vDPA devices. Right. I think it's the best then. However, then we will need a similar method to always expose address/length as little endian, isn't it? Yes. +descs[i].len = virtio_tswap32(svq->vdev, iovec[n].iov_len); + +last = i; +i = virtio_tswap16(svq->vdev, descs[i].next); +} + +svq->free_head = virtio_tswap16(svq->vdev, descs[last].next); +} + +static unsigned vhost_shadow_vq_add_split(VhostShadowVirtqueue *svq, + VirtQueueElement *elem) +{ +int head; +unsigned avail_idx; +vring_avail_t *avail = svq->vring.avail; + +head = svq->free_head; + +/* We need some descriptors here */ +assert(elem->out_num || elem->in_num); + +
Re: [RFC v2 11/13] vhost: Shadow virtqueue buffers forwarding
On Thu, Mar 18, 2021 at 4:14 AM Jason Wang wrote: > > > 在 2021/3/17 下午10:38, Eugenio Perez Martin 写道: > > On Wed, Mar 17, 2021 at 3:51 AM Jason Wang wrote: > >> > >> 在 2021/3/17 上午12:05, Eugenio Perez Martin 写道: > >>> On Tue, Mar 16, 2021 at 9:15 AM Jason Wang wrote: > 在 2021/3/16 上午3:48, Eugenio Pérez 写道: > > Initial version of shadow virtqueue that actually forward buffers. > > > > It reuses the VirtQueue code for the device part. The driver part is > > based on Linux's virtio_ring driver, but with stripped functionality > > and optimizations so it's easier to review. > > > > These will be added in later commits. > > > > Signed-off-by: Eugenio Pérez > > --- > > hw/virtio/vhost-shadow-virtqueue.c | 212 > > +++-- > > hw/virtio/vhost.c | 113 ++- > > 2 files changed, 312 insertions(+), 13 deletions(-) > > > > diff --git a/hw/virtio/vhost-shadow-virtqueue.c > > b/hw/virtio/vhost-shadow-virtqueue.c > > index 1460d1d5d1..68ed0f2740 100644 > > --- a/hw/virtio/vhost-shadow-virtqueue.c > > +++ b/hw/virtio/vhost-shadow-virtqueue.c > > @@ -9,6 +9,7 @@ > > > > #include "hw/virtio/vhost-shadow-virtqueue.h" > > #include "hw/virtio/vhost.h" > > +#include "hw/virtio/virtio-access.h" > > > > #include "standard-headers/linux/vhost_types.h" > > > > @@ -55,11 +56,96 @@ typedef struct VhostShadowVirtqueue { > > /* Virtio device */ > > VirtIODevice *vdev; > > > > +/* Map for returning guest's descriptors */ > > +VirtQueueElement **ring_id_maps; > > + > > +/* Next head to expose to device */ > > +uint16_t avail_idx_shadow; > > + > > +/* Next free descriptor */ > > +uint16_t free_head; > > + > > +/* Last seen used idx */ > > +uint16_t shadow_used_idx; > > + > > +/* Next head to consume from device */ > > +uint16_t used_idx; > > + > > /* Descriptors copied from guest */ > > vring_desc_t descs[]; > > } VhostShadowVirtqueue; > > > > -/* Forward guest notifications */ > > +static void vhost_vring_write_descs(VhostShadowVirtqueue *svq, > > +const struct iovec *iovec, > > +size_t num, bool more_descs, bool > > write) > > +{ > > +uint16_t i = svq->free_head, last = svq->free_head; > > +unsigned n; > > +uint16_t flags = write ? virtio_tswap16(svq->vdev, > > VRING_DESC_F_WRITE) : 0; > > +vring_desc_t *descs = svq->vring.desc; > > + > > +if (num == 0) { > > +return; > > +} > > + > > +for (n = 0; n < num; n++) { > > +if (more_descs || (n + 1 < num)) { > > +descs[i].flags = flags | virtio_tswap16(svq->vdev, > > +VRING_DESC_F_NEXT); > > +} else { > > +descs[i].flags = flags; > > +} > > +descs[i].addr = virtio_tswap64(svq->vdev, > > (hwaddr)iovec[n].iov_base); > So unsing virtio_tswap() is probably not correct since we're talking > with vhost backends which has its own endiness. > > >>> I was trying to expose the buffer with the same endianness as the > >>> driver originally offered, so if guest->qemu requires a bswap, I think > >>> it will always require a bswap again to expose to the device again. > >> > >> So assumes vhost-vDPA always provide a non-transitional device[1]. > >> > >> Then if Qemu present a transitional device, we need to do the endian > >> conversion when necessary, if Qemu present a non-transitional device, we > >> don't need to do that, guest driver will do that for us. > >> > >> But it looks to me the virtio_tswap() can't be used for this since it: > >> > >> static inline bool virtio_access_is_big_endian(VirtIODevice *vdev) > >> { > >> #if defined(LEGACY_VIRTIO_IS_BIENDIAN) > >> return virtio_is_big_endian(vdev); > >> #elif defined(TARGET_WORDS_BIGENDIAN) > >> if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) { > >> /* Devices conforming to VIRTIO 1.0 or later are always LE. */ > >> return false; > >> } > >> return true; > >> #else > >> return false; > >> #endif > >> } > >> > >> So if we present a legacy device on top of a non-transitiaonl vDPA > >> device. The VIRITIO_F_VERSION_1 check is wrong. > >> > >> > For vhost-vDPA, we can assume that it's a 1.0 device. > >>> Isn't it needed if the host is big endian? > >> > >> [1] > >> > >> So non-transitional device always assume little endian. > >> > >> For vhost-vDPA, we don't want to present transitional device which may > >> end up with a lot of burdens. > >> > >> I suspect the legacy driver plust vhost vDPA already break, so
Re: [RFC v2 11/13] vhost: Shadow virtqueue buffers forwarding
在 2021/3/17 下午10:38, Eugenio Perez Martin 写道: On Wed, Mar 17, 2021 at 3:51 AM Jason Wang wrote: 在 2021/3/17 上午12:05, Eugenio Perez Martin 写道: On Tue, Mar 16, 2021 at 9:15 AM Jason Wang wrote: 在 2021/3/16 上午3:48, Eugenio Pérez 写道: Initial version of shadow virtqueue that actually forward buffers. It reuses the VirtQueue code for the device part. The driver part is based on Linux's virtio_ring driver, but with stripped functionality and optimizations so it's easier to review. These will be added in later commits. Signed-off-by: Eugenio Pérez --- hw/virtio/vhost-shadow-virtqueue.c | 212 +++-- hw/virtio/vhost.c | 113 ++- 2 files changed, 312 insertions(+), 13 deletions(-) diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c index 1460d1d5d1..68ed0f2740 100644 --- a/hw/virtio/vhost-shadow-virtqueue.c +++ b/hw/virtio/vhost-shadow-virtqueue.c @@ -9,6 +9,7 @@ #include "hw/virtio/vhost-shadow-virtqueue.h" #include "hw/virtio/vhost.h" +#include "hw/virtio/virtio-access.h" #include "standard-headers/linux/vhost_types.h" @@ -55,11 +56,96 @@ typedef struct VhostShadowVirtqueue { /* Virtio device */ VirtIODevice *vdev; +/* Map for returning guest's descriptors */ +VirtQueueElement **ring_id_maps; + +/* Next head to expose to device */ +uint16_t avail_idx_shadow; + +/* Next free descriptor */ +uint16_t free_head; + +/* Last seen used idx */ +uint16_t shadow_used_idx; + +/* Next head to consume from device */ +uint16_t used_idx; + /* Descriptors copied from guest */ vring_desc_t descs[]; } VhostShadowVirtqueue; -/* Forward guest notifications */ +static void vhost_vring_write_descs(VhostShadowVirtqueue *svq, +const struct iovec *iovec, +size_t num, bool more_descs, bool write) +{ +uint16_t i = svq->free_head, last = svq->free_head; +unsigned n; +uint16_t flags = write ? virtio_tswap16(svq->vdev, VRING_DESC_F_WRITE) : 0; +vring_desc_t *descs = svq->vring.desc; + +if (num == 0) { +return; +} + +for (n = 0; n < num; n++) { +if (more_descs || (n + 1 < num)) { +descs[i].flags = flags | virtio_tswap16(svq->vdev, +VRING_DESC_F_NEXT); +} else { +descs[i].flags = flags; +} +descs[i].addr = virtio_tswap64(svq->vdev, (hwaddr)iovec[n].iov_base); So unsing virtio_tswap() is probably not correct since we're talking with vhost backends which has its own endiness. I was trying to expose the buffer with the same endianness as the driver originally offered, so if guest->qemu requires a bswap, I think it will always require a bswap again to expose to the device again. So assumes vhost-vDPA always provide a non-transitional device[1]. Then if Qemu present a transitional device, we need to do the endian conversion when necessary, if Qemu present a non-transitional device, we don't need to do that, guest driver will do that for us. But it looks to me the virtio_tswap() can't be used for this since it: static inline bool virtio_access_is_big_endian(VirtIODevice *vdev) { #if defined(LEGACY_VIRTIO_IS_BIENDIAN) return virtio_is_big_endian(vdev); #elif defined(TARGET_WORDS_BIGENDIAN) if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) { /* Devices conforming to VIRTIO 1.0 or later are always LE. */ return false; } return true; #else return false; #endif } So if we present a legacy device on top of a non-transitiaonl vDPA device. The VIRITIO_F_VERSION_1 check is wrong. For vhost-vDPA, we can assume that it's a 1.0 device. Isn't it needed if the host is big endian? [1] So non-transitional device always assume little endian. For vhost-vDPA, we don't want to present transitional device which may end up with a lot of burdens. I suspect the legacy driver plust vhost vDPA already break, so I plan to mandate VERSION_1 for all vDPA devices. Right. I think it's the best then. However, then we will need a similar method to always expose address/length as little endian, isn't it? Yes. +descs[i].len = virtio_tswap32(svq->vdev, iovec[n].iov_len); + +last = i; +i = virtio_tswap16(svq->vdev, descs[i].next); +} + +svq->free_head = virtio_tswap16(svq->vdev, descs[last].next); +} + +static unsigned vhost_shadow_vq_add_split(VhostShadowVirtqueue *svq, + VirtQueueElement *elem) +{ +int head; +unsigned avail_idx; +vring_avail_t *avail = svq->vring.avail; + +head = svq->free_head; + +/* We need some descriptors here */ +assert(elem->out_num || elem->in_num); + +vhost_vring_write_descs(svq, elem->out_sg, elem->out_num, +elem->in_num > 0, false); +
Re: [RFC v2 11/13] vhost: Shadow virtqueue buffers forwarding
On Wed, Mar 17, 2021 at 3:51 AM Jason Wang wrote: > > > 在 2021/3/17 上午12:05, Eugenio Perez Martin 写道: > > On Tue, Mar 16, 2021 at 9:15 AM Jason Wang wrote: > >> > >> 在 2021/3/16 上午3:48, Eugenio Pérez 写道: > >>> Initial version of shadow virtqueue that actually forward buffers. > >>> > >>> It reuses the VirtQueue code for the device part. The driver part is > >>> based on Linux's virtio_ring driver, but with stripped functionality > >>> and optimizations so it's easier to review. > >>> > >>> These will be added in later commits. > >>> > >>> Signed-off-by: Eugenio Pérez > >>> --- > >>>hw/virtio/vhost-shadow-virtqueue.c | 212 +++-- > >>>hw/virtio/vhost.c | 113 ++- > >>>2 files changed, 312 insertions(+), 13 deletions(-) > >>> > >>> diff --git a/hw/virtio/vhost-shadow-virtqueue.c > >>> b/hw/virtio/vhost-shadow-virtqueue.c > >>> index 1460d1d5d1..68ed0f2740 100644 > >>> --- a/hw/virtio/vhost-shadow-virtqueue.c > >>> +++ b/hw/virtio/vhost-shadow-virtqueue.c > >>> @@ -9,6 +9,7 @@ > >>> > >>>#include "hw/virtio/vhost-shadow-virtqueue.h" > >>>#include "hw/virtio/vhost.h" > >>> +#include "hw/virtio/virtio-access.h" > >>> > >>>#include "standard-headers/linux/vhost_types.h" > >>> > >>> @@ -55,11 +56,96 @@ typedef struct VhostShadowVirtqueue { > >>>/* Virtio device */ > >>>VirtIODevice *vdev; > >>> > >>> +/* Map for returning guest's descriptors */ > >>> +VirtQueueElement **ring_id_maps; > >>> + > >>> +/* Next head to expose to device */ > >>> +uint16_t avail_idx_shadow; > >>> + > >>> +/* Next free descriptor */ > >>> +uint16_t free_head; > >>> + > >>> +/* Last seen used idx */ > >>> +uint16_t shadow_used_idx; > >>> + > >>> +/* Next head to consume from device */ > >>> +uint16_t used_idx; > >>> + > >>>/* Descriptors copied from guest */ > >>>vring_desc_t descs[]; > >>>} VhostShadowVirtqueue; > >>> > >>> -/* Forward guest notifications */ > >>> +static void vhost_vring_write_descs(VhostShadowVirtqueue *svq, > >>> +const struct iovec *iovec, > >>> +size_t num, bool more_descs, bool > >>> write) > >>> +{ > >>> +uint16_t i = svq->free_head, last = svq->free_head; > >>> +unsigned n; > >>> +uint16_t flags = write ? virtio_tswap16(svq->vdev, > >>> VRING_DESC_F_WRITE) : 0; > >>> +vring_desc_t *descs = svq->vring.desc; > >>> + > >>> +if (num == 0) { > >>> +return; > >>> +} > >>> + > >>> +for (n = 0; n < num; n++) { > >>> +if (more_descs || (n + 1 < num)) { > >>> +descs[i].flags = flags | virtio_tswap16(svq->vdev, > >>> +VRING_DESC_F_NEXT); > >>> +} else { > >>> +descs[i].flags = flags; > >>> +} > >>> +descs[i].addr = virtio_tswap64(svq->vdev, > >>> (hwaddr)iovec[n].iov_base); > >> > >> So unsing virtio_tswap() is probably not correct since we're talking > >> with vhost backends which has its own endiness. > >> > > I was trying to expose the buffer with the same endianness as the > > driver originally offered, so if guest->qemu requires a bswap, I think > > it will always require a bswap again to expose to the device again. > > > So assumes vhost-vDPA always provide a non-transitional device[1]. > > Then if Qemu present a transitional device, we need to do the endian > conversion when necessary, if Qemu present a non-transitional device, we > don't need to do that, guest driver will do that for us. > > But it looks to me the virtio_tswap() can't be used for this since it: > > static inline bool virtio_access_is_big_endian(VirtIODevice *vdev) > { > #if defined(LEGACY_VIRTIO_IS_BIENDIAN) > return virtio_is_big_endian(vdev); > #elif defined(TARGET_WORDS_BIGENDIAN) > if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) { > /* Devices conforming to VIRTIO 1.0 or later are always LE. */ > return false; > } > return true; > #else > return false; > #endif > } > > So if we present a legacy device on top of a non-transitiaonl vDPA > device. The VIRITIO_F_VERSION_1 check is wrong. > > > > > >> For vhost-vDPA, we can assume that it's a 1.0 device. > > Isn't it needed if the host is big endian? > > > [1] > > So non-transitional device always assume little endian. > > For vhost-vDPA, we don't want to present transitional device which may > end up with a lot of burdens. > > I suspect the legacy driver plust vhost vDPA already break, so I plan to > mandate VERSION_1 for all vDPA devices. > Right. I think it's the best then. However, then we will need a similar method to always expose address/length as little endian, isn't it? > > > > >> > >>> +descs[i].len = virtio_tswap32(svq->vdev, iovec[n].iov_len); > >>> + > >>> +last = i; > >>> +i = virtio_tswap16(svq->vdev, descs[i].next); > >>> +} > >>> + >
Re: [RFC v2 11/13] vhost: Shadow virtqueue buffers forwarding
在 2021/3/17 上午12:05, Eugenio Perez Martin 写道: On Tue, Mar 16, 2021 at 9:15 AM Jason Wang wrote: 在 2021/3/16 上午3:48, Eugenio Pérez 写道: Initial version of shadow virtqueue that actually forward buffers. It reuses the VirtQueue code for the device part. The driver part is based on Linux's virtio_ring driver, but with stripped functionality and optimizations so it's easier to review. These will be added in later commits. Signed-off-by: Eugenio Pérez --- hw/virtio/vhost-shadow-virtqueue.c | 212 +++-- hw/virtio/vhost.c | 113 ++- 2 files changed, 312 insertions(+), 13 deletions(-) diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c index 1460d1d5d1..68ed0f2740 100644 --- a/hw/virtio/vhost-shadow-virtqueue.c +++ b/hw/virtio/vhost-shadow-virtqueue.c @@ -9,6 +9,7 @@ #include "hw/virtio/vhost-shadow-virtqueue.h" #include "hw/virtio/vhost.h" +#include "hw/virtio/virtio-access.h" #include "standard-headers/linux/vhost_types.h" @@ -55,11 +56,96 @@ typedef struct VhostShadowVirtqueue { /* Virtio device */ VirtIODevice *vdev; +/* Map for returning guest's descriptors */ +VirtQueueElement **ring_id_maps; + +/* Next head to expose to device */ +uint16_t avail_idx_shadow; + +/* Next free descriptor */ +uint16_t free_head; + +/* Last seen used idx */ +uint16_t shadow_used_idx; + +/* Next head to consume from device */ +uint16_t used_idx; + /* Descriptors copied from guest */ vring_desc_t descs[]; } VhostShadowVirtqueue; -/* Forward guest notifications */ +static void vhost_vring_write_descs(VhostShadowVirtqueue *svq, +const struct iovec *iovec, +size_t num, bool more_descs, bool write) +{ +uint16_t i = svq->free_head, last = svq->free_head; +unsigned n; +uint16_t flags = write ? virtio_tswap16(svq->vdev, VRING_DESC_F_WRITE) : 0; +vring_desc_t *descs = svq->vring.desc; + +if (num == 0) { +return; +} + +for (n = 0; n < num; n++) { +if (more_descs || (n + 1 < num)) { +descs[i].flags = flags | virtio_tswap16(svq->vdev, +VRING_DESC_F_NEXT); +} else { +descs[i].flags = flags; +} +descs[i].addr = virtio_tswap64(svq->vdev, (hwaddr)iovec[n].iov_base); So unsing virtio_tswap() is probably not correct since we're talking with vhost backends which has its own endiness. I was trying to expose the buffer with the same endianness as the driver originally offered, so if guest->qemu requires a bswap, I think it will always require a bswap again to expose to the device again. So assumes vhost-vDPA always provide a non-transitional device[1]. Then if Qemu present a transitional device, we need to do the endian conversion when necessary, if Qemu present a non-transitional device, we don't need to do that, guest driver will do that for us. But it looks to me the virtio_tswap() can't be used for this since it: static inline bool virtio_access_is_big_endian(VirtIODevice *vdev) { #if defined(LEGACY_VIRTIO_IS_BIENDIAN) return virtio_is_big_endian(vdev); #elif defined(TARGET_WORDS_BIGENDIAN) if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) { /* Devices conforming to VIRTIO 1.0 or later are always LE. */ return false; } return true; #else return false; #endif } So if we present a legacy device on top of a non-transitiaonl vDPA device. The VIRITIO_F_VERSION_1 check is wrong. For vhost-vDPA, we can assume that it's a 1.0 device. Isn't it needed if the host is big endian? [1] So non-transitional device always assume little endian. For vhost-vDPA, we don't want to present transitional device which may end up with a lot of burdens. I suspect the legacy driver plust vhost vDPA already break, so I plan to mandate VERSION_1 for all vDPA devices. +descs[i].len = virtio_tswap32(svq->vdev, iovec[n].iov_len); + +last = i; +i = virtio_tswap16(svq->vdev, descs[i].next); +} + +svq->free_head = virtio_tswap16(svq->vdev, descs[last].next); +} + +static unsigned vhost_shadow_vq_add_split(VhostShadowVirtqueue *svq, + VirtQueueElement *elem) +{ +int head; +unsigned avail_idx; +vring_avail_t *avail = svq->vring.avail; + +head = svq->free_head; + +/* We need some descriptors here */ +assert(elem->out_num || elem->in_num); + +vhost_vring_write_descs(svq, elem->out_sg, elem->out_num, +elem->in_num > 0, false); +vhost_vring_write_descs(svq, elem->in_sg, elem->in_num, false, true); + +/* + * Put entry in available array (but don't update avail->idx until they + * do sync). + */ +avail_idx = svq->avail_idx_shadow & (svq->vring.num - 1); +
Re: [RFC v2 11/13] vhost: Shadow virtqueue buffers forwarding
On Tue, Mar 16, 2021 at 9:15 AM Jason Wang wrote: > > > 在 2021/3/16 上午3:48, Eugenio Pérez 写道: > > Initial version of shadow virtqueue that actually forward buffers. > > > > It reuses the VirtQueue code for the device part. The driver part is > > based on Linux's virtio_ring driver, but with stripped functionality > > and optimizations so it's easier to review. > > > > These will be added in later commits. > > > > Signed-off-by: Eugenio Pérez > > --- > > hw/virtio/vhost-shadow-virtqueue.c | 212 +++-- > > hw/virtio/vhost.c | 113 ++- > > 2 files changed, 312 insertions(+), 13 deletions(-) > > > > diff --git a/hw/virtio/vhost-shadow-virtqueue.c > > b/hw/virtio/vhost-shadow-virtqueue.c > > index 1460d1d5d1..68ed0f2740 100644 > > --- a/hw/virtio/vhost-shadow-virtqueue.c > > +++ b/hw/virtio/vhost-shadow-virtqueue.c > > @@ -9,6 +9,7 @@ > > > > #include "hw/virtio/vhost-shadow-virtqueue.h" > > #include "hw/virtio/vhost.h" > > +#include "hw/virtio/virtio-access.h" > > > > #include "standard-headers/linux/vhost_types.h" > > > > @@ -55,11 +56,96 @@ typedef struct VhostShadowVirtqueue { > > /* Virtio device */ > > VirtIODevice *vdev; > > > > +/* Map for returning guest's descriptors */ > > +VirtQueueElement **ring_id_maps; > > + > > +/* Next head to expose to device */ > > +uint16_t avail_idx_shadow; > > + > > +/* Next free descriptor */ > > +uint16_t free_head; > > + > > +/* Last seen used idx */ > > +uint16_t shadow_used_idx; > > + > > +/* Next head to consume from device */ > > +uint16_t used_idx; > > + > > /* Descriptors copied from guest */ > > vring_desc_t descs[]; > > } VhostShadowVirtqueue; > > > > -/* Forward guest notifications */ > > +static void vhost_vring_write_descs(VhostShadowVirtqueue *svq, > > +const struct iovec *iovec, > > +size_t num, bool more_descs, bool > > write) > > +{ > > +uint16_t i = svq->free_head, last = svq->free_head; > > +unsigned n; > > +uint16_t flags = write ? virtio_tswap16(svq->vdev, VRING_DESC_F_WRITE) > > : 0; > > +vring_desc_t *descs = svq->vring.desc; > > + > > +if (num == 0) { > > +return; > > +} > > + > > +for (n = 0; n < num; n++) { > > +if (more_descs || (n + 1 < num)) { > > +descs[i].flags = flags | virtio_tswap16(svq->vdev, > > +VRING_DESC_F_NEXT); > > +} else { > > +descs[i].flags = flags; > > +} > > +descs[i].addr = virtio_tswap64(svq->vdev, > > (hwaddr)iovec[n].iov_base); > > > So unsing virtio_tswap() is probably not correct since we're talking > with vhost backends which has its own endiness. > I was trying to expose the buffer with the same endianness as the driver originally offered, so if guest->qemu requires a bswap, I think it will always require a bswap again to expose to the device again. > For vhost-vDPA, we can assume that it's a 1.0 device. Isn't it needed if the host is big endian? > > > > +descs[i].len = virtio_tswap32(svq->vdev, iovec[n].iov_len); > > + > > +last = i; > > +i = virtio_tswap16(svq->vdev, descs[i].next); > > +} > > + > > +svq->free_head = virtio_tswap16(svq->vdev, descs[last].next); > > +} > > + > > +static unsigned vhost_shadow_vq_add_split(VhostShadowVirtqueue *svq, > > + VirtQueueElement *elem) > > +{ > > +int head; > > +unsigned avail_idx; > > +vring_avail_t *avail = svq->vring.avail; > > + > > +head = svq->free_head; > > + > > +/* We need some descriptors here */ > > +assert(elem->out_num || elem->in_num); > > + > > +vhost_vring_write_descs(svq, elem->out_sg, elem->out_num, > > +elem->in_num > 0, false); > > +vhost_vring_write_descs(svq, elem->in_sg, elem->in_num, false, true); > > + > > +/* > > + * Put entry in available array (but don't update avail->idx until they > > + * do sync). > > + */ > > +avail_idx = svq->avail_idx_shadow & (svq->vring.num - 1); > > +avail->ring[avail_idx] = virtio_tswap16(svq->vdev, head); > > +svq->avail_idx_shadow++; > > + > > +/* Expose descriptors to device */ > > +smp_wmb(); > > +avail->idx = virtio_tswap16(svq->vdev, svq->avail_idx_shadow); > > + > > +return head; > > + > > +} > > + > > +static void vhost_shadow_vq_add(VhostShadowVirtqueue *svq, > > +VirtQueueElement *elem) > > +{ > > +unsigned qemu_head = vhost_shadow_vq_add_split(svq, elem); > > + > > +svq->ring_id_maps[qemu_head] = elem; > > +} > > + > > +/* Handle guest->device notifications */ > > static void vhost_handle_guest_kick(EventNotifier *n) > > { > > VhostShadowVirtqueue *svq = container_of(n, VhostShadowVirtqueue, > > @@ -69,7 +155,72 @@ static void
Re: [RFC v2 11/13] vhost: Shadow virtqueue buffers forwarding
在 2021/3/16 上午3:48, Eugenio Pérez 写道: Initial version of shadow virtqueue that actually forward buffers. It reuses the VirtQueue code for the device part. The driver part is based on Linux's virtio_ring driver, but with stripped functionality and optimizations so it's easier to review. These will be added in later commits. Signed-off-by: Eugenio Pérez --- hw/virtio/vhost-shadow-virtqueue.c | 212 +++-- hw/virtio/vhost.c | 113 ++- 2 files changed, 312 insertions(+), 13 deletions(-) diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c index 1460d1d5d1..68ed0f2740 100644 --- a/hw/virtio/vhost-shadow-virtqueue.c +++ b/hw/virtio/vhost-shadow-virtqueue.c @@ -9,6 +9,7 @@ #include "hw/virtio/vhost-shadow-virtqueue.h" #include "hw/virtio/vhost.h" +#include "hw/virtio/virtio-access.h" #include "standard-headers/linux/vhost_types.h" @@ -55,11 +56,96 @@ typedef struct VhostShadowVirtqueue { /* Virtio device */ VirtIODevice *vdev; +/* Map for returning guest's descriptors */ +VirtQueueElement **ring_id_maps; + +/* Next head to expose to device */ +uint16_t avail_idx_shadow; + +/* Next free descriptor */ +uint16_t free_head; + +/* Last seen used idx */ +uint16_t shadow_used_idx; + +/* Next head to consume from device */ +uint16_t used_idx; + /* Descriptors copied from guest */ vring_desc_t descs[]; } VhostShadowVirtqueue; -/* Forward guest notifications */ +static void vhost_vring_write_descs(VhostShadowVirtqueue *svq, +const struct iovec *iovec, +size_t num, bool more_descs, bool write) +{ +uint16_t i = svq->free_head, last = svq->free_head; +unsigned n; +uint16_t flags = write ? virtio_tswap16(svq->vdev, VRING_DESC_F_WRITE) : 0; +vring_desc_t *descs = svq->vring.desc; + +if (num == 0) { +return; +} + +for (n = 0; n < num; n++) { +if (more_descs || (n + 1 < num)) { +descs[i].flags = flags | virtio_tswap16(svq->vdev, +VRING_DESC_F_NEXT); +} else { +descs[i].flags = flags; +} +descs[i].addr = virtio_tswap64(svq->vdev, (hwaddr)iovec[n].iov_base); So unsing virtio_tswap() is probably not correct since we're talking with vhost backends which has its own endiness. For vhost-vDPA, we can assume that it's a 1.0 device. +descs[i].len = virtio_tswap32(svq->vdev, iovec[n].iov_len); + +last = i; +i = virtio_tswap16(svq->vdev, descs[i].next); +} + +svq->free_head = virtio_tswap16(svq->vdev, descs[last].next); +} + +static unsigned vhost_shadow_vq_add_split(VhostShadowVirtqueue *svq, + VirtQueueElement *elem) +{ +int head; +unsigned avail_idx; +vring_avail_t *avail = svq->vring.avail; + +head = svq->free_head; + +/* We need some descriptors here */ +assert(elem->out_num || elem->in_num); + +vhost_vring_write_descs(svq, elem->out_sg, elem->out_num, +elem->in_num > 0, false); +vhost_vring_write_descs(svq, elem->in_sg, elem->in_num, false, true); + +/* + * Put entry in available array (but don't update avail->idx until they + * do sync). + */ +avail_idx = svq->avail_idx_shadow & (svq->vring.num - 1); +avail->ring[avail_idx] = virtio_tswap16(svq->vdev, head); +svq->avail_idx_shadow++; + +/* Expose descriptors to device */ +smp_wmb(); +avail->idx = virtio_tswap16(svq->vdev, svq->avail_idx_shadow); + +return head; + +} + +static void vhost_shadow_vq_add(VhostShadowVirtqueue *svq, +VirtQueueElement *elem) +{ +unsigned qemu_head = vhost_shadow_vq_add_split(svq, elem); + +svq->ring_id_maps[qemu_head] = elem; +} + +/* Handle guest->device notifications */ static void vhost_handle_guest_kick(EventNotifier *n) { VhostShadowVirtqueue *svq = container_of(n, VhostShadowVirtqueue, @@ -69,7 +155,72 @@ static void vhost_handle_guest_kick(EventNotifier *n) return; } -event_notifier_set(>kick_notifier); +/* Make available as many buffers as possible */ +do { +if (virtio_queue_get_notification(svq->vq)) { +/* No more notifications until process all available */ +virtio_queue_set_notification(svq->vq, false); +} + +while (true) { +VirtQueueElement *elem; +if (virtio_queue_full(svq->vq)) { +break; So we've disabled guest notification. If buffer has been consumed, we need to retry the handle_guest_kick here. But I didn't find the code? +} + +elem = virtqueue_pop(svq->vq, sizeof(*elem)); +if (!elem) { +break; +} + +
[RFC v2 11/13] vhost: Shadow virtqueue buffers forwarding
Initial version of shadow virtqueue that actually forward buffers. It reuses the VirtQueue code for the device part. The driver part is based on Linux's virtio_ring driver, but with stripped functionality and optimizations so it's easier to review. These will be added in later commits. Signed-off-by: Eugenio Pérez --- hw/virtio/vhost-shadow-virtqueue.c | 212 +++-- hw/virtio/vhost.c | 113 ++- 2 files changed, 312 insertions(+), 13 deletions(-) diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c index 1460d1d5d1..68ed0f2740 100644 --- a/hw/virtio/vhost-shadow-virtqueue.c +++ b/hw/virtio/vhost-shadow-virtqueue.c @@ -9,6 +9,7 @@ #include "hw/virtio/vhost-shadow-virtqueue.h" #include "hw/virtio/vhost.h" +#include "hw/virtio/virtio-access.h" #include "standard-headers/linux/vhost_types.h" @@ -55,11 +56,96 @@ typedef struct VhostShadowVirtqueue { /* Virtio device */ VirtIODevice *vdev; +/* Map for returning guest's descriptors */ +VirtQueueElement **ring_id_maps; + +/* Next head to expose to device */ +uint16_t avail_idx_shadow; + +/* Next free descriptor */ +uint16_t free_head; + +/* Last seen used idx */ +uint16_t shadow_used_idx; + +/* Next head to consume from device */ +uint16_t used_idx; + /* Descriptors copied from guest */ vring_desc_t descs[]; } VhostShadowVirtqueue; -/* Forward guest notifications */ +static void vhost_vring_write_descs(VhostShadowVirtqueue *svq, +const struct iovec *iovec, +size_t num, bool more_descs, bool write) +{ +uint16_t i = svq->free_head, last = svq->free_head; +unsigned n; +uint16_t flags = write ? virtio_tswap16(svq->vdev, VRING_DESC_F_WRITE) : 0; +vring_desc_t *descs = svq->vring.desc; + +if (num == 0) { +return; +} + +for (n = 0; n < num; n++) { +if (more_descs || (n + 1 < num)) { +descs[i].flags = flags | virtio_tswap16(svq->vdev, +VRING_DESC_F_NEXT); +} else { +descs[i].flags = flags; +} +descs[i].addr = virtio_tswap64(svq->vdev, (hwaddr)iovec[n].iov_base); +descs[i].len = virtio_tswap32(svq->vdev, iovec[n].iov_len); + +last = i; +i = virtio_tswap16(svq->vdev, descs[i].next); +} + +svq->free_head = virtio_tswap16(svq->vdev, descs[last].next); +} + +static unsigned vhost_shadow_vq_add_split(VhostShadowVirtqueue *svq, + VirtQueueElement *elem) +{ +int head; +unsigned avail_idx; +vring_avail_t *avail = svq->vring.avail; + +head = svq->free_head; + +/* We need some descriptors here */ +assert(elem->out_num || elem->in_num); + +vhost_vring_write_descs(svq, elem->out_sg, elem->out_num, +elem->in_num > 0, false); +vhost_vring_write_descs(svq, elem->in_sg, elem->in_num, false, true); + +/* + * Put entry in available array (but don't update avail->idx until they + * do sync). + */ +avail_idx = svq->avail_idx_shadow & (svq->vring.num - 1); +avail->ring[avail_idx] = virtio_tswap16(svq->vdev, head); +svq->avail_idx_shadow++; + +/* Expose descriptors to device */ +smp_wmb(); +avail->idx = virtio_tswap16(svq->vdev, svq->avail_idx_shadow); + +return head; + +} + +static void vhost_shadow_vq_add(VhostShadowVirtqueue *svq, +VirtQueueElement *elem) +{ +unsigned qemu_head = vhost_shadow_vq_add_split(svq, elem); + +svq->ring_id_maps[qemu_head] = elem; +} + +/* Handle guest->device notifications */ static void vhost_handle_guest_kick(EventNotifier *n) { VhostShadowVirtqueue *svq = container_of(n, VhostShadowVirtqueue, @@ -69,7 +155,72 @@ static void vhost_handle_guest_kick(EventNotifier *n) return; } -event_notifier_set(>kick_notifier); +/* Make available as many buffers as possible */ +do { +if (virtio_queue_get_notification(svq->vq)) { +/* No more notifications until process all available */ +virtio_queue_set_notification(svq->vq, false); +} + +while (true) { +VirtQueueElement *elem; +if (virtio_queue_full(svq->vq)) { +break; +} + +elem = virtqueue_pop(svq->vq, sizeof(*elem)); +if (!elem) { +break; +} + +vhost_shadow_vq_add(svq, elem); +event_notifier_set(>kick_notifier); +} + +virtio_queue_set_notification(svq->vq, true); +} while (!virtio_queue_empty(svq->vq)); +} + +static bool vhost_shadow_vq_more_used(VhostShadowVirtqueue *svq) +{ +if (svq->used_idx != svq->shadow_used_idx) { +return true; +} + +/* Get used idx must not be reordered */ +