On Fri, Jul 17, 2020 at 11:29:28AM +0200, Marc Hartmayer wrote:
> Since virtio existed even before it was standardized, the virtio
> standard defines the following types of virtio devices:
>
> + legacy device (pre-virtio 1.0)
> + non-legacy or VIRTIO 1.0 device
> + transitional device (which can act both as legacy and non-legacy)
>
> Virtio 1.0 defines the fields of the virtqueues as little-endian,
> while legacy devices use the guest's native endianness [1]. Currently
> libvhost-user does not handle virtio endianness at all, i.e. it works
> only if the native endianness matches whatever is actually needed.
> That means things break spectacularly on big-endian targets. Let us
> handle virtio endianness for non-legacy devices as required by the
> virtio specification [1]. We will fence off legacy virtio devices with
> the upcoming patch.
>
> [1] https://docs.oasis-open.org/virtio/virtio/v1.1/cs01/virtio-v1.1-cs01.html#x1-210003
>
> Signed-off-by: Marc Hartmayer <mhart...@linux.ibm.com>
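
Not part of the patch, but for readers unfamiliar with the failure mode, here
is a minimal standalone sketch of the problem the commit message describes
(le16_to_cpu() below is a hand-rolled stand-in for the qemu/bswap.h helpers):
a VIRTIO 1.0 ring field is little-endian in guest memory, so a plain native
load returns a byte-swapped value on a big-endian host.

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    /* Stand-in for the LE-aware loads the patch routes through vu_lduw_p();
     * the real code uses the qemu/bswap.h helpers instead. */
    static uint16_t le16_to_cpu(uint16_t v)
    {
    #if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
        return (uint16_t)((v >> 8) | (v << 8));
    #else
        return v;
    #endif
    }

    int main(void)
    {
        /* avail->idx == 42 as a VIRTIO 1.0 device stores it: little-endian */
        const uint8_t ring_bytes[2] = { 0x2a, 0x00 };
        uint16_t raw;

        memcpy(&raw, ring_bytes, sizeof(raw));
        /* On a big-endian host the native load yields 0x2a00 == 10752. */
        printf("native load: %u, LE-aware load: %u\n", raw, le16_to_cpu(raw));
        return 0;
    }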
Who's "we" in this sentence? vhost user supports legacy generally ... > there is dead code in > libvhost-access.h that could be removed. But for the sake of > completeness, I left it in the code. > --- > contrib/libvhost-user/libvhost-access.h | 61 ++++++++++++ > contrib/libvhost-user/libvhost-user.c | 119 ++++++++++++------------ > 2 files changed, 121 insertions(+), 59 deletions(-) > create mode 100644 contrib/libvhost-user/libvhost-access.h > > diff --git a/contrib/libvhost-user/libvhost-access.h > b/contrib/libvhost-user/libvhost-access.h > new file mode 100644 > index 000000000000..868ba3e7bfb8 > --- /dev/null > +++ b/contrib/libvhost-user/libvhost-access.h > @@ -0,0 +1,61 @@ > +#ifndef LIBVHOST_ACCESS_H > + > +#include "qemu/bswap.h" > + > +#include "libvhost-user.h" > + > +static inline bool vu_access_is_big_endian(VuDev *dev) > +{ > + /* Devices conforming to VIRTIO 1.0 or later are always LE. */ > + return false; > +} > + > +static inline void vu_stw_p(VuDev *vdev, void *ptr, uint16_t v) > +{ > + if (vu_access_is_big_endian(vdev)) { > + stw_be_p(ptr, v); > + } else { > + stw_le_p(ptr, v); > + } > +} > + > +static inline void vu_stl_p(VuDev *vdev, void *ptr, uint32_t v) > +{ > + if (vu_access_is_big_endian(vdev)) { > + stl_be_p(ptr, v); > + } else { > + stl_le_p(ptr, v); > + } > +} > + > +static inline void vu_stq_p(VuDev *vdev, void *ptr, uint64_t v) > +{ > + if (vu_access_is_big_endian(vdev)) { > + stq_be_p(ptr, v); > + } else { > + stq_le_p(ptr, v); > + } > +} > + > +static inline int vu_lduw_p(VuDev *vdev, const void *ptr) > +{ > + if (vu_access_is_big_endian(vdev)) > + return lduw_be_p(ptr); > + return lduw_le_p(ptr); > +} > + > +static inline int vu_ldl_p(VuDev *vdev, const void *ptr) > +{ > + if (vu_access_is_big_endian(vdev)) > + return ldl_be_p(ptr); > + return ldl_le_p(ptr); > +} > + > +static inline uint64_t vu_ldq_p(VuDev *vdev, const void *ptr) > +{ > + if (vu_access_is_big_endian(vdev)) > + return ldq_be_p(ptr); > + return ldq_le_p(ptr); > +} > + > +#endif /* LIBVHOST_ACCESS_H */ > diff --git a/contrib/libvhost-user/libvhost-user.c > b/contrib/libvhost-user/libvhost-user.c > index d315db139606..0214b04c5291 100644 > --- a/contrib/libvhost-user/libvhost-user.c > +++ b/contrib/libvhost-user/libvhost-user.c > @@ -45,6 +45,7 @@ > #include "qemu/memfd.h" > > #include "libvhost-user.h" > +#include "libvhost-access.h" > > /* usually provided by GLib */ > #ifndef MIN > @@ -1074,7 +1075,7 @@ vu_set_vring_addr_exec(VuDev *dev, VhostUserMsg *vmsg) > return false; > } > > - vq->used_idx = vq->vring.used->idx; > + vq->used_idx = vu_lduw_p(dev, &vq->vring.used->idx); > > if (vq->last_avail_idx != vq->used_idx) { > bool resume = dev->iface->queue_is_processed_in_order && > @@ -1191,7 +1192,7 @@ vu_check_queue_inflights(VuDev *dev, VuVirtq *vq) > return 0; > } > > - vq->used_idx = vq->vring.used->idx; > + vq->used_idx = vu_lduw_p(dev, &vq->vring.used->idx); > vq->resubmit_num = 0; > vq->resubmit_list = NULL; > vq->counter = 0; > @@ -2019,35 +2020,35 @@ vu_queue_started(const VuDev *dev, const VuVirtq *vq) > } > > static inline uint16_t > -vring_avail_flags(VuVirtq *vq) > +vring_avail_flags(VuDev *dev, VuVirtq *vq) > { > - return vq->vring.avail->flags; > + return vu_lduw_p(dev, &vq->vring.avail->flags); > } > > static inline uint16_t > -vring_avail_idx(VuVirtq *vq) > +vring_avail_idx(VuDev *dev, VuVirtq *vq) > { > - vq->shadow_avail_idx = vq->vring.avail->idx; > + vq->shadow_avail_idx = vu_lduw_p(dev, &vq->vring.avail->idx); > > return vq->shadow_avail_idx; > } > > static inline 
> diff --git a/contrib/libvhost-user/libvhost-user.c b/contrib/libvhost-user/libvhost-user.c
> index d315db139606..0214b04c5291 100644
> --- a/contrib/libvhost-user/libvhost-user.c
> +++ b/contrib/libvhost-user/libvhost-user.c
> @@ -45,6 +45,7 @@
>  #include "qemu/memfd.h"
>
>  #include "libvhost-user.h"
> +#include "libvhost-access.h"
>
>  /* usually provided by GLib */
>  #ifndef MIN
> @@ -1074,7 +1075,7 @@ vu_set_vring_addr_exec(VuDev *dev, VhostUserMsg *vmsg)
>          return false;
>      }
>
> -    vq->used_idx = vq->vring.used->idx;
> +    vq->used_idx = vu_lduw_p(dev, &vq->vring.used->idx);
>
>      if (vq->last_avail_idx != vq->used_idx) {
>          bool resume = dev->iface->queue_is_processed_in_order &&
> @@ -1191,7 +1192,7 @@ vu_check_queue_inflights(VuDev *dev, VuVirtq *vq)
>          return 0;
>      }
>
> -    vq->used_idx = vq->vring.used->idx;
> +    vq->used_idx = vu_lduw_p(dev, &vq->vring.used->idx);
>      vq->resubmit_num = 0;
>      vq->resubmit_list = NULL;
>      vq->counter = 0;
> @@ -2019,35 +2020,35 @@ vu_queue_started(const VuDev *dev, const VuVirtq *vq)
>  }
>
>  static inline uint16_t
> -vring_avail_flags(VuVirtq *vq)
> +vring_avail_flags(VuDev *dev, VuVirtq *vq)
>  {
> -    return vq->vring.avail->flags;
> +    return vu_lduw_p(dev, &vq->vring.avail->flags);
>  }
>
>  static inline uint16_t
> -vring_avail_idx(VuVirtq *vq)
> +vring_avail_idx(VuDev *dev, VuVirtq *vq)
>  {
> -    vq->shadow_avail_idx = vq->vring.avail->idx;
> +    vq->shadow_avail_idx = vu_lduw_p(dev, &vq->vring.avail->idx);
>
>      return vq->shadow_avail_idx;
>  }
>
>  static inline uint16_t
> -vring_avail_ring(VuVirtq *vq, int i)
> +vring_avail_ring(VuDev *dev, VuVirtq *vq, int i)
>  {
> -    return vq->vring.avail->ring[i];
> +    return vu_lduw_p(dev, &vq->vring.avail->ring[i]);
>  }
>
>  static inline uint16_t
> -vring_get_used_event(VuVirtq *vq)
> +vring_get_used_event(VuDev *dev, VuVirtq *vq)
>  {
> -    return vring_avail_ring(vq, vq->vring.num);
> +    return vring_avail_ring(dev, vq, vq->vring.num);
>  }
>
>  static int
>  virtqueue_num_heads(VuDev *dev, VuVirtq *vq, unsigned int idx)
>  {
> -    uint16_t num_heads = vring_avail_idx(vq) - idx;
> +    uint16_t num_heads = vring_avail_idx(dev, vq) - idx;
>
>      /* Check it isn't doing very strange things with descriptor numbers. */
>      if (num_heads > vq->vring.num) {
> @@ -2070,7 +2071,7 @@ virtqueue_get_head(VuDev *dev, VuVirtq *vq,
>  {
>      /* Grab the next descriptor number they're advertising, and increment
>       * the index we've seen. */
> -    *head = vring_avail_ring(vq, idx % vq->vring.num);
> +    *head = vring_avail_ring(dev, vq, idx % vq->vring.num);
>
>      /* If their number is silly, that's a fatal mistake. */
>      if (*head >= vq->vring.num) {
> @@ -2123,12 +2124,12 @@ virtqueue_read_next_desc(VuDev *dev, struct vring_desc *desc,
>                           int i, unsigned int max, unsigned int *next)
>  {
>      /* If this descriptor says it doesn't chain, we're done. */
> -    if (!(desc[i].flags & VRING_DESC_F_NEXT)) {
> +    if (!(vu_lduw_p(dev, &desc[i].flags) & VRING_DESC_F_NEXT)) {
>          return VIRTQUEUE_READ_DESC_DONE;
>      }
>
>      /* Check they're not leading us off end of descriptors. */
> -    *next = desc[i].next;
> +    *next = vu_lduw_p(dev, &desc[i].next);
>      /* Make sure compiler knows to grab that: we don't want it changing! */
>      smp_wmb();
>
> @@ -2171,8 +2172,8 @@ vu_queue_get_avail_bytes(VuDev *dev, VuVirtq *vq, unsigned int *in_bytes,
>      }
>      desc = vq->vring.desc;
>
> -    if (desc[i].flags & VRING_DESC_F_INDIRECT) {
> -        if (desc[i].len % sizeof(struct vring_desc)) {
> +    if (vu_lduw_p(dev, &desc[i].flags) & VRING_DESC_F_INDIRECT) {
> +        if (vu_ldl_p(dev, &desc[i].len) % sizeof(struct vring_desc)) {
>              vu_panic(dev, "Invalid size for indirect buffer table");
>              goto err;
>          }
> @@ -2185,8 +2186,8 @@ vu_queue_get_avail_bytes(VuDev *dev, VuVirtq *vq, unsigned int *in_bytes,
>
>          /* loop over the indirect descriptor table */
>          indirect = 1;
> -        desc_addr = desc[i].addr;
> -        desc_len = desc[i].len;
> +        desc_addr = vu_ldq_p(dev, &desc[i].addr);
> +        desc_len = vu_ldl_p(dev, &desc[i].len);
>          max = desc_len / sizeof(struct vring_desc);
>          read_len = desc_len;
>          desc = vu_gpa_to_va(dev, &read_len, desc_addr);
> @@ -2213,10 +2214,10 @@ vu_queue_get_avail_bytes(VuDev *dev, VuVirtq *vq, unsigned int *in_bytes,
>              goto err;
>          }
>
> -        if (desc[i].flags & VRING_DESC_F_WRITE) {
> -            in_total += desc[i].len;
> +        if (vu_lduw_p(dev, &desc[i].flags) & VRING_DESC_F_WRITE) {
> +            in_total += vu_ldl_p(dev, &desc[i].len);
>          } else {
> -            out_total += desc[i].len;
> +            out_total += vu_ldl_p(dev, &desc[i].len);
>          }
>          if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
>              goto done;
> @@ -2277,7 +2278,7 @@ vu_queue_empty(VuDev *dev, VuVirtq *vq)
>          return false;
>      }
>
> -    return vring_avail_idx(vq) == vq->last_avail_idx;
> +    return vring_avail_idx(dev, vq) == vq->last_avail_idx;
>  }
>
>  static bool
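
A side note on virtqueue_num_heads() above, since the arithmetic is easy to
misread: avail->idx and the caller's index are free-running 16-bit counters,
so the unsigned subtraction stays correct across the 65535 -> 0 wrap. A
standalone sketch, not part of the patch:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint16_t last_avail_idx = 65534; /* consumer position before the wrap */
        uint16_t avail_idx = 3;          /* producer has wrapped past 65535 */

        /* Unsigned 16-bit wraparound still yields the true head count. */
        uint16_t num_heads = avail_idx - last_avail_idx;

        printf("num_heads = %u\n", num_heads); /* prints 5 */
        return 0;
    }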
> @@ -2296,14 +2297,14 @@ vring_notify(VuDev *dev, VuVirtq *vq)
>      }
>
>      if (!vu_has_feature(dev, VIRTIO_RING_F_EVENT_IDX)) {
> -        return !(vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT);
> +        return !(vring_avail_flags(dev, vq) & VRING_AVAIL_F_NO_INTERRUPT);
>      }
>
>      v = vq->signalled_used_valid;
>      vq->signalled_used_valid = true;
>      old = vq->signalled_used;
>      new = vq->signalled_used = vq->used_idx;
> -    return !v || vring_need_event(vring_get_used_event(vq), new, old);
> +    return !v || vring_need_event(vring_get_used_event(dev, vq), new, old);
>  }
>
>  static void _vu_queue_notify(VuDev *dev, VuVirtq *vq, bool sync)
> @@ -2361,33 +2362,33 @@ void vu_queue_notify_sync(VuDev *dev, VuVirtq *vq)
>  }
>
>  static inline void
> -vring_used_flags_set_bit(VuVirtq *vq, int mask)
> +vring_used_flags_set_bit(VuDev *dev, VuVirtq *vq, int mask)
> +{
> +    uint16_t *flags;
> +
> +    flags = (uint16_t *)((char*)vq->vring.used +
> +                         offsetof(struct vring_used, flags));
> +    vu_stw_p(dev, flags, vu_lduw_p(dev, flags) | mask);
> +}
> +
> +static inline void
> +vring_used_flags_unset_bit(VuDev *dev, VuVirtq *vq, int mask)
>  {
>      uint16_t *flags;
>
>      flags = (uint16_t *)((char*)vq->vring.used +
>                           offsetof(struct vring_used, flags));
> -    *flags |= mask;
> +    vu_stw_p(dev, flags, vu_lduw_p(dev, flags) & ~mask);
>  }
>
>  static inline void
> -vring_used_flags_unset_bit(VuVirtq *vq, int mask)
> -{
> -    uint16_t *flags;
> -
> -    flags = (uint16_t *)((char*)vq->vring.used +
> -                         offsetof(struct vring_used, flags));
> -    *flags &= ~mask;
> -}
> -
> -static inline void
> -vring_set_avail_event(VuVirtq *vq, uint16_t val)
> +vring_set_avail_event(VuDev *dev, VuVirtq *vq, uint16_t val)
>  {
>      if (!vq->notification) {
>          return;
>      }
>
> -    *((uint16_t *) &vq->vring.used->ring[vq->vring.num]) = val;
> +    vu_stw_p(dev, &vq->vring.used->ring[vq->vring.num], val);
>  }
>
>  void
> @@ -2395,11 +2396,11 @@ vu_queue_set_notification(VuDev *dev, VuVirtq *vq, int enable)
>  {
>      vq->notification = enable;
>      if (vu_has_feature(dev, VIRTIO_RING_F_EVENT_IDX)) {
> -        vring_set_avail_event(vq, vring_avail_idx(vq));
> +        vring_set_avail_event(dev, vq, vring_avail_idx(dev, vq));
>      } else if (enable) {
> -        vring_used_flags_unset_bit(vq, VRING_USED_F_NO_NOTIFY);
> +        vring_used_flags_unset_bit(dev, vq, VRING_USED_F_NO_NOTIFY);
>      } else {
> -        vring_used_flags_set_bit(vq, VRING_USED_F_NO_NOTIFY);
> +        vring_used_flags_set_bit(dev, vq, VRING_USED_F_NO_NOTIFY);
>      }
>      if (enable) {
>          /* Expose avail event/used flags before caller checks the avail idx.
>           */
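
For context on the vring_need_event() call in vring_notify() above: with
VIRTIO_RING_F_EVENT_IDX the driver publishes a used_event index, and the
device notifies only once the new used idx has crossed it. A sketch mirroring
the spec's formula (need_event() is a local stand-in, not part of the patch):

    #include <stdint.h>

    /* Notify iff new_idx has moved past event_idx since old_idx,
     * computed in wrap-safe 16-bit arithmetic. */
    static int need_event(uint16_t event_idx, uint16_t new_idx, uint16_t old_idx)
    {
        return (uint16_t)(new_idx - event_idx - 1) < (uint16_t)(new_idx - old_idx);
    }

The used_event value fed into that comparison is itself a guest-written
little-endian field, which is exactly why vring_get_used_event() now reads it
through vu_lduw_p().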
> @@ -2476,14 +2477,14 @@ vu_queue_map_desc(VuDev *dev, VuVirtq *vq, unsigned int idx, size_t sz)
>      struct vring_desc desc_buf[VIRTQUEUE_MAX_SIZE];
>      int rc;
>
> -    if (desc[i].flags & VRING_DESC_F_INDIRECT) {
> -        if (desc[i].len % sizeof(struct vring_desc)) {
> +    if (vu_lduw_p(dev, &desc[i].flags) & VRING_DESC_F_INDIRECT) {
> +        if (vu_ldl_p(dev, &desc[i].len) % sizeof(struct vring_desc)) {
>              vu_panic(dev, "Invalid size for indirect buffer table");
>          }
>
>          /* loop over the indirect descriptor table */
> -        desc_addr = desc[i].addr;
> -        desc_len = desc[i].len;
> +        desc_addr = vu_ldq_p(dev, &desc[i].addr);
> +        desc_len = vu_ldl_p(dev, &desc[i].len);
>          max = desc_len / sizeof(struct vring_desc);
>          read_len = desc_len;
>          desc = vu_gpa_to_va(dev, &read_len, desc_addr);
> @@ -2505,10 +2506,10 @@ vu_queue_map_desc(VuDev *dev, VuVirtq *vq, unsigned int idx, size_t sz)
>
>      /* Collect all the descriptors */
>      do {
> -        if (desc[i].flags & VRING_DESC_F_WRITE) {
> +        if (vu_lduw_p(dev, &desc[i].flags) & VRING_DESC_F_WRITE) {
>              virtqueue_map_desc(dev, &in_num, iov + out_num,
>                                 VIRTQUEUE_MAX_SIZE - out_num, true,
> -                               desc[i].addr, desc[i].len);
> +                               vu_ldq_p(dev, &desc[i].addr), vu_ldl_p(dev, &desc[i].len));
>          } else {
>              if (in_num) {
>                  vu_panic(dev, "Incorrect order for descriptors");
> @@ -2516,7 +2517,7 @@ vu_queue_map_desc(VuDev *dev, VuVirtq *vq, unsigned int idx, size_t sz)
>              }
>              virtqueue_map_desc(dev, &out_num, iov,
>                                 VIRTQUEUE_MAX_SIZE, false,
> -                               desc[i].addr, desc[i].len);
> +                               vu_ldq_p(dev, &desc[i].addr), vu_ldl_p(dev, &desc[i].len));
>          }
>
>          /* If we've got too many, that implies a descriptor loop. */
> @@ -2642,7 +2643,7 @@ vu_queue_pop(VuDev *dev, VuVirtq *vq, size_t sz)
>      }
>
>      if (vu_has_feature(dev, VIRTIO_RING_F_EVENT_IDX)) {
> -        vring_set_avail_event(vq, vq->last_avail_idx);
> +        vring_set_avail_event(dev, vq, vq->last_avail_idx);
>      }
>
>      elem = vu_queue_map_desc(dev, vq, head, sz);
> @@ -2712,14 +2713,14 @@ vu_log_queue_fill(VuDev *dev, VuVirtq *vq,
>      max = vq->vring.num;
>      i = elem->index;
>
> -    if (desc[i].flags & VRING_DESC_F_INDIRECT) {
> -        if (desc[i].len % sizeof(struct vring_desc)) {
> +    if (vu_lduw_p(dev, &desc[i].flags) & VRING_DESC_F_INDIRECT) {
> +        if (vu_ldl_p(dev, &desc[i].len) % sizeof(struct vring_desc)) {
>              vu_panic(dev, "Invalid size for indirect buffer table");
>          }
>
>          /* loop over the indirect descriptor table */
> -        desc_addr = desc[i].addr;
> -        desc_len = desc[i].len;
> +        desc_addr = vu_ldq_p(dev, &desc[i].addr);
> +        desc_len = vu_ldl_p(dev, &desc[i].len);
>          max = desc_len / sizeof(struct vring_desc);
>          read_len = desc_len;
>          desc = vu_gpa_to_va(dev, &read_len, desc_addr);
> @@ -2745,9 +2746,9 @@ vu_log_queue_fill(VuDev *dev, VuVirtq *vq,
>          return;
>      }
>
> -    if (desc[i].flags & VRING_DESC_F_WRITE) {
> -        min = MIN(desc[i].len, len);
> -        vu_log_write(dev, desc[i].addr, min);
> +    if (vu_lduw_p(dev, &desc[i].flags) & VRING_DESC_F_WRITE) {
> +        min = MIN(vu_ldl_p(dev, &desc[i].len), len);
> +        vu_log_write(dev, vu_ldq_p(dev, &desc[i].addr), min);
>          len -= min;
>      }
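
The same per-field pattern repeats in all three descriptor walkers
(vu_queue_get_avail_bytes(), vu_queue_map_desc(), vu_log_queue_fill()).
Spelled out once as a sketch (read_desc() is a hypothetical helper, not part
of the patch; it assumes the libvhost headers are included):

    /* Endian-aware copy of one guest descriptor, field by field,
     * exactly as the patched call sites now read them. */
    static struct vring_desc read_desc(VuDev *dev, const struct vring_desc *d)
    {
        struct vring_desc out;

        out.addr  = vu_ldq_p(dev, &d->addr);
        out.len   = vu_ldl_p(dev, &d->len);
        out.flags = vu_lduw_p(dev, &d->flags);
        out.next  = vu_lduw_p(dev, &d->next);
        return out;
    }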
>
> @@ -2772,15 +2773,15 @@ vu_queue_fill(VuDev *dev, VuVirtq *vq,
>
>      idx = (idx + vq->used_idx) % vq->vring.num;
>
> -    uelem.id = elem->index;
> -    uelem.len = len;
> +    vu_stl_p(dev, &uelem.id, elem->index);
> +    vu_stl_p(dev, &uelem.len, len);
>      vring_used_write(dev, vq, &uelem, idx);
>  }
>
>  static inline
>  void vring_used_idx_set(VuDev *dev, VuVirtq *vq, uint16_t val)
>  {
> -    vq->vring.used->idx = val;
> +    vu_stw_p(dev, &vq->vring.used->idx, val);
>      vu_log_write(dev,
>                   vq->vring.log_guest_addr + offsetof(struct vring_used, idx),
>                   sizeof(vq->vring.used->idx));
> --
> 2.25.4