[Qemu-devel] [PATCH v4 06/11] virtio: get avail bytes check for packed ring
From: Wei Xu Add packed ring headcount check. Common part of split/packed ring are kept. Signed-off-by: Wei Xu --- hw/virtio/virtio.c | 197 - 1 file changed, 179 insertions(+), 18 deletions(-) diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index f2ff980..832287b 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -368,6 +368,17 @@ int virtio_queue_ready(VirtQueue *vq) return vq->vring.avail != 0; } +static void vring_packed_desc_read(VirtIODevice *vdev, VRingPackedDesc *desc, +MemoryRegionCache *cache, int i) +{ +address_space_read_cached(cache, i * sizeof(VRingPackedDesc), + desc, sizeof(VRingPackedDesc)); +virtio_tswap16s(vdev, >flags); +virtio_tswap64s(vdev, >addr); +virtio_tswap32s(vdev, >len); +virtio_tswap16s(vdev, >id); +} + static void vring_packed_desc_read_flags(VirtIODevice *vdev, VRingPackedDesc *desc, MemoryRegionCache *cache, int i) { @@ -667,9 +678,9 @@ static int virtqueue_read_next_desc(VirtIODevice *vdev, VRingDesc *desc, return VIRTQUEUE_READ_DESC_MORE; } -void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes, - unsigned int *out_bytes, - unsigned max_in_bytes, unsigned max_out_bytes) +static void virtqueue_split_get_avail_bytes(VirtQueue *vq, +unsigned int *in_bytes, unsigned int *out_bytes, +unsigned max_in_bytes, unsigned max_out_bytes) { VirtIODevice *vdev = vq->vdev; unsigned int max, idx; @@ -679,27 +690,12 @@ void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes, int64_t len = 0; int rc; -if (unlikely(!vq->vring.desc)) { -if (in_bytes) { -*in_bytes = 0; -} -if (out_bytes) { -*out_bytes = 0; -} -return; -} - rcu_read_lock(); idx = vq->last_avail_idx; total_bufs = in_total = out_total = 0; max = vq->vring.num; caches = vring_get_region_caches(vq); -if (caches->desc.len < max * sizeof(VRingDesc)) { -virtio_error(vdev, "Cannot map descriptor ring"); -goto err; -} - while ((rc = virtqueue_num_heads(vq, idx)) > 0) { MemoryRegionCache *desc_cache = >desc; unsigned int num_bufs; @@ -792,6 +788,171 @@ 
err: goto done; } +static void virtqueue_packed_get_avail_bytes(VirtQueue *vq, +unsigned int *in_bytes, unsigned int *out_bytes, +unsigned max_in_bytes, unsigned max_out_bytes) +{ +VirtIODevice *vdev = vq->vdev; +unsigned int max, idx; +unsigned int total_bufs, in_total, out_total; +MemoryRegionCache *desc_cache; +VRingMemoryRegionCaches *caches; +MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID; +int64_t len = 0; +VRingPackedDesc desc; +bool wrap_counter; + +rcu_read_lock(); +idx = vq->last_avail_idx; +wrap_counter = vq->last_avail_wrap_counter; +total_bufs = in_total = out_total = 0; + +max = vq->vring.num; +caches = vring_get_region_caches(vq); +desc_cache = >desc; +vring_packed_desc_read_flags(vdev, , desc_cache, idx); +while (is_desc_avail(, wrap_counter)) { +unsigned int num_bufs; +unsigned int i = 0; + +num_bufs = total_bufs; + +/* Make sure flags has been read before all the fields. */ +smp_rmb(); +vring_packed_desc_read(vdev, , desc_cache, idx); + +if (desc.flags & VRING_DESC_F_INDIRECT) { +if (desc.len % sizeof(VRingPackedDesc)) { +virtio_error(vdev, "Invalid size for indirect buffer table"); +goto err; +} + +/* If we've got too many, that implies a descriptor loop. */ +if (num_bufs >= max) { +virtio_error(vdev, "Looped descriptor"); +goto err; +} + +/* loop over the indirect descriptor table */ +len = address_space_cache_init(_desc_cache, + vdev->dma_as, + desc.addr, desc.len, false); +desc_cache = _desc_cache; +if (len < desc.len) { +virtio_error(vdev, "Cannot map indirect buffer"); +goto err; +} + +max = desc.len / sizeof(VRingPackedDesc); +num_bufs = i = 0; +vring_packed_desc_read(vdev, , desc_cache, i); +} + +do { +/* If we've got too many, that implies a descriptor loop. */ +if (++num_bufs > max) { +virtio_error(vdev, "Looped descriptor"); +goto err; +} + +if (desc.flags & VRING_DESC_F_WRITE) { +in_total += desc.len; +}
[Qemu-devel] [PATCH v4 09/11] virtio-net: update the head descriptor in a chain lastly
From: Wei Xu This is a helper for packed ring. To support packed ring, the head descriptor in a chain should be updated last, since there is no 'avail_idx' (as in the split ring) to explicitly tell the driver side that all payload is ready after the chain is done, so the head is always visible immediately. This patch fills in the head descriptor after all the others have been filled. Signed-off-by: Wei Xu --- hw/net/virtio-net.c | 11 ++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c index 3f319ef..330abea 100644 --- a/hw/net/virtio-net.c +++ b/hw/net/virtio-net.c @@ -1251,6 +1251,8 @@ static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf, struct virtio_net_hdr_mrg_rxbuf mhdr; unsigned mhdr_cnt = 0; size_t offset, i, guest_offset; +VirtQueueElement head; +int head_len = 0; if (!virtio_net_can_receive(nc)) { return -1; } @@ -1328,7 +1330,13 @@ static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf, } /* signal other side */ -virtqueue_fill(q->rx_vq, elem, total, i++); +if (i == 0) { +head_len = total; +head = *elem; +} else { +virtqueue_fill(q->rx_vq, elem, len, i); +} +i++; g_free(elem); } @@ -1339,6 +1347,7 @@ static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf, &mhdr.num_buffers, sizeof mhdr.num_buffers); } +virtqueue_fill(q->rx_vq, &head, head_len, 0); virtqueue_flush(q->rx_vq, i); virtio_notify(vdev, q->rx_vq); -- 1.8.3.1
[Qemu-devel] [PATCH v4 08/11] virtio: event suppression support for packed ring
From: Wei Xu Difference between 'avail_wrap_counter' and 'last_avail_wrap_counter': For Tx(guest transmitting), they are the same after each pop of a desc. For Rx(guest receiving), they are also the same when there are enough descriptors to carry the payload for a packet(e.g. usually 16 descs are needed for a 64k packet in typical iperf tcp connection with tso enabled), however, when the ring is running out of descriptors while there are still a few free ones, e.g. 6 descriptors are available which is not enough to carry an entire packet which needs 16 descriptors, in this case the 'avail_wrap_counter' should be set as the first one pending being handled by guest driver in order to get a notification, and the 'last_avail_wrap_counter' should stay unchanged to the head of available descriptors, like below: Mark meaning: | | -- available |*| -- used A Snapshot of the queue: last_avail_idx = 253 last_avail_wrap_counter = 1 | +-+ 0 | | | |*|*|*|*|*|*|*|*|*|*|*|*|*|*|*|*|*| | | | 255 +-+ | shadow_avail_idx = 3 avail_wrap_counter = 0 Signed-off-by: Wei Xu --- hw/virtio/virtio.c | 137 + 1 file changed, 128 insertions(+), 9 deletions(-) diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 7e276b4..8cfc7b6 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -234,6 +234,34 @@ static void vring_desc_read(VirtIODevice *vdev, VRingDesc *desc, virtio_tswap16s(vdev, >next); } +static void vring_packed_event_read(VirtIODevice *vdev, +MemoryRegionCache *cache, VRingPackedDescEvent *e) +{ +address_space_read_cached(cache, 0, e, sizeof(*e)); +virtio_tswap16s(vdev, >off_wrap); +virtio_tswap16s(vdev, >flags); +} + +static void vring_packed_off_wrap_write(VirtIODevice *vdev, +MemoryRegionCache *cache, uint16_t off_wrap) +{ +virtio_tswap16s(vdev, _wrap); +address_space_write_cached(cache, offsetof(VRingPackedDescEvent, off_wrap), +_wrap, sizeof(off_wrap)); +address_space_cache_invalidate(cache, +offsetof(VRingPackedDescEvent, off_wrap), sizeof(off_wrap)); +} + +static void 
vring_packed_flags_write(VirtIODevice *vdev, +MemoryRegionCache *cache, uint16_t flags) +{ +virtio_tswap16s(vdev, &flags); +address_space_write_cached(cache, offsetof(VRingPackedDescEvent, flags), +&flags, sizeof(flags)); +address_space_cache_invalidate(cache, +offsetof(VRingPackedDescEvent, flags), sizeof(flags)); +} + static VRingMemoryRegionCaches *vring_get_region_caches(struct VirtQueue *vq) { VRingMemoryRegionCaches *caches = atomic_rcu_read(&vq->vring.caches); @@ -340,14 +368,8 @@ static inline void vring_set_avail_event(VirtQueue *vq, uint16_t val) address_space_cache_invalidate(&caches->used, pa, sizeof(val)); } -void virtio_queue_set_notification(VirtQueue *vq, int enable) +static void virtio_queue_set_notification_split(VirtQueue *vq, int enable) { -vq->notification = enable; - -if (!vq->vring.desc) { -return; -} - rcu_read_lock(); if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) { vring_set_avail_event(vq, vring_avail_idx(vq)); @@ -363,6 +385,57 @@ void virtio_queue_set_notification(VirtQueue *vq, int enable) rcu_read_unlock(); } +static void virtio_queue_set_notification_packed(VirtQueue *vq, int enable) +{ +VRingPackedDescEvent e; +VRingMemoryRegionCaches *caches; + +rcu_read_lock(); +caches = vring_get_region_caches(vq); +vring_packed_event_read(vq->vdev, &caches->used, &e); + +if (!enable) { +if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) { +/* no need to write device area since this is outdated. 
*/ +goto out; +} + +e.flags = VRING_PACKED_EVENT_FLAG_DISABLE; +goto update; +} + +e.flags = VRING_PACKED_EVENT_FLAG_ENABLE; +if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) { +uint16_t off_wrap = vq->shadow_avail_idx | vq->avail_wrap_counter << 15; + +vring_packed_off_wrap_write(vq->vdev, &caches->used, off_wrap); +/* Make sure off_wrap is written before flags */ +smp_wmb(); + +e.flags = VRING_PACKED_EVENT_FLAG_DESC; +} + +update: +vring_packed_flags_write(vq->vdev, &caches->used, e.flags); +out: +rcu_read_unlock(); +} + +void virtio_queue_set_notification(VirtQueue *vq, int enable) +{ +vq->notification = enable; + +if (!vq->vring.desc) { +return; +} + +if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) { +virtio_queue_set_notification_packed(vq, enable); +} else { +
[Qemu-devel] [PATCH v4 10/11] virtio: migration support for packed ring
From: Wei Xu Both userspace and vhost-net/user are supported with this patch. A new subsection is introduced for packed ring, only 'last_avail_idx' and 'last_avail_wrap_counter' are saved/loaded presumably based on all the others relevant data(inuse, used/avail index and wrap count should be the same. Signed-off-by: Wei Xu --- hw/virtio/virtio.c | 69 +++--- 1 file changed, 66 insertions(+), 3 deletions(-) diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 8cfc7b6..7c5de07 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -2349,6 +2349,13 @@ static bool virtio_virtqueue_needed(void *opaque) return virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1); } +static bool virtio_packed_virtqueue_needed(void *opaque) +{ +VirtIODevice *vdev = opaque; + +return virtio_host_has_feature(vdev, VIRTIO_F_RING_PACKED); +} + static bool virtio_ringsize_needed(void *opaque) { VirtIODevice *vdev = opaque; @@ -2390,6 +2397,17 @@ static const VMStateDescription vmstate_virtqueue = { } }; +static const VMStateDescription vmstate_packed_virtqueue = { +.name = "packed_virtqueue_state", +.version_id = 1, +.minimum_version_id = 1, +.fields = (VMStateField[]) { +VMSTATE_UINT16(last_avail_idx, struct VirtQueue), +VMSTATE_BOOL(last_avail_wrap_counter, struct VirtQueue), +VMSTATE_END_OF_LIST() +} +}; + static const VMStateDescription vmstate_virtio_virtqueues = { .name = "virtio/virtqueues", .version_id = 1, @@ -2402,6 +2420,18 @@ static const VMStateDescription vmstate_virtio_virtqueues = { } }; +static const VMStateDescription vmstate_virtio_packed_virtqueues = { +.name = "virtio/packed_virtqueues", +.version_id = 1, +.minimum_version_id = 1, +.needed = _packed_virtqueue_needed, +.fields = (VMStateField[]) { +VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice, + VIRTIO_QUEUE_MAX, 0, vmstate_packed_virtqueue, VirtQueue), +VMSTATE_END_OF_LIST() +} +}; + static const VMStateDescription vmstate_ringsize = { .name = "ringsize_state", .version_id = 1, @@ -2522,6 +2552,7 @@ 
static const VMStateDescription vmstate_virtio = { _virtio_ringsize, _virtio_broken, _virtio_extra_state, +_virtio_packed_virtqueues, NULL } }; @@ -2794,6 +2825,17 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id) virtio_queue_update_rings(vdev, i); } +if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) { +vdev->vq[i].shadow_avail_idx = vdev->vq[i].last_avail_idx; +vdev->vq[i].avail_wrap_counter = +vdev->vq[i].last_avail_wrap_counter; + +vdev->vq[i].used_idx = vdev->vq[i].last_avail_idx; +vdev->vq[i].used_wrap_counter = +vdev->vq[i].last_avail_wrap_counter; +continue; +} + nheads = vring_avail_idx(>vq[i]) - vdev->vq[i].last_avail_idx; /* Check it isn't doing strange things with descriptor numbers. */ if (nheads > vdev->vq[i].vring.num) { @@ -2955,17 +2997,34 @@ hwaddr virtio_queue_get_used_size(VirtIODevice *vdev, int n) uint16_t virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n) { -return vdev->vq[n].last_avail_idx; +uint16_t idx; + +if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) { +idx = vdev->vq[n].last_avail_idx; +idx |= ((int)vdev->vq[n].avail_wrap_counter) << 15; +} else { +idx = (int)vdev->vq[n].last_avail_idx; +} +return idx; } void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n, uint16_t idx) { -vdev->vq[n].last_avail_idx = idx; -vdev->vq[n].shadow_avail_idx = idx; +if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) { +vdev->vq[n].last_avail_idx = idx & 0x7fff; +vdev->vq[n].avail_wrap_counter = !!(idx & 0x8000); +} else { +vdev->vq[n].last_avail_idx = idx; +vdev->vq[n].shadow_avail_idx = idx; +} } void virtio_queue_restore_last_avail_idx(VirtIODevice *vdev, int n) { +if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) { +return; +} + rcu_read_lock(); if (vdev->vq[n].vring.desc) { vdev->vq[n].last_avail_idx = vring_used_idx(>vq[n]); @@ -2976,6 +3035,10 @@ void virtio_queue_restore_last_avail_idx(VirtIODevice *vdev, int n) void virtio_queue_update_used_idx(VirtIODevice *vdev, int n) { +if 
(virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) { +return; +} + rcu_read_lock(); if (vdev->vq[n].vring.desc) { vdev->vq[n].used_idx = vring_used_idx(>vq[n]); -- 1.8.3.1
[Qemu-devel] [PATCH v4 05/11] virtio: queue/descriptor check helpers for packed ring
From: Wei Xu These are descriptor available and queue empty check helpers. Signed-off-by: Wei Xu --- hw/virtio/virtio.c | 56 +- 1 file changed, 55 insertions(+), 1 deletion(-) diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 54dc098..f2ff980 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -368,6 +368,25 @@ int virtio_queue_ready(VirtQueue *vq) return vq->vring.avail != 0; } +static void vring_packed_desc_read_flags(VirtIODevice *vdev, +VRingPackedDesc *desc, MemoryRegionCache *cache, int i) +{ +address_space_read_cached(cache, + i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, flags), + >flags, sizeof(desc->flags)); +virtio_tswap16s(vdev, >flags); +} + +static inline bool is_desc_avail(struct VRingPackedDesc *desc, +bool wrap_counter) +{ +bool avail, used; + +avail = !!(desc->flags & (1 << VRING_PACKED_DESC_F_AVAIL)); +used = !!(desc->flags & (1 << VRING_PACKED_DESC_F_USED)); +return (avail != used) && (avail == wrap_counter); +} + /* Fetch avail_idx from VQ memory only when we really need to know if * guest has added some buffers. * Called within rcu_read_lock(). 
*/ @@ -388,7 +407,7 @@ static int virtio_queue_empty_rcu(VirtQueue *vq) return vring_avail_idx(vq) == vq->last_avail_idx; } -int virtio_queue_empty(VirtQueue *vq) +static int virtio_queue_split_empty(VirtQueue *vq) { bool empty; @@ -410,6 +429,41 @@ int virtio_queue_empty(VirtQueue *vq) return empty; } +static int virtio_queue_packed_empty_rcu(VirtQueue *vq) +{ +struct VRingPackedDesc desc; +VRingMemoryRegionCaches *cache; + +if (unlikely(!vq->vring.desc)) { +return 1; +} + +cache = vring_get_region_caches(vq); +vring_packed_desc_read_flags(vq->vdev, &desc, &cache->desc, +vq->last_avail_idx); + +return !is_desc_avail(&desc, vq->last_avail_wrap_counter); +} + +static int virtio_queue_packed_empty(VirtQueue *vq) +{ +bool empty; + +rcu_read_lock(); +empty = virtio_queue_packed_empty_rcu(vq); +rcu_read_unlock(); +return empty; +} + +int virtio_queue_empty(VirtQueue *vq) +{ +if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) { +return virtio_queue_packed_empty(vq); +} else { +return virtio_queue_split_empty(vq); +} +} + static void virtqueue_unmap_sg(VirtQueue *vq, const VirtQueueElement *elem, unsigned int len) { -- 1.8.3.1
[Qemu-devel] [PATCH v4 11/11] virtio: CLI and provide packed ring feature bit by default
From: Wei Xu Add userspace and vhost kernel/user support. Add CLI "ring_packed=true/false" to enable/disable packed ring provision. Usage: -device virtio-net-pci,netdev=xx,mac=xx:xx:xx:xx:xx:xx,ring_packed=false By default it is provided. Signed-off-by: Wei Xu --- hw/net/vhost_net.c | 2 ++ include/hw/virtio/virtio.h | 4 +++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c index e037db6..f593086 100644 --- a/hw/net/vhost_net.c +++ b/hw/net/vhost_net.c @@ -53,6 +53,7 @@ static const int kernel_feature_bits[] = { VIRTIO_F_VERSION_1, VIRTIO_NET_F_MTU, VIRTIO_F_IOMMU_PLATFORM, +VIRTIO_F_RING_PACKED, VHOST_INVALID_FEATURE_BIT }; @@ -78,6 +79,7 @@ static const int user_feature_bits[] = { VIRTIO_NET_F_MRG_RXBUF, VIRTIO_NET_F_MTU, VIRTIO_F_IOMMU_PLATFORM, +VIRTIO_F_RING_PACKED, /* This bit implies RARP isn't sent by QEMU out of band */ VIRTIO_NET_F_GUEST_ANNOUNCE, diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h index 9c1fa07..2eb27d2 100644 --- a/include/hw/virtio/virtio.h +++ b/include/hw/virtio/virtio.h @@ -264,7 +264,9 @@ typedef struct VirtIORNGConf VirtIORNGConf; DEFINE_PROP_BIT64("any_layout", _state, _field, \ VIRTIO_F_ANY_LAYOUT, true), \ DEFINE_PROP_BIT64("iommu_platform", _state, _field, \ - VIRTIO_F_IOMMU_PLATFORM, false) + VIRTIO_F_IOMMU_PLATFORM, false), \ +DEFINE_PROP_BIT64("ring_packed", _state, _field, \ + VIRTIO_F_RING_PACKED, true) hwaddr virtio_queue_get_desc_addr(VirtIODevice *vdev, int n); hwaddr virtio_queue_get_avail_addr(VirtIODevice *vdev, int n); -- 1.8.3.1
[Qemu-devel] [PATCH v4 01/11] virtio: rename structure for packed ring
From: Wei Xu Redefine packed ring structure according to Qemu nomenclature. Signed-off-by: Wei Xu --- hw/virtio/virtio.c | 15 +++ 1 file changed, 15 insertions(+) diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index a1ff647..eafb4cc 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -39,6 +39,13 @@ typedef struct VRingDesc uint16_t next; } VRingDesc; +typedef struct VRingPackedDesc { +uint64_t addr; +uint32_t len; +uint16_t id; +uint16_t flags; +} VRingPackedDesc; + typedef struct VRingAvail { uint16_t flags; @@ -77,17 +84,25 @@ typedef struct VRing VRingMemoryRegionCaches *caches; } VRing; +typedef struct VRingPackedDescEvent { +uint16_t off_wrap; +uint16_t flags; +} VRingPackedDescEvent ; + struct VirtQueue { VRing vring; /* Next head to pop */ uint16_t last_avail_idx; +bool last_avail_wrap_counter; /* Last avail_idx read from VQ. */ uint16_t shadow_avail_idx; +bool avail_wrap_counter; uint16_t used_idx; +bool used_wrap_counter; /* Last used index value we have signalled on */ uint16_t signalled_used; -- 1.8.3.1
[Qemu-devel] [PATCH v4 00/11] packed ring virtio-net backends support
From: Wei Xu https://github.com/Whishay/qemu.git Userspace and vhost-net backend test has been done with upstream kernel in guest. v3->v4: - add version number to the subject of each patch.(mst) v2->v3: v2/01 - drop it since the header has been synchronized from kernel.(mst & jason) v3/01 - rename 'avail_wrap_counter' to 'last_avail_wrap_counter', 'event_wrap_counter' to 'avail_wrap_counter' to make it easier to understand.(Jason) - revise commit message.(Jason) v3/02 - split packed ring areas size calculation to next patch.(Jason) to not break bisect(Jason). v3/03 - initialize packed ring region with correct size and attribute. - remove unnecessary 'else' checks. (Jason) v3/06 - add commit log. - replace 'event_wrap-counter' with 'avail_wrap_counter'. - merge common memory cache size check to virtqueue_get_avail_bytes().(Jason) - revise memory barrier comment.(Jason) - check indirect descriptors by desc.len/sizeof(desc).(Jason) - flip wrap counter with '^=1'.(Jason) v3/07 - move desc.id/len initialization to the declaration.(Jason) - flip wrap counter '!' with '^=1'.(Jason) - add memory barrier comments in commit message. v3/08 - use offsetof() when writing cache.(Jason) - avoid duplicated memory region write when turning off event_idx supported notification.(Jason) - add commit log.(Jason) - add avail & last_avail wrap counter difference description in commit log. v3/09 - remove unnecessary used/avail idx/wrap-counter from subsection. - put new subsection to the end of vmstate_virtio.(Jason) - squash the two userspace and vhost-net migration patches in v2.(Jason) v3/10 - reword commit message. - this is a help not a bug fix so I would like to keep it as a separate patch still.(Proposed a merge it by Jason) - the virtqueue_fill() is also not like an API so I would prefer not to touch it, please correct me if I did not get it in the right way.(Proposed a squash by Jason) v3/11 - squash feature bits for user space and vhost kernel/user backends. 
- enable packed ring feature bit provision on host by default.(Jason) Wei Xu (11): virtio: rename structure for packed ring virtio: device/driver area size calculation helper for split ring virtio: initialize packed ring region virtio: initialize wrap counter for packed ring virtio: queue/descriptor check helpers for packed ring virtio: get avail bytes check for packed ring virtio: fill/flush/pop for packed ring virtio: event suppression support for packed ring virtio-net: update the head descriptor in a chain lastly virtio: migration support for packed ring virtio: CLI and provide packed ring feature bit by default hw/net/vhost_net.c | 2 + hw/net/virtio-net.c| 11 +- hw/virtio/virtio.c | 798 + include/hw/virtio/virtio.h | 4 +- 4 files changed, 757 insertions(+), 58 deletions(-) -- 1.8.3.1
[Qemu-devel] [PATCH v4 07/11] virtio: fill/flush/pop for packed ring
From: Wei Xu last_used_idx/wrap_counter should be equal to last_avail_idx/wrap_counter after a successful flush. Batching in vhost-net & dpdk testpmd is not equivalently supported in userspace backend, but a chained descriptors for Rx is similarly presented as a lightweight batch, so a write barrier is nailed only for the first(head) descriptor. Signed-off-by: Wei Xu --- hw/virtio/virtio.c | 291 + 1 file changed, 274 insertions(+), 17 deletions(-) diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 832287b..7e276b4 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -379,6 +379,25 @@ static void vring_packed_desc_read(VirtIODevice *vdev, VRingPackedDesc *desc, virtio_tswap16s(vdev, >id); } +static void vring_packed_desc_write_data(VirtIODevice *vdev, +VRingPackedDesc *desc, MemoryRegionCache *cache, int i) +{ +virtio_tswap32s(vdev, >len); +virtio_tswap16s(vdev, >id); +address_space_write_cached(cache, + i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, id), + >id, sizeof(desc->id)); +address_space_cache_invalidate(cache, + i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, id), + sizeof(desc->id)); +address_space_write_cached(cache, + i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, len), + >len, sizeof(desc->len)); +address_space_cache_invalidate(cache, + i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, len), + sizeof(desc->len)); +} + static void vring_packed_desc_read_flags(VirtIODevice *vdev, VRingPackedDesc *desc, MemoryRegionCache *cache, int i) { @@ -388,6 +407,18 @@ static void vring_packed_desc_read_flags(VirtIODevice *vdev, virtio_tswap16s(vdev, >flags); } +static void vring_packed_desc_write_flags(VirtIODevice *vdev, +VRingPackedDesc *desc, MemoryRegionCache *cache, int i) +{ +virtio_tswap16s(vdev, >flags); +address_space_write_cached(cache, + i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, flags), + >flags, sizeof(desc->flags)); +address_space_cache_invalidate(cache, + i * sizeof(VRingPackedDesc) + 
offsetof(VRingPackedDesc, flags), + sizeof(desc->flags)); +} + static inline bool is_desc_avail(struct VRingPackedDesc *desc, bool wrap_counter) { @@ -554,19 +585,11 @@ bool virtqueue_rewind(VirtQueue *vq, unsigned int num) } /* Called within rcu_read_lock(). */ -void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem, +static void virtqueue_split_fill(VirtQueue *vq, const VirtQueueElement *elem, unsigned int len, unsigned int idx) { VRingUsedElem uelem; -trace_virtqueue_fill(vq, elem, len, idx); - -virtqueue_unmap_sg(vq, elem, len); - -if (unlikely(vq->vdev->broken)) { -return; -} - if (unlikely(!vq->vring.used)) { return; } @@ -578,16 +601,71 @@ void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem, vring_used_write(vq, , idx); } -/* Called within rcu_read_lock(). */ -void virtqueue_flush(VirtQueue *vq, unsigned int count) +static void virtqueue_packed_fill(VirtQueue *vq, const VirtQueueElement *elem, +unsigned int len, unsigned int idx) { -uint16_t old, new; +uint16_t head; +VRingMemoryRegionCaches *caches; +VRingPackedDesc desc = { +.flags = 0, +.id = elem->index, +.len = len, +}; +bool wrap_counter = vq->used_wrap_counter; + +if (unlikely(!vq->vring.desc)) { +return; +} + +head = vq->used_idx + idx; +if (head >= vq->vring.num) { +head -= vq->vring.num; +wrap_counter ^= 1; +} +if (wrap_counter) { +desc.flags |= (1 << VRING_PACKED_DESC_F_AVAIL); +desc.flags |= (1 << VRING_PACKED_DESC_F_USED); +} else { +desc.flags &= ~(1 << VRING_PACKED_DESC_F_AVAIL); +desc.flags &= ~(1 << VRING_PACKED_DESC_F_USED); +} + +caches = vring_get_region_caches(vq); +vring_packed_desc_write_data(vq->vdev, , >desc, head); +if (idx == 0) { +/* + * Make sure descriptor id and len is written before + * flags for the first used buffer. 
+ */ +smp_wmb(); +} + +vring_packed_desc_write_flags(vq->vdev, &desc, &caches->desc, head); +} + +void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem, +unsigned int len, unsigned int idx) +{ +trace_virtqueue_fill(vq, elem, len, idx); + +virtqueue_unmap_sg(vq, elem, len); if (unlikely(vq->vdev->broken)) { -vq->inuse -= count; return; } +if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) { +virtqueue_packed_fill(vq, elem, len, idx); +} else { +virtqueue_split_fill(vq, elem,
[Qemu-devel] [PATCH v4 04/11] virtio: initialize wrap counter for packed ring
From: Wei Xu Set to 'true' by default due to spec. Signed-off-by: Wei Xu --- hw/virtio/virtio.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 1a98e61..54dc098 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -1238,6 +1238,9 @@ void virtio_reset(void *opaque) vdev->vq[i].last_avail_idx = 0; vdev->vq[i].shadow_avail_idx = 0; vdev->vq[i].used_idx = 0; +vdev->vq[i].last_avail_wrap_counter = true; +vdev->vq[i].avail_wrap_counter = true; +vdev->vq[i].used_wrap_counter = true; virtio_queue_set_vector(vdev, i, VIRTIO_NO_VECTOR); vdev->vq[i].signalled_used = 0; vdev->vq[i].signalled_used_valid = false; -- 1.8.3.1
[Qemu-devel] [PATCH v4 03/11] virtio: initialize packed ring region
From: Wei Xu Initialize packed ring memory region with correct size and attribute. Signed-off-by: Wei Xu --- hw/virtio/virtio.c | 14 -- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 6769e54..1a98e61 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -156,7 +156,7 @@ static void virtio_init_region_cache(VirtIODevice *vdev, int n) VRingMemoryRegionCaches *new = NULL; hwaddr addr, size; int64_t len; - +bool attr; addr = vq->vring.desc; if (!addr) { @@ -164,8 +164,10 @@ static void virtio_init_region_cache(VirtIODevice *vdev, int n) } new = g_new0(VRingMemoryRegionCaches, 1); size = virtio_queue_get_desc_size(vdev, n); +attr = virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED) ? + true : false; len = address_space_cache_init(>desc, vdev->dma_as, - addr, size, false); + addr, size, attr); if (len < size) { virtio_error(vdev, "Cannot map desc"); goto err_desc; @@ -2335,6 +2337,10 @@ hwaddr virtio_queue_get_avail_size(VirtIODevice *vdev, int n) { int s; +if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) { +return sizeof(struct VRingPackedDescEvent); +} + s = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0; return offsetof(VRingAvail, ring) + sizeof(uint16_t) * vdev->vq[n].vring.num + s; @@ -2344,6 +2350,10 @@ hwaddr virtio_queue_get_used_size(VirtIODevice *vdev, int n) { int s; +if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) { +return sizeof(struct VRingPackedDescEvent); +} + s = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0; return offsetof(VRingUsed, ring) + sizeof(VRingUsedElem) * vdev->vq[n].vring.num + s; -- 1.8.3.1
[Qemu-devel] [PATCH v4 02/11] virtio: device/driver area size calculation helper for split ring
From: Wei Xu There is slight size difference between split/packed rings. This is a refactor of split ring as well as a helper to expand device and driver area size calculation for packed ring. Signed-off-by: Wei Xu --- hw/virtio/virtio.c | 16 ++-- 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index eafb4cc..6769e54 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -155,10 +155,8 @@ static void virtio_init_region_cache(VirtIODevice *vdev, int n) VRingMemoryRegionCaches *old = vq->vring.caches; VRingMemoryRegionCaches *new = NULL; hwaddr addr, size; -int event_size; int64_t len; -event_size = virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0; addr = vq->vring.desc; if (!addr) { @@ -173,7 +171,7 @@ static void virtio_init_region_cache(VirtIODevice *vdev, int n) goto err_desc; } -size = virtio_queue_get_used_size(vdev, n) + event_size; +size = virtio_queue_get_used_size(vdev, n); len = address_space_cache_init(>used, vdev->dma_as, vq->vring.used, size, true); if (len < size) { @@ -181,7 +179,7 @@ static void virtio_init_region_cache(VirtIODevice *vdev, int n) goto err_used; } -size = virtio_queue_get_avail_size(vdev, n) + event_size; +size = virtio_queue_get_avail_size(vdev, n); len = address_space_cache_init(>avail, vdev->dma_as, vq->vring.avail, size, false); if (len < size) { @@ -2335,14 +2333,20 @@ hwaddr virtio_queue_get_desc_size(VirtIODevice *vdev, int n) hwaddr virtio_queue_get_avail_size(VirtIODevice *vdev, int n) { +int s; + +s = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0; return offsetof(VRingAvail, ring) + -sizeof(uint16_t) * vdev->vq[n].vring.num; +sizeof(uint16_t) * vdev->vq[n].vring.num + s; } hwaddr virtio_queue_get_used_size(VirtIODevice *vdev, int n) { +int s; + +s = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX) ? 
2 : 0; return offsetof(VRingUsed, ring) + -sizeof(VRingUsedElem) * vdev->vq[n].vring.num; +sizeof(VRingUsedElem) * vdev->vq[n].vring.num + s; } uint16_t virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n) -- 1.8.3.1
[Qemu-devel] [PATCH 10/11] virtio: migration support for packed ring
From: Wei Xu Both userspace and vhost-net/user are supported with this patch. A new subsection is introduced for packed ring, only 'last_avail_idx' and 'last_avail_wrap_counter' are saved/loaded presumably based on all the others relevant data(inuse, used/avail index and wrap count should be the same. Signed-off-by: Wei Xu --- hw/virtio/virtio.c | 69 +++--- 1 file changed, 66 insertions(+), 3 deletions(-) diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 8cfc7b6..7c5de07 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -2349,6 +2349,13 @@ static bool virtio_virtqueue_needed(void *opaque) return virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1); } +static bool virtio_packed_virtqueue_needed(void *opaque) +{ +VirtIODevice *vdev = opaque; + +return virtio_host_has_feature(vdev, VIRTIO_F_RING_PACKED); +} + static bool virtio_ringsize_needed(void *opaque) { VirtIODevice *vdev = opaque; @@ -2390,6 +2397,17 @@ static const VMStateDescription vmstate_virtqueue = { } }; +static const VMStateDescription vmstate_packed_virtqueue = { +.name = "packed_virtqueue_state", +.version_id = 1, +.minimum_version_id = 1, +.fields = (VMStateField[]) { +VMSTATE_UINT16(last_avail_idx, struct VirtQueue), +VMSTATE_BOOL(last_avail_wrap_counter, struct VirtQueue), +VMSTATE_END_OF_LIST() +} +}; + static const VMStateDescription vmstate_virtio_virtqueues = { .name = "virtio/virtqueues", .version_id = 1, @@ -2402,6 +2420,18 @@ static const VMStateDescription vmstate_virtio_virtqueues = { } }; +static const VMStateDescription vmstate_virtio_packed_virtqueues = { +.name = "virtio/packed_virtqueues", +.version_id = 1, +.minimum_version_id = 1, +.needed = _packed_virtqueue_needed, +.fields = (VMStateField[]) { +VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice, + VIRTIO_QUEUE_MAX, 0, vmstate_packed_virtqueue, VirtQueue), +VMSTATE_END_OF_LIST() +} +}; + static const VMStateDescription vmstate_ringsize = { .name = "ringsize_state", .version_id = 1, @@ -2522,6 +2552,7 @@ 
static const VMStateDescription vmstate_virtio = { _virtio_ringsize, _virtio_broken, _virtio_extra_state, +_virtio_packed_virtqueues, NULL } }; @@ -2794,6 +2825,17 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id) virtio_queue_update_rings(vdev, i); } +if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) { +vdev->vq[i].shadow_avail_idx = vdev->vq[i].last_avail_idx; +vdev->vq[i].avail_wrap_counter = +vdev->vq[i].last_avail_wrap_counter; + +vdev->vq[i].used_idx = vdev->vq[i].last_avail_idx; +vdev->vq[i].used_wrap_counter = +vdev->vq[i].last_avail_wrap_counter; +continue; +} + nheads = vring_avail_idx(>vq[i]) - vdev->vq[i].last_avail_idx; /* Check it isn't doing strange things with descriptor numbers. */ if (nheads > vdev->vq[i].vring.num) { @@ -2955,17 +2997,34 @@ hwaddr virtio_queue_get_used_size(VirtIODevice *vdev, int n) uint16_t virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n) { -return vdev->vq[n].last_avail_idx; +uint16_t idx; + +if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) { +idx = vdev->vq[n].last_avail_idx; +idx |= ((int)vdev->vq[n].avail_wrap_counter) << 15; +} else { +idx = (int)vdev->vq[n].last_avail_idx; +} +return idx; } void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n, uint16_t idx) { -vdev->vq[n].last_avail_idx = idx; -vdev->vq[n].shadow_avail_idx = idx; +if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) { +vdev->vq[n].last_avail_idx = idx & 0x7fff; +vdev->vq[n].avail_wrap_counter = !!(idx & 0x8000); +} else { +vdev->vq[n].last_avail_idx = idx; +vdev->vq[n].shadow_avail_idx = idx; +} } void virtio_queue_restore_last_avail_idx(VirtIODevice *vdev, int n) { +if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) { +return; +} + rcu_read_lock(); if (vdev->vq[n].vring.desc) { vdev->vq[n].last_avail_idx = vring_used_idx(>vq[n]); @@ -2976,6 +3035,10 @@ void virtio_queue_restore_last_avail_idx(VirtIODevice *vdev, int n) void virtio_queue_update_used_idx(VirtIODevice *vdev, int n) { +if 
(virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) { +return; +} + rcu_read_lock(); if (vdev->vq[n].vring.desc) { vdev->vq[n].used_idx = vring_used_idx(>vq[n]); -- 1.8.3.1
[Qemu-devel] [PATCH 05/11] virtio: queue/descriptor check helpers for packed ring
From: Wei Xu These are descriptor available and queue empty check helpers. Signed-off-by: Wei Xu --- hw/virtio/virtio.c | 56 +- 1 file changed, 55 insertions(+), 1 deletion(-) diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 54dc098..f2ff980 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -368,6 +368,25 @@ int virtio_queue_ready(VirtQueue *vq) return vq->vring.avail != 0; } +static void vring_packed_desc_read_flags(VirtIODevice *vdev, +VRingPackedDesc *desc, MemoryRegionCache *cache, int i) +{ +address_space_read_cached(cache, + i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, flags), + >flags, sizeof(desc->flags)); +virtio_tswap16s(vdev, >flags); +} + +static inline bool is_desc_avail(struct VRingPackedDesc *desc, +bool wrap_counter) +{ +bool avail, used; + +avail = !!(desc->flags & (1 << VRING_PACKED_DESC_F_AVAIL)); +used = !!(desc->flags & (1 << VRING_PACKED_DESC_F_USED)); +return (avail != used) && (avail == wrap_counter); +} + /* Fetch avail_idx from VQ memory only when we really need to know if * guest has added some buffers. * Called within rcu_read_lock(). 
*/ @@ -388,7 +407,7 @@ static int virtio_queue_empty_rcu(VirtQueue *vq) return vring_avail_idx(vq) == vq->last_avail_idx; } -int virtio_queue_empty(VirtQueue *vq) +static int virtio_queue_split_empty(VirtQueue *vq) { bool empty; @@ -410,6 +429,41 @@ int virtio_queue_empty(VirtQueue *vq) return empty; } +static int virtio_queue_packed_empty_rcu(VirtQueue *vq) +{ +struct VRingPackedDesc desc; +VRingMemoryRegionCaches *cache; + +if (unlikely(!vq->vring.desc)) { +return 1; +} + +cache = vring_get_region_caches(vq); +vring_packed_desc_read_flags(vq->vdev, , >desc, +vq->last_avail_idx); + +return !is_desc_avail(, vq->last_avail_wrap_counter); +} + +static int virtio_queue_packed_empty(VirtQueue *vq) +{ +bool empty; + +rcu_read_lock(); +empty = virtio_queue_packed_empty_rcu(vq); +rcu_read_unlock(); +return empty; +} + +int virtio_queue_empty(VirtQueue *vq) +{ +if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) { +return virtio_queue_packed_empty(vq); +} else { +return virtio_queue_split_empty(vq); +} +} + static void virtqueue_unmap_sg(VirtQueue *vq, const VirtQueueElement *elem, unsigned int len) { -- 1.8.3.1
[Qemu-devel] [PATCH 11/11] virtio: CLI and provide packed ring feature bit by default
From: Wei Xu Add userspace and vhost kernel/user support. Add CLI "ring_packed=true/false" to enable/disable packed ring provision. Usage: -device virtio-net-pci,netdev=xx,mac=xx:xx:xx:xx:xx:xx,ring_packed=false By default it is provided. Signed-off-by: Wei Xu --- hw/net/vhost_net.c | 2 ++ include/hw/virtio/virtio.h | 4 +++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c index e037db6..f593086 100644 --- a/hw/net/vhost_net.c +++ b/hw/net/vhost_net.c @@ -53,6 +53,7 @@ static const int kernel_feature_bits[] = { VIRTIO_F_VERSION_1, VIRTIO_NET_F_MTU, VIRTIO_F_IOMMU_PLATFORM, +VIRTIO_F_RING_PACKED, VHOST_INVALID_FEATURE_BIT }; @@ -78,6 +79,7 @@ static const int user_feature_bits[] = { VIRTIO_NET_F_MRG_RXBUF, VIRTIO_NET_F_MTU, VIRTIO_F_IOMMU_PLATFORM, +VIRTIO_F_RING_PACKED, /* This bit implies RARP isn't sent by QEMU out of band */ VIRTIO_NET_F_GUEST_ANNOUNCE, diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h index 9c1fa07..2eb27d2 100644 --- a/include/hw/virtio/virtio.h +++ b/include/hw/virtio/virtio.h @@ -264,7 +264,9 @@ typedef struct VirtIORNGConf VirtIORNGConf; DEFINE_PROP_BIT64("any_layout", _state, _field, \ VIRTIO_F_ANY_LAYOUT, true), \ DEFINE_PROP_BIT64("iommu_platform", _state, _field, \ - VIRTIO_F_IOMMU_PLATFORM, false) + VIRTIO_F_IOMMU_PLATFORM, false), \ +DEFINE_PROP_BIT64("ring_packed", _state, _field, \ + VIRTIO_F_RING_PACKED, true) hwaddr virtio_queue_get_desc_addr(VirtIODevice *vdev, int n); hwaddr virtio_queue_get_avail_addr(VirtIODevice *vdev, int n); -- 1.8.3.1
[Qemu-devel] [PATCH 06/11] virtio: get avail bytes check for packed ring
From: Wei Xu Add packed ring headcount check. Common part of split/packed ring are kept. Signed-off-by: Wei Xu --- hw/virtio/virtio.c | 197 - 1 file changed, 179 insertions(+), 18 deletions(-) diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index f2ff980..832287b 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -368,6 +368,17 @@ int virtio_queue_ready(VirtQueue *vq) return vq->vring.avail != 0; } +static void vring_packed_desc_read(VirtIODevice *vdev, VRingPackedDesc *desc, +MemoryRegionCache *cache, int i) +{ +address_space_read_cached(cache, i * sizeof(VRingPackedDesc), + desc, sizeof(VRingPackedDesc)); +virtio_tswap16s(vdev, >flags); +virtio_tswap64s(vdev, >addr); +virtio_tswap32s(vdev, >len); +virtio_tswap16s(vdev, >id); +} + static void vring_packed_desc_read_flags(VirtIODevice *vdev, VRingPackedDesc *desc, MemoryRegionCache *cache, int i) { @@ -667,9 +678,9 @@ static int virtqueue_read_next_desc(VirtIODevice *vdev, VRingDesc *desc, return VIRTQUEUE_READ_DESC_MORE; } -void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes, - unsigned int *out_bytes, - unsigned max_in_bytes, unsigned max_out_bytes) +static void virtqueue_split_get_avail_bytes(VirtQueue *vq, +unsigned int *in_bytes, unsigned int *out_bytes, +unsigned max_in_bytes, unsigned max_out_bytes) { VirtIODevice *vdev = vq->vdev; unsigned int max, idx; @@ -679,27 +690,12 @@ void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes, int64_t len = 0; int rc; -if (unlikely(!vq->vring.desc)) { -if (in_bytes) { -*in_bytes = 0; -} -if (out_bytes) { -*out_bytes = 0; -} -return; -} - rcu_read_lock(); idx = vq->last_avail_idx; total_bufs = in_total = out_total = 0; max = vq->vring.num; caches = vring_get_region_caches(vq); -if (caches->desc.len < max * sizeof(VRingDesc)) { -virtio_error(vdev, "Cannot map descriptor ring"); -goto err; -} - while ((rc = virtqueue_num_heads(vq, idx)) > 0) { MemoryRegionCache *desc_cache = >desc; unsigned int num_bufs; @@ -792,6 +788,171 @@ 
err: goto done; } +static void virtqueue_packed_get_avail_bytes(VirtQueue *vq, +unsigned int *in_bytes, unsigned int *out_bytes, +unsigned max_in_bytes, unsigned max_out_bytes) +{ +VirtIODevice *vdev = vq->vdev; +unsigned int max, idx; +unsigned int total_bufs, in_total, out_total; +MemoryRegionCache *desc_cache; +VRingMemoryRegionCaches *caches; +MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID; +int64_t len = 0; +VRingPackedDesc desc; +bool wrap_counter; + +rcu_read_lock(); +idx = vq->last_avail_idx; +wrap_counter = vq->last_avail_wrap_counter; +total_bufs = in_total = out_total = 0; + +max = vq->vring.num; +caches = vring_get_region_caches(vq); +desc_cache = >desc; +vring_packed_desc_read_flags(vdev, , desc_cache, idx); +while (is_desc_avail(, wrap_counter)) { +unsigned int num_bufs; +unsigned int i = 0; + +num_bufs = total_bufs; + +/* Make sure flags has been read before all the fields. */ +smp_rmb(); +vring_packed_desc_read(vdev, , desc_cache, idx); + +if (desc.flags & VRING_DESC_F_INDIRECT) { +if (desc.len % sizeof(VRingPackedDesc)) { +virtio_error(vdev, "Invalid size for indirect buffer table"); +goto err; +} + +/* If we've got too many, that implies a descriptor loop. */ +if (num_bufs >= max) { +virtio_error(vdev, "Looped descriptor"); +goto err; +} + +/* loop over the indirect descriptor table */ +len = address_space_cache_init(_desc_cache, + vdev->dma_as, + desc.addr, desc.len, false); +desc_cache = _desc_cache; +if (len < desc.len) { +virtio_error(vdev, "Cannot map indirect buffer"); +goto err; +} + +max = desc.len / sizeof(VRingPackedDesc); +num_bufs = i = 0; +vring_packed_desc_read(vdev, , desc_cache, i); +} + +do { +/* If we've got too many, that implies a descriptor loop. */ +if (++num_bufs > max) { +virtio_error(vdev, "Looped descriptor"); +goto err; +} + +if (desc.flags & VRING_DESC_F_WRITE) { +in_total += desc.len; +}
[Qemu-devel] [PATCH 03/11] virtio: initialize packed ring region
From: Wei Xu Initialize packed ring memory region with correct size and attribute. Signed-off-by: Wei Xu --- hw/virtio/virtio.c | 14 -- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 6769e54..1a98e61 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -156,7 +156,7 @@ static void virtio_init_region_cache(VirtIODevice *vdev, int n) VRingMemoryRegionCaches *new = NULL; hwaddr addr, size; int64_t len; - +bool attr; addr = vq->vring.desc; if (!addr) { @@ -164,8 +164,10 @@ static void virtio_init_region_cache(VirtIODevice *vdev, int n) } new = g_new0(VRingMemoryRegionCaches, 1); size = virtio_queue_get_desc_size(vdev, n); +attr = virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED) ? + true : false; len = address_space_cache_init(>desc, vdev->dma_as, - addr, size, false); + addr, size, attr); if (len < size) { virtio_error(vdev, "Cannot map desc"); goto err_desc; @@ -2335,6 +2337,10 @@ hwaddr virtio_queue_get_avail_size(VirtIODevice *vdev, int n) { int s; +if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) { +return sizeof(struct VRingPackedDescEvent); +} + s = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0; return offsetof(VRingAvail, ring) + sizeof(uint16_t) * vdev->vq[n].vring.num + s; @@ -2344,6 +2350,10 @@ hwaddr virtio_queue_get_used_size(VirtIODevice *vdev, int n) { int s; +if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) { +return sizeof(struct VRingPackedDescEvent); +} + s = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0; return offsetof(VRingUsed, ring) + sizeof(VRingUsedElem) * vdev->vq[n].vring.num + s; -- 1.8.3.1
[Qemu-devel] [PATCH 08/11] virtio: event suppression support for packed ring
From: Wei Xu Difference between 'avail_wrap_counter' and 'last_avail_wrap_counter': For Tx(guest transmitting), they are the same after each pop of a desc. For Rx(guest receiving), they are also the same when there are enough descriptors to carry the payload for a packet(e.g. usually 16 descs are needed for a 64k packet in typical iperf tcp connection with tso enabled), however, when the ring is running out of descriptors while there are still a few free ones, e.g. 6 descriptors are available which is not enough to carry an entire packet which needs 16 descriptors, in this case the 'avail_wrap_counter' should be set as the first one pending being handled by guest driver in order to get a notification, and the 'last_avail_wrap_counter' should stay unchanged to the head of available descriptors, like below: Mark meaning: | | -- available |*| -- used A Snapshot of the queue: last_avail_idx = 253 last_avail_wrap_counter = 1 | +-+ 0 | | | |*|*|*|*|*|*|*|*|*|*|*|*|*|*|*|*|*| | | | 255 +-+ | shadow_avail_idx = 3 avail_wrap_counter = 0 Signed-off-by: Wei Xu --- hw/virtio/virtio.c | 137 + 1 file changed, 128 insertions(+), 9 deletions(-) diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 7e276b4..8cfc7b6 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -234,6 +234,34 @@ static void vring_desc_read(VirtIODevice *vdev, VRingDesc *desc, virtio_tswap16s(vdev, >next); } +static void vring_packed_event_read(VirtIODevice *vdev, +MemoryRegionCache *cache, VRingPackedDescEvent *e) +{ +address_space_read_cached(cache, 0, e, sizeof(*e)); +virtio_tswap16s(vdev, >off_wrap); +virtio_tswap16s(vdev, >flags); +} + +static void vring_packed_off_wrap_write(VirtIODevice *vdev, +MemoryRegionCache *cache, uint16_t off_wrap) +{ +virtio_tswap16s(vdev, _wrap); +address_space_write_cached(cache, offsetof(VRingPackedDescEvent, off_wrap), +_wrap, sizeof(off_wrap)); +address_space_cache_invalidate(cache, +offsetof(VRingPackedDescEvent, off_wrap), sizeof(off_wrap)); +} + +static void 
vring_packed_flags_write(VirtIODevice *vdev, +MemoryRegionCache *cache, uint16_t flags) +{ +virtio_tswap16s(vdev, ); +address_space_write_cached(cache, offsetof(VRingPackedDescEvent, flags), +, sizeof(flags)); +address_space_cache_invalidate(cache, +offsetof(VRingPackedDescEvent, flags), sizeof(flags)); +} + static VRingMemoryRegionCaches *vring_get_region_caches(struct VirtQueue *vq) { VRingMemoryRegionCaches *caches = atomic_rcu_read(>vring.caches); @@ -340,14 +368,8 @@ static inline void vring_set_avail_event(VirtQueue *vq, uint16_t val) address_space_cache_invalidate(>used, pa, sizeof(val)); } -void virtio_queue_set_notification(VirtQueue *vq, int enable) +static void virtio_queue_set_notification_split(VirtQueue *vq, int enable) { -vq->notification = enable; - -if (!vq->vring.desc) { -return; -} - rcu_read_lock(); if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) { vring_set_avail_event(vq, vring_avail_idx(vq)); @@ -363,6 +385,57 @@ void virtio_queue_set_notification(VirtQueue *vq, int enable) rcu_read_unlock(); } +static void virtio_queue_set_notification_packed(VirtQueue *vq, int enable) +{ +VRingPackedDescEvent e; +VRingMemoryRegionCaches *caches; + +rcu_read_lock(); +caches = vring_get_region_caches(vq); +vring_packed_event_read(vq->vdev, >used, ); + +if (!enable) { +if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) { +/* no need to write device area since this is outdated. 
*/ +goto out; +} + +e.flags = VRING_PACKED_EVENT_FLAG_DISABLE; +goto update; +} + +e.flags = VRING_PACKED_EVENT_FLAG_ENABLE; +if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) { +uint16_t off_wrap = vq->shadow_avail_idx | vq->avail_wrap_counter << 15; + +vring_packed_off_wrap_write(vq->vdev, >used, off_wrap); +/* Make sure off_wrap is wrote before flags */ +smp_wmb(); + +e.flags = VRING_PACKED_EVENT_FLAG_DESC; +} + +update: +vring_packed_flags_write(vq->vdev, >used, e.flags); +out: +rcu_read_unlock(); +} + +void virtio_queue_set_notification(VirtQueue *vq, int enable) +{ +vq->notification = enable; + +if (!vq->vring.desc) { +return; +} + +if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) { +virtio_queue_set_notification_packed(vq, enable); +} else { +
[Qemu-devel] [PATCH 02/11] virtio: device/driver area size calculation helper for split ring
From: Wei Xu There is slight size difference between split/packed rings. This is a refactor of split ring as well as a helper to expand device and driver area size calculation for packed ring. Signed-off-by: Wei Xu --- hw/virtio/virtio.c | 16 ++-- 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index eafb4cc..6769e54 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -155,10 +155,8 @@ static void virtio_init_region_cache(VirtIODevice *vdev, int n) VRingMemoryRegionCaches *old = vq->vring.caches; VRingMemoryRegionCaches *new = NULL; hwaddr addr, size; -int event_size; int64_t len; -event_size = virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0; addr = vq->vring.desc; if (!addr) { @@ -173,7 +171,7 @@ static void virtio_init_region_cache(VirtIODevice *vdev, int n) goto err_desc; } -size = virtio_queue_get_used_size(vdev, n) + event_size; +size = virtio_queue_get_used_size(vdev, n); len = address_space_cache_init(>used, vdev->dma_as, vq->vring.used, size, true); if (len < size) { @@ -181,7 +179,7 @@ static void virtio_init_region_cache(VirtIODevice *vdev, int n) goto err_used; } -size = virtio_queue_get_avail_size(vdev, n) + event_size; +size = virtio_queue_get_avail_size(vdev, n); len = address_space_cache_init(>avail, vdev->dma_as, vq->vring.avail, size, false); if (len < size) { @@ -2335,14 +2333,20 @@ hwaddr virtio_queue_get_desc_size(VirtIODevice *vdev, int n) hwaddr virtio_queue_get_avail_size(VirtIODevice *vdev, int n) { +int s; + +s = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0; return offsetof(VRingAvail, ring) + -sizeof(uint16_t) * vdev->vq[n].vring.num; +sizeof(uint16_t) * vdev->vq[n].vring.num + s; } hwaddr virtio_queue_get_used_size(VirtIODevice *vdev, int n) { +int s; + +s = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX) ? 
2 : 0; return offsetof(VRingUsed, ring) + -sizeof(VRingUsedElem) * vdev->vq[n].vring.num; +sizeof(VRingUsedElem) * vdev->vq[n].vring.num + s; } uint16_t virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n) -- 1.8.3.1
[Qemu-devel] [PATCH 04/11] virtio: initialize wrap counter for packed ring
From: Wei Xu Set to 'true' by default as required by the spec. Signed-off-by: Wei Xu --- hw/virtio/virtio.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 1a98e61..54dc098 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -1238,6 +1238,9 @@ void virtio_reset(void *opaque) vdev->vq[i].last_avail_idx = 0; vdev->vq[i].shadow_avail_idx = 0; vdev->vq[i].used_idx = 0; +vdev->vq[i].last_avail_wrap_counter = true; +vdev->vq[i].avail_wrap_counter = true; +vdev->vq[i].used_wrap_counter = true; virtio_queue_set_vector(vdev, i, VIRTIO_NO_VECTOR); vdev->vq[i].signalled_used = 0; vdev->vq[i].signalled_used_valid = false; -- 1.8.3.1
[Qemu-devel] [PATCH 07/11] virtio: fill/flush/pop for packed ring
From: Wei Xu last_used_idx/wrap_counter should be equal to last_avail_idx/wrap_counter after a successful flush. Batching in vhost-net & dpdk testpmd is not equivalently supported in userspace backend, but a chained descriptors for Rx is similarly presented as a lightweight batch, so a write barrier is nailed only for the first(head) descriptor. Signed-off-by: Wei Xu --- hw/virtio/virtio.c | 291 + 1 file changed, 274 insertions(+), 17 deletions(-) diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 832287b..7e276b4 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -379,6 +379,25 @@ static void vring_packed_desc_read(VirtIODevice *vdev, VRingPackedDesc *desc, virtio_tswap16s(vdev, >id); } +static void vring_packed_desc_write_data(VirtIODevice *vdev, +VRingPackedDesc *desc, MemoryRegionCache *cache, int i) +{ +virtio_tswap32s(vdev, >len); +virtio_tswap16s(vdev, >id); +address_space_write_cached(cache, + i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, id), + >id, sizeof(desc->id)); +address_space_cache_invalidate(cache, + i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, id), + sizeof(desc->id)); +address_space_write_cached(cache, + i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, len), + >len, sizeof(desc->len)); +address_space_cache_invalidate(cache, + i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, len), + sizeof(desc->len)); +} + static void vring_packed_desc_read_flags(VirtIODevice *vdev, VRingPackedDesc *desc, MemoryRegionCache *cache, int i) { @@ -388,6 +407,18 @@ static void vring_packed_desc_read_flags(VirtIODevice *vdev, virtio_tswap16s(vdev, >flags); } +static void vring_packed_desc_write_flags(VirtIODevice *vdev, +VRingPackedDesc *desc, MemoryRegionCache *cache, int i) +{ +virtio_tswap16s(vdev, >flags); +address_space_write_cached(cache, + i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, flags), + >flags, sizeof(desc->flags)); +address_space_cache_invalidate(cache, + i * sizeof(VRingPackedDesc) + 
offsetof(VRingPackedDesc, flags), + sizeof(desc->flags)); +} + static inline bool is_desc_avail(struct VRingPackedDesc *desc, bool wrap_counter) { @@ -554,19 +585,11 @@ bool virtqueue_rewind(VirtQueue *vq, unsigned int num) } /* Called within rcu_read_lock(). */ -void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem, +static void virtqueue_split_fill(VirtQueue *vq, const VirtQueueElement *elem, unsigned int len, unsigned int idx) { VRingUsedElem uelem; -trace_virtqueue_fill(vq, elem, len, idx); - -virtqueue_unmap_sg(vq, elem, len); - -if (unlikely(vq->vdev->broken)) { -return; -} - if (unlikely(!vq->vring.used)) { return; } @@ -578,16 +601,71 @@ void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem, vring_used_write(vq, , idx); } -/* Called within rcu_read_lock(). */ -void virtqueue_flush(VirtQueue *vq, unsigned int count) +static void virtqueue_packed_fill(VirtQueue *vq, const VirtQueueElement *elem, +unsigned int len, unsigned int idx) { -uint16_t old, new; +uint16_t head; +VRingMemoryRegionCaches *caches; +VRingPackedDesc desc = { +.flags = 0, +.id = elem->index, +.len = len, +}; +bool wrap_counter = vq->used_wrap_counter; + +if (unlikely(!vq->vring.desc)) { +return; +} + +head = vq->used_idx + idx; +if (head >= vq->vring.num) { +head -= vq->vring.num; +wrap_counter ^= 1; +} +if (wrap_counter) { +desc.flags |= (1 << VRING_PACKED_DESC_F_AVAIL); +desc.flags |= (1 << VRING_PACKED_DESC_F_USED); +} else { +desc.flags &= ~(1 << VRING_PACKED_DESC_F_AVAIL); +desc.flags &= ~(1 << VRING_PACKED_DESC_F_USED); +} + +caches = vring_get_region_caches(vq); +vring_packed_desc_write_data(vq->vdev, , >desc, head); +if (idx == 0) { +/* + * Make sure descriptor id and len is written before + * flags for the first used buffer. 
+ */ +smp_wmb(); +} + +vring_packed_desc_write_flags(vq->vdev, , >desc, head); +} + +void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem, +unsigned int len, unsigned int idx) +{ +trace_virtqueue_fill(vq, elem, len, idx); + +virtqueue_unmap_sg(vq, elem, len); if (unlikely(vq->vdev->broken)) { -vq->inuse -= count; return; } +if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) { +virtqueue_packed_fill(vq, elem, len, idx); +} else { +virtqueue_split_fill(vq, elem,
[Qemu-devel] [PATCH 09/11] virtio-net: update the head descriptor in a chain lastly
From: Wei Xu This is a helper for packed ring. To support packed ring, the head descriptor in a chain should be updated lastly since, unlike split ring, packed ring has no 'avail_idx' to explicitly tell the driver side that all payload is ready after having done the chain, so the head is always visible immediately. This patch fills the head descriptor after all the other ones are done. Signed-off-by: Wei Xu --- hw/net/virtio-net.c | 11 ++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c index 3f319ef..330abea 100644 --- a/hw/net/virtio-net.c +++ b/hw/net/virtio-net.c @@ -1251,6 +1251,8 @@ static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf, struct virtio_net_hdr_mrg_rxbuf mhdr; unsigned mhdr_cnt = 0; size_t offset, i, guest_offset; +VirtQueueElement head; +int head_len = 0; if (!virtio_net_can_receive(nc)) { return -1; @@ -1328,7 +1330,13 @@ static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf, } /* signal other side */ -virtqueue_fill(q->rx_vq, elem, total, i++); +if (i == 0) { +head_len = total; +head = *elem; +} else { +virtqueue_fill(q->rx_vq, elem, len, i); +} +i++; g_free(elem); } @@ -1339,6 +1347,7 @@ static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf, _buffers, sizeof mhdr.num_buffers); } +virtqueue_fill(q->rx_vq, , head_len, 0); virtqueue_flush(q->rx_vq, i); virtio_notify(vdev, q->rx_vq); -- 1.8.3.1
[Qemu-devel] [PATCH 01/11] virtio: rename structure for packed ring
From: Wei Xu Redefine packed ring structure according to Qemu nomenclature. Signed-off-by: Wei Xu --- hw/virtio/virtio.c | 15 +++ 1 file changed, 15 insertions(+) diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index a1ff647..eafb4cc 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -39,6 +39,13 @@ typedef struct VRingDesc uint16_t next; } VRingDesc; +typedef struct VRingPackedDesc { +uint64_t addr; +uint32_t len; +uint16_t id; +uint16_t flags; +} VRingPackedDesc; + typedef struct VRingAvail { uint16_t flags; @@ -77,17 +84,25 @@ typedef struct VRing VRingMemoryRegionCaches *caches; } VRing; +typedef struct VRingPackedDescEvent { +uint16_t off_wrap; +uint16_t flags; +} VRingPackedDescEvent ; + struct VirtQueue { VRing vring; /* Next head to pop */ uint16_t last_avail_idx; +bool last_avail_wrap_counter; /* Last avail_idx read from VQ. */ uint16_t shadow_avail_idx; +bool avail_wrap_counter; uint16_t used_idx; +bool used_wrap_counter; /* Last used index value we have signalled on */ uint16_t signalled_used; -- 1.8.3.1
[Qemu-devel] [PATCH v3 00/11] packed ring virtio-net backends support
From: Wei Xu https://github.com/Whishay/qemu.git Userspace and vhost-net backend tests have been done with upstream kernel in guest. v2->v3 v2/01 - drop it since the header has been synchronized from kernel.(mst & jason) v3/01 - rename 'avail_wrap_counter' to 'last_avail_wrap_counter', 'event_wrap_counter' to 'avail_wrap_counter' to make it easier to understand.(Jason) - revise commit message.(Jason) v3/02 - split packed ring areas size calculation to next patch.(Jason) to not break bisect(Jason). v3/03 - initialize packed ring region with correct size and attribute. - remove unnecessary 'else' checks. (Jason) v3/06 - add commit log. - replace 'event_wrap-counter' with 'avail_wrap_counter'. - merge common memory cache size check to virtqueue_get_avail_bytes().(Jason) - revise memory barrier comment.(Jason) - check indirect descriptors by desc.len/sizeof(desc).(Jason) - flip wrap counter with '^=1'.(Jason) v3/07 - move desc.id/len initialization to the declaration.(Jason) - flip wrap counter '!' with '^=1'.(Jason) - add memory barrier comments in commit message. v3/08 - use offsetof() when writing cache.(Jason) - avoid duplicated memory region write when turning off event_idx supported notification.(Jason) - add commit log.(Jason) - add avail & last_avail wrap counter difference description in commit log. v3/09 - remove unnecessary used/avail idx/wrap-counter from subsection. - put new subsection to the end of vmstate_virtio.(Jason) - squash the two userspace and vhost-net migration patches in v2.(Jason) v3/10 - reword commit message. - this is a helper, not a bug fix, so I would like to keep it as a separate patch still.(a merge was proposed by Jason) - the virtqueue_fill() is also not like an API so I would prefer not to touch it, please correct me if I did not get it in the right way.(a squash was proposed by Jason) v3/11 - squash feature bits for user space and vhost kernel/user backends.
- enable packed ring feature bit provision on host by default.(Jason) Wei Xu (11): virtio: rename structure for packed ring virtio: device/driver area size calculation helper for split ring virtio: initialize packed ring region virtio: initialize wrap counter for packed ring virtio: queue/descriptor check helpers for packed ring virtio: get avail bytes check for packed ring virtio: fill/flush/pop for packed ring virtio: event suppression support for packed ring virtio-net: update the head descriptor in a chain lastly virtio: migration support for packed ring virtio: CLI and provide packed ring feature bit by default hw/net/vhost_net.c | 2 + hw/net/virtio-net.c| 11 +- hw/virtio/virtio.c | 798 + include/hw/virtio/virtio.h | 4 +- 4 files changed, 757 insertions(+), 58 deletions(-) -- 1.8.3.1
[Qemu-devel] [PATCH v2 15/15] virtio: enable packed ring via a new command line
From: Wei Xu Signed-off-by: Wei Xu --- include/hw/virtio/virtio.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h index 9c1fa07..cb286bb 100644 --- a/include/hw/virtio/virtio.h +++ b/include/hw/virtio/virtio.h @@ -264,7 +264,9 @@ typedef struct VirtIORNGConf VirtIORNGConf; DEFINE_PROP_BIT64("any_layout", _state, _field, \ VIRTIO_F_ANY_LAYOUT, true), \ DEFINE_PROP_BIT64("iommu_platform", _state, _field, \ - VIRTIO_F_IOMMU_PLATFORM, false) + VIRTIO_F_IOMMU_PLATFORM, false), \ +DEFINE_PROP_BIT64("ring_packed", _state, _field, \ + VIRTIO_F_RING_PACKED, false) hwaddr virtio_queue_get_desc_addr(VirtIODevice *vdev, int n); hwaddr virtio_queue_get_avail_addr(VirtIODevice *vdev, int n); -- 1.8.3.1
[Qemu-devel] [PATCH v2 14/15] vhost: enable packed ring
From: Wei Xu Signed-off-by: Wei Xu --- hw/net/vhost_net.c | 1 + 1 file changed, 1 insertion(+) diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c index fb4b18f..f593086 100644 --- a/hw/net/vhost_net.c +++ b/hw/net/vhost_net.c @@ -53,6 +53,7 @@ static const int kernel_feature_bits[] = { VIRTIO_F_VERSION_1, VIRTIO_NET_F_MTU, VIRTIO_F_IOMMU_PLATFORM, +VIRTIO_F_RING_PACKED, VHOST_INVALID_FEATURE_BIT }; -- 1.8.3.1
[Qemu-devel] [PATCH v2 11/15] virtio: add userspace migration for packed ring
From: Wei Xu Signed-off-by: Wei Xu --- hw/virtio/virtio.c | 39 +++ 1 file changed, 39 insertions(+) diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 0bcf8a5..722a4fd 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -2346,6 +2346,13 @@ static bool virtio_virtqueue_needed(void *opaque) return virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1); } +static bool virtio_packed_virtqueue_needed(void *opaque) +{ +VirtIODevice *vdev = opaque; + +return virtio_host_has_feature(vdev, VIRTIO_F_RING_PACKED); +} + static bool virtio_ringsize_needed(void *opaque) { VirtIODevice *vdev = opaque; @@ -2387,6 +2394,21 @@ static const VMStateDescription vmstate_virtqueue = { } }; +static const VMStateDescription vmstate_packed_virtqueue = { +.name = "packed_virtqueue_state", +.version_id = 1, +.minimum_version_id = 1, +.fields = (VMStateField[]) { +VMSTATE_BOOL(avail_wrap_counter, struct VirtQueue), +VMSTATE_BOOL(event_wrap_counter, struct VirtQueue), +VMSTATE_BOOL(used_wrap_counter, struct VirtQueue), +VMSTATE_UINT16(used_idx, struct VirtQueue), +VMSTATE_UINT16(shadow_avail_idx, struct VirtQueue), +VMSTATE_UINT32(inuse, struct VirtQueue), +VMSTATE_END_OF_LIST() +} +}; + static const VMStateDescription vmstate_virtio_virtqueues = { .name = "virtio/virtqueues", .version_id = 1, @@ -2399,6 +2421,18 @@ static const VMStateDescription vmstate_virtio_virtqueues = { } }; +static const VMStateDescription vmstate_virtio_packed_virtqueues = { +.name = "virtio/packed_virtqueues", +.version_id = 1, +.minimum_version_id = 1, +.needed = _packed_virtqueue_needed, +.fields = (VMStateField[]) { +VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice, + VIRTIO_QUEUE_MAX, 0, vmstate_packed_virtqueue, VirtQueue), +VMSTATE_END_OF_LIST() +} +}; + static const VMStateDescription vmstate_ringsize = { .name = "ringsize_state", .version_id = 1, @@ -2516,6 +2550,7 @@ static const VMStateDescription vmstate_virtio = { _virtio_device_endian, _virtio_64bit_features, _virtio_virtqueues, 
+_virtio_packed_virtqueues, _virtio_ringsize, _virtio_broken, _virtio_extra_state, @@ -2791,6 +2826,10 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id) virtio_queue_update_rings(vdev, i); } +if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) { +continue; +} + nheads = vring_avail_idx(>vq[i]) - vdev->vq[i].last_avail_idx; /* Check it isn't doing strange things with descriptor numbers. */ if (nheads > vdev->vq[i].vring.num) { -- 1.8.3.1
[Qemu-devel] [PATCH v2 09/15] virtio: event suppression support for packed ring
From: Wei Xu Signed-off-by: Wei Xu --- hw/virtio/virtio.c | 121 +++-- 1 file changed, 118 insertions(+), 3 deletions(-) diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 5562ecd..0bcf8a5 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -238,6 +238,30 @@ static void vring_desc_read(VirtIODevice *vdev, VRingDesc *desc, virtio_tswap16s(vdev, >next); } +static void vring_packed_event_read(VirtIODevice *vdev, +MemoryRegionCache *cache, VRingPackedDescEvent *e) +{ +address_space_read_cached(cache, 0, e, sizeof(*e)); +virtio_tswap16s(vdev, >off_wrap); +virtio_tswap16s(vdev, >flags); +} + +static void vring_packed_off_wrap_write(VirtIODevice *vdev, +MemoryRegionCache *cache, uint16_t off_wrap) +{ +virtio_tswap16s(vdev, _wrap); +address_space_write_cached(cache, 0, _wrap, sizeof(off_wrap)); +address_space_cache_invalidate(cache, 0, sizeof(off_wrap)); +} + +static void vring_packed_flags_write(VirtIODevice *vdev, +MemoryRegionCache *cache, uint16_t flags) +{ +virtio_tswap16s(vdev, ); +address_space_write_cached(cache, sizeof(uint16_t), , sizeof(flags)); +address_space_cache_invalidate(cache, sizeof(uint16_t), sizeof(flags)); +} + static VRingMemoryRegionCaches *vring_get_region_caches(struct VirtQueue *vq) { VRingMemoryRegionCaches *caches = atomic_rcu_read(>vring.caches); @@ -344,7 +368,7 @@ static inline void vring_set_avail_event(VirtQueue *vq, uint16_t val) address_space_cache_invalidate(>used, pa, sizeof(val)); } -void virtio_queue_set_notification(VirtQueue *vq, int enable) +static void virtio_queue_set_notification_split(VirtQueue *vq, int enable) { vq->notification = enable; @@ -367,6 +391,51 @@ void virtio_queue_set_notification(VirtQueue *vq, int enable) rcu_read_unlock(); } +static void virtio_queue_set_notification_packed(VirtQueue *vq, int enable) +{ +VRingPackedDescEvent e; +VRingMemoryRegionCaches *caches; + +rcu_read_lock(); +caches = vring_get_region_caches(vq); +vring_packed_event_read(vq->vdev, >used, ); + +if (!enable) { +e.flags = 
RING_EVENT_FLAGS_DISABLE; +goto out; +} + +e.flags = RING_EVENT_FLAGS_ENABLE; +if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) { +uint16_t off_wrap = vq->shadow_avail_idx | vq->event_wrap_counter << 15; + +vring_packed_off_wrap_write(vq->vdev, >used, off_wrap); +/* Make sure off_wrap is wrote before flags */ +smp_wmb(); + +e.flags = RING_EVENT_FLAGS_DESC; +} + +out: +vring_packed_flags_write(vq->vdev, >used, e.flags); +rcu_read_unlock(); +} + +void virtio_queue_set_notification(VirtQueue *vq, int enable) +{ +vq->notification = enable; + +if (!vq->vring.desc) { +return; +} + +if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) { +virtio_queue_set_notification_packed(vq, enable); +} else { +virtio_queue_set_notification_split(vq, enable); +} +} + int virtio_queue_ready(VirtQueue *vq) { return vq->vring.avail != 0; @@ -2118,8 +2187,7 @@ static void virtio_set_isr(VirtIODevice *vdev, int value) } } -/* Called within rcu_read_lock(). */ -static bool virtio_should_notify(VirtIODevice *vdev, VirtQueue *vq) +static bool virtio_split_should_notify(VirtIODevice *vdev, VirtQueue *vq) { uint16_t old, new; bool v; @@ -2142,6 +2210,53 @@ static bool virtio_should_notify(VirtIODevice *vdev, VirtQueue *vq) return !v || vring_need_event(vring_get_used_event(vq), new, old); } +static bool vring_packed_need_event(VirtQueue *vq, bool wrap, +uint16_t off_wrap, uint16_t new, uint16_t old) +{ +int off = off_wrap & ~(1 << 15); + +if (wrap != off_wrap >> 15) { +off -= vq->vring.num; +} + +return vring_need_event(off, new, old); +} + +static bool virtio_packed_should_notify(VirtIODevice *vdev, VirtQueue *vq) +{ +VRingPackedDescEvent e; +uint16_t old, new; +bool v; +VRingMemoryRegionCaches *caches; + +caches = vring_get_region_caches(vq); +vring_packed_event_read(vdev, >avail, ); + +old = vq->signalled_used; +new = vq->signalled_used = vq->used_idx; +v = vq->signalled_used_valid; +vq->signalled_used_valid = true; + +if (e.flags == RING_EVENT_FLAGS_DISABLE) { +return 
false; +} else if (e.flags == RING_EVENT_FLAGS_ENABLE) { +return true; +} + +return !v || vring_packed_need_event(vq, +vq->used_wrap_counter, e.off_wrap, new, old); +} + +/* Called within rcu_read_lock(). */ +static bool virtio_should_notify(VirtIODevice *vdev, VirtQueue *vq) +{ +if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) { +return virtio_packed_should_notify(vdev, vq); +} else { +return virtio_split_should_notify(vdev,
[Qemu-devel] [PATCH v2 12/15] virtio: add vhost-net migration for packed ring
From: Wei Xu Signed-off-by: Wei Xu --- hw/virtio/virtio.c | 27 --- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 722a4fd..0cb912e 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -2991,17 +2991,34 @@ hwaddr virtio_queue_get_used_size(VirtIODevice *vdev, int n) uint16_t virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n) { -return vdev->vq[n].last_avail_idx; +uint16_t idx; + +if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) { +idx = vdev->vq[n].last_avail_idx; +idx |= ((int)vdev->vq[n].avail_wrap_counter) << 15; +} else { +idx = (int)vdev->vq[n].last_avail_idx; +} +return idx; } void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n, uint16_t idx) { -vdev->vq[n].last_avail_idx = idx; -vdev->vq[n].shadow_avail_idx = idx; +if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) { +vdev->vq[n].last_avail_idx = idx & 0x7fff; +vdev->vq[n].avail_wrap_counter = !!(idx & 0x8000); +} else { +vdev->vq[n].last_avail_idx = idx; +vdev->vq[n].shadow_avail_idx = idx; +} } void virtio_queue_restore_last_avail_idx(VirtIODevice *vdev, int n) { +if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) { +return; +} + rcu_read_lock(); if (vdev->vq[n].vring.desc) { vdev->vq[n].last_avail_idx = vring_used_idx(>vq[n]); @@ -3012,6 +3029,10 @@ void virtio_queue_restore_last_avail_idx(VirtIODevice *vdev, int n) void virtio_queue_update_used_idx(VirtIODevice *vdev, int n) { +if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) { +return; +} + rcu_read_lock(); if (vdev->vq[n].vring.desc) { vdev->vq[n].used_idx = vring_used_idx(>vq[n]); -- 1.8.3.1
[Qemu-devel] [PATCH v2 13/15] virtio: packed ring feature bit for userspace backend
From: Wei Xu Signed-off-by: Wei Xu --- hw/net/vhost_net.c | 1 + 1 file changed, 1 insertion(+) diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c index e037db6..fb4b18f 100644 --- a/hw/net/vhost_net.c +++ b/hw/net/vhost_net.c @@ -78,6 +78,7 @@ static const int user_feature_bits[] = { VIRTIO_NET_F_MRG_RXBUF, VIRTIO_NET_F_MTU, VIRTIO_F_IOMMU_PLATFORM, +VIRTIO_F_RING_PACKED, /* This bit implies RARP isn't sent by QEMU out of band */ VIRTIO_NET_F_GUEST_ANNOUNCE, -- 1.8.3.1
[Qemu-devel] [PATCH v2 07/15] virtio: get avail bytes check for packed ring
From: Wei Xu Signed-off-by: Wei Xu --- hw/virtio/virtio.c | 180 + 1 file changed, 167 insertions(+), 13 deletions(-) diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index e728201..cb599e9 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -372,6 +372,17 @@ int virtio_queue_ready(VirtQueue *vq) return vq->vring.avail != 0; } +static void vring_packed_desc_read(VirtIODevice *vdev, VRingPackedDesc *desc, +MemoryRegionCache *cache, int i) +{ +address_space_read_cached(cache, i * sizeof(VRingPackedDesc), + desc, sizeof(VRingPackedDesc)); +virtio_tswap16s(vdev, >flags); +virtio_tswap64s(vdev, >addr); +virtio_tswap32s(vdev, >len); +virtio_tswap16s(vdev, >id); +} + static void vring_packed_desc_read_flags(VirtIODevice *vdev, VRingPackedDesc *desc, MemoryRegionCache *cache, int i) { @@ -671,9 +682,9 @@ static int virtqueue_read_next_desc(VirtIODevice *vdev, VRingDesc *desc, return VIRTQUEUE_READ_DESC_MORE; } -void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes, - unsigned int *out_bytes, - unsigned max_in_bytes, unsigned max_out_bytes) +static void virtqueue_split_get_avail_bytes(VirtQueue *vq, +unsigned int *in_bytes, unsigned int *out_bytes, +unsigned max_in_bytes, unsigned max_out_bytes) { VirtIODevice *vdev = vq->vdev; unsigned int max, idx; @@ -683,16 +694,6 @@ void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes, int64_t len = 0; int rc; -if (unlikely(!vq->vring.desc)) { -if (in_bytes) { -*in_bytes = 0; -} -if (out_bytes) { -*out_bytes = 0; -} -return; -} - rcu_read_lock(); idx = vq->last_avail_idx; total_bufs = in_total = out_total = 0; @@ -796,6 +797,159 @@ err: goto done; } +static void virtqueue_packed_get_avail_bytes(VirtQueue *vq, +unsigned int *in_bytes, unsigned int *out_bytes, +unsigned max_in_bytes, unsigned max_out_bytes) +{ +VirtIODevice *vdev = vq->vdev; +unsigned int max, idx; +unsigned int total_bufs, in_total, out_total; +MemoryRegionCache *desc_cache; +VRingMemoryRegionCaches *caches; +MemoryRegionCache 
indirect_desc_cache = MEMORY_REGION_CACHE_INVALID; +int64_t len = 0; +VRingPackedDesc desc; +bool wrap_counter; + +rcu_read_lock(); +idx = vq->last_avail_idx; +wrap_counter = vq->avail_wrap_counter; +total_bufs = in_total = out_total = 0; + +max = vq->vring.num; +caches = vring_get_region_caches(vq); +if (caches->desc.len < max * sizeof(VRingPackedDesc)) { +virtio_error(vdev, "Cannot map descriptor ring"); +goto err; +} + +desc_cache = >desc; +vring_packed_desc_read_flags(vdev, , desc_cache, idx); +while (is_desc_avail(, wrap_counter)) { +unsigned int num_bufs; +unsigned int i = 0; + +num_bufs = total_bufs; + +/* Make sure all the fields have been exposed. */ +smp_rmb(); +vring_packed_desc_read(vdev, , desc_cache, idx); + +if (desc.flags & VRING_DESC_F_INDIRECT) { +if (desc.len % sizeof(VRingPackedDesc)) { +virtio_error(vdev, "Invalid size for indirect buffer table"); +goto err; +} + +/* If we've got too many, that implies a descriptor loop. */ +if (num_bufs >= max) { +virtio_error(vdev, "Looped descriptor"); +goto err; +} + +/* loop over the indirect descriptor table */ +len = address_space_cache_init(_desc_cache, + vdev->dma_as, + desc.addr, desc.len, false); +desc_cache = _desc_cache; +if (len < desc.len) { +virtio_error(vdev, "Cannot map indirect buffer"); +goto err; +} + +max = desc.len / sizeof(VRingPackedDesc); +num_bufs = i = 0; +vring_packed_desc_read(vdev, , desc_cache, i); +} + +do { +/* If we've got too many, that implies a descriptor loop. */ +if (++num_bufs > max) { +virtio_error(vdev, "Looped descriptor"); +goto err; +} + +if (desc.flags & VRING_DESC_F_WRITE) { +in_total += desc.len; +} else { +out_total += desc.len; +} +if (in_total >= max_in_bytes && out_total >= max_out_bytes) { +goto done; +} + +if (desc_cache == _desc_cache) { +vring_packed_desc_read(vdev, , desc_cache, i); +
[Qemu-devel] [PATCH v2 06/15] virtio: init and desc empty check for packed ring
From: Wei Xu ring check and other basic helpers for packed ring. Signed-off-by: Wei Xu --- hw/virtio/virtio.c | 59 +- 1 file changed, 58 insertions(+), 1 deletion(-) diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 833289e..e728201 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -24,6 +24,9 @@ #include "hw/virtio/virtio-access.h" #include "sysemu/dma.h" +#define AVAIL_DESC_PACKED(b) ((b) << 7) +#define USED_DESC_PACKED(b) ((b) << 15) + /* * The alignment to use between consumer and producer parts of vring. * x86 pagesize again. This is the default, used by transports like PCI @@ -369,6 +372,25 @@ int virtio_queue_ready(VirtQueue *vq) return vq->vring.avail != 0; } +static void vring_packed_desc_read_flags(VirtIODevice *vdev, +VRingPackedDesc *desc, MemoryRegionCache *cache, int i) +{ +address_space_read_cached(cache, + i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, flags), + &desc->flags, sizeof(desc->flags)); +virtio_tswap16s(vdev, &desc->flags); +} + +static inline bool is_desc_avail(struct VRingPackedDesc *desc, +bool wrap_counter) +{ +bool avail, used; + +avail = !!(desc->flags & AVAIL_DESC_PACKED(1)); +used = !!(desc->flags & USED_DESC_PACKED(1)); +return (avail != used) && (avail == wrap_counter); +} + /* Fetch avail_idx from VQ memory only when we really need to know if * guest has added some buffers. * Called within rcu_read_lock(). 
*/ @@ -389,7 +411,7 @@ static int virtio_queue_empty_rcu(VirtQueue *vq) return vring_avail_idx(vq) == vq->last_avail_idx; } -int virtio_queue_empty(VirtQueue *vq) +static int virtio_queue_split_empty(VirtQueue *vq) { bool empty; @@ -411,6 +433,41 @@ int virtio_queue_empty(VirtQueue *vq) return empty; } +static int virtio_queue_packed_empty_rcu(VirtQueue *vq) +{ +struct VRingPackedDesc desc; +VRingMemoryRegionCaches *cache; + +if (unlikely(!vq->vring.desc)) { +return 1; +} + +cache = vring_get_region_caches(vq); +vring_packed_desc_read_flags(vq->vdev, &desc, &cache->desc, +vq->last_avail_idx); + +return !is_desc_avail(&desc, vq->avail_wrap_counter); +} + +static int virtio_queue_packed_empty(VirtQueue *vq) +{ +bool empty; + +rcu_read_lock(); +empty = virtio_queue_packed_empty_rcu(vq); +rcu_read_unlock(); +return empty; +} + +int virtio_queue_empty(VirtQueue *vq) +{ +if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) { +return virtio_queue_packed_empty(vq); +} else { +return virtio_queue_split_empty(vq); +} +} + static void virtqueue_unmap_sg(VirtQueue *vq, const VirtQueueElement *elem, unsigned int len) { -- 1.8.3.1
[Qemu-devel] [PATCH v2 08/15] virtio: fill/flush/pop for packed ring
From: Wei Xu Signed-off-by: Wei Xu --- hw/virtio/virtio.c | 295 ++--- 1 file changed, 278 insertions(+), 17 deletions(-) diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index cb599e9..5562ecd 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -383,6 +383,25 @@ static void vring_packed_desc_read(VirtIODevice *vdev, VRingPackedDesc *desc, virtio_tswap16s(vdev, >id); } +static void vring_packed_desc_write_data(VirtIODevice *vdev, +VRingPackedDesc *desc, MemoryRegionCache *cache, int i) +{ +virtio_tswap32s(vdev, >len); +virtio_tswap16s(vdev, >id); +address_space_write_cached(cache, + i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, id), + >id, sizeof(desc->id)); +address_space_cache_invalidate(cache, + i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, id), + sizeof(desc->id)); +address_space_write_cached(cache, + i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, len), + >len, sizeof(desc->len)); +address_space_cache_invalidate(cache, + i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, len), + sizeof(desc->len)); +} + static void vring_packed_desc_read_flags(VirtIODevice *vdev, VRingPackedDesc *desc, MemoryRegionCache *cache, int i) { @@ -392,6 +411,18 @@ static void vring_packed_desc_read_flags(VirtIODevice *vdev, virtio_tswap16s(vdev, >flags); } +static void vring_packed_desc_write_flags(VirtIODevice *vdev, +VRingPackedDesc *desc, MemoryRegionCache *cache, int i) +{ +virtio_tswap16s(vdev, >flags); +address_space_write_cached(cache, + i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, flags), + >flags, sizeof(desc->flags)); +address_space_cache_invalidate(cache, + i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, flags), + sizeof(desc->flags)); +} + static inline bool is_desc_avail(struct VRingPackedDesc *desc, bool wrap_counter) { @@ -558,19 +589,11 @@ bool virtqueue_rewind(VirtQueue *vq, unsigned int num) } /* Called within rcu_read_lock(). 
*/ -void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem, +static void virtqueue_split_fill(VirtQueue *vq, const VirtQueueElement *elem, unsigned int len, unsigned int idx) { VRingUsedElem uelem; -trace_virtqueue_fill(vq, elem, len, idx); - -virtqueue_unmap_sg(vq, elem, len); - -if (unlikely(vq->vdev->broken)) { -return; -} - if (unlikely(!vq->vring.used)) { return; } @@ -582,16 +605,72 @@ void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem, vring_used_write(vq, , idx); } -/* Called within rcu_read_lock(). */ -void virtqueue_flush(VirtQueue *vq, unsigned int count) +static void virtqueue_packed_fill(VirtQueue *vq, const VirtQueueElement *elem, +unsigned int len, unsigned int idx) { -uint16_t old, new; +uint16_t head; +VRingMemoryRegionCaches *caches; +VRingPackedDesc desc = { +.flags = 0, +}; +bool wrap_counter = vq->used_wrap_counter; + +if (unlikely(!vq->vring.desc)) { +return; +} + +caches = vring_get_region_caches(vq); +desc.id = elem->index; +desc.len = len; + +head = vq->used_idx + idx; +if (head >= vq->vring.num) { +head -= vq->vring.num; +wrap_counter ^= 1; +} +if (wrap_counter) { +desc.flags |= VRING_DESC_F_AVAIL; +desc.flags |= VRING_DESC_F_USED; +} else { +desc.flags &= ~VRING_DESC_F_AVAIL; +desc.flags &= ~VRING_DESC_F_USED; +} + +vring_packed_desc_write_data(vq->vdev, , >desc, head); +if (idx == 0) { +/* + * Make sure descriptor id and len is written before + * flags for the first used buffer. + */ +smp_wmb(); +} + +vring_packed_desc_write_flags(vq->vdev, , >desc, head); +} + +void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem, +unsigned int len, unsigned int idx) +{ +trace_virtqueue_fill(vq, elem, len, idx); + +virtqueue_unmap_sg(vq, elem, len); if (unlikely(vq->vdev->broken)) { -vq->inuse -= count; return; } +if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) { +virtqueue_packed_fill(vq, elem, len, idx); +} else { +virtqueue_split_fill(vq, elem, len, idx); +} +} + +/* Called within rcu_read_lock(). 
*/ +static void virtqueue_split_flush(VirtQueue *vq, unsigned int count) +{ +uint16_t old, new; + if (unlikely(!vq->vring.used)) { return; } @@ -607,6 +686,31 @@ void virtqueue_flush(VirtQueue *vq, unsigned int count) vq->signalled_used_valid = false; } +static void virtqueue_packed_flush(VirtQueue
[Qemu-devel] [PATCH v2 10/15] virtio-net: fill head desc after done all in a chain
From: Wei Xu With the support of marking a descriptor used/unused in 'flags' field for 1.1, the current way of filling a chained descriptors does not work since driver side may get the wrong 'num_buffer' information in case of the head descriptor has been filled in while the subsequent ones are still in processing in device side. This patch fills the head one after done all the others one. Signed-off-by: Wei Xu --- hw/net/virtio-net.c | 11 ++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c index e37fc34..39336b9 100644 --- a/hw/net/virtio-net.c +++ b/hw/net/virtio-net.c @@ -1198,6 +1198,8 @@ static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf, struct virtio_net_hdr_mrg_rxbuf mhdr; unsigned mhdr_cnt = 0; size_t offset, i, guest_offset; +VirtQueueElement head; +int head_len = 0; if (!virtio_net_can_receive(nc)) { return -1; @@ -1275,7 +1277,13 @@ static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf, } /* signal other side */ -virtqueue_fill(q->rx_vq, elem, total, i++); +if (i == 0) { +head_len = total; +head = *elem; +} else { +virtqueue_fill(q->rx_vq, elem, len, i); +} +i++; g_free(elem); } @@ -1286,6 +1294,7 @@ static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf, _buffers, sizeof mhdr.num_buffers); } +virtqueue_fill(q->rx_vq, , head_len, 0); virtqueue_flush(q->rx_vq, i); virtio_notify(vdev, q->rx_vq); -- 1.8.3.1
[Qemu-devel] [PATCH v2 05/15] virtio: init wrap counter for packed ring
From: Wei Xu Signed-off-by: Wei Xu --- hw/virtio/virtio.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 454da3d..833289e 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -1239,6 +1239,9 @@ void virtio_reset(void *opaque) vdev->vq[i].last_avail_idx = 0; vdev->vq[i].shadow_avail_idx = 0; vdev->vq[i].used_idx = 0; +vdev->vq[i].avail_wrap_counter = true; +vdev->vq[i].event_wrap_counter = true; +vdev->vq[i].used_wrap_counter = true; virtio_queue_set_vector(vdev, i, VIRTIO_NO_VECTOR); vdev->vq[i].signalled_used = 0; vdev->vq[i].signalled_used_valid = false; -- 1.8.3.1
[Qemu-devel] [PATCH v2 00/15] packed ring virtio-net backends support
From: Wei Xu v1->v2: - fix patchew complaint - only set/get last_avail_idx/wrap_counter for vhost migration(Maxime) - replace 'out_num' and 'in_num' with 'elem_entries' in packed_pop()(Maxime) - set last used idx/wrap_counter to last avail ones when flushing(Maxime) - replace '*host_has_feature()' with '*vdev_has_feature()' for ioctl(Maxime) - replace going through indirect descriptors with desc.len/sizeof(desc)(btw) - add new subsection for packed ring(Jason) rfc v3 -> v1 - migration support for both userspace and vhost-net, need tweak vhost ioctl() to make it work(the code is pasted in the commit message of vhost migration patch #13). Note: the high 32-bit guest feature bit is saved as a subsection for virtio devices which makes packed ring feature bit check unusable when loading the saved per-queue variables(this is done before loading subsection which is the last action for device during migration), so I save and load all the things generally for now, any idea to fix this? - Fixed comments from Jason for rfc v3 sorted by patch #, two comments I didn't take were(from patch) listed here: 09: - introduce new API(virtqueue_fill_n()). - Didn't take it since userspace backend does not support batching, so only one element is popped and current API should be enough. 06 & 07: Refactor split and packed pop()/get_avail_bytes(). - the duplicated code intertwined with split/packed ring specific things and it might make it unclear, so I only extracted the few common parts outside rcu and keep the others separate. The other revised comments: 02: - reuse current 'avail/used' for 'driver/device' in VRingMemoryRegionCache. - remove event_idx since shadow_avail_idx works. 03: - move size recalculation to a separate patch. - keep 'avail/used' in current calculation function name. - initialize 'desc' memory region as 'false' for 1.0('true' for 1.1) 04: - delete 'event_idx' 05: - rename 'wc' to wrap_counter. 06: - converge common part outside rcu section for 1.0/1.1. 
- move memory barrier for the first 'desc' in between checking flag and read other fields. - remove unnecessary memory barriers for indirect descriptors. - no need to destroy indirect memory cache since it is generally done before return from the function. - remove redundant maximum chained descriptors limitation check. - there are some differences(desc name, wrap idx/counter, flags) between split and packed rings, so keep them separate for now. - amend the comment when recording index and wrap counter for a kick from guest. 07: - calculate fields in descriptor instead of read it when filling. - put memory barrier correctly before filling the flags in descriptor. - replace full memory barrier with a write barrier in fill. - shift to read descriptor flags and descriptor necessarily and separately in packed_pop(). - correct memory barrier in packed_pop() as in packed_fill(). 08: - reuse 'shadow_avail_idx' instead of adding a new 'event_idx'. - use the compact and verified vring_packed_need_event() version for vhost net/user. 12: - remove the odd cherry-pick comment. - used bit '15' for wrap_counters. 
rfc v2->v3 - addressed performance issue - fixed feedback from v2 rfc v1->v2 - sync to tiwei's v5 - reuse memory cache function with 1.0 - dropped detach patch and notification helper(04 & 05 in v1) - guest virtio-net driver unload/reload support - event suppression support(not tested) - addressed feedback from v1 Wei Xu (15): virtio: introduce packed ring definitions virtio: redefine structure & memory cache for packed ring virtio: expand offset calculation for packed ring virtio: add memory region init for packed ring virtio: init wrap counter for packed ring virtio: init and desc empty check for packed ring virtio: get avail bytes check for packed ring virtio: fill/flush/pop for packed ring virtio: event suppression support for packed ring virtio-net: fill head desc after done all in a chain virtio: add userspace migration for packed ring virtio: add vhost-net migration for packed ring virtio: packed ring feature bit for userspace backend vhost: enable packed ring virtio: enable packed ring via a new command line hw/net/vhost_net.c | 2 + hw/net/virtio-net.c| 11 +- hw/virtio/virtio.c | 774 +++-- include/hw/virtio/virtio.h | 4 +- include/standard-headers/linux/virtio_config.h | 15 + include/standard-headers/linux/virtio_ring.h | 43 ++ 6 files changed, 800 insertions(+), 49 deletions(-) -- 1.8.3.1
[Qemu-devel] [PATCH v2 03/15] virtio: expand offset calculation for packed ring
From: Wei Xu Expand 1.0 to 1.1 by adding offset calculation accordingly. Signed-off-by: Wei Xu --- hw/virtio/virtio.c | 22 ++ 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 827e745..112845c 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -2336,14 +2336,28 @@ hwaddr virtio_queue_get_desc_size(VirtIODevice *vdev, int n) hwaddr virtio_queue_get_avail_size(VirtIODevice *vdev, int n) { -return offsetof(VRingAvail, ring) + -sizeof(uint16_t) * vdev->vq[n].vring.num; +int s; + +if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) { +return sizeof(struct VRingPackedDescEvent); +} else { +s = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0; +return offsetof(VRingAvail, ring) + +sizeof(uint16_t) * vdev->vq[n].vring.num + s; +} } hwaddr virtio_queue_get_used_size(VirtIODevice *vdev, int n) { -return offsetof(VRingUsed, ring) + -sizeof(VRingUsedElem) * vdev->vq[n].vring.num; +int s; + +if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) { +return sizeof(struct VRingPackedDescEvent); +} else { +s = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0; +return offsetof(VRingUsed, ring) + +sizeof(VRingUsedElem) * vdev->vq[n].vring.num + s; +} } uint16_t virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n) -- 1.8.3.1
[Qemu-devel] [PATCH v2 02/15] virtio: redefine structure & memory cache for packed ring
From: Wei Xu Redefine packed ring structure according to Qemu nomenclature, field data(wrap counter, etc) are introduced also. Signed-off-by: Wei Xu --- hw/virtio/virtio.c | 16 1 file changed, 16 insertions(+) diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 22bd1ac..827e745 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -39,6 +39,13 @@ typedef struct VRingDesc uint16_t next; } VRingDesc; +typedef struct VRingPackedDesc { +uint64_t addr; +uint32_t len; +uint16_t id; +uint16_t flags; +} VRingPackedDesc; + typedef struct VRingAvail { uint16_t flags; @@ -77,6 +84,11 @@ typedef struct VRing VRingMemoryRegionCaches *caches; } VRing; +typedef struct VRingPackedDescEvent { +uint16_t off_wrap; +uint16_t flags; +} VRingPackedDescEvent ; + struct VirtQueue { VRing vring; @@ -87,7 +99,11 @@ struct VirtQueue /* Last avail_idx read from VQ. */ uint16_t shadow_avail_idx; +bool event_wrap_counter; +bool avail_wrap_counter; + uint16_t used_idx; +bool used_wrap_counter; /* Last used index value we have signalled on */ uint16_t signalled_used; -- 1.8.3.1
[Qemu-devel] [PATCH v2 04/15] virtio: add memory region init for packed ring
From: Wei Xu Signed-off-by: Wei Xu --- hw/virtio/virtio.c | 12 ++-- 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 112845c..454da3d 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -156,10 +156,8 @@ static void virtio_init_region_cache(VirtIODevice *vdev, int n) VRingMemoryRegionCaches *old = vq->vring.caches; VRingMemoryRegionCaches *new = NULL; hwaddr addr, size; -int event_size; int64_t len; - -event_size = virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0; +bool attr; addr = vq->vring.desc; if (!addr) { @@ -167,14 +165,16 @@ static void virtio_init_region_cache(VirtIODevice *vdev, int n) } new = g_new0(VRingMemoryRegionCaches, 1); size = virtio_queue_get_desc_size(vdev, n); +attr = virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED) ? + true : false; len = address_space_cache_init(>desc, vdev->dma_as, - addr, size, false); + addr, size, attr); if (len < size) { virtio_error(vdev, "Cannot map desc"); goto err_desc; } -size = virtio_queue_get_used_size(vdev, n) + event_size; +size = virtio_queue_get_used_size(vdev, n); len = address_space_cache_init(>used, vdev->dma_as, vq->vring.used, size, true); if (len < size) { @@ -182,7 +182,7 @@ static void virtio_init_region_cache(VirtIODevice *vdev, int n) goto err_used; } -size = virtio_queue_get_avail_size(vdev, n) + event_size; +size = virtio_queue_get_avail_size(vdev, n); len = address_space_cache_init(>avail, vdev->dma_as, vq->vring.avail, size, false); if (len < size) { -- 1.8.3.1
[Qemu-devel] [PATCH v2 01/15] virtio: introduce packed ring definitions
From: Wei Xu >From 1.1 spec. Signed-off-by: Wei Xu --- include/standard-headers/linux/virtio_config.h | 15 + include/standard-headers/linux/virtio_ring.h | 43 ++ 2 files changed, 58 insertions(+) diff --git a/include/standard-headers/linux/virtio_config.h b/include/standard-headers/linux/virtio_config.h index 0b19436..9f450fd 100644 --- a/include/standard-headers/linux/virtio_config.h +++ b/include/standard-headers/linux/virtio_config.h @@ -75,6 +75,21 @@ */ #define VIRTIO_F_IOMMU_PLATFORM33 +/* This feature indicates support for the packed virtqueue layout. */ +#define VIRTIO_F_RING_PACKED 34 + +/* Enable events */ +#define RING_EVENT_FLAGS_ENABLE 0x0 +/* Disable events */ +#define RING_EVENT_FLAGS_DISABLE 0x1 +/* + * * Enable events for a specific descriptor + * * (as specified by Descriptor Ring Change Event Offset/Wrap Counter). + ** Only valid if VIRTIO_F_RING_EVENT_IDX has been negotiated. + * */ +#define RING_EVENT_FLAGS_DESC 0x2 +/* The value 0x3 is reserved */ + /* * Does the device support Single Root I/O Virtualization? */ diff --git a/include/standard-headers/linux/virtio_ring.h b/include/standard-headers/linux/virtio_ring.h index d26e72b..1719c6f 100644 --- a/include/standard-headers/linux/virtio_ring.h +++ b/include/standard-headers/linux/virtio_ring.h @@ -42,6 +42,10 @@ /* This means the buffer contains a list of buffer descriptors. */ #define VRING_DESC_F_INDIRECT 4 +/* Mark a descriptor as available or used. */ +#define VRING_DESC_F_AVAIL (1ul << 7) +#define VRING_DESC_F_USED (1ul << 15) + /* The Host uses this in used->flags to advise the Guest: don't kick me when * you add a buffer. It's unreliable, so it's simply an optimization. Guest * will still kick if it's out of buffers. */ @@ -51,6 +55,17 @@ * optimization. */ #define VRING_AVAIL_F_NO_INTERRUPT 1 +/* Enable events. */ +#define VRING_EVENT_F_ENABLE 0x0 +/* Disable events. 
*/ +#define VRING_EVENT_F_DISABLE 0x1 +/* + * Enable events for a specific descriptor + * (as specified by Descriptor Ring Change Event Offset/Wrap Counter). + * Only valid if VIRTIO_RING_F_EVENT_IDX has been negotiated. + */ +#define VRING_EVENT_F_DESC 0x2 + /* We support indirect buffer descriptors */ #define VIRTIO_RING_F_INDIRECT_DESC28 @@ -169,4 +184,32 @@ static inline int vring_need_event(uint16_t event_idx, uint16_t new_idx, uint16_ return (uint16_t)(new_idx - event_idx - 1) < (uint16_t)(new_idx - old); } +struct vring_packed_desc_event { + /* Descriptor Ring Change Event Offset/Wrap Counter. */ + __virtio16 off_wrap; + /* Descriptor Ring Change Event Flags. */ + __virtio16 flags; +}; + +struct vring_packed_desc { + /* Buffer Address. */ + __virtio64 addr; + /* Buffer Length. */ + __virtio32 len; + /* Buffer ID. */ + __virtio16 id; + /* The flags depending on descriptor type. */ + __virtio16 flags; +}; + +struct vring_packed { + unsigned int num; + + struct vring_packed_desc *desc; + + struct vring_packed_desc_event *driver; + + struct vring_packed_desc_event *device; +}; + #endif /* _LINUX_VIRTIO_RING_H */ -- 1.8.3.1
[Qemu-devel] [PATCH v1 15/16] vhost: enable packed ring
From: Wei Xu Signed-off-by: Wei Xu --- hw/net/vhost_net.c | 1 + 1 file changed, 1 insertion(+) diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c index fb4b18f..f593086 100644 --- a/hw/net/vhost_net.c +++ b/hw/net/vhost_net.c @@ -53,6 +53,7 @@ static const int kernel_feature_bits[] = { VIRTIO_F_VERSION_1, VIRTIO_NET_F_MTU, VIRTIO_F_IOMMU_PLATFORM, +VIRTIO_F_RING_PACKED, VHOST_INVALID_FEATURE_BIT }; -- 1.8.3.1
[Qemu-devel] [PATCH v1 16/16] virtio: enable packed ring via a new command line
From: Wei Xu Signed-off-by: Wei Xu --- include/hw/virtio/virtio.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h index a6fdf3f..36fc4ef 100644 --- a/include/hw/virtio/virtio.h +++ b/include/hw/virtio/virtio.h @@ -264,7 +264,9 @@ typedef struct VirtIORNGConf VirtIORNGConf; DEFINE_PROP_BIT64("any_layout", _state, _field, \ VIRTIO_F_ANY_LAYOUT, true), \ DEFINE_PROP_BIT64("iommu_platform", _state, _field, \ - VIRTIO_F_IOMMU_PLATFORM, false) + VIRTIO_F_IOMMU_PLATFORM, false), \ +DEFINE_PROP_BIT64("ring_packed", _state, _field, \ + VIRTIO_F_RING_PACKED, false) hwaddr virtio_queue_get_desc_addr(VirtIODevice *vdev, int n); hwaddr virtio_queue_get_avail_addr(VirtIODevice *vdev, int n); -- 1.8.3.1
[Qemu-devel] [PATCH v1 14/16] virtio: packed ring feature bit for userspace backend
From: Wei Xu Signed-off-by: Wei Xu --- hw/net/vhost_net.c | 1 + 1 file changed, 1 insertion(+) diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c index e037db6..fb4b18f 100644 --- a/hw/net/vhost_net.c +++ b/hw/net/vhost_net.c @@ -78,6 +78,7 @@ static const int user_feature_bits[] = { VIRTIO_NET_F_MRG_RXBUF, VIRTIO_NET_F_MTU, VIRTIO_F_IOMMU_PLATFORM, +VIRTIO_F_RING_PACKED, /* This bit implies RARP isn't sent by QEMU out of band */ VIRTIO_NET_F_GUEST_ANNOUNCE, -- 1.8.3.1
[Qemu-devel] [PATCH v1 12/16] virtio: add userspace migration of packed ring
From: Wei Xu Signed-off-by: Wei Xu --- hw/virtio/virtio.c | 18 ++ 1 file changed, 18 insertions(+) diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 240c4e3..64d5c04 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -2558,6 +2558,12 @@ int virtio_save(VirtIODevice *vdev, QEMUFile *f) */ qemu_put_be64(f, vdev->vq[i].vring.desc); qemu_put_be16s(f, >vq[i].last_avail_idx); +qemu_put_8s(f, (const uint8_t *)>vq[i].avail_wrap_counter); +qemu_put_8s(f, (const uint8_t *)>vq[i].event_wrap_counter); +qemu_put_8s(f, (const uint8_t *)>vq[i].used_wrap_counter); +qemu_put_be16s(f, >vq[i].used_idx); +qemu_put_be16s(f, >vq[i].shadow_avail_idx); +qemu_put_be32s(f, >vq[i].inuse); if (k->save_queue) { k->save_queue(qbus->parent, i, f); } @@ -2705,6 +2711,14 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id) } vdev->vq[i].vring.desc = qemu_get_be64(f); qemu_get_be16s(f, >vq[i].last_avail_idx); + +qemu_get_8s(f, (uint8_t *)>vq[i].avail_wrap_counter); +qemu_get_8s(f, (uint8_t *)>vq[i].event_wrap_counter); +qemu_get_8s(f, (uint8_t *)>vq[i].used_wrap_counter); +qemu_get_be16s(f, >vq[i].used_idx); +qemu_get_be16s(f, >vq[i].shadow_avail_idx); +qemu_get_be32s(f, >vq[i].inuse); + vdev->vq[i].signalled_used_valid = false; vdev->vq[i].notification = true; @@ -2786,6 +2800,10 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id) virtio_queue_update_rings(vdev, i); } +if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) { +continue; +} + nheads = vring_avail_idx(>vq[i]) - vdev->vq[i].last_avail_idx; /* Check it isn't doing strange things with descriptor numbers. */ if (nheads > vdev->vq[i].vring.num) { -- 1.8.3.1
[Qemu-devel] [PATCH v1 13/16] virtio: add vhost-net migration of packed ring
From: Wei Xu tweaked vhost-net code to test migration. @@ -1414,64 +1430,20 @@ long vhost_vring_ioctl(struct vhost_dev r = -EFAULT; break; } + vq->last_avail_idx = s.num & 0x7FFF; + /* Forget the cached index value. */ + vq->avail_idx = vq->last_avail_idx; + if (vhost_has_feature(vq, VIRTIO_F_RING_PACKED)) { + vq->last_avail_wrap_counter = !!(s.num & 0x8000); + vq->avail_wrap_counter = vq->last_avail_wrap_counter; + + vq->last_used_idx = (s.num & 0x7fFF) >> 16; + vq->last_used_wrap_counter = !!(s.num & 0x8000); + } + break; + case VHOST_GET_VRING_BASE: + s.index = idx; +s.num = vq->last_avail_idx; + if (vhost_has_feature(vq, VIRTIO_F_RING_PACKED)) { + s.num |= vq->last_avail_wrap_counter << 15; + s.num |= vq->last_used_idx << 16; + s.num |= vq->last_used_wrap_counter << 31; + } + if (copy_to_user(argp, , sizeof(s))) + r = -EFAULT; + break; Signed-off-by: Wei Xu --- hw/virtio/virtio.c | 35 ++- include/hw/virtio/virtio.h | 4 ++-- 2 files changed, 32 insertions(+), 7 deletions(-) diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 64d5c04..7487d3d 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -2963,19 +2963,40 @@ hwaddr virtio_queue_get_used_size(VirtIODevice *vdev, int n) } } -uint16_t virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n) +int virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n) { -return vdev->vq[n].last_avail_idx; +int idx; + +if (virtio_host_has_feature(vdev, VIRTIO_F_RING_PACKED)) { +idx = vdev->vq[n].last_avail_idx; +idx |= ((int)vdev->vq[n].avail_wrap_counter) << 15; +idx |= (vdev->vq[n].used_idx) << 16; +idx |= ((int)vdev->vq[n].used_wrap_counter) << 31; +} else { +idx = (int)vdev->vq[n].last_avail_idx; +} +return idx; } -void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n, uint16_t idx) +void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n, int idx) { -vdev->vq[n].last_avail_idx = idx; -vdev->vq[n].shadow_avail_idx = idx; +if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) { 
+vdev->vq[n].last_avail_idx = idx & 0x7fff; +vdev->vq[n].avail_wrap_counter = !!(idx & 0x8000); +vdev->vq[n].used_idx = (idx & 0x7fff) >> 16; +vdev->vq[n].used_wrap_counter = !!(idx & 0x8000); +} else { +vdev->vq[n].last_avail_idx = idx; +vdev->vq[n].shadow_avail_idx = idx; +} } void virtio_queue_restore_last_avail_idx(VirtIODevice *vdev, int n) { +if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) { +return; +} + rcu_read_lock(); if (vdev->vq[n].vring.desc) { vdev->vq[n].last_avail_idx = vring_used_idx(>vq[n]); @@ -2986,6 +3007,10 @@ void virtio_queue_restore_last_avail_idx(VirtIODevice *vdev, int n) void virtio_queue_update_used_idx(VirtIODevice *vdev, int n) { +if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) { +return; +} + rcu_read_lock(); if (vdev->vq[n].vring.desc) { vdev->vq[n].used_idx = vring_used_idx(>vq[n]); diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h index 9c1fa07..a6fdf3f 100644 --- a/include/hw/virtio/virtio.h +++ b/include/hw/virtio/virtio.h @@ -272,8 +272,8 @@ hwaddr virtio_queue_get_used_addr(VirtIODevice *vdev, int n); hwaddr virtio_queue_get_desc_size(VirtIODevice *vdev, int n); hwaddr virtio_queue_get_avail_size(VirtIODevice *vdev, int n); hwaddr virtio_queue_get_used_size(VirtIODevice *vdev, int n); -uint16_t virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n); -void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n, uint16_t idx); +int virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n); +void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n, int idx); void virtio_queue_restore_last_avail_idx(VirtIODevice *vdev, int n); void virtio_queue_invalidate_signalled_used(VirtIODevice *vdev, int n); void virtio_queue_update_used_idx(VirtIODevice *vdev, int n); -- 1.8.3.1
[Qemu-devel] [PATCH v1 10/16] virtio: event suppression support for packed ring
From: Wei Xu Signed-off-by: Wei Xu --- hw/virtio/virtio.c | 121 +++-- 1 file changed, 118 insertions(+), 3 deletions(-) diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 99a6601..240c4e3 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -238,6 +238,30 @@ static void vring_desc_read(VirtIODevice *vdev, VRingDesc *desc, virtio_tswap16s(vdev, >next); } +static void vring_packed_event_read(VirtIODevice *vdev, +MemoryRegionCache *cache, VRingPackedDescEvent *e) +{ +address_space_read_cached(cache, 0, e, sizeof(*e)); +virtio_tswap16s(vdev, >off_wrap); +virtio_tswap16s(vdev, >flags); +} + +static void vring_packed_off_wrap_write(VirtIODevice *vdev, +MemoryRegionCache *cache, uint16_t off_wrap) +{ +virtio_tswap16s(vdev, _wrap); +address_space_write_cached(cache, 0, _wrap, sizeof(off_wrap)); +address_space_cache_invalidate(cache, 0, sizeof(off_wrap)); +} + +static void vring_packed_flags_write(VirtIODevice *vdev, +MemoryRegionCache *cache, uint16_t flags) +{ +virtio_tswap16s(vdev, ); +address_space_write_cached(cache, sizeof(uint16_t), , sizeof(flags)); +address_space_cache_invalidate(cache, sizeof(uint16_t), sizeof(flags)); +} + static VRingMemoryRegionCaches *vring_get_region_caches(struct VirtQueue *vq) { VRingMemoryRegionCaches *caches = atomic_rcu_read(>vring.caches); @@ -344,7 +368,7 @@ static inline void vring_set_avail_event(VirtQueue *vq, uint16_t val) address_space_cache_invalidate(>used, pa, sizeof(val)); } -void virtio_queue_set_notification(VirtQueue *vq, int enable) +static void virtio_queue_set_notification_split(VirtQueue *vq, int enable) { vq->notification = enable; @@ -367,6 +391,51 @@ void virtio_queue_set_notification(VirtQueue *vq, int enable) rcu_read_unlock(); } +static void virtio_queue_set_notification_packed(VirtQueue *vq, int enable) +{ +VRingPackedDescEvent e; +VRingMemoryRegionCaches *caches; + +rcu_read_lock(); +caches = vring_get_region_caches(vq); +vring_packed_event_read(vq->vdev, >used, ); + +if (!enable) { +e.flags = 
RING_EVENT_FLAGS_DISABLE; +goto out; +} + +e.flags = RING_EVENT_FLAGS_ENABLE; +if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) { +uint16_t off_wrap = vq->shadow_avail_idx | vq->event_wrap_counter << 15; + +vring_packed_off_wrap_write(vq->vdev, >used, off_wrap); +/* Make sure off_wrap is wrote before flags */ +smp_wmb(); + +e.flags = RING_EVENT_FLAGS_DESC; +} + +out: +vring_packed_flags_write(vq->vdev, >used, e.flags); +rcu_read_unlock(); +} + +void virtio_queue_set_notification(VirtQueue *vq, int enable) +{ +vq->notification = enable; + +if (!vq->vring.desc) { +return; +} + +if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) { +virtio_queue_set_notification_packed(vq, enable); +} else { +virtio_queue_set_notification_split(vq, enable); +} +} + int virtio_queue_ready(VirtQueue *vq) { return vq->vring.avail != 0; @@ -2113,8 +2182,7 @@ static void virtio_set_isr(VirtIODevice *vdev, int value) } } -/* Called within rcu_read_lock(). */ -static bool virtio_should_notify(VirtIODevice *vdev, VirtQueue *vq) +static bool virtio_split_should_notify(VirtIODevice *vdev, VirtQueue *vq) { uint16_t old, new; bool v; @@ -2137,6 +2205,53 @@ static bool virtio_should_notify(VirtIODevice *vdev, VirtQueue *vq) return !v || vring_need_event(vring_get_used_event(vq), new, old); } +static bool vring_packed_need_event(VirtQueue *vq, bool wrap, +uint16_t off_wrap, uint16_t new, uint16_t old) +{ +int off = off_wrap & ~(1 << 15); + +if (wrap != off_wrap >> 15) { +off -= vq->vring.num; +} + +return vring_need_event(off, new, old); +} + +static bool virtio_packed_should_notify(VirtIODevice *vdev, VirtQueue *vq) +{ +VRingPackedDescEvent e; +uint16_t old, new; +bool v; +VRingMemoryRegionCaches *caches; + +caches = vring_get_region_caches(vq); +vring_packed_event_read(vdev, >avail, ); + +old = vq->signalled_used; +new = vq->signalled_used = vq->used_idx; +v = vq->signalled_used_valid; +vq->signalled_used_valid = true; + +if (e.flags == RING_EVENT_FLAGS_DISABLE) { +return 
false; +} else if (e.flags == RING_EVENT_FLAGS_ENABLE) { +return true; +} + +return !v || vring_packed_need_event(vq, +vq->used_wrap_counter, e.off_wrap, new, old); +} + +/* Called within rcu_read_lock(). */ +static bool virtio_should_notify(VirtIODevice *vdev, VirtQueue *vq) +{ +if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) { +return virtio_packed_should_notify(vdev, vq); +} else { +return virtio_split_should_notify(vdev,
[Qemu-devel] [PATCH v1 07/16] virtio: init and desc empty check for packed ring
From: Wei Xu ring check and other basic helpers for packed ring. Signed-off-by: Wei Xu --- hw/virtio/virtio.c | 59 +- 1 file changed, 58 insertions(+), 1 deletion(-) diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 74d9710..9d485e4 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -24,6 +24,9 @@ #include "hw/virtio/virtio-access.h" #include "sysemu/dma.h" +#define AVAIL_DESC_PACKED(b) ((b) << 7) +#define USED_DESC_PACKED(b) ((b) << 15) + /* * The alignment to use between consumer and producer parts of vring. * x86 pagesize again. This is the default, used by transports like PCI @@ -369,6 +372,25 @@ int virtio_queue_ready(VirtQueue *vq) return vq->vring.avail != 0; } +static void vring_packed_desc_read_flags(VirtIODevice *vdev, +VRingPackedDesc *desc, MemoryRegionCache *cache, int i) +{ +address_space_read_cached(cache, + i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, flags), + >flags, sizeof(desc->flags)); +virtio_tswap16s(vdev, >flags); +} + +static inline bool is_desc_avail(struct VRingPackedDesc *desc, +bool wrap_counter) +{ +bool avail, used; + +avail = !!(desc->flags & AVAIL_DESC_PACKED(1)); +used = !!(desc->flags & USED_DESC_PACKED(1)); +return (avail != used) && (avail == wrap_counter); +} + /* Fetch avail_idx from VQ memory only when we really need to know if * guest has added some buffers. * Called within rcu_read_lock(). 
*/ @@ -389,7 +411,7 @@ static int virtio_queue_empty_rcu(VirtQueue *vq) return vring_avail_idx(vq) == vq->last_avail_idx; } -int virtio_queue_empty(VirtQueue *vq) +static int virtio_queue_split_empty(VirtQueue *vq) { bool empty; @@ -411,6 +433,41 @@ int virtio_queue_empty(VirtQueue *vq) return empty; } +static int virtio_queue_packed_empty_rcu(VirtQueue *vq) +{ +struct VRingPackedDesc desc; +VRingMemoryRegionCaches *cache; + +if (unlikely(!vq->vring.desc)) { +return 1; +} + +cache = vring_get_region_caches(vq); +vring_packed_desc_read_flags(vq->vdev, , >desc, +vq->last_avail_idx); + +return !is_desc_avail(, vq->avail_wrap_counter); +} + +static int virtio_queue_packed_empty(VirtQueue *vq) +{ +bool empty; + +rcu_read_lock(); +empty = virtio_queue_packed_empty_rcu(vq); +rcu_read_unlock(); +return empty; +} + +int virtio_queue_empty(VirtQueue *vq) +{ +if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) { +return virtio_queue_packed_empty(vq); +} else { +return virtio_queue_split_empty(vq); +} +} + static void virtqueue_unmap_sg(VirtQueue *vq, const VirtQueueElement *elem, unsigned int len) { -- 1.8.3.1
[Qemu-devel] [PATCH v1 09/16] virtio: fill/flush/pop for packed ring
From: Wei Xu Signed-off-by: Wei Xu --- hw/virtio/virtio.c | 290 + 1 file changed, 273 insertions(+), 17 deletions(-) diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 13265e3..99a6601 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -383,6 +383,25 @@ static void vring_packed_desc_read(VirtIODevice *vdev, VRingPackedDesc *desc, virtio_tswap16s(vdev, >id); } +static void vring_packed_desc_write_data(VirtIODevice *vdev, +VRingPackedDesc *desc, MemoryRegionCache *cache, int i) +{ +virtio_tswap32s(vdev, >len); +virtio_tswap16s(vdev, >id); +address_space_write_cached(cache, + i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, id), + >id, sizeof(desc->id)); +address_space_cache_invalidate(cache, + i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, id), + sizeof(desc->id)); +address_space_write_cached(cache, + i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, len), + >len, sizeof(desc->len)); +address_space_cache_invalidate(cache, + i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, len), + sizeof(desc->len)); +} + static void vring_packed_desc_read_flags(VirtIODevice *vdev, VRingPackedDesc *desc, MemoryRegionCache *cache, int i) { @@ -392,6 +411,18 @@ static void vring_packed_desc_read_flags(VirtIODevice *vdev, virtio_tswap16s(vdev, >flags); } +static void vring_packed_desc_write_flags(VirtIODevice *vdev, +VRingPackedDesc *desc, MemoryRegionCache *cache, int i) +{ +virtio_tswap16s(vdev, >flags); +address_space_write_cached(cache, + i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, flags), + >flags, sizeof(desc->flags)); +address_space_cache_invalidate(cache, + i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, flags), + sizeof(desc->flags)); +} + static inline bool is_desc_avail(struct VRingPackedDesc *desc, bool wrap_counter) { @@ -558,19 +589,11 @@ bool virtqueue_rewind(VirtQueue *vq, unsigned int num) } /* Called within rcu_read_lock(). 
*/ -void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem, +static void virtqueue_split_fill(VirtQueue *vq, const VirtQueueElement *elem, unsigned int len, unsigned int idx) { VRingUsedElem uelem; -trace_virtqueue_fill(vq, elem, len, idx); - -virtqueue_unmap_sg(vq, elem, len); - -if (unlikely(vq->vdev->broken)) { -return; -} - if (unlikely(!vq->vring.used)) { return; } @@ -582,16 +605,71 @@ void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem, vring_used_write(vq, , idx); } -/* Called within rcu_read_lock(). */ -void virtqueue_flush(VirtQueue *vq, unsigned int count) +static void virtqueue_packed_fill(VirtQueue *vq, const VirtQueueElement *elem, +unsigned int len, unsigned int idx) { -uint16_t old, new; +uint16_t head; +VRingMemoryRegionCaches *caches; +VRingPackedDesc desc = { +.flags = 0, +}; +bool wrap_counter = vq->used_wrap_counter; + +if (unlikely(!vq->vring.desc)) { +return; +} + +caches = vring_get_region_caches(vq); +desc.id = elem->index; +desc.len = len; + +head = vq->used_idx + idx; +if (head >= vq->vring.num) { +head -= vq->vring.num; +wrap_counter ^= 1; +} +if (wrap_counter) { +desc.flags |= VRING_DESC_F_AVAIL; +desc.flags |= VRING_DESC_F_USED; +} else { +desc.flags &= ~VRING_DESC_F_AVAIL; +desc.flags &= ~VRING_DESC_F_USED; +} + +vring_packed_desc_write_data(vq->vdev, , >desc, head); +if (idx == 0) { +/* Make sure descriptor id and len is written before + * flags for the first used buffer. + */ +smp_wmb(); +} + +vring_packed_desc_write_flags(vq->vdev, , >desc, head); +} + +void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem, +unsigned int len, unsigned int idx) +{ +trace_virtqueue_fill(vq, elem, len, idx); + +virtqueue_unmap_sg(vq, elem, len); if (unlikely(vq->vdev->broken)) { -vq->inuse -= count; return; } +if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) { +virtqueue_packed_fill(vq, elem, len, idx); +} else { +virtqueue_split_fill(vq, elem, len, idx); +} +} + +/* Called within rcu_read_lock(). 
*/ +static void virtqueue_split_flush(VirtQueue *vq, unsigned int count) +{ +uint16_t old, new; + if (unlikely(!vq->vring.used)) { return; } @@ -607,6 +685,34 @@ void virtqueue_flush(VirtQueue *vq, unsigned int count) vq->signalled_used_valid = false; } +static void virtqueue_packed_flush(VirtQueue *vq,
[Qemu-devel] [PATCH v1 06/16] virtio: init wrap counter for packed ring
From: Wei Xu Signed-off-by: Wei Xu --- hw/virtio/virtio.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 99565c6..74d9710 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -1239,6 +1239,9 @@ void virtio_reset(void *opaque) vdev->vq[i].last_avail_idx = 0; vdev->vq[i].shadow_avail_idx = 0; vdev->vq[i].used_idx = 0; +vdev->vq[i].avail_wrap_counter = true; +vdev->vq[i].event_wrap_counter = true; +vdev->vq[i].used_wrap_counter = true; virtio_queue_set_vector(vdev, i, VIRTIO_NO_VECTOR); vdev->vq[i].signalled_used = 0; vdev->vq[i].signalled_used_valid = false; -- 1.8.3.1
[Qemu-devel] [PATCH v1 11/16] virtio-net: fill head desc after done all in a chain
From: Wei Xu With support for marking a descriptor used/unused in the 'flags' field in 1.1, the current way of filling chained descriptors does not work, since the driver side may get wrong 'num_buffer' information in case the head descriptor has been filled in while the subsequent ones are still being processed on the device side. This patch fills in the head descriptor only after all the others have been done. Signed-off-by: Wei Xu --- hw/net/virtio-net.c | 11 ++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c index 385b1a0..2db0e8b 100644 --- a/hw/net/virtio-net.c +++ b/hw/net/virtio-net.c @@ -1198,6 +1198,8 @@ static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf, struct virtio_net_hdr_mrg_rxbuf mhdr; unsigned mhdr_cnt = 0; size_t offset, i, guest_offset; +VirtQueueElement head; +int head_len = 0; if (!virtio_net_can_receive(nc)) { return -1; @@ -1275,7 +1277,13 @@ static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf, } /* signal other side */ -virtqueue_fill(q->rx_vq, elem, total, i++); +if (i == 0) { +head_len = total; +head = *elem; +} else { +virtqueue_fill(q->rx_vq, elem, len, i); +} +i++; g_free(elem); } @@ -1286,6 +1294,7 @@ static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf, _buffers, sizeof mhdr.num_buffers); } +virtqueue_fill(q->rx_vq, , head_len, 0); virtqueue_flush(q->rx_vq, i); virtio_notify(vdev, q->rx_vq); -- 1.8.3.1
[Qemu-devel] [PATCH v1 05/16] virtio: add memory region init for packed ring
From: Wei Xu Signed-off-by: Wei Xu --- hw/virtio/virtio.c | 12 ++-- 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index a41c2d3..99565c6 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -156,10 +156,8 @@ static void virtio_init_region_cache(VirtIODevice *vdev, int n) VRingMemoryRegionCaches *old = vq->vring.caches; VRingMemoryRegionCaches *new = NULL; hwaddr addr, size; -int event_size; int64_t len; - -event_size = virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0; +bool attr; addr = vq->vring.desc; if (!addr) { @@ -167,14 +165,16 @@ static void virtio_init_region_cache(VirtIODevice *vdev, int n) } new = g_new0(VRingMemoryRegionCaches, 1); size = virtio_queue_get_desc_size(vdev, n); +attr = virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED) ? + true : false; len = address_space_cache_init(>desc, vdev->dma_as, - addr, size, false); + addr, size, attr); if (len < size) { virtio_error(vdev, "Cannot map desc"); goto err_desc; } -size = virtio_queue_get_used_size(vdev, n) + event_size; +size = virtio_queue_get_used_size(vdev, n); len = address_space_cache_init(>used, vdev->dma_as, vq->vring.used, size, true); if (len < size) { @@ -182,7 +182,7 @@ static void virtio_init_region_cache(VirtIODevice *vdev, int n) goto err_used; } -size = virtio_queue_get_avail_size(vdev, n) + event_size; +size = virtio_queue_get_avail_size(vdev, n); len = address_space_cache_init(>avail, vdev->dma_as, vq->vring.avail, size, false); if (len < size) { -- 1.8.3.1
[Qemu-devel] [PATCH v1 04/16] virtio: expand offset calculation for packed ring
From: Wei Xu Expand 1.0 to 1.1 by adding offset calculation accordingly. Signed-off-by: Wei Xu --- hw/virtio/virtio.c | 22 ++ 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index a8e737c..a41c2d3 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -2336,14 +2336,28 @@ hwaddr virtio_queue_get_desc_size(VirtIODevice *vdev, int n) hwaddr virtio_queue_get_avail_size(VirtIODevice *vdev, int n) { -return offsetof(VRingAvail, ring) + -sizeof(uint16_t) * vdev->vq[n].vring.num; +int s; + +if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) { +return sizeof(struct VRingPackedDescEvent); +} else { +s = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0; +return offsetof(VRingAvail, ring) + +sizeof(uint16_t) * vdev->vq[n].vring.num + s; +} } hwaddr virtio_queue_get_used_size(VirtIODevice *vdev, int n) { -return offsetof(VRingUsed, ring) + -sizeof(VRingUsedElem) * vdev->vq[n].vring.num; +int s; + +if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) { +return sizeof(struct VRingPackedDescEvent); +} else { +s = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0; +return offsetof(VRingUsed, ring) + +sizeof(VRingUsedElem) * vdev->vq[n].vring.num + s; +} } uint16_t virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n) -- 1.8.3.1
[Qemu-devel] [PATCH v1 01/16] Update version for v3.1.0-rc2 release
From: Peter Maydell Signed-off-by: Peter Maydell --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index 3af1c22..bbcce69 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -3.0.91 +3.0.92 -- 1.8.3.1
[Qemu-devel] [PATCH v1 08/16] virtio: get avail bytes check for packed ring
From: Wei Xu Signed-off-by: Wei Xu --- hw/virtio/virtio.c | 180 + 1 file changed, 167 insertions(+), 13 deletions(-) diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 9d485e4..13265e3 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -372,6 +372,17 @@ int virtio_queue_ready(VirtQueue *vq) return vq->vring.avail != 0; } +static void vring_packed_desc_read(VirtIODevice *vdev, VRingPackedDesc *desc, +MemoryRegionCache *cache, int i) +{ +address_space_read_cached(cache, i * sizeof(VRingPackedDesc), + desc, sizeof(VRingPackedDesc)); +virtio_tswap16s(vdev, >flags); +virtio_tswap64s(vdev, >addr); +virtio_tswap32s(vdev, >len); +virtio_tswap16s(vdev, >id); +} + static void vring_packed_desc_read_flags(VirtIODevice *vdev, VRingPackedDesc *desc, MemoryRegionCache *cache, int i) { @@ -671,9 +682,9 @@ static int virtqueue_read_next_desc(VirtIODevice *vdev, VRingDesc *desc, return VIRTQUEUE_READ_DESC_MORE; } -void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes, - unsigned int *out_bytes, - unsigned max_in_bytes, unsigned max_out_bytes) +static void virtqueue_split_get_avail_bytes(VirtQueue *vq, +unsigned int *in_bytes, unsigned int *out_bytes, +unsigned max_in_bytes, unsigned max_out_bytes) { VirtIODevice *vdev = vq->vdev; unsigned int max, idx; @@ -683,16 +694,6 @@ void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes, int64_t len = 0; int rc; -if (unlikely(!vq->vring.desc)) { -if (in_bytes) { -*in_bytes = 0; -} -if (out_bytes) { -*out_bytes = 0; -} -return; -} - rcu_read_lock(); idx = vq->last_avail_idx; total_bufs = in_total = out_total = 0; @@ -796,6 +797,159 @@ err: goto done; } +static void virtqueue_packed_get_avail_bytes(VirtQueue *vq, +unsigned int *in_bytes, unsigned int *out_bytes, +unsigned max_in_bytes, unsigned max_out_bytes) +{ +VirtIODevice *vdev = vq->vdev; +unsigned int max, idx; +unsigned int total_bufs, in_total, out_total; +MemoryRegionCache *desc_cache; +VRingMemoryRegionCaches *caches; +MemoryRegionCache 
indirect_desc_cache = MEMORY_REGION_CACHE_INVALID; +int64_t len = 0; +VRingPackedDesc desc; +bool wrap_counter; + +rcu_read_lock(); +idx = vq->last_avail_idx; +wrap_counter = vq->avail_wrap_counter; +total_bufs = in_total = out_total = 0; + +max = vq->vring.num; +caches = vring_get_region_caches(vq); +if (caches->desc.len < max * sizeof(VRingPackedDesc)) { +virtio_error(vdev, "Cannot map descriptor ring"); +goto err; +} + +desc_cache = >desc; +vring_packed_desc_read_flags(vdev, , desc_cache, idx); +while (is_desc_avail(, wrap_counter)) { +unsigned int num_bufs; +unsigned int i = 0; + +num_bufs = total_bufs; + +/* Make sure all the fields have been exposed. */ +smp_rmb(); +vring_packed_desc_read(vdev, , desc_cache, idx); + +if (desc.flags & VRING_DESC_F_INDIRECT) { +if (desc.len % sizeof(VRingPackedDesc)) { +virtio_error(vdev, "Invalid size for indirect buffer table"); +goto err; +} + +/* If we've got too many, that implies a descriptor loop. */ +if (num_bufs >= max) { +virtio_error(vdev, "Looped descriptor"); +goto err; +} + +/* loop over the indirect descriptor table */ +len = address_space_cache_init(_desc_cache, + vdev->dma_as, + desc.addr, desc.len, false); +desc_cache = _desc_cache; +if (len < desc.len) { +virtio_error(vdev, "Cannot map indirect buffer"); +goto err; +} + +max = desc.len / sizeof(VRingPackedDesc); +num_bufs = i = 0; +vring_packed_desc_read(vdev, , desc_cache, i); +} + +do { +/* If we've got too many, that implies a descriptor loop. */ +if (++num_bufs > max) { +virtio_error(vdev, "Looped descriptor"); +goto err; +} + +if (desc.flags & VRING_DESC_F_WRITE) { +in_total += desc.len; +} else { +out_total += desc.len; +} +if (in_total >= max_in_bytes && out_total >= max_out_bytes) { +goto done; +} + +if (desc_cache == _desc_cache) { +vring_packed_desc_read(vdev, , desc_cache, i); +
[Qemu-devel] [PATCH v1 00/16] packed ring virtio-net backend support
From: Wei Xu Code base: https://github.com/Whishay/qemu.git rfc v3 -> v1 - migration support for both userspace and vhost-net, need to tweak vhost ioctl() to make it work(the code is pasted in the commit message of vhost migration patch #13). Note: the high 32-bit guest feature bit is saved as a subsection for virtio devices which makes packed ring feature bit check unusable when loading the saved per-queue variables(this is done before loading subsection which is the last action for device during migration), so I save and load all the things generally for now, any idea to fix this? - Fixed comments from Jason for rfc v3 sorted by patch #, two comments I didn't take were(from patch) listed here: 09: - introduce new API(virtqueue_fill_n()). - Didn't take it since userspace backend does not support batching, so only one element is popped and current API should be enough. 06 & 07: Refactor split and packed pop()/get_avail_bytes(). - the duplicated code intertwined with split/packed ring specific things and it might make it unclear, so I only extracted the few common parts outside rcu and keep the others separate. The other revised comments: 02: - reuse current 'avail/used' for 'driver/device' in VRingMemoryRegionCache. - remove event_idx since shadow_avail_idx works. 03: - move size recalculation to a separate patch. - keep 'avail/used' in current calculation function name. - initialize 'desc' memory region as 'false' for 1.0('true' for 1.1) 04: - delete 'event_idx' 05: - rename 'wc' to wrap_counter. 06: - converge common part outside rcu section for 1.0/1.1. - move memory barrier for the first 'desc' in between checking the flag and reading other fields. - remove unnecessary memory barriers for indirect descriptors. - no need to destroy indirect memory cache since it is generally done before return from the function. - remove redundant maximum chained descriptors limitation check. 
- there are some differences(desc name, wrap idx/counter, flags) between split and packed rings, so keep them separate for now. - amend the comment when recording index and wrap counter for a kick from guest. 07: - calculate fields in descriptor instead of read it when filling. - put memory barrier correctly before filling the flags in descriptor. - replace full memory barrier with a write barrier in fill. - shift to read descriptor flags and descriptor necessarily and separately in packed_pop(). - correct memory barrier in packed_pop() as in packed_fill(). 08: - reuse 'shadow_avail_idx' instead of adding a new 'event_idx'. - use the compact and verified vring_packed_need_event() version for vhost net/user. 12: - remove the odd cherry-pick comment. - used bit '15' for wrap_counters. rfc v2->v3 - addressed performance issue - fixed feedback from v2 rfc v1->v2 - sync to tiwei's v5 - reuse memory cache function with 1.0 - dropped detach patch and notification helper(04 & 05 in v1) - guest virtio-net driver unload/reload support - event suppression support(not tested) - addressed feedback from v1 Wei Xu (15): virtio: introduce packed ring definitions virtio: redefine structure & memory cache for packed ring virtio: expand offset calculation for packed ring virtio: add memory region init for packed ring virtio: init wrap counter for packed ring virtio: init and desc empty check for packed ring virtio: get avail bytes check for packed ring virtio: fill/flush/pop for packed ring virtio: event suppression support for packed ring virtio-net: fill head desc after done all in a chain virtio: add userspace migration of packed ring virtio: add vhost-net migration of packed ring virtio: packed ring feature bit for userspace backend vhost: enable packed ring virtio: enable packed ring via a new command line VERSION| 2 +- hw/net/vhost_net.c | 2 + hw/net/virtio-net.c| 11 +- hw/virtio/virtio.c | 756 +++-- include/hw/virtio/virtio.h | 8 +- 
include/standard-headers/linux/virtio_config.h | 15 + include/standard-headers/linux/virtio_ring.h | 43 ++ 7 files changed, 783 insertions(+), 54 deletions(-) -- 1.8.3.1
[Qemu-devel] [PATCH v1 03/16] virtio: redefine structure & memory cache for packed ring
From: Wei Xu Redefine packed ring structure according to Qemu nomenclature, field data(wrap counter, etc) are introduced also. Signed-off-by: Wei Xu --- hw/virtio/virtio.c | 16 1 file changed, 16 insertions(+) diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 4136d23..a8e737c 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -39,6 +39,13 @@ typedef struct VRingDesc uint16_t next; } VRingDesc; +typedef struct VRingPackedDesc { +uint64_t addr; +uint32_t len; +uint16_t id; +uint16_t flags; +} VRingPackedDesc; + typedef struct VRingAvail { uint16_t flags; @@ -77,6 +84,11 @@ typedef struct VRing VRingMemoryRegionCaches *caches; } VRing; +typedef struct VRingPackedDescEvent { +uint16_t off_wrap; +uint16_t flags; +} VRingPackedDescEvent ; + struct VirtQueue { VRing vring; @@ -87,7 +99,11 @@ struct VirtQueue /* Last avail_idx read from VQ. */ uint16_t shadow_avail_idx; +bool event_wrap_counter; +bool avail_wrap_counter; + uint16_t used_idx; +bool used_wrap_counter; /* Last used index value we have signalled on */ uint16_t signalled_used; -- 1.8.3.1
[Qemu-devel] [PATCH v1 02/16] virtio: introduce packed ring definitions
From: Wei Xu >From 1.1 spec. Signed-off-by: Wei Xu --- include/standard-headers/linux/virtio_config.h | 15 + include/standard-headers/linux/virtio_ring.h | 43 ++ 2 files changed, 58 insertions(+) diff --git a/include/standard-headers/linux/virtio_config.h b/include/standard-headers/linux/virtio_config.h index 0b19436..9f450fd 100644 --- a/include/standard-headers/linux/virtio_config.h +++ b/include/standard-headers/linux/virtio_config.h @@ -75,6 +75,21 @@ */ #define VIRTIO_F_IOMMU_PLATFORM33 +/* This feature indicates support for the packed virtqueue layout. */ +#define VIRTIO_F_RING_PACKED 34 + +/* Enable events */ +#define RING_EVENT_FLAGS_ENABLE 0x0 +/* Disable events */ +#define RING_EVENT_FLAGS_DISABLE 0x1 +/* + * * Enable events for a specific descriptor + * * (as specified by Descriptor Ring Change Event Offset/Wrap Counter). + ** Only valid if VIRTIO_F_RING_EVENT_IDX has been negotiated. + * */ +#define RING_EVENT_FLAGS_DESC 0x2 +/* The value 0x3 is reserved */ + /* * Does the device support Single Root I/O Virtualization? */ diff --git a/include/standard-headers/linux/virtio_ring.h b/include/standard-headers/linux/virtio_ring.h index d26e72b..1719c6f 100644 --- a/include/standard-headers/linux/virtio_ring.h +++ b/include/standard-headers/linux/virtio_ring.h @@ -42,6 +42,10 @@ /* This means the buffer contains a list of buffer descriptors. */ #define VRING_DESC_F_INDIRECT 4 +/* Mark a descriptor as available or used. */ +#define VRING_DESC_F_AVAIL (1ul << 7) +#define VRING_DESC_F_USED (1ul << 15) + /* The Host uses this in used->flags to advise the Guest: don't kick me when * you add a buffer. It's unreliable, so it's simply an optimization. Guest * will still kick if it's out of buffers. */ @@ -51,6 +55,17 @@ * optimization. */ #define VRING_AVAIL_F_NO_INTERRUPT 1 +/* Enable events. */ +#define VRING_EVENT_F_ENABLE 0x0 +/* Disable events. 
*/ +#define VRING_EVENT_F_DISABLE 0x1 +/* + * Enable events for a specific descriptor + * (as specified by Descriptor Ring Change Event Offset/Wrap Counter). + * Only valid if VIRTIO_RING_F_EVENT_IDX has been negotiated. + */ +#define VRING_EVENT_F_DESC 0x2 + /* We support indirect buffer descriptors */ #define VIRTIO_RING_F_INDIRECT_DESC28 @@ -169,4 +184,32 @@ static inline int vring_need_event(uint16_t event_idx, uint16_t new_idx, uint16_ return (uint16_t)(new_idx - event_idx - 1) < (uint16_t)(new_idx - old); } +struct vring_packed_desc_event { + /* Descriptor Ring Change Event Offset/Wrap Counter. */ + __virtio16 off_wrap; + /* Descriptor Ring Change Event Flags. */ + __virtio16 flags; +}; + +struct vring_packed_desc { + /* Buffer Address. */ + __virtio64 addr; + /* Buffer Length. */ + __virtio32 len; + /* Buffer ID. */ + __virtio16 id; + /* The flags depending on descriptor type. */ + __virtio16 flags; +}; + +struct vring_packed { + unsigned int num; + + struct vring_packed_desc *desc; + + struct vring_packed_desc_event *driver; + + struct vring_packed_desc_event *device; +}; + #endif /* _LINUX_VIRTIO_RING_H */ -- 1.8.3.1
[Qemu-devel] [RFC v3 09/12] virtio-net: fill head desc after done all in a chain
From: Wei Xu With the support of marking a descriptor used/unused in 'flags' field for 1.1, the current way of filling a chained descriptors does not work since driver side may get the wrong 'num_buffer' information in case of the head descriptor has been filled in while the subsequent ones are still in processing in device side. This patch fills the head one after done all the others one. Signed-off-by: Wei Xu --- hw/net/virtio-net.c | 11 ++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c index 4bdd5b8..186c86cd2 100644 --- a/hw/net/virtio-net.c +++ b/hw/net/virtio-net.c @@ -1198,6 +1198,8 @@ static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf, struct virtio_net_hdr_mrg_rxbuf mhdr; unsigned mhdr_cnt = 0; size_t offset, i, guest_offset; +VirtQueueElement head; +int head_len = 0; if (!virtio_net_can_receive(nc)) { return -1; @@ -1275,7 +1277,13 @@ static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf, } /* signal other side */ -virtqueue_fill(q->rx_vq, elem, total, i++); +if (i == 0) { +head_len = total; +head = *elem; +} else { +virtqueue_fill(q->rx_vq, elem, len, i); +} +i++; g_free(elem); } @@ -1286,6 +1294,7 @@ static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf, _buffers, sizeof mhdr.num_buffers); } +virtqueue_fill(q->rx_vq, , head_len, 0); virtqueue_flush(q->rx_vq, i); virtio_notify(vdev, q->rx_vq); -- 1.8.3.1
[Qemu-devel] [RFC v3 10/12] virtio: packed ring feature bit for userspace backend
From: Wei Xu Signed-off-by: Wei Xu --- hw/net/vhost_net.c | 1 + 1 file changed, 1 insertion(+) diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c index e037db6..fb4b18f 100644 --- a/hw/net/vhost_net.c +++ b/hw/net/vhost_net.c @@ -78,6 +78,7 @@ static const int user_feature_bits[] = { VIRTIO_NET_F_MRG_RXBUF, VIRTIO_NET_F_MTU, VIRTIO_F_IOMMU_PLATFORM, +VIRTIO_F_RING_PACKED, /* This bit implies RARP isn't sent by QEMU out of band */ VIRTIO_NET_F_GUEST_ANNOUNCE, -- 1.8.3.1
[Qemu-devel] [RFC v3 12/12] virtio: feature vhost-net support for packed ring
From: Wei Xu (cherry picked from commit 305a2c4640c15c5717245067ab937fd10f478ee6) Signed-off-by: Wei Xu (cherry picked from commit 46476dae6f44c6fef8802a4a0ac7d0d79fe399e3) Signed-off-by: Wei Xu --- hw/virtio/vhost.c | 3 +++ hw/virtio/virtio.c | 4 include/hw/virtio/virtio.h | 1 + 3 files changed, 8 insertions(+) diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c index 9df2da3..de06d55 100644 --- a/hw/virtio/vhost.c +++ b/hw/virtio/vhost.c @@ -974,6 +974,9 @@ static int vhost_virtqueue_start(struct vhost_dev *dev, } state.num = virtio_queue_get_last_avail_idx(vdev, idx); +if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) { +state.num |= ((int)virtio_queue_packed_get_wc(vdev, idx)) << 31; +} r = dev->vhost_ops->vhost_set_vring_base(dev, ); if (r) { VHOST_OPS_DEBUG("vhost_set_vring_base failed"); diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 1d25776..2a90163 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -2894,6 +2894,10 @@ void virtio_init(VirtIODevice *vdev, const char *name, vdev->use_guest_notifier_mask = true; } +bool virtio_queue_packed_get_wc(VirtIODevice *vdev, int n) +{ +return vdev->vq[n].avail_wrap_counter; +} hwaddr virtio_queue_get_desc_addr(VirtIODevice *vdev, int n) { return vdev->vq[n].vring.desc; diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h index 9af8839..0bb3be5 100644 --- a/include/hw/virtio/virtio.h +++ b/include/hw/virtio/virtio.h @@ -295,6 +295,7 @@ void virtio_queue_aio_set_host_notifier_handler(VirtQueue *vq, AioContext *ctx, VirtIOHandleAIOOutput handle_output); VirtQueue *virtio_vector_first_queue(VirtIODevice *vdev, uint16_t vector); VirtQueue *virtio_vector_next_queue(VirtQueue *vq); +bool virtio_queue_packed_get_wc(VirtIODevice *vdev, int n); static inline void virtio_add_feature(uint64_t *features, unsigned int fbit) { -- 1.8.3.1
[Qemu-devel] [RFC v3 06/12] virtio: get avail bytes check for packed ring
From: Wei Xu Same thought as 1.0 except a bit confused when trying to reuse 'shadow_avail_idx', so the interrelated new event_idx and the wrap counter for notifications has been introduced in previous patch. Signed-off-by: Wei Xu --- hw/virtio/virtio.c | 176 - 1 file changed, 173 insertions(+), 3 deletions(-) diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 86f88da..13c6c98 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -375,6 +375,17 @@ int virtio_queue_ready(VirtQueue *vq) return vq->vring.avail != 0; } +static void vring_packed_desc_read(VirtIODevice *vdev, VRingPackedDesc *desc, +MemoryRegionCache *cache, int i) +{ +address_space_read_cached(cache, i * sizeof(VRingPackedDesc), + desc, sizeof(VRingPackedDesc)); +virtio_tswap16s(vdev, >flags); +virtio_tswap64s(vdev, >addr); +virtio_tswap32s(vdev, >len); +virtio_tswap16s(vdev, >id); +} + static void vring_packed_desc_read_flags(VirtIODevice *vdev, VRingPackedDesc *desc, MemoryRegionCache *cache, int i) { @@ -672,9 +683,9 @@ static int virtqueue_read_next_desc(VirtIODevice *vdev, VRingDesc *desc, return VIRTQUEUE_READ_DESC_MORE; } -void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes, - unsigned int *out_bytes, - unsigned max_in_bytes, unsigned max_out_bytes) +static void virtqueue_split_get_avail_bytes(VirtQueue *vq, +unsigned int *in_bytes, unsigned int *out_bytes, +unsigned max_in_bytes, unsigned max_out_bytes) { VirtIODevice *vdev = vq->vdev; unsigned int max, idx; @@ -797,6 +808,165 @@ err: goto done; } +static void virtqueue_packed_get_avail_bytes(VirtQueue *vq, +unsigned int *in_bytes, unsigned int *out_bytes, +unsigned max_in_bytes, unsigned max_out_bytes) +{ +VirtIODevice *vdev = vq->vdev; +unsigned int max, idx; +unsigned int total_bufs, in_total, out_total; +MemoryRegionCache *desc_cache; +VRingMemoryRegionCaches *caches; +MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID; +int64_t len = 0; +VRingPackedDesc desc; +bool wrap_counter; + +if 
(unlikely(!vq->vring.desc)) { +if (in_bytes) { +*in_bytes = 0; +} +if (out_bytes) { +*out_bytes = 0; +} +return; +} + +rcu_read_lock(); +idx = vq->last_avail_idx; +wrap_counter = vq->avail_wrap_counter; +total_bufs = in_total = out_total = 0; + +max = vq->vring.num; +caches = vring_get_region_caches(vq); +if (caches->desc.len < max * sizeof(VRingPackedDesc)) { +virtio_error(vdev, "Cannot map descriptor ring"); +goto err; +} + +desc_cache = >desc; +vring_packed_desc_read(vdev, , desc_cache, idx); +/* Make sure we see all the fields*/ +smp_rmb(); +while (is_desc_avail(, wrap_counter)) { +unsigned int num_bufs; +unsigned int i = 0; + +num_bufs = total_bufs; + +if (desc.flags & VRING_DESC_F_INDIRECT) { +if (desc.len % sizeof(VRingPackedDesc)) { +virtio_error(vdev, "Invalid size for indirect buffer table"); +goto err; +} + +/* If we've got too many, that implies a descriptor loop. */ +if (num_bufs >= max) { +virtio_error(vdev, "Looped descriptor"); +goto err; +} + +/* loop over the indirect descriptor table */ +len = address_space_cache_init(_desc_cache, + vdev->dma_as, + desc.addr, desc.len, false); +desc_cache = _desc_cache; +if (len < desc.len) { +virtio_error(vdev, "Cannot map indirect buffer"); +goto err; +} + +max = desc.len / sizeof(VRingPackedDesc); +num_bufs = i = 0; +vring_packed_desc_read(vdev, , desc_cache, i); +/* Make sure we see all the fields*/ +smp_rmb(); +} + +do { +/* If we've got too many, that implies a descriptor loop. */ +if (++num_bufs > max) { +virtio_error(vdev, "Looped descriptor"); +goto err; +} + +if (desc.flags & VRING_DESC_F_WRITE) { +in_total += desc.len; +} else { +out_total += desc.len; +} +if (in_total >= max_in_bytes && out_total >= max_out_bytes) { +goto done; +} + +if (desc_cache == _desc_cache) { +if (++i > vq->vring.num) { +virtio_error(vdev, "Looped descriptor"); +
[Qemu-devel] [RFC v3 08/12] virtio: event suppression support for packed ring
From: Wei Xu Signed-off-by: Wei Xu --- hw/virtio/virtio.c | 126 +++-- 1 file changed, 123 insertions(+), 3 deletions(-) diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index d12a7e3..1d25776 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -241,6 +241,30 @@ static void vring_desc_read(VirtIODevice *vdev, VRingDesc *desc, virtio_tswap16s(vdev, >next); } +static void vring_packed_event_read(VirtIODevice *vdev, +MemoryRegionCache *cache, VRingPackedDescEvent *e) +{ +address_space_read_cached(cache, 0, e, sizeof(*e)); +virtio_tswap16s(vdev, >off_wrap); +virtio_tswap16s(vdev, >flags); +} + +static void vring_packed_off_wrap_write(VirtIODevice *vdev, +MemoryRegionCache *cache, uint16_t off_wrap) +{ +virtio_tswap16s(vdev, _wrap); +address_space_write_cached(cache, 0, _wrap, sizeof(off_wrap)); +address_space_cache_invalidate(cache, 0, sizeof(off_wrap)); +} + +static void vring_packed_flags_write(VirtIODevice *vdev, +MemoryRegionCache *cache, uint16_t flags) +{ +virtio_tswap16s(vdev, ); +address_space_write_cached(cache, sizeof(uint16_t), , sizeof(flags)); +address_space_cache_invalidate(cache, sizeof(uint16_t), sizeof(flags)); +} + static VRingMemoryRegionCaches *vring_get_region_caches(struct VirtQueue *vq) { VRingMemoryRegionCaches *caches = atomic_rcu_read(>vring.caches); @@ -347,7 +371,7 @@ static inline void vring_set_avail_event(VirtQueue *vq, uint16_t val) address_space_cache_invalidate(>used, pa, sizeof(val)); } -void virtio_queue_set_notification(VirtQueue *vq, int enable) +static void virtio_queue_set_notification_split(VirtQueue *vq, int enable) { vq->notification = enable; @@ -370,6 +394,51 @@ void virtio_queue_set_notification(VirtQueue *vq, int enable) rcu_read_unlock(); } +static void virtio_queue_set_notification_packed(VirtQueue *vq, int enable) +{ +VRingPackedDescEvent e; +VRingMemoryRegionCaches *caches; + +rcu_read_lock(); +caches = vring_get_region_caches(vq); +vring_packed_event_read(vq->vdev, >device, ); + +if (!enable) { +e.flags = 
RING_EVENT_FLAGS_DISABLE; +goto out; +} + +e.flags = RING_EVENT_FLAGS_ENABLE; +if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) { +uint16_t off_wrap = vq->event_idx | vq->event_wrap_counter << 15; + +vring_packed_off_wrap_write(vq->vdev, >device, off_wrap); +/* Make sure off_wrap is wrote before flags */ +smp_wmb(); + +e.flags = RING_EVENT_FLAGS_DESC; +} + +out: +vring_packed_flags_write(vq->vdev, >device, e.flags); +rcu_read_unlock(); +} + +void virtio_queue_set_notification(VirtQueue *vq, int enable) +{ +vq->notification = enable; + +if (!vq->vring.desc) { +return; +} + +if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) { +virtio_queue_set_notification_packed(vq, enable); +} else { +virtio_queue_set_notification_split(vq, enable); +} +} + int virtio_queue_ready(VirtQueue *vq) { return vq->vring.avail != 0; @@ -2103,8 +2172,7 @@ static void virtio_set_isr(VirtIODevice *vdev, int value) } } -/* Called within rcu_read_lock(). */ -static bool virtio_should_notify(VirtIODevice *vdev, VirtQueue *vq) +static bool virtio_split_should_notify(VirtIODevice *vdev, VirtQueue *vq) { uint16_t old, new; bool v; @@ -2127,6 +2195,58 @@ static bool virtio_should_notify(VirtIODevice *vdev, VirtQueue *vq) return !v || vring_need_event(vring_get_used_event(vq), new, old); } +static bool vring_packed_need_event(VirtQueue *vq, uint16_t off_wrap, +uint16_t new, uint16_t old) +{ +bool wrap = vq->event_wrap_counter; +int off = off_wrap & ~(1 << 15); + +if (new < old) { +new += vq->vring.num; +wrap ^= 1; +} + +if (wrap != off_wrap >> 15) { +off += vq->vring.num; +} + +return vring_need_event(off, new, old); +} + +static bool virtio_packed_should_notify(VirtIODevice *vdev, VirtQueue *vq) +{ +VRingPackedDescEvent e; +uint16_t old, new; +bool v; +VRingMemoryRegionCaches *caches; + +caches = vring_get_region_caches(vq); +vring_packed_event_read(vdev, >driver, ); + +old = vq->signalled_used; +new = vq->signalled_used = vq->used_idx; +v = vq->signalled_used_valid; 
+vq->signalled_used_valid = true; + +if (e.flags == RING_EVENT_FLAGS_DISABLE) { +return false; +} else if (e.flags == RING_EVENT_FLAGS_ENABLE) { +return true; +} + +return !v || vring_packed_need_event(vq, e.off_wrap, new, old); +} + +/* Called within rcu_read_lock(). */ +static bool virtio_should_notify(VirtIODevice *vdev, VirtQueue *vq) +{ +if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) { +return
[Qemu-devel] [RFC v3 04/12] virtio: init wrap counter for packed ring
From: Wei Xu Signed-off-by: Wei Xu --- hw/virtio/virtio.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index bfb3364..9185efb 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -1243,6 +1243,9 @@ void virtio_reset(void *opaque) vdev->vq[i].last_avail_idx = 0; vdev->vq[i].shadow_avail_idx = 0; vdev->vq[i].used_idx = 0; +vdev->vq[i].avail_wrap_counter = true; +vdev->vq[i].event_idx = 0; +vdev->vq[i].event_wrap_counter = true; virtio_queue_set_vector(vdev, i, VIRTIO_NO_VECTOR); vdev->vq[i].signalled_used = 0; vdev->vq[i].signalled_used_valid = false; -- 1.8.3.1
[Qemu-devel] [RFC v3 11/12] virtio: enable packed ring via a new command line
From: Wei Xu only userspace virtio net backend has been supported by the CLI so far. (cherry picked from commit 0b3ec96f4a9402cca467c40353066e57608ac6b6) Signed-off-by: Wei Xu (cherry picked from commit a1a3b85f00299ccc6f4bc819abe470da88059fb7) Signed-off-by: Wei Xu --- include/hw/virtio/virtio.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h index e323e76..9af8839 100644 --- a/include/hw/virtio/virtio.h +++ b/include/hw/virtio/virtio.h @@ -264,7 +264,9 @@ typedef struct VirtIORNGConf VirtIORNGConf; DEFINE_PROP_BIT64("any_layout", _state, _field, \ VIRTIO_F_ANY_LAYOUT, true), \ DEFINE_PROP_BIT64("iommu_platform", _state, _field, \ - VIRTIO_F_IOMMU_PLATFORM, false) + VIRTIO_F_IOMMU_PLATFORM, false), \ +DEFINE_PROP_BIT64("ring_packed", _state, _field, \ + VIRTIO_F_RING_PACKED, false) hwaddr virtio_queue_get_desc_addr(VirtIODevice *vdev, int n); hwaddr virtio_queue_get_avail_addr(VirtIODevice *vdev, int n); -- 1.8.3.1
[Qemu-devel] [RFC v3 02/12] virtio: redefine structure & memory cache for packed ring
From: Wei Xu Redefine packed ring structure according to qemu nomenclature, also supported data(event index, wrap counter, etc) are introduced. Signed-off-by: Wei Xu --- hw/virtio/virtio.c | 26 -- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 94f5c8e..500eecf 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -39,6 +39,13 @@ typedef struct VRingDesc uint16_t next; } VRingDesc; +typedef struct VRingPackedDesc { +uint64_t addr; +uint32_t len; +uint16_t id; +uint16_t flags; +} VRingPackedDesc; + typedef struct VRingAvail { uint16_t flags; @@ -62,8 +69,14 @@ typedef struct VRingUsed typedef struct VRingMemoryRegionCaches { struct rcu_head rcu; MemoryRegionCache desc; -MemoryRegionCache avail; -MemoryRegionCache used; +union { +MemoryRegionCache avail; +MemoryRegionCache driver; +}; +union { +MemoryRegionCache used; +MemoryRegionCache device; +}; } VRingMemoryRegionCaches; typedef struct VRing @@ -77,6 +90,11 @@ typedef struct VRing VRingMemoryRegionCaches *caches; } VRing; +typedef struct VRingPackedDescEvent { +uint16_t off_wrap; +uint16_t flags; +} VRingPackedDescEvent ; + struct VirtQueue { VRing vring; @@ -87,6 +105,10 @@ struct VirtQueue /* Last avail_idx read from VQ. */ uint16_t shadow_avail_idx; +uint16_t event_idx; +bool event_wrap_counter; +bool avail_wrap_counter; + uint16_t used_idx; /* Last used index value we have signalled on */ -- 1.8.3.1
[Qemu-devel] [RFC v3 07/12] virtio: fill/flush/pop for packed ring
From: Wei Xu Signed-off-by: Wei Xu --- hw/virtio/virtio.c | 258 ++--- 1 file changed, 244 insertions(+), 14 deletions(-) diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 13c6c98..d12a7e3 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -386,6 +386,21 @@ static void vring_packed_desc_read(VirtIODevice *vdev, VRingPackedDesc *desc, virtio_tswap16s(vdev, >id); } +static void vring_packed_desc_write(VirtIODevice *vdev, VRingPackedDesc *desc, +MemoryRegionCache *cache, int i) +{ +virtio_tswap64s(vdev, >addr); +virtio_tswap32s(vdev, >len); +virtio_tswap16s(vdev, >id); +virtio_tswap16s(vdev, >flags); +address_space_write_cached(cache, + sizeof(VRingPackedDesc) * i, desc, + sizeof(VRingPackedDesc)); +address_space_cache_invalidate(cache, + sizeof(VRingPackedDesc) * i, + sizeof(VRingPackedDesc)); +} + static void vring_packed_desc_read_flags(VirtIODevice *vdev, VRingPackedDesc *desc, MemoryRegionCache *cache, int i) { @@ -559,19 +574,11 @@ bool virtqueue_rewind(VirtQueue *vq, unsigned int num) } /* Called within rcu_read_lock(). */ -void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem, +static void virtqueue_split_fill(VirtQueue *vq, const VirtQueueElement *elem, unsigned int len, unsigned int idx) { VRingUsedElem uelem; -trace_virtqueue_fill(vq, elem, len, idx); - -virtqueue_unmap_sg(vq, elem, len); - -if (unlikely(vq->vdev->broken)) { -return; -} - if (unlikely(!vq->vring.used)) { return; } @@ -583,16 +590,64 @@ void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem, vring_used_write(vq, , idx); } -/* Called within rcu_read_lock(). 
*/ -void virtqueue_flush(VirtQueue *vq, unsigned int count) +static void virtqueue_packed_fill(VirtQueue *vq, const VirtQueueElement *elem, +unsigned int len, unsigned int idx) { -uint16_t old, new; +uint16_t w, head; +VRingMemoryRegionCaches *caches; +VRingPackedDesc desc = { +.addr = 0, +.flags = 0, +}; + +if (unlikely(!vq->vring.desc)) { +return; +} + +caches = vring_get_region_caches(vq); +head = vq->used_idx + idx; +head = head >= vq->vring.num ? (head - vq->vring.num) : head; +vring_packed_desc_read(vq->vdev, , >desc, head); + +w = (desc.flags & AVAIL_DESC_PACKED(1)) >> 7; +desc.flags &= ~(AVAIL_DESC_PACKED(1) | USED_DESC_PACKED(1)); +desc.flags |= AVAIL_DESC_PACKED(w) | USED_DESC_PACKED(w); +if (!(desc.flags & VRING_DESC_F_INDIRECT)) { +if (!(desc.flags & VRING_DESC_F_WRITE)) { +desc.len = 0; +} else { +desc.len = len; +} +} +vring_packed_desc_write(vq->vdev, , >desc, head); + +/* Make sure flags has been updated */ +smp_mb(); +} + +void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem, +unsigned int len, unsigned int idx) +{ +trace_virtqueue_fill(vq, elem, len, idx); + +virtqueue_unmap_sg(vq, elem, len); if (unlikely(vq->vdev->broken)) { -vq->inuse -= count; return; } +if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) { +virtqueue_packed_fill(vq, elem, len, idx); +} else { +virtqueue_split_fill(vq, elem, len, idx); +} +} + +/* Called within rcu_read_lock(). 
*/ +static void virtqueue_split_flush(VirtQueue *vq, unsigned int count) +{ +uint16_t old, new; + if (unlikely(!vq->vring.used)) { return; } @@ -608,6 +663,33 @@ void virtqueue_flush(VirtQueue *vq, unsigned int count) vq->signalled_used_valid = false; } +static void virtqueue_packed_flush(VirtQueue *vq, unsigned int count) +{ +if (unlikely(!vq->vring.desc)) { +return; +} + +vq->inuse -= count; +vq->used_idx += count; +if (vq->used_idx >= vq->vring.num) { +vq->used_idx -= vq->vring.num; +} +} + +void virtqueue_flush(VirtQueue *vq, unsigned int count) +{ +if (unlikely(vq->vdev->broken)) { +vq->inuse -= count; +return; +} + +if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) { +virtqueue_packed_flush(vq, count); +} else { +virtqueue_split_flush(vq, count); +} +} + void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem, unsigned int len) { @@ -1091,7 +1173,7 @@ static void *virtqueue_alloc_element(size_t sz, unsigned out_num, unsigned in_nu return elem; } -void *virtqueue_pop(VirtQueue *vq, size_t sz) +static void *virtqueue_split_pop(VirtQueue *vq, size_t sz) { unsigned int i, head, max; VRingMemoryRegionCaches *caches; @@ -1226,6 +1308,154 @@ err_undo_map: goto done; }
[Qemu-devel] [RFC v3 01/12] virtio: introduce packed ring definitions
From: Wei Xu sync from 1.1 spec Signed-off-by: Wei Xu --- include/standard-headers/linux/virtio_config.h | 15 + include/standard-headers/linux/virtio_ring.h | 43 ++ 2 files changed, 58 insertions(+) diff --git a/include/standard-headers/linux/virtio_config.h b/include/standard-headers/linux/virtio_config.h index 0b19436..9f450fd 100644 --- a/include/standard-headers/linux/virtio_config.h +++ b/include/standard-headers/linux/virtio_config.h @@ -75,6 +75,21 @@ */ #define VIRTIO_F_IOMMU_PLATFORM33 +/* This feature indicates support for the packed virtqueue layout. */ +#define VIRTIO_F_RING_PACKED 34 + +/* Enable events */ +#define RING_EVENT_FLAGS_ENABLE 0x0 +/* Disable events */ +#define RING_EVENT_FLAGS_DISABLE 0x1 +/* + * * Enable events for a specific descriptor + * * (as specified by Descriptor Ring Change Event Offset/Wrap Counter). + ** Only valid if VIRTIO_F_RING_EVENT_IDX has been negotiated. + * */ +#define RING_EVENT_FLAGS_DESC 0x2 +/* The value 0x3 is reserved */ + /* * Does the device support Single Root I/O Virtualization? */ diff --git a/include/standard-headers/linux/virtio_ring.h b/include/standard-headers/linux/virtio_ring.h index d26e72b..1719c6f 100644 --- a/include/standard-headers/linux/virtio_ring.h +++ b/include/standard-headers/linux/virtio_ring.h @@ -42,6 +42,10 @@ /* This means the buffer contains a list of buffer descriptors. */ #define VRING_DESC_F_INDIRECT 4 +/* Mark a descriptor as available or used. */ +#define VRING_DESC_F_AVAIL (1ul << 7) +#define VRING_DESC_F_USED (1ul << 15) + /* The Host uses this in used->flags to advise the Guest: don't kick me when * you add a buffer. It's unreliable, so it's simply an optimization. Guest * will still kick if it's out of buffers. */ @@ -51,6 +55,17 @@ * optimization. */ #define VRING_AVAIL_F_NO_INTERRUPT 1 +/* Enable events. */ +#define VRING_EVENT_F_ENABLE 0x0 +/* Disable events. 
*/ +#define VRING_EVENT_F_DISABLE 0x1 +/* + * Enable events for a specific descriptor + * (as specified by Descriptor Ring Change Event Offset/Wrap Counter). + * Only valid if VIRTIO_RING_F_EVENT_IDX has been negotiated. + */ +#define VRING_EVENT_F_DESC 0x2 + /* We support indirect buffer descriptors */ #define VIRTIO_RING_F_INDIRECT_DESC28 @@ -169,4 +184,32 @@ static inline int vring_need_event(uint16_t event_idx, uint16_t new_idx, uint16_ return (uint16_t)(new_idx - event_idx - 1) < (uint16_t)(new_idx - old); } +struct vring_packed_desc_event { + /* Descriptor Ring Change Event Offset/Wrap Counter. */ + __virtio16 off_wrap; + /* Descriptor Ring Change Event Flags. */ + __virtio16 flags; +}; + +struct vring_packed_desc { + /* Buffer Address. */ + __virtio64 addr; + /* Buffer Length. */ + __virtio32 len; + /* Buffer ID. */ + __virtio16 id; + /* The flags depending on descriptor type. */ + __virtio16 flags; +}; + +struct vring_packed { + unsigned int num; + + struct vring_packed_desc *desc; + + struct vring_packed_desc_event *driver; + + struct vring_packed_desc_event *device; +}; + #endif /* _LINUX_VIRTIO_RING_H */ -- 1.8.3.1
[Qemu-devel] [RFC v3 03/12] virtio: init memory cache for packed ring
From: Wei Xu Expand 1.0 by adding offset calculation accordingly. Signed-off-by: Wei Xu --- hw/virtio/vhost.c | 16 hw/virtio/virtio.c | 35 +++ include/hw/virtio/virtio.h | 4 ++-- 3 files changed, 33 insertions(+), 22 deletions(-) diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c index 569c405..9df2da3 100644 --- a/hw/virtio/vhost.c +++ b/hw/virtio/vhost.c @@ -996,14 +996,14 @@ static int vhost_virtqueue_start(struct vhost_dev *dev, r = -ENOMEM; goto fail_alloc_desc; } -vq->avail_size = s = l = virtio_queue_get_avail_size(vdev, idx); +vq->avail_size = s = l = virtio_queue_get_driver_size(vdev, idx); vq->avail_phys = a = virtio_queue_get_avail_addr(vdev, idx); vq->avail = vhost_memory_map(dev, a, , 0); if (!vq->avail || l != s) { r = -ENOMEM; goto fail_alloc_avail; } -vq->used_size = s = l = virtio_queue_get_used_size(vdev, idx); +vq->used_size = s = l = virtio_queue_get_device_size(vdev, idx); vq->used_phys = a = virtio_queue_get_used_addr(vdev, idx); vq->used = vhost_memory_map(dev, a, , 1); if (!vq->used || l != s) { @@ -1051,10 +1051,10 @@ static int vhost_virtqueue_start(struct vhost_dev *dev, fail_vector: fail_kick: fail_alloc: -vhost_memory_unmap(dev, vq->used, virtio_queue_get_used_size(vdev, idx), +vhost_memory_unmap(dev, vq->used, virtio_queue_get_device_size(vdev, idx), 0, 0); fail_alloc_used: -vhost_memory_unmap(dev, vq->avail, virtio_queue_get_avail_size(vdev, idx), +vhost_memory_unmap(dev, vq->avail, virtio_queue_get_driver_size(vdev, idx), 0, 0); fail_alloc_avail: vhost_memory_unmap(dev, vq->desc, virtio_queue_get_desc_size(vdev, idx), @@ -1101,10 +1101,10 @@ static void vhost_virtqueue_stop(struct vhost_dev *dev, vhost_vq_index); } -vhost_memory_unmap(dev, vq->used, virtio_queue_get_used_size(vdev, idx), - 1, virtio_queue_get_used_size(vdev, idx)); -vhost_memory_unmap(dev, vq->avail, virtio_queue_get_avail_size(vdev, idx), - 0, virtio_queue_get_avail_size(vdev, idx)); +vhost_memory_unmap(dev, vq->used, virtio_queue_get_device_size(vdev, idx), + 1, 
virtio_queue_get_device_size(vdev, idx)); +vhost_memory_unmap(dev, vq->avail, virtio_queue_get_driver_size(vdev, idx), + 0, virtio_queue_get_driver_size(vdev, idx)); vhost_memory_unmap(dev, vq->desc, virtio_queue_get_desc_size(vdev, idx), 0, virtio_queue_get_desc_size(vdev, idx)); } diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 500eecf..bfb3364 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -162,11 +162,8 @@ static void virtio_init_region_cache(VirtIODevice *vdev, int n) VRingMemoryRegionCaches *old = vq->vring.caches; VRingMemoryRegionCaches *new = NULL; hwaddr addr, size; -int event_size; int64_t len; -event_size = virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0; - addr = vq->vring.desc; if (!addr) { goto out_no_cache; @@ -174,13 +171,13 @@ static void virtio_init_region_cache(VirtIODevice *vdev, int n) new = g_new0(VRingMemoryRegionCaches, 1); size = virtio_queue_get_desc_size(vdev, n); len = address_space_cache_init(>desc, vdev->dma_as, - addr, size, false); + addr, size, true); if (len < size) { virtio_error(vdev, "Cannot map desc"); goto err_desc; } -size = virtio_queue_get_used_size(vdev, n) + event_size; +size = virtio_queue_get_device_size(vdev, n); len = address_space_cache_init(>used, vdev->dma_as, vq->vring.used, size, true); if (len < size) { @@ -188,7 +185,7 @@ static void virtio_init_region_cache(VirtIODevice *vdev, int n) goto err_used; } -size = virtio_queue_get_avail_size(vdev, n) + event_size; +size = virtio_queue_get_driver_size(vdev, n); len = address_space_cache_init(>avail, vdev->dma_as, vq->vring.avail, size, false); if (len < size) { @@ -2339,16 +2336,30 @@ hwaddr virtio_queue_get_desc_size(VirtIODevice *vdev, int n) return sizeof(VRingDesc) * vdev->vq[n].vring.num; } -hwaddr virtio_queue_get_avail_size(VirtIODevice *vdev, int n) +hwaddr virtio_queue_get_driver_size(VirtIODevice *vdev, int n) { -return offsetof(VRingAvail, ring) + -sizeof(uint16_t) * vdev->vq[n].vring.num; +int s; + +if 
(virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) { +return sizeof(struct VRingPackedDescEvent); +} else { +s = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0; +return offsetof(VRingAvail, ring) + +
[Qemu-devel] [RFC v3 05/12] virtio: init and desc empty check for packed ring
From: Wei Xu Basic initialization and helpers for packed ring. Signed-off-by: Wei Xu --- hw/virtio/virtio.c | 57 +- 1 file changed, 56 insertions(+), 1 deletion(-) diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 9185efb..86f88da 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -24,6 +24,9 @@ #include "hw/virtio/virtio-access.h" #include "sysemu/dma.h" +#define AVAIL_DESC_PACKED(b) ((b) << 7) +#define USED_DESC_PACKED(b) ((b) << 15) + /* * The alignment to use between consumer and producer parts of vring. * x86 pagesize again. This is the default, used by transports like PCI @@ -372,6 +375,23 @@ int virtio_queue_ready(VirtQueue *vq) return vq->vring.avail != 0; } +static void vring_packed_desc_read_flags(VirtIODevice *vdev, +VRingPackedDesc *desc, MemoryRegionCache *cache, int i) +{ +address_space_read_cached(cache, + i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, flags), + >flags, sizeof(desc->flags)); +} + +static inline bool is_desc_avail(struct VRingPackedDesc *desc, bool wc) +{ +bool avail, used; + +avail = !!(desc->flags & AVAIL_DESC_PACKED(1)); +used = !!(desc->flags & USED_DESC_PACKED(1)); +return (avail != used) && (avail == wc); +} + /* Fetch avail_idx from VQ memory only when we really need to know if * guest has added some buffers. * Called within rcu_read_lock(). 
*/ @@ -392,7 +412,7 @@ static int virtio_queue_empty_rcu(VirtQueue *vq) return vring_avail_idx(vq) == vq->last_avail_idx; } -int virtio_queue_empty(VirtQueue *vq) +static int virtio_queue_split_empty(VirtQueue *vq) { bool empty; @@ -414,6 +434,41 @@ int virtio_queue_empty(VirtQueue *vq) return empty; } +static int virtio_queue_packed_empty_rcu(VirtQueue *vq) +{ +struct VRingPackedDesc desc; +VRingMemoryRegionCaches *cache; + +if (unlikely(!vq->vring.desc)) { +return 1; +} + +cache = vring_get_region_caches(vq); +vring_packed_desc_read_flags(vq->vdev, , >desc, +vq->last_avail_idx); + +return !is_desc_avail(, vq->avail_wrap_counter); +} + +static int virtio_queue_packed_empty(VirtQueue *vq) +{ +bool empty; + +rcu_read_lock(); +empty = virtio_queue_packed_empty_rcu(vq); +rcu_read_unlock(); +return empty; +} + +int virtio_queue_empty(VirtQueue *vq) +{ +if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) { +return virtio_queue_packed_empty(vq); +} else { +return virtio_queue_split_empty(vq); +} +} + static void virtqueue_unmap_sg(VirtQueue *vq, const VirtQueueElement *elem, unsigned int len) { -- 1.8.3.1
[Qemu-devel] [RFC v3 00/12] packed ring virtio-net userspace backend support
From: Wei Xu code base: https://github.com/Whishay/qemu.git Todo: - migration has not been support yet v2->v3 - addressed performance issue - fixed feedback from v2 v1->v2 - sync to tiwei's v5 - reuse memory cache function with 1.0 - dropped detach patch and notification helper(04 & 05 in v1) - guest virtio-net driver unload/reload support - event suppression support(not tested) - addressed feedback from v1 Wei Xu (12): virtio: introduce packed ring definitions virtio: redefine structure & memory cache for packed ring virtio: init memory cache for packed ring virtio: init wrap counter for packed ring virtio: init and desc empty check for packed ring virtio: get avail bytes check for packed ring virtio: fill/flush/pop for packed ring virtio: event suppression support for packed ring virtio-net: fill head desc after done all in a chain virtio: packed ring feature bit for userspace backend virtio: enable packed ring via a new command line virtio: feature vhost-net support for packed ring hw/net/vhost_net.c | 1 + hw/net/virtio-net.c| 11 +- hw/virtio/vhost.c | 19 +- hw/virtio/virtio.c | 685 +++-- include/hw/virtio/virtio.h | 9 +- include/standard-headers/linux/virtio_config.h | 15 + include/standard-headers/linux/virtio_ring.h | 43 ++ 7 files changed, 736 insertions(+), 47 deletions(-) -- 1.8.3.1
[Qemu-devel] [RFC v2 8/8] virtio: guest driver reload for vhost-net
From: Wei Xu last_avail, avail_wrap_count, used_idx and used_wrap_count are needed to support vhost-net backend, all these are either 16 or bool variables, since state.num is 64bit wide, so here it is possible to put them to the 'num' without introducing a new case while handling ioctl. Unload/Reload test has been done successfully with a patch in vhost kernel. Signed-off-by: Wei Xu --- hw/virtio/virtio.c | 42 ++ 1 file changed, 34 insertions(+), 8 deletions(-) diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 4543974..153f6d7 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -2862,33 +2862,59 @@ hwaddr virtio_queue_get_used_size(VirtIODevice *vdev, int n) } } -uint16_t virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n) +uint64_t virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n) { -return vdev->vq[n].last_avail_idx; +uint64_t num; + +num = vdev->vq[n].last_avail_idx; +if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) { +num |= ((uint64_t)vdev->vq[n].avail_wrap_counter) << 16; +num |= ((uint64_t)vdev->vq[n].used_idx) << 32; +num |= ((uint64_t)vdev->vq[n].used_wrap_counter) << 48; +} + +return num; } -void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n, uint16_t idx) +void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n, uint64_t num) { -vdev->vq[n].last_avail_idx = idx; -vdev->vq[n].shadow_avail_idx = idx; +vdev->vq[n].shadow_avail_idx = vdev->vq[n].last_avail_idx = (uint16_t)(num); + +if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) { +vdev->vq[n].avail_wrap_counter = (uint16_t)(num >> 16); +vdev->vq[n].used_idx = (uint16_t)(num >> 32); +vdev->vq[n].used_wrap_counter = (uint16_t)(num >> 48); +} } void virtio_queue_restore_last_avail_idx(VirtIODevice *vdev, int n) { rcu_read_lock(); -if (vdev->vq[n].vring.desc) { +if (!vdev->vq[n].vring.desc) { +goto out; +} + +if (!virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) { vdev->vq[n].last_avail_idx = vring_used_idx(>vq[n]); 
-vdev->vq[n].shadow_avail_idx = vdev->vq[n].last_avail_idx; } +vdev->vq[n].shadow_avail_idx = vdev->vq[n].last_avail_idx; + +out: rcu_read_unlock(); } void virtio_queue_update_used_idx(VirtIODevice *vdev, int n) { rcu_read_lock(); -if (vdev->vq[n].vring.desc) { +if (!vdev->vq[n].vring.desc) { +goto out; +} + +if (!virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) { vdev->vq[n].used_idx = vring_used_idx(>vq[n]); } + +out: rcu_read_unlock(); } -- 1.8.3.1
[Qemu-devel] [RFC v2 4/8] virtio: get avail bytes check for packed ring
From: Wei Xu mostly as same as 1.0 except traversing all desc to feed headcount, need a refactor. Signed-off-by: Wei Xu --- hw/virtio/virtio.c | 148 +++-- 1 file changed, 145 insertions(+), 3 deletions(-) diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index bd669a2..cdbb5af 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -650,9 +650,9 @@ static int virtqueue_read_next_desc(VirtIODevice *vdev, VRingDesc *desc, return VIRTQUEUE_READ_DESC_MORE; } -void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes, - unsigned int *out_bytes, - unsigned max_in_bytes, unsigned max_out_bytes) +static void virtqueue_split_get_avail_bytes(VirtQueue *vq, +unsigned int *in_bytes, unsigned int *out_bytes, +unsigned max_in_bytes, unsigned max_out_bytes) { VirtIODevice *vdev = vq->vdev; unsigned int max, idx; @@ -775,6 +775,148 @@ err: goto done; } +static void virtqueue_packed_get_avail_bytes(VirtQueue *vq, +unsigned int *in_bytes, unsigned int *out_bytes, +unsigned max_in_bytes, unsigned max_out_bytes) +{ +VirtIODevice *vdev = vq->vdev; +unsigned int max, idx; +unsigned int total_bufs, in_total, out_total; +MemoryRegionCache *desc_cache; +VRingMemoryRegionCaches *caches; +MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID; +int64_t len = 0; +VRingDescPacked desc; + +if (unlikely(!vq->vring.desc)) { +if (in_bytes) { +*in_bytes = 0; +} +if (out_bytes) { +*out_bytes = 0; +} +return; +} + +rcu_read_lock(); +idx = vq->last_avail_idx; +total_bufs = in_total = out_total = 0; + +max = vq->vring.num; +caches = vring_get_region_caches(vq); +if (caches->desc.len < max * sizeof(VRingDescPacked)) { +virtio_error(vdev, "Cannot map descriptor ring"); +goto err; +} + +desc_cache = >desc; +vring_packed_desc_read(vdev, , desc_cache, idx); +while (is_desc_avail()) { +unsigned int num_bufs; +unsigned int i; + +num_bufs = total_bufs; + +if (desc.flags & VRING_DESC_F_INDIRECT) { +if (desc.len % sizeof(VRingDescPacked)) { +virtio_error(vdev, "Invalid size for 
indirect buffer table"); +goto err; +} + +/* If we've got too many, that implies a descriptor loop. */ +if (num_bufs >= max) { +virtio_error(vdev, "Looped descriptor"); +goto err; +} + +/* loop over the indirect descriptor table */ +len = address_space_cache_init(_desc_cache, + vdev->dma_as, + desc.addr, desc.len, false); +desc_cache = _desc_cache; +if (len < desc.len) { +virtio_error(vdev, "Cannot map indirect buffer"); +goto err; +} + +max = desc.len / sizeof(VRingDescPacked); +num_bufs = i = 0; +vring_packed_desc_read(vdev, , desc_cache, i); +} + +do { +/* If we've got too many, that implies a descriptor loop. */ +if (++num_bufs > max) { +virtio_error(vdev, "Looped descriptor"); +goto err; +} + +if (desc.flags & VRING_DESC_F_WRITE) { +in_total += desc.len; +} else { +out_total += desc.len; +} +if (in_total >= max_in_bytes && out_total >= max_out_bytes) { +goto done; +} + +if (desc_cache == _desc_cache) { +if (++i >= vq->vring.num) { +i -= vq->vring.num; +} +vring_packed_desc_read(vdev, , desc_cache, i); +} else { +if (++idx >= vq->vring.num) { +idx -= vq->vring.num; +} +vring_packed_desc_read(vdev, , desc_cache, idx); +} +/* Make sure we see the flags */ +smp_mb(); +} while (desc.flags & VRING_DESC_F_NEXT); + +if (desc_cache == _desc_cache) { +address_space_cache_destroy(_desc_cache); +total_bufs++; +/* We missed one step on for indirect desc */ +idx++; +} else { +total_bufs = num_bufs; +} + +desc_cache = >desc; +vring_packed_desc_read(vdev, , desc_cache, idx % vq->vring.num); +} + +done: +address_space_cache_destroy(_desc_cache); +if (in_bytes) { +*in_bytes = in_total; +} +if (out_bytes) { +*out_bytes = out_total; +} +rcu_read_unlock(); +
[Qemu-devel] [RFC v2 3/8] virtio: empty check and desc read for packed ring
From: Wei Xu helper for ring empty check and descriptor read. Signed-off-by: Wei Xu --- hw/virtio/virtio.c | 62 +++--- 1 file changed, 59 insertions(+), 3 deletions(-) diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index f6c0689..bd669a2 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -24,6 +24,9 @@ #include "hw/virtio/virtio-access.h" #include "sysemu/dma.h" +#define AVAIL_DESC_PACKED(b) ((b) << 7) +#define USED_DESC_PACKED(b) ((b) << 15) + /* * The alignment to use between consumer and producer parts of vring. * x86 pagesize again. This is the default, used by transports like PCI @@ -357,10 +360,27 @@ int virtio_queue_ready(VirtQueue *vq) return vq->vring.avail != 0; } +static void vring_packed_desc_read(VirtIODevice *vdev, VRingDescPacked *desc, +MemoryRegionCache *cache, int i) +{ +address_space_read_cached(cache, i * sizeof(VRingDescPacked), + desc, sizeof(VRingDescPacked)); +virtio_tswap64s(vdev, >addr); +virtio_tswap32s(vdev, >len); +virtio_tswap16s(vdev, >id); +virtio_tswap16s(vdev, >flags); +} + +static inline bool is_desc_avail(struct VRingDescPacked *desc) +{ +return !!(desc->flags & AVAIL_DESC_PACKED(1)) != +!!(desc->flags & USED_DESC_PACKED(1)); +} + /* Fetch avail_idx from VQ memory only when we really need to know if * guest has added some buffers. * Called within rcu_read_lock(). 
*/ -static int virtio_queue_empty_rcu(VirtQueue *vq) +static int virtio_queue_split_empty_rcu(VirtQueue *vq) { if (unlikely(!vq->vring.avail)) { return 1; @@ -373,7 +393,7 @@ static int virtio_queue_empty_rcu(VirtQueue *vq) return vring_avail_idx(vq) == vq->last_avail_idx; } -int virtio_queue_empty(VirtQueue *vq) +static int virtio_queue_split_empty(VirtQueue *vq) { bool empty; @@ -391,6 +411,42 @@ int virtio_queue_empty(VirtQueue *vq) return empty; } +static int virtio_queue_packed_empty_rcu(VirtQueue *vq) +{ +struct VRingDescPacked desc; +VRingMemoryRegionCaches *cache; + +if (unlikely(!vq->vring.desc)) { +return 1; +} + +cache = vring_get_region_caches(vq); +vring_packed_desc_read(vq->vdev, , >desc, vq->last_avail_idx); + +/* Make sure we see the updated flag */ +smp_mb(); +return !is_desc_avail(); +} + +static int virtio_queue_packed_empty(VirtQueue *vq) +{ +bool empty; + +rcu_read_lock(); +empty = virtio_queue_packed_empty_rcu(vq); +rcu_read_unlock(); +return empty; +} + +int virtio_queue_empty(VirtQueue *vq) +{ +if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) { +return virtio_queue_packed_empty(vq); +} else { +return virtio_queue_split_empty(vq); +} +} + static void virtqueue_unmap_sg(VirtQueue *vq, const VirtQueueElement *elem, unsigned int len) { @@ -862,7 +918,7 @@ void *virtqueue_pop(VirtQueue *vq, size_t sz) return NULL; } rcu_read_lock(); -if (virtio_queue_empty_rcu(vq)) { +if (virtio_queue_split_empty_rcu(vq)) { goto done; } /* Needed after virtio_queue_empty(), see comment in -- 1.8.3.1
[Qemu-devel] [RFC v2 6/8] virtio: flush/push for packed ring
From: Wei Xu Signed-off-by: Wei Xu Signed-off-by: Wei Xu --- hw/virtio/virtio.c | 109 ++--- 1 file changed, 96 insertions(+), 13 deletions(-) diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 0160d03..6f2da83 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -371,6 +371,21 @@ static void vring_packed_desc_read(VirtIODevice *vdev, VRingDescPacked *desc, virtio_tswap16s(vdev, >flags); } +static void vring_packed_desc_write(VirtIODevice *vdev, VRingDescPacked *desc, +MemoryRegionCache *cache, int i) +{ +virtio_tswap64s(vdev, >addr); +virtio_tswap32s(vdev, >len); +virtio_tswap16s(vdev, >id); +virtio_tswap16s(vdev, >flags); +address_space_write_cached(cache, + sizeof(VRingDescPacked) * i, desc, + sizeof(VRingDescPacked)); +address_space_cache_invalidate(cache, + sizeof(VRingDescPacked) * i, + sizeof(VRingDescPacked)); +} + static inline bool is_desc_avail(struct VRingDescPacked *desc) { return !!(desc->flags & AVAIL_DESC_PACKED(1)) != @@ -526,19 +541,11 @@ bool virtqueue_rewind(VirtQueue *vq, unsigned int num) } /* Called within rcu_read_lock(). */ -void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem, +static void virtqueue_split_fill(VirtQueue *vq, const VirtQueueElement *elem, unsigned int len, unsigned int idx) { VRingUsedElem uelem; -trace_virtqueue_fill(vq, elem, len, idx); - -virtqueue_unmap_sg(vq, elem, len); - -if (unlikely(vq->vdev->broken)) { -return; -} - if (unlikely(!vq->vring.used)) { return; } @@ -550,16 +557,64 @@ void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem, vring_used_write(vq, , idx); } -/* Called within rcu_read_lock(). 
*/ -void virtqueue_flush(VirtQueue *vq, unsigned int count) +static void virtqueue_packed_fill(VirtQueue *vq, const VirtQueueElement *elem, +unsigned int len, unsigned int idx) { -uint16_t old, new; +uint16_t w, head; +VRingMemoryRegionCaches *caches; +VRingDescPacked desc = { +.addr = 0, +.flags = 0, +}; + +if (unlikely(!vq->vring.desc)) { +return; +} + +caches = vring_get_region_caches(vq); +head = vq->used_idx + idx; +head = head >= vq->vring.num ? (head - vq->vring.num) : head; +vring_packed_desc_read(vq->vdev, , >desc, head); + +w = (desc.flags & AVAIL_DESC_PACKED(1)) >> 7; +desc.flags &= ~(AVAIL_DESC_PACKED(1) | USED_DESC_PACKED(1)); +desc.flags |= AVAIL_DESC_PACKED(w) | USED_DESC_PACKED(w); +if (!(desc.flags & VRING_DESC_F_INDIRECT)) { +if (!(desc.flags & VRING_DESC_F_WRITE)) { +desc.len = 0; +} else { +desc.len = len; +} +} +vring_packed_desc_write(vq->vdev, , >desc, head); + +/* Make sure flags has been updated */ +smp_mb(); +} + +void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem, +unsigned int len, unsigned int idx) +{ +trace_virtqueue_fill(vq, elem, len, idx); + +virtqueue_unmap_sg(vq, elem, len); if (unlikely(vq->vdev->broken)) { -vq->inuse -= count; return; } +if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) { +virtqueue_packed_fill(vq, elem, len, idx); +} else { +virtqueue_split_fill(vq, elem, len, idx); +} +} + +/* Called within rcu_read_lock(). 
*/ +static void virtqueue_split_flush(VirtQueue *vq, unsigned int count) +{ +uint16_t old, new; + if (unlikely(!vq->vring.used)) { return; } @@ -575,6 +630,34 @@ void virtqueue_flush(VirtQueue *vq, unsigned int count) vq->signalled_used_valid = false; } +static void virtqueue_packed_flush(VirtQueue *vq, unsigned int count) +{ +if (unlikely(!vq->vring.desc)) { +return; +} + +vq->inuse -= count; +vq->used_idx += count; +if (vq->used_idx >= vq->vring.num) { +vq->used_idx -= vq->vring.num; +vq->used_wrap_counter = !vq->used_wrap_counter; +} +} + +void virtqueue_flush(VirtQueue *vq, unsigned int count) +{ +if (unlikely(vq->vdev->broken)) { +vq->inuse -= count; +return; +} + +if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) { +virtqueue_packed_flush(vq, count); +} else { +virtqueue_split_flush(vq, count); +} +} + void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem, unsigned int len) { -- 1.8.3.1
[Qemu-devel] [RFC v2 7/8] virtio: event suppression for packed ring
From: Wei Xu Signed-off-by: Wei Xu Signed-off-by: Wei Xu --- hw/virtio/virtio.c | 115 +++-- include/standard-headers/linux/virtio_config.h | 13 +++ 2 files changed, 119 insertions(+), 9 deletions(-) diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 6f2da83..4543974 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -226,6 +226,24 @@ static void vring_desc_read(VirtIODevice *vdev, VRingDesc *desc, virtio_tswap16s(vdev, >next); } +static void vring_packed_event_read(VirtIODevice *vdev, +MemoryRegionCache *cache, VRingPackedDescEvent *e) +{ +address_space_read_cached(cache, 0, e, sizeof(*e)); +virtio_tswap16s(vdev, >off_wrap); +virtio_tswap16s(vdev, >flags); +} + +static void vring_packed_event_write(VirtIODevice *vdev, +MemoryRegionCache *cache, VRingPackedDescEvent *e) +{ +virtio_tswap16s(vdev, >off_wrap); +virtio_tswap16s(vdev, >flags); +address_space_write_cached(cache, 0, e, sizeof(*e)); +address_space_cache_invalidate(cache, 0, sizeof(VRingUsedElem)); +} + + static VRingMemoryRegionCaches *vring_get_region_caches(struct VirtQueue *vq) { VRingMemoryRegionCaches *caches = atomic_rcu_read(>vring.caches); @@ -332,14 +350,8 @@ static inline void vring_set_avail_event(VirtQueue *vq, uint16_t val) address_space_cache_invalidate(>used, pa, sizeof(val)); } -void virtio_queue_set_notification(VirtQueue *vq, int enable) +static void virtio_queue_set_notification_split(VirtQueue *vq, int enable) { -vq->notification = enable; - -if (!vq->vring.desc) { -return; -} - rcu_read_lock(); if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) { vring_set_avail_event(vq, vring_avail_idx(vq)); @@ -355,6 +367,38 @@ void virtio_queue_set_notification(VirtQueue *vq, int enable) rcu_read_unlock(); } +static void virtio_queue_set_notification_packed(VirtQueue *vq, int enable) +{ +VRingPackedDescEvent e; +VRingMemoryRegionCaches *caches; + +rcu_read_lock(); +caches = vring_get_region_caches(vq); +vring_packed_event_read(vq->vdev, >device, ); +if (enable) { 
+e.flags = RING_EVENT_FLAGS_ENABLE; +} else { +e.flags = RING_EVENT_FLAGS_DISABLE; +} +vring_packed_event_write(vq->vdev, >device, ); +rcu_read_unlock(); +} + +void virtio_queue_set_notification(VirtQueue *vq, int enable) +{ +vq->notification = enable; + +if (!vq->vring.desc) { +return; +} + +if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) { +virtio_queue_set_notification_packed(vq, enable); +} else { +virtio_queue_set_notification_split(vq, enable); +} +} + int virtio_queue_ready(VirtQueue *vq) { return vq->vring.avail != 0; @@ -2059,8 +2103,7 @@ static void virtio_set_isr(VirtIODevice *vdev, int value) } } -/* Called within rcu_read_lock(). */ -static bool virtio_should_notify(VirtIODevice *vdev, VirtQueue *vq) +static bool virtio_split_should_notify(VirtIODevice *vdev, VirtQueue *vq) { uint16_t old, new; bool v; @@ -2083,6 +2126,60 @@ static bool virtio_should_notify(VirtIODevice *vdev, VirtQueue *vq) return !v || vring_need_event(vring_get_used_event(vq), new, old); } +static bool vring_packed_need_event(VirtQueue *vq, uint16_t off_wrap, +uint16_t new, uint16_t old) +{ +bool wrap = vq->used_wrap_counter; +int off = off_wrap & ~(1 << 15); + +if (new < old) { +new += vq->vring.num; +wrap ^= 1; +} + +if (wrap != off_wrap >> 15) { +off += vq->vring.num; +} + +return vring_need_event(off, new, old); +} + +static bool virtio_packed_should_notify(VirtIODevice *vdev, VirtQueue *vq) +{ +VRingPackedDescEvent e; +uint16_t old, new; +bool v; +VRingMemoryRegionCaches *caches; + +caches = vring_get_region_caches(vq); +vring_packed_event_read(vdev, >driver, ); + +/* Make sure we see the updated flags */ +smp_mb(); +if (e.flags == RING_EVENT_FLAGS_DISABLE) { +return false; +} else if (e.flags == RING_EVENT_FLAGS_ENABLE) { +return true; +} + +v = vq->signalled_used_valid; +vq->signalled_used_valid = true; +old = vq->signalled_used; +new = vq->signalled_used = vq->used_idx; + +return !v || vring_packed_need_event(vq, e.off_wrap, new, old); +} + +/* Called within 
rcu_read_lock(). */ +static bool virtio_should_notify(VirtIODevice *vdev, VirtQueue *vq) +{ +if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) { +return virtio_packed_should_notify(vdev, vq); +} else { +return virtio_split_should_notify(vdev, vq); +} +} + void virtio_notify_irqfd(VirtIODevice *vdev, VirtQueue *vq) { bool should_notify; diff --git a/include/standard-headers/linux/virtio_config.h
[Qemu-devel] [RFC v2 2/8] virtio: memory cache for packed ring
From: Wei Xu Mostly reuse memory cache with 1.0 except for the offset calculation. Signed-off-by: Wei Xu --- hw/virtio/virtio.c | 29 - 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index e192a9a..f6c0689 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -150,11 +150,8 @@ static void virtio_init_region_cache(VirtIODevice *vdev, int n) VRingMemoryRegionCaches *old = vq->vring.caches; VRingMemoryRegionCaches *new; hwaddr addr, size; -int event_size; int64_t len; -event_size = virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0; - addr = vq->vring.desc; if (!addr) { return; @@ -168,7 +165,7 @@ static void virtio_init_region_cache(VirtIODevice *vdev, int n) goto err_desc; } -size = virtio_queue_get_used_size(vdev, n) + event_size; +size = virtio_queue_get_used_size(vdev, n); len = address_space_cache_init(>used, vdev->dma_as, vq->vring.used, size, true); if (len < size) { @@ -176,7 +173,7 @@ static void virtio_init_region_cache(VirtIODevice *vdev, int n) goto err_used; } -size = virtio_queue_get_avail_size(vdev, n) + event_size; +size = virtio_queue_get_avail_size(vdev, n); len = address_space_cache_init(>avail, vdev->dma_as, vq->vring.avail, size, false); if (len < size) { @@ -2320,14 +2317,28 @@ hwaddr virtio_queue_get_desc_size(VirtIODevice *vdev, int n) hwaddr virtio_queue_get_avail_size(VirtIODevice *vdev, int n) { -return offsetof(VRingAvail, ring) + -sizeof(uint16_t) * vdev->vq[n].vring.num; +int s; + +if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) { +return sizeof(struct VRingPackedDescEvent); +} else { +s = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX) ? 
2 : 0; +return offsetof(VRingAvail, ring) + +sizeof(uint16_t) * vdev->vq[n].vring.num + s; +} } hwaddr virtio_queue_get_used_size(VirtIODevice *vdev, int n) { -return offsetof(VRingUsed, ring) + -sizeof(VRingUsedElem) * vdev->vq[n].vring.num; +int s; + +if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) { +return sizeof(struct VRingPackedDescEvent); +} else { +s = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0; +return offsetof(VRingUsed, ring) + +sizeof(VRingUsedElem) * vdev->vq[n].vring.num + s; +} } uint16_t virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n) -- 1.8.3.1
[Qemu-devel] [RFC v2 5/8] virtio: queue pop for packed ring
From: Wei Xu Signed-off-by: Wei Xu --- hw/virtio/virtio.c | 145 - 1 file changed, 144 insertions(+), 1 deletion(-) diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index cdbb5af..0160d03 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -1041,7 +1041,7 @@ static void *virtqueue_alloc_element(size_t sz, unsigned out_num, unsigned in_nu return elem; } -void *virtqueue_pop(VirtQueue *vq, size_t sz) +static void *virtqueue_split_pop(VirtQueue *vq, size_t sz) { unsigned int i, head, max; VRingMemoryRegionCaches *caches; @@ -1176,6 +1176,149 @@ err_undo_map: goto done; } +static void *virtqueue_packed_pop(VirtQueue *vq, size_t sz) +{ +unsigned int i, head, max; +VRingMemoryRegionCaches *caches; +MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID; +MemoryRegionCache *cache; +int64_t len; +VirtIODevice *vdev = vq->vdev; +VirtQueueElement *elem = NULL; +unsigned out_num, in_num, elem_entries; +hwaddr addr[VIRTQUEUE_MAX_SIZE]; +struct iovec iov[VIRTQUEUE_MAX_SIZE]; +VRingDescPacked desc; + +if (unlikely(vdev->broken)) { +return NULL; +} + +rcu_read_lock(); +if (virtio_queue_packed_empty_rcu(vq)) { +goto done; +} + +/* When we start there are none of either input nor output. 
*/ +out_num = in_num = elem_entries = 0; + +max = vq->vring.num; + +if (vq->inuse >= vq->vring.num) { +virtio_error(vdev, "Virtqueue size exceeded"); +goto done; +} + +if (virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) { +/* FIXME: TBD */ +} + +head = vq->last_avail_idx; +i = head; + +caches = vring_get_region_caches(vq); +cache = >desc; +vring_packed_desc_read(vdev, , cache, i); +if (desc.flags & VRING_DESC_F_INDIRECT) { +if (desc.len % sizeof(VRingDescPacked)) { +virtio_error(vdev, "Invalid size for indirect buffer table"); +goto done; +} + +/* loop over the indirect descriptor table */ +len = address_space_cache_init(_desc_cache, vdev->dma_as, + desc.addr, desc.len, false); +cache = _desc_cache; +if (len < desc.len) { +virtio_error(vdev, "Cannot map indirect buffer"); +goto done; +} + +max = desc.len / sizeof(VRingDescPacked); +i = 0; +vring_packed_desc_read(vdev, , cache, i); +} + +/* Collect all the descriptors */ +while (1) { +bool map_ok; + +if (desc.flags & VRING_DESC_F_WRITE) { +map_ok = virtqueue_map_desc(vdev, _num, addr + out_num, +iov + out_num, +VIRTQUEUE_MAX_SIZE - out_num, true, +desc.addr, desc.len); +} else { +if (in_num) { +virtio_error(vdev, "Incorrect order for descriptors"); +goto err_undo_map; +} +map_ok = virtqueue_map_desc(vdev, _num, addr, iov, +VIRTQUEUE_MAX_SIZE, false, +desc.addr, desc.len); +} +if (!map_ok) { +goto err_undo_map; +} + +/* If we've got too many, that implies a descriptor loop. 
*/ +if (++elem_entries > max) { +virtio_error(vdev, "Looped descriptor"); +goto err_undo_map; +} + +if (++i >= vq->vring.num) { +i -= vq->vring.num; +} + +if (desc.flags & VRING_DESC_F_NEXT) { +vring_packed_desc_read(vq->vdev, , cache, i); +} else { +break; +} +} + +/* Now copy what we have collected and mapped */ +elem = virtqueue_alloc_element(sz, out_num, in_num); +for (i = 0; i < out_num; i++) { +elem->out_addr[i] = addr[i]; +elem->out_sg[i] = iov[i]; +} +for (i = 0; i < in_num; i++) { +elem->in_addr[i] = addr[head + out_num + i]; +elem->in_sg[i] = iov[out_num + i]; +} + +vq->last_avail_idx += (cache == _desc_cache) ? + 1 : out_num + in_num; +if (vq->last_avail_idx >= vq->vring.num) { +vq->last_avail_idx -= vq->vring.num; +vq->avail_wrap_counter = !vq->avail_wrap_counter; +} +vq->inuse++; + +trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num); +done: +address_space_cache_destroy(_desc_cache); +rcu_read_unlock(); + +return elem; + +err_undo_map: +virtqueue_undo_map_desc(out_num, in_num, iov); +g_free(elem); +goto done; +} + +void *virtqueue_pop(VirtQueue *vq, size_t sz) +{ +if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) { +return virtqueue_packed_pop(vq, sz); +} else { +return virtqueue_split_pop(vq, sz); +} +} + /*
[Qemu-devel] [RFC v2 1/8] virtio: feature bit, data structure, init for 1.1
From: Wei Xu New feature bit and members for packed ring. Signed-off-by: Wei Xu --- hw/net/vhost_net.c | 2 ++ hw/virtio/virtio.c | 27 -- include/hw/virtio/virtio.h | 4 +++- include/standard-headers/linux/virtio_config.h | 2 ++ 4 files changed, 32 insertions(+), 3 deletions(-) diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c index e037db6..f593086 100644 --- a/hw/net/vhost_net.c +++ b/hw/net/vhost_net.c @@ -53,6 +53,7 @@ static const int kernel_feature_bits[] = { VIRTIO_F_VERSION_1, VIRTIO_NET_F_MTU, VIRTIO_F_IOMMU_PLATFORM, +VIRTIO_F_RING_PACKED, VHOST_INVALID_FEATURE_BIT }; @@ -78,6 +79,7 @@ static const int user_feature_bits[] = { VIRTIO_NET_F_MRG_RXBUF, VIRTIO_NET_F_MTU, VIRTIO_F_IOMMU_PLATFORM, +VIRTIO_F_RING_PACKED, /* This bit implies RARP isn't sent by QEMU out of band */ VIRTIO_NET_F_GUEST_ANNOUNCE, diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 006d3d1..e192a9a 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -39,6 +39,13 @@ typedef struct VRingDesc uint16_t next; } VRingDesc; +typedef struct VRingDescPacked { +uint64_t addr; +uint32_t len; +uint16_t id; +uint16_t flags; +} VRingDescPacked; + typedef struct VRingAvail { uint16_t flags; @@ -62,8 +69,14 @@ typedef struct VRingUsed typedef struct VRingMemoryRegionCaches { struct rcu_head rcu; MemoryRegionCache desc; -MemoryRegionCache avail; -MemoryRegionCache used; +union { +MemoryRegionCache avail; +MemoryRegionCache driver; +}; +union { +MemoryRegionCache used; +MemoryRegionCache device; +}; } VRingMemoryRegionCaches; typedef struct VRing @@ -77,6 +90,11 @@ typedef struct VRing VRingMemoryRegionCaches *caches; } VRing; +typedef struct VRingPackedDescEvent { +uint16_t off_wrap; +uint16_t flags; +} VRingPackedDescEvent ; + struct VirtQueue { VRing vring; @@ -89,6 +107,9 @@ struct VirtQueue uint16_t used_idx; +bool avail_wrap_counter; +bool used_wrap_counter; + /* Last used index value we have signalled on */ uint16_t signalled_used; @@ -1213,6 +1234,8 @@ void virtio_reset(void 
*opaque) vdev->vq[i].last_avail_idx = 0; vdev->vq[i].shadow_avail_idx = 0; vdev->vq[i].used_idx = 0; +vdev->vq[i].avail_wrap_counter = true; +vdev->vq[i].used_wrap_counter = true; virtio_queue_set_vector(vdev, i, VIRTIO_NO_VECTOR); vdev->vq[i].signalled_used = 0; vdev->vq[i].signalled_used_valid = false; diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h index 098bdaa..4a7fb21 100644 --- a/include/hw/virtio/virtio.h +++ b/include/hw/virtio/virtio.h @@ -262,7 +262,9 @@ typedef struct VirtIORNGConf VirtIORNGConf; DEFINE_PROP_BIT64("any_layout", _state, _field, \ VIRTIO_F_ANY_LAYOUT, true), \ DEFINE_PROP_BIT64("iommu_platform", _state, _field, \ - VIRTIO_F_IOMMU_PLATFORM, false) + VIRTIO_F_IOMMU_PLATFORM, false), \ +DEFINE_PROP_BIT64("ring_packed", _state, _field, \ + VIRTIO_F_RING_PACKED, false) hwaddr virtio_queue_get_desc_addr(VirtIODevice *vdev, int n); hwaddr virtio_queue_get_avail_addr(VirtIODevice *vdev, int n); diff --git a/include/standard-headers/linux/virtio_config.h b/include/standard-headers/linux/virtio_config.h index b777069..6ee5529 100644 --- a/include/standard-headers/linux/virtio_config.h +++ b/include/standard-headers/linux/virtio_config.h @@ -71,4 +71,6 @@ * this is for compatibility with legacy systems. */ #define VIRTIO_F_IOMMU_PLATFORM33 + +#define VIRTIO_F_RING_PACKED 34 #endif /* _LINUX_VIRTIO_CONFIG_H */ -- 1.8.3.1
[Qemu-devel] [RFC v2 0/8] packed ring virtio-net userspace backend support
From: Wei Xu Todo: - address Rx slow performance - event index interrupt suppression test v1->v2 - sync to tiwei's v5 - reuse memory cache function with 1.0 - dropped detach patch and notification helper(04 & 05 in v1) - guest virtio-net driver unload/reload support - event suppression support(not tested) - addressed feedback from v1 About guest virtio-net load/unload: Since last_avail, avail_wrap_count, used_idx and used_wrap_count are all 16 or bool type variables, so I turned to merge them to 'vhost_vring_state.num' in stead of introducing a new case in handling ioctl, test has been done with a tweak in kernel side like: --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -1439,10 +1439,16 @@ long vhost_vring_ioctl(struct vhost_dev *d, int ioctl, void __user *argp) r = -EFAULT; break; } - if (s.num > 0x) { - r = -EINVAL; - break; - } + if (vhost_has_feature(vq, VIRTIO_F_RING_PACKED)) { + vq->avail_wrap_counter = (bool)(uint16_t)(s.num >> 16); + vq->last_used_idx = (uint16_t)(s.num >> 32); + vq->used_wrap_counter = (bool)(uint16_t)(s.num >> 48); +} else { +if (s.num > 0x) { +r = -EINVAL; +break; +} +} vq->last_avail_idx = s.num; /* Forget the cached index value. 
*/ vq->avail_idx = vq->last_avail_idx; @@ -1450,8 +1456,15 @@ long vhost_vring_ioctl(struct vhost_dev *d, int ioctl, void __user *argp) case VHOST_GET_VRING_BASE: s.index = idx; s.num = vq->last_avail_idx; + if (vhost_has_feature(vq, VIRTIO_F_RING_PACKED)) { + s.num |= vq->avail_wrap_counter << 16; + s.num |= vq->last_used_idx << 32; + s.num |= vq->used_wrap_counter << 48; +} if (copy_to_user(argp, , sizeof s)) r = -EFAULT; + if (vhost_has_feature(vq, VIRTIO_F_RING_PACKED)) + s.num |= vq->avail_wrap_counter << 31; break; case VHOST_SET_VRING_ADDR: if (copy_from_user(, argp, sizeof a)) { Wei Xu (8): virtio: feature bit, data structure, init for packed ring virtio: memory cache for packed ring virtio: empty check and desc read for packed ring virtio: get avail bytes check for packed ring virtio: queue pop support for packed ring virtio: flush/push support for packed ring virtio: event suppression support for packed ring virtio: support guest driver reload for vhost-net hw/net/vhost_net.c | 2 + hw/virtio/virtio.c | 677 +++-- include/hw/virtio/virtio.h | 4 +- include/standard-headers/linux/virtio_config.h | 15 + 4 files changed, 649 insertions(+), 49 deletions(-) -- 1.8.3.1
[Qemu-devel] [PATCH 8/8] virtio: queue pop support for packed ring
From: Wei Xucloned from split ring pop, a global static length array and the inside-element length array are introduced to easy prototype, this consumes more memory and it is valuable to move to dynamic allocation as the out/in sg does. Signed-off-by: Wei Xu --- hw/virtio/virtio.c | 154 - 1 file changed, 153 insertions(+), 1 deletion(-) diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index cf726f3..0eafb38 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -1221,7 +1221,7 @@ static void *virtqueue_alloc_element(size_t sz, unsigned out_num, unsigned in_nu return elem; } -void *virtqueue_pop(VirtQueue *vq, size_t sz) +static void *virtqueue_pop_split(VirtQueue *vq, size_t sz) { unsigned int i, head, max; VRingMemoryRegionCaches *caches; @@ -1356,6 +1356,158 @@ err_undo_map: goto done; } +static uint16_t dma_len[VIRTQUEUE_MAX_SIZE]; +static void *virtqueue_pop_packed(VirtQueue *vq, size_t sz) +{ +unsigned int i, head, max; +VRingMemoryRegionCaches *caches; +MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID; +MemoryRegionCache *cache; +int64_t len; +VirtIODevice *vdev = vq->vdev; +VirtQueueElement *elem = NULL; +unsigned out_num, in_num, elem_entries; +hwaddr addr[VIRTQUEUE_MAX_SIZE]; +struct iovec iov[VIRTQUEUE_MAX_SIZE]; +VRingDescPacked desc; +uint8_t wrap_counter; + +if (unlikely(vdev->broken)) { +return NULL; +} + +vq->last_avail_idx %= vq->packed.num; + +rcu_read_lock(); +if (virtio_queue_empty_packed_rcu(vq)) { +goto done; +} + +/* When we start there are none of either input nor output. 
*/ +out_num = in_num = elem_entries = 0; + +max = vq->vring.num; + +if (vq->inuse >= vq->vring.num) { +virtio_error(vdev, "Virtqueue size exceeded"); +goto done; +} + +if (virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) { +/* FIXME: TBD */ +} + +head = vq->last_avail_idx; +i = head; + +caches = vring_get_region_caches(vq); +cache = >desc_packed; +vring_desc_read_packed(vdev, , cache, i); +if (desc.flags & VRING_DESC_F_INDIRECT) { +if (desc.len % sizeof(VRingDescPacked)) { +virtio_error(vdev, "Invalid size for indirect buffer table"); +goto done; +} + +/* loop over the indirect descriptor table */ +len = address_space_cache_init(_desc_cache, vdev->dma_as, + desc.addr, desc.len, false); +cache = _desc_cache; +if (len < desc.len) { +virtio_error(vdev, "Cannot map indirect buffer"); +goto done; +} + +max = desc.len / sizeof(VRingDescPacked); +i = 0; +vring_desc_read_packed(vdev, , cache, i); +} + +wrap_counter = vq->wrap_counter; +/* Collect all the descriptors */ +while (1) { +bool map_ok; + +if (desc.flags & VRING_DESC_F_WRITE) { +map_ok = virtqueue_map_desc(vdev, _num, addr + out_num, +iov + out_num, +VIRTQUEUE_MAX_SIZE - out_num, true, +desc.addr, desc.len); +} else { +if (in_num) { +virtio_error(vdev, "Incorrect order for descriptors"); +goto err_undo_map; +} +map_ok = virtqueue_map_desc(vdev, _num, addr, iov, +VIRTQUEUE_MAX_SIZE, false, +desc.addr, desc.len); +} +if (!map_ok) { +goto err_undo_map; +} + +/* If we've got too many, that implies a descriptor loop. 
*/ +if (++elem_entries > max) { +virtio_error(vdev, "Looped descriptor"); +goto err_undo_map; +} + +dma_len[i++] = desc.len; +/* Toggle wrap_counter for non indirect desc */ +if ((i == vq->packed.num) && (cache != _desc_cache)) { +vq->wrap_counter ^= 1; +} + +if (desc.flags & VRING_DESC_F_NEXT) { +vring_desc_read_packed(vq->vdev, , cache, i % vq->packed.num); +} else { +break; +} +} + +/* Now copy what we have collected and mapped */ +elem = virtqueue_alloc_element(sz, out_num, in_num); +elem->index = head; +elem->wrap_counter = wrap_counter; +elem->count = (cache == _desc_cache) ? 1 : out_num + in_num; +for (i = 0; i < out_num; i++) { +/* DMA Done by marking the length as 0 */ +elem->len[i] = 0; +elem->out_addr[i] = addr[i]; +elem->out_sg[i] = iov[i]; +} +for (i = 0; i < in_num; i++) { +elem->len[out_num + i] = dma_len[head + out_num + i]; +elem->in_addr[i] = addr[out_num + i]; +elem->in_sg[i] =
[Qemu-devel] [PATCH 6/8] virtio: flush/push support for packed ring
From: Wei XuSigned-off-by: Wei Xu --- hw/virtio/virtio.c | 104 + 1 file changed, 90 insertions(+), 14 deletions(-) diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 95a4681..def07c6 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -26,6 +26,7 @@ #define AVAIL_DESC_PACKED(b) ((b) << 7) #define USED_DESC_PACKED(b) ((b) << 15) +#define VIRTQ_F_DESC_USED(w) (AVAIL_DESC_PACKED(w) | USED_DESC_PACKED(w)) /* * The alignment to use between consumer and producer parts of vring. @@ -636,19 +637,11 @@ bool virtqueue_rewind(VirtQueue *vq, unsigned int num) } /* Called within rcu_read_lock(). */ -void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem, +static void virtqueue_fill_split(VirtQueue *vq, const VirtQueueElement *elem, unsigned int len, unsigned int idx) { VRingUsedElem uelem; -trace_virtqueue_fill(vq, elem, len, idx); - -virtqueue_unmap_sg(vq, elem, len); - -if (unlikely(vq->vdev->broken)) { -return; -} - if (unlikely(!vq->vring.used)) { return; } @@ -660,16 +653,66 @@ void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem, vring_used_write(vq, , idx); } -/* Called within rcu_read_lock(). 
*/ -void virtqueue_flush(VirtQueue *vq, unsigned int count) +static void virtqueue_fill_packed(VirtQueue *vq, const VirtQueueElement *elem) { -uint16_t old, new; +uint16_t i, w, head; +VRingMemoryRegionCaches *caches; +VRingDescPacked desc = { +.addr = 0, +.flags = 0, +}; + +if (unlikely(!vq->packed.desc)) { +return; +} + +w = elem->wrap_counter; +caches = vring_get_region_caches(vq); +for (i = 0; i < elem->count; i++) { +head = (elem->index + i) % vq->packed.num; +/* Don't toggle the first one since it is the originally one */ +if ((i > 0) && (!head)) { +w ^= 1; +} + +desc.id = elem->index; +desc.flags = VIRTQ_F_DESC_USED(w); +desc.len = elem->len[i]; +virtio_tswap16s(vq->vdev, ); +virtio_tswap32s(vq->vdev, ); +virtio_tswap16s(vq->vdev, ); +address_space_write_cached(>desc, + sizeof(VRingDescPacked) * head, , + sizeof(VRingDescPacked)); +address_space_cache_invalidate(>desc, + sizeof(VRingDescPacked) * head, + sizeof(VRingDescPacked)); +} +} + +void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem, +unsigned int len, unsigned int idx) +{ +trace_virtqueue_fill(vq, elem, len, idx); + +virtqueue_unmap_sg(vq, elem, len); if (unlikely(vq->vdev->broken)) { -vq->inuse -= count; return; } +if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) { +virtqueue_fill_packed(vq, elem); +} else { +virtqueue_fill_split(vq, elem, len, idx); +} +} + +/* Called within rcu_read_lock(). */ +static void virtqueue_flush_split(VirtQueue *vq, unsigned int count) +{ +uint16_t old, new; + if (unlikely(!vq->vring.used)) { return; } @@ -685,12 +728,45 @@ void virtqueue_flush(VirtQueue *vq, unsigned int count) vq->signalled_used_valid = false; } +static void virtqueue_flush_packed(VirtQueue *vq, unsigned int count) +{ +if (unlikely(!vq->packed.desc)) { +return; +} + +vq->inuse -= count; + +/* FIXME: is this correct? 
*/ +if (vq->inuse) { +return; +} +} + +void virtqueue_flush(VirtQueue *vq, unsigned int count) +{ +if (unlikely(vq->vdev->broken)) { +vq->inuse -= count; +return; +} + +if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) { +virtqueue_flush_packed(vq, count); +} else { +virtqueue_flush_split(vq, count); +} +} + void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem, unsigned int len) { rcu_read_lock(); virtqueue_fill(vq, elem, len, 0); -virtqueue_flush(vq, 1); +if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) { +/* FIXME: How to deal with the length field for chained desc */ +virtqueue_flush(vq, elem->count); +} else { +virtqueue_flush(vq, 1); +} rcu_read_unlock(); } -- 2.7.4
[Qemu-devel] [PATCH 3/8] virtio: add empty check for packed ring
From: Wei Xuhelper for ring empty check. Signed-off-by: Wei Xu --- hw/virtio/virtio.c | 62 +++--- 1 file changed, 59 insertions(+), 3 deletions(-) diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 73a35a4..478df3d 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -24,6 +24,9 @@ #include "hw/virtio/virtio-access.h" #include "sysemu/dma.h" +#define AVAIL_DESC_PACKED(b) ((b) << 7) +#define USED_DESC_PACKED(b) ((b) << 15) + /* * The alignment to use between consumer and producer parts of vring. * x86 pagesize again. This is the default, used by transports like PCI @@ -446,10 +449,27 @@ int virtio_queue_ready(VirtQueue *vq) return vq->vring.avail != 0; } +static void vring_desc_read_packed(VirtIODevice *vdev, VRingDescPacked *desc, +MemoryRegionCache *cache, int i) +{ +address_space_read_cached(cache, i * sizeof(VRingDescPacked), + desc, sizeof(VRingDescPacked)); +virtio_tswap64s(vdev, >addr); +virtio_tswap32s(vdev, >len); +virtio_tswap16s(vdev, >id); +virtio_tswap16s(vdev, >flags); +} + +static inline bool is_desc_avail(struct VRingDescPacked* desc) +{ +return (!!(desc->flags & AVAIL_DESC_PACKED(1)) != +!!(desc->flags & USED_DESC_PACKED(1))); +} + /* Fetch avail_idx from VQ memory only when we really need to know if * guest has added some buffers. * Called within rcu_read_lock(). 
*/ -static int virtio_queue_empty_rcu(VirtQueue *vq) +static int virtio_queue_empty_split_rcu(VirtQueue *vq) { if (unlikely(!vq->vring.avail)) { return 1; @@ -462,7 +482,7 @@ static int virtio_queue_empty_rcu(VirtQueue *vq) return vring_avail_idx(vq) == vq->last_avail_idx; } -int virtio_queue_empty(VirtQueue *vq) +static int virtio_queue_empty_split(VirtQueue *vq) { bool empty; @@ -480,6 +500,42 @@ int virtio_queue_empty(VirtQueue *vq) return empty; } +static int virtio_queue_empty_packed_rcu(VirtQueue *vq) +{ +struct VRingDescPacked desc; +VRingMemoryRegionCaches *cache; + +if (unlikely(!vq->packed.desc)) { +return 1; +} + +cache = vring_get_region_caches(vq); +vring_desc_read_packed(vq->vdev, , >desc_packed, vq->last_avail_idx); + +/* Make sure we see the updated flag */ +smp_mb(); +return !is_desc_avail(); +} + +static int virtio_queue_empty_packed(VirtQueue *vq) +{ +bool empty; + +rcu_read_lock(); +empty = virtio_queue_empty_packed_rcu(vq); +rcu_read_unlock(); +return empty; +} + +int virtio_queue_empty(VirtQueue *vq) +{ +if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) { +return virtio_queue_empty_packed(vq); +} else { +return virtio_queue_empty_split(vq); +} +} + static void virtqueue_unmap_sg(VirtQueue *vq, const VirtQueueElement *elem, unsigned int len) { @@ -951,7 +1007,7 @@ void *virtqueue_pop(VirtQueue *vq, size_t sz) return NULL; } rcu_read_lock(); -if (virtio_queue_empty_rcu(vq)) { +if (virtio_queue_empty_split_rcu(vq)) { goto done; } /* Needed after virtio_queue_empty(), see comment in -- 2.7.4
[Qemu-devel] [PATCH 4/8] virtio: add detach element for packed ring(1.1)
From: Wei Xuhelper for packed ring Signed-off-by: Wei Xu --- hw/virtio/virtio.c | 21 +++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 478df3d..fdee40f 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -561,6 +561,20 @@ static void virtqueue_unmap_sg(VirtQueue *vq, const VirtQueueElement *elem, elem->out_sg[i].iov_len); } +static void virtqueue_detach_element_split(VirtQueue *vq, +const VirtQueueElement *elem, unsigned int len) +{ +vq->inuse--; +virtqueue_unmap_sg(vq, elem, len); +} + +static void virtqueue_detach_element_packed(VirtQueue *vq, +const VirtQueueElement *elem, unsigned int len) +{ +vq->inuse -= elem->count; +virtqueue_unmap_sg(vq, elem, len); +} + /* virtqueue_detach_element: * @vq: The #VirtQueue * @elem: The #VirtQueueElement @@ -573,8 +587,11 @@ static void virtqueue_unmap_sg(VirtQueue *vq, const VirtQueueElement *elem, void virtqueue_detach_element(VirtQueue *vq, const VirtQueueElement *elem, unsigned int len) { -vq->inuse--; -virtqueue_unmap_sg(vq, elem, len); +if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) { +virtqueue_detach_element_packed(vq, elem, len); +} else { +virtqueue_detach_element_split(vq, elem, len); +} } /* virtqueue_unpop: -- 2.7.4
[Qemu-devel] [PATCH 5/8] virtio: notification tweak for packed ring
From: Wei XuAlways enable notify and bypass set notification before supporting driver and device area. Signed-off-by: Wei Xu --- hw/virtio/virtio.c | 9 + 1 file changed, 9 insertions(+) diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index fdee40f..95a4681 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -425,6 +425,10 @@ void virtio_queue_set_notification(VirtQueue *vq, int enable) { vq->notification = enable; +if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) { +return; +} + if (!vq->vring.desc) { return; } @@ -1801,6 +1805,11 @@ static bool virtio_should_notify(VirtIODevice *vdev, VirtQueue *vq) { uint16_t old, new; bool v; + +if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) { +return true; +} + /* We need to expose used array entries before checking used event. */ smp_mb(); /* Always notify when queue is empty (when feature acknowledge) */ -- 2.7.4
[Qemu-devel] [PATCH 7/8] virtio: get avail bytes check for packed ring
From: Wei Xumostly as same as 1.0, copy it separately for prototype, need a refactoring. Signed-off-by: Wei Xu --- hw/virtio/virtio.c | 142 +++-- 1 file changed, 139 insertions(+), 3 deletions(-) diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index def07c6..cf726f3 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -836,9 +836,9 @@ static int virtqueue_read_next_desc(VirtIODevice *vdev, VRingDesc *desc, return VIRTQUEUE_READ_DESC_MORE; } -void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes, - unsigned int *out_bytes, - unsigned max_in_bytes, unsigned max_out_bytes) +static void virtqueue_get_avail_bytes_split(VirtQueue *vq, +unsigned int *in_bytes, unsigned int *out_bytes, +unsigned max_in_bytes, unsigned max_out_bytes) { VirtIODevice *vdev = vq->vdev; unsigned int max, idx; @@ -961,6 +961,142 @@ err: goto done; } +static void virtqueue_get_avail_bytes_packed(VirtQueue *vq, +unsigned int *in_bytes, unsigned int *out_bytes, +unsigned max_in_bytes, unsigned max_out_bytes) +{ +VirtIODevice *vdev = vq->vdev; +unsigned int max, idx; +unsigned int total_bufs, in_total, out_total; +MemoryRegionCache *desc_cache; +VRingMemoryRegionCaches *caches; +MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID; +int64_t len = 0; +VRingDescPacked desc; + +if (unlikely(!vq->packed.desc)) { +if (in_bytes) { +*in_bytes = 0; +} +if (out_bytes) { +*out_bytes = 0; +} +return; +} + +rcu_read_lock(); +idx = vq->last_avail_idx; +total_bufs = in_total = out_total = 0; + +max = vq->packed.num; +caches = vring_get_region_caches(vq); +if (caches->desc.len < max * sizeof(VRingDescPacked)) { +virtio_error(vdev, "Cannot map descriptor ring"); +goto err; +} + +desc_cache = >desc; +vring_desc_read_packed(vdev, , desc_cache, idx); +while (is_desc_avail()) { +unsigned int num_bufs; +unsigned int i; + +num_bufs = total_bufs; + +if (desc.flags & VRING_DESC_F_INDIRECT) { +if (desc.len % sizeof(VRingDescPacked)) { +virtio_error(vdev, "Invalid size for indirect 
buffer table"); +goto err; +} + +/* If we've got too many, that implies a descriptor loop. */ +if (num_bufs >= max) { +virtio_error(vdev, "Looped descriptor"); +goto err; +} + +/* loop over the indirect descriptor table */ +len = address_space_cache_init(_desc_cache, + vdev->dma_as, + desc.addr, desc.len, false); +desc_cache = _desc_cache; +if (len < desc.len) { +virtio_error(vdev, "Cannot map indirect buffer"); +goto err; +} + +max = desc.len / sizeof(VRingDescPacked); +num_bufs = i = 0; +vring_desc_read_packed(vdev, , desc_cache, i); +} + +do { +/* If we've got too many, that implies a descriptor loop. */ +if (++num_bufs > max) { +virtio_error(vdev, "Looped descriptor"); +goto err; +} + +if (desc.flags & VRING_DESC_F_WRITE) { +in_total += desc.len; +} else { +out_total += desc.len; +} +if (in_total >= max_in_bytes && out_total >= max_out_bytes) { +goto done; +} + +if (desc_cache == _desc_cache) { +vring_desc_read_packed(vdev, , desc_cache, + ++i % vq->packed.num); +} else { +vring_desc_read_packed(vdev, , desc_cache, + ++idx % vq->packed.num); +} +} while (desc.flags & VRING_DESC_F_NEXT); + +if (desc_cache == _desc_cache) { +address_space_cache_destroy(_desc_cache); +total_bufs++; +/* We missed one step on for indirect desc */ +idx++; +} else { +total_bufs = num_bufs; +} + +desc_cache = >desc; +vring_desc_read_packed(vdev, , desc_cache, idx % vq->packed.num); +} + +done: +address_space_cache_destroy(_desc_cache); +if (in_bytes) { +*in_bytes = in_total; +} +if (out_bytes) { +*out_bytes = out_total; +} +rcu_read_unlock(); +return; + +err: +in_total = out_total = 0; +goto done; +} + +void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes, +
[Qemu-devel] [PATCH 2/8] virtio: memory cache for packed ring
From: Wei XuA new memory cache is introduced to for packed ring, the code looks pretty duplicated with split(1.0) ring, any refactor idea? Signed-off-by: Wei Xu --- hw/virtio/virtio.c | 79 +++--- 1 file changed, 76 insertions(+), 3 deletions(-) diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 9a6bfe7..73a35a4 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -155,13 +155,15 @@ static void virtio_free_region_cache(VRingMemoryRegionCaches *caches) return; } +/* FIX ME: pass in 1.1 device here, reuse 1.0 fields at current */ + address_space_cache_destroy(>desc); address_space_cache_destroy(>avail); address_space_cache_destroy(>used); g_free(caches); } -static void virtio_init_region_cache(VirtIODevice *vdev, int n) +static void virtio_init_region_cache_split(VirtIODevice *vdev, int n) { VirtQueue *vq = >vq[n]; VRingMemoryRegionCaches *old = vq->vring.caches; @@ -215,6 +217,65 @@ err_desc: g_free(new); } +static void virtio_init_region_cache_packed(VirtIODevice *vdev, int n) +{ +VirtQueue *vq = >vq[n]; +VRingMemoryRegionCaches *old = vq->vring.caches; +VRingMemoryRegionCaches *new; +hwaddr addr, size; +int64_t len; + +addr = vq->packed.desc; +if (!addr) { +return; +} +new = g_new0(VRingMemoryRegionCaches, 1); +size = virtio_queue_get_desc_size(vdev, n); +len = address_space_cache_init(>desc_packed, vdev->dma_as, + addr, size, false); +if (len < size) { +virtio_error(vdev, "Cannot map desc"); +goto err_desc; +} + +size = sizeof(struct VRingPackedDescEvent); +len = address_space_cache_init(>driver, vdev->dma_as, + vq->packed.driver, size, true); +if (len < size) { +virtio_error(vdev, "Cannot map driver area"); +goto err_driver; +} + +len = address_space_cache_init(>device, vdev->dma_as, + vq->packed.device, size, true); +if (len < size) { +virtio_error(vdev, "Cannot map device area"); +goto err_device; +} + +atomic_rcu_set(>packed.caches, new); +if (old) { +call_rcu(old, virtio_free_region_cache, rcu); +} +return; + +err_device: 
+address_space_cache_destroy(>driver); +err_driver: +address_space_cache_destroy(>desc); +err_desc: +g_free(new); +} + +static void virtio_init_region_cache(VirtIODevice *vdev, int n) +{ +if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) { +virtio_init_region_cache_packed(vdev, n); +} else { +virtio_init_region_cache_split(vdev, n); +} +} + /* virt queue functions */ void virtio_queue_update_rings(VirtIODevice *vdev, int n) { @@ -245,10 +306,18 @@ static void vring_desc_read(VirtIODevice *vdev, VRingDesc *desc, static VRingMemoryRegionCaches *vring_get_region_caches(struct VirtQueue *vq) { -VRingMemoryRegionCaches *caches = atomic_rcu_read(>vring.caches); +VRingMemoryRegionCaches *caches; + +if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) { +caches = atomic_rcu_read(>packed.caches); +} else { +caches = atomic_rcu_read(>vring.caches); +} + assert(caches != NULL); return caches; } + /* Called within rcu_read_lock(). */ static inline uint16_t vring_avail_flags(VirtQueue *vq) { @@ -2331,7 +2400,11 @@ hwaddr virtio_queue_get_used_addr(VirtIODevice *vdev, int n) hwaddr virtio_queue_get_desc_size(VirtIODevice *vdev, int n) { -return sizeof(VRingDesc) * vdev->vq[n].vring.num; +if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) { +return sizeof(VRingDescPacked) * vdev->vq[n].packed.num; +} else { +return sizeof(VRingDesc) * vdev->vq[n].vring.num; +} } hwaddr virtio_queue_get_avail_size(VirtIODevice *vdev, int n) -- 2.7.4
[Qemu-devel] [RFC PATCH 0/8] virtio-net 1.1 userspace backend support
From: Wei XuThis is a prototype for virtio-net 1.1 support in userspace backend, only minimum part are included in this RFC(roughly synced to v8 as Jason and Tiwei's RFC). Test has been done together with Tiwei's RFC guest virtio-net driver patch, ping and a quick iperf test successfully. Issues: 1. Rx performance of Iperf is much slower than TX. TX: 13-15Gb RX: 100-300Mb Missing: - device and driver - indirect descriptor - migration - vIOMMU support - other revisions since v8 - see FIXME Wei Xu (8): virtio: feature bit, data structure for packed ring virtio: memory cache for packed ring virtio: add empty check for packed ring virtio: add detach element for packed ring(1.1) virtio: notification tweak for packed ring virtio: flush/push support for packed ring virtio: get avail bytes check for packed ring virtio: queue pop support for packed ring hw/virtio/virtio.c | 618 +++-- include/hw/virtio/virtio.h | 12 +- include/standard-headers/linux/virtio_config.h | 2 + 3 files changed, 601 insertions(+), 31 deletions(-) -- 2.7.4
[Qemu-devel] [PATCH 1/8] virtio: feature bit, data structure for packed ring
From: Wei XuOnly minimum definitions from the spec are included for prototype. Signed-off-by: Wei Xu --- hw/virtio/virtio.c | 47 +++--- include/hw/virtio/virtio.h | 12 ++- include/standard-headers/linux/virtio_config.h | 2 ++ 3 files changed, 56 insertions(+), 5 deletions(-) diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 006d3d1..9a6bfe7 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -39,6 +39,14 @@ typedef struct VRingDesc uint16_t next; } VRingDesc; +typedef struct VRingDescPacked +{ +uint64_t addr; +uint32_t len; +uint16_t id; +uint16_t flags; +} VRingDescPacked; + typedef struct VRingAvail { uint16_t flags; @@ -61,9 +69,18 @@ typedef struct VRingUsed typedef struct VRingMemoryRegionCaches { struct rcu_head rcu; -MemoryRegionCache desc; -MemoryRegionCache avail; -MemoryRegionCache used; +union { +struct { +MemoryRegionCache desc; +MemoryRegionCache avail; +MemoryRegionCache used; +}; +struct { +MemoryRegionCache desc_packed; +MemoryRegionCache driver; +MemoryRegionCache device; +}; +}; } VRingMemoryRegionCaches; typedef struct VRing @@ -77,10 +94,31 @@ typedef struct VRing VRingMemoryRegionCaches *caches; } VRing; +typedef struct VRingPackedDescEvent { +uint16_t desc_event_off:15, + desc_event_wrap:1; +uint16_t desc_event_flags:2; +} VRingPackedDescEvent ; + +typedef struct VRingPacked +{ +unsigned int num; +unsigned int num_default; +unsigned int align; +hwaddr desc; +hwaddr driver; +hwaddr device; +VRingMemoryRegionCaches *caches; +} VRingPacked; + struct VirtQueue { -VRing vring; +union { +struct VRing vring; +struct VRingPacked packed; +}; +uint8_t wrap_counter:1; /* Next head to pop */ uint16_t last_avail_idx; @@ -1220,6 +1258,7 @@ void virtio_reset(void *opaque) vdev->vq[i].vring.num = vdev->vq[i].vring.num_default; vdev->vq[i].inuse = 0; virtio_virtqueue_reset_region_cache(>vq[i]); +vdev->vq[i].wrap_counter = 1; } } diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h index 098bdaa..563e88e 100644 --- 
a/include/hw/virtio/virtio.h +++ b/include/hw/virtio/virtio.h @@ -46,6 +46,14 @@ typedef struct VirtQueueElement unsigned int index; unsigned int out_num; unsigned int in_num; + +/* Number of descriptors used by packed ring */ +uint16_t count; +uint8_t wrap_counter:1; +/* FIXME: Length of every used buffer for a descriptor, + move to dynamical allocating due to out/in sgs numbers */ +uint32_t len[VIRTQUEUE_MAX_SIZE]; + hwaddr *in_addr; hwaddr *out_addr; struct iovec *in_sg; @@ -262,7 +270,9 @@ typedef struct VirtIORNGConf VirtIORNGConf; DEFINE_PROP_BIT64("any_layout", _state, _field, \ VIRTIO_F_ANY_LAYOUT, true), \ DEFINE_PROP_BIT64("iommu_platform", _state, _field, \ - VIRTIO_F_IOMMU_PLATFORM, false) + VIRTIO_F_IOMMU_PLATFORM, false), \ +DEFINE_PROP_BIT64("ring_packed", _state, _field, \ + VIRTIO_F_RING_PACKED, true) hwaddr virtio_queue_get_desc_addr(VirtIODevice *vdev, int n); hwaddr virtio_queue_get_avail_addr(VirtIODevice *vdev, int n); diff --git a/include/standard-headers/linux/virtio_config.h b/include/standard-headers/linux/virtio_config.h index b777069..6ee5529 100644 --- a/include/standard-headers/linux/virtio_config.h +++ b/include/standard-headers/linux/virtio_config.h @@ -71,4 +71,6 @@ * this is for compatibility with legacy systems. */ #define VIRTIO_F_IOMMU_PLATFORM33 + +#define VIRTIO_F_RING_PACKED 34 #endif /* _LINUX_VIRTIO_CONFIG_H */ -- 2.7.4
[Qemu-devel] [Patch 3/3] vfio: remove checking duplicated vfio device
From: Wei XuThis has been done when introducing 'vfio_lookup_as()' patch as a side work to reuse the loop. Signed-off-by: Wei Xu --- hw/vfio/pci.c | 9 - 1 file changed, 9 deletions(-) diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index 856cefd..d78f756 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -2632,7 +2632,6 @@ static void vfio_unregister_req_notifier(VFIOPCIDevice *vdev) static void vfio_realize(PCIDevice *pdev, Error **errp) { VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev); -VFIODevice *vbasedev_iter; VFIOGroup *group; char *tmp, group_path[PATH_MAX], *group_name; Error *err = NULL; @@ -2697,14 +2696,6 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) goto error; } -QLIST_FOREACH(vbasedev_iter, >device_list, next) { -if (strcmp(vbasedev_iter->name, vdev->vbasedev.name) == 0) { -error_setg(errp, "device is already attached"); -vfio_put_group(group); -goto error; -} -} - ret = vfio_get_device(group, vdev->vbasedev.name, >vbasedev, errp); if (ret) { vfio_put_group(group); -- 1.8.3.1
[Qemu-devel] [Patch 2/3] vfio: invoke looking up address space.
From: Wei XuInvoke looking up correct address space before getting an IOMMU group. Signed-off-by: Wei Xu --- hw/vfio/pci.c | 8 +++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index 31e1edf..856cefd 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -2640,6 +2640,7 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) struct stat st; int groupid; int i, ret; +AddressSpace *as; if (!vdev->vbasedev.sysfsdev) { if (!(~vdev->host.domain || ~vdev->host.bus || @@ -2686,7 +2687,12 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) trace_vfio_realize(vdev->vbasedev.name, groupid); -group = vfio_get_group(groupid, pci_device_iommu_address_space(pdev), errp); +as = vfio_lookup_as(groupid, pdev, errp); +if (!as) { +goto error; +} + +group = vfio_get_group(groupid, as, errp); if (!group) { goto error; } -- 1.8.3.1
[Qemu-devel] [Patch 0/3] vfio: reusing address space for the same iommu group devices
From: Wei XuRecently I have been testing passing through 2 ixgbe(82599ES) nics which belong to the same iommu group to a guest with virtual iommu(vIOMMU) on my desktop, while vfio failed to realize the second device and prompted error message as 'group xxx used in multiple address spaces'. It turned out to be that vtd doesn't know any group info while choosing an address space for the two devices, therefore it creates two separate address space for each which breaks granularity isolation. This patch fixes this by looking up if there is any exist device within the same iommu group and shares the address space before creating a new one. I am not sure if this fixes the problem in a correct way due to my limited knowledge about vfio, please come back to me for any feedback & comments, Thanks. Wei Xu (3): vfio: reusing address space for the same iommu group devices vfio: invoke looking up address space. vfio: remove checking duplicated vfio device hw/vfio/common.c | 28 hw/vfio/pci.c | 15 ++- include/hw/vfio/vfio-common.h | 1 + 3 files changed, 35 insertions(+), 9 deletions(-) -- 1.8.3.1
[Qemu-devel] [Patch 1/3] vfio: reusing address space for the same IOMMU group devices
From: Wei XuCurrently address space of a vfio device is selected by directly looking up pci device IOMMU address space during realizing, this usually works for most none separate address space targeted cases since they are using the system address space, i.e. a q35 machine without virtual IOMMU. Unfortunately, when it comes down to the case having a virtual IOMMU(x86 vtd in this case) and two vfio devices in the same IOMMU group, the virtual IOMMU(vtd) creates two separate address space for each device, this breaks the minimum granularity for vfio, and the device fails realizing by prompting 'group xxx used in multiple address spaces'. This patch is a helper looking up the same IOMMU device before invoking creating an new address space for a device, thus fixes the issue. As a side work for the all groups/devices loop, also it checks if the device has been assigned to the guest twice before creating an extra group and removing it later which is not necessary. Signed-off-by: Wei Xu --- hw/vfio/common.c | 28 include/hw/vfio/vfio-common.h | 1 + 2 files changed, 29 insertions(+) diff --git a/hw/vfio/common.c b/hw/vfio/common.c index 7b2924c..63c3609 100644 --- a/hw/vfio/common.c +++ b/hw/vfio/common.c @@ -35,6 +35,7 @@ #include "sysemu/kvm.h" #include "trace.h" #include "qapi/error.h" +#include "hw/vfio/pci.h" struct vfio_group_head vfio_group_list = QLIST_HEAD_INITIALIZER(vfio_group_list); @@ -1183,6 +1184,33 @@ static void vfio_disconnect_container(VFIOGroup *group) } } +AddressSpace *vfio_lookup_as(int groupid, PCIDevice *pdev, Error **errp) +{ +VFIOGroup *group; +VFIODevice *vbasedev_iter; +VFIOPCIDevice *vdev, *vd; + +vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev); +QLIST_FOREACH(group, _group_list, next) { +QLIST_FOREACH(vbasedev_iter, >device_list, next) { +if (strcmp(vbasedev_iter->name, vdev->vbasedev.name) == 0) { +error_setg(errp, "device is already attached"); +return 0; +} + +if (vbasedev_iter->group->groupid == groupid) { +vd = container_of(vbasedev_iter, 
VFIOPCIDevice, vbasedev); + +if (vd->pdev.bus == pdev->bus) { +return vbasedev_iter->group->container->space->as; +} +} +} +} + +return pci_device_iommu_address_space(pdev); +} + VFIOGroup *vfio_get_group(int groupid, AddressSpace *as, Error **errp) { VFIOGroup *group; diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h index f3a2ac9..5b4827b 100644 --- a/include/hw/vfio/vfio-common.h +++ b/include/hw/vfio/vfio-common.h @@ -157,6 +157,7 @@ void vfio_region_mmaps_set_enabled(VFIORegion *region, bool enabled); void vfio_region_exit(VFIORegion *region); void vfio_region_finalize(VFIORegion *region); void vfio_reset_handler(void *opaque); +AddressSpace *vfio_lookup_as(int groupid, PCIDevice *pdev, Error **errp); VFIOGroup *vfio_get_group(int groupid, AddressSpace *as, Error **errp); void vfio_put_group(VFIOGroup *group); int vfio_get_device(VFIOGroup *group, const char *name, -- 1.8.3.1
[Qemu-devel] [PATCH 1/2] virtio-net rsc: support coalescing ipv4 tcp traffic
From: Wei XuAll the data packets in a tcp connection are cached to a single buffer in every receive interval, and will be sent out via a timer, the 'virtio_net_rsc_timeout' controls the interval, this value may impact the performance and response time of tcp connection, 5(50us) is an experience value to gain a performance improvement, since the whql test sends packets every 100us, so '30(300us)' passes the test case, it is the default value as well, tune it via the command line parameter 'rsc_interval' within 'virtio-net-pci' device, for example, to launch a guest with interval set as '50': 'virtio-net-pci,netdev=hostnet1,bus=pci.0,id=net1,mac=00,rsc_interval=50' The timer will only be triggered if the packets pool is not empty, and it'll drain off all the cached packets. 'NetRscChain' is used to save the segments of IPv4/6 in a VirtIONet device. A new segment becomes a 'Candidate' as well as it passed sanity check, the main handler of TCP includes TCP window update, duplicated ACK check and the real data coalescing. An 'Candidate' segment means: 1. Segment is within current window and the sequence is the expected one. 2. 'ACK' of the segment is in the valid window. Sanity check includes: 1. Incorrect version in IP header 2. An IP options or IP fragment 3. Not a TCP packet 4. Sanity size check to prevent buffer overflow attack. 5. An ECN packet Even though, there might more cases should be considered such as ip identification other flags, while it breaks the test because windows set it to the same even it's not a fragment. Normally it includes 2 typical ways to handle a TCP control flag, 'bypass' and 'finalize', 'bypass' means should be sent out directly, while 'finalize' means the packets should also be bypassed, but this should be done after search for the same connection packets in the pool and drain all of them out, this is to avoid out of order fragment. 
All the 'SYN' packets will be bypassed since this always begin a new' connection, other flags such 'URG/FIN/RST/CWR/ECE' will trigger a finalization, because this normally happens upon a connection is going to be closed, an 'URG' packet also finalize current coalescing unit. Statistics can be used to monitor the basic coalescing status, the 'out of order' and 'out of window' means how many retransmitting packets, thus describe the performance intuitively. Signed-off-by: Wei Xu --- hw/net/virtio-net.c | 602 ++-- include/hw/virtio/virtio-net.h | 5 +- include/hw/virtio/virtio.h | 76 include/net/eth.h | 2 + include/standard-headers/linux/virtio_net.h | 14 + net/tap.c | 3 +- 6 files changed, 670 insertions(+), 32 deletions(-) diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c index 06bfe4b..d1824d9 100644 --- a/hw/net/virtio-net.c +++ b/hw/net/virtio-net.c @@ -15,10 +15,12 @@ #include "qemu/iov.h" #include "hw/virtio/virtio.h" #include "net/net.h" +#include "net/eth.h" #include "net/checksum.h" #include "net/tap.h" #include "qemu/error-report.h" #include "qemu/timer.h" +#include "qemu/sockets.h" #include "hw/virtio/virtio-net.h" #include "net/vhost_net.h" #include "hw/virtio/virtio-bus.h" @@ -43,6 +45,24 @@ #define endof(container, field) \ (offsetof(container, field) + sizeof(((container *)0)->field)) +#define VIRTIO_NET_IP4_ADDR_SIZE 8/* ipv4 saddr + daddr */ + +#define VIRTIO_NET_TCP_FLAG 0x3F +#define VIRTIO_NET_TCP_HDR_LENGTH 0xF000 + +/* IPv4 max payload, 16 bits in the header */ +#define VIRTIO_NET_MAX_IP4_PAYLOAD (65535 - sizeof(struct ip_header)) +#define VIRTIO_NET_MAX_TCP_PAYLOAD 65535 + +/* header length value in ip header without option */ +#define VIRTIO_NET_IP4_HEADER_LENGTH 5 + +/* Purge coalesced packets timer interval, This value affects the performance + a lot, and should be tuned carefully, '30'(300us) is the recommended + value to pass the WHQL test, '5' can gain 2x netperf throughput with + tso/gso/gro 'off'. 
*/ +#define VIRTIO_NET_RSC_INTERVAL 30 + typedef struct VirtIOFeature { uint32_t flags; size_t end; @@ -589,7 +609,12 @@ static uint64_t virtio_net_guest_offloads_by_features(uint32_t features) (1ULL << VIRTIO_NET_F_GUEST_ECN) | (1ULL << VIRTIO_NET_F_GUEST_UFO); -return guest_offloads_mask & features; +if (features & VIRTIO_NET_F_CTRL_GUEST_OFFLOADS) { +return (guest_offloads_mask & features) | + (1ULL << VIRTIO_NET_F_GUEST_RSC4); +} else { +return guest_offloads_mask & features; +} } static inline uint64_t virtio_net_supported_guest_offloads(VirtIONet *n) @@ -600,6 +625,7 @@ static inline uint64_t virtio_net_supported_guest_offloads(VirtIONet *n) static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features) { +NetClientState *nc; VirtIONet
[Qemu-devel] [PATCH 2/2] virtio-net rsc: support coalescing ipv6 tcp traffic
From: Wei XuMost process flows work like ipv4, 2 differences between ipv4 and ipv6. 1. Fragment length in ipv4 header includes itself, while it's not included for ipv6, thus means ipv6 can carry a real '65535' payload. 2. IPv6 header does not need calculate header checksum. Signed-off-by: Wei Xu --- hw/net/virtio-net.c | 168 +--- include/standard-headers/linux/virtio_net.h | 6 +- 2 files changed, 159 insertions(+), 15 deletions(-) diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c index d1824d9..1027a67 100644 --- a/hw/net/virtio-net.c +++ b/hw/net/virtio-net.c @@ -57,6 +57,10 @@ /* header length value in ip header without option */ #define VIRTIO_NET_IP4_HEADER_LENGTH 5 +#define ETH_IP6_HDR_SZ (ETH_HDR_SZ + IP6_HDR_SZ) +#define VIRTIO_NET_IP6_ADDR_SIZE 32 /* ipv6 saddr + daddr */ +#define VIRTIO_NET_MAX_IP6_PAYLOAD VIRTIO_NET_MAX_TCP_PAYLOAD + /* Purge coalesced packets timer interval, This value affects the performance a lot, and should be tuned carefully, '30'(300us) is the recommended value to pass the WHQL test, '5' can gain 2x netperf throughput with @@ -611,7 +615,8 @@ static uint64_t virtio_net_guest_offloads_by_features(uint32_t features) if (features & VIRTIO_NET_F_CTRL_GUEST_OFFLOADS) { return (guest_offloads_mask & features) | - (1ULL << VIRTIO_NET_F_GUEST_RSC4); + (1ULL << VIRTIO_NET_F_GUEST_RSC4) | + (1ULL << VIRTIO_NET_F_GUEST_RSC6); } else { return guest_offloads_mask & features; } @@ -1612,7 +1617,8 @@ static int virtio_net_load_device(VirtIODevice *vdev, QEMUFile *f, virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)); -if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_RSC4)) { +if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_RSC4) +|| virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_RSC6)) { n->guest_hdr_len = sizeof(struct virtio_net_hdr_rsc); n->host_hdr_len = n->guest_hdr_len; } @@ -1730,6 +1736,24 @@ static void virtio_net_rsc_extract_unit4(NetRscChain *chain, unit->payload = htons(*unit->ip_plen) - ip_hdrlen - unit->tcp_hdrlen; } 
+static void virtio_net_rsc_extract_unit6(NetRscChain *chain, + const uint8_t *buf, NetRscUnit* unit) +{ +struct ip6_header *ip6; + +ip6 = (struct ip6_header *)(buf + chain->n->guest_hdr_len + + sizeof(struct eth_header)); +unit->ip = ip6; +unit->ip_plen = &(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen); +unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip)\ ++ sizeof(struct ip6_header)); +unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10; + +/* There is a difference between payload lenght in ipv4 and v6, + ip header is excluded in ipv6 */ +unit->payload = htons(*unit->ip_plen) - unit->tcp_hdrlen; +} + static void virtio_net_rsc_ipv4_checksum(struct virtio_net_hdr_rsc *rhdr, struct ip_header *ip) { @@ -1750,12 +1774,14 @@ static size_t virtio_net_rsc_drain_seg(NetRscChain *chain, NetRscSeg *seg) h = (struct virtio_net_hdr_rsc *)seg->buf; if (seg->is_coalesced) { -h->hdr.flags = VIRTIO_NET_HDR_RSC_TCPV4; -virtio_net_rsc_ipv4_checksum(h, seg->unit.ip); +if (chain->proto == ETH_P_IP) { +h->hdr.flags = VIRTIO_NET_HDR_RSC_TCPV4; +virtio_net_rsc_ipv4_checksum(h, seg->unit.ip); +} else { +h->hdr.flags = VIRTIO_NET_HDR_RSC_TCPV6; +} } -h = (struct virtio_net_hdr_rsc *)seg->buf; -virtio_net_rsc_ipv4_checksum(h, seg->unit.ip); h->rsc_pkts = seg->packets; h->rsc_dup_acks = seg->dup_ack; ret = virtio_net_do_receive(seg->nc, seg->buf, seg->size); @@ -1813,7 +1839,7 @@ static void virtio_net_rsc_cache_buf(NetRscChain *chain, NetClientState *nc, hdr_len = chain->n->guest_hdr_len; seg = g_malloc(sizeof(NetRscSeg)); seg->buf = g_malloc(hdr_len + sizeof(struct eth_header)\ - + VIRTIO_NET_MAX_TCP_PAYLOAD); + + sizeof(struct ip6_header) + VIRTIO_NET_MAX_TCP_PAYLOAD); memcpy(seg->buf, buf, size); seg->size = size; seg->packets = 1; @@ -1824,7 +1850,18 @@ static void virtio_net_rsc_cache_buf(NetRscChain *chain, NetClientState *nc, QTAILQ_INSERT_TAIL(>buffers, seg, next); chain->stat.cache++; -virtio_net_rsc_extract_unit4(chain, seg->buf, >unit); +switch (chain->proto) { 
+case ETH_P_IP: +virtio_net_rsc_extract_unit4(chain, seg->buf, >unit); +break; + +case ETH_P_IPV6: +virtio_net_rsc_extract_unit6(chain, seg->buf, >unit); +break; + +default: +g_assert_not_reached(); +} }
[Qemu-devel] [ RFC Patch v7 0/2] Support Receive-Segment-Offload(RSC) for WHQL
From: Wei Xu. This patch is to support the WHQL test for Windows guests; this feature also benefits other guests, working as a kernel 'gro'-like feature with a userspace implementation. Feature information: http://msdn.microsoft.com/en-us/library/windows/hardware/jj853324 v6->v7 - Change the drain timer from 'virtual' to 'host' since it is invisible to the guest. - Move the buffer list empty check to virtio_net_rsc_do_coalesc(). - The header comparison is a bit odd for ipv4 in this patch; it would be simpler with an equality check, but this is also a helper for ipv6 in the next patch, and ipv6 uses different-size address fields, so I used an 'address + size' byte comparison for the addresses, and changed the tcp port comparison to an 'int' equality check. - Add a count for packets whose size is less than a normal tcp packet in the sanity check. - Move constant value comparisons to the right side of the equality symbol. - Use the host header length instead of the guest header length to verify a packet in virtio_net_rsc_receive(), in case the header length differs between guest and host. - Check whether the packet size is enough to hold a legal packet before extracting the ip unit. - Bypass ip/tcp ECN packets. - Expand the feature bit definition from 32 to 64 bits. Other notes: - About tcp window scaling: we don't have connection tracking for all tcp connections, so we don't know the exact window size in use; thus this feature may have a negative influence on it, and it has to be turned off for such a use case currently. - There are 2 new fields in the virtio net header; they are not in either the kernel tree or the maintainer's tree right now, so I just put them directly here. - The statistics are kept in this version since they are helpful for troubleshooting. Changes in V6: - Sync upstream code - Split new fields in 'virtio_net_hdr' to a separate patch - Remove feature bit code, replace it with a command line parameter 'guest_rsc' which is turned off by default. 
Changes in V5: - Passed all IPv4/6 test cases - Add new fields in 'virtio_net_hdr' - Set 'gso_type' & 'coalesced packets' in new field. - Bypass all 'tcp option' packets - Bypass all 'pure ack' packets - Bypass all 'duplicate ack' packets - Change 'guest_rsc' feature bit to 'false' by default - Feedback from v4, typos, etc. Changes in V4: - Add new host feature bit - Replace using fixed header length with dynamic header length in VirtIONet - Change ip/ip6 header union in NetRscUnit to void* pointer - Add macro prefix, adjust code indent, etc. Changes in V3: - Removed big param list, replace it with 'NetRscUnit' - Different virtio header size - Modify callback function to direct call. - Needn't check the failure of g_malloc() - Other code format adjustment, macro naming, etc Changes in V2: - Add detailed commit log Wei Xu (2): virtio-net rsc: support coalescing ipv4 tcp traffic virtio-net rsc: support coalescing ipv6 tcp traffic hw/net/virtio-net.c | 742 ++-- include/hw/virtio/virtio-net.h | 5 +- include/hw/virtio/virtio.h | 76 +++ include/net/eth.h | 2 + include/standard-headers/linux/virtio_net.h | 18 + net/tap.c | 3 +- 6 files changed, 814 insertions(+), 32 deletions(-) -- 2.7.1
[Qemu-devel] [ RFC Patch v6 3/3] virtio-net rsc: add 2 new rsc information fields to 'virtio_net_hdr'
From: Wei XuField 'coalesced' is to indicate how many packets are coalesced and field 'dup_ack' is how many duplicate acks are merged, guest driver can use these information to notify what's the exact scene of original traffic over the networks. Signed-off-by: Wei Xu --- hw/net/virtio-net.c | 8 include/standard-headers/linux/virtio_net.h | 2 ++ 2 files changed, 10 insertions(+) diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c index cc8cbe4..20f552a 100644 --- a/hw/net/virtio-net.c +++ b/hw/net/virtio-net.c @@ -1768,6 +1768,10 @@ static size_t virtio_net_rsc_drain_seg(NetRscChain *chain, NetRscSeg *seg) if ((chain->proto == ETH_P_IP) && seg->is_coalesced) { virtio_net_rsc_ipv4_checksum(h, seg->unit.ip); } +h->coalesced = seg->packets; +h->dup_ack = seg->dup_ack; +h->gso_type = chain->gso_type; +h->gso_size = chain->max_payload; ret = virtio_net_do_receive(seg->nc, seg->buf, seg->size); QTAILQ_REMOVE(>buffers, seg, next); g_free(seg->buf); @@ -2302,9 +2306,13 @@ static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf, size_t size) { VirtIONet *n; +struct virtio_net_hdr *h; n = qemu_get_nic_opaque(nc); if (n->host_features & (1ULL << VIRTIO_NET_F_GUEST_RSC)) { +h = (struct virtio_net_hdr *)buf; +h->coalesced = 0; +h->dup_ack = 0; return virtio_net_rsc_receive(nc, buf, size); } else { return virtio_net_do_receive(nc, buf, size); diff --git a/include/standard-headers/linux/virtio_net.h b/include/standard-headers/linux/virtio_net.h index 5b95762..c837417 100644 --- a/include/standard-headers/linux/virtio_net.h +++ b/include/standard-headers/linux/virtio_net.h @@ -114,6 +114,8 @@ struct virtio_net_hdr { __virtio16 gso_size;/* Bytes to append to hdr_len per frame */ __virtio16 csum_start; /* Position to start checksumming from */ __virtio16 csum_offset; /* Offset after that to place checksum */ +__virtio16 coalesced; /* packets coalesced by host */ +__virtio16 dup_ack; /* duplicate ack count */ }; /* This is the version of the header to use when the 
MRG_RXBUF -- 2.7.1
[Qemu-devel] [ RFC Patch v6 1/3] virtio-net rsc: support coalescing ipv4 tcp traffic
From: Wei XuAll the data packets in a tcp connection will be cached to a big buffer in every receive interval, and will be sent out via a timer, the 'virtio_net_rsc_timeout' controls the interval, the value will influent the performance and response of tcp connection extremely, 5(50us) is a experience value to gain a performance improvement, since the whql test sends packets every 100us, so '30(300us)' can pass the test case, this is also the default value, it's tunable via the command line parameter 'rsc_interval' with 'virtio-net-pci' device, for example, below parameter is to launch a guest with interval set as '50'. 'virtio-net-pci,netdev=hostnet1,bus=pci.0,id=net1,mac=00,rsc_interval=50' will The timer will only be triggered if the packets pool is not empty, and it'll drain off all the cached packets. 'NetRscChain' is used to save the segments of different protocols in a VirtIONet device. The main handler of TCP includes TCP window update, duplicated ACK check and the real data coalescing if the new segment passed sanity check and is identified as an 'wanted' one. An 'wanted' segment means: 1. Segment is within current window and the sequence is the expected one. 2. 'ACK' of the segment is in the valid window. Sanity check includes: 1. Incorrect version in IP header 2. IP options & IP fragment 3. Not a TCP packets 4. Sanity size check to prevent buffer overflow attack. There maybe more cases should be considered such as ip identification other flags, while it broke the test because windows set it to the same even it's not a fragment. Normally it includes 2 typical ways to handle a TCP control flag, 'bypass' and 'finalize', 'bypass' means should be sent out directly, and 'finalize' means the packets should also be bypassed, and this should be done after searching for the same connection packets in the pool and sending all of them out, this is to avoid out of data. 
All the 'SYN' packets will be bypassed since this always begin a new' connection, other flags such 'FIN/RST' will trigger a finalization, because this normally happens upon a connection is going to be closed, an 'URG' packet also finalize current coalescing unit. Statistics can be used to monitor the basic coalescing status, the 'out of order' and 'out of window' means how many retransmitting packets, thus describe the performance intuitively. Signed-off-by: Wei Xu --- hw/net/virtio-net.c | 498 +++- include/hw/virtio/virtio-net.h | 2 + include/hw/virtio/virtio.h | 75 + include/standard-headers/linux/virtio_net.h | 1 + 4 files changed, 575 insertions(+), 1 deletion(-) diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c index 5798f87..b3bb63b 100644 --- a/hw/net/virtio-net.c +++ b/hw/net/virtio-net.c @@ -15,10 +15,12 @@ #include "qemu/iov.h" #include "hw/virtio/virtio.h" #include "net/net.h" +#include "net/eth.h" #include "net/checksum.h" #include "net/tap.h" #include "qemu/error-report.h" #include "qemu/timer.h" +#include "qemu/sockets.h" #include "hw/virtio/virtio-net.h" #include "net/vhost_net.h" #include "hw/virtio/virtio-bus.h" @@ -38,6 +40,25 @@ #define endof(container, field) \ (offsetof(container, field) + sizeof(((container *)0)->field)) +#define VIRTIO_NET_IP4_ADDR_SIZE 8/* ipv4 saddr + daddr */ +#define VIRTIO_NET_TCP_PORT_SIZE 4/* sport + dport */ + +#define VIRTIO_NET_TCP_FLAG 0x3F +#define VIRTIO_NET_TCP_HDR_LENGTH 0xF000 + +/* IPv4 max payload, 16 bits in the header */ +#define VIRTIO_NET_MAX_IP4_PAYLOAD (65535 - sizeof(struct ip_header)) +#define VIRTIO_NET_MAX_TCP_PAYLOAD 65535 + +/* header length value in ip header without option */ +#define VIRTIO_NET_IP4_HEADER_LENGTH 5 + +/* Purge coalesced packets timer interval, This value affects the performance + a lot, and should be tuned carefully, '30'(300us) is the recommended + value to pass the WHQL test, '5' can gain 2x netperf throughput with + tso/gso/gro 'off'. 
*/ +#define VIRTIO_NET_RSC_INTERVAL 30 + typedef struct VirtIOFeature { uint32_t flags; size_t end; @@ -1089,7 +1110,8 @@ static int receive_filter(VirtIONet *n, const uint8_t *buf, int size) return 0; } -static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf, size_t size) +static ssize_t virtio_net_do_receive(NetClientState *nc, + const uint8_t *buf, size_t size) { VirtIONet *n = qemu_get_nic_opaque(nc); VirtIONetQueue *q = virtio_net_get_subqueue(nc); @@ -1685,6 +1707,474 @@ static int virtio_net_load_device(VirtIODevice *vdev, QEMUFile *f, return 0; } +static void virtio_net_rsc_extract_unit4(NetRscChain *chain, + const uint8_t *buf, NetRscUnit* unit) +{ +uint16_t hdr_len; +uint16_t ip_hdrlen; +struct ip_header *ip; + +hdr_len =
[Qemu-devel] [ RFC Patch v6 2/3] virtio-net rsc: support coalescing ipv6 tcp traffic
From: Wei XuMost stuffs are like ipv4 2 differences between ipv4 and ipv6. 1. Fragment length in ipv4 header includes itself, while it's not included for ipv6, thus means ipv6 can carry a real '65535' payload. 2. IPv6 header does not need calculate header checksum. Signed-off-by: Wei Xu --- hw/net/virtio-net.c | 152 +--- 1 file changed, 144 insertions(+), 8 deletions(-) diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c index b3bb63b..cc8cbe4 100644 --- a/hw/net/virtio-net.c +++ b/hw/net/virtio-net.c @@ -53,6 +53,10 @@ /* header length value in ip header without option */ #define VIRTIO_NET_IP4_HEADER_LENGTH 5 +#define ETH_IP6_HDR_SZ (ETH_HDR_SZ + IP6_HDR_SZ) +#define VIRTIO_NET_IP6_ADDR_SIZE 32 /* ipv6 saddr + daddr */ +#define VIRTIO_NET_MAX_IP6_PAYLOAD VIRTIO_NET_MAX_TCP_PAYLOAD + /* Purge coalesced packets timer interval, This value affects the performance a lot, and should be tuned carefully, '30'(300us) is the recommended value to pass the WHQL test, '5' can gain 2x netperf throughput with @@ -1724,6 +1728,25 @@ static void virtio_net_rsc_extract_unit4(NetRscChain *chain, unit->payload = htons(*unit->ip_plen) - ip_hdrlen - unit->tcp_hdrlen; } +static void virtio_net_rsc_extract_unit6(NetRscChain *chain, + const uint8_t *buf, NetRscUnit* unit) +{ +uint16_t hdr_len; +struct ip6_header *ip6; + +hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len; +ip6 = (struct ip6_header *)(buf + hdr_len + sizeof(struct eth_header)); +unit->ip = ip6; +unit->ip_plen = &(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen); +unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip)\ ++ sizeof(struct ip6_header)); +unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10; + +/* There is a difference between payload lenght in ipv4 and v6, + ip header is excluded in ipv6 */ +unit->payload = htons(*unit->ip_plen) - unit->tcp_hdrlen; +} + static void virtio_net_rsc_ipv4_checksum(struct virtio_net_hdr *vhdr, struct ip_header *ip) { @@ -1742,7 +1765,9 @@ static size_t 
virtio_net_rsc_drain_seg(NetRscChain *chain, NetRscSeg *seg) struct virtio_net_hdr *h; h = (struct virtio_net_hdr *)seg->buf; -virtio_net_rsc_ipv4_checksum(h, seg->unit.ip); +if ((chain->proto == ETH_P_IP) && seg->is_coalesced) { +virtio_net_rsc_ipv4_checksum(h, seg->unit.ip); +} ret = virtio_net_do_receive(seg->nc, seg->buf, seg->size); QTAILQ_REMOVE(>buffers, seg, next); g_free(seg->buf); @@ -1798,7 +1823,7 @@ static void virtio_net_rsc_cache_buf(NetRscChain *chain, NetClientState *nc, hdr_len = chain->n->guest_hdr_len; seg = g_malloc(sizeof(NetRscSeg)); seg->buf = g_malloc(hdr_len + sizeof(struct eth_header)\ - + VIRTIO_NET_MAX_TCP_PAYLOAD); + + sizeof(struct ip6_header) + VIRTIO_NET_MAX_TCP_PAYLOAD); memcpy(seg->buf, buf, size); seg->size = size; seg->packets = 1; @@ -1809,7 +1834,18 @@ static void virtio_net_rsc_cache_buf(NetRscChain *chain, NetClientState *nc, QTAILQ_INSERT_TAIL(>buffers, seg, next); chain->stat.cache++; -virtio_net_rsc_extract_unit4(chain, seg->buf, >unit); +switch (chain->proto) { +case ETH_P_IP: +virtio_net_rsc_extract_unit4(chain, seg->buf, >unit); +break; + +case ETH_P_IPV6: +virtio_net_rsc_extract_unit6(chain, seg->buf, >unit); +break; + +default: +g_assert_not_reached(); +} } static int32_t virtio_net_rsc_handle_ack(NetRscChain *chain, @@ -1929,6 +1965,24 @@ static int32_t virtio_net_rsc_coalesce4(NetRscChain *chain, NetRscSeg *seg, return virtio_net_rsc_coalesce_data(chain, seg, buf, unit); } +static int32_t virtio_net_rsc_coalesce6(NetRscChain *chain, NetRscSeg *seg, +const uint8_t *buf, size_t size, NetRscUnit *unit) +{ +struct ip6_header *ip1, *ip2; + +ip1 = (struct ip6_header *)(unit->ip); +ip2 = (struct ip6_header *)(seg->unit.ip); +if (memcmp(>ip6_src, >ip6_src, sizeof(struct in6_address)) +|| memcmp(>ip6_dst, >ip6_dst, sizeof(struct in6_address)) +|| (unit->tcp->th_sport ^ seg->unit.tcp->th_sport) +|| (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) { +chain->stat.no_match++; +return RSC_NO_MATCH; +} + +return 
virtio_net_rsc_coalesce_data(chain, seg, buf, unit); +} + /* Pakcets with 'SYN' should bypass, other flag should be sent after drain * to prevent out of order */ static int virtio_net_rsc_tcp_ctrl_check(NetRscChain *chain, @@ -1981,7 +2035,11 @@ static size_t virtio_net_rsc_do_coalesce(NetRscChain *chain, NetClientState *nc, NetRscSeg *seg, *nseg; QTAILQ_FOREACH_SAFE(seg, >buffers, next, nseg) { -ret =
[Qemu-devel] [ RFC Patch v6 0/2] Support Receive-Segment-Offload(RSC) for WHQL
From: Wei Xu. Changes in V6: - Sync upstream code - Split new fields in 'virtio_net_hdr' to a separate patch - Remove feature bit code, replace it with a command line parameter 'guest_rsc' which is turned off by default. Changes in V5: - Passed all IPv4/6 test cases - Add new fields in 'virtio_net_hdr' - Set 'gso_type' & 'coalesced packets' in new field. - Bypass all 'tcp option' packets - Bypass all 'pure ack' packets - Bypass all 'duplicate ack' packets - Change 'guest_rsc' feature bit to 'false' by default - Feedback from v4, typos, etc. Note: There are still a few pending issues about the feature bit that need to be discussed with the windows driver maintainer, so linux guests with this patch won't work at the moment; I haven't figured it out yet, but I'm guessing it's caused by 'gso_type' being set to 'VIRTIO_NET_HDR_GSO_TCPV4/6'; I will fix it after the final solution is settled. The test steps and performance data below are based on v4. Another suggestion from Jason is to adjust part of the code to make it more readable; since there may still be a few changes to the flowchart in the future, such as timestamp and duplicate ack handling, I'd like to delay that temporarily. Changes in V4: - Add new host feature bit - Replace using fixed header length with dynamic header length in VirtIONet - Change ip/ip6 header union in NetRscUnit to void* pointer - Add macro prefix, adjust code indent, etc. Changes in V3: - Removed big param list, replace it with 'NetRscUnit' - Different virtio header size - Modify callback function to direct call. - Needn't check the failure of g_malloc() - Other code format adjustment, macro naming, etc Changes in V2: - Add detailed commit log This patch is to support the WHQL test for Windows guests, while this feature also benefits other guests, working as a kernel 'gro'-like feature with a userspace implementation. 
Feature information: http://msdn.microsoft.com/en-us/library/windows/hardware/jj853324 Both IPv4 and IPv6 are supported, though performance with userspace virtio is slow than vhost-net, there is about 1.5x to 2x performance improvement to userspace virtio, this is done by turning this feature on and disable 'tso/gso/gro' on corresponding tap interface and guest interface, while get less improment with all these feature on. Linux guest performance data(Netperf): MIGRATED TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 192.168.2.101 () port 0 AF_INET : nodelay Size SizeSize Time Throughput bytes bytes bytessecs.10^6bits/sec 87380 16384 646.00 1221.20 87380 16384 646.00 1260.30 87380 163841286.00 1978.51 87380 163841286.00 2286.05 87380 163842566.00 2677.94 87380 163842566.00 4615.42 87380 163845126.00 2956.54 87380 163845126.00 5356.39 87380 16384 10246.00 2798.17 87380 16384 10246.00 4943.30 87380 16384 20486.00 2681.09 87380 16384 20486.00 4835.81 87380 16384 40966.00 3390.14 87380 16384 40966.00 5391.54 87380 16384 80926.00 3008.27 87380 16384 80926.00 5381.68 87380 16384 102406.00 2999.89 87380 16384 102406.00 5393.11 Test steps: Although this feature is mainly used for window guest, i used linux guest to help test the feature, to make things simple, i used 3 steps to test the patch as i moved on. 1. With a tcp socket client/server pair running on 2 linux guest, thus i can control the traffic and debugging the code as i want. 2. Netperf on linux guest test the throughput. 3. WHQL test with 2 Windows guests. Wei Xu (3): virtio-net rsc: support coalescing ipv4 tcp traffic virtio-net rsc: support coalescing ipv6 tcp traffic virtio-net rsc: add 2 new rsc information fields to 'virtio_net_hdr' hw/net/virtio-net.c | 642 +++- include/hw/virtio/virtio-net.h | 2 + include/hw/virtio/virtio.h | 75 include/standard-headers/linux/virtio_net.h | 3 + 4 files changed, 721 insertions(+), 1 deletion(-) -- 2.7.1
[Qemu-devel] [ RFC Patch v5 1/2] virtio-net rsc: support coalescing ipv4 tcp traffic
From: Wei XuAll the data packets in a tcp connection will be cached to a big buffer in every receive interval, and will be sent out via a timer, the 'virtio_net_rsc_timeout' controls the interval, the value will influent the performance and response of tcp connection extremely, 5(50us) is a experience value to gain a performance improvement, since the whql test sends packets every 100us, so '30(300us)' can pass the test case, this is also the default value, it's tunable via the command line parameter 'rsc_interval' with 'virtio-net-pci' device, for example, below parameter is to launch a guest with interval set as '50'. 'virtio-net-pci,netdev=hostnet1,bus=pci.0,id=net1,mac=00,rsc_interval=50' will The timer will only be triggered if the packets pool is not empty, and it'll drain off all the cached packets. 'NetRscChain' is used to save the segments of different protocols in a VirtIONet device. The main handler of TCP includes TCP window update, duplicated ACK check and the real data coalescing if the new segment passed sanity check and is identified as an 'wanted' one. An 'wanted' segment means: 1. Segment is within current window and the sequence is the expected one. 2. 'ACK' of the segment is in the valid window. Sanity check includes: 1. Incorrect version in IP header 2. IP options & IP fragment 3. Not a TCP packets 4. Sanity size check to prevent buffer overflow attack. There maybe more cases should be considered such as ip identification other flags, while it broke the test because windows set it to the same even it's not a fragment. Normally it includes 2 typical ways to handle a TCP control flag, 'bypass' and 'finalize', 'bypass' means should be sent out directly, and 'finalize' means the packets should also be bypassed, and this should be done after searching for the same connection packets in the pool and sending all of them out, this is to avoid out of data. 
All the 'SYN' packets will be bypassed since this always begin a new' connection, other flags such 'FIN/RST' will trigger a finalization, because this normally happens upon a connection is going to be closed, an 'URG' packet also finalize current coalescing unit. Statistics can be used to monitor the basic coalescing status, the 'out of order' and 'out of window' means how many retransmitting packets, thus describe the performance intuitively. Signed-off-by: Wei Xu --- hw/net/virtio-net.c | 487 +++- include/hw/virtio/virtio-net.h | 2 + include/hw/virtio/virtio.h | 75 + include/standard-headers/linux/virtio_net.h | 2 + 4 files changed, 563 insertions(+), 3 deletions(-) diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c index bd91a4b..1ff0135 100644 --- a/hw/net/virtio-net.c +++ b/hw/net/virtio-net.c @@ -15,10 +15,12 @@ #include "qemu/iov.h" #include "hw/virtio/virtio.h" #include "net/net.h" +#include "net/eth.h" #include "net/checksum.h" #include "net/tap.h" #include "qemu/error-report.h" #include "qemu/timer.h" +#include "qemu/sockets.h" #include "hw/virtio/virtio-net.h" #include "net/vhost_net.h" #include "hw/virtio/virtio-bus.h" @@ -38,6 +40,25 @@ #define endof(container, field) \ (offsetof(container, field) + sizeof(((container *)0)->field)) +#define VIRTIO_NET_IP4_ADDR_SIZE 8/* ipv4 saddr + daddr */ +#define VIRTIO_NET_TCP_PORT_SIZE 4/* sport + dport */ + +#define VIRTIO_NET_TCP_FLAG 0x3F +#define VIRTIO_NET_TCP_HDR_LENGTH 0xF000 + +/* IPv4 max payload, 16 bits in the header */ +#define VIRTIO_NET_MAX_IP4_PAYLOAD (65535 - sizeof(struct ip_header)) +#define VIRTIO_NET_MAX_TCP_PAYLOAD 65535 + +/* header length value in ip header without option */ +#define VIRTIO_NET_IP4_HEADER_LENGTH 5 + +/* Purge coalesced packets timer interval, This value affects the performance + a lot, and should be tuned carefully, '30'(300us) is the recommended + value to pass the WHQL test, '5' can gain 2x netperf throughput with + tso/gso/gro 'off'. 
*/ +#define VIRTIO_NET_RSC_INTERVAL 30 + typedef struct VirtIOFeature { uint32_t flags; size_t end; @@ -1688,20 +1709,476 @@ static int virtio_net_load_device(VirtIODevice *vdev, QEMUFile *f, return 0; } +static void virtio_net_rsc_extract_unit4(NetRscChain *chain, + const uint8_t *buf, NetRscUnit* unit) +{ +uint16_t hdr_len; +uint16_t ip_hdrlen; +struct ip_header *ip; + +hdr_len = chain->n->guest_hdr_len; +ip = (struct ip_header *)(buf + hdr_len + sizeof(struct eth_header)); +unit->ip = (void *)ip; +ip_hdrlen = (ip->ip_ver_len & 0xF) << 2; +unit->ip_plen = >ip_len; +unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip) + ip_hdrlen); +unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10; +unit->payload = htons(*unit->ip_plen) - ip_hdrlen - unit->tcp_hdrlen; +} + +static