[Qemu-devel] [PATCH v4 06/11] virtio: get avail bytes check for packed ring

2019-02-13 Thread wexu
From: Wei Xu 

Add packed ring headcount check.

Common parts of the split/packed ring code are kept.

Signed-off-by: Wei Xu 
---
 hw/virtio/virtio.c | 197 -
 1 file changed, 179 insertions(+), 18 deletions(-)

diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index f2ff980..832287b 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -368,6 +368,17 @@ int virtio_queue_ready(VirtQueue *vq)
 return vq->vring.avail != 0;
 }
 
+static void vring_packed_desc_read(VirtIODevice *vdev, VRingPackedDesc *desc,
+MemoryRegionCache *cache, int i)
+{
+address_space_read_cached(cache, i * sizeof(VRingPackedDesc),
+  desc, sizeof(VRingPackedDesc));
+virtio_tswap16s(vdev, &desc->flags);
+virtio_tswap64s(vdev, &desc->addr);
+virtio_tswap32s(vdev, &desc->len);
+virtio_tswap16s(vdev, &desc->id);
+}
+
 static void vring_packed_desc_read_flags(VirtIODevice *vdev,
 VRingPackedDesc *desc, MemoryRegionCache *cache, int i)
 {
@@ -667,9 +678,9 @@ static int virtqueue_read_next_desc(VirtIODevice *vdev, VRingDesc *desc,
 return VIRTQUEUE_READ_DESC_MORE;
 }
 
-void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
-   unsigned int *out_bytes,
-   unsigned max_in_bytes, unsigned max_out_bytes)
+static void virtqueue_split_get_avail_bytes(VirtQueue *vq,
+unsigned int *in_bytes, unsigned int *out_bytes,
+unsigned max_in_bytes, unsigned max_out_bytes)
 {
 VirtIODevice *vdev = vq->vdev;
 unsigned int max, idx;
@@ -679,27 +690,12 @@ void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
 int64_t len = 0;
 int rc;
 
-if (unlikely(!vq->vring.desc)) {
-if (in_bytes) {
-*in_bytes = 0;
-}
-if (out_bytes) {
-*out_bytes = 0;
-}
-return;
-}
-
 rcu_read_lock();
 idx = vq->last_avail_idx;
 total_bufs = in_total = out_total = 0;
 
 max = vq->vring.num;
 caches = vring_get_region_caches(vq);
-if (caches->desc.len < max * sizeof(VRingDesc)) {
-virtio_error(vdev, "Cannot map descriptor ring");
-goto err;
-}
-
 while ((rc = virtqueue_num_heads(vq, idx)) > 0) {
 MemoryRegionCache *desc_cache = &caches->desc;
 unsigned int num_bufs;
@@ -792,6 +788,171 @@ err:
 goto done;
 }
 
+static void virtqueue_packed_get_avail_bytes(VirtQueue *vq,
+unsigned int *in_bytes, unsigned int *out_bytes,
+unsigned max_in_bytes, unsigned max_out_bytes)
+{
+VirtIODevice *vdev = vq->vdev;
+unsigned int max, idx;
+unsigned int total_bufs, in_total, out_total;
+MemoryRegionCache *desc_cache;
+VRingMemoryRegionCaches *caches;
+MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
+int64_t len = 0;
+VRingPackedDesc desc;
+bool wrap_counter;
+
+rcu_read_lock();
+idx = vq->last_avail_idx;
+wrap_counter = vq->last_avail_wrap_counter;
+total_bufs = in_total = out_total = 0;
+
+max = vq->vring.num;
+caches = vring_get_region_caches(vq);
+desc_cache = &caches->desc;
+vring_packed_desc_read_flags(vdev, &desc, desc_cache, idx);
+while (is_desc_avail(&desc, wrap_counter)) {
+unsigned int num_bufs;
+unsigned int i = 0;
+
+num_bufs = total_bufs;
+
+/* Make sure flags has been read before all the fields. */
+smp_rmb();
+vring_packed_desc_read(vdev, &desc, desc_cache, idx);
+
+if (desc.flags & VRING_DESC_F_INDIRECT) {
+if (desc.len % sizeof(VRingPackedDesc)) {
+virtio_error(vdev, "Invalid size for indirect buffer table");
+goto err;
+}
+
+/* If we've got too many, that implies a descriptor loop. */
+if (num_bufs >= max) {
+virtio_error(vdev, "Looped descriptor");
+goto err;
+}
+
+/* loop over the indirect descriptor table */
+len = address_space_cache_init(&indirect_desc_cache,
+   vdev->dma_as,
+   desc.addr, desc.len, false);
+desc_cache = &indirect_desc_cache;
+if (len < desc.len) {
+virtio_error(vdev, "Cannot map indirect buffer");
+goto err;
+}
+
+max = desc.len / sizeof(VRingPackedDesc);
+num_bufs = i = 0;
+vring_packed_desc_read(vdev, &desc, desc_cache, i);
+}
+
+do {
+/* If we've got too many, that implies a descriptor loop. */
+if (++num_bufs > max) {
+virtio_error(vdev, "Looped descriptor");
+goto err;
+}
+
+if (desc.flags & VRING_DESC_F_WRITE) {
+in_total += desc.len;
+} 
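
The hunk is cut off here in the archive. Per the commit message (and the v3
changelog's "merge common memory cache size check to
virtqueue_get_avail_bytes()"), the checks removed from the split path are
kept in a shared wrapper that dispatches on VIRTIO_F_RING_PACKED; a plausible
sketch of that wrapper, not the verbatim patch, is:

    void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
                                   unsigned int *out_bytes,
                                   unsigned max_in_bytes, unsigned max_out_bytes)
    {
        uint16_t desc_size;
        VRingMemoryRegionCaches *caches;

        if (unlikely(!vq->vring.desc)) {
            goto err;
        }

        /* The common descriptor-ring mapping check, sized per layout. */
        caches = vring_get_region_caches(vq);
        desc_size = virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED) ?
                    sizeof(VRingPackedDesc) : sizeof(VRingDesc);
        if (caches->desc.len < vq->vring.num * desc_size) {
            virtio_error(vq->vdev, "Cannot map descriptor ring");
            goto err;
        }

        if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
            virtqueue_packed_get_avail_bytes(vq, in_bytes, out_bytes,
                                             max_in_bytes, max_out_bytes);
        } else {
            virtqueue_split_get_avail_bytes(vq, in_bytes, out_bytes,
                                            max_in_bytes, max_out_bytes);
        }
        return;
    err:
        if (in_bytes) {
            *in_bytes = 0;
        }
        if (out_bytes) {
            *out_bytes = 0;
        }
    }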

[Qemu-devel] [PATCH v4 09/11] virtio-net: update the head descriptor in a chain lastly

2019-02-13 Thread wexu
From: Wei Xu 

This is a helper for packed ring.

To support the packed ring, the head descriptor in a chain should be
updated last: unlike the split ring, there is no 'avail_idx' to
explicitly tell the driver side that the whole payload is ready once
the chain is done, so the head descriptor becomes visible to the
driver as soon as it is written.

This patch fills the head descriptor after all the other ones are done.

Signed-off-by: Wei Xu 
---
 hw/net/virtio-net.c | 11 ++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index 3f319ef..330abea 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -1251,6 +1251,8 @@ static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf,
 struct virtio_net_hdr_mrg_rxbuf mhdr;
 unsigned mhdr_cnt = 0;
 size_t offset, i, guest_offset;
+VirtQueueElement head;
+int head_len = 0;
 
 if (!virtio_net_can_receive(nc)) {
 return -1;
@@ -1328,7 +1330,13 @@ static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf,
 }
 
 /* signal other side */
-virtqueue_fill(q->rx_vq, elem, total, i++);
+if (i == 0) {
+head_len = total;
+head = *elem;
+} else {
+virtqueue_fill(q->rx_vq, elem, len, i);
+}
+i++;
 g_free(elem);
 }
 
@@ -1339,6 +1347,7 @@ static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf,
 &mhdr.num_buffers, sizeof mhdr.num_buffers);
 }
 
+virtqueue_fill(q->rx_vq, &head, head_len, 0);
 virtqueue_flush(q->rx_vq, i);
 virtio_notify(vdev, q->rx_vq);
 
-- 
1.8.3.1
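
The ordering this relies on can be sketched standalone (illustration only;
the names are hypothetical, not QEMU API): all non-head slots are written
first, then the head slot is published with release semantics, so a driver
that observes the head's flags also observes the rest of the chain.

    #include <stdatomic.h>
    #include <stdint.h>

    struct slot {
        uint32_t len;               /* payload bytes written to the buffer */
        _Atomic uint16_t flags;     /* AVAIL/USED bits, as in the patches */
    };

    /* Fill every non-head slot first, then publish the head last. */
    static void publish_chain(struct slot *ring, uint16_t head,
                              const uint16_t *others, int n_others,
                              uint16_t used_flags)
    {
        for (int i = 0; i < n_others; i++) {
            atomic_store_explicit(&ring[others[i]].flags, used_flags,
                                  memory_order_relaxed);
        }
        /* Everything above must be visible before the head flips. */
        atomic_store_explicit(&ring[head].flags, used_flags,
                              memory_order_release);
    }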




[Qemu-devel] [PATCH v4 08/11] virtio: event suppression support for packed ring

2019-02-13 Thread wexu
From: Wei Xu 

Difference between 'avail_wrap_counter' and 'last_avail_wrap_counter':
For Tx (guest transmitting), they are the same after each pop of a desc.

For Rx (guest receiving), they are also the same when there are enough
descriptors to carry the payload of a packet (e.g. usually 16 descs are
needed for a 64k packet in a typical iperf tcp connection with tso
enabled). However, when the ring is running out of descriptors while
there are still a few free ones, e.g. 6 descriptors are available,
which is not enough to carry an entire packet that needs 16, then
'avail_wrap_counter' should be set to match the first descriptor still
pending in the guest driver in order to get a notification, while
'last_avail_wrap_counter' should stay unchanged, pointing at the head
of the available descriptors, like below:

Mark meaning:
| | -- available
|*| -- used

A snapshot of the queue:

                                           last_avail_idx = 253
                                           last_avail_wrap_counter = 1
                                               |
    +-----------------------------------------------+
 0  | | | |*|*|*|*|*|*|*|*|*|*|*|*|*|*|*|*|*| | | | 255
    +-----------------------------------------------+
           |
           shadow_avail_idx = 3
           avail_wrap_counter = 0

Signed-off-by: Wei Xu 
---
 hw/virtio/virtio.c | 137 +
 1 file changed, 128 insertions(+), 9 deletions(-)

diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index 7e276b4..8cfc7b6 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -234,6 +234,34 @@ static void vring_desc_read(VirtIODevice *vdev, VRingDesc *desc,
 virtio_tswap16s(vdev, >next);
 }
 
+static void vring_packed_event_read(VirtIODevice *vdev,
+MemoryRegionCache *cache, VRingPackedDescEvent *e)
+{
+address_space_read_cached(cache, 0, e, sizeof(*e));
+virtio_tswap16s(vdev, &e->off_wrap);
+virtio_tswap16s(vdev, &e->flags);
+}
+
+static void vring_packed_off_wrap_write(VirtIODevice *vdev,
+MemoryRegionCache *cache, uint16_t off_wrap)
+{
+virtio_tswap16s(vdev, &off_wrap);
+address_space_write_cached(cache, offsetof(VRingPackedDescEvent, off_wrap),
+&off_wrap, sizeof(off_wrap));
+address_space_cache_invalidate(cache,
+offsetof(VRingPackedDescEvent, off_wrap), sizeof(off_wrap));
+}
+
+static void vring_packed_flags_write(VirtIODevice *vdev,
+MemoryRegionCache *cache, uint16_t flags)
+{
+virtio_tswap16s(vdev, &flags);
+address_space_write_cached(cache, offsetof(VRingPackedDescEvent, flags),
+&flags, sizeof(flags));
+address_space_cache_invalidate(cache,
+offsetof(VRingPackedDescEvent, flags), sizeof(flags));
+}
+
 static VRingMemoryRegionCaches *vring_get_region_caches(struct VirtQueue *vq)
 {
 VRingMemoryRegionCaches *caches = atomic_rcu_read(&vq->vring.caches);
@@ -340,14 +368,8 @@ static inline void vring_set_avail_event(VirtQueue *vq, uint16_t val)
 address_space_cache_invalidate(&caches->used, pa, sizeof(val));
 }
 
-void virtio_queue_set_notification(VirtQueue *vq, int enable)
+static void virtio_queue_set_notification_split(VirtQueue *vq, int enable)
 {
-vq->notification = enable;
-
-if (!vq->vring.desc) {
-return;
-}
-
 rcu_read_lock();
 if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
 vring_set_avail_event(vq, vring_avail_idx(vq));
@@ -363,6 +385,57 @@ void virtio_queue_set_notification(VirtQueue *vq, int enable)
 rcu_read_unlock();
 }
 
+static void virtio_queue_set_notification_packed(VirtQueue *vq, int enable)
+{
+VRingPackedDescEvent e;
+VRingMemoryRegionCaches *caches;
+
+rcu_read_lock();
+caches = vring_get_region_caches(vq);
+vring_packed_event_read(vq->vdev, &caches->used, &e);
+
+if (!enable) {
+if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
+/* no need to write device area since this is outdated. */
+goto out;
+}
+
+e.flags = VRING_PACKED_EVENT_FLAG_DISABLE;
+goto update;
+}
+
+e.flags = VRING_PACKED_EVENT_FLAG_ENABLE;
+if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
+uint16_t off_wrap = vq->shadow_avail_idx | vq->avail_wrap_counter << 15;
+
+vring_packed_off_wrap_write(vq->vdev, &caches->used, off_wrap);
+/* Make sure off_wrap is written before flags */
+smp_wmb();
+
+e.flags = VRING_PACKED_EVENT_FLAG_DESC;
+}
+
+update:
+vring_packed_flags_write(vq->vdev, &caches->used, e.flags);
+out:
+rcu_read_unlock();
+}
+
+void virtio_queue_set_notification(VirtQueue *vq, int enable)
+{
+vq->notification = enable;
+
+if (!vq->vring.desc) {
+return;
+}
+
+if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
+virtio_queue_set_notification_packed(vq, enable);
+} else {
+
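
The rest of this patch is truncated in the archive. For reference, the
off_wrap value written above packs the index into bits 0..14 and the wrap
counter into bit 15; a standalone sketch of that encoding (illustration
only, not QEMU code):

    #include <assert.h>
    #include <stdbool.h>
    #include <stdint.h>

    static uint16_t off_wrap_encode(uint16_t idx, bool wrap)
    {
        return (idx & 0x7fff) | ((uint16_t)wrap << 15);
    }

    static void off_wrap_decode(uint16_t v, uint16_t *idx, bool *wrap)
    {
        *idx = v & 0x7fff;
        *wrap = !!(v & 0x8000);
    }

    int main(void)
    {
        uint16_t idx;
        bool wrap;

        /* The snapshot from the commit message: shadow_avail_idx = 3,
         * avail_wrap_counter = 0. */
        off_wrap_decode(off_wrap_encode(3, false), &idx, &wrap);
        assert(idx == 3 && !wrap);
        return 0;
    }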

[Qemu-devel] [PATCH v4 10/11] virtio: migration support for packed ring

2019-02-13 Thread wexu
From: Wei Xu 

Both userspace and vhost-net/user are supported with this patch.

A new subsection is introduced for the packed ring; only 'last_avail_idx'
and 'last_avail_wrap_counter' are saved/loaded, on the presumption that
the other relevant data (inuse, the used/avail indices and wrap counters)
should be the same at save time.

Signed-off-by: Wei Xu 
---
 hw/virtio/virtio.c | 69 +++---
 1 file changed, 66 insertions(+), 3 deletions(-)

diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index 8cfc7b6..7c5de07 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -2349,6 +2349,13 @@ static bool virtio_virtqueue_needed(void *opaque)
 return virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1);
 }
 
+static bool virtio_packed_virtqueue_needed(void *opaque)
+{
+VirtIODevice *vdev = opaque;
+
+return virtio_host_has_feature(vdev, VIRTIO_F_RING_PACKED);
+}
+
 static bool virtio_ringsize_needed(void *opaque)
 {
 VirtIODevice *vdev = opaque;
@@ -2390,6 +2397,17 @@ static const VMStateDescription vmstate_virtqueue = {
 }
 };
 
+static const VMStateDescription vmstate_packed_virtqueue = {
+.name = "packed_virtqueue_state",
+.version_id = 1,
+.minimum_version_id = 1,
+.fields = (VMStateField[]) {
+VMSTATE_UINT16(last_avail_idx, struct VirtQueue),
+VMSTATE_BOOL(last_avail_wrap_counter, struct VirtQueue),
+VMSTATE_END_OF_LIST()
+}
+};
+
 static const VMStateDescription vmstate_virtio_virtqueues = {
 .name = "virtio/virtqueues",
 .version_id = 1,
@@ -2402,6 +2420,18 @@ static const VMStateDescription vmstate_virtio_virtqueues = {
 }
 };
 
+static const VMStateDescription vmstate_virtio_packed_virtqueues = {
+.name = "virtio/packed_virtqueues",
+.version_id = 1,
+.minimum_version_id = 1,
+.needed = &virtio_packed_virtqueue_needed,
+.fields = (VMStateField[]) {
+VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
+  VIRTIO_QUEUE_MAX, 0, vmstate_packed_virtqueue, VirtQueue),
+VMSTATE_END_OF_LIST()
+}
+};
+
 static const VMStateDescription vmstate_ringsize = {
 .name = "ringsize_state",
 .version_id = 1,
@@ -2522,6 +2552,7 @@ static const VMStateDescription vmstate_virtio = {
 &vmstate_virtio_ringsize,
 &vmstate_virtio_broken,
 &vmstate_virtio_extra_state,
+&vmstate_virtio_packed_virtqueues,
 NULL
 }
 };
@@ -2794,6 +2825,17 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id)
 virtio_queue_update_rings(vdev, i);
 }
 
+if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
+vdev->vq[i].shadow_avail_idx = vdev->vq[i].last_avail_idx;
+vdev->vq[i].avail_wrap_counter =
+vdev->vq[i].last_avail_wrap_counter;
+
+vdev->vq[i].used_idx = vdev->vq[i].last_avail_idx;
+vdev->vq[i].used_wrap_counter =
+vdev->vq[i].last_avail_wrap_counter;
+continue;
+}
+
 nheads = vring_avail_idx(&vdev->vq[i]) - vdev->vq[i].last_avail_idx;
 /* Check it isn't doing strange things with descriptor numbers. */
 if (nheads > vdev->vq[i].vring.num) {
@@ -2955,17 +2997,34 @@ hwaddr virtio_queue_get_used_size(VirtIODevice *vdev, int n)
 
 uint16_t virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n)
 {
-return vdev->vq[n].last_avail_idx;
+uint16_t idx;
+
+if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
+idx = vdev->vq[n].last_avail_idx;
+idx |= ((int)vdev->vq[n].avail_wrap_counter) << 15;
+} else {
+idx = (int)vdev->vq[n].last_avail_idx;
+}
+return idx;
 }
 
 void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n, uint16_t idx)
 {
-vdev->vq[n].last_avail_idx = idx;
-vdev->vq[n].shadow_avail_idx = idx;
+if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
+vdev->vq[n].last_avail_idx = idx & 0x7fff;
+vdev->vq[n].avail_wrap_counter = !!(idx & 0x8000);
+} else {
+vdev->vq[n].last_avail_idx = idx;
+vdev->vq[n].shadow_avail_idx = idx;
+}
 }
 
 void virtio_queue_restore_last_avail_idx(VirtIODevice *vdev, int n)
 {
+if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
+return;
+}
+
 rcu_read_lock();
 if (vdev->vq[n].vring.desc) {
 vdev->vq[n].last_avail_idx = vring_used_idx(&vdev->vq[n]);
@@ -2976,6 +3035,10 @@ void virtio_queue_restore_last_avail_idx(VirtIODevice *vdev, int n)
 
 void virtio_queue_update_used_idx(VirtIODevice *vdev, int n)
 {
+if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
+return;
+}
+
 rcu_read_lock();
 if (vdev->vq[n].vring.desc) {
 vdev->vq[n].used_idx = vring_used_idx(&vdev->vq[n]);
-- 
1.8.3.1
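
The derivation done in virtio_load() above can be sketched standalone
(illustration only; the struct is a hypothetical mirror of the relevant
VirtQueue fields): with only last_avail_idx and last_avail_wrap_counter
migrated, the remaining packed-ring state is reconstructed on the
assumption that the queue was drained at save time.

    #include <assert.h>
    #include <stdbool.h>
    #include <stdint.h>

    struct packed_vq {                 /* hypothetical mirror of VirtQueue */
        uint16_t last_avail_idx;
        bool last_avail_wrap_counter;
        uint16_t shadow_avail_idx, used_idx;
        bool avail_wrap_counter, used_wrap_counter;
    };

    /* Mirrors the packed-ring branch of the virtio_load() hunk above. */
    static void packed_vq_restore(struct packed_vq *vq)
    {
        vq->shadow_avail_idx = vq->last_avail_idx;
        vq->avail_wrap_counter = vq->last_avail_wrap_counter;
        vq->used_idx = vq->last_avail_idx;
        vq->used_wrap_counter = vq->last_avail_wrap_counter;
    }

    int main(void)
    {
        struct packed_vq vq = { .last_avail_idx = 253,
                                .last_avail_wrap_counter = true };

        packed_vq_restore(&vq);
        assert(vq.used_idx == 253 && vq.used_wrap_counter);
        return 0;
    }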




[Qemu-devel] [PATCH v4 05/11] virtio: queue/descriptor check helpers for packed ring

2019-02-13 Thread wexu
From: Wei Xu 

These are descriptor available and queue empty check helpers.

Signed-off-by: Wei Xu 
---
 hw/virtio/virtio.c | 56 +-
 1 file changed, 55 insertions(+), 1 deletion(-)

diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index 54dc098..f2ff980 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -368,6 +368,25 @@ int virtio_queue_ready(VirtQueue *vq)
 return vq->vring.avail != 0;
 }
 
+static void vring_packed_desc_read_flags(VirtIODevice *vdev,
+VRingPackedDesc *desc, MemoryRegionCache *cache, int i)
+{
+address_space_read_cached(cache,
+  i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, flags),
+  &desc->flags, sizeof(desc->flags));
+virtio_tswap16s(vdev, >flags);
+}
+
+static inline bool is_desc_avail(struct VRingPackedDesc *desc,
+bool wrap_counter)
+{
+bool avail, used;
+
+avail = !!(desc->flags & (1 << VRING_PACKED_DESC_F_AVAIL));
+used = !!(desc->flags & (1 << VRING_PACKED_DESC_F_USED));
+return (avail != used) && (avail == wrap_counter);
+}
+
 /* Fetch avail_idx from VQ memory only when we really need to know if
  * guest has added some buffers.
  * Called within rcu_read_lock().  */
@@ -388,7 +407,7 @@ static int virtio_queue_empty_rcu(VirtQueue *vq)
 return vring_avail_idx(vq) == vq->last_avail_idx;
 }
 
-int virtio_queue_empty(VirtQueue *vq)
+static int virtio_queue_split_empty(VirtQueue *vq)
 {
 bool empty;
 
@@ -410,6 +429,41 @@ int virtio_queue_empty(VirtQueue *vq)
 return empty;
 }
 
+static int virtio_queue_packed_empty_rcu(VirtQueue *vq)
+{
+struct VRingPackedDesc desc;
+VRingMemoryRegionCaches *cache;
+
+if (unlikely(!vq->vring.desc)) {
+return 1;
+}
+
+cache = vring_get_region_caches(vq);
+vring_packed_desc_read_flags(vq->vdev, &desc, &cache->desc,
+vq->last_avail_idx);
+
+return !is_desc_avail(&desc, vq->last_avail_wrap_counter);
+}
+
+static int virtio_queue_packed_empty(VirtQueue *vq)
+{
+bool empty;
+
+rcu_read_lock();
+empty = virtio_queue_packed_empty_rcu(vq);
+rcu_read_unlock();
+return empty;
+}
+
+int virtio_queue_empty(VirtQueue *vq)
+{
+if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
+return virtio_queue_packed_empty(vq);
+} else {
+return virtio_queue_split_empty(vq);
+}
+}
+
 static void virtqueue_unmap_sg(VirtQueue *vq, const VirtQueueElement *elem,
unsigned int len)
 {
-- 
1.8.3.1
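
For reference, the availability test can be exercised standalone; a minimal
sketch (not part of the patch; bit positions per the virtio 1.1 spec,
VRING_PACKED_DESC_F_AVAIL = 7 and VRING_PACKED_DESC_F_USED = 15):

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define VRING_PACKED_DESC_F_AVAIL 7
    #define VRING_PACKED_DESC_F_USED  15

    static bool is_desc_avail(uint16_t flags, bool wrap_counter)
    {
        bool avail = !!(flags & (1 << VRING_PACKED_DESC_F_AVAIL));
        bool used = !!(flags & (1 << VRING_PACKED_DESC_F_USED));

        return (avail != used) && (avail == wrap_counter);
    }

    int main(void)
    {
        /* First lap: driver sets AVAIL=1, USED=0 -> available. */
        printf("%d\n", is_desc_avail(1 << VRING_PACKED_DESC_F_AVAIL, true));
        /* Device marked it used (AVAIL=1, USED=1) -> no longer available. */
        printf("%d\n", is_desc_avail((1 << VRING_PACKED_DESC_F_AVAIL) |
                                     (1 << VRING_PACKED_DESC_F_USED), true));
        /* Second lap (wrap counter flipped): AVAIL=0, USED=1 -> available. */
        printf("%d\n", is_desc_avail(1 << VRING_PACKED_DESC_F_USED, false));
        return 0;
    }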




[Qemu-devel] [PATCH v4 11/11] virtio: CLI and provide packed ring feature bit by default

2019-02-13 Thread wexu
From: Wei Xu 

Add userspace and vhost kernel/user support.

Add CLI "ring_packed=true/false" to enable/disable packed ring provision.
Usage:
-device virtio-net-pci,netdev=xx,mac=xx:xx:xx:xx:xx:xx,ring_packed=false

By default it is provided.

Signed-off-by: Wei Xu 
---
 hw/net/vhost_net.c | 2 ++
 include/hw/virtio/virtio.h | 4 +++-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
index e037db6..f593086 100644
--- a/hw/net/vhost_net.c
+++ b/hw/net/vhost_net.c
@@ -53,6 +53,7 @@ static const int kernel_feature_bits[] = {
 VIRTIO_F_VERSION_1,
 VIRTIO_NET_F_MTU,
 VIRTIO_F_IOMMU_PLATFORM,
+VIRTIO_F_RING_PACKED,
 VHOST_INVALID_FEATURE_BIT
 };
 
@@ -78,6 +79,7 @@ static const int user_feature_bits[] = {
 VIRTIO_NET_F_MRG_RXBUF,
 VIRTIO_NET_F_MTU,
 VIRTIO_F_IOMMU_PLATFORM,
+VIRTIO_F_RING_PACKED,
 
 /* This bit implies RARP isn't sent by QEMU out of band */
 VIRTIO_NET_F_GUEST_ANNOUNCE,
diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
index 9c1fa07..2eb27d2 100644
--- a/include/hw/virtio/virtio.h
+++ b/include/hw/virtio/virtio.h
@@ -264,7 +264,9 @@ typedef struct VirtIORNGConf VirtIORNGConf;
 DEFINE_PROP_BIT64("any_layout", _state, _field, \
   VIRTIO_F_ANY_LAYOUT, true), \
 DEFINE_PROP_BIT64("iommu_platform", _state, _field, \
-  VIRTIO_F_IOMMU_PLATFORM, false)
+  VIRTIO_F_IOMMU_PLATFORM, false), \
+DEFINE_PROP_BIT64("ring_packed", _state, _field, \
+  VIRTIO_F_RING_PACKED, true)
 
 hwaddr virtio_queue_get_desc_addr(VirtIODevice *vdev, int n);
 hwaddr virtio_queue_get_avail_addr(VirtIODevice *vdev, int n);
-- 
1.8.3.1
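
For example, to turn the provision off on one device (sketch; the netdev id,
MAC and backend are placeholders):

    qemu-system-x86_64 ... \
        -netdev tap,id=net0 \
        -device virtio-net-pci,netdev=net0,mac=52:54:00:12:34:56,ring_packed=false

Leaving the property out (or setting ring_packed=true) keeps
VIRTIO_F_RING_PACKED offered to the guest.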




[Qemu-devel] [PATCH v4 01/11] virtio: rename structure for packed ring

2019-02-13 Thread wexu
From: Wei Xu 

Redefine the packed ring structures according to QEMU nomenclature.

Signed-off-by: Wei Xu 
---
 hw/virtio/virtio.c | 15 +++
 1 file changed, 15 insertions(+)

diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index a1ff647..eafb4cc 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -39,6 +39,13 @@ typedef struct VRingDesc
 uint16_t next;
 } VRingDesc;
 
+typedef struct VRingPackedDesc {
+uint64_t addr;
+uint32_t len;
+uint16_t id;
+uint16_t flags;
+} VRingPackedDesc;
+
 typedef struct VRingAvail
 {
 uint16_t flags;
@@ -77,17 +84,25 @@ typedef struct VRing
 VRingMemoryRegionCaches *caches;
 } VRing;
 
+typedef struct VRingPackedDescEvent {
+uint16_t off_wrap;
+uint16_t flags;
+} VRingPackedDescEvent;
+
 struct VirtQueue
 {
 VRing vring;
 
 /* Next head to pop */
 uint16_t last_avail_idx;
+bool last_avail_wrap_counter;
 
 /* Last avail_idx read from VQ. */
 uint16_t shadow_avail_idx;
+bool avail_wrap_counter;
 
 uint16_t used_idx;
+bool used_wrap_counter;
 
 /* Last used index value we have signalled on */
 uint16_t signalled_used;
-- 
1.8.3.1




[Qemu-devel] [PATCH v4 00/11] packed ring virtio-net backends support

2019-02-13 Thread wexu
From: Wei Xu 

https://github.com/Whishay/qemu.git 

Userspace and vhost-net backend tests have been done with an upstream
kernel in the guest.

v3->v4:
- add version number to the subject of each patch.(mst)

v2->v3:
v2/01 - drop it since the header has been synchronized from kernel.(mst & jason)
v3/01 - rename 'avail_wrap_counter' to 'last_avail_wrap_counter',
'event_wrap_counter' to 'avail_wrap_counter' to make it easier
to understand.(Jason)
  - revise commit message.(Jason)
v3/02 - split packed ring areas size calculation to next patch.(Jason)
to not break bisect.(Jason)
v3/03 - initialize packed ring region with correct size and attribute.
  - remove unnecessary 'else' checks.(Jason)
v3/06 - add commit log.
  - replace 'event_wrap-counter' with 'avail_wrap_counter'.
  - merge common memory cache size check to virtqueue_get_avail_bytes().(Jason)
  - revise memory barrier comment.(Jason)
  - check indirect descriptors by desc.len/sizeof(desc).(Jason)
  - flip wrap counter with '^=1'.(Jason)
v3/07 - move desc.id/len initialization to the declaration.(Jason)
  - flip wrap counter '!' with '^=1'.(Jason)
  - add memory barrier comments in commit message.
v3/08 - use offsetof() when writing cache.(Jason)
  - avoid duplicated memory region write when turning off event_idx
supported notification.(Jason)
  - add commit log.(Jason)
  - add avail & last_avail wrap counter difference description in commit log.
v3/09 - remove unnecessary used/avail idx/wrap-counter from subsection.
  - put new subsection to the end of vmstate_virtio.(Jason)
  - squash the two userspace and vhost-net migration patches in v2.(Jason)
v3/10 - reword commit message.
  - this is a helper, not a bug fix, so I would like to keep it as a
separate patch still.(a merge was proposed by Jason)
  - virtqueue_fill() is also not really an API, so I would prefer not
to touch it; please correct me if I did not get it right.(a squash
was proposed by Jason)
v3/11 - squash feature bits for user space and vhost kernel/user backends.
  - enable packed ring feature bit provision on host by default.(Jason)

Wei Xu (11):
  virtio: rename structure for packed ring
  virtio: device/driver area size calculation helper for split ring
  virtio: initialize packed ring region
  virtio: initialize wrap counter for packed ring
  virtio: queue/descriptor check helpers for packed ring
  virtio: get avail bytes check for packed ring
  virtio: fill/flush/pop for packed ring
  virtio: event suppression support for packed ring
  virtio-net: update the head descriptor in a chain lastly
  virtio: migration support for packed ring
  virtio: CLI and provide packed ring feature bit by default

 hw/net/vhost_net.c |   2 +
 hw/net/virtio-net.c|  11 +-
 hw/virtio/virtio.c | 798 +
 include/hw/virtio/virtio.h |   4 +-
 4 files changed, 757 insertions(+), 58 deletions(-)

-- 
1.8.3.1




[Qemu-devel] [PATCH v4 07/11] virtio: fill/flush/pop for packed ring

2019-02-13 Thread wexu
From: Wei Xu 

last_used_idx/wrap_counter should be equal to last_avail_idx/wrap_counter
after a successful flush.

Batching as in vhost-net & dpdk testpmd is not equivalently supported in
the userspace backend, but a chain of descriptors for Rx is similarly
presented as a lightweight batch, so a write barrier is needed only for
the first (head) descriptor.

Signed-off-by: Wei Xu 
---
 hw/virtio/virtio.c | 291 +
 1 file changed, 274 insertions(+), 17 deletions(-)

diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index 832287b..7e276b4 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -379,6 +379,25 @@ static void vring_packed_desc_read(VirtIODevice *vdev, VRingPackedDesc *desc,
 virtio_tswap16s(vdev, &desc->id);
 }
 
+static void vring_packed_desc_write_data(VirtIODevice *vdev,
+VRingPackedDesc *desc, MemoryRegionCache *cache, int i)
+{
+virtio_tswap32s(vdev, >len);
+virtio_tswap16s(vdev, >id);
+address_space_write_cached(cache,
+  i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, id),
+  &desc->id, sizeof(desc->id));
+address_space_cache_invalidate(cache,
+  i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, id),
+  sizeof(desc->id));
+address_space_write_cached(cache,
+  i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, len),
+  &desc->len, sizeof(desc->len));
+address_space_cache_invalidate(cache,
+  i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, len),
+  sizeof(desc->len));
+}
+
 static void vring_packed_desc_read_flags(VirtIODevice *vdev,
 VRingPackedDesc *desc, MemoryRegionCache *cache, int i)
 {
@@ -388,6 +407,18 @@ static void vring_packed_desc_read_flags(VirtIODevice *vdev,
 virtio_tswap16s(vdev, &desc->flags);
 }
 
+static void vring_packed_desc_write_flags(VirtIODevice *vdev,
+VRingPackedDesc *desc, MemoryRegionCache *cache, int i)
+{
+virtio_tswap16s(vdev, &desc->flags);
+address_space_write_cached(cache,
+  i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, flags),
+  &desc->flags, sizeof(desc->flags));
+address_space_cache_invalidate(cache,
+  i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, flags),
+  sizeof(desc->flags));
+}
+
 static inline bool is_desc_avail(struct VRingPackedDesc *desc,
 bool wrap_counter)
 {
@@ -554,19 +585,11 @@ bool virtqueue_rewind(VirtQueue *vq, unsigned int num)
 }
 
 /* Called within rcu_read_lock().  */
-void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
+static void virtqueue_split_fill(VirtQueue *vq, const VirtQueueElement *elem,
 unsigned int len, unsigned int idx)
 {
 VRingUsedElem uelem;
 
-trace_virtqueue_fill(vq, elem, len, idx);
-
-virtqueue_unmap_sg(vq, elem, len);
-
-if (unlikely(vq->vdev->broken)) {
-return;
-}
-
 if (unlikely(!vq->vring.used)) {
 return;
 }
@@ -578,16 +601,71 @@ void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
 vring_used_write(vq, &uelem, idx);
 }
 
-/* Called within rcu_read_lock().  */
-void virtqueue_flush(VirtQueue *vq, unsigned int count)
+static void virtqueue_packed_fill(VirtQueue *vq, const VirtQueueElement *elem,
+unsigned int len, unsigned int idx)
 {
-uint16_t old, new;
+uint16_t head;
+VRingMemoryRegionCaches *caches;
+VRingPackedDesc desc = {
+.flags = 0,
+.id = elem->index,
+.len = len,
+};
+bool wrap_counter = vq->used_wrap_counter;
+
+if (unlikely(!vq->vring.desc)) {
+return;
+}
+
+head = vq->used_idx + idx;
+if (head >= vq->vring.num) {
+head -= vq->vring.num;
+wrap_counter ^= 1;
+}
+if (wrap_counter) {
+desc.flags |= (1 << VRING_PACKED_DESC_F_AVAIL);
+desc.flags |= (1 << VRING_PACKED_DESC_F_USED);
+} else {
+desc.flags &= ~(1 << VRING_PACKED_DESC_F_AVAIL);
+desc.flags &= ~(1 << VRING_PACKED_DESC_F_USED);
+}
+
+caches = vring_get_region_caches(vq);
+vring_packed_desc_write_data(vq->vdev, &desc, &caches->desc, head);
+if (idx == 0) {
+/*
+ * Make sure descriptor id and len is written before
+ * flags for the first used buffer.
+ */
+smp_wmb();
+}
+
+vring_packed_desc_write_flags(vq->vdev, &desc, &caches->desc, head);
+}
+
+void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
+unsigned int len, unsigned int idx)
+{
+trace_virtqueue_fill(vq, elem, len, idx);
+
+virtqueue_unmap_sg(vq, elem, len);
 
 if (unlikely(vq->vdev->broken)) {
-vq->inuse -= count;
 return;
 }
 
+if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
+virtqueue_packed_fill(vq, elem, len, idx);
+} else {
+virtqueue_split_fill(vq, elem, 
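
The rest of this patch is truncated in the archive. The head slot and
wrap-counter arithmetic of virtqueue_packed_fill() above can be checked in
isolation; a standalone sketch (illustration only, not QEMU code):

    #include <assert.h>
    #include <stdbool.h>
    #include <stdint.h>

    /* Mirrors the head/wrap computation in virtqueue_packed_fill(). */
    static uint16_t packed_used_slot(uint16_t used_idx, uint16_t idx,
                                     uint16_t num, bool *wrap_counter)
    {
        uint16_t head = used_idx + idx;

        if (head >= num) {
            head -= num;
            *wrap_counter ^= 1;
        }
        return head;
    }

    int main(void)
    {
        bool wrap = true;

        /* Filling the buffer 3 slots past used_idx 254 in a 256-entry
         * ring wraps to slot 1 and flips the wrap counter. */
        assert(packed_used_slot(254, 3, 256, &wrap) == 1 && !wrap);
        return 0;
    }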

[Qemu-devel] [PATCH v4 04/11] virtio: initialize wrap counter for packed ring

2019-02-13 Thread wexu
From: Wei Xu 

Set to 'true' by default, as required by the spec.

Signed-off-by: Wei Xu 
---
 hw/virtio/virtio.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index 1a98e61..54dc098 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -1238,6 +1238,9 @@ void virtio_reset(void *opaque)
 vdev->vq[i].last_avail_idx = 0;
 vdev->vq[i].shadow_avail_idx = 0;
 vdev->vq[i].used_idx = 0;
+vdev->vq[i].last_avail_wrap_counter = true;
+vdev->vq[i].avail_wrap_counter = true;
+vdev->vq[i].used_wrap_counter = true;
 virtio_queue_set_vector(vdev, i, VIRTIO_NO_VECTOR);
 vdev->vq[i].signalled_used = 0;
 vdev->vq[i].signalled_used_valid = false;
-- 
1.8.3.1




[Qemu-devel] [PATCH v4 03/11] virtio: initialize packed ring region

2019-02-13 Thread wexu
From: Wei Xu 

Initialize packed ring memory region with correct size and attribute.

Signed-off-by: Wei Xu 
---
 hw/virtio/virtio.c | 14 --
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index 6769e54..1a98e61 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -156,7 +156,7 @@ static void virtio_init_region_cache(VirtIODevice *vdev, int n)
 VRingMemoryRegionCaches *new = NULL;
 hwaddr addr, size;
 int64_t len;
-
+bool attr;
 
 addr = vq->vring.desc;
 if (!addr) {
@@ -164,8 +164,10 @@ static void virtio_init_region_cache(VirtIODevice *vdev, int n)
 }
 new = g_new0(VRingMemoryRegionCaches, 1);
 size = virtio_queue_get_desc_size(vdev, n);
+attr = virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED) ?
+   true : false;
 len = address_space_cache_init(&new->desc, vdev->dma_as,
-   addr, size, false);
+   addr, size, attr);
 if (len < size) {
 virtio_error(vdev, "Cannot map desc");
 goto err_desc;
@@ -2335,6 +2337,10 @@ hwaddr virtio_queue_get_avail_size(VirtIODevice *vdev, int n)
 {
 int s;
 
+if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
+return sizeof(struct VRingPackedDescEvent);
+}
+
 s = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
 return offsetof(VRingAvail, ring) +
 sizeof(uint16_t) * vdev->vq[n].vring.num + s;
@@ -2344,6 +2350,10 @@ hwaddr virtio_queue_get_used_size(VirtIODevice *vdev, int n)
 {
 int s;
 
+if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
+return sizeof(struct VRingPackedDescEvent);
+}
+
 s = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
 return offsetof(VRingUsed, ring) +
 sizeof(VRingUsedElem) * vdev->vq[n].vring.num + s;
-- 
1.8.3.1
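
As a cross-check of the sizes returned above (standalone sketch; the struct
layouts mirror VRingPackedDesc/VRingPackedDescEvent from this series): the
descriptor area is num * 16 bytes, while the driver and device event areas
are a fixed 4 bytes each, independent of the ring size.

    #include <assert.h>
    #include <stdint.h>

    struct vring_packed_desc { uint64_t addr; uint32_t len; uint16_t id, flags; };
    struct vring_packed_desc_event { uint16_t off_wrap, flags; };

    int main(void)
    {
        unsigned num = 256;

        assert(sizeof(struct vring_packed_desc) == 16);
        assert(num * sizeof(struct vring_packed_desc) == 4096); /* desc area */
        assert(sizeof(struct vring_packed_desc_event) == 4);    /* event area */
        return 0;
    }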




[Qemu-devel] [PATCH v4 02/11] virtio: device/driver area size calculation helper for split ring

2019-02-13 Thread wexu
From: Wei Xu 

There is a slight size difference between the split and packed rings.

This is a refactor of the split ring as well as a helper for expanding
the device and driver area size calculation for the packed ring.

Signed-off-by: Wei Xu 
---
 hw/virtio/virtio.c | 16 ++--
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index eafb4cc..6769e54 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -155,10 +155,8 @@ static void virtio_init_region_cache(VirtIODevice *vdev, int n)
 VRingMemoryRegionCaches *old = vq->vring.caches;
 VRingMemoryRegionCaches *new = NULL;
 hwaddr addr, size;
-int event_size;
 int64_t len;
 
-event_size = virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
 
 addr = vq->vring.desc;
 if (!addr) {
@@ -173,7 +171,7 @@ static void virtio_init_region_cache(VirtIODevice *vdev, int n)
 goto err_desc;
 }
 
-size = virtio_queue_get_used_size(vdev, n) + event_size;
+size = virtio_queue_get_used_size(vdev, n);
 len = address_space_cache_init(&new->used, vdev->dma_as,
vq->vring.used, size, true);
 if (len < size) {
@@ -181,7 +179,7 @@ static void virtio_init_region_cache(VirtIODevice *vdev, int n)
 goto err_used;
 }
 
-size = virtio_queue_get_avail_size(vdev, n) + event_size;
+size = virtio_queue_get_avail_size(vdev, n);
 len = address_space_cache_init(&new->avail, vdev->dma_as,
vq->vring.avail, size, false);
 if (len < size) {
@@ -2335,14 +2333,20 @@ hwaddr virtio_queue_get_desc_size(VirtIODevice *vdev, int n)
 
 hwaddr virtio_queue_get_avail_size(VirtIODevice *vdev, int n)
 {
+int s;
+
+s = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
 return offsetof(VRingAvail, ring) +
-sizeof(uint16_t) * vdev->vq[n].vring.num;
+sizeof(uint16_t) * vdev->vq[n].vring.num + s;
 }
 
 hwaddr virtio_queue_get_used_size(VirtIODevice *vdev, int n)
 {
+int s;
+
+s = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
 return offsetof(VRingUsed, ring) +
-sizeof(VRingUsedElem) * vdev->vq[n].vring.num;
+sizeof(VRingUsedElem) * vdev->vq[n].vring.num + s;
 }
 
 uint16_t virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n)
-- 
1.8.3.1
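
A worked check of the sizes computed above, for a 256-entry queue with
VIRTIO_RING_F_EVENT_IDX negotiated (standalone sketch; struct layouts mirror
the split-ring definitions in virtio.c):

    #include <assert.h>
    #include <stddef.h>
    #include <stdint.h>

    typedef struct { uint32_t id, len; } VRingUsedElem;
    typedef struct { uint16_t flags, idx; uint16_t ring[]; } VRingAvail;
    typedef struct { uint16_t flags, idx; VRingUsedElem ring[]; } VRingUsed;

    int main(void)
    {
        unsigned num = 256, s = 2;  /* s = 2 bytes for the event index */

        /* driver (avail) area: 4 + 2 * 256 + 2 = 518 bytes */
        assert(offsetof(VRingAvail, ring) + sizeof(uint16_t) * num + s == 518);
        /* device (used) area: 4 + 8 * 256 + 2 = 2054 bytes */
        assert(offsetof(VRingUsed, ring) + sizeof(VRingUsedElem) * num + s == 2054);
        return 0;
    }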




[Qemu-devel] [PATCH 10/11] virtio: migration support for packed ring

2019-02-13 Thread wexu
From: Wei Xu 

Both userspace and vhost-net/user are supported with this patch.

A new subsection is introduced for packed ring, only 'last_avail_idx'
and 'last_avail_wrap_counter' are saved/loaded presumably based on
all the others relevant data(inuse, used/avail index and wrap count
should be the same.

Signed-off-by: Wei Xu 
---
 hw/virtio/virtio.c | 69 +++---
 1 file changed, 66 insertions(+), 3 deletions(-)

diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index 8cfc7b6..7c5de07 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -2349,6 +2349,13 @@ static bool virtio_virtqueue_needed(void *opaque)
 return virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1);
 }
 
+static bool virtio_packed_virtqueue_needed(void *opaque)
+{
+VirtIODevice *vdev = opaque;
+
+return virtio_host_has_feature(vdev, VIRTIO_F_RING_PACKED);
+}
+
 static bool virtio_ringsize_needed(void *opaque)
 {
 VirtIODevice *vdev = opaque;
@@ -2390,6 +2397,17 @@ static const VMStateDescription vmstate_virtqueue = {
 }
 };
 
+static const VMStateDescription vmstate_packed_virtqueue = {
+.name = "packed_virtqueue_state",
+.version_id = 1,
+.minimum_version_id = 1,
+.fields = (VMStateField[]) {
+VMSTATE_UINT16(last_avail_idx, struct VirtQueue),
+VMSTATE_BOOL(last_avail_wrap_counter, struct VirtQueue),
+VMSTATE_END_OF_LIST()
+}
+};
+
 static const VMStateDescription vmstate_virtio_virtqueues = {
 .name = "virtio/virtqueues",
 .version_id = 1,
@@ -2402,6 +2420,18 @@ static const VMStateDescription 
vmstate_virtio_virtqueues = {
 }
 };
 
+static const VMStateDescription vmstate_virtio_packed_virtqueues = {
+.name = "virtio/packed_virtqueues",
+.version_id = 1,
+.minimum_version_id = 1,
+.needed = _packed_virtqueue_needed,
+.fields = (VMStateField[]) {
+VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
+  VIRTIO_QUEUE_MAX, 0, vmstate_packed_virtqueue, 
VirtQueue),
+VMSTATE_END_OF_LIST()
+}
+};
+
 static const VMStateDescription vmstate_ringsize = {
 .name = "ringsize_state",
 .version_id = 1,
@@ -2522,6 +2552,7 @@ static const VMStateDescription vmstate_virtio = {
 _virtio_ringsize,
 _virtio_broken,
 _virtio_extra_state,
+_virtio_packed_virtqueues,
 NULL
 }
 };
@@ -2794,6 +2825,17 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f, int 
version_id)
 virtio_queue_update_rings(vdev, i);
 }
 
+if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
+vdev->vq[i].shadow_avail_idx = vdev->vq[i].last_avail_idx;
+vdev->vq[i].avail_wrap_counter =
+vdev->vq[i].last_avail_wrap_counter;
+
+vdev->vq[i].used_idx = vdev->vq[i].last_avail_idx;
+vdev->vq[i].used_wrap_counter =
+vdev->vq[i].last_avail_wrap_counter;
+continue;
+}
+
 nheads = vring_avail_idx(>vq[i]) - 
vdev->vq[i].last_avail_idx;
 /* Check it isn't doing strange things with descriptor numbers. */
 if (nheads > vdev->vq[i].vring.num) {
@@ -2955,17 +2997,34 @@ hwaddr virtio_queue_get_used_size(VirtIODevice *vdev, 
int n)
 
 uint16_t virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n)
 {
-return vdev->vq[n].last_avail_idx;
+uint16_t idx;
+
+if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
+idx = vdev->vq[n].last_avail_idx;
+idx |= ((int)vdev->vq[n].avail_wrap_counter) << 15;
+} else {
+idx = (int)vdev->vq[n].last_avail_idx;
+}
+return idx;
 }
 
 void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n, uint16_t idx)
 {
-vdev->vq[n].last_avail_idx = idx;
-vdev->vq[n].shadow_avail_idx = idx;
+if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
+vdev->vq[n].last_avail_idx = idx & 0x7fff;
+vdev->vq[n].avail_wrap_counter = !!(idx & 0x8000);
+} else {
+vdev->vq[n].last_avail_idx = idx;
+vdev->vq[n].shadow_avail_idx = idx;
+}
 }
 
 void virtio_queue_restore_last_avail_idx(VirtIODevice *vdev, int n)
 {
+if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
+return;
+}
+
 rcu_read_lock();
 if (vdev->vq[n].vring.desc) {
 vdev->vq[n].last_avail_idx = vring_used_idx(>vq[n]);
@@ -2976,6 +3035,10 @@ void virtio_queue_restore_last_avail_idx(VirtIODevice 
*vdev, int n)
 
 void virtio_queue_update_used_idx(VirtIODevice *vdev, int n)
 {
+if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
+return;
+}
+
 rcu_read_lock();
 if (vdev->vq[n].vring.desc) {
 vdev->vq[n].used_idx = vring_used_idx(>vq[n]);
-- 
1.8.3.1




[Qemu-devel] [PATCH 05/11] virtio: queue/descriptor check helpers for packed ring

2019-02-13 Thread wexu
From: Wei Xu 

These are descriptor available and queue empty check helpers.

Signed-off-by: Wei Xu 
---
 hw/virtio/virtio.c | 56 +-
 1 file changed, 55 insertions(+), 1 deletion(-)

diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index 54dc098..f2ff980 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -368,6 +368,25 @@ int virtio_queue_ready(VirtQueue *vq)
 return vq->vring.avail != 0;
 }
 
+static void vring_packed_desc_read_flags(VirtIODevice *vdev,
+VRingPackedDesc *desc, MemoryRegionCache *cache, int i)
+{
+address_space_read_cached(cache,
+  i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, flags),
+  >flags, sizeof(desc->flags));
+virtio_tswap16s(vdev, >flags);
+}
+
+static inline bool is_desc_avail(struct VRingPackedDesc *desc,
+bool wrap_counter)
+{
+bool avail, used;
+
+avail = !!(desc->flags & (1 << VRING_PACKED_DESC_F_AVAIL));
+used = !!(desc->flags & (1 << VRING_PACKED_DESC_F_USED));
+return (avail != used) && (avail == wrap_counter);
+}
+
 /* Fetch avail_idx from VQ memory only when we really need to know if
  * guest has added some buffers.
  * Called within rcu_read_lock().  */
@@ -388,7 +407,7 @@ static int virtio_queue_empty_rcu(VirtQueue *vq)
 return vring_avail_idx(vq) == vq->last_avail_idx;
 }
 
-int virtio_queue_empty(VirtQueue *vq)
+static int virtio_queue_split_empty(VirtQueue *vq)
 {
 bool empty;
 
@@ -410,6 +429,41 @@ int virtio_queue_empty(VirtQueue *vq)
 return empty;
 }
 
+static int virtio_queue_packed_empty_rcu(VirtQueue *vq)
+{
+struct VRingPackedDesc desc;
+VRingMemoryRegionCaches *cache;
+
+if (unlikely(!vq->vring.desc)) {
+return 1;
+}
+
+cache = vring_get_region_caches(vq);
+vring_packed_desc_read_flags(vq->vdev, , >desc,
+vq->last_avail_idx);
+
+return !is_desc_avail(, vq->last_avail_wrap_counter);
+}
+
+static int virtio_queue_packed_empty(VirtQueue *vq)
+{
+bool empty;
+
+rcu_read_lock();
+empty = virtio_queue_packed_empty_rcu(vq);
+rcu_read_unlock();
+return empty;
+}
+
+int virtio_queue_empty(VirtQueue *vq)
+{
+if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
+return virtio_queue_packed_empty(vq);
+} else {
+return virtio_queue_split_empty(vq);
+}
+}
+
 static void virtqueue_unmap_sg(VirtQueue *vq, const VirtQueueElement *elem,
unsigned int len)
 {
-- 
1.8.3.1




[Qemu-devel] [PATCH 11/11] virtio: CLI and provide packed ring feature bit by default

2019-02-13 Thread wexu
From: Wei Xu 

Add userspace and vhost kernel/user support.

Add CLI "ring_packed=true/false" to enable/disable packed ring provision.
Usage:
-device virtio-net-pci,netdev=xx,mac=xx:xx:xx:xx:xx:xx,ring_packed=false

By default it is provided.

Signed-off-by: Wei Xu 
---
 hw/net/vhost_net.c | 2 ++
 include/hw/virtio/virtio.h | 4 +++-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
index e037db6..f593086 100644
--- a/hw/net/vhost_net.c
+++ b/hw/net/vhost_net.c
@@ -53,6 +53,7 @@ static const int kernel_feature_bits[] = {
 VIRTIO_F_VERSION_1,
 VIRTIO_NET_F_MTU,
 VIRTIO_F_IOMMU_PLATFORM,
+VIRTIO_F_RING_PACKED,
 VHOST_INVALID_FEATURE_BIT
 };
 
@@ -78,6 +79,7 @@ static const int user_feature_bits[] = {
 VIRTIO_NET_F_MRG_RXBUF,
 VIRTIO_NET_F_MTU,
 VIRTIO_F_IOMMU_PLATFORM,
+VIRTIO_F_RING_PACKED,
 
 /* This bit implies RARP isn't sent by QEMU out of band */
 VIRTIO_NET_F_GUEST_ANNOUNCE,
diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
index 9c1fa07..2eb27d2 100644
--- a/include/hw/virtio/virtio.h
+++ b/include/hw/virtio/virtio.h
@@ -264,7 +264,9 @@ typedef struct VirtIORNGConf VirtIORNGConf;
 DEFINE_PROP_BIT64("any_layout", _state, _field, \
   VIRTIO_F_ANY_LAYOUT, true), \
 DEFINE_PROP_BIT64("iommu_platform", _state, _field, \
-  VIRTIO_F_IOMMU_PLATFORM, false)
+  VIRTIO_F_IOMMU_PLATFORM, false), \
+DEFINE_PROP_BIT64("ring_packed", _state, _field, \
+  VIRTIO_F_RING_PACKED, true)
 
 hwaddr virtio_queue_get_desc_addr(VirtIODevice *vdev, int n);
 hwaddr virtio_queue_get_avail_addr(VirtIODevice *vdev, int n);
-- 
1.8.3.1




[Qemu-devel] [PATCH 06/11] virtio: get avail bytes check for packed ring

2019-02-13 Thread wexu
From: Wei Xu 

Add packed ring headcount check.

Common part of split/packed ring are kept.

Signed-off-by: Wei Xu 
---
 hw/virtio/virtio.c | 197 -
 1 file changed, 179 insertions(+), 18 deletions(-)

diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index f2ff980..832287b 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -368,6 +368,17 @@ int virtio_queue_ready(VirtQueue *vq)
 return vq->vring.avail != 0;
 }
 
+static void vring_packed_desc_read(VirtIODevice *vdev, VRingPackedDesc *desc,
+MemoryRegionCache *cache, int i)
+{
+address_space_read_cached(cache, i * sizeof(VRingPackedDesc),
+  desc, sizeof(VRingPackedDesc));
+virtio_tswap16s(vdev, >flags);
+virtio_tswap64s(vdev, >addr);
+virtio_tswap32s(vdev, >len);
+virtio_tswap16s(vdev, >id);
+}
+
 static void vring_packed_desc_read_flags(VirtIODevice *vdev,
 VRingPackedDesc *desc, MemoryRegionCache *cache, int i)
 {
@@ -667,9 +678,9 @@ static int virtqueue_read_next_desc(VirtIODevice *vdev, 
VRingDesc *desc,
 return VIRTQUEUE_READ_DESC_MORE;
 }
 
-void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
-   unsigned int *out_bytes,
-   unsigned max_in_bytes, unsigned max_out_bytes)
+static void virtqueue_split_get_avail_bytes(VirtQueue *vq,
+unsigned int *in_bytes, unsigned int *out_bytes,
+unsigned max_in_bytes, unsigned max_out_bytes)
 {
 VirtIODevice *vdev = vq->vdev;
 unsigned int max, idx;
@@ -679,27 +690,12 @@ void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned 
int *in_bytes,
 int64_t len = 0;
 int rc;
 
-if (unlikely(!vq->vring.desc)) {
-if (in_bytes) {
-*in_bytes = 0;
-}
-if (out_bytes) {
-*out_bytes = 0;
-}
-return;
-}
-
 rcu_read_lock();
 idx = vq->last_avail_idx;
 total_bufs = in_total = out_total = 0;
 
 max = vq->vring.num;
 caches = vring_get_region_caches(vq);
-if (caches->desc.len < max * sizeof(VRingDesc)) {
-virtio_error(vdev, "Cannot map descriptor ring");
-goto err;
-}
-
 while ((rc = virtqueue_num_heads(vq, idx)) > 0) {
 MemoryRegionCache *desc_cache = >desc;
 unsigned int num_bufs;
@@ -792,6 +788,171 @@ err:
 goto done;
 }
 
+static void virtqueue_packed_get_avail_bytes(VirtQueue *vq,
+unsigned int *in_bytes, unsigned int *out_bytes,
+unsigned max_in_bytes, unsigned max_out_bytes)
+{
+VirtIODevice *vdev = vq->vdev;
+unsigned int max, idx;
+unsigned int total_bufs, in_total, out_total;
+MemoryRegionCache *desc_cache;
+VRingMemoryRegionCaches *caches;
+MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
+int64_t len = 0;
+VRingPackedDesc desc;
+bool wrap_counter;
+
+rcu_read_lock();
+idx = vq->last_avail_idx;
+wrap_counter = vq->last_avail_wrap_counter;
+total_bufs = in_total = out_total = 0;
+
+max = vq->vring.num;
+caches = vring_get_region_caches(vq);
+desc_cache = >desc;
+vring_packed_desc_read_flags(vdev, , desc_cache, idx);
+while (is_desc_avail(, wrap_counter)) {
+unsigned int num_bufs;
+unsigned int i = 0;
+
+num_bufs = total_bufs;
+
+/* Make sure flags has been read before all the fields. */
+smp_rmb();
+vring_packed_desc_read(vdev, , desc_cache, idx);
+
+if (desc.flags & VRING_DESC_F_INDIRECT) {
+if (desc.len % sizeof(VRingPackedDesc)) {
+virtio_error(vdev, "Invalid size for indirect buffer table");
+goto err;
+}
+
+/* If we've got too many, that implies a descriptor loop. */
+if (num_bufs >= max) {
+virtio_error(vdev, "Looped descriptor");
+goto err;
+}
+
+/* loop over the indirect descriptor table */
+len = address_space_cache_init(_desc_cache,
+   vdev->dma_as,
+   desc.addr, desc.len, false);
+desc_cache = _desc_cache;
+if (len < desc.len) {
+virtio_error(vdev, "Cannot map indirect buffer");
+goto err;
+}
+
+max = desc.len / sizeof(VRingPackedDesc);
+num_bufs = i = 0;
+vring_packed_desc_read(vdev, , desc_cache, i);
+}
+
+do {
+/* If we've got too many, that implies a descriptor loop. */
+if (++num_bufs > max) {
+virtio_error(vdev, "Looped descriptor");
+goto err;
+}
+
+if (desc.flags & VRING_DESC_F_WRITE) {
+in_total += desc.len;
+} 

[Qemu-devel] [PATCH 03/11] virtio: initialize packed ring region

2019-02-13 Thread wexu
From: Wei Xu 

Initialize packed ring memory region with correct size and attribute.

Signed-off-by: Wei Xu 
---
 hw/virtio/virtio.c | 14 --
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index 6769e54..1a98e61 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -156,7 +156,7 @@ static void virtio_init_region_cache(VirtIODevice *vdev, 
int n)
 VRingMemoryRegionCaches *new = NULL;
 hwaddr addr, size;
 int64_t len;
-
+bool attr;
 
 addr = vq->vring.desc;
 if (!addr) {
@@ -164,8 +164,10 @@ static void virtio_init_region_cache(VirtIODevice *vdev, 
int n)
 }
 new = g_new0(VRingMemoryRegionCaches, 1);
 size = virtio_queue_get_desc_size(vdev, n);
+attr = virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED) ?
+   true : false;
 len = address_space_cache_init(>desc, vdev->dma_as,
-   addr, size, false);
+   addr, size, attr);
 if (len < size) {
 virtio_error(vdev, "Cannot map desc");
 goto err_desc;
@@ -2335,6 +2337,10 @@ hwaddr virtio_queue_get_avail_size(VirtIODevice *vdev, 
int n)
 {
 int s;
 
+if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
+return sizeof(struct VRingPackedDescEvent);
+}
+
 s = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
 return offsetof(VRingAvail, ring) +
 sizeof(uint16_t) * vdev->vq[n].vring.num + s;
@@ -2344,6 +2350,10 @@ hwaddr virtio_queue_get_used_size(VirtIODevice *vdev, 
int n)
 {
 int s;
 
+if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
+return sizeof(struct VRingPackedDescEvent);
+}
+
 s = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
 return offsetof(VRingUsed, ring) +
 sizeof(VRingUsedElem) * vdev->vq[n].vring.num + s;
-- 
1.8.3.1




[Qemu-devel] [PATCH 08/11] virtio: event suppression support for packed ring

2019-02-13 Thread wexu
From: Wei Xu 

Difference between 'avail_wrap_counter' and 'last_avail_wrap_counter':
For Tx(guest transmitting), they are the same after each pop of a desc.

For Rx(guest receiving), they are also the same when there are enough
descriptors to carry the payload for a packet(e.g. usually 16 descs are
needed for a 64k packet in typical iperf tcp connection with tso enabled),
however, when the ring is running out of descriptors while there are
still a few free ones, e.g. 6 descriptors are available which is not
enough to carry an entire packet which needs 16 descriptors, in this
case the 'avail_wrap_counter' should be set as the first one pending
being handled by guest driver in order to get a notification, and the
'last_avail_wrap_counter' should stay unchanged to the head of available
descriptors, like below:

Mark meaning:
| | -- available
|*| -- used

A Snapshot of the queue:
  last_avail_idx = 253
  last_avail_wrap_counter = 1
 |
+-+
 0  | | | |*|*|*|*|*|*|*|*|*|*|*|*|*|*|*|*|*| | | | 255
+-+
   |
  shadow_avail_idx = 3
  avail_wrap_counter = 0

Signed-off-by: Wei Xu 
---
 hw/virtio/virtio.c | 137 +
 1 file changed, 128 insertions(+), 9 deletions(-)

diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index 7e276b4..8cfc7b6 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -234,6 +234,34 @@ static void vring_desc_read(VirtIODevice *vdev, VRingDesc 
*desc,
 virtio_tswap16s(vdev, >next);
 }
 
+static void vring_packed_event_read(VirtIODevice *vdev,
+MemoryRegionCache *cache, VRingPackedDescEvent *e)
+{
+address_space_read_cached(cache, 0, e, sizeof(*e));
+virtio_tswap16s(vdev, >off_wrap);
+virtio_tswap16s(vdev, >flags);
+}
+
+static void vring_packed_off_wrap_write(VirtIODevice *vdev,
+MemoryRegionCache *cache, uint16_t off_wrap)
+{
+virtio_tswap16s(vdev, _wrap);
+address_space_write_cached(cache, offsetof(VRingPackedDescEvent, off_wrap),
+_wrap, sizeof(off_wrap));
+address_space_cache_invalidate(cache,
+offsetof(VRingPackedDescEvent, off_wrap), sizeof(off_wrap));
+}
+
+static void vring_packed_flags_write(VirtIODevice *vdev,
+MemoryRegionCache *cache, uint16_t flags)
+{
+virtio_tswap16s(vdev, );
+address_space_write_cached(cache, offsetof(VRingPackedDescEvent, flags),
+, sizeof(flags));
+address_space_cache_invalidate(cache,
+offsetof(VRingPackedDescEvent, flags), sizeof(flags));
+}
+
 static VRingMemoryRegionCaches *vring_get_region_caches(struct VirtQueue *vq)
 {
 VRingMemoryRegionCaches *caches = atomic_rcu_read(>vring.caches);
@@ -340,14 +368,8 @@ static inline void vring_set_avail_event(VirtQueue *vq, 
uint16_t val)
 address_space_cache_invalidate(>used, pa, sizeof(val));
 }
 
-void virtio_queue_set_notification(VirtQueue *vq, int enable)
+static void virtio_queue_set_notification_split(VirtQueue *vq, int enable)
 {
-vq->notification = enable;
-
-if (!vq->vring.desc) {
-return;
-}
-
 rcu_read_lock();
 if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
 vring_set_avail_event(vq, vring_avail_idx(vq));
@@ -363,6 +385,57 @@ void virtio_queue_set_notification(VirtQueue *vq, int 
enable)
 rcu_read_unlock();
 }
 
+static void virtio_queue_set_notification_packed(VirtQueue *vq, int enable)
+{
+VRingPackedDescEvent e;
+VRingMemoryRegionCaches *caches;
+
+rcu_read_lock();
+caches  = vring_get_region_caches(vq);
+vring_packed_event_read(vq->vdev, >used, );
+
+if (!enable) {
+if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
+/* no need to write device area since this is outdated. */
+goto out;
+}
+
+e.flags = VRING_PACKED_EVENT_FLAG_DISABLE;
+goto update;
+}
+
+e.flags = VRING_PACKED_EVENT_FLAG_ENABLE;
+if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
+uint16_t off_wrap = vq->shadow_avail_idx | vq->avail_wrap_counter << 
15;
+
+vring_packed_off_wrap_write(vq->vdev, >used, off_wrap);
+/* Make sure off_wrap is wrote before flags */
+smp_wmb();
+
+e.flags = VRING_PACKED_EVENT_FLAG_DESC;
+}
+
+update:
+vring_packed_flags_write(vq->vdev, >used, e.flags);
+out:
+rcu_read_unlock();
+}
+
+void virtio_queue_set_notification(VirtQueue *vq, int enable)
+{
+vq->notification = enable;
+
+if (!vq->vring.desc) {
+return;
+}
+
+if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
+virtio_queue_set_notification_packed(vq, enable);
+} else {
+

[Qemu-devel] [PATCH 02/11] virtio: device/driver area size calculation helper for split ring

2019-02-13 Thread wexu
From: Wei Xu 

There is slight size difference between split/packed rings.

This is a refactor of split ring as well as a helper to expand
device and driver area size calculation for packed ring.

Signed-off-by: Wei Xu 
---
 hw/virtio/virtio.c | 16 ++--
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index eafb4cc..6769e54 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -155,10 +155,8 @@ static void virtio_init_region_cache(VirtIODevice *vdev, 
int n)
 VRingMemoryRegionCaches *old = vq->vring.caches;
 VRingMemoryRegionCaches *new = NULL;
 hwaddr addr, size;
-int event_size;
 int64_t len;
 
-event_size = virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX) ? 
2 : 0;
 
 addr = vq->vring.desc;
 if (!addr) {
@@ -173,7 +171,7 @@ static void virtio_init_region_cache(VirtIODevice *vdev, 
int n)
 goto err_desc;
 }
 
-size = virtio_queue_get_used_size(vdev, n) + event_size;
+size = virtio_queue_get_used_size(vdev, n);
 len = address_space_cache_init(>used, vdev->dma_as,
vq->vring.used, size, true);
 if (len < size) {
@@ -181,7 +179,7 @@ static void virtio_init_region_cache(VirtIODevice *vdev, 
int n)
 goto err_used;
 }
 
-size = virtio_queue_get_avail_size(vdev, n) + event_size;
+size = virtio_queue_get_avail_size(vdev, n);
 len = address_space_cache_init(>avail, vdev->dma_as,
vq->vring.avail, size, false);
 if (len < size) {
@@ -2335,14 +2333,20 @@ hwaddr virtio_queue_get_desc_size(VirtIODevice *vdev, 
int n)
 
 hwaddr virtio_queue_get_avail_size(VirtIODevice *vdev, int n)
 {
+int s;
+
+s = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
 return offsetof(VRingAvail, ring) +
-sizeof(uint16_t) * vdev->vq[n].vring.num;
+sizeof(uint16_t) * vdev->vq[n].vring.num + s;
 }
 
 hwaddr virtio_queue_get_used_size(VirtIODevice *vdev, int n)
 {
+int s;
+
+s = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
 return offsetof(VRingUsed, ring) +
-sizeof(VRingUsedElem) * vdev->vq[n].vring.num;
+sizeof(VRingUsedElem) * vdev->vq[n].vring.num + s;
 }
 
 uint16_t virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n)
-- 
1.8.3.1




[Qemu-devel] [PATCH 04/11] virtio: initialize wrap counter for packed ring

2019-02-13 Thread wexu
From: Wei Xu 

Set to 'true' by default due to spec.

Signed-off-by: Wei Xu 
---
 hw/virtio/virtio.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index 1a98e61..54dc098 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -1238,6 +1238,9 @@ void virtio_reset(void *opaque)
 vdev->vq[i].last_avail_idx = 0;
 vdev->vq[i].shadow_avail_idx = 0;
 vdev->vq[i].used_idx = 0;
+vdev->vq[i].last_avail_wrap_counter = true;
+vdev->vq[i].avail_wrap_counter = true;
+vdev->vq[i].used_wrap_counter = true;
 virtio_queue_set_vector(vdev, i, VIRTIO_NO_VECTOR);
 vdev->vq[i].signalled_used = 0;
 vdev->vq[i].signalled_used_valid = false;
-- 
1.8.3.1




[Qemu-devel] [PATCH 07/11] virtio: fill/flush/pop for packed ring

2019-02-13 Thread wexu
From: Wei Xu 

last_used_idx/wrap_counter should be equal to last_avail_idx/wrap_counter
after a successful flush.

Batching in vhost-net & dpdk testpmd is not equivalently supported in
userspace backend, but a chained descriptors for Rx is similarly presented
as a lightweight batch, so a write barrier is nailed only for the
first(head) descriptor.

Signed-off-by: Wei Xu 
---
 hw/virtio/virtio.c | 291 +
 1 file changed, 274 insertions(+), 17 deletions(-)

diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index 832287b..7e276b4 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -379,6 +379,25 @@ static void vring_packed_desc_read(VirtIODevice *vdev, 
VRingPackedDesc *desc,
 virtio_tswap16s(vdev, >id);
 }
 
+static void vring_packed_desc_write_data(VirtIODevice *vdev,
+VRingPackedDesc *desc, MemoryRegionCache *cache, int i)
+{
+virtio_tswap32s(vdev, >len);
+virtio_tswap16s(vdev, >id);
+address_space_write_cached(cache,
+  i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, id),
+  >id, sizeof(desc->id));
+address_space_cache_invalidate(cache,
+  i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, id),
+  sizeof(desc->id));
+address_space_write_cached(cache,
+  i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, len),
+  >len, sizeof(desc->len));
+address_space_cache_invalidate(cache,
+  i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, len),
+  sizeof(desc->len));
+}
+
 static void vring_packed_desc_read_flags(VirtIODevice *vdev,
 VRingPackedDesc *desc, MemoryRegionCache *cache, int i)
 {
@@ -388,6 +407,18 @@ static void vring_packed_desc_read_flags(VirtIODevice 
*vdev,
 virtio_tswap16s(vdev, >flags);
 }
 
+static void vring_packed_desc_write_flags(VirtIODevice *vdev,
+VRingPackedDesc *desc, MemoryRegionCache *cache, int i)
+{
+virtio_tswap16s(vdev, >flags);
+address_space_write_cached(cache,
+  i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, flags),
+  >flags, sizeof(desc->flags));
+address_space_cache_invalidate(cache,
+  i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, flags),
+  sizeof(desc->flags));
+}
+
 static inline bool is_desc_avail(struct VRingPackedDesc *desc,
 bool wrap_counter)
 {
@@ -554,19 +585,11 @@ bool virtqueue_rewind(VirtQueue *vq, unsigned int num)
 }
 
 /* Called within rcu_read_lock().  */
-void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
+static void virtqueue_split_fill(VirtQueue *vq, const VirtQueueElement *elem,
 unsigned int len, unsigned int idx)
 {
 VRingUsedElem uelem;
 
-trace_virtqueue_fill(vq, elem, len, idx);
-
-virtqueue_unmap_sg(vq, elem, len);
-
-if (unlikely(vq->vdev->broken)) {
-return;
-}
-
 if (unlikely(!vq->vring.used)) {
 return;
 }
@@ -578,16 +601,71 @@ void virtqueue_fill(VirtQueue *vq, const VirtQueueElement 
*elem,
 vring_used_write(vq, , idx);
 }
 
-/* Called within rcu_read_lock().  */
-void virtqueue_flush(VirtQueue *vq, unsigned int count)
+static void virtqueue_packed_fill(VirtQueue *vq, const VirtQueueElement *elem,
+unsigned int len, unsigned int idx)
 {
-uint16_t old, new;
+uint16_t head;
+VRingMemoryRegionCaches *caches;
+VRingPackedDesc desc = {
+.flags = 0,
+.id = elem->index,
+.len = len,
+};
+bool wrap_counter = vq->used_wrap_counter;
+
+if (unlikely(!vq->vring.desc)) {
+return;
+}
+
+head = vq->used_idx + idx;
+if (head >= vq->vring.num) {
+head -= vq->vring.num;
+wrap_counter ^= 1;
+}
+if (wrap_counter) {
+desc.flags |= (1 << VRING_PACKED_DESC_F_AVAIL);
+desc.flags |= (1 << VRING_PACKED_DESC_F_USED);
+} else {
+desc.flags &= ~(1 << VRING_PACKED_DESC_F_AVAIL);
+desc.flags &= ~(1 << VRING_PACKED_DESC_F_USED);
+}
+
+caches = vring_get_region_caches(vq);
+vring_packed_desc_write_data(vq->vdev, &desc, &caches->desc, head);
+if (idx == 0) {
+/*
+ * Make sure descriptor id and len is written before
+ * flags for the first used buffer.
+ */
+smp_wmb();
+}
+
+vring_packed_desc_write_flags(vq->vdev, &desc, &caches->desc, head);
+}
+
+void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
+unsigned int len, unsigned int idx)
+{
+trace_virtqueue_fill(vq, elem, len, idx);
+
+virtqueue_unmap_sg(vq, elem, len);
 
 if (unlikely(vq->vdev->broken)) {
-vq->inuse -= count;
 return;
 }
 
+if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
+virtqueue_packed_fill(vq, elem, len, idx);
+} else {
+virtqueue_split_fill(vq, elem, len, idx);
+}
+}

[Qemu-devel] [PATCH 09/11] virtio-net: update the head descriptor in a chain lastly

2019-02-13 Thread wexu
From: Wei Xu 

This is a helper for the packed ring.

To support the packed ring, the head descriptor in a chain should be
updated last: unlike the split ring, there is no 'avail_idx' to tell the
driver side that the whole payload is ready once the chain is done, so
the head descriptor becomes visible to the driver as soon as it is
written.

This patch therefore fills the head descriptor only after all the other
ones are done.
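
A condensed sketch of the receive-side pattern (fill_one() is a
hypothetical per-buffer helper; error handling elided):

/* Defer the head fill until the rest of the chain has been used. */
static void receive_chain(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtQueueElement *elem, head;
    int head_len = 0, len;
    unsigned i = 0;

    while ((elem = virtqueue_pop(vq, sizeof(*elem)))) {
        len = fill_one(elem);               /* hypothetical receive copy */
        if (i == 0) {
            head = *elem;                   /* defer: head not exposed yet */
            head_len = len;
        } else {
            virtqueue_fill(vq, elem, len, i);
        }
        i++;
        g_free(elem);
    }

    virtqueue_fill(vq, &head, head_len, 0); /* head goes in last */
    virtqueue_flush(vq, i);                 /* num_buffers now consistent */
    virtio_notify(vdev, vq);
}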

Signed-off-by: Wei Xu 
---
 hw/net/virtio-net.c | 11 ++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index 3f319ef..330abea 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -1251,6 +1251,8 @@ static ssize_t virtio_net_receive_rcu(NetClientState *nc, 
const uint8_t *buf,
 struct virtio_net_hdr_mrg_rxbuf mhdr;
 unsigned mhdr_cnt = 0;
 size_t offset, i, guest_offset;
+VirtQueueElement head;
+int head_len = 0;
 
 if (!virtio_net_can_receive(nc)) {
 return -1;
@@ -1328,7 +1330,13 @@ static ssize_t virtio_net_receive_rcu(NetClientState 
*nc, const uint8_t *buf,
 }
 
 /* signal other side */
-virtqueue_fill(q->rx_vq, elem, total, i++);
+if (i == 0) {
+head_len = total;
+head = *elem;
+} else {
+virtqueue_fill(q->rx_vq, elem, len, i);
+}
+i++;
 g_free(elem);
 }
 
@@ -1339,6 +1347,7 @@ static ssize_t virtio_net_receive_rcu(NetClientState *nc, 
const uint8_t *buf,
  &mhdr.num_buffers, sizeof mhdr.num_buffers);
 }
 
+virtqueue_fill(q->rx_vq, &head, head_len, 0);
 virtqueue_flush(q->rx_vq, i);
 virtio_notify(vdev, q->rx_vq);
 
-- 
1.8.3.1




[Qemu-devel] [PATCH 01/11] virtio: rename structure for packed ring

2019-02-13 Thread wexu
From: Wei Xu 

Redefine packed ring structure according to Qemu nomenclature.

Signed-off-by: Wei Xu 
---
 hw/virtio/virtio.c | 15 +++
 1 file changed, 15 insertions(+)

diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index a1ff647..eafb4cc 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -39,6 +39,13 @@ typedef struct VRingDesc
 uint16_t next;
 } VRingDesc;
 
+typedef struct VRingPackedDesc {
+uint64_t addr;
+uint32_t len;
+uint16_t id;
+uint16_t flags;
+} VRingPackedDesc;
+
 typedef struct VRingAvail
 {
 uint16_t flags;
@@ -77,17 +84,25 @@ typedef struct VRing
 VRingMemoryRegionCaches *caches;
 } VRing;
 
+typedef struct VRingPackedDescEvent {
+uint16_t off_wrap;
+uint16_t flags;
+} VRingPackedDescEvent;
+
 struct VirtQueue
 {
 VRing vring;
 
 /* Next head to pop */
 uint16_t last_avail_idx;
+bool last_avail_wrap_counter;
 
 /* Last avail_idx read from VQ. */
 uint16_t shadow_avail_idx;
+bool avail_wrap_counter;
 
 uint16_t used_idx;
+bool used_wrap_counter;
 
 /* Last used index value we have signalled on */
 uint16_t signalled_used;
-- 
1.8.3.1




[Qemu-devel] [PATCH v3 00/11] packed ring virtio-net backends support

2019-02-13 Thread wexu
From: Wei Xu 

https://github.com/Whishay/qemu.git 

Userspace and vhost-net backend tests have been done with an upstream
kernel in the guest.

v2->v3
v2/01 - drop it since the header has been synchronized from kernel.(mst & 
jason)
v3/01 - rename 'avail_wrap_counter' to 'last_avail_wrap_counter',
'event_wrap_counter' to 'avail_wrap_counter' to make it easier
to understand.(Jason)
  - revise commit message.(Jason)
v3/02 - split packed ring areas size calculation to next patch.(Jason)
to not break bisect(Jason).
v3/03 - initialize packed ring region with correct size and attribute.
  - remove unnecessary 'else' checks. (Jason)
v3/06 - add commit log.
  - replace 'event_wrap-counter' with 'avail_wrap_counter'.
  - merge common memory cache size check to 
virtqueue_get_avail_bytes().(Jason)
  - revise memory barrier comment.(Jason) 
  - check indirect descriptors by desc.len/sizeof(desc).(Jason)
  - flip wrap counter with '^=1'.(Jason)
v3/07 - move desc.id/len initialization to the declaration.(Jason)
  - flip wrap counter '!' with '^=1'.(Jason)
  - add memory barrier comments in commit message.
v3/08 - use offsetof() when writing cache.(Jason)
  - avoid duplicated memory region write when turning off event_idx
supported notification.(Jason)
  - add commit log.(Jason)
  - add avail & last_avail wrap counter difference description in 
commit log.
v3/09 - remove unnecessary used/avail idx/wrap-counter from subsection.
  - put new subsection to the end of vmstate_virtio.(Jason)
  - squash the two userspace and vhost-net migration patches in 
v2.(Jason)
v3/10 - reword commit message.
  - this is a helper, not a bug fix, so I would like to keep it as a
separate patch still. (Jason proposed merging it.)
  - virtqueue_fill() is also not really an API, so I would prefer not
to touch it; please correct me if I did not get it right.
(Jason proposed a squash.)
v3/11 - squash feature bits for user space and vhost kernel/user backends.
  - enable packed ring feature bit provision on host by default.(Jason)

Wei Xu (11):
  virtio: rename structure for packed ring
  virtio: device/driver area size calculation helper for split ring
  virtio: initialize packed ring region
  virtio: initialize wrap counter for packed ring
  virtio: queue/descriptor check helpers for packed ring
  virtio: get avail bytes check for packed ring
  virtio: fill/flush/pop for packed ring
  virtio: event suppression support for packed ring
  virtio-net: update the head descriptor in a chain lastly
  virtio: migration support for packed ring
  virtio: CLI and provide packed ring feature bit by default

 hw/net/vhost_net.c |   2 +
 hw/net/virtio-net.c|  11 +-
 hw/virtio/virtio.c | 798 +
 include/hw/virtio/virtio.h |   4 +-
 4 files changed, 757 insertions(+), 58 deletions(-)

-- 
1.8.3.1




[Qemu-devel] [PATCH v2 15/15] virtio: enable packed ring via a new command line

2019-01-16 Thread wexu
From: Wei Xu 

Signed-off-by: Wei Xu 
---
 include/hw/virtio/virtio.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
index 9c1fa07..cb286bb 100644
--- a/include/hw/virtio/virtio.h
+++ b/include/hw/virtio/virtio.h
@@ -264,7 +264,9 @@ typedef struct VirtIORNGConf VirtIORNGConf;
 DEFINE_PROP_BIT64("any_layout", _state, _field, \
   VIRTIO_F_ANY_LAYOUT, true), \
 DEFINE_PROP_BIT64("iommu_platform", _state, _field, \
-  VIRTIO_F_IOMMU_PLATFORM, false)
+  VIRTIO_F_IOMMU_PLATFORM, false), \
+DEFINE_PROP_BIT64("ring_packed", _state, _field, \
+  VIRTIO_F_RING_PACKED, false)
 
 hwaddr virtio_queue_get_desc_addr(VirtIODevice *vdev, int n);
 hwaddr virtio_queue_get_avail_addr(VirtIODevice *vdev, int n);
-- 
1.8.3.1




[Qemu-devel] [PATCH v2 14/15] vhost: enable packed ring

2019-01-16 Thread wexu
From: Wei Xu 

Signed-off-by: Wei Xu 
---
 hw/net/vhost_net.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
index fb4b18f..f593086 100644
--- a/hw/net/vhost_net.c
+++ b/hw/net/vhost_net.c
@@ -53,6 +53,7 @@ static const int kernel_feature_bits[] = {
 VIRTIO_F_VERSION_1,
 VIRTIO_NET_F_MTU,
 VIRTIO_F_IOMMU_PLATFORM,
+VIRTIO_F_RING_PACKED,
 VHOST_INVALID_FEATURE_BIT
 };
 
-- 
1.8.3.1




[Qemu-devel] [PATCH v2 11/15] virtio: add userspace migration for packed ring

2019-01-16 Thread wexu
From: Wei Xu 

Signed-off-by: Wei Xu 
---
 hw/virtio/virtio.c | 39 +++
 1 file changed, 39 insertions(+)

diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index 0bcf8a5..722a4fd 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -2346,6 +2346,13 @@ static bool virtio_virtqueue_needed(void *opaque)
 return virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1);
 }
 
+static bool virtio_packed_virtqueue_needed(void *opaque)
+{
+VirtIODevice *vdev = opaque;
+
+return virtio_host_has_feature(vdev, VIRTIO_F_RING_PACKED);
+}
+
 static bool virtio_ringsize_needed(void *opaque)
 {
 VirtIODevice *vdev = opaque;
@@ -2387,6 +2394,21 @@ static const VMStateDescription vmstate_virtqueue = {
 }
 };
 
+static const VMStateDescription vmstate_packed_virtqueue = {
+.name = "packed_virtqueue_state",
+.version_id = 1,
+.minimum_version_id = 1,
+.fields = (VMStateField[]) {
+VMSTATE_BOOL(avail_wrap_counter, struct VirtQueue),
+VMSTATE_BOOL(event_wrap_counter, struct VirtQueue),
+VMSTATE_BOOL(used_wrap_counter, struct VirtQueue),
+VMSTATE_UINT16(used_idx, struct VirtQueue),
+VMSTATE_UINT16(shadow_avail_idx, struct VirtQueue),
+VMSTATE_UINT32(inuse, struct VirtQueue),
+VMSTATE_END_OF_LIST()
+}
+};
+
 static const VMStateDescription vmstate_virtio_virtqueues = {
 .name = "virtio/virtqueues",
 .version_id = 1,
@@ -2399,6 +2421,18 @@ static const VMStateDescription 
vmstate_virtio_virtqueues = {
 }
 };
 
+static const VMStateDescription vmstate_virtio_packed_virtqueues = {
+.name = "virtio/packed_virtqueues",
+.version_id = 1,
+.minimum_version_id = 1,
+.needed = &virtio_packed_virtqueue_needed,
+.fields = (VMStateField[]) {
+VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
+  VIRTIO_QUEUE_MAX, 0, vmstate_packed_virtqueue, 
VirtQueue),
+VMSTATE_END_OF_LIST()
+}
+};
+
 static const VMStateDescription vmstate_ringsize = {
 .name = "ringsize_state",
 .version_id = 1,
@@ -2516,6 +2550,7 @@ static const VMStateDescription vmstate_virtio = {
 &vmstate_virtio_device_endian,
 &vmstate_virtio_64bit_features,
 &vmstate_virtio_virtqueues,
+&vmstate_virtio_packed_virtqueues,
 &vmstate_virtio_ringsize,
 &vmstate_virtio_broken,
 &vmstate_virtio_extra_state,
@@ -2791,6 +2826,10 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f, int 
version_id)
 virtio_queue_update_rings(vdev, i);
 }
 
+if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
+continue;
+}
+
 nheads = vring_avail_idx(&vdev->vq[i]) - 
vdev->vq[i].last_avail_idx;
 /* Check it isn't doing strange things with descriptor numbers. */
 if (nheads > vdev->vq[i].vring.num) {
-- 
1.8.3.1




[Qemu-devel] [PATCH v2 09/15] virtio: event suppression support for packed ring

2019-01-16 Thread wexu
From: Wei Xu 

Signed-off-by: Wei Xu 
---
 hw/virtio/virtio.c | 121 +++--
 1 file changed, 118 insertions(+), 3 deletions(-)

diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index 5562ecd..0bcf8a5 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -238,6 +238,30 @@ static void vring_desc_read(VirtIODevice *vdev, VRingDesc 
*desc,
 virtio_tswap16s(vdev, >next);
 }
 
+static void vring_packed_event_read(VirtIODevice *vdev,
+MemoryRegionCache *cache, VRingPackedDescEvent *e)
+{
+address_space_read_cached(cache, 0, e, sizeof(*e));
+virtio_tswap16s(vdev, >off_wrap);
+virtio_tswap16s(vdev, >flags);
+}
+
+static void vring_packed_off_wrap_write(VirtIODevice *vdev,
+MemoryRegionCache *cache, uint16_t off_wrap)
+{
+virtio_tswap16s(vdev, &off_wrap);
+address_space_write_cached(cache, 0, &off_wrap, sizeof(off_wrap));
+address_space_cache_invalidate(cache, 0, sizeof(off_wrap));
+}
+
+static void vring_packed_flags_write(VirtIODevice *vdev,
+MemoryRegionCache *cache, uint16_t flags)
+{
+virtio_tswap16s(vdev, &flags);
+address_space_write_cached(cache, sizeof(uint16_t), &flags, sizeof(flags));
+address_space_cache_invalidate(cache, sizeof(uint16_t), sizeof(flags));
+}
+
 static VRingMemoryRegionCaches *vring_get_region_caches(struct VirtQueue *vq)
 {
 VRingMemoryRegionCaches *caches = atomic_rcu_read(&vq->vring.caches);
@@ -344,7 +368,7 @@ static inline void vring_set_avail_event(VirtQueue *vq, 
uint16_t val)
 address_space_cache_invalidate(&caches->used, pa, sizeof(val));
 }
 
-void virtio_queue_set_notification(VirtQueue *vq, int enable)
+static void virtio_queue_set_notification_split(VirtQueue *vq, int enable)
 {
 vq->notification = enable;
 
@@ -367,6 +391,51 @@ void virtio_queue_set_notification(VirtQueue *vq, int 
enable)
 rcu_read_unlock();
 }
 
+static void virtio_queue_set_notification_packed(VirtQueue *vq, int enable)
+{
+VRingPackedDescEvent e;
+VRingMemoryRegionCaches *caches;
+
+rcu_read_lock();
+caches = vring_get_region_caches(vq);
+vring_packed_event_read(vq->vdev, &caches->used, &e);
+
+if (!enable) {
+e.flags = RING_EVENT_FLAGS_DISABLE;
+goto out;
+}
+
+e.flags = RING_EVENT_FLAGS_ENABLE;
+if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
+uint16_t off_wrap = vq->shadow_avail_idx | vq->event_wrap_counter << 15;
+
+vring_packed_off_wrap_write(vq->vdev, &caches->used, off_wrap);
+/* Make sure off_wrap is written before flags */
+smp_wmb();
+
+e.flags = RING_EVENT_FLAGS_DESC;
+}
+
+out:
+vring_packed_flags_write(vq->vdev, &caches->used, e.flags);
+rcu_read_unlock();
+}
+
+void virtio_queue_set_notification(VirtQueue *vq, int enable)
+{
+vq->notification = enable;
+
+if (!vq->vring.desc) {
+return;
+}
+
+if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
+virtio_queue_set_notification_packed(vq, enable);
+} else {
+virtio_queue_set_notification_split(vq, enable);
+}
+}
+
 int virtio_queue_ready(VirtQueue *vq)
 {
 return vq->vring.avail != 0;
@@ -2118,8 +2187,7 @@ static void virtio_set_isr(VirtIODevice *vdev, int value)
 }
 }
 
-/* Called within rcu_read_lock().  */
-static bool virtio_should_notify(VirtIODevice *vdev, VirtQueue *vq)
+static bool virtio_split_should_notify(VirtIODevice *vdev, VirtQueue *vq)
 {
 uint16_t old, new;
 bool v;
@@ -2142,6 +2210,53 @@ static bool virtio_should_notify(VirtIODevice *vdev, 
VirtQueue *vq)
 return !v || vring_need_event(vring_get_used_event(vq), new, old);
 }
 
+static bool vring_packed_need_event(VirtQueue *vq, bool wrap,
+uint16_t off_wrap, uint16_t new, uint16_t old)
+{
+int off = off_wrap & ~(1 << 15);
+
+if (wrap != off_wrap >> 15) {
+off -= vq->vring.num;
+}
+
+return vring_need_event(off, new, old);
+}
+
+static bool virtio_packed_should_notify(VirtIODevice *vdev, VirtQueue *vq)
+{
+VRingPackedDescEvent e;
+uint16_t old, new;
+bool v;
+VRingMemoryRegionCaches *caches;
+
+caches = vring_get_region_caches(vq);
+vring_packed_event_read(vdev, &caches->avail, &e);
+
+old = vq->signalled_used;
+new = vq->signalled_used = vq->used_idx;
+v = vq->signalled_used_valid;
+vq->signalled_used_valid = true;
+
+if (e.flags == RING_EVENT_FLAGS_DISABLE) {
+return false;
+} else if (e.flags == RING_EVENT_FLAGS_ENABLE) {
+return true;
+}
+
+return !v || vring_packed_need_event(vq,
+vq->used_wrap_counter, e.off_wrap, new, old);
+}
+
+/* Called within rcu_read_lock().  */
+static bool virtio_should_notify(VirtIODevice *vdev, VirtQueue *vq)
+{
+if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
+return virtio_packed_should_notify(vdev, vq);
+} else {
+return virtio_split_should_notify(vdev, vq);
+}
+}

[Qemu-devel] [PATCH v2 12/15] virtio: add vhost-net migration for packed ring

2019-01-16 Thread wexu
From: Wei Xu 

Signed-off-by: Wei Xu 
---
 hw/virtio/virtio.c | 27 ---
 1 file changed, 24 insertions(+), 3 deletions(-)

diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index 722a4fd..0cb912e 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -2991,17 +2991,34 @@ hwaddr virtio_queue_get_used_size(VirtIODevice *vdev, 
int n)
 
 uint16_t virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n)
 {
-return vdev->vq[n].last_avail_idx;
+uint16_t idx;
+
+if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
+idx = vdev->vq[n].last_avail_idx;
+idx |= ((int)vdev->vq[n].avail_wrap_counter) << 15;
+} else {
+idx = (int)vdev->vq[n].last_avail_idx;
+}
+return idx;
 }
 
 void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n, uint16_t idx)
 {
-vdev->vq[n].last_avail_idx = idx;
-vdev->vq[n].shadow_avail_idx = idx;
+if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
+vdev->vq[n].last_avail_idx = idx & 0x7fff;
+vdev->vq[n].avail_wrap_counter = !!(idx & 0x8000);
+} else {
+vdev->vq[n].last_avail_idx = idx;
+vdev->vq[n].shadow_avail_idx = idx;
+}
 }
 
 void virtio_queue_restore_last_avail_idx(VirtIODevice *vdev, int n)
 {
+if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
+return;
+}
+
 rcu_read_lock();
 if (vdev->vq[n].vring.desc) {
 vdev->vq[n].last_avail_idx = vring_used_idx(>vq[n]);
@@ -3012,6 +3029,10 @@ void virtio_queue_restore_last_avail_idx(VirtIODevice 
*vdev, int n)
 
 void virtio_queue_update_used_idx(VirtIODevice *vdev, int n)
 {
+if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
+return;
+}
+
 rcu_read_lock();
 if (vdev->vq[n].vring.desc) {
 vdev->vq[n].used_idx = vring_used_idx(>vq[n]);
-- 
1.8.3.1




[Qemu-devel] [PATCH v2 13/15] virtio: packed ring feature bit for userspace backend

2019-01-16 Thread wexu
From: Wei Xu 

Signed-off-by: Wei Xu 
---
 hw/net/vhost_net.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
index e037db6..fb4b18f 100644
--- a/hw/net/vhost_net.c
+++ b/hw/net/vhost_net.c
@@ -78,6 +78,7 @@ static const int user_feature_bits[] = {
 VIRTIO_NET_F_MRG_RXBUF,
 VIRTIO_NET_F_MTU,
 VIRTIO_F_IOMMU_PLATFORM,
+VIRTIO_F_RING_PACKED,
 
 /* This bit implies RARP isn't sent by QEMU out of band */
 VIRTIO_NET_F_GUEST_ANNOUNCE,
-- 
1.8.3.1




[Qemu-devel] [PATCH v2 07/15] virtio: get avail bytes check for packed ring

2019-01-16 Thread wexu
From: Wei Xu 

Signed-off-by: Wei Xu 
---
 hw/virtio/virtio.c | 180 +
 1 file changed, 167 insertions(+), 13 deletions(-)

diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index e728201..cb599e9 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -372,6 +372,17 @@ int virtio_queue_ready(VirtQueue *vq)
 return vq->vring.avail != 0;
 }
 
+static void vring_packed_desc_read(VirtIODevice *vdev, VRingPackedDesc *desc,
+MemoryRegionCache *cache, int i)
+{
+address_space_read_cached(cache, i * sizeof(VRingPackedDesc),
+  desc, sizeof(VRingPackedDesc));
+virtio_tswap16s(vdev, &desc->flags);
+virtio_tswap64s(vdev, &desc->addr);
+virtio_tswap32s(vdev, &desc->len);
+virtio_tswap16s(vdev, &desc->id);
+}
+
 static void vring_packed_desc_read_flags(VirtIODevice *vdev,
 VRingPackedDesc *desc, MemoryRegionCache *cache, int i)
 {
@@ -671,9 +682,9 @@ static int virtqueue_read_next_desc(VirtIODevice *vdev, 
VRingDesc *desc,
 return VIRTQUEUE_READ_DESC_MORE;
 }
 
-void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
-   unsigned int *out_bytes,
-   unsigned max_in_bytes, unsigned max_out_bytes)
+static void virtqueue_split_get_avail_bytes(VirtQueue *vq,
+unsigned int *in_bytes, unsigned int *out_bytes,
+unsigned max_in_bytes, unsigned max_out_bytes)
 {
 VirtIODevice *vdev = vq->vdev;
 unsigned int max, idx;
@@ -683,16 +694,6 @@ void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int 
*in_bytes,
 int64_t len = 0;
 int rc;
 
-if (unlikely(!vq->vring.desc)) {
-if (in_bytes) {
-*in_bytes = 0;
-}
-if (out_bytes) {
-*out_bytes = 0;
-}
-return;
-}
-
 rcu_read_lock();
 idx = vq->last_avail_idx;
 total_bufs = in_total = out_total = 0;
@@ -796,6 +797,159 @@ err:
 goto done;
 }
 
+static void virtqueue_packed_get_avail_bytes(VirtQueue *vq,
+unsigned int *in_bytes, unsigned int *out_bytes,
+unsigned max_in_bytes, unsigned max_out_bytes)
+{
+VirtIODevice *vdev = vq->vdev;
+unsigned int max, idx;
+unsigned int total_bufs, in_total, out_total;
+MemoryRegionCache *desc_cache;
+VRingMemoryRegionCaches *caches;
+MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
+int64_t len = 0;
+VRingPackedDesc desc;
+bool wrap_counter;
+
+rcu_read_lock();
+idx = vq->last_avail_idx;
+wrap_counter = vq->avail_wrap_counter;
+total_bufs = in_total = out_total = 0;
+
+max = vq->vring.num;
+caches = vring_get_region_caches(vq);
+if (caches->desc.len < max * sizeof(VRingPackedDesc)) {
+virtio_error(vdev, "Cannot map descriptor ring");
+goto err;
+}
+
+desc_cache = >desc;
+vring_packed_desc_read_flags(vdev, &desc, desc_cache, idx);
+while (is_desc_avail(, wrap_counter)) {
+unsigned int num_bufs;
+unsigned int i = 0;
+
+num_bufs = total_bufs;
+
+/* Make sure all the fields have been exposed. */
+smp_rmb();
+vring_packed_desc_read(vdev, &desc, desc_cache, idx);
+
+if (desc.flags & VRING_DESC_F_INDIRECT) {
+if (desc.len % sizeof(VRingPackedDesc)) {
+virtio_error(vdev, "Invalid size for indirect buffer table");
+goto err;
+}
+
+/* If we've got too many, that implies a descriptor loop. */
+if (num_bufs >= max) {
+virtio_error(vdev, "Looped descriptor");
+goto err;
+}
+
+/* loop over the indirect descriptor table */
+len = address_space_cache_init(&indirect_desc_cache,
+   vdev->dma_as,
+   desc.addr, desc.len, false);
+desc_cache = &indirect_desc_cache;
+if (len < desc.len) {
+virtio_error(vdev, "Cannot map indirect buffer");
+goto err;
+}
+
+max = desc.len / sizeof(VRingPackedDesc);
+num_bufs = i = 0;
+vring_packed_desc_read(vdev, &desc, desc_cache, i);
+}
+
+do {
+/* If we've got too many, that implies a descriptor loop. */
+if (++num_bufs > max) {
+virtio_error(vdev, "Looped descriptor");
+goto err;
+}
+
+if (desc.flags & VRING_DESC_F_WRITE) {
+in_total += desc.len;
+} else {
+out_total += desc.len;
+}
+if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
+goto done;
+}
+
+if (desc_cache == &indirect_desc_cache) {
+vring_packed_desc_read(vdev, &desc, desc_cache, i);
+ 

[Qemu-devel] [PATCH v2 06/15] virtio: init and desc empty check for packed ring

2019-01-16 Thread wexu
From: Wei Xu 

Ring check and other basic helpers for the packed ring.
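
As an illustration of the availability rule added below (not part of the
patch): a packed descriptor is available when its AVAIL flag bit differs
from its USED bit and the AVAIL bit equals the ring's current wrap
counter.

/* Worked example of the packed-ring availability check. */
#include <stdbool.h>
#include <stdint.h>

#define AVAIL_DESC_PACKED(b) ((b) << 7)
#define USED_DESC_PACKED(b)  ((b) << 15)

static bool desc_avail(uint16_t flags, bool wrap_counter)
{
    bool avail = !!(flags & AVAIL_DESC_PACKED(1));
    bool used = !!(flags & USED_DESC_PACKED(1));

    return (avail != used) && (avail == wrap_counter);
}

/*
 * First ring pass (wrap_counter = true): the driver writes AVAIL=1,
 * USED=0, so the descriptor is available; once the device marks it used
 * (AVAIL=1, USED=1) the bits match and it is skipped.  On the second
 * pass the driver writes AVAIL=0, USED=1, which is available again
 * because AVAIL now matches the flipped wrap counter (false).
 */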

Signed-off-by: Wei Xu 
---
 hw/virtio/virtio.c | 59 +-
 1 file changed, 58 insertions(+), 1 deletion(-)

diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index 833289e..e728201 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -24,6 +24,9 @@
 #include "hw/virtio/virtio-access.h"
 #include "sysemu/dma.h"
 
+#define AVAIL_DESC_PACKED(b) ((b) << 7)
+#define USED_DESC_PACKED(b)  ((b) << 15)
+
 /*
  * The alignment to use between consumer and producer parts of vring.
  * x86 pagesize again. This is the default, used by transports like PCI
@@ -369,6 +372,25 @@ int virtio_queue_ready(VirtQueue *vq)
 return vq->vring.avail != 0;
 }
 
+static void vring_packed_desc_read_flags(VirtIODevice *vdev,
+VRingPackedDesc *desc, MemoryRegionCache *cache, int i)
+{
+address_space_read_cached(cache,
+  i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, flags),
+  &desc->flags, sizeof(desc->flags));
+virtio_tswap16s(vdev, &desc->flags);
+}
+
+static inline bool is_desc_avail(struct VRingPackedDesc *desc,
+bool wrap_counter)
+{
+bool avail, used;
+
+avail = !!(desc->flags & AVAIL_DESC_PACKED(1));
+used = !!(desc->flags & USED_DESC_PACKED(1));
+return (avail != used) && (avail == wrap_counter);
+}
+
 /* Fetch avail_idx from VQ memory only when we really need to know if
  * guest has added some buffers.
  * Called within rcu_read_lock().  */
@@ -389,7 +411,7 @@ static int virtio_queue_empty_rcu(VirtQueue *vq)
 return vring_avail_idx(vq) == vq->last_avail_idx;
 }
 
-int virtio_queue_empty(VirtQueue *vq)
+static int virtio_queue_split_empty(VirtQueue *vq)
 {
 bool empty;
 
@@ -411,6 +433,41 @@ int virtio_queue_empty(VirtQueue *vq)
 return empty;
 }
 
+static int virtio_queue_packed_empty_rcu(VirtQueue *vq)
+{
+struct VRingPackedDesc desc;
+VRingMemoryRegionCaches *cache;
+
+if (unlikely(!vq->vring.desc)) {
+return 1;
+}
+
+cache = vring_get_region_caches(vq);
+vring_packed_desc_read_flags(vq->vdev, &desc, &cache->desc,
+vq->last_avail_idx);
+
+return !is_desc_avail(, vq->avail_wrap_counter);
+}
+
+static int virtio_queue_packed_empty(VirtQueue *vq)
+{
+bool empty;
+
+rcu_read_lock();
+empty = virtio_queue_packed_empty_rcu(vq);
+rcu_read_unlock();
+return empty;
+}
+
+int virtio_queue_empty(VirtQueue *vq)
+{
+if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
+return virtio_queue_packed_empty(vq);
+} else {
+return virtio_queue_split_empty(vq);
+}
+}
+
 static void virtqueue_unmap_sg(VirtQueue *vq, const VirtQueueElement *elem,
unsigned int len)
 {
-- 
1.8.3.1




[Qemu-devel] [PATCH v2 08/15] virtio: fill/flush/pop for packed ring

2019-01-16 Thread wexu
From: Wei Xu 

Signed-off-by: Wei Xu 
---
 hw/virtio/virtio.c | 295 ++---
 1 file changed, 278 insertions(+), 17 deletions(-)

diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index cb599e9..5562ecd 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -383,6 +383,25 @@ static void vring_packed_desc_read(VirtIODevice *vdev, 
VRingPackedDesc *desc,
 virtio_tswap16s(vdev, &desc->id);
 }
 
+static void vring_packed_desc_write_data(VirtIODevice *vdev,
+VRingPackedDesc *desc, MemoryRegionCache *cache, int i)
+{
+virtio_tswap32s(vdev, &desc->len);
+virtio_tswap16s(vdev, &desc->id);
+address_space_write_cached(cache,
+  i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, id),
+  &desc->id, sizeof(desc->id));
+address_space_cache_invalidate(cache,
+  i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, id),
+  sizeof(desc->id));
+address_space_write_cached(cache,
+  i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, len),
+  &desc->len, sizeof(desc->len));
+address_space_cache_invalidate(cache,
+  i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, len),
+  sizeof(desc->len));
+}
+
 static void vring_packed_desc_read_flags(VirtIODevice *vdev,
 VRingPackedDesc *desc, MemoryRegionCache *cache, int i)
 {
@@ -392,6 +411,18 @@ static void vring_packed_desc_read_flags(VirtIODevice 
*vdev,
 virtio_tswap16s(vdev, &desc->flags);
 }
 
+static void vring_packed_desc_write_flags(VirtIODevice *vdev,
+VRingPackedDesc *desc, MemoryRegionCache *cache, int i)
+{
+virtio_tswap16s(vdev, &desc->flags);
+address_space_write_cached(cache,
+  i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, flags),
+  &desc->flags, sizeof(desc->flags));
+address_space_cache_invalidate(cache,
+  i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, flags),
+  sizeof(desc->flags));
+}
+
 static inline bool is_desc_avail(struct VRingPackedDesc *desc,
 bool wrap_counter)
 {
@@ -558,19 +589,11 @@ bool virtqueue_rewind(VirtQueue *vq, unsigned int num)
 }
 
 /* Called within rcu_read_lock().  */
-void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
+static void virtqueue_split_fill(VirtQueue *vq, const VirtQueueElement *elem,
 unsigned int len, unsigned int idx)
 {
 VRingUsedElem uelem;
 
-trace_virtqueue_fill(vq, elem, len, idx);
-
-virtqueue_unmap_sg(vq, elem, len);
-
-if (unlikely(vq->vdev->broken)) {
-return;
-}
-
 if (unlikely(!vq->vring.used)) {
 return;
 }
@@ -582,16 +605,72 @@ void virtqueue_fill(VirtQueue *vq, const VirtQueueElement 
*elem,
 vring_used_write(vq, &uelem, idx);
 }
 
-/* Called within rcu_read_lock().  */
-void virtqueue_flush(VirtQueue *vq, unsigned int count)
+static void virtqueue_packed_fill(VirtQueue *vq, const VirtQueueElement *elem,
+unsigned int len, unsigned int idx)
 {
-uint16_t old, new;
+uint16_t head;
+VRingMemoryRegionCaches *caches;
+VRingPackedDesc desc = {
+.flags = 0,
+};
+bool wrap_counter = vq->used_wrap_counter;
+
+if (unlikely(!vq->vring.desc)) {
+return;
+}
+
+caches = vring_get_region_caches(vq);
+desc.id = elem->index;
+desc.len = len;
+
+head = vq->used_idx + idx;
+if (head >= vq->vring.num) {
+head -= vq->vring.num;
+wrap_counter ^= 1;
+}
+if (wrap_counter) {
+desc.flags |= VRING_DESC_F_AVAIL;
+desc.flags |= VRING_DESC_F_USED;
+} else {
+desc.flags &= ~VRING_DESC_F_AVAIL;
+desc.flags &= ~VRING_DESC_F_USED;
+}
+
+vring_packed_desc_write_data(vq->vdev, &desc, &caches->desc, head);
+if (idx == 0) {
+/*
+ * Make sure descriptor id and len is written before
+ * flags for the first used buffer.
+ */
+smp_wmb();
+}
+
+vring_packed_desc_write_flags(vq->vdev, &desc, &caches->desc, head);
+}
+
+void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
+unsigned int len, unsigned int idx)
+{
+trace_virtqueue_fill(vq, elem, len, idx);
+
+virtqueue_unmap_sg(vq, elem, len);
 
 if (unlikely(vq->vdev->broken)) {
-vq->inuse -= count;
 return;
 }
 
+if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
+virtqueue_packed_fill(vq, elem, len, idx);
+} else {
+virtqueue_split_fill(vq, elem, len, idx);
+}
+}
+
+/* Called within rcu_read_lock().  */
+static void virtqueue_split_flush(VirtQueue *vq, unsigned int count)
+{
+uint16_t old, new;
+
 if (unlikely(!vq->vring.used)) {
 return;
 }
@@ -607,6 +686,31 @@ void virtqueue_flush(VirtQueue *vq, unsigned int count)
 vq->signalled_used_valid = false;
 }
 
+static void virtqueue_packed_flush(VirtQueue *vq, unsigned int count)

[Qemu-devel] [PATCH v2 10/15] virtio-net: fill head desc after done all in a chain

2019-01-16 Thread wexu
From: Wei Xu 

With the support of marking a descriptor used/unused in the 'flags'
field for 1.1, the current way of filling a chained descriptor does not
work: the driver side may read a wrong 'num_buffers' value if the head
descriptor has been filled in while the subsequent ones are still being
processed on the device side.

This patch fills the head descriptor after all the other ones are done.

Signed-off-by: Wei Xu 
---
 hw/net/virtio-net.c | 11 ++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index e37fc34..39336b9 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -1198,6 +1198,8 @@ static ssize_t virtio_net_receive_rcu(NetClientState *nc, 
const uint8_t *buf,
 struct virtio_net_hdr_mrg_rxbuf mhdr;
 unsigned mhdr_cnt = 0;
 size_t offset, i, guest_offset;
+VirtQueueElement head;
+int head_len = 0;
 
 if (!virtio_net_can_receive(nc)) {
 return -1;
@@ -1275,7 +1277,13 @@ static ssize_t virtio_net_receive_rcu(NetClientState 
*nc, const uint8_t *buf,
 }
 
 /* signal other side */
-virtqueue_fill(q->rx_vq, elem, total, i++);
+if (i == 0) {
+head_len = total;
+head = *elem;
+} else {
+virtqueue_fill(q->rx_vq, elem, len, i);
+}
+i++;
 g_free(elem);
 }
 
@@ -1286,6 +1294,7 @@ static ssize_t virtio_net_receive_rcu(NetClientState *nc, 
const uint8_t *buf,
  &mhdr.num_buffers, sizeof mhdr.num_buffers);
 }
 
+virtqueue_fill(q->rx_vq, &head, head_len, 0);
 virtqueue_flush(q->rx_vq, i);
 virtio_notify(vdev, q->rx_vq);
 
-- 
1.8.3.1




[Qemu-devel] [PATCH v2 05/15] virtio: init wrap counter for packed ring

2019-01-16 Thread wexu
From: Wei Xu 

Signed-off-by: Wei Xu 
---
 hw/virtio/virtio.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index 454da3d..833289e 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -1239,6 +1239,9 @@ void virtio_reset(void *opaque)
 vdev->vq[i].last_avail_idx = 0;
 vdev->vq[i].shadow_avail_idx = 0;
 vdev->vq[i].used_idx = 0;
+vdev->vq[i].avail_wrap_counter = true;
+vdev->vq[i].event_wrap_counter = true;
+vdev->vq[i].used_wrap_counter = true;
 virtio_queue_set_vector(vdev, i, VIRTIO_NO_VECTOR);
 vdev->vq[i].signalled_used = 0;
 vdev->vq[i].signalled_used_valid = false;
-- 
1.8.3.1




[Qemu-devel] [PATCH v2 00/15] packed ring virtio-net backends support

2019-01-16 Thread wexu
From: Wei Xu 

v1->v2:
 - fix patchew complaint
 - only set/get last_avail_idx/wrap_counter for vhost migration(Maxime)
 - replace 'out_num' and 'in_num' with 'elem_entries' in packed_pop() (Maxime)
 - set last used idx/wrap_counter to last avail ones when flushing(Maxime)
 - replace '*host_has_feature()' with '*vdev_has_feature()' for ioctl(Maxime)
 - replace going through indirect descriptors with desc.len/sizeof(desc)(btw)
 - add new subsection for packed ring(Jason)

rfc v3 -> v1
- migration support for both userspace and vhost-net; the vhost ioctl()
  needs a tweak to make it work (the code is pasted in the commit message
  of vhost migration patch #13).

Note:
  the high 32-bit guest feature bit is saved as a subsection for
  virtio devices which makes packed ring feature bit check unusable when
  loading the saved per-queue variables(this is done before loading
  subsection which is the last action for device during migration),
  so I save and load all the things generally for now, any idea to fix this?

- Fixed comments from Jason for rfc v3, sorted by patch #; two comments I
  didn't take are listed here (by patch):
09: - introduce new API(virtqueue_fill_n()).
  - Didn't take it since userspace backend does not support batching,
so only one element is popped and current API should be enough.
06 & 07: Refactor split and packed pop()/get_avail_bytes().
 - the duplicated code is intertwined with split/packed ring specific
   things and it might make things unclear, so I only extracted the few
   common parts outside the rcu section and kept the others separate.

The other revised comments:
02: - reuse current 'avail/used' for 'driver/device' in VRingMemoryRegionCache.
- remove event_idx since shadow_avail_idx works.
03: - move size recalculation to a separate patch.
- keep 'avail/used' in current calculation function name.
- initialize 'desc' memory region as 'false' for 1.0('true' for 1.1)
04: - delete 'event_idx'
05: - rename 'wc' to wrap_counter.
06: - converge common part outside rcu section for 1.0/1.1.
- move memory barrier for the first 'desc' in between checking flag
  and read other fields.
- remove unnecessary memory barriers for indirect descriptors.
- no need to destroy indirect memory cache since it is generally done
  before return from the function.
- remove redundant maximum chained descriptors limitation check.
- there are some differences(desc name, wrap idx/counter, flags) between
  split and packed rings, so keep them separate for now.
- amend the comment when recording index and wrap counter for a kick
  from guest.
07: - calculate fields in descriptor instead of read it when filling.
- put memory barrier correctly before filling the flags in descriptor.
- replace full memory barrier with a write barrier in fill.
- shift to read descriptor flags and descriptor necessarily and
  separately in packed_pop().
- correct memory barrier in packed_pop() as in packed_fill().
08: - reuse 'shadow_avail_idx' instead of adding a new 'event_idx'.
- use the compact and verified vring_packed_need_event()
  version for vhost net/user.
12: - remove the odd cherry-pick comment.
- used bit '15' for wrap_counters.

rfc v2->v3
- addressed performance issue
- fixed feedback from v2

rfc v1->v2
- sync to tiwei's v5
- reuse memory cache function with 1.0
- dropped detach patch and notification helper(04 & 05 in v1)
- guest virtio-net driver unload/reload support
- event suppression support(not tested)
- addressed feedback from v1


Wei Xu (15):
  virtio: introduce packed ring definitions
  virtio: redefine structure & memory cache for packed ring
  virtio: expand offset calculation for packed ring
  virtio: add memory region init for packed ring
  virtio: init wrap counter for packed ring
  virtio: init and desc empty check for packed ring
  virtio: get avail bytes check for packed ring
  virtio: fill/flush/pop for packed ring
  virtio: event suppression support for packed ring
  virtio-net: fill head desc after done all in a chain
  virtio: add userspace migration for packed ring
  virtio: add vhost-net migration for packed ring
  virtio: packed ring feature bit for userspace backend
  vhost: enable packed ring
  virtio: enable packed ring via a new command line

 hw/net/vhost_net.c |   2 +
 hw/net/virtio-net.c|  11 +-
 hw/virtio/virtio.c | 774 +++--
 include/hw/virtio/virtio.h |   4 +-
 include/standard-headers/linux/virtio_config.h |  15 +
 include/standard-headers/linux/virtio_ring.h   |  43 ++
 6 files changed, 800 insertions(+), 49 deletions(-)

-- 
1.8.3.1




[Qemu-devel] [PATCH v2 03/15] virtio: expand offset calculation for packed ring

2019-01-16 Thread wexu
From: Wei Xu 

Expand the 1.0 offset/size calculation to 1.1 by adding the packed ring
variants accordingly.
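
For a concrete (illustrative) comparison with num = 256: the split
ring's driver area needs flags + idx + one ring entry per descriptor
(plus used_event with EVENT_IDX), while each packed ring event area is a
fixed 4-byte VRingPackedDescEvent.

/* Illustrative size check for num = 256 (not part of the patch). */
#include <assert.h>
#include <stddef.h>
#include <stdint.h>

struct VRingAvail { uint16_t flags; uint16_t idx; uint16_t ring[]; };
struct VRingPackedDescEvent { uint16_t off_wrap; uint16_t flags; };

int main(void)
{
    unsigned num = 256;
    /* split: flags + idx + ring[num] + used_event (EVENT_IDX case) */
    size_t split_avail = offsetof(struct VRingAvail, ring)
                         + sizeof(uint16_t) * num + 2;         /* 518 */
    /* packed: a single driver (or device) event structure */
    size_t packed_avail = sizeof(struct VRingPackedDescEvent); /* 4 */

    assert(split_avail == 518 && packed_avail == 4);
    return 0;
}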

Signed-off-by: Wei Xu 
---
 hw/virtio/virtio.c | 22 ++
 1 file changed, 18 insertions(+), 4 deletions(-)

diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index 827e745..112845c 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -2336,14 +2336,28 @@ hwaddr virtio_queue_get_desc_size(VirtIODevice *vdev, 
int n)
 
 hwaddr virtio_queue_get_avail_size(VirtIODevice *vdev, int n)
 {
-return offsetof(VRingAvail, ring) +
-sizeof(uint16_t) * vdev->vq[n].vring.num;
+int s;
+
+if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
+return sizeof(struct VRingPackedDescEvent);
+} else {
+s = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
+return offsetof(VRingAvail, ring) +
+sizeof(uint16_t) * vdev->vq[n].vring.num + s;
+}
 }
 
 hwaddr virtio_queue_get_used_size(VirtIODevice *vdev, int n)
 {
-return offsetof(VRingUsed, ring) +
-sizeof(VRingUsedElem) * vdev->vq[n].vring.num;
+int s;
+
+if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
+return sizeof(struct VRingPackedDescEvent);
+} else {
+s = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
+return offsetof(VRingUsed, ring) +
+sizeof(VRingUsedElem) * vdev->vq[n].vring.num + s;
+}
 }
 
 uint16_t virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n)
-- 
1.8.3.1




[Qemu-devel] [PATCH v2 02/15] virtio: redefine structure & memory cache for packed ring

2019-01-16 Thread wexu
From: Wei Xu 

Redefine the packed ring structure according to QEMU nomenclature;
field data (wrap counters, etc.) is introduced as well.

Signed-off-by: Wei Xu 
---
 hw/virtio/virtio.c | 16 
 1 file changed, 16 insertions(+)

diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index 22bd1ac..827e745 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -39,6 +39,13 @@ typedef struct VRingDesc
 uint16_t next;
 } VRingDesc;
 
+typedef struct VRingPackedDesc {
+uint64_t addr;
+uint32_t len;
+uint16_t id;
+uint16_t flags;
+} VRingPackedDesc;
+
 typedef struct VRingAvail
 {
 uint16_t flags;
@@ -77,6 +84,11 @@ typedef struct VRing
 VRingMemoryRegionCaches *caches;
 } VRing;
 
+typedef struct VRingPackedDescEvent {
+uint16_t off_wrap;
+uint16_t flags;
+} VRingPackedDescEvent;
+
 struct VirtQueue
 {
 VRing vring;
@@ -87,7 +99,11 @@ struct VirtQueue
 /* Last avail_idx read from VQ. */
 uint16_t shadow_avail_idx;
 
+bool event_wrap_counter;
+bool avail_wrap_counter;
+
 uint16_t used_idx;
+bool used_wrap_counter;
 
 /* Last used index value we have signalled on */
 uint16_t signalled_used;
-- 
1.8.3.1




[Qemu-devel] [PATCH v2 04/15] virtio: add memory region init for packed ring

2019-01-16 Thread wexu
From: Wei Xu 

Signed-off-by: Wei Xu 
---
 hw/virtio/virtio.c | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index 112845c..454da3d 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -156,10 +156,8 @@ static void virtio_init_region_cache(VirtIODevice *vdev, 
int n)
 VRingMemoryRegionCaches *old = vq->vring.caches;
 VRingMemoryRegionCaches *new = NULL;
 hwaddr addr, size;
-int event_size;
 int64_t len;
-
-event_size = virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX) ? 
2 : 0;
+bool attr;
 
 addr = vq->vring.desc;
 if (!addr) {
@@ -167,14 +165,16 @@ static void virtio_init_region_cache(VirtIODevice *vdev, 
int n)
 }
 new = g_new0(VRingMemoryRegionCaches, 1);
 size = virtio_queue_get_desc_size(vdev, n);
+attr = virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED) ?
+   true : false;
 len = address_space_cache_init(>desc, vdev->dma_as,
-   addr, size, false);
+   addr, size, attr);
 if (len < size) {
 virtio_error(vdev, "Cannot map desc");
 goto err_desc;
 }
 
-size = virtio_queue_get_used_size(vdev, n) + event_size;
+size = virtio_queue_get_used_size(vdev, n);
 len = address_space_cache_init(>used, vdev->dma_as,
vq->vring.used, size, true);
 if (len < size) {
@@ -182,7 +182,7 @@ static void virtio_init_region_cache(VirtIODevice *vdev, 
int n)
 goto err_used;
 }
 
-size = virtio_queue_get_avail_size(vdev, n) + event_size;
+size = virtio_queue_get_avail_size(vdev, n);
 len = address_space_cache_init(>avail, vdev->dma_as,
vq->vring.avail, size, false);
 if (len < size) {
-- 
1.8.3.1




[Qemu-devel] [PATCH v2 01/15] virtio: introduce packed ring definitions

2019-01-16 Thread wexu
From: Wei Xu 

From the 1.1 spec.
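
One detail worth spelling out (illustration only, not part of the
patch): the off_wrap field of vring_packed_desc_event packs a 15-bit
descriptor offset together with the wrap counter in bit 15, which is how
event suppression can name one specific descriptor.

/* Packing/unpacking the off_wrap field (assumed helper names). */
#include <stdbool.h>
#include <stdint.h>

static uint16_t make_off_wrap(uint16_t offset, bool wrap)
{
    return (offset & 0x7fff) | ((uint16_t)wrap << 15);
}

static void split_off_wrap(uint16_t off_wrap, uint16_t *offset, bool *wrap)
{
    *offset = off_wrap & 0x7fff;
    *wrap = off_wrap >> 15;
}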

Signed-off-by: Wei Xu 
---
 include/standard-headers/linux/virtio_config.h | 15 +
 include/standard-headers/linux/virtio_ring.h   | 43 ++
 2 files changed, 58 insertions(+)

diff --git a/include/standard-headers/linux/virtio_config.h 
b/include/standard-headers/linux/virtio_config.h
index 0b19436..9f450fd 100644
--- a/include/standard-headers/linux/virtio_config.h
+++ b/include/standard-headers/linux/virtio_config.h
@@ -75,6 +75,21 @@
  */
 #define VIRTIO_F_IOMMU_PLATFORM33
 
+/* This feature indicates support for the packed virtqueue layout. */
+#define VIRTIO_F_RING_PACKED   34
+
+/* Enable events */
+#define RING_EVENT_FLAGS_ENABLE 0x0
+/* Disable events */
+#define RING_EVENT_FLAGS_DISABLE 0x1
+/*
+ * Enable events for a specific descriptor
+ * (as specified by Descriptor Ring Change Event Offset/Wrap Counter).
+ * Only valid if VIRTIO_RING_F_EVENT_IDX has been negotiated.
+ */
+#define RING_EVENT_FLAGS_DESC 0x2
+/* The value 0x3 is reserved */
+
 /*
  * Does the device support Single Root I/O Virtualization?
  */
diff --git a/include/standard-headers/linux/virtio_ring.h 
b/include/standard-headers/linux/virtio_ring.h
index d26e72b..1719c6f 100644
--- a/include/standard-headers/linux/virtio_ring.h
+++ b/include/standard-headers/linux/virtio_ring.h
@@ -42,6 +42,10 @@
 /* This means the buffer contains a list of buffer descriptors. */
 #define VRING_DESC_F_INDIRECT  4
 
+/* Mark a descriptor as available or used. */
+#define VRING_DESC_F_AVAIL (1ul << 7)
+#define VRING_DESC_F_USED  (1ul << 15)
+
 /* The Host uses this in used->flags to advise the Guest: don't kick me when
  * you add a buffer.  It's unreliable, so it's simply an optimization.  Guest
  * will still kick if it's out of buffers. */
@@ -51,6 +55,17 @@
  * optimization.  */
 #define VRING_AVAIL_F_NO_INTERRUPT 1
 
+/* Enable events. */
+#define VRING_EVENT_F_ENABLE   0x0
+/* Disable events. */
+#define VRING_EVENT_F_DISABLE  0x1
+/*
+ * Enable events for a specific descriptor
+ * (as specified by Descriptor Ring Change Event Offset/Wrap Counter).
+ * Only valid if VIRTIO_RING_F_EVENT_IDX has been negotiated.
+ */
+#define VRING_EVENT_F_DESC 0x2
+
 /* We support indirect buffer descriptors */
 #define VIRTIO_RING_F_INDIRECT_DESC28
 
@@ -169,4 +184,32 @@ static inline int vring_need_event(uint16_t event_idx, 
uint16_t new_idx, uint16_
return (uint16_t)(new_idx - event_idx - 1) < (uint16_t)(new_idx - old);
 }
 
+struct vring_packed_desc_event {
+   /* Descriptor Ring Change Event Offset/Wrap Counter. */
+   __virtio16 off_wrap;
+   /* Descriptor Ring Change Event Flags. */
+   __virtio16 flags;
+};
+
+struct vring_packed_desc {
+   /* Buffer Address. */
+   __virtio64 addr;
+   /* Buffer Length. */
+   __virtio32 len;
+   /* Buffer ID. */
+   __virtio16 id;
+   /* The flags depending on descriptor type. */
+   __virtio16 flags;
+};
+
+struct vring_packed {
+   unsigned int num;
+
+   struct vring_packed_desc *desc;
+
+   struct vring_packed_desc_event *driver;
+
+   struct vring_packed_desc_event *device;
+};
+
 #endif /* _LINUX_VIRTIO_RING_H */
-- 
1.8.3.1




[Qemu-devel] [PATCH v1 15/16] vhost: enable packed ring

2018-11-22 Thread wexu
From: Wei Xu 

Signed-off-by: Wei Xu 
---
 hw/net/vhost_net.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
index fb4b18f..f593086 100644
--- a/hw/net/vhost_net.c
+++ b/hw/net/vhost_net.c
@@ -53,6 +53,7 @@ static const int kernel_feature_bits[] = {
 VIRTIO_F_VERSION_1,
 VIRTIO_NET_F_MTU,
 VIRTIO_F_IOMMU_PLATFORM,
+VIRTIO_F_RING_PACKED,
 VHOST_INVALID_FEATURE_BIT
 };
 
-- 
1.8.3.1




[Qemu-devel] [PATCH v1 16/16] virtio: enable packed ring via a new command line

2018-11-22 Thread wexu
From: Wei Xu 

Signed-off-by: Wei Xu 
---
 include/hw/virtio/virtio.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
index a6fdf3f..36fc4ef 100644
--- a/include/hw/virtio/virtio.h
+++ b/include/hw/virtio/virtio.h
@@ -264,7 +264,9 @@ typedef struct VirtIORNGConf VirtIORNGConf;
 DEFINE_PROP_BIT64("any_layout", _state, _field, \
   VIRTIO_F_ANY_LAYOUT, true), \
 DEFINE_PROP_BIT64("iommu_platform", _state, _field, \
-  VIRTIO_F_IOMMU_PLATFORM, false)
+  VIRTIO_F_IOMMU_PLATFORM, false), \
+DEFINE_PROP_BIT64("ring_packed", _state, _field, \
+  VIRTIO_F_RING_PACKED, false)
 
 hwaddr virtio_queue_get_desc_addr(VirtIODevice *vdev, int n);
 hwaddr virtio_queue_get_avail_addr(VirtIODevice *vdev, int n);
-- 
1.8.3.1




[Qemu-devel] [PATCH v1 14/16] virtio: packed ring feature bit for userspace backend

2018-11-22 Thread wexu
From: Wei Xu 

Signed-off-by: Wei Xu 
---
 hw/net/vhost_net.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
index e037db6..fb4b18f 100644
--- a/hw/net/vhost_net.c
+++ b/hw/net/vhost_net.c
@@ -78,6 +78,7 @@ static const int user_feature_bits[] = {
 VIRTIO_NET_F_MRG_RXBUF,
 VIRTIO_NET_F_MTU,
 VIRTIO_F_IOMMU_PLATFORM,
+VIRTIO_F_RING_PACKED,
 
 /* This bit implies RARP isn't sent by QEMU out of band */
 VIRTIO_NET_F_GUEST_ANNOUNCE,
-- 
1.8.3.1




[Qemu-devel] [PATCH v1 12/16] virtio: add userspace migration of packed ring

2018-11-22 Thread wexu
From: Wei Xu 

Signed-off-by: Wei Xu 
---
 hw/virtio/virtio.c | 18 ++
 1 file changed, 18 insertions(+)

diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index 240c4e3..64d5c04 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -2558,6 +2558,12 @@ int virtio_save(VirtIODevice *vdev, QEMUFile *f)
  */
 qemu_put_be64(f, vdev->vq[i].vring.desc);
 qemu_put_be16s(f, &vdev->vq[i].last_avail_idx);
+qemu_put_8s(f, (const uint8_t *)&vdev->vq[i].avail_wrap_counter);
+qemu_put_8s(f, (const uint8_t *)&vdev->vq[i].event_wrap_counter);
+qemu_put_8s(f, (const uint8_t *)&vdev->vq[i].used_wrap_counter);
+qemu_put_be16s(f, &vdev->vq[i].used_idx);
+qemu_put_be16s(f, &vdev->vq[i].shadow_avail_idx);
+qemu_put_be32s(f, &vdev->vq[i].inuse);
 if (k->save_queue) {
 k->save_queue(qbus->parent, i, f);
 }
@@ -2705,6 +2711,14 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f, int 
version_id)
 }
 vdev->vq[i].vring.desc = qemu_get_be64(f);
 qemu_get_be16s(f, &vdev->vq[i].last_avail_idx);
+
+qemu_get_8s(f, (uint8_t *)&vdev->vq[i].avail_wrap_counter);
+qemu_get_8s(f, (uint8_t *)&vdev->vq[i].event_wrap_counter);
+qemu_get_8s(f, (uint8_t *)&vdev->vq[i].used_wrap_counter);
+qemu_get_be16s(f, &vdev->vq[i].used_idx);
+qemu_get_be16s(f, &vdev->vq[i].shadow_avail_idx);
+qemu_get_be32s(f, &vdev->vq[i].inuse);
+
 vdev->vq[i].signalled_used_valid = false;
 vdev->vq[i].notification = true;
 
@@ -2786,6 +2800,10 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f, int 
version_id)
 virtio_queue_update_rings(vdev, i);
 }
 
+if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
+continue;
+}
+
 nheads = vring_avail_idx(&vdev->vq[i]) - 
vdev->vq[i].last_avail_idx;
 /* Check it isn't doing strange things with descriptor numbers. */
 if (nheads > vdev->vq[i].vring.num) {
-- 
1.8.3.1




[Qemu-devel] [PATCH v1 13/16] virtio: add vhost-net migration of packed ring

2018-11-22 Thread wexu
From: Wei Xu 

Tweaked vhost kernel code to test migration; the change is pasted below.

@@ -1414,64 +1430,20 @@ long vhost_vring_ioctl(struct vhost_dev
r = -EFAULT;
break;
}
+   vq->last_avail_idx = s.num & 0x7FFF;
+   /* Forget the cached index value. */
+   vq->avail_idx = vq->last_avail_idx;
+   if (vhost_has_feature(vq, VIRTIO_F_RING_PACKED)) {
+   vq->last_avail_wrap_counter = !!(s.num & 0x8000);
+   vq->avail_wrap_counter = vq->last_avail_wrap_counter;
+
+   vq->last_used_idx = (s.num >> 16) & 0x7fff;
+   vq->last_used_wrap_counter = !!(s.num & 0x80000000);
+   }
+   break;
+   case VHOST_GET_VRING_BASE:
+   s.index = idx;
+s.num = vq->last_avail_idx;
+   if (vhost_has_feature(vq, VIRTIO_F_RING_PACKED)) {
+   s.num |= vq->last_avail_wrap_counter << 15;
+   s.num |= vq->last_used_idx << 16;
+   s.num |= vq->last_used_wrap_counter << 31;
+   }
+   if (copy_to_user(argp, &s, sizeof(s)))
+   r = -EFAULT;
+   break;

Signed-off-by: Wei Xu 
---
 hw/virtio/virtio.c | 35 ++-
 include/hw/virtio/virtio.h |  4 ++--
 2 files changed, 32 insertions(+), 7 deletions(-)

diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index 64d5c04..7487d3d 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -2963,19 +2963,40 @@ hwaddr virtio_queue_get_used_size(VirtIODevice *vdev, 
int n)
 }
 }
 
-uint16_t virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n)
+int virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n)
 {
-return vdev->vq[n].last_avail_idx;
+int idx;
+
+if (virtio_host_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
+idx = vdev->vq[n].last_avail_idx;
+idx |= ((int)vdev->vq[n].avail_wrap_counter) << 15;
+idx |= (vdev->vq[n].used_idx) << 16;
+idx |= ((int)vdev->vq[n].used_wrap_counter) << 31;
+} else {
+idx = (int)vdev->vq[n].last_avail_idx;
+}
+return idx;
 }
 
-void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n, uint16_t idx)
+void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n, int idx)
 {
-vdev->vq[n].last_avail_idx = idx;
-vdev->vq[n].shadow_avail_idx = idx;
+if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
+vdev->vq[n].last_avail_idx = idx & 0x7fff;
+vdev->vq[n].avail_wrap_counter = !!(idx & 0x8000);
+vdev->vq[n].used_idx = (idx >> 16) & 0x7fff;
+vdev->vq[n].used_wrap_counter = !!(idx & 0x80000000);
+} else {
+vdev->vq[n].last_avail_idx = idx;
+vdev->vq[n].shadow_avail_idx = idx;
+}
 }
 
 void virtio_queue_restore_last_avail_idx(VirtIODevice *vdev, int n)
 {
+if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
+return;
+}
+
 rcu_read_lock();
 if (vdev->vq[n].vring.desc) {
 vdev->vq[n].last_avail_idx = vring_used_idx(>vq[n]);
@@ -2986,6 +3007,10 @@ void virtio_queue_restore_last_avail_idx(VirtIODevice 
*vdev, int n)
 
 void virtio_queue_update_used_idx(VirtIODevice *vdev, int n)
 {
+if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
+return;
+}
+
 rcu_read_lock();
 if (vdev->vq[n].vring.desc) {
 vdev->vq[n].used_idx = vring_used_idx(>vq[n]);
diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
index 9c1fa07..a6fdf3f 100644
--- a/include/hw/virtio/virtio.h
+++ b/include/hw/virtio/virtio.h
@@ -272,8 +272,8 @@ hwaddr virtio_queue_get_used_addr(VirtIODevice *vdev, int 
n);
 hwaddr virtio_queue_get_desc_size(VirtIODevice *vdev, int n);
 hwaddr virtio_queue_get_avail_size(VirtIODevice *vdev, int n);
 hwaddr virtio_queue_get_used_size(VirtIODevice *vdev, int n);
-uint16_t virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n);
-void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n, uint16_t idx);
+int virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n);
+void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n, int idx);
 void virtio_queue_restore_last_avail_idx(VirtIODevice *vdev, int n);
 void virtio_queue_invalidate_signalled_used(VirtIODevice *vdev, int n);
 void virtio_queue_update_used_idx(VirtIODevice *vdev, int n);
-- 
1.8.3.1




[Qemu-devel] [PATCH v1 10/16] virtio: event suppression support for packed ring

2018-11-22 Thread wexu
From: Wei Xu 

Signed-off-by: Wei Xu 
---
 hw/virtio/virtio.c | 121 +++--
 1 file changed, 118 insertions(+), 3 deletions(-)

diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index 99a6601..240c4e3 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -238,6 +238,30 @@ static void vring_desc_read(VirtIODevice *vdev, VRingDesc 
*desc,
 virtio_tswap16s(vdev, >next);
 }
 
+static void vring_packed_event_read(VirtIODevice *vdev,
+MemoryRegionCache *cache, VRingPackedDescEvent *e)
+{
+address_space_read_cached(cache, 0, e, sizeof(*e));
+virtio_tswap16s(vdev, >off_wrap);
+virtio_tswap16s(vdev, >flags);
+}
+
+static void vring_packed_off_wrap_write(VirtIODevice *vdev,
+MemoryRegionCache *cache, uint16_t off_wrap)
+{
+virtio_tswap16s(vdev, &off_wrap);
+address_space_write_cached(cache, 0, &off_wrap, sizeof(off_wrap));
+address_space_cache_invalidate(cache, 0, sizeof(off_wrap));
+}
+
+static void vring_packed_flags_write(VirtIODevice *vdev,
+MemoryRegionCache *cache, uint16_t flags)
+{
+virtio_tswap16s(vdev, &flags);
+address_space_write_cached(cache, sizeof(uint16_t), &flags, sizeof(flags));
+address_space_cache_invalidate(cache, sizeof(uint16_t), sizeof(flags));
+}
+
 static VRingMemoryRegionCaches *vring_get_region_caches(struct VirtQueue *vq)
 {
 VRingMemoryRegionCaches *caches = atomic_rcu_read(&vq->vring.caches);
@@ -344,7 +368,7 @@ static inline void vring_set_avail_event(VirtQueue *vq, 
uint16_t val)
 address_space_cache_invalidate(&caches->used, pa, sizeof(val));
 }
 
-void virtio_queue_set_notification(VirtQueue *vq, int enable)
+static void virtio_queue_set_notification_split(VirtQueue *vq, int enable)
 {
 vq->notification = enable;
 
@@ -367,6 +391,51 @@ void virtio_queue_set_notification(VirtQueue *vq, int 
enable)
 rcu_read_unlock();
 }
 
+static void virtio_queue_set_notification_packed(VirtQueue *vq, int enable)
+{
+VRingPackedDescEvent e;
+VRingMemoryRegionCaches *caches;
+
+rcu_read_lock();
+caches = vring_get_region_caches(vq);
+vring_packed_event_read(vq->vdev, &caches->used, &e);
+
+if (!enable) {
+e.flags = RING_EVENT_FLAGS_DISABLE;
+goto out;
+}
+
+e.flags = RING_EVENT_FLAGS_ENABLE;
+if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
+uint16_t off_wrap = vq->shadow_avail_idx | vq->event_wrap_counter << 15;
+
+vring_packed_off_wrap_write(vq->vdev, &caches->used, off_wrap);
+/* Make sure off_wrap is written before flags */
+smp_wmb();
+
+e.flags = RING_EVENT_FLAGS_DESC;
+}
+
+out:
+vring_packed_flags_write(vq->vdev, &caches->used, e.flags);
+rcu_read_unlock();
+}
+
+void virtio_queue_set_notification(VirtQueue *vq, int enable)
+{
+vq->notification = enable;
+
+if (!vq->vring.desc) {
+return;
+}
+
+if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
+virtio_queue_set_notification_packed(vq, enable);
+} else {
+virtio_queue_set_notification_split(vq, enable);
+}
+}
+
 int virtio_queue_ready(VirtQueue *vq)
 {
 return vq->vring.avail != 0;
@@ -2113,8 +2182,7 @@ static void virtio_set_isr(VirtIODevice *vdev, int value)
 }
 }
 
-/* Called within rcu_read_lock().  */
-static bool virtio_should_notify(VirtIODevice *vdev, VirtQueue *vq)
+static bool virtio_split_should_notify(VirtIODevice *vdev, VirtQueue *vq)
 {
 uint16_t old, new;
 bool v;
@@ -2137,6 +2205,53 @@ static bool virtio_should_notify(VirtIODevice *vdev, 
VirtQueue *vq)
 return !v || vring_need_event(vring_get_used_event(vq), new, old);
 }
 
+static bool vring_packed_need_event(VirtQueue *vq, bool wrap, 
+uint16_t off_wrap, uint16_t new, uint16_t old)
+{
+int off = off_wrap & ~(1 << 15);
+
+if (wrap != off_wrap >> 15) {
+off -= vq->vring.num;
+}
+
+return vring_need_event(off, new, old);
+}
+
+static bool virtio_packed_should_notify(VirtIODevice *vdev, VirtQueue *vq)
+{
+VRingPackedDescEvent e;
+uint16_t old, new;
+bool v;
+VRingMemoryRegionCaches *caches;
+
+caches = vring_get_region_caches(vq);
+vring_packed_event_read(vdev, &caches->avail, &e);
+
+old = vq->signalled_used;
+new = vq->signalled_used = vq->used_idx;
+v = vq->signalled_used_valid;
+vq->signalled_used_valid = true;
+
+if (e.flags == RING_EVENT_FLAGS_DISABLE) {
+return false;
+} else if (e.flags == RING_EVENT_FLAGS_ENABLE) {
+return true;
+}
+
+return !v || vring_packed_need_event(vq,
+vq->used_wrap_counter, e.off_wrap, new, old);
+}
+
+/* Called within rcu_read_lock().  */
+static bool virtio_should_notify(VirtIODevice *vdev, VirtQueue *vq)
+{
+if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
+return virtio_packed_should_notify(vdev, vq);
+} else {
+return virtio_split_should_notify(vdev, vq);
+}
+}

[Qemu-devel] [PATCH v1 07/16] virtio: init and desc empty check for packed ring

2018-11-22 Thread wexu
From: Wei Xu 

Ring check and other basic helpers for the packed ring.

Signed-off-by: Wei Xu 
---
 hw/virtio/virtio.c | 59 +-
 1 file changed, 58 insertions(+), 1 deletion(-)

diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index 74d9710..9d485e4 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -24,6 +24,9 @@
 #include "hw/virtio/virtio-access.h"
 #include "sysemu/dma.h"
 
+#define AVAIL_DESC_PACKED(b) ((b) << 7)
+#define USED_DESC_PACKED(b)  ((b) << 15)
+
 /*
  * The alignment to use between consumer and producer parts of vring.
  * x86 pagesize again. This is the default, used by transports like PCI
@@ -369,6 +372,25 @@ int virtio_queue_ready(VirtQueue *vq)
 return vq->vring.avail != 0;
 }
 
+static void vring_packed_desc_read_flags(VirtIODevice *vdev,
+VRingPackedDesc *desc, MemoryRegionCache *cache, int i)
+{
+address_space_read_cached(cache,
+  i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, flags),
+  &desc->flags, sizeof(desc->flags));
+virtio_tswap16s(vdev, &desc->flags);
+}
+
+static inline bool is_desc_avail(struct VRingPackedDesc *desc,
+bool wrap_counter)
+{
+bool avail, used;
+
+avail = !!(desc->flags & AVAIL_DESC_PACKED(1));
+used = !!(desc->flags & USED_DESC_PACKED(1));
+return (avail != used) && (avail == wrap_counter);
+}
+
 /* Fetch avail_idx from VQ memory only when we really need to know if
  * guest has added some buffers.
  * Called within rcu_read_lock().  */
@@ -389,7 +411,7 @@ static int virtio_queue_empty_rcu(VirtQueue *vq)
 return vring_avail_idx(vq) == vq->last_avail_idx;
 }
 
-int virtio_queue_empty(VirtQueue *vq)
+static int virtio_queue_split_empty(VirtQueue *vq)
 {
 bool empty;
 
@@ -411,6 +433,41 @@ int virtio_queue_empty(VirtQueue *vq)
 return empty;
 }
 
+static int virtio_queue_packed_empty_rcu(VirtQueue *vq)
+{
+struct VRingPackedDesc desc;
+VRingMemoryRegionCaches *cache;
+
+if (unlikely(!vq->vring.desc)) {
+return 1;
+}
+
+cache = vring_get_region_caches(vq);
+vring_packed_desc_read_flags(vq->vdev, &desc, &cache->desc,
+vq->last_avail_idx);
+
+return !is_desc_avail(&desc, vq->avail_wrap_counter);
+}
+
+static int virtio_queue_packed_empty(VirtQueue *vq)
+{
+bool empty;
+
+rcu_read_lock();
+empty = virtio_queue_packed_empty_rcu(vq);
+rcu_read_unlock();
+return empty;
+}
+
+int virtio_queue_empty(VirtQueue *vq)
+{
+if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
+return virtio_queue_packed_empty(vq);
+} else {
+return virtio_queue_split_empty(vq);
+}
+}
+
 static void virtqueue_unmap_sg(VirtQueue *vq, const VirtQueueElement *elem,
unsigned int len)
 {
-- 
1.8.3.1




[Qemu-devel] [PATCH v1 09/16] virtio: fill/flush/pop for packed ring

2018-11-22 Thread wexu
From: Wei Xu 

Signed-off-by: Wei Xu 
---
 hw/virtio/virtio.c | 290 +
 1 file changed, 273 insertions(+), 17 deletions(-)

diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index 13265e3..99a6601 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -383,6 +383,25 @@ static void vring_packed_desc_read(VirtIODevice *vdev, 
VRingPackedDesc *desc,
 virtio_tswap16s(vdev, &desc->id);
 }
 
+static void vring_packed_desc_write_data(VirtIODevice *vdev,
+VRingPackedDesc *desc, MemoryRegionCache *cache, int i)
+{
+virtio_tswap32s(vdev, &desc->len);
+virtio_tswap16s(vdev, &desc->id);
+address_space_write_cached(cache,
+  i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, id),
+  &desc->id, sizeof(desc->id));
+address_space_cache_invalidate(cache,
+  i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, id),
+  sizeof(desc->id));
+address_space_write_cached(cache,
+  i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, len),
+  &desc->len, sizeof(desc->len));
+address_space_cache_invalidate(cache,
+  i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, len),
+  sizeof(desc->len));
+}
+
 static void vring_packed_desc_read_flags(VirtIODevice *vdev,
 VRingPackedDesc *desc, MemoryRegionCache *cache, int i)
 {
@@ -392,6 +411,18 @@ static void vring_packed_desc_read_flags(VirtIODevice 
*vdev,
 virtio_tswap16s(vdev, &desc->flags);
 }
 
+static void vring_packed_desc_write_flags(VirtIODevice *vdev,
+VRingPackedDesc *desc, MemoryRegionCache *cache, int i)
+{
+virtio_tswap16s(vdev, &desc->flags);
+address_space_write_cached(cache,
+  i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, flags),
+  &desc->flags, sizeof(desc->flags));
+address_space_cache_invalidate(cache,
+  i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, flags),
+  sizeof(desc->flags));
+}
+
 static inline bool is_desc_avail(struct VRingPackedDesc *desc,
 bool wrap_counter)
 {
@@ -558,19 +589,11 @@ bool virtqueue_rewind(VirtQueue *vq, unsigned int num)
 }
 
 /* Called within rcu_read_lock().  */
-void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
+static void virtqueue_split_fill(VirtQueue *vq, const VirtQueueElement *elem,
 unsigned int len, unsigned int idx)
 {
 VRingUsedElem uelem;
 
-trace_virtqueue_fill(vq, elem, len, idx);
-
-virtqueue_unmap_sg(vq, elem, len);
-
-if (unlikely(vq->vdev->broken)) {
-return;
-}
-
 if (unlikely(!vq->vring.used)) {
 return;
 }
@@ -582,16 +605,71 @@ void virtqueue_fill(VirtQueue *vq, const VirtQueueElement 
*elem,
 vring_used_write(vq, &uelem, idx);
 }
 
-/* Called within rcu_read_lock().  */
-void virtqueue_flush(VirtQueue *vq, unsigned int count)
+static void virtqueue_packed_fill(VirtQueue *vq, const VirtQueueElement *elem,
+unsigned int len, unsigned int idx)
 {
-uint16_t old, new;
+uint16_t head;
+VRingMemoryRegionCaches *caches;
+VRingPackedDesc desc = {
+.flags = 0,
+};
+bool wrap_counter = vq->used_wrap_counter;
+
+if (unlikely(!vq->vring.desc)) {
+return;
+}
+
+caches = vring_get_region_caches(vq);
+desc.id = elem->index;
+desc.len = len;
+
+head = vq->used_idx + idx;
+if (head >= vq->vring.num) {
+head -= vq->vring.num;
+wrap_counter ^= 1;
+}
+if (wrap_counter) {
+desc.flags |= VRING_DESC_F_AVAIL;
+desc.flags |= VRING_DESC_F_USED;
+} else {
+desc.flags &= ~VRING_DESC_F_AVAIL;
+desc.flags &= ~VRING_DESC_F_USED;
+}
+
+vring_packed_desc_write_data(vq->vdev, &desc, &caches->desc, head);
+if (idx == 0) {
+/* Make sure descriptor id and len is written before
+ * flags for the first used buffer.
+ */
+smp_wmb();
+}
+
+vring_packed_desc_write_flags(vq->vdev, &desc, &caches->desc, head);
+}
+
+void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
+unsigned int len, unsigned int idx)
+{
+trace_virtqueue_fill(vq, elem, len, idx);
+
+virtqueue_unmap_sg(vq, elem, len);
 
 if (unlikely(vq->vdev->broken)) {
-vq->inuse -= count;
 return;
 }
 
+if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
+virtqueue_packed_fill(vq, elem, len, idx);
+} else {
+virtqueue_split_fill(vq, elem, len, idx);
+}
+}
+
+/* Called within rcu_read_lock().  */
+static void virtqueue_split_flush(VirtQueue *vq, unsigned int count)
+{
+uint16_t old, new;
+
 if (unlikely(!vq->vring.used)) {
 return;
 }
@@ -607,6 +685,34 @@ void virtqueue_flush(VirtQueue *vq, unsigned int count)
 vq->signalled_used_valid = false;
 }
 
+static void virtqueue_packed_flush(VirtQueue *vq, 

[Qemu-devel] [PATCH v1 06/16] virtio: init wrap counter for packed ring

2018-11-22 Thread wexu
From: Wei Xu 

Signed-off-by: Wei Xu 
---
 hw/virtio/virtio.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index 99565c6..74d9710 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -1239,6 +1239,9 @@ void virtio_reset(void *opaque)
 vdev->vq[i].last_avail_idx = 0;
 vdev->vq[i].shadow_avail_idx = 0;
 vdev->vq[i].used_idx = 0;
+vdev->vq[i].avail_wrap_counter = true;
+vdev->vq[i].event_wrap_counter = true;
+vdev->vq[i].used_wrap_counter = true;
 virtio_queue_set_vector(vdev, i, VIRTIO_NO_VECTOR);
 vdev->vq[i].signalled_used = 0;
 vdev->vq[i].signalled_used_valid = false;
-- 
1.8.3.1




[Qemu-devel] [PATCH v1 11/16] virtio-net: fill head desc after done all in a chain

2018-11-22 Thread wexu
From: Wei Xu 

With the support of marking a descriptor used/unused in the 'flags'
field for 1.1, the current way of filling chained descriptors does
not work, since the driver side may read the wrong 'num_buffers'
information if the head descriptor has been filled in while the
subsequent ones are still being processed on the device side.

This patch fills the head descriptor only after all the others are done.
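
As a hedged illustration of the ordering this enforces (names are
schematic, not the exact hunk below):

    /* Fill elements 1..n-1 first; publish the head (index 0) last so
     * the driver never observes a completed head while its siblings
     * are still pending. */
    for (i = 1; i < n; i++) {
        virtqueue_fill(vq, &elems[i], lens[i], i);
    }
    virtqueue_fill(vq, &head_elem, head_len, 0);
    virtqueue_flush(vq, n);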

Signed-off-by: Wei Xu 
---
 hw/net/virtio-net.c | 11 ++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index 385b1a0..2db0e8b 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -1198,6 +1198,8 @@ static ssize_t virtio_net_receive_rcu(NetClientState *nc, 
const uint8_t *buf,
 struct virtio_net_hdr_mrg_rxbuf mhdr;
 unsigned mhdr_cnt = 0;
 size_t offset, i, guest_offset;
+VirtQueueElement head;
+int head_len = 0;
 
 if (!virtio_net_can_receive(nc)) {
 return -1;
@@ -1275,7 +1277,13 @@ static ssize_t virtio_net_receive_rcu(NetClientState 
*nc, const uint8_t *buf,
 }
 
 /* signal other side */
-virtqueue_fill(q->rx_vq, elem, total, i++);
+if (i == 0) {
+head_len = total;
+head = *elem;
+} else {
+virtqueue_fill(q->rx_vq, elem, total, i);
+}
+i++;
 g_free(elem);
 }
 
@@ -1286,6 +1294,7 @@ static ssize_t virtio_net_receive_rcu(NetClientState *nc, 
const uint8_t *buf,
  _buffers, sizeof mhdr.num_buffers);
 }
 
+virtqueue_fill(q->rx_vq, &head, head_len, 0);
 virtqueue_flush(q->rx_vq, i);
 virtio_notify(vdev, q->rx_vq);
 
-- 
1.8.3.1




[Qemu-devel] [PATCH v1 05/16] virtio: add memory region init for packed ring

2018-11-22 Thread wexu
From: Wei Xu 

Signed-off-by: Wei Xu 
---
 hw/virtio/virtio.c | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index a41c2d3..99565c6 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -156,10 +156,8 @@ static void virtio_init_region_cache(VirtIODevice *vdev, 
int n)
 VRingMemoryRegionCaches *old = vq->vring.caches;
 VRingMemoryRegionCaches *new = NULL;
 hwaddr addr, size;
-int event_size;
 int64_t len;
-
-event_size = virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX) ? 
2 : 0;
+bool attr;
 
 addr = vq->vring.desc;
 if (!addr) {
@@ -167,14 +165,16 @@ static void virtio_init_region_cache(VirtIODevice *vdev, 
int n)
 }
 new = g_new0(VRingMemoryRegionCaches, 1);
 size = virtio_queue_get_desc_size(vdev, n);
+attr = virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED);
 len = address_space_cache_init(&new->desc, vdev->dma_as,
-   addr, size, false);
+   addr, size, attr);
 if (len < size) {
 virtio_error(vdev, "Cannot map desc");
 goto err_desc;
 }
 
-size = virtio_queue_get_used_size(vdev, n) + event_size;
+size = virtio_queue_get_used_size(vdev, n);
 len = address_space_cache_init(&new->used, vdev->dma_as,
vq->vring.used, size, true);
 if (len < size) {
@@ -182,7 +182,7 @@ static void virtio_init_region_cache(VirtIODevice *vdev, 
int n)
 goto err_used;
 }
 
-size = virtio_queue_get_avail_size(vdev, n) + event_size;
+size = virtio_queue_get_avail_size(vdev, n);
 len = address_space_cache_init(&new->avail, vdev->dma_as,
vq->vring.avail, size, false);
 if (len < size) {
-- 
1.8.3.1




[Qemu-devel] [PATCH v1 04/16] virtio: expand offset calculation for packed ring

2018-11-22 Thread wexu
From: Wei Xu 

Expand the 1.0 size calculation to 1.1 by adding the packed ring
offset calculation accordingly.
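
A worked example of the resulting sizes (my numbers, assuming a
256-entry queue with EVENT_IDX negotiated):

    split:  avail = 4 + 2 * 256 + 2 = 518 bytes
            used  = 4 + 8 * 256 + 2 = 2054 bytes
    packed: the driver and device areas are each one
            struct VRingPackedDescEvent, i.e. 2 + 2 = 4 bytes,
            independent of the queue size.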

Signed-off-by: Wei Xu 
---
 hw/virtio/virtio.c | 22 ++
 1 file changed, 18 insertions(+), 4 deletions(-)

diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index a8e737c..a41c2d3 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -2336,14 +2336,28 @@ hwaddr virtio_queue_get_desc_size(VirtIODevice *vdev, 
int n)
 
 hwaddr virtio_queue_get_avail_size(VirtIODevice *vdev, int n)
 {
-return offsetof(VRingAvail, ring) +
-sizeof(uint16_t) * vdev->vq[n].vring.num;
+int s;
+
+if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
+return sizeof(struct VRingPackedDescEvent);
+} else {
+s = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
+return offsetof(VRingAvail, ring) +
+sizeof(uint16_t) * vdev->vq[n].vring.num + s;
+}
 }
 
 hwaddr virtio_queue_get_used_size(VirtIODevice *vdev, int n)
 {
-return offsetof(VRingUsed, ring) +
-sizeof(VRingUsedElem) * vdev->vq[n].vring.num;
+int s;
+
+if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
+return sizeof(struct VRingPackedDescEvent);
+} else {
+s = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
+return offsetof(VRingUsed, ring) +
+sizeof(VRingUsedElem) * vdev->vq[n].vring.num + s;
+}
 }
 
 uint16_t virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n)
-- 
1.8.3.1




[Qemu-devel] [PATCH v1 01/16] Update version for v3.1.0-rc2 release

2018-11-22 Thread wexu
From: Peter Maydell 

Signed-off-by: Peter Maydell 
---
 VERSION | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/VERSION b/VERSION
index 3af1c22..bbcce69 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-3.0.91
+3.0.92
-- 
1.8.3.1




[Qemu-devel] [PATCH v1 08/16] virtio: get avail bytes check for packed ring

2018-11-22 Thread wexu
From: Wei Xu 

Signed-off-by: Wei Xu 
---
 hw/virtio/virtio.c | 180 +
 1 file changed, 167 insertions(+), 13 deletions(-)

diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index 9d485e4..13265e3 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -372,6 +372,17 @@ int virtio_queue_ready(VirtQueue *vq)
 return vq->vring.avail != 0;
 }
 
+static void vring_packed_desc_read(VirtIODevice *vdev, VRingPackedDesc *desc,
+MemoryRegionCache *cache, int i)
+{
+address_space_read_cached(cache, i * sizeof(VRingPackedDesc),
+  desc, sizeof(VRingPackedDesc));
+virtio_tswap16s(vdev, &desc->flags);
+virtio_tswap64s(vdev, &desc->addr);
+virtio_tswap32s(vdev, &desc->len);
+virtio_tswap16s(vdev, &desc->id);
+}
+
 static void vring_packed_desc_read_flags(VirtIODevice *vdev,
 VRingPackedDesc *desc, MemoryRegionCache *cache, int i)
 {
@@ -671,9 +682,9 @@ static int virtqueue_read_next_desc(VirtIODevice *vdev, 
VRingDesc *desc,
 return VIRTQUEUE_READ_DESC_MORE;
 }
 
-void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
-   unsigned int *out_bytes,
-   unsigned max_in_bytes, unsigned max_out_bytes)
+static void virtqueue_split_get_avail_bytes(VirtQueue *vq,
+unsigned int *in_bytes, unsigned int *out_bytes,
+unsigned max_in_bytes, unsigned max_out_bytes)
 {
 VirtIODevice *vdev = vq->vdev;
 unsigned int max, idx;
@@ -683,16 +694,6 @@ void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int 
*in_bytes,
 int64_t len = 0;
 int rc;
 
-if (unlikely(!vq->vring.desc)) {
-if (in_bytes) {
-*in_bytes = 0;
-}
-if (out_bytes) {
-*out_bytes = 0;
-}
-return;
-}
-
 rcu_read_lock();
 idx = vq->last_avail_idx;
 total_bufs = in_total = out_total = 0;
@@ -796,6 +797,159 @@ err:
 goto done;
 }
 
+static void virtqueue_packed_get_avail_bytes(VirtQueue *vq,
+unsigned int *in_bytes, unsigned int *out_bytes,
+unsigned max_in_bytes, unsigned max_out_bytes)
+{
+VirtIODevice *vdev = vq->vdev;
+unsigned int max, idx;
+unsigned int total_bufs, in_total, out_total;
+MemoryRegionCache *desc_cache;
+VRingMemoryRegionCaches *caches;
+MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
+int64_t len = 0;
+VRingPackedDesc desc;
+bool wrap_counter;
+
+rcu_read_lock();
+idx = vq->last_avail_idx;
+wrap_counter = vq->avail_wrap_counter;
+total_bufs = in_total = out_total = 0;
+
+max = vq->vring.num;
+caches = vring_get_region_caches(vq);
+if (caches->desc.len < max * sizeof(VRingPackedDesc)) {
+virtio_error(vdev, "Cannot map descriptor ring");
+goto err;
+}
+
+desc_cache = >desc;
+vring_packed_desc_read_flags(vdev, &desc, desc_cache, idx);
+while (is_desc_avail(, wrap_counter)) {
+unsigned int num_bufs;
+unsigned int i = 0;
+
+num_bufs = total_bufs;
+
+/* Make sure all the fields have been exposed. */
+smp_rmb();
+vring_packed_desc_read(vdev, &desc, desc_cache, idx);
+
+if (desc.flags & VRING_DESC_F_INDIRECT) {
+if (desc.len % sizeof(VRingPackedDesc)) {
+virtio_error(vdev, "Invalid size for indirect buffer table");
+goto err;
+}
+
+/* If we've got too many, that implies a descriptor loop. */
+if (num_bufs >= max) {
+virtio_error(vdev, "Looped descriptor");
+goto err;
+}
+
+/* loop over the indirect descriptor table */
+len = address_space_cache_init(&indirect_desc_cache,
+   vdev->dma_as,
+   desc.addr, desc.len, false);
+desc_cache = &indirect_desc_cache;
+if (len < desc.len) {
+virtio_error(vdev, "Cannot map indirect buffer");
+goto err;
+}
+
+max = desc.len / sizeof(VRingPackedDesc);
+num_bufs = i = 0;
+vring_packed_desc_read(vdev, &desc, desc_cache, i);
+}
+
+do {
+/* If we've got too many, that implies a descriptor loop. */
+if (++num_bufs > max) {
+virtio_error(vdev, "Looped descriptor");
+goto err;
+}
+
+if (desc.flags & VRING_DESC_F_WRITE) {
+in_total += desc.len;
+} else {
+out_total += desc.len;
+}
+if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
+goto done;
+}
+
+if (desc_cache == &indirect_desc_cache) {
+vring_packed_desc_read(vdev, &desc, desc_cache, i);
+ 

[Qemu-devel] [PATCH v1 00/16] packed ring virtio-net backend support

2018-11-22 Thread wexu
From: Wei Xu 

Code base:
https://github.com/Whishay/qemu.git

rfc v3 -> v1
- migration support for both userspace and vhost-net, need tweak vhost
  ioctl() to make it work(the code is pasted in the commit message of
  vhost migration patch #13).

Note:
  the high 32-bit guest feature bits are saved as a subsection for
  virtio devices, which makes the packed ring feature bit check unusable
  when loading the saved per-queue variables (this is done before loading
  the subsection, which is the last action for a device during migration),
  so I save and load everything unconditionally for now; any idea how to
  fix this?

- Fixed comments from Jason for rfc v3, sorted by patch #; two comments I
  didn't take are listed here (with the patch they came from):
09: - introduce a new API (virtqueue_fill_n()).
  - Didn't take it since the userspace backend does not support batching,
so only one element is popped and the current API should be enough.
06 & 07: Refactor split and packed pop()/get_avail_bytes().
 - the duplicated code is intertwined with split/packed ring specific
   things and that might make it unclear, so I only extracted the few
   common parts outside the rcu section and kept the others separate.

The other revised comments:
02: - reuse current 'avail/used' for 'driver/device' in VRingMemoryRegionCache.
- remove event_idx since shadow_avail_idx works.
03: - move size recalculation to a separate patch.
- keep 'avail/used' in current calculation function name.
- initialize 'desc' memory region as 'false' for 1.0 ('true' for 1.1)
04: - delete 'event_idx'
05: - rename 'wc' to wrap_counter.
06: - converge the common part outside the rcu section for 1.0/1.1.
- move the memory barrier for the first 'desc' in between checking the
  flag and reading the other fields.
- remove unnecessary memory barriers for indirect descriptors.
- no need to destroy the indirect memory cache since it is generally done
  before returning from the function.
- remove the redundant check on the maximum number of chained descriptors.
- there are some differences (desc name, wrap idx/counter, flags) between
  split and packed rings, so keep them separate for now.
- amend the comment when recording the index and wrap counter for a kick
  from the guest.
07: - calculate fields in the descriptor instead of reading them when filling.
- put the memory barrier correctly before filling the flags in the descriptor.
- replace the full memory barrier with a write barrier in fill.
- read the descriptor flags and the descriptor body only as needed, and
  separately, in packed_pop().
- correct the memory barrier in packed_pop() as in packed_fill().
08: - reuse 'shadow_avail_idx' instead of adding a new 'event_idx'.
- use the compact and verified vring_packed_need_event()
  version for vhost net/user.
12: - remove the odd cherry-pick comment.
- use bit '15' for the wrap counter.

rfc v2->v3
- addressed performance issue
- fixed feedback from v2

rfc v1->v2
- sync to tiwei's v5
- reuse memory cache function with 1.0
- dropped detach patch and notification helper(04 & 05 in v1)
- guest virtio-net driver unload/reload support
- event suppression support(not tested)
- addressed feedback from v1

Wei Xu (15):
  virtio: introduce packed ring definitions
  virtio: redefine structure & memory cache for packed ring
  virtio: expand offset calculation for packed ring
  virtio: add memory region init for packed ring
  virtio: init wrap counter for packed ring
  virtio: init and desc empty check for packed ring
  virtio: get avail bytes check for packed ring
  virtio: fill/flush/pop for packed ring
  virtio: event suppression support for packed ring
  virtio-net: fill head desc after done all in a chain
  virtio: add userspace migration of packed ring
  virtio: add vhost-net migration of packed ring
  virtio: packed ring feature bit for userspace backend
  vhost: enable packed ring
  virtio: enable packed ring via a new command line

 VERSION|   2 +-
 hw/net/vhost_net.c |   2 +
 hw/net/virtio-net.c|  11 +-
 hw/virtio/virtio.c | 756 +++--
 include/hw/virtio/virtio.h |   8 +-
 include/standard-headers/linux/virtio_config.h |  15 +
 include/standard-headers/linux/virtio_ring.h   |  43 ++
 7 files changed, 783 insertions(+), 54 deletions(-)

-- 
1.8.3.1




[Qemu-devel] [PATCH v1 03/16] virtio: redefine structure & memory cache for packed ring

2018-11-22 Thread wexu
From: Wei Xu 

Redefine the packed ring structure according to QEMU nomenclature;
field data (wrap counter, etc.) are introduced as well.

Signed-off-by: Wei Xu 
---
 hw/virtio/virtio.c | 16 
 1 file changed, 16 insertions(+)

diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index 4136d23..a8e737c 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -39,6 +39,13 @@ typedef struct VRingDesc
 uint16_t next;
 } VRingDesc;
 
+typedef struct VRingPackedDesc {
+uint64_t addr;
+uint32_t len;
+uint16_t id;
+uint16_t flags;
+} VRingPackedDesc;
+
 typedef struct VRingAvail
 {
 uint16_t flags;
@@ -77,6 +84,11 @@ typedef struct VRing
 VRingMemoryRegionCaches *caches;
 } VRing;
 
+typedef struct VRingPackedDescEvent {
+uint16_t off_wrap;
+uint16_t flags;
+} VRingPackedDescEvent ;
+
 struct VirtQueue
 {
 VRing vring;
@@ -87,7 +99,11 @@ struct VirtQueue
 /* Last avail_idx read from VQ. */
 uint16_t shadow_avail_idx;
 
+bool event_wrap_counter;
+bool avail_wrap_counter;
+
 uint16_t used_idx;
+bool used_wrap_counter;
 
 /* Last used index value we have signalled on */
 uint16_t signalled_used;
-- 
1.8.3.1




[Qemu-devel] [PATCH v1 02/16] virtio: introduce packed ring definitions

2018-11-22 Thread wexu
From: Wei Xu 

From 1.1 spec.

Signed-off-by: Wei Xu 
---
 include/standard-headers/linux/virtio_config.h | 15 +
 include/standard-headers/linux/virtio_ring.h   | 43 ++
 2 files changed, 58 insertions(+)

diff --git a/include/standard-headers/linux/virtio_config.h 
b/include/standard-headers/linux/virtio_config.h
index 0b19436..9f450fd 100644
--- a/include/standard-headers/linux/virtio_config.h
+++ b/include/standard-headers/linux/virtio_config.h
@@ -75,6 +75,21 @@
  */
 #define VIRTIO_F_IOMMU_PLATFORM33
 
+/* This feature indicates support for the packed virtqueue layout. */
+#define VIRTIO_F_RING_PACKED   34
+
+/* Enable events */
+#define RING_EVENT_FLAGS_ENABLE 0x0
+/* Disable events */
+#define RING_EVENT_FLAGS_DISABLE 0x1
+/*
+ * Enable events for a specific descriptor
+ * (as specified by Descriptor Ring Change Event Offset/Wrap Counter).
+ * Only valid if VIRTIO_RING_F_EVENT_IDX has been negotiated.
+ */
+#define RING_EVENT_FLAGS_DESC 0x2
+/* The value 0x3 is reserved */
+
 /*
  * Does the device support Single Root I/O Virtualization?
  */
diff --git a/include/standard-headers/linux/virtio_ring.h 
b/include/standard-headers/linux/virtio_ring.h
index d26e72b..1719c6f 100644
--- a/include/standard-headers/linux/virtio_ring.h
+++ b/include/standard-headers/linux/virtio_ring.h
@@ -42,6 +42,10 @@
 /* This means the buffer contains a list of buffer descriptors. */
 #define VRING_DESC_F_INDIRECT  4
 
+/* Mark a descriptor as available or used. */
+#define VRING_DESC_F_AVAIL (1ul << 7)
+#define VRING_DESC_F_USED  (1ul << 15)
+
 /* The Host uses this in used->flags to advise the Guest: don't kick me when
  * you add a buffer.  It's unreliable, so it's simply an optimization.  Guest
  * will still kick if it's out of buffers. */
@@ -51,6 +55,17 @@
  * optimization.  */
 #define VRING_AVAIL_F_NO_INTERRUPT 1
 
+/* Enable events. */
+#define VRING_EVENT_F_ENABLE   0x0
+/* Disable events. */
+#define VRING_EVENT_F_DISABLE  0x1
+/*
+ * Enable events for a specific descriptor
+ * (as specified by Descriptor Ring Change Event Offset/Wrap Counter).
+ * Only valid if VIRTIO_RING_F_EVENT_IDX has been negotiated.
+ */
+#define VRING_EVENT_F_DESC 0x2
+
 /* We support indirect buffer descriptors */
 #define VIRTIO_RING_F_INDIRECT_DESC28
 
@@ -169,4 +184,32 @@ static inline int vring_need_event(uint16_t event_idx, 
uint16_t new_idx, uint16_
return (uint16_t)(new_idx - event_idx - 1) < (uint16_t)(new_idx - old);
 }
 
+struct vring_packed_desc_event {
+   /* Descriptor Ring Change Event Offset/Wrap Counter. */
+   __virtio16 off_wrap;
+   /* Descriptor Ring Change Event Flags. */
+   __virtio16 flags;
+};
+
+struct vring_packed_desc {
+   /* Buffer Address. */
+   __virtio64 addr;
+   /* Buffer Length. */
+   __virtio32 len;
+   /* Buffer ID. */
+   __virtio16 id;
+   /* The flags depending on descriptor type. */
+   __virtio16 flags;
+};
+
+struct vring_packed {
+   unsigned int num;
+
+   struct vring_packed_desc *desc;
+
+   struct vring_packed_desc_event *driver;
+
+   struct vring_packed_desc_event *device;
+};
+
 #endif /* _LINUX_VIRTIO_RING_H */
-- 
1.8.3.1




[Qemu-devel] [[RFC v3 09/12] virtio-net: fill head desc after done all in a chain

2018-10-11 Thread wexu
From: Wei Xu 

With the support of marking a descriptor used/unused in the 'flags'
field for 1.1, the current way of filling chained descriptors does
not work, since the driver side may read the wrong 'num_buffers'
information if the head descriptor has been filled in while the
subsequent ones are still being processed on the device side.

This patch fills the head descriptor only after all the others are done.

Signed-off-by: Wei Xu 
---
 hw/net/virtio-net.c | 11 ++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index 4bdd5b8..186c86cd2 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -1198,6 +1198,8 @@ static ssize_t virtio_net_receive_rcu(NetClientState *nc, 
const uint8_t *buf,
 struct virtio_net_hdr_mrg_rxbuf mhdr;
 unsigned mhdr_cnt = 0;
 size_t offset, i, guest_offset;
+VirtQueueElement head;
+int head_len = 0;
 
 if (!virtio_net_can_receive(nc)) {
 return -1;
@@ -1275,7 +1277,13 @@ static ssize_t virtio_net_receive_rcu(NetClientState 
*nc, const uint8_t *buf,
 }
 
 /* signal other side */
-virtqueue_fill(q->rx_vq, elem, total, i++);
+if (i == 0) {
+head_len = total;
+head = *elem;
+} else {
+virtqueue_fill(q->rx_vq, elem, total, i);
+}
+i++;
 g_free(elem);
 }
 
@@ -1286,6 +1294,7 @@ static ssize_t virtio_net_receive_rcu(NetClientState *nc, 
const uint8_t *buf,
  _buffers, sizeof mhdr.num_buffers);
 }
 
+virtqueue_fill(q->rx_vq, &head, head_len, 0);
 virtqueue_flush(q->rx_vq, i);
 virtio_notify(vdev, q->rx_vq);
 
-- 
1.8.3.1




[Qemu-devel] [[RFC v3 10/12] virtio: packed ring feature bit for userspace backend

2018-10-11 Thread wexu
From: Wei Xu 

Signed-off-by: Wei Xu 
---
 hw/net/vhost_net.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
index e037db6..fb4b18f 100644
--- a/hw/net/vhost_net.c
+++ b/hw/net/vhost_net.c
@@ -78,6 +78,7 @@ static const int user_feature_bits[] = {
 VIRTIO_NET_F_MRG_RXBUF,
 VIRTIO_NET_F_MTU,
 VIRTIO_F_IOMMU_PLATFORM,
+VIRTIO_F_RING_PACKED,
 
 /* This bit implies RARP isn't sent by QEMU out of band */
 VIRTIO_NET_F_GUEST_ANNOUNCE,
-- 
1.8.3.1




[Qemu-devel] [[RFC v3 12/12] virtio: feature vhost-net support for packed ring

2018-10-11 Thread wexu
From: Wei Xu 

(cherry picked from commit 305a2c4640c15c5717245067ab937fd10f478ee6)
Signed-off-by: Wei Xu 
(cherry picked from commit 46476dae6f44c6fef8802a4a0ac7d0d79fe399e3)
Signed-off-by: Wei Xu 
---
 hw/virtio/vhost.c  | 3 +++
 hw/virtio/virtio.c | 4 
 include/hw/virtio/virtio.h | 1 +
 3 files changed, 8 insertions(+)

diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
index 9df2da3..de06d55 100644
--- a/hw/virtio/vhost.c
+++ b/hw/virtio/vhost.c
@@ -974,6 +974,9 @@ static int vhost_virtqueue_start(struct vhost_dev *dev,
 }
 
 state.num = virtio_queue_get_last_avail_idx(vdev, idx);
+if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
+state.num |= ((int)virtio_queue_packed_get_wc(vdev, idx)) << 31;
+}
 r = dev->vhost_ops->vhost_set_vring_base(dev, &state);
 if (r) {
 VHOST_OPS_DEBUG("vhost_set_vring_base failed");
diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index 1d25776..2a90163 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -2894,6 +2894,10 @@ void virtio_init(VirtIODevice *vdev, const char *name,
 vdev->use_guest_notifier_mask = true;
 }
 
+bool virtio_queue_packed_get_wc(VirtIODevice *vdev, int n)
+{
+return vdev->vq[n].avail_wrap_counter;
+}
 hwaddr virtio_queue_get_desc_addr(VirtIODevice *vdev, int n)
 {
 return vdev->vq[n].vring.desc;
diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
index 9af8839..0bb3be5 100644
--- a/include/hw/virtio/virtio.h
+++ b/include/hw/virtio/virtio.h
@@ -295,6 +295,7 @@ void virtio_queue_aio_set_host_notifier_handler(VirtQueue 
*vq, AioContext *ctx,
 VirtIOHandleAIOOutput 
handle_output);
 VirtQueue *virtio_vector_first_queue(VirtIODevice *vdev, uint16_t vector);
 VirtQueue *virtio_vector_next_queue(VirtQueue *vq);
+bool virtio_queue_packed_get_wc(VirtIODevice *vdev, int n);
 
 static inline void virtio_add_feature(uint64_t *features, unsigned int fbit)
 {
-- 
1.8.3.1




[Qemu-devel] [[RFC v3 06/12] virtio: get avail bytes check for packed ring

2018-10-11 Thread wexu
From: Wei Xu 

Same thought as 1.0, except it was a bit confusing to reuse
'shadow_avail_idx', so the interrelated new event_idx and the wrap
counter for notifications have been introduced in the previous patch.
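
A hedged sketch of the off_wrap encoding this relies on (bit 15 carries
the wrap counter, the low 15 bits the event index):

    uint16_t off_wrap = event_idx | (wrap_counter << 15);  /* compose   */
    uint16_t off      = off_wrap & 0x7fff;                 /* event idx */
    bool     wrap     = off_wrap >> 15;                    /* counter   */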

Signed-off-by: Wei Xu 
---
 hw/virtio/virtio.c | 176 -
 1 file changed, 173 insertions(+), 3 deletions(-)

diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index 86f88da..13c6c98 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -375,6 +375,17 @@ int virtio_queue_ready(VirtQueue *vq)
 return vq->vring.avail != 0;
 }
 
+static void vring_packed_desc_read(VirtIODevice *vdev, VRingPackedDesc *desc,
+MemoryRegionCache *cache, int i)
+{
+address_space_read_cached(cache, i * sizeof(VRingPackedDesc),
+  desc, sizeof(VRingPackedDesc));
+virtio_tswap16s(vdev, &desc->flags);
+virtio_tswap64s(vdev, &desc->addr);
+virtio_tswap32s(vdev, &desc->len);
+virtio_tswap16s(vdev, &desc->id);
+}
+
 static void vring_packed_desc_read_flags(VirtIODevice *vdev,
 VRingPackedDesc *desc, MemoryRegionCache *cache, int i)
 {
@@ -672,9 +683,9 @@ static int virtqueue_read_next_desc(VirtIODevice *vdev, 
VRingDesc *desc,
 return VIRTQUEUE_READ_DESC_MORE;
 }
 
-void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
-   unsigned int *out_bytes,
-   unsigned max_in_bytes, unsigned max_out_bytes)
+static void virtqueue_split_get_avail_bytes(VirtQueue *vq,
+unsigned int *in_bytes, unsigned int *out_bytes,
+unsigned max_in_bytes, unsigned max_out_bytes)
 {
 VirtIODevice *vdev = vq->vdev;
 unsigned int max, idx;
@@ -797,6 +808,165 @@ err:
 goto done;
 }
 
+static void virtqueue_packed_get_avail_bytes(VirtQueue *vq,
+unsigned int *in_bytes, unsigned int *out_bytes,
+unsigned max_in_bytes, unsigned max_out_bytes)
+{
+VirtIODevice *vdev = vq->vdev;
+unsigned int max, idx;
+unsigned int total_bufs, in_total, out_total;
+MemoryRegionCache *desc_cache;
+VRingMemoryRegionCaches *caches;
+MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
+int64_t len = 0;
+VRingPackedDesc desc;
+bool wrap_counter;
+
+if (unlikely(!vq->vring.desc)) {
+if (in_bytes) {
+*in_bytes = 0;
+}
+if (out_bytes) {
+*out_bytes = 0;
+}
+return;
+}
+
+rcu_read_lock();
+idx = vq->last_avail_idx;
+wrap_counter = vq->avail_wrap_counter;
+total_bufs = in_total = out_total = 0;
+
+max = vq->vring.num;
+caches = vring_get_region_caches(vq);
+if (caches->desc.len < max * sizeof(VRingPackedDesc)) {
+virtio_error(vdev, "Cannot map descriptor ring");
+goto err;
+}
+
+desc_cache = >desc;
+vring_packed_desc_read(vdev, &desc, desc_cache, idx);
+/* Make sure we see all the fields */
+smp_rmb();
+while (is_desc_avail(, wrap_counter)) {
+unsigned int num_bufs;
+unsigned int i = 0;
+
+num_bufs = total_bufs;
+
+if (desc.flags & VRING_DESC_F_INDIRECT) {
+if (desc.len % sizeof(VRingPackedDesc)) {
+virtio_error(vdev, "Invalid size for indirect buffer table");
+goto err;
+}
+
+/* If we've got too many, that implies a descriptor loop. */
+if (num_bufs >= max) {
+virtio_error(vdev, "Looped descriptor");
+goto err;
+}
+
+/* loop over the indirect descriptor table */
+len = address_space_cache_init(&indirect_desc_cache,
+   vdev->dma_as,
+   desc.addr, desc.len, false);
+desc_cache = &indirect_desc_cache;
+if (len < desc.len) {
+virtio_error(vdev, "Cannot map indirect buffer");
+goto err;
+}
+
+max = desc.len / sizeof(VRingPackedDesc);
+num_bufs = i = 0;
+vring_packed_desc_read(vdev, &desc, desc_cache, i);
+/* Make sure we see all the fields */
+smp_rmb();
+}
+
+do {
+/* If we've got too many, that implies a descriptor loop. */
+if (++num_bufs > max) {
+virtio_error(vdev, "Looped descriptor");
+goto err;
+}
+
+if (desc.flags & VRING_DESC_F_WRITE) {
+in_total += desc.len;
+} else {
+out_total += desc.len;
+}
+if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
+goto done;
+}
+
+if (desc_cache == &indirect_desc_cache) {
+if (++i > vq->vring.num) {
+virtio_error(vdev, "Looped descriptor");
+   

[Qemu-devel] [[RFC v3 08/12] virtio: event suppression support for packed ring

2018-10-11 Thread wexu
From: Wei Xu 

Signed-off-by: Wei Xu 
---
 hw/virtio/virtio.c | 126 +++--
 1 file changed, 123 insertions(+), 3 deletions(-)

diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index d12a7e3..1d25776 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -241,6 +241,30 @@ static void vring_desc_read(VirtIODevice *vdev, VRingDesc 
*desc,
 virtio_tswap16s(vdev, >next);
 }
 
+static void vring_packed_event_read(VirtIODevice *vdev,
+MemoryRegionCache *cache, VRingPackedDescEvent *e)
+{
+address_space_read_cached(cache, 0, e, sizeof(*e));
+virtio_tswap16s(vdev, &e->off_wrap);
+virtio_tswap16s(vdev, &e->flags);
+}
+
+static void vring_packed_off_wrap_write(VirtIODevice *vdev,
+MemoryRegionCache *cache, uint16_t off_wrap)
+{
+virtio_tswap16s(vdev, &off_wrap);
+address_space_write_cached(cache, 0, &off_wrap, sizeof(off_wrap));
+address_space_cache_invalidate(cache, 0, sizeof(off_wrap));
+}
+
+static void vring_packed_flags_write(VirtIODevice *vdev,
+MemoryRegionCache *cache, uint16_t flags)
+{
+virtio_tswap16s(vdev, &flags);
+address_space_write_cached(cache, sizeof(uint16_t), &flags, sizeof(flags));
+address_space_cache_invalidate(cache, sizeof(uint16_t), sizeof(flags));
+}
+
 static VRingMemoryRegionCaches *vring_get_region_caches(struct VirtQueue *vq)
 {
 VRingMemoryRegionCaches *caches = atomic_rcu_read(&vq->vring.caches);
@@ -347,7 +371,7 @@ static inline void vring_set_avail_event(VirtQueue *vq, 
uint16_t val)
 address_space_cache_invalidate(&caches->used, pa, sizeof(val));
 }
 
-void virtio_queue_set_notification(VirtQueue *vq, int enable)
+static void virtio_queue_set_notification_split(VirtQueue *vq, int enable)
 {
 vq->notification = enable;
 
@@ -370,6 +394,51 @@ void virtio_queue_set_notification(VirtQueue *vq, int 
enable)
 rcu_read_unlock();
 }
 
+static void virtio_queue_set_notification_packed(VirtQueue *vq, int enable)
+{
+VRingPackedDescEvent e;
+VRingMemoryRegionCaches *caches;
+
+rcu_read_lock();
+caches = vring_get_region_caches(vq);
+vring_packed_event_read(vq->vdev, &caches->device, &e);
+
+if (!enable) {
+e.flags = RING_EVENT_FLAGS_DISABLE;
+goto out;
+}
+
+e.flags = RING_EVENT_FLAGS_ENABLE;
+if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
+uint16_t off_wrap = vq->event_idx | vq->event_wrap_counter << 15;
+
+vring_packed_off_wrap_write(vq->vdev, &caches->device, off_wrap);
+/* Make sure off_wrap is written before flags */
+smp_wmb();
+
+e.flags = RING_EVENT_FLAGS_DESC;
+}
+
+out:
+vring_packed_flags_write(vq->vdev, &caches->device, e.flags);
+rcu_read_unlock();
+}
+
+void virtio_queue_set_notification(VirtQueue *vq, int enable)
+{
+vq->notification = enable;
+
+if (!vq->vring.desc) {
+return;
+}
+
+if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
+virtio_queue_set_notification_packed(vq, enable);
+} else {
+virtio_queue_set_notification_split(vq, enable);
+}
+}
+
 int virtio_queue_ready(VirtQueue *vq)
 {
 return vq->vring.avail != 0;
@@ -2103,8 +2172,7 @@ static void virtio_set_isr(VirtIODevice *vdev, int value)
 }
 }
 
-/* Called within rcu_read_lock().  */
-static bool virtio_should_notify(VirtIODevice *vdev, VirtQueue *vq)
+static bool virtio_split_should_notify(VirtIODevice *vdev, VirtQueue *vq)
 {
 uint16_t old, new;
 bool v;
@@ -2127,6 +2195,58 @@ static bool virtio_should_notify(VirtIODevice *vdev, 
VirtQueue *vq)
 return !v || vring_need_event(vring_get_used_event(vq), new, old);
 }
 
+static bool vring_packed_need_event(VirtQueue *vq, uint16_t off_wrap,
+uint16_t new, uint16_t old)
+{
+bool wrap = vq->event_wrap_counter;
+int off = off_wrap & ~(1 << 15);
+
+if (new < old) {
+new += vq->vring.num;
+wrap ^= 1;
+}
+
+if (wrap != off_wrap >> 15) {
+off += vq->vring.num;
+}
+
+return vring_need_event(off, new, old);
+}
+
+static bool virtio_packed_should_notify(VirtIODevice *vdev, VirtQueue *vq)
+{
+VRingPackedDescEvent e;
+uint16_t old, new;
+bool v;
+VRingMemoryRegionCaches *caches;
+
+caches = vring_get_region_caches(vq);
+vring_packed_event_read(vdev, &caches->driver, &e);
+
+old = vq->signalled_used;
+new = vq->signalled_used = vq->used_idx;
+v = vq->signalled_used_valid;
+vq->signalled_used_valid = true;
+
+if (e.flags == RING_EVENT_FLAGS_DISABLE) {
+return false;
+} else if (e.flags == RING_EVENT_FLAGS_ENABLE) {
+return true;
+}
+
+return !v || vring_packed_need_event(vq, e.off_wrap, new, old);
+}
+
+/* Called within rcu_read_lock().  */
+static bool virtio_should_notify(VirtIODevice *vdev, VirtQueue *vq)
+{
+if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
+return virtio_packed_should_notify(vdev, vq);
+} else {
+return virtio_split_should_notify(vdev, vq);
+}
+}

[Qemu-devel] [[RFC v3 04/12] virtio: init wrap counter for packed ring

2018-10-11 Thread wexu
From: Wei Xu 

Signed-off-by: Wei Xu 
---
 hw/virtio/virtio.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index bfb3364..9185efb 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -1243,6 +1243,9 @@ void virtio_reset(void *opaque)
 vdev->vq[i].last_avail_idx = 0;
 vdev->vq[i].shadow_avail_idx = 0;
 vdev->vq[i].used_idx = 0;
+vdev->vq[i].avail_wrap_counter = true;
+vdev->vq[i].event_idx = 0;
+vdev->vq[i].event_wrap_counter = true;
 virtio_queue_set_vector(vdev, i, VIRTIO_NO_VECTOR);
 vdev->vq[i].signalled_used = 0;
 vdev->vq[i].signalled_used_valid = false;
-- 
1.8.3.1




[Qemu-devel] [[RFC v3 11/12] virtio: enable packed ring via a new command line

2018-10-11 Thread wexu
From: Wei Xu 

Only the userspace virtio-net backend is supported by the CLI so far.
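
An illustrative invocation, assuming the 'ring_packed' property added
below is exposed on virtio-net-pci (the netdev options are examples
only, not part of this patch):

    qemu-system-x86_64 ... \
        -netdev user,id=net0 \
        -device virtio-net-pci,netdev=net0,ring_packed=on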

(cherry picked from commit 0b3ec96f4a9402cca467c40353066e57608ac6b6)
Signed-off-by: Wei Xu 
(cherry picked from commit a1a3b85f00299ccc6f4bc819abe470da88059fb7)
Signed-off-by: Wei Xu 
---
 include/hw/virtio/virtio.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
index e323e76..9af8839 100644
--- a/include/hw/virtio/virtio.h
+++ b/include/hw/virtio/virtio.h
@@ -264,7 +264,9 @@ typedef struct VirtIORNGConf VirtIORNGConf;
 DEFINE_PROP_BIT64("any_layout", _state, _field, \
   VIRTIO_F_ANY_LAYOUT, true), \
 DEFINE_PROP_BIT64("iommu_platform", _state, _field, \
-  VIRTIO_F_IOMMU_PLATFORM, false)
+  VIRTIO_F_IOMMU_PLATFORM, false), \
+DEFINE_PROP_BIT64("ring_packed", _state, _field, \
+  VIRTIO_F_RING_PACKED, false)
 
 hwaddr virtio_queue_get_desc_addr(VirtIODevice *vdev, int n);
 hwaddr virtio_queue_get_avail_addr(VirtIODevice *vdev, int n);
-- 
1.8.3.1




[Qemu-devel] [[RFC v3 02/12] virtio: redefine structure & memory cache for packed ring

2018-10-11 Thread wexu
From: Wei Xu 

Redefine the packed ring structure according to QEMU nomenclature;
supporting data (event index, wrap counter, etc.) are also introduced.

Signed-off-by: Wei Xu 
---
 hw/virtio/virtio.c | 26 --
 1 file changed, 24 insertions(+), 2 deletions(-)

diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index 94f5c8e..500eecf 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -39,6 +39,13 @@ typedef struct VRingDesc
 uint16_t next;
 } VRingDesc;
 
+typedef struct VRingPackedDesc {
+uint64_t addr;
+uint32_t len;
+uint16_t id;
+uint16_t flags;
+} VRingPackedDesc;
+
 typedef struct VRingAvail
 {
 uint16_t flags;
@@ -62,8 +69,14 @@ typedef struct VRingUsed
 typedef struct VRingMemoryRegionCaches {
 struct rcu_head rcu;
 MemoryRegionCache desc;
-MemoryRegionCache avail;
-MemoryRegionCache used;
+union {
+MemoryRegionCache avail;
+MemoryRegionCache driver;
+};
+union {
+MemoryRegionCache used;
+MemoryRegionCache device;
+};
 } VRingMemoryRegionCaches;
 
 typedef struct VRing
@@ -77,6 +90,11 @@ typedef struct VRing
 VRingMemoryRegionCaches *caches;
 } VRing;
 
+typedef struct VRingPackedDescEvent {
+uint16_t off_wrap;
+uint16_t flags;
+} VRingPackedDescEvent ;
+
 struct VirtQueue
 {
 VRing vring;
@@ -87,6 +105,10 @@ struct VirtQueue
 /* Last avail_idx read from VQ. */
 uint16_t shadow_avail_idx;
 
+uint16_t event_idx;
+bool event_wrap_counter;
+bool avail_wrap_counter;
+
 uint16_t used_idx;
 
 /* Last used index value we have signalled on */
-- 
1.8.3.1




[Qemu-devel] [[RFC v3 07/12] virtio: fill/flush/pop for packed ring

2018-10-11 Thread wexu
From: Wei Xu 

Signed-off-by: Wei Xu 
---
 hw/virtio/virtio.c | 258 ++---
 1 file changed, 244 insertions(+), 14 deletions(-)

diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index 13c6c98..d12a7e3 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -386,6 +386,21 @@ static void vring_packed_desc_read(VirtIODevice *vdev, 
VRingPackedDesc *desc,
 virtio_tswap16s(vdev, >id);
 }
 
+static void vring_packed_desc_write(VirtIODevice *vdev, VRingPackedDesc *desc,
+MemoryRegionCache *cache, int i)
+{
+virtio_tswap64s(vdev, &desc->addr);
+virtio_tswap32s(vdev, &desc->len);
+virtio_tswap16s(vdev, &desc->id);
+virtio_tswap16s(vdev, &desc->flags);
+address_space_write_cached(cache,
+   sizeof(VRingPackedDesc) * i, desc,
+   sizeof(VRingPackedDesc));
+address_space_cache_invalidate(cache,
+   sizeof(VRingPackedDesc) * i,
+   sizeof(VRingPackedDesc));
+}
+
 static void vring_packed_desc_read_flags(VirtIODevice *vdev,
 VRingPackedDesc *desc, MemoryRegionCache *cache, int i)
 {
@@ -559,19 +574,11 @@ bool virtqueue_rewind(VirtQueue *vq, unsigned int num)
 }
 
 /* Called within rcu_read_lock().  */
-void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
+static void virtqueue_split_fill(VirtQueue *vq, const VirtQueueElement *elem,
 unsigned int len, unsigned int idx)
 {
 VRingUsedElem uelem;
 
-trace_virtqueue_fill(vq, elem, len, idx);
-
-virtqueue_unmap_sg(vq, elem, len);
-
-if (unlikely(vq->vdev->broken)) {
-return;
-}
-
 if (unlikely(!vq->vring.used)) {
 return;
 }
@@ -583,16 +590,64 @@ void virtqueue_fill(VirtQueue *vq, const VirtQueueElement 
*elem,
 vring_used_write(vq, &uelem, idx);
 }
 
-/* Called within rcu_read_lock().  */
-void virtqueue_flush(VirtQueue *vq, unsigned int count)
+static void virtqueue_packed_fill(VirtQueue *vq, const VirtQueueElement *elem,
+unsigned int len, unsigned int idx)
 {
-uint16_t old, new;
+uint16_t w, head;
+VRingMemoryRegionCaches *caches;
+VRingPackedDesc desc = {
+.addr = 0,
+.flags = 0,
+};
+
+if (unlikely(!vq->vring.desc)) {
+return;
+}
+
+caches = vring_get_region_caches(vq);
+head = vq->used_idx + idx;
+head = head >= vq->vring.num ? (head - vq->vring.num) : head;
+vring_packed_desc_read(vq->vdev, &desc, &caches->desc, head);
+
+w = (desc.flags & AVAIL_DESC_PACKED(1)) >> 7;
+desc.flags &= ~(AVAIL_DESC_PACKED(1) | USED_DESC_PACKED(1));
+desc.flags |= AVAIL_DESC_PACKED(w) | USED_DESC_PACKED(w);
+if (!(desc.flags & VRING_DESC_F_INDIRECT)) {
+if (!(desc.flags & VRING_DESC_F_WRITE)) {
+desc.len = 0;
+} else {
+desc.len = len;
+}
+}
+vring_packed_desc_write(vq->vdev, &desc, &caches->desc, head);
+
+/* Make sure flags has been updated */
+smp_mb();
+}
+
+void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
+unsigned int len, unsigned int idx)
+{
+trace_virtqueue_fill(vq, elem, len, idx);
+
+virtqueue_unmap_sg(vq, elem, len);
 
 if (unlikely(vq->vdev->broken)) {
-vq->inuse -= count;
 return;
 }
 
+if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
+virtqueue_packed_fill(vq, elem, len, idx);
+} else {
+virtqueue_split_fill(vq, elem, len, idx);
+}
+}
+
+/* Called within rcu_read_lock().  */
+static void virtqueue_split_flush(VirtQueue *vq, unsigned int count)
+{
+uint16_t old, new;
+
 if (unlikely(!vq->vring.used)) {
 return;
 }
@@ -608,6 +663,33 @@ void virtqueue_flush(VirtQueue *vq, unsigned int count)
 vq->signalled_used_valid = false;
 }
 
+static void virtqueue_packed_flush(VirtQueue *vq, unsigned int count)
+{
+if (unlikely(!vq->vring.desc)) {
+return;
+}
+
+vq->inuse -= count;
+vq->used_idx += count;
+if (vq->used_idx >= vq->vring.num) {
+vq->used_idx -= vq->vring.num;
+}
+}
+
+void virtqueue_flush(VirtQueue *vq, unsigned int count)
+{
+if (unlikely(vq->vdev->broken)) {
+vq->inuse -= count;
+return;
+}
+
+if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
+virtqueue_packed_flush(vq, count);
+} else {
+virtqueue_split_flush(vq, count);
+}
+}
+
 void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem,
 unsigned int len)
 {
@@ -1091,7 +1173,7 @@ static void *virtqueue_alloc_element(size_t sz, unsigned 
out_num, unsigned in_nu
 return elem;
 }
 
-void *virtqueue_pop(VirtQueue *vq, size_t sz)
+static void *virtqueue_split_pop(VirtQueue *vq, size_t sz)
 {
 unsigned int i, head, max;
 VRingMemoryRegionCaches *caches;
@@ -1226,6 +1308,154 @@ err_undo_map:
 goto done;
 }
 

[Qemu-devel] [[RFC v3 01/12] virtio: introduce packed ring definitions

2018-10-11 Thread wexu
From: Wei Xu 

Sync from the 1.1 spec.

Signed-off-by: Wei Xu 
---
 include/standard-headers/linux/virtio_config.h | 15 +
 include/standard-headers/linux/virtio_ring.h   | 43 ++
 2 files changed, 58 insertions(+)

diff --git a/include/standard-headers/linux/virtio_config.h 
b/include/standard-headers/linux/virtio_config.h
index 0b19436..9f450fd 100644
--- a/include/standard-headers/linux/virtio_config.h
+++ b/include/standard-headers/linux/virtio_config.h
@@ -75,6 +75,21 @@
  */
 #define VIRTIO_F_IOMMU_PLATFORM33
 
+/* This feature indicates support for the packed virtqueue layout. */
+#define VIRTIO_F_RING_PACKED   34
+
+/* Enable events */
+#define RING_EVENT_FLAGS_ENABLE 0x0
+/* Disable events */
+#define RING_EVENT_FLAGS_DISABLE 0x1
+/*
+ * Enable events for a specific descriptor
+ * (as specified by Descriptor Ring Change Event Offset/Wrap Counter).
+ * Only valid if VIRTIO_RING_F_EVENT_IDX has been negotiated.
+ */
+#define RING_EVENT_FLAGS_DESC 0x2
+/* The value 0x3 is reserved */
+
 /*
  * Does the device support Single Root I/O Virtualization?
  */
diff --git a/include/standard-headers/linux/virtio_ring.h 
b/include/standard-headers/linux/virtio_ring.h
index d26e72b..1719c6f 100644
--- a/include/standard-headers/linux/virtio_ring.h
+++ b/include/standard-headers/linux/virtio_ring.h
@@ -42,6 +42,10 @@
 /* This means the buffer contains a list of buffer descriptors. */
 #define VRING_DESC_F_INDIRECT  4
 
+/* Mark a descriptor as available or used. */
+#define VRING_DESC_F_AVAIL (1ul << 7)
+#define VRING_DESC_F_USED  (1ul << 15)
+
 /* The Host uses this in used->flags to advise the Guest: don't kick me when
  * you add a buffer.  It's unreliable, so it's simply an optimization.  Guest
  * will still kick if it's out of buffers. */
@@ -51,6 +55,17 @@
  * optimization.  */
 #define VRING_AVAIL_F_NO_INTERRUPT 1
 
+/* Enable events. */
+#define VRING_EVENT_F_ENABLE   0x0
+/* Disable events. */
+#define VRING_EVENT_F_DISABLE  0x1
+/*
+ * Enable events for a specific descriptor
+ * (as specified by Descriptor Ring Change Event Offset/Wrap Counter).
+ * Only valid if VIRTIO_RING_F_EVENT_IDX has been negotiated.
+ */
+#define VRING_EVENT_F_DESC 0x2
+
 /* We support indirect buffer descriptors */
 #define VIRTIO_RING_F_INDIRECT_DESC28
 
@@ -169,4 +184,32 @@ static inline int vring_need_event(uint16_t event_idx, 
uint16_t new_idx, uint16_
return (uint16_t)(new_idx - event_idx - 1) < (uint16_t)(new_idx - old);
 }
 
+struct vring_packed_desc_event {
+   /* Descriptor Ring Change Event Offset/Wrap Counter. */
+   __virtio16 off_wrap;
+   /* Descriptor Ring Change Event Flags. */
+   __virtio16 flags;
+};
+
+struct vring_packed_desc {
+   /* Buffer Address. */
+   __virtio64 addr;
+   /* Buffer Length. */
+   __virtio32 len;
+   /* Buffer ID. */
+   __virtio16 id;
+   /* The flags depending on descriptor type. */
+   __virtio16 flags;
+};
+
+struct vring_packed {
+   unsigned int num;
+
+   struct vring_packed_desc *desc;
+
+   struct vring_packed_desc_event *driver;
+
+   struct vring_packed_desc_event *device;
+};
+
 #endif /* _LINUX_VIRTIO_RING_H */
-- 
1.8.3.1




[Qemu-devel] [[RFC v3 03/12] virtio: init memory cache for packed ring

2018-10-11 Thread wexu
From: Wei Xu 

Expand the 1.0 memory cache initialization by adding the packed ring
offset calculation accordingly.

Signed-off-by: Wei Xu 
---
 hw/virtio/vhost.c  | 16 
 hw/virtio/virtio.c | 35 +++
 include/hw/virtio/virtio.h |  4 ++--
 3 files changed, 33 insertions(+), 22 deletions(-)

diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
index 569c405..9df2da3 100644
--- a/hw/virtio/vhost.c
+++ b/hw/virtio/vhost.c
@@ -996,14 +996,14 @@ static int vhost_virtqueue_start(struct vhost_dev *dev,
 r = -ENOMEM;
 goto fail_alloc_desc;
 }
-vq->avail_size = s = l = virtio_queue_get_avail_size(vdev, idx);
+vq->avail_size = s = l = virtio_queue_get_driver_size(vdev, idx);
 vq->avail_phys = a = virtio_queue_get_avail_addr(vdev, idx);
 vq->avail = vhost_memory_map(dev, a, &l, 0);
 if (!vq->avail || l != s) {
 r = -ENOMEM;
 goto fail_alloc_avail;
 }
-vq->used_size = s = l = virtio_queue_get_used_size(vdev, idx);
+vq->used_size = s = l = virtio_queue_get_device_size(vdev, idx);
 vq->used_phys = a = virtio_queue_get_used_addr(vdev, idx);
 vq->used = vhost_memory_map(dev, a, &l, 1);
 if (!vq->used || l != s) {
@@ -1051,10 +1051,10 @@ static int vhost_virtqueue_start(struct vhost_dev *dev,
 fail_vector:
 fail_kick:
 fail_alloc:
-vhost_memory_unmap(dev, vq->used, virtio_queue_get_used_size(vdev, idx),
+vhost_memory_unmap(dev, vq->used, virtio_queue_get_device_size(vdev, idx),
0, 0);
 fail_alloc_used:
-vhost_memory_unmap(dev, vq->avail, virtio_queue_get_avail_size(vdev, idx),
+vhost_memory_unmap(dev, vq->avail, virtio_queue_get_driver_size(vdev, idx),
0, 0);
 fail_alloc_avail:
 vhost_memory_unmap(dev, vq->desc, virtio_queue_get_desc_size(vdev, idx),
@@ -1101,10 +1101,10 @@ static void vhost_virtqueue_stop(struct vhost_dev *dev,
 vhost_vq_index);
 }
 
-vhost_memory_unmap(dev, vq->used, virtio_queue_get_used_size(vdev, idx),
-   1, virtio_queue_get_used_size(vdev, idx));
-vhost_memory_unmap(dev, vq->avail, virtio_queue_get_avail_size(vdev, idx),
-   0, virtio_queue_get_avail_size(vdev, idx));
+vhost_memory_unmap(dev, vq->used, virtio_queue_get_device_size(vdev, idx),
+   1, virtio_queue_get_device_size(vdev, idx));
+vhost_memory_unmap(dev, vq->avail, virtio_queue_get_driver_size(vdev, idx),
+   0, virtio_queue_get_driver_size(vdev, idx));
 vhost_memory_unmap(dev, vq->desc, virtio_queue_get_desc_size(vdev, idx),
0, virtio_queue_get_desc_size(vdev, idx));
 }
diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index 500eecf..bfb3364 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -162,11 +162,8 @@ static void virtio_init_region_cache(VirtIODevice *vdev, 
int n)
 VRingMemoryRegionCaches *old = vq->vring.caches;
 VRingMemoryRegionCaches *new = NULL;
 hwaddr addr, size;
-int event_size;
 int64_t len;
 
-event_size = virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX) ? 
2 : 0;
-
 addr = vq->vring.desc;
 if (!addr) {
 goto out_no_cache;
@@ -174,13 +171,13 @@ static void virtio_init_region_cache(VirtIODevice *vdev, 
int n)
 new = g_new0(VRingMemoryRegionCaches, 1);
 size = virtio_queue_get_desc_size(vdev, n);
 len = address_space_cache_init(&new->desc, vdev->dma_as,
-   addr, size, false);
+   addr, size, true);
 if (len < size) {
 virtio_error(vdev, "Cannot map desc");
 goto err_desc;
 }
 
-size = virtio_queue_get_used_size(vdev, n) + event_size;
+size = virtio_queue_get_device_size(vdev, n);
 len = address_space_cache_init(&new->used, vdev->dma_as,
vq->vring.used, size, true);
 if (len < size) {
@@ -188,7 +185,7 @@ static void virtio_init_region_cache(VirtIODevice *vdev, 
int n)
 goto err_used;
 }
 
-size = virtio_queue_get_avail_size(vdev, n) + event_size;
+size = virtio_queue_get_driver_size(vdev, n);
 len = address_space_cache_init(&new->avail, vdev->dma_as,
vq->vring.avail, size, false);
 if (len < size) {
@@ -2339,16 +2336,30 @@ hwaddr virtio_queue_get_desc_size(VirtIODevice *vdev, 
int n)
 return sizeof(VRingDesc) * vdev->vq[n].vring.num;
 }
 
-hwaddr virtio_queue_get_avail_size(VirtIODevice *vdev, int n)
+hwaddr virtio_queue_get_driver_size(VirtIODevice *vdev, int n)
 {
-return offsetof(VRingAvail, ring) +
-sizeof(uint16_t) * vdev->vq[n].vring.num;
+int s;
+
+if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
+return sizeof(struct VRingPackedDescEvent);
+} else {
+s = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
+return offsetof(VRingAvail, ring) +
+sizeof(uint16_t) * vdev->vq[n].vring.num + s;
+}
+}

[Qemu-devel] [[RFC v3 05/12] virtio: init and desc empty check for packed ring

2018-10-11 Thread wexu
From: Wei Xu 

Basic initialization and helpers for packed ring.

Signed-off-by: Wei Xu 
---
 hw/virtio/virtio.c | 57 +-
 1 file changed, 56 insertions(+), 1 deletion(-)

diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index 9185efb..86f88da 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -24,6 +24,9 @@
 #include "hw/virtio/virtio-access.h"
 #include "sysemu/dma.h"
 
+#define AVAIL_DESC_PACKED(b) ((b) << 7)
+#define USED_DESC_PACKED(b)  ((b) << 15)
+
 /*
  * The alignment to use between consumer and producer parts of vring.
  * x86 pagesize again. This is the default, used by transports like PCI
@@ -372,6 +375,23 @@ int virtio_queue_ready(VirtQueue *vq)
 return vq->vring.avail != 0;
 }
 
+static void vring_packed_desc_read_flags(VirtIODevice *vdev,
+VRingPackedDesc *desc, MemoryRegionCache *cache, int i)
+{
+address_space_read_cached(cache,
+  i * sizeof(VRingPackedDesc) + offsetof(VRingPackedDesc, flags),
+  &desc->flags, sizeof(desc->flags));
+}
+
+static inline bool is_desc_avail(struct VRingPackedDesc *desc, bool wc)
+{
+bool avail, used;
+
+avail = !!(desc->flags & AVAIL_DESC_PACKED(1));
+used = !!(desc->flags & USED_DESC_PACKED(1));
+return (avail != used) && (avail == wc);
+}
+
 /* Fetch avail_idx from VQ memory only when we really need to know if
  * guest has added some buffers.
  * Called within rcu_read_lock().  */
@@ -392,7 +412,7 @@ static int virtio_queue_empty_rcu(VirtQueue *vq)
 return vring_avail_idx(vq) == vq->last_avail_idx;
 }
 
-int virtio_queue_empty(VirtQueue *vq)
+static int virtio_queue_split_empty(VirtQueue *vq)
 {
 bool empty;
 
@@ -414,6 +434,41 @@ int virtio_queue_empty(VirtQueue *vq)
 return empty;
 }
 
+static int virtio_queue_packed_empty_rcu(VirtQueue *vq)
+{
+struct VRingPackedDesc desc;
+VRingMemoryRegionCaches *cache;
+
+if (unlikely(!vq->vring.desc)) {
+return 1;
+}
+
+cache = vring_get_region_caches(vq);
+vring_packed_desc_read_flags(vq->vdev, &desc, &cache->desc,
+vq->last_avail_idx);
+
+return !is_desc_avail(&desc, vq->avail_wrap_counter);
+}
+
+static int virtio_queue_packed_empty(VirtQueue *vq)
+{
+bool empty;
+
+rcu_read_lock();
+empty = virtio_queue_packed_empty_rcu(vq);
+rcu_read_unlock();
+return empty;
+}
+
+int virtio_queue_empty(VirtQueue *vq)
+{
+if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
+return virtio_queue_packed_empty(vq);
+} else {
+return virtio_queue_split_empty(vq);
+}
+}
+
 static void virtqueue_unmap_sg(VirtQueue *vq, const VirtQueueElement *elem,
unsigned int len)
 {
-- 
1.8.3.1




[Qemu-devel] [RFC v3 00/12] packed ring virtio-net userspace backend support

2018-10-11 Thread wexu
From: Wei Xu 

code base:
https://github.com/Whishay/qemu.git

Todo:
- migration has not been supported yet

v2->v3
- addressed performance issue
- fixed feedback from v2

v1->v2
- sync to tiwei's v5
- reuse memory cache function with 1.0
- dropped detach patch and notification helper(04 & 05 in v1)
- guest virtio-net driver unload/reload support
- event suppression support(not tested)
- addressed feedback from v1

Wei Xu (12):
  virtio: introduce packed ring definitions
  virtio: redefine structure & memory cache for packed ring
  virtio: init memory cache for packed ring
  virtio: init wrap counter for packed ring
  virtio: init and desc empty check for packed ring
  virtio: get avail bytes check for packed ring
  virtio: fill/flush/pop for packed ring
  virtio: event suppression support for packed ring
  virtio-net: fill head desc after done all in a chain
  virtio: packed ring feature bit for userspace backend
  virtio: enable packed ring via a new command line
  virtio: feature vhost-net support for packed ring

 hw/net/vhost_net.c |   1 +
 hw/net/virtio-net.c|  11 +-
 hw/virtio/vhost.c  |  19 +-
 hw/virtio/virtio.c | 685 +++--
 include/hw/virtio/virtio.h |   9 +-
 include/standard-headers/linux/virtio_config.h |  15 +
 include/standard-headers/linux/virtio_ring.h   |  43 ++
 7 files changed, 736 insertions(+), 47 deletions(-)

-- 
1.8.3.1




[Qemu-devel] [RFC v2 8/8] virtio: guest driver reload for vhost-net

2018-06-05 Thread wexu
From: Wei Xu 

last_avail, avail_wrap_counter, used_idx and used_wrap_counter are
needed to support the vhost-net backend. All of these are either
16-bit or bool variables, and since state.num is 64 bits wide, it is
possible to pack them into 'num' without introducing a new case in
the ioctl handling.

An unload/reload test has been done successfully with a patch in the
vhost kernel.
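
A minimal sketch (mine, mirroring the hunks below) of how the fields
are packed into the 64-bit 'num':

    /* bits 0-15: last_avail_idx, bit 16: avail_wrap_counter,
     * bits 32-47: used_idx, bit 48: used_wrap_counter */
    uint64_t num = (uint64_t)last_avail_idx |
                   ((uint64_t)avail_wrap_counter << 16) |
                   ((uint64_t)used_idx << 32) |
                   ((uint64_t)used_wrap_counter << 48);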

Signed-off-by: Wei Xu 
---
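For reference, a sketch of the packing implemented below (bit
positions follow the shifts in this patch):

    /* 64-bit 'num' layout:
     *   bits  0..15  last_avail_idx
     *   bits 16..31  avail_wrap_counter (bool widened to 16 bits)
     *   bits 32..47  used_idx
     *   bits 48..63  used_wrap_counter (bool widened to 16 bits)
     */
    num = vq->last_avail_idx
        | ((uint64_t)vq->avail_wrap_counter) << 16
        | ((uint64_t)vq->used_idx) << 32
        | ((uint64_t)vq->used_wrap_counter) << 48;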
 hw/virtio/virtio.c | 42 ++
 1 file changed, 34 insertions(+), 8 deletions(-)

diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index 4543974..153f6d7 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -2862,33 +2862,59 @@ hwaddr virtio_queue_get_used_size(VirtIODevice *vdev, 
int n)
 }
 }
 
-uint16_t virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n)
+uint64_t virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n)
 {
-return vdev->vq[n].last_avail_idx;
+uint64_t num;
+
+num = vdev->vq[n].last_avail_idx;
+if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
+num |= ((uint64_t)vdev->vq[n].avail_wrap_counter) << 16;
+num |= ((uint64_t)vdev->vq[n].used_idx) << 32;
+num |= ((uint64_t)vdev->vq[n].used_wrap_counter) << 48;
+}
+
+return num;
 }
 
-void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n, uint16_t idx)
+void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n, uint64_t num)
 {
-vdev->vq[n].last_avail_idx = idx;
-vdev->vq[n].shadow_avail_idx = idx;
+vdev->vq[n].shadow_avail_idx = vdev->vq[n].last_avail_idx = (uint16_t)(num);
+
+if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
+vdev->vq[n].avail_wrap_counter = (uint16_t)(num >> 16);
+vdev->vq[n].used_idx = (uint16_t)(num >> 32);
+vdev->vq[n].used_wrap_counter = (uint16_t)(num >> 48);
+}
 }
 
 void virtio_queue_restore_last_avail_idx(VirtIODevice *vdev, int n)
 {
 rcu_read_lock();
-if (vdev->vq[n].vring.desc) {
+if (!vdev->vq[n].vring.desc) {
+goto out;
+}
+
+if (!virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
 vdev->vq[n].last_avail_idx = vring_used_idx(&vdev->vq[n]);
-vdev->vq[n].shadow_avail_idx = vdev->vq[n].last_avail_idx;
 }
+vdev->vq[n].shadow_avail_idx = vdev->vq[n].last_avail_idx;
+
+out:
 rcu_read_unlock();
 }
 
 void virtio_queue_update_used_idx(VirtIODevice *vdev, int n)
 {
 rcu_read_lock();
-if (vdev->vq[n].vring.desc) {
+if (!vdev->vq[n].vring.desc) {
+goto out;
+}
+
+if (!virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
 vdev->vq[n].used_idx = vring_used_idx(&vdev->vq[n]);
 }
+
+out:
 rcu_read_unlock();
 }
 
-- 
1.8.3.1




[Qemu-devel] [RFC v2 4/8] virtio: get avail bytes check for packed ring

2018-06-05 Thread wexu
From: Wei Xu 

Mostly the same as 1.0, except that all descriptors are traversed to
compute the head count; this needs a refactor.

Signed-off-by: Wei Xu 
---
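In outline, the packed walk added below does the following (a sketch,
names as in the patch):

    /* Walk from last_avail_idx while descriptors are available: for
     * each chain, add writable desc.len to in_total and readable
     * desc.len to out_total, descend into indirect tables when
     * VRING_DESC_F_INDIRECT is set, and stop early once both
     * max_in_bytes and max_out_bytes are satisfied. */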
 hw/virtio/virtio.c | 148 +++--
 1 file changed, 145 insertions(+), 3 deletions(-)

diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index bd669a2..cdbb5af 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -650,9 +650,9 @@ static int virtqueue_read_next_desc(VirtIODevice *vdev, 
VRingDesc *desc,
 return VIRTQUEUE_READ_DESC_MORE;
 }
 
-void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
-   unsigned int *out_bytes,
-   unsigned max_in_bytes, unsigned max_out_bytes)
+static void virtqueue_split_get_avail_bytes(VirtQueue *vq,
+unsigned int *in_bytes, unsigned int *out_bytes,
+unsigned max_in_bytes, unsigned max_out_bytes)
 {
 VirtIODevice *vdev = vq->vdev;
 unsigned int max, idx;
@@ -775,6 +775,148 @@ err:
 goto done;
 }
 
+static void virtqueue_packed_get_avail_bytes(VirtQueue *vq,
+unsigned int *in_bytes, unsigned int *out_bytes,
+unsigned max_in_bytes, unsigned max_out_bytes)
+{
+VirtIODevice *vdev = vq->vdev;
+unsigned int max, idx;
+unsigned int total_bufs, in_total, out_total;
+MemoryRegionCache *desc_cache;
+VRingMemoryRegionCaches *caches;
+MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
+int64_t len = 0;
+VRingDescPacked desc;
+
+if (unlikely(!vq->vring.desc)) {
+if (in_bytes) {
+*in_bytes = 0;
+}
+if (out_bytes) {
+*out_bytes = 0;
+}
+return;
+}
+
+rcu_read_lock();
+idx = vq->last_avail_idx;
+total_bufs = in_total = out_total = 0;
+
+max = vq->vring.num;
+caches = vring_get_region_caches(vq);
+if (caches->desc.len < max * sizeof(VRingDescPacked)) {
+virtio_error(vdev, "Cannot map descriptor ring");
+goto err;
+}
+
+desc_cache = >desc;
+vring_packed_desc_read(vdev, &desc, desc_cache, idx);
+while (is_desc_avail(&desc)) {
+unsigned int num_bufs;
+unsigned int i;
+
+num_bufs = total_bufs;
+
+if (desc.flags & VRING_DESC_F_INDIRECT) {
+if (desc.len % sizeof(VRingDescPacked)) {
+virtio_error(vdev, "Invalid size for indirect buffer table");
+goto err;
+}
+
+/* If we've got too many, that implies a descriptor loop. */
+if (num_bufs >= max) {
+virtio_error(vdev, "Looped descriptor");
+goto err;
+}
+
+/* loop over the indirect descriptor table */
+len = address_space_cache_init(&indirect_desc_cache,
+   vdev->dma_as,
+   desc.addr, desc.len, false);
+desc_cache = &indirect_desc_cache;
+if (len < desc.len) {
+virtio_error(vdev, "Cannot map indirect buffer");
+goto err;
+}
+
+max = desc.len / sizeof(VRingDescPacked);
+num_bufs = i = 0;
+vring_packed_desc_read(vdev, &desc, desc_cache, i);
+}
+
+do {
+/* If we've got too many, that implies a descriptor loop. */
+if (++num_bufs > max) {
+virtio_error(vdev, "Looped descriptor");
+goto err;
+}
+
+if (desc.flags & VRING_DESC_F_WRITE) {
+in_total += desc.len;
+} else {
+out_total += desc.len;
+}
+if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
+goto done;
+}
+
+if (desc_cache == &indirect_desc_cache) {
+if (++i >= vq->vring.num) {
+i -= vq->vring.num;
+}
+vring_packed_desc_read(vdev, &desc, desc_cache, i);
+} else {
+if (++idx >= vq->vring.num) {
+idx -= vq->vring.num;
+}
+vring_packed_desc_read(vdev, &desc, desc_cache, idx);
+}
+/* Make sure we see the flags */
+smp_mb();
+} while (desc.flags & VRING_DESC_F_NEXT);
+
+if (desc_cache == &indirect_desc_cache) {
+address_space_cache_destroy(&indirect_desc_cache);
+total_bufs++;
+/* We skipped the idx increment for the indirect desc, catch up */
+idx++;
+} else {
+total_bufs = num_bufs;
+}
+
+desc_cache = &caches->desc;
+vring_packed_desc_read(vdev, &desc, desc_cache, idx % vq->vring.num);
+}
+
+done:
+address_space_cache_destroy(&indirect_desc_cache);
+if (in_bytes) {
+*in_bytes = in_total;
+}
+if (out_bytes) {
+*out_bytes = out_total;
+}
+rcu_read_unlock();
+

[Qemu-devel] [RFC v2 3/8] virtio: empty check and desc read for packed ring

2018-06-05 Thread wexu
From: Wei Xu 

Helpers for the ring empty check and descriptor read.

Signed-off-by: Wei Xu 
---
 hw/virtio/virtio.c | 62 +++---
 1 file changed, 59 insertions(+), 3 deletions(-)

diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index f6c0689..bd669a2 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -24,6 +24,9 @@
 #include "hw/virtio/virtio-access.h"
 #include "sysemu/dma.h"
 
+#define AVAIL_DESC_PACKED(b) ((b) << 7)
+#define USED_DESC_PACKED(b)  ((b) << 15)
+
 /*
  * The alignment to use between consumer and producer parts of vring.
  * x86 pagesize again. This is the default, used by transports like PCI
@@ -357,10 +360,27 @@ int virtio_queue_ready(VirtQueue *vq)
 return vq->vring.avail != 0;
 }
 
+static void vring_packed_desc_read(VirtIODevice *vdev, VRingDescPacked *desc,
+MemoryRegionCache *cache, int i)
+{
+address_space_read_cached(cache, i * sizeof(VRingDescPacked),
+  desc, sizeof(VRingDescPacked));
+virtio_tswap64s(vdev, &desc->addr);
+virtio_tswap32s(vdev, &desc->len);
+virtio_tswap16s(vdev, &desc->id);
+virtio_tswap16s(vdev, &desc->flags);
+}
+
+static inline bool is_desc_avail(struct VRingDescPacked *desc)
+{
+return !!(desc->flags & AVAIL_DESC_PACKED(1)) !=
+!!(desc->flags & USED_DESC_PACKED(1));
+}
+
 /* Fetch avail_idx from VQ memory only when we really need to know if
  * guest has added some buffers.
  * Called within rcu_read_lock().  */
-static int virtio_queue_empty_rcu(VirtQueue *vq)
+static int virtio_queue_split_empty_rcu(VirtQueue *vq)
 {
 if (unlikely(!vq->vring.avail)) {
 return 1;
@@ -373,7 +393,7 @@ static int virtio_queue_empty_rcu(VirtQueue *vq)
 return vring_avail_idx(vq) == vq->last_avail_idx;
 }
 
-int virtio_queue_empty(VirtQueue *vq)
+static int virtio_queue_split_empty(VirtQueue *vq)
 {
 bool empty;
 
@@ -391,6 +411,42 @@ int virtio_queue_empty(VirtQueue *vq)
 return empty;
 }
 
+static int virtio_queue_packed_empty_rcu(VirtQueue *vq)
+{
+struct VRingDescPacked desc;
+VRingMemoryRegionCaches *cache;
+
+if (unlikely(!vq->vring.desc)) {
+return 1;
+}
+
+cache = vring_get_region_caches(vq);
+vring_packed_desc_read(vq->vdev, &desc, &cache->desc, vq->last_avail_idx);
+
+/* Make sure we see the updated flag */
+smp_mb();
+return !is_desc_avail(&desc);
+}
+
+static int virtio_queue_packed_empty(VirtQueue *vq)
+{
+bool empty;
+
+rcu_read_lock();
+empty = virtio_queue_packed_empty_rcu(vq);
+rcu_read_unlock();
+return empty;
+}
+
+int virtio_queue_empty(VirtQueue *vq)
+{
+if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
+return virtio_queue_packed_empty(vq);
+} else {
+return virtio_queue_split_empty(vq);
+}
+}
+
 static void virtqueue_unmap_sg(VirtQueue *vq, const VirtQueueElement *elem,
unsigned int len)
 {
@@ -862,7 +918,7 @@ void *virtqueue_pop(VirtQueue *vq, size_t sz)
 return NULL;
 }
 rcu_read_lock();
-if (virtio_queue_empty_rcu(vq)) {
+if (virtio_queue_split_empty_rcu(vq)) {
 goto done;
 }
 /* Needed after virtio_queue_empty(), see comment in
-- 
1.8.3.1




[Qemu-devel] [RFC v2 6/8] virtio: flush/push for packed ring

2018-06-05 Thread wexu
From: Wei Xu 

Signed-off-by: Wei Xu 
Signed-off-by: Wei Xu 
---
 hw/virtio/virtio.c | 109 ++---
 1 file changed, 96 insertions(+), 13 deletions(-)

diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index 0160d03..6f2da83 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -371,6 +371,21 @@ static void vring_packed_desc_read(VirtIODevice *vdev, 
VRingDescPacked *desc,
 virtio_tswap16s(vdev, &desc->flags);
 }
 
+static void vring_packed_desc_write(VirtIODevice *vdev, VRingDescPacked *desc,
+MemoryRegionCache *cache, int i)
+{
+virtio_tswap64s(vdev, &desc->addr);
+virtio_tswap32s(vdev, &desc->len);
+virtio_tswap16s(vdev, &desc->id);
+virtio_tswap16s(vdev, &desc->flags);
+address_space_write_cached(cache,
+   sizeof(VRingDescPacked) * i, desc,
+   sizeof(VRingDescPacked));
+address_space_cache_invalidate(cache,
+   sizeof(VRingDescPacked) * i,
+   sizeof(VRingDescPacked));
+}
+
 static inline bool is_desc_avail(struct VRingDescPacked *desc)
 {
 return !!(desc->flags & AVAIL_DESC_PACKED(1)) !=
@@ -526,19 +541,11 @@ bool virtqueue_rewind(VirtQueue *vq, unsigned int num)
 }
 
 /* Called within rcu_read_lock().  */
-void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
+static void virtqueue_split_fill(VirtQueue *vq, const VirtQueueElement *elem,
 unsigned int len, unsigned int idx)
 {
 VRingUsedElem uelem;
 
-trace_virtqueue_fill(vq, elem, len, idx);
-
-virtqueue_unmap_sg(vq, elem, len);
-
-if (unlikely(vq->vdev->broken)) {
-return;
-}
-
 if (unlikely(!vq->vring.used)) {
 return;
 }
@@ -550,16 +557,64 @@ void virtqueue_fill(VirtQueue *vq, const VirtQueueElement 
*elem,
 vring_used_write(vq, &uelem, idx);
 }
 
-/* Called within rcu_read_lock().  */
-void virtqueue_flush(VirtQueue *vq, unsigned int count)
+static void virtqueue_packed_fill(VirtQueue *vq, const VirtQueueElement *elem,
+unsigned int len, unsigned int idx)
 {
-uint16_t old, new;
+uint16_t w, head;
+VRingMemoryRegionCaches *caches;
+VRingDescPacked desc = {
+.addr = 0,
+.flags = 0,
+};
+
+if (unlikely(!vq->vring.desc)) {
+return;
+}
+
+caches = vring_get_region_caches(vq);
+head = vq->used_idx + idx;
+head = head >= vq->vring.num ? (head - vq->vring.num) : head;
+vring_packed_desc_read(vq->vdev, &desc, &caches->desc, head);
+
+w = (desc.flags & AVAIL_DESC_PACKED(1)) >> 7;
+desc.flags &= ~(AVAIL_DESC_PACKED(1) | USED_DESC_PACKED(1));
+desc.flags |= AVAIL_DESC_PACKED(w) | USED_DESC_PACKED(w);
+if (!(desc.flags & VRING_DESC_F_INDIRECT)) {
+if (!(desc.flags & VRING_DESC_F_WRITE)) {
+desc.len = 0;
+} else {
+desc.len = len;
+}
+}
+vring_packed_desc_write(vq->vdev, &desc, &caches->desc, head);
+
+/* Make sure flags has been updated */
+smp_mb();
+}
+
+void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
+unsigned int len, unsigned int idx)
+{
+trace_virtqueue_fill(vq, elem, len, idx);
+
+virtqueue_unmap_sg(vq, elem, len);
 
 if (unlikely(vq->vdev->broken)) {
-vq->inuse -= count;
 return;
 }
 
+if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
+virtqueue_packed_fill(vq, elem, len, idx);
+} else {
+virtqueue_split_fill(vq, elem, len, idx);
+}
+}
+
+/* Called within rcu_read_lock().  */
+static void virtqueue_split_flush(VirtQueue *vq, unsigned int count)
+{
+uint16_t old, new;
+
 if (unlikely(!vq->vring.used)) {
 return;
 }
@@ -575,6 +630,34 @@ void virtqueue_flush(VirtQueue *vq, unsigned int count)
 vq->signalled_used_valid = false;
 }
 
+static void virtqueue_packed_flush(VirtQueue *vq, unsigned int count)
+{
+if (unlikely(!vq->vring.desc)) {
+return;
+}
+
+vq->inuse -= count;
+vq->used_idx += count;
+if (vq->used_idx >= vq->vring.num) {
+vq->used_idx -= vq->vring.num;
+vq->used_wrap_counter = !vq->used_wrap_counter;
+}
+}
+
+void virtqueue_flush(VirtQueue *vq, unsigned int count)
+{
+if (unlikely(vq->vdev->broken)) {
+vq->inuse -= count;
+return;
+}
+
+if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
+virtqueue_packed_flush(vq, count);
+} else {
+virtqueue_split_flush(vq, count);
+}
+}
+
 void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem,
 unsigned int len)
 {
-- 
1.8.3.1




[Qemu-devel] [RFC v2 7/8] virtio: event suppression for packed ring

2018-06-05 Thread wexu
From: Wei Xu 

Signed-off-by: Wei Xu 
Signed-off-by: Wei Xu 
---
 hw/virtio/virtio.c | 115 +++--
 include/standard-headers/linux/virtio_config.h |  13 +++
 2 files changed, 119 insertions(+), 9 deletions(-)

diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index 6f2da83..4543974 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -226,6 +226,24 @@ static void vring_desc_read(VirtIODevice *vdev, VRingDesc 
*desc,
 virtio_tswap16s(vdev, >next);
 }
 
+static void vring_packed_event_read(VirtIODevice *vdev,
+MemoryRegionCache *cache, VRingPackedDescEvent *e)
+{
+address_space_read_cached(cache, 0, e, sizeof(*e));
+virtio_tswap16s(vdev, &e->off_wrap);
+virtio_tswap16s(vdev, &e->flags);
+}
+
+static void vring_packed_event_write(VirtIODevice *vdev,
+MemoryRegionCache *cache, VRingPackedDescEvent *e)
+{
+virtio_tswap16s(vdev, &e->off_wrap);
+virtio_tswap16s(vdev, &e->flags);
+address_space_write_cached(cache, 0, e, sizeof(*e));
+address_space_cache_invalidate(cache, 0, sizeof(*e));
+}
+
+
 static VRingMemoryRegionCaches *vring_get_region_caches(struct VirtQueue *vq)
 {
 VRingMemoryRegionCaches *caches = atomic_rcu_read(&vq->vring.caches);
@@ -332,14 +350,8 @@ static inline void vring_set_avail_event(VirtQueue *vq, 
uint16_t val)
 address_space_cache_invalidate(>used, pa, sizeof(val));
 }
 
-void virtio_queue_set_notification(VirtQueue *vq, int enable)
+static void virtio_queue_set_notification_split(VirtQueue *vq, int enable)
 {
-vq->notification = enable;
-
-if (!vq->vring.desc) {
-return;
-}
-
 rcu_read_lock();
 if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
 vring_set_avail_event(vq, vring_avail_idx(vq));
@@ -355,6 +367,38 @@ void virtio_queue_set_notification(VirtQueue *vq, int 
enable)
 rcu_read_unlock();
 }
 
+static void virtio_queue_set_notification_packed(VirtQueue *vq, int enable)
+{
+VRingPackedDescEvent e;
+VRingMemoryRegionCaches *caches;
+
+rcu_read_lock();
+caches = vring_get_region_caches(vq);
+vring_packed_event_read(vq->vdev, &caches->device, &e);
+if (enable) {
+e.flags = RING_EVENT_FLAGS_ENABLE;
+} else {
+e.flags = RING_EVENT_FLAGS_DISABLE;
+}
+vring_packed_event_write(vq->vdev, &caches->device, &e);
+rcu_read_unlock();
+}
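+/*
+ * Note: only RING_EVENT_FLAGS_ENABLE/DISABLE are written here; the
+ * event-index style suppression (assumed to be RING_EVENT_FLAGS_DESC
+ * in this series' header) is not wired up in this revision.
+ */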
+
+void virtio_queue_set_notification(VirtQueue *vq, int enable)
+{
+vq->notification = enable;
+
+if (!vq->vring.desc) {
+return;
+}
+
+if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
+virtio_queue_set_notification_packed(vq, enable);
+} else {
+virtio_queue_set_notification_split(vq, enable);
+}
+}
+
 int virtio_queue_ready(VirtQueue *vq)
 {
 return vq->vring.avail != 0;
@@ -2059,8 +2103,7 @@ static void virtio_set_isr(VirtIODevice *vdev, int value)
 }
 }
 
-/* Called within rcu_read_lock().  */
-static bool virtio_should_notify(VirtIODevice *vdev, VirtQueue *vq)
+static bool virtio_split_should_notify(VirtIODevice *vdev, VirtQueue *vq)
 {
 uint16_t old, new;
 bool v;
@@ -2083,6 +2126,60 @@ static bool virtio_should_notify(VirtIODevice *vdev, 
VirtQueue *vq)
 return !v || vring_need_event(vring_get_used_event(vq), new, old);
 }
 
+static bool vring_packed_need_event(VirtQueue *vq, uint16_t off_wrap,
+uint16_t new, uint16_t old)
+{
+bool wrap = vq->used_wrap_counter;
+int off = off_wrap & ~(1 << 15);
+
+if (new < old) {
+new += vq->vring.num;
+wrap ^= 1;
+}
+
+if (wrap != off_wrap >> 15) {
+off += vq->vring.num;
+}
+
+return vring_need_event(off, new, old);
+}
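+/*
+ * Illustration: off_wrap carries the event offset in bits 0..14 and
+ * the expected wrap counter in bit 15. The adjustments above unroll
+ * (index, wrap) pairs onto a flat 0..2*num range so the classic
+ * vring_need_event() window check still applies.
+ */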
+
+static bool virtio_packed_should_notify(VirtIODevice *vdev, VirtQueue *vq)
+{
+VRingPackedDescEvent e;
+uint16_t old, new;
+bool v;
+VRingMemoryRegionCaches *caches;
+
+caches = vring_get_region_caches(vq);
+vring_packed_event_read(vdev, &caches->driver, &e);
+
+/* Make sure we see the updated flags */
+smp_mb();
+if (e.flags == RING_EVENT_FLAGS_DISABLE) {
+return false;
+} else if (e.flags == RING_EVENT_FLAGS_ENABLE) {
+return true;
+}
+
+v = vq->signalled_used_valid;
+vq->signalled_used_valid = true;
+old = vq->signalled_used;
+new = vq->signalled_used = vq->used_idx;
+
+return !v || vring_packed_need_event(vq, e.off_wrap, new, old);
+}
+
+/* Called within rcu_read_lock().  */
+static bool virtio_should_notify(VirtIODevice *vdev, VirtQueue *vq)
+{
+if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
+return virtio_packed_should_notify(vdev, vq);
+} else {
+return virtio_split_should_notify(vdev, vq);
+}
+}
+
 void virtio_notify_irqfd(VirtIODevice *vdev, VirtQueue *vq)
 {
 bool should_notify;
diff --git a/include/standard-headers/linux/virtio_config.h 

[Qemu-devel] [RFC v2 2/8] virtio: memory cache for packed ring

2018-06-05 Thread wexu
From: Wei Xu 

Mostly reuses the 1.0 memory cache, except for the offset calculation.

Signed-off-by: Wei Xu 
---
 hw/virtio/virtio.c | 29 -
 1 file changed, 20 insertions(+), 9 deletions(-)

diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index e192a9a..f6c0689 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -150,11 +150,8 @@ static void virtio_init_region_cache(VirtIODevice *vdev, 
int n)
 VRingMemoryRegionCaches *old = vq->vring.caches;
 VRingMemoryRegionCaches *new;
 hwaddr addr, size;
-int event_size;
 int64_t len;
 
-event_size = virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
-
 addr = vq->vring.desc;
 if (!addr) {
 return;
@@ -168,7 +165,7 @@ static void virtio_init_region_cache(VirtIODevice *vdev, 
int n)
 goto err_desc;
 }
 
-size = virtio_queue_get_used_size(vdev, n) + event_size;
+size = virtio_queue_get_used_size(vdev, n);
 len = address_space_cache_init(&new->used, vdev->dma_as,
vq->vring.used, size, true);
 if (len < size) {
@@ -176,7 +173,7 @@ static void virtio_init_region_cache(VirtIODevice *vdev, 
int n)
 goto err_used;
 }
 
-size = virtio_queue_get_avail_size(vdev, n) + event_size;
+size = virtio_queue_get_avail_size(vdev, n);
 len = address_space_cache_init(&new->avail, vdev->dma_as,
vq->vring.avail, size, false);
 if (len < size) {
@@ -2320,14 +2317,28 @@ hwaddr virtio_queue_get_desc_size(VirtIODevice *vdev, 
int n)
 
 hwaddr virtio_queue_get_avail_size(VirtIODevice *vdev, int n)
 {
-return offsetof(VRingAvail, ring) +
-sizeof(uint16_t) * vdev->vq[n].vring.num;
+int s;
+
+if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
+return sizeof(struct VRingPackedDescEvent);
+} else {
+s = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
+return offsetof(VRingAvail, ring) +
+sizeof(uint16_t) * vdev->vq[n].vring.num + s;
+}
 }
 
 hwaddr virtio_queue_get_used_size(VirtIODevice *vdev, int n)
 {
-return offsetof(VRingUsed, ring) +
-sizeof(VRingUsedElem) * vdev->vq[n].vring.num;
+int s;
+
+if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
+return sizeof(struct VRingPackedDescEvent);
+} else {
+s = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
+return offsetof(VRingUsed, ring) +
+sizeof(VRingUsedElem) * vdev->vq[n].vring.num + s;
+}
 }
 
 uint16_t virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n)
-- 
1.8.3.1




[Qemu-devel] [RFC v2 5/8] virtio: queue pop for packed ring

2018-06-05 Thread wexu
From: Wei Xu 

Signed-off-by: Wei Xu 
---
 hw/virtio/virtio.c | 145 -
 1 file changed, 144 insertions(+), 1 deletion(-)

diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index cdbb5af..0160d03 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -1041,7 +1041,7 @@ static void *virtqueue_alloc_element(size_t sz, unsigned 
out_num, unsigned in_nu
 return elem;
 }
 
-void *virtqueue_pop(VirtQueue *vq, size_t sz)
+static void *virtqueue_split_pop(VirtQueue *vq, size_t sz)
 {
 unsigned int i, head, max;
 VRingMemoryRegionCaches *caches;
@@ -1176,6 +1176,149 @@ err_undo_map:
 goto done;
 }
 
+static void *virtqueue_packed_pop(VirtQueue *vq, size_t sz)
+{
+unsigned int i, head, max;
+VRingMemoryRegionCaches *caches;
+MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
+MemoryRegionCache *cache;
+int64_t len;
+VirtIODevice *vdev = vq->vdev;
+VirtQueueElement *elem = NULL;
+unsigned out_num, in_num, elem_entries;
+hwaddr addr[VIRTQUEUE_MAX_SIZE];
+struct iovec iov[VIRTQUEUE_MAX_SIZE];
+VRingDescPacked desc;
+
+if (unlikely(vdev->broken)) {
+return NULL;
+}
+
+rcu_read_lock();
+if (virtio_queue_packed_empty_rcu(vq)) {
+goto done;
+}
+
+/* When we start there are none of either input nor output. */
+out_num = in_num = elem_entries = 0;
+
+max = vq->vring.num;
+
+if (vq->inuse >= vq->vring.num) {
+virtio_error(vdev, "Virtqueue size exceeded");
+goto done;
+}
+
+if (virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
+/* FIXME: TBD */
+}
+
+head = vq->last_avail_idx;
+i = head;
+
+caches = vring_get_region_caches(vq);
+cache = >desc;
+vring_packed_desc_read(vdev, &desc, cache, i);
+if (desc.flags & VRING_DESC_F_INDIRECT) {
+if (desc.len % sizeof(VRingDescPacked)) {
+virtio_error(vdev, "Invalid size for indirect buffer table");
+goto done;
+}
+
+/* loop over the indirect descriptor table */
+len = address_space_cache_init(&indirect_desc_cache, vdev->dma_as,
+   desc.addr, desc.len, false);
+cache = &indirect_desc_cache;
+if (len < desc.len) {
+virtio_error(vdev, "Cannot map indirect buffer");
+goto done;
+}
+
+max = desc.len / sizeof(VRingDescPacked);
+i = 0;
+vring_packed_desc_read(vdev, &desc, cache, i);
+}
+
+/* Collect all the descriptors */
+while (1) {
+bool map_ok;
+
+if (desc.flags & VRING_DESC_F_WRITE) {
+map_ok = virtqueue_map_desc(vdev, &in_num, addr + out_num,
+iov + out_num,
+VIRTQUEUE_MAX_SIZE - out_num, true,
+desc.addr, desc.len);
+} else {
+if (in_num) {
+virtio_error(vdev, "Incorrect order for descriptors");
+goto err_undo_map;
+}
+map_ok = virtqueue_map_desc(vdev, &out_num, addr, iov,
+VIRTQUEUE_MAX_SIZE, false,
+desc.addr, desc.len);
+}
+if (!map_ok) {
+goto err_undo_map;
+}
+
+/* If we've got too many, that implies a descriptor loop. */
+if (++elem_entries > max) {
+virtio_error(vdev, "Looped descriptor");
+goto err_undo_map;
+}
+
+if (++i >= vq->vring.num) {
+i -= vq->vring.num;
+}
+
+if (desc.flags & VRING_DESC_F_NEXT) {
+vring_packed_desc_read(vq->vdev, &desc, cache, i);
+} else {
+break;
+}
+}
+
+/* Now copy what we have collected and mapped */
+elem = virtqueue_alloc_element(sz, out_num, in_num);
+for (i = 0; i < out_num; i++) {
+elem->out_addr[i] = addr[i];
+elem->out_sg[i] = iov[i];
+}
+for (i = 0; i < in_num; i++) {
+elem->in_addr[i] = addr[out_num + i];
+elem->in_sg[i] = iov[out_num + i];
+}
+
+vq->last_avail_idx += (cache == &indirect_desc_cache) ?
+  1 : out_num + in_num;
+if (vq->last_avail_idx >= vq->vring.num) {
+vq->last_avail_idx -= vq->vring.num;
+vq->avail_wrap_counter = !vq->avail_wrap_counter;
+}
+vq->inuse++;
+
+trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num);
+done:
+address_space_cache_destroy(&indirect_desc_cache);
+rcu_read_unlock();
+
+return elem;
+
+err_undo_map:
+virtqueue_undo_map_desc(out_num, in_num, iov);
+g_free(elem);
+goto done;
+}
+
+void *virtqueue_pop(VirtQueue *vq, size_t sz)
+{
+if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
+return virtqueue_packed_pop(vq, sz);
+} else {
+return virtqueue_split_pop(vq, sz);
+}
+}
+
 /* 

[Qemu-devel] [RFC v2 1/8] virtio: feature bit, data structure, init for 1.1

2018-06-05 Thread wexu
From: Wei Xu 

New feature bit and members for packed ring.

Signed-off-by: Wei Xu 
---
 hw/net/vhost_net.c |  2 ++
 hw/virtio/virtio.c | 27 --
 include/hw/virtio/virtio.h |  4 +++-
 include/standard-headers/linux/virtio_config.h |  2 ++
 4 files changed, 32 insertions(+), 3 deletions(-)

diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
index e037db6..f593086 100644
--- a/hw/net/vhost_net.c
+++ b/hw/net/vhost_net.c
@@ -53,6 +53,7 @@ static const int kernel_feature_bits[] = {
 VIRTIO_F_VERSION_1,
 VIRTIO_NET_F_MTU,
 VIRTIO_F_IOMMU_PLATFORM,
+VIRTIO_F_RING_PACKED,
 VHOST_INVALID_FEATURE_BIT
 };
 
@@ -78,6 +79,7 @@ static const int user_feature_bits[] = {
 VIRTIO_NET_F_MRG_RXBUF,
 VIRTIO_NET_F_MTU,
 VIRTIO_F_IOMMU_PLATFORM,
+VIRTIO_F_RING_PACKED,
 
 /* This bit implies RARP isn't sent by QEMU out of band */
 VIRTIO_NET_F_GUEST_ANNOUNCE,
diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index 006d3d1..e192a9a 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -39,6 +39,13 @@ typedef struct VRingDesc
 uint16_t next;
 } VRingDesc;
 
+typedef struct VRingDescPacked {
+uint64_t addr;
+uint32_t len;
+uint16_t id;
+uint16_t flags;
+} VRingDescPacked;
+
 typedef struct VRingAvail
 {
 uint16_t flags;
@@ -62,8 +69,14 @@ typedef struct VRingUsed
 typedef struct VRingMemoryRegionCaches {
 struct rcu_head rcu;
 MemoryRegionCache desc;
-MemoryRegionCache avail;
-MemoryRegionCache used;
+union {
+MemoryRegionCache avail;
+MemoryRegionCache driver;
+};
+union {
+MemoryRegionCache used;
+MemoryRegionCache device;
+};
 } VRingMemoryRegionCaches;
 
 typedef struct VRing
@@ -77,6 +90,11 @@ typedef struct VRing
 VRingMemoryRegionCaches *caches;
 } VRing;
 
+typedef struct VRingPackedDescEvent {
+uint16_t off_wrap;
+uint16_t flags;
+} VRingPackedDescEvent ;
+
 struct VirtQueue
 {
 VRing vring;
@@ -89,6 +107,9 @@ struct VirtQueue
 
 uint16_t used_idx;
 
+bool avail_wrap_counter;
+bool used_wrap_counter;
+
 /* Last used index value we have signalled on */
 uint16_t signalled_used;
 
@@ -1213,6 +1234,8 @@ void virtio_reset(void *opaque)
 vdev->vq[i].last_avail_idx = 0;
 vdev->vq[i].shadow_avail_idx = 0;
 vdev->vq[i].used_idx = 0;
+vdev->vq[i].avail_wrap_counter = true;
+vdev->vq[i].used_wrap_counter = true;
 virtio_queue_set_vector(vdev, i, VIRTIO_NO_VECTOR);
 vdev->vq[i].signalled_used = 0;
 vdev->vq[i].signalled_used_valid = false;
diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
index 098bdaa..4a7fb21 100644
--- a/include/hw/virtio/virtio.h
+++ b/include/hw/virtio/virtio.h
@@ -262,7 +262,9 @@ typedef struct VirtIORNGConf VirtIORNGConf;
 DEFINE_PROP_BIT64("any_layout", _state, _field, \
   VIRTIO_F_ANY_LAYOUT, true), \
 DEFINE_PROP_BIT64("iommu_platform", _state, _field, \
-  VIRTIO_F_IOMMU_PLATFORM, false)
+  VIRTIO_F_IOMMU_PLATFORM, false), \
+DEFINE_PROP_BIT64("ring_packed", _state, _field, \
+  VIRTIO_F_RING_PACKED, false)
 
 hwaddr virtio_queue_get_desc_addr(VirtIODevice *vdev, int n);
 hwaddr virtio_queue_get_avail_addr(VirtIODevice *vdev, int n);
diff --git a/include/standard-headers/linux/virtio_config.h 
b/include/standard-headers/linux/virtio_config.h
index b777069..6ee5529 100644
--- a/include/standard-headers/linux/virtio_config.h
+++ b/include/standard-headers/linux/virtio_config.h
@@ -71,4 +71,6 @@
  * this is for compatibility with legacy systems.
  */
 #define VIRTIO_F_IOMMU_PLATFORM33
+
+#define VIRTIO_F_RING_PACKED   34
 #endif /* _LINUX_VIRTIO_CONFIG_H */
-- 
1.8.3.1




[Qemu-devel] [RFC v2 0/8] packed ring virtio-net userspace backend support

2018-06-05 Thread wexu
From: Wei Xu 

Todo:
- address Rx slow performance
- event index interrupt suppression test

v1->v2
- sync to tiwei's v5
- reuse memory cache function with 1.0
- dropped detach patch and notification helper(04 & 05 in v1)
- guest virtio-net driver unload/reload support
- event suppression support(not tested)
- addressed feedback from v1

About guest virtio-net load/unload:
Since last_avail, avail_wrap_count, used_idx and used_wrap_count are
all 16-bit or bool variables, I merged them into
'vhost_vring_state.num' instead of introducing a new case in the
ioctl handling; this was tested with a kernel-side tweak like:

--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -1439,10 +1439,16 @@ long vhost_vring_ioctl(struct vhost_dev *d, int ioctl, 
void __user *argp)
r = -EFAULT;
break;
}
-   if (s.num > 0xffff) {
-   r = -EINVAL;
-   break;
-   }
+   if (vhost_has_feature(vq, VIRTIO_F_RING_PACKED)) {
+   vq->avail_wrap_counter = (bool)(uint16_t)(s.num >> 16);
+   vq->last_used_idx = (uint16_t)(s.num >> 32);
+   vq->used_wrap_counter = (bool)(uint16_t)(s.num >> 48);
+} else {
+if (s.num > 0xffff) {
+r = -EINVAL;
+break;
+}
+}
vq->last_avail_idx = s.num;
/* Forget the cached index value. */
vq->avail_idx = vq->last_avail_idx;
@@ -1450,8 +1456,15 @@ long vhost_vring_ioctl(struct vhost_dev *d, int ioctl, 
void __user *argp)
case VHOST_GET_VRING_BASE:
s.index = idx;
s.num = vq->last_avail_idx;
+   if (vhost_has_feature(vq, VIRTIO_F_RING_PACKED)) {
+   s.num |= vq->avail_wrap_counter << 16;
+   s.num |= vq->last_used_idx << 32;
+   s.num |= vq->used_wrap_counter << 48;
+}
if (copy_to_user(argp, &s, sizeof s))
r = -EFAULT;
+   if (vhost_has_feature(vq, VIRTIO_F_RING_PACKED))
+   s.num |= vq->avail_wrap_counter << 31;
break;
case VHOST_SET_VRING_ADDR:
if (copy_from_user(&a, argp, sizeof a)) {

Wei Xu (8):
  virtio: feature bit, data structure, init for packed ring
  virtio: memory cache for packed ring
  virtio: empty check and desc read for packed ring
  virtio: get avail bytes check for packed ring
  virtio: queue pop support for packed ring
  virtio: flush/push support for packed ring
  virtio: event suppression support for packed ring
  virtio: support guest driver reload for vhost-net

 hw/net/vhost_net.c |   2 +
 hw/virtio/virtio.c | 677 +++--
 include/hw/virtio/virtio.h |   4 +-
 include/standard-headers/linux/virtio_config.h |  15 +
 4 files changed, 649 insertions(+), 49 deletions(-)

-- 
1.8.3.1




[Qemu-devel] [PATCH 8/8] virtio: queue pop support for packed ring

2018-04-04 Thread wexu
From: Wei Xu 

Cloned from the split ring pop. A global static length array and a
per-element length array are introduced to ease prototyping; this
consumes more memory, and it is worth moving to dynamic allocation,
as the out/in sg arrays already do.

Signed-off-by: Wei Xu 
---
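A sketch of the dynamic allocation the message alludes to
(hypothetical: 'len' would become a pointer in VirtQueueElement and
be sized from the sg counts, as virtqueue_alloc_element() already
does for in_sg/out_sg):

    /* Hypothetical replacement for the fixed len[VIRTQUEUE_MAX_SIZE]
     * array, allocated once out_num/in_num are known: */
    elem->len = g_malloc0(sizeof(uint32_t) * (out_num + in_num));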
 hw/virtio/virtio.c | 154 -
 1 file changed, 153 insertions(+), 1 deletion(-)

diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index cf726f3..0eafb38 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -1221,7 +1221,7 @@ static void *virtqueue_alloc_element(size_t sz, unsigned 
out_num, unsigned in_nu
 return elem;
 }
 
-void *virtqueue_pop(VirtQueue *vq, size_t sz)
+static void *virtqueue_pop_split(VirtQueue *vq, size_t sz)
 {
 unsigned int i, head, max;
 VRingMemoryRegionCaches *caches;
@@ -1356,6 +1356,158 @@ err_undo_map:
 goto done;
 }
 
+static uint16_t dma_len[VIRTQUEUE_MAX_SIZE];
+static void *virtqueue_pop_packed(VirtQueue *vq, size_t sz)
+{
+unsigned int i, head, max;
+VRingMemoryRegionCaches *caches;
+MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
+MemoryRegionCache *cache;
+int64_t len;
+VirtIODevice *vdev = vq->vdev;
+VirtQueueElement *elem = NULL;
+unsigned out_num, in_num, elem_entries;
+hwaddr addr[VIRTQUEUE_MAX_SIZE];
+struct iovec iov[VIRTQUEUE_MAX_SIZE];
+VRingDescPacked desc;
+uint8_t wrap_counter;
+
+if (unlikely(vdev->broken)) {
+return NULL;
+}
+
+vq->last_avail_idx %= vq->packed.num;
+
+rcu_read_lock();
+if (virtio_queue_empty_packed_rcu(vq)) {
+goto done;
+}
+
+/* When we start there are none of either input nor output. */
+out_num = in_num = elem_entries = 0;
+
+max = vq->vring.num;
+
+if (vq->inuse >= vq->vring.num) {
+virtio_error(vdev, "Virtqueue size exceeded");
+goto done;
+}
+
+if (virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
+/* FIXME: TBD */
+}
+
+head = vq->last_avail_idx;
+i = head;
+
+caches = vring_get_region_caches(vq);
+cache = >desc_packed;
+vring_desc_read_packed(vdev, &desc, cache, i);
+if (desc.flags & VRING_DESC_F_INDIRECT) {
+if (desc.len % sizeof(VRingDescPacked)) {
+virtio_error(vdev, "Invalid size for indirect buffer table");
+goto done;
+}
+
+/* loop over the indirect descriptor table */
+len = address_space_cache_init(&indirect_desc_cache, vdev->dma_as,
+   desc.addr, desc.len, false);
+cache = &indirect_desc_cache;
+if (len < desc.len) {
+virtio_error(vdev, "Cannot map indirect buffer");
+goto done;
+}
+
+max = desc.len / sizeof(VRingDescPacked);
+i = 0;
+vring_desc_read_packed(vdev, &desc, cache, i);
+}
+
+wrap_counter = vq->wrap_counter;
+/* Collect all the descriptors */
+while (1) {
+bool map_ok;
+
+if (desc.flags & VRING_DESC_F_WRITE) {
+map_ok = virtqueue_map_desc(vdev, &in_num, addr + out_num,
+iov + out_num,
+VIRTQUEUE_MAX_SIZE - out_num, true,
+desc.addr, desc.len);
+} else {
+if (in_num) {
+virtio_error(vdev, "Incorrect order for descriptors");
+goto err_undo_map;
+}
+map_ok = virtqueue_map_desc(vdev, &out_num, addr, iov,
+VIRTQUEUE_MAX_SIZE, false,
+desc.addr, desc.len);
+}
+if (!map_ok) {
+goto err_undo_map;
+}
+
+/* If we've got too many, that implies a descriptor loop. */
+if (++elem_entries > max) {
+virtio_error(vdev, "Looped descriptor");
+goto err_undo_map;
+}
+
+dma_len[i++] = desc.len;
+/* Toggle wrap_counter for non indirect desc */
+if ((i == vq->packed.num) && (cache != _desc_cache)) {
+vq->wrap_counter ^= 1;
+}
+
+if (desc.flags & VRING_DESC_F_NEXT) {
+vring_desc_read_packed(vq->vdev, &desc, cache, i % vq->packed.num);
+} else {
+break;
+}
+}
+
+/* Now copy what we have collected and mapped */
+elem = virtqueue_alloc_element(sz, out_num, in_num);
+elem->index = head;
+elem->wrap_counter = wrap_counter;
+elem->count = (cache == &indirect_desc_cache) ? 1 : out_num + in_num;
+for (i = 0; i < out_num; i++) {
+/* DMA Done by marking the length as 0 */
+elem->len[i] = 0;
+elem->out_addr[i] = addr[i];
+elem->out_sg[i] = iov[i];
+}
+for (i = 0; i < in_num; i++) {
+elem->len[out_num + i] = dma_len[head + out_num + i];
+elem->in_addr[i] = addr[out_num + i];
+elem->in_sg[i] = 

[Qemu-devel] [PATCH 6/8] virtio: flush/push support for packed ring

2018-04-04 Thread wexu
From: Wei Xu 

Signed-off-by: Wei Xu 
---
 hw/virtio/virtio.c | 104 +
 1 file changed, 90 insertions(+), 14 deletions(-)

diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index 95a4681..def07c6 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -26,6 +26,7 @@
 
 #define AVAIL_DESC_PACKED(b) ((b) << 7)
 #define USED_DESC_PACKED(b)  ((b) << 15)
+#define VIRTQ_F_DESC_USED(w)  (AVAIL_DESC_PACKED(w) | USED_DESC_PACKED(w))
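+/* Setting avail == used == w marks a descriptor as used for wrap value w. */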
 
 /*
  * The alignment to use between consumer and producer parts of vring.
@@ -636,19 +637,11 @@ bool virtqueue_rewind(VirtQueue *vq, unsigned int num)
 }
 
 /* Called within rcu_read_lock().  */
-void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
+static void virtqueue_fill_split(VirtQueue *vq, const VirtQueueElement *elem,
 unsigned int len, unsigned int idx)
 {
 VRingUsedElem uelem;
 
-trace_virtqueue_fill(vq, elem, len, idx);
-
-virtqueue_unmap_sg(vq, elem, len);
-
-if (unlikely(vq->vdev->broken)) {
-return;
-}
-
 if (unlikely(!vq->vring.used)) {
 return;
 }
@@ -660,16 +653,66 @@ void virtqueue_fill(VirtQueue *vq, const VirtQueueElement 
*elem,
 vring_used_write(vq, &uelem, idx);
 }
 
-/* Called within rcu_read_lock().  */
-void virtqueue_flush(VirtQueue *vq, unsigned int count)
+static void virtqueue_fill_packed(VirtQueue *vq, const VirtQueueElement *elem)
 {
-uint16_t old, new;
+uint16_t i, w, head;
+VRingMemoryRegionCaches *caches;
+VRingDescPacked desc = {
+.addr = 0,
+.flags = 0,
+};
+
+if (unlikely(!vq->packed.desc)) {
+return;
+}
+
+w = elem->wrap_counter;
+caches = vring_get_region_caches(vq);
+for (i = 0; i < elem->count; i++) {
+head = (elem->index + i) % vq->packed.num;
+/* Don't toggle the first one since it is the original one */
+if ((i > 0) && (!head)) {
+w ^= 1;
+}
+
+desc.id = elem->index;
+desc.flags = VIRTQ_F_DESC_USED(w);
+desc.len = elem->len[i];
+virtio_tswap16s(vq->vdev, &desc.id);
+virtio_tswap32s(vq->vdev, &desc.len);
+virtio_tswap16s(vq->vdev, &desc.flags);
+address_space_write_cached(&caches->desc,
+   sizeof(VRingDescPacked) * head, &desc,
+   sizeof(VRingDescPacked));
+address_space_cache_invalidate(&caches->desc,
+   sizeof(VRingDescPacked) * head,
+   sizeof(VRingDescPacked));
+}
+}
+
+void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
+unsigned int len, unsigned int idx)
+{
+trace_virtqueue_fill(vq, elem, len, idx);
+
+virtqueue_unmap_sg(vq, elem, len);
 
 if (unlikely(vq->vdev->broken)) {
-vq->inuse -= count;
 return;
 }
 
+if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
+virtqueue_fill_packed(vq, elem);
+} else {
+virtqueue_fill_split(vq, elem, len, idx);
+}
+}
+
+/* Called within rcu_read_lock().  */
+static void virtqueue_flush_split(VirtQueue *vq, unsigned int count)
+{
+uint16_t old, new;
+
 if (unlikely(!vq->vring.used)) {
 return;
 }
@@ -685,12 +728,45 @@ void virtqueue_flush(VirtQueue *vq, unsigned int count)
 vq->signalled_used_valid = false;
 }
 
+static void virtqueue_flush_packed(VirtQueue *vq, unsigned int count)
+{
+if (unlikely(!vq->packed.desc)) {
+return;
+}
+
+vq->inuse -= count;
+
+/* FIXME: is this correct? */
+if (vq->inuse) {
+return;
+}
+}
+
+void virtqueue_flush(VirtQueue *vq, unsigned int count)
+{
+if (unlikely(vq->vdev->broken)) {
+vq->inuse -= count;
+return;
+}
+
+if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
+virtqueue_flush_packed(vq, count);
+} else {
+virtqueue_flush_split(vq, count);
+}
+}
+
 void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem,
 unsigned int len)
 {
 rcu_read_lock();
 virtqueue_fill(vq, elem, len, 0);
-virtqueue_flush(vq, 1);
+if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
+/* FIXME: How to deal with the length field for chained desc */
+virtqueue_flush(vq, elem->count);
+} else {
+virtqueue_flush(vq, 1);
+}
 rcu_read_unlock();
 }
 
-- 
2.7.4




[Qemu-devel] [PATCH 3/8] virtio: add empty check for packed ring

2018-04-04 Thread wexu
From: Wei Xu 

Helper for the ring empty check.

Signed-off-by: Wei Xu 
---
 hw/virtio/virtio.c | 62 +++---
 1 file changed, 59 insertions(+), 3 deletions(-)

diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index 73a35a4..478df3d 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -24,6 +24,9 @@
 #include "hw/virtio/virtio-access.h"
 #include "sysemu/dma.h"
 
+#define AVAIL_DESC_PACKED(b) ((b) << 7)
+#define USED_DESC_PACKED(b)  ((b) << 15)
+
 /*
  * The alignment to use between consumer and producer parts of vring.
  * x86 pagesize again. This is the default, used by transports like PCI
@@ -446,10 +449,27 @@ int virtio_queue_ready(VirtQueue *vq)
 return vq->vring.avail != 0;
 }
 
+static void vring_desc_read_packed(VirtIODevice *vdev, VRingDescPacked *desc,
+MemoryRegionCache *cache, int i)
+{
+address_space_read_cached(cache, i * sizeof(VRingDescPacked),
+  desc, sizeof(VRingDescPacked));
+virtio_tswap64s(vdev, &desc->addr);
+virtio_tswap32s(vdev, &desc->len);
+virtio_tswap16s(vdev, &desc->id);
+virtio_tswap16s(vdev, &desc->flags);
+}
+
+static inline bool is_desc_avail(struct VRingDescPacked* desc)
+{
+return (!!(desc->flags & AVAIL_DESC_PACKED(1)) !=
+!!(desc->flags & USED_DESC_PACKED(1)));
+}
+
 /* Fetch avail_idx from VQ memory only when we really need to know if
  * guest has added some buffers.
  * Called within rcu_read_lock().  */
-static int virtio_queue_empty_rcu(VirtQueue *vq)
+static int virtio_queue_empty_split_rcu(VirtQueue *vq)
 {
 if (unlikely(!vq->vring.avail)) {
 return 1;
@@ -462,7 +482,7 @@ static int virtio_queue_empty_rcu(VirtQueue *vq)
 return vring_avail_idx(vq) == vq->last_avail_idx;
 }
 
-int virtio_queue_empty(VirtQueue *vq)
+static int virtio_queue_empty_split(VirtQueue *vq)
 {
 bool empty;
 
@@ -480,6 +500,42 @@ int virtio_queue_empty(VirtQueue *vq)
 return empty;
 }
 
+static int virtio_queue_empty_packed_rcu(VirtQueue *vq)
+{
+struct VRingDescPacked desc;
+VRingMemoryRegionCaches *cache;
+
+if (unlikely(!vq->packed.desc)) {
+return 1;
+}
+
+cache = vring_get_region_caches(vq);
+vring_desc_read_packed(vq->vdev, &desc, &cache->desc_packed, vq->last_avail_idx);
+
+/* Make sure we see the updated flag */
+smp_mb();
+return !is_desc_avail(&desc);
+}
+
+static int virtio_queue_empty_packed(VirtQueue *vq)
+{
+bool empty;
+
+rcu_read_lock();
+empty = virtio_queue_empty_packed_rcu(vq);
+rcu_read_unlock();
+return empty;
+}
+
+int virtio_queue_empty(VirtQueue *vq)
+{
+if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
+return virtio_queue_empty_packed(vq);
+} else {
+return virtio_queue_empty_split(vq);
+}
+}
+
 static void virtqueue_unmap_sg(VirtQueue *vq, const VirtQueueElement *elem,
unsigned int len)
 {
@@ -951,7 +1007,7 @@ void *virtqueue_pop(VirtQueue *vq, size_t sz)
 return NULL;
 }
 rcu_read_lock();
-if (virtio_queue_empty_rcu(vq)) {
+if (virtio_queue_empty_split_rcu(vq)) {
 goto done;
 }
 /* Needed after virtio_queue_empty(), see comment in
-- 
2.7.4




[Qemu-devel] [PATCH 4/8] virtio: add detach element for packed ring(1.1)

2018-04-04 Thread wexu
From: Wei Xu 

Helper for the packed ring.

Signed-off-by: Wei Xu 
---
 hw/virtio/virtio.c | 21 +++--
 1 file changed, 19 insertions(+), 2 deletions(-)

diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index 478df3d..fdee40f 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -561,6 +561,20 @@ static void virtqueue_unmap_sg(VirtQueue *vq, const 
VirtQueueElement *elem,
  elem->out_sg[i].iov_len);
 }
 
+static void virtqueue_detach_element_split(VirtQueue *vq,
+const VirtQueueElement *elem, unsigned int len)
+{
+vq->inuse--;
+virtqueue_unmap_sg(vq, elem, len);
+}
+
+static void virtqueue_detach_element_packed(VirtQueue *vq,
+const VirtQueueElement *elem, unsigned int len)
+{
+vq->inuse -= elem->count;
+virtqueue_unmap_sg(vq, elem, len);
+}
+
 /* virtqueue_detach_element:
  * @vq: The #VirtQueue
  * @elem: The #VirtQueueElement
@@ -573,8 +587,11 @@ static void virtqueue_unmap_sg(VirtQueue *vq, const 
VirtQueueElement *elem,
 void virtqueue_detach_element(VirtQueue *vq, const VirtQueueElement *elem,
   unsigned int len)
 {
-vq->inuse--;
-virtqueue_unmap_sg(vq, elem, len);
+if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
+virtqueue_detach_element_packed(vq, elem, len);
+} else {
+virtqueue_detach_element_split(vq, elem, len);
+}
 }
 
 /* virtqueue_unpop:
-- 
2.7.4




[Qemu-devel] [PATCH 5/8] virtio: notification tweak for packed ring

2018-04-04 Thread wexu
From: Wei Xu 

Always enable notifications and bypass the set-notification path
until the driver and device areas are supported.

Signed-off-by: Wei Xu 
---
 hw/virtio/virtio.c | 9 +
 1 file changed, 9 insertions(+)

diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index fdee40f..95a4681 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -425,6 +425,10 @@ void virtio_queue_set_notification(VirtQueue *vq, int 
enable)
 {
 vq->notification = enable;
 
+if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
+return;
+}
+
 if (!vq->vring.desc) {
 return;
 }
@@ -1801,6 +1805,11 @@ static bool virtio_should_notify(VirtIODevice *vdev, 
VirtQueue *vq)
 {
 uint16_t old, new;
 bool v;
+
+if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
+return true;
+}
+
 /* We need to expose used array entries before checking used event. */
 smp_mb();
 /* Always notify when queue is empty (when feature acknowledge) */
-- 
2.7.4




[Qemu-devel] [PATCH 7/8] virtio: get avail bytes check for packed ring

2018-04-04 Thread wexu
From: Wei Xu 

Mostly the same as 1.0; copied separately for the prototype and in
need of refactoring.

Signed-off-by: Wei Xu 
---
 hw/virtio/virtio.c | 142 +++--
 1 file changed, 139 insertions(+), 3 deletions(-)

diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index def07c6..cf726f3 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -836,9 +836,9 @@ static int virtqueue_read_next_desc(VirtIODevice *vdev, 
VRingDesc *desc,
 return VIRTQUEUE_READ_DESC_MORE;
 }
 
-void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
-   unsigned int *out_bytes,
-   unsigned max_in_bytes, unsigned max_out_bytes)
+static void virtqueue_get_avail_bytes_split(VirtQueue *vq,
+unsigned int *in_bytes, unsigned int *out_bytes,
+unsigned max_in_bytes, unsigned max_out_bytes)
 {
 VirtIODevice *vdev = vq->vdev;
 unsigned int max, idx;
@@ -961,6 +961,142 @@ err:
 goto done;
 }
 
+static void virtqueue_get_avail_bytes_packed(VirtQueue *vq,
+unsigned int *in_bytes, unsigned int *out_bytes,
+unsigned max_in_bytes, unsigned max_out_bytes)
+{
+VirtIODevice *vdev = vq->vdev;
+unsigned int max, idx;
+unsigned int total_bufs, in_total, out_total;
+MemoryRegionCache *desc_cache;
+VRingMemoryRegionCaches *caches;
+MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
+int64_t len = 0;
+VRingDescPacked desc;
+
+if (unlikely(!vq->packed.desc)) {
+if (in_bytes) {
+*in_bytes = 0;
+}
+if (out_bytes) {
+*out_bytes = 0;
+}
+return;
+}
+
+rcu_read_lock();
+idx = vq->last_avail_idx;
+total_bufs = in_total = out_total = 0;
+
+max = vq->packed.num;
+caches = vring_get_region_caches(vq);
+if (caches->desc.len < max * sizeof(VRingDescPacked)) {
+virtio_error(vdev, "Cannot map descriptor ring");
+goto err;
+}
+
+desc_cache = >desc;
+vring_desc_read_packed(vdev, &desc, desc_cache, idx);
+while (is_desc_avail(&desc)) {
+unsigned int num_bufs;
+unsigned int i;
+
+num_bufs = total_bufs;
+
+if (desc.flags & VRING_DESC_F_INDIRECT) {
+if (desc.len % sizeof(VRingDescPacked)) {
+virtio_error(vdev, "Invalid size for indirect buffer table");
+goto err;
+}
+
+/* If we've got too many, that implies a descriptor loop. */
+if (num_bufs >= max) {
+virtio_error(vdev, "Looped descriptor");
+goto err;
+}
+
+/* loop over the indirect descriptor table */
+len = address_space_cache_init(&indirect_desc_cache,
+   vdev->dma_as,
+   desc.addr, desc.len, false);
+desc_cache = &indirect_desc_cache;
+if (len < desc.len) {
+virtio_error(vdev, "Cannot map indirect buffer");
+goto err;
+}
+
+max = desc.len / sizeof(VRingDescPacked);
+num_bufs = i = 0;
+vring_desc_read_packed(vdev, &desc, desc_cache, i);
+}
+
+do {
+/* If we've got too many, that implies a descriptor loop. */
+if (++num_bufs > max) {
+virtio_error(vdev, "Looped descriptor");
+goto err;
+}
+
+if (desc.flags & VRING_DESC_F_WRITE) {
+in_total += desc.len;
+} else {
+out_total += desc.len;
+}
+if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
+goto done;
+}
+
+if (desc_cache == &indirect_desc_cache) {
+vring_desc_read_packed(vdev, &desc, desc_cache,
+   ++i % vq->packed.num);
+} else {
+vring_desc_read_packed(vdev, &desc, desc_cache,
+   ++idx % vq->packed.num);
+}
+} while (desc.flags & VRING_DESC_F_NEXT);
+
+if (desc_cache == &indirect_desc_cache) {
+address_space_cache_destroy(&indirect_desc_cache);
+total_bufs++;
+/* We skipped the idx increment for the indirect desc, catch up */
+idx++;
+} else {
+total_bufs = num_bufs;
+}
+
+desc_cache = &caches->desc;
+vring_desc_read_packed(vdev, &desc, desc_cache, idx % vq->packed.num);
+}
+
+done:
+address_space_cache_destroy(&indirect_desc_cache);
+if (in_bytes) {
+*in_bytes = in_total;
+}
+if (out_bytes) {
+*out_bytes = out_total;
+}
+rcu_read_unlock();
+return;
+
+err:
+in_total = out_total = 0;
+goto done;
+}
+
+void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
+  

[Qemu-devel] [PATCH 2/8] virtio: memory cache for packed ring

2018-04-04 Thread wexu
From: Wei Xu 

A new memory cache is introduced for the packed ring; the code
largely duplicates the split (1.0) ring. Any refactoring ideas?

Signed-off-by: Wei Xu 
---
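One way to reduce the duplication (the direction later revisions of
this series take) is to overlay the packed caches on the split ones
with anonymous unions, so one VRingMemoryRegionCaches serves both
layouts, e.g.:

    typedef struct VRingMemoryRegionCaches {
        struct rcu_head rcu;
        union {
            MemoryRegionCache desc;        /* split */
            MemoryRegionCache desc_packed; /* packed */
        };
        union {
            MemoryRegionCache avail;       /* split */
            MemoryRegionCache driver;      /* packed */
        };
        union {
            MemoryRegionCache used;        /* split */
            MemoryRegionCache device;      /* packed */
        };
    } VRingMemoryRegionCaches;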
 hw/virtio/virtio.c | 79 +++---
 1 file changed, 76 insertions(+), 3 deletions(-)

diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index 9a6bfe7..73a35a4 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -155,13 +155,15 @@ static void 
virtio_free_region_cache(VRingMemoryRegionCaches *caches)
 return;
 }
 
+/* FIX ME: pass in 1.1 device here, reuse 1.0 fields at current */
+
 address_space_cache_destroy(&caches->desc);
 address_space_cache_destroy(&caches->avail);
 address_space_cache_destroy(&caches->used);
 g_free(caches);
 }
 
-static void virtio_init_region_cache(VirtIODevice *vdev, int n)
+static void virtio_init_region_cache_split(VirtIODevice *vdev, int n)
 {
 VirtQueue *vq = >vq[n];
 VRingMemoryRegionCaches *old = vq->vring.caches;
@@ -215,6 +217,65 @@ err_desc:
 g_free(new);
 }
 
+static void virtio_init_region_cache_packed(VirtIODevice *vdev, int n)
+{
+VirtQueue *vq = >vq[n];
+VRingMemoryRegionCaches *old = vq->vring.caches;
+VRingMemoryRegionCaches *new;
+hwaddr addr, size;
+int64_t len;
+
+addr = vq->packed.desc;
+if (!addr) {
+return;
+}
+new = g_new0(VRingMemoryRegionCaches, 1);
+size = virtio_queue_get_desc_size(vdev, n);
+len = address_space_cache_init(&new->desc_packed, vdev->dma_as,
+   addr, size, false);
+if (len < size) {
+virtio_error(vdev, "Cannot map desc");
+goto err_desc;
+}
+
+size = sizeof(struct VRingPackedDescEvent);
+len = address_space_cache_init(&new->driver, vdev->dma_as,
+   vq->packed.driver, size, true);
+if (len < size) {
+virtio_error(vdev, "Cannot map driver area");
+goto err_driver;
+}
+
+len = address_space_cache_init(&new->device, vdev->dma_as,
+   vq->packed.device, size, true);
+if (len < size) {
+virtio_error(vdev, "Cannot map device area");
+goto err_device;
+}
+
+atomic_rcu_set(&vq->packed.caches, new);
+if (old) {
+call_rcu(old, virtio_free_region_cache, rcu);
+}
+return;
+
+err_device:
+address_space_cache_destroy(&new->driver);
+err_driver:
+address_space_cache_destroy(&new->desc);
+err_desc:
+g_free(new);
+}
+
+static void virtio_init_region_cache(VirtIODevice *vdev, int n)
+{
+if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
+virtio_init_region_cache_packed(vdev, n);
+} else {
+virtio_init_region_cache_split(vdev, n);
+}
+}
+
 /* virt queue functions */
 void virtio_queue_update_rings(VirtIODevice *vdev, int n)
 {
@@ -245,10 +306,18 @@ static void vring_desc_read(VirtIODevice *vdev, VRingDesc 
*desc,
 
 static VRingMemoryRegionCaches *vring_get_region_caches(struct VirtQueue *vq)
 {
-VRingMemoryRegionCaches *caches = atomic_rcu_read(&vq->vring.caches);
+VRingMemoryRegionCaches *caches;
+
+if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) {
+caches = atomic_rcu_read(&vq->packed.caches);
+} else {
+caches = atomic_rcu_read(&vq->vring.caches);
+}
+
 assert(caches != NULL);
 return caches;
 }
+
 /* Called within rcu_read_lock().  */
 static inline uint16_t vring_avail_flags(VirtQueue *vq)
 {
@@ -2331,7 +2400,11 @@ hwaddr virtio_queue_get_used_addr(VirtIODevice *vdev, 
int n)
 
 hwaddr virtio_queue_get_desc_size(VirtIODevice *vdev, int n)
 {
-return sizeof(VRingDesc) * vdev->vq[n].vring.num;
+if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
+return sizeof(VRingDescPacked) * vdev->vq[n].packed.num;
+} else {
+return sizeof(VRingDesc) * vdev->vq[n].vring.num;
+}
 }
 
 hwaddr virtio_queue_get_avail_size(VirtIODevice *vdev, int n)
-- 
2.7.4




[Qemu-devel] [RFC PATCH 0/8] virtio-net 1.1 userspace backend support

2018-04-04 Thread wexu
From: Wei Xu 

This is a prototype for virtio-net 1.1 support in the userspace
backend; only the minimum parts are included in this RFC (roughly
synced to v8 of Jason's and Tiwei's RFC).

Testing was done together with Tiwei's RFC guest virtio-net driver
patch; ping and a quick iperf test both passed.

Issues:
1. Rx performance in iperf is much slower than Tx.
Tx: 13-15 Gbps
Rx: 100-300 Mbps

Missing:
- device and driver
- indirect descriptor
- migration
- vIOMMU support
- other revisions since v8
- see FIXME

Wei Xu (8):
  virtio: feature bit, data structure for packed ring
  virtio: memory cache for packed ring
  virtio: add empty check for packed ring
  virtio: add detach element for packed ring(1.1)
  virtio: notification tweak for packed ring
  virtio: flush/push support for packed ring
  virtio: get avail bytes check for packed ring
  virtio: queue pop support for packed ring

 hw/virtio/virtio.c | 618 +++--
 include/hw/virtio/virtio.h |  12 +-
 include/standard-headers/linux/virtio_config.h |   2 +
 3 files changed, 601 insertions(+), 31 deletions(-)

-- 
2.7.4




[Qemu-devel] [PATCH 1/8] virtio: feature bit, data structure for packed ring

2018-04-04 Thread wexu
From: Wei Xu 

Only the minimum definitions from the spec are included
for the prototype.

Signed-off-by: Wei Xu 
---
 hw/virtio/virtio.c | 47 +++---
 include/hw/virtio/virtio.h | 12 ++-
 include/standard-headers/linux/virtio_config.h |  2 ++
 3 files changed, 56 insertions(+), 5 deletions(-)

diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index 006d3d1..9a6bfe7 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -39,6 +39,14 @@ typedef struct VRingDesc
 uint16_t next;
 } VRingDesc;
 
+typedef struct VRingDescPacked
+{
+uint64_t addr;
+uint32_t len;
+uint16_t id;
+uint16_t flags;
+} VRingDescPacked;
+
 typedef struct VRingAvail
 {
 uint16_t flags;
@@ -61,9 +69,18 @@ typedef struct VRingUsed
 
 typedef struct VRingMemoryRegionCaches {
 struct rcu_head rcu;
-MemoryRegionCache desc;
-MemoryRegionCache avail;
-MemoryRegionCache used;
+union {
+struct {
+MemoryRegionCache desc;
+MemoryRegionCache avail;
+MemoryRegionCache used;
+};
+struct {
+MemoryRegionCache desc_packed;
+MemoryRegionCache driver;
+MemoryRegionCache device;
+};
+};
 } VRingMemoryRegionCaches;
 
 typedef struct VRing
@@ -77,10 +94,31 @@ typedef struct VRing
 VRingMemoryRegionCaches *caches;
 } VRing;
 
+typedef struct VRingPackedDescEvent {
+uint16_t desc_event_off:15,
+ desc_event_wrap:1;
+uint16_t desc_event_flags:2;
+} VRingPackedDescEvent ;
+
+typedef struct VRingPacked
+{
+unsigned int num;
+unsigned int num_default;
+unsigned int align;
+hwaddr desc;
+hwaddr driver;
+hwaddr device;
+VRingMemoryRegionCaches *caches;
+} VRingPacked;
+
 struct VirtQueue
 {
-VRing vring;
+union {
+struct VRing vring;
+struct VRingPacked packed;
+};
 
+uint8_t wrap_counter:1;
 /* Next head to pop */
 uint16_t last_avail_idx;
 
@@ -1220,6 +1258,7 @@ void virtio_reset(void *opaque)
 vdev->vq[i].vring.num = vdev->vq[i].vring.num_default;
 vdev->vq[i].inuse = 0;
 virtio_virtqueue_reset_region_cache(>vq[i]);
+vdev->vq[i].wrap_counter = 1;
 }
 }
 
diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
index 098bdaa..563e88e 100644
--- a/include/hw/virtio/virtio.h
+++ b/include/hw/virtio/virtio.h
@@ -46,6 +46,14 @@ typedef struct VirtQueueElement
 unsigned int index;
 unsigned int out_num;
 unsigned int in_num;
+
+/* Number of descriptors used by packed ring */
+uint16_t count;
+uint8_t wrap_counter:1;
+/* FIXME: length of every used buffer for a descriptor;
+   move to dynamic allocation based on the out/in sg counts */
+uint32_t len[VIRTQUEUE_MAX_SIZE];
+
 hwaddr *in_addr;
 hwaddr *out_addr;
 struct iovec *in_sg;
@@ -262,7 +270,9 @@ typedef struct VirtIORNGConf VirtIORNGConf;
 DEFINE_PROP_BIT64("any_layout", _state, _field, \
   VIRTIO_F_ANY_LAYOUT, true), \
 DEFINE_PROP_BIT64("iommu_platform", _state, _field, \
-  VIRTIO_F_IOMMU_PLATFORM, false)
+  VIRTIO_F_IOMMU_PLATFORM, false), \
+DEFINE_PROP_BIT64("ring_packed", _state, _field, \
+  VIRTIO_F_RING_PACKED, true)
 
 hwaddr virtio_queue_get_desc_addr(VirtIODevice *vdev, int n);
 hwaddr virtio_queue_get_avail_addr(VirtIODevice *vdev, int n);
diff --git a/include/standard-headers/linux/virtio_config.h 
b/include/standard-headers/linux/virtio_config.h
index b777069..6ee5529 100644
--- a/include/standard-headers/linux/virtio_config.h
+++ b/include/standard-headers/linux/virtio_config.h
@@ -71,4 +71,6 @@
  * this is for compatibility with legacy systems.
  */
 #define VIRTIO_F_IOMMU_PLATFORM33
+
+#define VIRTIO_F_RING_PACKED   34
 #endif /* _LINUX_VIRTIO_CONFIG_H */
-- 
2.7.4




[Qemu-devel] [Patch 3/3] vfio: remove checking duplicated vfio device

2017-09-11 Thread wexu
From: Wei Xu 

This check is already performed by the 'vfio_lookup_as()' patch,
which reuses the same loop as side work.

Signed-off-by: Wei Xu 
---
 hw/vfio/pci.c | 9 -
 1 file changed, 9 deletions(-)

diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 856cefd..d78f756 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -2632,7 +2632,6 @@ static void vfio_unregister_req_notifier(VFIOPCIDevice 
*vdev)
 static void vfio_realize(PCIDevice *pdev, Error **errp)
 {
 VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
-VFIODevice *vbasedev_iter;
 VFIOGroup *group;
 char *tmp, group_path[PATH_MAX], *group_name;
 Error *err = NULL;
@@ -2697,14 +2696,6 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
 goto error;
 }
 
-QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
-if (strcmp(vbasedev_iter->name, vdev->vbasedev.name) == 0) {
-error_setg(errp, "device is already attached");
-vfio_put_group(group);
-goto error;
-}
-}
-
 ret = vfio_get_device(group, vdev->vbasedev.name, &vdev->vbasedev, errp);
 if (ret) {
 vfio_put_group(group);
-- 
1.8.3.1




[Qemu-devel] [Patch 2/3] vfio: invoke looking up address space.

2017-09-11 Thread wexu
From: Wei Xu 

Look up the correct address space before getting an
IOMMU group.

Signed-off-by: Wei Xu 
---
 hw/vfio/pci.c | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 31e1edf..856cefd 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -2640,6 +2640,7 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
 struct stat st;
 int groupid;
 int i, ret;
+AddressSpace *as;
 
 if (!vdev->vbasedev.sysfsdev) {
 if (!(~vdev->host.domain || ~vdev->host.bus ||
@@ -2686,7 +2687,12 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
 
 trace_vfio_realize(vdev->vbasedev.name, groupid);
 
-group = vfio_get_group(groupid, pci_device_iommu_address_space(pdev), 
errp);
+as = vfio_lookup_as(groupid, pdev, errp);
+if (!as) {
+goto error;
+}
+
+group = vfio_get_group(groupid, as, errp);
 if (!group) {
 goto error;
 }
-- 
1.8.3.1




[Qemu-devel] [Patch 0/3] vfio: reusing address space for the same iommu group devices

2017-09-11 Thread wexu
From: Wei Xu 

Recently I have been testing passing through 2 ixgbe (82599ES) nics that
belong to the same iommu group to a guest with a virtual iommu (vIOMMU) on
my desktop; vfio failed to realize the second device and reported the
error 'group xxx used in multiple address spaces'.

It turned out that vtd does not know anything about group membership while
choosing an address space for the two devices, so it creates a separate
address space for each of them, which breaks the isolation granularity
vfio requires.

This patch set fixes this by looking up whether any existing device belongs
to the same iommu group and sharing its address space before creating a
new one.

I am not sure whether this fixes the problem in the correct way, given my
limited knowledge about vfio; please come back to me with any feedback and
comments. Thanks.

Wei Xu (3):
  vfio: reusing address space for the same iommu group devices
  vfio: invoke looking up address space.
  vfio: remove checking duplicated vfio device

 hw/vfio/common.c  | 28 
 hw/vfio/pci.c | 15 ++-
 include/hw/vfio/vfio-common.h |  1 +
 3 files changed, 35 insertions(+), 9 deletions(-)

-- 
1.8.3.1




[Qemu-devel] [Patch 1/3] vfio: reusing address space for the same IOMMU group devices

2017-09-11 Thread wexu
From: Wei Xu 

Currently the address space of a vfio device is selected by directly
looking up the pci device's IOMMU address space at realize time. This
usually works for cases that do not need separate address spaces, since
they all use the system address space, i.e. a q35 machine without a
virtual IOMMU. Unfortunately, when it comes down to the case of a virtual
IOMMU (x86 vtd in this case) and two vfio devices in the same IOMMU group,
the virtual IOMMU creates a separate address space for each device; this
breaks the minimum granularity of vfio, and the second device fails to
realize with 'group xxx used in multiple address spaces'.

This patch adds a helper that looks for an existing device in the same
IOMMU group before creating a new address space for a device, which fixes
the issue.

As a side effect of looping over all groups/devices, the helper also
checks whether the device has already been assigned to the guest, so we
no longer create an extra group only to remove it later.

Signed-off-by: Wei Xu 
---
 hw/vfio/common.c  | 28 
 include/hw/vfio/vfio-common.h |  1 +
 2 files changed, 29 insertions(+)

diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 7b2924c..63c3609 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -35,6 +35,7 @@
 #include "sysemu/kvm.h"
 #include "trace.h"
 #include "qapi/error.h"
+#include "hw/vfio/pci.h"
 
 struct vfio_group_head vfio_group_list =
 QLIST_HEAD_INITIALIZER(vfio_group_list);
@@ -1183,6 +1184,33 @@ static void vfio_disconnect_container(VFIOGroup *group)
 }
 }
 
+AddressSpace *vfio_lookup_as(int groupid, PCIDevice *pdev, Error **errp)
+{
+    VFIOGroup *group;
+    VFIODevice *vbasedev_iter;
+    VFIOPCIDevice *vdev, *vd;
+
+    vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
+    QLIST_FOREACH(group, &vfio_group_list, next) {
+        QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
+            /* Reject a device that has already been attached. */
+            if (strcmp(vbasedev_iter->name, vdev->vbasedev.name) == 0) {
+                error_setg(errp, "device is already attached");
+                return NULL;
+            }
+
+            /* A device of the same group on the same bus is already
+             * realized: reuse its container's address space. */
+            if (vbasedev_iter->group->groupid == groupid) {
+                vd = container_of(vbasedev_iter, VFIOPCIDevice, vbasedev);
+
+                if (vd->pdev.bus == pdev->bus) {
+                    return vbasedev_iter->group->container->space->as;
+                }
+            }
+        }
+    }
+
+    /* First device of this group: fall back to the per-device IOMMU AS. */
+    return pci_device_iommu_address_space(pdev);
+}
+
 VFIOGroup *vfio_get_group(int groupid, AddressSpace *as, Error **errp)
 {
 VFIOGroup *group;
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index f3a2ac9..5b4827b 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -157,6 +157,7 @@ void vfio_region_mmaps_set_enabled(VFIORegion *region, bool 
enabled);
 void vfio_region_exit(VFIORegion *region);
 void vfio_region_finalize(VFIORegion *region);
 void vfio_reset_handler(void *opaque);
+AddressSpace *vfio_lookup_as(int groupid, PCIDevice *pdev, Error **errp);
 VFIOGroup *vfio_get_group(int groupid, AddressSpace *as, Error **errp);
 void vfio_put_group(VFIOGroup *group);
 int vfio_get_device(VFIOGroup *group, const char *name,
-- 
1.8.3.1




[Qemu-devel] [PATCH 1/2] virtio-net rsc: support coalescing ipv4 tcp traffic

2016-10-31 Thread wexu
From: Wei Xu 

All the data packets in a tcp connection are cached
to a single buffer in every receive interval, and are
sent out via a timer; the 'virtio_net_rsc_timeout'
parameter controls the interval. This value may impact the
performance and response time of a tcp connection:
5 (50us) is an empirical value that gains a performance
improvement, and since the whql test sends packets every
100us, '30' (300us) passes the test case and is the default
value as well. Tune it via the command line parameter
'rsc_interval' of the 'virtio-net-pci' device, for example,
to launch a guest with the interval set to '50':

'virtio-net-pci,netdev=hostnet1,bus=pci.0,id=net1,mac=00,rsc_interval=50'

The timer will only be triggered if the packets pool is not empty,
and it'll drain off all the cached packets.

'NetRscChain' is used to save the segments of IPv4/6 in a
VirtIONet device.

A new segment becomes a 'Candidate' once it passes the sanity check;
the main handler of TCP covers TCP window update, duplicate
ACK check and the real data coalescing.

A 'Candidate' segment means:
1. Segment is within current window and the sequence is the expected one.
2. 'ACK' of the segment is in the valid window.

Sanity check includes:
1. Incorrect version in IP header
2. IP options or an IP fragment
3. Not a TCP packet
4. Sanity size check to prevent buffer overflow attack.
5. An ECN packet

There may be more cases that should be considered, such as the ip
identification field and other flags, but checking them breaks the test
because windows sets the identification field to the same value even when
a packet is not a fragment.

There are 2 typical ways to handle a TCP control flag: 'bypass' and
'finalize'. 'Bypass' means the packet should be sent out directly, while
'finalize' means the packet should also be bypassed, but only after
searching the pool for packets of the same connection and draining all of
them out; this is to avoid out-of-order fragments.

All 'SYN' packets are bypassed since they always begin a new
connection; other flags such as 'URG/FIN/RST/CWR/ECE' trigger a
finalization, because they normally appear when a connection is about to
be closed, and an 'URG' packet also finalizes the current coalescing unit.
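
In rough pseudocode, the flag dispatch amounts to the sketch below; the
helpers bypass()/drain_connection()/coalesce() and the BSD-style TH_* flag
macros are illustrative names, not the actual functions in this patch:

    /* Illustrative sketch of the TCP control-flag dispatch. */
    if (tcp_flags & TH_SYN) {
        return bypass(pkt);            /* always starts a new connection */
    }
    if (tcp_flags & (TH_URG | TH_FIN | TH_RST | TH_CWR | TH_ECE)) {
        drain_connection(chain, pkt);  /* flush cached segments first... */
        return bypass(pkt);            /* ...then send this packet itself */
    }
    return coalesce(chain, pkt);       /* plain data segment */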

Statistics can be used to monitor the basic coalescing status; the
'out of order' and 'out of window' counters track retransmitted packets
and thus describe the performance intuitively.

Signed-off-by: Wei Xu 
---
 hw/net/virtio-net.c | 602 ++--
 include/hw/virtio/virtio-net.h  |   5 +-
 include/hw/virtio/virtio.h  |  76 
 include/net/eth.h   |   2 +
 include/standard-headers/linux/virtio_net.h |  14 +
 net/tap.c   |   3 +-
 6 files changed, 670 insertions(+), 32 deletions(-)

diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index 06bfe4b..d1824d9 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -15,10 +15,12 @@
 #include "qemu/iov.h"
 #include "hw/virtio/virtio.h"
 #include "net/net.h"
+#include "net/eth.h"
 #include "net/checksum.h"
 #include "net/tap.h"
 #include "qemu/error-report.h"
 #include "qemu/timer.h"
+#include "qemu/sockets.h"
 #include "hw/virtio/virtio-net.h"
 #include "net/vhost_net.h"
 #include "hw/virtio/virtio-bus.h"
@@ -43,6 +45,24 @@
 #define endof(container, field) \
 (offsetof(container, field) + sizeof(((container *)0)->field))
 
+#define VIRTIO_NET_IP4_ADDR_SIZE   8/* ipv4 saddr + daddr */
+
+#define VIRTIO_NET_TCP_FLAG 0x3F
+#define VIRTIO_NET_TCP_HDR_LENGTH   0xF000
+
+/* IPv4 max payload, 16 bits in the header */
+#define VIRTIO_NET_MAX_IP4_PAYLOAD (65535 - sizeof(struct ip_header))
+#define VIRTIO_NET_MAX_TCP_PAYLOAD 65535
+
+/* header length value in ip header without option */
+#define VIRTIO_NET_IP4_HEADER_LENGTH 5
+
+/* Purge coalesced packets timer interval. This value affects the performance
+   a lot and should be tuned carefully: '30' (300us) is the recommended
+   value to pass the WHQL test; '5' can gain 2x netperf throughput with
+   tso/gso/gro 'off'. */
+#define VIRTIO_NET_RSC_INTERVAL  30
+
 typedef struct VirtIOFeature {
 uint32_t flags;
 size_t end;
@@ -589,7 +609,12 @@ static uint64_t 
virtio_net_guest_offloads_by_features(uint32_t features)
 (1ULL << VIRTIO_NET_F_GUEST_ECN)  |
 (1ULL << VIRTIO_NET_F_GUEST_UFO);
 
-return guest_offloads_mask & features;
+if (virtio_has_feature(features, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
+return (guest_offloads_mask & features) |
+   (1ULL << VIRTIO_NET_F_GUEST_RSC4);
+} else {
+return guest_offloads_mask & features;
+}
 }
 
 static inline uint64_t virtio_net_supported_guest_offloads(VirtIONet *n)
@@ -600,6 +625,7 @@ static inline uint64_t 
virtio_net_supported_guest_offloads(VirtIONet *n)
 
 static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features)
 {
+NetClientState *nc;
 VirtIONet 

[Qemu-devel] [PATCH 2/2] virtio-net rsc: support coalescing ipv6 tcp traffic

2016-10-31 Thread wexu
From: Wei Xu 

Most of the processing flow works like ipv4; there are 2 differences
between ipv4 and ipv6.

1. The length field in the ipv4 header includes the header itself, while
it's not included for ipv6, which means ipv6 can carry a real '65535'
payload.

2. The IPv6 header has no header checksum to calculate.
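
Concretely, difference 1 shows up in the payload computation, mirroring
the extract_unit4()/extract_unit6() helpers in the diff below:

    /* ipv4: ip_len counts the IP header itself, so strip both headers. */
    unit->payload = htons(ip->ip_len) - ip_hdrlen - unit->tcp_hdrlen;

    /* ipv6: ip6_un1_plen already excludes the fixed 40-byte IPv6 header,
     * so only the TCP header needs to be subtracted. */
    unit->payload = htons(*unit->ip_plen) - unit->tcp_hdrlen;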

Signed-off-by: Wei Xu 
---
 hw/net/virtio-net.c | 168 +---
 include/standard-headers/linux/virtio_net.h |   6 +-
 2 files changed, 159 insertions(+), 15 deletions(-)

diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index d1824d9..1027a67 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -57,6 +57,10 @@
 /* header length value in ip header without option */
 #define VIRTIO_NET_IP4_HEADER_LENGTH 5
 
+#define ETH_IP6_HDR_SZ (ETH_HDR_SZ + IP6_HDR_SZ)
+#define VIRTIO_NET_IP6_ADDR_SIZE   32  /* ipv6 saddr + daddr */
+#define VIRTIO_NET_MAX_IP6_PAYLOAD VIRTIO_NET_MAX_TCP_PAYLOAD
+
 /* Purge coalesced packets timer interval. This value affects the performance
a lot and should be tuned carefully: '30' (300us) is the recommended
value to pass the WHQL test; '5' can gain 2x netperf throughput with
@@ -611,7 +615,8 @@ static uint64_t 
virtio_net_guest_offloads_by_features(uint32_t features)
 
if (virtio_has_feature(features, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
 return (guest_offloads_mask & features) |
-   (1ULL << VIRTIO_NET_F_GUEST_RSC4);
+   (1ULL << VIRTIO_NET_F_GUEST_RSC4) |
+   (1ULL << VIRTIO_NET_F_GUEST_RSC6);
 } else {
 return guest_offloads_mask & features;
 }
@@ -1612,7 +1617,8 @@ static int virtio_net_load_device(VirtIODevice *vdev, 
QEMUFile *f,
virtio_vdev_has_feature(vdev,
VIRTIO_F_VERSION_1));
 
-if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_RSC4)) {
+if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_RSC4)
+|| virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_RSC6)) {
 n->guest_hdr_len = sizeof(struct virtio_net_hdr_rsc);
 n->host_hdr_len = n->guest_hdr_len;
 }
@@ -1730,6 +1736,24 @@ static void virtio_net_rsc_extract_unit4(NetRscChain 
*chain,
 unit->payload = htons(*unit->ip_plen) - ip_hdrlen - unit->tcp_hdrlen;
 }
 
+static void virtio_net_rsc_extract_unit6(NetRscChain *chain,
+ const uint8_t *buf, NetRscUnit* unit)
+{
+struct ip6_header *ip6;
+
+ip6 = (struct ip6_header *)(buf + chain->n->guest_hdr_len
+ + sizeof(struct eth_header));
+unit->ip = ip6;
+unit->ip_plen = &(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
+unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip)\
++ sizeof(struct ip6_header));
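+/* The top 4 bits of th_offset_flags hold the TCP data offset in 32-bit
+   words; masking with 0xF000 and shifting right by 10 (>>12 for words,
+   <<2 for bytes) yields the header length in bytes directly. */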
+unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
+
+/* There is a difference between the payload length in ipv4 and v6:
+   the ip header is excluded in ipv6 */
+unit->payload = htons(*unit->ip_plen) - unit->tcp_hdrlen;
+}
+
 static void virtio_net_rsc_ipv4_checksum(struct virtio_net_hdr_rsc *rhdr,
  struct ip_header *ip)
 {
@@ -1750,12 +1774,14 @@ static size_t virtio_net_rsc_drain_seg(NetRscChain 
*chain, NetRscSeg *seg)
 
 h = (struct virtio_net_hdr_rsc *)seg->buf;
 if (seg->is_coalesced) {
-h->hdr.flags = VIRTIO_NET_HDR_RSC_TCPV4;
-virtio_net_rsc_ipv4_checksum(h, seg->unit.ip);
+if (chain->proto == ETH_P_IP) {
+h->hdr.flags = VIRTIO_NET_HDR_RSC_TCPV4;
+virtio_net_rsc_ipv4_checksum(h, seg->unit.ip);
+} else {
+h->hdr.flags = VIRTIO_NET_HDR_RSC_TCPV6;
+}
 }
 
-h = (struct virtio_net_hdr_rsc *)seg->buf;
-virtio_net_rsc_ipv4_checksum(h, seg->unit.ip);
 h->rsc_pkts = seg->packets;
 h->rsc_dup_acks = seg->dup_ack;
 ret = virtio_net_do_receive(seg->nc, seg->buf, seg->size);
@@ -1813,7 +1839,7 @@ static void virtio_net_rsc_cache_buf(NetRscChain *chain, 
NetClientState *nc,
 hdr_len = chain->n->guest_hdr_len;
 seg = g_malloc(sizeof(NetRscSeg));
 seg->buf = g_malloc(hdr_len + sizeof(struct eth_header)\
-   + VIRTIO_NET_MAX_TCP_PAYLOAD);
+   + sizeof(struct ip6_header) + VIRTIO_NET_MAX_TCP_PAYLOAD);
 memcpy(seg->buf, buf, size);
 seg->size = size;
 seg->packets = 1;
@@ -1824,7 +1850,18 @@ static void virtio_net_rsc_cache_buf(NetRscChain *chain, 
NetClientState *nc,
QTAILQ_INSERT_TAIL(&chain->buffers, seg, next);
 chain->stat.cache++;
 
-virtio_net_rsc_extract_unit4(chain, seg->buf, &seg->unit);
+switch (chain->proto) {
+case ETH_P_IP:
+virtio_net_rsc_extract_unit4(chain, seg->buf, &seg->unit);
+break;
+
+case ETH_P_IPV6:
+virtio_net_rsc_extract_unit6(chain, seg->buf, &seg->unit);
+break;
+
+default:
+g_assert_not_reached();
+}
 }
 

[Qemu-devel] [ RFC Patch v7 0/2] Support Receive-Segment-Offload(RSC) for WHQL

2016-10-31 Thread wexu
From: Wei Xu 

This patch set supports the WHQL test for Windows guests, while this
feature also benefits other guests, working like the kernel 'gro'
feature implemented in userspace.

Feature information:
   http://msdn.microsoft.com/en-us/library/windows/hardware/jj853324

v6->v7
- Change the drain timer from 'virtual' to 'host' since it is invisible
  to the guest.
- Move the buffer list empty check to virtio_net_rsc_do_coalesc().
- The header comparison is a bit odd for ipv4 in this patch; it could be
  simpler with an equality check, but it is also a helper for ipv6 in the
  next patch, and ipv6 uses different-size address fields, so I used an
  'address + size' byte comparison for the addresses and changed the tcp
  port comparison to an 'int' equality check.
- Add a counter in the sanity check for packets smaller than a normal
  tcp packet.
- Move constant values to the right side of equality comparisons.
- Use the host header length instead of the guest header length to verify
  a packet in virtio_net_rsc_receive(), in case guest and host header
  lengths differ.
- Check whether the packet size is enough to hold a legal packet before
  extracting the ip unit.
- Bypass ip/tcp ECN packets.
- Expand the feature bit definition from 32 to 64 bits.

Other notes:
- About tcp window scaling: we do not track all tcp connections, so we do
  not know the exact window size in use; this feature may therefore affect
  such connections negatively and currently has to be turned off for that
  use case.
- There are 2 new fields in the virtio net header; they are not in either
  the kernel tree or the maintainer's tree right now, so I just put them
  directly here.
- The statistics are kept in this version since they are helpful for
  troubleshooting.


Changes in V6:
- Sync upstream code
- Split new fields in 'virtio_net_hdr' to a separate patch
- Remove feature bit code, replace it with a command line parameter
  'guest_rsc' which is turned off by default. 

Changes in V5:
- Passed all IPv4/6 test cases
- Add new fields in 'virtio_net_hdr'
- Set 'gso_type' & 'coalesced packets' in new field.
- Bypass all 'tcp option' packet
- Bypass all 'pure ack' packet
- Bypass all 'duplicate ack' packet
- Change 'guest_rsc' feature bit to 'false' by default
- Feedbacks from v4, typo, etc.

Changes in V4:
- Add new host feature bit
- Replace using a fixed header length with a dynamic header length in
  VirtIONet
- Change ip/ip6 header union in NetRscUnit to void* pointer
- Add macro prefix, adjust code indent, etc.

Changes in V3:
- Removed big param list, replace it with 'NetRscUnit'
- Different virtio header size
- Modify callback function to direct call.
- Needn't check the failure of g_malloc()
- Other code format adjustment, macro naming, etc

Changes in V2:
- Add detailed commit log

Wei Xu (2):
  virtio-net rsc: support coalescing ipv4 tcp traffic
  virtio-net rsc: support coalescing ipv6 tcp traffic

 hw/net/virtio-net.c | 742 ++--
 include/hw/virtio/virtio-net.h  |   5 +-
 include/hw/virtio/virtio.h  |  76 +++
 include/net/eth.h   |   2 +
 include/standard-headers/linux/virtio_net.h |  18 +
 net/tap.c   |   3 +-
 6 files changed, 814 insertions(+), 32 deletions(-)

-- 
2.7.1




[Qemu-devel] [ RFC Patch v6 3/3] virtio-net rsc: add 2 new rsc information fields to 'virtio_net_hdr'

2016-05-28 Thread wexu
From: Wei Xu 

Field 'coalesced' indicates how many packets were coalesced and field
'dup_ack' how many duplicate acks were merged; the guest driver can use
this information to learn what the original traffic over the network
looked like.
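
For illustration only, a guest driver could consume the two new fields
roughly as below; the packet_header() accessor and the 'stats' structure
are hypothetical, only the header fields come from this patch:

    struct virtio_net_hdr *h = packet_header(pkt);    /* hypothetical */
    u16 coalesced = virtio16_to_cpu(vdev, h->coalesced);
    u16 dup_acks  = virtio16_to_cpu(vdev, h->dup_ack);

    if (coalesced) {
        stats->rx_coalesced += coalesced;             /* hypothetical */
        stats->rx_dup_acks  += dup_acks;
    }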

Signed-off-by: Wei Xu 
---
 hw/net/virtio-net.c | 8 
 include/standard-headers/linux/virtio_net.h | 2 ++
 2 files changed, 10 insertions(+)

diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index cc8cbe4..20f552a 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -1768,6 +1768,10 @@ static size_t virtio_net_rsc_drain_seg(NetRscChain 
*chain, NetRscSeg *seg)
 if ((chain->proto == ETH_P_IP) && seg->is_coalesced) {
 virtio_net_rsc_ipv4_checksum(h, seg->unit.ip);
 }
+h->coalesced = seg->packets;
+h->dup_ack = seg->dup_ack;
+h->gso_type = chain->gso_type;
+h->gso_size = chain->max_payload;
 ret = virtio_net_do_receive(seg->nc, seg->buf, seg->size);
QTAILQ_REMOVE(&chain->buffers, seg, next);
 g_free(seg->buf);
@@ -2302,9 +2306,13 @@ static ssize_t virtio_net_receive(NetClientState *nc,
   const uint8_t *buf, size_t size)
 {
 VirtIONet *n;
+struct virtio_net_hdr *h;
 
 n = qemu_get_nic_opaque(nc);
 if (n->host_features & (1ULL << VIRTIO_NET_F_GUEST_RSC)) {
+h = (struct virtio_net_hdr *)buf;
+h->coalesced = 0;
+h->dup_ack = 0;
 return virtio_net_rsc_receive(nc, buf, size);
 } else {
 return virtio_net_do_receive(nc, buf, size);
diff --git a/include/standard-headers/linux/virtio_net.h 
b/include/standard-headers/linux/virtio_net.h
index 5b95762..c837417 100644
--- a/include/standard-headers/linux/virtio_net.h
+++ b/include/standard-headers/linux/virtio_net.h
@@ -114,6 +114,8 @@ struct virtio_net_hdr {
__virtio16 gso_size;/* Bytes to append to hdr_len per frame 
*/
__virtio16 csum_start;  /* Position to start checksumming from */
__virtio16 csum_offset; /* Offset after that to place checksum */
+__virtio16 coalesced;   /* packets coalesced by host */
+__virtio16 dup_ack; /* duplicate ack count */
 };
 
 /* This is the version of the header to use when the MRG_RXBUF
-- 
2.7.1




[Qemu-devel] [ RFC Patch v6 1/3] virtio-net rsc: support coalescing ipv4 tcp traffic

2016-05-28 Thread wexu
From: Wei Xu 

All the data packets in a tcp connection will be cached to a big buffer
in every receive interval, and will be sent out via a timer; the
'virtio_net_rsc_timeout' parameter controls the interval. This value will
influence the performance and response time of a tcp connection
significantly: 5 (50us) is an empirical value that gains a performance
improvement, and since the whql test sends packets every 100us, '30'
(300us) can pass the test case, which is also the default value. It is
tunable via the command line parameter 'rsc_interval' of the
'virtio-net-pci' device; for example, the parameter below launches a
guest with the interval set to '50':

'virtio-net-pci,netdev=hostnet1,bus=pci.0,id=net1,mac=00,rsc_interval=50'

The timer will only be triggered if the packets pool is not empty,
and it'll drain off all the cached packets.

'NetRscChain' is used to save the segments of different protocols in a
VirtIONet device.

The main handler of TCP includes TCP window update, duplicate ACK check
and the real data coalescing, if the new segment passes the sanity check
and is identified as a 'wanted' one.

A 'wanted' segment means:
1. Segment is within current window and the sequence is the expected one.
2. 'ACK' of the segment is in the valid window.

Sanity check includes:
1. Incorrect version in IP header
2. IP options & IP fragments
3. Not a TCP packet
4. Sanity size check to prevent buffer overflow attack.

There may be more cases that should be considered, such as the ip
identification field and other flags, but checking them broke the test
because windows sets the identification field to the same value even when
a packet is not a fragment.

There are 2 typical ways to handle a TCP control flag: 'bypass' and
'finalize'. 'Bypass' means the packet should be sent out directly, while
'finalize' means the packet should also be bypassed, but only after
searching the pool for packets of the same connection and sending all of
them out; this is to avoid out-of-order data.

All 'SYN' packets are bypassed since they always begin a new connection;
other flags such as 'FIN/RST' trigger a finalization, because they
normally appear when a connection is about to be closed, and an 'URG'
packet also finalizes the current coalescing unit.

Statistics can be used to monitor the basic coalescing status; the
'out of order' and 'out of window' counters track retransmitted packets
and thus describe the performance intuitively.

Signed-off-by: Wei Xu 
---
 hw/net/virtio-net.c | 498 +++-
 include/hw/virtio/virtio-net.h  |   2 +
 include/hw/virtio/virtio.h  |  75 +
 include/standard-headers/linux/virtio_net.h |   1 +
 4 files changed, 575 insertions(+), 1 deletion(-)

diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index 5798f87..b3bb63b 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -15,10 +15,12 @@
 #include "qemu/iov.h"
 #include "hw/virtio/virtio.h"
 #include "net/net.h"
+#include "net/eth.h"
 #include "net/checksum.h"
 #include "net/tap.h"
 #include "qemu/error-report.h"
 #include "qemu/timer.h"
+#include "qemu/sockets.h"
 #include "hw/virtio/virtio-net.h"
 #include "net/vhost_net.h"
 #include "hw/virtio/virtio-bus.h"
@@ -38,6 +40,25 @@
 #define endof(container, field) \
 (offsetof(container, field) + sizeof(((container *)0)->field))
 
+#define VIRTIO_NET_IP4_ADDR_SIZE   8/* ipv4 saddr + daddr */
+#define VIRTIO_NET_TCP_PORT_SIZE   4/* sport + dport */
+
+#define VIRTIO_NET_TCP_FLAG 0x3F
+#define VIRTIO_NET_TCP_HDR_LENGTH   0xF000
+
+/* IPv4 max payload, 16 bits in the header */
+#define VIRTIO_NET_MAX_IP4_PAYLOAD (65535 - sizeof(struct ip_header))
+#define VIRTIO_NET_MAX_TCP_PAYLOAD 65535
+
+/* header length value in ip header without option */
+#define VIRTIO_NET_IP4_HEADER_LENGTH 5
+
+/* Purge coalesced packets timer interval. This value affects the performance
+   a lot and should be tuned carefully: '30' (300us) is the recommended
+   value to pass the WHQL test; '5' can gain 2x netperf throughput with
+   tso/gso/gro 'off'. */
+#define VIRTIO_NET_RSC_INTERVAL  30
+
 typedef struct VirtIOFeature {
 uint32_t flags;
 size_t end;
@@ -1089,7 +1110,8 @@ static int receive_filter(VirtIONet *n, const uint8_t 
*buf, int size)
 return 0;
 }
 
-static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf, 
size_t size)
+static ssize_t virtio_net_do_receive(NetClientState *nc,
+ const uint8_t *buf, size_t size)
 {
 VirtIONet *n = qemu_get_nic_opaque(nc);
 VirtIONetQueue *q = virtio_net_get_subqueue(nc);
@@ -1685,6 +1707,474 @@ static int virtio_net_load_device(VirtIODevice *vdev, 
QEMUFile *f,
 return 0;
 }
 
+static void virtio_net_rsc_extract_unit4(NetRscChain *chain,
+ const uint8_t *buf, NetRscUnit* unit)
+{
+uint16_t hdr_len;
+uint16_t ip_hdrlen;
+struct ip_header *ip;
+
+hdr_len = 

[Qemu-devel] [ RFC Patch v6 2/3] virtio-net rsc: support coalescing ipv6 tcp traffic

2016-05-28 Thread wexu
From: Wei Xu 

Most of the flow works like ipv4; there are 2 differences between ipv4
and ipv6.

1. The length field in the ipv4 header includes the header itself, while
it's not included for ipv6, which means ipv6 can carry a real '65535'
payload.

2. The IPv6 header has no header checksum to calculate.

Signed-off-by: Wei Xu 
---
 hw/net/virtio-net.c | 152 +---
 1 file changed, 144 insertions(+), 8 deletions(-)

diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index b3bb63b..cc8cbe4 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -53,6 +53,10 @@
 /* header length value in ip header without option */
 #define VIRTIO_NET_IP4_HEADER_LENGTH 5
 
+#define ETH_IP6_HDR_SZ (ETH_HDR_SZ + IP6_HDR_SZ)
+#define VIRTIO_NET_IP6_ADDR_SIZE   32  /* ipv6 saddr + daddr */
+#define VIRTIO_NET_MAX_IP6_PAYLOAD VIRTIO_NET_MAX_TCP_PAYLOAD
+
 /* Purge coalesced packets timer interval. This value affects the performance
a lot and should be tuned carefully: '30' (300us) is the recommended
value to pass the WHQL test; '5' can gain 2x netperf throughput with
@@ -1724,6 +1728,25 @@ static void virtio_net_rsc_extract_unit4(NetRscChain 
*chain,
 unit->payload = htons(*unit->ip_plen) - ip_hdrlen - unit->tcp_hdrlen;
 }
 
+static void virtio_net_rsc_extract_unit6(NetRscChain *chain,
+ const uint8_t *buf, NetRscUnit* unit)
+{
+uint16_t hdr_len;
+struct ip6_header *ip6;
+
+hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
+ip6 = (struct ip6_header *)(buf + hdr_len + sizeof(struct eth_header));
+unit->ip = ip6;
+unit->ip_plen = &(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
+unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip)\
++ sizeof(struct ip6_header));
+unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
+
+/* There is a difference between the payload length in ipv4 and v6:
+   the ip header is excluded in ipv6 */
+unit->payload = htons(*unit->ip_plen) - unit->tcp_hdrlen;
+}
+
 static void virtio_net_rsc_ipv4_checksum(struct virtio_net_hdr *vhdr,
  struct ip_header *ip)
 {
@@ -1742,7 +1765,9 @@ static size_t virtio_net_rsc_drain_seg(NetRscChain 
*chain, NetRscSeg *seg)
 struct virtio_net_hdr *h;
 
 h = (struct virtio_net_hdr *)seg->buf;
-virtio_net_rsc_ipv4_checksum(h, seg->unit.ip);
+if ((chain->proto == ETH_P_IP) && seg->is_coalesced) {
+virtio_net_rsc_ipv4_checksum(h, seg->unit.ip);
+}
 ret = virtio_net_do_receive(seg->nc, seg->buf, seg->size);
QTAILQ_REMOVE(&chain->buffers, seg, next);
 g_free(seg->buf);
@@ -1798,7 +1823,7 @@ static void virtio_net_rsc_cache_buf(NetRscChain *chain, 
NetClientState *nc,
 hdr_len = chain->n->guest_hdr_len;
 seg = g_malloc(sizeof(NetRscSeg));
 seg->buf = g_malloc(hdr_len + sizeof(struct eth_header)\
-   + VIRTIO_NET_MAX_TCP_PAYLOAD);
+   + sizeof(struct ip6_header) + VIRTIO_NET_MAX_TCP_PAYLOAD);
 memcpy(seg->buf, buf, size);
 seg->size = size;
 seg->packets = 1;
@@ -1809,7 +1834,18 @@ static void virtio_net_rsc_cache_buf(NetRscChain *chain, 
NetClientState *nc,
QTAILQ_INSERT_TAIL(&chain->buffers, seg, next);
 chain->stat.cache++;
 
-virtio_net_rsc_extract_unit4(chain, seg->buf, &seg->unit);
+switch (chain->proto) {
+case ETH_P_IP:
+virtio_net_rsc_extract_unit4(chain, seg->buf, &seg->unit);
+break;
+
+case ETH_P_IPV6:
+virtio_net_rsc_extract_unit6(chain, seg->buf, &seg->unit);
+break;
+
+default:
+g_assert_not_reached();
+}
 }
 
 static int32_t virtio_net_rsc_handle_ack(NetRscChain *chain,
@@ -1929,6 +1965,24 @@ static int32_t virtio_net_rsc_coalesce4(NetRscChain 
*chain, NetRscSeg *seg,
 return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
 }
 
+static int32_t virtio_net_rsc_coalesce6(NetRscChain *chain, NetRscSeg *seg,
+const uint8_t *buf, size_t size, NetRscUnit *unit)
+{
+struct ip6_header *ip1, *ip2;
+
+ip1 = (struct ip6_header *)(unit->ip);
+ip2 = (struct ip6_header *)(seg->unit.ip);
+if (memcmp(&ip1->ip6_src, &ip2->ip6_src, sizeof(struct in6_address))
+|| memcmp(&ip1->ip6_dst, &ip2->ip6_dst, sizeof(struct in6_address))
+|| (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
+|| (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
+chain->stat.no_match++;
+return RSC_NO_MATCH;
+}
+
+return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
+}
+
+/* Packets with 'SYN' should bypass; other flags should be sent after a drain
 * to prevent out-of-order delivery */
 static int virtio_net_rsc_tcp_ctrl_check(NetRscChain *chain,
@@ -1981,7 +2035,11 @@ static size_t virtio_net_rsc_do_coalesce(NetRscChain 
*chain, NetClientState *nc,
 NetRscSeg *seg, *nseg;
 
QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
-ret = 

[Qemu-devel] [ RFC Patch v6 0/2] Support Receive-Segment-Offload(RSC) for WHQL

2016-05-28 Thread wexu
From: Wei Xu 

Changes in V6:
- Sync upstream code
- Split new fields in 'virtio_net_hdr' to a separate patch
- Remove feature bit code, replace it with a command line parameter 'guest_rsc'
which is turned off by default.

Changes in V5:
- Passed all IPv4/6 test cases
- Add new fields in 'virtio_net_hdr'
- Set 'gso_type' & 'coalesced packets' in new field.
- Bypass all 'tcp option' packet
- Bypass all 'pure ack' packet
- Bypass all 'duplicate ack' packet
- Change 'guest_rsc' feature bit to 'false' by default
- Feedbacks from v4, typo, etc.

Note:
There are still a few pending issues with the feature bit that need to be
discussed with the windows driver maintainer, so linux guests with this
patch won't work at the moment. I haven't figured it out yet, but I'm
guessing it's caused by 'gso_type' being set to
'VIRTIO_NET_HDR_GSO_TCPV4/6'; I will fix it once there is a final
solution. The test steps and performance data below are based on v4.

Another suggestion from Jason is to adjust part of the code to make it
more readable; since there may still be a few changes to the flow in the
future, such as timestamp and duplicate ack handling, I'd like to delay
that temporarily.

Changes in V4:
- Add new host feature bit
- Replace using a fixed header length with a dynamic header length in VirtIONet
- Change ip/ip6 header union in NetRscUnit to void* pointer
- Add macro prefix, adjust code indent, etc.

Changes in V3:
- Removed big param list, replace it with 'NetRscUnit' 
- Different virtio header size
- Modify callback function to direct call.
- Needn't check the failure of g_malloc()
- Other code format adjustment, macro naming, etc 

Changes in V2:
- Add detailed commit log

This patch set supports the WHQL test for Windows guests, while the
feature also benefits other guests, working like the kernel 'gro' feature
implemented in userspace.
Feature information:
  http://msdn.microsoft.com/en-us/library/windows/hardware/jj853324

Both IPv4 and IPv6 are supported. Although performance with userspace
virtio is slower than vhost-net, there is about a 1.5x to 2x performance
improvement for userspace virtio; this is achieved by turning this feature
on and disabling 'tso/gso/gro' on the corresponding tap interface and
guest interface, while the improvement is smaller with all these features
on.

Linux guest performance data(Netperf):
MIGRATED TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 192.168.2.101 
() port 0 AF_INET : nodelay
Size   Size    Size     Time     Throughput
bytes  bytes   bytes    secs.    10^6bits/sec

 87380  16384     64    6.00     1221.20
 87380  16384     64    6.00     1260.30

 87380  16384    128    6.00     1978.51
 87380  16384    128    6.00     2286.05

 87380  16384    256    6.00     2677.94
 87380  16384    256    6.00     4615.42

 87380  16384    512    6.00     2956.54
 87380  16384    512    6.00     5356.39

 87380  16384   1024    6.00     2798.17
 87380  16384   1024    6.00     4943.30

 87380  16384   2048    6.00     2681.09
 87380  16384   2048    6.00     4835.81

 87380  16384   4096    6.00     3390.14
 87380  16384   4096    6.00     5391.54

 87380  16384   8092    6.00     3008.27
 87380  16384   8092    6.00     5381.68

 87380  16384  10240    6.00     2999.89
 87380  16384  10240    6.00     5393.11
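
Each pair of rows presumably corresponds to an invocation of the form
below, with the feature off and on respectively (the exact flags are a
guess: '-l 6' matches the 6.00s elapsed time, '-m' is the send message
size, and '-D' requests nodelay):

    netperf -t TCP_STREAM -H 192.168.2.101 -l 6 -- -m 64 -D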

Test steps:
Although this feature is mainly used for Windows guests, I used a linux
guest to help test the feature; to keep things simple, I tested the patch
in 3 steps as I moved on.

1. A tcp socket client/server pair running on 2 linux guests, so I could
control the traffic and debug the code as I wanted.
2. Netperf on the linux guests to test the throughput.
3. WHQL test with 2 Windows guests.

Wei Xu (3):
  virtio-net rsc: support coalescing ipv4 tcp traffic
  virtio-net rsc: support coalescing ipv6 tcp traffic
  virtio-net rsc: add 2 new rsc information fields to 'virtio_net_hdr'

 hw/net/virtio-net.c | 642 +++-
 include/hw/virtio/virtio-net.h  |   2 +
 include/hw/virtio/virtio.h  |  75 
 include/standard-headers/linux/virtio_net.h |   3 +
 4 files changed, 721 insertions(+), 1 deletion(-)

-- 
2.7.1




[Qemu-devel] [ RFC Patch v5 1/2] virtio-net rsc: support coalescing ipv4 tcp traffic

2016-05-23 Thread wexu
From: Wei Xu 

All the data packets in a tcp connection will be cached to a big buffer
in every receive interval, and will be sent out via a timer; the
'virtio_net_rsc_timeout' parameter controls the interval. This value will
influence the performance and response time of a tcp connection
significantly: 5 (50us) is an empirical value that gains a performance
improvement, and since the whql test sends packets every 100us, '30'
(300us) can pass the test case, which is also the default value. It is
tunable via the command line parameter 'rsc_interval' of the
'virtio-net-pci' device; for example, the parameter below launches a
guest with the interval set to '50':

'virtio-net-pci,netdev=hostnet1,bus=pci.0,id=net1,mac=00,rsc_interval=50'

The timer will only be triggered if the packets pool is not empty,
and it'll drain off all the cached packets.

'NetRscChain' is used to save the segments of different protocols in a
VirtIONet device.

The main handler of TCP includes TCP window update, duplicate ACK check
and the real data coalescing, if the new segment passes the sanity check
and is identified as a 'wanted' one.

A 'wanted' segment means:
1. Segment is within current window and the sequence is the expected one.
2. 'ACK' of the segment is in the valid window.

Sanity check includes:
1. Incorrect version in IP header
2. IP options & IP fragments
3. Not a TCP packet
4. Sanity size check to prevent buffer overflow attack.

There may be more cases that should be considered, such as the ip
identification field and other flags, but checking them broke the test
because windows sets the identification field to the same value even when
a packet is not a fragment.

There are 2 typical ways to handle a TCP control flag: 'bypass' and
'finalize'. 'Bypass' means the packet should be sent out directly, while
'finalize' means the packet should also be bypassed, but only after
searching the pool for packets of the same connection and sending all of
them out; this is to avoid out-of-order data.

All 'SYN' packets are bypassed since they always begin a new connection;
other flags such as 'FIN/RST' trigger a finalization, because they
normally appear when a connection is about to be closed, and an 'URG'
packet also finalizes the current coalescing unit.

Statistics can be used to monitor the basic coalescing status; the
'out of order' and 'out of window' counters track retransmitted packets
and thus describe the performance intuitively.

Signed-off-by: Wei Xu 
---
 hw/net/virtio-net.c | 487 +++-
 include/hw/virtio/virtio-net.h  |   2 +
 include/hw/virtio/virtio.h  |  75 +
 include/standard-headers/linux/virtio_net.h |   2 +
 4 files changed, 563 insertions(+), 3 deletions(-)

diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index bd91a4b..1ff0135 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -15,10 +15,12 @@
 #include "qemu/iov.h"
 #include "hw/virtio/virtio.h"
 #include "net/net.h"
+#include "net/eth.h"
 #include "net/checksum.h"
 #include "net/tap.h"
 #include "qemu/error-report.h"
 #include "qemu/timer.h"
+#include "qemu/sockets.h"
 #include "hw/virtio/virtio-net.h"
 #include "net/vhost_net.h"
 #include "hw/virtio/virtio-bus.h"
@@ -38,6 +40,25 @@
 #define endof(container, field) \
 (offsetof(container, field) + sizeof(((container *)0)->field))
 
+#define VIRTIO_NET_IP4_ADDR_SIZE   8/* ipv4 saddr + daddr */
+#define VIRTIO_NET_TCP_PORT_SIZE   4/* sport + dport */
+
+#define VIRTIO_NET_TCP_FLAG 0x3F
+#define VIRTIO_NET_TCP_HDR_LENGTH   0xF000
+
+/* IPv4 max payload, 16 bits in the header */
+#define VIRTIO_NET_MAX_IP4_PAYLOAD (65535 - sizeof(struct ip_header))
+#define VIRTIO_NET_MAX_TCP_PAYLOAD 65535
+
+/* header length value in ip header without option */
+#define VIRTIO_NET_IP4_HEADER_LENGTH 5
+
+/* Purge coalesced packets timer interval. This value affects the performance
+   a lot and should be tuned carefully: '30' (300us) is the recommended
+   value to pass the WHQL test; '5' can gain 2x netperf throughput with
+   tso/gso/gro 'off'. */
+#define VIRTIO_NET_RSC_INTERVAL  30
+
 typedef struct VirtIOFeature {
 uint32_t flags;
 size_t end;
@@ -1688,20 +1709,476 @@ static int virtio_net_load_device(VirtIODevice *vdev, 
QEMUFile *f,
 return 0;
 }
 
+static void virtio_net_rsc_extract_unit4(NetRscChain *chain,
+ const uint8_t *buf, NetRscUnit* unit)
+{
+uint16_t hdr_len;
+uint16_t ip_hdrlen;
+struct ip_header *ip;
+
+hdr_len = chain->n->guest_hdr_len;
+ip = (struct ip_header *)(buf + hdr_len + sizeof(struct eth_header));
+unit->ip = (void *)ip;
+ip_hdrlen = (ip->ip_ver_len & 0xF) << 2;
+unit->ip_plen = &ip->ip_len;
+unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip) + ip_hdrlen);
+unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
+unit->payload = htons(*unit->ip_plen) - ip_hdrlen - unit->tcp_hdrlen;
+}
+
+static 
