This patch optimizes the vhost enqueue function: rte_vhost_enqueue_burst.

Currently there are two callbacks for vhost enqueue:
 *  virtio_dev_merge_rx for mrg_rxbuf turned on cases.
 *  virtio_dev_rx for mrg_rxbuf turned off cases.

virtio_dev_merge_rx doesn't provide optimal performance, and it is also
reported to have compatibility issues when working with Windows VMs.

Besides, having two separate functions increases the maintenance effort.

This patch replaces the current two functions with a single one for
better maintainability, and provides better performance by optimizing
caching behavior, especially for the mrg_rxbuf turned-on cases.

It also fixes the compatibility issue with Windows VMs.

Signed-off-by: Zhihong Wang <zhihong.wang at intel.com>
---
 lib/librte_vhost/vhost-net.h  |   6 +-
 lib/librte_vhost/vhost_rxtx.c | 582 ++++++++++++++----------------------------
 lib/librte_vhost/virtio-net.c |  15 +-
 3 files changed, 208 insertions(+), 395 deletions(-)

diff --git a/lib/librte_vhost/vhost-net.h b/lib/librte_vhost/vhost-net.h
index 38593a2..a15182c 100644
--- a/lib/librte_vhost/vhost-net.h
+++ b/lib/librte_vhost/vhost-net.h
@@ -71,7 +71,7 @@ struct vhost_virtqueue {
        uint32_t                size;

        /* Last index used on the available ring */
-       volatile uint16_t       last_used_idx;
+       uint16_t                last_used_idx;
 #define VIRTIO_INVALID_EVENTFD         (-1)
 #define VIRTIO_UNINITIALIZED_EVENTFD   (-2)

@@ -85,6 +85,10 @@ struct vhost_virtqueue {

        /* Physical address of used ring, for logging */
        uint64_t                log_guest_addr;
+
+       /* Shadow used ring for performance */
+       struct vring_used_elem  *shadow_used_ring;
+       uint32_t                shadow_used_idx;
 } __rte_cache_aligned;

 /* Old kernels have no such macro defined */
diff --git a/lib/librte_vhost/vhost_rxtx.c b/lib/librte_vhost/vhost_rxtx.c
index 08a73fd..1263168 100644
--- a/lib/librte_vhost/vhost_rxtx.c
+++ b/lib/librte_vhost/vhost_rxtx.c
@@ -91,7 +91,7 @@ is_valid_virt_queue_idx(uint32_t idx, int is_tx, uint32_t qp_nb)
        return (is_tx ^ (idx & 1)) == 0 && idx < qp_nb * VIRTIO_QNUM;
 }

-static void
+static inline void __attribute__((always_inline))
 virtio_enqueue_offload(struct rte_mbuf *m_buf, struct virtio_net_hdr *net_hdr)
 {
        if (m_buf->ol_flags & PKT_TX_L4_MASK) {
@@ -125,427 +125,227 @@ virtio_enqueue_offload(struct rte_mbuf *m_buf, struct virtio_net_hdr *net_hdr)
        }
 }

-static inline void
-copy_virtio_net_hdr(struct virtio_net *dev, uint64_t desc_addr,
-                   struct virtio_net_hdr_mrg_rxbuf hdr)
-{
-       if (dev->vhost_hlen == sizeof(struct virtio_net_hdr_mrg_rxbuf))
-               *(struct virtio_net_hdr_mrg_rxbuf *)(uintptr_t)desc_addr = hdr;
-       else
-               *(struct virtio_net_hdr *)(uintptr_t)desc_addr = hdr.hdr;
-}
-
-static inline int __attribute__((always_inline))
-copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
-                 struct rte_mbuf *m, uint16_t desc_idx)
+uint16_t
+rte_vhost_enqueue_burst(int vid, uint16_t queue_id,
+       struct rte_mbuf **pkts, uint16_t count)
 {
-       uint32_t desc_avail, desc_offset;
-       uint32_t mbuf_avail, mbuf_offset;
-       uint32_t cpy_len;
+       struct virtio_net_hdr_mrg_rxbuf *virtio_hdr;
+       struct vhost_virtqueue *vq;
        struct vring_desc *desc;
-       uint64_t desc_addr;
-       struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {{0, 0, 0, 0, 0, 0}, 0};
-
-       desc = &vq->desc[desc_idx];
-       desc_addr = gpa_to_vva(dev, desc->addr);
-       /*
-        * Checking of 'desc_addr' placed outside of 'unlikely' macro to avoid
-        * performance issue with some versions of gcc (4.8.4 and 5.3.0) which
-        * otherwise stores offset on the stack instead of in a register.
-        */
-       if (unlikely(desc->len < dev->vhost_hlen) || !desc_addr)
-               return -1;
-
-       rte_prefetch0((void *)(uintptr_t)desc_addr);
-
-       virtio_enqueue_offload(m, &virtio_hdr.hdr);
-       copy_virtio_net_hdr(dev, desc_addr, virtio_hdr);
-       vhost_log_write(dev, desc->addr, dev->vhost_hlen);
-       PRINT_PACKET(dev, (uintptr_t)desc_addr, dev->vhost_hlen, 0);
-
-       desc_offset = dev->vhost_hlen;
-       desc_avail  = desc->len - dev->vhost_hlen;
-
-       mbuf_avail  = rte_pktmbuf_data_len(m);
-       mbuf_offset = 0;
-       while (mbuf_avail != 0 || m->next != NULL) {
-               /* done with current mbuf, fetch next */
-               if (mbuf_avail == 0) {
-                       m = m->next;
-
-                       mbuf_offset = 0;
-                       mbuf_avail  = rte_pktmbuf_data_len(m);
-               }
-
-               /* done with current desc buf, fetch next */
-               if (desc_avail == 0) {
-                       if ((desc->flags & VRING_DESC_F_NEXT) == 0) {
-                               /* Room in vring buffer is not enough */
-                               return -1;
-                       }
-                       if (unlikely(desc->next >= vq->size))
-                               return -1;
-
-                       desc = &vq->desc[desc->next];
-                       desc_addr = gpa_to_vva(dev, desc->addr);
-                       if (unlikely(!desc_addr))
-                               return -1;
-
-                       desc_offset = 0;
-                       desc_avail  = desc->len;
-               }
-
-               cpy_len = RTE_MIN(desc_avail, mbuf_avail);
-               rte_memcpy((void *)((uintptr_t)(desc_addr + desc_offset)),
-                       rte_pktmbuf_mtod_offset(m, void *, mbuf_offset),
-                       cpy_len);
-               vhost_log_write(dev, desc->addr + desc_offset, cpy_len);
-               PRINT_PACKET(dev, (uintptr_t)(desc_addr + desc_offset),
-                            cpy_len, 0);
-
-               mbuf_avail  -= cpy_len;
-               mbuf_offset += cpy_len;
-               desc_avail  -= cpy_len;
-               desc_offset += cpy_len;
-       }
-
-       return 0;
-}
+       struct virtio_net *dev;
+       struct rte_mbuf *mbuf;
+       uint64_t desc_host_write_addr = 0;
+       uint32_t desc_chain_head = 0;
+       uint32_t desc_chain_len = 0;
+       uint32_t desc_current = 0;
+       uint32_t desc_write_offset = 0;
+       uint32_t used_idx_static = 0;
+       uint32_t pkt_idx = 0;
+       uint32_t pkt_left = 0;
+       uint32_t pkt_sent = 0;
+       uint32_t mbuf_len = 0;
+       uint32_t mbuf_len_left = 0;
+       uint32_t copy_len = 0;
+       uint32_t copy_virtio_hdr = 0;
+       uint32_t is_mrg_rxbuf = 0;
+       uint32_t is_virtio_1 = 0;
+
+       if (unlikely(count == 0))
+               return 0;

-/**
- * This function adds buffers to the virtio devices RX virtqueue. Buffers can
- * be received from the physical port or from another virtio device. A packet
- * count is returned to indicate the number of packets that are succesfully
- * added to the RX queue. This function works when the mbuf is scattered, but
- * it doesn't support the mergeable feature.
- */
-static inline uint32_t __attribute__((always_inline))
-virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
-             struct rte_mbuf **pkts, uint32_t count)
-{
-       struct vhost_virtqueue *vq;
-       uint16_t avail_idx, free_entries, start_idx;
-       uint16_t desc_indexes[MAX_PKT_BURST];
-       uint16_t used_idx;
-       uint32_t i;
+       count = RTE_MIN((uint32_t)MAX_PKT_BURST, count);

-       LOG_DEBUG(VHOST_DATA, "(%d) %s\n", dev->vid, __func__);
-       if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->virt_qp_nb))) {
-               RTE_LOG(ERR, VHOST_DATA, "(%d) %s: invalid virtqueue idx %d.\n",
-                       dev->vid, __func__, queue_id);
+       dev = get_device(vid);
+       if (unlikely(!dev))
                return 0;
-       }

-       vq = dev->virtqueue[queue_id];
-       if (unlikely(vq->enabled == 0))
+       if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->virt_qp_nb)))
                return 0;

-       avail_idx = *((volatile uint16_t *)&vq->avail->idx);
-       start_idx = vq->last_used_idx;
-       free_entries = avail_idx - start_idx;
-       count = RTE_MIN(count, free_entries);
-       count = RTE_MIN(count, (uint32_t)MAX_PKT_BURST);
-       if (count == 0)
+       vq = dev->virtqueue[queue_id];
+       if (unlikely(!vq->enabled))
                return 0;

-       LOG_DEBUG(VHOST_DATA, "(%d) start_idx %d | end_idx %d\n",
-               dev->vid, start_idx, start_idx + count);
+       if (dev->features & (1ULL << VIRTIO_NET_F_MRG_RXBUF))
+               is_mrg_rxbuf = 1;
+
+       if (dev->features & (1ULL << VIRTIO_F_VERSION_1))
+               is_virtio_1 = 1;
+
+       pkt_idx = 0;
+       pkt_left = count;
+       used_idx_static = vq->last_used_idx & (vq->size - 1);
+       vq->shadow_used_idx = 0;
+
+       while (pkt_left > 0) {
+               if (unlikely(vq->avail->idx == vq->last_used_idx))
+                       goto done;
+
+               if (pkt_left > 1 && vq->avail->idx != vq->last_used_idx + 1)
+                       rte_prefetch0(&vq->desc[
+                                       vq->avail->ring[
+                                       (vq->last_used_idx + 1) &
+                                       (vq->size - 1)]]);
+
+               mbuf = pkts[pkt_idx];
+               mbuf_len = rte_pktmbuf_data_len(mbuf);
+               mbuf_len_left = mbuf_len;
+               pkt_idx++;
+               pkt_left--;
+
+               desc_chain_head = vq->avail->ring[(vq->last_used_idx) &
+                       (vq->size - 1)];
+               desc_current = desc_chain_head;
+               desc = &vq->desc[desc_current];
+               desc_host_write_addr = gpa_to_vva(dev, desc->addr);
+               if (unlikely(!desc_host_write_addr))
+                       goto done;
+
+               virtio_hdr = (struct virtio_net_hdr_mrg_rxbuf *)
+                       (uintptr_t)desc_host_write_addr;
+               copy_virtio_hdr = 1;
+
+               vhost_log_write(dev, desc->addr, dev->vhost_hlen);
+               desc_write_offset = dev->vhost_hlen;
+               desc_chain_len = desc_write_offset;
+               desc_host_write_addr += desc_write_offset;
+
+               while (1) {
+                       if (!mbuf_len_left) {
+                               if (mbuf->next) {
+                                       mbuf = mbuf->next;
+                                       mbuf_len = rte_pktmbuf_data_len(mbuf);
+                                       mbuf_len_left = mbuf_len;
+                               } else
+                                       break;
+                       }

-       /* Retrieve all of the desc indexes first to avoid caching issues. */
-       rte_prefetch0(&vq->avail->ring[start_idx & (vq->size - 1)]);
-       for (i = 0; i < count; i++) {
-               used_idx = (start_idx + i) & (vq->size - 1);
-               desc_indexes[i] = vq->avail->ring[used_idx];
-               vq->used->ring[used_idx].id = desc_indexes[i];
-               vq->used->ring[used_idx].len = pkts[i]->pkt_len +
-                                              dev->vhost_hlen;
-               vhost_log_used_vring(dev, vq,
-                       offsetof(struct vring_used, ring[used_idx]),
-                       sizeof(vq->used->ring[used_idx]));
-       }
+                       if (desc->len <= desc_write_offset) {
+                               if (desc->flags & VRING_DESC_F_NEXT) {
+                                       desc_write_offset = 0;
+                                       desc_current = desc->next;
+                                       desc = &vq->desc[desc_current];
+                                       desc_host_write_addr =
+                                               gpa_to_vva(dev, desc->addr);
+                                       if (unlikely(!desc_host_write_addr))
+                                               goto rollback;
+                               } else if (is_mrg_rxbuf) {
+                                       vq->shadow_used_ring[
+                                               vq->shadow_used_idx].id =
+                                               desc_chain_head;
+                                       vq->shadow_used_ring[
+                                               vq->shadow_used_idx].len =
+                                               desc_chain_len;
+                                       vq->shadow_used_idx++;
+                                       vq->last_used_idx++;
+                                       virtio_hdr->num_buffers++;
+                                       if (unlikely(vq->avail->idx ==
+                                                       vq->last_used_idx))
+                                               goto rollback;
+
+                                       desc_chain_head = vq->avail->ring[
+                                               (vq->last_used_idx) &
+                                               (vq->size - 1)];
+                                       desc_current = desc_chain_head;
+                                       desc = &vq->desc[desc_current];
+                                       desc_host_write_addr =
+                                               gpa_to_vva(dev, desc->addr);
+                                       if (unlikely(!desc_host_write_addr))
+                                               goto rollback;
+
+                                       desc_chain_len = 0;
+                                       desc_write_offset = 0;
+                               } else
+                                       goto rollback;
+                       }

-       rte_prefetch0(&vq->desc[desc_indexes[0]]);
-       for (i = 0; i < count; i++) {
-               uint16_t desc_idx = desc_indexes[i];
-               int err;
+                       copy_len = RTE_MIN(desc->len - desc_write_offset,
+                                       mbuf_len_left);
+                       if (copy_virtio_hdr) {
+                               copy_virtio_hdr = 0;
+                               memset((void *)(uintptr_t)&(virtio_hdr->hdr),
+                                               0, dev->vhost_hlen);
+                               virtio_enqueue_offload(mbuf,
+                                               &(virtio_hdr->hdr));
+                               if (is_mrg_rxbuf || is_virtio_1)
+                                       virtio_hdr->num_buffers = 1;
+                       }

-               err = copy_mbuf_to_desc(dev, vq, pkts[i], desc_idx);
-               if (unlikely(err)) {
-                       used_idx = (start_idx + i) & (vq->size - 1);
-                       vq->used->ring[used_idx].len = dev->vhost_hlen;
-                       vhost_log_used_vring(dev, vq,
-                               offsetof(struct vring_used, ring[used_idx]),
-                               sizeof(vq->used->ring[used_idx]));
+                       rte_memcpy((void *)(uintptr_t)desc_host_write_addr,
+                                       rte_pktmbuf_mtod_offset(mbuf, void *,
+                                               mbuf_len - mbuf_len_left),
+                                       copy_len);
+                       vhost_log_write(dev, desc->addr + desc_write_offset,
+                                       copy_len);
+                       mbuf_len_left -= copy_len;
+                       desc_write_offset += copy_len;
+                       desc_host_write_addr += copy_len;
+                       desc_chain_len += copy_len;
                }

-               if (i + 1 < count)
-                       rte_prefetch0(&vq->desc[desc_indexes[i+1]]);
+               vq->shadow_used_ring[vq->shadow_used_idx].id = desc_chain_head;
+               vq->shadow_used_ring[vq->shadow_used_idx].len = desc_chain_len;
+               vq->shadow_used_idx++;
+               vq->last_used_idx++;
+               pkt_sent++;
        }

-       rte_smp_wmb();
-
-       *(volatile uint16_t *)&vq->used->idx += count;
-       vq->last_used_idx += count;
-       vhost_log_used_vring(dev, vq,
-               offsetof(struct vring_used, idx),
-               sizeof(vq->used->idx));
-
-       /* flush used->idx update before we read avail->flags. */
-       rte_mb();
-
-       /* Kick the guest if necessary. */
-       if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT)
-                       && (vq->callfd >= 0))
-               eventfd_write(vq->callfd, (eventfd_t)1);
-       return count;
-}
-
-static inline int
-fill_vec_buf(struct vhost_virtqueue *vq, uint32_t avail_idx,
-            uint32_t *allocated, uint32_t *vec_idx,
-            struct buf_vector *buf_vec)
-{
-       uint16_t idx = vq->avail->ring[avail_idx & (vq->size - 1)];
-       uint32_t vec_id = *vec_idx;
-       uint32_t len    = *allocated;
-
-       while (1) {
-               if (unlikely(vec_id >= BUF_VECTOR_MAX || idx >= vq->size))
-                       return -1;
-
-               len += vq->desc[idx].len;
-               buf_vec[vec_id].buf_addr = vq->desc[idx].addr;
-               buf_vec[vec_id].buf_len  = vq->desc[idx].len;
-               buf_vec[vec_id].desc_idx = idx;
-               vec_id++;
-
-               if ((vq->desc[idx].flags & VRING_DESC_F_NEXT) == 0)
-                       break;
-
-               idx = vq->desc[idx].next;
-       }
-
-       *allocated = len;
-       *vec_idx   = vec_id;
-
-       return 0;
-}
-
-/*
- * Returns -1 on fail, 0 on success
- */
-static inline int
-reserve_avail_buf_mergeable(struct vhost_virtqueue *vq, uint32_t size,
-                           uint16_t *end, struct buf_vector *buf_vec)
-{
-       uint16_t cur_idx;
-       uint16_t avail_idx;
-       uint32_t allocated = 0;
-       uint32_t vec_idx = 0;
-       uint16_t tries = 0;
-
-       cur_idx  = vq->last_used_idx;
-
-       while (1) {
-               avail_idx = *((volatile uint16_t *)&vq->avail->idx);
-               if (unlikely(cur_idx == avail_idx))
-                       return -1;
-
-               if (unlikely(fill_vec_buf(vq, cur_idx, &allocated,
-                                         &vec_idx, buf_vec) < 0))
-                       return -1;
-
-               cur_idx++;
-               tries++;
-
-               if (allocated >= size)
-                       break;
-
-               /*
-                * if we tried all available ring items, and still
-                * can't get enough buf, it means something abnormal
-                * happened.
-                */
-               if (unlikely(tries >= vq->size))
-                       return -1;
-       }
-
-       *end = cur_idx;
-       return 0;
-}
-
-static inline uint32_t __attribute__((always_inline))
-copy_mbuf_to_desc_mergeable(struct virtio_net *dev, struct vhost_virtqueue *vq,
-                           uint16_t end_idx, struct rte_mbuf *m,
-                           struct buf_vector *buf_vec)
-{
-       struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {{0, 0, 0, 0, 0, 0}, 0};
-       uint32_t vec_idx = 0;
-       uint16_t start_idx = vq->last_used_idx;
-       uint16_t cur_idx = start_idx;
-       uint64_t desc_addr;
-       uint32_t mbuf_offset, mbuf_avail;
-       uint32_t desc_offset, desc_avail;
-       uint32_t cpy_len;
-       uint16_t desc_idx, used_idx;
-
-       if (unlikely(m == NULL))
-               return 0;
-
-       LOG_DEBUG(VHOST_DATA, "(%d) current index %d | end index %d\n",
-               dev->vid, cur_idx, end_idx);
-
-       desc_addr = gpa_to_vva(dev, buf_vec[vec_idx].buf_addr);
-       if (buf_vec[vec_idx].buf_len < dev->vhost_hlen || !desc_addr)
-               return 0;
-
-       rte_prefetch0((void *)(uintptr_t)desc_addr);
-
-       virtio_hdr.num_buffers = end_idx - start_idx;
-       LOG_DEBUG(VHOST_DATA, "(%d) RX: num merge buffers %d\n",
-               dev->vid, virtio_hdr.num_buffers);
-
-       virtio_enqueue_offload(m, &virtio_hdr.hdr);
-       copy_virtio_net_hdr(dev, desc_addr, virtio_hdr);
-       vhost_log_write(dev, buf_vec[vec_idx].buf_addr, dev->vhost_hlen);
-       PRINT_PACKET(dev, (uintptr_t)desc_addr, dev->vhost_hlen, 0);
-
-       desc_avail  = buf_vec[vec_idx].buf_len - dev->vhost_hlen;
-       desc_offset = dev->vhost_hlen;
-
-       mbuf_avail  = rte_pktmbuf_data_len(m);
-       mbuf_offset = 0;
-       while (mbuf_avail != 0 || m->next != NULL) {
-               /* done with current desc buf, get the next one */
-               if (desc_avail == 0) {
-                       desc_idx = buf_vec[vec_idx].desc_idx;
-
-                       if (!(vq->desc[desc_idx].flags & VRING_DESC_F_NEXT)) {
-                               /* Update used ring with desc information */
-                               used_idx = cur_idx++ & (vq->size - 1);
-                               vq->used->ring[used_idx].id  = desc_idx;
-                               vq->used->ring[used_idx].len = desc_offset;
-                               vhost_log_used_vring(dev, vq,
+done:
+       if (likely(vq->shadow_used_idx > 0)) {
+               if (used_idx_static + vq->shadow_used_idx < vq->size) {
+                       rte_memcpy(&vq->used->ring[used_idx_static],
+                                       &vq->shadow_used_ring[0],
+                                       vq->shadow_used_idx *
+                                       sizeof(struct vring_used_elem));
+                       vhost_log_used_vring(dev, vq,
                                        offsetof(struct vring_used,
-                                                ring[used_idx]),
-                                       sizeof(vq->used->ring[used_idx]));
-                       }
-
-                       vec_idx++;
-                       desc_addr = gpa_to_vva(dev, buf_vec[vec_idx].buf_addr);
-                       if (unlikely(!desc_addr))
-                               return 0;
-
-                       /* Prefetch buffer address. */
-                       rte_prefetch0((void *)(uintptr_t)desc_addr);
-                       desc_offset = 0;
-                       desc_avail  = buf_vec[vec_idx].buf_len;
-               }
-
-               /* done with current mbuf, get the next one */
-               if (mbuf_avail == 0) {
-                       m = m->next;
+                                               ring[used_idx_static]),
+                                       vq->shadow_used_idx *
+                                       sizeof(struct vring_used_elem));
+               } else {
+                       uint32_t part_1 = vq->size - used_idx_static;
+                       uint32_t part_2 = vq->shadow_used_idx - part_1;

-                       mbuf_offset = 0;
-                       mbuf_avail  = rte_pktmbuf_data_len(m);
+                       rte_memcpy(&vq->used->ring[used_idx_static],
+                                       &vq->shadow_used_ring[0],
+                                       part_1 *
+                                       sizeof(struct vring_used_elem));
+                       vhost_log_used_vring(dev, vq,
+                                       offsetof(struct vring_used,
+                                               ring[used_idx_static]),
+                                       part_1 *
+                                       sizeof(struct vring_used_elem));
+                       rte_memcpy(&vq->used->ring[0],
+                                       &vq->shadow_used_ring[part_1],
+                                       part_2 *
+                                       sizeof(struct vring_used_elem));
+                       vhost_log_used_vring(dev, vq,
+                                       offsetof(struct vring_used,
+                                               ring[0]),
+                                       part_2 *
+                                       sizeof(struct vring_used_elem));
                }
-
-               cpy_len = RTE_MIN(desc_avail, mbuf_avail);
-               rte_memcpy((void *)((uintptr_t)(desc_addr + desc_offset)),
-                       rte_pktmbuf_mtod_offset(m, void *, mbuf_offset),
-                       cpy_len);
-               vhost_log_write(dev, buf_vec[vec_idx].buf_addr + desc_offset,
-                       cpy_len);
-               PRINT_PACKET(dev, (uintptr_t)(desc_addr + desc_offset),
-                       cpy_len, 0);
-
-               mbuf_avail  -= cpy_len;
-               mbuf_offset += cpy_len;
-               desc_avail  -= cpy_len;
-               desc_offset += cpy_len;
        }

-       used_idx = cur_idx & (vq->size - 1);
-       vq->used->ring[used_idx].id = buf_vec[vec_idx].desc_idx;
-       vq->used->ring[used_idx].len = desc_offset;
+       rte_smp_wmb();
+       vq->used->idx = vq->last_used_idx;
        vhost_log_used_vring(dev, vq,
-               offsetof(struct vring_used, ring[used_idx]),
-               sizeof(vq->used->ring[used_idx]));
-
-       return end_idx - start_idx;
-}
-
-static inline uint32_t __attribute__((always_inline))
-virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id,
-       struct rte_mbuf **pkts, uint32_t count)
-{
-       struct vhost_virtqueue *vq;
-       uint32_t pkt_idx = 0, nr_used = 0;
-       uint16_t end;
-       struct buf_vector buf_vec[BUF_VECTOR_MAX];
-
-       LOG_DEBUG(VHOST_DATA, "(%d) %s\n", dev->vid, __func__);
-       if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->virt_qp_nb))) {
-               RTE_LOG(ERR, VHOST_DATA, "(%d) %s: invalid virtqueue idx %d.\n",
-                       dev->vid, __func__, queue_id);
-               return 0;
-       }
-
-       vq = dev->virtqueue[queue_id];
-       if (unlikely(vq->enabled == 0))
-               return 0;
-
-       count = RTE_MIN((uint32_t)MAX_PKT_BURST, count);
-       if (count == 0)
-               return 0;
-
-       for (pkt_idx = 0; pkt_idx < count; pkt_idx++) {
-               uint32_t pkt_len = pkts[pkt_idx]->pkt_len + dev->vhost_hlen;
-
-               if (unlikely(reserve_avail_buf_mergeable(vq, pkt_len,
-                                                        &end, buf_vec) < 0)) {
-                       LOG_DEBUG(VHOST_DATA,
-                               "(%d) failed to get enough desc from vring\n",
-                               dev->vid);
-                       break;
-               }
-
-               nr_used = copy_mbuf_to_desc_mergeable(dev, vq, end,
-                                                     pkts[pkt_idx], buf_vec);
-               rte_smp_wmb();
-
-               *(volatile uint16_t *)&vq->used->idx += nr_used;
-               vhost_log_used_vring(dev, vq, offsetof(struct vring_used, idx),
+                       offsetof(struct vring_used, idx),
                        sizeof(vq->used->idx));
-               vq->last_used_idx += nr_used;
-       }
-
-       if (likely(pkt_idx)) {
-               /* flush used->idx update before we read avail->flags. */
-               rte_mb();
-
-               /* Kick the guest if necessary. */
+       rte_mb();
+       if (likely(pkt_sent)) {
                if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT)
                                && (vq->callfd >= 0))
                        eventfd_write(vq->callfd, (eventfd_t)1);
        }

-       return pkt_idx;
-}
-
-uint16_t
-rte_vhost_enqueue_burst(int vid, uint16_t queue_id,
-       struct rte_mbuf **pkts, uint16_t count)
-{
-       struct virtio_net *dev = get_device(vid);
+       return pkt_sent;

-       if (!dev)
-               return 0;
+rollback:
+       if (is_mrg_rxbuf || is_virtio_1)
+               vq->last_used_idx -= virtio_hdr->num_buffers - 1;

-       if (dev->features & (1 << VIRTIO_NET_F_MRG_RXBUF))
-               return virtio_dev_merge_rx(dev, queue_id, pkts, count);
-       else
-               return virtio_dev_rx(dev, queue_id, pkts, count);
+       goto done;
 }

 static void
diff --git a/lib/librte_vhost/virtio-net.c b/lib/librte_vhost/virtio-net.c
index 1785695..87d09fa 100644
--- a/lib/librte_vhost/virtio-net.c
+++ b/lib/librte_vhost/virtio-net.c
@@ -152,10 +152,14 @@ cleanup_device(struct virtio_net *dev, int destroy)
 static void
 free_device(struct virtio_net *dev)
 {
+       struct vhost_virtqueue *vq;
        uint32_t i;

-       for (i = 0; i < dev->virt_qp_nb; i++)
-               rte_free(dev->virtqueue[i * VIRTIO_QNUM]);
+       for (i = 0; i < dev->virt_qp_nb; i++) {
+               vq = dev->virtqueue[i * VIRTIO_QNUM];
+               rte_free(vq->shadow_used_ring);
+               rte_free(vq);
+       }

        rte_free(dev);
 }
@@ -418,13 +422,18 @@ int
 vhost_set_vring_num(int vid, struct vhost_vring_state *state)
 {
        struct virtio_net *dev;
+       struct vhost_virtqueue *vq;

        dev = get_device(vid);
        if (dev == NULL)
                return -1;

        /* State->index refers to the queue index. The txq is 1, rxq is 0. */
-       dev->virtqueue[state->index]->size = state->num;
+       vq = dev->virtqueue[state->index];
+       vq->size = state->num;
+       vq->shadow_used_ring = rte_malloc("",
+                       vq->size * sizeof(struct vring_used_elem),
+                       RTE_CACHE_LINE_SIZE);

        return 0;
 }
-- 
2.7.4

Reply via email to