On Fri, Apr 06, 2018 at 11:30:10AM +0200, Maxime Coquelin wrote:


On 04/05/2018 12:10 PM, Jens Freimann wrote:
Implement code to dequeue and process descriptors from
the vring if VIRTIO_F_RING_PACKED is enabled.

Check whether a descriptor was made available by the driver by
looking at the VIRTIO_F_DESC_AVAIL flag in the descriptor. If so,
dequeue it and set the used flag VIRTIO_F_DESC_USED to the current
value of the used wrap counter.

The used wrap counter needs to be toggled when the last descriptor
is written out. This allows the host/guest to detect new descriptors
even after the ring has wrapped.

Signed-off-by: Jens Freimann <jfreim...@redhat.com>
---
 lib/librte_vhost/vhost.c      |   1 +
 lib/librte_vhost/vhost.h      |   1 +
 lib/librte_vhost/virtio_net.c | 228 ++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 230 insertions(+)
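
For context when reading the diff below, here is a rough sketch of what
the packed-ring helpers it relies on (desc_is_avail(), _set_desc_used(),
toggle_wrap_counter()) are assumed to do. The actual definitions live in
virtio-1.1.h, which is not part of this diff, so the bodies and the flag
bit values here are only an illustration, not the real code:

/* Assumed flag bits, taken from the virtio 1.1 draft spec. */
#define VRING_DESC_F_AVAIL     (1ULL << 7)
#define VRING_DESC_F_USED      (1ULL << 15)

/*
 * A descriptor is available when its AVAIL bit matches the current
 * wrap counter and its USED bit does not. The wrap counter starts
 * at 1 and flips each time the ring index wraps around.
 */
static inline int
desc_is_avail(struct vhost_virtqueue *vq, struct vring_desc_packed *desc)
{
       int avail = !!(desc->flags & VRING_DESC_F_AVAIL);
       int used  = !!(desc->flags & VRING_DESC_F_USED);

       return avail == vq->used_wrap_counter &&
              used != vq->used_wrap_counter;
}

/*
 * Marking a descriptor used means writing both bits to the value of
 * the wrap counter that was current when the head was dequeued.
 */
static inline void
_set_desc_used(struct vring_desc_packed *desc, int wrap_counter)
{
       uint16_t flags = desc->flags;

       if (wrap_counter)
               flags |= VRING_DESC_F_AVAIL | VRING_DESC_F_USED;
       else
               flags &= ~(VRING_DESC_F_AVAIL | VRING_DESC_F_USED);

       desc->flags = flags;
}

static inline void
toggle_wrap_counter(struct vhost_virtqueue *vq)
{
       vq->used_wrap_counter ^= 1;
}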

diff --git a/lib/librte_vhost/vhost.c b/lib/librte_vhost/vhost.c
index 1f17cdd75..eb5a98875 100644
--- a/lib/librte_vhost/vhost.c
+++ b/lib/librte_vhost/vhost.c
@@ -185,6 +185,7 @@ init_vring_queue(struct virtio_net *dev, uint32_t vring_idx)
        vq->kickfd = VIRTIO_UNINITIALIZED_EVENTFD;
        vq->callfd = VIRTIO_UNINITIALIZED_EVENTFD;
+       vq->used_wrap_counter = 1;
        vhost_user_iotlb_init(dev, vring_idx);
        /* Backends are set to -1 indicating an inactive device. */
diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
index 20d78f883..c8aa946fd 100644
--- a/lib/librte_vhost/vhost.h
+++ b/lib/librte_vhost/vhost.h
@@ -112,6 +112,7 @@ struct vhost_virtqueue {
        struct batch_copy_elem  *batch_copy_elems;
        uint16_t                batch_copy_nb_elems;
+       uint16_t                used_wrap_counter;
        rte_rwlock_t    iotlb_lock;
        rte_rwlock_t    iotlb_pending_lock;
diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
index ed7198dbb..7eea1da04 100644
--- a/lib/librte_vhost/virtio_net.c
+++ b/lib/librte_vhost/virtio_net.c
@@ -19,6 +19,7 @@
 #include "iotlb.h"
 #include "vhost.h"
+#include "virtio-1.1.h"
 #define MAX_PKT_BURST 32
@@ -1118,6 +1119,233 @@ restore_mbuf(struct rte_mbuf *m)
        }
 }
+static inline uint16_t
+dequeue_desc_packed(struct virtio_net *dev, struct vhost_virtqueue *vq,
+            struct rte_mempool *mbuf_pool, struct rte_mbuf *m,
+            struct vring_desc_packed *descs)
+{
+       struct vring_desc_packed *desc;
+       uint64_t desc_addr;
+       uint32_t desc_avail, desc_offset;
+       uint32_t mbuf_avail, mbuf_offset;
+       uint32_t cpy_len;
+       struct rte_mbuf *cur = m, *prev = m;
+       struct virtio_net_hdr *hdr = NULL;
+       uint16_t head_idx = vq->last_used_idx & (vq->size - 1);
+       int wrap_counter = vq->used_wrap_counter;
+       int rc = 0;
+
+       rte_spinlock_lock(&vq->access_lock);
+
+       if (unlikely(vq->enabled == 0))
+               goto out;

It is unbalanced, as it would unlock iotlb_rd_lock, which isn't locked yet.
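
One way to rebalance it (just a sketch, and the "out_access_unlock"
label name is my own, not something from this patch) would be to give
the early exit its own label, so that only the access lock is released
when vq->enabled is 0:

       rte_spinlock_lock(&vq->access_lock);

       if (unlikely(vq->enabled == 0))
               goto out_access_unlock;

       if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
               vhost_user_iotlb_rd_lock(vq);

       /* ... descriptor processing ... */

out:
       if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
               vhost_user_iotlb_rd_unlock(vq);
out_access_unlock:
       rte_spinlock_unlock(&vq->access_lock);

       return rc;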

yes, will fix

+       if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
+               vhost_user_iotlb_rd_lock(vq);
+
+       desc = &descs[vq->last_used_idx & (vq->size - 1)];
+       if (unlikely((desc->len < dev->vhost_hlen)) ||
+                       (desc->flags & VRING_DESC_F_INDIRECT)) {
+                       RTE_LOG(ERR, VHOST_DATA,
+                               "INDIRECT not supported yet\n");
+                       rc = -1;
+                       goto out;
+       }
+
+       desc_addr = vhost_iova_to_vva(dev, vq, desc->addr,
+                                     sizeof(*desc), VHOST_ACCESS_RO);
+
+       if (unlikely(!desc_addr)) {
+               rc = -1;
+               goto out;
+       }
+
+       if (virtio_net_with_host_offload(dev)) {
+               hdr = (struct virtio_net_hdr *)((uintptr_t)desc_addr);
+               rte_prefetch0(hdr);
+       }
+
+       /*
+        * A virtio driver normally uses at least 2 desc buffers
+        * for Tx: the first for storing the header, and others
+        * for storing the data.
+        */
+       if (likely((desc->len == dev->vhost_hlen) &&
+                  (desc->flags & VRING_DESC_F_NEXT) != 0)) {
+               if ((++vq->last_used_idx & (vq->size - 1)) == 0)
+                       toggle_wrap_counter(vq);
+
+               desc = &descs[vq->last_used_idx & (vq->size - 1)];
+
+               if (unlikely(desc->flags & VRING_DESC_F_INDIRECT)) {
+                       RTE_LOG(ERR, VHOST_DATA,
+                               "INDIRECT not supported yet\n");
+                       rc = -1;
+                       goto out;
+               }
+
+               desc_addr = vhost_iova_to_vva(dev, vq, desc->addr,
+                                             sizeof(*desc), VHOST_ACCESS_RO);
+               if (unlikely(!desc_addr)) {
+                       rc = -1;
+                       goto out;
+               }
+
+               desc_offset = 0;
+               desc_avail  = desc->len;
+       } else {
+               desc_avail  = desc->len - dev->vhost_hlen;
+               desc_offset = dev->vhost_hlen;
+       }
+
+       rte_prefetch0((void *)(uintptr_t)(desc_addr + desc_offset));
+
+       PRINT_PACKET(dev, (uintptr_t)(desc_addr + desc_offset), desc_avail, 0);
+
+       mbuf_offset = 0;
+       mbuf_avail  = m->buf_len - RTE_PKTMBUF_HEADROOM;
+       while (1) {
+               uint64_t hpa;
+
+               cpy_len = RTE_MIN(desc_avail, mbuf_avail);
+
+               /*
+                * A desc buf might span two host physical pages that are
+                * not contiguous. In such a case (gpa_to_hpa returns 0), data
+                * will be copied even though zero copy is enabled.
+                */
+               if (unlikely(dev->dequeue_zero_copy && (hpa = gpa_to_hpa(dev,
+                                       desc->addr + desc_offset, cpy_len)))) {
+                       cur->data_len = cpy_len;
+                       cur->data_off = 0;
+                       cur->buf_addr = (void *)(uintptr_t)desc_addr;
+                       cur->buf_physaddr = hpa;
+
+                       /*
+                        * In zero copy mode, one mbuf can only reference data
+                        * for one or part of one desc buf.
+                        */
+                       mbuf_avail = cpy_len;
+               } else {
+                       rte_memcpy(rte_pktmbuf_mtod_offset(cur, void *,
+                                                          mbuf_offset),
+                               (void *)((uintptr_t)(desc_addr + desc_offset)),
+                               cpy_len);
+               }
+
+               mbuf_avail  -= cpy_len;
+               mbuf_offset += cpy_len;
+               desc_avail  -= cpy_len;
+               desc_offset += cpy_len;
+
+               /* This desc reaches its end, get the next one */
+               if (desc_avail == 0) {
+                       if ((desc->flags & VRING_DESC_F_NEXT) == 0)
+                               break;
+
+                       if ((++vq->last_used_idx & (vq->size - 1)) == 0)
+                               toggle_wrap_counter(vq);
+
+                       desc = &descs[vq->last_used_idx & (vq->size - 1)];
+                       if (unlikely(desc->flags & VRING_DESC_F_INDIRECT)) {
+                               RTE_LOG(ERR, VHOST_DATA, "INDIRECT not supported yet");
+                       }
+
+                       desc_addr = vhost_iova_to_vva(dev, vq, desc->addr,
+                                                     sizeof(*desc), VHOST_ACCESS_RO);
+                       if (unlikely(!desc_addr)) {
+                               rc = -1;
+                               goto out;
Not sure, but shouldn't you break here instead, so the ring gets
updated if some of the descs have been processed?

It should break here and in other places in this loop as well. Will
fix it.
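
For illustration, the change being discussed here would look roughly
like this (a sketch of the idea only, not the final code): record the
error and break out of the copy loop, so the descriptors that were
already walked still get marked as used and last_used_idx still
advances before returning:

                       desc_addr = vhost_iova_to_vva(dev, vq, desc->addr,
                                                     sizeof(*desc),
                                                     VHOST_ACCESS_RO);
                       if (unlikely(!desc_addr)) {
                               rc = -1;
                               break;
                       }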
+                       }
+
+                       rte_prefetch0((void *)(uintptr_t)desc_addr);
+
+                       desc_offset = 0;
+                       desc_avail  = desc->len;
+
+                       PRINT_PACKET(dev, (uintptr_t)desc_addr, desc->len, 0);
+               }
+
+               /*
+                * This mbuf reaches its end, get a new one
+                * to hold more data.
+                */
+               if (mbuf_avail == 0) {
+                       cur = rte_pktmbuf_alloc(mbuf_pool);
+                       if (unlikely(cur == NULL)) {
+                               RTE_LOG(ERR, VHOST_DATA, "Failed to "
+                                       "allocate memory for mbuf.\n");
+                               rc = -1;
+                               goto out;
+                       }
+
+                       prev->next = cur;
+                       prev->data_len = mbuf_offset;
+                       m->nb_segs += 1;
+                       m->pkt_len += mbuf_offset;
+                       prev = cur;
+
+                       mbuf_offset = 0;
+                       mbuf_avail  = cur->buf_len - RTE_PKTMBUF_HEADROOM;
+               }
+       }
+
+       if (hdr)
+               vhost_dequeue_offload(hdr, m);
+
+       if ((++vq->last_used_idx & (vq->size - 1)) == 0)
+               toggle_wrap_counter(vq);
+
+       rte_smp_wmb();
+       _set_desc_used(&descs[head_idx], wrap_counter);
+
+       prev->data_len = mbuf_offset;
+       m->pkt_len    += mbuf_offset;
+
+out:
+       if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
+               vhost_user_iotlb_rd_unlock(vq);
+       rte_spinlock_unlock(&vq->access_lock);
+
+       return rc;
+}
+
+static inline uint16_t
+vhost_dequeue_burst_packed(struct virtio_net *dev, struct vhost_virtqueue *vq,
+                       struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts,
+                       uint16_t count)
+{
+       uint16_t i;
+       uint16_t idx;
+       struct vring_desc_packed *desc = vq->desc_packed;
+       int err;
+
+       count = RTE_MIN(MAX_PKT_BURST, count);
+       for (i = 0; i < count; i++) {
+               idx = vq->last_used_idx & (vq->size - 1);
+               if (!desc_is_avail(vq, &desc[idx]))
+                       break;
+               rte_smp_rmb();
+
+               pkts[i] = rte_pktmbuf_alloc(mbuf_pool);
+               if (unlikely(pkts[i] == NULL)) {
+                       RTE_LOG(ERR, VHOST_DATA,
+                               "Failed to allocate memory for mbuf.\n");
+                       break;
+               }
+
+               err = dequeue_desc_packed(dev, vq, mbuf_pool, pkts[i], desc);
+               if (unlikely(err)) {
+                       rte_pktmbuf_free(pkts[i]);
+                       break;
+               }
+       }
+
+       rte_spinlock_unlock(&vq->access_lock);
I think you forgot to remove this line.

yes

Thanks!

regards,
Jens
