This makes the virtio driver work like ixgbe: transmit buffers are
held until a transmit free threshold is reached. The previous
behavior was to hold mbufs until the ring entry was reused, which
used more memory than needed.

Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
 lib/librte_pmd_virtio/virtio_ethdev.c |  7 ++--
 lib/librte_pmd_virtio/virtio_rxtx.c   | 70 +++++++++++++++++++++++++++++------
 lib/librte_pmd_virtio/virtqueue.h     |  3 +-
 3 files changed, 64 insertions(+), 16 deletions(-)
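
For context, a minimal sketch (not part of this patch) of how an application
would opt into a custom threshold through the standard ethdev API. The port
id, queue id, ring size and threshold are arbitrary example values, and
setup_tx_queue() is a hypothetical helper; passing tx_free_thresh = 0 keeps
the driver default of min(ring size / 4, 32) introduced below.

#include <rte_ethdev.h>

/* Example only: ask the PMD to free completed TX mbufs once 64 used
 * descriptors have accumulated, instead of the driver default.
 */
static int
setup_tx_queue(uint8_t port_id, unsigned int socket_id)
{
	struct rte_eth_dev_info dev_info;
	struct rte_eth_txconf txconf;

	rte_eth_dev_info_get(port_id, &dev_info);
	txconf = dev_info.default_txconf;	/* start from driver defaults */
	txconf.tx_free_thresh = 64;		/* must be < nb_tx_desc - 3 */

	/* queue 0 with 256 TX descriptors */
	return rte_eth_tx_queue_setup(port_id, 0, 256, socket_id, &txconf);
}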

diff --git a/lib/librte_pmd_virtio/virtio_ethdev.c b/lib/librte_pmd_virtio/virtio_ethdev.c
index c5f21c1..1ec29e1 100644
--- a/lib/librte_pmd_virtio/virtio_ethdev.c
+++ b/lib/librte_pmd_virtio/virtio_ethdev.c
@@ -176,15 +176,16 @@ virtio_send_command(struct virtqueue *vq, struct virtio_pmd_ctrl *ctrl,

        virtqueue_notify(vq);

-       while (vq->vq_used_cons_idx == vq->vq_ring.used->idx)
+       rte_rmb();
+       while (vq->vq_used_cons_idx == vq->vq_ring.used->idx) {
+               rte_rmb();
                usleep(100);
+       }

        while (vq->vq_used_cons_idx != vq->vq_ring.used->idx) {
                uint32_t idx, desc_idx, used_idx;
                struct vring_used_elem *uep;

-               virtio_rmb();
-
                used_idx = (uint32_t)(vq->vq_used_cons_idx
                                & (vq->vq_nentries - 1));
                uep = &vq->vq_ring.used->ring[used_idx];
diff --git a/lib/librte_pmd_virtio/virtio_rxtx.c b/lib/librte_pmd_virtio/virtio_rxtx.c
index b44f091..26c0a1d 100644
--- a/lib/librte_pmd_virtio/virtio_rxtx.c
+++ b/lib/librte_pmd_virtio/virtio_rxtx.c
@@ -129,9 +129,15 @@ virtqueue_dequeue_burst_rx(struct virtqueue *vq, struct rte_mbuf **rx_pkts,
        return i;
 }

+#ifndef DEFAULT_TX_FREE_THRESH
+#define DEFAULT_TX_FREE_THRESH 32
+#endif
+
+/* Cleanup from completed transmits. */
 static void
-virtqueue_dequeue_pkt_tx(struct virtqueue *vq)
+virtio_xmit_cleanup(struct virtqueue *vq, uint16_t num)
 {
+#if 0
        struct vring_used_elem *uep;
        uint16_t used_idx, desc_idx;

@@ -140,6 +146,25 @@ virtqueue_dequeue_pkt_tx(struct virtqueue *vq)
        desc_idx = (uint16_t) uep->id;
        vq->vq_used_cons_idx++;
        vq_ring_free_chain(vq, desc_idx);
+#endif
+       uint16_t i, used_idx, desc_idx;
+       for (i = 0; i < num; i++) {
+               struct vring_used_elem *uep;
+               struct vq_desc_extra *dxp;
+
+               used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1));
+               uep = &vq->vq_ring.used->ring[used_idx];
+
+               desc_idx = (uint16_t) uep->id;
+               dxp = &vq->vq_descx[desc_idx]; /* index by descriptor id, not used-ring slot */
+               vq->vq_used_cons_idx++;
+               vq_ring_free_chain(vq, desc_idx);
+
+               if (dxp->cookie != NULL) {
+                       rte_pktmbuf_free(dxp->cookie);
+                       dxp->cookie = NULL;
+               }
+       }
 }


@@ -203,8 +228,10 @@ virtqueue_enqueue_xmit(struct virtqueue *txvq, struct rte_mbuf *cookie)

        idx = head_idx;
        dxp = &txvq->vq_descx[idx];
+#if 0
        if (dxp->cookie != NULL)
                rte_pktmbuf_free(dxp->cookie);
+#endif
        dxp->cookie = (void *)cookie;
        dxp->ndescs = needed;

@@ -404,6 +431,7 @@ virtio_dev_tx_queue_setup(struct rte_eth_dev *dev,
 {
        uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX;
        struct virtqueue *vq;
+       uint16_t tx_free_thresh;
        int ret;

        PMD_INIT_FUNC_TRACE();
@@ -421,6 +449,21 @@ virtio_dev_tx_queue_setup(struct rte_eth_dev *dev,
                return ret;
        }

+       tx_free_thresh = tx_conf->tx_free_thresh;
+       if (tx_free_thresh == 0)
+               tx_free_thresh = RTE_MIN(vq->vq_nentries / 4, DEFAULT_TX_FREE_THRESH);
+
+       if (tx_free_thresh >= (vq->vq_nentries - 3)) {
+               RTE_LOG(ERR, PMD, "tx_free_thresh must be less than the "
+                       "number of TX entries minus 3 (%u)."
+                       " (tx_free_thresh=%u port=%u queue=%u)\n",
+                       vq->vq_nentries - 3,
+                       tx_free_thresh, dev->data->port_id, queue_idx);
+               return -EINVAL;
+       }
+
+       vq->vq_free_thresh = tx_free_thresh;
+
        dev->data->tx_queues[queue_idx] = vq;
        return 0;
 }
@@ -688,11 +731,9 @@ virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
 {
        struct virtqueue *txvq = tx_queue;
        struct rte_mbuf *txm;
-       uint16_t nb_used, nb_tx, num;
+       uint16_t nb_used, nb_tx;
        int error;

-       nb_tx = 0;
-
        if (unlikely(nb_pkts < 1))
                return nb_pkts;

@@ -700,21 +741,26 @@ virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
        nb_used = VIRTQUEUE_NUSED(txvq);

        virtio_rmb();
+       if (likely(nb_used > txvq->vq_free_thresh))
+               virtio_xmit_cleanup(txvq, nb_used);

-       num = (uint16_t)(likely(nb_used < VIRTIO_MBUF_BURST_SZ) ? nb_used : VIRTIO_MBUF_BURST_SZ);
+       nb_tx = 0;

        while (nb_tx < nb_pkts) {
                /* Need one more descriptor for virtio header. */
                int need = tx_pkts[nb_tx]->nb_segs - txvq->vq_free_cnt + 1;
-               int deq_cnt = RTE_MIN(need, (int)num);

-               num -= (deq_cnt > 0) ? deq_cnt : 0;
-               while (deq_cnt > 0) {
-                       virtqueue_dequeue_pkt_tx(txvq);
-                       deq_cnt--;
+               /* A positive value indicates more free descriptors are needed */
+               if (unlikely(need > 0)) {
+                       nb_used = VIRTQUEUE_NUSED(txvq);
+                       virtio_rmb();
+                       need = RTE_MIN(need, (int)nb_used);
+
+                       virtio_xmit_cleanup(txvq, need);
+                       need = (int)tx_pkts[nb_tx]->nb_segs -
+                               txvq->vq_free_cnt + 1;
                }

-               need = (int)tx_pkts[nb_tx]->nb_segs - txvq->vq_free_cnt + 1;
                /*
                 * Zero or negative value indicates it has enough free
                 * descriptors to use for transmitting.
@@ -723,7 +769,7 @@ virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
                        txm = tx_pkts[nb_tx];

                        /* Do VLAN tag insertion */
-                       if (txm->ol_flags & PKT_TX_VLAN_PKT) {
+                       if (unlikely(txm->ol_flags & PKT_TX_VLAN_PKT)) {
                                error = rte_vlan_insert(&txm);
                                if (unlikely(error)) {
                                        rte_pktmbuf_free(txm);
diff --git a/lib/librte_pmd_virtio/virtqueue.h b/lib/librte_pmd_virtio/virtqueue.h
index 4cb82f9..7672a12 100644
--- a/lib/librte_pmd_virtio/virtqueue.h
+++ b/lib/librte_pmd_virtio/virtqueue.h
@@ -164,6 +164,7 @@ struct virtqueue {
        struct rte_mempool       *mpool;  /**< mempool for mbuf allocation */
        uint16_t    queue_id;             /**< DPDK queue index. */
        uint8_t     port_id;              /**< Device port identifier. */
+       uint16_t    vq_queue_index;       /**< PCI queue index */

        void        *vq_ring_virt_mem;    /**< linear address of vring*/
        unsigned int vq_ring_size;
@@ -172,7 +173,7 @@ struct virtqueue {
        struct vring vq_ring;    /**< vring keeping desc, used and avail */
        uint16_t    vq_free_cnt; /**< num of desc available */
        uint16_t    vq_nentries; /**< vring desc numbers */
-       uint16_t    vq_queue_index;       /**< PCI queue index */
+       uint16_t    vq_free_thresh; /**< free threshold */
        /**
         * Head of the free chain in the descriptor table. If
         * there are no free descriptors, this will be set to
-- 
1.8.4.2
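
A note on the threshold bounds enforced in virtio_dev_tx_queue_setup()
above, worked through for a hypothetical 256-entry TX ring (the ring size
is an example, not part of the patch):

	/* tx_free_thresh == 0 selects the driver default:
	 *     RTE_MIN(256 / 4, DEFAULT_TX_FREE_THRESH) = RTE_MIN(64, 32) = 32
	 * An explicit value must satisfy tx_free_thresh < 256 - 3 = 253,
	 * so a few descriptors always remain in reserve.
	 */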
