Add netdev_txq_flush(), which flushes packets queued on a netdev tx
queue. This is needed to transmit packets buffered on the intermediate
queue.

This commit also implements the netdev_dpdk_txq_flush() function. If
there are any packets waiting in the intermediate queue, they are
transmitted immediately using rte_eth_tx_burst(). When XPS is enabled,
the tx queue lock is taken before the queue is flushed.
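
For context, a minimal sketch of how a caller (e.g. a pmd thread in
dpif-netdev) might drain the intermediate queues periodically. The
'send_port_cache' iteration and the field names below are illustrative
assumptions, not code added by this patch:

    /* Illustrative sketch only: flush the intermediate tx queue of every
     * port cached by a pmd thread.  'send_port_cache', 'static_tx_qid'
     * and 'dynamic_txqs' are assumed caller-side names. */
    static void
    pmd_flush_tx_queues(struct dp_netdev_pmd_thread *pmd)
    {
        struct tx_port *p;

        HMAP_FOR_EACH (p, node, &pmd->send_port_cache) {
            netdev_txq_flush(p->port->netdev, pmd->static_tx_qid,
                             p->port->dynamic_txqs);
        }
    }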

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
Signed-off-by: Antonio Fischetti <antonio.fische...@intel.com>
Co-authored-by: Antonio Fischetti <antonio.fische...@intel.com>
Signed-off-by: Markus Magnusson <markus.magnus...@ericsson.com>
Co-authored-by: Markus Magnusson <markus.magnus...@ericsson.com>
Acked-by: Eelco Chaudron <echau...@redhat.com>
---
 lib/netdev-bsd.c      |  1 +
 lib/netdev-dpdk.c     | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++++-----
 lib/netdev-dummy.c    |  1 +
 lib/netdev-linux.c    |  1 +
 lib/netdev-provider.h |  8 ++++++++
 lib/netdev-vport.c    |  2 +-
 lib/netdev.c          |  9 +++++++++
 lib/netdev.h          |  1 +
 8 files changed, 75 insertions(+), 6 deletions(-)

diff --git a/lib/netdev-bsd.c b/lib/netdev-bsd.c
index 8a4cdb3..75483ad 100644
--- a/lib/netdev-bsd.c
+++ b/lib/netdev-bsd.c
@@ -1546,6 +1546,7 @@ netdev_bsd_update_flags(struct netdev *netdev_, enum netdev_flags off,
     netdev_bsd_rxq_recv,                             \
     netdev_bsd_rxq_wait,                             \
     netdev_bsd_rxq_drain,                            \
+    NULL,                                            \
                                                      \
     NO_OFFLOAD_API                                   \
 }
diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index 1d82bca..50d6b29 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -313,6 +313,11 @@ struct dpdk_mp {
     struct ovs_list list_node OVS_GUARDED_BY(dpdk_mp_mutex);
 };
 
+/* Buffer up to 'INTERIM_QUEUE_BURST_THRESHOLD' packets before transmitting
+ * them as a burst.  Defaults to 'NETDEV_MAX_BURST' (32) packets.
+ */
+#define INTERIM_QUEUE_BURST_THRESHOLD NETDEV_MAX_BURST
+
 /* There should be one 'struct dpdk_tx_queue' created for
  * each cpu core. */
 struct dpdk_tx_queue {
@@ -322,6 +327,12 @@ struct dpdk_tx_queue {
                                     * pmd threads (see 'concurrent_txq'). */
     int map;                       /* Mapping of configured vhost-user queues
                                     * to enabled by guest. */
+    int dpdk_pkt_cnt;              /* Number of buffered packets waiting to
+                                    * be sent on the DPDK tx queue. */
+    struct rte_mbuf *dpdk_burst_pkts[INTERIM_QUEUE_BURST_THRESHOLD];
+                                   /* Intermediate queue where packets can
+                                    * be buffered to amortize the cost of MMIO
+                                    * writes. */
 };
 
 /* dpdk has no way to remove dpdk ring ethernet devices
@@ -1931,6 +1942,38 @@ netdev_dpdk_send__(struct netdev_dpdk *dev, int qid,
     }
 }
 
+/* Flush tx queues.
+ * This is done periodically to empty the intermediate queue when fewer
+ * than 'INTERIM_QUEUE_BURST_THRESHOLD' packets have been buffered.
+ */
+static int
+netdev_dpdk_txq_flush(struct netdev *netdev, int qid, bool concurrent_txq)
+{
+    struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
+    struct dpdk_tx_queue *txq;
+
+    /* In the XPS case 'qid' may exceed the number of configured tx queues,
+     * so map it to a valid queue before dereferencing 'tx_q'. */
+    if (OVS_UNLIKELY(concurrent_txq)) {
+        qid = qid % dev->up.n_txq;
+    }
+
+    txq = &dev->tx_q[qid];
+    if (OVS_LIKELY(txq->dpdk_pkt_cnt)) {
+        if (OVS_UNLIKELY(concurrent_txq)) {
+            rte_spinlock_lock(&txq->tx_lock);
+        }
+
+        netdev_dpdk_eth_tx_burst(dev, qid, txq->dpdk_burst_pkts,
+                                 txq->dpdk_pkt_cnt);
+
+        if (OVS_UNLIKELY(concurrent_txq)) {
+            rte_spinlock_unlock(&txq->tx_lock);
+        }
+    }
+    return 0;
+}
+
 static int
 netdev_dpdk_eth_send(struct netdev *netdev, int qid,
                      struct dp_packet_batch *batch, bool may_steal,
@@ -3313,7 +3350,7 @@ unlock:
                           SET_CONFIG, SET_TX_MULTIQ, SEND,    \
                           GET_CARRIER, GET_STATS,             \
                           GET_FEATURES, GET_STATUS,           \
-                          RECONFIGURE, RXQ_RECV)              \
+                          RECONFIGURE, RXQ_RECV, TXQ_FLUSH)   \
 {                                                             \
     NAME,                                                     \
     true,                       /* is_pmd */                  \
@@ -3381,6 +3418,7 @@ unlock:
     RXQ_RECV,                                                 \
     NULL,                       /* rx_wait */                 \
     NULL,                       /* rxq_drain */               \
+    TXQ_FLUSH,                  /* txq_flush */               \
     NO_OFFLOAD_API                                            \
 }
 
@@ -3398,7 +3436,8 @@ static const struct netdev_class dpdk_class =
         netdev_dpdk_get_features,
         netdev_dpdk_get_status,
         netdev_dpdk_reconfigure,
-        netdev_dpdk_rxq_recv);
+        netdev_dpdk_rxq_recv,
+        netdev_dpdk_txq_flush);
 
 static const struct netdev_class dpdk_ring_class =
     NETDEV_DPDK_CLASS(
@@ -3414,7 +3453,8 @@ static const struct netdev_class dpdk_ring_class =
         netdev_dpdk_get_features,
         netdev_dpdk_get_status,
         netdev_dpdk_reconfigure,
-        netdev_dpdk_rxq_recv);
+        netdev_dpdk_rxq_recv,
+        NULL);
 
 static const struct netdev_class dpdk_vhost_class =
     NETDEV_DPDK_CLASS(
@@ -3430,7 +3470,8 @@ static const struct netdev_class dpdk_vhost_class =
         NULL,
         NULL,
         netdev_dpdk_vhost_reconfigure,
-        netdev_dpdk_vhost_rxq_recv);
+        netdev_dpdk_vhost_rxq_recv,
+        NULL);
 static const struct netdev_class dpdk_vhost_client_class =
     NETDEV_DPDK_CLASS(
         "dpdkvhostuserclient",
@@ -3445,7 +3486,8 @@ static const struct netdev_class dpdk_vhost_client_class =
         NULL,
         NULL,
         netdev_dpdk_vhost_client_reconfigure,
-        netdev_dpdk_vhost_rxq_recv);
+        netdev_dpdk_vhost_rxq_recv,
+        NULL);
 
 void
 netdev_dpdk_register(void)
diff --git a/lib/netdev-dummy.c b/lib/netdev-dummy.c
index 752f157..86ec902 100644
--- a/lib/netdev-dummy.c
+++ b/lib/netdev-dummy.c
@@ -1413,6 +1413,7 @@ netdev_dummy_update_flags(struct netdev *netdev_,
     netdev_dummy_rxq_recv,                                      \
     netdev_dummy_rxq_wait,                                      \
     netdev_dummy_rxq_drain,                                     \
+    NULL,                                                       \
                                                                 \
     NO_OFFLOAD_API                                              \
 }
diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
index 98820ed..992f887 100644
--- a/lib/netdev-linux.c
+++ b/lib/netdev-linux.c
@@ -2888,6 +2888,7 @@ netdev_linux_update_flags(struct netdev *netdev_, enum netdev_flags off,
     netdev_linux_rxq_recv,                                      \
     netdev_linux_rxq_wait,                                      \
     netdev_linux_rxq_drain,                                     \
+    NULL,                                                       \
                                                                 \
     FLOW_OFFLOAD_API                                            \
 }
diff --git a/lib/netdev-provider.h b/lib/netdev-provider.h
index b3c57d5..9c47fdf 100644
--- a/lib/netdev-provider.h
+++ b/lib/netdev-provider.h
@@ -347,6 +347,11 @@ struct netdev_class {
      * If the function returns a non-zero value, some of the packets might have
      * been sent anyway.
      *
+     * Some netdev providers, such as 'dpdk', may buffer the batch of
+     * packets into an intermediate queue.  Buffered packets are
+     * transmitted when the packet count exceeds a threshold, or by a
+     * periodic call to the flush function.
+     *
      * If 'may_steal' is false, the caller retains ownership of all the
      * packets.  If 'may_steal' is true, the caller transfers ownership of all
      * the packets to the network device, regardless of success.
@@ -788,6 +793,9 @@ struct netdev_class {
     /* Discards all packets waiting to be received from 'rx'. */
     int (*rxq_drain)(struct netdev_rxq *rx);
 
+    /* Flushes all packets waiting to be sent on queue 'qid'. */
+    int (*txq_flush)(struct netdev *netdev, int qid, bool concurrent_txq);
+
     /* ## -------------------------------- ## */
     /* ## netdev flow offloading functions ## */
     /* ## -------------------------------- ## */
diff --git a/lib/netdev-vport.c b/lib/netdev-vport.c
index 64a3ba3..3c5eacf 100644
--- a/lib/netdev-vport.c
+++ b/lib/netdev-vport.c
@@ -951,10 +951,10 @@ netdev_vport_get_ifindex(const struct netdev *netdev_)
     NULL,                   /* rx_recv */                   \
     NULL,                   /* rx_wait */                   \
     NULL,                   /* rx_drain */                  \
+    NULL,                   /* tx_flush */                  \
                                                             \
     NETDEV_FLOW_OFFLOAD_API
 
-
 #define TUNNEL_CLASS(NAME, DPIF_PORT, BUILD_HEADER, PUSH_HEADER, POP_HEADER,   \
                      GET_IFINDEX)                                              \
     { DPIF_PORT,                                                               \
diff --git a/lib/netdev.c b/lib/netdev.c
index 7e9896b..8250396 100644
--- a/lib/netdev.c
+++ b/lib/netdev.c
@@ -724,6 +724,15 @@ netdev_rxq_drain(struct netdev_rxq *rx)
             : 0);
 }
 
+/* Flushes packets waiting to be sent on queue 'qid' of 'netdev'. */
+int
+netdev_txq_flush(struct netdev *netdev, int qid, bool concurrent_txq)
+{
+    return (netdev->netdev_class->txq_flush
+            ? netdev->netdev_class->txq_flush(netdev, qid, concurrent_txq)
+            : EOPNOTSUPP);
+}
+
 /* Configures the number of tx queues of 'netdev'. Returns 0 if successful,
  * otherwise a positive errno value.
  *
diff --git a/lib/netdev.h b/lib/netdev.h
index f8482f7..328a158 100644
--- a/lib/netdev.h
+++ b/lib/netdev.h
@@ -183,6 +183,7 @@ int netdev_rxq_drain(struct netdev_rxq *);
 int netdev_send(struct netdev *, int qid, struct dp_packet_batch *,
                 bool may_steal, bool concurrent_txq);
 void netdev_send_wait(struct netdev *, int qid);
+int netdev_txq_flush(struct netdev *, int qid, bool concurrent_txq);
 
 /* Flow offloading. */
 struct offload_info {
-- 
2.4.11
