Add netdev_txq_flush(), which flushes packets on a queue. This is needed to transmit packets queued on the intermediate queue.
This commit also implements netdev_dpdk_txq_flush() function. If there are any packets waiting in the queue, they are transmitted instantly using the rte_eth_tx_burst function. In XPS enabled case, lock is taken on the tx queue before flushing the queue. Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com> Signed-off-by: Antonio Fischetti <antonio.fische...@intel.com> Co-authored-by: Antonio Fischetti <antonio.fische...@intel.com> Signed-off-by: Markus Magnusson <markus.magnus...@ericsson.com> Co-authored-by: Markus Magnusson <markus.magnus...@ericsson.com> Acked-by: Eelco Chaudron <echau...@redhat.com> --- lib/netdev-bsd.c | 1 + lib/netdev-dpdk.c | 52 ++++++++++++++++++++++++++++++++++++++++++++++----- lib/netdev-dummy.c | 1 + lib/netdev-linux.c | 1 + lib/netdev-provider.h | 8 ++++++++ lib/netdev-vport.c | 2 +- lib/netdev.c | 9 +++++++++ lib/netdev.h | 1 + 8 files changed, 69 insertions(+), 6 deletions(-) diff --git a/lib/netdev-bsd.c b/lib/netdev-bsd.c index 8a4cdb3..75483ad 100644 --- a/lib/netdev-bsd.c +++ b/lib/netdev-bsd.c @@ -1546,6 +1546,7 @@ netdev_bsd_update_flags(struct netdev *netdev_, enum netdev_flags off, netdev_bsd_rxq_recv, \ netdev_bsd_rxq_wait, \ netdev_bsd_rxq_drain, \ + NULL, \ \ NO_OFFLOAD_API \ } diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index 1d82bca..50d6b29 100644 --- a/lib/netdev-dpdk.c +++ b/lib/netdev-dpdk.c @@ -313,6 +313,11 @@ struct dpdk_mp { struct ovs_list list_node OVS_GUARDED_BY(dpdk_mp_mutex); }; +/* Queue 'INTERIM_QUEUE_BURST_THRESHOLD' packets before transmitting. + * Defaults to 'NETDEV_MAX_BURST'(32) packets. + */ +#define INTERIM_QUEUE_BURST_THRESHOLD NETDEV_MAX_BURST + /* There should be one 'struct dpdk_tx_queue' created for * each cpu core. */ struct dpdk_tx_queue { @@ -322,6 +327,12 @@ struct dpdk_tx_queue { * pmd threads (see 'concurrent_txq'). */ int map; /* Mapping of configured vhost-user queues * to enabled by guest. 
*/ + int dpdk_pkt_cnt; /* Number of buffered packets waiting to + be sent on DPDK tx queue. */ + struct rte_mbuf *dpdk_burst_pkts[INTERIM_QUEUE_BURST_THRESHOLD]; + /* Intermediate queue where packets can + * be buffered to amortize the cost of MMIO + * writes. */ }; /* dpdk has no way to remove dpdk ring ethernet devices @@ -1931,6 +1942,32 @@ netdev_dpdk_send__(struct netdev_dpdk *dev, int qid, } } +/* Flush tx queues. + * This is done periodically to empty the intermediate queue in case of + * fewer packets (< INTERIM_QUEUE_BURST_THRESHOLD) buffered in the queue. + */ +static int +netdev_dpdk_txq_flush(struct netdev *netdev, int qid , bool concurrent_txq) +{ + struct netdev_dpdk *dev = netdev_dpdk_cast(netdev); + struct dpdk_tx_queue *txq = &dev->tx_q[qid]; + + if (OVS_LIKELY(txq->dpdk_pkt_cnt)) { + if (OVS_UNLIKELY(concurrent_txq)) { + qid = qid % dev->up.n_txq; + rte_spinlock_lock(&dev->tx_q[qid].tx_lock); + } + + netdev_dpdk_eth_tx_burst(dev, qid, txq->dpdk_burst_pkts, + txq->dpdk_pkt_cnt); + + if (OVS_UNLIKELY(concurrent_txq)) { + rte_spinlock_unlock(&dev->tx_q[qid].tx_lock); + } + } + return 0; +} + static int netdev_dpdk_eth_send(struct netdev *netdev, int qid, struct dp_packet_batch *batch, bool may_steal, @@ -3313,7 +3350,7 @@ unlock: SET_CONFIG, SET_TX_MULTIQ, SEND, \ GET_CARRIER, GET_STATS, \ GET_FEATURES, GET_STATUS, \ - RECONFIGURE, RXQ_RECV) \ + RECONFIGURE, RXQ_RECV, TXQ_FLUSH) \ { \ NAME, \ true, /* is_pmd */ \ @@ -3381,6 +3418,7 @@ unlock: RXQ_RECV, \ NULL, /* rx_wait */ \ NULL, /* rxq_drain */ \ + TXQ_FLUSH, /* txq_flush */ \ NO_OFFLOAD_API \ } @@ -3398,7 +3436,8 @@ static const struct netdev_class dpdk_class = netdev_dpdk_get_features, netdev_dpdk_get_status, netdev_dpdk_reconfigure, - netdev_dpdk_rxq_recv); + netdev_dpdk_rxq_recv, + netdev_dpdk_txq_flush); static const struct netdev_class dpdk_ring_class = NETDEV_DPDK_CLASS( @@ -3414,7 +3453,8 @@ static const struct netdev_class dpdk_ring_class = netdev_dpdk_get_features, 
netdev_dpdk_get_status, netdev_dpdk_reconfigure, - netdev_dpdk_rxq_recv); + netdev_dpdk_rxq_recv, + NULL); static const struct netdev_class dpdk_vhost_class = NETDEV_DPDK_CLASS( @@ -3430,7 +3470,8 @@ static const struct netdev_class dpdk_vhost_class = NULL, NULL, netdev_dpdk_vhost_reconfigure, - netdev_dpdk_vhost_rxq_recv); + netdev_dpdk_vhost_rxq_recv, + NULL); static const struct netdev_class dpdk_vhost_client_class = NETDEV_DPDK_CLASS( "dpdkvhostuserclient", @@ -3445,7 +3486,8 @@ static const struct netdev_class dpdk_vhost_client_class = NULL, NULL, netdev_dpdk_vhost_client_reconfigure, - netdev_dpdk_vhost_rxq_recv); + netdev_dpdk_vhost_rxq_recv, + NULL); void netdev_dpdk_register(void) diff --git a/lib/netdev-dummy.c b/lib/netdev-dummy.c index 752f157..86ec902 100644 --- a/lib/netdev-dummy.c +++ b/lib/netdev-dummy.c @@ -1413,6 +1413,7 @@ netdev_dummy_update_flags(struct netdev *netdev_, netdev_dummy_rxq_recv, \ netdev_dummy_rxq_wait, \ netdev_dummy_rxq_drain, \ + NULL, \ \ NO_OFFLOAD_API \ } diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c index 98820ed..992f887 100644 --- a/lib/netdev-linux.c +++ b/lib/netdev-linux.c @@ -2888,6 +2888,7 @@ netdev_linux_update_flags(struct netdev *netdev_, enum netdev_flags off, netdev_linux_rxq_recv, \ netdev_linux_rxq_wait, \ netdev_linux_rxq_drain, \ + NULL, \ \ FLOW_OFFLOAD_API \ } diff --git a/lib/netdev-provider.h b/lib/netdev-provider.h index b3c57d5..9c47fdf 100644 --- a/lib/netdev-provider.h +++ b/lib/netdev-provider.h @@ -347,6 +347,11 @@ struct netdev_class { * If the function returns a non-zero value, some of the packets might have * been sent anyway. * + * Some netdev provider - like in case of 'dpdk' - may buffer the batch + * of packets into an intermediate queue. Buffered packets shall be + * transmitted when the packet count exceeds a threshold (or) by the + * periodic call to the flush function. + * * If 'may_steal' is false, the caller retains ownership of all the * packets. 
If 'may_steal' is true, the caller transfers ownership of all * the packets to the network device, regardless of success. @@ -788,6 +793,9 @@ struct netdev_class { /* Discards all packets waiting to be received from 'rx'. */ int (*rxq_drain)(struct netdev_rxq *rx); + /* Flush all packets waiting to be sent on 'qid' queue. */ + int (*txq_flush)(struct netdev *netdev, int qid, bool concurrent_txq); + /* ## -------------------------------- ## */ /* ## netdev flow offloading functions ## */ /* ## -------------------------------- ## */ diff --git a/lib/netdev-vport.c b/lib/netdev-vport.c index 64a3ba3..3c5eacf 100644 --- a/lib/netdev-vport.c +++ b/lib/netdev-vport.c @@ -951,10 +951,10 @@ netdev_vport_get_ifindex(const struct netdev *netdev_) NULL, /* rx_recv */ \ NULL, /* rx_wait */ \ NULL, /* rx_drain */ \ + NULL, /* tx_flush */ \ \ NETDEV_FLOW_OFFLOAD_API - #define TUNNEL_CLASS(NAME, DPIF_PORT, BUILD_HEADER, PUSH_HEADER, POP_HEADER, \ GET_IFINDEX) \ { DPIF_PORT, \ diff --git a/lib/netdev.c b/lib/netdev.c index 7e9896b..8250396 100644 --- a/lib/netdev.c +++ b/lib/netdev.c @@ -724,6 +724,15 @@ netdev_rxq_drain(struct netdev_rxq *rx) : 0); } +/* Flush packets on the 'qid' queue. */ +int +netdev_txq_flush(struct netdev *netdev, int qid, bool netdev_txq_flush) +{ + return (netdev->netdev_class->txq_flush + ? netdev->netdev_class->txq_flush(netdev, qid, netdev_txq_flush) + : EOPNOTSUPP); +} + /* Configures the number of tx queues of 'netdev'. Returns 0 if successful, * otherwise a positive errno value. * diff --git a/lib/netdev.h b/lib/netdev.h index f8482f7..328a158 100644 --- a/lib/netdev.h +++ b/lib/netdev.h @@ -183,6 +183,7 @@ int netdev_rxq_drain(struct netdev_rxq *); int netdev_send(struct netdev *, int qid, struct dp_packet_batch *, bool may_steal, bool concurrent_txq); void netdev_send_wait(struct netdev *, int qid); +int netdev_txq_flush(struct netdev *, int qid, bool concurrent_txq); /* Flow offloading. 
*/ struct offload_info { -- 2.4.11 _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev