Problem Statement: Before OVS 2.12, the OVS-DPDK datapath transmitted processed rx packet batches directly to the destination tx queues. Since OVS 2.12, each PMD stores the processed packets in an intermediate buffer per output port and flushes these output buffers in a separate step. This buffering was introduced to allow better batching of packets for transmit.
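To illustrate the cost difference, here is a minimal standalone sketch. This is not the OVS code: the names below (port, pmd, buffer_packet, flush_full_scan, flush_pending_list) are simplified stand-ins for struct tx_port, struct dp_netdev_pmd_thread, and the real flush functions, and a plain singly linked list stands in for struct ovs_list. The old scheme walks every port; the new scheme visits only ports that actually buffered packets.

#include <stdbool.h>
#include <stdio.h>

#define N_PORTS 256

struct port {
    int n_buffered;            /* Packets waiting in the output buffer. */
    struct port *next_pending; /* Link in the PMD's pending list, or NULL. */
    bool on_pending_list;
};

struct pmd {
    struct port ports[N_PORTS];
    struct port *pending_head; /* Only ports with buffered packets. */
};

/* Old scheme: O(N_PORTS) regardless of how many ports hold packets. */
static int
flush_full_scan(struct pmd *pmd)
{
    int flushed = 0;
    for (int i = 0; i < N_PORTS; i++) {
        struct port *p = &pmd->ports[i];
        if (p->n_buffered) {
            flushed += p->n_buffered;
            p->n_buffered = 0;
        }
    }
    return flushed;
}

/* New scheme: O(number of pending ports). */
static int
flush_pending_list(struct pmd *pmd)
{
    int flushed = 0;
    while (pmd->pending_head) {
        struct port *p = pmd->pending_head;
        pmd->pending_head = p->next_pending;
        p->next_pending = NULL;
        p->on_pending_list = false;
        flushed += p->n_buffered;
        p->n_buffered = 0;
    }
    return flushed;
}

static void
buffer_packet(struct pmd *pmd, int port_id)
{
    struct port *p = &pmd->ports[port_id];
    p->n_buffered++;
    if (!p->on_pending_list) { /* Enqueue on first buffered packet. */
        p->next_pending = pmd->pending_head;
        pmd->pending_head = p;
        p->on_pending_list = true;
    }
}

int
main(void)
{
    struct pmd pmd = {0};
    buffer_packet(&pmd, 7);
    buffer_packet(&pmd, 7);
    buffer_packet(&pmd, 42);
    /* Visits only the 2 pending ports instead of all 256. */
    printf("flushed %d packets\n", flush_pending_list(&pmd));
    printf("flushed %d packets\n", flush_full_scan(&pmd)); /* Nothing left. */
    return 0;
}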
The current implementation of the function that flushes the output buffers performs a full scan over all output ports, even if only a single packet was buffered for a single output port. In systems with hundreds of ports this scan can take a long time and degrade OVS-DPDK performance significantly.

Solution: Maintain a list of output ports with buffered packets for each PMD thread and only iterate over that list when flushing output buffers.

Signed-off-by: Dheeraj Kumar <[email protected]>
---
 lib/dpif-netdev-private-thread.h |  7 ++++---
 lib/dpif-netdev.c                | 24 ++++++++++++------------
 2 files changed, 16 insertions(+), 15 deletions(-)

diff --git a/lib/dpif-netdev-private-thread.h b/lib/dpif-netdev-private-thread.h
index 4472b199d..2775e1a2b 100644
--- a/lib/dpif-netdev-private-thread.h
+++ b/lib/dpif-netdev-private-thread.h
@@ -185,9 +185,6 @@ struct dp_netdev_pmd_thread {
      * than 'cmap_count(dp->poll_threads)'. */
     uint32_t static_tx_qid;
 
-    /* Number of filled output batches. */
-    int n_output_batches;
-
     struct ovs_mutex port_mutex;    /* Mutex for 'poll_list' and 'tx_ports'. */
     /* List of rx queues to poll. */
     struct hmap poll_list OVS_GUARDED;
@@ -213,6 +210,10 @@ struct dp_netdev_pmd_thread {
     struct hmap tnl_port_cache;
     struct hmap send_port_cache;
 
+    /* Keep track of the ports with buffered output packets in
+     * send_port_cache. */
+    struct ovs_list pending_tx_ports;
+
     /* Keep track of detailed PMD performance statistics. */
     struct pmd_perf_stats perf_stats;
 
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index a45b46014..2e6129877 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -500,6 +500,7 @@ struct tx_port {
     int qid;
     long long last_used;
     struct hmap_node node;
+    struct ovs_list pending_tx_node;  /* Only used in pending_tx_ports. */
     long long flush_time;
     struct dp_packet_batch output_pkts;
     struct dp_packet_batch *txq_pkts; /* Only for hash mode. */
@@ -5241,8 +5242,10 @@ dp_netdev_pmd_flush_output_on_port(struct dp_netdev_pmd_thread *pmd,
     atomic_read_relaxed(&pmd->dp->tx_flush_interval, &tx_flush_interval);
     p->flush_time = pmd->ctx.now + tx_flush_interval;
 
-    ovs_assert(pmd->n_output_batches > 0);
-    pmd->n_output_batches--;
+    /* Remove send port from pending port list. */
+    ovs_assert(!ovs_list_is_empty(&p->pending_tx_node));
+    ovs_list_remove(&p->pending_tx_node);
+    ovs_list_init(&p->pending_tx_node);
 
     pmd_perf_update_counter(&pmd->perf_stats, PMD_STAT_SENT_PKTS, output_cnt);
     pmd_perf_update_counter(&pmd->perf_stats, PMD_STAT_SENT_BATCHES, 1);
@@ -5267,13 +5270,8 @@ dp_netdev_pmd_flush_output_packets(struct dp_netdev_pmd_thread *pmd,
     struct tx_port *p;
     int output_cnt = 0;
 
-    if (!pmd->n_output_batches) {
-        return 0;
-    }
-
-    HMAP_FOR_EACH (p, node, &pmd->send_port_cache) {
-        if (!dp_packet_batch_is_empty(&p->output_pkts)
-            && (force || pmd->ctx.now >= p->flush_time)) {
+    LIST_FOR_EACH_SAFE (p, pending_tx_node, &pmd->pending_tx_ports) {
+        if (force || pmd->ctx.now >= p->flush_time) {
             output_cnt += dp_netdev_pmd_flush_output_on_port(pmd, p);
         }
     }
@@ -6803,6 +6801,7 @@ pmd_load_cached_ports(struct dp_netdev_pmd_thread *pmd)
                    n_txq * sizeof *tx_port->txq_pkts);
             tx_port_cached->txq_pkts = txq_pkts_cached;
         }
+        ovs_list_init(&tx_port_cached->pending_tx_node);
         hmap_insert(&pmd->send_port_cache, &tx_port_cached->node,
                     hash_port_no(tx_port_cached->port->port_no));
     }
@@ -7447,7 +7446,6 @@ dp_netdev_configure_pmd(struct dp_netdev_pmd_thread *pmd, struct dp_netdev *dp,
     pmd->core_id = core_id;
     pmd->numa_id = numa_id;
     pmd->need_reload = false;
-    pmd->n_output_batches = 0;
 
     ovs_refcount_init(&pmd->ref_cnt);
     atomic_init(&pmd->exit, false);
@@ -7474,6 +7472,7 @@ dp_netdev_configure_pmd(struct dp_netdev_pmd_thread *pmd, struct dp_netdev *dp,
     hmap_init(&pmd->tnl_port_cache);
     hmap_init(&pmd->send_port_cache);
     cmap_init(&pmd->tx_bonds);
+    ovs_list_init(&pmd->pending_tx_ports);
 
     /* Initialize DPIF function pointer to the default configured version. */
     atomic_init(&pmd->netdev_input_func, dp_netdev_impl_get_default());
@@ -7498,6 +7497,7 @@ dp_netdev_destroy_pmd(struct dp_netdev_pmd_thread *pmd)
     struct dpcls *cls;
 
     dp_netdev_pmd_flow_flush(pmd);
+    ovs_list_poison(&pmd->pending_tx_ports);
     hmap_destroy(&pmd->send_port_cache);
     hmap_destroy(&pmd->tnl_port_cache);
     hmap_destroy(&pmd->tx_ports);
@@ -8713,8 +8713,8 @@ dp_execute_output_action(struct dp_netdev_pmd_thread *pmd,
             /* Flush here to avoid overflow. */
             dp_netdev_pmd_flush_output_on_port(pmd, p);
         }
-        if (dp_packet_batch_is_empty(&p->output_pkts)) {
-            pmd->n_output_batches++;
+        if (ovs_list_is_empty(&p->pending_tx_node)) {
+            ovs_list_push_front(&pmd->pending_tx_ports, &p->pending_tx_node);
         }
 
         struct dp_packet *packet;
-- 
2.17.1
