This uses a method similar to how cycles are attributed to RXQs in dp_netdev_pmd_flush_output_on_port().
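
For illustration, a standalone sketch of that attribution scheme (hypothetical
names: "struct rxq" and attribute_send_cycles() stand in for OVS's
struct dp_netdev_rxq and the loop this patch adds; this is not the OVS code
itself). The cycles measured for one deferred send are divided evenly among
the packets sent and credited to each packet's source RXQ:

#include <inttypes.h>
#include <stdio.h>

struct rxq {
    uint64_t proc_cycles;   /* Cycles attributed to this rx queue. */
};

/* Split 'cycles' evenly across 'pkt_cnt' packets and credit each
 * packet's source rx queue. */
static void
attribute_send_cycles(struct rxq **pkt_rxqs, int pkt_cnt, uint64_t cycles)
{
    int i;

    if (pkt_cnt) {
        cycles = cycles / pkt_cnt;
    }
    for (i = 0; i < pkt_cnt; i++) {
        pkt_rxqs[i]->proc_cycles += cycles;
    }
}

int
main(void)
{
    struct rxq a = { 0 }, b = { 0 };
    /* Three packets received on 'a', one on 'b', 400 cycles to send. */
    struct rxq *pkt_rxqs[] = { &a, &a, &a, &b };

    attribute_send_cycles(pkt_rxqs, 4, 400);
    printf("a: %"PRIu64" cycles, b: %"PRIu64" cycles\n",
           a.proc_cycles, b.proc_cycles);
    return 0;
}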
Signed-off-by: Cian Ferriter <cian.ferri...@intel.com>
---
Adding this cycle counting code costs ~2.5% in performance (0.975x the
pre-cycle-counting performance). This is for a PV scenario with a
vhostuser port and no DMA offload.

v2:
- Add this commit.
---
 lib/dpif-netdev-private-defer.h |  2 ++
 lib/dpif-netdev.c               | 32 ++++++++++++++++++++++++++++----
 2 files changed, 30 insertions(+), 4 deletions(-)

diff --git a/lib/dpif-netdev-private-defer.h b/lib/dpif-netdev-private-defer.h
index 78c140f56..a77216ce7 100644
--- a/lib/dpif-netdev-private-defer.h
+++ b/lib/dpif-netdev-private-defer.h
@@ -37,6 +37,8 @@ struct dp_defer_work_item {
     void *netdev;
     int qid;
     uint32_t attempts;
+    int pkt_cnt;
+    struct dp_netdev_rxq **output_pkts_rxqs;
 };
 
 #define WORK_RING_SIZE 128
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index f4143a93a..5930fbada 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -4604,11 +4604,17 @@ static inline unsigned int
 dp_defer_do_work(struct dp_defer *defer, struct pmd_perf_stats *perf_stats)
 {
     struct dp_defer_work_item *work;
+    struct cycle_timer timer;
+    uint64_t cycles;
     uint32_t read_idx;
     int ret;
+    int i;
 
+    cycle_timer_start(perf_stats, &timer);
     /* Check that there's a piece of work in the ring to do. */
     if (dp_defer_work_ring_empty(defer)) {
+        /* Discard cycles. */
+        cycle_timer_stop(perf_stats, &timer);
         return -ENOENT;
     }
 
@@ -4630,17 +4636,31 @@ dp_defer_do_work(struct dp_defer *defer, struct pmd_perf_stats *perf_stats)
                 pmd_perf_update_counter(perf_stats, PMD_STAT_WORK_DONE, 1);
             }
 
-            return 0;
+            ret = 0;
+            goto out;
         }
-
-        return ret;
+        goto out;
     }
 
     defer->read_idx++;
+    ret = 0;
 
     pmd_perf_update_counter(perf_stats, PMD_STAT_WORK_DONE, 1);
 
-    return 0;
+out:
+    /* Distribute send cycles evenly among transmitted packets and assign to
+     * their respective rx queues. */
+    cycles = cycle_timer_stop(perf_stats, &timer);
+
+    if (work->pkt_cnt) {
+        cycles = cycles / work->pkt_cnt;
+    }
+    for (i = 0; i < work->pkt_cnt; i++) {
+        dp_netdev_rxq_add_cycles(work->output_pkts_rxqs[i],
+                                 RXQ_CYCLES_PROC_CURR, cycles);
+    }
+
+    return ret;
 }
 
 static inline void
@@ -4669,6 +4689,8 @@ dp_defer_work(struct dp_defer *defer, struct pmd_perf_stats *perf_stats,
     ring_item->netdev = work->netdev;
     ring_item->qid = work->qid;
     ring_item->attempts = 0;
+    ring_item->pkt_cnt = work->pkt_cnt;
+    ring_item->output_pkts_rxqs = work->output_pkts_rxqs;
 
     defer->write_idx++;
 
@@ -4718,6 +4740,8 @@ dp_netdev_pmd_flush_output_on_port(struct dp_netdev_pmd_thread *pmd,
         .work_func = p->cached_work_func,
         .netdev = netdev,
         .qid = tx_qid,
+        .pkt_cnt = output_cnt,
+        .output_pkts_rxqs = p->output_pkts_rxqs,
     };
 
     /* Defer the work. */
-- 
2.32.0
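
For context on the timer calls in the patch, a minimal sketch of the
start/stop cycle timer pattern, assuming an x86 TSC and GCC/Clang builtins.
This is not OVS's actual cycle_timer_start()/cycle_timer_stop(), which also
track nested timers and per-PMD perf state:

#include <stdint.h>
#include <x86intrin.h>

struct cycle_timer {
    uint64_t start;
};

static inline void
timer_start(struct cycle_timer *t)
{
    t->start = __rdtsc();           /* Sample the timestamp counter. */
}

static inline uint64_t
timer_stop(struct cycle_timer *t)
{
    return __rdtsc() - t->start;    /* Elapsed cycles since start. */
}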