This uses a method similar to the one used to attribute cycles to RXQs
in dp_netdev_pmd_flush_output_on_port().

Signed-off-by: Cian Ferriter <cian.ferri...@intel.com>

---

Adding this cycle counting code costs ~2.5% performance (0.975x the
pre-cycle-counting throughput). This was measured in a PV scenario with
a vhostuser port and no DMA offload.

v2:
- Add this commit.
---
 lib/dpif-netdev-private-defer.h |  2 ++
 lib/dpif-netdev.c               | 32 ++++++++++++++++++++++++++++----
 2 files changed, 30 insertions(+), 4 deletions(-)

diff --git a/lib/dpif-netdev-private-defer.h b/lib/dpif-netdev-private-defer.h
index 78c140f56..a77216ce7 100644
--- a/lib/dpif-netdev-private-defer.h
+++ b/lib/dpif-netdev-private-defer.h
@@ -37,6 +37,8 @@ struct dp_defer_work_item {
     void *netdev;
     int qid;
     uint32_t attempts;
+    int pkt_cnt;
+    struct dp_netdev_rxq **output_pkts_rxqs;
 };
 
 #define WORK_RING_SIZE 128
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index f4143a93a..5930fbada 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -4604,11 +4604,17 @@ static inline unsigned int
 dp_defer_do_work(struct dp_defer *defer, struct pmd_perf_stats *perf_stats)
 {
     struct dp_defer_work_item *work;
+    struct cycle_timer timer;
+    uint64_t cycles;
     uint32_t read_idx;
     int ret;
+    int i;
 
+    cycle_timer_start(perf_stats, &timer);
     /* Check that there's a piece of work in the ring to do. */
     if (dp_defer_work_ring_empty(defer)) {
+        /* Discard cycles. */
+        cycle_timer_stop(perf_stats, &timer);
         return -ENOENT;
     }
 
@@ -4630,17 +4636,31 @@ dp_defer_do_work(struct dp_defer *defer, struct pmd_perf_stats *perf_stats)
                 pmd_perf_update_counter(perf_stats, PMD_STAT_WORK_DONE, 1);
             }
 
-            return 0;
+            ret = 0;
+            goto out;
         }
-
-        return ret;
+        goto out;
     }
 
     defer->read_idx++;
+    ret = 0;
 
     pmd_perf_update_counter(perf_stats, PMD_STAT_WORK_DONE, 1);
 
-    return 0;
+out:
+    /* Distribute send cycles evenly among transmitted packets and assign to
+     * their respective rx queues. */
+    cycles = cycle_timer_stop(perf_stats, &timer);
+
+    if (work->pkt_cnt) {
+        cycles = cycles / work->pkt_cnt;
+    }
+    for (i = 0; i < work->pkt_cnt; i++) {
+        dp_netdev_rxq_add_cycles(work->output_pkts_rxqs[i],
+                                 RXQ_CYCLES_PROC_CURR, cycles);
+    }
+
+    return ret;
 }
 
 static inline void
@@ -4669,6 +4689,8 @@ dp_defer_work(struct dp_defer *defer, struct pmd_perf_stats *perf_stats,
     ring_item->netdev = work->netdev;
     ring_item->qid = work->qid;
     ring_item->attempts = 0;
+    ring_item->pkt_cnt = work->pkt_cnt;
+    ring_item->output_pkts_rxqs = work->output_pkts_rxqs;
 
     defer->write_idx++;
 
@@ -4718,6 +4740,8 @@ dp_netdev_pmd_flush_output_on_port(struct dp_netdev_pmd_thread *pmd,
             .work_func = p->cached_work_func,
             .netdev = netdev,
             .qid = tx_qid,
+            .pkt_cnt = output_cnt,
+            .output_pkts_rxqs = p->output_pkts_rxqs,
         };
 
         /* Defer the work. */
-- 
2.32.0
