Simplify the historically grown TSC cycle counting in PMD threads.
Cycles are currently counted for the following purposes:

1. Measure PMD utilization

PMD utilization is defined as the ratio of cycles spent in busy iterations
(at least one packet received or sent) over the total number of cycles.

This is already done in pmd_perf_start_iteration() and
pmd_perf_end_iteration(), based on a TSC timestamp saved in the current
iteration at start_iteration() and the actual TSC read at end_iteration().
There is no dependency on intermediate cycle accounting.

2. Measure the processing load per RX queue

This comprises the cycles spent on polling and processing packets received
from the rx queue and the cycles spent on the delayed sending of these
packets to tx queues (with time-based batching); see the sketch after
this list.

3. Measure the cycles spent on processing upcalls

These are part of the processing cycles of the PMD and the rxq, but they
are also measured separately for the purpose of supervising upcall
performance and load.
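
The new accounting for (2) can be illustrated with a small sketch that
mirrors the code this patch adds to dp_netdev_pmd_flush_output_on_port():
the cost of a delayed tx batch is divided evenly over the rx queues the
batched packets were received on. Variable names are as in the patch;
this is an outline, not a complete function:

    uint64_t cycles = cycles_counter(pmd);
    /* ... transmit the output batch of output_cnt packets ... */
    cycles = (cycles_counter(pmd) - cycles) / output_cnt;
    for (int i = 0; i < output_cnt; i++) {
        if (rxqs[i]) {
            /* Charge an equal share to the packet's rx queue. */
            dp_netdev_rxq_add_cycles(rxqs[i], RXQ_CYCLES_PROC_CURR, cycles);
        }
    }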

The previous scheme using cycles_count_start(), cycles_count_intermediate()
and cycles_count_end(), originally introduced to simplify cycle counting
and to save calls to rte_get_tsc_cycles(), was rather obscuring things.

Replace this with dedicated pairs of cycles_counter() calls around each
task to be measured, accounting the difference in cycles as appropriate
for the task.

Each call to cycles_counter(pmd) now stores the TSC value read in
pmd->ctx.last_cycles, so that users with lower accuracy requirements can
read that value instead of calling cycles_counter() again.
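
In each case the accounting now reduces to the same pattern. The sketch
below shows it for the rx burst case and for a reader of the cached
timestamp; the variable approx_tsc is illustrative only:

    /* Measure duration of a task, e.g. polling and processing an rx burst. */
    uint64_t cycles = cycles_counter(pmd);   /* Also updates last_cycles. */
    /* ... poll rxq and process the received batch ... */
    cycles = cycles_counter(pmd) - cycles;
    dp_netdev_rxq_add_cycles(rxq, RXQ_CYCLES_PROC_CURR, cycles);

    /* Callers with lower accuracy requirements can reuse the cached TSC
     * value instead of reading it again: */
    uint64_t approx_tsc = pmd->ctx.last_cycles;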

Signed-off-by: Jan Scheurich <[email protected]>
---
 lib/dpif-netdev.c | 132 ++++++++++++++++--------------------------------------
 1 file changed, 39 insertions(+), 93 deletions(-)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index d16ba93..5d23128 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -522,11 +522,9 @@ struct dp_netdev_pmd_thread_ctx {
     /* Latest measured time. See 'pmd_thread_ctx_time_update()'. */
     long long now;
     /* Used to count cycles. See 'cycles_count_end()' */
-    unsigned long long last_cycles;
+    uint64_t last_cycles;
     /* RX queue from which last packet was received. */
     struct dp_netdev_rxq *last_rxq;
-    /* Indicates how should be treated last counted cycles. */
-    enum pmd_cycles_counter_type current_pmd_cycles_type;
 };
 
 /* PMD: Poll modes drivers.  PMD accesses devices via polling to eliminate
@@ -3246,69 +3244,16 @@ dp_netdev_actions_free(struct dp_netdev_actions *actions)
     free(actions);
 }
 
-static inline unsigned long long
-cycles_counter(void)
+static inline uint64_t
+cycles_counter(struct dp_netdev_pmd_thread *pmd)
 {
 #ifdef DPDK_NETDEV
-    return rte_get_tsc_cycles();
+    return pmd->ctx.last_cycles = rte_get_tsc_cycles();
 #else
-    return 0;
+    return pmd->ctx.last_cycles = 0;
 #endif
 }
 
-/* Fake mutex to make sure that the calls to cycles_count_* are balanced */
-extern struct ovs_mutex cycles_counter_fake_mutex;
-
-/* Start counting cycles.  Must be followed by 'cycles_count_end()'.
- * Counting starts from the idle type state.  */
-static inline void
-cycles_count_start(struct dp_netdev_pmd_thread *pmd)
-    OVS_ACQUIRES(&cycles_counter_fake_mutex)
-    OVS_NO_THREAD_SAFETY_ANALYSIS
-{
-    pmd->ctx.current_pmd_cycles_type = PMD_CYCLES_IDLE;
-    pmd->ctx.last_cycles = cycles_counter();
-}
-
-/* Stop counting cycles and add them to the counter of the current type. */
-static inline void
-cycles_count_end(struct dp_netdev_pmd_thread *pmd)
-    OVS_RELEASES(&cycles_counter_fake_mutex)
-    OVS_NO_THREAD_SAFETY_ANALYSIS
-{
-    unsigned long long interval = cycles_counter() - pmd->ctx.last_cycles;
-    enum pmd_cycles_counter_type type = pmd->ctx.current_pmd_cycles_type;
-
-    pmd_perf_update_counter(&pmd->perf_stats, type, interval);
-}
-
-/* Calculate the intermediate cycle result and add to the counter of
- * the current type */
-static inline void
-cycles_count_intermediate(struct dp_netdev_pmd_thread *pmd,
-                          struct dp_netdev_rxq **rxqs, int n_rxqs)
-    OVS_NO_THREAD_SAFETY_ANALYSIS
-{
-    unsigned long long new_cycles = cycles_counter();
-    unsigned long long interval = new_cycles - pmd->ctx.last_cycles;
-    enum pmd_cycles_counter_type type = pmd->ctx.current_pmd_cycles_type;
-    int i;
-
-    pmd->ctx.last_cycles = new_cycles;
-
-    pmd_perf_update_counter(&pmd->perf_stats, type, interval);
-    if (n_rxqs && (type == PMD_CYCLES_POLL_BUSY)) {
-        /* Add to the amount of current processing cycles. */
-        interval /= n_rxqs;
-        for (i = 0; i < n_rxqs; i++) {
-            if (rxqs[i]) {
-                non_atomic_ullong_add(&rxqs[i]->cycles[RXQ_CYCLES_PROC_CURR],
-                        interval);
-            }
-        }
-    }
-}
-
 static inline bool
 pmd_perf_metrics_enabled(const struct dp_netdev_pmd_thread *pmd)
 {
@@ -3325,6 +3270,14 @@ dp_netdev_rxq_set_cycles(struct dp_netdev_rxq *rx,
    atomic_store_relaxed(&rx->cycles[type], cycles);
 }
 
+static void
+dp_netdev_rxq_add_cycles(struct dp_netdev_rxq *rx,
+                         enum rxq_cycles_counter_type type,
+                         unsigned long long cycles)
+{
+    non_atomic_ullong_add(&rx->cycles[type], cycles);
+}
+
 static uint64_t
 dp_netdev_rxq_get_cycles(struct dp_netdev_rxq *rx,
                          enum rxq_cycles_counter_type type)
@@ -3358,16 +3311,9 @@ dp_netdev_pmd_flush_output_on_port(struct dp_netdev_pmd_thread *pmd,
     int output_cnt;
     bool dynamic_txqs;
     uint32_t tx_flush_interval;
-    enum pmd_cycles_counter_type save_pmd_cycles_type;
 
-    /* In case we're in PMD_CYCLES_PROCESSING state we need to count
-     * cycles for rxq we're processing now. */
-    cycles_count_intermediate(pmd, &pmd->ctx.last_rxq, 1);
-
-    /* Save current cycles counting state to restore after accounting
-     * send cycles. */
-    save_pmd_cycles_type = pmd->ctx.current_pmd_cycles_type;
-    pmd->ctx.current_pmd_cycles_type = PMD_CYCLES_POLL_BUSY;
+    /* Measure duration of batch transmission. */
+    uint64_t cycles = cycles_counter(pmd);
 
     dynamic_txqs = p->port->dynamic_txqs;
     if (dynamic_txqs) {
@@ -3394,9 +3340,15 @@ dp_netdev_pmd_flush_output_on_port(struct dp_netdev_pmd_thread *pmd,
     pmd_perf_update_counter(&pmd->perf_stats,
                             PMD_STAT_SENT_BATCHES, 1);
 
-    /* Update send cycles for all the rx queues and restore previous state. */
-    cycles_count_intermediate(pmd, p->output_pkts_rxqs, output_cnt);
-    pmd->ctx.current_pmd_cycles_type = save_pmd_cycles_type;
+    /* Divide the batch tx cost evenly over the packets' rxqs. */
+    cycles = (cycles_counter(pmd) - cycles) / output_cnt;
+    struct dp_netdev_rxq **rxqs = p->output_pkts_rxqs;
+    for (int i = 0; i < output_cnt; i++) {
+        if (rxqs[i]) {
+            dp_netdev_rxq_add_cycles(rxqs[i], RXQ_CYCLES_PROC_CURR,
+                                     cycles);
+        }
+    }
     return output_cnt;
 }
 
@@ -3429,16 +3381,17 @@ dp_netdev_process_rxq_port(struct dp_netdev_pmd_thread *pmd,
     struct pmd_perf_stats *s = &pmd->perf_stats;
     struct dp_packet_batch batch;
     int error;
-    int batch_cnt = 0, output_cnt = 0;
+    int batch_cnt = 0;
 
     dp_packet_batch_init(&batch);
-
-    cycles_count_intermediate(pmd, NULL, 0);
     pmd->ctx.last_rxq = rxq;
-    pmd->ctx.current_pmd_cycles_type = PMD_CYCLES_POLL_BUSY;
+
+    /* Measure duration for polling and processing rx burst. */
+    uint64_t cycles = cycles_counter(pmd);
     error = netdev_rxq_recv(rxq->rx, &batch);
 
     if (!error) {
+        /* At least one packet received. */
         *recirc_depth_get() = 0;
         pmd_thread_ctx_time_update(pmd);
         batch_cnt = batch.count;
@@ -3448,7 +3401,7 @@ dp_netdev_process_rxq_port(struct dp_netdev_pmd_thread *pmd,
             histogram_add_sample(&s->pkts_per_batch, batch_cnt);
             /* Update the maximum Rx queue fill level. */
             uint32_t qfill = batch.qfill;
-            switch (netdev_dpdk_get_type(netdev_rxq_get_netdev(rx))) {
+            switch (netdev_dpdk_get_type(netdev_rxq_get_netdev(rxq->rx))) {
             case DPDK_DEV_VHOST:
                 if (qfill > s->current.max_vhost_qfill) {
                     s->current.max_vhost_qfill = qfill;
@@ -3461,11 +3414,13 @@ dp_netdev_process_rxq_port(struct dp_netdev_pmd_thread *pmd,
         }
         /* Process packet batch. */
         dp_netdev_input(pmd, &batch, port_no);
-        cycles_count_intermediate(pmd, &rxq, 1);
-
-        output_cnt = dp_netdev_pmd_flush_output_packets(pmd, false);
+        /* Add processing cycles to rxq stats. */
+        cycles = cycles_counter(pmd) - cycles;
+        dp_netdev_rxq_add_cycles(rxq, RXQ_CYCLES_PROC_CURR, cycles);
+        /* Flush the send queues. */
+        dp_netdev_pmd_flush_output_packets(pmd, false);
         *flushed = true;
-        
+
     } else if (error != EAGAIN && error != EOPNOTSUPP) {
         static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
 
@@ -3473,9 +3428,7 @@ dp_netdev_process_rxq_port(struct dp_netdev_pmd_thread *pmd,
                     netdev_rxq_get_name(rxq->rx), ovs_strerror(error));
     }
 
-    pmd->ctx.current_pmd_cycles_type = PMD_CYCLES_POLL_IDLE;
     pmd->ctx.last_rxq = NULL;
-
     return batch_cnt;
 }
 
@@ -4105,7 +4058,6 @@ dpif_netdev_run(struct dpif *dpif)
     non_pmd = dp_netdev_get_pmd(dp, NON_PMD_CORE_ID);
     if (non_pmd) {
         ovs_mutex_lock(&dp->non_pmd_mutex);
-        cycles_count_start(non_pmd);
         HMAP_FOR_EACH (port, node, &dp->ports) {
             if (!netdev_is_pmd(port->netdev)) {
                 int i;
@@ -4126,7 +4078,6 @@ dpif_netdev_run(struct dpif *dpif)
             dp_netdev_pmd_flush_output_packets(non_pmd, false);
         }
 
-        cycles_count_end(non_pmd);
         dpif_netdev_xps_revalidate_pmd(non_pmd, false);
         ovs_mutex_unlock(&dp->non_pmd_mutex);
 
@@ -4305,7 +4256,6 @@ reload:
         lc = UINT_MAX;
     }
 
-    cycles_count_start(pmd);
     for (;;) {
         uint64_t iter_packets = 0;
         bool flushed = false;
@@ -4343,15 +4293,12 @@ reload:
             if (reload) {
                 break;
             }
-            cycles_count_intermediate(pmd, NULL, 0);
         }
 
-        pmd_perf_end_iteration(s, pmd->ctx.last_cycles, iter_packets,
+        pmd_perf_end_iteration(s, cycles_counter(pmd), iter_packets,
                                pmd_perf_metrics_enabled(pmd));
     }
 
-    cycles_count_end(pmd);
-
     poll_cnt = pmd_load_queues_and_ports(pmd, &poll_list);
     exiting = latch_is_set(&pmd->exit_latch);
     /* Signal here to make sure the pmd finishes
@@ -4791,7 +4738,6 @@ dp_netdev_configure_pmd(struct dp_netdev_pmd_thread *pmd, struct dp_netdev *dp,
     cmap_init(&pmd->flow_table);
     cmap_init(&pmd->classifiers);
     pmd->ctx.last_rxq = NULL;
-    pmd->ctx.current_pmd_cycles_type = PMD_CYCLES_IDLE;
     pmd_thread_ctx_time_update(pmd);
     pmd->next_optimization = pmd->ctx.now + DPCLS_OPTIMIZATION_INTERVAL;
     pmd->rxq_next_cycle_store = pmd->ctx.now + PMD_RXQ_INTERVAL_LEN;
@@ -5247,7 +5193,7 @@ handle_packet_upcall(struct dp_netdev_pmd_thread *pmd,
     struct match match;
     ovs_u128 ufid;
     int error;
-    uint64_t cycles = cycles_counter();
+    uint64_t cycles = cycles_counter(pmd);
 
     match.tun_md.valid = false;
     miniflow_expand(&key->mf, &match.flow);
@@ -5303,7 +5249,7 @@ handle_packet_upcall(struct dp_netdev_pmd_thread *pmd,
     }
     if (pmd_perf_metrics_enabled(pmd)) {
         /* Update upcall stats. */
-        cycles = cycles_counter() - cycles;
+        cycles = cycles_counter(pmd) - cycles;
         struct pmd_perf_stats *s = &pmd->perf_stats;
         s->current.upcalls++;
         s->current.upcall_cycles += cycles;
-- 
1.9.1
