Since DPDK 23.03, it is possible to register a callback that reports
per-lcore TSC cycle usage. Reuse the busy/idle cycle counters already
gathered in dpif-netdev and expose them on the DPDK telemetry socket.

In dpdk_attach_thread(), record the mapping between the DPDK lcore_id
and the dpif-netdev core_id. Reuse that mapping in the lcore usage
callback to invoke dpif_netdev_get_pmd_cycles().

Here is an example output:

  ~# ovs-appctl dpif-netdev/pmd-stats-show | grep -e ^pmd -e cycles:
  pmd thread numa_id 0 core_id 8:
    idle cycles: 2720796781680 (100.00%)
    processing cycles: 3566020 (0.00%)
  pmd thread numa_id 0 core_id 9:
    idle cycles: 2718974371440 (100.00%)
    processing cycles: 3136840 (0.00%)
  pmd thread numa_id 0 core_id 72:
  pmd thread numa_id 0 core_id 73:

  ~# echo /eal/lcore/usage | dpdk-telemetry.py | jq
  {
    "/eal/lcore/usage": {
      "lcore_ids": [
        3,
        5,
        11,
        15
      ],
      "total_cycles": [
        2725722342740,
        2725722347480,
        2723899464040,
        2725722354980
      ],
      "busy_cycles": [
        3566020,
        3566020,
        3136840,
        3566020
      ]
    }
  }

Link: https://git.dpdk.org/dpdk/commit/?id=9ab1804922ba583b0b16
Cc: David Marchand <david.march...@redhat.com>
Cc: Kevin Traynor <ktray...@redhat.com>
Signed-off-by: Robin Jarry <rja...@redhat.com>
---
 lib/dpdk-stub.c   |  5 +++
 lib/dpdk.c        | 95 ++++++++++++++++++++++++++++++++++++++++++++++-
 lib/dpdk.h        |  5 +++
 lib/dpif-netdev.c | 38 +++++++++++++++++++
 4 files changed, 142 insertions(+), 1 deletion(-)

diff --git a/lib/dpdk-stub.c b/lib/dpdk-stub.c
index 58ebf6cb62cd..02fb561bea7b 100644
--- a/lib/dpdk-stub.c
+++ b/lib/dpdk-stub.c
@@ -49,6 +49,11 @@ dpdk_detach_thread(void)
 {
 }
 
+void
+dpdk_register_core_usage_callback(dpdk_core_usage_cb_t *cb OVS_UNUSED)
+{
+}
+
 bool
 dpdk_available(void)
 {
diff --git a/lib/dpdk.c b/lib/dpdk.c
index d76d53f8f16c..31871300f719 100644
--- a/lib/dpdk.c
+++ b/lib/dpdk.c
@@ -23,6 +23,7 @@
 
 #include <rte_cpuflags.h>
 #include <rte_errno.h>
+#include <rte_lcore.h>
 #include <rte_log.h>
 #include <rte_malloc.h>
 #include <rte_memzone.h>
@@ -310,6 +311,10 @@ malloc_dump_stats_wrapper(FILE *stream)
     rte_malloc_dump_stats(stream, NULL);
 }
 
+#ifdef ALLOW_EXPERIMENTAL_API
+static int dpdk_get_lcore_cycles(unsigned int, struct rte_lcore_usage *);
+#endif
+
 static bool
 dpdk_init__(const struct smap *ovs_other_config)
 {
@@ -440,6 +445,10 @@ dpdk_init__(const struct smap *ovs_other_config)
     /* We are called from the main thread here */
     RTE_PER_LCORE(_lcore_id) = NON_PMD_CORE_ID;
 
+#ifdef ALLOW_EXPERIMENTAL_API
+    rte_lcore_register_usage_cb(dpdk_get_lcore_cycles);
+#endif
+
     /* Finally, register the dpdk classes */
     netdev_dpdk_register(ovs_other_config);
     netdev_register_flow_api_provider(&netdev_offload_dpdk);
@@ -490,9 +499,52 @@ dpdk_available(void)
     return initialized;
 }
 
+/* Protects against changes to 'lcore_id_maps'. */
+static struct ovs_mutex lcore_id_maps_mutex = OVS_MUTEX_INITIALIZER;
+
+/* Maps DPDK lcore ids to dpif-netdev PMD core ids.
+ *
+ * Each key is an lcore id formatted as a decimal string.  Each value is the
+ * PMD core id itself, cast to a pointer and stored as the node's 'data', so
+ * a core id of 0 is stored as a null pointer: look up nodes, not their data,
+ * to test whether an lcore is mapped. */
+static struct shash lcore_id_maps OVS_GUARDED_BY(lcore_id_maps_mutex)
+    = SHASH_INITIALIZER(&lcore_id_maps);
+
+/* Formats 'lcore_id' in decimal into 'buf' ('len' > 0 bytes).  snprintf()
+ * null-terminates 'buf' even if it truncates; on error 'buf' is emptied. */
+static void
+lcore_id_to_str(char *buf, size_t len, unsigned int lcore_id)
+{
+    int n = snprintf(buf, len, "%u", lcore_id);
+
+    if (n < 0) {
+        VLOG_WARN("Failed to format lcore_id: %s", ovs_strerror(errno));
+        buf[0] = '\0';
+    }
+}
+
+/* Adds ('add' true) or removes the 'lcore_id' to PMD 'cpu' mapping. */
+static void
+lcore_id_map_update(unsigned int lcore_id, unsigned int cpu, bool add)
+{
+    char key[128];
+
+    lcore_id_to_str(key, sizeof key, lcore_id);
+    ovs_mutex_lock(&lcore_id_maps_mutex);
+    if (!add) {
+        shash_find_and_delete(&lcore_id_maps, key);
+    } else {
+        shash_replace(&lcore_id_maps, key, (void *) (uintptr_t) cpu);
+    }
+    ovs_mutex_unlock(&lcore_id_maps_mutex);
+}
+
 bool
 dpdk_attach_thread(unsigned cpu)
 {
+    unsigned int lcore_id;
+
     /* NON_PMD_CORE_ID is reserved for use by non pmd threads. */
     ovs_assert(cpu != NON_PMD_CORE_ID);
 
@@ -506,7 +558,9 @@ dpdk_attach_thread(unsigned cpu)
         return false;
     }
 
-    VLOG_INFO("PMD thread uses DPDK lcore %u.", rte_lcore_id());
+    lcore_id = rte_lcore_id();
+    lcore_id_map_update(lcore_id, cpu, true);
+    VLOG_INFO("PMD thread uses DPDK lcore %u.", lcore_id);
     return true;
 }
 
@@ -516,10 +570,49 @@ dpdk_detach_thread(void)
     unsigned int lcore_id;
 
     lcore_id = rte_lcore_id();
+    lcore_id_map_update(lcore_id, 0, false);
+
     rte_thread_unregister();
     VLOG_INFO("PMD thread released DPDK lcore %u.", lcore_id);
 }
 
+static dpdk_core_usage_cb_t *core_usage_cb; /* NULL until registered. */
+
+void
+dpdk_register_core_usage_callback(dpdk_core_usage_cb_t *cb)
+{
+    core_usage_cb = cb;
+}
+
+#ifdef ALLOW_EXPERIMENTAL_API
+/* Callback for rte_lcore_register_usage_cb(): reports the cycles of the PMD
+ * mapped to 'lcore_id' in 'usage'.  Returns 0 on success, otherwise -1. */
+static int
+dpdk_get_lcore_cycles(unsigned int lcore_id, struct rte_lcore_usage *usage)
+{
+    struct shash_node *node;
+    unsigned int core_id;
+    char buf[128];
+
+    if (!core_usage_cb) {
+        return -1;
+    }
+    lcore_id_to_str(buf, sizeof buf, lcore_id);
+
+    /* Read 'node->data' under the lock: a detach may free 'node' after. */
+    ovs_mutex_lock(&lcore_id_maps_mutex);
+    node = shash_find(&lcore_id_maps, buf);
+    core_id = node ? (unsigned int) (uintptr_t) node->data : 0;
+    ovs_mutex_unlock(&lcore_id_maps_mutex);
+    if (!node) {
+        return -1;
+    }
+
+    core_usage_cb(core_id, &usage->busy_cycles, &usage->total_cycles);
+    return 0;
+}
+#endif
+
 void
 print_dpdk_version(void)
 {
diff --git a/lib/dpdk.h b/lib/dpdk.h
index 1b790e682e4d..95a51698c74d 100644
--- a/lib/dpdk.h
+++ b/lib/dpdk.h
@@ -18,6 +18,7 @@
 #define DPDK_H
 
 #include <stdbool.h>
+#include <stdint.h>
 
 #ifdef DPDK_NETDEV
 
@@ -42,4 +43,8 @@ bool dpdk_available(void);
 void print_dpdk_version(void);
 void dpdk_status(const struct ovsrec_open_vswitch *);
 
+typedef void (dpdk_core_usage_cb_t)(unsigned cpu, uint64_t *busy_cycles,
+                                    uint64_t *total_cycles);
+void dpdk_register_core_usage_callback(dpdk_core_usage_cb_t *cb);
+
 #endif /* dpdk.h */
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 70b953ae6dd3..ebf43a0f62e4 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -1427,6 +1427,41 @@ dpif_netdev_pmd_rebalance(struct unixctl_conn *conn, int argc,
     ds_destroy(&reply);
 }
 
+/* Stores the busy and total cycle counters of the PMD thread running on
+ * 'core_id'.  Leaves both outputs untouched if no such thread is found. */
+static void
+dpif_netdev_get_pmd_cycles(unsigned int core_id,
+                           uint64_t *busy_cycles, uint64_t *total_cycles)
+{
+    struct dp_netdev_pmd_thread **pmd_list = NULL;
+    uint64_t stats[PMD_N_STATS];
+    struct dp_netdev *dp;
+    size_t num_pmds;
+
+    ovs_mutex_lock(&dp_netdev_mutex);
+    if (shash_count(&dp_netdevs) != 1) {
+        goto out;
+    }
+
+    dp = shash_first(&dp_netdevs)->data;
+    sorted_poll_thread_list(dp, &pmd_list, &num_pmds);
+
+    for (size_t i = 0; i < num_pmds; i++) {
+        struct dp_netdev_pmd_thread *pmd = pmd_list[i];
+
+        if (pmd->core_id == core_id) {
+            pmd_perf_read_counters(&pmd->perf_stats, stats);
+            *busy_cycles = stats[PMD_CYCLES_ITER_BUSY];
+            *total_cycles = *busy_cycles + stats[PMD_CYCLES_ITER_IDLE];
+            break;
+        }
+    }
+
+out:
+    free(pmd_list);
+    ovs_mutex_unlock(&dp_netdev_mutex);
+}
+
 static void
 dpif_netdev_pmd_info(struct unixctl_conn *conn, int argc, const char *argv[],
                      void *aux)
@@ -1661,6 +1696,9 @@ dpif_netdev_init(void)
     unixctl_command_register("dpif-netdev/miniflow-parser-get", "",
                              0, 0, dpif_miniflow_extract_impl_get,
                              NULL);
+
+    dpdk_register_core_usage_callback(dpif_netdev_get_pmd_cycles);
+
     return 0;
 }
 
-- 
2.41.0

_______________________________________________
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to