Since DPDK 23.03, it is possible to register a callback to report lcore
TSC cycles usage. Reuse the busy/idle cycles gathering in dpif-netdev
and expose them to the DPDK telemetry socket.
Upon dpdk_attach_thread, record the mapping between the DPDK lcore_id
and the dpif-netdev core_id. Reuse that mapping in the lcore usage
callback to invoke dpif_netdev_get_pmd_cycles.
Here is an example output:
~# ovs-appctl dpif-netdev/pmd-stats-show | grep -e ^pmd -e cycles:
pmd thread numa_id 0 core_id 8:
idle cycles: 2720796781680 (100.00%)
processing cycles: 3566020 (0.00%)
pmd thread numa_id 0 core_id 9:
idle cycles: 2718974371440 (100.00%)
processing cycles: 3136840 (0.00%)
pmd thread numa_id 0 core_id 72:
pmd thread numa_id 0 core_id 73:
~# echo /eal/lcore/usage | dpdk-telemetry.py | jq
{
"/eal/lcore/usage": {
"lcore_ids": [
3,
5,
11,
15
],
"total_cycles": [
2725722342740,
2725722347480,
2723899464040,
2725722354980
],
"busy_cycles": [
3566020,
3566020,
3136840,
3566020
]
}
}
Link: https://git.dpdk.org/dpdk/commit/?id=9ab1804922ba583b0b16
Cc: David Marchand <[email protected]>
Cc: Kevin Traynor <[email protected]>
Signed-off-by: Robin Jarry <[email protected]>
---
lib/dpdk-stub.c | 5 +++
lib/dpdk.c | 95 ++++++++++++++++++++++++++++++++++++++++++++++-
lib/dpdk.h | 5 +++
lib/dpif-netdev.c | 38 +++++++++++++++++++
4 files changed, 142 insertions(+), 1 deletion(-)
diff --git a/lib/dpdk-stub.c b/lib/dpdk-stub.c
index 58ebf6cb62cd..02fb561bea7b 100644
--- a/lib/dpdk-stub.c
+++ b/lib/dpdk-stub.c
@@ -49,6 +49,11 @@ dpdk_detach_thread(void)
{
}
+/* Stub implementation: 'cb' is accepted and ignored. */
+void
+dpdk_register_core_usage_callback(dpdk_core_usage_cb_t *cb OVS_UNUSED)
+{
+}
+
bool
dpdk_available(void)
{
diff --git a/lib/dpdk.c b/lib/dpdk.c
index d76d53f8f16c..31871300f719 100644
--- a/lib/dpdk.c
+++ b/lib/dpdk.c
@@ -23,6 +23,7 @@
#include <rte_cpuflags.h>
#include <rte_errno.h>
+#include <rte_lcore.h>
#include <rte_log.h>
#include <rte_malloc.h>
#include <rte_memzone.h>
@@ -310,6 +311,10 @@ malloc_dump_stats_wrapper(FILE *stream)
rte_malloc_dump_stats(stream, NULL);
}
+#ifdef ALLOW_EXPERIMENTAL_API
+/* Forward declaration: dpdk_init__() registers this function as the lcore
+ * usage callback before its definition appears in this file. */
+static int dpdk_get_lcore_cycles(unsigned int lcore_id,
+                                 struct rte_lcore_usage *usage);
+#endif
+
static bool
dpdk_init__(const struct smap *ovs_other_config)
{
@@ -440,6 +445,10 @@ dpdk_init__(const struct smap *ovs_other_config)
/* We are called from the main thread here */
RTE_PER_LCORE(_lcore_id) = NON_PMD_CORE_ID;
+#ifdef ALLOW_EXPERIMENTAL_API
+ rte_lcore_register_usage_cb(dpdk_get_lcore_cycles);
+#endif
+
/* Finally, register the dpdk classes */
netdev_dpdk_register(ovs_other_config);
netdev_register_flow_api_provider(&netdev_offload_dpdk);
@@ -490,9 +499,52 @@ dpdk_available(void)
return initialized;
}
+/* Logical shape of one lcore-to-core mapping.
+ * NOTE(review): nothing in this patch references this struct; the map below
+ * stores the pmd core id directly in each shash node's data pointer. */
+struct lcore_id_map {
+    unsigned int lcore_id;
+    unsigned int pmd_core_id;
+};
+
+/* Protects against changes to 'lcore_id_maps'. */
+static struct ovs_mutex lcore_id_maps_mutex = OVS_MUTEX_INITIALIZER;
+
+/* Maps a DPDK lcore id (formatted as a decimal string key) to the
+ * dpif-netdev core id, stored as an integer cast into the node's data
+ * pointer. */
+static struct shash lcore_id_maps OVS_GUARDED_BY(lcore_id_maps_mutex)
+    = SHASH_INITIALIZER(&lcore_id_maps);
+
+/* Formats 'lcore_id' as a decimal string into 'buf', which has room for 'len'
+ * bytes, so that it can be used as a key of 'lcore_id_maps'.
+ *
+ * When 'len' is nonzero, 'buf' is always NUL-terminated on return; on a
+ * formatting error a warning is logged and 'buf' is set to the empty string.
+ * When 'len' is zero, 'buf' is not touched. */
+static void
+lcore_id_to_str(char *buf, size_t len, unsigned int lcore_id)
+{
+    int n;
+
+    if (!len) {
+        return;
+    }
+
+    n = snprintf(buf, len, "%u", lcore_id);
+    if (n < 0) {
+        VLOG_WARN("Failed to format lcore_id: %s", ovs_strerror(errno));
+        n = 0;
+    } else if ((size_t) n >= len) {
+        /* snprintf() returns the length the full output would have had:
+         * clamp it so that the terminator below stays inside 'buf'. */
+        n = len - 1;
+    }
+    buf[n] = '\0';
+}
+
+/* Updates the entry of 'lcore_id_maps' keyed by 'lcore_id'.
+ *
+ * If 'add' is true, the entry is created or replaced so that it holds 'cpu'.
+ * Otherwise the entry, if any, is removed and 'cpu' is ignored. */
+static void
+lcore_id_map_update(unsigned int lcore_id, unsigned int cpu, bool add)
+{
+    char key[128];
+
+    lcore_id_to_str(key, sizeof key, lcore_id);
+
+    ovs_mutex_lock(&lcore_id_maps_mutex);
+    if (!add) {
+        shash_find_and_delete(&lcore_id_maps, key);
+    } else {
+        /* The core id itself is stored in the node's data pointer. */
+        void *value = (void *) (uintptr_t) cpu;
+
+        shash_replace(&lcore_id_maps, key, value);
+    }
+    ovs_mutex_unlock(&lcore_id_maps_mutex);
+}
+
bool
dpdk_attach_thread(unsigned cpu)
{
+ unsigned int lcore_id;
+
/* NON_PMD_CORE_ID is reserved for use by non pmd threads. */
ovs_assert(cpu != NON_PMD_CORE_ID);
@@ -506,7 +558,9 @@ dpdk_attach_thread(unsigned cpu)
return false;
}
- VLOG_INFO("PMD thread uses DPDK lcore %u.", rte_lcore_id());
+ lcore_id = rte_lcore_id();
+ lcore_id_map_update(lcore_id, cpu, true);
+ VLOG_INFO("PMD thread uses DPDK lcore %u.", lcore_id);
return true;
}
@@ -516,10 +570,49 @@ dpdk_detach_thread(void)
unsigned int lcore_id;
lcore_id = rte_lcore_id();
+ lcore_id_map_update(lcore_id, 0, false);
+
rte_thread_unregister();
VLOG_INFO("PMD thread released DPDK lcore %u.", lcore_id);
}
+/* Callback invoked by dpdk_get_lcore_cycles() to fetch the cycle counters of
+ * a core.  NULL until dpdk_register_core_usage_callback() stores one. */
+static dpdk_core_usage_cb_t *core_usage_cb;
+
+/* Stores 'cb' as the core usage callback, replacing any callback stored by an
+ * earlier call. */
+void
+dpdk_register_core_usage_callback(dpdk_core_usage_cb_t *cb)
+{
+ core_usage_cb = cb;
+}
+
+#ifdef ALLOW_EXPERIMENTAL_API
+/* lcore usage callback handed to rte_lcore_register_usage_cb().
+ *
+ * Translates the DPDK 'lcore_id' into the core id recorded by
+ * dpdk_attach_thread() and invokes the registered core usage callback with
+ * pointers to 'usage->busy_cycles' and 'usage->total_cycles'.
+ *
+ * Returns 0 if the callback was invoked, -1 if no callback is registered or
+ * if 'lcore_id' has no recorded mapping. */
+static int
+dpdk_get_lcore_cycles(unsigned int lcore_id, struct rte_lcore_usage *usage)
+{
+    struct shash_node *node;
+    unsigned int core_id = 0;
+    bool found = false;
+    char buf[128];
+
+    if (!core_usage_cb) {
+        return -1;
+    }
+
+    lcore_id_to_str(buf, sizeof buf, lcore_id);
+
+    ovs_mutex_lock(&lcore_id_maps_mutex);
+    node = shash_find(&lcore_id_maps, buf);
+    if (node) {
+        /* Copy the value out while the lock is held: once it is released,
+         * dpdk_detach_thread() may remove the node from the map. */
+        core_id = (unsigned int) (uintptr_t) node->data;
+        found = true;
+    }
+    ovs_mutex_unlock(&lcore_id_maps_mutex);
+
+    if (!found) {
+        return -1;
+    }
+
+    core_usage_cb(core_id, &usage->busy_cycles, &usage->total_cycles);
+
+    return 0;
+}
+#endif
+
void
print_dpdk_version(void)
{
diff --git a/lib/dpdk.h b/lib/dpdk.h
index 1b790e682e4d..95a51698c74d 100644
--- a/lib/dpdk.h
+++ b/lib/dpdk.h
@@ -18,6 +18,7 @@
#define DPDK_H
#include <stdbool.h>
+#include <stdint.h>
#ifdef DPDK_NETDEV
@@ -42,4 +43,8 @@ bool dpdk_available(void);
void print_dpdk_version(void);
void dpdk_status(const struct ovsrec_open_vswitch *);
+/* Type of the callback that reports cycle counters for core 'cpu' by writing
+ * them through 'busy_cycles' and 'total_cycles'. */
+typedef void (dpdk_core_usage_cb_t)(unsigned cpu, uint64_t *busy_cycles,
+ uint64_t *total_cycles);
+/* Registers 'cb' as the core usage callback. */
+void dpdk_register_core_usage_callback(dpdk_core_usage_cb_t *cb);
+
#endif /* dpdk.h */
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 70b953ae6dd3..ebf43a0f62e4 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -1427,6 +1427,41 @@ dpif_netdev_pmd_rebalance(struct unixctl_conn *conn, int argc,
ds_destroy(&reply);
}
+/* Core usage callback passed to dpdk_register_core_usage_callback().
+ *
+ * Scans the poll thread list of the datapath for the entry whose 'core_id'
+ * matches and stores its busy cycles in '*busy_cycles' and the sum of its busy
+ * and idle cycles in '*total_cycles'.
+ *
+ * Both outputs are left untouched unless exactly one datapath exists and one
+ * of its poll threads matches 'core_id'. */
+static void
+dpif_netdev_get_pmd_cycles(unsigned int core_id,
+                           uint64_t *busy_cycles, uint64_t *total_cycles)
+{
+    struct dp_netdev_pmd_thread **pmd_list = NULL;
+    uint64_t stats[PMD_N_STATS];
+    struct dp_netdev *dp;
+    size_t num_pmds;
+
+    ovs_mutex_lock(&dp_netdev_mutex);
+
+    if (shash_count(&dp_netdevs) != 1) {
+        goto out;
+    }
+
+    dp = shash_first(&dp_netdevs)->data;
+    sorted_poll_thread_list(dp, &pmd_list, &num_pmds);
+
+    for (size_t i = 0; i < num_pmds; i++) {
+        struct dp_netdev_pmd_thread *pmd = pmd_list[i];
+
+        /* Skip every poll thread except the one on the requested core. */
+        if (pmd->core_id != core_id) {
+            continue;
+        }
+        pmd_perf_read_counters(&pmd->perf_stats, stats);
+        *busy_cycles = stats[PMD_CYCLES_ITER_BUSY];
+        *total_cycles = *busy_cycles + stats[PMD_CYCLES_ITER_IDLE];
+        break;
+    }
+
+out:
+    /* 'pmd_list' is still NULL here when the datapath count check failed. */
+    free(pmd_list);
+    ovs_mutex_unlock(&dp_netdev_mutex);
+}
+
static void
dpif_netdev_pmd_info(struct unixctl_conn *conn, int argc, const char *argv[],
void *aux)
@@ -1661,6 +1696,9 @@ dpif_netdev_init(void)
unixctl_command_register("dpif-netdev/miniflow-parser-get", "",
0, 0, dpif_miniflow_extract_impl_get,
NULL);
+
+ dpdk_register_core_usage_callback(dpif_netdev_get_pmd_cycles);
+
return 0;
}
--
2.41.0
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev