---
Changes since v6:
- handled the corner case of registering the max number of PMD threads, then
removing all successfully registered ones, leaving only those that had failed,
- reworded warning when reaching max number of PMD threads,
- added a comment in command about reaching max number of PMD threads,
- fixed typo in debug command name,
Changes since v5:
- rebased,
- commitlog tweaks,
- dropped use of global ALLOW_EXPERIMENTAL flag and pinpointed
experimental API,
Changes since v4:
- rebased on the master branch,
- disabled DPDK mp feature,
- updated DPDK documentation and manual with the new command,
- added notes in NEWS,
Changes since v3:
- rebased on current HEAD,
- switched back to a simple warning rather than aborting when registering a
thread fails,
Changes since v2:
- introduced a new api in DPDK 20.08 (still being discussed), inbox thread at
http://inbox.dpdk.org/dev/[email protected]/T/#t
- this current patch depends on a patch on master I sent:
https://patchwork.ozlabs.org/project/openvswitch/patch/[email protected]/
- dropped 'dpdk-lcore-mask' compat handling,
Changes since v1:
- rewired existing configuration 'dpdk-lcore-mask' to use --lcores,
- switched to a bitmap to track lcores,
- added a command to dump current mapping (Flavio): used an experimental
API to get DPDK lcores cpuset since it is the most reliable/portable
information,
- used the same code for the logs when starting DPDK/PMD threads,
- addressed Ilya comments,
---
Documentation/howto/dpdk.rst | 5 ++++
NEWS | 2 ++
lib/dpdk-stub.c | 9 +++++--
lib/dpdk-unixctl.man | 3 +++
lib/dpdk.c | 48 +++++++++++++++++++++++++++++++++---
lib/dpdk.h | 3 ++-
lib/dpif-netdev.c | 10 +++++++-
7 files changed, 73 insertions(+), 7 deletions(-)
diff --git a/Documentation/howto/dpdk.rst b/Documentation/howto/dpdk.rst
index 70b64881ab..81f236d3bd 100644
--- a/Documentation/howto/dpdk.rst
+++ b/Documentation/howto/dpdk.rst
@@ -402,6 +402,11 @@ Supported actions for hardware offload are:
- Clone/output (tnl_push and output) for encapsulating over a tunnel.
- Tunnel pop, for packets received on physical ports.
+Multiprocess
+------------
+
+This DPDK feature is not supported and is disabled during OVS initialization.
+
Further Reading
---------------
diff --git a/NEWS b/NEWS
index 90f4b15902..dc9709641c 100644
--- a/NEWS
+++ b/NEWS
@@ -10,6 +10,8 @@ Post-v2.16.0
limiting behavior.
* Add hardware offload support for matching IPv4/IPv6 frag types
(experimental).
+ * Forbid use of DPDK multiprocess feature.
+ * Add support for running threads on cores >= RTE_MAX_LCORE.
v2.16.0 - 16 Aug 2021
diff --git a/lib/dpdk-stub.c b/lib/dpdk-stub.c
index c332c217cb..3eee1f485c 100644
--- a/lib/dpdk-stub.c
+++ b/lib/dpdk-stub.c
@@ -38,10 +38,15 @@ dpdk_init(const struct smap *ovs_other_config)
}
}
+bool
+dpdk_attach_thread(unsigned cpu OVS_UNUSED)
+{
+ return false;
+}
+
void
-dpdk_set_lcore_id(unsigned cpu OVS_UNUSED)
+dpdk_detach_thread(void)
{
- /* Nothing */
}
const char *
diff --git a/lib/dpdk-unixctl.man b/lib/dpdk-unixctl.man
index a0d1fa2ea3..5bac806389 100644
--- a/lib/dpdk-unixctl.man
+++ b/lib/dpdk-unixctl.man
@@ -1,5 +1,8 @@
.SS "DPDK COMMANDS"
These commands manage DPDK components.
+.IP "\fBdpdk/lcore-list\fR"
+Lists the DPDK lcores and their cpu affinity.
+When RTE_MAX_LCORE lcores are registered, some OVS PMD threads won't appear.
.IP "\fBdpdk/log-list\fR"
Lists all DPDK components that emit logs and their logging levels.
.IP "\fBdpdk/log-set\fR [\fIspec\fR]"
diff --git a/lib/dpdk.c b/lib/dpdk.c
index fe201bf29c..f9f6404998 100644
--- a/lib/dpdk.c
+++ b/lib/dpdk.c
@@ -470,6 +470,15 @@ dpdk_init__(const struct smap *ovs_other_config)
return false;
}
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
+ if (!rte_mp_disable()) {
+#pragma GCC diagnostic pop
+ VLOG_EMER("Could not disable multiprocess, DPDK won't be available.");
+ rte_eal_cleanup();
+ return false;
+ }
+
if (VLOG_IS_DBG_ENABLED()) {
size_t size;
char *response = NULL;
@@ -489,6 +498,11 @@ dpdk_init__(const struct smap *ovs_other_config)
}
}
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
+ unixctl_command_register("dpdk/lcore-list", "", 0, 0,
+ dpdk_unixctl_mem_stream, rte_lcore_dump);
+#pragma GCC diagnostic pop
unixctl_command_register("dpdk/log-list", "", 0, 0,
dpdk_unixctl_mem_stream, rte_log_dump);
unixctl_command_register("dpdk/log-set", "{level | pattern:level}", 0,
@@ -571,12 +585,40 @@ dpdk_available(void)
return dpdk_initialized;
}
-void
-dpdk_set_lcore_id(unsigned cpu)
+bool
+dpdk_attach_thread(unsigned cpu)
{
/* NON_PMD_CORE_ID is reserved for use by non pmd threads. */
ovs_assert(cpu != NON_PMD_CORE_ID);
- RTE_PER_LCORE(_lcore_id) = cpu;
+
+ if (!dpdk_available()) {
+ return false;
+ }
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
+ if (rte_thread_register() < 0) {
+#pragma GCC diagnostic pop
+ VLOG_WARN("DPDK max threads count has been reached. "
+ "PMD thread performance may be impacted.");
+ return false;
+ }
+
+ VLOG_INFO("PMD thread uses DPDK lcore %u.", rte_lcore_id());
+ return true;
+}
+
+void
+dpdk_detach_thread(void)
+{
+ unsigned int lcore_id;
+
+ lcore_id = rte_lcore_id();
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
+ rte_thread_unregister();
+#pragma GCC diagnostic pop
+ VLOG_INFO("PMD thread released DPDK lcore %u.", lcore_id);
}
void
diff --git a/lib/dpdk.h b/lib/dpdk.h
index 2eb1aedbb0..64ebca47d6 100644
--- a/lib/dpdk.h
+++ b/lib/dpdk.h
@@ -36,7 +36,8 @@ struct smap;
struct ovsrec_open_vswitch;
void dpdk_init(const struct smap *ovs_other_config);
-void dpdk_set_lcore_id(unsigned cpu);
+bool dpdk_attach_thread(unsigned cpu);
+void dpdk_detach_thread(void);
const char *dpdk_get_vhost_sock_dir(void);
bool dpdk_vhost_iommu_enabled(void);
bool dpdk_vhost_postcopy_enabled(void);
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 75f381ec18..707e222450 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -6149,6 +6149,7 @@ pmd_thread_main(void *f_)
unsigned int lc = 0;
struct polled_queue *poll_list;
bool wait_for_reload = false;
+ bool dpdk_attached;
bool reload_tx_qid;
bool exiting;
bool reload;
@@ -6161,7 +6162,7 @@ pmd_thread_main(void *f_)
/* Stores the pmd thread's 'pmd' to 'per_pmd_key'. */
ovsthread_setspecific(pmd->dp->per_pmd_key, pmd);
ovs_numa_thread_setaffinity_core(pmd->core_id);
- dpdk_set_lcore_id(pmd->core_id);
+ dpdk_attached = dpdk_attach_thread(pmd->core_id);
poll_cnt = pmd_load_queues_and_ports(pmd, &poll_list);
dfc_cache_init(&pmd->flow_cache);
pmd_alloc_static_tx_qid(pmd);
@@ -6169,6 +6170,10 @@ pmd_thread_main(void *f_)
reload:
atomic_count_init(&pmd->pmd_overloaded, 0);
+ if (!dpdk_attached) {
+ dpdk_attached = dpdk_attach_thread(pmd->core_id);
+ }
+
/* List port/core affinity */
for (i = 0; i < poll_cnt; i++) {
VLOG_DBG("Core %d processing port \'%s\' with queue-id %d\n",
@@ -6302,6 +6307,9 @@ reload:
dfc_cache_uninit(&pmd->flow_cache);
free(poll_list);
pmd_free_cached_ports(pmd);
+ if (dpdk_attached) {
+ dpdk_detach_thread();
+ }
return NULL;
}