Previously in OVS, a PMD thread running on cpu X used lcore X. This assumption limited OVS to running PMD threads on physical cpus < RTE_MAX_LCORE.
DPDK 20.08 introduced a new API that associates a non-EAL thread with a free lcore. This new API does not change the thread characteristics (like CPU affinity) and lets OVS run its PMD threads on any cpu regardless of RTE_MAX_LCORE. The DPDK multiprocess feature is not compatible with this new API and is disabled. DPDK still limits the number of lcores to RTE_MAX_LCORE (128 on x86_64) which should be enough for OVS pmd threads (hopefully). The DPDK lcore/OVS PMD thread mapping is logged when attaching an OVS PMD thread, and when detaching it. A new command is added to help get DPDK's point of view of the DPDK lcores at any time: $ ovs-appctl dpdk/lcore-list lcore 0, socket 0, role RTE, cpuset 0 lcore 1, socket 0, role NON_EAL, cpuset 1 lcore 2, socket 0, role NON_EAL, cpuset 15 Signed-off-by: David Marchand <[email protected]> Acked-by: Kevin Traynor <[email protected]> --- Changes since v7: - rebased on dpdk-latest, - removed #pragma, Changes since v6: - handled corner case when registering max number of PMD threads, then removing all successfully registered, leaving the ones that had failed, - reworded warning when reaching max number of PMD threads, - added a comment in command about reaching max number of PMD threads, - fixed typo in debug command name, Changes since v5: - rebased, - commitlog tweaks, - dropped use of global ALLOW_EXPERIMENTAL flag and pinpointed experimental API, Changes since v4: - rebased on the master branch, - disabled DPDK mp feature, - updated DPDK documentation and manual with the new command, - added notes in NEWS, Changes since v3: - rebased on current HEAD, - switched back to simple warning rather than abort when registering a thread fails, Changes since v2: - introduced a new api in DPDK 20.08 (still being discussed), inbox thread at http://inbox.dpdk.org/dev/[email protected]/T/#t - this current patch depends on a patch on master I sent: https://patchwork.ozlabs.org/project/openvswitch/patch/[email protected]/ - dropped 
'dpdk-lcore-mask' compat handling, Changes since v1: - rewired existing configuration 'dpdk-lcore-mask' to use --lcores, - switched to a bitmap to track lcores, - added a command to dump current mapping (Flavio): used an experimental API to get DPDK lcores cpuset since it is the most reliable/portable information, - used the same code for the logs when starting DPDK/PMD threads, - addressed Ilya comments, --- Documentation/howto/dpdk.rst | 5 +++++ NEWS | 2 ++ lib/dpdk-stub.c | 9 +++++++-- lib/dpdk-unixctl.man | 3 +++ lib/dpdk.c | 36 +++++++++++++++++++++++++++++++++--- lib/dpdk.h | 3 ++- lib/dpif-netdev.c | 10 +++++++++- 7 files changed, 61 insertions(+), 7 deletions(-) diff --git a/Documentation/howto/dpdk.rst b/Documentation/howto/dpdk.rst index 70b64881ab..81f236d3bd 100644 --- a/Documentation/howto/dpdk.rst +++ b/Documentation/howto/dpdk.rst @@ -402,6 +402,11 @@ Supported actions for hardware offload are: - Clone/output (tnl_push and output) for encapsulating over a tunnel. - Tunnel pop, for packets received on physical ports. +Multiprocess +------------ + +This DPDK feature is not supported and disabled during OVS initialization. + Further Reading --------------- diff --git a/NEWS b/NEWS index 434ee570fb..0a3c799f30 100644 --- a/NEWS +++ b/NEWS @@ -10,6 +10,8 @@ Post-v2.16.0 limiting behavior. * Add hardware offload support for matching IPv4/IPv6 frag types (experimental). + * Forbid use of DPDK multiprocess feature. + * Add support for running threads on cores >= RTE_MAX_LCORE. - Python: * For SSL support, the use of the pyOpenSSL library has been replaced with the native 'ssl' module. 
diff --git a/lib/dpdk-stub.c b/lib/dpdk-stub.c index fe24f9abdf..c2ad1b6962 100644 --- a/lib/dpdk-stub.c +++ b/lib/dpdk-stub.c @@ -38,10 +38,15 @@ dpdk_init(const struct smap *ovs_other_config) } } +bool +dpdk_attach_thread(unsigned cpu OVS_UNUSED) +{ + return false; +} + void -dpdk_set_lcore_id(unsigned cpu OVS_UNUSED) +dpdk_detach_thread(void) { - /* Nothing */ } const char * diff --git a/lib/dpdk-unixctl.man b/lib/dpdk-unixctl.man index a0d1fa2ea3..5bac806389 100644 --- a/lib/dpdk-unixctl.man +++ b/lib/dpdk-unixctl.man @@ -1,5 +1,8 @@ .SS "DPDK COMMANDS" These commands manage DPDK components. +.IP "\fBdpdk/lcore-list\fR" +Lists the DPDK lcores and their cpu affinity. +When RTE_MAX_LCORE lcores are registered, some OVS PMD threads won't appear. .IP "\fBdpdk/log-list\fR" Lists all DPDK components that emit logs and their logging levels. .IP "\fBdpdk/log-set\fR [\fIspec\fR]" diff --git a/lib/dpdk.c b/lib/dpdk.c index b2ef31cd20..a97cb38220 100644 --- a/lib/dpdk.c +++ b/lib/dpdk.c @@ -470,6 +470,12 @@ dpdk_init__(const struct smap *ovs_other_config) return false; } + if (!rte_mp_disable()) { + VLOG_EMER("Could not disable multiprocess, DPDK won't be available."); + rte_eal_cleanup(); + return false; + } + if (VLOG_IS_DBG_ENABLED()) { size_t size; char *response = NULL; @@ -489,6 +495,8 @@ dpdk_init__(const struct smap *ovs_other_config) } } + unixctl_command_register("dpdk/lcore-list", "", 0, 0, + dpdk_unixctl_mem_stream, rte_lcore_dump); unixctl_command_register("dpdk/log-list", "", 0, 0, dpdk_unixctl_mem_stream, rte_log_dump); unixctl_command_register("dpdk/log-set", "{level | pattern:level}", 0, @@ -571,12 +579,34 @@ dpdk_available(void) return dpdk_initialized; } -void -dpdk_set_lcore_id(unsigned cpu) +bool +dpdk_attach_thread(unsigned cpu) { /* NON_PMD_CORE_ID is reserved for use by non pmd threads. 
*/ ovs_assert(cpu != NON_PMD_CORE_ID); - RTE_PER_LCORE(_lcore_id) = cpu; + + if (!dpdk_available()) { + return false; + } + + if (rte_thread_register() < 0) { + VLOG_WARN("DPDK max threads count has been reached. " + "PMD thread performance may be impacted."); + return false; + } + + VLOG_INFO("PMD thread uses DPDK lcore %u.", rte_lcore_id()); + return true; +} + +void +dpdk_detach_thread(void) +{ + unsigned int lcore_id; + + lcore_id = rte_lcore_id(); + rte_thread_unregister(); + VLOG_INFO("PMD thread released DPDK lcore %u.", lcore_id); } void diff --git a/lib/dpdk.h b/lib/dpdk.h index 445a51d065..e56b447c03 100644 --- a/lib/dpdk.h +++ b/lib/dpdk.h @@ -36,7 +36,8 @@ struct smap; struct ovsrec_open_vswitch; void dpdk_init(const struct smap *ovs_other_config); -void dpdk_set_lcore_id(unsigned cpu); +bool dpdk_attach_thread(unsigned cpu); +void dpdk_detach_thread(void); const char *dpdk_get_vhost_sock_dir(void); bool dpdk_vhost_iommu_enabled(void); bool dpdk_vhost_postcopy_enabled(void); diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index 98453a2060..1d281334bd 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -6160,6 +6160,7 @@ pmd_thread_main(void *f_) unsigned int lc = 0; struct polled_queue *poll_list; bool wait_for_reload = false; + bool dpdk_attached; bool reload_tx_qid; bool exiting; bool reload; @@ -6172,7 +6173,7 @@ pmd_thread_main(void *f_) /* Stores the pmd thread's 'pmd' to 'per_pmd_key'. 
*/ ovsthread_setspecific(pmd->dp->per_pmd_key, pmd); ovs_numa_thread_setaffinity_core(pmd->core_id); - dpdk_set_lcore_id(pmd->core_id); + dpdk_attached = dpdk_attach_thread(pmd->core_id); poll_cnt = pmd_load_queues_and_ports(pmd, &poll_list); dfc_cache_init(&pmd->flow_cache); pmd_alloc_static_tx_qid(pmd); @@ -6180,6 +6181,10 @@ pmd_thread_main(void *f_) reload: atomic_count_init(&pmd->pmd_overloaded, 0); + if (!dpdk_attached) { + dpdk_attached = dpdk_attach_thread(pmd->core_id); + } + /* List port/core affinity */ for (i = 0; i < poll_cnt; i++) { VLOG_DBG("Core %d processing port \'%s\' with queue-id %d\n", @@ -6313,6 +6318,9 @@ reload: dfc_cache_uninit(&pmd->flow_cache); free(poll_list); pmd_free_cached_ports(pmd); + if (dpdk_attached) { + dpdk_detach_thread(); + } return NULL; } -- 2.23.0 _______________________________________________ dev mailing list [email protected] https://mail.openvswitch.org/mailman/listinfo/ovs-dev
