Previously in OVS, a PMD thread running on cpu X used lcore X.
This assumption limited OVS to running PMD threads on physical cpus <
RTE_MAX_LCORE.

DPDK 20.08 introduced a new API that associates a non-EAL thread with a free
lcore. This new API does not change the thread characteristics (like CPU
affinity) and lets OVS run its PMD threads on any cpu regardless of
RTE_MAX_LCORE.

The DPDK multiprocess feature is not compatible with this new API and is
disabled.

DPDK still limits the number of lcores to RTE_MAX_LCORE (128 on x86_64)
which should be enough for OVS pmd threads (hopefully).

DPDK lcore/OVS pmd thread mappings are logged at thread creation and
destruction.
A new command is added to help get DPDK's point of view of the DPDK lcores:

$ ovs-appctl dpdk/lcores-list
lcore 0, socket 0, role RTE, cpuset 0
lcore 1, socket 0, role NON_EAL, cpuset 1
lcore 2, socket 0, role NON_EAL, cpuset 15

Signed-off-by: David Marchand <[email protected]>
---
Changes since v5:
- rebased,
- commitlog tweaks,
- dropped use of global ALLOW_EXPERIMENTAL flag and pinpointed
  experimental API,

Changes since v4:
- rebased on the master branch,
- disabled DPDK mp feature,
- updated DPDK documentation and manual with the new command,
- added notes in NEWS,

Changes since v3:
- rebased on current HEAD,
- switched back to simple warning rather than abort when registering a
  thread fails,

Changes since v2:
- introduced a new API in DPDK 20.08 (still being discussed), inbox thread at
  http://inbox.dpdk.org/dev/[email protected]/T/#t
- this current patch depends on a patch on master I sent:
  https://patchwork.ozlabs.org/project/openvswitch/patch/[email protected]/
- dropped 'dpdk-lcore-mask' compat handling,

Changes since v1:
- rewired existing configuration 'dpdk-lcore-mask' to use --lcores,
- switched to a bitmap to track lcores,
- added a command to dump current mapping (Flavio): used an experimental
  API to get DPDK lcores cpuset since it is the most reliable/portable
  information,
- used the same code for the logs when starting DPDK/PMD threads,
- addressed Ilya comments,

---
 Documentation/howto/dpdk.rst |  5 ++++
 NEWS                         |  2 ++
 lib/dpdk-stub.c              |  8 +++++-
 lib/dpdk-unixctl.man         |  2 ++
 lib/dpdk.c                   | 50 ++++++++++++++++++++++++++++++++++--
 lib/dpdk.h                   |  3 ++-
 lib/dpif-netdev.c            |  3 ++-
 7 files changed, 68 insertions(+), 5 deletions(-)

diff --git a/Documentation/howto/dpdk.rst b/Documentation/howto/dpdk.rst
index 70b64881ab..81f236d3bd 100644
--- a/Documentation/howto/dpdk.rst
+++ b/Documentation/howto/dpdk.rst
@@ -402,6 +402,11 @@ Supported actions for hardware offload are:
 - Clone/output (tnl_push and output) for encapsulating over a tunnel.
 - Tunnel pop, for packets received on physical ports.
 
+Multiprocess
+------------
+
+This DPDK feature is not supported and is disabled during OVS initialization.
+
 Further Reading
 ---------------
 
diff --git a/NEWS b/NEWS
index 90f4b15902..dc9709641c 100644
--- a/NEWS
+++ b/NEWS
@@ -10,6 +10,8 @@ Post-v2.16.0
        limiting behavior.
      * Add hardware offload support for matching IPv4/IPv6 frag types
        (experimental).
+     * Forbid use of DPDK multiprocess feature.
+     * Add support for running threads on cores >= RTE_MAX_LCORE.
 
 
 v2.16.0 - 16 Aug 2021
diff --git a/lib/dpdk-stub.c b/lib/dpdk-stub.c
index b7d577870d..5bc996b665 100644
--- a/lib/dpdk-stub.c
+++ b/lib/dpdk-stub.c
@@ -39,7 +39,13 @@ dpdk_init(const struct smap *ovs_other_config)
 }
 
 void
-dpdk_set_lcore_id(unsigned cpu OVS_UNUSED)
+dpdk_init_thread_context(unsigned cpu OVS_UNUSED)
+{
+    /* Nothing */
+}
+
+void
+dpdk_uninit_thread_context(void)
 {
     /* Nothing */
 }
diff --git a/lib/dpdk-unixctl.man b/lib/dpdk-unixctl.man
index a0d1fa2ea3..cd8a178515 100644
--- a/lib/dpdk-unixctl.man
+++ b/lib/dpdk-unixctl.man
@@ -1,5 +1,7 @@
 .SS "DPDK COMMANDS"
 These commands manage DPDK components.
+.IP "\fBdpdk/lcores-list\fR"
+Lists the DPDK lcores and their cpu affinity.
 .IP "\fBdpdk/log-list\fR"
 Lists all DPDK components that emit logs and their logging levels.
 .IP "\fBdpdk/log-set\fR [\fIspec\fR]"
diff --git a/lib/dpdk.c b/lib/dpdk.c
index b2ef31cd20..686d080a3a 100644
--- a/lib/dpdk.c
+++ b/lib/dpdk.c
@@ -470,6 +470,15 @@ dpdk_init__(const struct smap *ovs_other_config)
         return false;
     }
 
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
+    if (!rte_mp_disable()) {
+#pragma GCC diagnostic pop
+        VLOG_EMER("Could not disable multiprocess, DPDK won't be available.");
+        rte_eal_cleanup();
+        return false;
+    }
+
     if (VLOG_IS_DBG_ENABLED()) {
         size_t size;
         char *response = NULL;
@@ -489,6 +498,11 @@ dpdk_init__(const struct smap *ovs_other_config)
         }
     }
 
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
+    unixctl_command_register("dpdk/lcores-list", "", 0, 0,
+                             dpdk_unixctl_mem_stream, rte_lcore_dump);
+#pragma GCC diagnostic pop
     unixctl_command_register("dpdk/log-list", "", 0, 0,
                              dpdk_unixctl_mem_stream, rte_log_dump);
     unixctl_command_register("dpdk/log-set", "{level | pattern:level}", 0,
@@ -572,11 +586,43 @@ dpdk_available(void)
 }
 
 void
-dpdk_set_lcore_id(unsigned cpu)
+dpdk_init_thread_context(unsigned cpu)
 {
     /* NON_PMD_CORE_ID is reserved for use by non pmd threads. */
     ovs_assert(cpu != NON_PMD_CORE_ID);
-    RTE_PER_LCORE(_lcore_id) = cpu;
+
+    if (!dpdk_available()) {
+        return;
+    }
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
+    if (rte_thread_register() < 0) {
+#pragma GCC diagnostic pop
+        VLOG_WARN("This OVS pmd thread will share resources with the non-pmd "
+                  "thread: %s.", rte_strerror(rte_errno));
+    } else {
+        VLOG_INFO("PMD thread uses DPDK lcore %u.", rte_lcore_id());
+    }
+}
+
+void
+dpdk_uninit_thread_context(void)
+{
+    unsigned int lcore_id;
+
+    if (!dpdk_available()) {
+        return;
+    }
+
+    lcore_id = rte_lcore_id();
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
+    rte_thread_unregister();
+#pragma GCC diagnostic pop
+    if (lcore_id != LCORE_ID_ANY) {
+        VLOG_INFO("PMD thread released DPDK lcore %u.", lcore_id);
+    }
 }
 
 void
diff --git a/lib/dpdk.h b/lib/dpdk.h
index 445a51d065..1bd16b31db 100644
--- a/lib/dpdk.h
+++ b/lib/dpdk.h
@@ -36,7 +36,8 @@ struct smap;
 struct ovsrec_open_vswitch;
 
 void dpdk_init(const struct smap *ovs_other_config);
-void dpdk_set_lcore_id(unsigned cpu);
+void dpdk_init_thread_context(unsigned cpu);
+void dpdk_uninit_thread_context(void);
 const char *dpdk_get_vhost_sock_dir(void);
 bool dpdk_vhost_iommu_enabled(void);
 bool dpdk_vhost_postcopy_enabled(void);
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 75f381ec18..e5b285461c 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -6161,7 +6161,7 @@ pmd_thread_main(void *f_)
     /* Stores the pmd thread's 'pmd' to 'per_pmd_key'. */
     ovsthread_setspecific(pmd->dp->per_pmd_key, pmd);
     ovs_numa_thread_setaffinity_core(pmd->core_id);
-    dpdk_set_lcore_id(pmd->core_id);
+    dpdk_init_thread_context(pmd->core_id);
     poll_cnt = pmd_load_queues_and_ports(pmd, &poll_list);
     dfc_cache_init(&pmd->flow_cache);
     pmd_alloc_static_tx_qid(pmd);
@@ -6302,6 +6302,7 @@ reload:
     dfc_cache_uninit(&pmd->flow_cache);
     free(poll_list);
     pmd_free_cached_ports(pmd);
+    dpdk_uninit_thread_context();
     return NULL;
 }
 
-- 
2.23.0

_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to