Allow PMD threads to be scheduled within their NUMA node instead of being pinned to a specific core. When other_config:pmd-no-pin is set to true, each PMD thread's CPU affinity is set to all cores on its assigned NUMA node. This lets the kernel scheduler migrate the thread freely within that node while still preserving NUMA locality, which avoids cross-socket memory access and TSC skew.
The flag can be toggled by: - ovs-vsctl set o . other_config:pmd-no-pin=<true/false> The setting is false by default. Changing this value requires restarting the daemon. Signed-off-by: Salem Sol <[email protected]> --- lib/dpif-netdev.c | 29 ++++++++++++++++++++++++++--- vswitchd/vswitch.xml | 14 ++++++++++++++ 2 files changed, 40 insertions(+), 3 deletions(-) diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index 49f4fa2ac6..818cc80209 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -296,6 +296,8 @@ struct dp_netdev { uint64_t pmd_max_sleep_default; /* Enable the SMC cache from ovsdb config */ atomic_bool smc_enable_db; + /* Allow PMD threads to float within their NUMA node. */ + bool pmd_no_pin; /* Protects access to ofproto-dpif-upcall interface during revalidator * thread synchronization. */ @@ -4552,14 +4554,26 @@ dpif_netdev_set_config(struct dpif *dpif, const struct smap *other_config) bool sleep_changed = set_all_pmd_max_sleeps(dp, other_config); + bool pmd_no_pin = smap_get_bool(other_config, "pmd-no-pin", false); + if (ovsthread_once_start(&dp->once_set_config)) { + dp->pmd_no_pin = pmd_no_pin; + if (pmd_no_pin) { + VLOG_INFO("PMD threads will float within their NUMA node"); + } log_all_pmd_sleeps(dp); dpif_offload_datapath_register_flow_unreference_cb( dpif, offload_flow_reference_unreference_cb); ovsthread_once_done(&dp->once_set_config); - } else if (sleep_changed) { - log_all_pmd_sleeps(dp); + } else { + if (pmd_no_pin != dp->pmd_no_pin) { + VLOG_WARN("Reconfiguring pmd-no-pin requires restarting the " + "daemon. Change is ignored."); + } + if (sleep_changed) { + log_all_pmd_sleeps(dp); + } } return 0; @@ -6538,7 +6552,16 @@ pmd_thread_main(void *f_) /* Stores the pmd thread's 'pmd' to 'per_pmd_key'. 
*/ ovsthread_setspecific(pmd->dp->per_pmd_key, pmd); - ovs_numa_thread_setaffinity_core(pmd->core_id); + if (pmd->dp->pmd_no_pin) { + struct ovs_numa_dump *numa_cores; + + numa_cores = ovs_numa_dump_cores_on_numa(pmd->numa_id); + ovs_numa_thread_setaffinity_dump(numa_cores); + ovs_numa_dump_destroy(numa_cores); + } else { + ovs_numa_thread_setaffinity_core(pmd->core_id); + } + dpdk_attached = dpdk_attach_thread(pmd->core_id); poll_cnt = pmd_load_queues_and_ports(pmd, &poll_list); dfc_cache_init(&pmd->flow_cache); diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml index d8c26e4da9..f67d59ab87 100644 --- a/vswitchd/vswitch.xml +++ b/vswitchd/vswitch.xml @@ -391,6 +391,20 @@ </p> </column> + <column name="other_config" key="pmd-no-pin" + type='{"type": "boolean"}'> + <p> + If set to <code>true</code>, PMD threads will not be pinned to a + specific CPU core. Instead, each PMD thread's affinity is set to + all cores on its assigned NUMA node, allowing the kernel scheduler + to schedule the thread within that NUMA node. + Default is <code>false</code>. + </p> + <p> + Changing this value requires restarting the daemon. + </p> + </column> + <column name="other_config" key="dpdk-alloc-mem" type='{"type": "integer", "minInteger": 0}'> <p> -- 2.43.7 _______________________________________________ dev mailing list [email protected] https://mail.openvswitch.org/mailman/listinfo/ovs-dev
