On 9/29/23 14:50, Kevin Traynor wrote: > Extend 'pmd-sleep-max' so that individual PMD thread cores > may have a specified max sleep request value. > > Existing behaviour is maintained. > > Any PMD thread core without a value will use the global value > if set or default no sleep. > > To set PMD thread cores 8 and 9 to never request a load based sleep > and all other PMD thread cores to be able to request a max sleep of > 50 usecs: > > $ ovs-vsctl set open_vswitch . other_config:pmd-sleep-max=50,8:0,9:0 > > To set PMD thread cores 10 and 11 to request a max sleep of 100 usecs > and all other PMD thread cores to never request a sleep: > > $ ovs-vsctl set open_vswitch . other_config:pmd-sleep-max=10:100,11:100 > > 'pmd-sleep-show' is updated to show the max sleep value for each PMD thread. > > Signed-off-by: Kevin Traynor <[email protected]> > ---
Hi, Kevin. Thanks for the new version! I see a very strange 1.5% preformance degradation in a V-to-V scenario with this patch applied. Could you, please, check? Also, this patch needs a NEWS entry. Some other comments inline. > Documentation/topics/dpdk/pmd.rst | 34 +++- > lib/dpif-netdev-private-thread.h | 3 + > lib/dpif-netdev.c | 267 ++++++++++++++++++++++++--- > tests/pmd.at | 292 ++++++++++++++++++++++++++++-- > vswitchd/vswitch.xml | 32 +++- > 5 files changed, 579 insertions(+), 49 deletions(-) > > diff --git a/Documentation/topics/dpdk/pmd.rst > b/Documentation/topics/dpdk/pmd.rst > index f43819be0..dd6ee46bd 100644 > --- a/Documentation/topics/dpdk/pmd.rst > +++ b/Documentation/topics/dpdk/pmd.rst > @@ -354,8 +354,4 @@ time not processing packets will be determined by the > sleep and processor > wake-up times and should be tested with each system configuration. > > -The current configuration of the PMD load based sleeping can be shown with:: > - > - $ ovs-appctl dpif-netdev/pmd-sleep-show > - > Sleep time statistics for 10 secs can be seen with:: > > @@ -380,4 +376,34 @@ system configuration (e.g. enabling processor C-states) > and workloads. > rate. > > +Maximum sleep values can also be set for individual PMD threads using > +key:value pairs in the form of core:max_sleep. Any PMD thread that has been > +assigned a specified value will use that. Any PMD thread that does not have > +a specified value will use the current global value. > + > +Specified values for individual PMD threads can be added or removed at > +any time. > + > +For example, to set PMD threads on cores 8 and 9 to never request a load > based > +sleep and all others PMD threads to be able to request a max sleep of > +50 microseconds (us):: > + > + $ ovs-vsctl set open_vswitch . other_config:pmd-sleep-max=50,8:0,9:0 > + > +The max sleep value for each PMD threads can be checked in the logs or with:: > + > + $ ovs-appctl dpif-netdev/pmd-sleep-show > + pmd thread numa_id 0 core_id 8: > + max sleep: 0 us > + pmd thread numa_id 1 core_id 9: > + max sleep: 0 us > + pmd thread numa_id 0 core_id 10: > + max sleep: 50 us > + pmd thread numa_id 1 core_id 11: > + max sleep: 50 us > + pmd thread numa_id 0 core_id 12: > + max sleep: 50 us > + pmd thread numa_id 1 core_id 13: > + max sleep: 50 us > + > .. _ovs-vswitchd(8): > http://openvswitch.org/support/dist-docs/ovs-vswitchd.8.html > diff --git a/lib/dpif-netdev-private-thread.h > b/lib/dpif-netdev-private-thread.h > index 1ec3cd794..cb18e5def 100644 > --- a/lib/dpif-netdev-private-thread.h > +++ b/lib/dpif-netdev-private-thread.h > @@ -181,4 +181,7 @@ struct dp_netdev_pmd_thread { > bool isolated; > > + /* Max sleep request in microseconds.*/ A space after a period. > + atomic_uint64_t max_sleep; > + > /* Queue id used by this pmd thread to send packets on all netdevs if > * XPS disabled for this netdev. All static_tx_qid's are unique and less > diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c > index 157694bcf..72ee53a02 100644 > --- a/lib/dpif-netdev.c > +++ b/lib/dpif-netdev.c > @@ -180,4 +180,9 @@ static struct odp_support dp_netdev_support = { > #define PMD_SLEEP_INC_US 1 > > +struct pmd_sleep { > + unsigned core_id; > + uint64_t max_sleep; > +}; > + > struct dpcls { > struct cmap_node node; /* Within dp_netdev_pmd_thread.classifiers */ > @@ -289,5 +294,5 @@ struct dp_netdev { > atomic_bool pmd_perf_metrics; > /* Max load based sleep request. */ We should mention that it is a default one. > - atomic_uint64_t pmd_max_sleep; > + uint64_t pmd_max_sleep; Maybe rename as well, but I'm not sure abouot that. > /* Enable the SMC cache from ovsdb config */ > atomic_bool smc_enable_db; > @@ -327,4 +332,7 @@ struct dp_netdev { > char *pmd_cmask; > > + /* PMD load based max sleep request user string. */ > + char *max_sleep_list; > + > uint64_t last_tnl_conf_seq; > > @@ -1429,4 +1437,17 @@ dpif_netdev_pmd_rebalance(struct unixctl_conn *conn, > int argc, > } > > +static void > +pmd_info_show_sleep(struct ds *reply, unsigned core_id, int numa_id, > + uint64_t pmd_max_sleep) > +{ > + if (core_id == NON_PMD_CORE_ID) { > + return; > + } > + ds_put_format(reply, > + "pmd thread numa_id %d core_id %d:\n" > + " max sleep: %4"PRIu64" us\n", > + numa_id, core_id, pmd_max_sleep); > +} > + > static void > dpif_netdev_pmd_info(struct unixctl_conn *conn, int argc, const char *argv[], > @@ -1443,8 +1464,7 @@ dpif_netdev_pmd_info(struct unixctl_conn *conn, int > argc, const char *argv[], > unsigned long long max_secs = (PMD_INTERVAL_LEN * PMD_INTERVAL_MAX) > / INTERVAL_USEC_TO_SEC; > - uint64_t default_max_sleep = 0; > + uint64_t max_sleep; > bool show_header = true; Reverse x-mass tree. > > - > ovs_mutex_lock(&dp_netdev_mutex); > > @@ -1513,10 +1533,11 @@ dpif_netdev_pmd_info(struct unixctl_conn *conn, int > argc, const char *argv[], > } else if (type == PMD_INFO_SLEEP_SHOW) { > if (show_header) { > - atomic_read_relaxed(&dp->pmd_max_sleep, &default_max_sleep); > - ds_put_format(&reply, "Default max sleep: %4"PRIu64" us", > - default_max_sleep); > - ds_put_cstr(&reply, "\n"); > + ds_put_format(&reply, "Default max sleep: %4"PRIu64" us\n", > + dp->pmd_max_sleep); > show_header = false; > } > + atomic_read_relaxed(&pmd->max_sleep, &max_sleep); > + pmd_info_show_sleep(&reply, pmd->core_id, pmd->numa_id, > + max_sleep); > } > } > @@ -1907,4 +1928,6 @@ create_dp_netdev(const char *name, const struct > dpif_class *class, > } > > + dp->max_sleep_list = NULL; > + > dp->last_tnl_conf_seq = seq_read(tnl_conf_seq); > *dpp = dp; > @@ -2016,4 +2039,5 @@ dp_netdev_free(struct dp_netdev *dp) > dp_netdev_meter_destroy(dp); > > + free(dp->max_sleep_list); > free(dp->pmd_cmask); > free(CONST_CAST(char *, dp->name)); > @@ -4844,4 +4868,206 @@ set_pmd_auto_lb(struct dp_netdev *dp, bool state, > bool always_log) > } > > +static int > +parse_pmd_sleep_list(const char *max_sleep_list, > + struct pmd_sleep **pmd_sleeps) > +{ > + char *list, *copy, *key, *value; > + int num_vals = 0; > + > + if (!max_sleep_list) { > + return num_vals; > + } > + > + list = copy = xstrdup(max_sleep_list); > + > + while (ofputil_parse_key_value(&list, &key, &value)) { > + char *error = NULL; > + unsigned core; > + uint64_t temp, pmd_max_sleep; > + int i; Reverse x-mass tree; > + > + error = str_to_u64(key, &temp); > + if (error) { > + free(error); > + continue; > + } > + > + error = str_to_u64(value, &pmd_max_sleep); > + if (error) { > + /* No value specified. key is dp default. */ This is not fully correct. str_to_u64() may fail for varius reasons. The way to check that value is not specified is to check that it is a zero-length string, i.e. value[0] == '\0'. If it is not, but str_to_u64() fails, then it is a formatting error, i.e. user put something weird there and we should not use that value. Current code will use the 'key' as a default sleep value in this case. That doesn't seem right. Maybe add a test case '50,1:qwe,2:0' and check that defualt value is 50? > + core = UINT_MAX; > + pmd_max_sleep = temp; > + free(error); > + } else { > + /* Value specified. key is pmd core id.*/ > + if (temp >= UINT_MAX) { > + continue; > + } > + core = (unsigned) temp; > + } > + > + /* Detect duplicate max sleep values. */ > + for (i = 0; i < num_vals; i++) { > + if ((*pmd_sleeps)[i].core_id == core) { > + break; > + } > + } > + if (i == num_vals) { > + /* Not duplicate, add a new entry. */ > + *pmd_sleeps = xrealloc(*pmd_sleeps, > + (num_vals + 1) * sizeof **pmd_sleeps); > + num_vals++; > + } > + > + pmd_max_sleep = MIN(PMD_RCU_QUIESCE_INTERVAL, pmd_max_sleep); > + > + (*pmd_sleeps)[i].core_id = core; > + (*pmd_sleeps)[i].max_sleep = pmd_max_sleep; > + } > + > + free(copy); > + return num_vals; > +} > + > +static void > +log_pmd_sleep(unsigned core_id, int numa_id, uint64_t pmd_max_sleep) > +{ > + if (core_id == NON_PMD_CORE_ID) { > + return; > + } > + VLOG_INFO("PMD thread on numa_id: %d, core id: %2d, " > + "max sleep: %4"PRIu64" us.", numa_id, core_id, pmd_max_sleep); > +} > + > +static void > +pmd_init_max_sleep(struct dp_netdev *dp, struct dp_netdev_pmd_thread *pmd) > +{ > + uint64_t max_sleep = dp->pmd_max_sleep; > + struct pmd_sleep *pmd_sleeps = NULL; > + int num_vals; > + > + num_vals = parse_pmd_sleep_list(dp->max_sleep_list, &pmd_sleeps); > + > + /* Check if the user has set a specific value for this pmd. */ > + for (int i = 0; i < num_vals; i++) { > + if (pmd_sleeps[i].core_id == pmd->core_id) { > + max_sleep = pmd_sleeps[i].max_sleep; > + break; > + } > + } > + atomic_init(&pmd->max_sleep, max_sleep); > + log_pmd_sleep(pmd->core_id, pmd->numa_id, max_sleep); > + free(pmd_sleeps); > +} > + > +static bool > +assign_sleep_values_to_pmds(struct dp_netdev *dp, int num_vals, > + struct pmd_sleep *pmd_sleeps) > +{ > + struct dp_netdev_pmd_thread *pmd; > + bool value_changed = false; > + > + CMAP_FOR_EACH (pmd, node, &dp->poll_threads) { > + uint64_t new_max_sleep, cur_pmd_max_sleep; > + > + if (pmd->core_id == NON_PMD_CORE_ID) { > + continue; > + } > + > + /* Default to global value. */ > + new_max_sleep = dp->pmd_max_sleep; > + > + /* Check for pmd specific value. */ > + for (int i = 0; i < num_vals; i++) { > + if (pmd->core_id == pmd_sleeps[i].core_id) { > + new_max_sleep = pmd_sleeps[i].max_sleep; > + break; > + } > + } > + atomic_read_relaxed(&pmd->max_sleep, &cur_pmd_max_sleep); > + if (new_max_sleep != cur_pmd_max_sleep) { > + atomic_store_relaxed(&pmd->max_sleep, new_max_sleep); > + value_changed = true; > + } > + } > + return value_changed; > +} > + > +static void > +log_all_pmd_sleeps(struct dp_netdev *dp) > +{ > + struct dp_netdev_pmd_thread **pmd_list = NULL; > + struct dp_netdev_pmd_thread *pmd; > + size_t n; > + > + VLOG_INFO("Default PMD thread max sleep: %4"PRIu64" us.", > + dp->pmd_max_sleep); > + > + sorted_poll_thread_list(dp, &pmd_list, &n); > + > + for (size_t i = 0; i < n; i++) { > + uint64_t cur_pmd_max_sleep; > + > + pmd = pmd_list[i]; > + atomic_read_relaxed(&pmd->max_sleep, &cur_pmd_max_sleep); > + log_pmd_sleep(pmd->core_id, pmd->numa_id, cur_pmd_max_sleep); > + } > + free(pmd_list); > +} > + > +static bool > +set_all_pmd_max_sleeps(struct dp_netdev *dp, const struct smap *config) > +{ > + const char *max_sleep_list = smap_get(config, "pmd-sleep-max"); > + struct pmd_sleep *pmd_sleeps = NULL; > + uint64_t default_max_sleep = 0; > + bool default_changed = false; > + bool pmd_changed = false; > + uint64_t pmd_maxsleep; > + int num_vals = 0; > + > + /* Check for deprecated 'pmd-maxsleep' value. */ > + pmd_maxsleep = smap_get_ullong(config, "pmd-maxsleep", UINT64_MAX); > + if (pmd_maxsleep != UINT64_MAX && !max_sleep_list) { > + VLOG_WARN_ONCE("pmd-maxsleep is deprecated. " > + "Please use pmd-sleep-max instead."); > + default_max_sleep = pmd_maxsleep; > + } The knob was deprecated in 3.2. Can we just remove it now? (NEWS entry will be needed.) > + > + /* Check if there is no change in string or value. */ > + if (!!dp->max_sleep_list == !!max_sleep_list) { > + if (max_sleep_list > + ? nullable_string_is_equal(max_sleep_list, dp->max_sleep_list) > + : default_max_sleep == dp->pmd_max_sleep) { > + return false; > + } > + } If we remove the deprecated knob we may just compare strings here. > + > + /* Free existing string and copy new one (if any). */ > + free(dp->max_sleep_list); > + dp->max_sleep_list = nullable_xstrdup(max_sleep_list); > + > + if (max_sleep_list) { > + num_vals = parse_pmd_sleep_list(max_sleep_list, &pmd_sleeps); > + > + /* Check if the user has set a global value. */ > + for (int i = 0; i < num_vals; i++) { > + if (pmd_sleeps[i].core_id == UINT_MAX) { > + default_max_sleep = pmd_sleeps[i].max_sleep; > + break; > + } > + } > + } > + > + if (dp->pmd_max_sleep != default_max_sleep) { > + dp->pmd_max_sleep = default_max_sleep; > + default_changed = true; > + } > + pmd_changed = assign_sleep_values_to_pmds(dp, num_vals, pmd_sleeps); > + > + free(pmd_sleeps); > + return default_changed || pmd_changed; > +} > + > /* Applies datapath configuration from the database. Some of the changes are > * actually applied in dpif_netdev_run(). */ > @@ -4861,5 +5087,4 @@ dpif_netdev_set_config(struct dpif *dpif, const struct > smap *other_config) > uint8_t cur_rebalance_load; > uint32_t rebalance_load, rebalance_improve; > - uint64_t pmd_max_sleep, cur_pmd_max_sleep; > bool log_autolb = false; > enum sched_assignment_type pmd_rxq_assign_type; > @@ -5012,24 +5237,10 @@ dpif_netdev_set_config(struct dpif *dpif, const > struct smap *other_config) > set_pmd_auto_lb(dp, autolb_state, log_autolb); > > - pmd_max_sleep = smap_get_ullong(other_config, "pmd-maxsleep", > UINT64_MAX); > - if (pmd_max_sleep != UINT64_MAX) { > - VLOG_WARN("pmd-maxsleep is deprecated. " > - "Please use pmd-sleep-max instead."); > - } else { > - pmd_max_sleep = 0; > + bool sleep_changed = set_all_pmd_max_sleeps(dp, other_config); > + if (first_set_config || sleep_changed) { > + log_all_pmd_sleeps(dp); > } > > - pmd_max_sleep = smap_get_ullong(other_config, "pmd-sleep-max", > - pmd_max_sleep); > - pmd_max_sleep = MIN(PMD_RCU_QUIESCE_INTERVAL, pmd_max_sleep); > - atomic_read_relaxed(&dp->pmd_max_sleep, &cur_pmd_max_sleep); > - if (first_set_config || pmd_max_sleep != cur_pmd_max_sleep) { > - atomic_store_relaxed(&dp->pmd_max_sleep, pmd_max_sleep); > - VLOG_INFO("PMD max sleep request is %"PRIu64" usecs.", > pmd_max_sleep); > - VLOG_INFO("PMD load based sleeps are %s.", > - pmd_max_sleep ? "enabled" : "disabled" ); > - } > - > - first_set_config = false; > + first_set_config = false; > return 0; > } > @@ -7060,5 +7271,5 @@ reload: > > atomic_read_relaxed(&pmd->dp->smc_enable_db, > &pmd->ctx.smc_enable_db); > - atomic_read_relaxed(&pmd->dp->pmd_max_sleep, &max_sleep); > + atomic_read_relaxed(&pmd->max_sleep, &max_sleep); > > for (i = 0; i < poll_cnt; i++) { > @@ -7647,4 +7858,6 @@ dp_netdev_configure_pmd(struct dp_netdev_pmd_thread > *pmd, struct dp_netdev *dp, > cmap_init(&pmd->tx_bonds); > > + pmd_init_max_sleep(dp, pmd); > + > /* Initialize DPIF function pointer to the default configured version. */ > atomic_init(&pmd->netdev_input_func, dp_netdev_impl_get_default()); > diff --git a/tests/pmd.at b/tests/pmd.at > index 7bdaca9e7..205293982 100644 > --- a/tests/pmd.at > +++ b/tests/pmd.at > @@ -61,12 +61,11 @@ m4_define([CHECK_PMD_THREADS_CREATED], [ > ]) > > -dnl CHECK_DP_SLEEP_MAX([max_sleep], [enabled], [+line]) > +dnl CHECK_DP_SLEEP_MAX([max_sleep], [+line]) > dnl > -dnl Checks correct pmd load based sleep is set for the datapath. > +dnl Checks correct pmd load based sleep value for the datapath. Double space. <snip> Best regards, Ilya Maximets. _______________________________________________ dev mailing list [email protected] https://mail.openvswitch.org/mailman/listinfo/ovs-dev
