On 9/29/23 14:50, Kevin Traynor wrote:
> Extend 'pmd-sleep-max' so that individual PMD thread cores
> may have a specified max sleep request value.
> 
> Existing behaviour is maintained.
> 
> Any PMD thread core without a value will use the global value
> if set or default no sleep.
> 
> To set PMD thread cores 8 and 9 to never request a load based sleep
> and all other PMD thread cores to be able to request a max sleep of
> 50 usecs:
> 
> $ ovs-vsctl set open_vswitch . other_config:pmd-sleep-max=50,8:0,9:0
> 
> To set PMD thread cores 10 and 11 to request a max sleep of 100 usecs
> and all other PMD thread cores to never request a sleep:
> 
> $ ovs-vsctl set open_vswitch . other_config:pmd-sleep-max=10:100,11:100
> 
> 'pmd-sleep-show' is updated to show the max sleep value for each PMD thread.
> 
> Signed-off-by: Kevin Traynor <[email protected]>
> ---

Hi, Kevin.  Thanks for the new version!

I see a very strange 1.5% preformance degradation in a V-to-V
scenario with this patch applied.  Could you, please, check?

Also, this patch needs a NEWS entry.

Some other comments inline.

>  Documentation/topics/dpdk/pmd.rst |  34 +++-
>  lib/dpif-netdev-private-thread.h  |   3 +
>  lib/dpif-netdev.c                 | 267 ++++++++++++++++++++++++---
>  tests/pmd.at                      | 292 ++++++++++++++++++++++++++++--
>  vswitchd/vswitch.xml              |  32 +++-
>  5 files changed, 579 insertions(+), 49 deletions(-)
> 
> diff --git a/Documentation/topics/dpdk/pmd.rst 
> b/Documentation/topics/dpdk/pmd.rst
> index f43819be0..dd6ee46bd 100644
> --- a/Documentation/topics/dpdk/pmd.rst
> +++ b/Documentation/topics/dpdk/pmd.rst
> @@ -354,8 +354,4 @@ time not processing packets will be determined by the 
> sleep and processor
>  wake-up times and should be tested with each system configuration.
>  
> -The current configuration of the PMD load based sleeping can be shown with::
> -
> -    $ ovs-appctl dpif-netdev/pmd-sleep-show
> -
>  Sleep time statistics for 10 secs can be seen with::
>  
> @@ -380,4 +376,34 @@ system configuration (e.g. enabling processor C-states) 
> and workloads.
>      rate.
>  
> +Maximum sleep values can also be set for individual PMD threads using
> +key:value pairs in the form of core:max_sleep. Any PMD thread that has been
> +assigned a specified value will use that. Any PMD thread that does not have
> +a specified value will use the current global value.
> +
> +Specified values for individual PMD threads can be added or removed at
> +any time.
> +
> +For example, to set PMD threads on cores 8 and 9 to never request a load 
> based
> +sleep and all others PMD threads to be able to request a max sleep of
> +50 microseconds (us)::
> +
> +    $ ovs-vsctl set open_vswitch . other_config:pmd-sleep-max=50,8:0,9:0
> +
> +The max sleep value for each PMD threads can be checked in the logs or with::
> +
> +    $ ovs-appctl dpif-netdev/pmd-sleep-show
> +    pmd thread numa_id 0 core_id 8:
> +      max sleep:    0 us
> +    pmd thread numa_id 1 core_id 9:
> +      max sleep:    0 us
> +    pmd thread numa_id 0 core_id 10:
> +      max sleep:   50 us
> +    pmd thread numa_id 1 core_id 11:
> +      max sleep:   50 us
> +    pmd thread numa_id 0 core_id 12:
> +      max sleep:   50 us
> +    pmd thread numa_id 1 core_id 13:
> +      max sleep:   50 us
> +
>  .. _ovs-vswitchd(8):
>      http://openvswitch.org/support/dist-docs/ovs-vswitchd.8.html
> diff --git a/lib/dpif-netdev-private-thread.h 
> b/lib/dpif-netdev-private-thread.h
> index 1ec3cd794..cb18e5def 100644
> --- a/lib/dpif-netdev-private-thread.h
> +++ b/lib/dpif-netdev-private-thread.h
> @@ -181,4 +181,7 @@ struct dp_netdev_pmd_thread {
>      bool isolated;
>  
> +    /* Max sleep request in microseconds.*/

A space after a period.

> +    atomic_uint64_t max_sleep;
> +
>      /* Queue id used by this pmd thread to send packets on all netdevs if
>       * XPS disabled for this netdev. All static_tx_qid's are unique and less
> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
> index 157694bcf..72ee53a02 100644
> --- a/lib/dpif-netdev.c
> +++ b/lib/dpif-netdev.c
> @@ -180,4 +180,9 @@ static struct odp_support dp_netdev_support = {
>  #define PMD_SLEEP_INC_US 1
>  
> +struct pmd_sleep {
> +    unsigned core_id;
> +    uint64_t max_sleep;
> +};
> +
>  struct dpcls {
>      struct cmap_node node;      /* Within dp_netdev_pmd_thread.classifiers */
> @@ -289,5 +294,5 @@ struct dp_netdev {
>      atomic_bool pmd_perf_metrics;
>      /* Max load based sleep request. */

We should mention that it is a default one.

> -    atomic_uint64_t pmd_max_sleep;
> +    uint64_t pmd_max_sleep;

Maybe rename as well, but I'm not sure abouot that.

>      /* Enable the SMC cache from ovsdb config */
>      atomic_bool smc_enable_db;
> @@ -327,4 +332,7 @@ struct dp_netdev {
>      char *pmd_cmask;
>  
> +    /* PMD load based max sleep request user string. */
> +    char *max_sleep_list;
> +
>      uint64_t last_tnl_conf_seq;
>  
> @@ -1429,4 +1437,17 @@ dpif_netdev_pmd_rebalance(struct unixctl_conn *conn, 
> int argc,
>  }
>  
> +static void
> +pmd_info_show_sleep(struct ds *reply, unsigned core_id, int numa_id,
> +                    uint64_t pmd_max_sleep)
> +{
> +    if (core_id == NON_PMD_CORE_ID) {
> +        return;
> +    }
> +    ds_put_format(reply,
> +                  "pmd thread numa_id %d core_id %d:\n"
> +                  "  max sleep: %4"PRIu64" us\n",
> +                  numa_id, core_id, pmd_max_sleep);
> +}
> +
>  static void
>  dpif_netdev_pmd_info(struct unixctl_conn *conn, int argc, const char *argv[],
> @@ -1443,8 +1464,7 @@ dpif_netdev_pmd_info(struct unixctl_conn *conn, int 
> argc, const char *argv[],
>      unsigned long long max_secs = (PMD_INTERVAL_LEN * PMD_INTERVAL_MAX)
>                                        / INTERVAL_USEC_TO_SEC;
> -    uint64_t default_max_sleep = 0;
> +    uint64_t max_sleep;
>      bool show_header = true;

Reverse x-mass tree.

>  
> -
>      ovs_mutex_lock(&dp_netdev_mutex);
>  
> @@ -1513,10 +1533,11 @@ dpif_netdev_pmd_info(struct unixctl_conn *conn, int 
> argc, const char *argv[],
>          } else if (type == PMD_INFO_SLEEP_SHOW) {
>              if (show_header) {
> -                atomic_read_relaxed(&dp->pmd_max_sleep, &default_max_sleep);
> -                ds_put_format(&reply, "Default max sleep: %4"PRIu64" us",
> -                              default_max_sleep);
> -                ds_put_cstr(&reply, "\n");
> +                ds_put_format(&reply, "Default max sleep: %4"PRIu64" us\n",
> +                              dp->pmd_max_sleep);
>                  show_header = false;
>              }
> +            atomic_read_relaxed(&pmd->max_sleep, &max_sleep);
> +            pmd_info_show_sleep(&reply, pmd->core_id, pmd->numa_id,
> +                                max_sleep);
>          }
>      }
> @@ -1907,4 +1928,6 @@ create_dp_netdev(const char *name, const struct 
> dpif_class *class,
>      }
>  
> +    dp->max_sleep_list = NULL;
> +
>      dp->last_tnl_conf_seq = seq_read(tnl_conf_seq);
>      *dpp = dp;
> @@ -2016,4 +2039,5 @@ dp_netdev_free(struct dp_netdev *dp)
>      dp_netdev_meter_destroy(dp);
>  
> +    free(dp->max_sleep_list);
>      free(dp->pmd_cmask);
>      free(CONST_CAST(char *, dp->name));
> @@ -4844,4 +4868,206 @@ set_pmd_auto_lb(struct dp_netdev *dp, bool state, 
> bool always_log)
>  }
>  
> +static int
> +parse_pmd_sleep_list(const char *max_sleep_list,
> +                     struct pmd_sleep **pmd_sleeps)
> +{
> +    char *list, *copy, *key, *value;
> +    int num_vals = 0;
> +
> +    if (!max_sleep_list) {
> +        return num_vals;
> +    }
> +
> +    list = copy = xstrdup(max_sleep_list);
> +
> +    while (ofputil_parse_key_value(&list, &key, &value)) {
> +        char *error = NULL;
> +        unsigned core;
> +        uint64_t temp, pmd_max_sleep;
> +        int i;

Reverse x-mass tree;

> +
> +        error = str_to_u64(key, &temp);
> +        if (error) {
> +            free(error);
> +            continue;
> +        }
> +
> +        error = str_to_u64(value, &pmd_max_sleep);
> +        if (error) {
> +            /* No value specified. key is dp default. */

This is not fully correct.  str_to_u64() may fail for varius
reasons.  The way to check that value is not specified is to
check that it is a zero-length string, i.e. value[0] == '\0'.
If it is not, but str_to_u64() fails, then it is a formatting
error, i.e. user put something weird there and we should not
use that value.  Current code will use the 'key' as a default
sleep value in this case.  That doesn't seem right.
Maybe add a test case '50,1:qwe,2:0' and check that defualt
value is 50?

> +            core = UINT_MAX;
> +            pmd_max_sleep = temp;
> +            free(error);
> +        } else {
> +            /* Value specified. key is pmd core id.*/
> +            if (temp >= UINT_MAX) {
> +                continue;
> +            }
> +            core = (unsigned) temp;
> +        }
> +
> +        /* Detect duplicate max sleep values. */
> +        for (i = 0; i < num_vals; i++) {
> +            if ((*pmd_sleeps)[i].core_id == core) {
> +                break;
> +            }
> +        }
> +        if (i == num_vals) {
> +            /* Not duplicate, add a new entry. */
> +            *pmd_sleeps = xrealloc(*pmd_sleeps,
> +                                   (num_vals + 1) * sizeof **pmd_sleeps);
> +            num_vals++;
> +        }
> +
> +        pmd_max_sleep = MIN(PMD_RCU_QUIESCE_INTERVAL, pmd_max_sleep);
> +
> +        (*pmd_sleeps)[i].core_id = core;
> +        (*pmd_sleeps)[i].max_sleep = pmd_max_sleep;
> +    }
> +
> +    free(copy);
> +    return num_vals;
> +}
> +
> +static void
> +log_pmd_sleep(unsigned core_id, int numa_id, uint64_t pmd_max_sleep)
> +{
> +    if (core_id == NON_PMD_CORE_ID) {
> +        return;
> +    }
> +    VLOG_INFO("PMD thread on numa_id: %d, core id: %2d, "
> +              "max sleep: %4"PRIu64" us.", numa_id, core_id, pmd_max_sleep);
> +}
> +
> +static void
> +pmd_init_max_sleep(struct dp_netdev *dp, struct dp_netdev_pmd_thread *pmd)
> +{
> +    uint64_t max_sleep = dp->pmd_max_sleep;
> +    struct pmd_sleep *pmd_sleeps = NULL;
> +    int num_vals;
> +
> +    num_vals = parse_pmd_sleep_list(dp->max_sleep_list, &pmd_sleeps);
> +
> +    /* Check if the user has set a specific value for this pmd. */
> +    for (int i = 0; i < num_vals; i++) {
> +        if (pmd_sleeps[i].core_id == pmd->core_id) {
> +            max_sleep = pmd_sleeps[i].max_sleep;
> +            break;
> +        }
> +    }
> +    atomic_init(&pmd->max_sleep, max_sleep);
> +    log_pmd_sleep(pmd->core_id, pmd->numa_id, max_sleep);
> +    free(pmd_sleeps);
> +}
> +
> +static bool
> +assign_sleep_values_to_pmds(struct dp_netdev *dp, int num_vals,
> +                            struct pmd_sleep *pmd_sleeps)
> +{
> +    struct dp_netdev_pmd_thread *pmd;
> +    bool value_changed = false;
> +
> +    CMAP_FOR_EACH (pmd, node, &dp->poll_threads) {
> +        uint64_t new_max_sleep, cur_pmd_max_sleep;
> +
> +        if (pmd->core_id == NON_PMD_CORE_ID) {
> +            continue;
> +        }
> +
> +        /* Default to global value. */
> +        new_max_sleep = dp->pmd_max_sleep;
> +
> +        /* Check for pmd specific value. */
> +        for (int i = 0;  i < num_vals; i++) {
> +            if (pmd->core_id == pmd_sleeps[i].core_id) {
> +                new_max_sleep = pmd_sleeps[i].max_sleep;
> +                break;
> +            }
> +        }
> +        atomic_read_relaxed(&pmd->max_sleep, &cur_pmd_max_sleep);
> +        if (new_max_sleep != cur_pmd_max_sleep) {
> +            atomic_store_relaxed(&pmd->max_sleep, new_max_sleep);
> +            value_changed = true;
> +        }
> +    }
> +    return value_changed;
> +}
> +
> +static void
> +log_all_pmd_sleeps(struct dp_netdev *dp)
> +{
> +    struct dp_netdev_pmd_thread **pmd_list = NULL;
> +    struct dp_netdev_pmd_thread *pmd;
> +    size_t n;
> +
> +    VLOG_INFO("Default PMD thread max sleep: %4"PRIu64" us.",
> +              dp->pmd_max_sleep);
> +
> +    sorted_poll_thread_list(dp, &pmd_list, &n);
> +
> +    for (size_t i = 0; i < n; i++) {
> +        uint64_t cur_pmd_max_sleep;
> +
> +        pmd = pmd_list[i];
> +        atomic_read_relaxed(&pmd->max_sleep, &cur_pmd_max_sleep);
> +        log_pmd_sleep(pmd->core_id, pmd->numa_id, cur_pmd_max_sleep);
> +    }
> +    free(pmd_list);
> +}
> +
> +static bool
> +set_all_pmd_max_sleeps(struct dp_netdev *dp, const struct smap *config)
> +{
> +    const char *max_sleep_list = smap_get(config, "pmd-sleep-max");
> +    struct pmd_sleep *pmd_sleeps = NULL;
> +    uint64_t default_max_sleep = 0;
> +    bool default_changed = false;
> +    bool pmd_changed = false;
> +    uint64_t pmd_maxsleep;
> +    int num_vals = 0;
> +
> +    /* Check for deprecated 'pmd-maxsleep' value. */
> +    pmd_maxsleep = smap_get_ullong(config, "pmd-maxsleep", UINT64_MAX);
> +    if (pmd_maxsleep != UINT64_MAX && !max_sleep_list) {
> +        VLOG_WARN_ONCE("pmd-maxsleep is deprecated. "
> +                       "Please use pmd-sleep-max instead.");
> +        default_max_sleep = pmd_maxsleep;
> +    }

The knob was deprecated in 3.2.  Can we just remove it now?
(NEWS entry will be needed.)

> +
> +    /* Check if there is no change in string or value. */
> +    if (!!dp->max_sleep_list == !!max_sleep_list) {
> +        if (max_sleep_list
> +            ? nullable_string_is_equal(max_sleep_list, dp->max_sleep_list)
> +            : default_max_sleep == dp->pmd_max_sleep) {
> +            return false;
> +        }
> +    }

If we remove the deprecated knob we may just compare strings here.

> +
> +    /* Free existing string and copy new one (if any). */
> +    free(dp->max_sleep_list);
> +    dp->max_sleep_list = nullable_xstrdup(max_sleep_list);
> +
> +    if (max_sleep_list) {
> +        num_vals = parse_pmd_sleep_list(max_sleep_list, &pmd_sleeps);
> +
> +        /* Check if the user has set a global value. */
> +        for (int i = 0; i < num_vals; i++) {
> +            if (pmd_sleeps[i].core_id == UINT_MAX) {
> +                default_max_sleep = pmd_sleeps[i].max_sleep;
> +                break;
> +            }
> +        }
> +    }
> +
> +    if (dp->pmd_max_sleep != default_max_sleep) {
> +        dp->pmd_max_sleep = default_max_sleep;
> +        default_changed = true;
> +    }
> +    pmd_changed = assign_sleep_values_to_pmds(dp, num_vals, pmd_sleeps);
> +
> +    free(pmd_sleeps);
> +    return default_changed || pmd_changed;
> +}
> +
>  /* Applies datapath configuration from the database. Some of the changes are
>   * actually applied in dpif_netdev_run(). */
> @@ -4861,5 +5087,4 @@ dpif_netdev_set_config(struct dpif *dpif, const struct 
> smap *other_config)
>      uint8_t cur_rebalance_load;
>      uint32_t rebalance_load, rebalance_improve;
> -    uint64_t  pmd_max_sleep, cur_pmd_max_sleep;
>      bool log_autolb = false;
>      enum sched_assignment_type pmd_rxq_assign_type;
> @@ -5012,24 +5237,10 @@ dpif_netdev_set_config(struct dpif *dpif, const 
> struct smap *other_config)
>      set_pmd_auto_lb(dp, autolb_state, log_autolb);
>  
> -    pmd_max_sleep = smap_get_ullong(other_config, "pmd-maxsleep", 
> UINT64_MAX);
> -    if (pmd_max_sleep != UINT64_MAX) {
> -        VLOG_WARN("pmd-maxsleep is deprecated. "
> -                  "Please use pmd-sleep-max instead.");
> -    } else {
> -        pmd_max_sleep = 0;
> +    bool sleep_changed = set_all_pmd_max_sleeps(dp, other_config);
> +    if (first_set_config || sleep_changed) {
> +        log_all_pmd_sleeps(dp);
>      }
>  
> -    pmd_max_sleep = smap_get_ullong(other_config, "pmd-sleep-max",
> -                                    pmd_max_sleep);
> -    pmd_max_sleep = MIN(PMD_RCU_QUIESCE_INTERVAL, pmd_max_sleep);
> -    atomic_read_relaxed(&dp->pmd_max_sleep, &cur_pmd_max_sleep);
> -    if (first_set_config || pmd_max_sleep != cur_pmd_max_sleep) {
> -        atomic_store_relaxed(&dp->pmd_max_sleep, pmd_max_sleep);
> -        VLOG_INFO("PMD max sleep request is %"PRIu64" usecs.", 
> pmd_max_sleep);
> -        VLOG_INFO("PMD load based sleeps are %s.",
> -                  pmd_max_sleep ? "enabled" : "disabled" );
> -    }
> -
> -    first_set_config  = false;
> +    first_set_config = false;
>      return 0;
>  }
> @@ -7060,5 +7271,5 @@ reload:
>  
>          atomic_read_relaxed(&pmd->dp->smc_enable_db, 
> &pmd->ctx.smc_enable_db);
> -        atomic_read_relaxed(&pmd->dp->pmd_max_sleep, &max_sleep);
> +        atomic_read_relaxed(&pmd->max_sleep, &max_sleep);
>  
>          for (i = 0; i < poll_cnt; i++) {
> @@ -7647,4 +7858,6 @@ dp_netdev_configure_pmd(struct dp_netdev_pmd_thread 
> *pmd, struct dp_netdev *dp,
>      cmap_init(&pmd->tx_bonds);
>  
> +    pmd_init_max_sleep(dp, pmd);
> +
>      /* Initialize DPIF function pointer to the default configured version. */
>      atomic_init(&pmd->netdev_input_func, dp_netdev_impl_get_default());
> diff --git a/tests/pmd.at b/tests/pmd.at
> index 7bdaca9e7..205293982 100644
> --- a/tests/pmd.at
> +++ b/tests/pmd.at
> @@ -61,12 +61,11 @@ m4_define([CHECK_PMD_THREADS_CREATED], [
>  ])
>  
> -dnl CHECK_DP_SLEEP_MAX([max_sleep], [enabled], [+line])
> +dnl CHECK_DP_SLEEP_MAX([max_sleep], [+line])
>  dnl
> -dnl Checks correct pmd load based sleep is set for the datapath.
> +dnl Checks correct pmd load based sleep value  for the datapath.

Double space.

<snip>

Best regards, Ilya Maximets.
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to