2017-02-21 6:49 GMT-08:00 Ilya Maximets <i.maxim...@samsung.com>:
> Currently, changing 'pmd-cpu-mask' is a very heavy operation.
> It requires destroying all of the PMD threads and creating
> them anew. After that, all the threads will sleep until
> the ports' redistribution is finished.
>
> This patch adds the ability to avoid stopping the datapath while
> adjusting the number/placement of PMD threads. All unaffected
> threads will forward traffic without any additional latencies.
>
> An id-pool is created for static tx queue ids to keep them
> sequential in a flexible way. The non-PMD thread will always have
> static_tx_qid = 0, as it did before.
>
> Signed-off-by: Ilya Maximets <i.maxim...@samsung.com>

Thanks for the patch

The idea looks good to me.

I'm still looking at the code, and I have one comment below

> ---
>  lib/dpif-netdev.c | 119 
> +++++++++++++++++++++++++++++++++++++++++-------------
>  tests/pmd.at      |   2 +-
>  2 files changed, 91 insertions(+), 30 deletions(-)
>
> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
> index 30907b7..6e575ab 100644
> --- a/lib/dpif-netdev.c
> +++ b/lib/dpif-netdev.c
> @@ -48,6 +48,7 @@
>  #include "fat-rwlock.h"
>  #include "flow.h"
>  #include "hmapx.h"
> +#include "id-pool.h"
>  #include "latch.h"
>  #include "netdev.h"
>  #include "netdev-vport.h"
> @@ -241,6 +242,9 @@ struct dp_netdev {
>
>      /* Stores all 'struct dp_netdev_pmd_thread's. */
>      struct cmap poll_threads;
> +    /* id pool for per thread static_tx_qid. */
> +    struct id_pool *tx_qid_pool;
> +    struct ovs_mutex tx_qid_pool_mutex;
>
>      /* Protects the access of the 'struct dp_netdev_pmd_thread'
>       * instance for non-pmd thread. */
> @@ -514,7 +518,7 @@ struct dp_netdev_pmd_thread {
>      /* Queue id used by this pmd thread to send packets on all netdevs if
>       * XPS disabled for this netdev. All static_tx_qid's are unique and less
>       * than 'cmap_count(dp->poll_threads)'. */
> -    const int static_tx_qid;
> +    uint32_t static_tx_qid;
>
>      struct ovs_mutex port_mutex;    /* Mutex for 'poll_list' and 'tx_ports'. 
> */
>      /* List of rx queues to poll. */
> @@ -594,6 +598,8 @@ static struct dp_netdev_pmd_thread 
> *dp_netdev_get_pmd(struct dp_netdev *dp,
>                                                        unsigned core_id);
>  static struct dp_netdev_pmd_thread *
>  dp_netdev_pmd_get_next(struct dp_netdev *dp, struct cmap_position *pos);
> +static void dp_netdev_del_pmd(struct dp_netdev *dp,
> +                              struct dp_netdev_pmd_thread *pmd);
>  static void dp_netdev_destroy_all_pmds(struct dp_netdev *dp, bool non_pmd);
>  static void dp_netdev_pmd_clear_ports(struct dp_netdev_pmd_thread *pmd);
>  static void dp_netdev_add_port_tx_to_pmd(struct dp_netdev_pmd_thread *pmd,
> @@ -1077,10 +1083,17 @@ create_dp_netdev(const char *name, const struct 
> dpif_class *class,
>      atomic_init(&dp->emc_insert_min, DEFAULT_EM_FLOW_INSERT_MIN);
>
>      cmap_init(&dp->poll_threads);
> +
> +    ovs_mutex_init(&dp->tx_qid_pool_mutex);
> +    /* We need 1 Tx queue for each possible cpu core + 1 for non-PMD 
> threads. */
> +    dp->tx_qid_pool = id_pool_create(0, ovs_numa_get_n_cores() + 1);
> +
>      ovs_mutex_init_recursive(&dp->non_pmd_mutex);
>      ovsthread_key_create(&dp->per_pmd_key, NULL);
>
>      ovs_mutex_lock(&dp->port_mutex);
> +    /* non-PMD will be created before all other threads and will
> +     * allocate static_tx_qid = 0. */
>      dp_netdev_set_nonpmd(dp);
>
>      error = do_add_port(dp, name, dpif_netdev_port_open_type(dp->class,
> @@ -1164,6 +1177,9 @@ dp_netdev_free(struct dp_netdev *dp)
>      dp_netdev_destroy_all_pmds(dp, true);
>      cmap_destroy(&dp->poll_threads);
>
> +    ovs_mutex_destroy(&dp->tx_qid_pool_mutex);
> +    id_pool_destroy(dp->tx_qid_pool);
> +
>      ovs_mutex_destroy(&dp->non_pmd_mutex);
>      ovsthread_key_delete(dp->per_pmd_key);
>
> @@ -3175,7 +3191,10 @@ reconfigure_pmd_threads(struct dp_netdev *dp)
>  {
>      struct dp_netdev_pmd_thread *pmd;
>      struct ovs_numa_dump *pmd_cores;
> -    bool changed = false;
> +    struct ovs_numa_info_core *core;
> +    struct hmapx to_delete = HMAPX_INITIALIZER(&to_delete);
> +    struct hmapx_node *node;
> +    int created = 0, deleted = 0;
>
>      /* The pmd threads should be started only if there's a pmd port in the
>       * datapath.  If the user didn't provide any "pmd-cpu-mask", we start
> @@ -3188,45 +3207,62 @@ reconfigure_pmd_threads(struct dp_netdev *dp)
>          pmd_cores = ovs_numa_dump_n_cores_per_numa(NR_PMD_THREADS);
>      }
>
> -    /* Check for changed configuration */
> -    if (ovs_numa_dump_count(pmd_cores) != cmap_count(&dp->poll_threads) - 1) 
> {
> -        changed = true;
> -    } else {
> -        CMAP_FOR_EACH (pmd, node, &dp->poll_threads) {
> -            if (pmd->core_id != NON_PMD_CORE_ID
> -                && !ovs_numa_dump_contains_core(pmd_cores,
> -                                                pmd->numa_id,
> -                                                pmd->core_id)) {
> -                changed = true;
> -                break;
> -            }
> +    /* Check for unwanted pmd threads */
> +    CMAP_FOR_EACH(pmd, node, &dp->poll_threads) {
> +        if (pmd->core_id != NON_PMD_CORE_ID
> +            && !ovs_numa_dump_contains_core(pmd_cores,
> +                                            pmd->numa_id,
> +                                            pmd->core_id)) {
> +            hmapx_add(&to_delete, pmd);
>          }
>      }
> +    HMAPX_FOR_EACH (node, &to_delete) {
> +        pmd = (struct dp_netdev_pmd_thread *) node->data;
> +        VLOG_INFO("PMD thread on numa_id: %d, core id: %2d destroyed.",
> +                  pmd->numa_id, pmd->core_id);
> +        dp_netdev_del_pmd(dp, pmd);
> +    }
> +    deleted = hmapx_count(&to_delete);
> +    hmapx_destroy(&to_delete);
>
> -    /* Destroy the old and recreate the new pmd threads.  We don't perform an
> -     * incremental update because we would have to adjust 'static_tx_qid'. */
> -    if (changed) {
> -        struct ovs_numa_info_core *core;
> -        struct ovs_numa_info_numa *numa;
> -
> -        /* Do not destroy the non pmd thread. */
> -        dp_netdev_destroy_all_pmds(dp, false);
> -        FOR_EACH_CORE_ON_DUMP (core, pmd_cores) {
> -            struct dp_netdev_pmd_thread *pmd = xzalloc(sizeof *pmd);
> -
> +    /* Check for required new pmd threads */
> +    FOR_EACH_CORE_ON_DUMP(core, pmd_cores) {
> +        pmd = dp_netdev_get_pmd(dp, core->core_id);
> +        if (!pmd) {
> +            pmd = xzalloc(sizeof *pmd);
>              dp_netdev_configure_pmd(pmd, dp, core->core_id, core->numa_id);
> -
>              pmd->thread = ovs_thread_create("pmd", pmd_thread_main, pmd);
> +            VLOG_INFO("PMD thread on numa_id: %d, core id: %2d created.",
> +                      pmd->numa_id, pmd->core_id);
> +            created++;
> +        } else {
> +            dp_netdev_pmd_unref(pmd);
>          }
> +    }
> +
> +    if (deleted || created) {
> +        struct ovs_numa_info_numa *numa;
>
>          /* Log the number of pmd threads per numa node. */
>          FOR_EACH_NUMA_ON_DUMP (numa, pmd_cores) {
> -            VLOG_INFO("Created %"PRIuSIZE" pmd threads on numa node %d",
> +            VLOG_INFO("There are %"PRIuSIZE" pmd threads on numa node %d",
>                        numa->n_cores, numa->numa_id);
>          }
>      }
>
>      ovs_numa_dump_destroy(pmd_cores);
> +
> +    if (deleted > created) {
> +        /* Static tx qids are not sequential now. Mark all threads for
> +         * reload to fix this. They will be reloaded after setting the new
> +         * txq numbers for ports to avoid using of not allocated queues by
> +         * old PMD threads. Newly created threads has no tx queues yet. */

I'm not sure this is easy to maintain.

What if we have something like:

1. delete old pmd threads
2. reconfigure txqs
3. add new pmd threads

do you think it will be clearer?

> +        CMAP_FOR_EACH(pmd, node, &dp->poll_threads) {
> +            if (pmd->core_id != NON_PMD_CORE_ID) {
> +                pmd->need_reload = true;
> +            }
> +        }
> +    }
>  }
>
>  static void
> @@ -3541,6 +3577,29 @@ pmd_load_cached_ports(struct dp_netdev_pmd_thread *pmd)
>      }
>  }
>
> +static void
> +pmd_alloc_static_tx_qid(struct dp_netdev_pmd_thread *pmd)
> +{
> +    ovs_mutex_lock(&pmd->dp->tx_qid_pool_mutex);
> +    if (!id_pool_alloc_id(pmd->dp->tx_qid_pool, &pmd->static_tx_qid)) {
> +        VLOG_ERR("static_tx_qid allocation failed for PMD on core %2d"
> +                 ", numa_id %d.", pmd->core_id, pmd->numa_id);
> +        OVS_NOT_REACHED();
> +    }
> +    ovs_mutex_unlock(&pmd->dp->tx_qid_pool_mutex);
> +
> +    VLOG_DBG("static_tx_qid = %d allocated for PMD thread on core %2d"
> +             ", numa_id %d.", pmd->static_tx_qid, pmd->core_id, 
> pmd->numa_id);
> +}
> +
> +static void
> +pmd_free_static_tx_qid(struct dp_netdev_pmd_thread *pmd)
> +{
> +    ovs_mutex_lock(&pmd->dp->tx_qid_pool_mutex);
> +    id_pool_free_id(pmd->dp->tx_qid_pool, pmd->static_tx_qid);
> +    ovs_mutex_unlock(&pmd->dp->tx_qid_pool_mutex);
> +}
> +
>  static int
>  pmd_load_queues_and_ports(struct dp_netdev_pmd_thread *pmd,
>                            struct polled_queue **ppoll_list)
> @@ -3586,6 +3645,7 @@ pmd_thread_main(void *f_)
>      dpdk_set_lcore_id(pmd->core_id);
>      poll_cnt = pmd_load_queues_and_ports(pmd, &poll_list);
>  reload:
> +    pmd_alloc_static_tx_qid(pmd);
>      emc_cache_init(&pmd->flow_cache);
>
>      /* List port/core affinity */
> @@ -3634,6 +3694,7 @@ reload:
>      dp_netdev_pmd_reload_done(pmd);
>
>      emc_cache_uninit(&pmd->flow_cache);
> +    pmd_free_static_tx_qid(pmd);
>
>      if (!exiting) {
>          goto reload;
> @@ -3760,8 +3821,6 @@ dp_netdev_configure_pmd(struct dp_netdev_pmd_thread 
> *pmd, struct dp_netdev *dp,
>      pmd->numa_id = numa_id;
>      pmd->need_reload = false;
>
> -    *CONST_CAST(int *, &pmd->static_tx_qid) = cmap_count(&dp->poll_threads);
> -
>      ovs_refcount_init(&pmd->ref_cnt);
>      latch_init(&pmd->exit_latch);
>      pmd->reload_seq = seq_create();
> @@ -3782,6 +3841,7 @@ dp_netdev_configure_pmd(struct dp_netdev_pmd_thread 
> *pmd, struct dp_netdev *dp,
>       * actual thread created for NON_PMD_CORE_ID. */
>      if (core_id == NON_PMD_CORE_ID) {
>          emc_cache_init(&pmd->flow_cache);
> +        pmd_alloc_static_tx_qid(pmd);
>      }
>      cmap_insert(&dp->poll_threads, CONST_CAST(struct cmap_node *, 
> &pmd->node),
>                  hash_int(core_id, 0));
> @@ -3824,6 +3884,7 @@ dp_netdev_del_pmd(struct dp_netdev *dp, struct 
> dp_netdev_pmd_thread *pmd)
>          ovs_mutex_lock(&dp->non_pmd_mutex);
>          emc_cache_uninit(&pmd->flow_cache);
>          pmd_free_cached_ports(pmd);
> +        pmd_free_static_tx_qid(pmd);
>          ovs_mutex_unlock(&dp->non_pmd_mutex);
>      } else {
>          latch_set(&pmd->exit_latch);
> diff --git a/tests/pmd.at b/tests/pmd.at
> index 5686bed..43bdb1b 100644
> --- a/tests/pmd.at
> +++ b/tests/pmd.at
> @@ -38,7 +38,7 @@ dnl
>  dnl Whaits for creation of 'n_threads' or at least 1 thread if $1 not
>  dnl passed. Checking starts from line number 'line' in ovs-vswithd.log .
>  m4_define([CHECK_PMD_THREADS_CREATED], [
> -    PATTERN="Created [[0-9]]* pmd threads on numa node $2"
> +    PATTERN="There are [[0-9]]* pmd threads on numa node $2"
>      line_st=$3
>      if [[ -z "$line_st" ]]
>      then
> --
> 2.7.4
>
_______________________________________________
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to