2017-02-21 6:49 GMT-08:00 Ilya Maximets <[email protected]>:
> Currently, change of 'pmd-cpu-mask' is very heavy operation.
> It requires destroying of all the PMD threads and creating
> them back. After that, all the threads will sleep until
> ports' redistribution finished.
>
> This patch adds ability to not stop the datapath while
> adjusting number/placement of PMD threads. All not affected
> threads will forward traffic without any additional latencies.
>
> id-pool created for static tx queue ids to keep them sequential
> in a flexible way. non-PMD thread will always have
> static_tx_qid = 0 as it was before.
>
> Signed-off-by: Ilya Maximets <[email protected]>
Thanks for the patch
The idea looks good to me.
I'm still looking at the code, and I have one comment below
> ---
> lib/dpif-netdev.c | 119
> +++++++++++++++++++++++++++++++++++++++++-------------
> tests/pmd.at | 2 +-
> 2 files changed, 91 insertions(+), 30 deletions(-)
>
> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
> index 30907b7..6e575ab 100644
> --- a/lib/dpif-netdev.c
> +++ b/lib/dpif-netdev.c
> @@ -48,6 +48,7 @@
> #include "fat-rwlock.h"
> #include "flow.h"
> #include "hmapx.h"
> +#include "id-pool.h"
> #include "latch.h"
> #include "netdev.h"
> #include "netdev-vport.h"
> @@ -241,6 +242,9 @@ struct dp_netdev {
>
> /* Stores all 'struct dp_netdev_pmd_thread's. */
> struct cmap poll_threads;
> + /* id pool for per thread static_tx_qid. */
> + struct id_pool *tx_qid_pool;
> + struct ovs_mutex tx_qid_pool_mutex;
>
> /* Protects the access of the 'struct dp_netdev_pmd_thread'
> * instance for non-pmd thread. */
> @@ -514,7 +518,7 @@ struct dp_netdev_pmd_thread {
> /* Queue id used by this pmd thread to send packets on all netdevs if
> * XPS disabled for this netdev. All static_tx_qid's are unique and less
> * than 'cmap_count(dp->poll_threads)'. */
> - const int static_tx_qid;
> + uint32_t static_tx_qid;
>
> struct ovs_mutex port_mutex; /* Mutex for 'poll_list' and 'tx_ports'.
> */
> /* List of rx queues to poll. */
> @@ -594,6 +598,8 @@ static struct dp_netdev_pmd_thread
> *dp_netdev_get_pmd(struct dp_netdev *dp,
> unsigned core_id);
> static struct dp_netdev_pmd_thread *
> dp_netdev_pmd_get_next(struct dp_netdev *dp, struct cmap_position *pos);
> +static void dp_netdev_del_pmd(struct dp_netdev *dp,
> + struct dp_netdev_pmd_thread *pmd);
> static void dp_netdev_destroy_all_pmds(struct dp_netdev *dp, bool non_pmd);
> static void dp_netdev_pmd_clear_ports(struct dp_netdev_pmd_thread *pmd);
> static void dp_netdev_add_port_tx_to_pmd(struct dp_netdev_pmd_thread *pmd,
> @@ -1077,10 +1083,17 @@ create_dp_netdev(const char *name, const struct
> dpif_class *class,
> atomic_init(&dp->emc_insert_min, DEFAULT_EM_FLOW_INSERT_MIN);
>
> cmap_init(&dp->poll_threads);
> +
> + ovs_mutex_init(&dp->tx_qid_pool_mutex);
> + /* We need 1 Tx queue for each possible cpu core + 1 for non-PMD
> threads. */
> + dp->tx_qid_pool = id_pool_create(0, ovs_numa_get_n_cores() + 1);
> +
> ovs_mutex_init_recursive(&dp->non_pmd_mutex);
> ovsthread_key_create(&dp->per_pmd_key, NULL);
>
> ovs_mutex_lock(&dp->port_mutex);
> + /* non-PMD will be created before all other threads and will
> + * allocate static_tx_qid = 0. */
> dp_netdev_set_nonpmd(dp);
>
> error = do_add_port(dp, name, dpif_netdev_port_open_type(dp->class,
> @@ -1164,6 +1177,9 @@ dp_netdev_free(struct dp_netdev *dp)
> dp_netdev_destroy_all_pmds(dp, true);
> cmap_destroy(&dp->poll_threads);
>
> + ovs_mutex_destroy(&dp->tx_qid_pool_mutex);
> + id_pool_destroy(dp->tx_qid_pool);
> +
> ovs_mutex_destroy(&dp->non_pmd_mutex);
> ovsthread_key_delete(dp->per_pmd_key);
>
> @@ -3175,7 +3191,10 @@ reconfigure_pmd_threads(struct dp_netdev *dp)
> {
> struct dp_netdev_pmd_thread *pmd;
> struct ovs_numa_dump *pmd_cores;
> - bool changed = false;
> + struct ovs_numa_info_core *core;
> + struct hmapx to_delete = HMAPX_INITIALIZER(&to_delete);
> + struct hmapx_node *node;
> + int created = 0, deleted = 0;
>
> /* The pmd threads should be started only if there's a pmd port in the
> * datapath. If the user didn't provide any "pmd-cpu-mask", we start
> @@ -3188,45 +3207,62 @@ reconfigure_pmd_threads(struct dp_netdev *dp)
> pmd_cores = ovs_numa_dump_n_cores_per_numa(NR_PMD_THREADS);
> }
>
> - /* Check for changed configuration */
> - if (ovs_numa_dump_count(pmd_cores) != cmap_count(&dp->poll_threads) - 1)
> {
> - changed = true;
> - } else {
> - CMAP_FOR_EACH (pmd, node, &dp->poll_threads) {
> - if (pmd->core_id != NON_PMD_CORE_ID
> - && !ovs_numa_dump_contains_core(pmd_cores,
> - pmd->numa_id,
> - pmd->core_id)) {
> - changed = true;
> - break;
> - }
> + /* Check for unwanted pmd threads */
> + CMAP_FOR_EACH(pmd, node, &dp->poll_threads) {
> + if (pmd->core_id != NON_PMD_CORE_ID
> + && !ovs_numa_dump_contains_core(pmd_cores,
> + pmd->numa_id,
> + pmd->core_id)) {
> + hmapx_add(&to_delete, pmd);
> }
> }
> + HMAPX_FOR_EACH (node, &to_delete) {
> + pmd = (struct dp_netdev_pmd_thread *) node->data;
> + VLOG_INFO("PMD thread on numa_id: %d, core id: %2d destroyed.",
> + pmd->numa_id, pmd->core_id);
> + dp_netdev_del_pmd(dp, pmd);
> + }
> + deleted = hmapx_count(&to_delete);
> + hmapx_destroy(&to_delete);
>
> - /* Destroy the old and recreate the new pmd threads. We don't perform an
> - * incremental update because we would have to adjust 'static_tx_qid'. */
> - if (changed) {
> - struct ovs_numa_info_core *core;
> - struct ovs_numa_info_numa *numa;
> -
> - /* Do not destroy the non pmd thread. */
> - dp_netdev_destroy_all_pmds(dp, false);
> - FOR_EACH_CORE_ON_DUMP (core, pmd_cores) {
> - struct dp_netdev_pmd_thread *pmd = xzalloc(sizeof *pmd);
> -
> + /* Check for required new pmd threads */
> + FOR_EACH_CORE_ON_DUMP(core, pmd_cores) {
> + pmd = dp_netdev_get_pmd(dp, core->core_id);
> + if (!pmd) {
> + pmd = xzalloc(sizeof *pmd);
> dp_netdev_configure_pmd(pmd, dp, core->core_id, core->numa_id);
> -
> pmd->thread = ovs_thread_create("pmd", pmd_thread_main, pmd);
> + VLOG_INFO("PMD thread on numa_id: %d, core id: %2d created.",
> + pmd->numa_id, pmd->core_id);
> + created++;
> + } else {
> + dp_netdev_pmd_unref(pmd);
> }
> + }
> +
> + if (deleted || created) {
> + struct ovs_numa_info_numa *numa;
>
> /* Log the number of pmd threads per numa node. */
> FOR_EACH_NUMA_ON_DUMP (numa, pmd_cores) {
> - VLOG_INFO("Created %"PRIuSIZE" pmd threads on numa node %d",
> + VLOG_INFO("There are %"PRIuSIZE" pmd threads on numa node %d",
> numa->n_cores, numa->numa_id);
> }
> }
>
> ovs_numa_dump_destroy(pmd_cores);
> +
> + if (deleted > created) {
> + /* Static tx qids are not sequential now. Mark all threads for
> + * reload to fix this. They will be reloaded after setting the new
> + * txq numbers for ports to avoid using of not allocated queues by
> + * old PMD threads. Newly created threads has no tx queues yet. */
I'm not sure this is easy to maintain.
What if we have something like:
1. delete old pmd threads
2. reconfigure txqs
3. add new pmd threads
do you this it will be clearer?
> + CMAP_FOR_EACH(pmd, node, &dp->poll_threads) {
> + if (pmd->core_id != NON_PMD_CORE_ID) {
> + pmd->need_reload = true;
> + }
> + }
> + }
> }
>
> static void
> @@ -3541,6 +3577,29 @@ pmd_load_cached_ports(struct dp_netdev_pmd_thread *pmd)
> }
> }
>
> +static void
> +pmd_alloc_static_tx_qid(struct dp_netdev_pmd_thread *pmd)
> +{
> + ovs_mutex_lock(&pmd->dp->tx_qid_pool_mutex);
> + if (!id_pool_alloc_id(pmd->dp->tx_qid_pool, &pmd->static_tx_qid)) {
> + VLOG_ERR("static_tx_qid allocation failed for PMD on core %2d"
> + ", numa_id %d.", pmd->core_id, pmd->numa_id);
> + OVS_NOT_REACHED();
> + }
> + ovs_mutex_unlock(&pmd->dp->tx_qid_pool_mutex);
> +
> + VLOG_DBG("static_tx_qid = %d allocated for PMD thread on core %2d"
> + ", numa_id %d.", pmd->static_tx_qid, pmd->core_id,
> pmd->numa_id);
> +}
> +
> +static void
> +pmd_free_static_tx_qid(struct dp_netdev_pmd_thread *pmd)
> +{
> + ovs_mutex_lock(&pmd->dp->tx_qid_pool_mutex);
> + id_pool_free_id(pmd->dp->tx_qid_pool, pmd->static_tx_qid);
> + ovs_mutex_unlock(&pmd->dp->tx_qid_pool_mutex);
> +}
> +
> static int
> pmd_load_queues_and_ports(struct dp_netdev_pmd_thread *pmd,
> struct polled_queue **ppoll_list)
> @@ -3586,6 +3645,7 @@ pmd_thread_main(void *f_)
> dpdk_set_lcore_id(pmd->core_id);
> poll_cnt = pmd_load_queues_and_ports(pmd, &poll_list);
> reload:
> + pmd_alloc_static_tx_qid(pmd);
> emc_cache_init(&pmd->flow_cache);
>
> /* List port/core affinity */
> @@ -3634,6 +3694,7 @@ reload:
> dp_netdev_pmd_reload_done(pmd);
>
> emc_cache_uninit(&pmd->flow_cache);
> + pmd_free_static_tx_qid(pmd);
>
> if (!exiting) {
> goto reload;
> @@ -3760,8 +3821,6 @@ dp_netdev_configure_pmd(struct dp_netdev_pmd_thread
> *pmd, struct dp_netdev *dp,
> pmd->numa_id = numa_id;
> pmd->need_reload = false;
>
> - *CONST_CAST(int *, &pmd->static_tx_qid) = cmap_count(&dp->poll_threads);
> -
> ovs_refcount_init(&pmd->ref_cnt);
> latch_init(&pmd->exit_latch);
> pmd->reload_seq = seq_create();
> @@ -3782,6 +3841,7 @@ dp_netdev_configure_pmd(struct dp_netdev_pmd_thread
> *pmd, struct dp_netdev *dp,
> * actual thread created for NON_PMD_CORE_ID. */
> if (core_id == NON_PMD_CORE_ID) {
> emc_cache_init(&pmd->flow_cache);
> + pmd_alloc_static_tx_qid(pmd);
> }
> cmap_insert(&dp->poll_threads, CONST_CAST(struct cmap_node *,
> &pmd->node),
> hash_int(core_id, 0));
> @@ -3824,6 +3884,7 @@ dp_netdev_del_pmd(struct dp_netdev *dp, struct
> dp_netdev_pmd_thread *pmd)
> ovs_mutex_lock(&dp->non_pmd_mutex);
> emc_cache_uninit(&pmd->flow_cache);
> pmd_free_cached_ports(pmd);
> + pmd_free_static_tx_qid(pmd);
> ovs_mutex_unlock(&dp->non_pmd_mutex);
> } else {
> latch_set(&pmd->exit_latch);
> diff --git a/tests/pmd.at b/tests/pmd.at
> index 5686bed..43bdb1b 100644
> --- a/tests/pmd.at
> +++ b/tests/pmd.at
> @@ -38,7 +38,7 @@ dnl
> dnl Whaits for creation of 'n_threads' or at least 1 thread if $1 not
> dnl passed. Checking starts from line number 'line' in ovs-vswithd.log .
> m4_define([CHECK_PMD_THREADS_CREATED], [
> - PATTERN="Created [[0-9]]* pmd threads on numa node $2"
> + PATTERN="There are [[0-9]]* pmd threads on numa node $2"
> line_st=$3
> if [[ -z "$line_st" ]]
> then
> --
> 2.7.4
>
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev