> >From: Ilya Maximets [mailto:[email protected]] > >Sent: Monday, June 5, 2017 9:18 AM > >To: Kavanagh, Mark B <[email protected]>; [email protected]; > >Darrell Ball <[email protected]> > >Cc: Heetae Ahn <[email protected]>; Daniele Di Proietto > ><[email protected]>; Ben Pfaff <[email protected]>; Pravin Shelar > ><[email protected]>; Loftus, Ciara <[email protected]>; Stokes, Ian > ><[email protected]>; Kevin Traynor <[email protected]> > >Subject: Re: [ovs-dev] [PATCH v2 1/3] dpif-netdev: Incremental > >addition/deletion of PMD threads. > > > >Thanks for testing, Mark. > > > >Comments inline. > > > >Best regards, Ilya Maximets. > > > >On 01.06.2017 17:30, Kavanagh, Mark B wrote: > >>> From: [email protected] > >>> [mailto:[email protected]] On Behalf > >Of > >>> Ilya Maximets > >>> Sent: Tuesday, May 30, 2017 3:12 PM > >>> To: [email protected]; Darrell Ball <[email protected]> > >>> Cc: Heetae Ahn <[email protected]>; Ilya Maximets > >>> <[email protected]> > >>> Subject: [ovs-dev] [PATCH v2 1/3] dpif-netdev: Incremental > >>> addition/deletion of PMD > >threads. > >>> > >>> Currently, change of 'pmd-cpu-mask' is very heavy operation. > >>> It requires destroying of all the PMD threads and creating them > >>> back. After that, all the threads will sleep until ports' > >>> redistribution finished. > >>> > >>> This patch adds ability to not stop the datapath while adjusting > >>> number/placement of PMD threads. All not affected threads will > >>> forward traffic without any additional latencies. > >>> > >>> id-pool created for static tx queue ids to keep them sequential in a > >>> flexible way. non-PMD thread will always have static_tx_qid = 0 as > >>> it was before. > >>> > >>> Signed-off-by: Ilya Maximets <[email protected]> > >> > >> Hi Ilya, > >> > >> Patch LGTM - just once comment inline. 
> >> > >> I conducted some light testing and observed the following: > >> - temporary dip in forwarding rate (~50%) when modifying > >> pmd-cpu-mask (in previous > >implementation, forwarding stopped completely) > >> - occasional drop to zero in forwarding rate when modifying > pmd-cpu-mask (e.g. > >change from '2' to '12') > > > >I guess, the drop to zero in this case could be fixed by the second patch of > >the series. (If it is caused by port reconfiguration.) > > Yes, that's true - my colleague, Ian Stokes, will review and test that > patch shortly.
Just a follow up here, I've tested the avoid port reconfiguration patch and it removes the drop caused by reconfiguration. https://mail.openvswitch.org/pipermail/ovs-dev/2017-June/334123.html Ian > > > > >> - temporary drop of ~1mpps when adding/deleting port (similar to > >> existing > >implementation) > >> > >> I've also conducted the following sanity checks on the patch, and found > no issues: > >> - checkpatch.py > >> - compilation with clang > >> - compilation with gcc > >> - sparse > >> - make check (no additional tests fail after application of this > >> patch) > >> > >> Tested-by: Mark Kavanagh <[email protected]> > >> > >> Thanks, > >> Mark > >> > >>> --- > >>> lib/dpif-netdev.c | 143 +++++++++++++++++++++++++++++++++++++++------- > -------- > >>> tests/pmd.at | 2 +- > >>> 2 files changed, 105 insertions(+), 40 deletions(-) > >>> > >>> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index > >>> b50164b..596d133 100644 > >>> --- a/lib/dpif-netdev.c > >>> +++ b/lib/dpif-netdev.c > >>> @@ -48,6 +48,7 @@ > >>> #include "fat-rwlock.h" > >>> #include "flow.h" > >>> #include "hmapx.h" > >>> +#include "id-pool.h" > >>> #include "latch.h" > >>> #include "netdev.h" > >>> #include "netdev-vport.h" > >>> @@ -277,6 +278,9 @@ struct dp_netdev { > >>> > >>> /* Stores all 'struct dp_netdev_pmd_thread's. */ > >>> struct cmap poll_threads; > >>> + /* id pool for per thread static_tx_qid. */ > >>> + struct id_pool *tx_qid_pool; > >>> + struct ovs_mutex tx_qid_pool_mutex; > >>> > >>> /* Protects the access of the 'struct dp_netdev_pmd_thread' > >>> * instance for non-pmd thread. */ @@ -562,7 +566,7 @@ struct > >>> dp_netdev_pmd_thread { > >>> /* Queue id used by this pmd thread to send packets on all netdevs > if > >>> * XPS disabled for this netdev. All static_tx_qid's are unique > and less > >>> * than 'cmap_count(dp->poll_threads)'. 
*/ > >>> - const int static_tx_qid; > >>> + uint32_t static_tx_qid; > >>> > >>> struct ovs_mutex port_mutex; /* Mutex for 'poll_list' and > 'tx_ports'. */ > >>> /* List of rx queues to poll. */ @@ -642,6 +646,8 @@ static > >>> struct dp_netdev_pmd_thread *dp_netdev_get_pmd(struct dp_netdev *dp, > >>> unsigned > >>> core_id); static struct dp_netdev_pmd_thread * > >>> dp_netdev_pmd_get_next(struct dp_netdev *dp, struct cmap_position > >>> *pos); > >>> +static void dp_netdev_del_pmd(struct dp_netdev *dp, > >>> + struct dp_netdev_pmd_thread *pmd); > >>> static void dp_netdev_destroy_all_pmds(struct dp_netdev *dp, bool > >>> non_pmd); static void dp_netdev_pmd_clear_ports(struct > >>> dp_netdev_pmd_thread *pmd); static void > >>> dp_netdev_add_port_tx_to_pmd(struct dp_netdev_pmd_thread *pmd, @@ - > 1181,10 +1187,17 @@ create_dp_netdev(const char *name, const struct > dpif_class *class, > >>> atomic_init(&dp->emc_insert_min, DEFAULT_EM_FLOW_INSERT_MIN); > >>> > >>> cmap_init(&dp->poll_threads); > >>> + > >>> + ovs_mutex_init(&dp->tx_qid_pool_mutex); > >>> + /* We need 1 Tx queue for each possible core + 1 for non-PMD > threads. */ > >>> + dp->tx_qid_pool = id_pool_create(0, ovs_numa_get_n_cores() + > >>> + 1); > >>> + > >>> ovs_mutex_init_recursive(&dp->non_pmd_mutex); > >>> ovsthread_key_create(&dp->per_pmd_key, NULL); > >>> > >>> ovs_mutex_lock(&dp->port_mutex); > >>> + /* non-PMD will be created before all other threads and will > >>> + * allocate static_tx_qid = 0. 
*/ > >>> dp_netdev_set_nonpmd(dp); > >>> > >>> error = do_add_port(dp, name, > >>> dpif_netdev_port_open_type(dp->class, > >>> @@ -1279,6 +1292,9 @@ dp_netdev_free(struct dp_netdev *dp) > >>> dp_netdev_destroy_all_pmds(dp, true); > >>> cmap_destroy(&dp->poll_threads); > >>> > >>> + ovs_mutex_destroy(&dp->tx_qid_pool_mutex); > >>> + id_pool_destroy(dp->tx_qid_pool); > >>> + > >>> ovs_mutex_destroy(&dp->non_pmd_mutex); > >>> ovsthread_key_delete(dp->per_pmd_key); > >>> > >>> @@ -3302,12 +3318,29 @@ rxq_scheduling(struct dp_netdev *dp, bool > >>> pinned) OVS_REQUIRES(dp- > >>>> port_mutex) > >>> } > >>> > >>> static void > >>> +reload_affected_pmds(struct dp_netdev *dp) { > >>> + struct dp_netdev_pmd_thread *pmd; > >>> + > >>> + CMAP_FOR_EACH (pmd, node, &dp->poll_threads) { > >>> + if (pmd->need_reload) { > >>> + dp_netdev_reload_pmd__(pmd); > >>> + pmd->need_reload = false; > >>> + } > >>> + } > >>> +} > >>> + > >>> +static void > >>> reconfigure_pmd_threads(struct dp_netdev *dp) > >>> OVS_REQUIRES(dp->port_mutex) > >>> { > >>> struct dp_netdev_pmd_thread *pmd; > >>> struct ovs_numa_dump *pmd_cores; > >>> + struct ovs_numa_info_core *core; > >>> + struct hmapx to_delete = HMAPX_INITIALIZER(&to_delete); > >>> + struct hmapx_node *node; > >>> bool changed = false; > >>> + bool need_to_adjust_static_tx_qids = false; > >>> > >>> /* The pmd threads should be started only if there's a pmd port in > the > >>> * datapath. 
If the user didn't provide any "pmd-cpu-mask", we > >>> start @@ -3320,40 +3353,61 @@ reconfigure_pmd_threads(struct dp_netdev > *dp) > >>> pmd_cores = ovs_numa_dump_n_cores_per_numa(NR_PMD_THREADS); > >>> } > >>> > >>> - /* Check for changed configuration */ > >>> - if (ovs_numa_dump_count(pmd_cores) != cmap_count(&dp- > >poll_threads) - 1) { > >>> - changed = true; > >>> - } else { > >>> - CMAP_FOR_EACH (pmd, node, &dp->poll_threads) { > >>> - if (pmd->core_id != NON_PMD_CORE_ID > >>> - && !ovs_numa_dump_contains_core(pmd_cores, > >>> - pmd->numa_id, > >>> - pmd->core_id)) { > >>> - changed = true; > >>> - break; > >>> - } > >>> + /* We need to adjust 'static_tx_qid's only if we're reducing > number of > >>> + * PMD threads. Otherwise, new threads will allocate all the > >>> + freed ids. */ > >> > >> Can you elaborate on why the static_tx_qids need to be adjusted - > >> it's still not clear to > >me from the comment. > > > >I think, currently, the main case for keeping tx queue ids sequential > >is the vhost-user ports with big number of queues but disabled by > >driver in guest. > > > >If vhost-user device has enough number of queues to not use dynamic > >distribution (XPS) and some of queues will be disabled inside the > >guest, we will face almost same issue as described in 347ba9bb9b7a > >("dpif-netdev: Unique and sequential tx_qids.") > > > >For example: > > > > We may have 4 threads and vhost-user port with 4 queues. > > Lets assume that static tx qids distributed as follows: > > thread #0 --> queue #0 > > thread #1 --> queue #1 > > thread #2 --> queue #3 > > thread #3 --> queue #2 > > Now guest disables queue #3 (ethtool -L eth0 combined 3) > > And we're manually changing pmd-cpu-mask to remove thread #3. > > (You need to be sure that threads wasn't reloaded here.) 
> > At this point, if tx queue ids wasn't adjusted, we will have: > > thread #0 --> queue #0 --> queue #0 > > thread #1 --> queue #1 --> queue #1 > > thread #2 --> queue #3 --> queue #0 > > Where the last column is the result of remapping inside > > __netdev_dpdk_vhost_send(). > > So, in this case, there will be 3 queues available, but > > only 2 used by 3 threads. --> unused queue #2 and locking > > on access to queue #0. > > Okay, I got it - thanks for the detailed explanation Ilya. > > With that, Acked-by: Mark Kavanagh <[email protected]> > > > > > >>> + if (ovs_numa_dump_count(pmd_cores) < cmap_count(&dp- > >poll_threads) - 1) { > >>> + need_to_adjust_static_tx_qids = true; > >>> + } > >>> + > >>> + /* Check for unwanted pmd threads */ > >>> + CMAP_FOR_EACH(pmd, node, &dp->poll_threads) { > >>> + if (pmd->core_id == NON_PMD_CORE_ID) { > >>> + continue; > >>> + } > >>> + if (!ovs_numa_dump_contains_core(pmd_cores, pmd->numa_id, > >>> + pmd->core_id)) { > >>> + hmapx_add(&to_delete, pmd); > >>> + } else if (need_to_adjust_static_tx_qids) { > >>> + pmd->need_reload = true; > >>> } > >>> } > >>> > >>> - /* Destroy the old and recreate the new pmd threads. We don't > perform an > >>> - * incremental update because we would have to adjust > 'static_tx_qid'. */ > >>> - if (changed) { > >>> - struct ovs_numa_info_core *core; > >>> - struct ovs_numa_info_numa *numa; > >>> + HMAPX_FOR_EACH (node, &to_delete) { > >>> + pmd = (struct dp_netdev_pmd_thread *) node->data; > >>> + VLOG_INFO("PMD thread on numa_id: %d, core id: %2d > destroyed.", > >>> + pmd->numa_id, pmd->core_id); > >>> + dp_netdev_del_pmd(dp, pmd); > >>> + } > >>> + changed = !hmapx_is_empty(&to_delete); > >>> + hmapx_destroy(&to_delete); > >>> > >>> - /* Do not destroy the non pmd thread. 
*/ > >>> - dp_netdev_destroy_all_pmds(dp, false); > >>> - FOR_EACH_CORE_ON_DUMP (core, pmd_cores) { > >>> - struct dp_netdev_pmd_thread *pmd = xzalloc(sizeof *pmd); > >>> + if (need_to_adjust_static_tx_qids) { > >>> + /* 'static_tx_qid's are not sequential now. > >>> + * Reload remaining threads to fix this. */ > >>> + reload_affected_pmds(dp); > >>> + } > >>> > >>> + /* Check for required new pmd threads */ > >>> + FOR_EACH_CORE_ON_DUMP(core, pmd_cores) { > >>> + pmd = dp_netdev_get_pmd(dp, core->core_id); > >>> + if (!pmd) { > >>> + pmd = xzalloc(sizeof *pmd); > >>> dp_netdev_configure_pmd(pmd, dp, core->core_id, > >>> core->numa_id); > >>> - > >>> pmd->thread = ovs_thread_create("pmd", pmd_thread_main, > >>> pmd); > >>> + VLOG_INFO("PMD thread on numa_id: %d, core id: %2d > created.", > >>> + pmd->numa_id, pmd->core_id); > >>> + changed = true; > >>> + } else { > >>> + dp_netdev_pmd_unref(pmd); > >>> } > >>> + } > >>> + > >>> + if (changed) { > >>> + struct ovs_numa_info_numa *numa; > >>> > >>> /* Log the number of pmd threads per numa node. 
*/ > >>> FOR_EACH_NUMA_ON_DUMP (numa, pmd_cores) { > >>> - VLOG_INFO("Created %"PRIuSIZE" pmd threads on numa node > %d", > >>> + VLOG_INFO("There are %"PRIuSIZE" pmd threads on numa > >>> + node %d", > >>> numa->n_cores, numa->numa_id); > >>> } > >>> } > >>> @@ -3362,19 +3416,6 @@ reconfigure_pmd_threads(struct dp_netdev *dp) > >>> } > >>> > >>> static void > >>> -reload_affected_pmds(struct dp_netdev *dp) -{ > >>> - struct dp_netdev_pmd_thread *pmd; > >>> - > >>> - CMAP_FOR_EACH (pmd, node, &dp->poll_threads) { > >>> - if (pmd->need_reload) { > >>> - dp_netdev_reload_pmd__(pmd); > >>> - pmd->need_reload = false; > >>> - } > >>> - } > >>> -} > >>> - > >>> -static void > >>> pmd_remove_stale_ports(struct dp_netdev *dp, > >>> struct dp_netdev_pmd_thread *pmd) > >>> OVS_EXCLUDED(pmd->port_mutex) > >>> @@ -3673,6 +3714,28 @@ pmd_load_cached_ports(struct > dp_netdev_pmd_thread *pmd) > >>> } > >>> } > >>> > >>> +static void > >>> +pmd_alloc_static_tx_qid(struct dp_netdev_pmd_thread *pmd) { > >>> + ovs_mutex_lock(&pmd->dp->tx_qid_pool_mutex); > >>> + if (!id_pool_alloc_id(pmd->dp->tx_qid_pool, &pmd->static_tx_qid)) > { > >>> + VLOG_ABORT("static_tx_qid allocation failed for PMD on core > %2d" > >>> + ", numa_id %d.", pmd->core_id, pmd->numa_id); > >>> + } > >>> + ovs_mutex_unlock(&pmd->dp->tx_qid_pool_mutex); > >>> + > >>> + VLOG_DBG("static_tx_qid = %d allocated for PMD thread on core > %2d" > >>> + ", numa_id %d.", pmd->static_tx_qid, pmd->core_id, > >>> +pmd->numa_id); } > >>> + > >>> +static void > >>> +pmd_free_static_tx_qid(struct dp_netdev_pmd_thread *pmd) { > >>> + ovs_mutex_lock(&pmd->dp->tx_qid_pool_mutex); > >>> + id_pool_free_id(pmd->dp->tx_qid_pool, pmd->static_tx_qid); > >>> + ovs_mutex_unlock(&pmd->dp->tx_qid_pool_mutex); > >>> +} > >>> + > >>> static int > >>> pmd_load_queues_and_ports(struct dp_netdev_pmd_thread *pmd, > >>> struct polled_queue **ppoll_list) @@ > >>> -3718,6 +3781,7 @@ pmd_thread_main(void *f_) > >>> dpdk_set_lcore_id(pmd->core_id); > >>> 
poll_cnt = pmd_load_queues_and_ports(pmd, &poll_list); > >>> reload: > >>> + pmd_alloc_static_tx_qid(pmd); > >>> emc_cache_init(&pmd->flow_cache); > >>> > >>> /* List port/core affinity */ > >>> @@ -3766,6 +3830,7 @@ reload: > >>> dp_netdev_pmd_reload_done(pmd); > >>> > >>> emc_cache_uninit(&pmd->flow_cache); > >>> + pmd_free_static_tx_qid(pmd); > >>> > >>> if (!exiting) { > >>> goto reload; > >>> @@ -4176,8 +4241,6 @@ dp_netdev_configure_pmd(struct > >>> dp_netdev_pmd_thread *pmd, struct dp_netdev *dp, > >>> pmd->numa_id = numa_id; > >>> pmd->need_reload = false; > >>> > >>> - *CONST_CAST(int *, &pmd->static_tx_qid) = cmap_count(&dp- > >poll_threads); > >>> - > >>> ovs_refcount_init(&pmd->ref_cnt); > >>> latch_init(&pmd->exit_latch); > >>> pmd->reload_seq = seq_create(); > >>> @@ -4198,6 +4261,7 @@ dp_netdev_configure_pmd(struct > >>> dp_netdev_pmd_thread *pmd, struct dp_netdev *dp, > >>> * actual thread created for NON_PMD_CORE_ID. */ > >>> if (core_id == NON_PMD_CORE_ID) { > >>> emc_cache_init(&pmd->flow_cache); > >>> + pmd_alloc_static_tx_qid(pmd); > >>> } > >>> cmap_insert(&dp->poll_threads, CONST_CAST(struct cmap_node *, > &pmd->node), > >>> hash_int(core_id, 0)); @@ -4240,6 +4304,7 @@ > >>> dp_netdev_del_pmd(struct dp_netdev *dp, struct dp_netdev_pmd_thread > >>> *pmd) > >>> ovs_mutex_lock(&dp->non_pmd_mutex); > >>> emc_cache_uninit(&pmd->flow_cache); > >>> pmd_free_cached_ports(pmd); > >>> + pmd_free_static_tx_qid(pmd); > >>> ovs_mutex_unlock(&dp->non_pmd_mutex); > >>> } else { > >>> latch_set(&pmd->exit_latch); diff --git a/tests/pmd.at > >>> b/tests/pmd.at index 05755b3..39a3645 100644 > >>> --- a/tests/pmd.at > >>> +++ b/tests/pmd.at > >>> @@ -38,7 +38,7 @@ dnl > >>> dnl Whaits for creation of 'n_threads' or at least 1 thread if $1 > >>> not dnl passed. Checking starts from line number 'line' in ovs- > vswithd.log . 
> >>> m4_define([CHECK_PMD_THREADS_CREATED], [ > >>> - PATTERN="Created [[0-9]]* pmd threads on numa node $2" > >>> + PATTERN="There are [[0-9]]* pmd threads on numa node $2" > >>> line_st=$3 > >>> if [[ -z "$line_st" ]] > >>> then > >>> -- > >>> 2.7.4 > >>> > >>> _______________________________________________ > >>> dev mailing list > >>> [email protected] > >>> https://mail.openvswitch.org/mailman/listinfo/ovs-dev > >> > >> > >> _______________________________________________ dev mailing list [email protected] https://mail.openvswitch.org/mailman/listinfo/ovs-dev
