Superseded by "[PATCH RFC] dpif-netdev: Rework of rx queue management." http://openvswitch.org/pipermail/dev/2015-December/063920.html
On 22.12.2015 17:26, Ilya Maximets wrote: > While reloading, two PMD threads, one already reloaded and > one not yet reloaded, can poll the same queue of the same port. > > Fix that by introducing per numa node barriers prior to polling. > > Also, as soon as we have such barriers, we may replace the > separated configuration and starting of threads in > dp_netdev_set_pmds_on_numa() with barrier right after > first-time configuration of PMD threads. > > Signed-off-by: Ilya Maximets <i.maxim...@samsung.com> > --- > lib/dpif-netdev.c | 53 ++++++++++++++++++++++++++++++++++++++++------------- > 1 file changed, 40 insertions(+), 13 deletions(-) > > diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c > index 3efbed0..ad4a665 100644 > --- a/lib/dpif-netdev.c > +++ b/lib/dpif-netdev.c > @@ -212,6 +212,8 @@ struct dp_netdev { > > /* Stores all 'struct dp_netdev_pmd_thread's. */ > struct cmap poll_threads; > + /* Per numa node reconfiguration barriers for pmd threads. */ > + struct ovs_barrier *numa_barriers; > > /* Protects the access of the 'struct dp_netdev_pmd_thread' > * instance for non-pmd thread. 
*/ > @@ -822,7 +824,7 @@ create_dp_netdev(const char *name, const struct > dpif_class *class, > OVS_REQUIRES(dp_netdev_mutex) > { > struct dp_netdev *dp; > - int error; > + int n_numas, i, error; > > dp = xzalloc(sizeof *dp); > shash_add(&dp_netdevs, name, dp); > @@ -834,6 +836,13 @@ create_dp_netdev(const char *name, const struct > dpif_class *class, > > ovs_mutex_init(&dp->port_mutex); > cmap_init(&dp->ports); > + > + n_numas = ovs_numa_get_n_numas(); > + dp->numa_barriers = xmalloc(n_numas * sizeof *dp->numa_barriers); > + for (i = 0; i < n_numas; i++) { > + ovs_barrier_init(&dp->numa_barriers[i], 0); > + } > + > dp->port_seq = seq_create(); > fat_rwlock_init(&dp->upcall_rwlock); > > @@ -906,11 +915,19 @@ dp_netdev_free(struct dp_netdev *dp) > OVS_REQUIRES(dp_netdev_mutex) > { > struct dp_netdev_port *port; > + int i, n_numas; > > shash_find_and_delete(&dp_netdevs, dp->name); > > dp_netdev_destroy_all_pmds(dp); > cmap_destroy(&dp->poll_threads); > + > + n_numas = ovs_numa_get_n_numas(); > + for (i = 0; i < n_numas; i++) { > + ovs_barrier_destroy(&dp->numa_barriers[i]); > + } > + free(dp->numa_barriers); > + > ovs_mutex_destroy(&dp->non_pmd_mutex); > ovsthread_key_delete(dp->per_pmd_key); > > @@ -2651,6 +2668,9 @@ pmd_thread_main(void *f_) > /* Stores the pmd thread's 'pmd' to 'per_pmd_key'. */ > ovsthread_setspecific(pmd->dp->per_pmd_key, pmd); > pmd_thread_setaffinity_cpu(pmd->core_id); > + /* Synchronize to be sure that all PMD threads of this numa > + * node are already configured before pmd_load_queues(). */ > + ovs_barrier_block(&pmd->dp->numa_barriers[pmd->numa_id]); > reload: > emc_cache_init(&pmd->flow_cache); > poll_cnt = pmd_load_queues(pmd, &poll_list, poll_cnt); > @@ -2663,6 +2683,10 @@ reload: > /* Signal here to make sure the pmd finishes > * reloading the updated configuration. 
*/ > dp_netdev_pmd_reload_done(pmd); > + /* Synchronize all threads of this numa node to avoid situation > + * when two threads (one already reloaded and one not yet > + * reloaded) try to receive on the same queue of the same port. */ > + ovs_barrier_block(&pmd->dp->numa_barriers[pmd->numa_id]); > > for (;;) { > int i; > @@ -2906,6 +2930,14 @@ dp_netdev_del_pmds_on_numa(struct dp_netdev *dp, int > numa_id) > } > } > > +/* Changes size of given numa barrier. *barrier must be initialized. */ > +static void > +dp_netdev_reset_numa_barrier(struct ovs_barrier *barrier, uint32_t size) > +{ > + ovs_barrier_destroy(barrier); > + ovs_barrier_init(barrier, size); > +} > + > /* Checks the numa node id of 'netdev' and starts pmd threads for > * the numa node. */ > static void > @@ -2926,7 +2958,6 @@ dp_netdev_set_pmds_on_numa(struct dp_netdev *dp, int > numa_id) > * pmd threads for the numa node. */ > if (!n_pmds) { > int can_have, n_unpinned, i; > - struct dp_netdev_pmd_thread **pmds; > > n_unpinned = ovs_numa_get_n_unpinned_cores_on_numa(numa_id); > if (!n_unpinned) { > @@ -2938,22 +2969,18 @@ dp_netdev_set_pmds_on_numa(struct dp_netdev *dp, int > numa_id) > /* If cpu mask is specified, uses all unpinned cores, otherwise > * tries creating NR_PMD_THREADS pmd threads. */ > can_have = dp->pmd_cmask ? n_unpinned : MIN(n_unpinned, > NR_PMD_THREADS); > - pmds = xzalloc(can_have * sizeof *pmds); > + > + dp_netdev_reset_numa_barrier(&dp->numa_barriers[numa_id], can_have); > + > for (i = 0; i < can_have; i++) { > + struct dp_netdev_pmd_thread *pmd = xzalloc(sizeof *pmd); > unsigned core_id = ovs_numa_get_unpinned_core_on_numa(numa_id); > - pmds[i] = xzalloc(sizeof **pmds); > - dp_netdev_configure_pmd(pmds[i], dp, i, core_id, numa_id); > - } > - /* The pmd thread code needs to see all the others configured pmd > - * threads on the same numa node. That's why we call > - * 'dp_netdev_configure_pmd()' on all the threads and then we > actually > - * start them. 
*/ > - for (i = 0; i < can_have; i++) { > + > + dp_netdev_configure_pmd(pmd, dp, i, core_id, numa_id); > /* Each thread will distribute all devices rx-queues among > * themselves. */ > - pmds[i]->thread = ovs_thread_create("pmd", pmd_thread_main, > pmds[i]); > + pmd->thread = ovs_thread_create("pmd", pmd_thread_main, pmd); > } > - free(pmds); > VLOG_INFO("Created %d pmd threads on numa node %d", can_have, > numa_id); > } > } > _______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev