Superseded by "[PATCH RFC] dpif-netdev: Rework of rx queue management."
http://openvswitch.org/pipermail/dev/2015-December/063920.html

On 22.12.2015 17:26, Ilya Maximets wrote:
> While reloading, two PMD threads, one already reloaded and
> one not yet reloaded, can poll the same queue of the same port.
> 
> Fix that by introducing per numa node barriers prior to polling.
> 
> Also, as soon as we have such barriers, we may replace the
> separated configuration and starting of threads in
> dp_netdev_set_pmds_on_numa() with a barrier right after
> first-time configuration of PMD threads.
> 
> Signed-off-by: Ilya Maximets <i.maxim...@samsung.com>
> ---
>  lib/dpif-netdev.c | 53 ++++++++++++++++++++++++++++++++++++++++-------------
>  1 file changed, 40 insertions(+), 13 deletions(-)
> 
> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
> index 3efbed0..ad4a665 100644
> --- a/lib/dpif-netdev.c
> +++ b/lib/dpif-netdev.c
> @@ -212,6 +212,8 @@ struct dp_netdev {
>  
>      /* Stores all 'struct dp_netdev_pmd_thread's. */
>      struct cmap poll_threads;
> +    /* Per numa node reconfiguration barriers for pmd threads. */
> +    struct ovs_barrier *numa_barriers;
>  
>      /* Protects the access of the 'struct dp_netdev_pmd_thread'
>       * instance for non-pmd thread. */
> @@ -822,7 +824,7 @@ create_dp_netdev(const char *name, const struct 
> dpif_class *class,
>      OVS_REQUIRES(dp_netdev_mutex)
>  {
>      struct dp_netdev *dp;
> -    int error;
> +    int n_numas, i, error;
>  
>      dp = xzalloc(sizeof *dp);
>      shash_add(&dp_netdevs, name, dp);
> @@ -834,6 +836,13 @@ create_dp_netdev(const char *name, const struct 
> dpif_class *class,
>  
>      ovs_mutex_init(&dp->port_mutex);
>      cmap_init(&dp->ports);
> +
> +    n_numas = ovs_numa_get_n_numas();
> +    dp->numa_barriers = xmalloc(n_numas * sizeof *dp->numa_barriers);
> +    for (i = 0; i < n_numas; i++) {
> +        ovs_barrier_init(&dp->numa_barriers[i], 0);
> +    }
> +
>      dp->port_seq = seq_create();
>      fat_rwlock_init(&dp->upcall_rwlock);
>  
> @@ -906,11 +915,19 @@ dp_netdev_free(struct dp_netdev *dp)
>      OVS_REQUIRES(dp_netdev_mutex)
>  {
>      struct dp_netdev_port *port;
> +    int i, n_numas;
>  
>      shash_find_and_delete(&dp_netdevs, dp->name);
>  
>      dp_netdev_destroy_all_pmds(dp);
>      cmap_destroy(&dp->poll_threads);
> +
> +    n_numas = ovs_numa_get_n_numas();
> +    for (i = 0; i < n_numas; i++) {
> +        ovs_barrier_destroy(&dp->numa_barriers[i]);
> +    }
> +    free(dp->numa_barriers);
> +
>      ovs_mutex_destroy(&dp->non_pmd_mutex);
>      ovsthread_key_delete(dp->per_pmd_key);
>  
> @@ -2651,6 +2668,9 @@ pmd_thread_main(void *f_)
>      /* Stores the pmd thread's 'pmd' to 'per_pmd_key'. */
>      ovsthread_setspecific(pmd->dp->per_pmd_key, pmd);
>      pmd_thread_setaffinity_cpu(pmd->core_id);
> +    /* Synchronize to be sure that all PMD threads of this numa
> +     * node are already configured before pmd_load_queues(). */
> +    ovs_barrier_block(&pmd->dp->numa_barriers[pmd->numa_id]);
>  reload:
>      emc_cache_init(&pmd->flow_cache);
>      poll_cnt = pmd_load_queues(pmd, &poll_list, poll_cnt);
> @@ -2663,6 +2683,10 @@ reload:
>      /* Signal here to make sure the pmd finishes
>       * reloading the updated configuration. */
>      dp_netdev_pmd_reload_done(pmd);
> +    /* Synchronize all threads of this numa node to avoid the situation
> +     * where two threads (one already reloaded and one not yet
> +     * reloaded) try to receive on the same queue of the same port. */
> +    ovs_barrier_block(&pmd->dp->numa_barriers[pmd->numa_id]);
>  
>      for (;;) {
>          int i;
> @@ -2906,6 +2930,14 @@ dp_netdev_del_pmds_on_numa(struct dp_netdev *dp, int 
> numa_id)
>      }
>  }
>  
> +/* Changes size of given numa barrier. *barrier must be initialized. */
> +static void
> +dp_netdev_reset_numa_barrier(struct ovs_barrier *barrier, uint32_t size)
> +{
> +    ovs_barrier_destroy(barrier);
> +    ovs_barrier_init(barrier, size);
> +}
> +
>  /* Checks the numa node id of 'netdev' and starts pmd threads for
>   * the numa node. */
>  static void
> @@ -2926,7 +2958,6 @@ dp_netdev_set_pmds_on_numa(struct dp_netdev *dp, int 
> numa_id)
>       * pmd threads for the numa node. */
>      if (!n_pmds) {
>          int can_have, n_unpinned, i;
> -        struct dp_netdev_pmd_thread **pmds;
>  
>          n_unpinned = ovs_numa_get_n_unpinned_cores_on_numa(numa_id);
>          if (!n_unpinned) {
> @@ -2938,22 +2969,18 @@ dp_netdev_set_pmds_on_numa(struct dp_netdev *dp, int 
> numa_id)
>          /* If cpu mask is specified, uses all unpinned cores, otherwise
>           * tries creating NR_PMD_THREADS pmd threads. */
>          can_have = dp->pmd_cmask ? n_unpinned : MIN(n_unpinned, 
> NR_PMD_THREADS);
> -        pmds = xzalloc(can_have * sizeof *pmds);
> +
> +        dp_netdev_reset_numa_barrier(&dp->numa_barriers[numa_id], can_have);
> +
>          for (i = 0; i < can_have; i++) {
> +            struct dp_netdev_pmd_thread *pmd = xzalloc(sizeof *pmd);
>              unsigned core_id = ovs_numa_get_unpinned_core_on_numa(numa_id);
> -            pmds[i] = xzalloc(sizeof **pmds);
> -            dp_netdev_configure_pmd(pmds[i], dp, i, core_id, numa_id);
> -        }
> -        /* The pmd thread code needs to see all the others configured pmd
> -         * threads on the same numa node.  That's why we call
> -         * 'dp_netdev_configure_pmd()' on all the threads and then we 
> actually
> -         * start them. */
> -        for (i = 0; i < can_have; i++) {
> +
> +            dp_netdev_configure_pmd(pmd, dp, i, core_id, numa_id);
>              /* Each thread will distribute all devices rx-queues among
>               * themselves. */
> -            pmds[i]->thread = ovs_thread_create("pmd", pmd_thread_main, 
> pmds[i]);
> +            pmd->thread = ovs_thread_create("pmd", pmd_thread_main, pmd);
>          }
> -        free(pmds);
>          VLOG_INFO("Created %d pmd threads on numa node %d", can_have, 
> numa_id);
>      }
>  }
> 
_______________________________________________
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev

Reply via email to