On 09/07/2020 16:19, Ilya Maximets wrote:
> Main thread will try to pause/stop all revalidators during datapath
> reconfiguration via datapath purge callback (dp_purge_cb) while
> holding 'dp->port_mutex'.  And deadlock happens in case any of
> revalidator threads is already waiting on 'dp->port_mutex' while
> dumping offloaded flows:
> 
>            main thread                           revalidator
>  ---------------------------------  ----------------------------------
> 
>  ovs_mutex_lock(&dp->port_mutex)
> 
>                                     dpif_netdev_flow_dump_next()
>                                     -> dp_netdev_flow_to_dpif_flow
>                                     -> get_dpif_flow_status
>                                     -> dpif_netdev_get_flow_offload_status()
>                                     -> ovs_mutex_lock(&dp->port_mutex)
>                                        <waiting for mutex here>
> 
>  reconfigure_datapath()
>  -> reconfigure_pmd_threads()
>  -> dp_netdev_del_pmd()
>  -> dp_purge_cb()
>  -> udpif_pause_revalidators()
>  -> ovs_barrier_block(&udpif->pause_barrier)
>     <waiting for revalidators to reach barrier>
> 
>                           <DEADLOCK>
> 
> We're not allowed to call offloading API without holding global
> port mutex from the userspace datapath due to thread safety
> restrictions on netdev-ofload-dpdk module.  And it's also not easy

typo s/ofload/offload/

> to rework datapath reconfiguration process in order to move actual
> PMD removal and datapath purge out of the port mutex.
> 
> So, for now, not sleeping on a mutex if it's not immediately available
> seems like the easiest workaround.  This will have an impact on flow
> statistics update rate and on ability to get the latest statistics
> before removing the flow (latest stats will be lost in case we were
> not able to take the mutex).  However, this will allow us to operate
> normally avoiding the deadlock.
> 
> The last bit is that to avoid flapping of flow attributes and
> statistics we're not failing the operation, but returning last
> statistics and attributes returned by offload provider.  Since those
> might be updated in different threads, stores and reads are atomic.
> 

Nice explanation, thanks

> Reported-by: Frank Wang (王培辉) <wangpei...@inspur.com>
> Reported-at: 
> https://mail.openvswitch.org/pipermail/ovs-dev/2020-June/371753.html
> Fixes: a309e4f52660 ("dpif-netdev: Update offloaded flows statistics.")
> Signed-off-by: Ilya Maximets <i.maxim...@ovn.org>
> ---
> 
> Tested with unit tests only.  Needs checking with the real setup.
> Especially stats/attrs update parts.
> 
>  AUTHORS.rst       |  1 +
>  lib/dpif-netdev.c | 78 +++++++++++++++++++++++++++++++++++++++++++----
>  2 files changed, 73 insertions(+), 6 deletions(-)
> 
> diff --git a/AUTHORS.rst b/AUTHORS.rst
> index 8e6a0769f..eb36a01d0 100644
> --- a/AUTHORS.rst
> +++ b/AUTHORS.rst
> @@ -504,6 +504,7 @@ Edwin Chiu                      ec...@vmware.com
>  Eivind Bulie Haanaes
>  Enas Ahmad                      enas.ah...@kaust.edu.sa
>  Eric Lopez
> +Frank Wang (王培辉)             wangpei...@inspur.com
>  Frido Roose                     fr.ro...@gmail.com
>  Gaetano Catalli                 gaetano.cata...@gmail.com
>  Gavin Remaley                   gavin_rema...@selinc.com
> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
> index 629a0cb53..2eb7f32ff 100644
> --- a/lib/dpif-netdev.c
> +++ b/lib/dpif-netdev.c
> @@ -490,6 +490,12 @@ struct dp_netdev_flow_stats {
>      atomic_uint16_t tcp_flags;     /* Bitwise-OR of seen tcp_flags values. */
>  };
>  
> +/* Contained by struct dp_netdev_flow's 'last_attrs' member.  */
> +struct dp_netdev_flow_attrs {
> +    atomic_bool offloaded;         /* True if flow is offloaded to HW. */
> +    ATOMIC(const char *) dp_layer; /* DP layer the flow is handled in. */
> +};
> +
>  /* A flow in 'dp_netdev_pmd_thread's 'flow_table'.
>   *
>   *
> @@ -550,6 +556,10 @@ struct dp_netdev_flow {
>      /* Statistics. */
>      struct dp_netdev_flow_stats stats;
>  
> +    /* Statistics and attributes received from the netdev offload provider. 
> */
> +    struct dp_netdev_flow_stats last_stats;
> +    struct dp_netdev_flow_attrs last_attrs;
> +
>      /* Actions. */
>      OVSRCU_TYPE(struct dp_netdev_actions *) actions;
>  
> @@ -3150,9 +3160,43 @@ dp_netdev_pmd_find_flow(const struct 
> dp_netdev_pmd_thread *pmd,
>      return NULL;
>  }
>  
> +static void
> +dp_netdev_flow_set_last_stats_attrs(struct dp_netdev_flow *netdev_flow,
> +                                    const struct dpif_flow_stats *stats,
> +                                    const struct dpif_flow_attrs *attrs)
> +{
> +    struct dp_netdev_flow_stats *last_stats = &netdev_flow->last_stats;
> +    struct dp_netdev_flow_attrs *last_attrs = &netdev_flow->last_attrs;
> +
> +    atomic_store_relaxed(&last_stats->used,         stats->used);
> +    atomic_store_relaxed(&last_stats->packet_count, stats->n_packets);
> +    atomic_store_relaxed(&last_stats->byte_count,   stats->n_bytes);
> +    atomic_store_relaxed(&last_stats->tcp_flags,    stats->tcp_flags);
> +
> +    atomic_store_relaxed(&last_attrs->offloaded,    attrs->offloaded);
> +    atomic_store_relaxed(&last_attrs->dp_layer,     attrs->dp_layer);
> +}
> +
> +static void
> +dp_netdev_flow_get_last_stats_attrs(struct dp_netdev_flow *netdev_flow,
> +                                    struct dpif_flow_stats *stats,
> +                                    struct dpif_flow_attrs *attrs)
> +{
> +    struct dp_netdev_flow_stats *last_stats = &netdev_flow->last_stats;
> +    struct dp_netdev_flow_attrs *last_attrs = &netdev_flow->last_attrs;
> +
> +    atomic_read_relaxed(&last_stats->used,         &stats->used);
> +    atomic_read_relaxed(&last_stats->packet_count, &stats->n_packets);
> +    atomic_read_relaxed(&last_stats->byte_count,   &stats->n_bytes);
> +    atomic_read_relaxed(&last_stats->tcp_flags,    &stats->tcp_flags);
> +
> +    atomic_read_relaxed(&last_attrs->offloaded,    &attrs->offloaded);
> +    atomic_read_relaxed(&last_attrs->dp_layer,     &attrs->dp_layer);
> +}
> +
>  static bool
>  dpif_netdev_get_flow_offload_status(const struct dp_netdev *dp,
> -                                    const struct dp_netdev_flow *netdev_flow,
> +                                    struct dp_netdev_flow *netdev_flow,
>                                      struct dpif_flow_stats *stats,
>                                      struct dpif_flow_attrs *attrs)
>  {
> @@ -3175,11 +3219,31 @@ dpif_netdev_get_flow_offload_status(const struct 
> dp_netdev *dp,
>      }
>      ofpbuf_use_stack(&buf, &act_buf, sizeof act_buf);
>      /* Taking a global 'port_mutex' to fulfill thread safety
> -     * restrictions for the netdev-offload-dpdk module. */
> -    ovs_mutex_lock(&dp->port_mutex);
> -    ret = netdev_flow_get(netdev, &match, &actions, &netdev_flow->mega_ufid,
> -                          stats, attrs, &buf);
> -    ovs_mutex_unlock(&dp->port_mutex);
> +     * restrictions for the netdev-offload-dpdk module.
> +     *
> +     * XXX: Main thread will try to pause/stop all revalidators during 
> datapath
> +     *      reconfiguration via datapath purge callback (dp_purge_cb) while
> +     *      holding 'dp->port_mutex'.  So we're not waiting for mutex here.
> +     *      Otherwise, deadlock is possible, because revalidators might sleep
> +     *      waiting for the main thread to release the lock and main thread
> +     *      will wait for them to stop processing.
> +     *      This workaround might make statistics less accurate. Especially
> +     *      for flow deletion case, since there will be no other attempt.  */
> +    if (!ovs_mutex_trylock(&dp->port_mutex)) {
> +        ret = netdev_flow_get(netdev, &match, &actions,
> +                              &netdev_flow->mega_ufid, stats, attrs, &buf);
> +        /* Storing attributes from the last request for later use on mutex
> +         * contention. */

Shouldn't you check 'ret' here before the set?

> +        dp_netdev_flow_set_last_stats_attrs(netdev_flow, stats, attrs);
> +        ovs_mutex_unlock(&dp->port_mutex);
> +    } else {
> +        dp_netdev_flow_get_last_stats_attrs(netdev_flow, stats, attrs);
> +        if (!attrs->dp_layer) {
> +            /* Flow was never reported as 'offloaded' so it's harmless
> +             * to continue to think so. */
> +            ret = EAGAIN;
> +        }
> +    }
>      netdev_close(netdev);
>      if (ret) {
>          return false;
> @@ -3448,6 +3512,8 @@ dp_netdev_flow_add(struct dp_netdev_pmd_thread *pmd,
>      /* Do not allocate extra space. */
>      flow = xmalloc(sizeof *flow - sizeof flow->cr.flow.mf + mask.len);
>      memset(&flow->stats, 0, sizeof flow->stats);
> +    memset(&flow->last_stats, 0, sizeof flow->last_stats);
> +    memset(&flow->last_attrs, 0, sizeof flow->last_attrs);
>      flow->dead = false;
>      flow->batch = NULL;
>      flow->mark = INVALID_FLOW_MARK;
> 

_______________________________________________
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to