On 7/14/20 5:33 PM, Kevin Traynor wrote:
> On 09/07/2020 16:19, Ilya Maximets wrote:
>> Main thread will try to pause/stop all revalidators during datapath
>> reconfiguration via datapath purge callback (dp_purge_cb) while
>> holding 'dp->port_mutex'.  And deadlock happens in case any of
>> revalidator threads is already waiting on 'dp->port_mutex' while
>> dumping offloaded flows:
>>
>>    main thread                            revalidator
>>   ---------------------------------     ----------------------------------
>>
>>                                          ovs_mutex_lock(&dp->port_mutex)
>>
>>                                          dpif_netdev_flow_dump_next()
>>                                          -> dp_netdev_flow_to_dpif_flow
>>                                          -> get_dpif_flow_status
>>                                          -> dpif_netdev_get_flow_offload_status()
>>                                          -> ovs_mutex_lock(&dp->port_mutex)
>>                                             <waiting for mutex here>
>>
>>   reconfigure_datapath()
>>   -> reconfigure_pmd_threads()
>>   -> dp_netdev_del_pmd()
>>   -> dp_purge_cb()
>>   -> udpif_pause_revalidators()
>>   -> ovs_barrier_block(&udpif->pause_barrier)
>>      <waiting for revalidators to reach barrier>
>>
>>                            <DEADLOCK>
>>
>> We're not allowed to call offloading API without holding global
>> port mutex from the userspace datapath due to thread safety
>> restrictions on netdev-ofload-dpdk module.  And it's also not easy
>
> typo s/ofload/offload/
Thanks.

>
>> to rework the datapath reconfiguration process in order to move actual
>> PMD removal and datapath purge out of the port mutex.
>>
>> So, for now, not sleeping on a mutex if it's not immediately available
>> seems like the easiest workaround.  This will have an impact on the flow
>> statistics update rate and on the ability to get the latest statistics
>> before removing the flow (latest stats will be lost in case we were
>> not able to take the mutex).  However, this will allow us to operate
>> normally, avoiding the deadlock.
>>
>> The last bit is that, to avoid flapping of flow attributes and
>> statistics, we're not failing the operation, but returning the last
>> statistics and attributes returned by the offload provider.  Since those
>> might be updated in different threads, stores and reads are atomic.
>>
>
> Nice explanation, thanks
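
Thanks.  To expand a bit on why relaxed atomics are enough here: writers
are serialized by 'port_mutex', while readers take the fallback path
without any lock, and each field is stored/loaded independently.  So a
reader may observe a mix of fields from two updates (e.g. a new packet
count together with an old byte count), which is acceptable for
statistics.  Below is a minimal standalone sketch of the pattern; note
it uses plain C11 <stdatomic.h> instead of OVS's ovs-atomic wrappers,
and the names are illustrative, not the actual OVS types:

    #include <stdatomic.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Illustrative stand-in for the per-flow cached offload statistics. */
    struct cached_stats {
        atomic_uint_least64_t n_packets;
        atomic_uint_least64_t n_bytes;
    };

    /* Writer side: called only with the (real) mutex held, so writers are
     * serialized; atomics are needed only against concurrent readers. */
    static void
    cache_store(struct cached_stats *c, uint64_t packets, uint64_t bytes)
    {
        atomic_store_explicit(&c->n_packets, packets, memory_order_relaxed);
        atomic_store_explicit(&c->n_bytes, bytes, memory_order_relaxed);
    }

    /* Reader side: lock-free fallback used on mutex contention.  Each field
     * is loaded independently, so the pair may be torn across two updates. */
    static void
    cache_load(struct cached_stats *c, uint64_t *packets, uint64_t *bytes)
    {
        *packets = atomic_load_explicit(&c->n_packets, memory_order_relaxed);
        *bytes = atomic_load_explicit(&c->n_bytes, memory_order_relaxed);
    }

    int
    main(void)
    {
        struct cached_stats c;
        uint64_t p, b;

        atomic_init(&c.n_packets, 0);
        atomic_init(&c.n_bytes, 0);

        cache_store(&c, 10, 1500);
        cache_load(&c, &p, &b);
        printf("%llu packets, %llu bytes\n",
               (unsigned long long) p, (unsigned long long) b);
        return 0;
    }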
>
>> Reported-by: Frank Wang (王培辉) <wangpei...@inspur.com>
>> Reported-at: https://mail.openvswitch.org/pipermail/ovs-dev/2020-June/371753.html
>> Fixes: a309e4f52660 ("dpif-netdev: Update offloaded flows statistics.")
>> Signed-off-by: Ilya Maximets <i.maxim...@ovn.org>
>> ---
>>
>> Tested with unit tests only.  Needs checking with the real setup.
>> Especially stats/attrs update parts.
>>
>>  AUTHORS.rst       |  1 +
>>  lib/dpif-netdev.c | 78 +++++++++++++++++++++++++++++++++++++++++++----
>>  2 files changed, 73 insertions(+), 6 deletions(-)
>>
>> diff --git a/AUTHORS.rst b/AUTHORS.rst
>> index 8e6a0769f..eb36a01d0 100644
>> --- a/AUTHORS.rst
>> +++ b/AUTHORS.rst
>> @@ -504,6 +504,7 @@ Edwin Chiu                      ec...@vmware.com
>>  Eivind Bulie Haanaes
>>  Enas Ahmad                         enas.ah...@kaust.edu.sa
>>  Eric Lopez
>> +Frank Wang (王培辉)                 wangpei...@inspur.com
>>  Frido Roose                        fr.ro...@gmail.com
>>  Gaetano Catalli                    gaetano.cata...@gmail.com
>>  Gavin Remaley                      gavin_rema...@selinc.com
>> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
>> index 629a0cb53..2eb7f32ff 100644
>> --- a/lib/dpif-netdev.c
>> +++ b/lib/dpif-netdev.c
>> @@ -490,6 +490,12 @@ struct dp_netdev_flow_stats {
>>      atomic_uint16_t tcp_flags;     /* Bitwise-OR of seen tcp_flags values. */
>>  };
>>
>> +/* Contained by struct dp_netdev_flow's 'last_attrs' member. */
>> +struct dp_netdev_flow_attrs {
>> +    atomic_bool offloaded;         /* True if flow is offloaded to HW. */
>> +    ATOMIC(const char *) dp_layer; /* DP layer the flow is handled in. */
>> +};
>> +
>>  /* A flow in 'dp_netdev_pmd_thread's 'flow_table'.
>>   *
>>   *
>> @@ -550,6 +556,10 @@ struct dp_netdev_flow {
>>      /* Statistics. */
>>      struct dp_netdev_flow_stats stats;
>>
>> +    /* Statistics and attributes received from the netdev offload provider. */
>> +    struct dp_netdev_flow_stats last_stats;
>> +    struct dp_netdev_flow_attrs last_attrs;
>> +
>>      /* Actions. */
>>      OVSRCU_TYPE(struct dp_netdev_actions *) actions;
>>
>> @@ -3150,9 +3160,43 @@ dp_netdev_pmd_find_flow(const struct dp_netdev_pmd_thread *pmd,
>>      return NULL;
>>  }
>>
>> +static void
>> +dp_netdev_flow_set_last_stats_attrs(struct dp_netdev_flow *netdev_flow,
>> +                                    const struct dpif_flow_stats *stats,
>> +                                    const struct dpif_flow_attrs *attrs)
>> +{
>> +    struct dp_netdev_flow_stats *last_stats = &netdev_flow->last_stats;
>> +    struct dp_netdev_flow_attrs *last_attrs = &netdev_flow->last_attrs;
>> +
>> +    atomic_store_relaxed(&last_stats->used, stats->used);
>> +    atomic_store_relaxed(&last_stats->packet_count, stats->n_packets);
>> +    atomic_store_relaxed(&last_stats->byte_count, stats->n_bytes);
>> +    atomic_store_relaxed(&last_stats->tcp_flags, stats->tcp_flags);
>> +
>> +    atomic_store_relaxed(&last_attrs->offloaded, attrs->offloaded);
>> +    atomic_store_relaxed(&last_attrs->dp_layer, attrs->dp_layer);
>> +}
>> +
>> +static void
>> +dp_netdev_flow_get_last_stats_attrs(struct dp_netdev_flow *netdev_flow,
>> +                                    struct dpif_flow_stats *stats,
>> +                                    struct dpif_flow_attrs *attrs)
>> +{
>> +    struct dp_netdev_flow_stats *last_stats = &netdev_flow->last_stats;
>> +    struct dp_netdev_flow_attrs *last_attrs = &netdev_flow->last_attrs;
>> +
>> +    atomic_read_relaxed(&last_stats->used, &stats->used);
>> +    atomic_read_relaxed(&last_stats->packet_count, &stats->n_packets);
>> +    atomic_read_relaxed(&last_stats->byte_count, &stats->n_bytes);
>> +    atomic_read_relaxed(&last_stats->tcp_flags, &stats->tcp_flags);
>> +
>> +    atomic_read_relaxed(&last_attrs->offloaded, &attrs->offloaded);
>> +    atomic_read_relaxed(&last_attrs->dp_layer, &attrs->dp_layer);
>> +}
>> +
>>  static bool
>>  dpif_netdev_get_flow_offload_status(const struct dp_netdev *dp,
>> -                                    const struct dp_netdev_flow *netdev_flow,
>> +                                    struct dp_netdev_flow *netdev_flow,
>>                                      struct dpif_flow_stats *stats,
>>                                      struct dpif_flow_attrs *attrs)
>>  {
>> @@ -3175,11 +3219,31 @@ dpif_netdev_get_flow_offload_status(const struct dp_netdev *dp,
>>      }
>>      ofpbuf_use_stack(&buf, &act_buf, sizeof act_buf);
>>      /* Taking a global 'port_mutex' to fulfill thread safety
>> -     * restrictions for the netdev-offload-dpdk module. */
>> -    ovs_mutex_lock(&dp->port_mutex);
>> -    ret = netdev_flow_get(netdev, &match, &actions, &netdev_flow->mega_ufid,
>> -                          stats, attrs, &buf);
>> -    ovs_mutex_unlock(&dp->port_mutex);
>> +     * restrictions for the netdev-offload-dpdk module.
>> +     *
>> +     * XXX: Main thread will try to pause/stop all revalidators during
>> +     * datapath reconfiguration via datapath purge callback (dp_purge_cb)
>> +     * while holding 'dp->port_mutex'.  So we're not waiting for mutex here.
>> +     * Otherwise, deadlock is possible, because revalidators might sleep
>> +     * waiting for the main thread to release the lock and main thread
>> +     * will wait for them to stop processing.
>> +     * This workaround might make statistics less accurate.  Especially
>> +     * for flow deletion case, since there will be no other attempt. */
>> +    if (!ovs_mutex_trylock(&dp->port_mutex)) {
>> +        ret = netdev_flow_get(netdev, &match, &actions,
>> +                              &netdev_flow->mega_ufid, stats, attrs, &buf);
>> +        /* Storing attributes from the last request for later use on mutex
>> +         * contention. */
>
> Shouldn't you check 'ret' here before the set

Yes.  That makes sense.  I'll move the call below under 'if (!ret)'.
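
I.e., something along these lines (just a sketch of the planned v2
change on top of this patch, not tested yet):

    if (!ovs_mutex_trylock(&dp->port_mutex)) {
        ret = netdev_flow_get(netdev, &match, &actions,
                              &netdev_flow->mega_ufid, stats, attrs, &buf);
        /* Storing attributes from the last request for later use on mutex
         * contention, but only if the request itself succeeded, so that a
         * failed request can't clobber the previously cached values. */
        if (!ret) {
            dp_netdev_flow_set_last_stats_attrs(netdev_flow, stats, attrs);
        }
        ovs_mutex_unlock(&dp->port_mutex);
    } else {
        ...
    }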
>
>> +        dp_netdev_flow_set_last_stats_attrs(netdev_flow, stats, attrs);
>> +        ovs_mutex_unlock(&dp->port_mutex);
>> +    } else {
>> +        dp_netdev_flow_get_last_stats_attrs(netdev_flow, stats, attrs);
>> +        if (!attrs->dp_layer) {
>> +            /* Flow was never reported as 'offloaded' so it's harmless
>> +             * to continue to think so. */
>> +            ret = EAGAIN;
>> +        }
>> +    }
>>      netdev_close(netdev);
>>      if (ret) {
>>          return false;
>> @@ -3448,6 +3512,8 @@ dp_netdev_flow_add(struct dp_netdev_pmd_thread *pmd,
>>      /* Do not allocate extra space. */
>>      flow = xmalloc(sizeof *flow - sizeof flow->cr.flow.mf + mask.len);
>>      memset(&flow->stats, 0, sizeof flow->stats);
>> +    memset(&flow->last_stats, 0, sizeof flow->last_stats);
>> +    memset(&flow->last_attrs, 0, sizeof flow->last_attrs);
>>      flow->dead = false;
>>      flow->batch = NULL;
>>      flow->mark = INVALID_FLOW_MARK;
>>
> _______________________________________________
> dev mailing list
> d...@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev