If there is a device notification from the vhost-event thread
that calls destroy_device() and the user deletes the vhost port at
the same time, there is a possibility of a deadlock occuring. e.g.
vhost-event thread is blocking on the ovsrcu_synchronize(), waiting
for main thread to queisce, i.e.
0x0000000003690c09 in ovsrcu_synchronize () at lib/ovs-rcu.c:237
0x00000000037162fa in destroy_device (vid=0) at lib/netdev-dpdk.c:4919
0x0000000003238a12 in vhost_destroy_device_notify (dev=0x14ba693c0)
at ../lib/vhost/vhost.c:756
While main thread is looping in rte_vhost_driver_unregister(), waiting
for vhost-event callback to finish, meaning it cannot quiesce. i.e.:
0x00000000032336e8 in rte_vhost_driver_unregister (path=0xd217a30
"/tmp/vhost0") at ../lib/vhost/socket.c:1075
0x000000000370d5f3 in dpdk_vhost_driver_unregister (dev=0x17d6e8b40,
vhost_id=0xd217a30 "/tmp/vhost0") at lib/netdev-dpdk.c:1811
0x000000000370d709 in netdev_dpdk_vhost_destruct
(netdev=0x17d6e8bc0) at lib/netdev-dpdk.c:1843
In order to prevent this situation, use ovsrcu to postpone the call to
rte_vhost_driver_unregister(). This means that main thread will not
block and will quiesce, allowing ovsrcu_synchornize() in destroy_device()
to complete.
Fixes: afee281f7f4f ("netdev-dpdk: Fix dpdk_watchdog failure to quiesce.")
Reported-by: Xinxin Zhao <[email protected]>
Signed-off-by: Kevin Traynor <[email protected]>
---
lib/netdev-dpdk.c | 66 +++++++++++++++++++++++++++++++++++------------
1 file changed, 50 insertions(+), 16 deletions(-)
diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index e454a4a5d..ab1cca07c 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -201,4 +201,11 @@ static const struct rte_vhost_device_ops
virtio_net_device_ops =
};
+/* Information used for postponed vhost driver unregistering. */
+struct vhost_unregister_info {
+ char *vhost_id;
+ char *name;
+ bool is_server;
+};
+
/* Custom software stats for dpdk ports */
struct netdev_dpdk_sw_stats {
@@ -1802,11 +1809,40 @@ netdev_dpdk_destruct(struct netdev *netdev)
* try to acquire 'dpdk_mutex' and possibly 'dev->mutex'. To avoid a
* deadlock, none of the mutexes must be held while calling this function. */
-static int
-dpdk_vhost_driver_unregister(struct netdev_dpdk *dev OVS_UNUSED,
- char *vhost_id)
+static void
+dpdk_vhost_driver_unregister(struct vhost_unregister_info *vhost_info)
OVS_EXCLUDED(dpdk_mutex)
- OVS_EXCLUDED(dev->mutex)
{
- return rte_vhost_driver_unregister(vhost_id);
+ char *vhost_id = vhost_info->vhost_id;
+ char *name = vhost_info->name;
+
+ if (rte_vhost_driver_unregister(vhost_id)) {
+ VLOG_ERR("%s: Unable to unregister vhost driver for socket '%s'.\n",
+ name, vhost_id);
+ } else if (vhost_info->is_server) {
+ /* OVS server mode - remove this socket from list for deletion */
+ fatal_signal_remove_file_to_unlink(vhost_id);
+ }
+ free(vhost_id);
+ free(name);
+ free(vhost_info);
+}
+
+/* Postpone calling rte_vhost_driver_unregister() so it is not called from the
+ * main thread otherwise it may deadlock with a device notification calling
+ * destroy_device(). i.e. rte_vhost_driver_unregister() from main thread
+ * blocks waiting for destroy_device() to complete, destroy_device() blocks
+ * waiting for main thread to quiesce. */
+static void
+dpdk_vhost_driver_unregister_postpone(const char *vhost_id, const char *name,
+ bool is_server)
+{
+ struct vhost_unregister_info *vhost_info;
+
+ vhost_info = xmalloc(sizeof *vhost_info);
+ vhost_info->vhost_id = xstrdup(vhost_id);
+ vhost_info->name = xstrdup(name);
+ vhost_info->is_server = is_server;
+
+ ovsrcu_postpone(dpdk_vhost_driver_unregister, vhost_info);
}
@@ -1837,16 +1873,11 @@ netdev_dpdk_vhost_destruct(struct netdev *netdev)
if (!vhost_id) {
- goto out;
+ free(vhost_id);
+ return;
}
- if (dpdk_vhost_driver_unregister(dev, vhost_id)) {
- VLOG_ERR("%s: Unable to unregister vhost driver for socket '%s'.\n",
- netdev->name, vhost_id);
- } else if (!(dev->vhost_driver_flags & RTE_VHOST_USER_CLIENT)) {
- /* OVS server mode - remove this socket from list for deletion */
- fatal_signal_remove_file_to_unlink(vhost_id);
- }
-out:
- free(vhost_id);
+ dpdk_vhost_driver_unregister_postpone(vhost_id, netdev_get_name(netdev),
+ !(dev->vhost_driver_flags
+ & RTE_VHOST_USER_CLIENT));
}
@@ -6285,5 +6316,8 @@ netdev_dpdk_vhost_client_reconfigure(struct netdev
*netdev)
if (unregister) {
- dpdk_vhost_driver_unregister(dev, vhost_id);
+ dpdk_vhost_driver_unregister_postpone(vhost_id,
+ netdev_get_name(netdev),
+ !(dev->vhost_driver_flags
+ & RTE_VHOST_USER_CLIENT));
}
--
2.46.2
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev