If there is a device notification from the vhost-event thread
that calls destroy_device() and the user deletes the vhost port at
the same time, there is a possibility of a deadlock occuring. e.g.

vhost-event thread is blocking on the ovsrcu_synchronize(), waiting
for main thread to queisce, i.e.

0x0000000003690c09 in ovsrcu_synchronize () at lib/ovs-rcu.c:237
0x00000000037162fa in destroy_device (vid=0) at lib/netdev-dpdk.c:4919
0x0000000003238a12 in vhost_destroy_device_notify (dev=0x14ba693c0)
at ../lib/vhost/vhost.c:756

While main thread is looping in rte_vhost_driver_unregister(), waiting
for vhost-event callback to finish, meaning it cannot quiesce. i.e.:

0x00000000032336e8 in rte_vhost_driver_unregister (path=0xd217a30
"/tmp/vhost0") at ../lib/vhost/socket.c:1075
0x000000000370d5f3 in dpdk_vhost_driver_unregister (dev=0x17d6e8b40,
vhost_id=0xd217a30 "/tmp/vhost0") at lib/netdev-dpdk.c:1811
0x000000000370d709 in netdev_dpdk_vhost_destruct
(netdev=0x17d6e8bc0) at lib/netdev-dpdk.c:1843

In order to prevent this situation, use ovsrcu to postpone the call to
rte_vhost_driver_unregister(). This means that main thread will not
block and will quiesce, allowing ovsrcu_synchornize() in destroy_device()
to complete.

Fixes: afee281f7f4f ("netdev-dpdk: Fix dpdk_watchdog failure to quiesce.")
Reported-by: Xinxin Zhao <[email protected]>
Signed-off-by: Kevin Traynor <[email protected]>
---
 lib/netdev-dpdk.c | 66 +++++++++++++++++++++++++++++++++++------------
 1 file changed, 50 insertions(+), 16 deletions(-)

diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index e454a4a5d..ab1cca07c 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -201,4 +201,11 @@ static const struct rte_vhost_device_ops 
virtio_net_device_ops =
 };
 
+/* Information used for postponed vhost driver unregistering. */
+struct vhost_unregister_info {
+    char *vhost_id;
+    char *name;
+    bool is_server;
+};
+
 /* Custom software stats for dpdk ports */
 struct netdev_dpdk_sw_stats {
@@ -1802,11 +1809,40 @@ netdev_dpdk_destruct(struct netdev *netdev)
  * try to acquire 'dpdk_mutex' and possibly 'dev->mutex'.  To avoid a
  * deadlock, none of the mutexes must be held while calling this function. */
-static int
-dpdk_vhost_driver_unregister(struct netdev_dpdk *dev OVS_UNUSED,
-                             char *vhost_id)
+static void
+dpdk_vhost_driver_unregister(struct vhost_unregister_info *vhost_info)
     OVS_EXCLUDED(dpdk_mutex)
-    OVS_EXCLUDED(dev->mutex)
 {
-    return rte_vhost_driver_unregister(vhost_id);
+    char *vhost_id = vhost_info->vhost_id;
+    char *name = vhost_info->name;
+
+    if (rte_vhost_driver_unregister(vhost_id)) {
+        VLOG_ERR("%s: Unable to unregister vhost driver for socket '%s'.\n",
+                 name, vhost_id);
+    } else if (vhost_info->is_server) {
+        /* OVS server mode - remove this socket from list for deletion */
+        fatal_signal_remove_file_to_unlink(vhost_id);
+    }
+    free(vhost_id);
+    free(name);
+    free(vhost_info);
+}
+
+/* Postpone calling rte_vhost_driver_unregister() so it is not called from the
+ * main thread otherwise it may deadlock with a device notification calling
+ * destroy_device(). i.e. rte_vhost_driver_unregister() from main thread
+ * blocks waiting for destroy_device() to complete, destroy_device() blocks
+ * waiting for main thread to quiesce. */
+static void
+dpdk_vhost_driver_unregister_postpone(const char *vhost_id, const char *name,
+                                      bool is_server)
+{
+    struct vhost_unregister_info *vhost_info;
+
+    vhost_info = xmalloc(sizeof *vhost_info);
+    vhost_info->vhost_id = xstrdup(vhost_id);
+    vhost_info->name = xstrdup(name);
+    vhost_info->is_server = is_server;
+
+    ovsrcu_postpone(dpdk_vhost_driver_unregister, vhost_info);
 }
 
@@ -1837,16 +1873,11 @@ netdev_dpdk_vhost_destruct(struct netdev *netdev)
 
     if (!vhost_id) {
-        goto out;
+        free(vhost_id);
+        return;
     }
 
-    if (dpdk_vhost_driver_unregister(dev, vhost_id)) {
-        VLOG_ERR("%s: Unable to unregister vhost driver for socket '%s'.\n",
-                 netdev->name, vhost_id);
-    } else if (!(dev->vhost_driver_flags & RTE_VHOST_USER_CLIENT)) {
-        /* OVS server mode - remove this socket from list for deletion */
-        fatal_signal_remove_file_to_unlink(vhost_id);
-    }
-out:
-    free(vhost_id);
+    dpdk_vhost_driver_unregister_postpone(vhost_id, netdev_get_name(netdev),
+                                          !(dev->vhost_driver_flags
+                                          & RTE_VHOST_USER_CLIENT));
 }
 
@@ -6285,5 +6316,8 @@ netdev_dpdk_vhost_client_reconfigure(struct netdev 
*netdev)
 
     if (unregister) {
-        dpdk_vhost_driver_unregister(dev, vhost_id);
+        dpdk_vhost_driver_unregister_postpone(vhost_id,
+                                              netdev_get_name(netdev),
+                                              !(dev->vhost_driver_flags
+                                              & RTE_VHOST_USER_CLIENT));
     }
 
-- 
2.46.2

_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to