It is not safe to call drain_call_rcu() from qmp_device_add() because
some call stacks are not prepared for drain_call_rcu() to drop the Big
QEMU Lock (BQL).

For example, device emulation code is protected by the BQL but when it
calls aio_poll() -> ... -> qmp_device_add() -> drain_call_rcu() then the
BQL is dropped. See bz#2215192 below for a concrete bug of this type.

Another limitation of drain_call_rcu() is that it cannot be invoked
within an RCU read-side critical section since the reclamation phase
cannot complete until the end of the critical section. Unfortunately,
call stacks have been seen where this happens (see bz#2214985 below).

Switch to call_drain_rcu_co() to avoid these problems. This requires
making qmp_device_add() a coroutine. qdev_device_add() is not designed
to be called from coroutines, so it must be invoked from a BH and then
switch back to the coroutine.

Fixes: 7bed89958bfbf40df9ca681cefbdca63abdde39d ("device_core: use 
drain_call_rcu in in qmp_device_add")
Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2215192
Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2214985
Signed-off-by: Stefan Hajnoczi <stefa...@redhat.com>
---
 qapi/qdev.json         |  1 +
 include/monitor/qdev.h |  3 ++-
 monitor/qmp-cmds.c     |  2 +-
 softmmu/qdev-monitor.c | 34 ++++++++++++++++++++++++++++++----
 hmp-commands.hx        |  1 +
 5 files changed, 35 insertions(+), 6 deletions(-)

diff --git a/qapi/qdev.json b/qapi/qdev.json
index 6bc5a733b8..78e9d7f7b8 100644
--- a/qapi/qdev.json
+++ b/qapi/qdev.json
@@ -79,6 +79,7 @@
 ##
 { 'command': 'device_add',
   'data': {'driver': 'str', '*bus': 'str', '*id': 'str'},
+  'coroutine': true,
   'gen': false, # so we can get the additional arguments
   'features': ['json-cli', 'json-cli-hotplug'] }
 
diff --git a/include/monitor/qdev.h b/include/monitor/qdev.h
index 1d57bf6577..1fed9eb9ea 100644
--- a/include/monitor/qdev.h
+++ b/include/monitor/qdev.h
@@ -5,7 +5,8 @@
 
 void hmp_info_qtree(Monitor *mon, const QDict *qdict);
 void hmp_info_qdm(Monitor *mon, const QDict *qdict);
-void qmp_device_add(QDict *qdict, QObject **ret_data, Error **errp);
+void coroutine_fn
+qmp_device_add(QDict *qdict, QObject **ret_data, Error **errp);
 
 int qdev_device_help(QemuOpts *opts);
 DeviceState *qdev_device_add(QemuOpts *opts, Error **errp);
diff --git a/monitor/qmp-cmds.c b/monitor/qmp-cmds.c
index b0f948d337..a7419226fe 100644
--- a/monitor/qmp-cmds.c
+++ b/monitor/qmp-cmds.c
@@ -202,7 +202,7 @@ static void __attribute__((__constructor__)) 
monitor_init_qmp_commands(void)
     qmp_init_marshal(&qmp_commands);
 
     qmp_register_command(&qmp_commands, "device_add",
-                         qmp_device_add, 0, 0);
+                         qmp_device_add, QCO_COROUTINE, 0);
 
     QTAILQ_INIT(&qmp_cap_negotiation_commands);
     qmp_register_command(&qmp_cap_negotiation_commands, "qmp_capabilities",
diff --git a/softmmu/qdev-monitor.c b/softmmu/qdev-monitor.c
index 74f4e41338..85ae62f7cf 100644
--- a/softmmu/qdev-monitor.c
+++ b/softmmu/qdev-monitor.c
@@ -839,8 +839,28 @@ void hmp_info_qdm(Monitor *mon, const QDict *qdict)
     qdev_print_devinfos(true);
 }
 
-void qmp_device_add(QDict *qdict, QObject **ret_data, Error **errp)
+typedef struct {
+    Coroutine *co;
+    QemuOpts *opts;
+    Error **errp;
+    DeviceState *dev;
+} QmpDeviceAdd;
+
+static void qmp_device_add_bh(void *opaque)
 {
+    QmpDeviceAdd *data = opaque;
+
+    data->dev = qdev_device_add(data->opts, data->errp);
+    aio_co_wake(data->co);
+}
+
+void coroutine_fn
+qmp_device_add(QDict *qdict, QObject **ret_data, Error **errp)
+{
+    QmpDeviceAdd data = {
+        .co = qemu_coroutine_self(),
+        .errp = errp,
+    };
     QemuOpts *opts;
     DeviceState *dev;
 
@@ -852,7 +872,13 @@ void qmp_device_add(QDict *qdict, QObject **ret_data, 
Error **errp)
         qemu_opts_del(opts);
         return;
     }
-    dev = qdev_device_add(opts, errp);
+
+    /* Perform qdev_device_add() call outside coroutine context */
+    data.opts = opts;
+    aio_bh_schedule_oneshot(qemu_coroutine_get_aio_context(data.co),
+                            qmp_device_add_bh, &data);
+    qemu_coroutine_yield();
+    dev = data.dev;
 
     /*
      * Drain all pending RCU callbacks. This is done because
@@ -863,7 +889,7 @@ void qmp_device_add(QDict *qdict, QObject **ret_data, Error 
**errp)
      * will finish its job completely once qmp command returns result
      * to the user
      */
-    drain_call_rcu();
+    drain_call_rcu_co();
 
     if (!dev) {
         qemu_opts_del(opts);
@@ -956,7 +982,7 @@ void qmp_device_del(const char *id, Error **errp)
     }
 }
 
-void hmp_device_add(Monitor *mon, const QDict *qdict)
+void coroutine_fn hmp_device_add(Monitor *mon, const QDict *qdict)
 {
     Error *err = NULL;
 
diff --git a/hmp-commands.hx b/hmp-commands.hx
index 2cbd0f77a0..c737d1fd64 100644
--- a/hmp-commands.hx
+++ b/hmp-commands.hx
@@ -695,6 +695,7 @@ ERST
         .params     = "driver[,prop=value][,...]",
         .help       = "add device, like -device on the command line",
         .cmd        = hmp_device_add,
+        .coroutine  = true,
         .command_completion = device_add_completion,
     },
 
-- 
2.41.0


Reply via email to