drain_call_rcu() has limitations that make it unsuitable for use in qmp_device_add(). Introduce a new coroutine version of drain_call_rcu() with the same functionality but that does not drop the BQL. The next patch will use it to fix qmp_device_add().
Signed-off-by: Stefan Hajnoczi <stefa...@redhat.com> --- MAINTAINERS | 2 ++ docs/devel/rcu.txt | 21 +++++++++++++++++ include/qemu/rcu.h | 1 + util/rcu-internal.h | 8 +++++++ util/rcu-co.c | 55 +++++++++++++++++++++++++++++++++++++++++++++ util/rcu.c | 3 ++- util/meson.build | 2 +- 7 files changed, 90 insertions(+), 2 deletions(-) create mode 100644 util/rcu-internal.h create mode 100644 util/rcu-co.c diff --git a/MAINTAINERS b/MAINTAINERS index 3b29568ed4..7f98253bda 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2908,6 +2908,8 @@ F: include/qemu/rcu*.h F: tests/unit/rcutorture.c F: tests/unit/test-rcu-*.c F: util/rcu.c +F: util/rcu-co.c +F: util/rcu-internal.h Human Monitor (HMP) M: Dr. David Alan Gilbert <d...@treblig.org> diff --git a/docs/devel/rcu.txt b/docs/devel/rcu.txt index 2e6cc607a1..344764527f 100644 --- a/docs/devel/rcu.txt +++ b/docs/devel/rcu.txt @@ -130,6 +130,27 @@ The core RCU API is small: g_free_rcu(&foo, rcu); + void coroutine_fn drain_call_rcu_co(void); + + drain_call_rcu_co() yields until the reclamation phase is finished. + Reclaimer functions previously submitted with call_rcu1() in this + thread will have finished by the time drain_call_rcu_co() returns. + + void drain_call_rcu(void); + + drain_call_rcu() releases the Big QEMU Lock (BQL), if held, waits until + the reclamation phase is finished, and then re-acquires the BQL, if + previously held. Reclaimer functions previously submitted with + call_rcu1() in this thread will have finished by the time + drain_call_rcu() returns. + + drain_call_rcu() has the following limitations: + 1. It deadlocks when called within an RCU read-side critical section. + 2. All functions on the call stack must be designed to handle dropping + the BQL. + + Prefer drain_call_rcu_co() over drain_call_rcu(). 
+ typeof(*p) qatomic_rcu_read(p); qatomic_rcu_read() is similar to qatomic_load_acquire(), but it makes diff --git a/include/qemu/rcu.h b/include/qemu/rcu.h index fea058aa9f..53055df1dc 100644 --- a/include/qemu/rcu.h +++ b/include/qemu/rcu.h @@ -141,6 +141,7 @@ struct rcu_head { }; void call_rcu1(struct rcu_head *head, RCUCBFunc *func); +void coroutine_fn drain_call_rcu_co(void); void drain_call_rcu(void); /* The operands of the minus operator must have the same type, diff --git a/util/rcu-internal.h b/util/rcu-internal.h new file mode 100644 index 0000000000..7d85366d54 --- /dev/null +++ b/util/rcu-internal.h @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#ifndef RCU_INTERNAL_H +#define RCU_INTERNAL_H + +extern int in_drain_call_rcu; + +#endif /* RCU_INTERNAL_H */ diff --git a/util/rcu-co.c b/util/rcu-co.c new file mode 100644 index 0000000000..920fcacb7a --- /dev/null +++ b/util/rcu-co.c @@ -0,0 +1,55 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +/* + * RCU APIs for coroutines + * + * The RCU coroutine APIs are kept separate from the main RCU code to avoid + * depending on AioContext APIs in rcu.c. This is necessary because at least + * tests/unit/ptimer-test.c has replacement functions for AioContext APIs that + * conflict with the real functions. + * + * It's also nice to logically separate the core RCU code from the coroutine + * APIs :). 
+ */ +#include "qemu/osdep.h" +#include "block/aio.h" +#include "qemu/atomic.h" +#include "qemu/coroutine.h" +#include "qemu/rcu.h" +#include "rcu-internal.h" + +typedef struct { + struct rcu_head rcu; + Coroutine *co; +} RcuDrainCo; + +static void drain_call_rcu_co_bh(void *opaque) +{ + RcuDrainCo *data = opaque; + + /* Re-enter drain_call_rcu_co() where it yielded */ + aio_co_wake(data->co); +} + +static void drain_call_rcu_co_cb(struct rcu_head *node) +{ + RcuDrainCo *data = container_of(node, RcuDrainCo, rcu); + AioContext *ctx = qemu_coroutine_get_aio_context(data->co); + + /* + * drain_call_rcu_co() might still be running in its thread, so schedule a + * BH in its thread. The BH only runs after the coroutine has yielded. + */ + aio_bh_schedule_oneshot(ctx, drain_call_rcu_co_bh, data); +} + +void coroutine_fn drain_call_rcu_co(void) +{ + RcuDrainCo data = { + .co = qemu_coroutine_self(), + }; + + qatomic_inc(&in_drain_call_rcu); + call_rcu1(&data.rcu, drain_call_rcu_co_cb); + qemu_coroutine_yield(); /* wait for drain_call_rcu_co_bh() */ + qatomic_dec(&in_drain_call_rcu); +} diff --git a/util/rcu.c b/util/rcu.c index e587bcc483..2519bd7d5c 100644 --- a/util/rcu.c +++ b/util/rcu.c @@ -32,6 +32,7 @@ #include "qemu/thread.h" #include "qemu/main-loop.h" #include "qemu/lockable.h" +#include "rcu-internal.h" #if defined(CONFIG_MALLOC_TRIM) #include <malloc.h> #endif @@ -46,7 +47,7 @@ unsigned long rcu_gp_ctr = RCU_GP_LOCKED; QemuEvent rcu_gp_event; -static int in_drain_call_rcu; +int in_drain_call_rcu; static QemuMutex rcu_registry_lock; static QemuMutex rcu_sync_lock; diff --git a/util/meson.build b/util/meson.build index a375160286..849d56f756 100644 --- a/util/meson.build +++ b/util/meson.build @@ -43,7 +43,7 @@ util_ss.add(files('keyval.c')) util_ss.add(files('crc32c.c')) util_ss.add(files('uuid.c')) util_ss.add(files('getauxval.c')) -util_ss.add(files('rcu.c')) +util_ss.add(files('rcu.c', 'rcu-co.c')) if have_membarrier util_ss.add(files('sys_membarrier.c')) endif -- 
2.41.0