fdmon_ops->wait() is called with notify_me enabled. This makes it an
expensive place to call qemu_bh_schedule() because aio_notify() invokes
write(2) on the EventNotifier.

Moving qemu_bh_schedule() after notify_me is reset improves performance
from 270k to 300k IOPS with --blockdev file,aio=io_uring.

I considered alternatives:
1. Introducing a variant of qemu_bh_schedule() that skips aio_notify().
   This only makes sense within the AioContext and fdmon implementation
   itself and is therefore a specialized internal API. I don't like
   that.
2. Changing fdmon_ops->wait() so implementors can reset notify_me
   themselves. This makes things complex and the other fdmon
   implementations don't need it, so it doesn't seem like a good
   solution.

So in the end I moved the qemu_bh_schedule() call from fdmon-io_uring.c
to aio-posix.c. It's ugly but straightforward.

Signed-off-by: Stefan Hajnoczi <stefa...@redhat.com>
---
 util/aio-posix.c      | 11 +++++++++++
 util/fdmon-io_uring.c | 11 ++++++++++-
 2 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/util/aio-posix.c b/util/aio-posix.c
index 89bb215a2f..01428b141c 100644
--- a/util/aio-posix.c
+++ b/util/aio-posix.c
@@ -693,6 +693,17 @@ bool aio_poll(AioContext *ctx, bool blocking)
                              qatomic_read(&ctx->notify_me) - 2);
     }
 
+#ifdef CONFIG_LINUX_IO_URING
+    /*
+     * This is part of fdmon-io_uring.c but it's more efficient to do it here
+     * after notify_me has been reset. That way qemu_bh_schedule() ->
+     * aio_notify() does not write the EventNotifier.
+     */
+    if (!QSIMPLEQ_EMPTY(&ctx->cqe_handler_ready_list)) {
+        qemu_bh_schedule(ctx->cqe_handler_bh);
+    }
+#endif
+
     aio_notify_accept(ctx);
 
     /* Calculate blocked time for adaptive polling */
diff --git a/util/fdmon-io_uring.c b/util/fdmon-io_uring.c
index 3a49d6a20a..03a07a4caf 100644
--- a/util/fdmon-io_uring.c
+++ b/util/fdmon-io_uring.c
@@ -318,8 +318,12 @@ static bool process_cqe(AioContext *ctx,
     }
 
     cqe_handler->cqe = *cqe;
+
+    /*
+     * aio_poll() and fdmon_io_uring_gsource_dispatch() schedule cqe_handler_bh
+     * when the list is non-empty.
+     */
     QSIMPLEQ_INSERT_TAIL(&ctx->cqe_handler_ready_list, cqe_handler, next);
-    qemu_bh_schedule(ctx->cqe_handler_bh);
     return false;
 }
 
@@ -370,6 +374,11 @@ static void fdmon_io_uring_gsource_dispatch(AioContext *ctx,
                                             AioHandlerList *ready_list)
 {
     process_cq_ring(ctx, ready_list);
+
+    /* Ensure CqeHandlers enqueued by process_cq_ring() will run */
+    if (!QSIMPLEQ_EMPTY(&ctx->cqe_handler_ready_list)) {
+        qemu_bh_schedule(ctx->cqe_handler_bh);
+    }
 }
 
 static int fdmon_io_uring_wait(AioContext *ctx, AioHandlerList *ready_list,
-- 
2.49.0


Reply via email to