On Aug 27 17:12, Jinhao Fan wrote:
> Add AioContext polling handlers for NVMe SQ and CQ. By employing polling,
> the latency of NVMe IO emulation is greatly reduced. The SQ polling
> handler checks for updates on the SQ tail shadow doorbell buffer. The CQ
> polling handler is an empty function because we procatively polls the CQ
> head shadow doorbell buffer when we want to post a cqe. Updates on the
> SQ eventidx buffer is stopped during polling to avoid the host doing
> unnecessary doorbell buffer writes.
> 
> Comparison (KIOPS):
> 
> QD        1   4  16  64
> QEMU     53 155 245 309
> polling 123 165 189 191
> 
> Signed-off-by: Jinhao Fan <[email protected]>
> ---
>  hw/nvme/ctrl.c | 74 ++++++++++++++++++++++++++++++++++++++++++++++----
>  hw/nvme/nvme.h |  1 +
>  2 files changed, 70 insertions(+), 5 deletions(-)
> 
> diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
> index 869565d77b..a7f8a4220e 100644
> --- a/hw/nvme/ctrl.c
> +++ b/hw/nvme/ctrl.c
> @@ -298,6 +298,8 @@ static const uint32_t nvme_cse_iocs_zoned[256] = {
>  
>  static void nvme_process_sq(void *opaque);
>  static void nvme_ctrl_reset(NvmeCtrl *n, NvmeResetType rst);
> +static void nvme_update_sq_eventidx(const NvmeSQueue *sq);
> +static void nvme_update_sq_tail(NvmeSQueue *sq);
>  
>  static uint16_t nvme_sqid(NvmeRequest *req)
>  {
> @@ -4447,6 +4449,21 @@ static void nvme_cq_notifier(EventNotifier *e)
>      nvme_post_cqes(cq);
>  }
>  
> +static bool nvme_cq_notifier_aio_poll(void *opaque)
> +{
> +    /*
> +     * We already "poll" the CQ tail shadow doorbell value in 
> nvme_post_cqes(),
> +     * so we do not need to check the value here. However, QEMU's AioContext
> +     * polling requires us to provide io_poll and io_poll_ready handlers, so
> +     * use dummy functions for CQ.
> +     */
> +    return false;
> +}
> +
> +static void nvme_cq_notifier_aio_poll_ready(EventNotifier *n)
> +{
> +}
> +
>  static int nvme_init_cq_ioeventfd(NvmeCQueue *cq)
>  {
>      NvmeCtrl *n = cq->ctrl;
> @@ -4459,8 +4476,10 @@ static int nvme_init_cq_ioeventfd(NvmeCQueue *cq)
>      }
>  
>      if (cq->cqid) {
> -        aio_set_event_notifier(n->ctx, &cq->notifier, true, nvme_cq_notifier,
> -                               NULL, NULL);
> +        aio_set_event_notifier(n->ctx, &cq->notifier, true,
> +                               nvme_cq_notifier,
> +                               nvme_cq_notifier_aio_poll,
> +                               nvme_cq_notifier_aio_poll_ready);

There is no reason to set up these polling handlers (since they don't do
anything).

>      } else {
>          event_notifier_set_handler(&cq->notifier, nvme_cq_notifier);
>      }
> @@ -4482,6 +4501,44 @@ static void nvme_sq_notifier(EventNotifier *e)
>      nvme_process_sq(sq);
>  }
>  
> +static void nvme_sq_notifier_aio_poll_begin(EventNotifier *n)
> +{
> +    NvmeSQueue *sq = container_of(n, NvmeSQueue, notifier);
> +
> +    nvme_update_sq_eventidx(sq);
> +
> +    /* Stop host doorbell writes by stop updating eventidx */
> +    sq->suppress_db = true;

This doesn't do what you expect it to. By not updaring the eventidx it
will fall behind the actual head, causing the host to think that the
device is not processing events (but it is!), resulting in doorbell
ringing.

> +}
> +
> +static bool nvme_sq_notifier_aio_poll(void *opaque)
> +{
> +    EventNotifier *n = opaque;
> +    NvmeSQueue *sq = container_of(n, NvmeSQueue, notifier);
> +    uint32_t old_tail = sq->tail;
> +
> +    nvme_update_sq_tail(sq);
> +
> +    return sq->tail != old_tail;
> +}
> +
> +static void nvme_sq_notifier_aio_poll_ready(EventNotifier *n)
> +{
> +    NvmeSQueue *sq = container_of(n, NvmeSQueue, notifier);
> +
> +    nvme_process_sq(sq);
> +}
> +
> +static void nvme_sq_notifier_aio_poll_end(EventNotifier *n)
> +{
> +    NvmeSQueue *sq = container_of(n, NvmeSQueue, notifier);
> +
> +    nvme_update_sq_eventidx(sq);
> +
> +    /* Resume host doorbell writes */
> +    sq->suppress_db = false;
> +}
> +
>  static int nvme_init_sq_ioeventfd(NvmeSQueue *sq)
>  {
>      NvmeCtrl *n = sq->ctrl;
> @@ -4494,8 +4551,13 @@ static int nvme_init_sq_ioeventfd(NvmeSQueue *sq)
>      }
>  
>      if (sq->sqid) {
> -        aio_set_event_notifier(n->ctx, &sq->notifier, true, nvme_sq_notifier,
> -                               NULL, NULL);
> +        aio_set_event_notifier(n->ctx, &sq->notifier, true,
> +                               nvme_sq_notifier,
> +                               nvme_sq_notifier_aio_poll,
> +                               nvme_sq_notifier_aio_poll_ready);
> +        aio_set_event_notifier_poll(n->ctx, &sq->notifier,
> +                                    nvme_sq_notifier_aio_poll_begin,
> +                                    nvme_sq_notifier_aio_poll_end);

You can remove the call to aio_set_event_notifier_poll() since the
supress_db "trick" shouldnt be used anyway.

>      } else {
>          event_notifier_set_handler(&sq->notifier, nvme_sq_notifier);
>      }
> @@ -6530,7 +6592,9 @@ static void nvme_process_sq(void *opaque)
>          }
>  
>          if (n->dbbuf_enabled) {
> -            nvme_update_sq_eventidx(sq);
> +            if (!sq->suppress_db) {
> +                nvme_update_sq_eventidx(sq);
> +            }

Remove this change.

>              nvme_update_sq_tail(sq);
>          }
>      }
> diff --git a/hw/nvme/nvme.h b/hw/nvme/nvme.h
> index 224b73e6c4..bd486a8e15 100644
> --- a/hw/nvme/nvme.h
> +++ b/hw/nvme/nvme.h
> @@ -380,6 +380,7 @@ typedef struct NvmeSQueue {
>      QEMUTimer   *timer;
>      EventNotifier notifier;
>      bool        ioeventfd_enabled;
> +    bool        suppress_db;

Remove this.

>      NvmeRequest *io_req;
>      QTAILQ_HEAD(, NvmeRequest) req_list;
>      QTAILQ_HEAD(, NvmeRequest) out_req_list;
> -- 
> 2.25.1
> 

I tested with the patch modified for the above and I am not seeing any
difference in performance. But without the supress_db, this seems more
"correct".

-- 
One of us - No more doubt, silence or taboo about mental illness.

Attachment: signature.asc
Description: PGP signature

Reply via email to