+CC MST
> virtio_pmem_host_ack() reclaims virtqueue descriptors with
> virtqueue_get_buf(). The -ENOSPC waiter wakeup is tied to completing the
> returned token.
>
> If token completion is skipped for any reason, reclaimed descriptors may
> not wake a waiter and the submitter may sleep forever waiting for a free
> slot.
>
> Always wake one -ENOSPC waiter for each virtqueue completion before
> touching the returned token.
>
> Use READ_ONCE()/WRITE_ONCE() for the wait_event() flags (done and
> wq_buf_avail). They are observed by waiters without pmem_lock, so make
> the accesses explicit single loads/stores and avoid compiler
> reordering/caching across the wait/wake paths.
>
> Signed-off-by: Li Chen <[email protected]>
> ---
> drivers/nvdimm/nd_virtio.c | 35 +++++++++++++++++++++--------------
> 1 file changed, 21 insertions(+), 14 deletions(-)
>
> diff --git a/drivers/nvdimm/nd_virtio.c b/drivers/nvdimm/nd_virtio.c
> index c3f07be4aa22..6f9890361d0b 100644
> --- a/drivers/nvdimm/nd_virtio.c
> +++ b/drivers/nvdimm/nd_virtio.c
> @@ -9,26 +9,33 @@
> #include "virtio_pmem.h"
> #include "nd.h"
>
> +static void virtio_pmem_wake_one_waiter(struct virtio_pmem *vpmem)
> +{
> + struct virtio_pmem_request *req_buf;
> +
> + if (list_empty(&vpmem->req_list))
> + return;
> +
> + req_buf = list_first_entry(&vpmem->req_list,
> + struct virtio_pmem_request, list);
[...]
> + list_del_init(&req_buf->list);
> + WRITE_ONCE(req_buf->wq_buf_avail, true);
> + wake_up(&req_buf->wq_buf);
It seems that with the above change (a three-line fix), you are allowing a
waiter to be woken up before the token is accessed. Maybe simplify the patch
by keeping just this change in a single patch and moving the other changes
(READ_ONCE/WRITE_ONCE) into a separate patch with a corresponding commit log.
Thanks,
Pankaj
> +}
> +
> /* The interrupt handler */
> void virtio_pmem_host_ack(struct virtqueue *vq)
> {
> struct virtio_pmem *vpmem = vq->vdev->priv;
> - struct virtio_pmem_request *req_data, *req_buf;
> + struct virtio_pmem_request *req_data;
> unsigned long flags;
> unsigned int len;
>
> spin_lock_irqsave(&vpmem->pmem_lock, flags);
> while ((req_data = virtqueue_get_buf(vq, &len)) != NULL) {
> - req_data->done = true;
> + virtio_pmem_wake_one_waiter(vpmem);
> + WRITE_ONCE(req_data->done, true);
> wake_up(&req_data->host_acked);
> -
> - if (!list_empty(&vpmem->req_list)) {
> - req_buf = list_first_entry(&vpmem->req_list,
> - struct virtio_pmem_request, list);
> - req_buf->wq_buf_avail = true;
> - wake_up(&req_buf->wq_buf);
> - list_del(&req_buf->list);
> - }
> }
> spin_unlock_irqrestore(&vpmem->pmem_lock, flags);
> }
> @@ -58,7 +65,7 @@ static int virtio_pmem_flush(struct nd_region *nd_region)
> if (!req_data)
> return -ENOMEM;
>
> - req_data->done = false;
> + WRITE_ONCE(req_data->done, false);
> init_waitqueue_head(&req_data->host_acked);
> init_waitqueue_head(&req_data->wq_buf);
> INIT_LIST_HEAD(&req_data->list);
> @@ -79,12 +86,12 @@ static int virtio_pmem_flush(struct nd_region *nd_region)
> GFP_ATOMIC)) == -ENOSPC) {
>
> dev_info(&vdev->dev, "failed to send command to virtio pmem
> device, no free slots in the virtqueue\n");
> - req_data->wq_buf_avail = false;
> + WRITE_ONCE(req_data->wq_buf_avail, false);
> list_add_tail(&req_data->list, &vpmem->req_list);
> spin_unlock_irqrestore(&vpmem->pmem_lock, flags);
>
> /* A host response results in "host_ack" getting called */
> - wait_event(req_data->wq_buf, req_data->wq_buf_avail);
> + wait_event(req_data->wq_buf,
> READ_ONCE(req_data->wq_buf_avail));
> spin_lock_irqsave(&vpmem->pmem_lock, flags);
> }
> err1 = virtqueue_kick(vpmem->req_vq);
> @@ -98,7 +105,7 @@ static int virtio_pmem_flush(struct nd_region *nd_region)
> err = -EIO;
> } else {
> /* A host response results in "host_ack" getting called */
> - wait_event(req_data->host_acked, req_data->done);
> + wait_event(req_data->host_acked, READ_ONCE(req_data->done));
> err = le32_to_cpu(req_data->resp.ret);
> }
>
> --
> 2.52.0
>