[RFC v2 0/2] virtio-pmem: Asynchronous flush

2021-07-25 Thread Pankaj Gupta
From: Pankaj Gupta 

 Jeff reported a preflush ordering issue with the existing implementation
 of virtio pmem preflush. Dan suggested[1] implementing an asynchronous
 flush for virtio pmem using a work queue, as done in md/RAID. This patch
 series intends to solve the preflush ordering issue and also makes the
 flush asynchronous for the submitting thread.
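
 For background, the md/RAID scheme this series borrows coalesces flushes
 by timestamp: each request records its arrival time, and if a flush that
 started after that time has already completed, the request piggybacks on
 that flush instead of issuing its own. A minimal sketch of the idea
 (illustrative names only, not the patch code):

	/*
	 * prev_flush_start is when the most recently completed flush
	 * began. A flush that started after the request arrived already
	 * covers the request's writes, so no new flush is needed.
	 */
	static bool flush_needed(ktime_t req_start, ktime_t prev_flush_start)
	{
		return !ktime_before(req_start, prev_flush_start);
	}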

 Submitting this patch series for review. Sorry, it took me a long time
 to come back to this due to some personal reasons.

 RFC v1 -> RFC v2
 - More testing and bug fixes.

 [1] https://marc.info/?l=linux-kernel&m=157446316409937&w=2

Pankaj Gupta (2):
  virtio-pmem: Async virtio-pmem flush
  pmem: enable pmem_submit_bio for asynchronous flush

 drivers/nvdimm/nd_virtio.c   | 72 +++++++++++++++++++++++++++++++++---------
 drivers/nvdimm/pmem.c        | 17 ++++++++++++-----
 drivers/nvdimm/virtio_pmem.c | 10 +++++++++-
 drivers/nvdimm/virtio_pmem.h | 14 ++++++++++++++
 4 files changed, 91 insertions(+), 22 deletions(-)

-- 
2.25.1




[RFC v2 1/2] virtio-pmem: Async virtio-pmem flush

2021-07-25 Thread Pankaj Gupta
From: Pankaj Gupta 

Implement asynchronous flush for virtio pmem using a work queue
to solve the preflush ordering issue. Also, coalesce flush
requests when a flush is already in progress.
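
For illustration, the requests this path coalesces are preflush bios of
the following shape (a sketch, not part of the patch; bio_alloc() here
uses the two-argument signature of the kernels this series targets):

	/* An empty flush request as the block layer issues it: no data,
	 * REQ_PREFLUSH set; for virtio pmem it reaches async_pmem_flush()
	 * via nd_region->flush().
	 */
	struct bio *bio = bio_alloc(GFP_KERNEL, 0);

	bio_set_dev(bio, bdev);
	bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
	submit_bio(bio);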

Signed-off-by: Pankaj Gupta 
---
 drivers/nvdimm/nd_virtio.c   | 72 +++++++++++++++++++++++++++++++++---------
 drivers/nvdimm/virtio_pmem.c | 10 +++++++++-
 drivers/nvdimm/virtio_pmem.h | 14 ++++++++++++++
 3 files changed, 79 insertions(+), 17 deletions(-)

diff --git a/drivers/nvdimm/nd_virtio.c b/drivers/nvdimm/nd_virtio.c
index 10351d5b49fa..61b655b583be 100644
--- a/drivers/nvdimm/nd_virtio.c
+++ b/drivers/nvdimm/nd_virtio.c
@@ -97,29 +97,69 @@ static int virtio_pmem_flush(struct nd_region *nd_region)
 	return err;
 };
 
+static void submit_async_flush(struct work_struct *ws);
+
 /* The asynchronous flush callback function */
 int async_pmem_flush(struct nd_region *nd_region, struct bio *bio)
 {
-	/*
-	 * Create child bio for asynchronous flush and chain with
-	 * parent bio. Otherwise directly call nd_region flush.
+	/* queue asynchronous flush and coalesce the flush requests */
+	struct virtio_device *vdev = nd_region->provider_data;
+	struct virtio_pmem *vpmem  = vdev->priv;
+	ktime_t req_start = ktime_get_boottime();
+
+	spin_lock_irq(&vpmem->lock);
+	/* flush requests wait until ongoing flush completes,
+	 * hence coalescing all the pending requests.
 	 */
-	if (bio && bio->bi_iter.bi_sector != -1) {
-		struct bio *child = bio_alloc(GFP_ATOMIC, 0);
-
-		if (!child)
-			return -ENOMEM;
-		bio_copy_dev(child, bio);
-		child->bi_opf = REQ_PREFLUSH;
-		child->bi_iter.bi_sector = -1;
-		bio_chain(child, bio);
-		submit_bio(child);
-		return 0;
+	wait_event_lock_irq(vpmem->sb_wait,
+			    !vpmem->flush_bio ||
+			    ktime_before(req_start, vpmem->prev_flush_start),
+			    vpmem->lock);
+	/* new request after previous flush is completed */
+	if (ktime_after(req_start, vpmem->prev_flush_start)) {
+		WARN_ON(vpmem->flush_bio);
+		vpmem->flush_bio = bio;
+		bio = NULL;
+	}
+	spin_unlock_irq(&vpmem->lock);
+
+	if (!bio) {
+		INIT_WORK(&vpmem->flush_work, submit_async_flush);
+		queue_work(vpmem->pmem_wq, &vpmem->flush_work);
+		return 1;
+	}
+
+	/* flush completed in other context while we waited */
+	if (bio && (bio->bi_opf & REQ_PREFLUSH)) {
+		bio->bi_opf &= ~REQ_PREFLUSH;
+		submit_bio(bio);
+	} else if (bio && (bio->bi_opf & REQ_FUA)) {
+		bio->bi_opf &= ~REQ_FUA;
+		bio_endio(bio);
 	}
-	if (virtio_pmem_flush(nd_region))
-		return -EIO;
 
 	return 0;
 };
 EXPORT_SYMBOL_GPL(async_pmem_flush);
+
+static void submit_async_flush(struct work_struct *ws)
+{
+	struct virtio_pmem *vpmem = container_of(ws, struct virtio_pmem, flush_work);
+	struct bio *bio = vpmem->flush_bio;
+
+	vpmem->start_flush = ktime_get_boottime();
+	bio->bi_status = errno_to_blk_status(virtio_pmem_flush(vpmem->nd_region));
+	vpmem->prev_flush_start = vpmem->start_flush;
+	vpmem->flush_bio = NULL;
+	wake_up(&vpmem->sb_wait);
+
+	/* Submit parent bio only for PREFLUSH */
+	if (bio && (bio->bi_opf & REQ_PREFLUSH)) {
+		bio->bi_opf &= ~REQ_PREFLUSH;
+		submit_bio(bio);
+	} else if (bio && (bio->bi_opf & REQ_FUA)) {
+		bio->bi_opf &= ~REQ_FUA;
+		bio_endio(bio);
+	}
+}
 MODULE_LICENSE("GPL");
diff --git a/drivers/nvdimm/virtio_pmem.c b/drivers/nvdimm/virtio_pmem.c
index 726c7354d465..56780a6140c7 100644
--- a/drivers/nvdimm/virtio_pmem.c
+++ b/drivers/nvdimm/virtio_pmem.c
@@ -24,6 +24,7 @@ static int init_vq(struct virtio_pmem *vpmem)
 		return PTR_ERR(vpmem->req_vq);
 
 	spin_lock_init(&vpmem->pmem_lock);
+	spin_lock_init(&vpmem->lock);
 	INIT_LIST_HEAD(&vpmem->req_list);
 
 	return 0;
@@ -57,7 +58,12 @@ static int virtio_pmem_probe(struct virtio_device *vdev)
 		dev_err(&vdev->dev, "failed to initialize virtio pmem vq's\n");
 		goto out_err;
 	}
-
+	vpmem->pmem_wq = alloc_workqueue("vpmem_wq", WQ_MEM_RECLAIM, 0);
+	if (!vpmem->pmem_wq) {
+		err = -ENOMEM;
+		goto out_err;
+	}
+	init_waitqueue_head(&vpmem->sb_wait);
 	virtio_cread_le(vpmem->vdev, struct virtio_pmem_config,
 			start, &vpmem->start);
 	virtio_cread_le(vpmem->vdev, struct virtio_pmem_config,
@@ -90,10 +96,12 @@ static int virtio_pmem_probe(struct virtio_device *vdev)
 		goto out_nd;
 	}
 	nd_region->provider_data = dev_to_virtio(nd_regi
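
The drivers/nvdimm/virtio_pmem.h hunk counted in the diffstat is not
shown above; inferred from how nd_virtio.c uses them, the new fields in
struct virtio_pmem would look roughly like this (a reconstruction, not
the original hunk):

	/* Reconstructed sketch of the fields this patch relies on */
	spinlock_t lock;			/* protects flush_bio and flush timestamps */
	wait_queue_head_t sb_wait;		/* waiters coalescing on an in-flight flush */
	struct work_struct flush_work;		/* runs submit_async_flush() */
	struct workqueue_struct *pmem_wq;	/* workqueue the flush work is queued on */
	struct bio *flush_bio;			/* bio whose preflush is being serviced */
	ktime_t start_flush;			/* when the current flush began */
	ktime_t prev_flush_start;		/* start time of the last completed flush */
	struct nd_region *nd_region;		/* region passed to virtio_pmem_flush() */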

[RFC v2 2/2] pmem: Enable pmem_submit_bio for asynchronous flush

2021-07-25 Thread Pankaj Gupta
From: Pankaj Gupta 

Return from "pmem_submit_bio" when an asynchronous flush is in
progress in another context.
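
For reference, the dispatcher this return value flows through is
nvdimm_flush() in drivers/nvdimm/region_devs.c, approximately as below;
for virtio pmem, nd_region->flush is async_pmem_flush(), so a nonzero
return combined with a non-NULL nd_region->flush means the bio has been
handed to the flush worker and must not be completed here:

	/* Paraphrased for context, not part of this patch. */
	int nvdimm_flush(struct nd_region *nd_region, struct bio *bio)
	{
		int rc = 0;

		/* A provider callback, when present, takes over the bio;
		 * otherwise fall back to the synchronous generic flush.
		 */
		if (!nd_region->flush)
			rc = generic_nvdimm_flush(nd_region);
		else if (nd_region->flush(nd_region, bio))
			rc = -EIO;

		return rc;
	}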

Signed-off-by: Pankaj Gupta 
---
 drivers/nvdimm/pmem.c | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index 1e0615b8565e..3ca1fa88a5e7 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -201,8 +201,13 @@ static blk_qc_t pmem_submit_bio(struct bio *bio)
 	struct pmem_device *pmem = bio->bi_bdev->bd_disk->private_data;
 	struct nd_region *nd_region = to_region(pmem);
 
-	if (bio->bi_opf & REQ_PREFLUSH)
-		ret = nvdimm_flush(nd_region, bio);
+	if ((bio->bi_opf & REQ_PREFLUSH) &&
+	    nvdimm_flush(nd_region, bio)) {
+
+		/* asynchronous flush completes in other context */
+		if (nd_region->flush)
+			return BLK_QC_T_NONE;
+	}
 
 	do_acct = blk_queue_io_stat(bio->bi_bdev->bd_disk->queue);
 	if (do_acct)
@@ -222,11 +227,13 @@ static blk_qc_t pmem_submit_bio(struct bio *bio)
 	if (do_acct)
 		bio_end_io_acct(bio, start);
 
-	if (bio->bi_opf & REQ_FUA)
+	if (bio->bi_opf & REQ_FUA) {
 		ret = nvdimm_flush(nd_region, bio);
 
-	if (ret)
-		bio->bi_status = errno_to_blk_status(ret);
+		/* asynchronous flush completes in other context */
+		if (nd_region->flush)
+			return BLK_QC_T_NONE;
+	}
 
 	bio_endio(bio);
 	return BLK_QC_T_NONE;
-- 
2.25.1