pmem_submit_bio() passes the parent bio to nvdimm_flush() for
REQ_FUA. For virtio-pmem this makes async_pmem_flush() allocate
and submit a child PREFLUSH bio chained to the parent.

That child allocation is in the block submit path. Making it
blocking with GFP_NOIO can consume the same global bio mempool that
submit_bio() uses, while making it GFP_ATOMIC can fail under
pressure. A forced failure of the child allocation produced:

virtio_pmem: forcing child bio allocation failure for test
Buffer I/O error on dev pmem0, logical block 0, lost sync page write
EXT4-fs (pmem0): I/O error while writing superblock
EXT4-fs (pmem0): mount failed

Avoid the child bio without turning REQ_FUA into a synchronous
submit-path wait. Let provider flush callbacks return
NVDIMM_FLUSH_ASYNC after taking ownership of parent bio completion.
pmem_submit_bio() returns in that case, and virtio-pmem queues an
ordered WQ_MEM_RECLAIM work item that runs the existing host flush
path and completes the parent bio.

This keeps the asynchronous completion model of the child-bio path
while removing the child bio allocation from the submit path.

Signed-off-by: Li Chen <[email protected]>
---
Changes in v7:
- Replace synchronous FUA flushing with provider-owned asynchronous parent bio
  completion.
- Add NVDIMM_FLUSH_ASYNC and ordered WQ_MEM_RECLAIM flush work.
Changes in v6:
- Replace the child bio allocation fix with synchronous FUA flushing.

 drivers/nvdimm/nd_virtio.c   | 54 +++++++++++++++++++++++++-----------
 drivers/nvdimm/pmem.c        |  5 +++-
 drivers/nvdimm/region_devs.c |  2 ++
 drivers/nvdimm/virtio_pmem.c | 17 +++++++++++-
 drivers/nvdimm/virtio_pmem.h |  4 +++
 include/linux/libnvdimm.h    |  9 ++++++
 6 files changed, 73 insertions(+), 18 deletions(-)

diff --git a/drivers/nvdimm/nd_virtio.c b/drivers/nvdimm/nd_virtio.c
index 4176046627beb..8e16b7780be1a 100644
--- a/drivers/nvdimm/nd_virtio.c
+++ b/drivers/nvdimm/nd_virtio.c
@@ -9,6 +9,12 @@
 #include "virtio_pmem.h"
 #include "nd.h"
 
+struct virtio_pmem_flush_work {
+       struct work_struct work;
+       struct nd_region *nd_region;
+       struct bio *bio;
+};
+
  /* The interrupt handler */
 void virtio_pmem_host_ack(struct virtqueue *vq)
 {
@@ -107,30 +113,46 @@ static int virtio_pmem_flush(struct nd_region *nd_region)
        return err;
 };
 
+static void virtio_pmem_flush_work(struct work_struct *work)
+{
+       struct virtio_pmem_flush_work *flush;
+       int err;
+
+       flush = container_of(work, struct virtio_pmem_flush_work, work);
+       err = virtio_pmem_flush(flush->nd_region);
+       if (err > 0)
+               err = -EIO;
+       if (err)
+               flush->bio->bi_status = errno_to_blk_status(err);
+       bio_endio(flush->bio);
+       kfree(flush);
+}
+
 /* The asynchronous flush callback function */
 int async_pmem_flush(struct nd_region *nd_region, struct bio *bio)
 {
-       /*
-        * Create child bio for asynchronous flush and chain with
-        * parent bio. Otherwise directly call nd_region flush.
-        */
-       if (bio && bio->bi_iter.bi_sector != -1) {
-               struct bio *child = bio_alloc(bio->bi_bdev, 0,
-                                             REQ_OP_WRITE | REQ_PREFLUSH,
-                                             GFP_ATOMIC);
+       struct virtio_device *vdev = nd_region->provider_data;
+       struct virtio_pmem *vpmem = vdev->priv;
+       struct virtio_pmem_flush_work *flush;
+       int err;
 
-               if (!child)
+       if (bio && bio->bi_iter.bi_sector != -1) {
+               flush = kmalloc_obj(*flush, GFP_NOIO);
+               if (!flush)
                        return -ENOMEM;
-               bio_clone_blkg_association(child, bio);
-               child->bi_iter.bi_sector = -1;
-               bio_chain(child, bio);
-               submit_bio(child);
-               return 0;
+
+               INIT_WORK(&flush->work, virtio_pmem_flush_work);
+               flush->nd_region = nd_region;
+               flush->bio = bio;
+               queue_work(vpmem->flush_wq, &flush->work);
+               return NVDIMM_FLUSH_ASYNC;
        }
-       if (virtio_pmem_flush(nd_region))
+
+       err = virtio_pmem_flush(nd_region);
+       if (err > 0)
                return -EIO;
 
-       return 0;
+       return err;
 };
 EXPORT_SYMBOL_GPL(async_pmem_flush);
 MODULE_DESCRIPTION("Virtio Persistent Memory Driver");
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index 82ee1ddb3a445..30a51c365ce8b 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -241,8 +241,11 @@ static void pmem_submit_bio(struct bio *bio)
                        bio_end_io_acct(bio, start);
        }
 
-       if ((bio->bi_opf & REQ_FUA) && !bio->bi_status)
+       if ((bio->bi_opf & REQ_FUA) && !bio->bi_status) {
                ret = nvdimm_flush(nd_region, bio);
+               if (ret == NVDIMM_FLUSH_ASYNC)
+                       return;
+       }
 
        if (ret)
                bio->bi_status = errno_to_blk_status(ret);
diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c
index 7cd2c2f0d3121..c540f1cff9250 100644
--- a/drivers/nvdimm/region_devs.c
+++ b/drivers/nvdimm/region_devs.c
@@ -1116,6 +1116,8 @@ int nvdimm_flush(struct nd_region *nd_region, struct bio 
*bio)
                rc = generic_nvdimm_flush(nd_region);
        else {
                rc = nd_region->flush(nd_region, bio);
+               if (rc > 0)
+                       return rc;
                if (rc && rc != -ENOMEM)
                        rc = -EIO;
        }
diff --git a/drivers/nvdimm/virtio_pmem.c b/drivers/nvdimm/virtio_pmem.c
index 77b1966619059..9cf822a6c0c38 100644
--- a/drivers/nvdimm/virtio_pmem.c
+++ b/drivers/nvdimm/virtio_pmem.c
@@ -67,10 +67,17 @@ static int virtio_pmem_probe(struct virtio_device *vdev)
        mutex_init(&vpmem->flush_lock);
        vpmem->vdev = vdev;
        vdev->priv = vpmem;
+       vpmem->flush_wq = alloc_ordered_workqueue("virtio-pmem-flush",
+                                                 WQ_MEM_RECLAIM);
+       if (!vpmem->flush_wq) {
+               err = -ENOMEM;
+               goto out_err;
+       }
+
        err = init_vq(vpmem);
        if (err) {
                dev_err(&vdev->dev, "failed to initialize virtio pmem vq's\n");
-               goto out_err;
+               goto out_wq;
        }
 
        if (virtio_has_feature(vdev, VIRTIO_PMEM_F_SHMEM_REGION)) {
@@ -131,6 +138,8 @@ static int virtio_pmem_probe(struct virtio_device *vdev)
        nvdimm_bus_unregister(vpmem->nvdimm_bus);
 out_vq:
        vdev->config->del_vqs(vdev);
+out_wq:
+       destroy_workqueue(vpmem->flush_wq);
 out_err:
        return err;
 }
@@ -138,14 +147,20 @@ static int virtio_pmem_probe(struct virtio_device *vdev)
 static void virtio_pmem_remove(struct virtio_device *vdev)
 {
        struct nvdimm_bus *nvdimm_bus = dev_get_drvdata(&vdev->dev);
+       struct virtio_pmem *vpmem = vdev->priv;
 
        nvdimm_bus_unregister(nvdimm_bus);
+       drain_workqueue(vpmem->flush_wq);
        vdev->config->del_vqs(vdev);
        virtio_reset_device(vdev);
+       destroy_workqueue(vpmem->flush_wq);
 }
 
 static int virtio_pmem_freeze(struct virtio_device *vdev)
 {
+       struct virtio_pmem *vpmem = vdev->priv;
+
+       drain_workqueue(vpmem->flush_wq);
        vdev->config->del_vqs(vdev);
        virtio_reset_device(vdev);
 
diff --git a/drivers/nvdimm/virtio_pmem.h b/drivers/nvdimm/virtio_pmem.h
index f72cf17f9518f..e6dfc10ce0762 100644
--- a/drivers/nvdimm/virtio_pmem.h
+++ b/drivers/nvdimm/virtio_pmem.h
@@ -15,6 +15,7 @@
 #include <linux/libnvdimm.h>
 #include <linux/mutex.h>
 #include <linux/spinlock.h>
+#include <linux/workqueue.h>
 
 struct virtio_pmem_request {
        struct virtio_pmem_req req;
@@ -39,6 +40,9 @@ struct virtio_pmem {
        /* Serialize flush requests to the device. */
        struct mutex flush_lock;
 
+       /* Complete asynchronous FUA flushes outside the submit path. */
+       struct workqueue_struct *flush_wq;
+
        /* nvdimm bus registers virtio pmem device */
        struct nvdimm_bus *nvdimm_bus;
        struct nvdimm_bus_descriptor nd_desc;
diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h
index 28f086c4a1873..d929d83abf3be 100644
--- a/include/linux/libnvdimm.h
+++ b/include/linux/libnvdimm.h
@@ -126,6 +126,15 @@ struct nd_mapping_desc {
 struct bio;
 struct resource;
 struct nd_region;
+
+/*
+ * Provider flush callback return values:
+ *   0: flush completed synchronously
+ *  <0: flush failed
+ *  >0: flush completion was queued and @bio will be completed later
+ */
+#define NVDIMM_FLUSH_ASYNC 1
+
 struct nd_region_desc {
        struct resource *res;
        struct nd_mapping_desc *mapping;
-- 
2.52.0

Reply via email to