From: Mikulas Patocka <[email protected]>
Update DM core's IO submission to allocate required memory using
GFP_NOWAIT if REQ_NOWAIT is set. Lone exception is in the error path
where DM's requeue support needs to allocate a clone bio in
dm_io_rewind() to allow the IO to be resubmitted: GFP_NOWAIT is used
first but if it fails GFP_NOIO is used as a last resort.
Tested with simple test provided in commit a9ce385344f916 ("dm: don't
attempt to queue IO under RCU protection") that was enhanced to check
error codes. Also tested using fio's pvsync2 with nowait=1.
But testing with induced GFP_NOWAIT allocation failures wasn't
performed.
Signed-off-by: Mikulas Patocka <[email protected]>
Co-developed-by: Mike Snitzer <[email protected]>
Signed-off-by: Mike Snitzer <[email protected]>
---
drivers/md/dm-io-rewind.c | 13 ++++++--
drivers/md/dm.c | 66 ++++++++++++++++++++++++++++++++-------
2 files changed, 65 insertions(+), 14 deletions(-)
diff --git a/drivers/md/dm-io-rewind.c b/drivers/md/dm-io-rewind.c
index 6155b0117c9d..bde5a53e2d88 100644
--- a/drivers/md/dm-io-rewind.c
+++ b/drivers/md/dm-io-rewind.c
@@ -143,8 +143,17 @@ static void dm_bio_rewind(struct bio *bio, unsigned int
bytes)
void dm_io_rewind(struct dm_io *io, struct bio_set *bs)
{
struct bio *orig = io->orig_bio;
- struct bio *new_orig = bio_alloc_clone(orig->bi_bdev, orig,
- GFP_NOIO, bs);
+ struct bio *new_orig;
+
+ new_orig = bio_alloc_clone(orig->bi_bdev, orig, GFP_NOWAIT, bs);
+ if (unlikely(!new_orig)) {
+ /*
+ * Returning early and failing rewind isn't an option, even
+ * if orig has REQ_NOWAIT set, so retry alloc with GFP_NOIO.
+ */
+ new_orig = bio_alloc_clone(orig->bi_bdev, orig, GFP_NOIO, bs);
+ }
+
/*
* dm_bio_rewind can restore to previous position since the
* end sector is fixed for original bio, but we still need
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 1113a8da3c47..b0e1425af475 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -87,6 +87,8 @@ struct clone_info {
unsigned int sector_count;
bool is_abnormal_io:1;
bool submit_as_polled:1;
+ bool is_nowait:1;
+ bool nowait_failed:1;
};
static inline struct dm_target_io *clone_to_tio(struct bio *clone)
@@ -576,7 +578,13 @@ static struct dm_io *alloc_io(struct mapped_device *md,
struct bio *bio)
struct dm_target_io *tio;
struct bio *clone;
- clone = bio_alloc_clone(NULL, bio, GFP_NOIO, &md->mempools->io_bs);
+ if (unlikely(bio->bi_opf & REQ_NOWAIT)) {
+ clone = bio_alloc_clone(NULL, bio, GFP_NOWAIT,
&md->mempools->io_bs);
+ if (!clone)
+ return NULL;
+ } else {
+ clone = bio_alloc_clone(NULL, bio, GFP_NOIO,
&md->mempools->io_bs);
+ }
tio = clone_to_tio(clone);
tio->flags = 0;
dm_tio_set_flag(tio, DM_TIO_INSIDE_DM_IO);
@@ -1500,9 +1508,17 @@ static void alloc_multiple_bios(struct bio_list *blist,
struct clone_info *ci,
mutex_unlock(&ci->io->md->table_devices_lock);
if (bio_nr == num_bios)
return;
-
+ /*
+ * Failed to allocate all bios, all or nothing, so rewind
partial
+ * allocations and short-circuit retry with GFP_NOIO if
'is_nowait'
+ */
while ((bio = bio_list_pop(blist)))
free_tio(bio);
+
+ if (ci->is_nowait) {
+ ci->nowait_failed = true;
+ return;
+ }
}
}
@@ -1519,7 +1535,15 @@ static unsigned int __send_duplicate_bios(struct
clone_info *ci, struct dm_targe
case 1:
if (len)
setup_split_accounting(ci, *len);
- clone = alloc_tio(ci, ti, 0, len, GFP_NOIO);
+ if (unlikely(ci->is_nowait)) {
+ clone = alloc_tio(ci, ti, 0, len, GFP_NOWAIT);
+ if (!clone) {
+ ci->nowait_failed = true;
+ return 0;
+ }
+ } else {
+ clone = alloc_tio(ci, ti, 0, len, GFP_NOIO);
+ }
__map_bio(clone);
ret = 1;
break;
@@ -1539,7 +1563,7 @@ static unsigned int __send_duplicate_bios(struct
clone_info *ci, struct dm_targe
return ret;
}
-static void __send_empty_flush(struct clone_info *ci)
+static blk_status_t __send_empty_flush(struct clone_info *ci)
{
struct dm_table *t = ci->map;
struct bio flush_bio;
@@ -1572,6 +1596,8 @@ static void __send_empty_flush(struct clone_info *ci)
atomic_sub(1, &ci->io->io_count);
bio_uninit(ci->bio);
+
+ return likely(!ci->nowait_failed) ? BLK_STS_OK : BLK_STS_AGAIN;
}
static void __send_changing_extent_only(struct clone_info *ci, struct
dm_target *ti,
@@ -1656,7 +1682,7 @@ static blk_status_t __process_abnormal_io(struct
clone_info *ci,
__send_changing_extent_only(ci, ti, num_bios,
max_granularity, max_sectors);
- return BLK_STS_OK;
+ return likely(!ci->nowait_failed) ? BLK_STS_OK : BLK_STS_AGAIN;
}
/*
@@ -1714,7 +1740,7 @@ static blk_status_t __split_and_process_bio(struct
clone_info *ci)
if (unlikely(!ti))
return BLK_STS_IOERR;
- if (unlikely((ci->bio->bi_opf & REQ_NOWAIT) != 0) &&
+ if (unlikely(ci->is_nowait) &&
unlikely(!dm_target_supports_nowait(ti->type)))
return BLK_STS_NOTSUPP;
@@ -1729,7 +1755,15 @@ static blk_status_t __split_and_process_bio(struct
clone_info *ci)
len = min_t(sector_t, max_io_len(ti, ci->sector), ci->sector_count);
setup_split_accounting(ci, len);
- clone = alloc_tio(ci, ti, 0, &len, GFP_NOIO);
+ if (unlikely(ci->is_nowait)) {
+ clone = alloc_tio(ci, ti, 0, &len, GFP_NOWAIT);
+ if (unlikely(!clone)) {
+ ci->nowait_failed = true; /* unchecked, set for
consistency */
+ return BLK_STS_AGAIN;
+ }
+ } else {
+ clone = alloc_tio(ci, ti, 0, &len, GFP_NOIO);
+ }
__map_bio(clone);
ci->sector += len;
@@ -1738,11 +1772,13 @@ static blk_status_t __split_and_process_bio(struct
clone_info *ci)
return BLK_STS_OK;
}
-static void init_clone_info(struct clone_info *ci, struct mapped_device *md,
+static void init_clone_info(struct clone_info *ci, struct dm_io *io,
struct dm_table *map, struct bio *bio, bool
is_abnormal)
{
+ ci->is_nowait = !!(bio->bi_opf & REQ_NOWAIT);
+ ci->nowait_failed = false;
ci->map = map;
- ci->io = alloc_io(md, bio);
+ ci->io = io;
ci->bio = bio;
ci->is_abnormal_io = is_abnormal;
ci->submit_as_polled = false;
@@ -1777,11 +1813,17 @@ static void dm_split_and_process_bio(struct
mapped_device *md,
return;
}
- init_clone_info(&ci, md, map, bio, is_abnormal);
- io = ci.io;
+ io = alloc_io(md, bio);
+ if (!io) {
+ /* Unable to do anything without dm_io. */
+ bio_wouldblock_error(bio);
+ return;
+ }
+
+ init_clone_info(&ci, io, map, bio, is_abnormal);
if (bio->bi_opf & REQ_PREFLUSH) {
- __send_empty_flush(&ci);
+ error = __send_empty_flush(&ci);
/* dm_io_complete submits any data associated with flush */
goto out;
}
--
2.40.0