BLK_STS_INVAL indicates the I/O request itself was invalid (for example a
misaligned direct I/O), not that the device has failed. dm-raid1 treated
any read or write completion error as a device failure: it failed the
mirror leg, retried on the alternatives - which fail identically - and
eventually returned EIO while spuriously degrading the array.

Since commit 5ff3f74e145a ("block: simplify direct io validity check") the
direct I/O path no longer rejects misaligned buffers up front, so an
invalid bio now reaches the lower block layers, which fail it with
BLK_STS_INVAL. dm-io collapses the block status into a per-region error
bit before invoking the completion callback, so record BLK_STS_INVAL on
the originating bio and have the dm-raid1 read, write and end_io paths
propagate it instead of failing the device.

This mirrors the raid1/raid10 fix in commit f7b24c7b41f23
("md/raid1,raid10: don't fail devices for invalid IO errors") for the
device-mapper mirror target.

Fixes: 7eac33186957 ("iomap: simplify direct io validity check")
Fixes: 5ff3f74e145a ("block: simplify direct io validity check")
Reported-by: Dr. David Alan Gilbert <[email protected]>
Reported-by: Vjaceslavs Klimovs <[email protected]>
Signed-off-by: Keith Busch <[email protected]>
---
Resending patch 2/2 from a different machine. For some reason, only 1/2
is getting through with git-send-email, so manually replying to the
thread with the missing second patch.

 drivers/md/dm-io.c    | 14 +++++++++++++-
 drivers/md/dm-raid1.c | 28 +++++++++++++++++++++++++++-
 2 files changed, 40 insertions(+), 2 deletions(-)

diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c
index 28adfeb58f240..f382e9f9be059 100644
--- a/drivers/md/dm-io.c
+++ b/drivers/md/dm-io.c
@@ -37,6 +37,7 @@ struct io {
        struct dm_io_client *client;
        io_notify_fn callback;
        void *context;
+       struct bio *orig_bio;
        void *vma_invalidate_address;
        unsigned long vma_invalidate_size;
 } __aligned(DM_IO_MAX_REGIONS);
@@ -132,8 +133,18 @@ static void complete_io(struct io *io)
 
 static void dec_count(struct io *io, unsigned int region, blk_status_t error)
 {
-       if (error)
+       if (error) {
                set_bit(region, &io->error_bits);
+               /*
+                * BLK_STS_INVAL means the bio was not valid for the underlying
+                * device (e.g. a misaligned direct I/O), which is a caller error
+                * rather than a device failure. Record it on the original bio so
+                * bio-based targets can propagate it instead of treating it as a
+                * media error and failing the device.
+                */
+               if (error == BLK_STS_INVAL && io->orig_bio)
+                       io->orig_bio->bi_status = error;
+       }
 
        if (atomic_dec_and_test(&io->count))
                complete_io(io);
@@ -398,6 +409,7 @@ static void async_io(struct dm_io_client *client, unsigned int num_regions,
        io->client = client;
        io->callback = fn;
        io->context = context;
+       io->orig_bio = dp->orig_bio;
 
        io->vma_invalidate_address = dp->vma_invalidate_address;
        io->vma_invalidate_size = dp->vma_invalidate_size;
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index de5c00704e69c..022ad791c2957 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -524,6 +524,17 @@ static void read_callback(unsigned long error, void *context)
                return;
        }
 
+       /*
+        * BLK_STS_INVAL means the bio was not valid for the underlying device,
+        * e.g. a misaligned direct I/O. That is a caller error, not a device
+        * failure, so propagate it rather than failing the mirror and retrying
+        * on the other legs, which would fail the same way.
+        */
+       if (bio->bi_status == BLK_STS_INVAL) {
+               bio_endio(bio);
+               return;
+       }
+
        fail_mirror(m, DM_RAID1_READ_ERROR);
 
        if (likely(default_ok(m)) || mirror_available(m->ms, bio)) {
@@ -622,6 +633,16 @@ static void write_callback(unsigned long error, void *context)
                return;
        }
 
+       /*
+        * BLK_STS_INVAL means the bio was not valid for the underlying device,
+        * e.g. a misaligned direct I/O. Propagate the error without degrading
+        * the array.
+        */
+       if (bio->bi_status == BLK_STS_INVAL) {
+               bio_endio(bio);
+               return;
+       }
+
        /*
         * If the bio is discard, return an error, but do not
         * degrade the array.
@@ -1262,7 +1283,12 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio,
                return DM_ENDIO_DONE;
        }
 
-       if (*error == BLK_STS_NOTSUPP)
+       /*
+        * BLK_STS_INVAL means the bio was not valid for the underlying device,
+        * e.g. a misaligned direct I/O. Propagate it rather than failing the
+        * mirror and retrying, which would fail the same way on every leg.
+        */
+       if (*error == BLK_STS_NOTSUPP || *error == BLK_STS_INVAL)
                goto out;
 
        if (bio->bi_opf & REQ_RAHEAD)
-- 
2.52.0



Reply via email to