From: Keith Busch <[email protected]>
For DM_IO_BIO requests, do_region() built each destination bio by walking
the source bio's biovec and re-adding the pages one at a time, tracking
the remaining transfer in sectors. The vector lengths are byte granular
and need not be sector aligned (e.g. a misaligned O_DIRECT buffer split
across pages), so the sector-based accounting could lose a sub-sector
fragment: to_sector() truncated the remainder and the outer loop spun
forever submitting empty bios, hanging the I/O.
There is no need to rebuild the biovec at all. The destination reads into
(or writes from) exactly the same pages as the source bio, so the bio can
simply clone the source's biovec with bio_alloc_clone() and remap it to
the target device. The clone inherits the source's iterator and alignment,
and the block layer splits it to the target's limits on submission, so the
whole region maps to a single cloned bio with no manual page copying or
sector accounting.
This removes the per-page copy path (and its open-coded bvec dpages
helpers) for bio-backed I/O and fixes the hang on misaligned direct I/O to
a dm-mirror device. Page-list, vma and kmem sources keep the existing copy
path.
Fixes: 7eac33186957 ("iomap: simplify direct io validity check")
Fixes: 5ff3f74e145a ("block: simplify direct io validity check")
Reported-by: Dr. David Alan Gilbert <[email protected]>
Reported-by: Vjaceslavs Klimovs <[email protected]>
Signed-off-by: Keith Busch <[email protected]>
---
drivers/md/dm-io.c | 67 +++++++++++++++++-----------------------------
1 file changed, 24 insertions(+), 43 deletions(-)
diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c
index 1db565b376200..28adfeb58f240 100644
--- a/drivers/md/dm-io.c
+++ b/drivers/md/dm-io.c
@@ -170,12 +170,11 @@ struct dpages {
struct page **p, unsigned long *len, unsigned int
*offset);
void (*next_page)(struct dpages *dp);
- union {
- unsigned int context_u;
- struct bvec_iter context_bi;
- };
+ unsigned int context_u;
void *context_ptr;
+ struct bio *orig_bio;
+
void *vma_invalidate_address;
unsigned long vma_invalidate_size;
};
@@ -210,44 +209,6 @@ static void list_dp_init(struct dpages *dp, struct
page_list *pl, unsigned int o
dp->context_ptr = pl;
}
-/*
- * Functions for getting the pages from a bvec.
- */
-static void bio_get_page(struct dpages *dp, struct page **p,
- unsigned long *len, unsigned int *offset)
-{
- struct bio_vec bvec = bvec_iter_bvec((struct bio_vec *)dp->context_ptr,
- dp->context_bi);
-
- *p = bvec.bv_page;
- *len = bvec.bv_len;
- *offset = bvec.bv_offset;
-
- /* avoid figuring it out again in bio_next_page() */
- dp->context_bi.bi_sector = (sector_t)bvec.bv_len;
-}
-
-static void bio_next_page(struct dpages *dp)
-{
- unsigned int len = (unsigned int)dp->context_bi.bi_sector;
-
- bvec_iter_advance((struct bio_vec *)dp->context_ptr,
- &dp->context_bi, len);
-}
-
-static void bio_dp_init(struct dpages *dp, struct bio *bio)
-{
- dp->get_page = bio_get_page;
- dp->next_page = bio_next_page;
-
- /*
- * We just use bvec iterator to retrieve pages, so it is ok to
- * access the bvec table directly here
- */
- dp->context_ptr = bio->bi_io_vec;
- dp->context_bi = bio->bi_iter;
-}
-
/*
* Functions for getting the pages from a VMA.
*/
@@ -332,6 +293,21 @@ static void do_region(const blk_opf_t opf, unsigned int
region,
return;
}
+ if (dp->orig_bio) {
+ bio = bio_alloc_clone(where->bdev, dp->orig_bio, GFP_NOIO,
+ &io->client->bios);
+ bio->bi_iter.bi_sector = where->sector;
+ bio->bi_iter.bi_size = where->count << SECTOR_SHIFT;
+ bio->bi_opf = opf;
+ bio->bi_end_io = endio;
+ bio->bi_ioprio = ioprio;
+ store_io_and_region_in_bio(bio, io, region);
+
+ atomic_inc(&io->count);
+ submit_bio(bio);
+ return;
+ }
+
/*
* where->count may be zero if op holds a flush and we need to
* send a zero-sized flush.
@@ -468,6 +444,7 @@ static int dp_init(struct dm_io_request *io_req, struct
dpages *dp,
dp->vma_invalidate_address = NULL;
dp->vma_invalidate_size = 0;
+ dp->orig_bio = NULL;
switch (io_req->mem.type) {
case DM_IO_PAGE_LIST:
@@ -475,7 +452,11 @@ static int dp_init(struct dm_io_request *io_req, struct
dpages *dp,
break;
case DM_IO_BIO:
- bio_dp_init(dp, io_req->mem.ptr.bio);
+ /*
+ * The destination bios clone this bio's biovec directly, so
+ * there are no per-page accessors to set up here.
+ */
+ dp->orig_bio = io_req->mem.ptr.bio;
break;
case DM_IO_VMA:
--
2.52.0