NVMe disk driver disables sg gaps by setting the flag in its queue. md-raids must propagate the flag to its queue as well. Otherwise, an upper user will get OK from bio_add_page() even if a gap exists, and that bio with a gap will crash NVMe driver who doesn't expect gaps. (see bvec_gap_to_prev for the definotion of gap)
https://jira.sw.ru/browse/PSBM-56838 Signed-off-by: Maxim Patlasov <[email protected]> --- drivers/md/linear.c | 9 +++++++++ drivers/md/raid0.c | 8 ++++++++ drivers/md/raid1.c | 11 +++++++++++ drivers/md/raid10.c | 12 ++++++++++++ drivers/md/raid5.c | 10 ++++++++++ include/linux/blkdev.h | 1 + 6 files changed, 51 insertions(+) diff --git a/drivers/md/linear.c b/drivers/md/linear.c index 3310b59..e57b8ff 100644 --- a/drivers/md/linear.c +++ b/drivers/md/linear.c @@ -128,6 +128,7 @@ static struct linear_conf *linear_conf(struct mddev *mddev, int raid_disks) struct md_rdev *rdev; int i, cnt; bool discard_supported = false; + bool sg_gaps_disabled = false; conf = kzalloc (sizeof (*conf) + raid_disks*sizeof(struct dev_info), GFP_KERNEL); @@ -163,6 +164,9 @@ static struct linear_conf *linear_conf(struct mddev *mddev, int raid_disks) if (blk_queue_discard(bdev_get_queue(rdev->bdev))) discard_supported = true; + + if (blk_queue_sg_gaps(bdev_get_queue(rdev->bdev))) + sg_gaps_disabled = true; } if (cnt != raid_disks) { printk(KERN_ERR "md/linear:%s: not enough drives present. Aborting!\n", @@ -175,6 +179,11 @@ static struct linear_conf *linear_conf(struct mddev *mddev, int raid_disks) else queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mddev->queue); + if (!sg_gaps_disabled) + queue_flag_clear_unlocked(QUEUE_FLAG_SG_GAPS, mddev->queue); + else + queue_flag_set_unlocked(QUEUE_FLAG_SG_GAPS, mddev->queue); + /* * Here we calculate the device offsets. */ diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index 100ef23..2b77d0f 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -435,6 +435,7 @@ static int raid0_run(struct mddev *mddev) if (mddev->queue) { struct md_rdev *rdev; bool discard_supported = false; + bool sg_gaps_disabled = false; blk_queue_max_hw_sectors(mddev->queue, mddev->chunk_sectors); blk_queue_max_write_same_sectors(mddev->queue, mddev->chunk_sectors); @@ -449,6 +450,8 @@ static int raid0_run(struct mddev *mddev) rdev->data_offset << 9); if (blk_queue_discard(bdev_get_queue(rdev->bdev))) discard_supported = true; + if (blk_queue_sg_gaps(bdev_get_queue(rdev->bdev))) + sg_gaps_disabled = true; } /* Unfortunately, some devices have awful discard performance, @@ -470,6 +473,11 @@ static int raid0_run(struct mddev *mddev) queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, mddev->queue); else queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mddev->queue); + + if (!sg_gaps_disabled) + queue_flag_clear_unlocked(QUEUE_FLAG_SG_GAPS, mddev->queue); + else + queue_flag_set_unlocked(QUEUE_FLAG_SG_GAPS, mddev->queue); } /* calculate array device size */ diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index b45b64c..a1763b5 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1663,6 +1663,8 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev) md_integrity_add_rdev(rdev, mddev); if (mddev->queue && blk_queue_discard(bdev_get_queue(rdev->bdev))) queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mddev->queue); + if (mddev->queue && blk_queue_sg_gaps(bdev_get_queue(rdev->bdev))) + queue_flag_set_unlocked(QUEUE_FLAG_SG_GAPS, mddev->queue); print_conf(conf); return err; } @@ -2905,6 +2907,7 @@ static int run(struct mddev *mddev) struct md_rdev *rdev; int ret; bool discard_supported = false; + bool sg_gaps_disabled = false; if (mddev->level != 1) { printk(KERN_ERR "md/raid1:%s: raid level not set to mirroring (%d)\n", @@ -2939,6 +2942,8 @@ static int run(struct mddev *mddev) rdev->data_offset << 9); if (blk_queue_discard(bdev_get_queue(rdev->bdev))) discard_supported = true; + if (blk_queue_sg_gaps(bdev_get_queue(rdev->bdev))) + sg_gaps_disabled = true; } mddev->degraded = 0; @@ -2976,6 +2981,12 @@ static int run(struct mddev *mddev) else queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, mddev->queue); + if (sg_gaps_disabled) + queue_flag_set_unlocked(QUEUE_FLAG_SG_GAPS, + mddev->queue); + else + queue_flag_clear_unlocked(QUEUE_FLAG_SG_GAPS, + mddev->queue); } ret = md_integrity_register(mddev); diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index f0c7f3b..52f8d73 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -1865,6 +1865,8 @@ static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev) md_integrity_add_rdev(rdev, mddev); if (mddev->queue && blk_queue_discard(bdev_get_queue(rdev->bdev))) queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mddev->queue); + if (mddev->queue && blk_queue_sg_gaps(bdev_get_queue(rdev->bdev))) + queue_flag_set_unlocked(QUEUE_FLAG_SG_GAPS, mddev->queue); print_conf(conf); return err; @@ -3650,6 +3652,7 @@ static int run(struct mddev *mddev) sector_t min_offset_diff = 0; int first = 1; bool discard_supported = false; + bool sg_gaps_disabled = false; if (mddev->private == NULL) { conf = setup_conf(mddev); @@ -3717,6 +3720,9 @@ static int run(struct mddev *mddev) if (blk_queue_discard(bdev_get_queue(rdev->bdev))) discard_supported = true; + + if (blk_queue_sg_gaps(bdev_get_queue(rdev->bdev))) + sg_gaps_disabled = true; } if (mddev->queue) { @@ -3726,6 +3732,12 @@ static int run(struct mddev *mddev) else queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, mddev->queue); + if (sg_gaps_disabled) + queue_flag_set_unlocked(QUEUE_FLAG_SG_GAPS, + mddev->queue); + else + queue_flag_clear_unlocked(QUEUE_FLAG_SG_GAPS, + mddev->queue); } /* need to check that every block has at least one working mirror */ if (!enough(conf, -1)) { diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 50902ad1..862b86f 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -6814,6 +6814,7 @@ static int run(struct mddev *mddev) if (mddev->queue) { int chunk_size; bool discard_supported = true; + bool sg_gaps_disabled = false; /* read-ahead size must cover two whole stripes, which * is 2 * (datadisks) * chunksize where 'n' is the * number of raid devices @@ -6878,6 +6879,8 @@ static int run(struct mddev *mddev) } discard_supported = false; } + if (blk_queue_sg_gaps(bdev_get_queue(rdev->bdev))) + sg_gaps_disabled = true; } if (discard_supported && @@ -6888,6 +6891,13 @@ static int run(struct mddev *mddev) else queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, mddev->queue); + + if (sg_gaps_disabled) + queue_flag_set_unlocked(QUEUE_FLAG_SG_GAPS, + mddev->queue); + else + queue_flag_clear_unlocked(QUEUE_FLAG_SG_GAPS, + mddev->queue); } return 0; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 8b400c6..e3752b1 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -639,6 +639,7 @@ static inline void queue_flag_clear(unsigned int flag, struct request_queue *q) #define blk_queue_discard(q) test_bit(QUEUE_FLAG_DISCARD, &(q)->queue_flags) #define blk_queue_secdiscard(q) (blk_queue_discard(q) && \ test_bit(QUEUE_FLAG_SECDISCARD, &(q)->queue_flags)) +#define blk_queue_sg_gaps(q) test_bit(QUEUE_FLAG_SG_GAPS, &(q)->queue_flags) #define blk_noretry_request(rq) \ ((rq)->cmd_flags & (REQ_FAILFAST_DEV|REQ_FAILFAST_TRANSPORT| \ _______________________________________________ Devel mailing list [email protected] https://lists.openvz.org/mailman/listinfo/devel
