The NVMe disk driver disables sg gaps by setting the QUEUE_FLAG_SG_GAPS flag
in its queue. The md-raid personalities must propagate that flag to their own
queues as well. Otherwise, an upper-layer user will get OK from bio_add_page()
even if a gap exists, and that bio with a gap will crash the NVMe driver, which
does not expect gaps.
(see bvec_gap_to_prev() for the definition of a gap)

https://jira.sw.ru/browse/PSBM-56838

Signed-off-by: Maxim Patlasov <[email protected]>
---
 drivers/md/linear.c    |    9 +++++++++
 drivers/md/raid0.c     |    8 ++++++++
 drivers/md/raid1.c     |   11 +++++++++++
 drivers/md/raid10.c    |   12 ++++++++++++
 drivers/md/raid5.c     |   10 ++++++++++
 include/linux/blkdev.h |    1 +
 6 files changed, 51 insertions(+)

diff --git a/drivers/md/linear.c b/drivers/md/linear.c
index 3310b59..e57b8ff 100644
--- a/drivers/md/linear.c
+++ b/drivers/md/linear.c
@@ -128,6 +128,7 @@ static struct linear_conf *linear_conf(struct mddev *mddev, 
int raid_disks)
        struct md_rdev *rdev;
        int i, cnt;
        bool discard_supported = false;
+       bool sg_gaps_disabled = false;
 
        conf = kzalloc (sizeof (*conf) + raid_disks*sizeof(struct dev_info),
                        GFP_KERNEL);
@@ -163,6 +164,9 @@ static struct linear_conf *linear_conf(struct mddev *mddev, 
int raid_disks)
 
                if (blk_queue_discard(bdev_get_queue(rdev->bdev)))
                        discard_supported = true;
+
+               if (blk_queue_sg_gaps(bdev_get_queue(rdev->bdev)))
+                       sg_gaps_disabled = true;
        }
        if (cnt != raid_disks) {
                printk(KERN_ERR "md/linear:%s: not enough drives present. 
Aborting!\n",
@@ -175,6 +179,11 @@ static struct linear_conf *linear_conf(struct mddev 
*mddev, int raid_disks)
        else
                queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mddev->queue);
 
+       if (!sg_gaps_disabled)
+               queue_flag_clear_unlocked(QUEUE_FLAG_SG_GAPS, mddev->queue);
+       else
+               queue_flag_set_unlocked(QUEUE_FLAG_SG_GAPS, mddev->queue);
+
        /*
         * Here we calculate the device offsets.
         */
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index 100ef23..2b77d0f 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -435,6 +435,7 @@ static int raid0_run(struct mddev *mddev)
        if (mddev->queue) {
                struct md_rdev *rdev;
                bool discard_supported = false;
+               bool sg_gaps_disabled = false;
 
                blk_queue_max_hw_sectors(mddev->queue, mddev->chunk_sectors);
                blk_queue_max_write_same_sectors(mddev->queue, 
mddev->chunk_sectors);
@@ -449,6 +450,8 @@ static int raid0_run(struct mddev *mddev)
                                          rdev->data_offset << 9);
                        if (blk_queue_discard(bdev_get_queue(rdev->bdev)))
                                discard_supported = true;
+                       if (blk_queue_sg_gaps(bdev_get_queue(rdev->bdev)))
+                               sg_gaps_disabled = true;
                }
 
                /* Unfortunately, some devices have awful discard performance,
@@ -470,6 +473,11 @@ static int raid0_run(struct mddev *mddev)
                        queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, 
mddev->queue);
                else
                        queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, 
mddev->queue);
+
+               if (!sg_gaps_disabled)
+                       queue_flag_clear_unlocked(QUEUE_FLAG_SG_GAPS, 
mddev->queue);
+               else
+                       queue_flag_set_unlocked(QUEUE_FLAG_SG_GAPS, 
mddev->queue);
        }
 
        /* calculate array device size */
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index b45b64c..a1763b5 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1663,6 +1663,8 @@ static int raid1_add_disk(struct mddev *mddev, struct 
md_rdev *rdev)
        md_integrity_add_rdev(rdev, mddev);
        if (mddev->queue && blk_queue_discard(bdev_get_queue(rdev->bdev)))
                queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mddev->queue);
+       if (mddev->queue && blk_queue_sg_gaps(bdev_get_queue(rdev->bdev)))
+               queue_flag_set_unlocked(QUEUE_FLAG_SG_GAPS, mddev->queue);
        print_conf(conf);
        return err;
 }
@@ -2905,6 +2907,7 @@ static int run(struct mddev *mddev)
        struct md_rdev *rdev;
        int ret;
        bool discard_supported = false;
+       bool sg_gaps_disabled = false;
 
        if (mddev->level != 1) {
                printk(KERN_ERR "md/raid1:%s: raid level not set to mirroring 
(%d)\n",
@@ -2939,6 +2942,8 @@ static int run(struct mddev *mddev)
                                  rdev->data_offset << 9);
                if (blk_queue_discard(bdev_get_queue(rdev->bdev)))
                        discard_supported = true;
+               if (blk_queue_sg_gaps(bdev_get_queue(rdev->bdev)))
+                       sg_gaps_disabled = true;
        }
 
        mddev->degraded = 0;
@@ -2976,6 +2981,12 @@ static int run(struct mddev *mddev)
                else
                        queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD,
                                                  mddev->queue);
+               if (sg_gaps_disabled)
+                       queue_flag_set_unlocked(QUEUE_FLAG_SG_GAPS,
+                                               mddev->queue);
+               else
+                       queue_flag_clear_unlocked(QUEUE_FLAG_SG_GAPS,
+                                                 mddev->queue);
        }
 
        ret =  md_integrity_register(mddev);
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index f0c7f3b..52f8d73 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1865,6 +1865,8 @@ static int raid10_add_disk(struct mddev *mddev, struct 
md_rdev *rdev)
        md_integrity_add_rdev(rdev, mddev);
        if (mddev->queue && blk_queue_discard(bdev_get_queue(rdev->bdev)))
                queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mddev->queue);
+       if (mddev->queue && blk_queue_sg_gaps(bdev_get_queue(rdev->bdev)))
+               queue_flag_set_unlocked(QUEUE_FLAG_SG_GAPS, mddev->queue);
 
        print_conf(conf);
        return err;
@@ -3650,6 +3652,7 @@ static int run(struct mddev *mddev)
        sector_t min_offset_diff = 0;
        int first = 1;
        bool discard_supported = false;
+       bool sg_gaps_disabled = false;
 
        if (mddev->private == NULL) {
                conf = setup_conf(mddev);
@@ -3717,6 +3720,9 @@ static int run(struct mddev *mddev)
 
                if (blk_queue_discard(bdev_get_queue(rdev->bdev)))
                        discard_supported = true;
+
+               if (blk_queue_sg_gaps(bdev_get_queue(rdev->bdev)))
+                       sg_gaps_disabled = true;
        }
 
        if (mddev->queue) {
@@ -3726,6 +3732,12 @@ static int run(struct mddev *mddev)
                else
                        queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD,
                                                  mddev->queue);
+               if (sg_gaps_disabled)
+                       queue_flag_set_unlocked(QUEUE_FLAG_SG_GAPS,
+                                               mddev->queue);
+               else
+                       queue_flag_clear_unlocked(QUEUE_FLAG_SG_GAPS,
+                                                 mddev->queue);
        }
        /* need to check that every block has at least one working mirror */
        if (!enough(conf, -1)) {
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 50902ad1..862b86f 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -6814,6 +6814,7 @@ static int run(struct mddev *mddev)
        if (mddev->queue) {
                int chunk_size;
                bool discard_supported = true;
+               bool sg_gaps_disabled = false;
                /* read-ahead size must cover two whole stripes, which
                 * is 2 * (datadisks) * chunksize where 'n' is the
                 * number of raid devices
@@ -6878,6 +6879,8 @@ static int run(struct mddev *mddev)
                                }
                                discard_supported = false;
                        }
+                       if (blk_queue_sg_gaps(bdev_get_queue(rdev->bdev)))
+                               sg_gaps_disabled = true;
                }
 
                if (discard_supported &&
@@ -6888,6 +6891,13 @@ static int run(struct mddev *mddev)
                else
                        queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD,
                                                mddev->queue);
+
+               if (sg_gaps_disabled)
+                       queue_flag_set_unlocked(QUEUE_FLAG_SG_GAPS,
+                                               mddev->queue);
+               else
+                       queue_flag_clear_unlocked(QUEUE_FLAG_SG_GAPS,
+                                               mddev->queue);
        }
 
        return 0;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 8b400c6..e3752b1 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -639,6 +639,7 @@ static inline void queue_flag_clear(unsigned int flag, 
struct request_queue *q)
 #define blk_queue_discard(q)   test_bit(QUEUE_FLAG_DISCARD, &(q)->queue_flags)
 #define blk_queue_secdiscard(q)        (blk_queue_discard(q) && \
        test_bit(QUEUE_FLAG_SECDISCARD, &(q)->queue_flags))
+#define blk_queue_sg_gaps(q)   test_bit(QUEUE_FLAG_SG_GAPS, &(q)->queue_flags)
 
 #define blk_noretry_request(rq) \
        ((rq)->cmd_flags & (REQ_FAILFAST_DEV|REQ_FAILFAST_TRANSPORT| \

_______________________________________________
Devel mailing list
[email protected]
https://lists.openvz.org/mailman/listinfo/devel

Reply via email to