Maxim,
will you send this to mainline too?

thank you,
        Vasily Averin

On 12/09/2016 08:17 AM, Maxim Patlasov wrote:
> The NVMe disk driver disables sg gaps by setting the flag in its queue.
> md-raids must propagate the flag to their queues as well. Otherwise, an upper
> user will get OK from bio_add_page() even if a gap exists, and that bio
> with a gap will crash the NVMe driver, which doesn't expect gaps.
> (see bvec_gap_to_prev for the definition of gap)
> 
> https://jira.sw.ru/browse/PSBM-56838
> 
> Signed-off-by: Maxim Patlasov <[email protected]>
> ---
>  drivers/md/linear.c    |    9 +++++++++
>  drivers/md/raid0.c     |    8 ++++++++
>  drivers/md/raid1.c     |   11 +++++++++++
>  drivers/md/raid10.c    |   12 ++++++++++++
>  drivers/md/raid5.c     |   10 ++++++++++
>  include/linux/blkdev.h |    1 +
>  6 files changed, 51 insertions(+)
> 
> diff --git a/drivers/md/linear.c b/drivers/md/linear.c
> index 3310b59..e57b8ff 100644
> --- a/drivers/md/linear.c
> +++ b/drivers/md/linear.c
> @@ -128,6 +128,7 @@ static struct linear_conf *linear_conf(struct mddev 
> *mddev, int raid_disks)
>       struct md_rdev *rdev;
>       int i, cnt;
>       bool discard_supported = false;
> +     bool sg_gaps_disabled = false;
>  
>       conf = kzalloc (sizeof (*conf) + raid_disks*sizeof(struct dev_info),
>                       GFP_KERNEL);
> @@ -163,6 +164,9 @@ static struct linear_conf *linear_conf(struct mddev 
> *mddev, int raid_disks)
>  
>               if (blk_queue_discard(bdev_get_queue(rdev->bdev)))
>                       discard_supported = true;
> +
> +             if (blk_queue_sg_gaps(bdev_get_queue(rdev->bdev)))
> +                     sg_gaps_disabled = true;
>       }
>       if (cnt != raid_disks) {
>               printk(KERN_ERR "md/linear:%s: not enough drives present. 
> Aborting!\n",
> @@ -175,6 +179,11 @@ static struct linear_conf *linear_conf(struct mddev 
> *mddev, int raid_disks)
>       else
>               queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mddev->queue);
>  
> +     if (!sg_gaps_disabled)
> +             queue_flag_clear_unlocked(QUEUE_FLAG_SG_GAPS, mddev->queue);
> +     else
> +             queue_flag_set_unlocked(QUEUE_FLAG_SG_GAPS, mddev->queue);
> +
>       /*
>        * Here we calculate the device offsets.
>        */
> diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
> index 100ef23..2b77d0f 100644
> --- a/drivers/md/raid0.c
> +++ b/drivers/md/raid0.c
> @@ -435,6 +435,7 @@ static int raid0_run(struct mddev *mddev)
>       if (mddev->queue) {
>               struct md_rdev *rdev;
>               bool discard_supported = false;
> +             bool sg_gaps_disabled = false;
>  
>               blk_queue_max_hw_sectors(mddev->queue, mddev->chunk_sectors);
>               blk_queue_max_write_same_sectors(mddev->queue, 
> mddev->chunk_sectors);
> @@ -449,6 +450,8 @@ static int raid0_run(struct mddev *mddev)
>                                         rdev->data_offset << 9);
>                       if (blk_queue_discard(bdev_get_queue(rdev->bdev)))
>                               discard_supported = true;
> +                     if (blk_queue_sg_gaps(bdev_get_queue(rdev->bdev)))
> +                             sg_gaps_disabled = true;
>               }
>  
>               /* Unfortunately, some devices have awful discard performance,
> @@ -470,6 +473,11 @@ static int raid0_run(struct mddev *mddev)
>                       queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, 
> mddev->queue);
>               else
>                       queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, 
> mddev->queue);
> +
> +             if (!sg_gaps_disabled)
> +                     queue_flag_clear_unlocked(QUEUE_FLAG_SG_GAPS, 
> mddev->queue);
> +             else
> +                     queue_flag_set_unlocked(QUEUE_FLAG_SG_GAPS, 
> mddev->queue);
>       }
>  
>       /* calculate array device size */
> diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
> index b45b64c..a1763b5 100644
> --- a/drivers/md/raid1.c
> +++ b/drivers/md/raid1.c
> @@ -1663,6 +1663,8 @@ static int raid1_add_disk(struct mddev *mddev, struct 
> md_rdev *rdev)
>       md_integrity_add_rdev(rdev, mddev);
>       if (mddev->queue && blk_queue_discard(bdev_get_queue(rdev->bdev)))
>               queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mddev->queue);
> +     if (mddev->queue && blk_queue_sg_gaps(bdev_get_queue(rdev->bdev)))
> +             queue_flag_set_unlocked(QUEUE_FLAG_SG_GAPS, mddev->queue);
>       print_conf(conf);
>       return err;
>  }
> @@ -2905,6 +2907,7 @@ static int run(struct mddev *mddev)
>       struct md_rdev *rdev;
>       int ret;
>       bool discard_supported = false;
> +     bool sg_gaps_disabled = false;
>  
>       if (mddev->level != 1) {
>               printk(KERN_ERR "md/raid1:%s: raid level not set to mirroring 
> (%d)\n",
> @@ -2939,6 +2942,8 @@ static int run(struct mddev *mddev)
>                                 rdev->data_offset << 9);
>               if (blk_queue_discard(bdev_get_queue(rdev->bdev)))
>                       discard_supported = true;
> +             if (blk_queue_sg_gaps(bdev_get_queue(rdev->bdev)))
> +                     sg_gaps_disabled = true;
>       }
>  
>       mddev->degraded = 0;
> @@ -2976,6 +2981,12 @@ static int run(struct mddev *mddev)
>               else
>                       queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD,
>                                                 mddev->queue);
> +             if (sg_gaps_disabled)
> +                     queue_flag_set_unlocked(QUEUE_FLAG_SG_GAPS,
> +                                             mddev->queue);
> +             else
> +                     queue_flag_clear_unlocked(QUEUE_FLAG_SG_GAPS,
> +                                               mddev->queue);
>       }
>  
>       ret =  md_integrity_register(mddev);
> diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
> index f0c7f3b..52f8d73 100644
> --- a/drivers/md/raid10.c
> +++ b/drivers/md/raid10.c
> @@ -1865,6 +1865,8 @@ static int raid10_add_disk(struct mddev *mddev, struct 
> md_rdev *rdev)
>       md_integrity_add_rdev(rdev, mddev);
>       if (mddev->queue && blk_queue_discard(bdev_get_queue(rdev->bdev)))
>               queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mddev->queue);
> +     if (mddev->queue && blk_queue_sg_gaps(bdev_get_queue(rdev->bdev)))
> +             queue_flag_set_unlocked(QUEUE_FLAG_SG_GAPS, mddev->queue);
>  
>       print_conf(conf);
>       return err;
> @@ -3650,6 +3652,7 @@ static int run(struct mddev *mddev)
>       sector_t min_offset_diff = 0;
>       int first = 1;
>       bool discard_supported = false;
> +     bool sg_gaps_disabled = false;
>  
>       if (mddev->private == NULL) {
>               conf = setup_conf(mddev);
> @@ -3717,6 +3720,9 @@ static int run(struct mddev *mddev)
>  
>               if (blk_queue_discard(bdev_get_queue(rdev->bdev)))
>                       discard_supported = true;
> +
> +             if (blk_queue_sg_gaps(bdev_get_queue(rdev->bdev)))
> +                     sg_gaps_disabled = true;
>       }
>  
>       if (mddev->queue) {
> @@ -3726,6 +3732,12 @@ static int run(struct mddev *mddev)
>               else
>                       queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD,
>                                                 mddev->queue);
> +             if (sg_gaps_disabled)
> +                     queue_flag_set_unlocked(QUEUE_FLAG_SG_GAPS,
> +                                             mddev->queue);
> +             else
> +                     queue_flag_clear_unlocked(QUEUE_FLAG_SG_GAPS,
> +                                               mddev->queue);
>       }
>       /* need to check that every block has at least one working mirror */
>       if (!enough(conf, -1)) {
> diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
> index 50902ad1..862b86f 100644
> --- a/drivers/md/raid5.c
> +++ b/drivers/md/raid5.c
> @@ -6814,6 +6814,7 @@ static int run(struct mddev *mddev)
>       if (mddev->queue) {
>               int chunk_size;
>               bool discard_supported = true;
> +             bool sg_gaps_disabled = false;
>               /* read-ahead size must cover two whole stripes, which
>                * is 2 * (datadisks) * chunksize where 'n' is the
>                * number of raid devices
> @@ -6878,6 +6879,8 @@ static int run(struct mddev *mddev)
>                               }
>                               discard_supported = false;
>                       }
> +                     if (blk_queue_sg_gaps(bdev_get_queue(rdev->bdev)))
> +                             sg_gaps_disabled = true;
>               }
>  
>               if (discard_supported &&
> @@ -6888,6 +6891,13 @@ static int run(struct mddev *mddev)
>               else
>                       queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD,
>                                               mddev->queue);
> +
> +             if (sg_gaps_disabled)
> +                     queue_flag_set_unlocked(QUEUE_FLAG_SG_GAPS,
> +                                             mddev->queue);
> +             else
> +                     queue_flag_clear_unlocked(QUEUE_FLAG_SG_GAPS,
> +                                             mddev->queue);
>       }
>  
>       return 0;
> diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
> index 8b400c6..e3752b1 100644
> --- a/include/linux/blkdev.h
> +++ b/include/linux/blkdev.h
> @@ -639,6 +639,7 @@ static inline void queue_flag_clear(unsigned int flag, 
> struct request_queue *q)
>  #define blk_queue_discard(q) test_bit(QUEUE_FLAG_DISCARD, &(q)->queue_flags)
>  #define blk_queue_secdiscard(q)      (blk_queue_discard(q) && \
>       test_bit(QUEUE_FLAG_SECDISCARD, &(q)->queue_flags))
> +#define blk_queue_sg_gaps(q) test_bit(QUEUE_FLAG_SG_GAPS, &(q)->queue_flags)
>  
>  #define blk_noretry_request(rq) \
>       ((rq)->cmd_flags & (REQ_FAILFAST_DEV|REQ_FAILFAST_TRANSPORT| \
> 
> 
_______________________________________________
Devel mailing list
[email protected]
https://lists.openvz.org/mailman/listinfo/devel

Reply via email to