Maxim,

as far as I understand it cannot be used for live-patching:
it ssems for me .run is called on start of mdraid thread only.
Can you propose some live-patch-ready variant of this fix?

thank you,
        Vasily Averin


On 12/09/2016 08:21 AM, Vasily Averin wrote:
> Maxim,
> will you sent this to mainline too ?
> 
> thank you,
>       Vasily Averin
> 
> On 12/09/2016 08:17 AM, Maxim Patlasov wrote:
>> NVMe disk driver disables sg gaps by setting the flag in its queue.
>> md-raids must propagate the flag to its queue as well. Otherwise, an upper
>> user will get OK from bio_add_page() even if a gap exists, and that bio
>> with a gap will crash NVMe driver who doesn't expect gaps.
>> (see bvec_gap_to_prev for the definotion of gap)
>>
>> https://jira.sw.ru/browse/PSBM-56838
>>
>> Signed-off-by: Maxim Patlasov <[email protected]>
>> ---
>>  drivers/md/linear.c    |    9 +++++++++
>>  drivers/md/raid0.c     |    8 ++++++++
>>  drivers/md/raid1.c     |   11 +++++++++++
>>  drivers/md/raid10.c    |   12 ++++++++++++
>>  drivers/md/raid5.c     |   10 ++++++++++
>>  include/linux/blkdev.h |    1 +
>>  6 files changed, 51 insertions(+)
>>
>> diff --git a/drivers/md/linear.c b/drivers/md/linear.c
>> index 3310b59..e57b8ff 100644
>> --- a/drivers/md/linear.c
>> +++ b/drivers/md/linear.c
>> @@ -128,6 +128,7 @@ static struct linear_conf *linear_conf(struct mddev 
>> *mddev, int raid_disks)
>>      struct md_rdev *rdev;
>>      int i, cnt;
>>      bool discard_supported = false;
>> +    bool sg_gaps_disabled = false;
>>  
>>      conf = kzalloc (sizeof (*conf) + raid_disks*sizeof(struct dev_info),
>>                      GFP_KERNEL);
>> @@ -163,6 +164,9 @@ static struct linear_conf *linear_conf(struct mddev 
>> *mddev, int raid_disks)
>>  
>>              if (blk_queue_discard(bdev_get_queue(rdev->bdev)))
>>                      discard_supported = true;
>> +
>> +            if (blk_queue_sg_gaps(bdev_get_queue(rdev->bdev)))
>> +                    sg_gaps_disabled = true;
>>      }
>>      if (cnt != raid_disks) {
>>              printk(KERN_ERR "md/linear:%s: not enough drives present. 
>> Aborting!\n",
>> @@ -175,6 +179,11 @@ static struct linear_conf *linear_conf(struct mddev 
>> *mddev, int raid_disks)
>>      else
>>              queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mddev->queue);
>>  
>> +    if (!sg_gaps_disabled)
>> +            queue_flag_clear_unlocked(QUEUE_FLAG_SG_GAPS, mddev->queue);
>> +    else
>> +            queue_flag_set_unlocked(QUEUE_FLAG_SG_GAPS, mddev->queue);
>> +
>>      /*
>>       * Here we calculate the device offsets.
>>       */
>> diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
>> index 100ef23..2b77d0f 100644
>> --- a/drivers/md/raid0.c
>> +++ b/drivers/md/raid0.c
>> @@ -435,6 +435,7 @@ static int raid0_run(struct mddev *mddev)
>>      if (mddev->queue) {
>>              struct md_rdev *rdev;
>>              bool discard_supported = false;
>> +            bool sg_gaps_disabled = false;
>>  
>>              blk_queue_max_hw_sectors(mddev->queue, mddev->chunk_sectors);
>>              blk_queue_max_write_same_sectors(mddev->queue, 
>> mddev->chunk_sectors);
>> @@ -449,6 +450,8 @@ static int raid0_run(struct mddev *mddev)
>>                                        rdev->data_offset << 9);
>>                      if (blk_queue_discard(bdev_get_queue(rdev->bdev)))
>>                              discard_supported = true;
>> +                    if (blk_queue_sg_gaps(bdev_get_queue(rdev->bdev)))
>> +                            sg_gaps_disabled = true;
>>              }
>>  
>>              /* Unfortunately, some devices have awful discard performance,
>> @@ -470,6 +473,11 @@ static int raid0_run(struct mddev *mddev)
>>                      queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, 
>> mddev->queue);
>>              else
>>                      queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, 
>> mddev->queue);
>> +
>> +            if (!sg_gaps_disabled)
>> +                    queue_flag_clear_unlocked(QUEUE_FLAG_SG_GAPS, 
>> mddev->queue);
>> +            else
>> +                    queue_flag_set_unlocked(QUEUE_FLAG_SG_GAPS, 
>> mddev->queue);
>>      }
>>  
>>      /* calculate array device size */
>> diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
>> index b45b64c..a1763b5 100644
>> --- a/drivers/md/raid1.c
>> +++ b/drivers/md/raid1.c
>> @@ -1663,6 +1663,8 @@ static int raid1_add_disk(struct mddev *mddev, struct 
>> md_rdev *rdev)
>>      md_integrity_add_rdev(rdev, mddev);
>>      if (mddev->queue && blk_queue_discard(bdev_get_queue(rdev->bdev)))
>>              queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mddev->queue);
>> +    if (mddev->queue && blk_queue_sg_gaps(bdev_get_queue(rdev->bdev)))
>> +            queue_flag_set_unlocked(QUEUE_FLAG_SG_GAPS, mddev->queue);
>>      print_conf(conf);
>>      return err;
>>  }
>> @@ -2905,6 +2907,7 @@ static int run(struct mddev *mddev)
>>      struct md_rdev *rdev;
>>      int ret;
>>      bool discard_supported = false;
>> +    bool sg_gaps_disabled = false;
>>  
>>      if (mddev->level != 1) {
>>              printk(KERN_ERR "md/raid1:%s: raid level not set to mirroring 
>> (%d)\n",
>> @@ -2939,6 +2942,8 @@ static int run(struct mddev *mddev)
>>                                rdev->data_offset << 9);
>>              if (blk_queue_discard(bdev_get_queue(rdev->bdev)))
>>                      discard_supported = true;
>> +            if (blk_queue_sg_gaps(bdev_get_queue(rdev->bdev)))
>> +                    sg_gaps_disabled = true;
>>      }
>>  
>>      mddev->degraded = 0;
>> @@ -2976,6 +2981,12 @@ static int run(struct mddev *mddev)
>>              else
>>                      queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD,
>>                                                mddev->queue);
>> +            if (sg_gaps_disabled)
>> +                    queue_flag_set_unlocked(QUEUE_FLAG_SG_GAPS,
>> +                                            mddev->queue);
>> +            else
>> +                    queue_flag_clear_unlocked(QUEUE_FLAG_SG_GAPS,
>> +                                              mddev->queue);
>>      }
>>  
>>      ret =  md_integrity_register(mddev);
>> diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
>> index f0c7f3b..52f8d73 100644
>> --- a/drivers/md/raid10.c
>> +++ b/drivers/md/raid10.c
>> @@ -1865,6 +1865,8 @@ static int raid10_add_disk(struct mddev *mddev, struct 
>> md_rdev *rdev)
>>      md_integrity_add_rdev(rdev, mddev);
>>      if (mddev->queue && blk_queue_discard(bdev_get_queue(rdev->bdev)))
>>              queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mddev->queue);
>> +    if (mddev->queue && blk_queue_sg_gaps(bdev_get_queue(rdev->bdev)))
>> +            queue_flag_set_unlocked(QUEUE_FLAG_SG_GAPS, mddev->queue);
>>  
>>      print_conf(conf);
>>      return err;
>> @@ -3650,6 +3652,7 @@ static int run(struct mddev *mddev)
>>      sector_t min_offset_diff = 0;
>>      int first = 1;
>>      bool discard_supported = false;
>> +    bool sg_gaps_disabled = false;
>>  
>>      if (mddev->private == NULL) {
>>              conf = setup_conf(mddev);
>> @@ -3717,6 +3720,9 @@ static int run(struct mddev *mddev)
>>  
>>              if (blk_queue_discard(bdev_get_queue(rdev->bdev)))
>>                      discard_supported = true;
>> +
>> +            if (blk_queue_sg_gaps(bdev_get_queue(rdev->bdev)))
>> +                    sg_gaps_disabled = true;
>>      }
>>  
>>      if (mddev->queue) {
>> @@ -3726,6 +3732,12 @@ static int run(struct mddev *mddev)
>>              else
>>                      queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD,
>>                                                mddev->queue);
>> +            if (sg_gaps_disabled)
>> +                    queue_flag_set_unlocked(QUEUE_FLAG_SG_GAPS,
>> +                                            mddev->queue);
>> +            else
>> +                    queue_flag_clear_unlocked(QUEUE_FLAG_SG_GAPS,
>> +                                              mddev->queue);
>>      }
>>      /* need to check that every block has at least one working mirror */
>>      if (!enough(conf, -1)) {
>> diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
>> index 50902ad1..862b86f 100644
>> --- a/drivers/md/raid5.c
>> +++ b/drivers/md/raid5.c
>> @@ -6814,6 +6814,7 @@ static int run(struct mddev *mddev)
>>      if (mddev->queue) {
>>              int chunk_size;
>>              bool discard_supported = true;
>> +            bool sg_gaps_disabled = false;
>>              /* read-ahead size must cover two whole stripes, which
>>               * is 2 * (datadisks) * chunksize where 'n' is the
>>               * number of raid devices
>> @@ -6878,6 +6879,8 @@ static int run(struct mddev *mddev)
>>                              }
>>                              discard_supported = false;
>>                      }
>> +                    if (blk_queue_sg_gaps(bdev_get_queue(rdev->bdev)))
>> +                            sg_gaps_disabled = true;
>>              }
>>  
>>              if (discard_supported &&
>> @@ -6888,6 +6891,13 @@ static int run(struct mddev *mddev)
>>              else
>>                      queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD,
>>                                              mddev->queue);
>> +
>> +            if (sg_gaps_disabled)
>> +                    queue_flag_set_unlocked(QUEUE_FLAG_SG_GAPS,
>> +                                            mddev->queue);
>> +            else
>> +                    queue_flag_clear_unlocked(QUEUE_FLAG_SG_GAPS,
>> +                                            mddev->queue);
>>      }
>>  
>>      return 0;
>> diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
>> index 8b400c6..e3752b1 100644
>> --- a/include/linux/blkdev.h
>> +++ b/include/linux/blkdev.h
>> @@ -639,6 +639,7 @@ static inline void queue_flag_clear(unsigned int flag, 
>> struct request_queue *q)
>>  #define blk_queue_discard(q)        test_bit(QUEUE_FLAG_DISCARD, 
>> &(q)->queue_flags)
>>  #define blk_queue_secdiscard(q)     (blk_queue_discard(q) && \
>>      test_bit(QUEUE_FLAG_SECDISCARD, &(q)->queue_flags))
>> +#define blk_queue_sg_gaps(q)        test_bit(QUEUE_FLAG_SG_GAPS, 
>> &(q)->queue_flags)
>>  
>>  #define blk_noretry_request(rq) \
>>      ((rq)->cmd_flags & (REQ_FAILFAST_DEV|REQ_FAILFAST_TRANSPORT| \
>>
>>
> 
_______________________________________________
Devel mailing list
[email protected]
https://lists.openvz.org/mailman/listinfo/devel

Reply via email to