Re: [dm-devel] [PATCH v5 08/11] dm: Forbid requeue of writes to zones

2021-06-04 Thread Damien Le Moal
On 2021/06/04 23:56, Mike Snitzer wrote:
> On Tue, May 25 2021 at  5:24P -0400,
> Damien Le Moal  wrote:
> 
>> A target map method requesting the requeue of a bio with
>> DM_MAPIO_REQUEUE or completing it with DM_ENDIO_REQUEUE can cause
>> unaligned write errors if the bio is a write operation targeting a
>> sequential zone. If a zoned target requests such a requeue, warn about
>> it and kill the IO.
>>
>> The function dm_is_zone_write() is introduced to detect write operations
>> to zoned targets.
>>
>> This change does not affect the target drivers supporting zoned devices
>> and exposing a zoned device, namely dm-crypt, dm-linear and dm-flakey as
>> none of these targets ever request a requeue.
>>
>> Signed-off-by: Damien Le Moal 
>> Reviewed-by: Hannes Reinecke 
>> Reviewed-by: Himanshu Madhani 
>> ---
>>  drivers/md/dm-zone.c | 17 +
>>  drivers/md/dm.c  | 18 +++---
>>  drivers/md/dm.h  |  5 +
>>  3 files changed, 37 insertions(+), 3 deletions(-)
>>
>> diff --git a/drivers/md/dm-zone.c b/drivers/md/dm-zone.c
>> index b42474043249..edc3bbb45637 100644
>> --- a/drivers/md/dm-zone.c
>> +++ b/drivers/md/dm-zone.c
>> @@ -104,6 +104,23 @@ int dm_report_zones(struct block_device *bdev, sector_t 
>> start, sector_t sector,
>>  }
>>  EXPORT_SYMBOL_GPL(dm_report_zones);
>>  
>> +bool dm_is_zone_write(struct mapped_device *md, struct bio *bio)
>> +{
>> +struct request_queue *q = md->queue;
>> +
>> +if (!blk_queue_is_zoned(q))
>> +return false;
>> +
>> +switch (bio_op(bio)) {
>> +case REQ_OP_WRITE_ZEROES:
>> +case REQ_OP_WRITE_SAME:
>> +case REQ_OP_WRITE:
>> +return !op_is_flush(bio->bi_opf) && bio_sectors(bio);
>> +default:
>> +return false;
>> +}
>> +}
>> +
>>  void dm_set_zones_restrictions(struct dm_table *t, struct request_queue *q)
>>  {
>>  if (!blk_queue_is_zoned(q))
>> diff --git a/drivers/md/dm.c b/drivers/md/dm.c
>> index c49976cc4e44..ed8c5a8df2e5 100644
>> --- a/drivers/md/dm.c
>> +++ b/drivers/md/dm.c
>> @@ -846,11 +846,15 @@ static void dec_pending(struct dm_io *io, blk_status_t 
>> error)
>>   * Target requested pushing back the I/O.
>>   */
>>  spin_lock_irqsave(&md->deferred_lock, flags);
>> -if (__noflush_suspending(md))
>> +if (__noflush_suspending(md) &&
>> +!WARN_ON_ONCE(dm_is_zone_write(md, bio)))
>>  /* NOTE early return due to BLK_STS_DM_REQUEUE 
>> below */
>>  bio_list_add_head(&md->deferred, io->orig_bio);
>>  else
>> -/* noflush suspend was interrupted. */
>> +/*
>> + * noflush suspend was interrupted or this is
>> + * a write to a zoned target.
>> + */
>>  io->status = BLK_STS_IOERR;
>>  spin_unlock_irqrestore(&md->deferred_lock, flags);
>>  }
> 
> So I now see this incremental fix:
> https://patchwork.kernel.org/project/dm-devel/patch/20210604004703.408562-1-damien.lem...@opensource.wdc.com/
> 
> And I've folded it in...

Thanks.

>> @@ -947,7 +951,15 @@ static void clone_endio(struct bio *bio)
>>  int r = endio(tio->ti, bio, &error);
>>  switch (r) {
>>  case DM_ENDIO_REQUEUE:
>> -error = BLK_STS_DM_REQUEUE;
>> +/*
>> + * Requeuing writes to a sequential zone of a zoned
>> + * target will break the sequential write pattern:
>> + * fail such IO.
>> + */
>> +if (WARN_ON_ONCE(dm_is_zone_write(md, bio)))
>> +error = BLK_STS_IOERR;
>> +else
>> +error = BLK_STS_DM_REQUEUE;
>>  fallthrough;
>>  case DM_ENDIO_DONE:
>>  break;
> 
> But I'm left wondering why dec_pending, now dm_io_dec_pending, needs
> to be modified to also check dm_is_zone_write() if clone_endio() is
> already dealing with it?

The way I understand the code is that if the target ->map_bio() method returns
DM_MAPIO_REQUEUE (in __map_bio()), then clone_endio() is not called since the
clone BIO is not submitted. But we still need to fail orig_bio, hence the check
in dm_io_dec_pending() to cover the submission path. Am I missing something? Is
clone_endio() also called in that case?

> Not that big a deal, just not loving how we're sprinkling special
> zoned code around...

I do not like it either. It makes maintenance harder. But as explained above, I
did not see any other way to cover both the submission and completion cases.

> 
> Mike
> 


-- 
Damien Le Moal
Western Digital Research



--
dm-devel mailing list
dm-devel@redhat.com

Re: [dm-devel] [PATCH v5 08/11] dm: Forbid requeue of writes to zones

2021-06-04 Thread Mike Snitzer
On Tue, May 25 2021 at  5:24P -0400,
Damien Le Moal  wrote:

> A target map method requesting the requeue of a bio with
> DM_MAPIO_REQUEUE or completing it with DM_ENDIO_REQUEUE can cause
> unaligned write errors if the bio is a write operation targeting a
> sequential zone. If a zoned target requests such a requeue, warn about
> it and kill the IO.
> 
> The function dm_is_zone_write() is introduced to detect write operations
> to zoned targets.
> 
> This change does not affect the target drivers supporting zoned devices
> and exposing a zoned device, namely dm-crypt, dm-linear and dm-flakey as
> none of these targets ever request a requeue.
> 
> Signed-off-by: Damien Le Moal 
> Reviewed-by: Hannes Reinecke 
> Reviewed-by: Himanshu Madhani 
> ---
>  drivers/md/dm-zone.c | 17 +
>  drivers/md/dm.c  | 18 +++---
>  drivers/md/dm.h  |  5 +
>  3 files changed, 37 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/md/dm-zone.c b/drivers/md/dm-zone.c
> index b42474043249..edc3bbb45637 100644
> --- a/drivers/md/dm-zone.c
> +++ b/drivers/md/dm-zone.c
> @@ -104,6 +104,23 @@ int dm_report_zones(struct block_device *bdev, sector_t 
> start, sector_t sector,
>  }
>  EXPORT_SYMBOL_GPL(dm_report_zones);
>  
> +bool dm_is_zone_write(struct mapped_device *md, struct bio *bio)
> +{
> + struct request_queue *q = md->queue;
> +
> + if (!blk_queue_is_zoned(q))
> + return false;
> +
> + switch (bio_op(bio)) {
> + case REQ_OP_WRITE_ZEROES:
> + case REQ_OP_WRITE_SAME:
> + case REQ_OP_WRITE:
> + return !op_is_flush(bio->bi_opf) && bio_sectors(bio);
> + default:
> + return false;
> + }
> +}
> +
>  void dm_set_zones_restrictions(struct dm_table *t, struct request_queue *q)
>  {
>   if (!blk_queue_is_zoned(q))
> diff --git a/drivers/md/dm.c b/drivers/md/dm.c
> index c49976cc4e44..ed8c5a8df2e5 100644
> --- a/drivers/md/dm.c
> +++ b/drivers/md/dm.c
> @@ -846,11 +846,15 @@ static void dec_pending(struct dm_io *io, blk_status_t 
> error)
>* Target requested pushing back the I/O.
>*/
>   spin_lock_irqsave(&md->deferred_lock, flags);
> - if (__noflush_suspending(md))
> + if (__noflush_suspending(md) &&
> + !WARN_ON_ONCE(dm_is_zone_write(md, bio)))
>   /* NOTE early return due to BLK_STS_DM_REQUEUE 
> below */
>   bio_list_add_head(&md->deferred, io->orig_bio);
>   else
> - /* noflush suspend was interrupted. */
> + /*
> +  * noflush suspend was interrupted or this is
> +  * a write to a zoned target.
> +  */
>   io->status = BLK_STS_IOERR;
>   spin_unlock_irqrestore(&md->deferred_lock, flags);
>   }

So I now see this incremental fix:
https://patchwork.kernel.org/project/dm-devel/patch/20210604004703.408562-1-damien.lem...@opensource.wdc.com/

And I've folded it in...

> @@ -947,7 +951,15 @@ static void clone_endio(struct bio *bio)
>   int r = endio(tio->ti, bio, &error);
>   switch (r) {
>   case DM_ENDIO_REQUEUE:
> - error = BLK_STS_DM_REQUEUE;
> + /*
> +  * Requeuing writes to a sequential zone of a zoned
> +  * target will break the sequential write pattern:
> +  * fail such IO.
> +  */
> + if (WARN_ON_ONCE(dm_is_zone_write(md, bio)))
> + error = BLK_STS_IOERR;
> + else
> + error = BLK_STS_DM_REQUEUE;
>   fallthrough;
>   case DM_ENDIO_DONE:
>   break;

But I'm left wondering why dec_pending, now dm_io_dec_pending, needs
to be modified to also check dm_is_zone_write() if clone_endio() is
already dealing with it?

Not that big a deal, just not loving how we're sprinkling special
zoned code around...

Mike

--
dm-devel mailing list
dm-devel@redhat.com
https://listman.redhat.com/mailman/listinfo/dm-devel



[dm-devel] [PATCH v5 08/11] dm: Forbid requeue of writes to zones

2021-05-25 Thread Damien Le Moal
A target map method requesting the requeue of a bio with
DM_MAPIO_REQUEUE or completing it with DM_ENDIO_REQUEUE can cause
unaligned write errors if the bio is a write operation targeting a
sequential zone. If a zoned target requests such a requeue, warn about
it and kill the IO.

The function dm_is_zone_write() is introduced to detect write operations
to zoned targets.

This change does not affect the target drivers supporting zoned devices
and exposing a zoned device, namely dm-crypt, dm-linear and dm-flakey as
none of these targets ever request a requeue.

Signed-off-by: Damien Le Moal 
Reviewed-by: Hannes Reinecke 
Reviewed-by: Himanshu Madhani 
---
 drivers/md/dm-zone.c | 17 +
 drivers/md/dm.c  | 18 +++---
 drivers/md/dm.h  |  5 +
 3 files changed, 37 insertions(+), 3 deletions(-)

diff --git a/drivers/md/dm-zone.c b/drivers/md/dm-zone.c
index b42474043249..edc3bbb45637 100644
--- a/drivers/md/dm-zone.c
+++ b/drivers/md/dm-zone.c
@@ -104,6 +104,23 @@ int dm_report_zones(struct block_device *bdev, sector_t 
start, sector_t sector,
 }
 EXPORT_SYMBOL_GPL(dm_report_zones);
 
+bool dm_is_zone_write(struct mapped_device *md, struct bio *bio)
+{
+   struct request_queue *q = md->queue;
+
+   if (!blk_queue_is_zoned(q))
+   return false;
+
+   switch (bio_op(bio)) {
+   case REQ_OP_WRITE_ZEROES:
+   case REQ_OP_WRITE_SAME:
+   case REQ_OP_WRITE:
+   return !op_is_flush(bio->bi_opf) && bio_sectors(bio);
+   default:
+   return false;
+   }
+}
+
 void dm_set_zones_restrictions(struct dm_table *t, struct request_queue *q)
 {
if (!blk_queue_is_zoned(q))
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index c49976cc4e44..ed8c5a8df2e5 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -846,11 +846,15 @@ static void dec_pending(struct dm_io *io, blk_status_t 
error)
 * Target requested pushing back the I/O.
 */
spin_lock_irqsave(&md->deferred_lock, flags);
-   if (__noflush_suspending(md))
+   if (__noflush_suspending(md) &&
+   !WARN_ON_ONCE(dm_is_zone_write(md, bio)))
/* NOTE early return due to BLK_STS_DM_REQUEUE 
below */
bio_list_add_head(&md->deferred, io->orig_bio);
else
-   /* noflush suspend was interrupted. */
+   /*
+* noflush suspend was interrupted or this is
+* a write to a zoned target.
+*/
io->status = BLK_STS_IOERR;
spin_unlock_irqrestore(&md->deferred_lock, flags);
}
@@ -947,7 +951,15 @@ static void clone_endio(struct bio *bio)
int r = endio(tio->ti, bio, &error);
switch (r) {
case DM_ENDIO_REQUEUE:
-   error = BLK_STS_DM_REQUEUE;
+   /*
+* Requeuing writes to a sequential zone of a zoned
+* target will break the sequential write pattern:
+* fail such IO.
+*/
+   if (WARN_ON_ONCE(dm_is_zone_write(md, bio)))
+   error = BLK_STS_IOERR;
+   else
+   error = BLK_STS_DM_REQUEUE;
fallthrough;
case DM_ENDIO_DONE:
break;
diff --git a/drivers/md/dm.h b/drivers/md/dm.h
index fdf1536a4b62..39c243258e24 100644
--- a/drivers/md/dm.h
+++ b/drivers/md/dm.h
@@ -107,8 +107,13 @@ void dm_set_zones_restrictions(struct dm_table *t, struct 
request_queue *q);
 #ifdef CONFIG_BLK_DEV_ZONED
 int dm_blk_report_zones(struct gendisk *disk, sector_t sector,
unsigned int nr_zones, report_zones_cb cb, void *data);
+bool dm_is_zone_write(struct mapped_device *md, struct bio *bio);
 #else
 #define dm_blk_report_zones		NULL
+static inline bool dm_is_zone_write(struct mapped_device *md, struct bio *bio)
+{
+   return false;
+}
 #endif
 
 /*-
-- 
2.31.1

--
dm-devel mailing list
dm-devel@redhat.com
https://listman.redhat.com/mailman/listinfo/dm-devel