Re: [PATCH] block: fix possible race on blk_get_queue()

2020-07-28 Thread Bart Van Assche
On 2020-07-28 18:51, Luis Chamberlain wrote:
> diff --git a/block/blk-core.c b/block/blk-core.c
> index d9d632639bd1..febdd8e8d409 100644
> --- a/block/blk-core.c
> +++ b/block/blk-core.c
> @@ -605,12 +605,18 @@ EXPORT_SYMBOL(blk_alloc_queue);
>   */
>  bool blk_get_queue(struct request_queue *q)
>  {
> - if (likely(!blk_queue_dying(q))) {
> - __blk_get_queue(q);
> - return true;
> + struct kobject *obj;
> +
> + obj = __blk_get_queue(q);
> + if (!obj)
> + return false;
> +
> + if (unlikely(blk_queue_dying(q))) {
> + blk_put_queue(q);
> + return false;
>   }
>  
> - return false;
> + return true;
>  }

This change is not sufficient to prevent the QUEUE_FLAG_DYING flag from
being set immediately after this function returns. I propose not to
modify this function but instead to add a comment saying that it is the
responsibility of the caller to prevent such a race condition from
occurring.
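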
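Something along these lines, as a rough sketch of the wording only (not
a concrete proposal):

/**
 * blk_get_queue - increment the request_queue refcount
 * @q: the request_queue to increment the refcount for
 *
 * Returns false if the queue was already dying. Note that
 * QUEUE_FLAG_DYING may still become set at any time after this
 * function returns; it is up to the caller to serialize against
 * queue teardown if its use of @q depends on that.
 */
bool blk_get_queue(struct request_queue *q);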

> -static inline void __blk_get_queue(struct request_queue *q)
> +static inline struct kobject * __must_check
> +__blk_get_queue(struct request_queue *q)
>  {
> - kobject_get(&q->kobj);
> + return kobject_get_unless_zero(&q->kobj);
>  }

If a function passes a queue pointer to another function that calls
blk_get_queue(), then the caller should guarantee that 'q' remains
valid for the entire duration of that call. In other words, I'm not
sure the above change is an improvement.
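
For example (hypothetical caller, only for illustration): if the caller
owns a reference on 'q', a pattern like the following is already safe
with a plain kobject_get() underneath blk_get_queue():

#include <linux/blkdev.h>

/* 'q' is pinned by the caller for the whole call, so blk_get_queue()
 * never runs on a freed queue here. */
static void helper(struct request_queue *q)
{
	if (!blk_get_queue(q))
		return;		/* queue is dying, don't use it */

	/* ... use q ... */

	blk_put_queue(q);
}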

Thanks,

Bart.


[PATCH] block: fix possible race on blk_get_queue()

2020-07-28 Thread Luis Chamberlain
The queue can flip to dying after we check whether it is dying but
before we call __blk_get_queue(). This is a purely theoretical race,
but fix it anyway: grab the reference with the atomic
kobject_get_unless_zero() first, and only *then* check whether the
queue is dying.
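
For reference, the window in the current code looks like this
(annotation added for illustration only):

bool blk_get_queue(struct request_queue *q)
{
	if (likely(!blk_queue_dying(q))) {
		/*
		 * QUEUE_FLAG_DYING can become set right here, between
		 * the check above and the reference grab below.
		 */
		__blk_get_queue(q);
		return true;
	}

	return false;
}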

This issue was found while doing patch review on the
recent blktrace fixes [0].

[0] https://lore.kernel.org/linux-block/20200415123434.gu11...@42.do-not-panic.com/

Reported-by: Christoph Hellwig 
Cc: Jan Kara 
Cc: Ming Lei 
Cc: Bart Van Assche 
Cc: Christoph Hellwig 
Signed-off-by: Luis Chamberlain 
---

This was tested against blktests without finding any regression.

 block/blk-core.c | 14 ++
 block/blk.h  |  5 +++--
 2 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index d9d632639bd1..febdd8e8d409 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -605,12 +605,18 @@ EXPORT_SYMBOL(blk_alloc_queue);
  */
 bool blk_get_queue(struct request_queue *q)
 {
-   if (likely(!blk_queue_dying(q))) {
-   __blk_get_queue(q);
-   return true;
+   struct kobject *obj;
+
+   obj = __blk_get_queue(q);
+   if (!obj)
+   return false;
+
+   if (unlikely(blk_queue_dying(q))) {
+   blk_put_queue(q);
+   return false;
}
 
-   return false;
+   return true;
 }
 EXPORT_SYMBOL(blk_get_queue);
 
diff --git a/block/blk.h b/block/blk.h
index 49e2928a1632..bdbc9b084d5b 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -39,9 +39,10 @@ blk_get_flush_queue(struct request_queue *q, struct blk_mq_ctx *ctx)
return blk_mq_map_queue(q, REQ_OP_FLUSH, ctx)->fq;
 }
 
-static inline void __blk_get_queue(struct request_queue *q)
+static inline struct kobject * __must_check
+__blk_get_queue(struct request_queue *q)
 {
-   kobject_get(&q->kobj);
+   return kobject_get_unless_zero(&q->kobj);
 }
 
 static inline bool
-- 
2.27.0