On Wed, 23 Sep 2015 15:14:10 -0700
Bart Van Assche <bart.vanass...@sandisk.com> wrote:

> Ensure that blk_mq_queue_enter() waits if mq_freeze_depth is not
> zero. Ensure that the update of mq_freeze_depth by blk_mq_freeze_queue()
> is visible by all CPU cores before that function waits on
> mq_usage_counter.
> 
> It is unfortunate that this patch introduces an smp_mb() in the
> hot path (blk_mq_queue_enter()) but I have not yet found a way to
> avoid this.
> 
> I came across this code while analyzing a lockup triggered by
> deleting a SCSI host created by the SRP initiator immediately
> followed by a relogin.
> 
> Signed-off-by: Bart Van Assche <bart.vanass...@sandisk.com>
> Cc: Christoph Hellwig <h...@lst.de>
> Cc: Tejun Heo <t...@kernel.org>
> Cc: <sta...@vger.kernel.org>
> ---
>  block/blk-mq.c | 14 ++++++++++++--
>  1 file changed, 12 insertions(+), 2 deletions(-)
> 
> diff --git a/block/blk-mq.c b/block/blk-mq.c
> index 2077f0d..e3ad411 100644
> --- a/block/blk-mq.c
> +++ b/block/blk-mq.c
> @@ -83,8 +83,13 @@ static int blk_mq_queue_enter(struct request_queue *q, 
> gfp_t gfp)
>       while (true) {
>               int ret;
>  
> -             if (percpu_ref_tryget_live(&q->mq_usage_counter))
> -                     return 0;
> +             if (percpu_ref_tryget_live(&q->mq_usage_counter)) {
> +                     /* Order mq_use_counter and mq_freeze_depth accesses */
> +                     smp_mb();
> +                     if (!atomic_read(&q->mq_freeze_depth))
> +                             return 0;
> +                     percpu_ref_put(&q->mq_usage_counter);
> +             }

IMO, mq_freeze_depth should only be accessed in the slow path, and it
looks like the race only happens during the small window between
increasing 'mq_freeze_depth' and killing the percpu counter.

One solution I thought of is the following patch, which depends on
Akinobu's patch (blk-mq: fix freeze queue race
http://marc.info/?l=linux-kernel&m=143723697010781&w=2).

---
diff --git a/block/blk-mq.c b/block/blk-mq.c
index f774f67..1c71c04 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -77,6 +77,17 @@ static void blk_mq_hctx_clear_pending(struct blk_mq_hw_ctx 
*hctx,
        clear_bit(CTX_TO_BIT(hctx, ctx), &bm->word);
 }
 
+static inline int blk_mq_read_freeze_depth(struct request_queue *q)
+{
+       int  depth;
+
+       mutex_lock(&q->mq_freeze_lock);
+       depth = q->mq_freeze_depth;
+       mutex_unlock(&q->mq_freeze_lock);
+
+       return depth;
+}
+
 static int blk_mq_queue_enter(struct request_queue *q, gfp_t gfp)
 {
        while (true) {
@@ -89,7 +100,7 @@ static int blk_mq_queue_enter(struct request_queue *q, gfp_t 
gfp)
                        return -EBUSY;
 
                ret = wait_event_interruptible(q->mq_freeze_wq,
-                               !atomic_read(&q->mq_freeze_depth) ||
+                               !blk_mq_read_freeze_depth(q) ||
                                blk_queue_dying(q));
                if (blk_queue_dying(q))
                        return -ENODEV;
@@ -113,12 +124,9 @@ static void blk_mq_usage_counter_release(struct percpu_ref 
*ref)
 
 void blk_mq_freeze_queue_start(struct request_queue *q)
 {
-       int freeze_depth;
-
        mutex_lock(&q->mq_freeze_lock);
 
-       freeze_depth = atomic_inc_return(&q->mq_freeze_depth);
-       if (freeze_depth == 1) {
+       if (!q->mq_freeze_depth++) {
                percpu_ref_kill(&q->mq_usage_counter);
                blk_mq_run_hw_queues(q, false);
        }
@@ -149,7 +157,7 @@ void blk_mq_unfreeze_queue(struct request_queue *q)
 
        mutex_lock(&q->mq_freeze_lock);
 
-       freeze_depth = atomic_dec_return(&q->mq_freeze_depth);
+       freeze_depth = --q->mq_freeze_depth;
        WARN_ON_ONCE(freeze_depth < 0);
        if (!freeze_depth) {
                percpu_ref_reinit(&q->mq_usage_counter);
@@ -2084,7 +2092,7 @@ void blk_mq_free_queue(struct request_queue *q)
 /* Basically redo blk_mq_init_queue with queue frozen */
 static void blk_mq_queue_reinit(struct request_queue *q)
 {
-       WARN_ON_ONCE(!atomic_read(&q->mq_freeze_depth));
+       WARN_ON_ONCE(!ACCESS_ONCE(q->mq_freeze_depth));
 
        blk_mq_sysfs_unregister(q);
 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 6cdf2b7..86fedcc 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -436,7 +436,7 @@ struct request_queue {
        struct mutex            sysfs_lock;
 
        int                     bypass_depth;
-       atomic_t                mq_freeze_depth;
+       int                     mq_freeze_depth;
 
 #if defined(CONFIG_BLK_DEV_BSG)
        bsg_job_fn              *bsg_job_fn;



>  
>               if (!(gfp & __GFP_WAIT))
>                       return -EBUSY;
> @@ -136,6 +141,11 @@ static void blk_mq_freeze_queue_wait(struct 
> request_queue *q)
>  void blk_mq_freeze_queue(struct request_queue *q)
>  {
>       blk_mq_freeze_queue_start(q);
> +     /*
> +      * Ensure that the mq_freeze_depth update is visible before
> +      * mq_use_counter is read.
> +      */
> +     smp_mb();
>       blk_mq_freeze_queue_wait(q);
>  }
>  EXPORT_SYMBOL_GPL(blk_mq_freeze_queue);

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to