On Thu, 2018-12-20 at 11:33 -0700, Jens Axboe wrote:
> [ ... ]
> Something like this, totally untested. If the queue matches, we know it's
> safe to dereference it.
> 
> A safer API to export as well...
> 
> 
> diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
> index 2089c6c62f44..78192b544fa2 100644
> --- a/block/blk-mq-tag.c
> +++ b/block/blk-mq-tag.c
> @@ -228,13 +228,15 @@ static bool bt_iter(struct sbitmap *bitmap, unsigned 
> int bitnr, void *data)
>  
>       if (!reserved)
>               bitnr += tags->nr_reserved_tags;
> -     rq = tags->rqs[bitnr];
> +     if (tags->rqs[bitnr].queue != hctx->queue)
> +             return true;
>  
>       /*
>        * We can hit rq == NULL here, because the tagging functions
>        * test and set the bit before assigning ->rqs[].
>        */
> -     if (rq && rq->q == hctx->queue)
> +     rq = tags->rqs[bitnr].rq;
> +     if (rq)
>               return iter_data->fn(hctx, rq, iter_data->data, reserved);
>       return true;
>  }
> @@ -268,6 +270,7 @@ static void bt_for_each(struct blk_mq_hw_ctx *hctx, 
> struct sbitmap_queue *bt,
>  
>  struct bt_tags_iter_data {
>       struct blk_mq_tags *tags;
> +     struct blk_mq_hw_ctx *hctx;
>       busy_tag_iter_fn *fn;
>       void *data;
>       bool reserved;
> @@ -287,7 +290,7 @@ static bool bt_tags_iter(struct sbitmap *bitmap, unsigned 
> int bitnr, void *data)
>        * We can hit rq == NULL here, because the tagging functions
>        * test and set the bit before assining ->rqs[].
>        */
> -     rq = tags->rqs[bitnr];
> +     rq = tags->rqs[bitnr].rq;
>       if (rq && blk_mq_request_started(rq))
>               return iter_data->fn(rq, iter_data->data, reserved);
>  
> diff --git a/block/blk-mq-tag.h b/block/blk-mq-tag.h
> index 61deab0b5a5a..bb84d1f099c7 100644
> --- a/block/blk-mq-tag.h
> +++ b/block/blk-mq-tag.h
> @@ -4,6 +4,11 @@
>  
>  #include "blk-mq.h"
>  
> +struct rq_tag_entry {
> +     struct request_queue *queue;
> +     struct request *rq;
> +};
> +
>  /*
>   * Tag address space map.
>   */
> @@ -16,7 +21,7 @@ struct blk_mq_tags {
>       struct sbitmap_queue bitmap_tags;
>       struct sbitmap_queue breserved_tags;
>  
> -     struct request **rqs;
> +     struct rq_tag_entry *rqs;
>       struct request **static_rqs;
>       struct list_head page_list;
>  };
> @@ -78,7 +83,8 @@ static inline void blk_mq_tag_idle(struct blk_mq_hw_ctx 
> *hctx)
>  static inline void blk_mq_tag_set_rq(struct blk_mq_hw_ctx *hctx,
>               unsigned int tag, struct request *rq)
>  {
> -     hctx->tags->rqs[tag] = rq;
> +     hctx->tags->rqs[tag].queue = hctx->queue;
> +     hctx->tags->rqs[tag].rq = rq;
>  }
>  
>  static inline bool blk_mq_tag_is_reserved(struct blk_mq_tags *tags,
> diff --git a/block/blk-mq.c b/block/blk-mq.c
> index 3ba37b9e15e9..4f194946dbd9 100644
> --- a/block/blk-mq.c
> +++ b/block/blk-mq.c
> @@ -298,13 +298,16 @@ static struct request *blk_mq_rq_ctx_init(struct 
> blk_mq_alloc_data *data,
>               rq->tag = -1;
>               rq->internal_tag = tag;
>       } else {
> -             if (data->hctx->flags & BLK_MQ_F_TAG_SHARED) {
> +             struct blk_mq_hw_ctx *hctx = data->hctx;
> +
> +             if (hctx->flags & BLK_MQ_F_TAG_SHARED) {
>                       rq_flags = RQF_MQ_INFLIGHT;
> -                     atomic_inc(&data->hctx->nr_active);
> +                     atomic_inc(&hctx->nr_active);
>               }
>               rq->tag = tag;
>               rq->internal_tag = -1;
> -             data->hctx->tags->rqs[rq->tag] = rq;
> +             hctx->tags->rqs[rq->tag].queue = hctx->queue;
> +             hctx->tags->rqs[rq->tag].rq = rq;
>       }
>  
>       /* csd/requeue_work/fifo_time is initialized before use */
> @@ -797,8 +800,8 @@ EXPORT_SYMBOL(blk_mq_delay_kick_requeue_list);
>  struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag)
>  {
>       if (tag < tags->nr_tags) {
> -             prefetch(tags->rqs[tag]);
> -             return tags->rqs[tag];
> +             prefetch(tags->rqs[tag].rq);
> +             return tags->rqs[tag].rq;
>       }
>  
>       return NULL;
> @@ -809,10 +812,11 @@ static bool blk_mq_rq_inflight(struct blk_mq_hw_ctx 
> *hctx, struct request *rq,
>                              void *priv, bool reserved)
>  {
>       /*
> -      * If we find a request that is inflight and the queue matches,
> -      * we know the queue is busy. Return false to stop the iteration.
> +      * We're only called here if the queue matches. If the rq state is
> +      * inflight, we know the queue is busy. Return false to stop the
> +      * iteration.
>        */
> -     if (rq->state == MQ_RQ_IN_FLIGHT && rq->q == hctx->queue) {
> +     if (rq->state == MQ_RQ_IN_FLIGHT) {
>               bool *busy = priv;
>  
>               *busy = true;
> @@ -1049,11 +1053,14 @@ bool blk_mq_get_driver_tag(struct request *rq)
>       shared = blk_mq_tag_busy(data.hctx);
>       rq->tag = blk_mq_get_tag(&data);
>       if (rq->tag >= 0) {
> +             struct blk_mq_hw_ctx *hctx = data.hctx;
> +
>               if (shared) {
>                       rq->rq_flags |= RQF_MQ_INFLIGHT;
>                       atomic_inc(&data.hctx->nr_active);
>               }
> -             data.hctx->tags->rqs[rq->tag] = rq;
> +             hctx->tags->rqs[rq->tag].queue = hctx->queue;
> +             hctx->tags->rqs[rq->tag].rq = rq;
>       }
>  
>  done:
> @@ -2069,7 +2076,7 @@ struct blk_mq_tags *blk_mq_alloc_rq_map(struct 
> blk_mq_tag_set *set,
>       if (!tags)
>               return NULL;
>  
> -     tags->rqs = kcalloc_node(nr_tags, sizeof(struct request *),
> +     tags->rqs = kcalloc_node(nr_tags, sizeof(struct rq_tag_entry),
>                                GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY,
>                                node);
>       if (!tags->rqs) {

Hi Jens,

How about the patch below? Its behavior should be similar to that of your
patch, but I think this patch is simpler.

Thanks,

Bart.


---
 block/blk-mq.c | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 6a7566244de3..c57611b1f2a8 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -86,6 +86,7 @@ static void blk_mq_hctx_clear_pending(struct blk_mq_hw_ctx 
*hctx,
 }
 
 struct mq_inflight {
+       struct request_queue *q;
        struct hd_struct *part;
        unsigned int *inflight;
 };
@@ -96,6 +97,9 @@ static void blk_mq_check_inflight(struct blk_mq_hw_ctx *hctx,
 {
        struct mq_inflight *mi = priv;
 
+       if (rq->q != mi->q)
+               return;
+
        /*
         * index[0] counts the specific partition that was asked for. index[1]
         * counts the ones that are active on the whole device, so increment
@@ -110,7 +114,11 @@ static void blk_mq_check_inflight(struct blk_mq_hw_ctx 
*hctx,
 void blk_mq_in_flight(struct request_queue *q, struct hd_struct *part,
                      unsigned int inflight[2])
 {
-       struct mq_inflight mi = { .part = part, .inflight = inflight, };
+       struct mq_inflight mi = {
+               .q = q,
+               .part = part,
+               .inflight = inflight,
+       };
 
        inflight[0] = inflight[1] = 0;
        blk_mq_queue_tag_busy_iter(q, blk_mq_check_inflight, &mi);
@@ -129,7 +137,11 @@ static void blk_mq_check_inflight_rw(struct blk_mq_hw_ctx 
*hctx,
 void blk_mq_in_flight_rw(struct request_queue *q, struct hd_struct *part,
                         unsigned int inflight[2])
 {
-       struct mq_inflight mi = { .part = part, .inflight = inflight, };
+       struct mq_inflight mi = {
+               .q = q,
+               .part = part,
+               .inflight = inflight,
+       };
 
        inflight[0] = inflight[1] = 0;
        blk_mq_queue_tag_busy_iter(q, blk_mq_check_inflight_rw, &mi);
@@ -477,6 +489,8 @@ static void __blk_mq_free_request(struct request *rq)
        const int sched_tag = rq->internal_tag;
 
        blk_pm_mark_last_busy(rq);
+       /* Avoid that blk_mq_queue_tag_busy_iter() picks up this request. */
+       rq->q = NULL;
        if (rq->tag != -1)
                blk_mq_put_tag(hctx, hctx->tags, ctx, rq->tag);
        if (sched_tag != -1)


Reply via email to