On Thu, Nov 30, 2017 at 04:08:47PM -0800, Bart Van Assche wrote:
> During request dispatch, after a scheduler or per-CPU queue has
> been examined, .put_budget() is called if the examined queue is
> empty. Since a new request may be queued concurrently with the
> .put_budget() call, a request queue needs to be rerun after each
> .put_budget() call.
If a request is queued concurrently from another path, it can be run
from that path, so there is no need to rerun the queue in
__blk_mq_run_hw_queue().
>
> Fixes: commit 1f460b63d4b3 ("blk-mq: don't restart queue when .get_budget
> returns BLK_STS_RESOURCE")
> Signed-off-by: Bart Van Assche <[email protected]>
> Cc: Ming Lei <[email protected]>
> Cc: Omar Sandoval <[email protected]>
> Cc: Christoph Hellwig <[email protected]>
> Cc: Hannes Reinecke <[email protected]>
> Cc: Johannes Thumshirn <[email protected]>
> Cc: <[email protected]>
> ---
> block/blk-mq-sched.c | 39 ++++++++++++++++++++-------------------
> block/blk-mq-sched.h | 2 +-
> block/blk-mq.c | 17 ++++++++++++-----
> 3 files changed, 33 insertions(+), 25 deletions(-)
>
> diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
> index 398545d94521..3a935081a2d3 100644
> --- a/block/blk-mq-sched.c
> +++ b/block/blk-mq-sched.c
> @@ -82,12 +82,8 @@ static bool blk_mq_sched_restart_hctx(struct blk_mq_hw_ctx
> *hctx)
> return blk_mq_run_hw_queue(hctx, true);
> }
>
> -/*
> - * Only SCSI implements .get_budget and .put_budget, and SCSI restarts
> - * its queue by itself in its completion handler, so we don't need to
> - * restart queue if .get_budget() returns BLK_STS_NO_RESOURCE.
> - */
> -static void blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx)
> +/* returns true if hctx needs to be run again */
> +static bool blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx)
> {
> struct request_queue *q = hctx->queue;
> struct elevator_queue *e = q->elevator;
> @@ -106,7 +102,7 @@ static void blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx
> *hctx)
> rq = e->type->ops.mq.dispatch_request(hctx);
> if (!rq) {
> blk_mq_put_dispatch_budget(hctx);
> - break;
> + return true;
> }
>
> /*
> @@ -116,6 +112,8 @@ static void blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx
> *hctx)
> */
> list_add(&rq->queuelist, &rq_list);
> } while (blk_mq_dispatch_rq_list(q, &rq_list, true));
> +
> + return false;
> }
>
> static struct blk_mq_ctx *blk_mq_next_ctx(struct blk_mq_hw_ctx *hctx,
> @@ -129,16 +127,13 @@ static struct blk_mq_ctx *blk_mq_next_ctx(struct
> blk_mq_hw_ctx *hctx,
> return hctx->ctxs[idx];
> }
>
> -/*
> - * Only SCSI implements .get_budget and .put_budget, and SCSI restarts
> - * its queue by itself in its completion handler, so we don't need to
> - * restart queue if .get_budget() returns BLK_STS_NO_RESOURCE.
> - */
> -static void blk_mq_do_dispatch_ctx(struct blk_mq_hw_ctx *hctx)
> +/* returns true if hctx needs to be run again */
> +static bool blk_mq_do_dispatch_ctx(struct blk_mq_hw_ctx *hctx)
> {
> struct request_queue *q = hctx->queue;
> LIST_HEAD(rq_list);
> struct blk_mq_ctx *ctx = READ_ONCE(hctx->dispatch_from);
> + bool ret = false;
>
> do {
> struct request *rq;
> @@ -152,6 +147,7 @@ static void blk_mq_do_dispatch_ctx(struct blk_mq_hw_ctx
> *hctx)
> rq = blk_mq_dequeue_from_ctx(hctx, ctx);
> if (!rq) {
> blk_mq_put_dispatch_budget(hctx);
> + ret = true;
> break;
> }
>
> @@ -168,19 +164,22 @@ static void blk_mq_do_dispatch_ctx(struct blk_mq_hw_ctx
> *hctx)
> } while (blk_mq_dispatch_rq_list(q, &rq_list, true));
>
> WRITE_ONCE(hctx->dispatch_from, ctx);
> +
> + return ret;
> }
>
> /* return true if hw queue need to be run again */
> -void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
> +bool blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
> {
> struct request_queue *q = hctx->queue;
> struct elevator_queue *e = q->elevator;
> const bool has_sched_dispatch = e && e->type->ops.mq.dispatch_request;
> LIST_HEAD(rq_list);
> + bool run_queue = false;
>
> /* RCU or SRCU read lock is needed before checking quiesced flag */
> if (unlikely(blk_mq_hctx_stopped(hctx) || blk_queue_quiesced(q)))
> - return;
> + return false;
>
> hctx->run++;
>
> @@ -212,12 +211,12 @@ void blk_mq_sched_dispatch_requests(struct
> blk_mq_hw_ctx *hctx)
> if (!list_empty(&rq_list)) {
> if (blk_mq_dispatch_rq_list(q, &rq_list, false)) {
> if (has_sched_dispatch)
> - blk_mq_do_dispatch_sched(hctx);
> + run_queue = blk_mq_do_dispatch_sched(hctx);
> else
> - blk_mq_do_dispatch_ctx(hctx);
> + run_queue = blk_mq_do_dispatch_ctx(hctx);
> }
> } else if (has_sched_dispatch) {
> - blk_mq_do_dispatch_sched(hctx);
> + run_queue = blk_mq_do_dispatch_sched(hctx);
> } else if (q->mq_ops->get_budget) {
> /*
> * If we need to get budget before queuing request, we
> @@ -227,11 +226,13 @@ void blk_mq_sched_dispatch_requests(struct
> blk_mq_hw_ctx *hctx)
> * TODO: get more budgets, and dequeue more requests in
> * one time.
> */
> - blk_mq_do_dispatch_ctx(hctx);
> + run_queue = blk_mq_do_dispatch_ctx(hctx);
> } else {
> blk_mq_flush_busy_ctxs(hctx, &rq_list);
> blk_mq_dispatch_rq_list(q, &rq_list, false);
> }
> +
> + return run_queue;
> }
>
> bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio,
> diff --git a/block/blk-mq-sched.h b/block/blk-mq-sched.h
> index ba1d1418a96d..1ccfb8027cfc 100644
> --- a/block/blk-mq-sched.h
> +++ b/block/blk-mq-sched.h
> @@ -23,7 +23,7 @@ void blk_mq_sched_insert_requests(struct request_queue *q,
> struct blk_mq_ctx *ctx,
> struct list_head *list, bool run_queue_async);
>
> -void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx);
> +bool blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx);
>
> int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e);
> void blk_mq_exit_sched(struct request_queue *q, struct elevator_queue *e);
> diff --git a/block/blk-mq.c b/block/blk-mq.c
> index 3e0ce940377f..b4225f606737 100644
> --- a/block/blk-mq.c
> +++ b/block/blk-mq.c
> @@ -1079,7 +1079,7 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q,
> struct list_head *list,
> {
> struct blk_mq_hw_ctx *hctx;
> struct request *rq, *nxt;
> - bool no_tag = false;
> + bool restart = false, no_tag = false;
> int errors, queued;
>
> if (list_empty(list))
> @@ -1105,8 +1105,10 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q,
> struct list_head *list,
> * we'll re-run it below.
> */
> if (!blk_mq_mark_tag_wait(&hctx, rq)) {
> - if (got_budget)
> + if (got_budget) {
> blk_mq_put_dispatch_budget(hctx);
> + restart = true;
> + }
> /*
> * For non-shared tags, the RESTART check
> * will suffice.
> @@ -1193,7 +1195,8 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q,
> struct list_head *list,
> * returning BLK_STS_RESOURCE. Two exceptions are scsi-mq
> * and dm-rq.
> */
> - if (!blk_mq_sched_needs_restart(hctx) ||
> + if (restart ||
> + !blk_mq_sched_needs_restart(hctx) ||
> (no_tag && list_empty_careful(&hctx->dispatch_wait.entry)))
> blk_mq_run_hw_queue(hctx, true);
> }
> @@ -1204,6 +1207,7 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q,
> struct list_head *list,
> static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
> {
> int srcu_idx;
> + bool run_queue;
>
> /*
> * We should be running this queue from one of the CPUs that
> @@ -1220,15 +1224,18 @@ static void __blk_mq_run_hw_queue(struct
> blk_mq_hw_ctx *hctx)
>
> if (!(hctx->flags & BLK_MQ_F_BLOCKING)) {
> rcu_read_lock();
> - blk_mq_sched_dispatch_requests(hctx);
> + run_queue = blk_mq_sched_dispatch_requests(hctx);
> rcu_read_unlock();
> } else {
> might_sleep();
>
> srcu_idx = srcu_read_lock(hctx->queue_rq_srcu);
> - blk_mq_sched_dispatch_requests(hctx);
> + run_queue = blk_mq_sched_dispatch_requests(hctx);
> srcu_read_unlock(hctx->queue_rq_srcu, srcu_idx);
> }
> +
> + if (run_queue)
> + blk_mq_sched_restart(hctx);
> }
>
> /*
> --
> 2.15.0
>
--
Ming