The idea behind this patch is simple:

1) for the none scheduler, the driver tag has to be borrowed for the flush
rq, otherwise we may run out of tags and cause an I/O hang. Get/put of the
driver tag is actually a no-op in this case, so reordering tags isn't
necessary at all.

2) for a real I/O scheduler, we don't need to allocate a driver tag
beforehand for the flush rq; it works just fine to do the same as for normal
requests: allocate a driver tag for each rq just before calling .queue_rq().

One change visible to drivers is that the driver tag is no longer shared
across the flush request sequence. That won't be a problem, since the legacy
path has always behaved this way.

With this, the flush rq no longer needs to be treated specially wrt. get/put
of the driver tag, and the code gets cleaned up considerably: for example,
reorder_tags_to_front() is removed, and we no longer need to worry about
request order in the dispatch list to avoid I/O deadlock.

Also we have to put the driver tag before requeueing.

Signed-off-by: Ming Lei <ming....@redhat.com>
---
 block/blk-flush.c    | 35 ++++++++++++++++++++++++++---------
 block/blk-mq-sched.c | 42 +++++-------------------------------------
 block/blk-mq.c       | 41 ++++++-----------------------------------
 3 files changed, 37 insertions(+), 81 deletions(-)

diff --git a/block/blk-flush.c b/block/blk-flush.c
index a9773d2075ac..f17170675917 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -231,8 +231,13 @@ static void flush_end_io(struct request *flush_rq, blk_status_t error)
                /* release the tag's ownership to the req cloned from */
                spin_lock_irqsave(&fq->mq_flush_lock, flags);
                hctx = blk_mq_map_queue(q, flush_rq->mq_ctx->cpu);
-               blk_mq_tag_set_rq(hctx, flush_rq->tag, fq->orig_rq);
-               flush_rq->tag = -1;
+               if (!q->elevator) {
+                       blk_mq_tag_set_rq(hctx, flush_rq->tag, fq->orig_rq);
+                       flush_rq->tag = -1;
+               } else {
+                       blk_mq_put_driver_tag_hctx(hctx, flush_rq);
+                       flush_rq->internal_tag = -1;
+               }
        }
 
        running = &fq->flush_queue[fq->flush_running_idx];
@@ -318,19 +323,26 @@ static bool blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq)
        blk_rq_init(q, flush_rq);
 
        /*
-        * Borrow tag from the first request since they can't
-        * be in flight at the same time. And acquire the tag's
-        * ownership for flush req.
+        * In case of none scheduler, borrow tag from the first request
+        * since they can't be in flight at the same time. And acquire
+        * the tag's ownership for flush req.
+        *
+        * In case of IO scheduler, flush rq need to borrow scheduler tag
+        * just for cheating put/get driver tag.
         */
        if (q->mq_ops) {
                struct blk_mq_hw_ctx *hctx;
 
                flush_rq->mq_ctx = first_rq->mq_ctx;
-               flush_rq->tag = first_rq->tag;
-               fq->orig_rq = first_rq;
 
-               hctx = blk_mq_map_queue(q, first_rq->mq_ctx->cpu);
-               blk_mq_tag_set_rq(hctx, first_rq->tag, flush_rq);
+               if (!q->elevator) {
+                       fq->orig_rq = first_rq;
+                       flush_rq->tag = first_rq->tag;
+                       hctx = blk_mq_map_queue(q, first_rq->mq_ctx->cpu);
+                       blk_mq_tag_set_rq(hctx, first_rq->tag, flush_rq);
+               } else {
+                       flush_rq->internal_tag = first_rq->internal_tag;
+               }
        }
 
        flush_rq->cmd_flags = REQ_OP_FLUSH | REQ_PREFLUSH;
@@ -394,6 +406,11 @@ static void mq_flush_data_end_io(struct request *rq, blk_status_t error)
 
        hctx = blk_mq_map_queue(q, ctx->cpu);
 
+       if (q->elevator) {
+               WARN_ON(rq->tag < 0);
+               blk_mq_put_driver_tag_hctx(hctx, rq);
+       }
+
        /*
         * After populating an empty queue, kick it to avoid stall.  Read
         * the comment in flush_end_io().
diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index 32e4ea2d0a77..eb082f82b470 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -366,29 +366,12 @@ static bool blk_mq_sched_bypass_insert(struct blk_mq_hw_ctx *hctx,
                return true;
        }
 
-       if (has_sched) {
+       if (has_sched)
                rq->rq_flags |= RQF_SORTED;
-               WARN_ON(rq->tag != -1);
-       }
 
        return false;
 }
 
-/*
- * Add flush/fua to the queue. If we fail getting a driver tag, then
- * punt to the requeue list. Requeue will re-invoke us from a context
- * that's safe to block from.
- */
-static void blk_mq_sched_insert_flush(struct blk_mq_hw_ctx *hctx,
-                                     struct request *rq, bool can_block)
-{
-       if (blk_mq_get_driver_tag(rq, &hctx, can_block)) {
-               blk_insert_flush(rq);
-               blk_mq_run_hw_queue(hctx, true);
-       } else
-               blk_mq_add_to_requeue_list(rq, false, true);
-}
-
 void blk_mq_sched_insert_request(struct request *rq, bool at_head,
                                 bool run_queue, bool async, bool can_block)
 {
@@ -399,10 +382,12 @@ void blk_mq_sched_insert_request(struct request *rq, bool at_head,
 
        /* flush rq in flush machinery need to be dispatched directly */
        if (!(rq->rq_flags & RQF_FLUSH_SEQ) && op_is_flush(rq->cmd_flags)) {
-               blk_mq_sched_insert_flush(hctx, rq, can_block);
-               return;
+               blk_insert_flush(rq);
+               goto run;
        }
 
+       WARN_ON(e && (rq->tag != -1));
+
        if (blk_mq_sched_bypass_insert(hctx, !!e, rq))
                goto run;
 
@@ -429,23 +414,6 @@ void blk_mq_sched_insert_requests(struct request_queue *q,
        struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
        struct elevator_queue *e = hctx->queue->elevator;
 
-       if (e) {
-               struct request *rq, *next;
-
-               /*
-                * We bypass requests that already have a driver tag assigned,
-                * which should only be flushes. Flushes are only ever inserted
-                * as single requests, so we shouldn't ever hit the
-                * WARN_ON_ONCE() below (but let's handle it just in case).
-                */
-               list_for_each_entry_safe(rq, next, list, queuelist) {
-                       if (WARN_ON_ONCE(rq->tag != -1)) {
-                               list_del_init(&rq->queuelist);
-                               blk_mq_sched_bypass_insert(hctx, true, rq);
-                       }
-               }
-       }
-
        if (e && e->type->ops.mq.insert_requests)
                e->type->ops.mq.insert_requests(hctx, list, false);
        else
diff --git a/block/blk-mq.c b/block/blk-mq.c
index bf62065a15ec..445c74e0f5e5 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -650,6 +650,8 @@ static void __blk_mq_requeue_request(struct request *rq)
 {
        struct request_queue *q = rq->q;
 
+       blk_mq_put_driver_tag(rq);
+
        trace_block_rq_requeue(q, rq);
        wbt_requeue(q->rq_wb, &rq->issue_stat);
        blk_mq_sched_requeue_request(rq);
@@ -993,30 +995,6 @@ bool blk_mq_get_driver_tag(struct request *rq, struct blk_mq_hw_ctx **hctx,
        return rq->tag != -1;
 }
 
-/*
- * If we fail getting a driver tag because all the driver tags are already
- * assigned and on the dispatch list, BUT the first entry does not have a
- * tag, then we could deadlock. For that case, move entries with assigned
- * driver tags to the front, leaving the set of tagged requests in the
- * same order, and the untagged set in the same order.
- */
-static bool reorder_tags_to_front(struct list_head *list)
-{
-       struct request *rq, *tmp, *first = NULL;
-
-       list_for_each_entry_safe_reverse(rq, tmp, list, queuelist) {
-               if (rq == first)
-                       break;
-               if (rq->tag != -1) {
-                       list_move(&rq->queuelist, list);
-                       if (!first)
-                               first = rq;
-               }
-       }
-
-       return first != NULL;
-}
-
 static int blk_mq_dispatch_wake(wait_queue_entry_t *wait, unsigned mode, int 
flags,
                                void *key)
 {
@@ -1077,9 +1055,6 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
 
                rq = list_first_entry(list, struct request, queuelist);
                if (!blk_mq_get_driver_tag(rq, &hctx, false)) {
-                       if (!queued && reorder_tags_to_front(list))
-                               continue;
-
                        /*
                         * The initial allocation attempt failed, so we need to
                         * rerun the hardware queue when a tag is freed.
@@ -1135,7 +1110,6 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
                                nxt = list_first_entry(list, struct request, 
queuelist);
                                blk_mq_put_driver_tag(nxt);
                        }
-                       blk_mq_put_driver_tag_hctx(hctx, rq);
                        list_add(&rq->queuelist, list);
                        __blk_mq_requeue_request(rq);
                        break;
@@ -1704,13 +1678,10 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
        if (unlikely(is_flush_fua)) {
                blk_mq_put_ctx(data.ctx);
                blk_mq_bio_to_request(rq, bio);
-               if (q->elevator) {
-                       blk_mq_sched_insert_request(rq, false, true, true,
-                                       true);
-               } else {
-                       blk_insert_flush(rq);
-                       blk_mq_run_hw_queue(data.hctx, true);
-               }
+
+               /* bypass scheduler for flush rq */
+               blk_insert_flush(rq);
+               blk_mq_run_hw_queue(data.hctx, true);
        } else if (plug && q->nr_hw_queues == 1) {
                struct request *last = NULL;
 
-- 
2.9.5

Reply via email to