Re: [PATCH] blk-mq: don't allocate driver tag beforehand for flush rq

2017-09-13 Thread Ming Lei
On Wed, Sep 13, 2017 at 09:32:30PM +0800, Ming Lei wrote:
> The idea behind this is simple:
> 
> 1) with the 'none' scheduler, the driver tag has to be borrowed
> for the flush rq, otherwise we may run out of tags and cause an
> IO hang. get/put of the driver tag is actually a nop here, so
> reordering tags isn't necessary at all.
> 
> 2) with a real I/O scheduler, we don't need to allocate a driver
> tag beforehand for the flush rq; it works just fine to follow
> the same path as normal requests: allocate the driver tag for
> each rq just before calling .queue_rq().
> 
> Then the flush rq isn't treated specially wrt. get/put of the
> driver tag, the code gets cleaned up a lot, for example,
> reorder_tags_to_front() is removed, and we no longer need to
> worry about request order in the dispatch list.
> 
> One visible change to drivers is that the flush rq's tag may not
> be the same as the data rq's in the flush sequence. That isn't a
> problem, since we have always done it that way in the legacy path.

Please ignore this patch, since it looks like there is an issue.

-- 
Ming


[PATCH] blk-mq: don't allocate driver tag beforehand for flush rq

2017-09-13 Thread Ming Lei
The idea behind this is simple:

1) with the 'none' scheduler, the driver tag has to be borrowed
for the flush rq, otherwise we may run out of tags and cause an
IO hang. get/put of the driver tag is actually a nop here, so
reordering tags isn't necessary at all.

2) with a real I/O scheduler, we don't need to allocate a driver
tag beforehand for the flush rq; it works just fine to follow
the same path as normal requests: allocate the driver tag for
each rq just before calling .queue_rq().

Then the flush rq isn't treated specially wrt. get/put of the
driver tag, the code gets cleaned up a lot, for example,
reorder_tags_to_front() is removed, and we no longer need to
worry about request order in the dispatch list.

One visible change to drivers is that the flush rq's tag may not
be the same as the data rq's in the flush sequence. That isn't a
problem, since we have always done it that way in the legacy path.

Signed-off-by: Ming Lei 
---
 block/blk-flush.c    | 43 ++++++++++++++++++++++++++++++----------
 block/blk-mq-sched.c | 64 +++++++++++++++----------------------------------
 block/blk-mq.c       | 38 ++++++++--------------------------
 3 files changed, 56 insertions(+), 89 deletions(-)
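
Not part of the patch, just an illustration for review: below is a
minimal user-space C toy that models the two tagging schemes from the
changelog. All names here (toy_rq, toy_get_driver_tag(), and so on)
are invented for the sketch and are not the kernel API; the kernel-side
counterpart of the lazy allocation step is blk_mq_get_driver_tag()
called just before .queue_rq().

/* Toy model of the two tagging schemes; not kernel code. */
#include <stdio.h>

#define NO_TAG (-1)

struct toy_rq {
	int tag;		/* driver tag */
	int internal_tag;	/* scheduler tag */
};

static int next_driver_tag;

/* Allocate a driver tag lazily, just before dispatch; this models
 * what happens for normal requests right before .queue_rq(). */
static int toy_get_driver_tag(struct toy_rq *rq)
{
	if (rq->tag == NO_TAG)
		rq->tag = next_driver_tag++;
	return rq->tag;
}

static void toy_queue_rq(const struct toy_rq *rq, const char *what)
{
	printf("%s dispatched with driver tag %d\n", what, rq->tag);
}

int main(void)
{
	struct toy_rq data  = { .tag = NO_TAG, .internal_tag = 0 };
	struct toy_rq flush = { .tag = NO_TAG, .internal_tag = NO_TAG };

	/* 1) none: the data rq already owns a driver tag and the flush
	 * rq borrows it, since the two are never in flight together. */
	toy_get_driver_tag(&data);
	flush.tag = data.tag;
	toy_queue_rq(&flush, "flush rq (none, borrowed tag)");

	/* 2) real scheduler: the flush rq borrows only the scheduler
	 * tag; its driver tag is allocated right before dispatch,
	 * the same as for any normal request. */
	flush.tag = NO_TAG;
	flush.internal_tag = data.internal_tag;
	toy_get_driver_tag(&flush);
	toy_queue_rq(&flush, "flush rq (sched, freshly allocated tag)");

	return 0;
}

The second case is the one this patch introduces: the flush rq keeps
only a scheduler (internal) tag, so the driver tag it eventually
dispatches with may differ from the data rq's, which the legacy path
already allows.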

diff --git a/block/blk-flush.c b/block/blk-flush.c
index 4938bec8cfef..080f778257cf 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -216,6 +216,17 @@ static bool blk_flush_complete_seq(struct request *rq,
return kicked | queued;
 }
 
+/*
+ * We don't share the tag between the flush rq and the data rq with an IO
+ * scheduler, so we have to release the tag and restart the queue.
+ */
+static void put_flush_driver_tag(struct blk_mq_hw_ctx *hctx,
+struct blk_mq_ctx *ctx, int tag)
+{
+   blk_mq_put_tag(hctx, hctx->tags, ctx, tag);
+   blk_mq_sched_restart(hctx);
+}
+
 static void flush_end_io(struct request *flush_rq, blk_status_t error)
 {
struct request_queue *q = flush_rq->q;
@@ -231,8 +242,14 @@ static void flush_end_io(struct request *flush_rq, blk_status_t error)
/* release the tag's ownership to the req cloned from */
	spin_lock_irqsave(&fq->mq_flush_lock, flags);
hctx = blk_mq_map_queue(q, flush_rq->mq_ctx->cpu);
-   blk_mq_tag_set_rq(hctx, flush_rq->tag, fq->orig_rq);
-   flush_rq->tag = -1;
+   if (!q->elevator) {
+   blk_mq_tag_set_rq(hctx, flush_rq->tag, fq->orig_rq);
+   flush_rq->tag = -1;
+   } else {
+   put_flush_driver_tag(hctx, flush_rq->mq_ctx,
+flush_rq->tag);
+   flush_rq->internal_tag = -1;
+   }
}
 
	running = &fq->flush_queue[fq->flush_running_idx];
@@ -321,16 +338,24 @@ static bool blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq)
 * Borrow tag from the first request since they can't
 * be in flight at the same time. And acquire the tag's
 * ownership for flush req.
+	 *
+	 * With an IO scheduler, the flush rq needs to borrow the
+	 * scheduler tag to make put/get of the driver tag workable.
+	 * With none, the flush rq needs to borrow the driver tag.
 */
if (q->mq_ops) {
struct blk_mq_hw_ctx *hctx;
 
flush_rq->mq_ctx = first_rq->mq_ctx;
-   flush_rq->tag = first_rq->tag;
-   fq->orig_rq = first_rq;
 
-   hctx = blk_mq_map_queue(q, first_rq->mq_ctx->cpu);
-   blk_mq_tag_set_rq(hctx, first_rq->tag, flush_rq);
+   if (!q->elevator) {
+   fq->orig_rq = first_rq;
+   flush_rq->tag = first_rq->tag;
+   hctx = blk_mq_map_queue(q, first_rq->mq_ctx->cpu);
+   blk_mq_tag_set_rq(hctx, first_rq->tag, flush_rq);
+   } else {
+   flush_rq->internal_tag = first_rq->internal_tag;
+   }
}
 
flush_rq->cmd_flags = REQ_OP_FLUSH | REQ_PREFLUSH;
@@ -394,6 +419,12 @@ static void mq_flush_data_end_io(struct request *rq, blk_status_t error)
 
hctx = blk_mq_map_queue(q, ctx->cpu);
 
+   if (q->elevator) {
+   WARN_ON(rq->tag < 0);
+   put_flush_driver_tag(hctx, ctx, rq->tag);
+   rq->tag = -1;
+   }
+
/*
 * After populating an empty queue, kick it to avoid stall.  Read
 * the comment in flush_end_io().
diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index 4ab69435708c..dbf100871ad6 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -259,22 +259,20 @@ void blk_mq_sched_request_inserted(struct request *rq)
 }
 EXPORT_SYMBOL_GPL(blk_mq_sched_request_inserted);
 
-static bool blk_mq_sched_bypass_insert(struct blk_mq_hw_ctx *hctx,
-  struct request *rq)
+static bool blk_mq_bypass_insert(struct blk_mq_hw_ctx *hctx,
+bool has_sched, struct request *rq)
 {
-   if (rq->tag == -1)