When the tag space of a device is big enough, use the hw tags
directly for I/O scheduling.

For now, hw tags are used for scheduling when the hw queue depth is
not less than q->nr_requests and the tag set isn't shared.

Signed-off-by: Ming Lei <[email protected]>
---
 block/blk-mq-sched.c   | 80 +++++++++++++++++++++++++++++++++++++++++++++-----
 block/blk-mq-sched.h   |  8 +++++
 block/blk-mq.c         | 35 ++++++++++++++++++++--
 include/linux/blkdev.h |  8 +++++
 4 files changed, 122 insertions(+), 9 deletions(-)

diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index 2c5981ff9e04..a7e125a40e0a 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -417,9 +417,9 @@ void blk_mq_sched_insert_requests(struct request_queue *q,
        blk_mq_run_hw_queue(hctx, run_queue_async);
 }
 
-static void blk_mq_sched_free_tags(struct blk_mq_tag_set *set,
-                                  struct blk_mq_hw_ctx *hctx,
-                                  unsigned int hctx_idx)
+void blk_mq_sched_free_tags(struct blk_mq_tag_set *set,
+                           struct blk_mq_hw_ctx *hctx,
+                           unsigned int hctx_idx)
 {
        if (hctx->sched_tags) {
                blk_mq_free_rqs(set, hctx->sched_tags, hctx_idx);
@@ -428,9 +428,9 @@ static void blk_mq_sched_free_tags(struct blk_mq_tag_set *set,
        }
 }
 
-static int blk_mq_sched_alloc_tags(struct request_queue *q,
-                                  struct blk_mq_hw_ctx *hctx,
-                                  unsigned int hctx_idx)
+int blk_mq_sched_alloc_tags(struct request_queue *q,
+                           struct blk_mq_hw_ctx *hctx,
+                           unsigned int hctx_idx)
 {
        struct blk_mq_tag_set *set = q->tag_set;
        int ret;
@@ -450,14 +450,52 @@ static int blk_mq_sched_alloc_tags(struct request_queue *q,
        return ret;
 }
 
+static int blk_mq_set_queue_depth(struct blk_mq_hw_ctx *hctx,
+                                 unsigned int nr)
+{      /* Request depth nr for the tags of one hw queue (hctx->tags). */
+       if (!hctx->tags)        /* this hw queue has no tags to resize */
+               return -EINVAL;
+       /* Otherwise blk_mq_tag_update_depth()'s result is returned as-is. */
+       return blk_mq_tag_update_depth(hctx, &hctx->tags, nr, false);
+}      /* NOTE(review): static here, yet this patch calls it from blk-mq.c */
+
+static int blk_mq_set_queues_depth(struct request_queue *q,
+                                  unsigned int nr)
+{      /* Apply depth nr to every hw queue of q; undo on the first failure. */
+       struct blk_mq_hw_ctx *hctx;
+       int i, j, ret;
+
+       queue_for_each_hw_ctx(q, hctx, i) {
+               ret = blk_mq_set_queue_depth(hctx, nr);
+               if (ret)        /* i stays at the hw queue that failed */
+                       goto recovery;
+       }
+       return 0;
+       /* Put the hw queues visited before i back to q->act_hw_queue_depth. */
+ recovery:
+       queue_for_each_hw_ctx(q, hctx, j) {
+               if (j >= i)     /* reached the hw queue that failed */
+                       break;
+               blk_mq_tag_update_depth(hctx, &hctx->tags,
+                                       q->act_hw_queue_depth,
+                                       false);  /* result is ignored */
+       }
+       return ret;     /* error from the failed blk_mq_set_queue_depth() */
+}
+
 static void blk_mq_sched_tags_teardown(struct request_queue *q)
 {
        struct blk_mq_tag_set *set = q->tag_set;
        struct blk_mq_hw_ctx *hctx;
        int i;
 
-       queue_for_each_hw_ctx(q, hctx, i)
+       queue_for_each_hw_ctx(q, hctx, i) {
+               if (hctx->flags & BLK_MQ_F_SCHED_USE_HW_TAG) {
+                       blk_mq_set_queue_depth(hctx, q->act_hw_queue_depth);
+                       hctx->flags &= ~BLK_MQ_F_SCHED_USE_HW_TAG;
+               }
                blk_mq_sched_free_tags(set, hctx, i);
+       }
 }
 
 int blk_mq_sched_init_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx,
@@ -504,12 +542,28 @@ void blk_mq_sched_exit_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx,
        blk_mq_sched_free_tags(q->tag_set, hctx, hctx_idx);
 }
 
+/*
+ * Tell whether q may use its hardware tags directly for scheduling: the tag
+ * set must not be shared and the saved hw depth must cover q->nr_requests.
+ */
+bool blk_mq_sched_may_use_hw_tag(struct request_queue *q)
+{
+       if (q->tag_set->flags & BLK_MQ_F_TAG_SHARED)
+               return false;   /* tag set is flagged as shared */
+
+       if (q->act_hw_queue_depth < q->nr_requests)
+               return false;   /* saved hw depth is below nr_requests */
+
+       return true;
+}
+
 int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e)
 {
        struct blk_mq_hw_ctx *hctx;
        struct elevator_queue *eq;
        unsigned int i;
        int ret;
+       bool auto_hw_tag;
 
        if (!e) {
                q->elevator = NULL;
@@ -522,7 +576,19 @@ int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e)
         */
        q->nr_requests = 2 * BLKDEV_MAX_RQ;
 
+       auto_hw_tag = blk_mq_sched_may_use_hw_tag(q);
+       if (auto_hw_tag) {
+               q->act_hw_queue_depth = blk_mq_get_queue_depth(q);
+               if (blk_mq_set_queues_depth(q, q->nr_requests))
+                       auto_hw_tag = false;
+       }
+
        queue_for_each_hw_ctx(q, hctx, i) {
+               if (auto_hw_tag)
+                       hctx->flags |= BLK_MQ_F_SCHED_USE_HW_TAG;
+               else
+                       hctx->flags &= ~BLK_MQ_F_SCHED_USE_HW_TAG;
+
                ret = blk_mq_sched_alloc_tags(q, hctx, i);
                if (ret)
                        goto err;
diff --git a/block/blk-mq-sched.h b/block/blk-mq-sched.h
index edafb5383b7b..bbfc1ea5fafa 100644
--- a/block/blk-mq-sched.h
+++ b/block/blk-mq-sched.h
@@ -25,6 +25,7 @@ void blk_mq_sched_insert_requests(struct request_queue *q,
 
 void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx);
 
+bool blk_mq_sched_may_use_hw_tag(struct request_queue *q);
 int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e);
 void blk_mq_exit_sched(struct request_queue *q, struct elevator_queue *e);
 
@@ -35,6 +36,13 @@ void blk_mq_sched_exit_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx,
 
 int blk_mq_sched_init(struct request_queue *q);
 
+void blk_mq_sched_free_tags(struct blk_mq_tag_set *set,
+                           struct blk_mq_hw_ctx *hctx,
+                           unsigned int hctx_idx);
+int blk_mq_sched_alloc_tags(struct request_queue *q,
+                           struct blk_mq_hw_ctx *hctx,
+                           unsigned int hctx_idx);
+
 static inline bool
 blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio)
 {
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 1a61ca611fae..e02fa8d078e6 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2150,6 +2150,34 @@ int blk_mq_get_queue_depth(struct request_queue *q)
        return tags->bitmap_tags.sb.depth + tags->breserved_tags.sb.depth;
 }
 
+static void blk_mq_update_sched_flag(struct request_queue *q)
+{      /* Re-evaluate hw-tag scheduling for q and sync per-hctx state. */
+       struct blk_mq_hw_ctx *hctx;
+       int i;
+
+       if (!q->elevator)       /* nothing to update without an elevator */
+               return;
+       /* Not eligible: restore saved depth, ensure sched_tags exist. */
+       if (!blk_mq_sched_may_use_hw_tag(q))
+               queue_for_each_hw_ctx(q, hctx, i) {
+                       if (hctx->flags & BLK_MQ_F_SCHED_USE_HW_TAG) {
+                               blk_mq_set_queue_depth(hctx, q->act_hw_queue_depth);
+                               hctx->flags &= ~BLK_MQ_F_SCHED_USE_HW_TAG;
+                       }
+                       if (!hctx->sched_tags) {
+                               if (blk_mq_sched_alloc_tags(q, hctx, i))
+                                       goto force_use_hw_tag;
+                       }
+               }
+       else    /* eligible; also entered by goto when allocation fails */
+ force_use_hw_tag:
+               queue_for_each_hw_ctx(q, hctx, i) {
+                       hctx->flags |= BLK_MQ_F_SCHED_USE_HW_TAG;
+                       if (hctx->sched_tags)
+                               blk_mq_sched_free_tags(q->tag_set, hctx, i);
+               }       /* NOTE(review): hw depth is not changed on this path */
+}
+
 static void queue_set_hctx_shared(struct request_queue *q, bool shared)
 {
        struct blk_mq_hw_ctx *hctx;
@@ -2366,7 +2394,7 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
        /*
         * Do this after blk_queue_make_request() overrides it...
         */
-       q->nr_requests = set->queue_depth;
+       q->act_hw_queue_depth = q->nr_requests = set->queue_depth;
 
        /*
         * Default to classic polling
@@ -2689,8 +2717,11 @@ int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr)
                        break;
        }
 
-       if (!ret)
+       if (!ret) {
                q->nr_requests = nr;
+               q->act_hw_queue_depth = blk_mq_get_queue_depth(q);
+               blk_mq_update_sched_flag(q);
+       }
 
        blk_mq_unfreeze_queue(q);
 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index b5d1e27631ee..7389e388d583 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -428,6 +428,14 @@ struct request_queue {
        unsigned int            nr_hw_queues;
 
        /*
+        * Saves the active hw queue depth before hw tags are used for
+        * scheduling; revisit this if per-hw-queue depth is ever supported.
+        *
+        * Only used by blk-mq-sched.
+        */
+       unsigned int            act_hw_queue_depth;
+
+       /*
         * Dispatch queue sorting
         */
        sector_t                end_sector;
-- 
2.9.3

Reply via email to