This is the 4th and last step in a bid to enable mapping
of multiple device hardware queues to a single CPU.

Available hardware queues are evenly distributed to CPUs.
Still, there might be some number of queues left spare, but no
more than (number of queues) % (number of CPUs) in the worst
case.

CC: Jens Axboe <ax...@kernel.dk>
CC: linux-n...@lists.infradead.org
Signed-off-by: Alexander Gordeev <agord...@redhat.com>
---
 block/blk-mq-cpumap.c | 44 ++++++++++++++++++++++++++++++++++++++++++++
 block/blk-mq.c        | 14 +-------------
 block/blk-mq.h        |  2 ++
 3 files changed, 47 insertions(+), 13 deletions(-)

diff --git a/block/blk-mq-cpumap.c b/block/blk-mq-cpumap.c
index ee553a4..0b49f30 100644
--- a/block/blk-mq-cpumap.c
+++ b/block/blk-mq-cpumap.c
@@ -9,6 +9,7 @@
 #include <linux/mm.h>
 #include <linux/smp.h>
 #include <linux/cpu.h>
+#include <linux/crash_dump.h>
 
 #include <linux/blk-mq.h>
 #include "blk.h"
@@ -86,6 +87,49 @@ int blk_mq_update_queue_map(unsigned int *map, unsigned int nr_queues,
        return 0;
 }
 
+/*
+ * Size the tag set: one combined queue per CPU if there are enough hardware
+ * queues, else one per sibling group, else one per hardware queue.
+ */
+void blk_mq_adjust_tag_set(struct blk_mq_tag_set *set,
+                          const struct cpumask *online_mask)
+{
+       unsigned int nr_cpus = 0, nr_uniq = 0, first_sibling;
+       cpumask_var_t cpus;
+       int i;
+
+       set->nr_co_queues = set->nr_hw_queues;
+       set->co_queue_size = 1;
+
+       /* A crashdump kernel may be very memory constrained: use 1 queue. */
+       if (is_kdump_kernel()) {
+               set->nr_co_queues = 1;
+               return;
+       }
+
+       if (!alloc_cpumask_var(&cpus, GFP_ATOMIC))
+               return;
+
+       cpumask_clear(cpus);
+       for_each_cpu(i, online_mask) {
+               nr_cpus++;
+               first_sibling = get_first_sibling(i);
+               /* Mark first_sibling (not i) so each group is counted once. */
+               if (!cpumask_test_cpu(first_sibling, cpus)) {
+                       cpumask_set_cpu(first_sibling, cpus);
+                       nr_uniq++;
+               }
+       }
+       free_cpumask_var(cpus);
+
+       /* An empty mask would otherwise divide by zero below. */
+       if (!nr_cpus || set->nr_hw_queues < nr_uniq)
+               return;
+
+       set->nr_co_queues = set->nr_hw_queues < nr_cpus ? nr_uniq : nr_cpus;
+       set->co_queue_size = set->nr_hw_queues / set->nr_co_queues;
+}
+
 /*
  * We have no quick way of doing reverse lookups. This is only used at
  * queue init time, so runtime isn't important.
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 450a3ed..ee05ea9 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -21,7 +21,6 @@
 #include <linux/cache.h>
 #include <linux/sched/sysctl.h>
 #include <linux/delay.h>
-#include <linux/crash_dump.h>
 
 #include <trace/events/block.h>
 
@@ -2286,24 +2285,13 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
         * TODO Restore original queue depth and count limits
         */
 
-       /*
-        * If a crashdump is active, then we are potentially in a very
-        * memory constrained environment. Limit us to 1 queue.
-        */
-       set->nr_co_queues = is_kdump_kernel() ? 1 : set->nr_hw_queues;
-       set->co_queue_size = 1;
+       blk_mq_adjust_tag_set(set, cpu_online_mask);
 
        if (queue_depth(set) < set->reserved_tags + BLK_MQ_TAG_MIN)
                return -EINVAL;
        if (queue_depth(set) > BLK_MQ_MAX_DEPTH)
                return -EINVAL;
 
-       /*
-        * There is no use for more h/w queues than cpus.
-        */
-       if (set->nr_co_queues > nr_cpu_ids)
-               set->nr_co_queues = nr_cpu_ids;
-
        set->tags = kzalloc_node(set->nr_co_queues * sizeof(*set->tags),
                                 GFP_KERNEL, set->numa_node);
        if (!set->tags)
diff --git a/block/blk-mq.h b/block/blk-mq.h
index 592e308..70704f7 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -49,6 +49,8 @@ void blk_mq_disable_hotplug(void);
  */
 extern int blk_mq_update_queue_map(unsigned int *map, unsigned int nr_queues,
                                   const struct cpumask *online_mask);
+extern void blk_mq_adjust_tag_set(struct blk_mq_tag_set *set,
+                                 const struct cpumask *online_mask);
 extern int blk_mq_hw_queue_to_node(unsigned int *map, unsigned int);
 
 /*
-- 
1.8.3.1

Reply via email to