When isolcpus=io_queue is enabled and the last online housekeeping CPU
for a given hctx goes offline, no CPU would be left to handle I/O for
that hctx. To prevent I/O stalls, disallow offlining housekeeping CPUs
that are still serving online isolated CPUs.

Signed-off-by: Daniel Wagner <w...@kernel.org>
---
 block/blk-mq.c | 42 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 42 insertions(+)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 0c61492724d228736f975f1d8f195515603801b6..87240644f73ed0490a5459e042c68e0e168f727d 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -3681,6 +3681,43 @@ static bool blk_mq_hctx_has_requests(struct blk_mq_hw_ctx *hctx)
        return data.has_rq;
 }
 
+/*
+ * Return true if @this_cpu may go offline without leaving @hctx with only
+ * isolated CPUs: either another online housekeeping CPU is mapped to it,
+ * or no other mapped CPU is online at all.
+ */
+static bool blk_mq_hctx_can_offline_hk_cpu(struct blk_mq_hw_ctx *hctx,
+                                          unsigned int this_cpu)
+{
+       const struct cpumask *hk_mask = housekeeping_cpumask(HK_TYPE_IO_QUEUE);
+       struct gendisk *disk = hctx->queue->disk;
+       int isol_cpu = -1;
+
+       for (int i = 0; i < hctx->nr_ctx; i++) {
+               unsigned int cpu = hctx->ctxs[i]->cpu;
+
+               if (cpu == this_cpu || cpu_is_offline(cpu))
+                       continue;
+
+               /* Another online housekeeping CPU keeps the hctx usable. */
+               if (cpumask_test_cpu(cpu, hk_mask))
+                       return true;
+
+               /* Online isolated CPU; a housekeeping CPU may still follow. */
+               if (isol_cpu < 0)
+                       isol_cpu = cpu;
+       }
+
+       if (isol_cpu < 0)
+               return true;
+
+       /* Guard the name lookup: the queue may have no disk attached. */
+       pr_warn("%s: trying to offline hctx%d but there is still an online isolcpu CPU %d mapped to it\n",
+               disk ? disk->disk_name : "?",
+               hctx->queue_num, isol_cpu);
+       return false;
+}
+
 static bool blk_mq_hctx_has_online_cpu(struct blk_mq_hw_ctx *hctx,
                unsigned int this_cpu)
 {
@@ -3712,6 +3749,11 @@ static int blk_mq_hctx_notify_offline(unsigned int cpu, struct hlist_node *node)
        struct blk_mq_hw_ctx *hctx = hlist_entry_safe(node,
                        struct blk_mq_hw_ctx, cpuhp_online);
 
+       if (housekeeping_enabled(HK_TYPE_IO_QUEUE)) {
+               if (!blk_mq_hctx_can_offline_hk_cpu(hctx, cpu))
+                       return -EINVAL;
+       }
+
        if (blk_mq_hctx_has_online_cpu(hctx, cpu))
                return 0;
 

-- 
2.50.0


Reply via email to