When isolcpus=io_queue is enabled and the last housekeeping CPU for a
given hctx goes offline, no CPU is left to handle the IOs for that hctx.
To prevent IO stalls, prevent offlining housekeeping CPUs which are
still serving isolated CPUs.

Signed-off-by: Daniel Wagner <w...@kernel.org>
---
 block/blk-mq.c | 46 ++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 44 insertions(+), 2 deletions(-)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index c2697db591091200cdb9f6e082e472b829701e4c..aff17673b773583dfb2b01cb2f5f010c456bd834 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -3627,6 +3627,48 @@ static bool blk_mq_hctx_has_requests(struct blk_mq_hw_ctx *hctx)
        return data.has_rq;
 }
 
+static bool blk_mq_hctx_check_isolcpus_online(struct blk_mq_hw_ctx *hctx, 
unsigned int cpu)
+{
+       const struct cpumask *hk_mask;
+       int i;
+
+       if (!housekeeping_enabled(HK_TYPE_IO_QUEUE))
+               return true;
+
+       hk_mask = housekeeping_cpumask(HK_TYPE_IO_QUEUE);
+
+       for (i = 0; i < hctx->nr_ctx; i++) {
+               struct blk_mq_ctx *ctx = hctx->ctxs[i];
+
+               if (ctx->cpu == cpu)
+                       continue;
+
+               /*
+                * Another CPU of this hardware context is an online
+                * housekeeping CPU, so the hardware context stays usable:
+                * stop scanning (the function then returns false).
+                */
+               if (cpumask_test_cpu(ctx->cpu, hk_mask) &&
+                   cpu_online(ctx->cpu))
+                       break;
+
+               /*
+                * This CPU is not an online housekeeping CPU. Skip it if
+                * it is offline; otherwise it is an online CPU outside the
+                * housekeeping mask, i.e. an isolated CPU still mapped here.
+                */
+               if (cpu_is_offline(ctx->cpu))
+                       continue;
+
+               pr_warn("%s: trying to offline hctx%d but there is still an 
online isolcpu CPU %d mapped to it\n",
+                       hctx->queue->disk->disk_name,
+                       hctx->queue_num, ctx->cpu);
+               return true;
+       }
+
+       return false;
+}
+
 static bool blk_mq_hctx_has_online_cpu(struct blk_mq_hw_ctx *hctx,
                unsigned int this_cpu)
 {
@@ -3647,7 +3689,7 @@ static bool blk_mq_hctx_has_online_cpu(struct blk_mq_hw_ctx *hctx,
 
                /* this hctx has at least one online CPU */
                if (this_cpu != cpu)
-                       return true;
+                       return blk_mq_hctx_check_isolcpus_online(hctx, 
this_cpu);
        }
 
        return false;
@@ -3659,7 +3701,7 @@ static int blk_mq_hctx_notify_offline(unsigned int cpu, struct hlist_node *node)
                        struct blk_mq_hw_ctx, cpuhp_online);
 
        if (blk_mq_hctx_has_online_cpu(hctx, cpu))
-               return 0;
+               return -EINVAL;
 
        /*
         * Prevent new request from being allocated on the current hctx.

-- 
2.49.0


Reply via email to