If hardware queues are stopped for some event, such as the device being
suspended by power management, requests allocated on those hardware
queues are stuck indefinitely, causing a queue freeze to wait forever.
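
For example (a hypothetical driver-side sequence; the block layer calls
are the existing API, the surrounding context is illustrative only):

	/* power management suspends the device and stops its hw queues */
	blk_mq_stop_hw_queues(q);

	/*
	 * A later freeze never completes: requests already allocated on
	 * the stopped hctx can neither be dispatched nor failed, so the
	 * freeze waits forever.
	 */
	blk_mq_freeze_queue(q);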

This patch abandons requests on stopped queues after syncing with all
in-flight queue_rq calls when we need to rebalance the queues. While we
would prefer not to end such requests in error if they could be
submitted on a different context, there is no good way to unwind a
request for resubmission on a valid context once it has entered a
stopped context slated for removal. Ending the IO with -EAGAIN is a
better alternative than deadlocking.
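
The rebalance paths now follow this ordering (a simplified sketch of
the blk_mq_update_nr_hw_queues() hunk below; locking and the
map_queues update are elided):

	list_for_each_entry(q, &set->tag_list, tag_set_list)
		blk_mq_freeze_queue_start(q);		/* begin freezing, do not wait */
	list_for_each_entry(q, &set->tag_list, tag_set_list)
		blk_mq_abandon_stopped_requests(q);	/* end stuck requests with -EAGAIN */
	list_for_each_entry(q, &set->tag_list, tag_set_list)
		blk_mq_freeze_queue_wait(q);		/* freeze can now complete */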

Reported-by: Marc Merlin <[email protected]>
Signed-off-by: Keith Busch <[email protected]>
---
 block/blk-mq.c | 79 +++++++++++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 67 insertions(+), 12 deletions(-)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 9b7ed03..0c9a2a3 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -117,22 +117,12 @@ void blk_mq_unfreeze_queue(struct request_queue *q)
 }
 EXPORT_SYMBOL_GPL(blk_mq_unfreeze_queue);
 
-/**
- * blk_mq_quiesce_queue() - wait until all ongoing queue_rq calls have finished
- * @q: request queue.
- *
- * Note: this function does not prevent that the struct request end_io()
- * callback function is invoked. Additionally, it is not prevented that
- * new queue_rq() calls occur unless the queue has been stopped first.
- */
-void blk_mq_quiesce_queue(struct request_queue *q)
+static void blk_mq_sync_queue(struct request_queue *q)
 {
        struct blk_mq_hw_ctx *hctx;
        unsigned int i;
        bool rcu = false;
 
-       blk_mq_stop_hw_queues(q);
-
        queue_for_each_hw_ctx(q, hctx, i) {
                if (hctx->flags & BLK_MQ_F_BLOCKING)
                        synchronize_srcu(&hctx->queue_rq_srcu);
@@ -142,6 +132,20 @@ void blk_mq_quiesce_queue(struct request_queue *q)
        if (rcu)
                synchronize_rcu();
 }
+
+/**
+ * blk_mq_quiesce_queue() - wait until all ongoing queue_rq calls have finished
+ * @q: request queue.
+ *
+ * Note: this function does not prevent that the struct request end_io()
+ * callback function is invoked. Additionally, it is not prevented that
+ * new queue_rq() calls occur unless the queue has been stopped first.
+ */
+void blk_mq_quiesce_queue(struct request_queue *q)
+{
+       blk_mq_stop_hw_queues(q);
+       blk_mq_sync_queue(q);
+}
 EXPORT_SYMBOL_GPL(blk_mq_quiesce_queue);
 
 void blk_mq_wake_waiters(struct request_queue *q)
@@ -2228,6 +2232,51 @@ static void blk_mq_queue_reinit(struct request_queue *q,
        blk_mq_sysfs_register(q);
 }
 
+static void blk_mq_abandon_stopped_requests(struct request_queue *q)
+{
+       int i;
+       struct request *rq, *next;
+       struct blk_mq_hw_ctx *hctx;
+       LIST_HEAD(rq_list);
+
+       blk_mq_sync_queue(q);
+
+       spin_lock(&q->requeue_lock);
+       list_for_each_entry_safe(rq, next, &q->requeue_list, queuelist) {
+               struct blk_mq_ctx *ctx;
+
+               ctx = rq->mq_ctx;
+               hctx = blk_mq_map_queue(q, ctx->cpu);
+               if (blk_mq_hctx_stopped(hctx)) {
+                       list_del_init(&rq->queuelist);
+
+                       spin_lock(&hctx->lock);
+                       list_add_tail(&rq->queuelist, &rq_list);
+                       spin_unlock(&hctx->lock);
+               }
+       }
+
+       queue_for_each_hw_ctx(q, hctx, i) {
+               if (!blk_mq_hctx_stopped(hctx))
+                       continue;
+
+               flush_busy_ctxs(hctx, &rq_list);
+
+               spin_lock(&hctx->lock);
+               if (!list_empty(&hctx->dispatch))
+                       list_splice_init(&hctx->dispatch, &rq_list);
+               spin_unlock(&hctx->lock);
+       }
+       spin_unlock(&q->requeue_lock);
+
+       while (!list_empty(&rq_list)) {
+               rq = list_first_entry(&rq_list, struct request, queuelist);
+               list_del_init(&rq->queuelist);
+               rq->errors = -EAGAIN;
+               blk_mq_end_request(rq, rq->errors);
+       }
+}
+
 /*
  * New online cpumask which is going to be set in this hotplug event.
  * Declare this cpumasks as global as cpu-hotplug operation is invoked
@@ -2250,6 +2299,8 @@ static void blk_mq_queue_reinit_work(void)
        list_for_each_entry(q, &all_q_list, all_q_node)
                blk_mq_freeze_queue_start(q);
        list_for_each_entry(q, &all_q_list, all_q_node)
+               blk_mq_abandon_stopped_requests(q);
+       list_for_each_entry(q, &all_q_list, all_q_node)
                blk_mq_freeze_queue_wait(q);
 
        list_for_each_entry(q, &all_q_list, all_q_node)
@@ -2477,7 +2528,11 @@ void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues)
                return;
 
        list_for_each_entry(q, &set->tag_list, tag_set_list)
-               blk_mq_freeze_queue(q);
+               blk_mq_freeze_queue_start(q);
+       list_for_each_entry(q, &set->tag_list, tag_set_list)
+               blk_mq_abandon_stopped_requests(q);
+       list_for_each_entry(q, &set->tag_list, tag_set_list)
+               blk_mq_freeze_queue_wait(q);
 
        set->nr_hw_queues = nr_hw_queues;
        if (set->ops->map_queues)
-- 
2.5.5
