After the direct dispatch corruption fix, we permanently disallow direct
dispatch of non-read/write requests. This works fine off the normal IO
path, as those requests will be retried like any other failed direct
dispatch request. But for blk_insert_cloned_request(), which only DM
uses to bypass the bottom-level scheduler, we always attempt direct
dispatch first. For some types of requests, that's now a permanent
failure, and no amount of retrying will make it succeed.
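
For reference, the restriction comes from the helper that ffe81d45322c
added to limit direct dispatch to plain reads and writes. A sketch of
that check (paraphrased, not the verbatim hunk):

	/*
	 * Anything but a regular read or write can share request space
	 * with data the IO scheduler needs, so a failed direct dispatch
	 * of such a request can't safely be inserted afterwards.
	 */
	static bool blk_rq_can_direct_dispatch(struct request *rq)
	{
		return req_op(rq) == REQ_OP_READ || req_op(rq) == REQ_OP_WRITE;
	}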

Use the driver-private RQF_DONTPREP flag to track this condition in DM.
If we encounter a BUSY condition from blk_insert_cloned_request(), flag
the request with RQF_DONTPREP. The next time we see this request, ask
blk_insert_cloned_request() to bypass-insert the request directly
instead. This avoids the livelock of repeatedly trying to
direct-dispatch a request, while still retaining the BUSY feedback loop
for blk-mq so that we don't over-dispatch to the lower-level queue and
mess up opportunities for merging on the DM queue.
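
For context, the retry works because DM requeues the original request
on BUSY and maps it again later. RQF_DONTPREP is set on the original
request, not the clone, so it survives the requeue, and the second
dispatch attempt takes the bypass-insert path. Simplified sketch of the
existing caller in dm-rq.c (map_request(), not part of this patch):

	ret = dm_dispatch_clone_request(clone, rq);
	if (ret == BLK_STS_RESOURCE || ret == BLK_STS_DEV_RESOURCE) {
		/* rq now carries RQF_DONTPREP; drop clone, requeue rq */
		blk_rq_unprep_clone(clone);
		tio->ti->type->release_clone_rq(clone);
		tio->clone = NULL;
		return DM_MAPIO_REQUEUE;
	}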

Fixes: ffe81d45322c ("blk-mq: fix corruption with direct issue")
Reported-by: Bart Van Assche <[email protected]>
Cc: [email protected]
Signed-off-by: Jens Axboe <[email protected]>

---

Like the previous patch, this passes my testing. Unlike that patch,
though, we retain the BUSY feedback loop information for better
merging.

diff --git a/block/blk-core.c b/block/blk-core.c
index deb56932f8c4..cccda51e165f 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -2617,7 +2617,8 @@ static int blk_cloned_rq_check_limits(struct request_queue *q,
  * @q:  the queue to submit the request
  * @rq: the request being queued
  */
-blk_status_t blk_insert_cloned_request(struct request_queue *q, struct request *rq)
+blk_status_t blk_insert_cloned_request(struct request_queue *q, struct request *rq,
+                                      bool force_insert)
 {
        unsigned long flags;
        int where = ELEVATOR_INSERT_BACK;
@@ -2637,7 +2638,11 @@ blk_status_t blk_insert_cloned_request(struct request_queue *q, struct request *
                 * bypass a potential scheduler on the bottom device for
                 * insert.
                 */
-               return blk_mq_request_issue_directly(rq);
+               if (force_insert) {
+                       blk_mq_request_bypass_insert(rq, true);
+                       return BLK_STS_OK;
+               } else
+                       return blk_mq_request_issue_directly(rq);
        }
 
        spin_lock_irqsave(q->queue_lock, flags);
diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c
index 7cd36e4d1310..e497a2ab6766 100644
--- a/drivers/md/dm-rq.c
+++ b/drivers/md/dm-rq.c
@@ -299,16 +299,20 @@ static void end_clone_request(struct request *clone, blk_status_t error)
 
 static blk_status_t dm_dispatch_clone_request(struct request *clone, struct request *rq)
 {
+       bool was_busy = (rq->rq_flags & RQF_DONTPREP) != 0;
        blk_status_t r;
 
        if (blk_queue_io_stat(clone->q))
                clone->rq_flags |= RQF_IO_STAT;
 
        clone->start_time_ns = ktime_get_ns();
-       r = blk_insert_cloned_request(clone->q, clone);
-       if (r != BLK_STS_OK && r != BLK_STS_RESOURCE && r != BLK_STS_DEV_RESOURCE)
+       r = blk_insert_cloned_request(clone->q, clone, was_busy);
+       if (r == BLK_STS_RESOURCE || r == BLK_STS_DEV_RESOURCE)
+               rq->rq_flags |= RQF_DONTPREP;
+       else if (r != BLK_STS_OK)
                /* must complete clone in terms of original request */
                dm_complete_request(rq, r);
+
        return r;
 }
 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 4293dc1cd160..7cb84ee4c9f4 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -994,7 +994,7 @@ extern int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
                             void *data);
 extern void blk_rq_unprep_clone(struct request *rq);
 extern blk_status_t blk_insert_cloned_request(struct request_queue *q,
-                                    struct request *rq);
+                                    struct request *rq, bool force_insert);
 extern int blk_rq_append_bio(struct request *rq, struct bio **bio);
 extern void blk_delay_queue(struct request_queue *, unsigned long);
 extern void blk_queue_split(struct request_queue *, struct bio **);

-- 
Jens Axboe
