On Tue, Apr 10, 2018 at 09:46:23PM +0000, Bart Van Assche wrote:
> On Tue, 2018-04-10 at 14:33 -0700, [email protected] wrote:
> > + else
> > + rq->missed_completion = true;
>
> In this patch I found code that sets rq->missed_completion but no code that
> clears it. Did I perhaps miss something?
Ah, yeah, I was moving it out of add_timer but forgot to actually add
it to the issue path. Fixed patch below.
BTW, no matter what we do w/ the request handover between normal and
timeout paths, we'd need something similar. Otherwise, while we can
reduce the window, we can't get rid of it.
Thanks.
---
block/blk-mq.c | 45 ++++++++++++++++++++++++++++++++-------------
include/linux/blkdev.h | 2 ++
2 files changed, 34 insertions(+), 13 deletions(-)
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -642,6 +642,8 @@ void blk_mq_complete_request(struct requ
hctx_lock(hctx, &srcu_idx);
if (blk_mq_rq_aborted_gstate(rq) != rq->gstate)
__blk_mq_complete_request(rq);
+ else
+ rq->missed_completion = true;
hctx_unlock(hctx, srcu_idx);
}
EXPORT_SYMBOL(blk_mq_complete_request);
@@ -684,6 +686,7 @@ void blk_mq_start_request(struct request
blk_mq_rq_update_state(rq, MQ_RQ_IN_FLIGHT);
blk_add_timer(rq);
+ rq->missed_completion = false;
write_seqcount_end(&rq->gstate_seq);
preempt_enable();
@@ -881,7 +884,7 @@ static void blk_mq_check_expired(struct
}
}
-static void blk_mq_timeout_sync_rcu(struct request_queue *q)
+static void blk_mq_timeout_sync_rcu(struct request_queue *q, bool clear)
{
struct blk_mq_hw_ctx *hctx;
bool has_rcu = false;
@@ -896,7 +899,8 @@ static void blk_mq_timeout_sync_rcu(stru
else
synchronize_srcu(hctx->srcu);
- hctx->need_sync_rcu = false;
+ if (clear)
+ hctx->need_sync_rcu = false;
}
if (has_rcu)
synchronize_rcu();
@@ -917,25 +921,37 @@ static void blk_mq_terminate_expired(str
blk_mq_rq_timed_out(hctx, rq, priv, reserved);
}
-static void blk_mq_finish_timeout_reset(struct blk_mq_hw_ctx *hctx,
+static void blk_mq_timeout_reset_return(struct blk_mq_hw_ctx *hctx,
struct request *rq, void *priv, bool reserved)
{
/*
* @rq's timer reset has gone through rcu synchronization and is
* visible now. Allow normal completions again by resetting
* ->aborted_gstate. Don't clear RQF_MQ_TIMEOUT_RESET here as
- * there's no memory ordering around ->aborted_gstate making it the
- * only field safe to update. Let blk_add_timer() clear it later
- * when the request is recycled or times out again.
- *
- * As nothing prevents from completion happening while
- * ->aborted_gstate is set, this may lead to ignored completions
- * and further spurious timeouts.
+ * blk_mq_timeout_reset_cleanup() needs it again and there's no
+ * memory ordering around ->aborted_gstate making it the only field
+ * safe to update. Let blk_add_timer() clear it later when the
+ * request is recycled or times out again.
*/
if (rq->rq_flags & RQF_MQ_TIMEOUT_RESET)
blk_mq_rq_update_aborted_gstate(rq, 0);
}
+static void blk_mq_timeout_reset_cleanup(struct blk_mq_hw_ctx *hctx,
+ struct request *rq, void *priv, bool reserved)
+{
+ /*
+ * @rq is now fully returned to the normal path. If normal
+ * completion raced timeout handling, execute the missed completion
+ * here. This is safe because 1. ->missed_completion can no longer
+ * be asserted because nothing is timing out right now and 2. if
+ * ->missed_completion is set, @rq is safe from recycling because
+ * nobody could have completed it.
+ */
+ if ((rq->rq_flags & RQF_MQ_TIMEOUT_RESET) && rq->missed_completion)
+ blk_mq_complete_request(rq);
+}
+
static void blk_mq_timeout_work(struct work_struct *work)
{
struct request_queue *q =
@@ -976,7 +992,7 @@ static void blk_mq_timeout_work(struct w
* becomes a problem, we can add per-hw_ctx rcu_head and
* wait in parallel.
*/
- blk_mq_timeout_sync_rcu(q);
+ blk_mq_timeout_sync_rcu(q, true);
/* terminate the ones we won */
blk_mq_queue_tag_busy_iter(q, blk_mq_terminate_expired,
@@ -988,9 +1004,12 @@ static void blk_mq_timeout_work(struct w
* reset racing against recycling.
*/
if (nr_resets) {
- blk_mq_timeout_sync_rcu(q);
+ blk_mq_timeout_sync_rcu(q, false);
+ blk_mq_queue_tag_busy_iter(q,
+ blk_mq_timeout_reset_return, NULL);
+ blk_mq_timeout_sync_rcu(q, true);
blk_mq_queue_tag_busy_iter(q,
- blk_mq_finish_timeout_reset, NULL);
+ blk_mq_timeout_reset_cleanup, NULL);
}
}
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -227,6 +227,8 @@ struct request {
unsigned int extra_len; /* length of alignment and padding */
+ bool missed_completion;
+
/*
* On blk-mq, the lower bits of ->gstate (generation number and
* state) carry the MQ_RQ_* state value and the upper bits the