On Tue, Apr 10, 2018 at 09:46:23PM +0000, Bart Van Assche wrote:
> On Tue, 2018-04-10 at 14:33 -0700, [email protected] wrote:
> > + else
> > + rq->missed_completion = true;
>
> In this patch I found code that sets rq->missed_completion but no code that
> clears it. Did I perhaps miss something?
Ah, yeah, I was moving it out of add_timer but forgot to actually add
it to the issue path. Fixed patch below.
BTW, no matter what we do w/ the request handover between normal and
timeout paths, we'd need something similar. Otherwise, while we can
reduce the window, we can't get rid of it.
Thanks.
---
block/blk-mq.c | 45 ++++++++++++++++++++++++++++++++-------------
include/linux/blkdev.h | 2 ++
2 files changed, 34 insertions(+), 13 deletions(-)
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -642,6 +642,8 @@ void blk_mq_complete_request(struct requ
hctx_lock(hctx, &srcu_idx);
if (blk_mq_rq_aborted_gstate(rq) != rq->gstate)
__blk_mq_complete_request(rq);
+ else
+ rq->missed_completion = true;
hctx_unlock(hctx, srcu_idx);
}
EXPORT_SYMBOL(blk_mq_complete_request);
@@ -684,6 +686,7 @@ void blk_mq_start_request(struct request
blk_mq_rq_update_state(rq, MQ_RQ_IN_FLIGHT);
blk_add_timer(rq);
+ rq->missed_completion = false;
write_seqcount_end(&rq->gstate_seq);
preempt_enable();
@@ -881,7 +884,7 @@ static void blk_mq_check_expired(struct
}
}
-static void blk_mq_timeout_sync_rcu(struct request_queue *q)
+static void blk_mq_timeout_sync_rcu(struct request_queue *q, bool clear)
{
struct blk_mq_hw_ctx *hctx;
bool has_rcu = false;
@@ -896,7 +899,8 @@ static void blk_mq_timeout_sync_rcu(stru
else
synchronize_srcu(hctx->srcu);
- hctx->need_sync_rcu = false;
+ if (clear)
+ hctx->need_sync_rcu = false;
}
if (has_rcu)
synchronize_rcu();
@@ -917,25 +921,37 @@ static void blk_mq_terminate_expired(str
blk_mq_rq_timed_out(hctx, rq, priv, reserved);
}
-static void blk_mq_finish_timeout_reset(struct blk_mq_hw_ctx *hctx,
+static void blk_mq_timeout_reset_return(struct blk_mq_hw_ctx *hctx,
struct request *rq, void *priv, bool reserved)
{
/*
* @rq's timer reset has gone through rcu synchronization and is
* visible now. Allow normal completions again by resetting
* ->aborted_gstate. Don't clear RQF_MQ_TIMEOUT_RESET here as
- * there's no memory ordering around ->aborted_gstate making it the
- * only field safe to update. Let blk_add_timer() clear it later
- * when the request is recycled or times out again.
- *
- * As nothing prevents from completion happening while
- * ->aborted_gstate is set, this may lead to ignored completions
- * and further spurious timeouts.
+ * blk_mq_timeout_reset_cleanup() needs it again and there's no
+ * memory ordering around ->aborted_gstate making it the only field
+ * safe to update. Let blk_add_timer() clear it later when the
+ * request is recycled or times out again.
*/
if (rq->rq_flags & RQF_MQ_TIMEOUT_RESET)
blk_mq_rq_update_aborted_gstate(rq, 0);
}
+static void blk_mq_timeout_reset_cleanup(struct blk_mq_hw_ctx *hctx,
+ struct request *rq, void *priv, bool reserved)
+{
+ /*
+ * @rq is now fully returned to the normal path. If normal
+ * completion raced timeout handling, execute the missed completion
+ * here. This is safe because 1. ->missed_completion can no longer
+ * be asserted because nothing is timing out right now and 2. if
+ * ->missed_completion is set, @rq is safe from recycling because
+ * nobody could have completed it.
+ */
+ if ((rq->rq_flags & RQF_MQ_TIMEOUT_RESET) && rq->missed_completion)
+ blk_mq_complete_request(rq);
+}
+
static void blk_mq_timeout_work(struct work_struct *work)
{
struct request_queue *q =
@@ -976,7 +992,7 @@ static void blk_mq_timeout_work(struct w
* becomes a problem, we can add per-hw_ctx rcu_head and
* wait in parallel.
*/
- blk_mq_timeout_sync_rcu(q);
+ blk_mq_timeout_sync_rcu(q, true);
/* terminate the ones we won */
blk_mq_queue_tag_busy_iter(q, blk_mq_terminate_expired,
@@ -988,9 +1004,12 @@ static void blk_mq_timeout_work(struct w
* reset racing against recycling.
*/
if (nr_resets) {
- blk_mq_timeout_sync_rcu(q);
+ blk_mq_timeout_sync_rcu(q, false);
+ blk_mq_queue_tag_busy_iter(q,
+ blk_mq_timeout_reset_return, NULL);
+ blk_mq_timeout_sync_rcu(q, true);
blk_mq_queue_tag_busy_iter(q,
- blk_mq_finish_timeout_reset, NULL);
+ blk_mq_timeout_reset_cleanup, NULL);
}
}
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -227,6 +227,8 @@ struct request {
unsigned int extra_len; /* length of alignment and padding */
+ bool missed_completion;
+
/*
* On blk-mq, the lower bits of ->gstate (generation number and
* state) carry the MQ_RQ_* state value and the upper bits the