An nvme_timeout could be running in parallel with nvme_dev_disable. The requests held by the timeout path cannot be cancelled by nvme_dev_disable, so nvme_timeout may still be running after nvme_dev_disable completes. There is then a race between the nvme_dev_disable invoked from nvme_timeout and the initializing procedure in nvme_reset_work:

nvme_timeout                    nvme_reset_work
  if (RESETTING)                  nvme_dev_disable
    nvme_dev_disable              initializing
To fix it, ensure all the q->timeout_work complete before the initializing procedure in nvme_reset_work. At that moment, all the outstanding requests should have been handled by nvme_dev_disable or nvme_timeout. So introduce nvme_sync_queues, which invokes blk_sync_queue. In addition, add blk_mq_kick_requeue_list to nvme_start_queues and nvme_kill_queues to avoid an IO hang in the requeue_list, because blk_sync_queue will cancel the requeue_work.

Link: https://lkml.org/lkml/2018/1/19/68
Suggested-by: Keith Busch <keith.bu...@intel.com>
Signed-off-by: Keith Busch <keith.bu...@intel.com>
Signed-off-by: Jianchao Wang <jianchao.w.w...@oracle.com>
---
 drivers/nvme/host/core.c | 20 ++++++++++++++++++--
 drivers/nvme/host/nvme.h |  1 +
 drivers/nvme/host/pci.c  |  9 ++++++++-
 3 files changed, 27 insertions(+), 3 deletions(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 23b3e53..c2ea8adb 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -3443,7 +3443,11 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl)
 		revalidate_disk(ns->disk);
 		blk_set_queue_dying(ns->queue);
 
-		/* Forcibly unquiesce queues to avoid blocking dispatch */
+		/*
+		 * Forcibly kick requeue and unquiesce queues to avoid blocking
+		 * dispatch
+		 */
+		blk_mq_kick_requeue_list(ns->queue);
 		blk_mq_unquiesce_queue(ns->queue);
 	}
 	mutex_unlock(&ctrl->namespaces_mutex);
@@ -3513,12 +3517,24 @@ void nvme_start_queues(struct nvme_ctrl *ctrl)
 	struct nvme_ns *ns;
 
 	mutex_lock(&ctrl->namespaces_mutex);
-	list_for_each_entry(ns, &ctrl->namespaces, list)
+	list_for_each_entry(ns, &ctrl->namespaces, list) {
+		blk_mq_kick_requeue_list(ns->queue);
 		blk_mq_unquiesce_queue(ns->queue);
+	}
 	mutex_unlock(&ctrl->namespaces_mutex);
 }
 EXPORT_SYMBOL_GPL(nvme_start_queues);
 
+void nvme_sync_queues(struct nvme_ctrl *ctrl)
+{
+	struct nvme_ns *ns;
+
+	mutex_lock(&ctrl->namespaces_mutex);
+	list_for_each_entry(ns, &ctrl->namespaces, list)
+		blk_sync_queue(ns->queue);
+	mutex_unlock(&ctrl->namespaces_mutex);
+}
+EXPORT_SYMBOL_GPL(nvme_sync_queues);
 int nvme_reinit_tagset(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set)
 {
 	if (!ctrl->ops->reinit_request)
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index a44eeca..01faea6 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -370,6 +370,7 @@ int nvme_sec_submit(void *data, u16 spsp, u8 secp, void *buffer, size_t len,
 void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status,
 		union nvme_result *res);
 
+void nvme_sync_queues(struct nvme_ctrl *ctrl);
 void nvme_stop_queues(struct nvme_ctrl *ctrl);
 void nvme_start_queues(struct nvme_ctrl *ctrl);
 void nvme_kill_queues(struct nvme_ctrl *ctrl);
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index f5207bc..9ba7e55 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -2318,8 +2318,15 @@ static void nvme_reset_work(struct work_struct *work)
 	 * If we're called to reset a live controller first shut it down before
 	 * moving on.
 	 */
-	if (dev->ctrl.ctrl_config & NVME_CC_ENABLE)
+	if (dev->ctrl.ctrl_config & NVME_CC_ENABLE) {
 		nvme_dev_disable(dev, false);
+		/* nvme_timeout could run in parallel; consequently, the
+		 * nvme_dev_disable invoked by nvme_timeout could race with the
+		 * following initializing procedure. Add nvme_sync_queues here
+		 * to ensure nvme_timeout has completed.
+		 */
+		nvme_sync_queues(&dev->ctrl);
+	}
 
 	/*
 	 * Introduce RECONNECTING state from nvme-fc/rdma transports to mark the
-- 
2.7.4