On Fri, May 18, 2018 at 10:38:18AM -0600, Keith Busch wrote:
> This patch fixes races that occur with simultaneous controller
> resets by synchronizing request queues prior to initializing the
> controller. Without this, a thread may attempt disabling a controller
> at the same time as we're trying to enable it.
> 
> Signed-off-by: Keith Busch <[email protected]>
> ---
>  drivers/nvme/host/core.c | 21 +++++++++++++++++++--
>  drivers/nvme/host/nvme.h |  1 +
>  drivers/nvme/host/pci.c  |  1 +
>  3 files changed, 21 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
> index 99b857e5a7a9..1de68b56b318 100644
> --- a/drivers/nvme/host/core.c
> +++ b/drivers/nvme/host/core.c
> @@ -3471,6 +3471,12 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
>  }
>  EXPORT_SYMBOL_GPL(nvme_init_ctrl);
>  
> +static void nvme_start_queue(struct nvme_ns *ns)
> +{
> +     blk_mq_unquiesce_queue(ns->queue);
> +     blk_mq_kick_requeue_list(ns->queue);
> +}
> +
>  /**
>   * nvme_kill_queues(): Ends all namespace queues
>   * @ctrl: the dead controller that needs to end
> @@ -3499,7 +3505,7 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl)
>               blk_set_queue_dying(ns->queue);
>  
>               /* Forcibly unquiesce queues to avoid blocking dispatch */
> -             blk_mq_unquiesce_queue(ns->queue);
> +             nvme_start_queue(ns);
>       }
>       up_read(&ctrl->namespaces_rwsem);
>  }
> @@ -3569,11 +3575,22 @@ void nvme_start_queues(struct nvme_ctrl *ctrl)
>  
>       down_read(&ctrl->namespaces_rwsem);
>       list_for_each_entry(ns, &ctrl->namespaces, list)
> -             blk_mq_unquiesce_queue(ns->queue);
> +             nvme_start_queue(ns);
>       up_read(&ctrl->namespaces_rwsem);
>  }
>  EXPORT_SYMBOL_GPL(nvme_start_queues);
>  
> +void nvme_sync_queues(struct nvme_ctrl *ctrl)
> +{
> +     struct nvme_ns *ns;
> +
> +     down_read(&ctrl->namespaces_rwsem);
> +     list_for_each_entry(ns, &ctrl->namespaces, list)
> +             blk_sync_queue(ns->queue);
> +     up_read(&ctrl->namespaces_rwsem);
> +}
> +EXPORT_SYMBOL_GPL(nvme_sync_queues);

This approach can't synchronize timeout handling reliably, since timeout
events can come from two namespaces at the same time: one may be handled
as RESET_TIMER while the other is handled as EH_HANDLED.


Thanks,
Ming

Reply via email to