When admin commands are issued from the error handler (EH) to recover the
controller, we have to cover their timeout ourselves and cannot depend on
the block layer's timeout handling, since a deadlock may occur if these
commands are timed out by the block layer again.
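
In short, the approach below (a condensed sketch of the new
nvme_set_host_mem_timeout() helper added by this patch; see the diff for
the full version) is to issue the command asynchronously and bound the
wait ourselves, so forward progress is guaranteed after ADMIN_TIMEOUT
even if the block layer never times the request out:

	DECLARE_COMPLETION_ONSTACK(wait);

	req = nvme_alloc_request(dev->ctrl.admin_q, &c, 0, NVME_QID_ANY);
	req->timeout = ADMIN_TIMEOUT;
	req->end_io_data = &wait;

	/* the end_io callback only completes 'wait'; the caller reaps req */
	blk_execute_rq_nowait(dev->ctrl.admin_q, NULL, req, false,
			nvme_set_host_mem_end_io);
	wait_for_completion_io_timeout(&wait, ADMIN_TIMEOUT);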

Cc: Jianchao Wang <jianchao.w.w...@oracle.com>
Cc: Christoph Hellwig <h...@lst.de>
Cc: Sagi Grimberg <s...@grimberg.me>
Cc: linux-n...@lists.infradead.org
Signed-off-by: Ming Lei <ming....@redhat.com>
---
 drivers/nvme/host/pci.c | 77 ++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 66 insertions(+), 11 deletions(-)

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index fbc71fac6f1e..0e6cd605164a 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -1733,21 +1733,28 @@ static inline void nvme_release_cmb(struct nvme_dev *dev)
        }
 }
 
-static int nvme_set_host_mem(struct nvme_dev *dev, u32 bits)
+static void nvme_init_set_host_mem_cmd(struct nvme_dev *dev,
+               struct nvme_command *c, u32 bits)
 {
        u64 dma_addr = dev->host_mem_descs_dma;
+
+       memset(c, 0, sizeof(*c));
+       c->features.opcode      = nvme_admin_set_features;
+       c->features.fid         = cpu_to_le32(NVME_FEAT_HOST_MEM_BUF);
+       c->features.dword11     = cpu_to_le32(bits);
+       c->features.dword12     = cpu_to_le32(dev->host_mem_size >>
+                                             ilog2(dev->ctrl.page_size));
+       c->features.dword13     = cpu_to_le32(lower_32_bits(dma_addr));
+       c->features.dword14     = cpu_to_le32(upper_32_bits(dma_addr));
+       c->features.dword15     = cpu_to_le32(dev->nr_host_mem_descs);
+}
+
+static int nvme_set_host_mem(struct nvme_dev *dev, u32 bits)
+{
        struct nvme_command c;
        int ret;
 
-       memset(&c, 0, sizeof(c));
-       c.features.opcode       = nvme_admin_set_features;
-       c.features.fid          = cpu_to_le32(NVME_FEAT_HOST_MEM_BUF);
-       c.features.dword11      = cpu_to_le32(bits);
-       c.features.dword12      = cpu_to_le32(dev->host_mem_size >>
-                                             ilog2(dev->ctrl.page_size));
-       c.features.dword13      = cpu_to_le32(lower_32_bits(dma_addr));
-       c.features.dword14      = cpu_to_le32(upper_32_bits(dma_addr));
-       c.features.dword15      = cpu_to_le32(dev->nr_host_mem_descs);
+       nvme_init_set_host_mem_cmd(dev, &c, bits);
 
        ret = nvme_submit_sync_cmd(dev->ctrl.admin_q, &c, NULL, 0);
        if (ret) {
@@ -1758,6 +1765,54 @@ static int nvme_set_host_mem(struct nvme_dev *dev, u32 bits)
        return ret;
 }
 
+static void nvme_set_host_mem_end_io(struct request *rq, blk_status_t sts)
+{
+       struct completion *waiting = rq->end_io_data;
+
+       rq->end_io_data = NULL;
+
+       /*
+        * complete last, if this is a stack request the process (and thus
+        * the rq pointer) could be invalid right after this complete()
+        */
+       complete(waiting);
+}
+
+/*
+ * This function can be used inside timeout handler, when the block layer
+ * timeout may not work, so this function has to cover the timeout by itself.
+ */
+static int nvme_set_host_mem_timeout(struct nvme_dev *dev, u32 bits)
+{
+       DECLARE_COMPLETION_ONSTACK(wait);
+       struct nvme_command c;
+       struct request_queue *q = dev->ctrl.admin_q;
+       struct request *req;
+       int ret;
+
+       nvme_init_set_host_mem_cmd(dev, &c, bits);
+
+       req = nvme_alloc_request(q, &c, 0, NVME_QID_ANY);
+       if (IS_ERR(req))
+               return PTR_ERR(req);
+
+       req->timeout = ADMIN_TIMEOUT;
+       req->end_io_data = &wait;
+
+       blk_execute_rq_nowait(q, NULL, req, false,
+                       nvme_set_host_mem_end_io);
+       wait_for_completion_io_timeout(&wait, ADMIN_TIMEOUT);
+
+       if (nvme_req(req)->flags & NVME_REQ_CANCELLED)
+               ret = -EINTR;
+       else
+               ret = nvme_req(req)->status;
+
+       blk_mq_free_request(req);
+
+       return ret;
+}
+
 static void nvme_free_host_mem(struct nvme_dev *dev)
 {
        int i;
@@ -2216,7 +2271,7 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
                 * but I'd rather be safe than sorry..
                 */
                if (dev->host_mem_descs)
-                       nvme_set_host_mem(dev, 0);
+                       nvme_set_host_mem_timeout(dev, 0);
                nvme_disable_io_queues(dev);
                nvme_disable_admin_queue(dev, shutdown);
        }
-- 
2.9.5
