If the reset fails with a potentially retryable error (-EIO or -EINTR),
attempt to reset the controller again, up to a maximum of 4 attempts in
total.

Since the reset failure handler no longer unconditionally removes the
controller, give it a more appropriate name and warning message.

Signed-off-by: Keith Busch <keith.bu...@intel.com>
---
 drivers/nvme/host/pci.c | 26 +++++++++++++++++++++++---
 1 file changed, 23 insertions(+), 3 deletions(-)

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 6a7cbc631d92..ddfeb186d129 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -37,6 +37,8 @@
 
 #define SGES_PER_PAGE  (PAGE_SIZE / sizeof(struct nvme_sgl_desc))
 
+#define MAX_RESET_FAILURES 4
+
 static int use_threaded_interrupts;
 module_param(use_threaded_interrupts, int, 0);
 
@@ -101,6 +103,8 @@ struct nvme_dev {
        struct completion ioq_wait;
        bool queues_froze;
 
+       int reset_failures;
+
        /* shadow doorbell buffer support: */
        u32 *dbbuf_dbs;
        dma_addr_t dbbuf_dbs_dma_addr;
@@ -2307,9 +2311,23 @@ static void nvme_pci_free_ctrl(struct nvme_ctrl *ctrl)
        kfree(dev);
 }
 
-static void nvme_remove_dead_ctrl(struct nvme_dev *dev, int status)
+static void nvme_reset_failure(struct nvme_dev *dev, int status)
 {
-       dev_warn(dev->ctrl.device, "Removing after probe failure status: %d\n", status);
+       dev->reset_failures++;
+       dev_warn(dev->ctrl.device, "Reset failure status: %d, failures:%d\n",
+               status, dev->reset_failures);
+
+       /* IO and Interrupted Call may indicate a retryable error */
+       switch (status) {
+       case -EIO:
+       case -EINTR:
+               if (dev->reset_failures < MAX_RESET_FAILURES &&
+                   !nvme_reset_ctrl(&dev->ctrl))
+                       return;
+               break;
+       default:
+               break;
+       }
 
        nvme_get_ctrl(&dev->ctrl);
        nvme_dev_disable(dev, false);
@@ -2410,14 +2428,16 @@ static void nvme_reset_work(struct work_struct *work)
        if (!nvme_change_ctrl_state(&dev->ctrl, new_state)) {
                dev_warn(dev->ctrl.device,
                        "failed to mark controller state %d\n", new_state);
+               result = -ENODEV;
                goto out;
        }
 
+       dev->reset_failures = 0;
        nvme_start_ctrl(&dev->ctrl);
        return;
 
  out:
-       nvme_remove_dead_ctrl(dev, result);
+       nvme_reset_failure(dev, result);
 }
 
 static void nvme_remove_dead_ctrl_work(struct work_struct *work)
-- 
2.14.3

Reply via email to