--- Begin Message --- Introduce the srp_dev_loss_tmo module parameter. Create a timer to clean up the connection after srp_dev_loss_tmo expires. During srp_dev_loss_tmo, while the QP is in the error state, srp returns DID_RESET for outstanding I/O, returns FAILED for abort_cmd and reset_lun, and returns SUCCESS (without trying to reconnect) for reset_host.

Signed-off-by: Vu Pham <[email protected]>


Index: ofed_kernel/drivers/infiniband/ulp/srp/ib_srp.c
===================================================================
--- ofed_kernel.orig/drivers/infiniband/ulp/srp/ib_srp.c
+++ ofed_kernel/drivers/infiniband/ulp/srp/ib_srp.c
@@ -78,6 +77,13 @@
 MODULE_PARM_DESC(mellanox_workarounds,
                 "Enable workarounds for Mellanox SRP target bugs if != 0");
 
+static int srp_dev_loss_tmo = 60;      /* seconds; values below 60 are raised to 60 later in this patch */
+
+module_param(srp_dev_loss_tmo, int, 0444);     /* 0444: readable but not writable via sysfs */
+MODULE_PARM_DESC(srp_dev_loss_tmo,
+                "Default number of seconds that srp transport should "
+                "insulate the loss of a remote port (default is 60 secs)");
+
 static void srp_add_one(struct ib_device *device);
 static void srp_remove_one(struct ib_device *device);
 static void srp_completion(struct ib_cq *cq, void *target_ptr);
@@ -898,6 +926,48 @@
                                      DMA_FROM_DEVICE);
 }
 
+static void srp_reconnect_work(struct work_struct *work)
+{      /* Work handler: reconnect the target that embeds @work, then clear its in-progress flag. */
+       struct srp_target_port *target =
+               container_of(work, struct srp_target_port, work);
+
+       srp_reconnect_target(target);   /* return value is not checked here */
+       target->work_in_progress = 0;   /* NOTE(review): cleared without host_lock, but set under it in srp_qp_in_err_timer() */
+}
+
+static void srp_qp_in_err_timer(unsigned long data)
+{      /* qp_err_timer callback: reset every queued request, then schedule a reconnect. */
+       struct srp_target_port *target = (struct srp_target_port *)data;        /* cookie passed to setup_timer() */
+       struct srp_request *req, *tmp;
+
+       if (target->state != SRP_TARGET_LIVE)   /* NOTE(review): state is read without host_lock */
+               return;
+
+       spin_lock_irq(target->scsi_host->host_lock);
+       list_for_each_entry_safe(req, tmp, &target->req_queue, list)
+               srp_reset_req(target, req);
+       spin_unlock_irq(target->scsi_host->host_lock);
+
+       spin_lock_irq(target->scsi_host->host_lock);    /* NOTE(review): lock is dropped and retaken immediately */
+       if (!target->work_in_progress) {        /* keep at most one reconnect work item outstanding */
+               target->work_in_progress = 1;
+               INIT_WORK(&target->work, srp_reconnect_work);
+               schedule_work(&target->work);
+       }
+       spin_unlock_irq(target->scsi_host->host_lock);
+}
+
+static void srp_qp_err_add_timer(struct srp_target_port *target, int time)
+{      /* Arm target->qp_err_timer to run srp_qp_in_err_timer() in @time seconds. */
+       if (!timer_pending(&target->qp_err_timer)) {    /* do nothing if the timer is already pending */
+               setup_timer(&target->qp_err_timer,
+                           srp_qp_in_err_timer,
+                           (unsigned long)target);
+               target->qp_err_timer.expires = time * HZ + jiffies;     /* @time seconds from now, in jiffies */
+               add_timer(&target->qp_err_timer);
+       }
+}
+
 static void srp_completion(struct ib_cq *cq, void *target_ptr)
 {
        struct srp_target_port *target = target_ptr;
@@ -960,11 +980,20 @@
        ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
        while (ib_poll_cq(cq, 1, &wc) > 0) {
                if (wc.status) {
+                       unsigned long flags;
+
                        shost_printk(KERN_ERR, target->scsi_host,
                                     PFX "failed %s status %d\n",
                                     wc.wr_id & SRP_OP_RECV ? "receive" : "send",
                                     wc.status);
-                       target->qp_in_error = 1;
+                       spin_lock_irqsave(target->scsi_host->host_lock, flags);
+                       if (!target->qp_in_error &&
+                           target->state == SRP_TARGET_LIVE) {
+                               target->qp_in_error = 1;
+                               srp_qp_err_add_timer(target,
+                                                    srp_dev_loss_tmo - 55);
+                       }
+                       spin_unlock_irqrestore(target->scsi_host->host_lock, flags);
                        break;
                }
 
@@ -1274,5 +1299,6 @@
        int attr_mask = 0;
        int comp = 0;
        int opcode = 0;
+       unsigned long flags;
 
        switch (event->event) {
@@ -1301,6 +1381,14 @@
                shost_printk(KERN_ERR, target->scsi_host,
                             PFX "connection closed\n");
 
+               spin_lock_irqsave(target->scsi_host->host_lock, flags);
+               if (!target->qp_in_error &&
+                   target->state == SRP_TARGET_LIVE) {
+                       target->qp_in_error = 1;
+                       srp_qp_err_add_timer(target,
+                                            srp_dev_loss_tmo - 55);
+               }
+               spin_unlock_irqrestore(target->scsi_host->host_lock, flags);
                target->status = 0;
                break;
 
@@ -1443,9 +1529,22 @@
 static int srp_reset_host(struct scsi_cmnd *scmnd)
 {
        struct srp_target_port *target = host_to_target(scmnd->device->host);
+       struct srp_request *req, *tmp;
        int ret = FAILED;
 
-       shost_printk(KERN_ERR, target->scsi_host, PFX "SRP reset_host called\n");
+       shost_printk(KERN_ERR, target->scsi_host,
+                    PFX "SRP reset_host called state %d qp_err %d\n",
+                    target->state, target->qp_in_error);
+
+       spin_lock_irq(target->scsi_host->host_lock);
+       if (timer_pending(&target->qp_err_timer) || target->qp_in_error ||
+           target->state != SRP_TARGET_LIVE) {
+               list_for_each_entry_safe(req, tmp, &target->req_queue, list)
+                       srp_reset_req(target, req);
+               spin_unlock_irq(target->scsi_host->host_lock);
+               return SUCCESS;
+       }
+       spin_unlock_irq(target->scsi_host->host_lock);
 
        if (!srp_reconnect_target(target))
                ret = SUCCESS;
@@ -2150,6 +2342,9 @@
                          sizeof (struct srp_indirect_buf) +
                          srp_sg_tablesize * 16);
 
+       if (srp_dev_loss_tmo < 60)
+               srp_dev_loss_tmo = 60;
+
        ret = class_register(&srp_class);
        if (ret) {
                printk(KERN_ERR PFX "couldn't register class infiniband_srp\n");
Index: ofed_kernel/drivers/infiniband/ulp/srp/ib_srp.h
===================================================================
--- ofed_kernel.orig/drivers/infiniband/ulp/srp/ib_srp.h
+++ ofed_kernel/drivers/infiniband/ulp/srp/ib_srp.h
@@ -153,12 +159,14 @@
        struct srp_request      req_ring[SRP_SQ_SIZE];
 
        struct work_struct      work;
+       int                     work_in_progress;
 
        struct list_head        list;
        struct completion       done;
        int                     status;
        enum srp_target_state   state;
        int                     qp_in_error;
+       struct timer_list       qp_err_timer;
 };
 
 struct srp_iu {

--- End Message ---

Reply via email to