On 3/12/2014 3:16 PM, Bart Van Assche wrote:
On 03/11/14 16:30, Sagi Grimberg wrote:
State FAIL_FAST must come *after* state BLOCKED. Do you think that
taking the lock
once the rport transitions to state BLOCKED suffices? I'm aiming to
avoid this lock in
the sunny-day flow. Taking this lock always to protect against some
error flow
that might occur feels somewhat wrong to me.
Hello Sagi,

I agree that today the SRP initiator only invokes srp_terminate_io()
after having quiesced I/O first, so from that point of view it is not
necessary to add more locking in srp_queuecommand(). However, since
this is nontrivial, I'd like to trigger a kernel warning if
srp_terminate_io() is ever invoked concurrently with
srp_queuecommand(). Additionally, I think the code in
srp_reset_device() can trigger a race with the I/O completion path. How
about addressing all this with the patch below?

Thanks,

Bart.

[PATCH] IB/srp: Fix a race condition between failing I/O and I/O completion

Prevent srp_terminate_io() from accessing req->scmnd after it has been
cleared by the I/O completion code. Do this by protecting req->scmnd
accesses from srp_terminate_io() via locking.

Signed-off-by: Bart Van Assche <[email protected]>
---
  drivers/infiniband/ulp/srp/ib_srp.c | 33 ++++++++++++++++++++++-----------
  1 file changed, 22 insertions(+), 11 deletions(-)

diff --git a/drivers/infiniband/ulp/srp/ib_srp.c 
b/drivers/infiniband/ulp/srp/ib_srp.c
index a64e469..66a908b 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -783,6 +783,7 @@ static void srp_unmap_data(struct scsi_cmnd *scmnd,
   * srp_claim_req - Take ownership of the scmnd associated with a request.
   * @target: SRP target port.
   * @req: SRP request.
+ * @sdev: If not NULL, only take ownership for this SCSI device.
   * @scmnd: If NULL, take ownership of @req->scmnd. If not NULL, only take
   *         ownership of @req->scmnd if it equals @scmnd.
   *
@@ -791,16 +792,17 @@ static void srp_unmap_data(struct scsi_cmnd *scmnd,
   */
  static struct scsi_cmnd *srp_claim_req(struct srp_target_port *target,
                                       struct srp_request *req,
+                                      struct scsi_device *sdev,
                                       struct scsi_cmnd *scmnd)
  {
        unsigned long flags;
spin_lock_irqsave(&target->lock, flags);
-       if (!scmnd) {
+       if (req->scmnd &&
+           (!sdev || req->scmnd->device == sdev) &&
+           (!scmnd || req->scmnd == scmnd)) {
                scmnd = req->scmnd;
                req->scmnd = NULL;
-       } else if (req->scmnd == scmnd) {
-               req->scmnd = NULL;
        } else {
                scmnd = NULL;
        }
@@ -827,9 +829,10 @@ static void srp_free_req(struct srp_target_port *target,
  }
static void srp_finish_req(struct srp_target_port *target,
-                          struct srp_request *req, int result)
+                          struct srp_request *req, struct scsi_device *sdev,
+                          int result)
  {
-       struct scsi_cmnd *scmnd = srp_claim_req(target, req, NULL);
+       struct scsi_cmnd *scmnd = srp_claim_req(target, req, sdev, NULL);
if (scmnd) {
                srp_free_req(target, req, scmnd, 0);
@@ -841,11 +844,20 @@ static void srp_finish_req(struct srp_target_port *target,
  static void srp_terminate_io(struct srp_rport *rport)
  {
        struct srp_target_port *target = rport->lld_data;
+       struct Scsi_Host *shost = target->scsi_host;
+       struct scsi_device *sdev;
        int i;
+ /*
+        * Invoking srp_terminate_io() while srp_queuecommand() is running
+        * is not safe. Hence the warning statement below.
+        */
+       shost_for_each_device(sdev, shost)
+               WARN_ON_ONCE(sdev->request_queue->request_fn_active);
+
        for (i = 0; i < target->req_ring_size; ++i) {
                struct srp_request *req = &target->req_ring[i];
-               srp_finish_req(target, req, DID_TRANSPORT_FAILFAST << 16);
+               srp_finish_req(target, req, NULL, DID_TRANSPORT_FAILFAST << 16);
        }
  }
@@ -882,7 +894,7 @@ static int srp_rport_reconnect(struct srp_rport *rport) for (i = 0; i < target->req_ring_size; ++i) {
                struct srp_request *req = &target->req_ring[i];
-               srp_finish_req(target, req, DID_RESET << 16);
+               srp_finish_req(target, req, NULL, DID_RESET << 16);
        }
INIT_LIST_HEAD(&target->free_tx);
@@ -1290,7 +1302,7 @@ static void srp_process_rsp(struct srp_target_port 
*target, struct srp_rsp *rsp)
                complete(&target->tsk_mgmt_done);
        } else {
                req = &target->req_ring[rsp->tag];
-               scmnd = srp_claim_req(target, req, NULL);
+               scmnd = srp_claim_req(target, req, NULL, NULL);
                if (!scmnd) {
                        shost_printk(KERN_ERR, target->scsi_host,
                                     "Null scmnd for RSP w/tag %016llx\n",
@@ -2008,7 +2020,7 @@ static int srp_abort(struct scsi_cmnd *scmnd)
shost_printk(KERN_ERR, target->scsi_host, "SRP abort called\n"); - if (!req || !srp_claim_req(target, req, scmnd))
+       if (!req || !srp_claim_req(target, req, NULL, scmnd))
                return SUCCESS;
        if (srp_send_tsk_mgmt(target, req->index, scmnd->device->lun,
                              SRP_TSK_ABORT_TASK) == 0)
@@ -2039,8 +2051,7 @@ static int srp_reset_device(struct scsi_cmnd *scmnd)
for (i = 0; i < target->req_ring_size; ++i) {
                struct srp_request *req = &target->req_ring[i];
-               if (req->scmnd && req->scmnd->device == scmnd->device)
-                       srp_finish_req(target, req, DID_RESET << 16);
+               srp_finish_req(target, req, scmnd->device, DID_RESET << 16);
        }
return SUCCESS;

Acked-by: Sagi Grimberg  <[email protected]>
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to