I'm not sure how this happened, but the patch that was intended to fix
abort handling was incomplete. This patch fixes that patch as follows:
- If aborting the SCSI command failed, wait until the SCSI command
  completes.
- Return SUCCESS instead of FAILED if an abort attempt races with SCSI
  command completion.
- Since qla2xxx_eh_abort() increments the sp reference count by calling
  sp_get(), decrement the sp reference count before returning.

Cc: Himanshu Madhani <hmadh...@marvell.com>
Fixes: 219d27d7147e ("scsi: qla2xxx: Fix race conditions in the code for aborting SCSI commands")
Signed-off-by: Bart Van Assche <bvanass...@acm.org>
---
 drivers/scsi/qla2xxx/qla_os.c | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
index b667f13b62df..db1f1aac79f2 100644
--- a/drivers/scsi/qla2xxx/qla_os.c
+++ b/drivers/scsi/qla2xxx/qla_os.c
@@ -1269,6 +1269,7 @@ static int
 qla2xxx_eh_abort(struct scsi_cmnd *cmd)
 {
        scsi_qla_host_t *vha = shost_priv(cmd->device->host);
+       DECLARE_COMPLETION_ONSTACK(comp);
        srb_t *sp;
        int ret;
        unsigned int id;
@@ -1307,6 +1308,7 @@ qla2xxx_eh_abort(struct scsi_cmnd *cmd)
                return SUCCESS;
        }
 
+       /* Get a reference to the sp and drop the lock. */
        if (sp_get(sp)){
                /* ref_count is already 0 */
                spin_unlock_irqrestore(qpair->qp_lock_ptr, flags);
@@ -1334,6 +1336,23 @@ qla2xxx_eh_abort(struct scsi_cmnd *cmd)
                sp->done(sp, DID_ABORT << 16);
                ret = SUCCESS;
                break;
+       case QLA_FUNCTION_PARAMETER_ERROR: {
+               /* Wait for the command completion. */
+               uint32_t ratov = ha->r_a_tov/10;
+               uint32_t ratov_j = msecs_to_jiffies(4 * ratov * 1000);
+
+               WARN_ON_ONCE(sp->comp);
+               sp->comp = &comp;
+               if (!wait_for_completion_timeout(&comp, ratov_j)) {
+                       ql_dbg(ql_dbg_taskm, vha, 0xffff,
+                           "%s: Abort wait timer (4 * R_A_TOV[%d]) expired\n",
+                           __func__, ha->r_a_tov);
+                       ret = FAILED;
+               } else {
+                       ret = SUCCESS;
+               }
+               break;
+       }
        default:
                /*
                 * Either abort failed or abort and completion raced. Let
@@ -1343,6 +1362,8 @@ qla2xxx_eh_abort(struct scsi_cmnd *cmd)
                break;
        }
 
+       sp->comp = NULL;
+       atomic_dec(&sp->ref_count);
        ql_log(ql_log_info, vha, 0x801c,
            "Abort command issued nexus=%ld:%d:%llu -- %x.\n",
            vha->host_no, id, lun, ret);
-- 
2.22.0

Reply via email to