The adapter state machine is susceptible to missing and/or
corrupting state updates at runtime. This can lead to a variety
of unintended issues and is due to the lack of a serialization
mechanism to protect the adapter state.

Use an adapter-wide mutex to serialize state changes.

Signed-off-by: Matthew R. Ochs <mro...@linux.vnet.ibm.com>
Signed-off-by: Manoj N. Kumar <ma...@linux.vnet.ibm.com>

Conflicts:
        drivers/scsi/cxlflash/main.c
---
 drivers/scsi/cxlflash/common.h    |  1 +
 drivers/scsi/cxlflash/main.c      | 48 ++++++++++++++++++++++++++++++---------
 drivers/scsi/cxlflash/superpipe.c |  7 +++++-
 3 files changed, 44 insertions(+), 12 deletions(-)

diff --git a/drivers/scsi/cxlflash/common.h b/drivers/scsi/cxlflash/common.h
index bbfe711..89c82d2 100644
--- a/drivers/scsi/cxlflash/common.h
+++ b/drivers/scsi/cxlflash/common.h
@@ -127,6 +127,7 @@ struct cxlflash_cfg {
        bool tmf_active;
        wait_queue_head_t reset_waitq;
        enum cxlflash_state state;
+       struct mutex mutex;
 };
 
 struct afu_cmd {
diff --git a/drivers/scsi/cxlflash/main.c b/drivers/scsi/cxlflash/main.c
index ab11ee6..325ba31 100644
--- a/drivers/scsi/cxlflash/main.c
+++ b/drivers/scsi/cxlflash/main.c
@@ -496,6 +496,7 @@ static int cxlflash_queuecommand(struct Scsi_Host *host, 
struct scsi_cmnd *scp)
        struct cxlflash_cfg *cfg = (struct cxlflash_cfg *)host->hostdata;
        struct afu *afu = cfg->afu;
        struct device *dev = &cfg->dev->dev;
+       enum cxlflash_state state;
        struct afu_cmd *cmd;
        u32 port_sel = scp->device->channel + 1;
        int nseg, i, ncount;
@@ -525,7 +526,11 @@ static int cxlflash_queuecommand(struct Scsi_Host *host, 
struct scsi_cmnd *scp)
        }
        spin_unlock_irqrestore(&cfg->tmf_slock, lock_flags);
 
-       switch (cfg->state) {
+       mutex_lock(&cfg->mutex);
+       state = cfg->state;
+       mutex_unlock(&cfg->mutex);
+
+       switch (state) {
        case STATE_RESET:
                dev_dbg_ratelimited(dev, "%s: device is in reset!\n", __func__);
                rc = SCSI_MLQUEUE_HOST_BUSY;
@@ -722,7 +727,9 @@ static void cxlflash_remove(struct pci_dev *pdev)
                                                  cfg->tmf_slock);
        spin_unlock_irqrestore(&cfg->tmf_slock, lock_flags);
 
+       mutex_lock(&cfg->mutex);
        cfg->state = STATE_FAILTERM;
+       mutex_unlock(&cfg->mutex);
        cxlflash_stop_term_user_contexts(cfg);
 
        switch (cfg->init_state) {
@@ -1776,9 +1783,10 @@ err1:
  * @mode:      Type of sync to issue (lightweight, heavyweight, global).
  *
  * The AFU can only take 1 sync command at a time. This routine enforces this
- * limitation by using a mutex to provide exclusive access to the AFU during
- * the sync. This design point requires calling threads to not be on interrupt
- * context due to the possibility of sleeping during concurrent sync 
operations.
+ * limitation by holding the adapter mutex across the entirety of the function
+ * to provide exclusive access to the AFU during the sync. This design point
+ * requires calling threads to not be on interrupt context due to the
+ * possibility of sleeping during concurrent sync operations.
  *
  * AFU sync operations are only necessary and allowed when the device is
  * operating normally. When not operating normally, sync requests can occur as
@@ -1798,14 +1806,13 @@ int cxlflash_afu_sync(struct afu *afu, ctx_hndl_t 
ctx_hndl_u,
        struct afu_cmd *cmd = NULL;
        int rc = 0;
        int retry_cnt = 0;
-       static DEFINE_MUTEX(sync_active);
 
+       mutex_lock(&cfg->mutex);
        if (cfg->state != STATE_NORMAL) {
                pr_debug("%s: Sync not required! (%u)\n", __func__, cfg->state);
-               return 0;
+               goto out;
        }
 
-       mutex_lock(&sync_active);
 retry:
        cmd = cmd_checkout(afu);
        if (unlikely(!cmd)) {
@@ -1847,7 +1854,7 @@ retry:
                     (cmd->sa.host_use_b[0] & B_ERROR)))
                rc = -1;
 out:
-       mutex_unlock(&sync_active);
+       mutex_unlock(&cfg->mutex);
        if (cmd)
                cmd_checkin(cmd);
        pr_debug("%s: returning rc=%d\n", __func__, rc);
@@ -1889,6 +1896,7 @@ static int cxlflash_eh_device_reset_handler(struct 
scsi_cmnd *scp)
        struct Scsi_Host *host = scp->device->host;
        struct cxlflash_cfg *cfg = (struct cxlflash_cfg *)host->hostdata;
        struct afu *afu = cfg->afu;
+       enum cxlflash_state state;
        int rcr = 0;
 
        pr_debug("%s: (scp=%p) %d/%d/%d/%llu "
@@ -1901,7 +1909,11 @@ static int cxlflash_eh_device_reset_handler(struct 
scsi_cmnd *scp)
                 get_unaligned_be32(&((u32 *)scp->cmnd)[3]));
 
 retry:
-       switch (cfg->state) {
+       mutex_lock(&cfg->mutex);
+       state = cfg->state;
+       mutex_unlock(&cfg->mutex);
+
+       switch (state) {
        case STATE_NORMAL:
                rcr = send_tmf(afu, scp, TMF_LUN_RESET);
                if (unlikely(rcr))
@@ -1943,6 +1955,7 @@ static int cxlflash_eh_host_reset_handler(struct 
scsi_cmnd *scp)
                 get_unaligned_be32(&((u32 *)scp->cmnd)[2]),
                 get_unaligned_be32(&((u32 *)scp->cmnd)[3]));
 
+       mutex_lock(&cfg->mutex);
        switch (cfg->state) {
        case STATE_NORMAL:
                cfg->state = STATE_RESET;
@@ -1956,7 +1969,9 @@ static int cxlflash_eh_host_reset_handler(struct 
scsi_cmnd *scp)
                wake_up_all(&cfg->reset_waitq);
                break;
        case STATE_RESET:
+               mutex_unlock(&cfg->mutex);
                wait_event(cfg->reset_waitq, cfg->state != STATE_RESET);
+               mutex_lock(&cfg->mutex);
                if (cfg->state == STATE_NORMAL)
                        break;
                /* fall through */
@@ -1964,6 +1979,7 @@ static int cxlflash_eh_host_reset_handler(struct 
scsi_cmnd *scp)
                rc = FAILED;
                break;
        }
+       mutex_unlock(&cfg->mutex);
 
        pr_debug("%s: returning rc=%d\n", __func__, rc);
        return rc;
@@ -2301,10 +2317,11 @@ static void cxlflash_worker_thread(struct work_struct 
*work)
        int port;
        ulong lock_flags;
 
-       /* Avoid MMIO if the device has failed */
+       mutex_lock(&cfg->mutex);
 
+       /* Avoid MMIO if the device has failed */
        if (cfg->state != STATE_NORMAL)
-               return;
+               goto out;
 
        spin_lock_irqsave(cfg->host->host_lock, lock_flags);
 
@@ -2335,6 +2352,8 @@ static void cxlflash_worker_thread(struct work_struct 
*work)
 
        if (atomic_dec_if_positive(&cfg->scan_host_needed) >= 0)
                scsi_scan_host(cfg->host);
+out:
+       mutex_unlock(&cfg->mutex);
 }
 
 /**
@@ -2405,6 +2424,7 @@ static int cxlflash_probe(struct pci_dev *pdev,
        INIT_WORK(&cfg->work_q, cxlflash_worker_thread);
        cfg->lr_state = LINK_RESET_INVALID;
        cfg->lr_port = -1;
+       mutex_init(&cfg->mutex);
        mutex_init(&cfg->ctx_tbl_list_mutex);
        mutex_init(&cfg->ctx_recovery_mutex);
        init_rwsem(&cfg->ioctl_rwsem);
@@ -2492,7 +2512,9 @@ static pci_ers_result_t 
cxlflash_pci_error_detected(struct pci_dev *pdev,
 
        switch (state) {
        case pci_channel_io_frozen:
+               mutex_lock(&cfg->mutex);
                cfg->state = STATE_RESET;
+               mutex_unlock(&cfg->mutex);
                scsi_block_requests(cfg->host);
                drain_ioctls(cfg);
                rc = cxlflash_mark_contexts_error(cfg);
@@ -2503,7 +2525,9 @@ static pci_ers_result_t 
cxlflash_pci_error_detected(struct pci_dev *pdev,
                stop_afu(cfg);
                return PCI_ERS_RESULT_NEED_RESET;
        case pci_channel_io_perm_failure:
+               mutex_lock(&cfg->mutex);
                cfg->state = STATE_FAILTERM;
+               mutex_unlock(&cfg->mutex);
                wake_up_all(&cfg->reset_waitq);
                scsi_unblock_requests(cfg->host);
                return PCI_ERS_RESULT_DISCONNECT;
@@ -2550,7 +2574,9 @@ static void cxlflash_pci_resume(struct pci_dev *pdev)
 
        dev_dbg(dev, "%s: pdev=%p\n", __func__, pdev);
 
+       mutex_lock(&cfg->mutex);
        cfg->state = STATE_NORMAL;
+       mutex_unlock(&cfg->mutex);
        wake_up_all(&cfg->reset_waitq);
        scsi_unblock_requests(cfg->host);
 }
diff --git a/drivers/scsi/cxlflash/superpipe.c 
b/drivers/scsi/cxlflash/superpipe.c
index 0d92594..6aaeee0 100644
--- a/drivers/scsi/cxlflash/superpipe.c
+++ b/drivers/scsi/cxlflash/superpipe.c
@@ -1229,10 +1229,15 @@ static const struct file_operations null_fops = {
 static int check_state(struct cxlflash_cfg *cfg, bool ioctl)
 {
        struct device *dev = &cfg->dev->dev;
+       enum cxlflash_state state;
        int rc = 0;
 
 retry:
-       switch (cfg->state) {
+       mutex_lock(&cfg->mutex);
+       state = cfg->state;
+       mutex_unlock(&cfg->mutex);
+
+       switch (state) {
        case STATE_RESET:
                dev_dbg(dev, "%s: Reset state, going to wait...\n", __func__);
                if (ioctl)
-- 
2.1.0

_______________________________________________
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Reply via email to