The adapter state machine is susceptible to missing and/or
corrupting state updates at runtime. This can lead to a variety
of unintended issues and is due to the lack of a serialization
mechanism to protect the adapter state.
Use an adapter-wide mutex to serialize state changes.
Signed-off-by: Matthew R. Ochs
Signed-off-by: Manoj N. Kumar
Conflicts:
drivers/scsi/cxlflash/main.c
---
drivers/scsi/cxlflash/common.h| 1 +
drivers/scsi/cxlflash/main.c | 48 ++-
drivers/scsi/cxlflash/superpipe.c | 7 +-
3 files changed, 44 insertions(+), 12 deletions(-)
diff --git a/drivers/scsi/cxlflash/common.h b/drivers/scsi/cxlflash/common.h
index bbfe711..89c82d2 100644
--- a/drivers/scsi/cxlflash/common.h
+++ b/drivers/scsi/cxlflash/common.h
@@ -127,6 +127,7 @@ struct cxlflash_cfg {
bool tmf_active;
wait_queue_head_t reset_waitq;
enum cxlflash_state state;
+ struct mutex mutex;
};
struct afu_cmd {
diff --git a/drivers/scsi/cxlflash/main.c b/drivers/scsi/cxlflash/main.c
index ab11ee6..325ba31 100644
--- a/drivers/scsi/cxlflash/main.c
+++ b/drivers/scsi/cxlflash/main.c
@@ -496,6 +496,7 @@ static int cxlflash_queuecommand(struct Scsi_Host *host,
struct scsi_cmnd *scp)
struct cxlflash_cfg *cfg = (struct cxlflash_cfg *)host->hostdata;
struct afu *afu = cfg->afu;
struct device *dev = >dev->dev;
+ enum cxlflash_state state;
struct afu_cmd *cmd;
u32 port_sel = scp->device->channel + 1;
int nseg, i, ncount;
@@ -525,7 +526,11 @@ static int cxlflash_queuecommand(struct Scsi_Host *host,
struct scsi_cmnd *scp)
}
spin_unlock_irqrestore(>tmf_slock, lock_flags);
- switch (cfg->state) {
+ mutex_lock(>mutex);
+ state = cfg->state;
+ mutex_unlock(>mutex);
+
+ switch (state) {
case STATE_RESET:
dev_dbg_ratelimited(dev, "%s: device is in reset!\n", __func__);
rc = SCSI_MLQUEUE_HOST_BUSY;
@@ -722,7 +727,9 @@ static void cxlflash_remove(struct pci_dev *pdev)
cfg->tmf_slock);
spin_unlock_irqrestore(>tmf_slock, lock_flags);
+ mutex_lock(>mutex);
cfg->state = STATE_FAILTERM;
+ mutex_unlock(>mutex);
cxlflash_stop_term_user_contexts(cfg);
switch (cfg->init_state) {
@@ -1776,9 +1783,10 @@ err1:
* @mode: Type of sync to issue (lightweight, heavyweight, global).
*
* The AFU can only take 1 sync command at a time. This routine enforces this
- * limitation by using a mutex to provide exclusive access to the AFU during
- * the sync. This design point requires calling threads to not be on interrupt
- * context due to the possibility of sleeping during concurrent sync
operations.
+ * limitation by holding the adapter mutex across the entirety of the function
+ * to provide exclusive access to the AFU during the sync. This design point
+ * requires calling threads to not be on interrupt context due to the
+ * possibility of sleeping during concurrent sync operations.
*
* AFU sync operations are only necessary and allowed when the device is
* operating normally. When not operating normally, sync requests can occur as
@@ -1798,14 +1806,13 @@ int cxlflash_afu_sync(struct afu *afu, ctx_hndl_t
ctx_hndl_u,
struct afu_cmd *cmd = NULL;
int rc = 0;
int retry_cnt = 0;
- static DEFINE_MUTEX(sync_active);
+ mutex_lock(>mutex);
if (cfg->state != STATE_NORMAL) {
pr_debug("%s: Sync not required! (%u)\n", __func__, cfg->state);
- return 0;
+ goto out;
}
- mutex_lock(_active);
retry:
cmd = cmd_checkout(afu);
if (unlikely(!cmd)) {
@@ -1847,7 +1854,7 @@ retry:
(cmd->sa.host_use_b[0] & B_ERROR)))
rc = -1;
out:
- mutex_unlock(_active);
+ mutex_unlock(>mutex);
if (cmd)
cmd_checkin(cmd);
pr_debug("%s: returning rc=%d\n", __func__, rc);
@@ -1889,6 +1896,7 @@ static int cxlflash_eh_device_reset_handler(struct
scsi_cmnd *scp)
struct Scsi_Host *host = scp->device->host;
struct cxlflash_cfg *cfg = (struct cxlflash_cfg *)host->hostdata;
struct afu *afu = cfg->afu;
+ enum cxlflash_state state;
int rcr = 0;
pr_debug("%s: (scp=%p) %d/%d/%d/%llu "
@@ -1901,7 +1909,11 @@ static int cxlflash_eh_device_reset_handler(struct
scsi_cmnd *scp)
get_unaligned_be32(&((u32 *)scp->cmnd)[3]));
retry:
- switch (cfg->state) {
+ mutex_lock(>mutex);
+ state = cfg->state;
+ mutex_unlock(>mutex);
+
+ switch (state) {
case STATE_NORMAL:
rcr = send_tmf(afu, scp, TMF_LUN_RESET);
if (unlikely(rcr))
@@ -1943,6 +1955,7 @@ static int cxlflash_eh_host_reset_handler(struct