[PATCH 05/11] Break up IO ctx list into a separate get and put list

2017-06-15 Thread James Smart
Since the unsol rcv ISR and the command cmpl ISR both access/lock
this list, separate get/put lists will reduce contention.

Replaced
struct list_head lpfc_nvmet_ctx_list;
with
struct list_head lpfc_nvmet_ctx_get_list;
struct list_head lpfc_nvmet_ctx_put_list;
and all corresponding locks and counters.

Signed-off-by: Dick Kennedy 
Signed-off-by: James Smart 
---
 drivers/scsi/lpfc/lpfc_attr.c| 11 --
 drivers/scsi/lpfc/lpfc_debugfs.c | 11 --
 drivers/scsi/lpfc/lpfc_init.c| 16 +---
 drivers/scsi/lpfc/lpfc_nvmet.c   | 82 +---
 drivers/scsi/lpfc/lpfc_sli4.h|  9 +++--
 5 files changed, 89 insertions(+), 40 deletions(-)

diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c
index af22602b1058..4ed48ed38e79 100644
--- a/drivers/scsi/lpfc/lpfc_attr.c
+++ b/drivers/scsi/lpfc/lpfc_attr.c
@@ -245,15 +245,18 @@ lpfc_nvme_info_show(struct device *dev, struct 
device_attribute *attr,
atomic_read(>xmt_abort_rsp),
atomic_read(>xmt_abort_rsp_error));
 
-   spin_lock(>sli4_hba.nvmet_io_lock);
+   spin_lock(>sli4_hba.nvmet_ctx_get_lock);
+   spin_lock(>sli4_hba.nvmet_ctx_put_lock);
tot = phba->sli4_hba.nvmet_xri_cnt -
-   phba->sli4_hba.nvmet_ctx_cnt;
-   spin_unlock(>sli4_hba.nvmet_io_lock);
+   (phba->sli4_hba.nvmet_ctx_get_cnt +
+   phba->sli4_hba.nvmet_ctx_put_cnt);
+   spin_unlock(>sli4_hba.nvmet_ctx_put_lock);
+   spin_unlock(>sli4_hba.nvmet_ctx_get_lock);
 
len += snprintf(buf + len, PAGE_SIZE - len,
"IO_CTX: %08x  WAIT: cur %08x tot %08x\n"
"CTX Outstanding %08llx\n",
-   phba->sli4_hba.nvmet_ctx_cnt,
+   phba->sli4_hba.nvmet_xri_cnt,
phba->sli4_hba.nvmet_io_wait_cnt,
phba->sli4_hba.nvmet_io_wait_total,
tot);
diff --git a/drivers/scsi/lpfc/lpfc_debugfs.c b/drivers/scsi/lpfc/lpfc_debugfs.c
index cc49850e18a9..ed2850645e70 100644
--- a/drivers/scsi/lpfc/lpfc_debugfs.c
+++ b/drivers/scsi/lpfc/lpfc_debugfs.c
@@ -848,15 +848,18 @@ lpfc_debugfs_nvmestat_data(struct lpfc_vport *vport, char 
*buf, int size)
spin_unlock(>sli4_hba.abts_nvme_buf_list_lock);
}
 
-   spin_lock(>sli4_hba.nvmet_io_lock);
+   spin_lock(>sli4_hba.nvmet_ctx_get_lock);
+   spin_lock(>sli4_hba.nvmet_ctx_put_lock);
tot = phba->sli4_hba.nvmet_xri_cnt -
-   phba->sli4_hba.nvmet_ctx_cnt;
-   spin_unlock(>sli4_hba.nvmet_io_lock);
+   (phba->sli4_hba.nvmet_ctx_get_cnt +
+   phba->sli4_hba.nvmet_ctx_put_cnt);
+   spin_unlock(>sli4_hba.nvmet_ctx_put_lock);
+   spin_unlock(>sli4_hba.nvmet_ctx_get_lock);
 
len += snprintf(buf + len, size - len,
"IO_CTX: %08x  WAIT: cur %08x tot %08x\n"
"CTX Outstanding %08llx\n",
-   phba->sli4_hba.nvmet_ctx_cnt,
+   phba->sli4_hba.nvmet_xri_cnt,
phba->sli4_hba.nvmet_io_wait_cnt,
phba->sli4_hba.nvmet_io_wait_total,
tot);
diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c
index 77283705eb8d..7e73fdc154f7 100644
--- a/drivers/scsi/lpfc/lpfc_init.c
+++ b/drivers/scsi/lpfc/lpfc_init.c
@@ -1281,10 +1281,13 @@ lpfc_hb_timeout_handler(struct lpfc_hba *phba)
/* Check outstanding IO count */
if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) {
if (phba->nvmet_support) {
-   spin_lock(>sli4_hba.nvmet_io_lock);
+   spin_lock(>sli4_hba.nvmet_ctx_get_lock);
+   spin_lock(>sli4_hba.nvmet_ctx_put_lock);
tot = phba->sli4_hba.nvmet_xri_cnt -
-   phba->sli4_hba.nvmet_ctx_cnt;
-   spin_unlock(>sli4_hba.nvmet_io_lock);
+   (phba->sli4_hba.nvmet_ctx_get_cnt +
+   phba->sli4_hba.nvmet_ctx_put_cnt);
+   spin_unlock(>sli4_hba.nvmet_ctx_put_lock);
+   spin_unlock(>sli4_hba.nvmet_ctx_get_lock);
} else {
tot = atomic_read(>fc4NvmeIoCmpls);
data1 = atomic_read(
@@ -3487,7 +3490,6 @@ lpfc_sli4_nvmet_sgl_update(struct 

[PATCH 03/11] Vport creation is failing with "Link Down" error

2017-06-15 Thread James Smart
Vport creation fails for SLI-3 adapters.

Mailbox submission fails because mailbox interrupt is disabled. Mailbox
interrupt is disabled during port reset.

Do reset only for physical port.

Signed-off-by: Dick Kennedy 
Signed-off-by: James Smart 
---
 drivers/scsi/lpfc/lpfc_init.c | 21 -
 1 file changed, 12 insertions(+), 9 deletions(-)

diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c
index 9d3a12636455..77283705eb8d 100644
--- a/drivers/scsi/lpfc/lpfc_init.c
+++ b/drivers/scsi/lpfc/lpfc_init.c
@@ -3691,14 +3691,6 @@ lpfc_get_wwpn(struct lpfc_hba *phba)
LPFC_MBOXQ_t *mboxq;
MAILBOX_t *mb;
 
-   if (phba->sli_rev < LPFC_SLI_REV4) {
-   /* Reset the port first */
-   lpfc_sli_brdrestart(phba);
-   rc = lpfc_sli_chipset_init(phba);
-   if (rc)
-   return (uint64_t)-1;
-   }
-
mboxq = (LPFC_MBOXQ_t *) mempool_alloc(phba->mbox_mem_pool,
GFP_KERNEL);
if (!mboxq)
@@ -3852,8 +3844,19 @@ lpfc_create_port(struct lpfc_hba *phba, int instance, 
struct device *dev)
int i;
uint64_t wwn;
bool use_no_reset_hba = false;
+   int rc;
 
-   wwn = lpfc_get_wwpn(phba);
+   if (lpfc_no_hba_reset_cnt) {
+   if (phba->sli_rev < LPFC_SLI_REV4 &&
+   dev == >pcidev->dev) {
+   /* Reset the port first */
+   lpfc_sli_brdrestart(phba);
+   rc = lpfc_sli_chipset_init(phba);
+   if (rc)
+   return NULL;
+   }
+   wwn = lpfc_get_wwpn(phba);
+   }
 
for (i = 0; i < lpfc_no_hba_reset_cnt; i++) {
if (wwn == lpfc_no_hba_reset[i]) {
-- 
2.11.0



[PATCH 06/11] Fix SLI3 drivers attempting NVME ELS commands.

2017-06-15 Thread James Smart
In a server with an 8G adapter and a 32G adapter, running NVME
and FCP, the server would crash with the following stack.

RIP: 0010: ... lpfc_nvme_register_port+0x38/0x420 [lpfc]
 lpfc_nlp_state_cleanup+0x154/0x4f0 [lpfc]
 lpfc_nlp_set_state+0x9d/0x1a0 [lpfc]
 lpfc_cmpl_prli_prli_issue+0x35f/0x440 [lpfc]
 lpfc_disc_state_machine+0x78/0x1c0 [lpfc]
 lpfc_cmpl_els_prli+0x17c/0x1f0 [lpfc]
 lpfc_sli_sp_handle_rspiocb+0x39b/0x6b0 [lpfc]
 lpfc_sli_handle_slow_ring_event_s3+0x134/0x2d0 [lpfc]
 lpfc_work_done+0x8ac/0x13b0 [lpfc]
 lpfc_do_work+0xf1/0x1b0 [lpfc]

Crash, on the 8G adapter, is due to a vport which does not have
a nvme local port structure. It's not supposed to have one. NVME is
not supported on the 8G adapter, so the NVME PRLI, which started
this flow shouldn't have been sent in the first place.

Correct the discovery engine to recognize when it is running on an SLI3
adapter, which doesn't support NVME. If the rport supports only NVME,
don't send a NVME PRLI. Instead, as no FC4 will be used, a LOGO is sent.
If the rport is FCP and NVME, only execute the SCSI PRLI.

Signed-off-by: Dick Kennedy 
Signed-off-by: James Smart 
---
 drivers/scsi/lpfc/lpfc_els.c | 16 +++-
 drivers/scsi/lpfc/lpfc_hbadisc.c |  3 ++-
 2 files changed, 17 insertions(+), 2 deletions(-)

diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c
index a140318d6159..54de984d695f 100644
--- a/drivers/scsi/lpfc/lpfc_els.c
+++ b/drivers/scsi/lpfc/lpfc_els.c
@@ -2168,6 +2168,19 @@ lpfc_issue_els_prli(struct lpfc_vport *vport, struct 
lpfc_nodelist *ndlp,
 ndlp->nlp_fc4_type, ndlp->nlp_DID);
return 1;
}
+
+   /* SLI3 ports don't support NVME.  If this rport is a strict NVME
+* FC4 type, implicitly LOGO.
+*/
+   if (phba->sli_rev == LPFC_SLI_REV3 &&
+   ndlp->nlp_fc4_type == NLP_FC4_NVME) {
+   lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY,
+"3088 Rport fc4 type 0x%x not supported by 
SLI3 adapter\n",
+ndlp->nlp_type);
+   lpfc_disc_state_machine(vport, ndlp, NULL, NLP_EVT_DEVICE_RM);
+   return 1;
+   }
+
elsiocb = lpfc_prep_els_iocb(vport, 1, cmdsize, retry, ndlp,
 ndlp->nlp_DID, elscmd);
if (!elsiocb)
@@ -2268,7 +2281,8 @@ lpfc_issue_els_prli(struct lpfc_vport *vport, struct 
lpfc_nodelist *ndlp,
/* The driver supports 2 FC4 types.  Make sure
 * a PRLI is issued for all types before exiting.
 */
-   if (local_nlp_type & (NLP_FC4_FCP | NLP_FC4_NVME))
+   if (phba->sli_rev == LPFC_SLI_REV4 &&
+   local_nlp_type & (NLP_FC4_FCP | NLP_FC4_NVME))
goto send_next_prli;
 
return 0;
diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c
index db2d0e692ddf..aa5e5ff56dfb 100644
--- a/drivers/scsi/lpfc/lpfc_hbadisc.c
+++ b/drivers/scsi/lpfc/lpfc_hbadisc.c
@@ -4194,7 +4194,8 @@ lpfc_nlp_state_cleanup(struct lpfc_vport *vport, struct 
lpfc_nodelist *ndlp,
lpfc_register_remote_port(vport, ndlp);
}
/* Notify the NVME transport of this new rport. */
-   if (ndlp->nlp_fc4_type & NLP_FC4_NVME) {
+   if (vport->phba->sli_rev >= LPFC_SLI_REV4 &&
+   ndlp->nlp_fc4_type & NLP_FC4_NVME) {
if (vport->phba->nvmet_support == 0) {
/* Register this rport with the transport.
 * Initiators take the NDLP ref count in
-- 
2.11.0



[PATCH 01/11] Fix system panic when express lane enabled.

2017-06-15 Thread James Smart
There is a null pointer dereference that can happen in the FOF interrupt
handler.

The driver was not setting up cq->assoc_qp for sli4_hba->oas_cq.

Initialize cq->assoc_qp before accessing it.

Signed-off-by: Dick Kennedy 
Signed-off-by: James Smart 
---
 drivers/scsi/lpfc/lpfc_sli.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c
index 040575adf9c6..4f2cc395597e 100644
--- a/drivers/scsi/lpfc/lpfc_sli.c
+++ b/drivers/scsi/lpfc/lpfc_sli.c
@@ -13560,6 +13560,9 @@ lpfc_sli4_fof_handle_eqe(struct lpfc_hba *phba, struct 
lpfc_eqe *eqe)
return;
}
 
+   /* Save EQ associated with this CQ */
+   cq->assoc_qp = phba->sli4_hba.fof_eq;
+
/* Process all the entries to the OAS CQ */
while ((cqe = lpfc_sli4_cq_get(cq))) {
workposted |= lpfc_sli4_fp_handle_cqe(phba, cq, cqe);
-- 
2.11.0



[PATCH 11/11] lpfc: update to revision to 11.4.0.1

2017-06-15 Thread James Smart
Set lpfc driver revision to 11.4.0.1

Signed-off-by: Dick Kennedy 
Signed-off-by: James Smart 
---
 drivers/scsi/lpfc/lpfc_version.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/scsi/lpfc/lpfc_version.h b/drivers/scsi/lpfc/lpfc_version.h
index 067c9e8a4b2d..c6a24c3e2d5e 100644
--- a/drivers/scsi/lpfc/lpfc_version.h
+++ b/drivers/scsi/lpfc/lpfc_version.h
@@ -20,7 +20,7 @@
  * included with this package. *
  ***/
 
-#define LPFC_DRIVER_VERSION "11.4.0.0"
+#define LPFC_DRIVER_VERSION "11.4.0.1"
 #define LPFC_DRIVER_NAME   "lpfc"
 
 /* Used for SLI 2/3 */
-- 
2.11.0



[PATCH 04/11] Reduce time spent in IRQ for received NVME commands

2017-06-15 Thread James Smart
Removed unnecessary bzero of context area. Due to the size of the sg list,
the bzero added a substantial delay and played havoc on cpu caches.

Signed-off-by: Dick Kennedy 
Signed-off-by: James Smart 
---
 drivers/scsi/lpfc/lpfc_nvmet.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c
index dba1bd216be3..431faa0a4f3e 100644
--- a/drivers/scsi/lpfc/lpfc_nvmet.c
+++ b/drivers/scsi/lpfc/lpfc_nvmet.c
@@ -205,7 +205,6 @@ lpfc_nvmet_ctxbuf_post(struct lpfc_hba *phba, struct 
lpfc_nvmet_ctxbuf *ctx_buf)
sid = sli4_sid_from_fc_hdr(fc_hdr);
 
ctxp = (struct lpfc_nvmet_rcv_ctx *)ctx_buf->context;
-   memset(ctxp, 0, sizeof(ctxp->ctx));
ctxp->wqeq = NULL;
ctxp->txrdy = NULL;
ctxp->offset = 0;
@@ -1422,7 +1421,6 @@ lpfc_nvmet_unsol_fcp_buffer(struct lpfc_hba *phba,
"6414 NVMET Context corrupt %d %d oxid x%x\n",
ctxp->state, ctxp->entry_cnt, ctxp->oxid);
}
-   memset(ctxp, 0, sizeof(ctxp->ctx));
ctxp->wqeq = NULL;
ctxp->txrdy = NULL;
ctxp->offset = 0;
-- 
2.11.0



[PATCH 02/11] Fix nvme_info sysfs output to be consistent

2017-06-15 Thread James Smart
First line of nvme_info output is not consistent

There is an extra colon in the format.

First line of output will contain one of the following strings:
NVME Initiator Enabled
NVME Target Enabled
NVME Disabled

Signed-off-by: Dick Kennedy 
Signed-off-by: James Smart 
---
 drivers/scsi/lpfc/lpfc_attr.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c
index 66269e342c7e..af22602b1058 100644
--- a/drivers/scsi/lpfc/lpfc_attr.c
+++ b/drivers/scsi/lpfc/lpfc_attr.c
@@ -171,7 +171,7 @@ lpfc_nvme_info_show(struct device *dev, struct 
device_attribute *attr,
else
statep = "INIT";
len += snprintf(buf + len, PAGE_SIZE - len,
-   "NVME Target: Enabled  State %s\n",
+   "NVME Target Enabled  State %s\n",
statep);
len += snprintf(buf + len, PAGE_SIZE - len,
"%s%d WWPN x%llx WWNN x%llx DID x%06x\n",
-- 
2.11.0



[PATCH 10/11] Driver responds LS_RJT to Beacon Off ELS - Linux

2017-06-15 Thread James Smart
Beacon OFF from switch is rejected by driver.

Driver fails Beacon OFF if frequency is set to 0. As per fc-ls
spec, status, capability, frequency and duration fields are only
applicable for Beacon ON.

Remove frequency and type checks. Reject Beacon ON if duration
is non zero.

Signed-off-by: Dick Kennedy 
Signed-off-by: James Smart 
---
 drivers/scsi/lpfc/lpfc_els.c | 22 --
 1 file changed, 4 insertions(+), 18 deletions(-)

diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c
index 54de984d695f..6d1d6f691df4 100644
--- a/drivers/scsi/lpfc/lpfc_els.c
+++ b/drivers/scsi/lpfc/lpfc_els.c
@@ -5703,27 +5703,13 @@ lpfc_els_rcv_lcb(struct lpfc_vport *vport, struct 
lpfc_iocbq *cmdiocb,
rjt_err = LSRJT_CMD_UNSUPPORTED;
goto rjt;
}
-   if (beacon->lcb_frequency == 0) {
+   if (beacon->lcb_sub_command != LPFC_LCB_ON &&
+   beacon->lcb_sub_command != LPFC_LCB_OFF) {
rjt_err = LSRJT_CMD_UNSUPPORTED;
goto rjt;
}
-   if ((beacon->lcb_type != LPFC_LCB_GREEN) &&
-   (beacon->lcb_type != LPFC_LCB_AMBER)) {
-   rjt_err = LSRJT_CMD_UNSUPPORTED;
-   goto rjt;
-   }
-   if ((beacon->lcb_sub_command != LPFC_LCB_ON) &&
-   (beacon->lcb_sub_command != LPFC_LCB_OFF)) {
-   rjt_err = LSRJT_CMD_UNSUPPORTED;
-   goto rjt;
-   }
-   if ((beacon->lcb_sub_command == LPFC_LCB_ON) &&
-   (beacon->lcb_type != LPFC_LCB_GREEN) &&
-   (beacon->lcb_type != LPFC_LCB_AMBER)) {
-   rjt_err = LSRJT_CMD_UNSUPPORTED;
-   goto rjt;
-   }
-   if (be16_to_cpu(beacon->lcb_duration) != 0) {
+   if (beacon->lcb_sub_command == LPFC_LCB_ON &&
+   be16_to_cpu(beacon->lcb_duration) != 0) {
rjt_err = LSRJT_CMD_UNSUPPORTED;
goto rjt;
}
-- 
2.11.0



[PATCH 08/11] Fix crash doing IO with resets

2017-06-15 Thread James Smart
During every reset, IOCBs are allocated. So, at some point, the number of
allocated IOCBs reaches the maximum limit and lpfc_sli_next_iotag fails.

Allocate IOCBs only during initialization. Reuse them after every reset
instead of allocating new set of IOCBs.

Signed-off-by: Dick Kennedy 
Signed-off-by: James Smart 
---
 drivers/scsi/lpfc/lpfc_sli.c | 22 --
 1 file changed, 8 insertions(+), 14 deletions(-)

diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c
index 8de70b9d79dd..e948ea05fd33 100644
--- a/drivers/scsi/lpfc/lpfc_sli.c
+++ b/drivers/scsi/lpfc/lpfc_sli.c
@@ -6927,18 +6927,6 @@ lpfc_sli4_hba_setup(struct lpfc_hba *phba)
cnt = phba->cfg_iocb_cnt * 1024;
/* We need 1 iocbq for every SGL, for IO processing */
cnt += phba->sli4_hba.nvmet_xri_cnt;
-   /* Initialize and populate the iocb list per host */
-   lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
-   "2821 initialize iocb list %d total %d\n",
-   phba->cfg_iocb_cnt, cnt);
-   rc = lpfc_init_iocb_list(phba, cnt);
-   if (rc) {
-   lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
-   "1413 Failed to init iocb list.\n");
-   goto out_destroy_queue;
-   }
-
-   lpfc_nvmet_create_targetport(phba);
} else {
/* update host scsi xri-sgl sizes and mappings */
rc = lpfc_sli4_scsi_sgl_update(phba);
@@ -6959,18 +6947,24 @@ lpfc_sli4_hba_setup(struct lpfc_hba *phba)
}
 
cnt = phba->cfg_iocb_cnt * 1024;
+   }
+
+   if (!phba->sli.iocbq_lookup) {
/* Initialize and populate the iocb list per host */
lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
-   "2820 initialize iocb list %d total %d\n",
+   "2821 initialize iocb list %d total %d\n",
phba->cfg_iocb_cnt, cnt);
rc = lpfc_init_iocb_list(phba, cnt);
if (rc) {
lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
-   "6301 Failed to init iocb list.\n");
+   "1413 Failed to init iocb list.\n");
goto out_destroy_queue;
}
}
 
+   if (phba->nvmet_support)
+   lpfc_nvmet_create_targetport(phba);
+
if (phba->nvmet_support && phba->cfg_nvmet_mrq) {
/* Post initial buffers to all RQs created */
for (i = 0; i < phba->cfg_nvmet_mrq; i++) {
-- 
2.11.0



[PATCH 09/11] Fix crash in lpfc_sli_ringtxcmpl_put when nvmet gets an abort request.

2017-06-15 Thread James Smart
When running nvme detach-ns /dev/nvme0n1 -n 1 command,
the nvmet lpfc driver crashes with this stack dump:

kernel BUG at /root/NVME/lpfc_8.4/lpfc_sli.c:1393!
invalid opcode:  [#1] SMP
Workqueue: nvmet-fc-cpu0 nvmet_fc_do_work_on_cpu [nvmet_fc]
 lpfc_sli4_issue_wqe+0x357/0x440 [lpfc]
 lpfc_nvmet_xmt_fcp_abort+0x36b/0x5c0 [lpfc]
 nvmet_fc_abort_op+0x30/0x50 [nvmet_fc]
 nvmet_fc_do_work_on_cpu+0xd9/0x130 [nvmet_fc]
 process_one_work+0x14e/0x410
 worker_thread+0x116/0x490
 kthread+0xc7/0xe0
 ret_from_fork+0x3f/0x70

Crash is due to an uninitialized iocbq->vport pointer.

Explicitly set the iocbq->vport field to phba->pport in
lpfc_nvmet_sol_fcp_issue_abort as it does all abort iocbq
initialization in the routine.  Using phba->pport is ok because
target does not support NPIV instances.

Signed-off-by: Dick Kennedy 
Signed-off-by: James Smart 
---
 drivers/scsi/lpfc/lpfc_nvmet.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c
index 5fb29735e236..7dc061a14f95 100644
--- a/drivers/scsi/lpfc/lpfc_nvmet.c
+++ b/drivers/scsi/lpfc/lpfc_nvmet.c
@@ -2523,6 +2523,7 @@ lpfc_nvmet_sol_fcp_issue_abort(struct lpfc_hba *phba,
abts_wqeq->iocb_cmpl = 0;
abts_wqeq->iocb_flag |= LPFC_IO_NVME;
abts_wqeq->context2 = ctxp;
+   abts_wqeq->vport = phba->pport;
rc = lpfc_sli4_issue_wqe(phba, LPFC_FCP_RING, abts_wqeq);
spin_unlock_irqrestore(>hbalock, flags);
if (rc == WQE_SUCCESS) {
-- 
2.11.0



[PATCH 07/11] Fix crash after firmware flash when IO is running.

2017-06-15 Thread James Smart
OS crashes after the completion of firmware download.

Failure in posting SCSI SGL buffers because number of SGL buffers
is less than total count. Some of the pending IOs are not completed
by driver. SGL buffers for these IOs are not added back to the list.
Pending IOs are not completed because lpfc_wq_list list is
initialized before completion of pending IOs.

Postpone lpfc_wq_list reinitialization by moving
lpfc_sli4_queue_destroy() after lpfc_hba_down_post().

Signed-off-by: Dick Kennedy 
Signed-off-by: James Smart 
---
 drivers/scsi/lpfc/lpfc_sli.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c
index 4f2cc395597e..8de70b9d79dd 100644
--- a/drivers/scsi/lpfc/lpfc_sli.c
+++ b/drivers/scsi/lpfc/lpfc_sli.c
@@ -4303,7 +4303,6 @@ lpfc_sli4_brdreset(struct lpfc_hba *phba)
 
/* Perform FCoE PCI function reset before freeing queue memory */
rc = lpfc_pci_function_reset(phba);
-   lpfc_sli4_queue_destroy(phba);
 
/* Restore PCI cmd register */
pci_write_config_word(phba->pcidev, PCI_COMMAND, cfg_value);
@@ -4428,6 +4427,7 @@ lpfc_sli_brdrestart_s4(struct lpfc_hba *phba)
pci_disable_pcie_error_reporting(phba->pcidev);
 
lpfc_hba_down_post(phba);
+   lpfc_sli4_queue_destroy(phba);
 
return rc;
 }
-- 
2.11.0



[PATCH 00/11] lpfc updates for 11.4.0.1

2017-06-15 Thread James Smart
This patch set provides a number of bug fixes.

The patches were cut against Martin's 4.13/scsi-queue tree.
There are no outside dependencies. The patches should merge via
Martin's tree. 

James Smart (11):
  Fix system panic when express lane enabled.
  Fix nvme_info sysfs output to be consistent
  Vport creation is failing with "Link Down" error
  Reduce time spent in IRQ for received NVME commands
  Break up IO ctx list into a separate get and put list
  Fix SLI3 drivers attempting NVME ELS commands.
  Fix crash after firmware flash when IO is running.
  Fix crash doing IO with resets
  Fix crash in lpfc_sli_ringtxcmpl_put when nvmet gets an abort request.
  Driver responds LS_RJT to Beacon Off ELS - Linux
  lpfc: update to revision to 11.4.0.1

 drivers/scsi/lpfc/lpfc_attr.c| 13 +++---
 drivers/scsi/lpfc/lpfc_debugfs.c | 11 --
 drivers/scsi/lpfc/lpfc_els.c | 38 +-
 drivers/scsi/lpfc/lpfc_hbadisc.c |  3 +-
 drivers/scsi/lpfc/lpfc_init.c| 37 ++---
 drivers/scsi/lpfc/lpfc_nvmet.c   | 85 
 drivers/scsi/lpfc/lpfc_sli.c | 27 ++---
 drivers/scsi/lpfc/lpfc_sli4.h|  9 +++--
 drivers/scsi/lpfc/lpfc_version.h |  2 +-
 9 files changed, 137 insertions(+), 88 deletions(-)

-- 
2.11.0



Re: [PATCH] tcmu: Fix module removal due to stuck unmap_thread thread again

2017-06-15 Thread Mike Christie
On 06/15/2017 02:05 AM, lixi...@cmss.chinamobile.com wrote:
> From: Xiubo Li 
> 
> Because the unmap code just after the schedule() returned may take
> a long time, and if kthread_stop() is fired just while in this
> routine, the module removal may be stuck too.
> 
> Signed-off-by: Xiubo Li 
> ---
>  drivers/target/target_core_user.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/drivers/target/target_core_user.c 
> b/drivers/target/target_core_user.c
> index beb5f09..203bff1 100644
> --- a/drivers/target/target_core_user.c
> +++ b/drivers/target/target_core_user.c
> @@ -1573,7 +1573,7 @@ static int unmap_thread_fn(void *data)
>   struct page *page;
>   int i;
>  
> - while (1) {
> + while (!kthread_should_stop()) {
>   DEFINE_WAIT(__wait);
>  
>   prepare_to_wait(_wait, &__wait, TASK_INTERRUPTIBLE);
> 

Looks ok to me

Reviewed-by: Mike Christie 


Re: [PATCH v2 00/17] lpfc updates for 11.4.0.0

2017-06-15 Thread James Smart

On 6/12/2017 8:04 PM, Martin K. Petersen wrote:

I have queued the whole series for 4.13. If there are smaller, trivial
patches without dependencies you would like to see in 4.12 I can shuffle
them over. 10, 11, and 12 appear to fix panics and are thus candidates
for rc6. Whereas "Add nvme initiator devloss support", while obviously a
deficiency, falls pretty clearly in the new feature bucket.


I'm ok with waiting for 4.13

-- james



[PATCH 4/4] g_NCR5380: Cleanup comments and whitespace

2017-06-15 Thread Finn Thain
Signed-off-by: Finn Thain 
---
 drivers/scsi/g_NCR5380.c | 61 ++--
 1 file changed, 28 insertions(+), 33 deletions(-)

diff --git a/drivers/scsi/g_NCR5380.c b/drivers/scsi/g_NCR5380.c
index 784913193ea5..e9a942d86865 100644
--- a/drivers/scsi/g_NCR5380.c
+++ b/drivers/scsi/g_NCR5380.c
@@ -1,17 +1,17 @@
 /*
  * Generic Generic NCR5380 driver
- * 
+ *
  * Copyright 1993, Drew Eckhardt
- * Visionary Computing
- * (Unix and Linux consulting and custom programming)
- * d...@colorado.edu
- *  +1 (303) 440-4894
+ * Visionary Computing
+ * (Unix and Linux consulting and custom programming)
+ * d...@colorado.edu
+ * +1 (303) 440-4894
  *
  * NCR53C400 extensions (c) 1994,1995,1996, Kevin Lentin
- *k.len...@cs.monash.edu.au
+ * k.len...@cs.monash.edu.au
  *
  * NCR53C400A extensions (c) 1996, Ingmar Baumgart
- *ing...@gonzo.schwaben.de
+ * ing...@gonzo.schwaben.de
  *
  * DTC3181E extensions (c) 1997, Ronald van Cuijlenborg
  * ronald.van.cuijlenb...@tip.nl or nu...@dds.nl
@@ -482,15 +482,14 @@ static void generic_NCR5380_release_resources(struct 
Scsi_Host *instance)
 }
 
 /**
- * generic_NCR5380_pread - pseudo DMA read
- * @hostdata: scsi host private data
- * @dst: buffer to read into
- * @len: buffer length
+ * generic_NCR5380_pread - pseudo DMA receive
+ * @hostdata: scsi host private data
+ * @dst: buffer to write into
+ * @len: transfer size
  *
- * Perform a pseudo DMA mode read from an NCR53C400 or equivalent
- * controller
+ * Perform a pseudo DMA mode receive from a 53C400 or equivalent device.
  */
- 
+
 static inline int generic_NCR5380_pread(struct NCR5380_hostdata *hostdata,
 unsigned char *dst, int len)
 {
@@ -509,10 +508,10 @@ static inline int generic_NCR5380_pread(struct 
NCR5380_hostdata *hostdata,
 
if (hostdata->io_port && hostdata->io_width == 2)
insw(hostdata->io_port + hostdata->c400_host_buf,
-   dst + start, 64);
+dst + start, 64);
else if (hostdata->io_port)
insb(hostdata->io_port + hostdata->c400_host_buf,
-   dst + start, 128);
+dst + start, 128);
else
memcpy_fromio(dst + start,
hostdata->io + NCR53C400_host_buffer, 128);
@@ -556,13 +555,12 @@ static inline int generic_NCR5380_pread(struct 
NCR5380_hostdata *hostdata,
 }
 
 /**
- * generic_NCR5380_pwrite - pseudo DMA write
- * @hostdata: scsi host private data
- * @dst: buffer to read into
- * @len: buffer length
+ * generic_NCR5380_pwrite - pseudo DMA send
+ * @hostdata: scsi host private data
+ * @src: buffer to read from
+ * @len: transfer size
  *
- * Perform a pseudo DMA mode read from an NCR53C400 or equivalent
- * controller
+ * Perform a pseudo DMA mode send to a 53C400 or equivalent device.
  */
 
 static inline int generic_NCR5380_pwrite(struct NCR5380_hostdata *hostdata,
@@ -601,10 +599,10 @@ static inline int generic_NCR5380_pwrite(struct 
NCR5380_hostdata *hostdata,
 
if (hostdata->io_port && hostdata->io_width == 2)
outsw(hostdata->io_port + hostdata->c400_host_buf,
-   src + start, 64);
+ src + start, 64);
else if (hostdata->io_port)
outsb(hostdata->io_port + hostdata->c400_host_buf,
-   src + start, 128);
+ src + start, 128);
else
memcpy_toio(hostdata->io + NCR53C400_host_buffer,
src + start, 128);
@@ -650,10 +648,8 @@ static int generic_NCR5380_dma_residual(struct 
NCR5380_hostdata *hostdata)
return hostdata->pdma_residual;
 }
 
-/*
- * Include the NCR5380 core code that we build our driver around   
- */
- 
+/* Include the core driver code. */
+
 #include "NCR5380.c"
 
 static struct scsi_host_template driver_template = {
@@ -673,11 +669,10 @@ static struct scsi_host_template driver_template = {
.max_sectors= 128,
 };
 
-
 static int generic_NCR5380_isa_match(struct device *pdev, unsigned int ndev)
 {
int ret = generic_NCR5380_init_one(_template, pdev, base[ndev],
- irq[ndev], card[ndev]);
+  irq[ndev], card[ndev]);
if (ret) {
if (base[ndev])
printk(KERN_WARNING "Card not found at address 
0x%03x\n",
@@ -689,7 +684,7 @@ static int generic_NCR5380_isa_match(struct device *pdev, 
unsigned int ndev)
 }
 
 static int generic_NCR5380_isa_remove(struct device 

[PATCH 3/4] g_NCR5380: Limit sg_tablesize to avoid PDMA read overruns on DTC436

2017-06-15 Thread Finn Thain
Back-to-back DMA receive transfers can lose a byte due to a 5380
flaw. This makes scatter-receive difficult or impossible on affected
hardware, so limit the scatter/gather tablesize to 1.

Signed-off-by: Finn Thain 
---
 drivers/scsi/g_NCR5380.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/scsi/g_NCR5380.c b/drivers/scsi/g_NCR5380.c
index 1e1cf7ca86fa..784913193ea5 100644
--- a/drivers/scsi/g_NCR5380.c
+++ b/drivers/scsi/g_NCR5380.c
@@ -247,6 +247,7 @@ static int generic_NCR5380_init_one(struct 
scsi_host_template *tpnt,
case BOARD_DTC3181E:
ports = dtc_3181e_ports;
magic = ncr_53c400a_magic;
+   tpnt->sg_tablesize = 1;
break;
}
 
-- 
2.13.0



[PATCH 1/4] g_NCR5380: Fix PDMA transfer size

2017-06-15 Thread Finn Thain
From: Ondrej Zary 

generic_NCR5380_dma_xfer_len() incorrectly uses cmd->transfersize
which causes rescan-scsi-bus and CD-ROM access to hang the system.
Use cmd->SCp.this_residual instead, like other NCR5380 drivers.

Signed-off-by: Ondrej Zary 
Signed-off-by: Finn Thain 
---
 drivers/scsi/g_NCR5380.c | 12 +++-
 1 file changed, 3 insertions(+), 9 deletions(-)

diff --git a/drivers/scsi/g_NCR5380.c b/drivers/scsi/g_NCR5380.c
index 67c8dac321ad..14ef4e8c4713 100644
--- a/drivers/scsi/g_NCR5380.c
+++ b/drivers/scsi/g_NCR5380.c
@@ -76,6 +76,7 @@
 #define IRQ_AUTO 254
 
 #define MAX_CARDS 8
+#define DMA_MAX_SIZE 32768
 
 /* old-style parameters for compatibility */
 static int ncr_irq = -1;
@@ -629,23 +630,16 @@ static inline int generic_NCR5380_pwrite(struct 
NCR5380_hostdata *hostdata,
 static int generic_NCR5380_dma_xfer_len(struct NCR5380_hostdata *hostdata,
 struct scsi_cmnd *cmd)
 {
-   int transfersize = cmd->transfersize;
+   int transfersize = cmd->SCp.this_residual;
 
if (hostdata->flags & FLAG_NO_PSEUDO_DMA)
return 0;
 
-   /* Limit transfers to 32K, for xx400 & xx406
-* pseudoDMA that transfers in 128 bytes blocks.
-*/
-   if (transfersize > 32 * 1024 && cmd->SCp.this_residual &&
-   !(cmd->SCp.this_residual % transfersize))
-   transfersize = 32 * 1024;
-
/* 53C400 datasheet: non-modulo-128-byte transfers should use PIO */
if (transfersize % 128)
transfersize = 0;
 
-   return transfersize;
+   return min(transfersize, DMA_MAX_SIZE);
 }
 
 /*
-- 
2.13.0



[PATCH 0/4] g_NCR5380: PDMA fixes and cleanup

2017-06-15 Thread Finn Thain
Ondrej, would you please test this patch series? One of your patches
has been modified slightly and the two I wrote are untested.


Finn Thain (2):
  g_NCR5380: Limit sg_tablesize to avoid PDMA read overruns on DTC436
  g_NCR5380: Cleanup comments and whitespace

Ondrej Zary (2):
  g_NCR5380: Fix PDMA transfer size
  g_NCR5380: End PDMA transfer correctly on target disconnection

 drivers/scsi/g_NCR5380.c | 111 +++
 1 file changed, 54 insertions(+), 57 deletions(-)

-- 
2.13.0



[PATCH 2/4] g_NCR5380: End PDMA transfer correctly on target disconnection

2017-06-15 Thread Finn Thain
From: Ondrej Zary 

When an IRQ arrives during PDMA transfer, pread() and pwrite() return
without waiting for the 53C80 registers to be ready and this ends up
messing up the chip state. This was observed with SONY CDU-55S which is
slow enough to disconnect during 4096-byte reads.

IRQ during PDMA is not an error so don't return -1. Instead, store the
remaining byte count for use by NCR5380_dma_residual().

[Modified to improve the BASR_END_DMA_TRANSFER error message rather
than remove it -- F.T.]

Signed-off-by: Ondrej Zary 
Signed-off-by: Finn Thain 
---
 drivers/scsi/g_NCR5380.c | 37 ++---
 1 file changed, 22 insertions(+), 15 deletions(-)

diff --git a/drivers/scsi/g_NCR5380.c b/drivers/scsi/g_NCR5380.c
index 14ef4e8c4713..1e1cf7ca86fa 100644
--- a/drivers/scsi/g_NCR5380.c
+++ b/drivers/scsi/g_NCR5380.c
@@ -44,12 +44,13 @@
int c400_ctl_status; \
int c400_blk_cnt; \
int c400_host_buf; \
-   int io_width
+   int io_width; \
+   int pdma_residual
 
 #define NCR5380_dma_xfer_lengeneric_NCR5380_dma_xfer_len
 #define NCR5380_dma_recv_setup  generic_NCR5380_pread
 #define NCR5380_dma_send_setup  generic_NCR5380_pwrite
-#define NCR5380_dma_residualNCR5380_dma_residual_none
+#define NCR5380_dma_residualgeneric_NCR5380_dma_residual
 
 #define NCR5380_intrgeneric_NCR5380_intr
 #define NCR5380_queue_command   generic_NCR5380_queue_command
@@ -500,10 +501,8 @@ static inline int generic_NCR5380_pread(struct 
NCR5380_hostdata *hostdata,
while (1) {
if (NCR5380_read(hostdata->c400_blk_cnt) == 0)
break;
-   if (NCR5380_read(hostdata->c400_ctl_status) & 
CSR_GATED_53C80_IRQ) {
-   printk(KERN_ERR "53C400r: Got 53C80_IRQ start=%d, 
blocks=%d\n", start, blocks);
-   return -1;
-   }
+   if (NCR5380_read(hostdata->c400_ctl_status) & 
CSR_GATED_53C80_IRQ)
+   goto out_wait;
while (NCR5380_read(hostdata->c400_ctl_status) & 
CSR_HOST_BUF_NOT_RDY)
; /* FIXME - no timeout */
 
@@ -542,13 +541,16 @@ static inline int generic_NCR5380_pread(struct 
NCR5380_hostdata *hostdata,
if (!(NCR5380_read(hostdata->c400_ctl_status) & CSR_GATED_53C80_IRQ))
printk("53C400r: no 53C80 gated irq after transfer");
 
+out_wait:
/* wait for 53C80 registers to be available */
while (!(NCR5380_read(hostdata->c400_ctl_status) & CSR_53C80_REG))
;
 
if (!(NCR5380_read(BUS_AND_STATUS_REG) & BASR_END_DMA_TRANSFER))
-   printk(KERN_ERR "53C400r: no end dma signal\n");
-   
+   pr_err("%s: No end dma signal (%d/%d)\n", __func__, start, len);
+
+   hostdata->pdma_residual = len - start;
+
return 0;
 }
 
@@ -571,10 +573,8 @@ static inline int generic_NCR5380_pwrite(struct 
NCR5380_hostdata *hostdata,
NCR5380_write(hostdata->c400_ctl_status, CSR_BASE);
NCR5380_write(hostdata->c400_blk_cnt, blocks);
while (1) {
-   if (NCR5380_read(hostdata->c400_ctl_status) & 
CSR_GATED_53C80_IRQ) {
-   printk(KERN_ERR "53C400w: Got 53C80_IRQ start=%d, 
blocks=%d\n", start, blocks);
-   return -1;
-   }
+   if (NCR5380_read(hostdata->c400_ctl_status) & 
CSR_GATED_53C80_IRQ)
+   goto out_wait;
 
if (NCR5380_read(hostdata->c400_blk_cnt) == 0)
break;
@@ -612,15 +612,17 @@ static inline int generic_NCR5380_pwrite(struct 
NCR5380_hostdata *hostdata,
blocks--;
}
 
+out_wait:
/* wait for 53C80 registers to be available */
while (!(NCR5380_read(hostdata->c400_ctl_status) & CSR_53C80_REG)) {
udelay(4); /* DTC436 chip hangs without this */
/* FIXME - no timeout */
}
 
-   if (!(NCR5380_read(BUS_AND_STATUS_REG) & BASR_END_DMA_TRANSFER)) {
-   printk(KERN_ERR "53C400w: no end dma signal\n");
-   }
+   if (!(NCR5380_read(BUS_AND_STATUS_REG) & BASR_END_DMA_TRANSFER))
+   pr_err("%s: No end dma signal (%d/%d)\n", __func__, start, len);
+
+   hostdata->pdma_residual = len - start;
 
while (!(NCR5380_read(TARGET_COMMAND_REG) & TCR_LAST_BYTE_SENT))
;   // TIMEOUT
@@ -642,6 +644,11 @@ static int generic_NCR5380_dma_xfer_len(struct 
NCR5380_hostdata *hostdata,
return min(transfersize, DMA_MAX_SIZE);
 }
 
+static int generic_NCR5380_dma_residual(struct NCR5380_hostdata *hostdata)
+{
+   return hostdata->pdma_residual;
+}
+
 /*
  * Include the NCR5380 core code that we build our driver around   
  */
-- 
2.13.0



[PATCH] scsi_proto.h: ATA_32 added for ata pass-thru(32).

2017-06-15 Thread Minwoo Im
SAT-4 (SCSI/ATA Translation standard) supports an ATA PASS-THROUGH(32)
SCSI command. It uses 7Fh as its operation code, which means
a variable-length CDB.
It would be great if the kernel supported an ATA pass-thru(32) command.
Prior to development of ATA pass-through at the libata level,
the definition of it should be added first.

Signed-off-by: Minwoo Im 
---
 include/scsi/scsi_proto.h |1 +
 1 file changed, 1 insertion(+)

diff --git a/include/scsi/scsi_proto.h b/include/scsi/scsi_proto.h
index ce78ec8..1eb4efd 100644
--- a/include/scsi/scsi_proto.h
+++ b/include/scsi/scsi_proto.h
@@ -164,6 +164,7 @@
 #define WRITE_SAME_32        0x0d
 
 /* Values for T10/04-262r7 */
+#define ATA_32          0x7F  /* 32-byte pass-thru */
 #define ATA_16          0x85  /* 16-byte pass-thru */
 #define ATA_12          0xa1  /* 12-byte pass-thru */
 
-- 
1.7.9.5



Re: [PATCH v2 1/2] libsas: Don't process sas events in static works

2017-06-15 Thread wangyijing


在 2017/6/15 16:00, John Garry 写道:
> On 15/06/2017 08:37, wangyijing wrote:
>>
>>
>> 在 2017/6/14 21:08, John Garry 写道:
>>> On 14/06/2017 10:04, wangyijing wrote:
>>  static void notify_ha_event(struct sas_ha_struct *sas_ha, enum ha_event 
>> event)
  {
 +struct sas_ha_event *ev;
 +
  BUG_ON(event >= HA_NUM_EVENTS);

 -sas_queue_event(event, &sas_ha->pending,
 -&sas_ha->ha_events[event].work, sas_ha);
 +ev = kzalloc(sizeof(*ev), GFP_ATOMIC);
 +if (!ev)
 +return;
>> GFP_ATOMIC allocations can fail and then no events will be queued *and* 
>> we
>> don't report the error back to the caller.
>>
 Yes, it's really a problem, but I don't find a better solution, do you 
 have some suggestion ?

>>>
>>> Dan raised an issue with this approach, regarding a malfunctioning PHY 
>>> which spews out events. I still don't think we're handling it safely. 
>>> Here's the suggestion:
>>> - each asd_sas_phy owns a finite-sized pool of events
>>> - when the event pool becomes exhausted, libsas stops queuing events 
>>> (obviously) and disables the PHY in the LLDD
>>> - upon attempting to re-enable the PHY from sysfs, libsas first checks that 
>>> the pool is still not exhausted
>>>
>>> If you cannot find a good solution, then let us know and we can help.
>>
>> Hi John and Dan, what's event you found on malfunctioning PHY, if the event 
>> is PORTE_BROADCAST_RCVD, since
>> every PORTE_BROADCAST_RCVD libsas always call sas_revalidate_domain(), what 
>> about keeping a broadcast waiting(not queued in workqueue)
>> and discard others. If the event is other types, things may become knotty.
>>
> 
> As I mentioned in the v1 series discussion, I found a poorly connected 
> expander PHY was spewing out PHY up and loss of signal events continuously. 
> This is the sort of situation we should protect against. Current solution is 
> ok, as it uses a static event per port/PHY/HA.
> 
> The point is that we cannot allow a PHY to continuously send events to 
> libsas, which may lead to memory exhaustion.

The current solution won't introduce memory exhaustion, but it's not OK, since 
the root of this issue is that it may lose events that are perfectly normal.
If we cannot identify the abnormal PHY, I think your mem pool idea is a 
candidate solution.

> 
> John
> 
>>
>>>
>>> John
>>>
>>>
>>> .
>>>
>>
>>
>> .
>>
> 
> 
> 
> .
> 



Re: [PATCH v2 1/2] libsas: Don't process sas events in static works

2017-06-15 Thread John Garry

On 15/06/2017 08:37, wangyijing wrote:



在 2017/6/14 21:08, John Garry 写道:

On 14/06/2017 10:04, wangyijing wrote:

 static void notify_ha_event(struct sas_ha_struct *sas_ha, enum ha_event event)

 {
+struct sas_ha_event *ev;
+
 BUG_ON(event >= HA_NUM_EVENTS);

-sas_queue_event(event, &sas_ha->pending,
-&sas_ha->ha_events[event].work, sas_ha);
+ev = kzalloc(sizeof(*ev), GFP_ATOMIC);
+if (!ev)
+return;

GFP_ATOMIC allocations can fail and then no events will be queued *and* we
don't report the error back to the caller.


Yes, it's really a problem, but I don't find a better solution, do you have 
some suggestion ?



Dan raised an issue with this approach, regarding a malfunctioning PHY which 
spews out events. I still don't think we're handling it safely. Here's the 
suggestion:
- each asd_sas_phy owns a finite-sized pool of events
- when the event pool becomes exhausted, libsas stops queuing events 
(obviously) and disables the PHY in the LLDD
- upon attempting to re-enable the PHY from sysfs, libsas first checks that the 
pool is still not exhausted

If you cannot find a good solution, then let us know and we can help.


Hi John and Dan, what's event you found on malfunctioning PHY, if the event is 
PORTE_BROADCAST_RCVD, since
every PORTE_BROADCAST_RCVD libsas always call sas_revalidate_domain(), what 
about keeping a broadcast waiting(not queued in workqueue)
and discard others. If the event is other types, things may become knotty.



As I mentioned in the v1 series discussion, I found a poorly connected 
expander PHY was spewing out PHY up and loss of signal events 
continuously. This is the sort of situation we should protect against. 
Current solution is ok, as it uses a static event per port/PHY/HA.


The point is that we cannot allow a PHY to continuously send events to 
libsas, which may lead to memory exhaustion.


John





John


.




.






Re: [PATCH v2 1/2] libsas: Don't process sas events in static works

2017-06-15 Thread wangyijing


在 2017/6/14 21:08, John Garry 写道:
> On 14/06/2017 10:04, wangyijing wrote:
  static void notify_ha_event(struct sas_ha_struct *sas_ha, enum ha_event 
 event)
 >>  {
 >> +struct sas_ha_event *ev;
 >> +
 >>  BUG_ON(event >= HA_NUM_EVENTS);
 >>
 >> -sas_queue_event(event, &sas_ha->pending,
 >> -&sas_ha->ha_events[event].work, sas_ha);
 >> +ev = kzalloc(sizeof(*ev), GFP_ATOMIC);
 >> +if (!ev)
 >> +return;
>>> > GFP_ATOMIC allocations can fail and then no events will be queued *and* we
>>> > don't report the error back to the caller.
>>> >
>> Yes, it's really a problem, but I don't find a better solution, do you have 
>> some suggestion ?
>>
> 
> Dan raised an issue with this approach, regarding a malfunctioning PHY which 
> spews out events. I still don't think we're handling it safely. Here's the 
> suggestion:
> - each asd_sas_phy owns a finite-sized pool of events
> - when the event pool becomes exhausted, libsas stops queuing events 
> (obviously) and disables the PHY in the LLDD
> - upon attempting to re-enable the PHY from sysfs, libsas first checks that 
> the pool is still not exhausted
> 
> If you cannot find a good solution, then let us know and we can help.

Hi John and Dan, what event did you see on the malfunctioning PHY? If the event is 
PORTE_BROADCAST_RCVD, then since
libsas always calls sas_revalidate_domain() for every PORTE_BROADCAST_RCVD, what 
about keeping one broadcast waiting (not queued in the workqueue)
and discarding the others? If the event is of another type, things may become knotty.


> 
> John
> 
> 
> .
> 



[PATCH 2/2] qedi: Remove WARN_ON from clear task context.

2017-06-15 Thread Manish Rangankar
Signed-off-by: Manish Rangankar 
---
 drivers/scsi/qedi/qedi_main.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/scsi/qedi/qedi_main.c b/drivers/scsi/qedi/qedi_main.c
index 09a2946..879d3b7 100644
--- a/drivers/scsi/qedi/qedi_main.c
+++ b/drivers/scsi/qedi/qedi_main.c
@@ -1499,11 +1499,9 @@ int qedi_get_task_idx(struct qedi_ctx *qedi)
 
 void qedi_clear_task_idx(struct qedi_ctx *qedi, int idx)
 {
-   if (!test_and_clear_bit(idx, qedi->task_idx_map)) {
+   if (!test_and_clear_bit(idx, qedi->task_idx_map))
QEDI_ERR(&qedi->dbg_ctx,
 "FW task context, already cleared, tid=0x%x\n", idx);
-   WARN_ON(1);
-   }
 }
 
 void qedi_update_itt_map(struct qedi_ctx *qedi, u32 tid, u32 proto_itt,
-- 
1.8.3.1



[PATCH 0/2] qedi: Remove unwanted warnings.

2017-06-15 Thread Manish Rangankar
Martin,

Please consider below patches for next 'scsi-fixes' submission.

Thanks,
Manish

Manish Rangankar (2):
  qedi: Remove WARN_ON for untracked cleanup.
  qedi: Remove WARN_ON from clear task context.

 drivers/scsi/qedi/qedi_fw.c   | 1 -
 drivers/scsi/qedi/qedi_main.c | 4 +---
 2 files changed, 1 insertion(+), 4 deletions(-)

-- 
1.8.3.1



[PATCH 1/2] qedi: Remove WARN_ON for untracked cleanup.

2017-06-15 Thread Manish Rangankar
Signed-off-by: Manish Rangankar 
---
 drivers/scsi/qedi/qedi_fw.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/scsi/qedi/qedi_fw.c b/drivers/scsi/qedi/qedi_fw.c
index 8bc7ee1..507512c 100644
--- a/drivers/scsi/qedi/qedi_fw.c
+++ b/drivers/scsi/qedi/qedi_fw.c
@@ -870,7 +870,6 @@ static void qedi_process_cmd_cleanup_resp(struct qedi_ctx 
*qedi,
QEDI_ERR(&qedi->dbg_ctx,
 "Delayed or untracked cleanup response, itt=0x%x, 
tid=0x%x, cid=0x%x, task=%p\n",
 protoitt, cqe->itid, qedi_conn->iscsi_conn_id, task);
-   WARN_ON(1);
}
 }
 
-- 
1.8.3.1



[PATCH] tcmu: Fix module removal due to stuck unmap_thread thread again

2017-06-15 Thread lixiubo
From: Xiubo Li 

The unmap code that runs just after schedule() returns may take
a long time, and if kthread_stop() is called while the thread is in
that routine, the module removal may get stuck too.

Signed-off-by: Xiubo Li 
---
 drivers/target/target_core_user.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/target/target_core_user.c 
b/drivers/target/target_core_user.c
index beb5f09..203bff1 100644
--- a/drivers/target/target_core_user.c
+++ b/drivers/target/target_core_user.c
@@ -1573,7 +1573,7 @@ static int unmap_thread_fn(void *data)
struct page *page;
int i;
 
-   while (1) {
+   while (!kthread_should_stop()) {
DEFINE_WAIT(__wait);
 
prepare_to_wait(&unmap_wait, &__wait, TASK_INTERRUPTIBLE);
-- 
1.8.3.1