On high-end arrays the list-based command allocation becomes a
bottleneck as the lock needs to be taken for each command allocation.
On the other hand the current blk-mq/scsi-mq infrastructure ensures
that a tag is never reused. So this patch moves the command
allocation to an array-based structure, indexed by the command tag.
With this we can avoid taking a lock during command allocation, and
just mark the command as 'in-use' by setting a flag.
This allows for proper housekeeping in case the HBA needs to be reset.

Signed-off-by: Hannes Reinecke <[email protected]>
---
 drivers/scsi/lpfc/lpfc.h      |  1 +
 drivers/scsi/lpfc/lpfc_init.c | 53 +++++++++++++++++++++++++--
 drivers/scsi/lpfc/lpfc_scsi.c | 84 +++++++++++++++++++++++++++++++++++++------
 drivers/scsi/lpfc/lpfc_scsi.h |  7 ++--
 4 files changed, 128 insertions(+), 17 deletions(-)

diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h
index bb53b81..289cc50 100644
--- a/drivers/scsi/lpfc/lpfc.h
+++ b/drivers/scsi/lpfc/lpfc.h
@@ -840,6 +840,7 @@ struct lpfc_hba {
        uint64_t bg_reftag_err_cnt;
 
        /* fastpath list. */
+       struct lpfc_scsi_buf **lpfc_scsi_buf_arr;
        spinlock_t scsi_buf_list_get_lock;  /* SCSI buf alloc list lock */
        spinlock_t scsi_buf_list_put_lock;  /* SCSI buf free list lock */
        struct list_head lpfc_scsi_buf_list_get;
diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c
index d2a6302..55ed075 100644
--- a/drivers/scsi/lpfc/lpfc_init.c
+++ b/drivers/scsi/lpfc/lpfc_init.c
@@ -3071,6 +3071,20 @@ lpfc_scsi_free(struct lpfc_hba *phba)
        }
        spin_unlock(&phba->scsi_buf_list_get_lock);
 
+       if (phba->lpfc_scsi_buf_arr) {
+               int idx;
+               for (idx = 0; idx < phba->cfg_hba_queue_depth; idx++) {
+                       sb = phba->lpfc_scsi_buf_arr[idx];
+                       if (!sb)
+                               continue;
+                       clear_bit(LPFC_CMD_QUEUED, &sb->flags);
+                       list_del(&sb->list);
+                       pci_pool_free(phba->lpfc_scsi_dma_buf_pool, sb->data,
+                                     sb->dma_handle);
+                       kfree(sb);
+                       phba->total_scsi_bufs--;
+               }
+       }
        /* Release all the lpfc_iocbq entries maintained by this host. */
        list_for_each_entry_safe(io, io_next, &phba->lpfc_iocb_list, list) {
                list_del(&io->list);
@@ -3212,6 +3226,18 @@ lpfc_sli4_xri_sgl_update(struct lpfc_hba *phba)
                        phba->sli4_hba.scsi_xri_cnt,
                        phba->sli4_hba.scsi_xri_max);
 
+       if (phba->lpfc_scsi_buf_arr) {
+               for (i = 0; i < phba->cfg_hba_queue_depth; i++) {
+                       psb = phba->lpfc_scsi_buf_arr[i];
+                       if (psb) {
+                               if (test_and_set_bit(LPFC_CMD_QUEUED,
+                                                    &psb->flags))
+                                       continue;
+                               list_add_tail(&psb->list, &scsi_sgl_list);
+                       }
+               }
+       }
+
        spin_lock_irq(&phba->scsi_buf_list_get_lock);
        spin_lock(&phba->scsi_buf_list_put_lock);
        list_splice_init(&phba->lpfc_scsi_buf_list_get, &scsi_sgl_list);
@@ -3228,6 +3254,9 @@ lpfc_sli4_xri_sgl_update(struct lpfc_hba *phba)
                        list_remove_head(&scsi_sgl_list, psb,
                                         struct lpfc_scsi_buf, list);
                        if (psb) {
+                               clear_bit(LPFC_CMD_QUEUED, &psb->flags);
+                               if (phba->lpfc_scsi_buf_arr)
+                               phba->lpfc_scsi_buf_arr[psb->iotag] = NULL;
                                pci_pool_free(phba->lpfc_scsi_dma_buf_pool,
                                              psb->data, psb->dma_handle);
                                kfree(psb);
@@ -3258,8 +3287,17 @@ lpfc_sli4_xri_sgl_update(struct lpfc_hba *phba)
        list_splice_init(&scsi_sgl_list, &phba->lpfc_scsi_buf_list_get);
        INIT_LIST_HEAD(&phba->lpfc_scsi_buf_list_put);
        spin_unlock(&phba->scsi_buf_list_put_lock);
-       spin_unlock_irq(&phba->scsi_buf_list_get_lock);
 
+       if (phba->lpfc_scsi_buf_arr) {
+               for (i = 0; i < phba->cfg_hba_queue_depth; i++) {
+                       psb = phba->lpfc_scsi_buf_arr[i];
+                       if (psb) {
+                               clear_bit(LPFC_CMD_QUEUED, &psb->flags);
+                               list_del_init(&psb->list);
+                       }
+               }
+       }
+       spin_unlock_irq(&phba->scsi_buf_list_get_lock);
        return 0;
 
 out_free_mem:
@@ -3329,7 +3367,8 @@ lpfc_create_port(struct lpfc_hba *phba, int instance, struct device *dev)
         * scsi_add_host will fail. This will be adjusted later based on the
         * max xri value determined in hba setup.
         */
-       shost->can_queue = phba->cfg_hba_queue_depth - 10;
+       shost->can_queue = (phba->cfg_hba_queue_depth - 10) /
+               phba->cfg_fcp_io_channel;
        if (dev != &phba->pcidev->dev) {
                shost->transportt = lpfc_vport_transport_template;
                vport->port_type = LPFC_NPIV_PORT;
@@ -3338,6 +3377,13 @@ lpfc_create_port(struct lpfc_hba *phba, int instance, struct device *dev)
                vport->port_type = LPFC_PHYSICAL_PORT;
        }
 
+       if (shost_use_blk_mq(shost) && phba->sli_rev == LPFC_SLI_REV4) {
+               phba->lpfc_scsi_buf_arr = kzalloc(sizeof(struct lpfc_scsi_buf *) *
+                                                 phba->cfg_hba_queue_depth, GFP_KERNEL);
+               if (!phba->lpfc_scsi_buf_arr)
+                       goto out_put_shost;
+       }
+
        /* Initialize all internally managed lists. */
        INIT_LIST_HEAD(&vport->fc_nodes);
        INIT_LIST_HEAD(&vport->rcv_buffer_list);
@@ -6312,7 +6358,8 @@ lpfc_post_init_setup(struct lpfc_hba *phba)
         * adjust the value of can_queue.
         */
        shost = pci_get_drvdata(phba->pcidev);
-       shost->can_queue = phba->cfg_hba_queue_depth - 10;
+       shost->can_queue = (phba->cfg_hba_queue_depth - 10) /
+               phba->cfg_fcp_io_channel;
        if (phba->sli3_options & LPFC_SLI3_BG_ENABLED)
                lpfc_setup_bg(phba, shost);
 
diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c
index 3111a9d..a3eb5ff 100644
--- a/drivers/scsi/lpfc/lpfc_scsi.c
+++ b/drivers/scsi/lpfc/lpfc_scsi.c
@@ -746,9 +746,19 @@ int
 lpfc_sli4_repost_scsi_sgl_list(struct lpfc_hba *phba)
 {
        LIST_HEAD(post_sblist);
-       int num_posted, rc = 0;
+       int i, num_posted, rc = 0;
 
        /* get all SCSI buffers need to repost to a local list */
+       if (phba->lpfc_scsi_buf_arr) {
+               struct lpfc_scsi_buf *psb;
+
+               for (i = 0; i < phba->cfg_hba_queue_depth; i++) {
+                       psb = phba->lpfc_scsi_buf_arr[i];
+                       if (psb &&
+                           !test_and_set_bit(LPFC_CMD_QUEUED, &psb->flags))
+                               list_add(&psb->list, &post_sblist);
+               }
+       }
        spin_lock_irq(&phba->scsi_buf_list_get_lock);
        spin_lock(&phba->scsi_buf_list_put_lock);
        list_splice_init(&phba->lpfc_scsi_buf_list_get, &post_sblist);
@@ -913,6 +923,12 @@ lpfc_new_scsi_buf_s4(struct lpfc_vport *vport, int num_to_alloc)
                psb->dma_phys_bpl = pdma_phys_bpl;
 
                /* add the scsi buffer to a post list */
+               if (phba->lpfc_scsi_buf_arr) {
+                       int idx = phba->total_scsi_bufs + bcnt;
+                       psb->iotag = idx;
+                       phba->lpfc_scsi_buf_arr[idx] = psb;
+                       set_bit(LPFC_CMD_QUEUED, &psb->flags);
+               }
                list_add_tail(&psb->list, &post_sblist);
                spin_lock_irq(&phba->scsi_buf_list_get_lock);
                phba->sli4_hba.scsi_xri_cnt++;
@@ -1105,9 +1121,13 @@ lpfc_release_scsi_buf_s4(struct lpfc_hba *phba, struct lpfc_scsi_buf *psb)
        } else {
                psb->pCmd = NULL;
                psb->cur_iocbq.iocb_flag = LPFC_IO_FCP;
-               spin_lock_irqsave(&phba->scsi_buf_list_put_lock, iflag);
-               list_add_tail(&psb->list, &phba->lpfc_scsi_buf_list_put);
-               spin_unlock_irqrestore(&phba->scsi_buf_list_put_lock, iflag);
+               if (phba->lpfc_scsi_buf_arr)
+                       clear_bit(LPFC_CMD_QUEUED, &psb->flags);
+               else {
+                       spin_lock_irqsave(&phba->scsi_buf_list_put_lock, iflag);
+                       list_add_tail(&psb->list, &phba->lpfc_scsi_buf_list_put);
+                       spin_unlock_irqrestore(&phba->scsi_buf_list_put_lock, iflag);
+               }
        }
 }
 
@@ -4533,7 +4553,7 @@ lpfc_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *cmnd)
        struct lpfc_hba   *phba = vport->phba;
        struct lpfc_rport_data *rdata;
        struct lpfc_nodelist *ndlp;
-       struct lpfc_scsi_buf *lpfc_cmd;
+       struct lpfc_scsi_buf *lpfc_cmd = NULL;
        struct fc_rport *rport = starget_to_rport(scsi_target(cmnd->device));
        int err;
 
@@ -4566,7 +4586,28 @@ lpfc_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *cmnd)
        if (atomic_read(&ndlp->cmd_pending) >= ndlp->cmd_qdepth)
                goto out_tgt_busy;
 
-       lpfc_cmd = lpfc_get_scsi_buf(phba, ndlp);
+       if (phba->lpfc_scsi_buf_arr) {
+               u32 tag = blk_mq_unique_tag(cmnd->request);
+               u16 hwq = blk_mq_unique_tag_to_hwq(tag);
+               u16 idx = blk_mq_unique_tag_to_tag(tag);
+
+               idx = idx * phba->cfg_fcp_io_channel + hwq;
+               if (idx >= phba->cfg_hba_queue_depth) {
+                       lpfc_printf_vlog(vport, KERN_ERR, LOG_SCSI_CMD,
+                                        "9034 iotag %x too large\n", idx);
+               } else
+                       lpfc_cmd = phba->lpfc_scsi_buf_arr[idx];
+               if (!lpfc_cmd)
+                       lpfc_printf_vlog(vport, KERN_ERR, LOG_SCSI_CMD,
+                                        "9035 iotag %x invalid\n", idx);
+               else if (test_and_set_bit(LPFC_CMD_QUEUED, &lpfc_cmd->flags)) {
+                       lpfc_printf_vlog(vport, KERN_INFO, LOG_SCSI_CMD,
+                                        "9036 iotag %x hwq %x busy\n",
+                                        lpfc_cmd->iotag, hwq);
+                       lpfc_cmd = NULL;
+               }
+       } else
+               lpfc_cmd = lpfc_get_scsi_buf(phba, ndlp);
        if (lpfc_cmd == NULL) {
                lpfc_rampdown_queue_depth(phba);
 
@@ -4962,7 +5003,7 @@ lpfc_send_taskmgmt(struct lpfc_vport *vport, struct lpfc_rport_data *rdata,
                    uint8_t task_mgmt_cmd)
 {
        struct lpfc_hba   *phba = vport->phba;
-       struct lpfc_scsi_buf *lpfc_cmd;
+       struct lpfc_scsi_buf *lpfc_cmd = NULL;
        struct lpfc_iocbq *iocbq;
        struct lpfc_iocbq *iocbqrsp;
        struct lpfc_nodelist *pnode = rdata->pnode;
@@ -4972,7 +5013,21 @@ lpfc_send_taskmgmt(struct lpfc_vport *vport, struct lpfc_rport_data *rdata,
        if (!pnode || !NLP_CHK_NODE_ACT(pnode))
                return FAILED;
 
-       lpfc_cmd = lpfc_get_scsi_buf(phba, rdata->pnode);
+       if (phba->lpfc_scsi_buf_arr) {
+               int idx;
+               for (idx = 0; idx < phba->cfg_hba_queue_depth; idx++) {
+                       lpfc_cmd = phba->lpfc_scsi_buf_arr[idx];
+                       if (!test_and_set_bit(LPFC_CMD_QUEUED,
+                                             &lpfc_cmd->flags)) {
+                               ret = 0;
+                               break;
+                       }
+                       ret = -EBUSY;
+               }
+               if (ret < 0)
+                       lpfc_cmd = NULL;
+       } else
+               lpfc_cmd = lpfc_get_scsi_buf(phba, rdata->pnode);
        if (lpfc_cmd == NULL)
                return FAILED;
        lpfc_cmd->timeout = phba->cfg_task_mgmt_tmo;
@@ -5483,10 +5538,12 @@ lpfc_slave_alloc(struct scsi_device *sdev)
         * extra.  This list of scsi bufs exists for the lifetime of the driver.
         */
        total = phba->total_scsi_bufs;
-       num_to_alloc = vport->cfg_lun_queue_depth + 2;
+       num_to_alloc = (vport->cfg_lun_queue_depth + 2) *
+               phba->cfg_fcp_io_channel;
 
        /* If allocated buffers are enough do nothing */
-       if ((sdev_cnt * (vport->cfg_lun_queue_depth + 2)) < total)
+       if (!shost_use_blk_mq(sdev->host) &&
+           (sdev_cnt * (vport->cfg_lun_queue_depth + 2)) < total)
                return 0;
 
        /* Allow some exchanges to be available always to complete discovery */
@@ -5514,8 +5571,13 @@ lpfc_slave_alloc(struct scsi_device *sdev)
                                         "Allocated %d buffers.\n",
                                         num_to_alloc, num_allocated);
        }
-       if (num_allocated > 0)
+       if (num_allocated > 0) {
                phba->total_scsi_bufs += num_allocated;
+               if (shost_use_blk_mq(sdev->host)) {
+                       int num_tags = num_allocated / phba->cfg_fcp_io_channel;
+                       scsi_mq_resize_tags(sdev->host, num_tags);
+               }
+       }
        return 0;
 }
 
diff --git a/drivers/scsi/lpfc/lpfc_scsi.h b/drivers/scsi/lpfc/lpfc_scsi.h
index 4e8f0bd..a07341e 100644
--- a/drivers/scsi/lpfc/lpfc_scsi.h
+++ b/drivers/scsi/lpfc/lpfc_scsi.h
@@ -134,7 +134,7 @@ struct lpfc_scsi_buf {
 
        uint32_t timeout;
 
-       uint16_t xx_exch_busy;     /* SLI4 hba reported XB on complete WCQE */
+       uint16_t iotag;
        uint16_t status;        /* From IOCB Word 7- ulpStatus */
        uint32_t result;        /* From IOCB Word 4. */
 
@@ -144,8 +144,9 @@ struct lpfc_scsi_buf {
        uint32_t prot_seg_cnt;  /* seg_cnt's counterpart for protection data */
 
        unsigned long flags;
-#define LPFC_CMD_EXCH_BUSY 1
-#define LPFC_CMD_ABORTED   2
+#define LPFC_CMD_EXCH_BUSY 0
+#define LPFC_CMD_ABORTED   1
+#define LPFC_CMD_QUEUED    2
        dma_addr_t nonsg_phys;  /* Non scatter-gather physical address. */
 
        /*
-- 
1.8.5.6

--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to