On 8/20/2013 3:50 PM, Bart Van Assche wrote:
Certain storage configurations, e.g. a sufficiently large array of
hard disks in a RAID configuration, need a queue depth above 64 to
achieve optimal performance. Hence make the queue depth configurable.
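
For example, with this patch the queue depth can be raised at login time by appending the new can_queue option to the target login string (angle-bracket values below are placeholders):

    echo "id_ext=<id_ext>,ioc_guid=<ioc_guid>,dgid=<dgid>,pkey=ffff,service_id=<service_id>,can_queue=128" \
        > /sys/class/infiniband_srp/srp-<hca>-<port>/add_target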

Signed-off-by: Bart Van Assche <bvanass...@acm.org>
Cc: Roland Dreier <rol...@purestorage.com>
Cc: David Dillow <dillo...@ornl.gov>
Cc: Vu Pham <v...@mellanox.com>
Cc: Sebastian Riemer <sebastian.rie...@profitbricks.com>
Cc: Konrad Grzybowski <konr...@k2.pl>
---
  drivers/infiniband/ulp/srp/ib_srp.c |  125 ++++++++++++++++++++++++++---------
  drivers/infiniband/ulp/srp/ib_srp.h |   17 +++--
  2 files changed, 103 insertions(+), 39 deletions(-)

diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index ece1f2d..6de2323 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -299,16 +299,16 @@ static int srp_create_target_ib(struct srp_target_port *target)
                return -ENOMEM;
        recv_cq = ib_create_cq(target->srp_host->srp_dev->dev,
-                              srp_recv_completion, NULL, target, SRP_RQ_SIZE,
-                              target->comp_vector);
+                              srp_recv_completion, NULL, target,
+                              target->queue_size, target->comp_vector);
        if (IS_ERR(recv_cq)) {
                ret = PTR_ERR(recv_cq);
                goto err;
        }
        send_cq = ib_create_cq(target->srp_host->srp_dev->dev,
-                              srp_send_completion, NULL, target, SRP_SQ_SIZE,
-                              target->comp_vector);
+                              srp_send_completion, NULL, target,
+                              target->queue_size, target->comp_vector);
        if (IS_ERR(send_cq)) {
                ret = PTR_ERR(send_cq);
                goto err_recv_cq;
@@ -317,8 +317,8 @@ static int srp_create_target_ib(struct srp_target_port *target)
        ib_req_notify_cq(recv_cq, IB_CQ_NEXT_COMP);
        init_attr->event_handler       = srp_qp_event;
-       init_attr->cap.max_send_wr     = SRP_SQ_SIZE;
-       init_attr->cap.max_recv_wr     = SRP_RQ_SIZE;
+       init_attr->cap.max_send_wr     = target->queue_size;
+       init_attr->cap.max_recv_wr     = target->queue_size;
        init_attr->cap.max_recv_sge    = 1;
        init_attr->cap.max_send_sge    = 1;
        init_attr->sq_sig_type         = IB_SIGNAL_ALL_WR;
@@ -364,6 +364,10 @@ err:
        return ret;
  }
+/*
+ * Note: this function may be called without srp_alloc_iu_bufs() having been
+ * invoked. Hence the target->[rt]x_ring checks.
+ */
  static void srp_free_target_ib(struct srp_target_port *target)
  {
        int i;
@@ -375,10 +379,18 @@ static void srp_free_target_ib(struct srp_target_port *target)
        target->qp = NULL;
        target->send_cq = target->recv_cq = NULL;
-       for (i = 0; i < SRP_RQ_SIZE; ++i)
-               srp_free_iu(target->srp_host, target->rx_ring[i]);
-       for (i = 0; i < SRP_SQ_SIZE; ++i)
-               srp_free_iu(target->srp_host, target->tx_ring[i]);
+       if (target->rx_ring) {
+               for (i = 0; i < target->queue_size; ++i)
+                       srp_free_iu(target->srp_host, target->rx_ring[i]);
+               kfree(target->rx_ring);
+               target->rx_ring = NULL;
+       }
+       if (target->tx_ring) {
+               for (i = 0; i < target->queue_size; ++i)
+                       srp_free_iu(target->srp_host, target->tx_ring[i]);
+               kfree(target->tx_ring);
+               target->tx_ring = NULL;
+       }
  }
static void srp_path_rec_completion(int status,
@@ -564,7 +576,11 @@ static void srp_free_req_data(struct srp_target_port *target)
        struct srp_request *req;
        int i;
-       for (i = 0, req = target->req_ring; i < SRP_CMD_SQ_SIZE; ++i, ++req) {
+       if (!target->req_ring)
+               return;
+
+       for (i = 0; i < target->req_ring_size; ++i) {
+               req = &target->req_ring[i];
                kfree(req->fmr_list);
                kfree(req->map_page);
                if (req->indirect_dma_addr) {
@@ -574,6 +590,9 @@ static void srp_free_req_data(struct srp_target_port *target)
                }
                kfree(req->indirect_desc);
        }
+
+       kfree(target->req_ring);
+       target->req_ring = NULL;
  }
static int srp_alloc_req_data(struct srp_target_port *target)
@@ -586,7 +605,12 @@ static int srp_alloc_req_data(struct srp_target_port *target)

        INIT_LIST_HEAD(&target->free_reqs);

-       for (i = 0; i < SRP_CMD_SQ_SIZE; ++i) {
+       target->req_ring = kzalloc(target->req_ring_size *
+                                  sizeof(*target->req_ring), GFP_KERNEL);
+       if (!target->req_ring)
+               goto out;
+
+       for (i = 0; i < target->req_ring_size; ++i) {
                req = &target->req_ring[i];
                req->fmr_list = kmalloc(target->cmd_sg_cnt * sizeof(void *),
                                        GFP_KERNEL);
@@ -810,7 +834,7 @@ static void srp_terminate_io(struct srp_rport *rport)
        struct srp_target_port *target = rport->lld_data;
        int i;
-       for (i = 0; i < SRP_CMD_SQ_SIZE; ++i) {
+       for (i = 0; i < target->req_ring_size; ++i) {
                struct srp_request *req = &target->req_ring[i];
                srp_finish_req(target, req, DID_TRANSPORT_FAILFAST << 16);
        }
@@ -847,13 +871,13 @@ static int srp_rport_reconnect(struct srp_rport *rport)
        else
                srp_create_target_ib(target);
-       for (i = 0; i < SRP_CMD_SQ_SIZE; ++i) {
+       for (i = 0; i < target->req_ring_size; ++i) {
                struct srp_request *req = &target->req_ring[i];
                srp_finish_req(target, req, DID_RESET << 16);
        }
        INIT_LIST_HEAD(&target->free_tx);
-       for (i = 0; i < SRP_SQ_SIZE; ++i)
+       for (i = 0; i < target->queue_size; ++i)
                list_add(&target->tx_ring[i]->list, &target->free_tx);
        if (ret == 0)
@@ -1544,11 +1568,24 @@ err_unlock:
        return SCSI_MLQUEUE_HOST_BUSY;
  }
+/*
+ * Note: the resources allocated in this function are freed in
+ * srp_free_target_ib().
+ */
  static int srp_alloc_iu_bufs(struct srp_target_port *target)
  {
        int i;
-       for (i = 0; i < SRP_RQ_SIZE; ++i) {
+       target->rx_ring = kzalloc(target->queue_size * sizeof(*target->rx_ring),
+                                 GFP_KERNEL);
+       if (!target->rx_ring)
+               goto err_no_ring;
+       target->tx_ring = kzalloc(target->queue_size * sizeof(*target->tx_ring),
+                                 GFP_KERNEL);
+       if (!target->tx_ring)
+               goto err_no_ring;
+
+       for (i = 0; i < target->queue_size; ++i) {
                target->rx_ring[i] = srp_alloc_iu(target->srp_host,
                                                  target->max_ti_iu_len,
                                                  GFP_KERNEL, DMA_FROM_DEVICE);
@@ -1556,7 +1593,7 @@ static int srp_alloc_iu_bufs(struct srp_target_port *target)
                        goto err;
        }
-       for (i = 0; i < SRP_SQ_SIZE; ++i) {
+       for (i = 0; i < target->queue_size; ++i) {
                target->tx_ring[i] = srp_alloc_iu(target->srp_host,
                                                  target->max_iu_len,
                                                  GFP_KERNEL, DMA_TO_DEVICE);
@@ -1569,16 +1606,18 @@ static int srp_alloc_iu_bufs(struct srp_target_port *target)
        return 0;
err:
-       for (i = 0; i < SRP_RQ_SIZE; ++i) {
+       for (i = 0; i < target->queue_size; ++i) {
                srp_free_iu(target->srp_host, target->rx_ring[i]);
-               target->rx_ring[i] = NULL;
-       }
-
-       for (i = 0; i < SRP_SQ_SIZE; ++i) {
                srp_free_iu(target->srp_host, target->tx_ring[i]);
-               target->tx_ring[i] = NULL;
        }
+
+err_no_ring:
+       kfree(target->tx_ring);
+       target->tx_ring = NULL;
+       kfree(target->rx_ring);
+       target->rx_ring = NULL;
+
        return -ENOMEM;
  }
@@ -1629,6 +1668,9 @@ static void srp_cm_rep_handler(struct ib_cm_id *cm_id,
                target->scsi_host->can_queue
                        = min(target->req_lim - SRP_TSK_MGMT_SQ_SIZE,
                              target->scsi_host->can_queue);
+               target->scsi_host->cmd_per_lun
+                       = min_t(int, target->scsi_host->can_queue,
+                               target->scsi_host->cmd_per_lun);
        } else {
                shost_printk(KERN_WARNING, target->scsi_host,
                             PFX "Unhandled RSP opcode %#x\n", lrsp->opcode);
@@ -1636,7 +1678,7 @@ static void srp_cm_rep_handler(struct ib_cm_id *cm_id,
                goto error;
        }
-       if (!target->rx_ring[0]) {
+       if (!target->rx_ring) {
                ret = srp_alloc_iu_bufs(target);
                if (ret)
                        goto error;
@@ -1656,7 +1698,7 @@ static void srp_cm_rep_handler(struct ib_cm_id *cm_id,
        if (ret)
                goto error_free;
-       for (i = 0; i < SRP_RQ_SIZE; i++) {
+       for (i = 0; i < target->queue_size; i++) {
                struct srp_iu *iu = target->rx_ring[i];
                ret = srp_post_recv(target, iu);
                if (ret)
@@ -1903,7 +1945,7 @@ static int srp_reset_device(struct scsi_cmnd *scmnd)
        if (target->tsk_mgmt_status)
                return FAILED;
-       for (i = 0; i < SRP_CMD_SQ_SIZE; ++i) {
+       for (i = 0; i < target->req_ring_size; ++i) {
                struct srp_request *req = &target->req_ring[i];
                if (req->scmnd && req->scmnd->device == scmnd->device)
                        srp_finish_req(target, req, DID_RESET << 16);
@@ -2096,9 +2138,9 @@ static struct scsi_host_template srp_template = {
        .eh_host_reset_handler          = srp_reset_host,
        .skip_settle_delay              = true,
        .sg_tablesize                   = SRP_DEF_SG_TABLESIZE,
-       .can_queue                      = SRP_CMD_SQ_SIZE,
+       .can_queue                      = SRP_DEFAULT_CMD_SQ_SIZE,
        .this_id                        = -1,
-       .cmd_per_lun                    = SRP_CMD_SQ_SIZE,
+       .cmd_per_lun                    = SRP_DEFAULT_CMD_SQ_SIZE,
        .use_clustering                 = ENABLE_CLUSTERING,
        .shost_attrs                    = srp_host_attrs
  };
@@ -2205,6 +2247,7 @@ enum {
        SRP_OPT_SG_TABLESIZE    = 1 << 11,
        SRP_OPT_COMP_VECTOR     = 1 << 12,
        SRP_OPT_TL_RETRY_COUNT  = 1 << 13,
+       SRP_OPT_CAN_QUEUE       = 1 << 14,
        SRP_OPT_ALL             = (SRP_OPT_ID_EXT       |
                                   SRP_OPT_IOC_GUID     |
                                   SRP_OPT_DGID         |
@@ -2227,6 +2270,7 @@ static const match_table_t srp_opt_tokens = {
        { SRP_OPT_SG_TABLESIZE,         "sg_tablesize=%u"     },
        { SRP_OPT_COMP_VECTOR,          "comp_vector=%u"      },
        { SRP_OPT_TL_RETRY_COUNT,       "tl_retry_count=%u"   },
+       { SRP_OPT_CAN_QUEUE,            "can_queue=%d"        },
        { SRP_OPT_ERR,                  NULL                    }
  };
@@ -2321,13 +2365,25 @@ static int srp_parse_options(const char *buf, struct srp_target_port *target)
                        target->scsi_host->max_sectors = token;
                        break;
+               case SRP_OPT_CAN_QUEUE:
+                       if (match_int(args, &token) || token < 1) {
+                               pr_warn("bad can_queue parameter '%s'\n", p);
+                               goto out;
+                       }
+                       target->scsi_host->can_queue = token;
+                       target->queue_size = token + SRP_RSP_SQ_SIZE +
+                                            SRP_TSK_MGMT_SQ_SIZE;
+                       if (!(opt_mask & SRP_OPT_MAX_CMD_PER_LUN))
+                               target->scsi_host->cmd_per_lun = token;
+                       break;
+
                case SRP_OPT_MAX_CMD_PER_LUN:
-                       if (match_int(args, &token)) {
+                       if (match_int(args, &token) || token < 1) {
                                pr_warn("bad max cmd_per_lun parameter '%s'\n",
                                        p);
                                goto out;
                        }
-                       target->scsi_host->cmd_per_lun = min(token, SRP_CMD_SQ_SIZE);
+                       target->scsi_host->cmd_per_lun = token;
                        break;
                case SRP_OPT_IO_CLASS:
@@ -2415,6 +2471,12 @@ static int srp_parse_options(const char *buf, struct srp_target_port *target)
                                pr_warn("target creation request is missing parameter '%s'\n",
                                        srp_opt_tokens[i].pattern);
+       if (target->scsi_host->cmd_per_lun > target->scsi_host->can_queue
+           && (opt_mask & SRP_OPT_MAX_CMD_PER_LUN))
+               pr_warn("cmd_per_lun = %d > can_queue = %d\n",
+                       target->scsi_host->cmd_per_lun,
+                       target->scsi_host->can_queue);
+
  out:
        kfree(options);
        return ret;
@@ -2453,11 +2515,14 @@ static ssize_t srp_create_target(struct device *dev,
        target->sg_tablesize = indirect_sg_entries ? : cmd_sg_entries;
        target->allow_ext_sg = allow_ext_sg;
        target->tl_retry_count       = 7;
+       target->queue_size   = SRP_DEFAULT_QUEUE_SIZE;
        ret = srp_parse_options(buf, target);
        if (ret)
                goto err;
+       target->req_ring_size = target->queue_size - SRP_TSK_MGMT_SQ_SIZE;
+
        if (!srp_conn_unique(target->srp_host, target)) {
                shost_printk(KERN_INFO, target->scsi_host,
                             PFX "Already connected to target port with id_ext=%016llx;ioc_guid=%016llx;initiator_ext=%016llx\n",
diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h
index 446b045..5756810 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.h
+++ b/drivers/infiniband/ulp/srp/ib_srp.h
@@ -57,14 +57,11 @@ enum {
        SRP_MAX_LUN             = 512,
        SRP_DEF_SG_TABLESIZE    = 12,
-       SRP_RQ_SHIFT            = 6,
-       SRP_RQ_SIZE             = 1 << SRP_RQ_SHIFT,
-
-       SRP_SQ_SIZE             = SRP_RQ_SIZE,
+       SRP_DEFAULT_QUEUE_SIZE  = 1 << 6,
        SRP_RSP_SQ_SIZE         = 1,
-       SRP_REQ_SQ_SIZE         = SRP_SQ_SIZE - SRP_RSP_SQ_SIZE,
        SRP_TSK_MGMT_SQ_SIZE    = 1,
-       SRP_CMD_SQ_SIZE         = SRP_REQ_SQ_SIZE - SRP_TSK_MGMT_SQ_SIZE,
+       SRP_DEFAULT_CMD_SQ_SIZE = SRP_DEFAULT_QUEUE_SIZE - SRP_RSP_SQ_SIZE -
+                                 SRP_TSK_MGMT_SQ_SIZE,
        SRP_TAG_NO_REQ          = ~0U,
        SRP_TAG_TSK_MGMT        = 1U << 31,
@@ -156,6 +153,8 @@ struct srp_target_port {
        char                    target_name[32];
        unsigned int            scsi_id;
        unsigned int            sg_tablesize;
+       int                     queue_size;
+       int                     req_ring_size;
        int                     comp_vector;
        int                     tl_retry_count;
@@ -173,9 +172,9 @@ struct srp_target_port {

        int                     zero_req_lim;

-       struct srp_iu          *tx_ring[SRP_SQ_SIZE];
-       struct srp_iu          *rx_ring[SRP_RQ_SIZE];
-       struct srp_request      req_ring[SRP_CMD_SQ_SIZE];
+       struct srp_iu          **tx_ring;
+       struct srp_iu          **rx_ring;
+       struct srp_request      *req_ring;
        struct work_struct      tl_err_work;
        struct work_struct      remove_work;

Hey Bart,

I noticed this patch in your github tree and played with it. I agree that this patch has been needed for a long time...

Question:
If SRP now allows larger queues while still using a single global FMR pool of size 1024, isn't it more likely that under stress SRP will run out of FMRs to handle I/O commands? Say you have several SCSI hosts, each with a can_queue of roughly 512, all running an I/O stress load: is it possible that every FMR ends up in use, leaving none available to register the next I/O SG list?
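As a back-of-the-envelope example: four such hosts at can_queue=512 could have up to 4 * 512 = 2048 commands in flight, each potentially holding one FMR mapping, against only 1024 entries in the shared pool.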
Did you try out such a scenario?

I guess that in such a case the IB core will return -EAGAIN and SRP will return SCSI_MLQUEUE_HOST_BUSY. I think it would be a good idea to make the FMR pools per connection rather than one global pool; what do you think?
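
Roughly what I have in mind, as an untested sketch: create the pool in srp_create_target_ib() and size it to the connection's queue depth. The fmr_pool field on srp_target_port is hypothetical, and the fmr_param setup is borrowed from what srp_add_one() does for the global pool today, so the exact field and constant names may be off:

static int srp_create_fmr_pool(struct srp_target_port *target)
{
        struct srp_device *dev = target->srp_host->srp_dev;
        struct ib_fmr_pool_param fmr_param;

        memset(&fmr_param, 0, sizeof(fmr_param));
        /* Size the pool for this connection instead of using the global
         * SRP_FMR_POOL_SIZE, so every login gets a guaranteed number of
         * mappings. */
        fmr_param.pool_size         = target->queue_size;
        fmr_param.dirty_watermark   = fmr_param.pool_size / 4;
        fmr_param.cache             = 1;
        fmr_param.max_pages_per_fmr = SRP_FMR_SIZE;
        fmr_param.page_shift        = ilog2(dev->fmr_page_size);
        fmr_param.access            = (IB_ACCESS_LOCAL_WRITE |
                                       IB_ACCESS_REMOTE_WRITE |
                                       IB_ACCESS_REMOTE_READ);

        /* Hypothetical per-target field replacing dev->fmr_pool. */
        target->fmr_pool = ib_create_fmr_pool(dev->pd, &fmr_param);
        if (IS_ERR(target->fmr_pool))
                return PTR_ERR(target->fmr_pool);
        return 0;
}

srp_map_data() would then take the pool from the target instead of the device, and srp_free_target_ib() would call ib_destroy_fmr_pool(). With pool_size tied to target->queue_size, FMR exhaustion would be bounded per connection instead of shared across all hosts.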

Cheers,
-Sagi