On Mar 24, 2015, at 7:34 AM, Devesh Sharma <[email protected]> wrote:

>> -----Original Message-----
>> From: [email protected] [mailto:linux-rdma-
>> [email protected]] On Behalf Of Chuck Lever
>> Sent: Saturday, March 14, 2015 2:58 AM
>> To: [email protected]
>> Subject: [PATCH v1 10/16] xprtrdma: Add "open" memreg op
>> 
>> The open op determines the size of various transport data structures based on
>> device capabilities and memory registration mode.
>> 
>> Signed-off-by: Chuck Lever <[email protected]>
>> ---
>> net/sunrpc/xprtrdma/fmr_ops.c      |   22 +++++++++++++
>> net/sunrpc/xprtrdma/frwr_ops.c     |   60 ++++++++++++++++++++++++++++++++++++
>> net/sunrpc/xprtrdma/physical_ops.c |   22 +++++++++++++
>> net/sunrpc/xprtrdma/verbs.c        |   54 ++------------------------------
>> net/sunrpc/xprtrdma/xprt_rdma.h    |    3 ++
>> 5 files changed, 110 insertions(+), 51 deletions(-)
>> 
>> diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c
>> index 3115e4b..96e6cd3 100644
>> --- a/net/sunrpc/xprtrdma/fmr_ops.c
>> +++ b/net/sunrpc/xprtrdma/fmr_ops.c
>> @@ -46,6 +46,27 @@ out_err:
>>      return nsegs;
>> }
>> 
>> +static int
>> +fmr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
>> +        struct rpcrdma_create_data_internal *cdata)
>> +{
>> +    struct ib_device_attr *devattr = &ia->ri_devattr;
>> +    unsigned int wrs, max_wrs;
>> +
>> +    max_wrs = devattr->max_qp_wr;
>> +    if (cdata->max_requests > max_wrs)
>> +            cdata->max_requests = max_wrs;
>> +
>> +    wrs = cdata->max_requests;
>> +    ep->rep_attr.cap.max_send_wr = wrs;
>> +    ep->rep_attr.cap.max_recv_wr = wrs;
>> +
>> +    dprintk("RPC:       %s: pre-allocating %u send WRs, %u recv WRs\n",
>> +            __func__, ep->rep_attr.cap.max_send_wr,
>> +            ep->rep_attr.cap.max_recv_wr);
>> +    return 0;
>> +}
>> +
>> /* FMR mode conveys up to 64 pages of payload per chunk segment.
>>  */
>> static size_t
>> @@ -201,6 +222,7 @@ fmr_op_destroy(struct rpcrdma_buffer *buf)
>>  const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops = {
>>      .ro_map                         = fmr_op_map,
>>      .ro_unmap                       = fmr_op_unmap,
>> +    .ro_open                        = fmr_op_open,
>>      .ro_maxpages                    = fmr_op_maxpages,
>>      .ro_init                        = fmr_op_init,
>>      .ro_reset                       = fmr_op_reset,
>> diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
>> index fc3a228..9bb4b2d 100644
>> --- a/net/sunrpc/xprtrdma/frwr_ops.c
>> +++ b/net/sunrpc/xprtrdma/frwr_ops.c
>> @@ -93,6 +93,65 @@ __frwr_release(struct rpcrdma_mw *r)
>>      ib_free_fast_reg_page_list(r->r.frmr.fr_pgl);
>> }
>> 
>> +static int
>> +frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
>> +         struct rpcrdma_create_data_internal *cdata)
>> +{
>> +    struct ib_device_attr *devattr = &ia->ri_devattr;
>> +    unsigned int wrs, max_wrs;
>> +    int depth = 7;
>> +
>> +    max_wrs = devattr->max_qp_wr;
>> +    if (cdata->max_requests > max_wrs)
>> +            cdata->max_requests = max_wrs;
>> +
>> +    wrs = cdata->max_requests;
>> +    ep->rep_attr.cap.max_send_wr = wrs;
>> +    ep->rep_attr.cap.max_recv_wr = wrs;
>> +
>> +    ia->ri_max_frmr_depth =
>> +                    min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS,
>> +                          devattr->max_fast_reg_page_list_len);
>> +    dprintk("RPC:       %s: device's max FR page list len = %u\n",
>> +            __func__, ia->ri_max_frmr_depth);
>> +
>> +    /* Add room for frmr register and invalidate WRs.
>> +     * 1. FRMR reg WR for head
>> +     * 2. FRMR invalidate WR for head
>> +     * 3. N FRMR reg WRs for pagelist
>> +     * 4. N FRMR invalidate WRs for pagelist
>> +     * 5. FRMR reg WR for tail
>> +     * 6. FRMR invalidate WR for tail
>> +     * 7. The RDMA_SEND WR
>> +     */
>> +
>> +    /* Calculate N if the device max FRMR depth is smaller than
>> +     * RPCRDMA_MAX_DATA_SEGS.
>> +     */
>> +    if (ia->ri_max_frmr_depth < RPCRDMA_MAX_DATA_SEGS) {
>> +            int delta = RPCRDMA_MAX_DATA_SEGS - ia->ri_max_frmr_depth;
>> +
>> +            do {
>> +                    depth += 2; /* FRMR reg + invalidate */
>> +                    delta -= ia->ri_max_frmr_depth;
>> +            } while (delta > 0);
> 
> Please add a check that ia->ri_max_frmr_depth is non-zero. A bug in the
> provider (reporting max_frmr_depth = 0 from query_device) would form an
> infinite loop here, and the mount would be stuck.

I’ll include a patch to address this in the next version of this series.

> 
>> +    }
>> +
>> +    ep->rep_attr.cap.max_send_wr *= depth;
>> +    if (ep->rep_attr.cap.max_send_wr > max_wrs) {
>> +            cdata->max_requests = max_wrs / depth;
>> +            if (!cdata->max_requests)
>> +                    return -EINVAL;
>> +            ep->rep_attr.cap.max_send_wr = cdata->max_requests *
>> +                                           depth;
>> +    }
>> +
>> +    dprintk("RPC:       %s: pre-allocating %u send WRs, %u recv WRs\n",
>> +            __func__, ep->rep_attr.cap.max_send_wr,
>> +            ep->rep_attr.cap.max_recv_wr);
>> +    return 0;
>> +}
>> +
>> /* FRWR mode conveys a list of pages per chunk segment. The
>>  * maximum length of that list is the FRWR page list depth.
>>  */
>> @@ -290,6 +349,7 @@ frwr_op_destroy(struct rpcrdma_buffer *buf)
>>  const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = {
>>      .ro_map                         = frwr_op_map,
>>      .ro_unmap                       = frwr_op_unmap,
>> +    .ro_open                        = frwr_op_open,
>>      .ro_maxpages                    = frwr_op_maxpages,
>>      .ro_init                        = frwr_op_init,
>>      .ro_reset                       = frwr_op_reset,
>> diff --git a/net/sunrpc/xprtrdma/physical_ops.c
>> b/net/sunrpc/xprtrdma/physical_ops.c
>> index f8da8c4..0998f4f 100644
>> --- a/net/sunrpc/xprtrdma/physical_ops.c
>> +++ b/net/sunrpc/xprtrdma/physical_ops.c
>> @@ -19,6 +19,27 @@
>> # define RPCDBG_FACILITY     RPCDBG_TRANS
>> #endif
>> 
>> +static int
>> +physical_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
>> +             struct rpcrdma_create_data_internal *cdata)
>> +{
>> +    struct ib_device_attr *devattr = &ia->ri_devattr;
>> +    unsigned int wrs, max_wrs;
>> +
>> +    max_wrs = devattr->max_qp_wr;
>> +    if (cdata->max_requests > max_wrs)
>> +            cdata->max_requests = max_wrs;
>> +
>> +    wrs = cdata->max_requests;
>> +    ep->rep_attr.cap.max_send_wr = wrs;
>> +    ep->rep_attr.cap.max_recv_wr = wrs;
>> +
>> +    dprintk("RPC:       %s: pre-allocating %u send WRs, %u recv WRs\n",
>> +            __func__, ep->rep_attr.cap.max_send_wr,
>> +            ep->rep_attr.cap.max_recv_wr);
>> +    return 0;
>> +}
>> +
>> /* PHYSICAL memory registration conveys one page per chunk segment.
>>  */
>> static size_t
>> @@ -75,6 +96,7 @@ physical_op_destroy(struct rpcrdma_buffer *buf)
>>  const struct rpcrdma_memreg_ops rpcrdma_physical_memreg_ops = {
>>      .ro_map                         = physical_op_map,
>>      .ro_unmap                       = physical_op_unmap,
>> +    .ro_open                        = physical_op_open,
>>      .ro_maxpages                    = physical_op_maxpages,
>>      .ro_init                        = physical_op_init,
>>      .ro_reset                       = physical_op_reset,
>> diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
>> index dcbc736..17b2a29 100644
>> --- a/net/sunrpc/xprtrdma/verbs.c
>> +++ b/net/sunrpc/xprtrdma/verbs.c
>> @@ -621,11 +621,6 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct
>> sockaddr *addr, int memreg)
>>                      dprintk("RPC:       %s: FRMR registration "
>>                              "not supported by HCA\n", __func__);
>>                      memreg = RPCRDMA_MTHCAFMR;
>> -            } else {
>> -                    /* Mind the ia limit on FRMR page list depth */
>> -                    ia->ri_max_frmr_depth = min_t(unsigned int,
>> -                            RPCRDMA_MAX_DATA_SEGS,
>> -                            devattr->max_fast_reg_page_list_len);
>>              }
>>      }
>>      if (memreg == RPCRDMA_MTHCAFMR) {
>> @@ -734,56 +729,13 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct
>> rpcrdma_ia *ia,
>>      struct ib_cq *sendcq, *recvcq;
>>      int rc, err;
>> 
>> -    /* check provider's send/recv wr limits */
>> -    if (cdata->max_requests > devattr->max_qp_wr)
>> -            cdata->max_requests = devattr->max_qp_wr;
>> +    rc = ia->ri_ops->ro_open(ia, ep, cdata);
>> +    if (rc)
>> +            return rc;
>> 
>>      ep->rep_attr.event_handler = rpcrdma_qp_async_error_upcall;
>>      ep->rep_attr.qp_context = ep;
>> -    /* send_cq and recv_cq initialized below */
>>      ep->rep_attr.srq = NULL;
>> -    ep->rep_attr.cap.max_send_wr = cdata->max_requests;
>> -    switch (ia->ri_memreg_strategy) {
>> -    case RPCRDMA_FRMR: {
>> -            int depth = 7;
>> -
>> -            /* Add room for frmr register and invalidate WRs.
>> -             * 1. FRMR reg WR for head
>> -             * 2. FRMR invalidate WR for head
>> -             * 3. N FRMR reg WRs for pagelist
>> -             * 4. N FRMR invalidate WRs for pagelist
>> -             * 5. FRMR reg WR for tail
>> -             * 6. FRMR invalidate WR for tail
>> -             * 7. The RDMA_SEND WR
>> -             */
>> -
>> -            /* Calculate N if the device max FRMR depth is smaller than
>> -             * RPCRDMA_MAX_DATA_SEGS.
>> -             */
>> -            if (ia->ri_max_frmr_depth < RPCRDMA_MAX_DATA_SEGS) {
>> -                    int delta = RPCRDMA_MAX_DATA_SEGS -
>> -                                ia->ri_max_frmr_depth;
>> -
>> -                    do {
>> -                            depth += 2; /* FRMR reg + invalidate */
>> -                            delta -= ia->ri_max_frmr_depth;
>> -                    } while (delta > 0);
>> -
>> -            }
>> -            ep->rep_attr.cap.max_send_wr *= depth;
>> -            if (ep->rep_attr.cap.max_send_wr > devattr->max_qp_wr) {
>> -                    cdata->max_requests = devattr->max_qp_wr / depth;
>> -                    if (!cdata->max_requests)
>> -                            return -EINVAL;
>> -                    ep->rep_attr.cap.max_send_wr = cdata->max_requests *
>> -                                                   depth;
>> -            }
>> -            break;
>> -    }
>> -    default:
>> -            break;
>> -    }
>> -    ep->rep_attr.cap.max_recv_wr = cdata->max_requests;
>>      ep->rep_attr.cap.max_send_sge = (cdata->padding ? 4 : 2);
>>      ep->rep_attr.cap.max_recv_sge = 1;
>>      ep->rep_attr.cap.max_inline_data = 0;
>> diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
>> index a0e3c3e..a53a564 100644
>> --- a/net/sunrpc/xprtrdma/xprt_rdma.h
>> +++ b/net/sunrpc/xprtrdma/xprt_rdma.h
>> @@ -340,6 +340,9 @@ struct rpcrdma_memreg_ops {
>>                                struct rpcrdma_mr_seg *, int, bool);
>>      void            (*ro_unmap)(struct rpcrdma_xprt *,
>>                                  struct rpcrdma_req *, unsigned int);
>> +    int             (*ro_open)(struct rpcrdma_ia *,
>> +                               struct rpcrdma_ep *,
>> +                               struct rpcrdma_create_data_internal *);
>>      size_t          (*ro_maxpages)(struct rpcrdma_xprt *);
>>      int             (*ro_init)(struct rpcrdma_xprt *);
>>      void            (*ro_reset)(struct rpcrdma_xprt *);
>> 
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the 
>> body
>> of a message to [email protected] More majordomo info at
>> http://vger.kernel.org/majordomo-info.html

--
Chuck Lever
chuck[dot]lever[at]oracle[dot]com



--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to