On Tue, Nov 10, 2015 at 01:46:40PM +0200, Sagi Grimberg wrote:
> 
> 
> On 10/11/2015 13:41, Christoph Hellwig wrote:
> >Oh, and while we're at it.  Can someone explain why we're even
> >using rdma_read_chunk_frmr for IB?  It seems to work around the
> >fact that iWarp only allows a single RDMA READ SGE, but it's used
> >whenever the device has IB_DEVICE_MEM_MGT_EXTENSIONS, which seems
> >wrong.
> 
> I think Steve can answer it better than I can. I think that it is
> just to have a single code path for both IB and iWARP. I agree that
> the condition seems wrong and for small transfers rdma_read_chunk_frmr
> is really a performance loss.

Well, the code path already exists, but is only used if
IB_DEVICE_MEM_MGT_EXTENSIONS isn't set.  Below is an untested patch
that demonstrates how I think svcrdma should set up the reads.  Note
that this also allows us to entirely remove its allphys MR.

Note that as a follow-on this would also allow removing the
non-READ_W_INV code path from rdma_read_chunk_frmr in a future
step.


diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
index f869807..35fa638 100644
--- a/include/linux/sunrpc/svc_rdma.h
+++ b/include/linux/sunrpc/svc_rdma.h
@@ -147,7 +147,6 @@ struct svcxprt_rdma {
        struct ib_qp         *sc_qp;
        struct ib_cq         *sc_rq_cq;
        struct ib_cq         *sc_sq_cq;
-       struct ib_mr         *sc_phys_mr;       /* MR for server memory */
        int                  (*sc_reader)(struct svcxprt_rdma *,
                                          struct svc_rqst *,
                                          struct svc_rdma_op_ctxt *,
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index ff4f01e..a410cb6 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -160,7 +160,7 @@ int rdma_read_chunk_lcl(struct svcxprt_rdma *xprt,
                        goto err;
                atomic_inc(&xprt->sc_dma_used);
 
-               /* The lkey here is either a local dma lkey or a dma_mr lkey */
+               /* The lkey here is the local dma lkey */
                ctxt->sge[pno].lkey = xprt->sc_dma_lkey;
                ctxt->sge[pno].length = len;
                ctxt->count++;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 9f3eb89..2780da4 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -887,8 +887,6 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
        struct ib_cq_init_attr cq_attr = {};
        struct ib_qp_init_attr qp_attr;
        struct ib_device *dev;
-       int uninitialized_var(dma_mr_acc);
-       int need_dma_mr = 0;
        int ret = 0;
        int i;
 
@@ -986,68 +984,41 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
        }
        newxprt->sc_qp = newxprt->sc_cm_id->qp;
 
-       /*
-        * Use the most secure set of MR resources based on the
-        * transport type and available memory management features in
-        * the device. Here's the table implemented below:
-        *
-        *              Fast    Global  DMA     Remote WR
-        *              Reg     LKEY    MR      Access
-        *              Sup'd   Sup'd   Needed  Needed
-        *
-        * IWARP        N       N       Y       Y
-        *              N       Y       Y       Y
-        *              Y       N       Y       N
-        *              Y       Y       N       -
-        *
-        * IB           N       N       Y       N
-        *              N       Y       N       -
-        *              Y       N       Y       N
-        *              Y       Y       N       -
-        *
-        * NB:  iWARP requires remote write access for the data sink
-        *      of an RDMA_READ. IB does not.
-        */
-       newxprt->sc_reader = rdma_read_chunk_lcl;
-       if (dev->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) {
-               newxprt->sc_frmr_pg_list_len =
-                       dev->max_fast_reg_page_list_len;
-               newxprt->sc_dev_caps |= SVCRDMA_DEVCAP_FAST_REG;
-               newxprt->sc_reader = rdma_read_chunk_frmr;
-       }
+       if (rdma_protocol_iwarp(dev, newxprt->sc_cm_id->port_num)) {
+               /*
+                * iWARP requires remote write access for the data sink, and
+                * only supports a single SGE for RDMA_READ requests, so we'll
+                * have to use a memory registration for each RDMA_READ.
+                */
+               if (!(dev->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS)) {
+                       /*
+                        * We're an iWarp device but don't support FRs.
+                        * Tough luck, better exit early as we have little
+                        * chance of doing something useful.
+                        */
+                       goto errout;
+               }
 
-       /*
-        * Determine if a DMA MR is required and if so, what privs are required
-        */
-       if (!rdma_protocol_iwarp(dev, newxprt->sc_cm_id->port_num) &&
-           !rdma_ib_or_roce(dev, newxprt->sc_cm_id->port_num))
+               newxprt->sc_frmr_pg_list_len = dev->max_fast_reg_page_list_len;
+               newxprt->sc_dev_caps =
+                        SVCRDMA_DEVCAP_FAST_REG |
+                        SVCRDMA_DEVCAP_READ_W_INV;
+               newxprt->sc_reader = rdma_read_chunk_frmr;
+       } else if (rdma_ib_or_roce(dev, newxprt->sc_cm_id->port_num)) {
+               /*
+                * For IB or RoCE life is easy, no unsafe write access is
+                * required and multiple SGEs are supported, so we don't need
+                * to use MRs.
+                */
+               newxprt->sc_reader = rdma_read_chunk_lcl;
+       } else {
+               /*
+                * Neither iWarp nor IB-ish, we're out of luck.
+                */
                goto errout;
-
-       if (!(newxprt->sc_dev_caps & SVCRDMA_DEVCAP_FAST_REG) ||
-           !(dev->device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)) {
-               need_dma_mr = 1;
-               dma_mr_acc = IB_ACCESS_LOCAL_WRITE;
-               if (rdma_protocol_iwarp(dev, newxprt->sc_cm_id->port_num) &&
-                   !(newxprt->sc_dev_caps & SVCRDMA_DEVCAP_FAST_REG))
-                       dma_mr_acc |= IB_ACCESS_REMOTE_WRITE;
        }
 
-       if (rdma_protocol_iwarp(dev, newxprt->sc_cm_id->port_num))
-               newxprt->sc_dev_caps |= SVCRDMA_DEVCAP_READ_W_INV;
-
-       /* Create the DMA MR if needed, otherwise, use the DMA LKEY */
-       if (need_dma_mr) {
-               /* Register all of physical memory */
-               newxprt->sc_phys_mr =
-                       ib_get_dma_mr(newxprt->sc_pd, dma_mr_acc);
-               if (IS_ERR(newxprt->sc_phys_mr)) {
-                       dprintk("svcrdma: Failed to create DMA MR ret=%d\n",
-                               ret);
-                       goto errout;
-               }
-               newxprt->sc_dma_lkey = newxprt->sc_phys_mr->lkey;
-       } else
-               newxprt->sc_dma_lkey = dev->local_dma_lkey;
+       newxprt->sc_dma_lkey = dev->local_dma_lkey;
 
        /* Post receive buffers */
        for (i = 0; i < newxprt->sc_max_requests; i++) {
@@ -1203,9 +1174,6 @@ static void __svc_rdma_free(struct work_struct *work)
        if (rdma->sc_rq_cq && !IS_ERR(rdma->sc_rq_cq))
                ib_destroy_cq(rdma->sc_rq_cq);
 
-       if (rdma->sc_phys_mr && !IS_ERR(rdma->sc_phys_mr))
-               ib_dereg_mr(rdma->sc_phys_mr);
-
        if (rdma->sc_pd && !IS_ERR(rdma->sc_pd))
                ib_dealloc_pd(rdma->sc_pd);
 
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to