Add RDMA CM support to CEP manager.

This patch adds extra logic in the CEP manager to enable comparing private data 
according to the IP Addressing Annex to the IB specification (aka RDMA CM).  
The existing IBAL ND provider and WinVerbs are both updated to take
advantage of the new functionality.  Rather than doing a memory comparison of
the private data, the listen's compare offset (cmp_offset) is overloaded to
represent a bit mask of fields to compare.  This overload only occurs if the
service ID is a valid RDMA CM service ID; otherwise, the behavior is unchanged.

Signed-off-by: Fab Tillier <[email protected]>

diff -dwup3 -x *svn* -x *makefile.inc -x *sources -r 
c:\dev\openib\ofw\gen1\branches\mlx4_30\trunk\core\al\kernel\al_cm_cep.c 
.\core\al\kernel\al_cm_cep.c
--- c:\dev\openib\ofw\gen1\branches\mlx4_30\trunk\core\al\kernel\al_cm_cep.c    
Fri Aug 03 16:53:03 2012
+++ .\core\al\kernel\al_cm_cep.c        Mon Jul 30 22:23:51 2012
@@ -2700,6 +2700,59 @@ __lookup_by_id(
 }
 
 
+static intn_t
+__cm_rdma_req_cmp(
+       __in UINT64 mask,
+       __in const ib_cm_rdma_req_t* p_cmp1,
+       __in const ib_cm_rdma_req_t* p_cmp2 )
+{
+       intn_t cmp;
+
+       if( p_cmp1->maj_min_ver != p_cmp2->maj_min_ver )
+       {
+               return (intn_t)p_cmp1->maj_min_ver - 
(intn_t)p_cmp2->maj_min_ver;
+       }
+
+       if( p_cmp1->ipv != p_cmp2->ipv )
+       {
+               return (intn_t)p_cmp1->ipv - (intn_t)p_cmp2->ipv;
+       }
+
+       if( mask & IB_REQ_CM_RDMA_CMP_SRC_PORT )
+       {
+               if( p_cmp1->src_port != p_cmp2->src_port )
+               {
+                       return (intn_t)p_cmp1->src_port - 
(intn_t)p_cmp2->src_port;
+               }
+       }
+
+       if( mask & IB_REQ_CM_RDMA_CMP_SRC_IP )
+       {
+               cmp = cl_memcmp( p_cmp1->src_ip_addr,
+                       p_cmp2->src_ip_addr, sizeof(p_cmp2->src_ip_addr) );
+               if( cmp != 0 )
+               {
+                       return cmp;
+               }
+       }
+
+       if( mask & IB_REQ_CM_RDMA_CMP_DST_IP )
+       {
+               cmp = cl_memcmp( p_cmp1->dst_ip_addr,
+                       p_cmp2->dst_ip_addr, sizeof(p_cmp2->dst_ip_addr) );
+               if( cmp != 0 )
+               {
+                       return cmp;
+               }
+       }
+
+       /*
+        * TODO: Richer compare options to allow specifying pdata compare
+        */
+       return 0;
+}
+
+
 /*
  * Lookup a CEP by Service ID and private data.
  */
@@ -2746,10 +2799,36 @@ port_cmp:
 pdata_cmp:
                if( p_cep->p_cmp_buf && p_pdata )
                {
-                       int len = min(p_cep->cmp_len, IB_REQ_PDATA_SIZE - 
p_cep->cmp_offset);
+                       if( ib_cm_is_rdma_cm_sid(sid) )
+                       {
+                               ib_cm_rdma_req_t *p_rdma_req = 
(ib_cm_rdma_req_t *)p_pdata;
+                               CL_ASSERT(p_cep->cmp_len >= 
FIELD_OFFSET(ib_cm_rdma_req_t, pdata));
+
+                               /* reject connection request with incorrect 
version parameters */
+                               if( ib_cm_is_rdma_cm_req_valid( p_rdma_req ) == 
FALSE )
+                               {
+                                       AL_PRINT_EXIT( TRACE_LEVEL_ERROR, 
AL_DBG_ERROR, 
+                                               ("RDMA CM connection req is 
invalid: maj_min_ver %d, ipv %#x \n", 
+                                               p_rdma_req->maj_min_ver, 
p_rdma_req->ipv ) );
+                                       return NULL;
+                               }
                        
+                               cmp = __cm_rdma_req_cmp(
+                                       p_cep->cmp_offset,
+                                       p_rdma_req,
+                                       (ib_cm_rdma_req_t*)p_cep->p_cmp_buf
+                                       );
+                       }
+                       else
+                       {
+                /*
+                 * TODO: this check seems to be for catching a malformed 
listen, and should
+                 * be trapped when the listen is created.  Checking after the 
fact is dumb.
+                 */
+                               int len = min(p_cep->cmp_len, IB_REQ_PDATA_SIZE 
- p_cep->cmp_offset);
                        cmp = cl_memcmp( &p_pdata[p_cep->cmp_offset],
                                p_cep->p_cmp_buf, len );
+                       }
 
                        if( !cmp )
                                goto match;
@@ -3423,26 +3502,6 @@ __cep_queue_mad(
                return IB_INVALID_STATE;
        }
 
-       // TODO: Remove - manage above core kernel CM code
-       /* NDI connection request case */
-       if ( p_cep->state == CEP_STATE_LISTEN &&
-               (p_cep->sid & IB_REQ_CM_RDMA_SID_PREFIX_MASK) == 
IB_REQ_CM_RDMA_SID_PREFIX )
-       { /* Try to complete pending IRP, if any */
-               mad_cm_req_t* p_req = (mad_cm_req_t*)ib_get_mad_buf( p_mad );
-               ib_cm_rdma_req_t *p_rdma_req = (ib_cm_rdma_req_t *)p_req->pdata;
-
-               /* reject connection request with incorrect version parameters 
*/
-               if ( ((p_rdma_req->maj_min_ver >> 4) != 
IB_REQ_CM_RDMA_MAJOR_VERSION) ||
-                        ((p_rdma_req->maj_min_ver & 0x0f) > 
IB_REQ_CM_RDMA_MINOR_VERSION) ||
-                        (p_rdma_req->ipv != 0x40 && p_rdma_req->ipv != 0x60) )
-               {
-                       AL_PRINT_EXIT( TRACE_LEVEL_ERROR, AL_DBG_ERROR, 
-                               ("RDMA CM connection req is rejected: 
maj_min_ver %d, ipv %#x \n", 
-                               p_rdma_req->maj_min_ver, p_rdma_req->ipv ) );
-                       return IB_UNSUPPORTED;
-               }
-       }
-
        /* Queue this MAD for processing. */
        if( p_cep->p_mad_head )
        {
@@ -4258,6 +4317,7 @@ al_cep_listen(
        boolean_t                       left = TRUE;
        intn_t                          cmp;
        KLOCK_QUEUE_HANDLE      hdl;
+       ib_cm_rdma_req_t*       p_rdma_req = 
(ib_cm_rdma_req_t*)p_listen_info->p_cmp_buf;
 
        AL_PRINT( TRACE_LEVEL_VERBOSE, AL_DBG_CM, ("[ CID = %d\n", cid) );
 
@@ -4291,6 +4351,24 @@ al_cep_listen(
                goto done;
        }
 
+       if( ib_cm_is_rdma_cm_sid(p_listen_info->svc_id) && p_rdma_req != NULL )
+       {
+               if( p_listen_info->cmp_len < FIELD_OFFSET(ib_cm_rdma_req_t, 
pdata) )
+               {
+                       status = IB_INVALID_SETTING;
+                       goto done;
+               }
+
+               if( ib_cm_is_rdma_cm_req_valid(p_rdma_req) == FALSE )
+               {
+                       AL_PRINT_EXIT( TRACE_LEVEL_ERROR, AL_DBG_ERROR, 
+                               ("RDMA CM listen is invalid: maj_min_ver %d, 
ipv %#x \n", 
+                               p_rdma_req->maj_min_ver, p_rdma_req->ipv ) );
+                       status = IB_INVALID_SETTING;
+                       goto done;
+               }
+       }
+
        /* Insert the CEP into the listen map. */
        p_item = cl_rbmap_root( &gp_cep_mgr->listen_map );
        p_insert_at = p_item;
@@ -4336,17 +4414,26 @@ pdata_cmp:
                if( p_listen_info->p_cmp_buf )
                {
                        /* Compare length must match. */
-                       //if( p_listen_info->cmp_len != p_listen->cmp_len )
-                       //      break;
-
-                       /* Compare offset must match. */
-                       //if( p_listen_info->cmp_offset != p_listen->cmp_offset 
)
-                       //      break;
+                       if( p_listen_info->cmp_len != p_listen->cmp_len )
+                               break;
 
-                       int len = min(p_listen_info->cmp_len, 
p_listen->cmp_len);
+                       /* Compare offset (or mask for RDMA CM) must match. */
+                       if( p_listen_info->cmp_offset != p_listen->cmp_offset )
+                               break;
                        
-                       cmp = cl_memcmp( p_listen_info->p_cmp_buf,
-                               p_listen->p_cmp_buf, len );
+                       if( ib_cm_is_rdma_cm_sid(p_listen_info->svc_id) )
+                       {
+                               cmp = __cm_rdma_req_cmp(
+                                       p_listen->cmp_offset,
+                                       p_rdma_req,
+                                       (ib_cm_rdma_req_t*)p_listen->p_cmp_buf
+                                       );
+                       }
+                       else
+                       {
+                               cmp = cl_memcmp( &p_listen_info->p_cmp_buf,
+                                       p_listen->p_cmp_buf, p_listen->cmp_len 
);
+                       }
 
                        if( cmp < 0 )
                                p_item = cl_rbmap_left( p_item ), left = TRUE;
diff -dwup3 -x *svn* -x *makefile.inc -x *sources -r 
c:\dev\openib\ofw\gen1\branches\mlx4_30\trunk\core\winverbs\kernel\wv_ep.c 
.\core\winverbs\kernel\wv_ep.c
--- c:\dev\openib\ofw\gen1\branches\mlx4_30\trunk\core\winverbs\kernel\wv_ep.c  
Thu Mar 29 00:15:18 2012
+++ .\core\winverbs\kernel\wv_ep.c      Thu Jul 26 15:31:14 2012
@@ -354,18 +354,20 @@ static void WvFormatCmaHeader(IB_CMA_HEA
 {
        pHeader->CmaVersion = IB_CMA_VERSION;
        if (pLocalAddress->SockAddr.Sa.SaFamily == WV_AF_INET) {
-               pHeader->IpVersion = 4 << 4;
+               pHeader->IpVersion = IB_REQ_CM_RDMA_IPV4;
                RtlZeroMemory(pHeader->SrcAddress.Ip4.Pad, 
sizeof(pHeader->SrcAddress.Ip4.Pad));
                pHeader->SrcAddress.Ip4.Address = 
pLocalAddress->SockAddr.In.SinAddr;
                RtlZeroMemory(pHeader->DstAddress.Ip4.Pad, 
sizeof(pHeader->DstAddress.Ip4.Pad));
                pHeader->DstAddress.Ip4.Address = 
pPeerAddress->SockAddr.In.SinAddr;
                pHeader->Port = pLocalAddress->SockAddr.In.SinPort;
        } else {
-               pHeader->IpVersion = 6 << 4;
+               pHeader->IpVersion = IB_REQ_CM_RDMA_IPV6;
                RtlCopyMemory(pHeader->SrcAddress.Ip6Address,
-                                         pLocalAddress->SockAddr.In6.Sin6Addr, 
16);
+                                         pLocalAddress->SockAddr.In6.Sin6Addr,
+                                         
sizeof(pHeader->SrcAddress.Ip6Address));
                RtlCopyMemory(pHeader->DstAddress.Ip6Address,
-                                         pPeerAddress->SockAddr.In6.Sin6Addr, 
16);
+                                         pPeerAddress->SockAddr.In6.Sin6Addr,
+                                         
sizeof(pHeader->DstAddress.Ip6Address));
                pHeader->Port = pLocalAddress->SockAddr.In6.Sin6Port;
        }
 }
@@ -1197,8 +1199,10 @@ void WvEpListen(WV_PROVIDER *pProvider, 
        WV_IO_EP_LISTEN         *pattr;
        NTSTATUS                        status;
        void                            *buf;
-       UINT8                           offset, len;
+       UINT8                           len;
+       UINT8                           mask;
        UINT64                          sid;
+       IB_CMA_HEADER           hdr;
 
        status = WdfRequestRetrieveInputBuffer(Request, sizeof(WV_IO_EP_LISTEN),
                                                                                
   &pattr, NULL);
@@ -1214,18 +1218,13 @@ void WvEpListen(WV_PROVIDER *pProvider, 
 
        if (WvAnyAddress(&ep->Attributes.LocalAddress)) {
                buf = NULL;
-               offset = 0;
                len = 0;
+               mask = 0;
        } else {
-               if (ep->Attributes.LocalAddress.SockAddr.Sa.SaFamily == 
WV_AF_INET) {
-                       buf = &ep->Attributes.LocalAddress.SockAddr.In.SinAddr;
-                       len = sizeof 
ep->Attributes.LocalAddress.SockAddr.In.SinAddr;
-                       offset = FIELD_OFFSET(IB_CMA_HEADER, 
DstAddress.Ip4.Address);
-               } else {
-                       buf = ep->Attributes.LocalAddress.SockAddr.In6.Sin6Addr;
-                       len = sizeof 
ep->Attributes.LocalAddress.SockAddr.In6.Sin6Addr;
-                       offset = FIELD_OFFSET(IB_CMA_HEADER, 
DstAddress.Ip6Address);
-               }
+               WvFormatCmaHeader(&hdr, &ep->Attributes.LocalAddress, 
&ep->Attributes.LocalAddress);
+               buf = &hdr;
+               len = sizeof(hdr);
+               mask = IB_REQ_CM_RDMA_CMP_DST_IP;
        }
 
        WdfObjectAcquireLock(ep->Queue);
@@ -1242,7 +1241,7 @@ void WvEpListen(WV_PROVIDER *pProvider, 
        ep->Attributes.Param.Backlog = pattr->Backlog;
        ep->State = WvEpListening;
        sid = WvGetServiceId(ep->EpType, &ep->Attributes.LocalAddress);
-       status = IbCmInterface.CM.listen(ep->pIbCmId, sid, buf, len, offset);
+       status = IbCmInterface.CM.listen(ep->pIbCmId, sid, buf, len, mask);
 
 release:
        WdfObjectReleaseLock(ep->Queue);
diff -dwup3 -x *svn* -x *makefile.inc -x *sources -r 
c:\dev\openib\ofw\gen1\branches\mlx4_30\trunk\inc\iba\ib_types.h 
.\inc\iba\ib_types.h
--- c:\dev\openib\ofw\gen1\branches\mlx4_30\trunk\inc\iba\ib_types.h    Fri Aug 
03 12:14:07 2012
+++ .\inc\iba\ib_types.h        Thu Jul 26 15:31:14 2012
@@ -12876,19 +12876,40 @@ typedef struct _ib_time_stamp {
 *      ib_cc_mad_t
 *********/
 
-#define IB_REQ_CM_RDMA_SID_PREFIX                      CL_NTOH64( 
0x0000000001000000I64 )
-#define IB_REQ_CM_RDMA_SID_PREFIX_MASK         CL_NTOH64( 
0xFFFFFFFFFF000000I64 )
+#define IB_REQ_CM_RDMA_SID_PREFIX                      0x0000000100000000ULL
+#define IB_REQ_CM_RDMA_SID_PREFIX_MASK      0x000000FFFFFFFFFFULL
 #define IB_REQ_CM_RDMA_PDATA_SIZE                      56
 #define IB_REQ_CM_RDMA_MAJOR_VERSION           0
 #define IB_REQ_CM_RDMA_MINOR_VERSION           0
+#define IB_REQ_CM_RDMA_VERSION              ((IB_REQ_CM_RDMA_MAJOR_VERSION << 
4) |\
+                                            IB_REQ_CM_RDMA_MINOR_VERSION)
+#define IB_REQ_CM_RDMA_IPV4                 0x40
+#define IB_REQ_CM_RDMA_IPV6                 0x60
 
+/*
+ * Bit masks to define what fields should be compared.  Major, Minor and IP
+ * version fields are always compared.
+ */
+#define IB_REQ_CM_RDMA_CMP_SRC_PORT         0x01
+#define IB_REQ_CM_RDMA_CMP_SRC_IP           0x02
+#define IB_REQ_CM_RDMA_CMP_DST_IP           0x04
 
-/****s* Access Layer/ib_cm_rep_t
+static inline boolean_t ib_cm_is_rdma_cm_sid(uint64_t sid)
+{
+    return ((sid & IB_REQ_CM_RDMA_SID_PREFIX_MASK) == 
IB_REQ_CM_RDMA_SID_PREFIX);
+}
+
+static inline net64_t ib_cm_rdma_cm_sid(uint8_t protocol, net16_t port)
+{
+    return IB_REQ_CM_RDMA_SID_PREFIX | (UINT64)protocol << 40 | (UINT64)port 
<< 48;
+}
+
+/****s* Access Layer/ib_cm_rdma_req_t
 * NAME
 *      ib_cm_rdma_req_t
 *
 * DESCRIPTION
-*      Connection reply information used when establishing a connection.
+*      IP Addressing CM REQ Message Private Data Format.
 *
 * SYNOPSIS
 */
@@ -12923,6 +12944,13 @@ typedef struct _ib_cm_rdma_req
 *              Contains Consumer Private Data.
 *
 *****/
+
+static inline boolean_t ib_cm_is_rdma_cm_req_valid(ib_cm_rdma_req_t* 
p_rdma_req)
+{
+       return (p_rdma_req->maj_min_ver == IB_REQ_CM_RDMA_VERSION) &&
+               ((p_rdma_req->ipv == IB_REQ_CM_RDMA_IPV4 ||
+               p_rdma_req->ipv == IB_REQ_CM_RDMA_IPV6));
+}
 
 AL_INLINE net64_t AL_API
 ib_cm_rdma_sid(
diff -dwup3 -x *svn* -x *makefile.inc -x *sources -r 
c:\dev\openib\ofw\gen1\branches\mlx4_30\trunk\ulp\nd\user\NdListen.cpp 
.\ulp\nd\user\NdListen.cpp
--- c:\dev\openib\ofw\gen1\branches\mlx4_30\trunk\ulp\nd\user\NdListen.cpp      
Thu May 31 11:22:11 2012
+++ .\ulp\nd\user\NdListen.cpp  Wed May 23 18:26:49 2012
@@ -35,7 +35,6 @@
 #pragma warning( push, 3 )
 #include "winternl.h"
 #pragma warning( pop )
-#include <complib/cl_byteswap.h>
 #include <limits.h>
 #include "nddebug.h"
 
@@ -185,30 +184,36 @@ HRESULT GetPdataForActive(
         ual_cep_listen_ioctl_t listen;
         listen.cid = 0;
 
-        listen.cep_listen.svc_id = ib_cm_rdma_sid( (uint8_t) Protocol, Port );
+        listen.cep_listen.svc_id = 
ib_cm_rdma_sid(static_cast<UINT8>(Protocol), Port);
 
         listen.cep_listen.port_guid = m_pParent->m_PortGuid;
 
+        ib_cm_rdma_req_t* pdata = 
reinterpret_cast<ib_cm_rdma_req_t*>(listen.compare);
+        pdata->maj_min_ver = IB_REQ_CM_RDMA_VERSION;
+        pdata->src_port = 0;
+        ZeroMemory( &pdata->src_ip_addr, sizeof(pdata->src_ip_addr) );
+
         switch( m_pParent->m_Addr.v4.sin_family )
         {
         case AF_INET:
-            ZeroMemory( listen.compare, ATS_IPV4_OFFSET );
-            CopyMemory( &listen.compare[ATS_IPV4_OFFSET],
-                (uint8_t*)&m_pParent->m_Addr.v4.sin_addr,
-                sizeof(m_pParent->m_Addr.v4.sin_addr) );
+            pdata->ipv = IB_REQ_CM_RDMA_IPV4;
+            pdata->dst_ip_addr[0] = pdata->dst_ip_addr[1] = 
pdata->dst_ip_addr[2] = 0;
+            pdata->dst_ip_addr[3] = m_pParent->m_Addr.v4.sin_addr.s_addr;
             ND_PRINT( TRACE_LEVEL_INFORMATION, ND_DBG_NDI,
                 ("Listen for: IP %#x, port %#hx\n", 
-                cl_hton32(m_pParent->m_Addr.v4.sin_addr.S_un.S_addr), 
cl_hton16(m_pParent->m_Addr.v4.sin_port) ) );
+                _byteswap_ulong(m_pParent->m_Addr.v4.sin_addr.S_un.S_addr),
+                _byteswap_ushort(m_pParent->m_Addr.v4.sin_port) ) );
             break;
         case AF_INET6:
-            CopyMemory( listen.compare,
-                (uint8_t*)&m_pParent->m_Addr.v6.sin6_addr,
+            pdata->ipv = IB_REQ_CM_RDMA_IPV6;
+            CopyMemory( &pdata->dst_ip_addr,
+                &m_pParent->m_Addr.v6.sin6_addr,
                 sizeof(m_pParent->m_Addr.v6.sin6_addr) );
             break;
         }
         listen.cep_listen.p_cmp_buf = listen.compare;
-        listen.cep_listen.cmp_len = 16;
-        listen.cep_listen.cmp_offset = FIELD_OFFSET( ib_cm_rdma_req_t, 
dst_ip_addr );
+        listen.cep_listen.cmp_len = FIELD_OFFSET(ib_cm_rdma_req_t, pdata);
+        listen.cep_listen.cmp_offset = IB_REQ_CM_RDMA_CMP_DST_IP;
 
         IO_STATUS_BLOCK IoStatus;
         IoStatus.Status = g_NtDeviceIoControlFile(

Attachment: ndv2.21.patch
Description: ndv2.21.patch

_______________________________________________
ofw mailing list
[email protected]
http://lists.openfabrics.org/cgi-bin/mailman/listinfo/ofw

Reply via email to