I don't know who maintains src/userspace/perftest, but here is a patch
set that enables rdma_bw and rdma_lat to use the RDMA_CM with the
addition of the -c or --cma flag.

The rkey/addr info is exchanged in the connection private data, and
SEND/RECVs are used to synchronize the client and server before and
after the test runs.

Also, I added -P or --poll to rdma_bw to select how it waits for
completions: if you pass -P, it will spin; if you omit -P, it will
block for a completion event when no completion is available.

Signed-off-by: Steve Wise <[EMAIL PROTECTED]>



Index: rdma_lat.c
===================================================================
--- rdma_lat.c  (revision 7050)
+++ rdma_lat.c  (working copy)
@@ -53,6 +53,7 @@
 #include <time.h>
 
 #include <infiniband/verbs.h>
+#include <rdma/rdma_cma.h>
 
 #include "get_clock.h"
 
@@ -71,7 +72,8 @@
        struct ibv_context *context;
        struct ibv_pd      *pd;
        struct ibv_mr      *mr;
-       struct ibv_cq      *cq;
+       struct ibv_cq      *scq;
+       struct ibv_cq      *rcq;
        struct ibv_qp      *qp;
        void               *buf;
        volatile char      *post_buf;
@@ -80,6 +82,7 @@
        int                 tx_depth;
        struct ibv_sge list;
        struct ibv_send_wr wr;
+       struct rdma_cm_id  *cm_id;
 };
 
 struct pingpong_dest {
@@ -323,16 +326,22 @@
                return NULL;
        }
 
-       ctx->cq = ibv_create_cq(ctx->context, tx_depth, NULL, NULL, 0);
-       if (!ctx->cq) {
+       ctx->rcq = ibv_create_cq(ctx->context, 1, NULL, NULL, 0);
+       if (!ctx->rcq) {
                fprintf(stderr, "Couldn't create CQ\n");
                return NULL;
        }
 
+       ctx->scq = ibv_create_cq(ctx->context, tx_depth, NULL, NULL, 0);
+       if (!ctx->scq) {
+               fprintf(stderr, "Couldn't create CQ\n");
+               return NULL;
+       }
+
        {
                struct ibv_qp_init_attr attr = {
-                       .send_cq = ctx->cq,
-                       .recv_cq = ctx->cq,
+                       .send_cq = ctx->scq,
+                       .recv_cq = ctx->rcq,
                        .cap     = {
                                .max_send_wr  = tx_depth,
                                /* Work around:  driver doesnt support
@@ -370,13 +379,6 @@
                }
        }
 
-       ctx->wr.wr_id      = PINGPONG_RDMA_WRID;
-       ctx->wr.sg_list    = &ctx->list;
-       ctx->wr.num_sge    = 1;
-       ctx->wr.opcode     = IBV_WR_RDMA_WRITE;
-       ctx->wr.send_flags = IBV_SEND_SIGNALED | IBV_SEND_INLINE;
-       ctx->wr.next       = NULL;
-
        return ctx;
 }
 
@@ -489,6 +491,467 @@
        return 0;
 }
 
+/* CMA STUFF */
+
+static void pp_post_recv(struct pingpong_context *ctx)
+{
+       struct ibv_sge list;
+       struct ibv_recv_wr wr, *bad_wr;
+       int rc;
+
+       list.addr = (uintptr_t) ctx->buf;
+       list.length = 1;
+       list.lkey = ctx->mr->lkey;
+       wr.next = NULL;
+       wr.wr_id = 0xdeadbeef;
+       wr.sg_list = &list;
+       wr.num_sge = 1;
+
+       rc = ibv_post_recv(ctx->qp, &wr, &bad_wr);
+       if (rc) {
+               perror("ibv_post_recv");
+               fprintf(stderr, "%s ibv_post_recv failed %d\n", __FUNCTION__, 
rc);
+       }
+}
+
+static struct pingpong_context *pp_init_cma_ctx(struct rdma_cm_id *cm_id,
+                                           unsigned size,
+                                           int tx_depth, int port)
+{
+       struct pingpong_context *ctx;
+
+       ctx = malloc(sizeof *ctx);
+       if (!ctx)
+               return NULL;
+
+       ctx->size     = size;
+       ctx->tx_depth = tx_depth;
+
+       ctx->buf = memalign(page_size, size * 2);
+       if (!ctx->buf) {
+               fprintf(stderr, "Couldn't allocate work buf.\n");
+               return NULL;
+       }
+
+       memset(ctx->buf, 0, size * 2);
+
+       ctx->post_buf = (char*)ctx->buf + (size - 1);
+       ctx->poll_buf = (char*)ctx->buf + (2 * size - 1);
+
+       ctx->cm_id = cm_id;
+       ctx->context = cm_id->verbs;
+       if (!ctx->context) {
+               fprintf(stderr, "%s Unbound cm_id!!\n", __FUNCTION__);
+               return NULL;
+       }
+
+       ctx->pd = ibv_alloc_pd(ctx->context);
+       if (!ctx->pd) {
+               fprintf(stderr, "Couldn't allocate PD\n");
+               return NULL;
+       }
+
+        /* We dont really want IBV_ACCESS_LOCAL_WRITE, but IB spec says:
+         * The Consumer is not allowed to assign Remote Write or Remote Atomic 
to
+         * a Memory Region that has not been assigned Local Write. */
+       ctx->mr = ibv_reg_mr(ctx->pd, ctx->buf, size * 2,
+                            IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_LOCAL_WRITE);
+       if (!ctx->mr) {
+               fprintf(stderr, "Couldn't allocate MR\n");
+               return NULL;
+       }
+
+       ctx->rcq = ibv_create_cq(ctx->context, 1, NULL, NULL, 0);
+       if (!ctx->rcq) {
+               fprintf(stderr, "Couldn't create RCQ\n");
+               return NULL;
+       }
+
+       ctx->scq = ibv_create_cq(ctx->context, tx_depth, NULL, NULL, 0);
+       if (!ctx->scq) {
+               fprintf(stderr, "Couldn't create SCQ\n");
+               return NULL;
+       }
+
+       {
+               struct ibv_qp_init_attr attr = {
+                       .send_cq = ctx->scq,
+                       .recv_cq = ctx->rcq,
+                       .cap     = {
+                               .max_send_wr  = tx_depth,
+                               .max_recv_wr  = 1,
+                               .max_send_sge = 1,
+                               .max_recv_sge = 1,
+                               .max_inline_data = size
+                       },
+                       .qp_type = IBV_QPT_RC
+               };
+
+               if (rdma_create_qp(ctx->cm_id, ctx->pd, &attr)) {
+                       fprintf(stderr, "Couldn't create QP\n");
+                       return NULL;
+               }
+               ctx->qp = ctx->cm_id->qp;
+       }
+
+       pp_post_recv(ctx);
+
+       return ctx;
+}
+
+static void pp_close_cma(struct pingpong_context *ctx, const char *servername)
+{
+       struct rdma_cm_event *event;
+       int rc;
+
+       if (servername) {
+               rc = rdma_disconnect(ctx->cm_id);
+               if (rc) {
+                       perror("rdma_disconnect");
+                       return;
+               }
+       }
+       
+       rdma_get_cm_event(&event);
+       if (event->event != RDMA_CM_EVENT_DISCONNECTED)
+               printf("unexpected event during disconnect %d\n", event->event);
+       rdma_ack_cm_event(event);
+       rdma_destroy_id(ctx->cm_id);
+}
+
+static struct pingpong_context *pp_server_connect_cma(unsigned short port, int 
size, int tx_depth,
+                                                     struct pingpong_dest 
*my_dest, 
+                                                     struct pingpong_dest 
*rem_dest)
+{
+       struct rdma_cm_id *listen_id;
+       struct rdma_cm_event *event;
+       struct rdma_conn_param conn_param;
+       int ret;
+       struct sockaddr_in sin;
+       struct rdma_cm_id *child_cm_id;
+       struct pingpong_context *ctx;
+
+       printf("%s starting server\n", __FUNCTION__);
+       ret = rdma_create_id(&listen_id, NULL);
+       if (ret) {
+               fprintf(stderr, "%s rdma_create_id failed %d\n", __FUNCTION__, 
ret);
+               return NULL;
+       }
+
+       sin.sin_addr.s_addr = 0;
+       sin.sin_family = PF_INET;
+       sin.sin_port = htons(port);
+       ret = rdma_bind_addr(listen_id, (struct sockaddr *)&sin);
+       if (ret) {
+               fprintf(stderr,"%s rdma_bind_addr failed %d\n", __FUNCTION__, 
ret);
+               goto err2;
+       }
+
+       ret = rdma_listen(listen_id, 0);
+       if (ret) {
+               fprintf(stderr,"%s rdma_listen failed %d\n", __FUNCTION__, ret);
+               goto err2;
+       }
+
+       ret = rdma_get_cm_event(&event);
+       if (ret) 
+               goto err2;
+
+       if (event->event != RDMA_CM_EVENT_CONNECT_REQUEST) {
+               fprintf(stderr,"%s bad event waiting for connect request %d\n", 
__FUNCTION__, 
+                      event->event);
+               goto err1;
+       }
+
+       if (!event->private_data || (event->private_data_len < 
sizeof(*rem_dest))) {
+               ret = 1;
+               fprintf(stderr,"%s bad private data len %d\n", __FUNCTION__, 
+                      event->private_data_len);
+               goto err1;
+       }
+       
+       memcpy(rem_dest, event->private_data, sizeof(*rem_dest));
+       child_cm_id = (struct rdma_cm_id *)event->id;
+       ctx = pp_init_cma_ctx(child_cm_id, size, tx_depth, port);
+
+       if (!ctx) {
+               fprintf(stderr,"%s pp_init_cma_ctx failed\n", __FUNCTION__);
+               goto err0;
+       }
+
+       my_dest->qpn = 0;
+       my_dest->psn = 0xbb;
+       my_dest->rkey = ctx->mr->rkey;
+       my_dest->vaddr = (uintptr_t)ctx->buf + ctx->size;
+       memset(&conn_param, 0, sizeof conn_param);
+       conn_param.responder_resources = 1;
+       conn_param.initiator_depth = 1;
+       conn_param.private_data = my_dest;
+       conn_param.private_data_len = sizeof(*my_dest);
+       ret = rdma_accept(ctx->cm_id, &conn_param);
+       if (ret) {
+               fprintf(stderr,"%s rdma_accept failed %d\n", __FUNCTION__, ret);
+               goto err0;
+       }
+       rdma_ack_cm_event(event);
+       ret = rdma_get_cm_event(&event);
+       if (ret) {
+               fprintf(stderr,"rdma_get_cm_event error %d\n", ret);
+               rdma_destroy_id(child_cm_id);
+               goto err2;
+       }
+       if (event->event != RDMA_CM_EVENT_ESTABLISHED) {
+               fprintf(stderr,"%s bad event waiting for established %d\n", 
__FUNCTION__, 
+                      event->event);
+               goto err0;
+       }
+       rdma_ack_cm_event(event);
+       fprintf(stderr,"%s connected!\n", __FUNCTION__);
+       return ctx;
+err0:
+       rdma_destroy_id(child_cm_id);
+err1:
+       rdma_ack_cm_event(event);
+err2:
+       rdma_destroy_id(listen_id);
+       fprintf(stderr,"%s NOT connected!\n", __FUNCTION__);
+       return NULL;
+}
+
+static unsigned get_dst_addr(const char *dst)
+{
+       struct addrinfo *res;
+       int ret;
+       unsigned addr;
+
+       ret = getaddrinfo(dst, NULL, NULL, &res);
+       if (ret) {
+               fprintf(stderr, "%s getaddrinfo failed - invalid hostname or IP 
address\n",
+                       __FUNCTION__);
+               return 0;
+       }
+
+       if (res->ai_family != PF_INET) {
+               return 0;
+       }
+
+       addr = ((struct sockaddr_in*)res->ai_addr)->sin_addr.s_addr;
+       freeaddrinfo(res);
+       return addr;
+}
+
+static struct pingpong_context *pp_client_connect_cma(const char *servername, 
+                                                     unsigned short port, int 
size, int tx_depth, 
+                                                     struct pingpong_dest 
*my_dest, 
+                                                     struct pingpong_dest 
*rem_dest)
+{
+       struct rdma_cm_event *event;
+       struct rdma_conn_param conn_param;
+       int ret;
+       struct sockaddr_in sin;
+       struct rdma_cm_id *cm_id;
+       struct pingpong_context *ctx;
+
+       fprintf(stderr,"%s starting client\n", __FUNCTION__);
+       sin.sin_addr.s_addr = get_dst_addr(servername);
+       if (!sin.sin_addr.s_addr) {
+               return NULL;
+       }
+
+       ret = rdma_create_id(&cm_id, NULL);
+       if (ret) {
+               fprintf(stderr,"%s rdma_create_id failed %d\n", __FUNCTION__, 
ret);
+               return NULL;
+       }
+
+       sin.sin_family = PF_INET;
+       sin.sin_port = htons(port);
+       ret = rdma_resolve_addr(cm_id, NULL, (struct sockaddr *)&sin, 2000);
+       if (ret) {
+               fprintf(stderr,"%s rdma_resolve_addr failed %d\n", 
__FUNCTION__, ret);
+               goto err2;
+       }
+
+       ret = rdma_get_cm_event(&event);
+       if (ret) 
+               goto err2;
+
+       if (event->event != RDMA_CM_EVENT_ADDR_RESOLVED) {
+               fprintf(stderr,"%s/%d unexpected CM event %d\n", __FUNCTION__, 
__LINE__,
+                      event->event);
+               goto err1;
+       }
+       rdma_ack_cm_event(event);
+
+       ret = rdma_resolve_route(cm_id, 2000);
+       if (ret) {
+               fprintf(stderr,"%s rdma_resolve_route failed %d\n", 
__FUNCTION__, ret);
+               goto err2;
+       }
+
+       ret = rdma_get_cm_event(&event);
+       if (ret) 
+               goto err2;
+
+       if (event->event != RDMA_CM_EVENT_ROUTE_RESOLVED) {
+               fprintf(stderr,"%s/%d unexpected CM event %d\n", __FUNCTION__, 
__LINE__,
+                      event->event);
+               goto err1;
+       }
+       rdma_ack_cm_event(event);
+
+       ctx = pp_init_cma_ctx(cm_id, size, tx_depth, port);
+
+       if (!ctx) {
+               fprintf(stderr,"%s pp_init_cma_ctx failed\n", __FUNCTION__);
+               goto err2;
+       }
+
+       my_dest->qpn = 0;
+       my_dest->psn = 0xaa;
+       my_dest->rkey = ctx->mr->rkey;
+       my_dest->vaddr = (uintptr_t)ctx->buf + ctx->size;
+       memset(&conn_param, 0, sizeof conn_param);
+       conn_param.responder_resources = 1;
+       conn_param.initiator_depth = 1;
+       conn_param.retry_count = 5;
+       conn_param.private_data = my_dest;
+       conn_param.private_data_len = sizeof(*my_dest);
+       ret = rdma_connect(ctx->cm_id, &conn_param);
+       if (ret) {
+               fprintf(stderr,"%s rdma_connect failure %d\n", __FUNCTION__, 
ret);
+               goto err2;
+       }
+
+       ret = rdma_get_cm_event(&event);
+       if (ret) 
+               goto err2;
+
+       if (event->event != RDMA_CM_EVENT_ESTABLISHED) {
+               fprintf(stderr,"%s/%d unexpected CM event %d\n", __FUNCTION__, 
__LINE__,
+                      event->event);
+               goto err1;
+       }
+       if (!event->private_data || (event->private_data_len < 
sizeof(*rem_dest))) {
+               fprintf(stderr,"%s bad private data ptr %p len %d\n", 
__FUNCTION__,
+                      event->private_data, event->private_data_len);
+               goto err1;
+       }
+
+       memcpy(rem_dest, event->private_data, sizeof(*rem_dest));
+       rdma_ack_cm_event(event);
+       fprintf(stderr,"connected!\n");
+       return ctx;
+err1:
+       rdma_ack_cm_event(event);
+err2:
+       fprintf(stderr,"NOT connected!\n");
+       rdma_destroy_id(cm_id);
+       return NULL;
+}
+
+static void pp_wait_for_done(struct pingpong_context *ctx)
+{
+       struct ibv_wc wc;
+       int ne;
+
+       do {
+               usleep(1000);
+               ne = ibv_poll_cq(ctx->rcq, 1, &wc);
+       } while (ne == 0);
+
+       if (wc.status) 
+               fprintf(stderr, "%s bad wc status %d\n", __FUNCTION__, 
wc.status);
+       if (!(wc.opcode & IBV_WC_RECV))
+               fprintf(stderr, "%s bad wc opcode %d\n", __FUNCTION__, 
wc.opcode);
+       if (wc.wr_id != 0xdeadbeef) 
+               fprintf(stderr, "%s bad wc wr_id 0x%x\n", __FUNCTION__, 
(int)wc.wr_id);
+}
+
+static void pp_send_done(struct pingpong_context *ctx)
+{
+       struct ibv_send_wr *bad_wr;
+       int rc;
+       struct ibv_wc wc;
+       int ne;
+
+       ctx->list.addr = (uintptr_t) ctx->buf;
+       ctx->list.length = 1;
+       ctx->list.lkey = ctx->mr->lkey;
+       ctx->wr.wr_id      = 0xcafebabe;
+       ctx->wr.sg_list    = &ctx->list;
+       ctx->wr.num_sge    = 1;
+       ctx->wr.opcode     = IBV_WR_SEND;
+       ctx->wr.send_flags = IBV_SEND_SIGNALED;
+       ctx->wr.next       = NULL;
+       rc = ibv_post_send(ctx->qp, &ctx->wr, &bad_wr);
+       if (rc != 0)
+               fprintf(stderr, "%s ibv_post_send failed %d!\n", __FUNCTION__, 
rc);
+       do {
+               usleep(1000);
+               ne = ibv_poll_cq(ctx->scq, 1, &wc);
+       } while (ne == 0);
+
+       if (wc.status) 
+               fprintf(stderr, "%s bad wc status %d\n", __FUNCTION__, 
wc.status);
+       if (wc.opcode != IBV_WC_SEND)
+               fprintf(stderr, "%s bad wc opcode %d\n", __FUNCTION__, 
wc.opcode);
+       if (wc.wr_id != 0xcafebabe) 
+               fprintf(stderr, "%s bad wc wr_id 0x%x\n", __FUNCTION__, 
(int)wc.wr_id);
+       sleep(1);
+}
+
+static void pp_wait_for_start(struct pingpong_context *ctx)
+{
+       struct ibv_wc wc;
+       int ne;
+
+       do {
+               usleep(1000);
+               ne = ibv_poll_cq(ctx->rcq, 1, &wc);
+       } while (ne == 0);
+
+       if (wc.status) 
+               fprintf(stderr, "%s bad wc status %d\n", __FUNCTION__, 
wc.status);
+       if (!(wc.opcode & IBV_WC_RECV))
+               fprintf(stderr, "%s bad wc opcode %d\n", __FUNCTION__, 
wc.opcode);
+       if (wc.wr_id != 0xdeadbeef) 
+               fprintf(stderr, "%s bad wc wr_id 0x%x\n", __FUNCTION__, 
(int)wc.wr_id);
+       pp_post_recv(ctx);
+}
+
+static void pp_send_start(struct pingpong_context *ctx)
+{
+       struct ibv_send_wr *bad_wr;
+       int rc;
+       struct ibv_wc wc;
+       int ne;
+
+       ctx->list.addr = (uintptr_t) ctx->buf;
+       ctx->list.length = 1;
+       ctx->list.lkey = ctx->mr->lkey;
+       ctx->wr.wr_id      = 0xabbaabba;
+       ctx->wr.sg_list    = &ctx->list;
+       ctx->wr.num_sge    = 1;
+       ctx->wr.opcode     = IBV_WR_SEND;
+       ctx->wr.send_flags = IBV_SEND_SIGNALED;
+       ctx->wr.next       = NULL;
+       rc = ibv_post_send(ctx->qp, &ctx->wr, &bad_wr);
+       if (rc != 0)
+               fprintf(stderr, "%s ibv_post_send failed %d!\n", __FUNCTION__, 
rc);
+       do {
+               usleep(1000);
+               ne = ibv_poll_cq(ctx->scq, 1, &wc);
+       } while (ne == 0);
+
+       if (wc.status) 
+               fprintf(stderr, "%s bad wc status %d\n", __FUNCTION__, 
wc.status);
+       if (wc.opcode != IBV_WC_SEND)
+               fprintf(stderr, "%s bad wc opcode %d\n", __FUNCTION__, 
wc.opcode);
+       if (wc.wr_id != 0xabbaabba) 
+               fprintf(stderr, "%s bad wc wr_id 0x%x\n", __FUNCTION__, 
(int)wc.wr_id);
+}
+
 static void usage(const char *argv0)
 {
        printf("Usage:\n");
@@ -505,6 +968,7 @@
        printf("  -C, --report-cycles    report times in cpu cycle units 
(default microseconds)\n");
        printf("  -H, --report-histogram print out all results (default print 
summary only)\n");
        printf("  -U, --report-unsorted  (implies -H) print out unsorted 
results (default sorted)\n");
+       printf("  -c, --cma              Use the RDMA CMA to setup the RDMA 
connection\n");
 }
 
 /*
@@ -599,6 +1063,7 @@
        struct ibv_send_wr      *wr;
        volatile char           *poll_buf;
        volatile char           *post_buf;
+       int                      use_cma=0;
 
        int                      scnt, rcnt, ccnt;
 
@@ -618,14 +1083,19 @@
                        { .name = "report-cycles",  .has_arg = 0, .val = 'C' },
                        { .name = "report-histogram",.has_arg = 0, .val = 'H' },
                        { .name = "report-unsorted",.has_arg = 0, .val = 'U' },
+                       { .name = "cma",            .has_arg = 0, .val = 'c' },
                        { 0 }
                };
 
-               c = getopt_long(argc, argv, "p:d:i:s:n:t:CHU", long_options, 
NULL);
+               c = getopt_long(argc, argv, "cp:d:i:s:n:t:CHU", long_options, 
NULL);
                if (c == -1)
                        break;
 
                switch (c) {
+               case 'c':
+                       use_cma = 1;
+                       break;
+
                case 'p':
                        port = strtol(optarg, NULL, 0);
                        if (port < 0 || port > 65535) {
@@ -697,23 +1167,62 @@
        srand48(getpid() * time(NULL));
        page_size = sysconf(_SC_PAGESIZE);
 
-       ib_dev = pp_find_dev(ib_devname);
-       if (!ib_dev)
-               return 7;
+       if (use_cma) {
+               int rc;
+               struct pingpong_dest my_dest;
+               
+               memset(&rem_dest, 0, sizeof(rem_dest));
 
-       ctx = pp_init_ctx(ib_dev, size, tx_depth, ib_port);
-       if (!ctx)
-               return 8;
+               if (servername)
+                       ctx = pp_client_connect_cma(servername, port, size, 
tx_depth, &my_dest, 
+                                                   &rem_dest);
+               else
+                       ctx = pp_server_connect_cma(port, size, tx_depth, 
&my_dest, &rem_dest);
+               if (!ctx) {
+                       fprintf(stderr, "pp_connect_cma(%s,%d) failed!\n", 
servername, port);
+                       return rc;
+               }
 
-       if (pp_open_port(ctx, servername, ib_port, port, &rem_dest))
-               return 9;
+               /*
+                * Synch up and force the server to wait for the client to send
+                * the first message (MPA requirement).
+                */
+               if (servername) {
+                       sleep(1);
+                       pp_send_start(ctx);
+               } else {
+                       pp_wait_for_start(ctx);
+               }
 
+               printf("  local address:  PSN %#06x RKey %#08x VAddr %#016Lx\n",
+                               my_dest.psn, my_dest.rkey, my_dest.vaddr);
+               printf("  remote address: PSN %#06x RKey %#08x VAddr %#016Lx\n",
+                               rem_dest.psn, rem_dest.rkey, rem_dest.vaddr);
+       } else {
+               ib_dev = pp_find_dev(ib_devname);
+               if (!ib_dev)
+                       return 7;
+
+               ctx = pp_init_ctx(ib_dev, size, tx_depth, ib_port);
+               if (!ctx)
+                       return 8;
+
+               if (pp_open_port(ctx, servername, ib_port, port, &rem_dest))
+                       return 9;
+       }
+
        wr = &ctx->wr;
        ctx->list.addr = (uintptr_t) ctx->buf;
        ctx->list.length = ctx->size;
        ctx->list.lkey = ctx->mr->lkey;
        wr->wr.rdma.remote_addr = rem_dest.vaddr;
        wr->wr.rdma.rkey = rem_dest.rkey;
+       wr->wr_id      = PINGPONG_RDMA_WRID;
+       wr->sg_list    = &ctx->list;
+       wr->num_sge    = 1;
+       wr->opcode     = IBV_WR_RDMA_WRITE;
+       wr->send_flags = IBV_SEND_SIGNALED | IBV_SEND_INLINE;
+       wr->next       = NULL;
 
        scnt = 0;
        rcnt = 0;
@@ -757,9 +1266,10 @@
                if (ccnt < iters) {
                        struct ibv_wc wc;
                        int ne;
+
                        ++ccnt;
                        do {
-                               ne = ibv_poll_cq(ctx->cq, 1, &wc);
+                               ne = ibv_poll_cq(ctx->scq, 1, &wc);
                        } while (ne == 0);
 
                        if (ne < 0) {
@@ -778,6 +1288,12 @@
                }
        }
 
+       if (use_cma) {
+               pp_send_done(ctx);
+               pp_wait_for_done(ctx);
+               pp_close_cma(ctx, servername);
+       } 
+
        print_report(&report, iters, tstamp);
        return 0;
 }
Index: rdma_bw.c
===================================================================
--- rdma_bw.c   (revision 7050)
+++ rdma_bw.c   (working copy)
@@ -53,6 +53,7 @@
 #include <time.h>
 
 #include <infiniband/verbs.h>
+#include <rdma/rdma_cma.h>
 
 #include "get_clock.h"
 
@@ -64,13 +65,16 @@
        struct ibv_context *context;
        struct ibv_pd      *pd;
        struct ibv_mr      *mr;
-       struct ibv_cq      *cq;
+       struct ibv_cq      *rcq;
+       struct ibv_cq      *scq;
+       struct ibv_comp_channel *ch;
        struct ibv_qp      *qp;
        void               *buf;
        unsigned            size;
        int                 tx_depth;
        struct ibv_sge      list;
        struct ibv_send_wr  wr;
+       struct rdma_cm_id  *cm_id;
 };
 
 struct pingpong_dest {
@@ -308,16 +312,27 @@
                return NULL;
        }
 
-       ctx->cq = ibv_create_cq(ctx->context, tx_depth, NULL, NULL, 0);
-       if (!ctx->cq) {
+       ctx->ch = ibv_create_comp_channel(ctx->context);
+       if (!ctx->ch) {
+               fprintf(stderr, "Couldn't create comp channel\n");
+               return NULL;
+       }
+       ctx->rcq = ibv_create_cq(ctx->context, 1, NULL, NULL, 0);
+       if (!ctx->rcq) {
                fprintf(stderr, "Couldn't create CQ\n");
                return NULL;
        }
 
+       ctx->scq = ibv_create_cq(ctx->context, tx_depth, ctx, ctx->ch, 0);
+       if (!ctx->scq) {
+               fprintf(stderr, "Couldn't create CQ\n");
+               return NULL;
+       }
+
        {
                struct ibv_qp_init_attr attr = {
-                       .send_cq = ctx->cq,
-                       .recv_cq = ctx->cq,
+                       .send_cq = ctx->scq,
+                       .recv_cq = ctx->rcq,
                        .cap     = {
                                .max_send_wr  = tx_depth,
                                /* Work around:  driver doesnt support
@@ -407,6 +422,469 @@
        return 0;
 }
 
+/* CMA STUFF */
+
+static void pp_post_recv(struct pingpong_context *ctx)
+{
+       struct ibv_sge list;
+       struct ibv_recv_wr wr, *bad_wr;
+       int rc;
+
+       list.addr = (uintptr_t) ctx->buf;
+       list.length = 1;
+       list.lkey = ctx->mr->lkey;
+       wr.next = NULL;
+       wr.wr_id = 0xdeadbeef;
+       wr.sg_list = &list;
+       wr.num_sge = 1;
+
+       rc = ibv_post_recv(ctx->qp, &wr, &bad_wr);
+       if (rc) {
+               perror("ibv_post_recv");
+               fprintf(stderr, "%s ibv_post_recv failed %d\n", __FUNCTION__, 
rc);
+       }
+}
+
+static struct pingpong_context *pp_init_cma_ctx(struct rdma_cm_id *cm_id,
+                                           unsigned size,
+                                           int tx_depth, int port)
+{
+       struct pingpong_context *ctx;
+
+       ctx = malloc(sizeof *ctx);
+       if (!ctx)
+               return NULL;
+
+       ctx->size     = size;
+       ctx->tx_depth = tx_depth;
+
+       ctx->buf = memalign(page_size, size * 2);
+       if (!ctx->buf) {
+               fprintf(stderr, "Couldn't allocate work buf.\n");
+               return NULL;
+       }
+
+       memset(ctx->buf, 0, size * 2);
+
+       ctx->cm_id = cm_id;
+       ctx->context = cm_id->verbs;
+       if (!ctx->context) {
+               fprintf(stderr, "%s Unbound cm_id!!\n", __FUNCTION__);
+               return NULL;
+       }
+
+       ctx->pd = ibv_alloc_pd(ctx->context);
+       if (!ctx->pd) {
+               fprintf(stderr, "Couldn't allocate PD\n");
+               return NULL;
+       }
+
+        /* We dont really want IBV_ACCESS_LOCAL_WRITE, but IB spec says:
+         * The Consumer is not allowed to assign Remote Write or Remote Atomic 
to
+         * a Memory Region that has not been assigned Local Write. */
+       ctx->mr = ibv_reg_mr(ctx->pd, ctx->buf, size * 2,
+                            IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_LOCAL_WRITE);
+       if (!ctx->mr) {
+               fprintf(stderr, "Couldn't allocate MR\n");
+               return NULL;
+       }
+
+       ctx->ch = ibv_create_comp_channel(ctx->context);
+       if (!ctx->ch) {
+               fprintf(stderr, "Couldn't create comp channel\n");
+               return NULL;
+       }
+
+       ctx->rcq = ibv_create_cq(ctx->context, 1, NULL, NULL, 0);
+       if (!ctx->rcq) {
+               fprintf(stderr, "Couldn't create RCQ\n");
+               return NULL;
+       }
+
+       ctx->scq = ibv_create_cq(ctx->context, tx_depth, ctx, ctx->ch, 0);
+       if (!ctx->scq) {
+               fprintf(stderr, "Couldn't create SCQ\n");
+               return NULL;
+       }
+
+       {
+               struct ibv_qp_init_attr attr = {
+                       .send_cq = ctx->scq,
+                       .recv_cq = ctx->rcq,
+                       .cap     = {
+                               .max_send_wr  = tx_depth,
+                               .max_recv_wr  = 1,
+                               .max_send_sge = 1,
+                               .max_recv_sge = 1,
+                               .max_inline_data = 0
+                       },
+                       .qp_type = IBV_QPT_RC
+               };
+               if (rdma_create_qp(ctx->cm_id, ctx->pd, &attr)) {
+                       fprintf(stderr, "Couldn't create QP\n");
+                       return NULL;
+               }
+               ctx->qp = ctx->cm_id->qp;
+       }
+
+       pp_post_recv(ctx);
+
+       return ctx;
+}
+
+static void pp_close_cma(struct pingpong_context *ctx, const char *servername)
+{
+       struct rdma_cm_event *event;
+       int rc;
+
+       if (servername) {
+               rc = rdma_disconnect(ctx->cm_id);
+               if (rc) {
+                       perror("rdma_disconnect");
+                       return;
+               }
+       }
+       
+       rdma_get_cm_event(&event);
+       if (event->event != RDMA_CM_EVENT_DISCONNECTED)
+               printf("unexpected event during disconnect %d\n", event->event);
+       rdma_ack_cm_event(event);
+       rdma_destroy_id(ctx->cm_id);
+}
+
+static struct pingpong_context *pp_server_connect_cma(unsigned short port, int 
size, int tx_depth,
+                                                     struct pingpong_dest 
*my_dest, 
+                                                     struct pingpong_dest 
*rem_dest)
+{
+       struct rdma_cm_id *listen_id;
+       struct rdma_cm_event *event;
+       struct rdma_conn_param conn_param;
+       int ret;
+       struct sockaddr_in sin;
+       struct rdma_cm_id *child_cm_id;
+       struct pingpong_context *ctx;
+
+       printf("%s starting server\n", __FUNCTION__);
+       ret = rdma_create_id(&listen_id, NULL);
+       if (ret) {
+               fprintf(stderr, "%s rdma_create_id failed %d\n", __FUNCTION__, 
ret);
+               return NULL;
+       }
+
+       sin.sin_addr.s_addr = 0;
+       sin.sin_family = PF_INET;
+       sin.sin_port = htons(port);
+       ret = rdma_bind_addr(listen_id, (struct sockaddr *)&sin);
+       if (ret) {
+               fprintf(stderr,"%s rdma_bind_addr failed %d\n", __FUNCTION__, 
ret);
+               goto err2;
+       }
+
+       ret = rdma_listen(listen_id, 0);
+       if (ret) {
+               fprintf(stderr,"%s rdma_listen failed %d\n", __FUNCTION__, ret);
+               goto err2;
+       }
+
+       ret = rdma_get_cm_event(&event);
+       if (ret) 
+               goto err2;
+
+       if (event->event != RDMA_CM_EVENT_CONNECT_REQUEST) {
+               fprintf(stderr,"%s bad event waiting for connect request %d\n", 
__FUNCTION__, 
+                      event->event);
+               goto err1;
+       }
+
+       if (!event->private_data || (event->private_data_len < 
sizeof(*rem_dest))) {
+               ret = 1;
+               fprintf(stderr,"%s bad private data len %d\n", __FUNCTION__, 
+                      event->private_data_len);
+               goto err1;
+       }
+       
+       memcpy(rem_dest, event->private_data, sizeof(*rem_dest));
+       child_cm_id = (struct rdma_cm_id *)event->id;
+       ctx = pp_init_cma_ctx(child_cm_id, size, tx_depth, port);
+
+       if (!ctx) {
+               fprintf(stderr,"%s pp_init_cma_ctx failed\n", __FUNCTION__);
+               goto err0;
+       }
+
+       my_dest->qpn = 0;
+       my_dest->psn = 0xbb;
+       my_dest->rkey = ctx->mr->rkey;
+       my_dest->vaddr = (uintptr_t)ctx->buf + ctx->size;
+       memset(&conn_param, 0, sizeof conn_param);
+       conn_param.responder_resources = 1;
+       conn_param.initiator_depth = 1;
+       conn_param.private_data = my_dest;
+       conn_param.private_data_len = sizeof(*my_dest);
+       ret = rdma_accept(ctx->cm_id, &conn_param);
+       if (ret) {
+               fprintf(stderr,"%s rdma_accept failed %d\n", __FUNCTION__, ret);
+               goto err0;
+       }
+       rdma_ack_cm_event(event);
+       ret = rdma_get_cm_event(&event);
+       if (ret) {
+               fprintf(stderr,"rdma_get_cm_event error %d\n", ret);
+               rdma_destroy_id(child_cm_id);
+               goto err2;
+       }
+       if (event->event != RDMA_CM_EVENT_ESTABLISHED) {
+               fprintf(stderr,"%s bad event waiting for established %d\n", 
__FUNCTION__, 
+                      event->event);
+               goto err0;
+       }
+       rdma_ack_cm_event(event);
+       fprintf(stderr,"%s connected!\n", __FUNCTION__);
+       return ctx;
+err0:
+       rdma_destroy_id(child_cm_id);
+err1:
+       rdma_ack_cm_event(event);
+err2:
+       rdma_destroy_id(listen_id);
+       fprintf(stderr,"%s NOT connected!\n", __FUNCTION__);
+       return NULL;
+}
+
+/*
+ * Resolve a hostname or IP address string to an IPv4 address.
+ *
+ * Only the first getaddrinfo() result is examined.  Returns the raw
+ * sin_addr.s_addr value of that result, or 0 if the lookup fails or the
+ * first result is not an IPv4 (PF_INET) address.
+ */
+static unsigned get_dst_addr(const char *dst)
+{
+       struct addrinfo *res;
+       int ret;
+       unsigned addr = 0;
+
+       ret = getaddrinfo(dst, NULL, NULL, &res);
+       if (ret) {
+               fprintf(stderr, "%s getaddrinfo failed - invalid hostname or IP address\n",
+                       __FUNCTION__);
+               return 0;
+       }
+
+       if (res->ai_family == PF_INET)
+               addr = ((struct sockaddr_in*)res->ai_addr)->sin_addr.s_addr;
+
+       /* Release the result list on every path, including the non-IPv4 one. */
+       freeaddrinfo(res);
+       return addr;
+}
+
+/*
+ * Client side of the RDMA CM connection setup.
+ *
+ * Resolves servername with get_dst_addr(), creates a CM id, resolves the
+ * address and then the route (2000 passed as the timeout argument of each
+ * call), builds the pingpong context with pp_init_cma_ctx(), and calls
+ * rdma_connect() with *my_dest as the private data.  When the
+ * RDMA_CM_EVENT_ESTABLISHED event arrives, its private data is copied
+ * into *rem_dest.
+ *
+ * Returns the new context on success, or NULL on failure.  Once the CM id
+ * has been created, every failure path destroys it before returning.
+ *
+ * NOTE(review): a context obtained from pp_init_cma_ctx() is not released
+ * on the later error paths; its teardown routine is not visible from
+ * here -- confirm whether that needs cleanup.
+ */
+static struct pingpong_context *pp_client_connect_cma(const char *servername,
+                                                     unsigned short port, int size, int tx_depth,
+                                                     struct pingpong_dest *my_dest,
+                                                     struct pingpong_dest *rem_dest)
+{
+       struct rdma_cm_event *event;
+       struct rdma_conn_param conn_param;
+       int ret;
+       struct sockaddr_in sin;
+       struct rdma_cm_id *cm_id;
+       struct pingpong_context *ctx;
+
+       fprintf(stderr,"%s starting client\n", __FUNCTION__);
+
+       /* Zero the whole sockaddr so the sin_zero padding is not stack garbage. */
+       memset(&sin, 0, sizeof sin);
+       sin.sin_addr.s_addr = get_dst_addr(servername);
+       if (!sin.sin_addr.s_addr) {
+               return NULL;
+       }
+
+       ret = rdma_create_id(&cm_id, NULL);
+       if (ret) {
+               fprintf(stderr,"%s rdma_create_id failed %d\n", __FUNCTION__, ret);
+               return NULL;
+       }
+
+       sin.sin_family = PF_INET;
+       sin.sin_port = htons(port);
+       ret = rdma_resolve_addr(cm_id, NULL, (struct sockaddr *)&sin, 2000);
+       if (ret) {
+               fprintf(stderr,"%s rdma_resolve_addr failed %d\n", __FUNCTION__, ret);
+               goto err2;
+       }
+
+       ret = rdma_get_cm_event(&event);
+       if (ret)
+               goto err2;
+
+       if (event->event != RDMA_CM_EVENT_ADDR_RESOLVED) {
+               fprintf(stderr,"%s/%d unexpected CM event %d\n", __FUNCTION__, __LINE__,
+                      event->event);
+               goto err1;
+       }
+       rdma_ack_cm_event(event);
+
+       ret = rdma_resolve_route(cm_id, 2000);
+       if (ret) {
+               fprintf(stderr,"%s rdma_resolve_route failed %d\n", __FUNCTION__, ret);
+               goto err2;
+       }
+
+       ret = rdma_get_cm_event(&event);
+       if (ret)
+               goto err2;
+
+       if (event->event != RDMA_CM_EVENT_ROUTE_RESOLVED) {
+               fprintf(stderr,"%s/%d unexpected CM event %d\n", __FUNCTION__, __LINE__,
+                      event->event);
+               goto err1;
+       }
+       rdma_ack_cm_event(event);
+
+       ctx = pp_init_cma_ctx(cm_id, size, tx_depth, port);
+
+       if (!ctx) {
+               fprintf(stderr,"%s pp_init_cma_ctx failed\n", __FUNCTION__);
+               goto err2;
+       }
+
+       /* Describe our buffer to the peer; it travels as connect private data. */
+       my_dest->qpn = 0;
+       my_dest->psn = 0xaa;
+       my_dest->rkey = ctx->mr->rkey;
+       my_dest->vaddr = (uintptr_t)ctx->buf + ctx->size;
+       memset(&conn_param, 0, sizeof conn_param);
+       conn_param.responder_resources = 1;
+       conn_param.initiator_depth = 1;
+       conn_param.retry_count = 5;
+       conn_param.private_data = my_dest;
+       conn_param.private_data_len = sizeof(*my_dest);
+       ret = rdma_connect(ctx->cm_id, &conn_param);
+       if (ret) {
+               fprintf(stderr,"%s rdma_connect failure %d\n", __FUNCTION__, ret);
+               goto err2;
+       }
+
+       ret = rdma_get_cm_event(&event);
+       if (ret)
+               goto err2;
+
+       if (event->event != RDMA_CM_EVENT_ESTABLISHED) {
+               fprintf(stderr,"%s/%d unexpected CM event %d\n", __FUNCTION__, __LINE__,
+                      event->event);
+               goto err1;
+       }
+       /* The peer's buffer description must be present and large enough. */
+       if (!event->private_data || (event->private_data_len < sizeof(*rem_dest))) {
+               fprintf(stderr,"%s bad private data ptr %p len %d\n", __FUNCTION__,
+                      event->private_data, event->private_data_len);
+               goto err1;
+       }
+       /* Copy before acking: the event is not referenced after the ack. */
+       memcpy(rem_dest, event->private_data, sizeof(*rem_dest));
+       rdma_ack_cm_event(event);
+       fprintf(stderr,"connected!\n");
+       return ctx;
+err1:
+       rdma_ack_cm_event(event);
+err2:
+       fprintf(stderr,"NOT connected!\n");
+       rdma_destroy_id(cm_id);
+       return NULL;
+}
+
+/*
+ * Wait for one completion on the receive CQ, polling every 1000 us.
+ *
+ * The completion is expected to have a zero status, a receive opcode and
+ * wr_id 0xdeadbeef; any mismatch is reported on stderr but not otherwise
+ * acted on.  If the poll itself fails, that is reported and the function
+ * returns without inspecting the work completion.
+ */
+static void pp_wait_for_done(struct pingpong_context *ctx)
+{
+       struct ibv_wc wc;
+       int ne;
+
+       do {
+               usleep(1000);
+               ne = ibv_poll_cq(ctx->rcq, 1, &wc);
+       } while (ne == 0);
+
+       /* A negative count means the poll failed and wc was never filled in. */
+       if (ne < 0) {
+               fprintf(stderr, "%s ibv_poll_cq failed %d\n", __FUNCTION__, ne);
+               return;
+       }
+
+       if (wc.status)
+               fprintf(stderr, "%s bad wc status %d\n", __FUNCTION__, wc.status);
+       if (!(wc.opcode & IBV_WC_RECV))
+               fprintf(stderr, "%s bad wc opcode %d\n", __FUNCTION__, wc.opcode);
+       if (wc.wr_id != 0xdeadbeef)
+               fprintf(stderr, "%s bad wc wr_id 0x%x\n", __FUNCTION__, (int)wc.wr_id);
+}
+
+/*
+ * Post a 1-byte signaled SEND (wr_id 0xcafebabe) from the start of the
+ * context buffer, wait for its completion on the send CQ (polling every
+ * 1000 us), then sleep for one second.
+ *
+ * Unexpected status, opcode or wr_id in the completion is reported on
+ * stderr.  If posting or polling fails, the failure is reported and the
+ * function returns immediately.
+ */
+static void pp_send_done(struct pingpong_context *ctx)
+{
+       struct ibv_send_wr *bad_wr;
+       int rc;
+       struct ibv_wc wc;
+       int ne;
+
+       ctx->list.addr = (uintptr_t) ctx->buf;
+       ctx->list.length = 1;
+       ctx->list.lkey = ctx->mr->lkey;
+       ctx->wr.wr_id      = 0xcafebabe;
+       ctx->wr.sg_list    = &ctx->list;
+       ctx->wr.num_sge    = 1;
+       ctx->wr.opcode     = IBV_WR_SEND;
+       ctx->wr.send_flags = IBV_SEND_SIGNALED;
+       ctx->wr.next       = NULL;
+       rc = ibv_post_send(ctx->qp, &ctx->wr, &bad_wr);
+       if (rc != 0) {
+               fprintf(stderr, "%s ibv_post_send failed %d!\n", __FUNCTION__, rc);
+               /* Nothing was posted, so no completion will ever arrive. */
+               return;
+       }
+       do {
+               usleep(1000);
+               ne = ibv_poll_cq(ctx->scq, 1, &wc);
+       } while (ne == 0);
+
+       /* A negative count means the poll failed and wc was never filled in. */
+       if (ne < 0) {
+               fprintf(stderr, "%s ibv_poll_cq failed %d\n", __FUNCTION__, ne);
+               return;
+       }
+
+       if (wc.status)
+               fprintf(stderr, "%s bad wc status %d\n", __FUNCTION__, wc.status);
+       if (wc.opcode != IBV_WC_SEND)
+               fprintf(stderr, "%s bad wc opcode %d\n", __FUNCTION__, wc.opcode);
+       if (wc.wr_id != 0xcafebabe)
+               fprintf(stderr, "%s bad wc wr_id 0x%x\n", __FUNCTION__, (int)wc.wr_id);
+       sleep(1);
+}
+
+/*
+ * Wait for one completion on the receive CQ, polling every 1000 us, then
+ * call pp_post_recv() on the context.
+ *
+ * The completion is expected to have a zero status, a receive opcode and
+ * wr_id 0xdeadbeef; any mismatch is reported on stderr but not otherwise
+ * acted on.  If the poll itself fails, that is reported and the function
+ * returns without inspecting the work completion or calling pp_post_recv().
+ */
+static void pp_wait_for_start(struct pingpong_context *ctx)
+{
+       struct ibv_wc wc;
+       int ne;
+
+       do {
+               usleep(1000);
+               ne = ibv_poll_cq(ctx->rcq, 1, &wc);
+       } while (ne == 0);
+
+       /* A negative count means the poll failed and wc was never filled in. */
+       if (ne < 0) {
+               fprintf(stderr, "%s ibv_poll_cq failed %d\n", __FUNCTION__, ne);
+               return;
+       }
+
+       if (wc.status)
+               fprintf(stderr, "%s bad wc status %d\n", __FUNCTION__, wc.status);
+       if (!(wc.opcode & IBV_WC_RECV))
+               fprintf(stderr, "%s bad wc opcode %d\n", __FUNCTION__, wc.opcode);
+       if (wc.wr_id != 0xdeadbeef)
+               fprintf(stderr, "%s bad wc wr_id 0x%x\n", __FUNCTION__, (int)wc.wr_id);
+       pp_post_recv(ctx);
+}
+
+/*
+ * Post a 1-byte signaled SEND (wr_id 0xabbaabba) from the start of the
+ * context buffer, wait for its completion on the send CQ (polling every
+ * 1000 us), then print "start sent" on stdout.
+ *
+ * Unexpected status, opcode or wr_id in the completion is reported on
+ * stderr.  If posting or polling fails, the failure is reported and the
+ * function returns immediately.
+ */
+static void pp_send_start(struct pingpong_context *ctx)
+{
+       struct ibv_send_wr *bad_wr;
+       int rc;
+       struct ibv_wc wc;
+       int ne;
+
+       ctx->list.addr = (uintptr_t) ctx->buf;
+       ctx->list.length = 1;
+       ctx->list.lkey = ctx->mr->lkey;
+       ctx->wr.wr_id      = 0xabbaabba;
+       ctx->wr.sg_list    = &ctx->list;
+       ctx->wr.num_sge    = 1;
+       ctx->wr.opcode     = IBV_WR_SEND;
+       ctx->wr.send_flags = IBV_SEND_SIGNALED;
+       ctx->wr.next       = NULL;
+       rc = ibv_post_send(ctx->qp, &ctx->wr, &bad_wr);
+       if (rc != 0) {
+               fprintf(stderr, "%s ibv_post_send failed %d!\n", __FUNCTION__, rc);
+               /* Nothing was posted, so no completion will ever arrive. */
+               return;
+       }
+       do {
+               usleep(1000);
+               ne = ibv_poll_cq(ctx->scq, 1, &wc);
+       } while (ne == 0);
+
+       /* A negative count means the poll failed and wc was never filled in. */
+       if (ne < 0) {
+               fprintf(stderr, "%s ibv_poll_cq failed %d\n", __FUNCTION__, ne);
+               return;
+       }
+
+       if (wc.status)
+               fprintf(stderr, "%s bad wc status %d\n", __FUNCTION__, wc.status);
+       if (wc.opcode != IBV_WC_SEND)
+               fprintf(stderr, "%s bad wc opcode %d\n", __FUNCTION__, wc.opcode);
+       if (wc.wr_id != 0xabbaabba)
+               fprintf(stderr, "%s bad wc wr_id 0x%x\n", __FUNCTION__, (int)wc.wr_id);
+       printf("start sent\n");
+}
+
 static void usage(const char *argv0)
 {
        printf("Usage:\n");
@@ -421,6 +899,7 @@
        printf("  -t, --tx-depth=<dep>   size of tx queue (default 100)\n");
        printf("  -n, --iters=<iters>    number of exchanges (at least 2, 
default 1000)\n");
        printf("  -b, --bidirectional    measure bidirectional bandwidth 
(default unidirectional)\n");
+       printf("  -c, --cma              Use the RDMA CMA to setup the RDMA 
connection\n");
 }
 
 static void print_report(unsigned int iters, unsigned size, int duplex,
@@ -438,14 +917,15 @@
 
        /* Find the peak bandwidth */
        for (i = 0; i < iters; ++i)
-               for (j = i; j < iters; ++j) {
-                       t = (tcompleted[j] - tposted[i]) / (j - i + 1);
-                       if (t < opt_delta) {
-                               opt_delta  = t;
-                               opt_posted = i;
-                               opt_completed = j;
+               if ((i+200) < iters)
+                       for (j = i; j < i+200; ++j) {
+                               t = (tcompleted[j] - tposted[i]) / (j - i + 1);
+                               if (t < opt_delta) {
+                                       opt_delta  = t;
+                                       opt_posted = i;
+                                       opt_completed = j;
+                               }
                        }
-               }
 
        cycles_to_units = get_cpu_mhz() * 1000000;
 
@@ -486,10 +966,16 @@
        int                      sockfd;
        int                      duplex = 0;
        struct ibv_qp           *qp;
+       int                      use_cma=0;
+       int                      poll = 0;
+       cycles_t                *tposted;
+       cycles_t                *tcompleted;
+       struct ibv_wc            wc;
+       int                      ne;
+       struct ibv_cq           *ev_cq;
+       void                    *ev_ctx;
+       int                      blocks = 0;
 
-       cycles_t        *tposted;
-       cycles_t        *tcompleted;
-
        /* Parameter parsing. */
        while (1) {
                int c;
@@ -502,14 +988,24 @@
                        { .name = "iters",          .has_arg = 1, .val = 'n' },
                        { .name = "tx-depth",       .has_arg = 1, .val = 't' },
                        { .name = "bidirectional",  .has_arg = 0, .val = 'b' },
+                       { .name = "cma",            .has_arg = 0, .val = 'c' },
+                       { .name = "poll",           .has_arg = 0, .val = 'P' },
                        { 0 }
                };
 
-               c = getopt_long(argc, argv, "p:d:i:s:n:t:b", long_options, 
NULL);
+               c = getopt_long(argc, argv, "p:d:i:s:n:t:bcP", long_options, 
NULL);
                if (c == -1)
                        break;
 
                switch (c) {
+               case 'c':
+                       use_cma = 1;
+                       break;
+
+               case 'P':
+                       poll = 1;
+                       break;
+
                case 'p':
                        port = strtol(optarg, NULL, 0);
                        if (port < 0 || port > 65535) {
@@ -552,15 +1048,6 @@
 
                        break;
 
-               case 'l':
-                       tx_depth = strtol(optarg, NULL, 0);
-                       if (tx_depth < 1) {
-                               usage(argv[0]);
-                               return 1;
-                       }
-
-                       break;
-
                case 'b':
                        duplex = 1;
                        break;
@@ -585,82 +1072,125 @@
 
        page_size = sysconf(_SC_PAGESIZE);
 
-       dev_list = ibv_get_device_list(NULL);
-
-       if (!ib_devname) {
-               ib_dev = dev_list[0];
-               if (!ib_dev) {
-                       fprintf(stderr, "No IB devices found\n");
+       if (use_cma) {
+               int rc;
+               rem_dest = malloc(sizeof *rem_dest);
+               if (!rem_dest) {
+                       fprintf(stderr,"%s cannot malloc rem_dest!\n", 
__FUNCTION__);
                        return 1;
                }
+
+               memset(rem_dest, 0, sizeof(*rem_dest));
+
+               if (servername)
+                       ctx = pp_client_connect_cma(servername, port, size, 
tx_depth, &my_dest, 
+                                                   rem_dest);
+               else
+                       ctx = pp_server_connect_cma(port, size, tx_depth, 
&my_dest, rem_dest);
+               if (!ctx) {
+                       fprintf(stderr, "pp_connect_cma(%s,%d) failed!\n", 
servername, port);
+                       return rc;
+               }
+
+               /*
+                * Synch up and force the server to wait for the client to send
+                * the first message (MPA requirement).
+                */
+               if (servername) {
+                       sleep(1);
+                       pp_send_start(ctx);
+               } else {
+                       pp_wait_for_start(ctx);
+               }
+
+               printf("  local address:  PSN %#06x RKey %#08x VAddr %#016Lx\n",
+                               my_dest.psn, my_dest.rkey, my_dest.vaddr);
+               printf("  remote address: PSN %#06x RKey %#08x VAddr %#016Lx\n",
+                               rem_dest->psn, rem_dest->rkey, rem_dest->vaddr);
        } else {
-               for (; (ib_dev = *dev_list); ++dev_list)
-                       if (!strcmp(ibv_get_device_name(ib_dev), ib_devname))
-                               break;
-               if (!ib_dev) {
-                       fprintf(stderr, "IB device %s not found\n", ib_devname);
-                       return 1;
+               dev_list = ibv_get_device_list(NULL);
+
+               if (!ib_devname) {
+                       ib_dev = dev_list[0];
+                       if (!ib_dev) {
+                               fprintf(stderr, "No IB devices found\n");
+                               return 1;
+                       }
+               } else {
+                       for (; (ib_dev = *dev_list); ++dev_list)
+                               if (!strcmp(ibv_get_device_name(ib_dev), 
ib_devname))
+                                       break;
+                       if (!ib_dev) {
+                               fprintf(stderr, "IB device %s not found\n", 
ib_devname);
+                               return 1;
+                       }
                }
-       }
 
-       ctx = pp_init_ctx(ib_dev, size, tx_depth, ib_port);
-       if (!ctx)
-               return 1;
+               ctx = pp_init_ctx(ib_dev, size, tx_depth, ib_port);
+               if (!ctx)
+                       return 1;
 
-       /* Create connection between client and server.
-        * We do it by exchanging data over a TCP socket connection. */
+               /* Create connection between client and server.
+                * We do it by exchanging data over a TCP socket connection. */
 
-       my_dest.lid = pp_get_local_lid(ctx, ib_port);
-       my_dest.qpn = ctx->qp->qp_num;
-       my_dest.psn = lrand48() & 0xffffff;
-       if (!my_dest.lid) {
-               fprintf(stderr, "Local lid 0x0 detected. Is an SM running?\n");
-               return 1;
-       }
-       my_dest.rkey = ctx->mr->rkey;
-       my_dest.vaddr = (uintptr_t)ctx->buf + ctx->size;
+               my_dest.lid = pp_get_local_lid(ctx, ib_port);
+               my_dest.qpn = ctx->qp->qp_num;
+               my_dest.psn = lrand48() & 0xffffff;
+               if (!my_dest.lid) {
+                       fprintf(stderr, "Local lid 0x0 detected. Is an SM 
running?\n");
+                       return 1;
+               }
+               my_dest.rkey = ctx->mr->rkey;
+               my_dest.vaddr = (uintptr_t)ctx->buf + ctx->size;
 
-       printf("  local address:  LID %#04x, QPN %#06x, PSN %#06x "
-                       "RKey %#08x VAddr %#016Lx\n",
-                       my_dest.lid, my_dest.qpn, my_dest.psn,
-                       my_dest.rkey, my_dest.vaddr);
+               printf("  local address:  LID %#04x, QPN %#06x, PSN %#06x "
+                               "RKey %#08x VAddr %#016Lx\n",
+                               my_dest.lid, my_dest.qpn, my_dest.psn,
+                               my_dest.rkey, my_dest.vaddr);
 
-       if (servername) {
-               sockfd = pp_client_connect(servername, port);
-               if (sockfd < 0)
+               if (servername) {
+                       sockfd = pp_client_connect(servername, port);
+                       if (sockfd < 0)
+                               return 1;
+                       rem_dest = pp_client_exch_dest(sockfd, &my_dest);
+               } else {
+                       sockfd = pp_server_connect(port);
+                       if (sockfd < 0)
+                               return 1;
+                       rem_dest = pp_server_exch_dest(sockfd, &my_dest);
+               }
+
+               if (!rem_dest)
                        return 1;
-               rem_dest = pp_client_exch_dest(sockfd, &my_dest);
-       } else {
-               sockfd = pp_server_connect(port);
-               if (sockfd < 0)
-                       return 1;
-               rem_dest = pp_server_exch_dest(sockfd, &my_dest);
-       }
 
-       if (!rem_dest)
-               return 1;
+               printf("  remote address: LID %#04x, QPN %#06x, PSN %#06x, "
+                               "RKey %#08x VAddr %#016Lx\n",
+                               rem_dest->lid, rem_dest->qpn, rem_dest->psn,
+                               rem_dest->rkey, rem_dest->vaddr);
 
-       printf("  remote address: LID %#04x, QPN %#06x, PSN %#06x, "
-                       "RKey %#08x VAddr %#016Lx\n",
-                       rem_dest->lid, rem_dest->qpn, rem_dest->psn,
-                       rem_dest->rkey, rem_dest->vaddr);
+               if (pp_connect_ctx(ctx, ib_port, my_dest.psn, rem_dest))
+                       return 1;
 
-       if (pp_connect_ctx(ctx, ib_port, my_dest.psn, rem_dest))
-               return 1;
-
-       /* An additional handshake is required *after* moving qp to RTR.
-           Arbitrarily reuse exch_dest for this purpose. */
-       if (servername) {
-               rem_dest = pp_client_exch_dest(sockfd, &my_dest);
-       } else {
-               rem_dest = pp_server_exch_dest(sockfd, &my_dest);
+               /* An additional handshake is required *after* moving qp to RTR.
+                  Arbitrarily reuse exch_dest for this purpose. */
+               if (servername) {
+                       rem_dest = pp_client_exch_dest(sockfd, &my_dest);
+               } else {
+                       rem_dest = pp_server_exch_dest(sockfd, &my_dest);
+               }
        }
 
        /* For half duplex tests, server just waits for client to exit */
        if (!servername && !duplex) {
-               rem_dest = pp_server_exch_dest(sockfd, &my_dest);
-               write(sockfd, "done", sizeof "done");
-               close(sockfd);
+               if (use_cma) {
+                       pp_wait_for_done(ctx);
+                       pp_send_done(ctx);
+                       pp_close_cma(ctx, servername);
+               } else {
+                       rem_dest = pp_server_exch_dest(sockfd, &my_dest);
+                       write(sockfd, "done", sizeof "done");
+                       close(sockfd);
+               }
                return 0;
        }
 
@@ -695,6 +1225,11 @@
                return 1;
        }
 
+       if (!poll && ibv_req_notify_cq(ctx->scq, 0)) {
+               fprintf(stderr, "ibv_req_notify failed!\n");
+               return 1;
+       }
+
        /* Done with setup. Start the test. */
 
        while (scnt < iters || ccnt < iters) {
@@ -712,10 +1247,22 @@
                }
 
                if (ccnt < iters) {
-                       struct ibv_wc wc;
-                       int ne;
+
                        do {
-                               ne = ibv_poll_cq(ctx->cq, 1, &wc);
+                               ne = ibv_poll_cq(ctx->scq, 1, &wc);
+                               if (!poll && ne == 0) {
+                                       blocks++;
+                                       if (ibv_get_cq_event(ctx->ch, &ev_cq, 
&ev_ctx)) {
+                                               fprintf(stderr, "Failed to get 
cq event!\n");
+                                               return 1;
+                                       }
+                                       if (ev_cq != ctx->scq) {
+                                               fprintf(stderr, "Unkown CQ!\n");
+                                               return 1;
+                                       }
+                                       ibv_ack_cq_events(ctx->scq, 1);
+                                       ibv_req_notify_cq(ctx->scq, 0);
+                               }
                        } while (ne == 0);
 
                        tcompleted[ccnt] = get_cycles();
@@ -737,15 +1284,20 @@
                }
        }
 
-       if (servername) {
-               rem_dest = pp_client_exch_dest(sockfd, &my_dest);
+       if (use_cma) {
+               pp_send_done(ctx);
+               pp_wait_for_done(ctx);
+               pp_close_cma(ctx, servername);
        } else {
-               rem_dest = pp_server_exch_dest(sockfd, &my_dest);
+               if (servername) {
+                       rem_dest = pp_client_exch_dest(sockfd, &my_dest);
+               } else {
+                       rem_dest = pp_server_exch_dest(sockfd, &my_dest);
+               }
+               write(sockfd, "done", sizeof "done");
+               close(sockfd);
        }
 
-       write(sockfd, "done", sizeof "done");
-       close(sockfd);
-
        print_report(iters, size, duplex, tposted, tcompleted);
 
        free(tposted);
Index: Makefile
===================================================================
--- Makefile    (revision 7050)
+++ Makefile    (working copy)
@@ -10,7 +10,7 @@
 LOADLIBES += 
 LDFLAGS +=
 
-${TESTS}: LOADLIBES += -libverbs
+${TESTS}: LOADLIBES += -libverbs -lrdmacm
 
 ${TESTS} ${UTILS}: %: %.c ${EXTRA_FILES} ${EXTRA_HEADERS}
        $(CC) $(CPPFLAGS) $(CFLAGS) $(LDFLAGS) $< ${EXTRA_FILES} $(LOADLIBES) 
$(LDLIBS) -o $@

_______________________________________________
openib-general mailing list
[email protected]
http://openib.org/mailman/listinfo/openib-general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general

Reply via email to