Adds support for posting SEND_INLINE work requests in libmthca.
With this patch, I get latency as low as 3.35 usec unidirectional
with Arbel in Tavor mode. Passed basic testing in both Tavor and Arbel modes.

Signed-off-by: Michael S. Tsirkin <[EMAIL PROTECTED]>

Index: src/qp.c
===================================================================
--- src/qp.c    (revision 2104)
+++ src/qp.c    (working copy)
@@ -57,6 +57,10 @@ enum {
        MTHCA_NEXT_SOLICIT   = 1 << 1,
 };
 
+enum {
+       MTHCA_INLINE_SEG = 1<<31
+};
+
 struct mthca_next_seg {
        uint32_t        nda_op; /* [31:6] next WQE [4:0] next opcode */
        uint32_t        ee_nds; /* [31:8] next EE  [7] DBD [6] F [5:0] next WQE size */
@@ -107,6 +111,10 @@ struct mthca_data_seg {
        uint64_t        addr;
 };
 
+struct mthca_inline_seg {
+       uint32_t        byte_count;
+};
+
 static const uint8_t mthca_opcode[] = {
        [IBV_WR_SEND]                 = MTHCA_OPCODE_SEND,
        [IBV_WR_SEND_WITH_IMM]        = MTHCA_OPCODE_SEND_IMM,
@@ -255,15 +263,38 @@ int mthca_tavor_post_send(struct ibv_qp 
                        goto out;
                }
 
-               for (i = 0; i < wr->num_sge; ++i) {
-                       ((struct mthca_data_seg *) wqe)->byte_count =
-                               htonl(wr->sg_list[i].length);
-                       ((struct mthca_data_seg *) wqe)->lkey =
-                               htonl(wr->sg_list[i].lkey);
-                       ((struct mthca_data_seg *) wqe)->addr =
-                               htonll(wr->sg_list[i].addr);
-                       wqe += sizeof (struct mthca_data_seg);
-                       size += sizeof (struct mthca_data_seg) / 16;
+               if (wr->send_flags & IBV_SEND_INLINE) {
+                       struct mthca_inline_seg *seg = wqe;
+                       int s = 0;
+                       wqe += sizeof *seg;
+                       for (i = 0; i < wr->num_sge; ++i) {
+                               struct ibv_sge *sge = &wr->sg_list[i];
+                               int l;
+                               l = sge->length;
+                               s += l;
+
+                               if (s + sizeof *seg > (1 << qp->sq.wqe_shift)) {
+                                       ret = -1;
+                                       *bad_wr = wr;
+                                       goto out;
+                               }
+
+                               memcpy(wqe, (void*)(intptr_t)sge->addr, l);
+                               wqe += l;
+                       }
+                       seg->byte_count = htonl(MTHCA_INLINE_SEG | s);
+
+                       size += align(s + sizeof *seg, 16) / 16;
+               } else {
+                       struct mthca_data_seg *seg;
+                       for (i = 0; i < wr->num_sge; ++i) {
+                               seg = wqe;
+                               seg->byte_count = htonl(wr->sg_list[i].length);
+                               seg->lkey = htonl(wr->sg_list[i].lkey);
+                               seg->addr = htonll(wr->sg_list[i].addr);
+                               wqe += sizeof *seg;
+                       }
+                       size += wr->num_sge * sizeof *seg / 16;
                }
 
                qp->wrid[ind + qp->rq.max] = wr->wr_id;
@@ -512,15 +543,37 @@ int mthca_arbel_post_send(struct ibv_qp 
                        goto out;
                }
 
-               for (i = 0; i < wr->num_sge; ++i) {
-                       ((struct mthca_data_seg *) wqe)->byte_count =
-                               htonl(wr->sg_list[i].length);
-                       ((struct mthca_data_seg *) wqe)->lkey =
-                               htonl(wr->sg_list[i].lkey);
-                       ((struct mthca_data_seg *) wqe)->addr =
-                               htonll(wr->sg_list[i].addr);
-                       wqe += sizeof (struct mthca_data_seg);
-                       size += sizeof (struct mthca_data_seg) / 16;
+               if (wr->send_flags & IBV_SEND_INLINE) {
+                       struct mthca_inline_seg *seg = wqe;
+                       int s = 0;
+                       wqe += sizeof *seg;
+                       for (i = 0; i < wr->num_sge; ++i) {
+                               int l = wr->sg_list[i].length;
+                               s += l;
+
+                               if (s + sizeof *seg > (1 << qp->sq.wqe_shift)) {
+                                       ret = -1;
+                                       *bad_wr = wr;
+                                       goto out;
+                               }
+
+                               memcpy(wqe,
+                                      (void*)(intptr_t)wr->sg_list[i].addr, l);
+                               wqe += l;
+                       }
+                       seg->byte_count = htonl(MTHCA_INLINE_SEG | s);
+
+                       size += align(s + sizeof *seg, 16) / 16;
+               } else {
+                       struct mthca_data_seg *seg;
+                       for (i = 0; i < wr->num_sge; ++i) {
+                               seg = wqe;
+                               seg->byte_count = htonl(wr->sg_list[i].length);
+                               seg->lkey = htonl(wr->sg_list[i].lkey);
+                               seg->addr = htonll(wr->sg_list[i].addr);
+                               wqe += sizeof *seg;
+                       }
+                       size += wr->num_sge * sizeof *seg / 16;
                }
 
                qp->wrid[ind + qp->rq.max] = wr->wr_id;
-- 
MST - Michael S. Tsirkin
_______________________________________________
openib-general mailing list
[email protected]
http://openib.org/mailman/listinfo/openib-general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general

Reply via email to