OK, but I'm not sure why this complexity is needed:

 >  #if SIZEOF_LONG == 8
 > +typedef uint64_t mlx4_wc_copy_t;
 >  #else
 > +typedef uint32_t mlx4_wc_copy_t;

Isn't that just a crazy way of coming up with a new name for unsigned long?

How about something like this (it seems to generate pretty good code
on x86-64 at least):

diff --git a/src/qp.c b/src/qp.c
index 8b4adaa..5721860 100644
--- a/src/qp.c
+++ b/src/qp.c
@@ -168,6 +168,20 @@ static void set_data_seg(struct mlx4_wqe_data_seg *dseg, struct ibv_sge *sg)
        dseg->byte_count = htonl(sg->length);
 }
 
+/*
+ * Avoid using memcpy() to copy to BlueFlame page, since memcpy()
+ * implementations may use move-string-buffer assembler instructions,
+ * which do not guarantee order of copying.
+ */
+static void mlx4_bf_copy(unsigned long *dst, unsigned long *src, unsigned bytecnt)
+{
+       while (bytecnt > 0) {
+               *dst++ = *src++;
+               *dst++ = *src++;
+               bytecnt -= 2 * sizeof (long);
+       }
+}
+
 int mlx4_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr,
                          struct ibv_send_wr **bad_wr)
 {
@@ -388,7 +402,8 @@ out:
 
                pthread_spin_lock(&ctx->bf_lock);
 
-               memcpy(ctx->bf_page + ctx->bf_offset, ctrl, align(size * 16, 64));
+               mlx4_bf_copy(ctx->bf_page + ctx->bf_offset, (unsigned long *) ctrl,
+                            align(size * 16, 64));
                wc_wmb();
 
                ctx->bf_offset ^= ctx->bf_buf_size;
_______________________________________________
general mailing list
[email protected]
http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general

Reply via email to