From: Jack Morgenstein <[EMAIL PROTECTED]>
Subject: IB/mlx4: fix data corruption triggered by wrong headroom marking order

This is an addendum to Roland's commit 0e6e74162164d908edf7889ac66dca09e7505745
(June 18); it adds prefetch headroom marking processing for the s/g segments.

We write s/g segments into the WQE in reverse order, to guarantee that the first
dword of each cacheline containing s/g segments is written last (overwriting the
headroom invalidation pattern). The entire cacheline thus contains valid data by
the time the invalidation pattern is overwritten.

Signed-off-by: Jack Morgenstein <[EMAIL PROTECTED]>

---

The previous patch version turned out to contain a
space followed by a tab. Here's a fixed one.
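
For reviewers less familiar with the headroom-stamping scheme, below is a
minimal userspace sketch of the idea the changelog describes. It is not the
driver code: STAMP, fake_wmb(), struct sg_seg and set_seg() are made-up
stand-ins (the real code uses mlx4_wqe_data_seg, wmb() and big-endian
stores), but it illustrates why byte_count is written last within a segment
and why the segments of a cacheline are filled back to front.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define STAMP     0xffffffffu	/* assumed headroom invalidation pattern */
#define CACHELINE 64		/* HCA prefetch granularity */

struct sg_seg {			/* 16-byte layout, like mlx4_wqe_data_seg */
	uint32_t byte_count;	/* first dword of the segment */
	uint32_t lkey;
	uint64_t addr;
};

static void fake_wmb(void)	/* stand-in for the kernel's wmb() */
{
	__sync_synchronize();
}

/*
 * Fill one segment, writing byte_count last: a prefetcher that sees
 * byte_count != STAMP is then guaranteed to see valid lkey/addr too.
 */
static void set_seg(struct sg_seg *s, uint64_t addr, uint32_t lkey,
		    uint32_t len)
{
	s->lkey = lkey;
	s->addr = addr;
	fake_wmb();
	s->byte_count = len;
}

int main(void)
{
	unsigned char wqe[CACHELINE] __attribute__((aligned(CACHELINE)));
	struct sg_seg *seg = (struct sg_seg *) wqe;
	int nsge = 4, i;

	/* Stamp the headroom: every dword now reads back as STAMP. */
	memset(wqe, 0xff, sizeof(wqe));

	/*
	 * Reverse order: segment 0, whose byte_count is the first dword
	 * of the cacheline, is overwritten last, so the stamp in that
	 * dword disappears only after the rest of the chunk is valid.
	 */
	for (i = nsge - 1; i >= 0; --i)
		set_seg(seg + i, 0x1000 + i * 0x100, 0x42, 256);

	for (i = 0; i < nsge; ++i)
		printf("seg %d: byte_count=%u lkey=%u\n",
		       i, seg[i].byte_count, seg[i].lkey);
	return 0;
}

With four 16-byte segments per 64-byte chunk, filling them back to front
means the first dword can stop looking like the stamp only once the whole
chunk already holds valid data, which is what the patch below arranges for
the real WQE.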

Index: ofed_kernel/drivers/infiniband/hw/mlx4/qp.c
===================================================================
--- ofed_kernel.orig/drivers/infiniband/hw/mlx4/qp.c    2007-07-30 16:35:01.000000000 +0300
+++ ofed_kernel/drivers/infiniband/hw/mlx4/qp.c 2007-07-30 17:05:47.000000000 +0300
@@ -1215,9 +1215,18 @@ static void set_datagram_seg(struct mlx4
 static void set_data_seg(struct mlx4_wqe_data_seg *dseg,
                         struct ib_sge *sg)
 {
-       dseg->byte_count = cpu_to_be32(sg->length);
        dseg->lkey       = cpu_to_be32(sg->lkey);
        dseg->addr       = cpu_to_be64(sg->addr);
+
+       /* Need a barrier before writing the byte_count field
+        * to make sure that all the data is visible before the
+        * byte_count field is set.  Otherwise, if the segment
+        * begins a new cacheline, the HCA prefetcher could
+        * grab the 64-byte chunk and get a valid (!= 0xffffffff)
+        * byte count but stale data, and end up sending the wrong
+        * data.  */
+       wmb();
+       dseg->byte_count = cpu_to_be32(sg->length);
 }
 
 int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
@@ -1226,6 +1235,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp
        struct mlx4_ib_qp *qp = to_mqp(ibqp);
        void *wqe;
        struct mlx4_wqe_ctrl_seg *ctrl;
+       struct mlx4_wqe_data_seg *seg;
        unsigned long flags;
        int nreq;
        int err = 0;
@@ -1325,19 +1335,22 @@ int mlx4_ib_post_send(struct ib_qp *ibqp
                        break;
                }
 
-               for (i = 0; i < wr->num_sge; ++i) {
-                       set_data_seg(wqe, wr->sg_list + i);
-
-                       wqe  += sizeof (struct mlx4_wqe_data_seg);
+               seg = (struct mlx4_wqe_data_seg *) wqe;
+               /* Add one more inline data segment for ICRC for MLX sends.
+                * Write this inline and all s/g segments in reverse order,
+                * so as to overwrite cacheline stamp last within each
+                * cacheline.  */
+               if (qp->ibqp.qp_type == IB_QPT_SMI || qp->ibqp.qp_type == IB_QPT_GSI) {
+                       void *t = wqe + (wr->num_sge) * sizeof(struct mlx4_wqe_data_seg);
+                       ((u32 *) t)[1] = 0;
+                       wmb();
+                       ((struct mlx4_wqe_inline_seg *) t)->byte_count =
+                               cpu_to_be32((1 << 31) | 4);
                        size += sizeof (struct mlx4_wqe_data_seg) / 16;
                }
 
-               /* Add one more inline data segment for ICRC for MLX sends */
-               if (qp->ibqp.qp_type == IB_QPT_SMI || qp->ibqp.qp_type == IB_QPT_GSI) {
-                       ((struct mlx4_wqe_inline_seg *) wqe)->byte_count =
-                               cpu_to_be32((1 << 31) | 4);
-                       ((u32 *) wqe)[1] = 0;
-                       wqe  += sizeof (struct mlx4_wqe_data_seg);
+               for (i = wr->num_sge - 1; i >= 0; --i) {
+                       set_data_seg(seg + i, wr->sg_list + i);
                        size += sizeof (struct mlx4_wqe_data_seg) / 16;
                }
 

-- 
MST