The Mellanox regression tests reported a number of failures when
multiple threads were accessing the same socket concurrently.  In
addition to the test failures, there were log messages of the form:
  sdp_sock(54386:19002): Could not reap -5 in-flight sends

This patch fixes all of these failures and errors.

V2 updates the fix to handle 2.6.22+ kernels, where the sk_buff
structure has changed.

Signed-off-by: Jim Mott <[EMAIL PROTECTED]>
---

Index: ofa_1_3_dev_kernel/drivers/infiniband/ulp/sdp/sdp.h
===================================================================
--- ofa_1_3_dev_kernel.orig/drivers/infiniband/ulp/sdp/sdp.h
2007-12-04 12:08:38.000000000 -0600
+++ ofa_1_3_dev_kernel/drivers/infiniband/ulp/sdp/sdp.h 2007-12-04
12:08:59.000000000 -0600
@@ -173,7 +173,6 @@
 
        /* BZCOPY data */
        int   zcopy_thresh;
-       void *zcopy_context;
 
        struct ib_sge ibsge[SDP_MAX_SEND_SKB_FRAGS + 1];
        struct ib_wc  ibwc[SDP_NUM_WC];
Index: ofa_1_3_dev_kernel/drivers/infiniband/ulp/sdp/sdp_bcopy.c
===================================================================
--- ofa_1_3_dev_kernel.orig/drivers/infiniband/ulp/sdp/sdp_bcopy.c
2007-12-04 12:08:38.000000000 -0600
+++ ofa_1_3_dev_kernel/drivers/infiniband/ulp/sdp/sdp_bcopy.c
2007-12-04 12:55:45.000000000 -0600
@@ -218,6 +218,7 @@
        struct ib_device *dev;
        struct sdp_buf *tx_req;
        struct sk_buff *skb;
+       struct bzcopy_state *bz;
        int i, frags;
 
        if (unlikely(mseq != ssk->tx_tail)) {
@@ -242,16 +243,9 @@
        ++ssk->tx_tail;
 
        /* TODO: AIO and real zcopy cdoe; add their context support here
*/
-       if (ssk->zcopy_context && skb->data_len) {
-               struct bzcopy_state *bz;
-               struct sdp_bsdh *h;
-
-               h = (struct sdp_bsdh *)skb->data;
-               if (h->mid == SDP_MID_DATA) {
-                       bz = (struct bzcopy_state *)ssk->zcopy_context;
-                       bz->busy--;
-               }
-       }
+       bz = *(struct bzcopy_state **)skb->cb;
+       if (bz)
+               bz->busy--;
 
        return skb;
 }
@@ -751,12 +745,8 @@
                sdp_post_recvs(ssk);
                sdp_post_sends(ssk, 0);
 
-               if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) {
-                       if (ssk->zcopy_context)
-                               sdp_bzcopy_write_space(ssk);
-                       else
-                               sk_stream_write_space(&ssk->isk.sk);
-               }
+               if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
+                       sk_stream_write_space(&ssk->isk.sk);
        }
 
        return ret;
Index: ofa_1_3_dev_kernel/drivers/infiniband/ulp/sdp/sdp_main.c
===================================================================
--- ofa_1_3_dev_kernel.orig/drivers/infiniband/ulp/sdp/sdp_main.c
2007-12-04 12:08:38.000000000 -0600
+++ ofa_1_3_dev_kernel/drivers/infiniband/ulp/sdp/sdp_main.c
2007-12-04 12:54:34.000000000 -0600
@@ -1203,10 +1203,24 @@
 
 static inline struct bzcopy_state *sdp_bz_cleanup(struct bzcopy_state
*bz)
 {
-       int i;
+       int i, max_retry;
        struct sdp_sock *ssk = (struct sdp_sock *)bz->ssk;
 
-       ssk->zcopy_context = NULL;
+       /* Wait for in-flight sends; should be quick */
+       if (bz->busy) {
+               struct sock *sk = &ssk->isk.sk;
+
+               for (max_retry = 0; max_retry < 10000; max_retry++) {
+                       poll_send_cq(sk);
+
+                       if (!bz->busy)
+                               break;
+               }
+
+               if (bz->busy)
+                       sdp_warn(sk, "Could not reap %d in-flight
sends\n",
+                                bz->busy);
+       }
 
        if (bz->pages) {
                for (i = bz->cur_page; i < bz->page_cnt; i++)
@@ -1280,14 +1294,14 @@
        }
 
        up_write(&current->mm->mmap_sem);
-       ssk->zcopy_context = bz;
 
        return bz;
 
 out_2:
        up_write(&current->mm->mmap_sem);
+       kfree(bz->pages);
 out_1:
-       sdp_bz_cleanup(bz);
+       kfree(bz);
 
        return NULL;
 }
@@ -1461,19 +1475,17 @@
 };
 
 /* like sk_stream_memory_free - except measures remote credits */
-static inline int sdp_bzcopy_slots_avail(struct sdp_sock *ssk)
+static inline int sdp_bzcopy_slots_avail(struct sdp_sock *ssk,
+                                        struct bzcopy_state *bz)
 {
-       struct bzcopy_state *bz = (struct bzcopy_state
*)ssk->zcopy_context;
-
-       BUG_ON(!bz);
        return slots_free(ssk) > bz->busy;
 }
 
 /* like sk_stream_wait_memory - except waits on remote credits */
-static int sdp_bzcopy_wait_memory(struct sdp_sock *ssk, long *timeo_p)
+static int sdp_bzcopy_wait_memory(struct sdp_sock *ssk, long *timeo_p,
+                                 struct bzcopy_state *bz)
 {
        struct sock *sk = &ssk->isk.sk;
-       struct bzcopy_state *bz = (struct bzcopy_state
*)ssk->zcopy_context;
        int err = 0;
        long vm_wait = 0;
        long current_timeo = *timeo_p;
@@ -1481,7 +1493,7 @@
 
        BUG_ON(!bz);
 
-       if (sdp_bzcopy_slots_avail(ssk))
+       if (sdp_bzcopy_slots_avail(ssk, bz))
                current_timeo = vm_wait = (net_random() % (HZ / 5)) + 2;
 
        while (1) {
@@ -1506,13 +1518,13 @@
 
                clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
 
-               if (sdp_bzcopy_slots_avail(ssk))
+               if (sdp_bzcopy_slots_avail(ssk, bz))
                        break;
 
                set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
                sk->sk_write_pending++;
                sk_wait_event(sk, &current_timeo,
-                       sdp_bzcopy_slots_avail(ssk) && vm_wait);
+                       sdp_bzcopy_slots_avail(ssk, bz) && vm_wait);
                sk->sk_write_pending--;
 
                if (vm_wait) {
@@ -1603,7 +1615,8 @@
                        skb = sk->sk_write_queue.prev;
 
                        if (!sk->sk_send_head ||
-                           (copy = size_goal - skb->len) <= 0) {
+                           (copy = size_goal - skb->len) <= 0 ||
+                           bz != *(struct bzcopy_state **)skb->cb) {
 
 new_segment:
                                /*
@@ -1614,7 +1627,7 @@
                                 * receive credits.
                                 */
                                if (bz) {
-                                       if
(!sdp_bzcopy_slots_avail(ssk))
+                                       if (!sdp_bzcopy_slots_avail(ssk,
bz))
                                                goto wait_for_sndbuf;
                                } else {
                                        if (!sk_stream_memory_free(sk))
@@ -1626,6 +1639,8 @@
                                if (!skb)
                                        goto wait_for_memory;
 
+                               *((struct bzcopy_state **)skb->cb) = bz;
+
                                /*
                                 * Check whether we can use HW checksum.
                                 */
@@ -1691,7 +1706,7 @@
                        if (copied)
                                sdp_push(sk, ssk, flags & ~MSG_MORE,
mss_now, TCP_NAGLE_PUSH);
 
-                       err = (bz) ? sdp_bzcopy_wait_memory(ssk, &timeo)
:
+                       err = (bz) ? sdp_bzcopy_wait_memory(ssk, &timeo,
bz) :
                                     sk_stream_wait_memory(sk, &timeo);
                        if (err)
                                goto do_error;
@@ -1704,24 +1719,10 @@
 out:
        if (copied) {
                sdp_push(sk, ssk, flags, mss_now, ssk->nonagle);
-               if (bz) {
-                       int max_retry;
-
-                       /* Wait for in-flight sends; should be quick */
-                       for (max_retry = 0; max_retry < 10000;
max_retry++) {
-                               if (!bz->busy)
-                                       break;
-
-                               poll_send_cq(sk);
-                       }
-
-                       if (bz->busy)
-                               sdp_warn(sk,
-                                        "Could not reap %d in-flight
sends\n",
-                                        bz->busy);
 
+               if (bz)
                        bz = sdp_bz_cleanup(bz);
-               } else
+               else
                        if (size > send_poll_thresh)
                                poll_send_cq(sk);
        }
_______________________________________________
general mailing list
[email protected]
http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general

Reply via email to