In UDP tests we have been running here, I noticed that when sending a
high rate of UDP packets over IPoIB, packets are sometimes dropped.
Investigating further, I found that the packets are dropped because
the socket buffer is exhausted and we fail in the following code:

net/core/sock.c

int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
{
        int err = 0;
        int skb_len;

        /* Cast skb->rcvbuf to unsigned... It's pointless, but reduces
           number of warnings when compiling with -W --ANK
         */
        if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
            (unsigned)sk->sk_rcvbuf) {
                err = -ENOMEM;
                goto out;
        }


In the condition above, skb->truesize is about the same as the size
allocated for the skb; for small packets, this charges the socket
far more than the packet actually consumes.

I used the following patch to improve things in this regard: it copies
small packets into smaller skbs before passing them up the stack. I am
not saying this is the best way to handle this, but I would like to
hear opinions on how we should address this problem.

Index: connectx_kernel/drivers/infiniband/ulp/ipoib/ipoib_ib.c
===================================================================
--- connectx_kernel.orig/drivers/infiniband/ulp/ipoib/ipoib_ib.c        
2007-07-05 16:54:56.000000000 +0300
+++ connectx_kernel/drivers/infiniband/ulp/ipoib/ipoib_ib.c     2007-07-05 
17:10:32.000000000 +0300
@@ -50,6 +50,8 @@
                 "Enable data path debug tracing if > 0");
 #endif
 
+#define SKB_LEN_THOLD 150
+
 static DEFINE_MUTEX(pkey_mutex);
 
 struct ipoib_ah *ipoib_create_ah(struct net_device *dev,
@@ -169,7 +171,7 @@
 {
        struct ipoib_dev_priv *priv = netdev_priv(dev);
        unsigned int wr_id = wc->wr_id & ~IPOIB_OP_RECV;
-       struct sk_buff *skb;
+       struct sk_buff *skb, *nskb;
        u64 addr;
 
        ipoib_dbg_data(priv, "recv completion: id %d, status: %d\n",
@@ -223,6 +225,19 @@
                ++priv->stats.rx_packets;
                priv->stats.rx_bytes += skb->len;
 
+               if (skb->len < SKB_LEN_THOLD) {
+                       nskb = dev_alloc_skb(skb->len);
+                       if (!nskb) {
+                               ipoib_warn(priv, "failed to allocate skb\n");
+                               return;
+                       }
+                       memcpy(nskb->data, skb->data, skb->len);
+                       skb_put(nskb, skb->len);
+                       nskb->protocol = skb->protocol;
+                       dev_kfree_skb_any(skb);
+                       skb = nskb;
+               }
+
                skb->dev = dev;
                /* XXX get correct PACKET_ type here */
                skb->pkt_type = PACKET_HOST;
@@ -350,7 +365,6 @@
        struct ipoib_dev_priv *priv = netdev_priv(dev);
        int n, i;
 
-       ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
        do {
                n = ib_poll_cq(cq, IPOIB_NUM_WC, priv->ibwc);
                for (i = 0; i < n; ++i) {
@@ -363,6 +377,7 @@
                                ipoib_ib_handle_tx_wc(dev, wc);
                }
        } while (n == IPOIB_NUM_WC);
+       ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
 }
 #endif
 

_______________________________________________
general mailing list
[email protected]
http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general

Reply via email to