Copying small received packets into a new, right-sized skb is actually
a fairly well-established optimization: it avoids the overhead of
allocating and mapping a new full-sized receive buffer, since the
original one can simply be recycled.  For example, look for
RX_COPY_THRESHOLD in tg3 or copybreak in e1000.  So this approach
makes sense to me.  However, a few comments about your patch:

 > +#define SKB_LEN_THOLD 150

150 is probably not the right value; the cost of copying half a
cacheline is probably nearly the same as copying a full one, so this
should probably be a multiple of 64 (or at least of 32, since I don't
know of any arch with cachelines smaller than 32 bytes).

With that said, I don't know what the right value is here.  256 seems
to be a popular choice; I guess it's system-dependent, but I don't
think it makes sense to add yet another knob to adjust this.
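
Just to make the cacheline point concrete, a minimal sketch (keeping
your SKB_LEN_THOLD name; the base value of 256 is only an example):

        #include <linux/cache.h>        /* L1_CACHE_ALIGN(), L1_CACHE_BYTES */

        /*
         * Whatever base value we settle on, round it up to a whole
         * number of cachelines so we never copy a partial line for
         * no benefit.
         */
        #define SKB_LEN_THOLD   L1_CACHE_ALIGN(256)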

 > +            if (skb->len < SKB_LEN_THOLD) {
 > +                    nskb = dev_alloc_skb(skb->len);
 > +                    if (!nskb) {
 > +                            ipoib_warn(priv, "failed to allocate skb\n");
 > +                            return;
 > +                    }
 > +                    memcpy(nskb->data, skb->data, skb->len);

should be skb_copy_from_linear_data()

 > +                    skb_put(nskb, skb->len);
 > +                    nskb->protocol = skb->protocol;
 > +                    dev_kfree_skb_any(skb);

and there's no point in freeing the old skb... we should repost it to
the receive queue instead (rough sketch after the quoted hunk below).

 > +                    skb = nskb;
 > +            }
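
Putting those two points together, here is a rough, untested sketch of
what I mean.  It assumes the original skb is still DMA-mapped and still
referenced by the receive ring at this point, so that jumping to the
existing repost path is enough to recycle it (stats and last_rx updates
omitted for brevity):

        if (skb->len < SKB_LEN_THOLD) {
                struct sk_buff *small_skb = dev_alloc_skb(skb->len);

                if (small_skb) {
                        skb_copy_from_linear_data(skb, skb_put(small_skb, skb->len),
                                                  skb->len);
                        small_skb->protocol = skb->protocol;
                        small_skb->dev      = dev;
                        /* XXX get correct PACKET_ type here */
                        small_skb->pkt_type = PACKET_HOST;
                        netif_receive_skb(small_skb);

                        /*
                         * The original skb stays in the ring, still
                         * mapped, so just repost it rather than
                         * freeing it and allocating a replacement.
                         */
                        goto repost;
                }
                /* small allocation failed: fall through and deliver skb */
        }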

And I think we would want something similar for ipoib_cm.c too.

Your patch also made me look again at how we handle packets the HCA
replicates back to us... there's no reason to free the skb and
allocate a new one; we could just repost the same skb again.  So the
patch below seems like it might help multicast senders.  What do
people think about putting this into 2.6.23?

diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index 8404f05..1094488 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -197,6 +197,13 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
        }
 
        /*
+        * Drop packets that this interface sent, ie multicast packets
+        * that the HCA has replicated.
+        */
+       if (wc->slid == priv->local_lid && wc->src_qp == priv->qp->qp_num)
+               goto repost;
+
+       /*
         * If we can't allocate a new RX buffer, dump
         * this packet and reuse the old buffer.
         */
@@ -213,24 +220,18 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
        skb_put(skb, wc->byte_len);
        skb_pull(skb, IB_GRH_BYTES);
 
-       if (wc->slid != priv->local_lid ||
-           wc->src_qp != priv->qp->qp_num) {
-               skb->protocol = ((struct ipoib_header *) skb->data)->proto;
-               skb_reset_mac_header(skb);
-               skb_pull(skb, IPOIB_ENCAP_LEN);
+       skb->protocol = ((struct ipoib_header *) skb->data)->proto;
+       skb_reset_mac_header(skb);
+       skb_pull(skb, IPOIB_ENCAP_LEN);
 
-               dev->last_rx = jiffies;
-               ++priv->stats.rx_packets;
-               priv->stats.rx_bytes += skb->len;
+       dev->last_rx = jiffies;
+       ++priv->stats.rx_packets;
+       priv->stats.rx_bytes += skb->len;
 
-               skb->dev = dev;
-               /* XXX get correct PACKET_ type here */
-               skb->pkt_type = PACKET_HOST;
-               netif_receive_skb(skb);
-       } else {
-               ipoib_dbg_data(priv, "dropping loopback packet\n");
-               dev_kfree_skb_any(skb);
-       }
+       skb->dev = dev;
+       /* XXX get correct PACKET_ type here */
+       skb->pkt_type = PACKET_HOST;
+       netif_receive_skb(skb);
 
 repost:
        if (unlikely(ipoib_ib_post_receive(dev, wr_id)))