Add LSO support to ipoib

Using LSO improves performance by allowing the software to avoid
fragmenting the payload into MTU-sized packets, and it also results
in a lower rate of interrupts, since each such work request
generates just one CQE.

Signed-off-by: Eli Cohen <[EMAIL PROTECTED]>

---

Index: linux-2.6.23-rc1/drivers/infiniband/ulp/ipoib/ipoib_main.c
===================================================================
--- linux-2.6.23-rc1.orig/drivers/infiniband/ulp/ipoib/ipoib_main.c	2007-08-15 20:50:33.000000000 +0300
+++ linux-2.6.23-rc1/drivers/infiniband/ulp/ipoib/ipoib_main.c	2007-08-15 20:50:38.000000000 +0300
@@ -704,7 +704,13 @@ static int ipoib_start_xmit(struct sk_bu
                                goto out;
                        }
 
-                       ipoib_send(dev, skb, neigh->ah, IPOIB_QPN(skb->dst->neighbour->ha));
+                       if (skb_is_gso(skb))
+                               ipoib_send_gso(dev, skb, neigh->ah,
+                                          IPOIB_QPN(skb->dst->neighbour->ha));
+                       else
+                               ipoib_send(dev, skb, neigh->ah,
+                                          IPOIB_QPN(skb->dst->neighbour->ha));
+
                        goto out;
                }
 
@@ -1152,6 +1158,10 @@ static struct net_device *ipoib_add_port
                goto event_failed;
        }
 
+       if (priv->dev->features & NETIF_F_SG)
+               if (priv->ca->flags & IB_DEVICE_TCP_GSO)
+                       priv->dev->features |= NETIF_F_TSO;
+
        result = register_netdev(priv->dev);
        if (result) {
                printk(KERN_WARNING "%s: couldn't register ipoib port %d; error %d\n",
Index: linux-2.6.23-rc1/drivers/infiniband/ulp/ipoib/ipoib.h
===================================================================
--- linux-2.6.23-rc1.orig/drivers/infiniband/ulp/ipoib/ipoib.h	2007-08-15 20:50:33.000000000 +0300
+++ linux-2.6.23-rc1/drivers/infiniband/ulp/ipoib/ipoib.h	2007-08-15 20:50:38.000000000 +0300
@@ -373,6 +373,10 @@ int ipoib_add_pkey_attr(struct net_devic
 
 void ipoib_send(struct net_device *dev, struct sk_buff *skb,
                struct ipoib_ah *address, u32 qpn);
+
+void ipoib_send_gso(struct net_device *dev, struct sk_buff *skb,
+               struct ipoib_ah *address, u32 qpn);
+
 void ipoib_reap_ah(struct work_struct *work);
 
 void ipoib_flush_paths(struct net_device *dev);
Index: linux-2.6.23-rc1/drivers/infiniband/ulp/ipoib/ipoib_ib.c
===================================================================
--- linux-2.6.23-rc1.orig/drivers/infiniband/ulp/ipoib/ipoib_ib.c	2007-08-15 20:50:33.000000000 +0300
+++ linux-2.6.23-rc1/drivers/infiniband/ulp/ipoib/ipoib_ib.c	2007-08-15 20:50:38.000000000 +0300
@@ -38,6 +38,7 @@
 #include <linux/delay.h>
 #include <linux/dma-mapping.h>
 #include <linux/ip.h>
+#include <linux/tcp.h>
 
 #include <rdma/ib_cache.h>
 
@@ -249,15 +250,24 @@ repost:
 }
 
 static int dma_unmap_list(struct ib_device *ca, struct ipoib_mapping_st *map,
-                          u16 n)
+                          u16 n, int gso)
 {
        int i;
        int len;
+       int first;
 
-       ib_dma_unmap_single(ca, map[0].addr, map[0].size, DMA_TO_DEVICE);
-       len = map[0].size;
+       if (!gso) {
+               ib_dma_unmap_single(ca, map[0].addr, map[0].size,
+                                   DMA_TO_DEVICE);
+               len = map[0].size;
+               first = 1;
+       } else {
+               len = 0;
+               first = 0;
+       }
+
+       for (i = first; i < n; ++i) {
 
-       for (i = 1; i < n; ++i) {
                ib_dma_unmap_page(ca, map[i].addr, map[i].size,
                                  DMA_TO_DEVICE);
                len += map[i].size;
@@ -276,6 +286,7 @@ static void ipoib_ib_handle_tx_wc(struct
        ipoib_dbg_data(priv, "send completion: id %d, status: %d\n",
                       wr_id, wc->status);
 
+
        if (unlikely(wr_id >= ipoib_sendq_size)) {
                ipoib_warn(priv, "send completion event with wrid %d (> %d)\n",
                           wr_id, ipoib_sendq_size);
@@ -283,8 +294,16 @@ static void ipoib_ib_handle_tx_wc(struct
        }
 
        tx_req = &priv->tx_ring[wr_id];
-       priv->stats.tx_bytes += dma_unmap_list(priv->ca, tx_req->mapping,
-                                       skb_shinfo(tx_req->skb)->nr_frags + 1);
+       if (skb_is_gso(tx_req->skb))
+               priv->stats.tx_bytes +=
+                       dma_unmap_list(priv->ca, tx_req->mapping,
+                                      skb_shinfo(tx_req->skb)->nr_frags, 1);
+       else
+               priv->stats.tx_bytes +=
+                       dma_unmap_list(priv->ca, tx_req->mapping,
+                                      skb_shinfo(tx_req->skb)->nr_frags + 1,
+                                      0);
+
        ++priv->stats.tx_packets;
 
        dev_kfree_skb_any(tx_req->skb);
@@ -367,7 +386,8 @@ void ipoib_ib_completion(struct ib_cq *c
 static inline int post_send(struct ipoib_dev_priv *priv,
                            unsigned int wr_id,
                            struct ib_ah *address, u32 qpn,
-                           struct ipoib_mapping_st *mapping, int ngather)
+                           struct ipoib_mapping_st *mapping, int ngather,
+                           void *lso_header, int h_len)
 {
        struct ib_send_wr *bad_wr;
        int i;
@@ -382,9 +402,88 @@ static inline int post_send(struct ipoib
        priv->tx_wr.wr.ud.remote_qpn  = qpn;
        priv->tx_wr.wr.ud.ah          = address;
 
+       if (lso_header) {
+               priv->tx_wr.wr.ud.mss = priv->dev->mtu;
+               priv->tx_wr.wr.ud.header = lso_header;
+               priv->tx_wr.wr.ud.hlen = h_len;
+               priv->tx_wr.opcode      = IB_WR_LSO;
+       } else
+               priv->tx_wr.opcode      = IB_WR_SEND;
+
        return ib_post_send(priv->qp, &priv->tx_wr, &bad_wr);
 }
 
+
+void ipoib_send_gso(struct net_device *dev, struct sk_buff *skb,
+                   struct ipoib_ah *address, u32 qpn)
+{
+       struct ipoib_dev_priv *priv = netdev_priv(dev);
+       struct ipoib_tx_buf *tx_req;
+       struct skb_frag_struct *frag;
+       u64 addr;
+       unsigned short i;
+
+       ipoib_dbg_data(priv, "sending gso packet, length=%d address=%p"
+                      " qpn=0x%06x\n", skb->len, address, qpn);
+
+       if (unlikely((skb_headlen(skb) - IPOIB_ENCAP_LEN) !=
+           ((ip_hdr(skb)->ihl + tcp_hdr(skb)->doff) << 2))) {
+               ipoib_warn(priv, "headlen (%d) does not match ip (%d)and "
+                          "tcp headers(%d), dropping skb\n",
+                          skb_headlen(skb) - IPOIB_ENCAP_LEN,
+                          ip_hdr(skb)->ihl << 2, tcp_hdr(skb)->doff << 2);
+               ++priv->stats.tx_errors;
+               dev_kfree_skb_any(skb);
+               return;
+       }
+
+       /*
+        * We put the skb into the tx_ring _before_ we call post_send()
+        * because it's entirely possible that the completion handler will
+        * run before we execute anything after the post_send().  That
+        * means we have to make sure everything is properly recorded and
+        * our state is consistent before we call post_send().
+        */
+       tx_req = &priv->tx_ring[priv->tx_head & (ipoib_sendq_size - 1)];
+       tx_req->skb = skb;
+
+       for (i = 0; i < skb_shinfo(skb)->nr_frags; ++i) {
+               frag = &skb_shinfo(skb)->frags[i];
+               addr = ib_dma_map_page(priv->ca, frag->page, frag->page_offset,
+                                      frag->size, DMA_TO_DEVICE);
+               if (unlikely(ib_dma_mapping_error(priv->ca, addr)))
+                       goto map_err;
+
+               tx_req->mapping[i].addr = addr;
+               tx_req->mapping[i].size = frag->size;
+       }
+
+       if (unlikely(post_send(priv, priv->tx_head & (ipoib_sendq_size - 1),
+                              address->ah, qpn, tx_req->mapping,
+                              skb_shinfo(skb)->nr_frags, skb->data,
+                              skb_headlen(skb)))) {
+               ipoib_warn(priv, "post_send failed\n");
+               goto map_err;
+       } else {
+               dev->trans_start = jiffies;
+
+               address->last_send = priv->tx_head;
+               ++priv->tx_head;
+
+               if (priv->tx_head - priv->tx_tail == ipoib_sendq_size) {
+                       ipoib_dbg(priv, "TX ring full, stopping kernel"
+                                 " net queue\n");
+                       netif_stop_queue(dev);
+                       set_bit(IPOIB_FLAG_NETIF_STOPPED, &priv->flags);
+               }
+       }
+       return;
+
+map_err:
+       dma_unmap_list(priv->ca, tx_req->mapping, i, 1);
+       dev_kfree_skb_any(skb);
+}
+
 void ipoib_send(struct net_device *dev, struct sk_buff *skb,
                struct ipoib_ah *address, u32 qpn)
 {
@@ -449,7 +548,7 @@ void ipoib_send(struct net_device *dev, 
 
        if (unlikely(post_send(priv, priv->tx_head & (ipoib_sendq_size - 1),
                               address->ah, qpn, tx_req->mapping,
-                              skb_shinfo(skb)->nr_frags + 1))) {
+                              skb_shinfo(skb)->nr_frags + 1, NULL, 0))) {
                ipoib_warn(priv, "post_send failed\n");
                goto map_err;
        } else {
@@ -467,7 +566,7 @@ void ipoib_send(struct net_device *dev, 
        return;
 
 map_err:
-       dma_unmap_list(priv->ca, tx_req->mapping, i + 1);
+       dma_unmap_list(priv->ca, tx_req->mapping, i + 1, 0);
        dev_kfree_skb_any(skb);
 }
 
@@ -664,7 +763,8 @@ int ipoib_ib_dev_stop(struct net_device 
                                                        (ipoib_sendq_size - 1)];
                                skb = tx_req->skb;
                                dma_unmap_list(priv->ca, tx_req->mapping,
-                                              skb_shinfo(skb)->nr_frags + 1);
+                                              skb_shinfo(skb)->nr_frags + 1,
+                                              skb_is_gso(skb));
                                dev_kfree_skb_any(skb);
                                ++priv->tx_tail;
                        }
Index: linux-2.6.23-rc1/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
===================================================================
--- linux-2.6.23-rc1.orig/drivers/infiniband/ulp/ipoib/ipoib_verbs.c	2007-08-15 20:50:33.000000000 +0300
+++ linux-2.6.23-rc1/drivers/infiniband/ulp/ipoib/ipoib_verbs.c	2007-08-15 20:50:38.000000000 +0300
@@ -206,7 +206,6 @@ int ipoib_transport_dev_init(struct net_
        for (i = 0; i < MAX_SKB_FRAGS + 1; ++i)
                priv->tx_sge[i].lkey    = priv->mr->lkey;
 
-       priv->tx_wr.opcode      = IB_WR_SEND;
        priv->tx_wr.sg_list     = priv->tx_sge;
        priv->tx_wr.send_flags  = IB_SEND_SIGNALED;
 

_______________________________________________
general mailing list
[email protected]
http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general

Reply via email to