Add LSO support to ipoib

Signed-off-by: Eli Cohen <[EMAIL PROTECTED]>

---

Index: linux-2.6.23-rc1/drivers/infiniband/ulp/ipoib/ipoib_main.c
===================================================================
--- linux-2.6.23-rc1.orig/drivers/infiniband/ulp/ipoib/ipoib_main.c     
2007-08-09 08:56:09.000000000 +0300
+++ linux-2.6.23-rc1/drivers/infiniband/ulp/ipoib/ipoib_main.c  2007-08-09 
09:33:19.000000000 +0300
@@ -705,7 +705,13 @@ static int ipoib_start_xmit(struct sk_bu
                                goto out;
                        }
 
-                       ipoib_send(dev, skb, neigh->ah, 
IPOIB_QPN(skb->dst->neighbour->ha));
+                       if (skb_is_gso(skb))
+                               ipoib_send_gso(dev, skb, neigh->ah,
+                                              
IPOIB_QPN(skb->dst->neighbour->ha));
+                       else
+                                ipoib_send(dev, skb, neigh->ah,
+                                          IPOIB_QPN(skb->dst->neighbour->ha));
+
                        goto out;
                }
 
@@ -1186,9 +1192,13 @@ static struct net_device *ipoib_add_port
                goto event_failed;
        }
 
-       if (!set_tx_csum(priv->dev))
+       if (!set_tx_csum(priv->dev)) {
                priv->dev->features |= NETIF_F_SG;
 
+               if (priv->ca->flags & IB_DEVICE_TCP_GSO)
+                       priv->dev->features |= NETIF_F_TSO;
+       }
+
        set_rx_csum(priv->dev);
 
        result = register_netdev(priv->dev);
Index: linux-2.6.23-rc1/drivers/net/mlx4/fw.c
===================================================================
--- linux-2.6.23-rc1.orig/drivers/net/mlx4/fw.c 2007-08-09 08:56:08.000000000 
+0300
+++ linux-2.6.23-rc1/drivers/net/mlx4/fw.c      2007-08-09 08:56:11.000000000 
+0300
@@ -133,6 +133,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *
 #define QUERY_DEV_CAP_MAX_AV_OFFSET            0x27
 #define QUERY_DEV_CAP_MAX_REQ_QP_OFFSET                0x29
 #define QUERY_DEV_CAP_MAX_RES_QP_OFFSET                0x2b
+#define QUERY_DEV_CAP_MAX_GSO_OFFSET           0x2d
 #define QUERY_DEV_CAP_MAX_RDMA_OFFSET          0x2f
 #define QUERY_DEV_CAP_RSZ_SRQ_OFFSET           0x33
 #define QUERY_DEV_CAP_ACK_DELAY_OFFSET         0x35
@@ -215,6 +216,13 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *
        dev_cap->max_requester_per_qp = 1 << (field & 0x3f);
        MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_RES_QP_OFFSET);
        dev_cap->max_responder_per_qp = 1 << (field & 0x3f);
+       MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_GSO_OFFSET);
+       field &= 0x1f;
+       if (!field)
+               dev_cap->max_gso_sz = 0;
+       else
+               dev_cap->max_gso_sz = 1 << field;
+
        MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_RDMA_OFFSET);
        dev_cap->max_rdma_global = 1 << (field & 0x3f);
        MLX4_GET(field, outbox, QUERY_DEV_CAP_ACK_DELAY_OFFSET);
@@ -377,6 +385,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *
                 dev_cap->max_sq_desc_sz, dev_cap->max_sq_sg);
        mlx4_dbg(dev, "Max RQ desc size: %d, max RQ S/G: %d\n",
                 dev_cap->max_rq_desc_sz, dev_cap->max_rq_sg);
+       mlx4_dbg(dev, "Max GSO size: %d\n", dev_cap->max_gso_sz);
 
        dump_dev_cap_flags(dev, dev_cap->flags);
 
Index: linux-2.6.23-rc1/drivers/net/mlx4/fw.h
===================================================================
--- linux-2.6.23-rc1.orig/drivers/net/mlx4/fw.h 2007-08-09 08:41:54.000000000 
+0300
+++ linux-2.6.23-rc1/drivers/net/mlx4/fw.h      2007-08-09 08:56:11.000000000 
+0300
@@ -96,6 +96,7 @@ struct mlx4_dev_cap {
        u8  bmme_flags;
        u32 reserved_lkey;
        u64 max_icm_sz;
+       int max_gso_sz;
 };
 
 struct mlx4_adapter {
Index: linux-2.6.23-rc1/drivers/net/mlx4/main.c
===================================================================
--- linux-2.6.23-rc1.orig/drivers/net/mlx4/main.c       2007-08-09 
08:41:54.000000000 +0300
+++ linux-2.6.23-rc1/drivers/net/mlx4/main.c    2007-08-09 08:56:11.000000000 
+0300
@@ -158,6 +158,7 @@ static int __devinit mlx4_dev_cap(struct
        dev->caps.page_size_cap      = ~(u32) (dev_cap->min_page_sz - 1);
        dev->caps.flags              = dev_cap->flags;
        dev->caps.stat_rate_support  = dev_cap->stat_rate_support;
+       dev->caps.max_gso_sz         = dev_cap->max_gso_sz;
 
        return 0;
 }
Index: linux-2.6.23-rc1/drivers/infiniband/hw/mlx4/main.c
===================================================================
--- linux-2.6.23-rc1.orig/drivers/infiniband/hw/mlx4/main.c     2007-08-09 
08:56:08.000000000 +0300
+++ linux-2.6.23-rc1/drivers/infiniband/hw/mlx4/main.c  2007-08-09 
08:56:11.000000000 +0300
@@ -101,6 +101,8 @@ static int mlx4_ib_query_device(struct i
                props->device_cap_flags |= IB_DEVICE_UD_AV_PORT_ENFORCE;
        if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_IPOIB_CSUM)
                props->device_cap_flags |= IB_DEVICE_IP_CSUM;
+       if (dev->dev->caps.max_gso_sz)
+               props->device_cap_flags |= IB_DEVICE_TCP_GSO;
 
        props->vendor_id           = be32_to_cpup((__be32 *) (out_mad->data + 
36)) &
                0xffffff;
@@ -572,6 +574,8 @@ static void *mlx4_ib_add(struct mlx4_dev
 
        if (ibdev->dev->caps.flags & MLX4_DEV_CAP_FLAG_IPOIB_CSUM)
                ibdev->ib_dev.flags |= IB_DEVICE_IP_CSUM;
+        if (ibdev->dev->caps.max_gso_sz)
+               ibdev->ib_dev.flags |= IB_DEVICE_TCP_GSO;
 
        if (init_node_data(ibdev))
                goto err_map;
Index: linux-2.6.23-rc1/drivers/infiniband/hw/mlx4/qp.c
===================================================================
--- linux-2.6.23-rc1.orig/drivers/infiniband/hw/mlx4/qp.c       2007-08-09 
08:56:10.000000000 +0300
+++ linux-2.6.23-rc1/drivers/infiniband/hw/mlx4/qp.c    2007-08-09 
09:03:35.000000000 +0300
@@ -65,6 +65,7 @@ struct mlx4_ib_sqp {
 
 static const __be32 mlx4_ib_opcode[] = {
        [IB_WR_SEND]                    = 
__constant_cpu_to_be32(MLX4_OPCODE_SEND),
+       [IB_WR_LSO]                     = 
__constant_cpu_to_be32(MLX4_OPCODE_LSO),
        [IB_WR_SEND_WITH_IMM]           = 
__constant_cpu_to_be32(MLX4_OPCODE_SEND_IMM),
        [IB_WR_RDMA_WRITE]              = 
__constant_cpu_to_be32(MLX4_OPCODE_RDMA_WRITE),
        [IB_WR_RDMA_WRITE_WITH_IMM]     = 
__constant_cpu_to_be32(MLX4_OPCODE_RDMA_WRITE_IMM),
@@ -740,7 +741,8 @@ static int __mlx4_ib_modify_qp(struct ib
        if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI)
                context->mtu_msgmax = (IB_MTU_4096 << 5) | 11;
        else if (ibqp->qp_type == IB_QPT_UD)
-               context->mtu_msgmax = (IB_MTU_4096 << 5) | 11;
+               context->mtu_msgmax = (IB_MTU_4096 << 5) |
+               ilog2(dev->dev->caps.max_gso_sz);
        else if (attr_mask & IB_QP_PATH_MTU) {
                if (attr->path_mtu < IB_MTU_256 || attr->path_mtu > 
IB_MTU_4096) {
                        printk(KERN_ERR "path MTU (%u) is invalid\n",
@@ -1312,6 +1314,28 @@ int mlx4_ib_post_send(struct ib_qp *ibqp
                        set_datagram_seg(wqe, wr);
                        wqe  += sizeof (struct mlx4_wqe_datagram_seg);
                        size += sizeof (struct mlx4_wqe_datagram_seg) / 16;
+
+                       if (wr->opcode == IB_WR_LSO) {
+                               int halign;
+
+                               memcpy(((struct mlx4_lso_seg *)wqe)->header,
+                                      wr->wr.ud.header, wr->wr.ud.hlen);
+                               wmb();
+                               ((struct mlx4_lso_seg *)wqe)->mss_hdr_size =
+                                       cpu_to_be32(((wr->wr.ud.mss - 
wr->wr.ud.hlen) << 16) |
+                                                   wr->wr.ud.hlen);
+
+                               halign = ALIGN(wr->wr.ud.hlen, 16);
+                               wqe += halign;
+                               size += halign >> 4;
+
+                               if (unlikely(wr->num_sge > qp->sq.max_gs - 
(halign >> 4))) {
+                                       err = -EINVAL;
+                                       *bad_wr = wr;
+                                       goto out;
+                               }
+                       }
+
                        break;
 
                case IB_QPT_SMI:
@@ -1365,6 +1389,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp
                ctrl->owner_opcode = mlx4_ib_opcode[wr->opcode] |
                        (ind & qp->sq.wqe_cnt ? cpu_to_be32(1 << 31) : 0);
 
+
                /*
                 * We can improve latency by not stamping the last
                 * send queue WQE until after ringing the doorbell, so
Index: linux-2.6.23-rc1/drivers/infiniband/ulp/ipoib/ipoib.h
===================================================================
--- linux-2.6.23-rc1.orig/drivers/infiniband/ulp/ipoib/ipoib.h  2007-08-09 
08:56:09.000000000 +0300
+++ linux-2.6.23-rc1/drivers/infiniband/ulp/ipoib/ipoib.h       2007-08-09 
08:56:11.000000000 +0300
@@ -375,6 +375,10 @@ int ipoib_add_pkey_attr(struct net_devic
 
 void ipoib_send(struct net_device *dev, struct sk_buff *skb,
                struct ipoib_ah *address, u32 qpn);
+
+void ipoib_send_gso(struct net_device *dev, struct sk_buff *skb,
+               struct ipoib_ah *address, u32 qpn);
+
 void ipoib_reap_ah(struct work_struct *work);
 
 void ipoib_flush_paths(struct net_device *dev);
Index: linux-2.6.23-rc1/drivers/infiniband/ulp/ipoib/ipoib_ib.c
===================================================================
--- linux-2.6.23-rc1.orig/drivers/infiniband/ulp/ipoib/ipoib_ib.c       
2007-08-09 08:56:09.000000000 +0300
+++ linux-2.6.23-rc1/drivers/infiniband/ulp/ipoib/ipoib_ib.c    2007-08-09 
09:33:01.000000000 +0300
@@ -38,6 +38,7 @@
 #include <linux/delay.h>
 #include <linux/dma-mapping.h>
 #include <linux/ip.h>
+#include <linux/tcp.h>
 
 #include <rdma/ib_cache.h>
 
@@ -255,15 +256,22 @@ repost:
 }
 
 static int dma_unmap_list(struct ib_device *ca, struct ipoib_mapping_st *map,
-                          u16 n)
+                          u16 n, int gso)
 {
-       int i, len;
+       int i, len, first;
 
        BUG_ON(!n);
-       ib_dma_unmap_single(ca, map[0].addr, map[0].size, DMA_TO_DEVICE);
-       len = map[0].size;
+       if (!gso) {
+               ib_dma_unmap_single(ca, map[0].addr, map[0].size, 
DMA_TO_DEVICE);
+               len = map[0].size;
+               first = 1;
+       }
+       else {
+               len = 0;
+               first = 0;
+       }
 
-       for (i = 1; i < n; ++i) {
+       for (i = first; i < n; ++i) {
                ib_dma_unmap_page(ca, map[i].addr, map[i].size,
                                  DMA_TO_DEVICE);
                len += map[i].size;
@@ -282,6 +290,7 @@ static void ipoib_ib_handle_tx_wc(struct
        ipoib_dbg_data(priv, "send completion: id %d, status: %d\n",
                       wr_id, wc->status);
 
+
        if (unlikely(wr_id >= ipoib_sendq_size)) {
                ipoib_warn(priv, "send completion event with wrid %d (> %d)\n",
                           wr_id, ipoib_sendq_size);
@@ -289,8 +298,14 @@ static void ipoib_ib_handle_tx_wc(struct
        }
 
        tx_req = &priv->tx_ring[wr_id];
-       priv->stats.tx_bytes += dma_unmap_list(priv->ca, tx_req->mapping,
-                                              
skb_shinfo(tx_req->skb)->nr_frags + 1);
+
+       if (skb_is_gso(tx_req->skb))
+               priv->stats.tx_bytes += dma_unmap_list(priv->ca, 
tx_req->mapping,
+                                                      
skb_shinfo(tx_req->skb)->nr_frags, 1);
+       else
+               priv->stats.tx_bytes += dma_unmap_list(priv->ca, 
tx_req->mapping,
+                                                      
skb_shinfo(tx_req->skb)->nr_frags + 1, 0);
+
        ++priv->stats.tx_packets;
 
        dev_kfree_skb_any(tx_req->skb);
@@ -373,7 +388,8 @@ void ipoib_ib_completion(struct ib_cq *c
 static inline int post_send(struct ipoib_dev_priv *priv,
                            unsigned int wr_id,
                            struct ib_ah *address, u32 qpn,
-                           struct ipoib_mapping_st *mapping, int ngather)
+                           struct ipoib_mapping_st *mapping, int ngather,
+                           void *lso_header, int h_len)
 {
        struct ib_send_wr *bad_wr;
        int i;
@@ -388,9 +404,89 @@ static inline int post_send(struct ipoib
        priv->tx_wr.wr.ud.remote_qpn  = qpn;
        priv->tx_wr.wr.ud.ah          = address;
 
+       if (lso_header) {
+               priv->tx_wr.wr.ud.mss = priv->dev->mtu;
+               priv->tx_wr.wr.ud.header = lso_header;
+               priv->tx_wr.wr.ud.hlen = h_len;
+               priv->tx_wr.opcode      = IB_WR_LSO;
+       }
+       else
+               priv->tx_wr.opcode      = IB_WR_SEND;
+
        return ib_post_send(priv->qp, &priv->tx_wr, &bad_wr);
 }
 
+
+void ipoib_send_gso(struct net_device *dev, struct sk_buff *skb,
+                   struct ipoib_ah *address, u32 qpn)
+{
+       struct ipoib_dev_priv *priv = netdev_priv(dev);
+       struct ipoib_tx_buf *tx_req;
+       struct skb_frag_struct *frag;
+       u64 addr;
+       unsigned short i;
+
+       ipoib_dbg_data(priv, "sending gso packet, length=%d address=%p 
qpn=0x%06x\n",
+                      skb->len, address, qpn);
+
+       if (unlikely((skb_headlen(skb) - IPOIB_ENCAP_LEN) !=
+           ((ip_hdr(skb)->ihl + tcp_hdr(skb)->doff) << 2))) {
+               ipoib_warn(priv, "headlen (%d) does not match ip (%d)and "
+                          "tcp headers(%d), dropping skb\n",
+                          skb_headlen(skb) - IPOIB_ENCAP_LEN, ip_hdr(skb)->ihl 
<< 2,
+                          tcp_hdr(skb)->doff << 2);
+               ++priv->stats.tx_errors;
+               dev_kfree_skb_any(skb);
+               return;
+       }
+
+       /*
+        * We put the skb into the tx_ring _before_ we call post_send()
+        * because it's entirely possible that the completion handler will
+        * run before we execute anything after the post_send().  That
+        * means we have to make sure everything is properly recorded and
+        * our state is consistent before we call post_send().
+        */
+       tx_req = &priv->tx_ring[priv->tx_head & (ipoib_sendq_size - 1)];
+       tx_req->skb = skb;
+
+       for (i = 0; i < skb_shinfo(skb)->nr_frags; ++i) {
+               frag = &skb_shinfo(skb)->frags[i];
+               addr = ib_dma_map_page(priv->ca, frag->page, frag->page_offset,
+                                      frag->size, DMA_TO_DEVICE);
+               if (unlikely(ib_dma_mapping_error(priv->ca, addr)))
+                       goto map_err;
+
+               tx_req->mapping[i].addr = addr;
+               tx_req->mapping[i].size = frag->size;
+//             printk("%s: [%d] addr = 0x%llx, size = %d\n", __func__, i, 
addr, frag->size);
+       }
+
+       if (unlikely(post_send(priv, priv->tx_head & (ipoib_sendq_size - 1),
+                              address->ah, qpn, tx_req->mapping,
+                              skb_shinfo(skb)->nr_frags,
+                              skb->data, skb_headlen(skb)))) {
+               ipoib_warn(priv, "post_send failed\n");
+               goto map_err;
+       } else {
+               dev->trans_start = jiffies;
+
+               address->last_send = priv->tx_head;
+               ++priv->tx_head;
+
+               if (priv->tx_head - priv->tx_tail == ipoib_sendq_size) {
+                       ipoib_dbg(priv, "TX ring full, stopping kernel net 
queue\n");
+                       netif_stop_queue(dev);
+                       set_bit(IPOIB_FLAG_NETIF_STOPPED, &priv->flags);
+               }
+       }
+       return;
+
+map_err:
+       dma_unmap_list(priv->ca, tx_req->mapping, i, 1);
+       dev_kfree_skb_any(skb);
+}
+
 void ipoib_send(struct net_device *dev, struct sk_buff *skb,
                struct ipoib_ah *address, u32 qpn)
 {
@@ -444,7 +540,7 @@ void ipoib_send(struct net_device *dev, 
        }
 
        if (unlikely(post_send(priv, priv->tx_head & (ipoib_sendq_size - 1),
-                              address->ah, qpn, tx_req->mapping, 
skb_shinfo(skb)->nr_frags + 1))) {
+                              address->ah, qpn, tx_req->mapping, 
skb_shinfo(skb)->nr_frags + 1, NULL, 0))) {
                ipoib_warn(priv, "post_send failed\n");
                goto map_err;
        } else {
@@ -462,7 +558,7 @@ void ipoib_send(struct net_device *dev, 
        return;
 
 map_err:
-       dma_unmap_list(priv->ca, tx_req->mapping, i + 1);
+       dma_unmap_list(priv->ca, tx_req->mapping, i + 1, 0);
        dev_kfree_skb_any(skb);
 }
 
@@ -657,7 +753,7 @@ int ipoib_ib_dev_stop(struct net_device 
                                tx_req = &priv->tx_ring[priv->tx_tail &
                                                        (ipoib_sendq_size - 1)];
                                dma_unmap_list(priv->ca, tx_req->mapping,
-                                              
skb_shinfo(tx_req->skb)->nr_frags + 1);
+                                              
skb_shinfo(tx_req->skb)->nr_frags + 1, skb_is_gso(tx_req->skb));
                                dev_kfree_skb_any(tx_req->skb);
                                ++priv->tx_tail;
                        }
Index: linux-2.6.23-rc1/include/linux/mlx4/device.h
===================================================================
--- linux-2.6.23-rc1.orig/include/linux/mlx4/device.h   2007-08-09 
08:41:54.000000000 +0300
+++ linux-2.6.23-rc1/include/linux/mlx4/device.h        2007-08-09 
08:56:11.000000000 +0300
@@ -177,6 +177,7 @@ struct mlx4_caps {
        u32                     flags;
        u16                     stat_rate_support;
        u8                      port_width_cap[MLX4_MAX_PORTS + 1];
+       int                     max_gso_sz;
 };
 
 struct mlx4_buf_list {
Index: linux-2.6.23-rc1/include/linux/mlx4/qp.h
===================================================================
--- linux-2.6.23-rc1.orig/include/linux/mlx4/qp.h       2007-08-09 
08:56:08.000000000 +0300
+++ linux-2.6.23-rc1/include/linux/mlx4/qp.h    2007-08-09 08:56:11.000000000 
+0300
@@ -215,6 +215,11 @@ struct mlx4_wqe_datagram_seg {
        __be32                  reservd[2];
 };
 
+struct mlx4_lso_seg {
+       __be32                  mss_hdr_size;
+       __be32                  header[0];
+};
+
 struct mlx4_wqe_bind_seg {
        __be32                  flags1;
        __be32                  flags2;
Index: linux-2.6.23-rc1/include/rdma/ib_verbs.h
===================================================================
--- linux-2.6.23-rc1.orig/include/rdma/ib_verbs.h       2007-08-09 
08:56:08.000000000 +0300
+++ linux-2.6.23-rc1/include/rdma/ib_verbs.h    2007-08-09 08:56:11.000000000 
+0300
@@ -94,7 +94,8 @@ enum ib_device_cap_flags {
        IB_DEVICE_ZERO_STAG             = (1<<15),
        IB_DEVICE_SEND_W_INV            = (1<<16),
        IB_DEVICE_MEM_WINDOW            = (1<<17),
-       IB_DEVICE_IP_CSUM               = (1<<18)
+       IB_DEVICE_IP_CSUM               = (1<<18),
+       IB_DEVICE_TCP_GSO               = (1<<19)
 };
 
 enum ib_atomic_cap {
@@ -606,6 +607,7 @@ enum ib_wr_opcode {
        IB_WR_RDMA_WRITE,
        IB_WR_RDMA_WRITE_WITH_IMM,
        IB_WR_SEND,
+       IB_WR_LSO,
        IB_WR_SEND_WITH_IMM,
        IB_WR_RDMA_READ,
        IB_WR_ATOMIC_CMP_AND_SWP,
@@ -648,6 +650,9 @@ struct ib_send_wr {
                } atomic;
                struct {
                        struct ib_ah *ah;
+                       void   *header;
+                       int     hlen;
+                       int     mss;
                        u32     remote_qpn;
                        u32     remote_qkey;
                        u16     pkey_index; /* valid for GSI only */
Index: linux-2.6.23-rc1/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
===================================================================
--- linux-2.6.23-rc1.orig/drivers/infiniband/ulp/ipoib/ipoib_verbs.c    
2007-08-09 08:56:07.000000000 +0300
+++ linux-2.6.23-rc1/drivers/infiniband/ulp/ipoib/ipoib_verbs.c 2007-08-09 
08:56:11.000000000 +0300
@@ -200,7 +200,6 @@ int ipoib_transport_dev_init(struct net_
        for (i = 0; i < MAX_SKB_FRAGS + 1; ++i)
                priv->tx_sge[i].lkey    = priv->mr->lkey;
 
-       priv->tx_wr.opcode      = IB_WR_SEND;
        priv->tx_wr.sg_list     = priv->tx_sge;
        priv->tx_wr.send_flags  = IB_SEND_SIGNALED;
 

_______________________________________________
general mailing list
[email protected]
http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general

Reply via email to