>From f60c81b106a5cbbef9d4943012215022e9d7f0b0 Mon Sep 17 00:00:00 2001
From: Eli Cohen <[EMAIL PROTECTED]>
Date: Tue, 11 Mar 2008 15:26:38 +0200
Subject: [PATCH] IB/mlx4: Add LSO support

Add LSO support to the mlx4 driver so that it can send SKBs
passed down by a driver that advertises NETIF_F_TSO.

Signed-off-by: Eli Cohen <[EMAIL PROTECTED]>
---
 drivers/infiniband/hw/mlx4/cq.c   |    3 ++
 drivers/infiniband/hw/mlx4/main.c |    3 ++
 drivers/infiniband/hw/mlx4/qp.c   |   60 ++++++++++++++++++++++++++++++++++--
 drivers/net/mlx4/fw.c             |    9 +++++
 drivers/net/mlx4/fw.h             |    1 +
 drivers/net/mlx4/main.c           |    1 +
 include/linux/mlx4/device.h       |    1 +
 include/linux/mlx4/qp.h           |    5 +++
 8 files changed, 79 insertions(+), 4 deletions(-)

diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c
index d2e32b0..7d70af7 100644
--- a/drivers/infiniband/hw/mlx4/cq.c
+++ b/drivers/infiniband/hw/mlx4/cq.c
@@ -420,6 +420,9 @@ static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq,
                case MLX4_OPCODE_BIND_MW:
                        wc->opcode    = IB_WC_BIND_MW;
                        break;
+               case MLX4_OPCODE_LSO:
+                       wc->opcode    = IB_WC_LSO;
+                       break;
                }
        } else {
                wc->byte_len = be32_to_cpu(cqe->byte_cnt);
diff --git a/drivers/infiniband/hw/mlx4/main.c 
b/drivers/infiniband/hw/mlx4/main.c
index ef5e9db..4bc2ca4 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -101,6 +101,9 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
                props->device_cap_flags |= IB_DEVICE_UD_AV_PORT_ENFORCE;
        if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_IPOIB_CSUM)
                props->device_cap_flags |= IB_DEVICE_UD_IP_CSUM;
+       if (dev->dev->caps.max_gso_sz)
+               props->device_cap_flags |= IB_DEVICE_TCP_TSO;
+
 
        props->vendor_id           = be32_to_cpup((__be32 *) (out_mad->data + 
36)) &
                0xffffff;
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 31b2b5b..d5a42e8 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -71,6 +71,7 @@ enum {
 
 static const __be32 mlx4_ib_opcode[] = {
        [IB_WR_SEND]                    = 
__constant_cpu_to_be32(MLX4_OPCODE_SEND),
+       [IB_WR_LSO]                     = 
__constant_cpu_to_be32(MLX4_OPCODE_LSO),
        [IB_WR_SEND_WITH_IMM]           = 
__constant_cpu_to_be32(MLX4_OPCODE_SEND_IMM),
        [IB_WR_RDMA_WRITE]              = 
__constant_cpu_to_be32(MLX4_OPCODE_RDMA_WRITE),
        [IB_WR_RDMA_WRITE_WITH_IMM]     = 
__constant_cpu_to_be32(MLX4_OPCODE_RDMA_WRITE_IMM),
@@ -311,6 +312,7 @@ static int set_kernel_sq_size(struct mlx4_ib_dev *dev, 
struct ib_qp_cap *cap,
                              enum ib_qp_type type, struct mlx4_ib_qp *qp)
 {
        int s;
+       int reserve;
 
        /* Sanity check SQ size before proceeding */
        if (cap->max_send_wr     > dev->dev->caps.max_wqes  ||
@@ -327,9 +329,11 @@ static int set_kernel_sq_size(struct mlx4_ib_dev *dev, 
struct ib_qp_cap *cap,
            cap->max_send_sge + 2 > dev->dev->caps.max_sq_sg)
                return -EINVAL;
 
+       reserve = qp->flags & MLX4_QP_LSO ? 64 : 0;
+
        s = max(cap->max_send_sge * sizeof (struct mlx4_wqe_data_seg),
                cap->max_inline_data + sizeof (struct mlx4_wqe_inline_seg)) +
-               send_wqe_overhead(type);
+               send_wqe_overhead(type) + reserve;
 
        /*
         * Hermon supports shrinking WQEs, such that a single work
@@ -393,7 +397,7 @@ static int set_kernel_sq_size(struct mlx4_ib_dev *dev, 
struct ib_qp_cap *cap,
                ++qp->sq.wqe_shift;
        }
 
-       qp->sq.max_gs = ((qp->sq_max_wqes_per_wr << qp->sq.wqe_shift) -
+       qp->sq.max_gs = ((qp->sq_max_wqes_per_wr << qp->sq.wqe_shift) - reserve 
-
                         send_wqe_overhead(type)) / sizeof (struct 
mlx4_wqe_data_seg);
 
        qp->buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) +
@@ -503,6 +507,9 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct 
ib_pd *pd,
        } else {
                qp->sq_no_prefetch = 0;
 
+               if (init_attr->create_flags & QP_CREATE_LSO)
+                       qp->flags |= MLX4_QP_LSO;
+
                err = set_kernel_sq_size(dev, &init_attr->cap, 
init_attr->qp_type, qp);
                if (err)
                        goto err;
@@ -876,9 +883,15 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
                }
        }
 
-       if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI ||
-           ibqp->qp_type == IB_QPT_UD)
+       if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI)
                context->mtu_msgmax = (IB_MTU_4096 << 5) | 11;
+       else if (ibqp->qp_type == IB_QPT_UD) {
+               if (qp->flags & MLX4_QP_LSO)
+                        context->mtu_msgmax = (IB_MTU_4096 << 5) |
+                                             ilog2(dev->dev->caps.max_gso_sz);
+               else
+                        context->mtu_msgmax = (IB_MTU_4096 << 5) | 11;
+       }
        else if (attr_mask & IB_QP_PATH_MTU) {
                if (attr->path_mtu < IB_MTU_256 || attr->path_mtu > 
IB_MTU_4096) {
                        printk(KERN_ERR "path MTU (%u) is invalid\n",
@@ -1396,6 +1409,28 @@ static void __set_data_seg(struct mlx4_wqe_data_seg 
*dseg, struct ib_sge *sg)
        dseg->addr       = cpu_to_be64(sg->addr);
 }
 
+/* Fill the LSO segment at wqe; *lso_seg_len gets its length rounded up to 16. */
+static int build_lso_seg(struct mlx4_lso_seg *wqe, struct ib_send_wr *wr,
+                        struct mlx4_ib_qp *qp, int *lso_seg_len)
+{
+       /* The segment is the mss_hdr_size word followed by the copied headers. */
+       int halign = ALIGN(sizeof *wqe + wr->wr.ud.hlen, 16);
+
+       if (unlikely(!(qp->flags & MLX4_QP_LSO) &&
+                    wr->num_sge > qp->sq.max_gs - (halign >> 4)))
+               return -EINVAL;
+
+       memcpy(wqe->header, wr->wr.ud.header, wr->wr.ud.hlen);
+       /* make sure LSO header is written before overwriting stamping */
+       wmb();
+
+       wqe->mss_hdr_size = cpu_to_be32(((wr->wr.ud.mss - wr->wr.ud.hlen)
+                                        << 16) | wr->wr.ud.hlen);
+
+       *lso_seg_len = halign;
+       return 0;
+}
+
 int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                      struct ib_send_wr **bad_wr)
 {
@@ -1419,11 +1454,13 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct 
ib_send_wr *wr,
                if (mlx4_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) {
                        err = -ENOMEM;
                        *bad_wr = wr;
+                       printk("failed here %d\n", __LINE__);
                        goto out;
                }
 
                if (unlikely(wr->num_sge > qp->sq.max_gs)) {
                        err = -EINVAL;
+                       printk("failed here %d: num_sge=%d, max_gs=%d\n", 
__LINE__, wr->num_sge, qp->sq.max_gs);
                        *bad_wr = wr;
                        goto out;
                }
@@ -1487,12 +1524,27 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct 
ib_send_wr *wr,
                        set_datagram_seg(wqe, wr);
                        wqe  += sizeof (struct mlx4_wqe_datagram_seg);
                        size += sizeof (struct mlx4_wqe_datagram_seg) / 16;
+
+                       if (wr->opcode == IB_WR_LSO) {
+                               int hlen;
+
+                               err = build_lso_seg(wqe, wr, qp, &hlen);
+                               if (err) {
+                                       printk("failed here %d\n", __LINE__);
+                                       *bad_wr = wr;
+                                       goto out;
+                               }
+                               wqe += hlen;
+                               size += hlen >> 4;
+                       }
+
                        break;
 
                case IB_QPT_SMI:
                case IB_QPT_GSI:
                        err = build_mlx_header(to_msqp(qp), wr, ctrl);
                        if (err < 0) {
+                               printk("failed here %d\n", __LINE__);
                                *bad_wr = wr;
                                goto out;
                        }
diff --git a/drivers/net/mlx4/fw.c b/drivers/net/mlx4/fw.c
index f494c3e..d82f275 100644
--- a/drivers/net/mlx4/fw.c
+++ b/drivers/net/mlx4/fw.c
@@ -133,6 +133,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct 
mlx4_dev_cap *dev_cap)
 #define QUERY_DEV_CAP_MAX_AV_OFFSET            0x27
 #define QUERY_DEV_CAP_MAX_REQ_QP_OFFSET                0x29
 #define QUERY_DEV_CAP_MAX_RES_QP_OFFSET                0x2b
+#define QUERY_DEV_CAP_MAX_GSO_OFFSET           0x2d
 #define QUERY_DEV_CAP_MAX_RDMA_OFFSET          0x2f
 #define QUERY_DEV_CAP_RSZ_SRQ_OFFSET           0x33
 #define QUERY_DEV_CAP_ACK_DELAY_OFFSET         0x35
@@ -215,6 +216,13 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct 
mlx4_dev_cap *dev_cap)
        dev_cap->max_requester_per_qp = 1 << (field & 0x3f);
        MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_RES_QP_OFFSET);
        dev_cap->max_responder_per_qp = 1 << (field & 0x3f);
+       MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_GSO_OFFSET);
+       field &= 0x1f;
+       if (!field)
+               dev_cap->max_gso_sz = 0;
+       else
+               dev_cap->max_gso_sz = 1 << field;
+
        MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_RDMA_OFFSET);
        dev_cap->max_rdma_global = 1 << (field & 0x3f);
        MLX4_GET(field, outbox, QUERY_DEV_CAP_ACK_DELAY_OFFSET);
@@ -377,6 +385,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct 
mlx4_dev_cap *dev_cap)
                 dev_cap->max_sq_desc_sz, dev_cap->max_sq_sg);
        mlx4_dbg(dev, "Max RQ desc size: %d, max RQ S/G: %d\n",
                 dev_cap->max_rq_desc_sz, dev_cap->max_rq_sg);
+       mlx4_dbg(dev, "Max GSO size: %d\n", dev_cap->max_gso_sz);
 
        dump_dev_cap_flags(dev, dev_cap->flags);
 
diff --git a/drivers/net/mlx4/fw.h b/drivers/net/mlx4/fw.h
index e16dec8..306cb9b 100644
--- a/drivers/net/mlx4/fw.h
+++ b/drivers/net/mlx4/fw.h
@@ -96,6 +96,7 @@ struct mlx4_dev_cap {
        u8  bmme_flags;
        u32 reserved_lkey;
        u64 max_icm_sz;
+       int max_gso_sz;
 };
 
 struct mlx4_adapter {
diff --git a/drivers/net/mlx4/main.c b/drivers/net/mlx4/main.c
index 08bfc13..7cfbe75 100644
--- a/drivers/net/mlx4/main.c
+++ b/drivers/net/mlx4/main.c
@@ -159,6 +159,7 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct 
mlx4_dev_cap *dev_cap)
        dev->caps.page_size_cap      = ~(u32) (dev_cap->min_page_sz - 1);
        dev->caps.flags              = dev_cap->flags;
        dev->caps.stat_rate_support  = dev_cap->stat_rate_support;
+       dev->caps.max_gso_sz         = dev_cap->max_gso_sz;
 
        return 0;
 }
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 6cdf813..ff7df1a 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -186,6 +186,7 @@ struct mlx4_caps {
        u32                     flags;
        u16                     stat_rate_support;
        u8                      port_width_cap[MLX4_MAX_PORTS + 1];
+       int                     max_gso_sz;
 };
 
 struct mlx4_buf_list {
diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h
index 31f9eb3..cf0bf4e 100644
--- a/include/linux/mlx4/qp.h
+++ b/include/linux/mlx4/qp.h
@@ -219,6 +219,11 @@ struct mlx4_wqe_datagram_seg {
        __be32                  reservd[2];
 };
 
+struct mlx4_lso_seg {
+       __be32                  mss_hdr_size;   /* ((mss - hlen) << 16) | hlen, big-endian */
+       __be32                  header[0];      /* hlen bytes of packet headers, copied in by build_lso_seg() */
+};
+
 struct mlx4_wqe_bind_seg {
        __be32                  flags1;
        __be32                  flags2;
-- 
1.5.4.4



_______________________________________________
general mailing list
[email protected]
http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general

Reply via email to