Some HCAs, such as ehca2, support fewer than 16 SG entries. Currently, IPoIB/CM implicitly assumes that all HCAs support 16 SG entries of 4K pages for 64K MTUs. This patch removes that assumption.
This patch continues to use order 0 allocations and enables implementation of connected mode on such HCAs with smaller MTUs. HCAs having the capability to support 16 SG entries are left untouched. A version of this patch has been integrated into Roland's for-2.6.25 git tree for a couple of weeks. Here is a back ported version of that patch (for OFED 1.3). Please consider for inclusion into OFED 1.3. This patch addresses bug# 728: https://bugs.openfabrics.org/show_bug.cgi?id=728 Signed-off-by: Pradeep Satyanarayana <[EMAIL PROTECTED]> --- --- a/drivers/infiniband/ulp/ipoib/ipoib.h 2008-01-10 13:13:12.000000000 -0500 +++ b/drivers/infiniband/ulp/ipoib/ipoib.h 2008-01-10 18:07:11.000000000 -0500 @@ -234,6 +234,8 @@ struct ipoib_cm_dev_priv { struct ib_wc ibwc[IPOIB_NUM_WC]; struct ib_sge rx_sge[IPOIB_CM_RX_SG]; struct ib_recv_wr rx_wr; + int max_cm_mtu; + int num_frags; }; /* --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c 2008-01-10 13:13:12.000000000 -0500 +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c 2008-01-17 15:53:30.000000000 -0500 @@ -89,13 +89,13 @@ static int ipoib_cm_post_receive(struct priv->cm.rx_wr.wr_id = id | IPOIB_OP_CM | IPOIB_OP_RECV; - for (i = 0; i < IPOIB_CM_RX_SG; ++i) + for (i = 0; i < priv->cm.num_frags; ++i) priv->cm.rx_sge[i].addr = priv->cm.srq_ring[id].mapping[i]; ret = ib_post_srq_recv(priv->cm.srq, &priv->cm.rx_wr, &bad_wr); if (unlikely(ret)) { ipoib_warn(priv, "post srq failed for buf %d (%d)\n", id, ret); - ipoib_cm_dma_unmap_rx(priv, IPOIB_CM_RX_SG - 1, + ipoib_cm_dma_unmap_rx(priv, priv->cm.num_frags - 1, priv->cm.srq_ring[id].mapping); dev_kfree_skb_any(priv->cm.srq_ring[id].skb); priv->cm.srq_ring[id].skb = NULL; @@ -1261,10 +1261,10 @@ int ipoib_cm_dev_init(struct net_device struct ib_srq_init_attr srq_init_attr = { .attr = { .max_wr = ipoib_recvq_size, - .max_sge = IPOIB_CM_RX_SG } }; int ret, i; + struct ib_device_attr attr; INIT_LIST_HEAD(&priv->cm.passive_ids); INIT_LIST_HEAD(&priv->cm.reap_list); @@ -1281,6 +1281,18 @@ int 
ipoib_cm_dev_init(struct net_device skb_queue_head_init(&priv->cm.skb_queue); + ret = ib_query_device(priv->ca, &attr); + if (ret) { + printk(KERN_WARNING "ib_query_device() failed with %d\n", ret); + return ret; + } + + ipoib_dbg(priv, "max_srq_sge=%d\n", attr.max_srq_sge); + + attr.max_srq_sge = min(IPOIB_CM_RX_SG, attr.max_srq_sge); + + srq_init_attr.attr.max_sge = attr.max_srq_sge; + priv->cm.srq = ib_create_srq(priv->pd, &srq_init_attr); if (IS_ERR(priv->cm.srq)) { ret = PTR_ERR(priv->cm.srq); @@ -1288,6 +1300,11 @@ int ipoib_cm_dev_init(struct net_device return ret; } + priv->cm.max_cm_mtu = attr.max_srq_sge * PAGE_SIZE - 0x10; + priv->cm.num_frags = attr.max_srq_sge; + ipoib_dbg(priv, "max_cm_mtu = 0x%x, num_frags=%d\n", + priv->cm.max_cm_mtu, priv->cm.num_frags); + priv->cm.srq_ring = kzalloc(ipoib_recvq_size * sizeof *priv->cm.srq_ring, GFP_KERNEL); if (!priv->cm.srq_ring) { @@ -1297,18 +1314,18 @@ int ipoib_cm_dev_init(struct net_device return -ENOMEM; } - for (i = 0; i < IPOIB_CM_RX_SG; ++i) + for (i = 0; i < priv->cm.num_frags; ++i) priv->cm.rx_sge[i].lkey = priv->mr->lkey; priv->cm.rx_sge[0].length = IPOIB_CM_HEAD_SIZE; - for (i = 1; i < IPOIB_CM_RX_SG; ++i) + for (i = 1; i < priv->cm.num_frags; ++i) priv->cm.rx_sge[i].length = PAGE_SIZE; priv->cm.rx_wr.next = NULL; priv->cm.rx_wr.sg_list = priv->cm.rx_sge; - priv->cm.rx_wr.num_sge = IPOIB_CM_RX_SG; + priv->cm.rx_wr.num_sge = priv->cm.num_frags; for (i = 0; i < ipoib_recvq_size; ++i) { - if (!ipoib_cm_alloc_rx_skb(dev, i, IPOIB_CM_RX_SG - 1, + if (!ipoib_cm_alloc_rx_skb(dev, i, priv->cm.num_frags - 1, priv->cm.srq_ring[i].mapping)) { ipoib_warn(priv, "failed to allocate receive buffer %d\n", i); ipoib_cm_dev_cleanup(dev); --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c 2008-01-10 13:13:12.000000000 -0500 +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c 2008-01-17 17:15:27.000000000 -0500 @@ -182,7 +182,10 @@ static int ipoib_change_mtu(struct net_d struct ipoib_dev_priv *priv = netdev_priv(dev); /* 
dev->mtu > 2K ==> connected mode */ - if (ipoib_cm_admin_enabled(dev) && new_mtu <= IPOIB_CM_MTU) { + if (ipoib_cm_admin_enabled(dev)) { + if (new_mtu > priv->cm.max_cm_mtu) + return -EINVAL; + if (new_mtu > priv->mcast_mtu) ipoib_warn(priv, "mtu > %d will cause multicast packet drops.\n", priv->mcast_mtu); @@ -190,9 +193,8 @@ static int ipoib_change_mtu(struct net_d return 0; } - if (new_mtu > IPOIB_PACKET_SIZE - IPOIB_ENCAP_LEN) { + if (new_mtu > IPOIB_PACKET_SIZE - IPOIB_ENCAP_LEN) return -EINVAL; - } priv->admin_mtu = new_mtu; _______________________________________________ ewg mailing list [email protected] http://lists.openfabrics.org/cgi-bin/mailman/listinfo/ewg
