Some HCAs, such as ehca2, support fewer than 16 SG entries. IPoIB/CM currently assumes, implicitly, that every HCA can support 16 SG entries of 4K pages for a 64K MTU. This patch removes that restriction.
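(For illustration only, not part of the patch: a userspace sketch of the arithmetic behind those numbers, assuming 4K pages. The max_sge value of 4 below is a hypothetical figure for an HCA with fewer SG entries.)

#include <stdio.h>

#define PAGE_SIZE      4096	/* assumed 4K pages */
#define IPOIB_CM_RX_SG 16	/* compile-time cap, as in ipoib.h */

int main(void)
{
	int sges[] = { 16, 4 };	/* 4 is hypothetical, for an ehca2-like HCA */
	int i;

	for (i = 0; i < 2; ++i) {
		/* clamp to the compile-time cap, as the patch does */
		int max_sge = sges[i] > IPOIB_CM_RX_SG ? IPOIB_CM_RX_SG : sges[i];
		/* same "pad similar to IPOIB_CM_MTU" arithmetic as the patch */
		printf("max_sge = %2d -> max_cm_mtu = %d\n",
		       max_sge, max_sge * PAGE_SIZE - 0x10);
	}
	return 0;
}

With 16 SG entries this yields the familiar 65520-byte connected-mode MTU; with only 4 entries it drops to 16368 bytes.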
This patch continues to use order-0 allocations, enabling connected mode on such HCAs, albeit with smaller MTUs. HCAs capable of supporting 16 SG entries are left untouched.

This patch addresses bug #728: https://bugs.openfabrics.org/show_bug.cgi?id=728

The primary difference between this version and the previous one is that it incorporates Roland's suggestion of making max_cm_mtu and num_of_frags per-interface fields. A bogus kfree() left over from a previous patch has also been eliminated.

Signed-off-by: Pradeep Satyanarayana <[EMAIL PROTECTED]>
---
--- a/drivers/infiniband/ulp/ipoib/ipoib.h	2007-11-03 14:37:02.000000000 -0400
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h	2007-12-21 13:29:06.000000000 -0500
@@ -238,6 +238,8 @@ struct ipoib_cm_dev_priv {
 	struct ib_sge		rx_sge[IPOIB_CM_RX_SG];
 	struct ib_recv_wr	rx_wr;
 	int			nonsrq_conn_qp;
+	int			max_cm_mtu;
+	int			num_of_frags;
 };
 
 /*
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c	2007-11-21 10:46:35.000000000 -0500
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c	2007-12-21 15:45:30.000000000 -0500
@@ -96,13 +96,13 @@ static int ipoib_cm_post_receive_srq(str
 
 	priv->cm.rx_wr.wr_id = id | IPOIB_OP_CM | IPOIB_OP_RECV;
 
-	for (i = 0; i < IPOIB_CM_RX_SG; ++i)
+	for (i = 0; i < priv->cm.num_of_frags; ++i)
 		priv->cm.rx_sge[i].addr = priv->cm.srq_ring[id].mapping[i];
 
 	ret = ib_post_srq_recv(priv->cm.srq, &priv->cm.rx_wr, &bad_wr);
 	if (unlikely(ret)) {
 		ipoib_warn(priv, "post srq failed for buf %d (%d)\n", id, ret);
-		ipoib_cm_dma_unmap_rx(priv, IPOIB_CM_RX_SG - 1,
+		ipoib_cm_dma_unmap_rx(priv, priv->cm.num_of_frags - 1,
 				      priv->cm.srq_ring[id].mapping);
 		dev_kfree_skb_any(priv->cm.srq_ring[id].skb);
 		priv->cm.srq_ring[id].skb = NULL;
@@ -1399,16 +1399,16 @@ int ipoib_cm_add_mode_attr(struct net_de
 	return device_create_file(&dev->dev, &dev_attr_mode);
 }
 
-static void ipoib_cm_create_srq(struct net_device *dev)
+static void ipoib_cm_create_srq(struct net_device *dev, int max_sge)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 	struct ib_srq_init_attr srq_init_attr = {
 		.attr = {
 			.max_wr  = ipoib_recvq_size,
-			.max_sge = IPOIB_CM_RX_SG
 		}
 	};
+	srq_init_attr.attr.max_sge = max_sge;
 
 	priv->cm.srq = ib_create_srq(priv->pd, &srq_init_attr);
 	if (IS_ERR(priv->cm.srq)) {
 		if (PTR_ERR(priv->cm.srq) != -ENOSYS)
@@ -1431,7 +1431,9 @@ static void ipoib_cm_create_srq(struct n
 int ipoib_cm_dev_init(struct net_device *dev)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
-	int i;
+	int i, ret, num_of_frags;
+	struct ib_srq_attr srq_attr;
+	struct ib_device_attr attr;
 
 	INIT_LIST_HEAD(&priv->cm.passive_ids);
 	INIT_LIST_HEAD(&priv->cm.reap_list);
@@ -1448,22 +1450,52 @@ int ipoib_cm_dev_init(struct net_device
 
 	skb_queue_head_init(&priv->cm.skb_queue);
 
-	for (i = 0; i < IPOIB_CM_RX_SG; ++i)
+	ret = ib_query_device(priv->ca, &attr);
+	if (ret) {
+		printk(KERN_WARNING "ib_query_device() failed with %d\n", ret);
+		return ret;
+	}
+
+	ipoib_dbg(priv, "max_srq_sge=%d\n", attr.max_srq_sge);
+
+	ipoib_cm_create_srq(dev, attr.max_srq_sge);
+
+	if (ipoib_cm_has_srq(dev)) {
+		ret = ib_query_srq(priv->cm.srq, &srq_attr);
+		if (ret) {
+			printk(KERN_WARNING "ib_query_srq() failed with %d\n", ret);
+			return -EINVAL;
+		}
+		if (srq_attr.max_sge > IPOIB_CM_RX_SG)
+			srq_attr.max_sge = IPOIB_CM_RX_SG;
+
+		/* pad similar to IPOIB_CM_MTU */
+		priv->cm.max_cm_mtu = srq_attr.max_sge * PAGE_SIZE - 0x10;
+		priv->cm.num_of_frags = srq_attr.max_sge;
+		ipoib_dbg(priv, "max_cm_mtu = 0x%x, num_of_frags=%d\n",
+			  priv->cm.max_cm_mtu, priv->cm.num_of_frags);
+	} else {
+		/* In the nonsrq case the num of SG elements is set at
+		 * qp creation
+		 */
+		priv->cm.max_cm_mtu = IPOIB_CM_MTU;
+		priv->cm.num_of_frags = IPOIB_CM_RX_SG;
+	}
+
+	num_of_frags = priv->cm.num_of_frags;
+
+	for (i = 0; i < num_of_frags; ++i)
 		priv->cm.rx_sge[i].lkey = priv->mr->lkey;
 
 	priv->cm.rx_sge[0].length = IPOIB_CM_HEAD_SIZE;
-	for (i = 1; i < IPOIB_CM_RX_SG; ++i)
+	for (i = 1; i < num_of_frags; ++i)
 		priv->cm.rx_sge[i].length = PAGE_SIZE;
 	priv->cm.rx_wr.next = NULL;
 	priv->cm.rx_wr.sg_list = priv->cm.rx_sge;
-	priv->cm.rx_wr.num_sge = IPOIB_CM_RX_SG;
-
-	ipoib_cm_create_srq(dev);
+	priv->cm.rx_wr.num_sge = num_of_frags;
 
 	if (ipoib_cm_has_srq(dev)) {
 		for (i = 0; i < ipoib_recvq_size; ++i) {
 			if (!ipoib_cm_alloc_rx_skb(dev, priv->cm.srq_ring, i,
-						   IPOIB_CM_RX_SG - 1,
+						   num_of_frags - 1,
 						   priv->cm.srq_ring[i].mapping)) {
 				ipoib_warn(priv, "failed to allocate "
 					   "receive buffer %d\n", i);
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c	2007-12-19 17:02:15.000000000 -0500
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c	2007-12-21 00:08:04.000000000 -0500
@@ -182,12 +182,15 @@ static int ipoib_change_mtu(struct net_d
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 
 	/* dev->mtu > 2K ==> connected mode */
-	if (ipoib_cm_admin_enabled(dev) && new_mtu <= IPOIB_CM_MTU) {
-		if (new_mtu > priv->mcast_mtu)
-			ipoib_warn(priv, "mtu > %d will cause multicast packet drops.\n",
+	if (ipoib_cm_admin_enabled(dev)) {
+		if (new_mtu <= priv->cm.max_cm_mtu) {
+			if (new_mtu > priv->mcast_mtu)
+				ipoib_warn(priv, "mtu > %d will cause multicast packet drops.\n",
 				   priv->mcast_mtu);
-		dev->mtu = new_mtu;
-		return 0;
+			dev->mtu = new_mtu;
+			return 0;
+		} else
+			return -EINVAL;
 	}
 
 	if (new_mtu > IPOIB_PACKET_SIZE - IPOIB_ENCAP_LEN) {
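P.S. (illustration only, not part of the patch): the user-visible effect of the ipoib_change_mtu() hunk is that an MTU beyond the per-interface limit is now rejected with -EINVAL instead of being checked only against the compile-time IPOIB_CM_MTU. A hypothetical session on an HCA whose SRQ reports only 4 SG entries (interface name and values made up for the example):

	# echo connected > /sys/class/net/ib0/mode
	# ip link set ib0 mtu 65520
	RTNETLINK answers: Invalid argument
	# ip link set ib0 mtu 16368	(4 * 4096 - 0x10; accepted)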
