Some HCAs, such as ehca2, support fewer than 16 SG entries. IPoIB/CM currently assumes, implicitly, that every HCA can support 16 SG entries of one 4K page each, as needed for a 64K MTU. This patch removes that restriction.
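(For reference, the sizing the current code hard-wires works out roughly as in the sketch below. The EX_* names and values are illustrative only, not the actual ipoib.h macros, and 4K pages are assumed:)

/* Illustrative only: how a ~64K connected-mode MTU implies 16 SG entries
 * when receive buffers are built from order-0 (single-page) allocations.
 */
enum {
	EX_PAGE_SIZE = 4096,			/* assumed page size */
	EX_ENCAP_LEN = 4,			/* IPoIB encapsulation header */
	EX_CM_MTU    = 0x10000 - 0x10,		/* 64K minus a small pad */
	EX_BUF_SIZE  = EX_CM_MTU + EX_ENCAP_LEN,
	/* one head fragment plus full pages rounds up to 16 entries */
	EX_RX_SG     = (EX_BUF_SIZE + EX_PAGE_SIZE - 1) / EX_PAGE_SIZE,
};

An HCA that supports fewer than 16 SG entries can never satisfy that fixed request.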
This patch continues to use order-0 allocations and enables connected mode on such HCAs, with a correspondingly smaller MTU (a rough worked example follows the patch below). HCAs that can support 16 SG entries are left untouched.

This patch addresses bug #728: https://bugs.openfabrics.org/show_bug.cgi?id=728

While working on this patch I discovered that mthca reports an incorrect value for max_srq_sge; I had already reported that issue several weeks ago. I worked around it here by using a hard-coded value of 16 for max_srq_sge (mthca only). More on that in a follow-up mail.

Signed-off-by: Pradeep Satyanarayana <[EMAIL PROTECTED]>
---

--- a/drivers/infiniband/ulp/ipoib/ipoib.h	2007-11-03 11:37:02.000000000 -0700
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h	2007-12-20 13:17:43.000000000 -0800
@@ -466,6 +466,7 @@ void ipoib_drain_cq(struct net_device *d
 #define IPOIB_CM_SUPPORTED(ha)   (ha[0] & (IPOIB_FLAGS_RC))
 
 extern int ipoib_max_conn_qp;
+extern int max_cm_mtu;
 
 static inline int ipoib_cm_admin_enabled(struct net_device *dev)
 {
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c	2007-11-21 07:46:35.000000000 -0800
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c	2007-12-20 14:47:13.000000000 -0800
@@ -74,6 +74,9 @@ static struct ib_send_wr ipoib_cm_rx_dra
 	.opcode = IB_WR_SEND,
 };
 
+static int num_of_frags;
+int max_cm_mtu;
+
 static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id,
 			       struct ib_cm_event *event);
 
@@ -96,13 +99,13 @@ static int ipoib_cm_post_receive_srq(str
 
 	priv->cm.rx_wr.wr_id = id | IPOIB_OP_CM | IPOIB_OP_RECV;
 
-	for (i = 0; i < IPOIB_CM_RX_SG; ++i)
+	for (i = 0; i < num_of_frags; ++i)
 		priv->cm.rx_sge[i].addr = priv->cm.srq_ring[id].mapping[i];
 
 	ret = ib_post_srq_recv(priv->cm.srq, &priv->cm.rx_wr, &bad_wr);
 	if (unlikely(ret)) {
 		ipoib_warn(priv, "post srq failed for buf %d (%d)\n", id, ret);
-		ipoib_cm_dma_unmap_rx(priv, IPOIB_CM_RX_SG - 1,
+		ipoib_cm_dma_unmap_rx(priv, num_of_frags - 1,
 				      priv->cm.srq_ring[id].mapping);
 		dev_kfree_skb_any(priv->cm.srq_ring[id].skb);
 		priv->cm.srq_ring[id].skb = NULL;
@@ -623,6 +626,7 @@ repost:
 			--p->recv_count;
 			ipoib_warn(priv, "ipoib_cm_post_receive_nonsrq failed "
 				   "for buf %d\n", wr_id);
+			kfree(mapping); /*** Check if this is needed ***/
 		}
 	}
 }
@@ -1399,16 +1403,17 @@ int ipoib_cm_add_mode_attr(struct net_de
 	return device_create_file(&dev->dev, &dev_attr_mode);
 }
 
-static void ipoib_cm_create_srq(struct net_device *dev)
+static void ipoib_cm_create_srq(struct net_device *dev, int max_sge)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 	struct ib_srq_init_attr srq_init_attr = {
 		.attr = {
 			.max_wr  = ipoib_recvq_size,
-			.max_sge = IPOIB_CM_RX_SG
 		}
 	};
 
+	srq_init_attr.attr.max_sge = max_sge;
+
 	priv->cm.srq = ib_create_srq(priv->pd, &srq_init_attr);
 	if (IS_ERR(priv->cm.srq)) {
 		if (PTR_ERR(priv->cm.srq) != -ENOSYS)
@@ -1418,6 +1423,7 @@ static void ipoib_cm_create_srq(struct n
 		return;
 	}
 
+
 	priv->cm.srq_ring = kzalloc(ipoib_recvq_size * sizeof *priv->cm.srq_ring,
 				    GFP_KERNEL);
 	if (!priv->cm.srq_ring) {
@@ -1431,7 +1437,9 @@ static void ipoib_cm_create_srq(struct n
 int ipoib_cm_dev_init(struct net_device *dev)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
-	int i;
+	int i, ret;
+	struct ib_srq_attr srq_attr;
+	struct ib_device_attr attr;
 
 	INIT_LIST_HEAD(&priv->cm.passive_ids);
 	INIT_LIST_HEAD(&priv->cm.reap_list);
@@ -1448,22 +1456,46 @@ int ipoib_cm_dev_init(struct net_device
 	skb_queue_head_init(&priv->cm.skb_queue);
 
-	for (i = 0; i < IPOIB_CM_RX_SG; ++i)
+	ret = ib_query_device(priv->ca, &attr);
+	if (ret) {
+		printk(KERN_WARNING "ib_query_device() failed with %d\n", ret);
+		return ret;
+	}
+
+	ipoib_dbg(priv, "max_srq_sge=%d\n", attr.max_srq_sge);
+
+	ipoib_cm_create_srq(dev, attr.max_srq_sge);
+
+	if (ipoib_cm_has_srq(dev)) {
+		ret = ib_query_srq(priv->cm.srq, &srq_attr);
+		if (ret) {
+			printk(KERN_WARNING "ib_query_srq() failed with %d\n", ret);
+			return -EINVAL;
+		}
+		/* pad similar to IPOIB_CM_MTU */
+		max_cm_mtu = srq_attr.max_sge * PAGE_SIZE - 0x10;
+		num_of_frags = srq_attr.max_sge;
+		ipoib_dbg(priv, "max_cm_mtu = 0x%x, num_of_frags=%d\n",
+			  max_cm_mtu, num_of_frags);
+	} else {
+		max_cm_mtu = IPOIB_CM_MTU;
+		num_of_frags = IPOIB_CM_RX_SG;
+	}
+
+	for (i = 0; i < num_of_frags; ++i)
 		priv->cm.rx_sge[i].lkey = priv->mr->lkey;
 
 	priv->cm.rx_sge[0].length = IPOIB_CM_HEAD_SIZE;
-	for (i = 1; i < IPOIB_CM_RX_SG; ++i)
+	for (i = 1; i < num_of_frags; ++i)
 		priv->cm.rx_sge[i].length = PAGE_SIZE;
 
 	priv->cm.rx_wr.next = NULL;
 	priv->cm.rx_wr.sg_list = priv->cm.rx_sge;
-	priv->cm.rx_wr.num_sge = IPOIB_CM_RX_SG;
-
-	ipoib_cm_create_srq(dev);
+	priv->cm.rx_wr.num_sge = num_of_frags;
 
 	if (ipoib_cm_has_srq(dev)) {
 		for (i = 0; i < ipoib_recvq_size; ++i) {
 			if (!ipoib_cm_alloc_rx_skb(dev, priv->cm.srq_ring, i,
-						   IPOIB_CM_RX_SG - 1,
+						   num_of_frags - 1,
 						   priv->cm.srq_ring[i].mapping)) {
 				ipoib_warn(priv, "failed to allocate "
 					   "receive buffer %d\n", i);
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c	2007-12-19 14:02:15.000000000 -0800
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c	2007-12-20 13:17:43.000000000 -0800
@@ -182,12 +182,15 @@ static int ipoib_change_mtu(struct net_d
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 
 	/* dev->mtu > 2K ==> connected mode */
-	if (ipoib_cm_admin_enabled(dev) && new_mtu <= IPOIB_CM_MTU) {
-		if (new_mtu > priv->mcast_mtu)
-			ipoib_warn(priv, "mtu > %d will cause multicast packet drops.\n",
+	if (ipoib_cm_admin_enabled(dev)) {
+		if (new_mtu <= max_cm_mtu) {
+			if (new_mtu > priv->mcast_mtu)
+				ipoib_warn(priv, "mtu > %d will cause multicast packet drops.\n",
 				   priv->mcast_mtu);
-		dev->mtu = new_mtu;
-		return 0;
+			dev->mtu = new_mtu;
+			return 0;
+		} else
+			return -EINVAL;
 	}
 
 	if (new_mtu > IPOIB_PACKET_SIZE - IPOIB_ENCAP_LEN) {
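As a rough illustration of what the change means on a small-SG HCA (my numbers, assuming 4K pages; the EX_* names are made up for illustration and not taken from any particular adapter):

/* Worked example, illustrative only: an SRQ reporting max_sge = 3 with 4K pages */
enum {
	EX_MAX_SRQ_SGE  = 3,
	EX_MAX_CM_MTU   = EX_MAX_SRQ_SGE * 4096 - 0x10,	/* = 12272, same pad as IPOIB_CM_MTU */
	EX_NUM_OF_FRAGS = EX_MAX_SRQ_SGE,		/* SG entries posted per receive */
};

With this patch ipoib_change_mtu() would then accept connected-mode MTUs up to 12272 on such an HCA, while an HCA whose SRQ reports max_sge = 16 keeps the familiar 65520-byte cap.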
