> The same bug exists with mthca.  I saw it originally in the kernel doing RDS 
 > work, but I just put together a short user space test.

Thanks.  The patch below seems to fix this for me.  I guess I'll queue
this for 2.6.24.

I'm also including the test program I wrote to verify this; mlx4 and
mthca seem OK on my system now.

diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c
index 60de6f9..0c22cf0 100644
--- a/drivers/infiniband/hw/mthca/mthca_main.c
+++ b/drivers/infiniband/hw/mthca/mthca_main.c
@@ -45,6 +45,7 @@
 #include "mthca_cmd.h"
 #include "mthca_profile.h"
 #include "mthca_memfree.h"
+#include "mthca_wqe.h"
 
 MODULE_AUTHOR("Roland Dreier");
 MODULE_DESCRIPTION("Mellanox InfiniBand HCA low-level driver");
@@ -205,7 +206,20 @@ static int mthca_dev_lim(struct mthca_dev *mdev, struct mthca_dev_lim *dev_lim)
        mdev->limits.gid_table_len      = dev_lim->max_gids;
        mdev->limits.pkey_table_len     = dev_lim->max_pkeys;
        mdev->limits.local_ca_ack_delay = dev_lim->local_ca_ack_delay;
-       mdev->limits.max_sg             = dev_lim->max_sg;
+       /*
+        * Reduce max_sg to a value so that all possible send requests
+        * will fit into max_desc_sz; send requests will need a next
+        * segment plus possibly another extra segment, and the UD
+        * segment is the biggest extra segment.
+        */
+       mdev->limits.max_sg             =
+               min_t(int, dev_lim->max_sg,
+                     (dev_lim->max_desc_sz -
+                      (sizeof (struct mthca_next_seg) +
+                       (mthca_is_memfree(mdev) ?
+                        sizeof (struct mthca_arbel_ud_seg) :
+                        sizeof (struct mthca_tavor_ud_seg)))) /
+                     sizeof (struct mthca_data_seg));
        mdev->limits.max_wqes           = dev_lim->max_qp_sz;
        mdev->limits.max_qp_init_rdma   = dev_lim->max_requester_per_qp;
        mdev->limits.reserved_qps       = dev_lim->reserved_qps;


---

Here's the test program:

#include <stdio.h>
#include <string.h>

#include <infiniband/verbs.h>

/*
 * For every IB device returned by ibv_get_device_list(), query the
 * device attributes and try to create one RC, one UC and one UD QP
 * that each request dev_attr.max_sge scatter/gather entries on both
 * the send and receive queues.  Prints "ok" or "FAILED" per QP type.
 *
 * Returns 1 if the device list could not be obtained, 0 otherwise
 * (individual per-device failures are reported but do not change the
 * exit status).
 */
int main(int argc, char *argv[])
{
        struct ibv_device      **dev_list;
        struct ibv_device      **dev;
        struct ibv_device_attr   dev_attr;
        struct ibv_context      *context;
        struct ibv_pd           *pd;
        struct ibv_cq           *cq;
        struct ibv_qp           *qp;
        struct ibv_qp_init_attr  qp_attr;
        size_t                   t;
        static const struct {
                enum ibv_qp_type type;
                char            *name;
        }                        type_tab[] = {
                { IBV_QPT_RC, "RC" },
                { IBV_QPT_UC, "UC" },
                { IBV_QPT_UD, "UD" },
        };

        (void) argc;
        (void) argv;

        dev_list = ibv_get_device_list(NULL);
        if (!dev_list) {
                printf("No IB devices found\n");
                return 1;
        }

        /*
         * Walk the list with a separate cursor so that the original
         * pointer is still available for ibv_free_device_list().
         */
        for (dev = dev_list; *dev; ++dev) {
                printf("%s:\n", ibv_get_device_name(*dev));

                context = ibv_open_device(*dev);
                if (!context) {
                        printf("  ibv_open_device failed\n");
                        continue;
                }

                if (ibv_query_device(context, &dev_attr)) {
                        printf("  ibv_query_device failed\n");
                        goto close_device;
                }

                cq = ibv_create_cq(context, 1, NULL, NULL, 0);
                if (!cq) {
                        printf("  ibv_create_cq failed\n");
                        goto close_device;
                }

                pd = ibv_alloc_pd(context);
                if (!pd) {
                        printf("  ibv_alloc_pd failed\n");
                        goto destroy_cq;
                }

                for (t = 0; t < sizeof type_tab / sizeof type_tab[0]; ++t) {
                        memset(&qp_attr, 0, sizeof qp_attr);

                        qp_attr.send_cq = cq;
                        qp_attr.recv_cq = cq;
                        qp_attr.cap.max_send_wr = 1;
                        qp_attr.cap.max_recv_wr = 1;
                        qp_attr.cap.max_send_sge = dev_attr.max_sge;
                        qp_attr.cap.max_recv_sge = dev_attr.max_sge;
                        qp_attr.qp_type = type_tab[t].type;

                        printf("  %s: SGE %d ", type_tab[t].name,
                               dev_attr.max_sge);

                        qp = ibv_create_qp(pd, &qp_attr);
                        if (qp) {
                                printf("ok\n");
                                /* Release the QP so it does not pin the PD/CQ. */
                                ibv_destroy_qp(qp);
                        } else {
                                printf("FAILED\n");
                        }
                }

                /* Tear down in reverse order of creation. */
                ibv_dealloc_pd(pd);
destroy_cq:
                ibv_destroy_cq(cq);
close_device:
                ibv_close_device(context);
        }

        ibv_free_device_list(dev_list);

        return 0;
}
_______________________________________________
general mailing list
[email protected]
http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general

Reply via email to