Hi,
We do indeed have a fix for XRC support on our branch at Bull; sorry I
neglected to contribute it, my bad…
I attach the patch here, on top of the current v1.6.6 (should I rather
submit it as a pull request?).
For v1.8+, merging the v1.6 code is not enough, as the openib connect
code changed from xoob to udcm. I made a version based on a pre-git state
of the tree, so I will update it and make a pull request.
Piotr
________________________________________
De : devel [[email protected]] de la part de Gilles Gouaillardet
[[email protected]]
Envoyé : lundi 8 décembre 2014 03:27
À : Open MPI Developers
Objet : Re: [OMPI devel] openmpi and XRC API from ofed-3.12
Hi Piotr,
this is quite an old thread now, but I have not seen any support for XRC
with ofed 3.12 yet
(neither in trunk nor in v1.8)
my understanding is that Bull already did something similar for the v1.6
series,
so let me put this the other way around :
does Bull have any plan to contribute this work ?
(for example, publish a patch for the v1.6 series, or submit pull
request(s) for master and v1.8 branch)
Cheers,
Gilles
On 2014/04/23 21:58, Piotr Lesnicki wrote:
> Hi,
>
> In OFED-3.12 the API for XRC has changed. I did not find
> corresponding changes in Open MPI: for example the function
> 'ibv_create_xrc_rcv_qp()' queried in openmpi configure script no
> longer exists in ofed-3.12-rc1.
>
> Are there any plans to support the new XRC API ?
>
>
> --
> Piotr
> _______________________________________________
> devel mailing list
> [email protected]
> Subscription: http://www.open-mpi.org/mailman/listinfo.cgi/devel
> Link to this post:
> http://www.open-mpi.org/community/lists/devel/2014/04/14583.php
_______________________________________________
devel mailing list
[email protected]
Subscription: http://www.open-mpi.org/mailman/listinfo.cgi/devel
Link to this post:
http://www.open-mpi.org/community/lists/devel/2014/12/16445.php
diff --git a/ompi/config/ompi_check_openib.m4 b/ompi/config/ompi_check_openib.m4
index 187356f..97ee8fb 100644
--- a/ompi/config/ompi_check_openib.m4
+++ b/ompi/config/ompi_check_openib.m4
@@ -15,6 +15,7 @@
# reserved.
# Copyright (c) 2006-2009 Mellanox Technologies. All rights reserved.
# Copyright (c) 2010-2012 Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2014 Bull SAS. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
@@ -175,6 +176,7 @@ AC_DEFUN([OMPI_CHECK_OPENIB],[
# (unconditionally)
$1_have_xrc=0
$1_have_rdmacm=0
+ $1_have_xrc_connectib=0
$1_have_opensm_devel=0
# If we have the openib stuff available, find out what we've got
@@ -188,10 +190,15 @@ AC_DEFUN([OMPI_CHECK_OPENIB],[
[#include <infiniband/verbs.h>])
# ibv_create_xrc_rcv_qp was added in OFED 1.3
+ # The new XRC API (ibv_open_xrcd) was added in OFED 3.12; it is probed via ibv_cmd_open_xrcd
if test "$enable_connectx_xrc" = "yes"; then
- AC_CHECK_FUNCS([ibv_create_xrc_rcv_qp], [$1_have_xrc=1])
+ AC_CHECK_FUNCS([ibv_create_xrc_rcv_qp ibv_cmd_open_xrcd], [$1_have_xrc=1])
+ fi
+ if test "$enable_connectx_xrc" = "yes"; then
+ AC_CHECK_FUNCS([ibv_cmd_open_xrcd], [$1_have_xrc_connectib=1])
fi
+
if test "no" != "$enable_openib_dynamic_sl"; then
# We need ib_types.h file, which is installed with opensm-devel
# package. However, ib_types.h has a bad include directive,
@@ -279,6 +286,15 @@ AC_DEFUN([OMPI_CHECK_OPENIB],[
AC_MSG_RESULT([no])
fi
+ AC_MSG_CHECKING([if ConnectIB XRC support is enabled])
+ AC_DEFINE_UNQUOTED([OMPI_HAVE_CONNECTIB_XRC], [$$1_have_xrc_connectib],
+ [Enable features required for ConnectIB XRC support])
+ if test "1" = "$$1_have_xrc_connectib"; then
+ AC_MSG_RESULT([yes])
+ else
+ AC_MSG_RESULT([no])
+ fi
+
AC_MSG_CHECKING([if dynamic SL is enabled])
AC_DEFINE_UNQUOTED([OMPI_ENABLE_DYNAMIC_SL], [$$1_have_opensm_devel],
[Enable features required for dynamic SL support])
diff --git a/ompi/mca/btl/openib/btl_openib.c b/ompi/mca/btl/openib/btl_openib.c
index 8a9d942..80f833b 100644
--- a/ompi/mca/btl/openib/btl_openib.c
+++ b/ompi/mca/btl/openib/btl_openib.c
@@ -17,6 +17,7 @@
* Copyright (c) 2006-2007 Voltaire All rights reserved.
* Copyright (c) 2008-2012 Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2009 IBM Corporation. All rights reserved.
+ * Copyright (c) 2014 Bull SAS. All rights reserved
* $COPYRIGHT$
*
* Additional copyrights may follow
@@ -323,10 +324,26 @@ static int create_srq(mca_btl_openib_module_t *openib_btl)
openib_btl->qps[qp].u.srq_qp.rd_posted = 0;
#if HAVE_XRC
if(BTL_OPENIB_QP_TYPE_XRC(qp)) {
+#if OMPI_HAVE_CONNECTIB_XRC
+ struct ibv_srq_init_attr_ex attr_ex;
+ memset(&attr_ex, 0, sizeof(struct ibv_srq_init_attr_ex));
+ attr_ex.attr.max_wr = attr.attr.max_wr;
+ attr_ex.attr.max_sge = attr.attr.max_sge;
+ attr_ex.comp_mask = IBV_SRQ_INIT_ATTR_TYPE | IBV_SRQ_INIT_ATTR_XRCD |
+ IBV_SRQ_INIT_ATTR_CQ | IBV_SRQ_INIT_ATTR_PD;
+ attr_ex.srq_type = IBV_SRQT_XRC;
+ attr_ex.xrcd = openib_btl->device->xrcd;
+ attr_ex.cq = openib_btl->device->ib_cq[qp_cq_prio(qp)];
+ attr_ex.pd = openib_btl->device->ib_pd;
+
+ openib_btl->qps[qp].u.srq_qp.srq =
+ ibv_create_srq_ex(openib_btl->device->ib_dev_context, &attr_ex);
+#else
openib_btl->qps[qp].u.srq_qp.srq =
ibv_create_xrc_srq(openib_btl->device->ib_pd,
openib_btl->device->xrc_domain,
openib_btl->device->ib_cq[qp_cq_prio(qp)], &attr);
+#endif
} else
#endif
{
@@ -1755,8 +1772,12 @@ int mca_btl_openib_put( mca_btl_base_module_t* btl,
to_com_frag(frag)->endpoint = ep;
#if HAVE_XRC
if (MCA_BTL_XRC_ENABLED && BTL_OPENIB_QP_TYPE_XRC(qp))
+#if OMPI_HAVE_CONNECTIB_XRC
+ frag->sr_desc.qp_type.xrc.remote_srqn=ep->rem_info.rem_srqs[qp].rem_srq_num;
+#else
frag->sr_desc.xrc_remote_srq_num=ep->rem_info.rem_srqs[qp].rem_srq_num;
#endif
+#endif
descriptor->order = qp;
/* Setting opcode on a frag constructor isn't enough since prepare_src
@@ -1839,8 +1860,12 @@ int mca_btl_openib_get(mca_btl_base_module_t* btl,
#if HAVE_XRC
if (MCA_BTL_XRC_ENABLED && BTL_OPENIB_QP_TYPE_XRC(qp))
+#if OMPI_HAVE_CONNECTIB_XRC
+ frag->sr_desc.qp_type.xrc.remote_srqn=ep->rem_info.rem_srqs[qp].rem_srq_num;
+#else
frag->sr_desc.xrc_remote_srq_num=ep->rem_info.rem_srqs[qp].rem_srq_num;
#endif
+#endif
descriptor->order = qp;
qp_inflight_wqe_to_frag(ep, qp, to_com_frag(frag));
diff --git a/ompi/mca/btl/openib/btl_openib.h b/ompi/mca/btl/openib/btl_openib.h
index a685ef4..104b897 100644
--- a/ompi/mca/btl/openib/btl_openib.h
+++ b/ompi/mca/btl/openib/btl_openib.h
@@ -16,6 +16,7 @@
* reserved.
* Copyright (c) 2006-2007 Voltaire All rights reserved.
* Copyright (c) 2009-2010 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014 Bull SAS. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@@ -150,6 +151,12 @@ typedef struct mca_btl_openib_srq_manager_t {
} mca_btl_openib_srq_manager_t;
#endif
+typedef enum { /* XRC API flavours, as reported by mca_btl_openib_xrc_api_str() */
+ MCA_BTL_IB_XRC_API_NONE, /* reported as "none" */
+ MCA_BTL_IB_XRC_API_BETA, /* reported as "beta, ofed 1.3+" */
+ MCA_BTL_IB_XRC_API_OFED_3_12 /* reported as "ofed 3.12+" */
+} mca_btl_openib_xrc_api_t;
+
struct mca_btl_openib_component_t {
mca_btl_base_component_2_0_0_t super; /**< base BTL component */
@@ -297,6 +304,10 @@ struct mca_btl_openib_component_t {
char* default_recv_qps;
/** GID index to use */
int gid_index;
+ int xrc_enable_warning;
+#if HAVE_XRC
+ mca_btl_openib_xrc_api_t xrc_api_version;
+#endif
/** Whether we want a dynamically resizing srq, enabled by default */
bool enable_srq_resize;
#if BTL_OPENIB_FAILOVER_ENABLED
@@ -383,7 +394,11 @@ typedef struct mca_btl_openib_device_t {
volatile bool got_port_event;
#endif
#if HAVE_XRC
+#if OMPI_HAVE_CONNECTIB_XRC
+ struct ibv_xrcd *xrcd;
+#else
struct ibv_xrc_domain *xrc_domain;
+#endif
int xrc_fd;
#endif
int32_t non_eager_rdma_endpoints;
diff --git a/ompi/mca/btl/openib/btl_openib_async.c b/ompi/mca/btl/openib/btl_openib_async.c
index 1cf9d5b..0763a8f 100644
--- a/ompi/mca/btl/openib/btl_openib_async.c
+++ b/ompi/mca/btl/openib/btl_openib_async.c
@@ -3,6 +3,7 @@
* Copyright (c) 2007-2009 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2006-2007 Voltaire All rights reserved.
* Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved
+ * Copyright (c) 2014 Bull SAS. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@@ -115,7 +116,13 @@ static mca_btl_openib_endpoint_t * xrc_qp2endpoint(uint32_t qp_num, mca_btl_open
int ep_i;
for(ep_i = 0; ep_i < opal_pointer_array_get_size(device->endpoints); ep_i++) {
ep = opal_pointer_array_get_item(device->endpoints, ep_i);
+#if OMPI_HAVE_CONNECTIB_XRC
+        /* xrc_recv_qp is NULL after endpoint construction and is only set
+         * once a recv QP is created or opened, so guard the dereference. */
+        if (NULL != ep->xrc_recv_qp && qp_num == ep->xrc_recv_qp->qp_num)
+#else
if (qp_num == ep->xrc_recv_qp_num)
+#endif
return ep;
}
return NULL;
@@ -316,12 +321,36 @@ static int btl_openib_async_deviceh(struct mca_btl_openib_async_poll *devices_po
event_type = event.event_type;
#if HAVE_XRC
/* is it XRC event ?*/
+#if OMPI_HAVE_CONNECTIB_XRC
+        /* event.element is a union: its qp member is only meaningful for
+         * QP-scoped events, so it must not be dereferenced for CQ, SRQ,
+         * port or device events. */
+        switch (event.event_type) {
+        case IBV_EVENT_QP_FATAL:
+        case IBV_EVENT_QP_REQ_ERR:
+        case IBV_EVENT_QP_ACCESS_ERR:
+        case IBV_EVENT_COMM_EST:
+        case IBV_EVENT_SQ_DRAINED:
+        case IBV_EVENT_PATH_MIG:
+        case IBV_EVENT_PATH_MIG_ERR:
+        case IBV_EVENT_QP_LAST_WQE_REACHED:
+            if (NULL != event.element.qp &&
+                (IBV_QPT_XRC_RECV == event.element.qp->qp_type ||
+                 IBV_QPT_XRC_SEND == event.element.qp->qp_type)) {
+                xrc_event = true;
+            }
+            break;
+        default:
+            break;
+        }
+#else
if (IBV_XRC_QP_EVENT_FLAG & event.event_type) {
xrc_event = true;
/* Clean the bitnd handel as usual */
event_type ^= IBV_XRC_QP_EVENT_FLAG;
}
#endif
+#endif
switch(event_type) {
case IBV_EVENT_PATH_MIG:
BTL_ERROR(("Alternative path migration event reported"));
@@ -331,10 +344,16 @@ static int btl_openib_async_deviceh(struct mca_btl_openib_async_poll *devices_po
mca_btl_openib_load_apm(event.element.qp,
qp2endpoint(event.element.qp, device));
#if HAVE_XRC
+#if OMPI_HAVE_CONNECTIB_XRC
+ else
+ mca_btl_openib_load_apm(event.element.qp,
+ xrc_qp2endpoint(event.element.qp->qp_num, device));
+#else
else
mca_btl_openib_load_apm_xrc_rcv(event.element.xrc_qp_num,
xrc_qp2endpoint(event.element.xrc_qp_num, device));
#endif
+#endif
}
break;
case IBV_EVENT_DEVICE_FATAL:
@@ -584,7 +603,7 @@ void mca_btl_openib_load_apm(struct ibv_qp *qp, mca_btl_openib_endpoint_t *ep)
qp->qp_num, strerror(errno), errno));
}
-#if HAVE_XRC
+#if HAVE_XRC && ! OMPI_HAVE_CONNECTIB_XRC
void mca_btl_openib_load_apm_xrc_rcv(uint32_t qp_num, mca_btl_openib_endpoint_t *ep)
{
struct ibv_qp_init_attr qp_init_attr;
@@ -614,6 +633,7 @@ void mca_btl_openib_load_apm_xrc_rcv(uint32_t qp_num, mca_btl_openib_endpoint_t
}
ibv_modify_xrc_rcv_qp(btl->device->xrc_domain, qp_num, &attr, mask);
+
/* Maybe the qp already was modified by other process - ignoring error */
}
#endif
diff --git a/ompi/mca/btl/openib/btl_openib_async.h b/ompi/mca/btl/openib/btl_openib_async.h
index f35694b..8eda380 100644
--- a/ompi/mca/btl/openib/btl_openib_async.h
+++ b/ompi/mca/btl/openib/btl_openib_async.h
@@ -1,5 +1,6 @@
/*
* Copyright (c) 2007-2008 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2014 Bull SAS. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@@ -16,7 +17,7 @@
void* btl_openib_async_thread(void *one_hca);
void mca_btl_openib_load_apm(struct ibv_qp *qp, mca_btl_openib_endpoint_t *ep);
int btl_openib_async_command_done(int exp);
-#if HAVE_XRC
+#if HAVE_XRC && ! OMPI_HAVE_CONNECTIB_XRC
void mca_btl_openib_load_apm_xrc_rcv(uint32_t qp_num, mca_btl_openib_endpoint_t *ep);
#endif
diff --git a/ompi/mca/btl/openib/btl_openib_component.c b/ompi/mca/btl/openib/btl_openib_component.c
index 6e61b44..e8f2361 100644
--- a/ompi/mca/btl/openib/btl_openib_component.c
+++ b/ompi/mca/btl/openib/btl_openib_component.c
@@ -16,6 +16,7 @@
* reserved.
* Copyright (c) 2006-2007 Voltaire All rights reserved.
* Copyright (c) 2009-2012 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014 Bull SAS. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@@ -1133,6 +1134,27 @@ static int prepare_device_for_use(mca_btl_openib_device_t *device)
return OMPI_ERROR;
}
+    /* Detect the XRC API exported at run time and refuse XRC when it is
+     * not the API this BTL was compiled against. */
+    mca_btl_openib_component.xrc_api_version = mca_btl_openib_xrc_api();
+    opal_output_verbose(5, mca_btl_base_output, "openib BTL detected XRC API: %s\n",
+                        mca_btl_openib_xrc_api_str(mca_btl_openib_component.xrc_api_version));
+    if (MCA_BTL_XRC_ENABLED) {
+#if ! OMPI_HAVE_CONNECTIB_XRC
+        if (mca_btl_openib_component.xrc_api_version != MCA_BTL_IB_XRC_API_BETA) {
+            BTL_ERROR(("XRC error: bad XRC API (compiled with %s api).",
+                       mca_btl_openib_xrc_api_str(MCA_BTL_IB_XRC_API_BETA)));
+            return OMPI_ERROR;
+        }
+#else
+        if (mca_btl_openib_component.xrc_api_version != MCA_BTL_IB_XRC_API_OFED_3_12) {
+            BTL_ERROR(("XRC error: bad XRC API (compiled with %s api).",
+                       mca_btl_openib_xrc_api_str(MCA_BTL_IB_XRC_API_OFED_3_12)));
+            return OMPI_ERROR;
+        }
+#endif
+    }
+
if (MCA_BTL_XRC_ENABLED) {
if (OMPI_SUCCESS != mca_btl_openib_open_xrc_domain(device)) {
BTL_ERROR(("XRC Internal error. Failed to open xrc domain"));
diff --git a/ompi/mca/btl/openib/btl_openib_endpoint.c b/ompi/mca/btl/openib/btl_openib_endpoint.c
index d99b7b9..8f20e46 100644
--- a/ompi/mca/btl/openib/btl_openib_endpoint.c
+++ b/ompi/mca/btl/openib/btl_openib_endpoint.c
@@ -17,6 +17,7 @@
* Copyright (c) 2006-2009 Mellanox Technologies, Inc. All rights reserved.
* Copyright (c) 2009 IBM Corporation. All rights reserved.
* Copyright (c) 2010-2011 Oracle and/or its affiliates. All rights reserved
+ * Copyright (c) 2014 Bull SAS. All rights reserved.
*
* $COPYRIGHT$
*
@@ -346,7 +347,11 @@ static void mca_btl_openib_endpoint_construct(mca_btl_base_endpoint_t* endpoint)
}
endpoint->ib_addr = NULL;
+#if OMPI_HAVE_CONNECTIB_XRC
+ endpoint->xrc_recv_qp = NULL;
+#else
endpoint->xrc_recv_qp_num = 0;
+#endif
endpoint->endpoint_btl = 0;
endpoint->endpoint_proc = 0;
endpoint->endpoint_local_cpc = NULL;
@@ -457,12 +462,24 @@ static void mca_btl_openib_endpoint_destruct(mca_btl_base_endpoint_t* endpoint)
/* unregister xrc recv qp */
#if HAVE_XRC
+#if ! OMPI_HAVE_CONNECTIB_XRC
if (0 != endpoint->xrc_recv_qp_num) {
if(ibv_unreg_xrc_rcv_qp(endpoint->endpoint_btl->device->xrc_domain,
endpoint->xrc_recv_qp_num)) {
BTL_ERROR(("Failed to unregister XRC recv QP:%d\n", endpoint->xrc_recv_qp_num));
- }
+ } else {
+ BTL_VERBOSE(("Unregistered XRC Recv QP:%d\n", endpoint->xrc_recv_qp_num));
+ }
}
+#else
+ if (NULL != endpoint->xrc_recv_qp) {
+ if(ibv_destroy_qp(endpoint->xrc_recv_qp)) {
+ BTL_ERROR(("Failed to unregister XRC recv QP:%d\n", endpoint->xrc_recv_qp->qp_num));
+ } else {
+ BTL_VERBOSE(("Unregistered XRC Recv QP:%d\n", endpoint->xrc_recv_qp->qp_num));
+ }
+ }
+#endif
#endif
OBJ_DESTRUCT(&endpoint->endpoint_lock);
diff --git a/ompi/mca/btl/openib/btl_openib_endpoint.h b/ompi/mca/btl/openib/btl_openib_endpoint.h
index 57f03f7..648ca1d 100644
--- a/ompi/mca/btl/openib/btl_openib_endpoint.h
+++ b/ompi/mca/btl/openib/btl_openib_endpoint.h
@@ -15,6 +15,7 @@
* Copyright (c) 2006-2007 Voltaire All rights reserved.
* Copyright (c) 2007-2009 Mellanox Technologies. All rights reserved.
* Copyright (c) 2010-2012 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014 Bull SAS. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@@ -206,7 +207,11 @@ struct mca_btl_base_endpoint_t {
opal_list_t pending_lazy_frags;
mca_btl_openib_endpoint_qp_t *qps;
+#if OMPI_HAVE_CONNECTIB_XRC
+ struct ibv_qp *xrc_recv_qp;
+#else
uint32_t xrc_recv_qp_num; /* in xrc we will use it as recv qp */
+#endif
uint32_t xrc_recv_psn;
/** list of pending rget ops */
@@ -590,9 +595,14 @@ static inline int post_send(mca_btl_openib_endpoint_t *ep,
}
#if HAVE_XRC
+#if OMPI_HAVE_CONNECTIB_XRC
+ if(BTL_OPENIB_QP_TYPE_XRC(qp))
+ sr_desc->qp_type.xrc.remote_srqn = ep->rem_info.rem_srqs[qp].rem_srq_num;
+#else
if(BTL_OPENIB_QP_TYPE_XRC(qp))
sr_desc->xrc_remote_srq_num = ep->rem_info.rem_srqs[qp].rem_srq_num;
#endif
+#endif
assert(sg->addr == (uint64_t)(uintptr_t)frag->hdr);
if (sr_desc->send_flags & IBV_SEND_SIGNALED) {
diff --git a/ompi/mca/btl/openib/btl_openib_xrc.c b/ompi/mca/btl/openib/btl_openib_xrc.c
index 8236199..f1f738c 100644
--- a/ompi/mca/btl/openib/btl_openib_xrc.c
+++ b/ompi/mca/btl/openib/btl_openib_xrc.c
@@ -1,6 +1,7 @@
/*
* Copyright (c) 2007-2008 Mellanox Technologies. All rights reserved.
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
+ * Copyright (c) 2014 Bull SAS. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@@ -16,6 +17,7 @@
#include <fcntl.h>
#include <errno.h>
#include <unistd.h>
+#include <dlfcn.h>
#include "ompi/mca/btl/base/base.h"
#include "btl_openib_xrc.h"
@@ -32,12 +34,50 @@ OBJ_CLASS_INSTANCE(ib_address_t,
ib_address_constructor,
ib_address_destructor);
+/*
+ * Run-time probe for the XRC API of the libibverbs loaded in this process.
+ *
+ * Symbols are looked up in the running program (dlopen(NULL)): the "beta"
+ * API is recognised by ibv_create_xrc_rcv_qp, the OFED 3.12 API by
+ * ibv_cmd_open_xrcd; when both are present the OFED 3.12 API wins.
+ * Returns MCA_BTL_IB_XRC_API_NONE if neither is found or dlopen fails.
+ * Symbol lookup is used for lack of a known version/capability flag.
+ */
+mca_btl_openib_xrc_api_t mca_btl_openib_xrc_api(void)
+{
+    mca_btl_openib_xrc_api_t api = MCA_BTL_IB_XRC_API_NONE;
+    void *lib = dlopen(NULL, RTLD_NOW); /* current program */
+    if (NULL == lib) {
+        return api;
+    }
+    if (NULL != dlsym(lib, "ibv_create_xrc_rcv_qp")) {
+        api = MCA_BTL_IB_XRC_API_BETA;
+    }
+    if (NULL != dlsym(lib, "ibv_cmd_open_xrcd")) {
+        api = MCA_BTL_IB_XRC_API_OFED_3_12;
+    }
+    dlclose(lib);
+    return api;
+}
+
+/* Human-readable label for an XRC API flavour; any unlisted value maps to "none". */
+const char *mca_btl_openib_xrc_api_str(mca_btl_openib_xrc_api_t xrc_api)
+{
+    const char *label = "none";
+    if (MCA_BTL_IB_XRC_API_BETA == xrc_api) { label = "beta, ofed 1.3+"; }
+    if (MCA_BTL_IB_XRC_API_OFED_3_12 == xrc_api) { label = "ofed 3.12+"; }
+    return label;
+}
+
/* This func. opens XRC domain */
int mca_btl_openib_open_xrc_domain(struct mca_btl_openib_device_t *device)
{
int len;
char *xrc_file_name;
const char *dev_name;
+#if OMPI_HAVE_CONNECTIB_XRC
+ struct ibv_xrcd_init_attr xrcd_attr;
+#endif
dev_name = ibv_get_device_name(device->ib_dev);
len = asprintf(&xrc_file_name,
@@ -56,9 +96,17 @@ int mca_btl_openib_open_xrc_domain(struct mca_btl_openib_device_t *device)
free(xrc_file_name);
return OMPI_ERROR;
}
-
+#if OMPI_HAVE_CONNECTIB_XRC
+ memset(&xrcd_attr, 0, sizeof xrcd_attr);
+ xrcd_attr.comp_mask = IBV_XRCD_INIT_ATTR_FD | IBV_XRCD_INIT_ATTR_OFLAGS;
+ xrcd_attr.fd = device->xrc_fd;
+ xrcd_attr.oflags = O_CREAT;
+ device->xrcd = ibv_open_xrcd(device->ib_dev_context, &xrcd_attr);
+ if (NULL == device->xrcd) {
+#else
device->xrc_domain = ibv_open_xrc_domain(device->ib_dev_context, device->xrc_fd, O_CREAT);
if (NULL == device->xrc_domain) {
+#endif
BTL_ERROR(("Failed to open XRC domain\n"));
close(device->xrc_fd);
free(xrc_file_name);
@@ -71,11 +119,19 @@ int mca_btl_openib_open_xrc_domain(struct mca_btl_openib_device_t *device)
/* This func. closes XRC domain */
int mca_btl_openib_close_xrc_domain(struct mca_btl_openib_device_t *device)
{
+#if OMPI_HAVE_CONNECTIB_XRC
+ if (NULL == device->xrcd) {
+#else
if (NULL == device->xrc_domain) {
+#endif
/* No XRC domain, just exit */
return OMPI_SUCCESS;
}
+#if OMPI_HAVE_CONNECTIB_XRC
+ if (ibv_close_xrcd(device->xrcd)) {
+#else
if (ibv_close_xrc_domain(device->xrc_domain)) {
+#endif
BTL_ERROR(("Failed to close XRC domain, errno %d says %s\n",
device->xrc_fd, strerror(errno)));
return OMPI_ERROR;
diff --git a/ompi/mca/btl/openib/btl_openib_xrc.h b/ompi/mca/btl/openib/btl_openib_xrc.h
index d8313f4..b62540f 100644
--- a/ompi/mca/btl/openib/btl_openib_xrc.h
+++ b/ompi/mca/btl/openib/btl_openib_xrc.h
@@ -1,5 +1,6 @@
/*
* Copyright (c) 2007-2008 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2014 Bull SAS. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@@ -41,9 +42,13 @@ struct ib_address_t {
};
typedef struct ib_address_t ib_address_t;
+
int mca_btl_openib_open_xrc_domain(struct mca_btl_openib_device_t *device);
int mca_btl_openib_close_xrc_domain(struct mca_btl_openib_device_t *device);
int mca_btl_openib_ib_address_add_new (uint16_t lid, uint64_t s_id,
orte_jobid_t ep_jobid, mca_btl_openib_endpoint_t *ep);
+mca_btl_openib_xrc_api_t mca_btl_openib_xrc_api(void);
+const char *mca_btl_openib_xrc_api_str(mca_btl_openib_xrc_api_t xrc_api);
+
#endif
diff --git a/ompi/mca/btl/openib/connect/btl_openib_connect_xoob.c b/ompi/mca/btl/openib/connect/btl_openib_connect_xoob.c
index 50c1ef5..07cd404 100644
--- a/ompi/mca/btl/openib/connect/btl_openib_connect_xoob.c
+++ b/ompi/mca/btl/openib/connect/btl_openib_connect_xoob.c
@@ -2,6 +2,7 @@
* Copyright (c) 2007-2011 Mellanox Technologies. All rights reserved.
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2009 IBM Corporation. All rights reserved.
+ * Copyright (c) 2014 Bull SAS. All rights reserved.
*
* $COPYRIGHT$
*
@@ -265,7 +266,11 @@ static int xoob_send_connect_data(mca_btl_base_endpoint_t* endpoint,
qp_num = endpoint->qps[0].qp->lcl_qp->qp_num;
psn = endpoint->qps[0].qp->lcl_psn;
} else {
+#if OMPI_HAVE_CONNECTIB_XRC
+ qp_num = endpoint->xrc_recv_qp->qp_num;
+#else
qp_num = endpoint->xrc_recv_qp_num;
+#endif
psn = endpoint->xrc_recv_psn;
}
/* stuff all the QP info into the buffer */
@@ -341,10 +346,24 @@ static int xoob_send_connect_data(mca_btl_base_endpoint_t* endpoint,
}
/* on response we add all SRQ numbers */
for (srq = 0; srq < mca_btl_openib_component.num_xrc_qps; srq++) {
+#if OMPI_HAVE_CONNECTIB_XRC
+            uint32_t srq_num;
+            /* srq_num has no value if the lookup fails: stop here rather
+             * than pack an indeterminate SRQ number for the peer. */
+            if (ibv_get_srq_num(endpoint->endpoint_btl->qps[srq].u.srq_qp.srq, &srq_num)) {
+                BTL_ERROR(("BTL openib XOOB internal error: can't get srq num"));
+                return OMPI_ERROR;
+            }
+            BTL_VERBOSE(("Send pack srq[%d] num = %d", srq, srq_num));
+            BTL_VERBOSE(("packing %d of %d\n", 1, OPAL_UINT32));
+            rc = opal_dss.pack(buffer, &srq_num,
+                               1, OPAL_UINT32);
+#else
BTL_VERBOSE(("Send pack srq[%d] num = %d", srq, endpoint->endpoint_btl->qps[srq].u.srq_qp.srq->xrc_srq_num));
BTL_VERBOSE(("packing %d of %d\n", 1, OPAL_UINT32));
rc = opal_dss.pack(buffer, &endpoint->endpoint_btl->qps[srq].u.srq_qp.srq->xrc_srq_num,
1, OPAL_UINT32);
+#endif
if (ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
return rc;
@@ -376,7 +392,11 @@ static int xoob_send_qp_create (mca_btl_base_endpoint_t* endpoint)
uint32_t send_wr;
struct ibv_qp **qp;
uint32_t *psn;
+#if OMPI_HAVE_CONNECTIB_XRC
+ struct ibv_qp_init_attr_ex qp_init_attr;
+#else
struct ibv_qp_init_attr qp_init_attr;
+#endif
struct ibv_qp_attr attr;
int ret;
size_t req_inline;
@@ -392,7 +412,11 @@ static int xoob_send_qp_create (mca_btl_base_endpoint_t* endpoint)
send_wr = endpoint->ib_addr->qp->sd_wqe +
(mca_btl_openib_component.use_eager_rdma ?
mca_btl_openib_component.max_eager_rdma : 0);
+#if OMPI_HAVE_CONNECTIB_XRC
+ memset(&qp_init_attr, 0, sizeof(struct ibv_qp_init_attr_ex));
+#else
memset(&qp_init_attr, 0, sizeof(struct ibv_qp_init_attr));
+#endif
memset(&attr, 0, sizeof(struct ibv_qp_attr));
qp_init_attr.send_cq = qp_init_attr.recv_cq = openib_btl->device->ib_cq[prio];
@@ -405,9 +429,16 @@ static int xoob_send_qp_create (mca_btl_base_endpoint_t* endpoint)
qp_init_attr.cap.max_send_sge = 1;
/* this one is ignored by driver */
qp_init_attr.cap.max_recv_sge = 1; /* we do not use SG list */
+#if OMPI_HAVE_CONNECTIB_XRC
+ qp_init_attr.qp_type = IBV_QPT_XRC_SEND;
+ qp_init_attr.comp_mask = IBV_QP_INIT_ATTR_PD;
+ qp_init_attr.pd = openib_btl->device->ib_pd;
+ *qp = ibv_create_qp_ex(openib_btl->device->ib_dev_context, &qp_init_attr);
+#else
qp_init_attr.qp_type = IBV_QPT_XRC;
qp_init_attr.xrc_domain = openib_btl->device->xrc_domain;
*qp = ibv_create_qp(openib_btl->device->ib_pd, &qp_init_attr);
+#endif
if (NULL == *qp) {
orte_show_help("help-mpi-btl-openib-cpc-base.txt",
"ibv_create_qp failed", true,
@@ -544,7 +575,11 @@ static int xoob_send_qp_connect(mca_btl_openib_endpoint_t *endpoint, mca_btl_ope
/* Recv qp create */
static int xoob_recv_qp_create(mca_btl_openib_endpoint_t *endpoint, mca_btl_openib_rem_info_t *rem_info)
{
+#if OMPI_HAVE_CONNECTIB_XRC
+ struct ibv_qp_init_attr_ex qp_init_attr;
+#else
struct ibv_qp_init_attr qp_init_attr;
+#endif
struct ibv_qp_attr attr;
int ret;
@@ -553,6 +588,19 @@ static int xoob_recv_qp_create(mca_btl_openib_endpoint_t *endpoint, mca_btl_open
BTL_VERBOSE(("Connecting Recv QP\n"));
+#if OMPI_HAVE_CONNECTIB_XRC
+ memset(&qp_init_attr, 0, sizeof(struct ibv_qp_init_attr_ex));
+ qp_init_attr.qp_type = IBV_QPT_XRC_RECV;
+ qp_init_attr.comp_mask = IBV_QP_INIT_ATTR_XRCD;
+ qp_init_attr.xrcd = openib_btl->device->xrcd;
+ endpoint->xrc_recv_qp = ibv_create_qp_ex(openib_btl->device->ib_dev_context,
+ &qp_init_attr);
+ if (NULL == endpoint->xrc_recv_qp) {
+ BTL_ERROR(("Error creating XRC recv QP, errno says: %s [%d]",
+ strerror(errno), errno));
+ return OMPI_ERROR;
+ }
+#else
memset(&qp_init_attr, 0, sizeof(struct ibv_qp_init_attr));
/* Only xrc_domain is required, all other are ignored */
qp_init_attr.xrc_domain = openib_btl->device->xrc_domain;
@@ -562,12 +610,26 @@ static int xoob_recv_qp_create(mca_btl_openib_endpoint_t *endpoint, mca_btl_open
endpoint->xrc_recv_qp_num, strerror(ret), ret));
return OMPI_ERROR;
}
+#endif
memset(&attr, 0, sizeof(struct ibv_qp_attr));
attr.qp_state = IBV_QPS_INIT;
attr.pkey_index = openib_btl->pkey_index;
attr.port_num = openib_btl->port_num;
attr.qp_access_flags = IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_REMOTE_READ;
+#if OMPI_HAVE_CONNECTIB_XRC
+ ret = ibv_modify_qp(endpoint->xrc_recv_qp,
+ &attr,
+ IBV_QP_STATE|
+ IBV_QP_PKEY_INDEX|
+ IBV_QP_PORT|
+ IBV_QP_ACCESS_FLAGS);
+ if (ret) {
+ BTL_ERROR(("Error modifying XRC recv QP to IBV_QPS_INIT, errno says: %s [%d]",
+ strerror(ret), ret));
+ return OMPI_ERROR;
+ }
+#else
ret = ibv_modify_xrc_rcv_qp(openib_btl->device->xrc_domain,
endpoint->xrc_recv_qp_num,
&attr,
@@ -578,9 +640,10 @@ static int xoob_recv_qp_create(mca_btl_openib_endpoint_t *endpoint, mca_btl_open
if (ret) {
BTL_ERROR(("Error modifying XRC recv QP[%x] to IBV_QPS_INIT, errno says: %s [%d]",
endpoint->xrc_recv_qp_num, strerror(ret), ret));
- while(1);
return OMPI_ERROR;
}
+#endif
+
memset(&attr, 0, sizeof(struct ibv_qp_attr));
attr.qp_state = IBV_QPS_RTR;
@@ -612,6 +675,7 @@ static int xoob_recv_qp_create(mca_btl_openib_endpoint_t *endpoint, mca_btl_open
}
#endif
+#if ! OMPI_HAVE_CONNECTIB_XRC
ret = ibv_modify_xrc_rcv_qp(openib_btl->device->xrc_domain,
endpoint->xrc_recv_qp_num,
&attr,
@@ -627,12 +691,37 @@ static int xoob_recv_qp_create(mca_btl_openib_endpoint_t *endpoint, mca_btl_open
endpoint->xrc_recv_qp_num, strerror(ret), ret));
return OMPI_ERROR;
}
+#else
+ ret = ibv_modify_qp(endpoint->xrc_recv_qp,
+ &attr,
+ IBV_QP_STATE|
+ IBV_QP_AV|
+ IBV_QP_PATH_MTU|
+ IBV_QP_DEST_QPN|
+ IBV_QP_RQ_PSN|
+ IBV_QP_MAX_DEST_RD_ATOMIC|
+ IBV_QP_MIN_RNR_TIMER);
+ if (ret) {
+ BTL_ERROR(("Error modifying XRC recv QP to IBV_QPS_RTR, errno says: %s [%d]",
+ strerror(ret), ret));
+ return OMPI_ERROR;
+ }
+#endif
+
#if OPAL_HAVE_THREADS
if (APM_ENABLED) {
+#if ! OMPI_HAVE_CONNECTIB_XRC
mca_btl_openib_load_apm_xrc_rcv(endpoint->xrc_recv_qp_num, endpoint);
+#else
+ mca_btl_openib_load_apm(endpoint->xrc_recv_qp, endpoint);
+#endif
}
#endif
-
+#if ! OMPI_HAVE_CONNECTIB_XRC
+ BTL_VERBOSE(("XRC Recv QP[%d] is in state RTR\n", endpoint->xrc_recv_qp_num));
+#else
+ BTL_VERBOSE(("XRC Recv QP[%d] is in state RTR\n", endpoint->xrc_recv_qp->qp_num));
+#endif
return OMPI_SUCCESS;
}
@@ -643,7 +732,7 @@ static int xoob_recv_qp_connect(mca_btl_openib_endpoint_t *endpoint, mca_btl_ope
mca_btl_openib_module_t* openib_btl =
(mca_btl_openib_module_t*)endpoint->endpoint_btl;
-
+#if ! OMPI_HAVE_CONNECTIB_XRC
BTL_VERBOSE(("Connecting Recv QP\n"));
ret = ibv_reg_xrc_rcv_qp(openib_btl->device->xrc_domain, rem_info->rem_qps->rem_qp_num);
if (ret) { /* failed to regester the qp, so it is already die and we should create new one */
@@ -656,6 +745,25 @@ static int xoob_recv_qp_connect(mca_btl_openib_endpoint_t *endpoint, mca_btl_ope
endpoint->xrc_recv_qp_num = rem_info->rem_qps->rem_qp_num;
return OMPI_SUCCESS;
}
+#else
+ struct ibv_qp_open_attr attr;
+ memset(&attr, 0, sizeof(struct ibv_qp_open_attr));
+ attr.comp_mask = IBV_QP_OPEN_ATTR_NUM | IBV_QP_OPEN_ATTR_XRCD | IBV_QP_OPEN_ATTR_TYPE;
+ attr.qp_num = rem_info->rem_qps->rem_qp_num;
+ attr.qp_type = IBV_QPT_XRC_RECV;
+ attr.xrcd = openib_btl->device->xrcd;
+ BTL_VERBOSE(("Connecting Recv QP\n"));
+ endpoint->xrc_recv_qp = ibv_open_qp(openib_btl->device->ib_dev_context, &attr);
+ if (NULL == endpoint->xrc_recv_qp) { /* failed to regester the qp, so it is already die and we should create new one */
+ /* Return NOT READY !!!*/
+ BTL_ERROR(("Failed to register qp_num: %d , get error: %s (%d)\n. Replying with RNR",
+ rem_info->rem_qps->rem_qp_num, strerror(errno), errno));
+ return OMPI_ERROR;
+ } else {
+ BTL_VERBOSE(("Connected to XRC Recv qp [%d]", rem_info->rem_qps->rem_qp_num));
+ return OMPI_SUCCESS;
+ }
+#endif
}
/*
diff --git a/ompi/mca/btl/openib/mca-btl-openib-device-params.ini b/ompi/mca/btl/openib/mca-btl-openib-device-params.ini
index 5c9e339..0577b0d 100644
--- a/ompi/mca/btl/openib/mca-btl-openib-device-params.ini
+++ b/ompi/mca/btl/openib/mca-btl-openib-device-params.ini
@@ -1,6 +1,7 @@
#
# Copyright (c) 2006-2013 Cisco Systems, Inc. All rights reserved.
# Copyright (c) 2006-2011 Mellanox Technologies. All rights reserved.
+# Copyright (c) 2014 Bull SAS. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
@@ -165,7 +166,7 @@ max_inline_data = 128
[Mellanox ConnectIB]
vendor_id = 0x2c9,0x5ad,0x66a,0x8f1,0x1708,0x03ba,0x15b3,0x119f
-vendor_part_id = 4113
+vendor_part_id = 4113,7059,7060
use_eager_rdma = 1
mtu = 4096
max_inline_data = 256