In the case where MPI is compiled with --enable-mpi-thread-multiple, a call to
opal_using_threads() always returns 0 in the routine btl_xxx_component_init()
of the BTLs, even if the application calls MPI_Init_thread() with
MPI_THREAD_MULTIPLE.
This is because opal_set_using_threads(true) in ompi/runtime/ompi_mpi_init.c is
called too late.
I propose the following patch that solves the problem for me:
diff --git a/ompi/runtime/ompi_mpi_init.c b/ompi/runtime/ompi_mpi_init.c
index 35509cf..c2370fc 100644
--- a/ompi/runtime/ompi_mpi_init.c
+++ b/ompi/runtime/ompi_mpi_init.c
@@ -512,6 +512,13 @@ int ompi_mpi_init(int argc, char **argv, int requested,
int *provided)
}
#endif
+ /* If thread support was enabled, then setup OPAL to allow for
+ them. */
+ if ((OPAL_ENABLE_PROGRESS_THREADS == 1) ||
+ (*provided != MPI_THREAD_SINGLE)) {
+ opal_set_using_threads(true);
+ }
+
/* initialize datatypes. This step should be done early as it will
* create the local convertor and local arch used in the proc
* init.
@@ -724,13 +731,6 @@ int ompi_mpi_init(int argc, char **argv, int requested,
int *provided)
goto error;
}
- /* If thread support was enabled, then setup OPAL to allow for
- them. */
- if ((OPAL_ENABLE_PROGRESS_THREADS == 1) ||
- (*provided != MPI_THREAD_SINGLE)) {
- opal_set_using_threads(true);
- }
-
/* start PML/BTL's */
ret = MCA_PML_CALL(enable(true));
if( OMPI_SUCCESS != ret ) {