Hi,

We observe a hang when running the multi-threading support test "latency.c" (attached to this report), which uses MPI_THREAD_MULTIPLE.
The hang happens immediately at the beginning of the test and is reproducible in the v1.8 release branch. The command line to reproduce the behavior is:

$ mpirun --map-by node --bind-to core -display-map -np 2 -mca pml ob1 -mca btl tcp,self ./thread-tests-1.1/latency

The last commit with which the hang does not reproduce is:

commit e4d4266d9c69e

The problems begin after this commit:

commit 09b867374e9618007b81bfaf674ec6df04548bed
Author: Jeff Squyres <jsquy...@cisco.com>
Date:   Fri Oct 31 12:42:50 2014 -0700

    Revert most of open-mpi/ompi@6ef938de3fa9ca0fed2c5bcb0736f65b0d8803af

Is this expected behavior? In other words, should we not expect any stable release in the 1.8.x series to be able to use MPI_THREAD_MULTIPLE with even the TCP and SM BTLs?

Please advise.

Thanks,
Alina
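P.S. For reference, one way to confirm whether a given Open MPI build was configured with MPI_THREAD_MULTIPLE support is to query ompi_info (the standard check, assuming a default installation; the exact wording of the output varies across versions):

$ ompi_info | grep -i thread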
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/*
 * Copyright (C) 2007 University of Chicago
 * See COPYRIGHT notice in top-level directory.
 */

#include "mpi.h"
#include <stdio.h>
#include <stdlib.h>
#include <strings.h>

#define MAXSIZE 1024
#define NTIMES  1000

/* Measures concurrent latency (for short messages). All even ranks
   send to rank i+1 and get a reply. Run on 2 nodes (with multiple
   processes). */

int main(int argc, char *argv[])
{
    int rank, nprocs, src, dest, tag, i, size, incr;
    double stime, etime, ttime;
    char *sendbuf, *recvbuf;
    int provided;

    // MPI_Init(&argc,&argv);
    MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided);
    if (provided != MPI_THREAD_MULTIPLE) {
        printf("Thread multiple needed\n");
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    sendbuf = (char *) malloc(MAXSIZE);
    if (!sendbuf) {
        printf("Cannot allocate buffer\n");
        MPI_Abort(MPI_COMM_WORLD, 1);
    }
    recvbuf = (char *) malloc(MAXSIZE);
    if (!recvbuf) {
        printf("Cannot allocate buffer\n");
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    incr = 16;
    if (rank == 0)
        printf("Size (bytes) \t Time (us)\n");

    MPI_Barrier(MPI_COMM_WORLD);

    /* All even ranks send to (and recv from) rank i+1 many times */
    if ((rank % 2) == 0) {  /* even */
        dest = rank + 1;
        for (size = 0; size <= MAXSIZE; size += incr) {
            tag = 0;
            stime = MPI_Wtime();
            for (i = 0; i < NTIMES; i++) {
                MPI_Send(sendbuf, size, MPI_BYTE, dest, tag, MPI_COMM_WORLD);
                tag++;
                MPI_Recv(recvbuf, size, MPI_BYTE, dest, tag, MPI_COMM_WORLD,
                         MPI_STATUS_IGNORE);
            }
            etime = MPI_Wtime();
            ttime = (etime - stime) / (2 * NTIMES);
            if (rank == 0)
                printf("%d \t %f\n", size, ttime * 1000000);
            if (size == 256)
                incr = 64;
        }
    }
    else {  /* odd */
        src = rank - 1;
        for (size = 0; size <= MAXSIZE; size += incr) {
            tag = 0;
            for (i = 0; i < NTIMES; i++) {
                MPI_Recv(recvbuf, size, MPI_BYTE, src, tag, MPI_COMM_WORLD,
                         MPI_STATUS_IGNORE);
                tag++;
                MPI_Send(sendbuf, size, MPI_BYTE, src, tag, MPI_COMM_WORLD);
            }
            if (size == 256)
                incr = 64;
        }
    }

    free(sendbuf);
    free(recvbuf);
    MPI_Finalize();
    return 0;
}
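For completeness, the attached test builds as a standalone binary with the MPI wrapper compiler from the installation under test (assuming mpicc is in PATH), e.g.:

$ mpicc -o latency latency.c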