Package: libscalapack-openmpi1
Version: 1.8.0-13
Severity: important
Dear Maintainer,
After upgrade to the latest Debian testing my code started leaking memory at
horrendous pace.
The valgrind output traced the leak to Cpdgemr2d and Cpzgemr2d (see below).
It could be that the actual leak is in OpenMPI, but somehow it triggered
only by pXgemr2d.
The code works fine on jessy and older (not updated since November) testing.
I wrote a sample code which reproduces the bug (attached). I played around
with different processor grids and every time it was leaking.
Many thanks,
Grigory
valgrind output:
==2843== 217,280 bytes in 194 blocks are definitely lost in loss record 904 of
908
==2843==at 0x4C2BBAF: malloc (vg_replace_malloc.c:299)
==2843==by 0x1B076856: ???
==2843==by 0x70015FA: mca_coll_base_comm_select (in
/usr/lib/x86_64-linux-gnu/openmpi/lib/libmpi.so.20.0.2)
==2843==by 0x6FA19AD: ??? (in
/usr/lib/x86_64-linux-gnu/openmpi/lib/libmpi.so.20.0.2)
==2843==by 0x6FA3C92: ??? (in
/usr/lib/x86_64-linux-gnu/openmpi/lib/libmpi.so.20.0.2)
==2843==by 0x90A909B: opal_progress (in
/usr/lib/x86_64-linux-gnu/openmpi/lib/libopen-pal.so.20.2.0)
==2843==by 0x6FA3894: ompi_comm_activate (in
/usr/lib/x86_64-linux-gnu/openmpi/lib/libmpi.so.20.0.2)
==2843==by 0x6F9FA96: ompi_comm_split (in
/usr/lib/x86_64-linux-gnu/openmpi/lib/libmpi.so.20.0.2)
==2843==by 0x6FD5F5D: PMPI_Comm_split (in
/usr/lib/x86_64-linux-gnu/openmpi/lib/libmpi.so.20.0.2)
==2843==by 0x5986CF7: Cblacs_gridmap (in /usr/lib/libblacs-openmpi.so.1.1)
==2843==by 0x6038D8C: Cpdgemr2d (in /usr/lib/libscalapack-openmpi.so.1.8.0)
==2843==by 0x362823: __m_evol4_lz_MOD_evol4_pade (evol4_lz.F:956)
==2843==
!*** Sample code (fortran) **!
! compiled with
! mpif90 -O2 -o mx_pdgemr2d mx_pdgemr2d.f90 -llapack -lblas -lblacs-openmpi
-lblacsF77init-openmpi /usr/lib/libscalapack-openmpi.so.1
!!
program main
use mpi
implicit none
integer, parameter :: Msz = 9
integer :: myrank,np
integer :: errorflag, info
integer :: i,j, k
double precision ,allocatable :: M(:,:)
double precision ,allocatable :: M0(:,:)
integer:: blacs_grid, nprows, npcols, myrow, mycol, numr, numc
integer :: desc(9), desc0(9), blacs_grid0
integer :: Mb, Nb
integer, external :: numroc, indxl2g, indxg2p, indxg2l, izamax
call MPI_Init(errorflag)
call MPI_Comm_rank(MPI_COMM_WORLD, myrank, errorflag)
call MPI_Comm_size(MPI_COMM_WORLD, np, errorflag)
nprows = int( sqrt( real(np) ) )
npcols = np / nprows
if (nprows .ne. npcols) then
npcols = nprows
endif
!if(myrank == 0) then
!print *, "nprows=", nprows, "npcols=", npcols
!endif
call Blacs_get( 0, 0, blacs_grid )
call Blacs_gridinit( blacs_grid, 'Column-major', nprows, npcols )
call Blacs_gridinfo( blacs_grid, nprows, npcols, myrow, mycol )
! the nodes that do not fit into the square
! are idle
if( myrow < 0 .or. mycol < 0 ) go to 10
! calculate the number of rows/columns on the current
! node
Mb = 4
Nb = 4
numr = Numroc(Msz, Mb, myrow, 0, nprows)
numc = Numroc(Msz, Nb, mycol, 0, npcols)
allocate(M(numr,numc))
M(:,:) = 0.d0
call Descinit(desc, Msz, Msz, Mb, Nb, &
0, 0, blacs_grid, numr, info)
if(myrank .eq. 0) then
allocate(M0(Msz,Msz))
M0(:,:) = 3.14d0
endif
call Blacs_get( 0, 0, blacs_grid0 )
if( nprows .ne. 1 .and. npcols .ne. 1) then
!call Blacs_gridinit(blacs_grid0, 'Column-major', nprows, npcols )
call Blacs_gridinit(blacs_grid0, 'Column-major', 1, 1 )
else
blacs_grid0 = blacs_grid
endif
!call Descinit(desc0, Msz, Msz, Msz, Msz, 0, 0,&
!blacs_grid0, Msz, info)
desc0 = (/1 , blacs_grid0, Msz , Msz, Msz, Msz, 0, 0, Msz/)
if(myrank .ne. 0) desc0(1) = -1
do i=1, 999
call pdgemr2d(Msz, Msz, M0, 1, 1, desc0, M , 1, 1, desc, &
blacs_grid)
call pdgemr2d(Msz,Msz,M,1,1,desc, &
M0,1,1,desc0, blacs_grid)
enddo
10 call blacs_exit(1)
call MPI_Finalize(errorflag)
call exit(0)
end program main
-- System Information:
Debian Release: 9.0
APT prefers testing
APT policy: (500, 'testing')
Architecture: amd64
(x86_64)
Kernel: Linux 4.8.0-2-amd64 (SMP w/8 CPU cores)
Locale: LANG=en_US.UTF-8, LC_CTYPE=en_US.UTF-8 (charmap=UTF-8)
Shell: /bin/sh linked to /bin/dash
Init: systemd (via /run/systemd/system)
Versions of packages libscalapack-openmpi1 depends on:
ii libatlas3-base [liblapack.so.3] 3.10.3-1+b1
ii libblacs-openmpi11.1-38
ii libblas3 [libblas.so.3] 3.7.0-1
ii libc62.24-10
ii libgfortran3