Hi all,
I have compiled Open MPI 1.3.2 with the Intel Fortran and C/C++ 11.0
compilers. Fortran real*16 seems to work everywhere except in
MPI_Allreduce. I have attached a simple program that demonstrates the
problem. I am not an MPI programmer myself, but I work for one, and he
wrote the attached program. It broadcasts a value of 1 to all processes
and then computes the global sum three ways: MPI_Allreduce,
MPI_Allgather followed by a local sum, and explicit MPI_Isend/MPI_Irecv.
Running with real*8 (comment out the #define REAL16 line in quad_test.F)
produces the expected results:
Number of Nodes = 4
ALLREDUCE sum = 4.00000000000000
ALLGATHER sum = 4.00000000000000
ISEND/IRECV sum = 4.00000000000000
Node = 0 Value = 1.00000000000000
Node = 2 Value = 1.00000000000000
Node = 3 Value = 1.00000000000000
Node = 1 Value = 1.00000000000000
Running with real*16 produces the following:
Number of Nodes = 4
ALLREDUCE sum = 1.00000000000000000000000000000000
ALLGATHER sum = 4.00000000000000000000000000000000
ISEND/IRECV sum = 4.00000000000000000000000000000000
Node = 0 Value = 1.00000000000000000000000000000000
Node = 1 Value = 1.00000000000000000000000000000000
Node = 2 Value = 1.00000000000000000000000000000000
Node = 3 Value = 1.00000000000000000000000000000000
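One thing that might be worth checking first (an untested sketch on my
part; Tsize and Terror are just names I made up) is whether the
library's MPI_REAL16 is really 16 bytes wide, e.g. right after the
MPI_Init call in the attached program:

      integer :: Tsize, Terror
      CALL mpi_type_size (MPI_REAL16, Tsize, Terror)
      PRINT *, 'MPI_REAL16 size (bytes) = ', Tsize   ! expect 16

If that prints 16 on every node, the datatype itself is probably fine
and the problem is in the reduction.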
As I mentioned, I'm not a parallel programmer, but I would expect
identical operations on real*8 and real*16 variables to produce the
same results; here the ALLREDUCE sum comes back as 1.0 instead of 4.0.
NOTE: I get the same behavior with MPICH and MPICH2.
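In the meantime, a possible workaround (an untested sketch; quad_sum
and SumOp are made-up names, and it assumes the problem is in the
built-in MPI_SUM reduction rather than in the MPI_REAL16 datatype
itself) would be to register a hand-rolled quad-precision sum with
MPI_OP_CREATE and pass it to MPI_ALLREDUCE in place of MPI_SUM:

      SUBROUTINE quad_sum (invec, inoutvec, len, datatype)
      implicit none
      integer :: len, datatype, i
      integer, parameter :: r16 = selected_real_kind(16,3000)
      real(r16) :: invec(len), inoutvec(len)
      DO i=1,len
        inoutvec(i)=inoutvec(i)+invec(i)   ! accumulate into inoutvec
      END DO
      END SUBROUTINE quad_sum

and in the main program:

      integer :: SumOp
      external quad_sum
!  .TRUE. tells MPI the operation is commutative.
      CALL mpi_op_create (quad_sum, .TRUE., SumOp, MyError)
      CALL mpi_allreduce (a16, asum, 1, MP_FLOAT, SumOp, &
     &                    MPI_COMM_WORLD, MyError)
      CALL mpi_op_free (SumOp, MyError)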
Dave
# Makefile for quadruple MPI communications test.
#
NEED_VERSION := 3.80 3.81
$(if $(filter $(MAKE_VERSION),$(NEED_VERSION)),, \
 $(error This makefile requires one of these GNU make versions: $(NEED_VERSION)))
IFORT ?= on
OPENMPI ?= on
ifdef IFORT
ifdef OPENMPI
FC := /opt/intelsoft/openmpi/bin/mpif90
# FFLAGS := -ip -O3
FFLAGS := -g -check uninit -ftrapuv -traceback
else
# FC := /opt/intelsoft/mpich/bin/mpif90
FC := /opt/intelsoft/mpich2/mpich2-1.1/bin/mpif90
# FFLAGS := -ip -O3
FFLAGS := -g -check uninit -ftrapuv -traceback
endif
else
ifdef OPENMPI
FC := /opt/pgisoft/openmpi/bin/mpif90
# FFLAGS := -O3 -tp k8-64
FFLAGS := -g -C
else
FC := /opt/pgisoft/mpich/bin/mpif90
# FC := /opt/pgisoft/mpich2/bin/mpif90
# FFLAGS := -O3 -tp k8-64
FFLAGS := -g -C
endif
endif
LD := $(FC)
LDFLAGS :=
AR := ar
ARFLAGS := r
CPP := /usr/bin/cpp
CPPFLAGS := -P -traditional
CLEAN := $(HOME)/bin/cpp_clean
RM := rm -f
PERL := perl
TEST := test
clean_list := core *.o *.oo *.inc *.mod *.f90 lib*.a *.bak
BIN := quad_test
# Set Pattern rules.
%.o: %.F
	$(FC) -c $(FFLAGS) $(notdir $<)
vpath %.F $(CURDIR)
vpath %.o $(CURDIR)
OBJS := quad_test.o
# Build targets.
.PHONY: all
all: $(BIN)
$(BIN): $(OBJS)
	$(FC) $(LDFLAGS) $(OBJS) -o $(BIN)
# Clean target.
.PHONY: clean
clean:
	$(RM) -r $(clean_list) $(BIN)
      PROGRAM quad_test
!
! Program to test real*16 (quadruple precision) MPI communications.
!
      implicit none
      include 'mpif.h'
#define REAL16
#ifdef REAL16
      integer, parameter :: r16 = selected_real_kind(16,3000)  ! 128-bit
      integer, parameter :: MP_FLOAT = MPI_REAL16
#else
      integer, parameter :: r16 = selected_real_kind(12,300)   ! 64-bit
      integer, parameter :: MP_FLOAT = MPI_REAL8
!! integer, parameter :: MP_FLOAT = MPI_DOUBLE_PRECISION
#endif
      logical :: Master
      integer :: Lstr, MyMaster, MyRank, Nnodes, rank, request
      integer :: MyError, Serror
! MPI_Wait expects status arrays of size MPI_STATUS_SIZE, not scalars.
      integer :: Rstatus(MPI_STATUS_SIZE), Sstatus(MPI_STATUS_SIZE)
      integer, allocatable :: Rrequest(:)
      real(r16) :: a16, asum
      real(r16), allocatable :: Arecv(:)
      character (len=MPI_MAX_ERROR_STRING) :: string
!
! Initialize MPI.
!
      CALL mpi_init (MyError)
      IF (MyError.ne.MPI_SUCCESS) THEN
        CALL mpi_error_string (MyError, string, Lstr, Serror)
        PRINT *, 'Unable to initialize MPI.'
        PRINT *, string(1:Lstr)
        STOP
      END IF
!
! Get rank of the local process in the group associated with the
! communicator.
!
      CALL mpi_comm_rank (MPI_COMM_WORLD, MyRank, MyError)
      IF (MyError.ne.MPI_SUCCESS) THEN
        CALL mpi_error_string (MyError, string, Lstr, Serror)
        PRINT *, 'Unable to inquire rank of local node.'
        PRINT *, string(1:Lstr)
        STOP
      END IF
!
! Get number of processes in the group associated with the
! communicator.
!
      CALL mpi_comm_size (MPI_COMM_WORLD, Nnodes, MyError)
      IF (MyError.ne.MPI_SUCCESS) THEN
        CALL mpi_error_string (MyError, string, Lstr, Serror)
        PRINT *, 'Unable to inquire number of processes in the group.'
        PRINT *, string(1:Lstr)
        STOP
      END IF
!
! Identify master node.
!
      Master=.FALSE.
      MyMaster=0
      IF (MyRank.eq.MyMaster) THEN
        Master=.TRUE.
      END IF
      IF (.not.allocated(Arecv)) allocate ( Arecv(0:Nnodes-1) )
      IF (.not.allocated(Rrequest)) allocate ( Rrequest(0:Nnodes-1) )
!
! Initialize variable
!
      IF (Master) THEN
        a16=1.0_r16
      ELSE
        a16=0.0_r16
      END IF
      asum=0.0_r16
      Arecv=0.0_r16
      Rrequest=0
!
! Broadcast master value to all nodes.
!
      CALL mpi_bcast (a16, 1, MP_FLOAT, MyMaster, MPI_COMM_WORLD, &
     &                MyError)
      IF (MyError.ne.MPI_SUCCESS) THEN
        CALL mpi_error_string (MyError, string, Lstr, Serror)
        PRINT *, 'Unable to broadcast variable.'
        PRINT *, string(1:Lstr)
        STOP
      END IF
!
! Global sum using MPI_ALLREDUCE.
!
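! (With MPI_REAL16 this is the call that returns the wrong result:
!  1.0 instead of the expected Nnodes; see the output above.)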
      CALL mpi_allreduce (a16, asum, 1, MP_FLOAT, MPI_SUM, &
     &                    MPI_COMM_WORLD, MyError)
      IF (MyError.ne.MPI_SUCCESS) THEN
        CALL mpi_error_string (MyError, string, Lstr, Serror)
        PRINT *, 'Unable to compute global sum using ALLREDUCE.'
        PRINT *, string(1:Lstr)
        STOP
      END IF
      IF (Master) THEN
        PRINT *, ' '
        PRINT *, 'Number of Nodes = ', Nnodes
        PRINT *, ' '
        PRINT *, 'ALLREDUCE sum = ', asum
      END IF
!
! Global sum using MPI_ALLGATHER.
!
      CALL mpi_allgather (a16, 1, MP_FLOAT, &
     &                    Arecv, 1, MP_FLOAT, &
     &                    MPI_COMM_WORLD, MyError)
      IF (MyError.ne.MPI_SUCCESS) THEN
        CALL mpi_error_string (MyError, string, Lstr, Serror)
        PRINT *, 'Unable to compute global sum using ALLGATHER.'
        PRINT *, string(1:Lstr)
        STOP
      END IF
      asum=0.0_r16
      DO rank=0,Nnodes-1
        asum=asum+Arecv(rank)
      END DO
      IF (Master) THEN
        PRINT *, 'ALLGATHER sum = ', asum
      END IF
!
! Global sum using IRECV/ISEND.
!
      IF (MyRank.eq.MyMaster) THEN
        DO rank=1,Nnodes-1
          CALL mpi_irecv (Arecv(rank), 1, MP_FLOAT, rank, &
     &                    rank+100, MPI_COMM_WORLD, Rrequest(rank), &
     &                    MyError)
        END DO
        asum=a16
        DO rank=1,Nnodes-1
          CALL mpi_wait (Rrequest(rank), Rstatus, MyError)
          IF (MyError.ne.MPI_SUCCESS) THEN
            CALL mpi_error_string (MyError, string, Lstr, Serror)
            PRINT *, 'MPI_IRECV', rank, MyError, string(1:Lstr)
            STOP
          END IF
          asum=asum+Arecv(rank)
        END DO
      ELSE
        CALL mpi_isend (a16, 1, MP_FLOAT, MyMaster, MyRank+100, &
     &                  MPI_COMM_WORLD, request, MyError)
        CALL mpi_wait (request, Sstatus, MyError)
        IF (MyError.ne.MPI_SUCCESS) THEN
          CALL mpi_error_string (MyError, string, Lstr, Serror)
          PRINT *, 'MPI_ISEND', MyRank, MyError, string(1:Lstr)
          STOP
        END IF
      END IF
      CALL mpi_bcast (asum, 1, MP_FLOAT, MyMaster, MPI_COMM_WORLD, &
     &                MyError)
      IF (MyError.ne.MPI_SUCCESS) THEN
        CALL mpi_error_string (MyError, string, Lstr, Serror)
        PRINT *, 'MPI_BCAST', MyRank, MyError, string(1:Lstr)
        STOP
      END IF
      IF (Master) THEN
        PRINT *, 'ISEND/IRECV sum = ', asum
        PRINT *, ' '
      END IF
      CALL mpi_barrier (MPI_COMM_WORLD, MyError)
      PRINT *, 'Node = ', MyRank, ' Value = ', a16
      CALL mpi_barrier (MPI_COMM_WORLD, MyError)
      CALL flush (6)
!
! Terminate MPI communications
!
      CALL mpi_finalize (MyError)
      IF (MyError.ne.MPI_SUCCESS) THEN
        CALL mpi_error_string (MyError, string, Lstr, Serror)
        PRINT *, 'Unable to finalize MPI.'
        PRINT *, string(1:Lstr)
        STOP
      END IF
      IF (allocated(Arecv)) deallocate ( Arecv )
      IF (allocated(Rrequest)) deallocate ( Rrequest )
      END PROGRAM quad_test