On 02/21/2007 06:10 PM, Sam Lang wrote:
>
> On Feb 21, 2007, at 10:49 AM, Trach-Minh Tran wrote:
>
>> On 02/21/2007 05:18 PM, Sam Lang wrote:
>>>
>>> Hi Minh,
>>>
>>> I got the order of my AC_TRY_COMPILE arguments wrong. That was pretty
>>> sloppy on my part. I've attached a patch that should fix the error
>>> you're getting. I'm not sure it will apply cleanly to the already
>>> patched 2.6.2 source that you have. Better to start with a clean 2.6.2
>>> tarball.
>>
>> Hi Sam,
>>
>> Thanks for your prompt response. I can now load the module. I will do
>> some more tests with this 2.6.2 version. Until now, I've found
>> using my MPI-IO program, that this is not as stable as the 2.6.1 version:
>> During about 1/2 hour running the test, already 2 data servers (out
>> of 8) have died!
>
> That's surprising, the 2.6.2 release didn't include any changes to the
> servers from 2.6.1. Did you get any messages in the server logs on the
> nodes that died?
>
>>
>> Do you think that I should stay with 2.6.1 + the misc-bug.patch from
>> Murali?
>
> There aren't any other significant fixes in 2.6.2 besides support for
> the latest Berkeley DB release, and the misc-bug patch that you mention,
> so using 2.6.1 shouldn't be a problem for you. That being said, if the
> servers crash for you on 2.6.2, its likely that they will do so with
> 2.6.1 and you just haven't hit it yet. I'd also like to figure out
> exactly what is causing the servers to crash. Can you send your MPI-IO
> program to us?
>
Hi Sam,
There is nothing in the server logs! Maybe tomorrow (it is now 6:30 pm
here) I will have more information from the mpi-io runs I've just submitted.
I have attached the mpi-io test program.
Best regards, -Minh.
SUBROUTINE record(flag, n, r, z, phi, u, mu, w, vol, psi)
!
! Read ('R') or write ('W') the 8 particle arrays to/from a shared
! restart file using collective MPI-IO.
!
! flag    : 'R' = read restart file, 'W' = write restart file
!           (any other value: only the file layout is computed, no I/O)
! n       : number of particles local to this rank
! r..psi  : the 8 particle arrays, each of length n; read by the 'W'
!           path and filled by the 'R' path
!
! File layout: the global r array (nptot doubles) is stored first, then
! z, phi, ..., psi, expressed per rank as an MPI vector type of 8 blocks
! of n doubles with stride nptot.
!
  IMPLICIT NONE
  INCLUDE 'mpif.h'
  CHARACTER*1, INTENT(in) :: flag
  INTEGER, INTENT(in) :: n
  ! INTENT(inout), not (out): the 'W' branch reads these arrays, and
  ! INTENT(out) would leave them undefined on entry when writing.
  DOUBLE PRECISION, DIMENSION(n), INTENT(inout) :: r, z, phi, u, mu, w, vol, psi
  INTEGER :: me, npes, ierr, comm=MPI_COMM_WORLD
  CHARACTER*128 :: fname="pvfs2:/scratch/ttran/io/io2.d"
!!$  CHARACTER*128 :: fname="io2.d"
  INTEGER, DIMENSION(:), ALLOCATABLE :: nplocs
  INTEGER :: i, nptot, fh, filetype
  INTEGER(kind=MPI_OFFSET_KIND) :: displ
  LOGICAL :: opened
!________________________________________________________________________________
! Prologue
!
  CALL mpi_comm_rank(comm, me, ierr)
  CALL mpi_comm_size(comm, npes, ierr)
!________________________________________________________________________________
! Define file layout
!
  ALLOCATE(nplocs(0:npes-1))
  CALL mpi_allgather(n, 1, MPI_INTEGER, nplocs, 1, MPI_INTEGER, comm, ierr)
  nptot = SUM(nplocs)
  CALL mpi_type_vector(8, n, nptot, MPI_DOUBLE_PRECISION, filetype, ierr)
  CALL mpi_type_commit(filetype, ierr)
  ! Byte displacement of this rank's first element: the particles owned
  ! by lower ranks, times 8 bytes per DOUBLE PRECISION.
  displ = 0
  DO i=0,me-1
     displ = displ + 8*nplocs(i)
  END DO
  opened = .FALSE.
!________________________________________________________________________________
! Read restart file
!
  IF( flag .EQ. 'R' ) THEN
     CALL mpi_file_open(comm, fname, MPI_MODE_RDONLY, MPI_INFO_NULL, fh, ierr)
     opened = .TRUE.
     CALL mpi_file_set_view(fh, displ, MPI_DOUBLE_PRECISION, filetype, "native", &
          & MPI_INFO_NULL, ierr)
     CALL mpi_file_read_all(fh, r,   n, MPI_DOUBLE_PRECISION, MPI_STATUS_IGNORE, ierr)
     CALL mpi_file_read_all(fh, z,   n, MPI_DOUBLE_PRECISION, MPI_STATUS_IGNORE, ierr)
     CALL mpi_file_read_all(fh, phi, n, MPI_DOUBLE_PRECISION, MPI_STATUS_IGNORE, ierr)
     CALL mpi_file_read_all(fh, u,   n, MPI_DOUBLE_PRECISION, MPI_STATUS_IGNORE, ierr)
     CALL mpi_file_read_all(fh, mu,  n, MPI_DOUBLE_PRECISION, MPI_STATUS_IGNORE, ierr)
     CALL mpi_file_read_all(fh, w,   n, MPI_DOUBLE_PRECISION, MPI_STATUS_IGNORE, ierr)
     CALL mpi_file_read_all(fh, vol, n, MPI_DOUBLE_PRECISION, MPI_STATUS_IGNORE, ierr)
     CALL mpi_file_read_all(fh, psi, n, MPI_DOUBLE_PRECISION, MPI_STATUS_IGNORE, ierr)
  END IF
!________________________________________________________________________________
! Write restart file
!
  IF( flag .EQ. 'W' ) THEN
     CALL mpi_file_open(comm, fname, MPI_MODE_WRONLY+MPI_MODE_CREATE, &
          & MPI_INFO_NULL, fh, ierr)
     opened = .TRUE.
     CALL mpi_file_set_view(fh, displ, MPI_DOUBLE_PRECISION, filetype, "native", &
          & MPI_INFO_NULL, ierr)
     CALL mpi_file_write_all(fh, r,   n, MPI_DOUBLE_PRECISION, MPI_STATUS_IGNORE, ierr)
     CALL mpi_file_write_all(fh, z,   n, MPI_DOUBLE_PRECISION, MPI_STATUS_IGNORE, ierr)
     CALL mpi_file_write_all(fh, phi, n, MPI_DOUBLE_PRECISION, MPI_STATUS_IGNORE, ierr)
     CALL mpi_file_write_all(fh, u,   n, MPI_DOUBLE_PRECISION, MPI_STATUS_IGNORE, ierr)
     CALL mpi_file_write_all(fh, mu,  n, MPI_DOUBLE_PRECISION, MPI_STATUS_IGNORE, ierr)
     CALL mpi_file_write_all(fh, w,   n, MPI_DOUBLE_PRECISION, MPI_STATUS_IGNORE, ierr)
     CALL mpi_file_write_all(fh, vol, n, MPI_DOUBLE_PRECISION, MPI_STATUS_IGNORE, ierr)
     CALL mpi_file_write_all(fh, psi, n, MPI_DOUBLE_PRECISION, MPI_STATUS_IGNORE, ierr)
  END IF
!________________________________________________________________________________
! Epilogue
!
  DEALLOCATE(nplocs)
  CALL mpi_type_free(filetype, ierr)
  ! Close only if a file was actually opened: the original closed an
  ! undefined handle when flag was neither 'R' nor 'W'.
  IF( opened ) CALL mpi_file_close(fh, ierr)
!
END SUBROUTINE record
PROGRAM main
!
! Parallel I/O benchmark
!
! Writes 8 distributed particle arrays to a shared file via RECORD('W'),
! reads them back via RECORD('R'), verifies the data with CHECK, and
! reports aggregate write/read bandwidth. Problem size and iteration
! count are read from stdin on rank 0 and broadcast.
!
IMPLICIT NONE
INCLUDE 'mpif.h'
INTEGER :: npes, me, ierr, comm=MPI_COMM_WORLD
INTEGER :: npart, npart_mp=1, start, nploc
DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: r, z, phi, u, mu, w, vol, psi
DOUBLE PRECISION :: t0, twrite, tread, mbytes
INTEGER :: i, nit=1
CHARACTER*1 :: flag   ! NOTE(review): declared but never used in this program
INTEGER :: nerrors
!________________________________________________________________________________
! Init MPI
!
CALL mpi_init(ierr)
CALL mpi_comm_rank(comm, me, ierr)
CALL mpi_comm_size(comm, npes, ierr)
!________________________________________________________________________________
! Input data of the run
!
! Rank 0 prompts for the total particle count (in Mi particles) and the
! number of write/read iterations, then broadcasts both to all ranks.
IF( me .EQ. 0 ) THEN
WRITE(*,*) 'Enter total number of particles (Mega Part.) and nits'
READ(*,*) npart_mp, nit
END IF
CALL mpi_bcast(npart_mp, 1, MPI_INTEGER, 0, comm, ierr)
CALL mpi_bcast(nit, 1, MPI_INTEGER, 0, comm, ierr)
npart = npart_mp*1024*1024
! Block-partition the npart particles over the ranks: this rank owns
! nploc particles starting at global index "start".
CALL dist1d(0, npart, start, nploc)
!________________________________________________________________________________
! Init particle arrays
!
ALLOCATE(r(nploc), z(nploc), phi(nploc), u(nploc), mu(nploc), w(nploc), &
& vol(nploc), psi(nploc))
CALL initp(nploc, r, z, phi, u, mu, w, vol, psi)
twrite=0.0
tread=0.0
!________________________________________________________________________________
! Write arrays to file
!
! Barriers before starting and after finishing so the timing covers the
! slowest rank's collective I/O.
CALL mpi_barrier(comm, ierr)
t0=mpi_wtime()
DO i=1,nit
CALL record('W', nploc, r, z, phi, u, mu, w, vol, psi)
END DO
CALL mpi_barrier(comm, ierr)
twrite = mpi_wtime()-t0
!________________________________________________________________________________
! Read arrays to file
!
CALL mpi_barrier(comm, ierr)
t0=mpi_wtime()
DO i=1,nit
! Zero the arrays first so the read result cannot be the stale data.
r=0.0d0; z=0.0d0; phi=0.0d0; u=0.0d0; mu=0.0d0; w=0.0d0; vol=0.0d0;
psi=0.0d0
CALL record('R', nploc, r, z, phi, u, mu, w, vol, psi)
END DO
CALL mpi_barrier(comm, ierr)
tread = mpi_wtime()-t0
!________________________________________________________________________________
! Check read arrays
!
! Each rank counts its mismatches; the global total is reduced to rank 0.
CALL check(nploc, r, z, phi, u, mu, w, vol, psi, nerrors)
i = nerrors
CALL mpi_reduce(i, nerrors, 1, MPI_INTEGER, MPI_SUM, 0, comm, ierr)
IF ( me .EQ. 0 ) THEN
WRITE(*, '(a,i12)' ) 'Number of errors return from CHECK:', nerrors
END IF
!________________________________________________________________________________
! Epilogue
!
IF(me .EQ. 0 ) THEN
twrite = twrite/REAL(nit)
tread = tread/REAL(nit)
! Total file size: 8 arrays x 8 bytes per double x npart particles, in MB.
mbytes = 8.0*8.0*REAL(npart)/1024.0/1024.0
WRITE(*,'(a,f12.3,a)') 'Write/Read of', mbytes, 'MB'
WRITE(*,'(a,4(f8.3,a))') 'Write/Read performance:', twrite, ' s', &
mbytes/twrite, ' MB/s', tread, ' s', mbytes/tread, ' MB/s'
END IF
CALL mpi_finalize(ierr)
END PROGRAM main
!
SUBROUTINE initp(n, r, z, phi, u, mu, w, vol, psi)
!
! Fill the 8 particle arrays with a deterministic interleaved sequence
! 1, 2, 3, ... so that CHECK can later verify data read back from file:
! r(i) = 8*(i-1)+1, z(i) = 8*(i-1)+2, ..., psi(i) = 8*(i-1)+8.
!
! n       : array length
! r..psi  : arrays to initialize
!
! Fix: removed the unused INCLUDE 'mpif.h' and mpi_comm_rank call (its
! result "me" was never used); the routine is pure computation and no
! longer depends on MPI.
!
  IMPLICIT NONE
  INTEGER, INTENT(in) :: n
  DOUBLE PRECISION, DIMENSION(n), INTENT(out):: r, z, phi, u, mu, w, vol, psi
  DOUBLE PRECISION :: x
  INTEGER :: i
!________________________________________________________________________________
  x=0.0d0
  DO i=1,n
     x=x+1.0d0; r(i)   = x
     x=x+1.0d0; z(i)   = x
     x=x+1.0d0; phi(i) = x
     x=x+1.0d0; u(i)   = x
     x=x+1.0d0; mu(i)  = x
     x=x+1.0d0; w(i)   = x
     x=x+1.0d0; vol(i) = x
     x=x+1.0d0; psi(i) = x
  END DO
END SUBROUTINE initp
!
SUBROUTINE check(n, r, z, phi, u, mu, w, vol, psi, nerrs)
!
! Verify that the 8 particle arrays hold the exact sequence produced by
! INITP (r(i)=8*(i-1)+1, ..., psi(i)=8*(i-1)+8). Exact comparison is
! intentional: the values are small integers stored as doubles and must
! survive a file round-trip bit-for-bit.
!
! n       : array length
! r..psi  : arrays to verify
! nerrs   : number of mismatching elements found (0 = all correct)
!
! Fix: removed the unused INCLUDE 'mpif.h'; the routine uses no MPI.
!
  IMPLICIT NONE
  INTEGER, INTENT(in) :: n
  INTEGER, INTENT(out) :: nerrs
  DOUBLE PRECISION, DIMENSION(n), INTENT(in):: r, z, phi, u, mu, w, vol, psi
  INTEGER :: i
  DOUBLE PRECISION :: x
!________________________________________________________________________________
  nerrs = 0
  x=0.0d0
  DO i=1,n
     x=x+1.0d0; IF( r(i)   .NE. x ) nerrs=nerrs+1
     x=x+1.0d0; IF( z(i)   .NE. x ) nerrs=nerrs+1
     x=x+1.0d0; IF( phi(i) .NE. x ) nerrs=nerrs+1
     x=x+1.0d0; IF( u(i)   .NE. x ) nerrs=nerrs+1
     x=x+1.0d0; IF( mu(i)  .NE. x ) nerrs=nerrs+1
     x=x+1.0d0; IF( w(i)   .NE. x ) nerrs=nerrs+1
     x=x+1.0d0; IF( vol(i) .NE. x ) nerrs=nerrs+1
     x=x+1.0d0; IF( psi(i) .NE. x ) nerrs=nerrs+1
  END DO
END SUBROUTINE check
SUBROUTINE dist1d(s0, ntot, s, nloc)
!
! Block-distribute ntot items over the ranks of MPI_COMM_WORLD.
! The first MODULO(ntot,npes) ranks each receive one extra item.
!
! s0    : global index of the first item
! ntot  : total number of items to distribute
! s     : global index of this rank's first item
! nloc  : number of items owned by this rank
!
  IMPLICIT NONE
  INCLUDE 'mpif.h'
  INTEGER, INTENT(in) :: s0, ntot
  INTEGER, INTENT(out) :: s, nloc
  INTEGER :: rank, nprocs, ierr, base, extra
!
  CALL MPI_COMM_SIZE(MPI_COMM_WORLD, nprocs, ierr)
  CALL MPI_COMM_RANK(MPI_COMM_WORLD, rank, ierr)
  base  = ntot/nprocs
  extra = MODULO(ntot, nprocs)
  ! Ranks below "extra" carry one additional item.
  nloc = MERGE(base+1, base, rank .LT. extra)
  ! Offset = items held by all lower ranks.
  s = s0 + rank*base + MIN(rank, extra)
!
END SUBROUTINE dist1d
_______________________________________________
Pvfs2-users mailing list
[email protected]
http://www.beowulf-underground.org/mailman/listinfo/pvfs2-users