Hello,

at RRZE we tried to write > 2 GB data (per process) at once to a file
with MPI_File_write_at(_all). Thereby the function returns with error
code 35.

Attached you will find the compressed output of "ompi_info --all" and a
test program (large_mpi_test.F90) with that the problem can be
reproduced.

The Open MPI Vesion used was 1.3.3. The underlaying file system used is
Lustre which is able to deal with files > 2GB.


Further if we retrieve the file size with MPI_File_get_size for a file
 >= 2 GB the returned size is negative. The attached test program
get_file_size.f90 will demonstrate the behavior:

$ dd if=/dev/zero of=delme.dat bs=1K count=1M
1048576+0 records in
1048576+0 records out
1073741824 bytes (1.1 GB) copied, 9.1401 seconds, 117 MB/s
$ mpirun -n 1 get_file_size
file size of delme.dat:
[0] file size = 1073741824 bytes, 0.102E+04 MB


$ dd if=/dev/zero of=delme.dat bs=1K count=2M
2097152+0 records in
2097152+0 records out
2147483648 bytes (2.1 GB) copied, 18.1567 seconds, 118 MB/s
$ mpirun -n 1 get_file_size
file size of delme.dat:
[0] file size = -2147483648 bytes, -.205E+04 MB


Regards
Markus Wittmann



Attachment: ompi_info.log.tar.gz
Description: GNU Zip compressed data

program large_mpi_test

  implicit none

  include "mpif.h"

  integer :: num_elements,mode

  real*8, allocatable :: fp_data(:)

  integer :: mp_ierr,mp_rk,mp_size

  character (len=255) :: fn = "test.sta" ! used for mpi-io

  integer :: fh,amode,status(MPI_STATUS_SIZE)
  integer(KIND=MPI_OFFSET_KIND) :: my_global_off

  call mpi_init(mp_ierr)
  call mpi_comm_rank(mpi_comm_world,mp_rk  ,mp_ierr)
  call mpi_comm_size(mpi_comm_world,mp_size,mp_ierr)

  if (mp_rk.eq.0) then
     write(*,*) "mp_size =", mp_size
     write(*,*) "enter number of data elements must be <2147483647"
     write(*,*) "but max be larger than 268435456"
     write(*,*) "the available memory should be larger than 8x this number"
     read(*,*) num_elements
     write(*,*) "size of data", num_elements*8.d0/1024.d0/1024.d0, " MB"
     write(*,*) "tests to use"
     write(*,*) "  1 : send/recv only"
     write(*,*) "  2 : MPI-IO only"
     write(*,*) "  3 : both"
     read(*,*) mode
  end if
  call mpi_bcast(num_elements,1,MPI_INTEGER,0,mpi_comm_world,mp_ierr)
  call mpi_bcast(mode,1,MPI_INTEGER,0,mpi_comm_world,mp_ierr)

  if (mp_rk.eq.0) then
     write(*,*) "allocating memory ..."
  end if
  allocate( fp_data(num_elements) ) ! there's hopefully enough memory
  fp_data(:) = 0

  if ( mode.eq.1 .or. mode.eq.3 ) then
     if (mp_rk.eq.0) then
        write(*,*) "starting send/recv."
        call mpi_send(fp_data,num_elements,MPI_DOUBLE_PRECISION,1,99,mpi_comm_world,mp_ierr)
     end if
     if (mp_rk.eq.1) then
        call mpi_recv(fp_data,num_elements,MPI_DOUBLE_PRECISION,0,99,mpi_comm_world,status,mp_ierr)
        write(*,*) "send/recv done."
     end if
     call mpi_barrier(mpi_comm_world,mp_ierr)
  end if


  if ( mode.eq.2 .or. mode.eq.3 ) then
     if (mp_rk.eq.0) then
        write(*,*) "starting MPI-IO test"
        ! delete old status file (should not procude a visable error if
        ! the file does not exit - and we do not check mp_ierr here)
        call mpi_file_delete(fn,MPI_INFO_NULL,mp_ierr)
     end if

     ! MPI_MODE_CREATE is definitely required as the file has to exist
     ! before! (according to my tests - no idea about the standard)
     amode = IOR(MPI_MODE_WRONLY, MPI_MODE_CREATE)

     call mpi_barrier(mpi_comm_world,mp_ierr)
     call mpi_file_open(MPI_COMM_WORLD,fn,amode,MPI_INFO_NULL,fh,mp_ierr)

     my_global_off = mp_rk ! make type conversin :-)
     my_global_off = (my_global_off*num_elements)*8 ! double_precision=8 assumed

     write(*,*) "rk=", mp_rk, "writing at", my_global_off

     call mpi_file_write_at(fh,my_global_off,fp_data,num_elements, &
         MPI_DOUBLE_PRECISION,status,mp_ierr)

     if ( mp_ierr .ne. MPI_SUCCESS ) then
        write(*,*) "MPI_File_write_at_all() failed - rank", mp_rk, "; error=", mp_ierr
     end if

     call mpi_file_close(fh,mp_ierr)
     if ( mp_ierr .ne. MPI_SUCCESS ) then
        write(*,*) "writing restart data: MPI_File_close() failed - rank", mp_rk
     end if

     if (mp_rk.eq.0) then
        write(*,*) "DONE"
     end if

  end if

  call mpi_finalize(mp_ierr)

end program large_mpi_test
program get_file_size
  implicit none

  include 'mpif.h'

  integer :: error
  integer :: rank
  integer :: fh
  integer (kind=MPI_OFFSET_KIND) :: file_size
  character (len=*), parameter :: file_name = 'delme.dat'

  call mpi_init(error)
  call mpi_assert_success(error)

  call mpi_comm_rank(MPI_COMM_WORLD, rank, error)
  call mpi_assert_success(error)

  if (rank.eq.0) write(*,'(a,a,a)') 'file size of ', file_name, ':'

  call mpi_file_open(MPI_COMM_WORLD, file_name, MPI_MODE_RDONLY,             &
                     MPI_INFO_NULL, fh, error)
  call mpi_assert_success(error)

  call mpi_file_get_size(fh, file_size, error)
  call mpi_assert_success(error)

  write(*,'(a,i0,a,i0,a,e9.3,a)') '[', rank, '] file size = ', file_size,        &
          ' bytes, ', float(file_size) / 2**20, ' MB'

  call mpi_file_close(fh, error)
  call mpi_assert_success(error)

  call mpi_finalize(error)
  call mpi_assert_success(error)

  contains

  subroutine mpi_assert_success(error)
    integer :: error
    character (len=MPI_MAX_ERROR_STRING) :: error_string
    integer :: string_length, error_code

    if (error.ne.MPI_SUCCESS) then
      call mpi_error_string(error, error_string, string_length, error_code)

      write(*,'(a,i0,a,i0,x,a)') '# [', rank,                                  &
          '] ERROR: mpi_assert_success failed with error code: ',              &
          error, error_string

      stop
    end if

  end subroutine mpi_assert_success


end program get_file_size

Reply via email to