Hi everyone!
For several days I have some trouble with OpenACC offloading and fortran
pointers. I'm testing with a very small peace of code to investigate but
I do not progress for several days and I need your help.
The attached code goal is just to initialize some data on the GPU and is
representative of my problem on a very large code.
- It works fine with nvfortran (22.11)
- it do not work with Gnu fortran (14.0.0 20230822 - experimental)
=> invalid memory
- it do not work with Cray Fortran (15.1) => wrong results
so I think the problem is the code, not the compiler.
It is also difficult to find some openACC offloading examples using
Fortran pointers and I'm stuck with this problem.
Could someone give me advices or a small explanation on what I have not
understood there ?
Thanks for your help
Patrick
Code details:
- all my fortran modules are grouped in the same file for simplification
of the provided test-case.
- compilation with GNU Firtran is: "gfortran -cpp -g -fopenacc grouped.f90"
- setting "runongpu=.false." line 7 (no GPU) the result is:
Default init OK
Default value OK
- setting "runongpu=.true." line 7 (no GPU) the result is:
Default init OK
libgomp: cuStreamSynchronize error: an illegal memory access was encountered
- with nvhpc/22.11 and "runongpu=.true.", built with "nvfortran
-acc=gpu,noautopar -gpu=cc80 -Minfo=accel grouped.f90"
Default init OK
Default value OK
!=================================================================
! Just to say run on the device or not.
!=================================================================
module openacc_defs
implicit none
logical, save :: runongpu=.true.
end module openacc_defs
!=================================================================
! Data structure for r2_tab and r2_ptr to manage pointers.
! r2_ptr is used in an allocatable array for a dynamivc number of r2_tab variables
! but could be used later in chained lists
! Memory is allocated on GPU each time.
!=================================================================
module tab_m
implicit none
type r2_tab
double precision, dimension(:,:), allocatable :: val
integer :: dim1
integer :: dim2
end type r2_tab
type r2_ptr
type(r2_tab), pointer :: ptr
type(r2_ptr), pointer :: next
end type r2_ptr
contains
subroutine new_r2_tab(tab,n,m)
implicit none
integer, intent(in) ::n,m
type(r2_tab), pointer, intent(inout) ::tab
!----------------------------
if (.not. associated(tab)) allocate(tab)
if (allocated(tab%val)) deallocate(tab%val)
allocate (tab%val(n,m))
tab%dim1=n
tab%dim2=m
tab%val(:,:)=1.0D0
!$acc enter data create(tab)
!$acc enter data create(tab%val)
end subroutine new_r2_tab
end module tab_m
!=================================================================
! This module implements data processing (just an initialization here)
! if runongpu is .true. initialization is run on the GPU and then host is updated.
!=================================================================
module manage_data
use openacc_defs
use tab_m
implicit none
contains
subroutine set_default_val_gpu(liste, defval, nitems)
implicit none
integer, intent(in) :: nitems
double precision, intent(in) :: defval
type(r2_ptr), dimension(nitems) :: liste
type(r2_tab), pointer :: current=>null()
integer:: item,j,k
do item=1, nitems
current=>liste(item)%ptr
! print*,current%dim1, current%dim2,size(current%val)
!$acc parallel loop collapse(2) default(present) if(runongpu)
do k=1, current%dim2
do j=1, current%dim1
current%val(j,k)=defval
end do
end do
!$acc update if(runongpu) host(current%val)
end do
end subroutine set_default_val_gpu
end module manage_data
!=================================================================
! main program.
!
!=================================================================
program main
use tab_m
use manage_data
implicit none
integer, parameter:: N=5
type(r2_ptr), dimension(N) :: liste
integer:: i,j,k
type(r2_tab), pointer :: current=>null()
double precision :: total
! Initialize
do i=1,N
nullify(liste(i)%ptr)
nullify(liste(i)%next)
end do
! Allocate (do not manage "next" pointer, all elements are set to 1.0)
do i=1,N
call new_r2_tab(liste(i)%ptr,N,i*N)
end do
! Check all is correct on host side
do i=1,N
if (sum(liste(i)%ptr%val) .NE. N*i*N) then
write(6,*)"Something goes wrong",sum(liste(i)%ptr%val)," != ",N*i*N
STOP (1)
end if
end do
write(6,*) "Default init OK"
! Update on host (runongpu is false)
call set_default_val_gpu(liste, 2.0D0, N)
! Check all is correct on host side
do i=1,N
if (sum(liste(i)%ptr%val) .NE. 2*N*i*N) then
write(6,*)"Something goes wrong",sum(liste(i)%ptr%val)," != ",2*N*i*N
!STOP (1)
end if
end do
write(6,*) "Default value OK"
end program main