https://gcc.gnu.org/bugzilla/show_bug.cgi?id=93956
--- Comment #4 from Thomas Koenig <tkoenig at gcc dot gnu.org> ---
Taking the slightly modified test case
! Reproducer: a pointer obtained from a function that returns a pointer to a
! derived-type component section is passed on to an assumed-shape dummy.
program array_temps
implicit none
! tt has two integer components, so a section such as x(:)%v skips over
! the u components between consecutive elements.
type :: tt
integer :: u = 1
integer :: v = 2
end type tt
type(tt), dimension(:), pointer :: r
integer :: n
integer, dimension(:), pointer :: p
n = 10
allocate(r(1:n))
! p is associated with the v components of r through the result of get.
p => get(r)
call foo(p, n)
! With v default-initialized to 2 and n = 10, the expected sum is 20.
print *,sum(p)
deallocate(r)
contains
! Print the sum of the first n elements of a, followed by n itself.
subroutine foo(a, n)
integer, dimension(:), intent(in) :: a
integer, intent(in) :: n
print *, sum(a(1:n)), n
end subroutine foo
! Return a pointer to the v component of every element of x.
function get(x) result(q)
type(tt), dimension(:), target, intent(in) :: x
integer, dimension(:), pointer :: q
q => x(:)%v
end function get
end program array_temps
and looking at -fdump-tree-original shows something strange.
get looks good:
{
integer(kind=8) D.3946;
integer(kind=8) D.3947;
D.3946 = ubound.0;
__result->span = 8;
__result->dtype = {.elem_len=4, .rank=1, .type=1};
D.3947 = stride.1;
__result->dim[0].lbound = 1;
__result->dim[0].ubound = D.3946;
__result->dim[0].stride = NON_LVALUE_EXPR <D.3947>;
__result->data = (void *) &(*x.0)[0].v;
__result->offset = -NON_LVALUE_EXPR <D.3947>;
}
so the result for span is set.
The call to get and foo does not look too bad, either:
{
struct array01_integer(kind=4) ptrtemp.15;
struct array01_tt * D.4002;
struct tt[0:] * ifm.16;
integer(kind=8) D.4004;
integer(kind=8) D.4005;
ptrtemp.15.span = 4;
D.4002 = &r;
ifm.16 = (struct tt[0:] *) D.4002->data;
D.4004 = (D.4002->dim[0].ubound - D.4002->dim[0].lbound) + 1;
D.4005 = -NON_LVALUE_EXPR <D.4002->dim[0].stride>;
get (&ptrtemp.15, D.4002);
p = ptrtemp.15;
}
foo (&p, &n);
But it seems that foo does not use the span at all.
OK, so what about the test case
! Variant of the reproducer: p is pointer-assigned directly to the component
! section r%v instead of going through a pointer-returning function.
program array_temps
implicit none
! tt has two integer components, so the section r%v skips over the u
! components between consecutive elements.
type :: tt
integer :: u = 1
integer :: v = 2
end type tt
type(tt), dimension(:), pointer :: r
integer :: n
integer, dimension(:), pointer :: p
n = 10
allocate(r(1:n))
! Direct pointer assignment to the v components of r.
p => r%v
call foo(p, n)
! With v default-initialized to 2 and n = 10, the expected sum is 20.
print *,sum(p)
deallocate(r)
contains
! Print the sum of the first n elements of a, followed by n itself.
subroutine foo(a, n)
integer, dimension(:), intent(in) :: a
integer, intent(in) :: n
print *, sum(a(1:n)), n
end subroutine foo
end program array_temps
?
There, we actually convert the argument on the call to foo:
p = r;
p.data = (void *) &(*(struct tt[0:] *) r.data)[0].v;
p.span = r.span;
p.dim[0].ubound = p.dim[0].ubound + (1 - p.dim[0].lbound);
p.offset = p.offset - (1 - p.dim[0].lbound) * p.dim[0].stride;
p.dim[0].lbound = 1;
{
integer(kind=4)[0:] * D.3975;
integer(kind=8) D.3976;
integer(kind=8) D.3977;
integer(kind=8) D.3978;
integer(kind=8) D.3979;
struct array01_integer(kind=4) atmp.11;
logical(kind=4) D.3987;
integer(kind=8) D.3988;
void * restrict D.3989;
void * restrict D.3990;
integer(kind=8) D.3991;
integer(kind=4)[0:] * D.3995;
integer(kind=8) D.3996;
integer(kind=8) D.3997;
integer(kind=8) D.3998;
integer(kind=8) D.3999;
D.3975 = (integer(kind=4)[0:] *) p.data;
D.3976 = p.offset;
D.3977 = p.dim[0].lbound;
D.3978 = p.dim[0].ubound;
D.3979 = D.3978 - D.3977;
typedef integer(kind=4) [0:];
atmp.11.dtype = {.elem_len=4, .rank=1, .type=1};
atmp.11.dim[0].stride = 1;
atmp.11.dim[0].lbound = 0;
atmp.11.dim[0].ubound = D.3979;
D.3987 = D.3979 < 0;
D.3988 = D.3979 + 1;
atmp.11.span = 4;
D.3989 = (void * restrict) __builtin_malloc (D.3987 ? 1 : MAX_EXPR
<(unsigned long) (D.3988 * 4), 1>);
D.3990 = D.3989;
atmp.11.data = D.3990;
atmp.11.offset = 0;
D.3991 = NON_LVALUE_EXPR <D.3977>;
{
integer(kind=8) S.12;
integer(kind=8) D.3993;
D.3993 = p.dim[0].stride;
S.12 = 0;
while (1)
{
if (S.12 > D.3979) goto L.3;
(*(integer(kind=4)[0:] * restrict) atmp.11.data)[S.12] =
*((integer(kind=4) *) D.3975 + (sizetype) (((S.12 + D.3991) * D.3993 + D.3976)
* p.span));
S.12 = S.12 + 1;
}
L.3:;
}
foo (&atmp.11, &n);
__builtin_free ((void *) atmp.11.data);
This is not ideal from a performance perspective, but at least
it generates correct code.
So, it appears that in the first test case, where p is set through the
function get, the conversion of the argument on the call to foo goes missing
(and it would also be enough just to convert the descriptor,
no real need to copy the data).