https://gcc.gnu.org/bugzilla/show_bug.cgi?id=93956

--- Comment #4 from Thomas Koenig <tkoenig at gcc dot gnu.org> ---
Taking the slightly modified test case

! Reproducer for GCC PR 93956, variant 1: the pointer to the component
! section is obtained through a pointer-valued function result (get) and
! then passed on to an assumed-shape dummy argument (foo).
program array_temps
  implicit none

  ! Two-component type: a pointer to only the v components of an array
  ! of tt therefore skips over the u component of every element.
  type :: tt
     integer :: u = 1
     integer :: v = 2
  end type tt

  type(tt), dimension(:), pointer :: r
  integer :: n
  integer, dimension(:), pointer :: p

  n = 10
  allocate(r(1:n))
  ! p is associated with the v components of r via the function result.
  p => get(r)
  ! Both sums below run over the same n v-components, so they should
  ! agree (20 if every v holds its default value of 2).
  call foo(p, n)
  print *,sum(p)

  deallocate(r)

contains

   ! Print the sum of the first n elements of a, followed by n.
   subroutine foo(a, n)
      integer, dimension(:), intent(in) :: a
      integer, intent(in) :: n
      print *, sum(a(1:n)), n
   end subroutine foo

   ! Return a pointer associated with the v component of every element
   ! of x.
   function get(x) result(q)
      type(tt), dimension(:), target, intent(in) :: x
      integer, dimension(:), pointer :: q
      q => x(:)%v
   end function get
end program array_temps

and looking at -fdump-tree-original shows something strange.

get looks good:

  {
    integer(kind=8) D.3946;
    integer(kind=8) D.3947;

    D.3946 = ubound.0;
    __result->span = 8;
    __result->dtype = {.elem_len=4, .rank=1, .type=1};
    D.3947 = stride.1;
    __result->dim[0].lbound = 1;
    __result->dim[0].ubound = D.3946;
    __result->dim[0].stride = NON_LVALUE_EXPR <D.3947>;
    __result->data = (void *) &(*x.0)[0].v;
    __result->offset = -NON_LVALUE_EXPR <D.3947>;
  }

so the result for span is set.

The calls to get and foo do not look too bad, either:

    {
      struct array01_integer(kind=4) ptrtemp.15;
      struct array01_tt * D.4002;
      struct tt[0:] * ifm.16;
      integer(kind=8) D.4004;
      integer(kind=8) D.4005;

      ptrtemp.15.span = 4;
      D.4002 = &r;
      ifm.16 = (struct tt[0:] *) D.4002->data;
      D.4004 = (D.4002->dim[0].ubound - D.4002->dim[0].lbound) + 1;
      D.4005 = -NON_LVALUE_EXPR <D.4002->dim[0].stride>;
      get (&ptrtemp.15, D.4002);
      p = ptrtemp.15;
    }
    foo (&p, &n);

But it seems that foo does not use the span at all.

OK, so what about the test case

! Reproducer for GCC PR 93956, variant 2: the pointer is associated with
! the component section directly in the main program (no function call)
! before being passed to the assumed-shape dummy argument of foo.
program array_temps
  implicit none

  ! Two-component type: a pointer to only the v components of an array
  ! of tt therefore skips over the u component of every element.
  type :: tt
     integer :: u = 1
     integer :: v = 2
  end type tt

  type(tt), dimension(:), pointer :: r
  integer :: n
  integer, dimension(:), pointer :: p

  n = 10
  allocate(r(1:n))
  ! Direct pointer assignment to the v components of r.
  p => r%v
  ! Both sums below run over the same n v-components, so they should
  ! agree (20 if every v holds its default value of 2).
  call foo(p, n)
  print *,sum(p)

deallocate(r)

contains

   ! Print the sum of the first n elements of a, followed by n.
   subroutine foo(a, n)
      integer, dimension(:), intent(in) :: a
      integer, intent(in) :: n
      print *, sum(a(1:n)), n
   end subroutine foo

end program array_temps

?

There, we actually convert the argument on call to foo:

   p = r;
    p.data = (void *) &(*(struct tt[0:] *) r.data)[0].v;
    p.span = r.span;
    p.dim[0].ubound = p.dim[0].ubound + (1 - p.dim[0].lbound);
    p.offset = p.offset - (1 - p.dim[0].lbound) * p.dim[0].stride;
    p.dim[0].lbound = 1;
    {
      integer(kind=4)[0:] * D.3975;
      integer(kind=8) D.3976;
      integer(kind=8) D.3977;
      integer(kind=8) D.3978;
      integer(kind=8) D.3979;
      struct array01_integer(kind=4) atmp.11;
      logical(kind=4) D.3987;
      integer(kind=8) D.3988;
      void * restrict D.3989;
      void * restrict D.3990;
      integer(kind=8) D.3991;
      integer(kind=4)[0:] * D.3995;
      integer(kind=8) D.3996;
      integer(kind=8) D.3997;
      integer(kind=8) D.3998;
      integer(kind=8) D.3999;

      D.3975 = (integer(kind=4)[0:] *) p.data;
      D.3976 = p.offset;
      D.3977 = p.dim[0].lbound;
      D.3978 = p.dim[0].ubound;
      D.3979 = D.3978 - D.3977;
            typedef integer(kind=4) [0:];
      atmp.11.dtype = {.elem_len=4, .rank=1, .type=1};
      atmp.11.dim[0].stride = 1;
      atmp.11.dim[0].lbound = 0;
      atmp.11.dim[0].ubound = D.3979;
      D.3987 = D.3979 < 0;
      D.3988 = D.3979 + 1;
      atmp.11.span = 4;
      D.3989 = (void * restrict) __builtin_malloc (D.3987 ? 1 : MAX_EXPR
<(unsigned long) (D.3988 * 4), 1>);
      D.3990 = D.3989;
      atmp.11.data = D.3990;
      atmp.11.offset = 0;
      D.3991 = NON_LVALUE_EXPR <D.3977>;
      {
        integer(kind=8) S.12;
        integer(kind=8) D.3993;

        D.3993 = p.dim[0].stride;
        S.12 = 0;
        while (1)
          {
            if (S.12 > D.3979) goto L.3;
            (*(integer(kind=4)[0:] * restrict) atmp.11.data)[S.12] =
*((integer(kind=4) *) D.3975 + (sizetype) (((S.12 + D.3991) * D.3993 + D.3976)
* p.span));
            S.12 = S.12 + 1;
          }
        L.3:;
      }
      foo (&atmp.11, &n);
      __builtin_free ((void *) atmp.11.data);

This is not ideal from a performance perspective, but at least
it generates correct code.

So, it appears that somewhere, that conversion goes missing
(and it would also be enough just to convert the descriptor,
no real need to copy the data).

Reply via email to