https://gcc.gnu.org/bugzilla/show_bug.cgi?id=95018

--- Comment #9 from Thomas Koenig <tkoenig at gcc dot gnu.org> ---
Created attachment 48502
  --> https://gcc.gnu.org/bugzilla/attachment.cgi?id=48502&action=edit
Assembly file on x86 with -O2 -funroll-loops

So it seems the unrolling decisions are bad for this case
independent of architecture: the cold loop is also unrolled 15 times
on x86_64 with -funroll-loops.  The change to POWER just happened
to expose it.

The code on x86_64 looks like

# ../../../trunk/libgfortran/generated/in_pack_i4.c:76:   destptr = xmallocarray (ssize, sizeof (GFC_INTEGER_4));
        movq    %rax, 152(%rsp) # tmp309, %sfp
# ../../../trunk/libgfortran/generated/in_pack_i4.c:78:   src = source->base_addr;
        movq    0(%rbp), %rax   # source_42(D)->base_addr, src
# ../../../trunk/libgfortran/generated/in_pack_i4.c:82:   while (src)
        testq   %rax, %rax      # src
        je      .L1     #,
# ../../../trunk/libgfortran/generated/in_pack_i4.c:91:       while (count[n] == extent[n])
        movq    416(%rsp), %r15 # extent, _68
# ../../../trunk/libgfortran/generated/in_pack_i4.c:108:               src += stride[n];
        movq    552(%rsp), %rdi # stride, _92
# ../../../trunk/libgfortran/generated/in_pack_i4.c:87:       src += stride0;
        leaq    0(,%rsi,4), %r12        #, _13
# ../../../trunk/libgfortran/generated/in_pack_i4.c:108:               src += stride[n];
        movq    560(%rsp), %r14 # stride, _24
        movq    568(%rsp), %r13 # stride, _112
# ../../../trunk/libgfortran/generated/in_pack_i4.c:98:           src -= stride[n] * extent[n];
        imulq   %r15, %rsi      # _68, tmp225
# ../../../trunk/libgfortran/generated/in_pack_i4.c:91:       while (count[n] == extent[n])
        movq    %r15, 16(%rsp)  # _68, %sfp
        movq    424(%rsp), %r15 # extent, _73
        movq    %rdi, %r10      # _92, tmp226
        movq    %r14, %r9       # _24, tmp228
        movq    288(%rsp), %rdx # count, count_I_lsm.8
        movq    296(%rsp), %rcx # count, count_I_lsm.29
# ../../../trunk/libgfortran/generated/in_pack_i4.c:98:           src -= stride[n] * extent[n];
        imulq   %r15, %rdi      # _73, tmp227
# ../../../trunk/libgfortran/generated/in_pack_i4.c:91:       while (count[n] == extent[n])
        movq    %r15, 32(%rsp)  # _73, %sfp
        movq    432(%rsp), %r15 # extent, _14
        subq    %rsi, %r10      # tmp225, tmp226
        movq    320(%rsp), %r8  # count, count_I_lsm.35
        movq    304(%rsp), %rsi # count, count_I_lsm.31
# ../../../trunk/libgfortran/generated/in_pack_i4.c:98:           src -= stride[n] * extent[n];
        imulq   %r15, %r14      # _14, tmp229
        leaq    0(,%r10,4), %r11        #, _78
        movq    %r13, %r10      # _112, tmp230
# ../../../trunk/libgfortran/generated/in_pack_i4.c:91:       while (count[n] == extent[n])

... and so on.

Reply via email to