https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89317

            Bug ID: 89317
           Summary: Ineffective code from std::copy
           Product: gcc
           Version: 9.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: c++
          Assignee: unassigned at gcc dot gnu.org
          Reporter: bugzi...@poradnik-webmastera.com
  Target Milestone: ---

gcc produces inefficient code when std::copy is used to copy data. As a test, I
created my own version of std::copy, and this version is optimized properly.

Compiled using g++ (GCC-Explorer-Build) 9.0.1 20190211 (experimental)
Options: -O3 -std=c++11 -march=skylake

[code]
#include <stdint.h>
#include <algorithm>

#define Size 8

// Reproducer class: owns two int16_t arrays of Size elements each and
// declares two member functions that write them out through a caller-supplied
// pointer. test1 is defined with std::copy, test2 with the hand-written
// mycopy, so the generated code for the two can be compared.
class Test
{
public:
    // Both take an untyped destination pointer qualified with __restrict.
    void test1(void*__restrict ptr);
    void test2(void*__restrict ptr);

private:
    // Source arrays; each member function copies data1 first, then data2.
    int16_t data1[Size];
    int16_t data2[Size];
};

// Hand-written element-wise copy used as the comparison point for std::copy.
//
// begin, end: iterators delimiting the source range [begin, end).
// dest:       iterator to the first destination element.
//
// Assigns *begin to *dest for every position in the source range, advancing
// both iterators with pre-increment. Returns nothing (unlike std::copy, the
// advanced destination iterator is not handed back to the caller).
template<typename T1, typename T2>
void mycopy(T1 begin, T1 end, T2 dest)
{
    while (begin != end)
    {
        *dest = *begin;
        ++dest;
        ++begin;
    }
}

// std::copy variant of the reproducer: writes data1 followed by data2 into
// the buffer at ptr, 2 * Size elements in total.
//
// NOTE(review): the destination is viewed as uint16_t* here while the source
// arrays are int16_t, whereas test2 views the destination as int16_t*. The two
// functions therefore differ in destination element type as well as in the
// copy routine used — confirm whether that difference is intentional.
void Test::test1(void*__restrict ptr)
{
    uint16_t* p = (uint16_t*)ptr;

    std::copy(data1, data1 + Size, p);
    // Step past the elements just written so data2 lands directly after data1.
    p += Size;
    std::copy(data2, data2 + Size, p);
}

// mycopy variant of the reproducer: writes data1 followed by data2 into the
// buffer at ptr, 2 * Size elements in total, using the hand-written mycopy
// loop instead of std::copy. The destination is viewed as int16_t*, matching
// the element type of the source arrays.
void Test::test2(void*__restrict ptr)
{
    int16_t* p = (int16_t*)ptr;

    mycopy(data1, data1 + Size, p);
    // Step past the elements just written so data2 lands directly after data1.
    p += Size;
    mycopy(data2, data2 + Size, p);
}
[/code]

[asm]
Test::test1(void*):
        movzx   eax, WORD PTR [rdi]
        mov     edx, 16
        mov     WORD PTR [rsi], ax
        movzx   eax, WORD PTR [rdi+2]
        add     rsi, 16
        mov     WORD PTR [rsi-14], ax
        movzx   eax, WORD PTR [rdi+4]
        mov     WORD PTR [rsi-12], ax
        movzx   eax, WORD PTR [rdi+6]
        mov     WORD PTR [rsi-10], ax
        movzx   eax, WORD PTR [rdi+8]
        mov     WORD PTR [rsi-8], ax
        movzx   eax, WORD PTR [rdi+10]
        mov     WORD PTR [rsi-6], ax
        movzx   eax, WORD PTR [rdi+12]
        mov     WORD PTR [rsi-4], ax
        movzx   eax, WORD PTR [rdi+14]
        mov     WORD PTR [rsi-2], ax
        mov     rax, rdx
        sar     rax
        test    rdx, rdx
        jle     .L69
        movzx   edx, WORD PTR [rdi+16]
        mov     WORD PTR [rsi], dx
        cmp     rax, 1
        je      .L69
        movzx   edx, WORD PTR [rdi+18]
        mov     WORD PTR [rsi+2], dx
        cmp     rax, 2
        je      .L69
        movzx   edx, WORD PTR [rdi+20]
        mov     WORD PTR [rsi+4], dx
        cmp     rax, 3
        je      .L69
        movzx   edx, WORD PTR [rdi+22]
        mov     WORD PTR [rsi+6], dx
        cmp     rax, 4
        je      .L69
        movzx   edx, WORD PTR [rdi+24]
        mov     WORD PTR [rsi+8], dx
        cmp     rax, 5
        je      .L69
        movzx   edx, WORD PTR [rdi+26]
        mov     WORD PTR [rsi+10], dx
        cmp     rax, 6
        je      .L69
        movzx   edx, WORD PTR [rdi+28]
        mov     WORD PTR [rsi+12], dx
        cmp     rax, 7
        je      .L69
        movzx   edx, WORD PTR [rdi+30]
        mov     WORD PTR [rsi+14], dx
        cmp     rax, 8
        je      .L69
        movzx   edx, WORD PTR [rdi+32]
        mov     WORD PTR [rsi+16], dx
        cmp     rax, 9
        je      .L69
        movzx   edx, WORD PTR [rdi+34]
        mov     WORD PTR [rsi+18], dx
        cmp     rax, 10
        je      .L69
        movzx   edx, WORD PTR [rdi+36]
        mov     WORD PTR [rsi+20], dx
        cmp     rax, 11
        je      .L69
        movzx   edx, WORD PTR [rdi+38]
        mov     WORD PTR [rsi+22], dx
        cmp     rax, 12
        je      .L69
        movzx   edx, WORD PTR [rdi+40]
        mov     WORD PTR [rsi+24], dx
        cmp     rax, 13
        je      .L69
        movzx   edx, WORD PTR [rdi+42]
        mov     WORD PTR [rsi+26], dx
        cmp     rax, 14
        je      .L69
        movzx   eax, WORD PTR [rdi+44]
        mov     WORD PTR [rsi+28], ax
.L69:
        ret
Test::test2(void*):
        vmovdqu xmm0, XMMWORD PTR [rdi]
        vmovups XMMWORD PTR [rsi], xmm0
        vmovdqu xmm1, XMMWORD PTR [rdi+16]
        vmovups XMMWORD PTR [rsi+16], xmm1
        ret
[/asm]

Reply via email to