https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89317
Bug ID: 89317
Summary: Ineffective code from std::copy
Product: gcc
Version: 9.0
Status: UNCONFIRMED
Severity: normal
Priority: P3
Component: c++
Assignee: unassigned at gcc dot gnu.org
Reporter: [email protected]
Target Milestone: ---
GCC produces inefficient code when std::copy is used to copy data. As a test, I
created my own version of std::copy, and that version is optimized properly.
Compiled using g++ (GCC-Explorer-Build) 9.0.1 20190211 (experimental)
Options: -O3 -std=c++11 -march=skylake
[code]
#include <stdint.h>
#include <algorithm>
#define Size 8
// Reproducer type: holds two int16_t arrays of Size elements each and exposes
// two member functions that copy both arrays out to a caller-supplied buffer.
class Test
{
public:
// Copies data1 followed by data2 to ptr using std::copy.
void test1(void*__restrict ptr);
// Copies data1 followed by data2 to ptr using the hand-written mycopy.
void test2(void*__restrict ptr);
private:
// Source arrays for the copies; never initialized in this reproducer.
int16_t data1[Size];
int16_t data2[Size];
};
// Hand-written element-wise copy, used here as a comparison against std::copy.
// For each position in [begin, end) it assigns *begin to *dest and advances
// both iterators. Returns nothing and performs no bounds checking on dest.
// Kept as a plain while loop on purpose: the report is about how this exact
// form is optimized.
template<typename T1, typename T2>
void mycopy(T1 begin, T1 end, T2 dest)
{
while (begin != end)
{
*dest = *begin;
++dest;
++begin;
}
}
// std::copy variant: writes the Size elements of data1 to ptr, then the Size
// elements of data2 immediately after them (2 * Size elements in total).
// The destination is viewed as uint16_t* while the sources are int16_t arrays.
// NOTE(review): this element-type mismatch does not exist in test2 and is
// presumably relevant to the different code generated for the two functions;
// confirm against the standard library's std::copy implementation.
void Test::test1(void*__restrict ptr)
{
uint16_t* p = (uint16_t*)ptr;
std::copy(data1, data1 + Size, p);
// Advance past the first Size elements so data2 lands right after data1.
p += Size;
std::copy(data2, data2 + Size, p);
}
// mycopy variant: writes the Size elements of data1 to ptr, then the Size
// elements of data2 immediately after them (2 * Size elements in total).
// Here the destination is viewed as int16_t*, the same element type as the
// source arrays.
void Test::test2(void*__restrict ptr)
{
int16_t* p = (int16_t*)ptr;
mycopy(data1, data1 + Size, p);
// Advance past the first Size elements so data2 lands right after data1.
p += Size;
mycopy(data2, data2 + Size, p);
}
[/code]
[asm]
# Compiler output for Test::test1. Loads come from [rdi+...] and stores go to
# [rsi+...] (presumably rdi = this and rsi = ptr). Every element is moved with
# a 16-bit scalar load/store pair; no vector instructions are used.
Test::test1(void*):
# First copy: eight 16-bit elements from [rdi+0..14], fully unrolled.
movzx eax, WORD PTR [rdi]
# edx = 16; it is not read again until the "mov rax, rdx" below.
mov edx, 16
mov WORD PTR [rsi], ax
movzx eax, WORD PTR [rdi+2]
# rsi is advanced by 16 here, so the remaining stores of the first copy use
# negative offsets from the new rsi.
add rsi, 16
mov WORD PTR [rsi-14], ax
movzx eax, WORD PTR [rdi+4]
mov WORD PTR [rsi-12], ax
movzx eax, WORD PTR [rdi+6]
mov WORD PTR [rsi-10], ax
movzx eax, WORD PTR [rdi+8]
mov WORD PTR [rsi-8], ax
movzx eax, WORD PTR [rdi+10]
mov WORD PTR [rsi-6], ax
movzx eax, WORD PTR [rdi+12]
mov WORD PTR [rsi-4], ax
movzx eax, WORD PTR [rdi+14]
mov WORD PTR [rsi-2], ax
# Second copy: rax = rdx >> 1 (8 when rdx is 16) is used as an element count
# that is checked at run time even though rdx was loaded with a constant.
mov rax, rdx
sar rax
test rdx, rdx
jle .L69
# Chain of copy-one-element-then-compare steps reading from [rdi+16] onward.
# After the k-th element is stored, rax is compared with k and the function
# returns on equality. With rax == 8 the exit at "cmp rax, 8" would be taken,
# so the steps after it appear to be unreachable.
movzx edx, WORD PTR [rdi+16]
mov WORD PTR [rsi], dx
cmp rax, 1
je .L69
movzx edx, WORD PTR [rdi+18]
mov WORD PTR [rsi+2], dx
cmp rax, 2
je .L69
movzx edx, WORD PTR [rdi+20]
mov WORD PTR [rsi+4], dx
cmp rax, 3
je .L69
movzx edx, WORD PTR [rdi+22]
mov WORD PTR [rsi+6], dx
cmp rax, 4
je .L69
movzx edx, WORD PTR [rdi+24]
mov WORD PTR [rsi+8], dx
cmp rax, 5
je .L69
movzx edx, WORD PTR [rdi+26]
mov WORD PTR [rsi+10], dx
cmp rax, 6
je .L69
movzx edx, WORD PTR [rdi+28]
mov WORD PTR [rsi+12], dx
cmp rax, 7
je .L69
movzx edx, WORD PTR [rdi+30]
mov WORD PTR [rsi+14], dx
cmp rax, 8
je .L69
movzx edx, WORD PTR [rdi+32]
mov WORD PTR [rsi+16], dx
cmp rax, 9
je .L69
movzx edx, WORD PTR [rdi+34]
mov WORD PTR [rsi+18], dx
cmp rax, 10
je .L69
movzx edx, WORD PTR [rdi+36]
mov WORD PTR [rsi+20], dx
cmp rax, 11
je .L69
movzx edx, WORD PTR [rdi+38]
mov WORD PTR [rsi+22], dx
cmp rax, 12
je .L69
movzx edx, WORD PTR [rdi+40]
mov WORD PTR [rsi+24], dx
cmp rax, 13
je .L69
movzx edx, WORD PTR [rdi+42]
mov WORD PTR [rsi+26], dx
cmp rax, 14
je .L69
# Final step of the chain has no trailing compare.
movzx eax, WORD PTR [rdi+44]
mov WORD PTR [rsi+28], ax
.L69:
ret
# Compiler output for Test::test2. Each 16-byte array is moved with a single
# unaligned 128-bit vector load/store pair: [rdi] -> [rsi], then
# [rdi+16] -> [rsi+16]. No per-element moves or run-time count checks remain.
Test::test2(void*):
vmovdqu xmm0, XMMWORD PTR [rdi]
vmovups XMMWORD PTR [rsi], xmm0
vmovdqu xmm1, XMMWORD PTR [rdi+16]
vmovups XMMWORD PTR [rsi+16], xmm1
ret
[/asm]