https://gcc.gnu.org/bugzilla/show_bug.cgi?id=125856
Bug ID: 125856
Summary: x86 gcc16 codesize regression related to memcpy inline
caused by r16-2047-g401199377c5004
Product: gcc
Version: 16.0
Status: UNCONFIRMED
Severity: normal
Priority: P3
Component: target
Assignee: unassigned at gcc dot gnu.org
Reporter: liuhongt at gcc dot gnu.org
Target Milestone: ---
cat test.c
#include <string.h>
/* Reproducer: copy n bytes from src to dst, but only when n is at most 15;
 * for any larger n the function does nothing. */
void bounded_copy(char *dst, const char *src, unsigned long n) {
if (n <= 15)
memcpy(dst, src, n);
}
GCC 15.1 -O2 generates:
# Here the copy stays a library call: the only work done inline is the
# bounds check on %rdx (n).
bounded_copy(char*, char const*, unsigned long):
cmpq $15, %rdx
# Unsigned n <= 15: go to the copy.
jbe .L4
# n > 15: nothing to copy.
ret
.L4:
# Tail call (jmp rather than call): no register has been modified since
# entry, so memcpy receives the caller's arguments unchanged.
jmp memcpy
GCC 16.1 -O2 generates:
# Here memcpy is expanded inline. Throughout the listing, loads read from
# (%rsi), stores write to (%rdi), and %rdx / %edx / %dl carries the count n.
bounded_copy(char*, char const*, unsigned long):
cmpq $15, %rdx
# Unsigned n <= 15: go to the inline copy.
jbe .L17
.L1:
# Shared return: reached when n > 15, and reused by the copy paths below.
ret
.L17:
# Size dispatch. The only jump to .L17 is the jbe above, so n <= 15 on entry.
# The checks for n >= 64, bit 5 (32) and bit 4 (16) therefore cannot succeed
# here, which makes .L3, .L10, .L18 and .L19 unreachable for this function.
cmpl $64, %edx
# n >= 64: bulk copy.
jnb .L3
testb $32, %dl
# n in 32..63.
jne .L18
testb $16, %dl
# n in 16..31.
jne .L19
testb $8, %dl
# n in 8..15.
jne .L20
testb $4, %dl
# n in 4..7.
jne .L21
testl %edx, %edx
# n == 0: nothing to copy.
je .L1
# n in 1..3: always copy the first byte.
movzbl (%rsi), %eax
movb %al, (%rdi)
testb $2, %dl
# Bit 1 clear means n == 1, so the single byte above was everything.
je .L1
# n is 2 or 3: copy the last two bytes (may overlap the byte already copied).
# The 32-bit self-move zero-extends the count so %rdx is usable as an index.
movl %edx, %edx
movzwl -2(%rsi,%rdx), %eax
movw %ax, -2(%rdi,%rdx)
ret
.L3:
# n >= 64 path. First copy the final 64 bytes of the buffer.
# %eax = n (zero-extended), %edx = n - 1.
movl %edx, %eax
subl $1, %edx
# %rcx = dst + n, %rax = src + n (one past the end of each buffer).
leaq (%rdi,%rax), %rcx
addq %rsi, %rax
# Four unaligned 16-byte moves covering end-64 .. end.
movdqu -64(%rax), %xmm0
movups %xmm0, -64(%rcx)
movdqu -48(%rax), %xmm0
movups %xmm0, -48(%rcx)
movdqu -32(%rax), %xmm0
movups %xmm0, -32(%rcx)
movdqu -16(%rax), %xmm0
movups %xmm0, -16(%rcx)
# n - 1 < 64 means n == 64: the tail copy above already covered everything.
cmpl $64, %edx
jb .L1
# %edx = (n - 1) rounded down to a multiple of 64 = loop bound;
# %ecx = running byte offset, starting at 0.
andl $-64, %edx
xorl %ecx, %ecx
.L10:
# Main loop: copy 64 bytes per iteration from the front of the buffer.
# %eax = offset for this iteration; %ecx is advanced to the next offset.
movl %ecx, %eax
addl $64, %ecx
movdqu (%rsi,%rax), %xmm3
movdqu 16(%rsi,%rax), %xmm2
movdqu 32(%rsi,%rax), %xmm1
movdqu 48(%rsi,%rax), %xmm0
movups %xmm3, (%rdi,%rax)
movups %xmm2, 16(%rdi,%rax)
movups %xmm1, 32(%rdi,%rax)
movups %xmm0, 48(%rdi,%rax)
# Continue while the next offset is still below the bound.
cmpl %edx, %ecx
jb .L10
ret
.L18:
# n in 32..63: copy the first 32 bytes and the last 32 bytes (these may overlap).
movdqu (%rsi), %xmm0
# Zero-extend the count for use in address arithmetic.
movl %edx, %edx
# %rax = dst + n + 32, %rdx = src + n + 32, so the -64 / -48 displacements
# below address end-32 and end-16.
leaq 32(%rdi,%rdx), %rax
leaq 32(%rsi,%rdx), %rdx
movups %xmm0, (%rdi)
movdqu 16(%rsi), %xmm0
movups %xmm0, 16(%rdi)
movdqu -64(%rdx), %xmm0
movups %xmm0, -64(%rax)
movdqu -48(%rdx), %xmm0
movups %xmm0, -48(%rax)
ret
.L19:
# n in 16..31: copy the first 16 bytes and the last 16 bytes (may overlap).
movdqu (%rsi), %xmm0
movl %edx, %edx
movups %xmm0, (%rdi)
movdqu -16(%rsi,%rdx), %xmm0
movups %xmm0, -16(%rdi,%rdx)
ret
.L20:
# n in 8..15: copy the first 8 bytes and the last 8 bytes (may overlap).
movq (%rsi), %rax
movl %edx, %edx
movq %rax, (%rdi)
movq -8(%rsi,%rdx), %rax
movq %rax, -8(%rdi,%rdx)
ret
.L21:
# n in 4..7: copy the first 4 bytes and the last 4 bytes (may overlap).
movl (%rsi), %eax
movl %edx, %edx
movl %eax, (%rdi)
movl -4(%rsi,%rdx), %eax
movl %eax, -4(%rdi,%rdx)
ret
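I think that in this case we should probably keep the libcall instead of inlining it: n is already known to be at most 15 at the memcpy call, yet the inline expansion still emits the code paths for 16 bytes and more, none of which can ever execute.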