| Issue |
165625
|
| Summary |
[AArch64] Suboptimal `memcpy` for non-power-of-two sizes greater than 64
|
| Labels |
backend:AArch64,
missed-optimization
|
| Assignees |
|
| Reporter |
Kmeakin
|
https://godbolt.org/z/9E1bPo9Eb
```c
#define COPY(size) \
void copy##size(void* dst, void* src) { __builtin_memcpy(dst, src, size); }
COPY(65)
COPY(66)
COPY(67)
```
LLVM assembly (bytes 0–31 are copied with two separate `ldr`/`str` pairs instead of a single `ldp`/`stp` pair):
```asm
copy65:
ldr q0, [x1]
str q0, [x0]
ldp q2, q0, [x1, #32]
ldrb w8, [x1, #64]
ldr q1, [x1, #16]
strb w8, [x0, #64]
stp q2, q0, [x0, #32]
str q1, [x0, #16]
ret
copy66:
ldr q0, [x1]
str q0, [x0]
ldp q2, q0, [x1, #32]
ldrh w8, [x1, #64]
ldr q1, [x1, #16]
strh w8, [x0, #64]
stp q2, q0, [x0, #32]
str q1, [x0, #16]
ret
copy67:
ldr q0, [x1]
str q0, [x0]
ldp q2, q0, [x1, #32]
ldur w8, [x1, #63]
ldr q1, [x1, #16]
stur w8, [x0, #63]
stp q2, q0, [x0, #32]
str q1, [x0, #16]
ret
```
GCC assembly (each 32-byte half is copied with a single `ldp`/`stp` pair):
```asm
copy65:
ldp q29, q28, [x1]
ldp q31, q30, [x1, 32]
ldrb w1, [x1, 64]
stp q29, q28, [x0]
stp q31, q30, [x0, 32]
strb w1, [x0, 64]
ret
copy66:
ldp q29, q28, [x1]
ldp q31, q30, [x1, 32]
ldrh w1, [x1, 64]
stp q29, q28, [x0]
stp q31, q30, [x0, 32]
strh w1, [x0, 64]
ret
copy67:
ldp q29, q28, [x1, 32]
ldp q31, q30, [x1]
ldr w1, [x1, 63]
stp q29, q28, [x0, 32]
stp q31, q30, [x0]
str w1, [x0, 63]
ret
```
_______________________________________________
llvm-bugs mailing list
llvm-bugs@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs