Issue 165625
Summary [AArch64] Suboptimal `memcpy` for non-power-of-two sizes greater than 64
Labels backend:AArch64, missed-optimization
Assignees
Reporter Kmeakin
    https://godbolt.org/z/9E1bPo9Eb
```c
/* COPY(size) defines `void copy<size>(void* dst, void* src)`, which copies
 * exactly `size` bytes from src to dst using __builtin_memcpy. */
#define COPY(size) \
    void copy##size(void* dst, void* src) { __builtin_memcpy(dst, src, size); }

/* Instantiate the three sizes whose code generation is compared below. */
COPY(65)
COPY(66)
COPY(67)
```

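LLVM assembly (bytes 0-31 are copied with two separate `ldr`/`str` pairs rather than the single `ldp`/`stp` pair that GCC emits below):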
```asm
// copy65 (LLVM output): copy 65 bytes from [x1] (src) to [x0] (dst).
// Bytes 0-15 and 16-31 are each moved by a separate ldr/str of a q register,
// bytes 32-63 by one ldp/stp pair, and byte 64 by ldrb/strb:
// 4 loads + 4 stores, versus 3 + 3 in the GCC listing for the same size.
copy65:
        // Bytes 0-15; stored right away, so q0 can be reused by the ldp below.
 ldr     q0, [x1]
        str     q0, [x0]
        // Bytes 32-63 as a pair (q2 = 32-47, q0 = 48-63).
        ldp     q2, q0, [x1, #32]
        // Trailing byte 64.
        ldrb    w8, [x1, #64]
        // Bytes 16-31, loaded on their own rather than paired with bytes 0-15.
        ldr     q1, [x1, #16]
        // Stores for byte 64, bytes 32-63 and bytes 16-31.
 strb    w8, [x0, #64]
        stp     q2, q0, [x0, #32]
        str     q1, [x0, #16]
        ret

// copy66 (LLVM output): copy 66 bytes from [x1] (src) to [x0] (dst).
// Same shape as the 65-byte case, except the tail is a halfword (bytes 64-65)
// moved by ldrh/strh. Bytes 0-31 still use two separate ldr/str pairs.
copy66:
        // Bytes 0-15; stored right away, so q0 can be reused by the ldp below.
        ldr     q0, [x1]
        str q0, [x0]
        // Bytes 32-63 as a pair (q2 = 32-47, q0 = 48-63).
        ldp     q2, q0, [x1, #32]
        // Trailing halfword, bytes 64-65.
        ldrh    w8, [x1, #64]
        // Bytes 16-31, loaded on their own rather than paired with bytes 0-15.
 ldr     q1, [x1, #16]
        // Stores for bytes 64-65, bytes 32-63 and bytes 16-31.
        strh    w8, [x0, #64]
        stp q2, q0, [x0, #32]
        str     q1, [x0, #16]
        ret

copy67:
 ldr     q0, [x1]
        str     q0, [x0]
        ldp     q2, q0, [x1, #32]
        ldur    w8, [x1, #63]
        ldr     q1, [x1, #16]
 stur    w8, [x0, #63]
        stp     q2, q0, [x0, #32]
        str     q1, [x0, #16]
        ret
```

GCC assembly:
```asm
// copy65 (GCC output): copy 65 bytes from [x1] (src) to [x0] (dst).
// All 64 leading bytes move through two ldp/stp pairs and byte 64 through
// ldrb/strb: 3 loads + 3 stores. Every load is issued before the first store.
copy65:
        // Bytes 0-31 (q29 = 0-15, q28 = 16-31) and 32-63 (q31 = 32-47, q30 = 48-63).
        ldp q29, q28, [x1]
        ldp     q31, q30, [x1, 32]
        // Byte 64. The destination w1 overwrites the src pointer, which is not
        // read again in this function.
        ldrb    w1, [x1, 64]
        // Stores for bytes 0-31, bytes 32-63 and byte 64.
        stp     q29, q28, [x0]
        stp     q31, q30, [x0, 32]
 strb    w1, [x0, 64]
        ret
// copy66 (GCC output): copy 66 bytes from [x1] (src) to [x0] (dst).
// Two ldp/stp pairs cover bytes 0-63 and ldrh/strh covers bytes 64-65:
// 3 loads + 3 stores. Every load is issued before the first store.
copy66:
        // Bytes 0-31 (q29 = 0-15, q28 = 16-31) and 32-63 (q31 = 32-47, q30 = 48-63).
        ldp     q29, q28, [x1]
        ldp     q31, q30, [x1, 32]
        // Bytes 64-65. The destination w1 overwrites the src pointer, which is
        // not read again in this function.
        ldrh    w1, [x1, 64]
        // Stores for bytes 0-31, bytes 32-63 and bytes 64-65.
 stp     q29, q28, [x0]
        stp     q31, q30, [x0, 32]
        strh w1, [x0, 64]
        ret
// copy67 (GCC output): copy 67 bytes from [x1] (src) to [x0] (dst).
// Two ldp/stp pairs cover bytes 0-63; the 3-byte tail is covered by one
// 4-byte access at offset 63 that overlaps byte 63 of the upper pair:
// 3 loads + 3 stores. Every load is issued before the first store.
copy67:
        // Bytes 32-63 first (q29 = 32-47, q28 = 48-63), then bytes 0-31
        // (q31 = 0-15, q30 = 16-31).
        ldp     q29, q28, [x1, 32]
 ldp     q31, q30, [x1]
        // Bytes 63-66; the same access the LLVM listing spells as ldur.
        // The destination w1 overwrites the src pointer, which is not read
        // again in this function.
        ldr     w1, [x1, 63]
        // Stores for bytes 32-63, bytes 0-31 and bytes 63-66.
        stp     q29, q28, [x0, 32]
        stp     q31, q30, [x0]
        str     w1, [x0, 63]
 ret
```
_______________________________________________
llvm-bugs mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs

Reply via email to