Issue 183355
Summary [SLP] Improve bswap detection.
Labels new issue
Assignees
Reporter rj-jesus
    This might be related to #175446.

Given code like this ([link](https://godbolt.org/z/a6Mrhzavc)):
```c
#include <stdint.h>

/* Short aliases for the fixed-width unsigned types used below. */
typedef uint32_t u32;
typedef uint64_t u64;

/*
 * Build a u32 from four consecutive elements x[0..3], placing x[0] in the
 * most significant byte and x[3] in the least significant one. With a byte
 * pointer (as in the functions below) this reads a 32-bit value stored
 * most-significant-byte first. Only x[0] is explicitly cast to u32; the
 * other elements are shifted after the usual integer promotions.
 */
#define BSWAP32(x) (((u32)(x)[0] << 24) | \
                         ((x)[1] << 16) | \
                         ((x)[2] <<  8) | \
 (x)[3])

/*
 * Case 1: combine buf[0..3] with BSWAP32 and return the result as a u32.
 * Per the report, this is the one pattern SLP already turns into a bswap.
 */
u32 bswap32(uint8_t *buf) {
  return BSWAP32(buf);
}

/*
 * Case 2: the same expression as bswap32, but the return type is u64, so
 * the u32 result is implicitly zero-extended. Per the report, SLP does not
 * turn this variant into a bswap.
 */
u64 bswap32_zext64(uint8_t *buf) {
  return BSWAP32(buf);
}

/*
 * Case 3: build a u64 from buf[0..7] out of two BSWAP32 halves. buf[0..3]
 * form the upper 32 bits and buf[4..7] the lower 32 bits. The halves are
 * joined with '+'; they occupy disjoint bit ranges, so this is equivalent
 * to '|'. Per the report, SLP does not turn this into a bswap.
 */
u64 bswap64(uint8_t *buf) {
  return ((u64)BSWAP32(buf) << 32) +
 BSWAP32(buf + 4);
}
```

SLP is able to combine the first function, `bswap32`, into a bswap, but not the other two patterns (`bswap32_zext64` and `bswap64`).

For example, for AArch64 we currently generate:
```gas
// Current output for bswap32: the pattern is recognised, giving one
// 32-bit load followed by a byte reverse.
bswap32:
        ldr     w8, [x0]                // load the 4 bytes at buf
 rev     w0, w8                         // reverse their order into the return register
        ret

// Constant pool: per-lane left-shift amounts, matching the shifts used by
// BSWAP32 (24, 16, 8, and 0 for the unshifted last byte).
.LCPI1_0:
        .word   24
 .word   16
        .word   8
        .word   0
// Current output for bswap32_zext64: the four bytes are widened to 32-bit
// vector lanes, shifted per lane and OR-reduced, rather than load + rev.
bswap32_zext64:
 ldr     s0, [x0]                       // load the 4 bytes at buf into the low 32 bits of v0
        adrp    x8, .LCPI1_0
        ldr     q1, [x8, :lo12:.LCPI1_0]        // v1 = the four shift amounts
        ushll   v0.8h, v0.8b, #0        // zero-extend bytes to 16-bit lanes
        ushll   v0.4s, v0.4h, #0        // zero-extend again to 32-bit lanes
        ushl    v0.4s, v0.4s, v1.4s     // shift each lane left by its own amount
        ext     v1.16b, v0.16b, v0.16b, #8      // v1 = v0 with its 64-bit halves swapped
        orr     v0.8b, v0.8b, v1.8b     // low half: lane0|lane2 and lane1|lane3
        fmov    x8, d0                  // move the two partial results to a GPR
        lsr     x9, x8, #32             // x9 = the partial result from the upper 32 bits
        orr     w0, w8, w9              // final OR; the 32-bit write zeroes the top of x0
 ret

// Constant pool: per-lane left-shift amounts, matching the shifts used by
// BSWAP32 (24, 16, 8, and 0 for the unshifted last byte).
.LCPI2_0:
        .word   24
        .word   16
        .word   8
 .word   0
// Current output for bswap64: buf[0..3] go through the vector
// widen/shift/OR-reduce sequence, while buf[4..7] are loaded one byte at a
// time and merged with scalar shifts and ORs. The two instruction streams
// are interleaved.
bswap64:
        ldr     s0, [x0]                // vector: load buf[0..3]
        adrp    x8, .LCPI2_0
        ldrb    w10, [x0, #5]           // scalar: buf[5]
        ldr     q1, [x8, :lo12:.LCPI2_0]        // vector: the four shift amounts
        ldrb    w8, [x0, #4]            // scalar: buf[4]
        ushll   v0.8h, v0.8b, #0        // vector: zero-extend bytes to 16-bit lanes
        lsl     x8, x8, #24             // scalar: buf[4] << 24
        ushll   v0.4s, v0.4h, #0        // vector: zero-extend to 32-bit lanes
 orr     x8, x8, x10, lsl #16           // scalar: | buf[5] << 16
        ldrb    w10, [x0, #6]           // scalar: buf[6]
        ushl v0.4s, v0.4s, v1.4s        // vector: shift each lane left by its own amount
        ext     v1.16b, v0.16b, v0.16b, #8      // vector: swap the 64-bit halves into v1
        orr v0.8b, v0.8b, v1.8b         // vector: lane0|lane2 and lane1|lane3
        fmov    x9, d0                  // move the two partial results to a GPR
        lsr     x11, x9, #32            // x11 = the partial result from the upper 32 bits
        orr     w9, w9, w11             // w9 = 32-bit value built from buf[0..3]
        orr     x8, x8, x9, lsl #32     // place it in the upper 32 bits of the result
 ldrb    w9, [x0, #7]                   // scalar: buf[7]
        orr     x8, x8, x10, lsl #8     // scalar: | buf[6] << 8
        orr x0, x8, x9                  // scalar: | buf[7]; x0 = final 64-bit result
        ret
```

Ideally, we would instead generate:
```gas
// Desired output for bswap32: one 32-bit load and a byte reverse.
bswap32:
        ldr w0, [x0]                    // load the 4 bytes at buf
        rev     w0, w0                  // reverse their order
        ret
// Desired output for bswap32_zext64: the same two instructions as bswap32.
// Writes to w0 zero the upper 32 bits of x0, so the zero-extension to
// 64 bits needs no extra instruction.
bswap32_zext64:
        ldr w0, [x0]                    // load the 4 bytes at buf
        rev     w0, w0                  // reverse their order
        ret
// Desired output for bswap64: one 64-bit load and a full 8-byte reverse.
bswap64:
        ldr x0, [x0]                    // load the 8 bytes at buf
        rev     x0, x0                  // reverse all eight bytes
        ret
```

See also https://godbolt.org/z/cqf5ad1Ko for the pre-SLP IR. (The backend can recognise the pre-SLP input IR as a bswap, but not the forms it is transformed into by SLP.)

CC: @alexey-bataev 
_______________________________________________
llvm-bugs mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs

Reply via email to