| Issue |
179950
|
| Summary |
[CodeGenPrepare] Failure to hoist bitcast to legal type causes register splitting
|
| Labels |
missed-optimization
|
| Assignees |
|
| Reporter |
RKSimon
|
```ll
define i8 @src(ptr %a0, i8 %a1) {
entry:
%src256 = load i256, ptr %a0, align 1
%iszero = icmp eq i256 %src256, 0
br i1 %iszero, label %exit, label %reduction
reduction:
%src256.bitcast = bitcast i256 %src256 to <32 x i8>
%red = call i8 @llvm.vector.reduce.umax.v32i8(<32 x i8> %src256.bitcast)
br label %exit
exit:
%result = phi i8 [ 0, %entry], [ %red, %reduction ]
ret i8 %result
}
define i8 @dst(ptr %a0, i8 %a1) {
entry:
%src256 = load i256, ptr %a0, align 1
%src256.bitcast = bitcast i256 %src256 to <32 x i8>
%iszero = icmp eq i256 %src256, 0
br i1 %iszero, label %exit, label %reduction
reduction:
%red = call i8 @llvm.vector.reduce.umax.v32i8(<32 x i8> %src256.bitcast)
br label %exit
exit:
%result = phi i8 [ 0, %entry], [ %red, %reduction ]
ret i8 %result
}
```
https://rust.godbolt.org/z/Tv3MTjvnM
i256 is not a legal x86 type, so it gets split into i64 CopyToReg/CopyFromReg pieces across blocks and is then reassembled, causing a lot of spilling and build_vector noise.
But the only uses of the i256 are as a legal <32 x i8> type - if we had bitcast to this type in the entry block, we wouldn't need to split at all.
```s
src:
vmovdqu (%rdi), %ymm0
vptest %ymm0, %ymm0
je .LBB0_1
movq (%rdi), %rax
vmovd %eax, %xmm0
movq %rax, %rcx
movq %rax, %rdx
movq %rax, %rsi
movq %rax, %r8
movl %eax, %r9d
movl %eax, %r10d
shrl $8, %eax
shrl $16, %r10d
shrl $24, %r9d
shrq $32, %r8
shrq $40, %rsi
shrq $48, %rdx
shrq $56, %rcx
vpinsrb $1, %eax, %xmm0, %xmm0
movq 8(%rdi), %rax
vpinsrb $2, %r10d, %xmm0, %xmm0
vpinsrb $3, %r9d, %xmm0, %xmm0
vpinsrb $4, %r8d, %xmm0, %xmm0
vpinsrb $5, %esi, %xmm0, %xmm0
vpinsrb $6, %edx, %xmm0, %xmm0
vpinsrb $7, %ecx, %xmm0, %xmm0
movl %eax, %ecx
shrl $8, %ecx
vpinsrb $8, %eax, %xmm0, %xmm0
vpinsrb $9, %ecx, %xmm0, %xmm0
movl %eax, %ecx
shrl $16, %ecx
vpinsrb $10, %ecx, %xmm0, %xmm0
movl %eax, %ecx
shrl $24, %ecx
vpinsrb $11, %ecx, %xmm0, %xmm0
movq %rax, %rcx
shrq $32, %rcx
vpinsrb $12, %ecx, %xmm0, %xmm0
movq %rax, %rcx
shrq $40, %rcx
vpinsrb $13, %ecx, %xmm0, %xmm0
movq %rax, %rcx
shrq $56, %rax
shrq $48, %rcx
vpinsrb $14, %ecx, %xmm0, %xmm0
movq 16(%rdi), %rcx
vpinsrb $15, %eax, %xmm0, %xmm0
vmovd %ecx, %xmm1
movl %ecx, %eax
shrl $8, %eax
vpinsrb $1, %eax, %xmm1, %xmm1
movl %ecx, %eax
shrl $16, %eax
vpinsrb $2, %eax, %xmm1, %xmm1
movl %ecx, %eax
shrl $24, %eax
vpinsrb $3, %eax, %xmm1, %xmm1
movq %rcx, %rax
shrq $32, %rax
vpinsrb $4, %eax, %xmm1, %xmm1
movq %rcx, %rax
shrq $40, %rax
vpinsrb $5, %eax, %xmm1, %xmm1
movq %rcx, %rax
shrq $56, %rcx
shrq $48, %rax
vpinsrb $6, %eax, %xmm1, %xmm1
movq 24(%rdi), %rax
vpinsrb $7, %ecx, %xmm1, %xmm1
movl %eax, %ecx
vpinsrb $8, %eax, %xmm1, %xmm1
shrl $8, %ecx
vpinsrb $9, %ecx, %xmm1, %xmm1
movl %eax, %ecx
shrl $16, %ecx
vpinsrb $10, %ecx, %xmm1, %xmm1
movl %eax, %ecx
shrl $24, %ecx
vpinsrb $11, %ecx, %xmm1, %xmm1
movq %rax, %rcx
shrq $32, %rcx
vpinsrb $12, %ecx, %xmm1, %xmm1
movq %rax, %rcx
shrq $40, %rcx
vpinsrb $13, %ecx, %xmm1, %xmm1
movq %rax, %rcx
shrq $56, %rax
shrq $48, %rcx
vpinsrb $14, %ecx, %xmm1, %xmm1
vpinsrb $15, %eax, %xmm1, %xmm1
vpmaxub %xmm1, %xmm0, %xmm0
vpcmpeqd %xmm1, %xmm1, %xmm1
vpxor %xmm1, %xmm0, %xmm0
vpsrlw $8, %xmm0, %xmm1
vpminub %xmm1, %xmm0, %xmm0
vphminposuw %xmm0, %xmm0
vmovd %xmm0, %eax
notb %al
vzeroupper
retq
.LBB0_1:
xorl %eax, %eax
vzeroupper
retq
dst:
vmovdqu (%rdi), %ymm0
xorl %ecx, %ecx
vpcmpeqd %xmm1, %xmm1, %xmm1
vptest %ymm0, %ymm0
vpmaxub 16(%rdi), %xmm0, %xmm0
vpxor %xmm1, %xmm0, %xmm0
vpsrlw $8, %xmm0, %xmm1
vpminub %xmm1, %xmm0, %xmm0
vphminposuw %xmm0, %xmm0
vmovd %xmm0, %eax
notb %al
movzbl %al, %eax
cmovel %ecx, %eax
vzeroupper
retq
```
In this case it's even worse, because the `icmp eq i256` will be performed using a vector type in lowering :(
(Not sure if this belongs in CodeGenPrepare, but it already performs the opposite fold, sinking extension sources to avoid splitting.)
_______________________________________________
llvm-bugs mailing list
llvm-bugs@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs