| Issue |
52971
|
| Summary |
SROA sub-vector memcpy with subsequent load loses the store
|
| Labels |
miscompilation,
llvm:optimizations
|
| Assignees |
|
| Reporter |
nunoplopes
|
Test minimized from Transforms/SROA/vector-promotion.ll:
```llvm
define void @test_subvec_memcpy(i8* %f) {
%a = alloca <4 x float>, align 16
%a.gep2 = getelementptr <4 x float>, <4 x float>* %a, i32 0, i32 2
%a.cast2 = bitcast float* %a.gep2 to i8*
%a.gep3 = getelementptr <4 x float>, <4 x float>* %a, i32 0, i32 3
%a.cast3 = bitcast float* %a.gep3 to i8*
call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.cast3, i8* %f, i32 4, i1 false)
call void @llvm.memcpy.p0i8.p0i8.i32(i8* %f, i8* %a.cast2, i32 8, i1 false)
; comment out the load below and the function is optimized correctly
%ret = load <4 x float>, <4 x float>* %a, align 16
ret void
}
declare void @llvm.memcpy.p0i8.p0i8.i32(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i32, i1 immarg)
```
With the load we get (`opt -sroa`):
```llvm
define void @test_subvec_memcpy(i8* %f) {
%a.12.f.sroa_cast = bitcast i8* %f to float*
%a.12.copyload = load float, float* %a.12.f.sroa_cast, align 1
%a.12.vec.insert = insertelement <4 x float> undef, float %a.12.copyload, i32 3
%a.8.f.sroa_cast = bitcast i8* %f to <2 x float>*
%a.8.vec.extract = shufflevector <4 x float> %a.12.vec.insert, <4 x float> poison, <2 x i32> <i32 2, i32 3>
store <2 x float> %a.8.vec.extract, <2 x float>* %a.8.f.sroa_cast, align 1
ret void
}
```
The store to `%f` is gone! Without the load, there's no miscompilation.
The culprit is likely this: https://github.com/llvm/llvm-project/commit/21eb4e96c2268a31a19e0e307bb0ab8450e2e80f
cc @chandlerc @cdevadas @rotateright @LemonBoy @aeubanks
_______________________________________________
llvm-bugs mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs