================
@@ -53,32 +53,73 @@ struct S {
};
// struct splats
-// CHECK-LABEL: define void {{.*}}call3
-// CHECK: [[AA:%.*]] = alloca i32, align 4
-// CHECK: [[s:%.*]] = alloca %struct.S, align 1
-// CHECK-NEXT: store i32 %A, ptr [[AA]], align 4
-// CHECK-NEXT: [[L:%.*]] = load i32, ptr [[AA]], align 4
-// CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds %struct.S, ptr [[s]], i32 0, i32 0
-// CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds %struct.S, ptr [[s]], i32 0, i32 1
-// CHECK-NEXT: store i32 [[L]], ptr [[G1]], align 4
-// CHECK-NEXT: [[C:%.*]] = sitofp i32 [[L]] to float
-// CHECK-NEXT: store float [[C]], ptr [[G2]], align 4
+// CHECK-LABEL: define void @_Z5call3i(
+// CHECK-SAME: i32 noundef [[A:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT: [[S:%.*]] = alloca [[STRUCT_S:%.*]], align 1
+// CHECK-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S]], align 1
+// CHECK-NEXT: [[REF_TMP3:%.*]] = alloca [[STRUCT_S]], align 1
+// CHECK-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_S]], ptr [[S]], i32 0, i32 0
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
+// CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[REF_TMP]], i32 0, i32 0
+// CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[REF_TMP]], i32 0, i32 1
+// CHECK-NEXT: store i32 [[TMP0]], ptr [[GEP]], align 4
+// CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP0]] to float
+// CHECK-NEXT: store float [[CONV]], ptr [[GEP1]], align 4
+// CHECK-NEXT: [[X2:%.*]] = getelementptr inbounds nuw [[STRUCT_S]], ptr [[REF_TMP]], i32 0, i32 0
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[X2]], align 1
+// CHECK-NEXT: store i32 [[TMP1]], ptr [[X]], align 1
+// CHECK-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_S]], ptr [[S]], i32 0, i32 1
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4
+// CHECK-NEXT: [[GEP4:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[REF_TMP3]], i32 0, i32 0
+// CHECK-NEXT: [[GEP5:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[REF_TMP3]], i32 0, i32 1
+// CHECK-NEXT: store i32 [[TMP2]], ptr [[GEP4]], align 4
+// CHECK-NEXT: [[CONV6:%.*]] = sitofp i32 [[TMP2]] to float
+// CHECK-NEXT: store float [[CONV6]], ptr [[GEP5]], align 4
+// CHECK-NEXT: [[Y7:%.*]] = getelementptr inbounds nuw [[STRUCT_S]], ptr [[REF_TMP3]], i32 0, i32 1
+// CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[Y7]], align 1
+// CHECK-NEXT: store float [[TMP3]], ptr [[Y]], align 1
+// CHECK-NEXT: ret void
+//
----------------
hekota wrote:
I see, that makes sense. It might be worth a try to optimize the codegen for
this case, since it might turn out to be a common pattern. It will get optimized
away by the backend passes, but if we generate less code, it should at least
reduce the compile time. I will take a look and see if I can come up with
something.
https://github.com/llvm/llvm-project/pull/190089
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits