https://github.com/addmisol updated https://github.com/llvm/llvm-project/pull/185083
>From c5ffb2e73bcf69513f94d8e7b89e8372d0d280b2 Mon Sep 17 00:00:00 2001 From: addmisol <[email protected]> Date: Fri, 6 Mar 2026 23:56:34 +0530 Subject: [PATCH 01/17] Create amdgpu-abi-struct-coerce.c --- .../test/CodeGen/amdgpu-abi-struct-coerce.c | 71 +++++++++++++++++++ 1 file changed, 71 insertions(+) create mode 100644 clang/test/CodeGen/clang/test/CodeGen/amdgpu-abi-struct-coerce.c diff --git a/clang/test/CodeGen/clang/test/CodeGen/amdgpu-abi-struct-coerce.c b/clang/test/CodeGen/clang/test/CodeGen/amdgpu-abi-struct-coerce.c new file mode 100644 index 00000000000000..2399630ff797b2 --- /dev/null +++ b/clang/test/CodeGen/clang/test/CodeGen/amdgpu-abi-struct-coerce.c @@ -0,0 +1,71 @@ +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -emit-llvm -o - %s | FileCheck %s + +// Check that structs containing mixed float and int types are not coerced +// to integer arrays. They should preserve the original struct type and +// individual field types. + +typedef struct fp_int_pair { + float f; + int i; +} fp_int_pair; + +// CHECK-LABEL: define{{.*}} %struct.fp_int_pair @return_fp_int_pair(float %x.coerce0, i32 %x.coerce1) +// CHECK: ret %struct.fp_int_pair +fp_int_pair return_fp_int_pair(fp_int_pair x) { + return x; +} + +typedef struct int_fp_pair { + int i; + float f; +} int_fp_pair; + +// CHECK-LABEL: define{{.*}} %struct.int_fp_pair @return_int_fp_pair(i32 %x.coerce0, float %x.coerce1) +// CHECK: ret %struct.int_fp_pair +int_fp_pair return_int_fp_pair(int_fp_pair x) { + return x; +} + +typedef struct two_floats { + float a; + float b; +} two_floats; + +// CHECK-LABEL: define{{.*}} %struct.two_floats @return_two_floats(float %x.coerce0, float %x.coerce1) +// CHECK: ret %struct.two_floats +two_floats return_two_floats(two_floats x) { + return x; +} + +typedef struct two_ints { + int a; + int b; +} two_ints; + +// CHECK-LABEL: define{{.*}} %struct.two_ints @return_two_ints(i32 %x.coerce0, i32 %x.coerce1) +// CHECK: ret %struct.two_ints +two_ints return_two_ints(two_ints x) { + return x; +} + +// Structs <= 32 bits should still be coerced to i32 for return value +typedef struct small_struct { + short a; + short b; +} small_struct; + +// CHECK-LABEL: define{{.*}} i32 @return_small_struct(i16 %x.coerce0, i16 %x.coerce1) +small_struct return_small_struct(small_struct x) { + return x; +} + +// Structs <= 16 bits should still be coerced to i16 for return value +typedef struct tiny_struct { + char a; + char b; +} tiny_struct; + +// CHECK-LABEL: define{{.*}} i16 @return_tiny_struct(i8 %x.coerce0, i8 %x.coerce1) +tiny_struct return_tiny_struct(tiny_struct x) { + return x; +} >From 68c200f848058ab22b3d25ce810f1639eac50556 Mon Sep 17 00:00:00 2001 From: addmisol <[email protected]> Date: Fri, 6 Mar 2026 23:57:11 +0530 Subject: [PATCH 02/17] Delete clang/test/CodeGen/clang/test/CodeGen/amdgpu-abi-struct-coerce.c --- .../test/CodeGen/amdgpu-abi-struct-coerce.c | 71 ------------------- 1 file changed, 71 deletions(-) delete mode 100644 clang/test/CodeGen/clang/test/CodeGen/amdgpu-abi-struct-coerce.c diff --git a/clang/test/CodeGen/clang/test/CodeGen/amdgpu-abi-struct-coerce.c b/clang/test/CodeGen/clang/test/CodeGen/amdgpu-abi-struct-coerce.c deleted file mode 100644 index 2399630ff797b2..00000000000000 --- a/clang/test/CodeGen/clang/test/CodeGen/amdgpu-abi-struct-coerce.c +++ /dev/null @@ -1,71 +0,0 @@ -// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -emit-llvm -o - %s | FileCheck %s - -// Check that structs containing mixed float and int types are not coerced -// to integer arrays. They should preserve the original struct type and -// individual field types. - -typedef struct fp_int_pair { - float f; - int i; -} fp_int_pair; - -// CHECK-LABEL: define{{.*}} %struct.fp_int_pair @return_fp_int_pair(float %x.coerce0, i32 %x.coerce1) -// CHECK: ret %struct.fp_int_pair -fp_int_pair return_fp_int_pair(fp_int_pair x) { - return x; -} - -typedef struct int_fp_pair { - int i; - float f; -} int_fp_pair; - -// CHECK-LABEL: define{{.*}} %struct.int_fp_pair @return_int_fp_pair(i32 %x.coerce0, float %x.coerce1) -// CHECK: ret %struct.int_fp_pair -int_fp_pair return_int_fp_pair(int_fp_pair x) { - return x; -} - -typedef struct two_floats { - float a; - float b; -} two_floats; - -// CHECK-LABEL: define{{.*}} %struct.two_floats @return_two_floats(float %x.coerce0, float %x.coerce1) -// CHECK: ret %struct.two_floats -two_floats return_two_floats(two_floats x) { - return x; -} - -typedef struct two_ints { - int a; - int b; -} two_ints; - -// CHECK-LABEL: define{{.*}} %struct.two_ints @return_two_ints(i32 %x.coerce0, i32 %x.coerce1) -// CHECK: ret %struct.two_ints -two_ints return_two_ints(two_ints x) { - return x; -} - -// Structs <= 32 bits should still be coerced to i32 for return value -typedef struct small_struct { - short a; - short b; -} small_struct; - -// CHECK-LABEL: define{{.*}} i32 @return_small_struct(i16 %x.coerce0, i16 %x.coerce1) -small_struct return_small_struct(small_struct x) { - return x; -} - -// Structs <= 16 bits should still be coerced to i16 for return value -typedef struct tiny_struct { - char a; - char b; -} tiny_struct; - -// CHECK-LABEL: define{{.*}} i16 @return_tiny_struct(i8 %x.coerce0, i8 %x.coerce1) -tiny_struct return_tiny_struct(tiny_struct x) { - return x; -} >From 3c5401a8e20cdac719d6817e198cc330dc0e4e80 Mon Sep 17 00:00:00 2001 From: addmisol <[email protected]> Date: Fri, 6 Mar 2026 23:58:43 +0530 Subject: [PATCH 03/17] fix for clang abi lowering --- clang/test/CodeGen/amdgpu-abi-struct-coerce.c | 71 +++++++++++++++++++ 1 file changed, 71 insertions(+) create mode 100644 clang/test/CodeGen/amdgpu-abi-struct-coerce.c diff --git a/clang/test/CodeGen/amdgpu-abi-struct-coerce.c b/clang/test/CodeGen/amdgpu-abi-struct-coerce.c new file mode 100644 index 00000000000000..2399630ff797b2 --- /dev/null +++ b/clang/test/CodeGen/amdgpu-abi-struct-coerce.c @@ -0,0 +1,71 @@ +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -emit-llvm -o - %s | FileCheck %s + +// Check that structs containing mixed float and int types are not coerced +// to integer arrays. They should preserve the original struct type and +// individual field types. + +typedef struct fp_int_pair { + float f; + int i; +} fp_int_pair; + +// CHECK-LABEL: define{{.*}} %struct.fp_int_pair @return_fp_int_pair(float %x.coerce0, i32 %x.coerce1) +// CHECK: ret %struct.fp_int_pair +fp_int_pair return_fp_int_pair(fp_int_pair x) { + return x; +} + +typedef struct int_fp_pair { + int i; + float f; +} int_fp_pair; + +// CHECK-LABEL: define{{.*}} %struct.int_fp_pair @return_int_fp_pair(i32 %x.coerce0, float %x.coerce1) +// CHECK: ret %struct.int_fp_pair +int_fp_pair return_int_fp_pair(int_fp_pair x) { + return x; +} + +typedef struct two_floats { + float a; + float b; +} two_floats; + +// CHECK-LABEL: define{{.*}} %struct.two_floats @return_two_floats(float %x.coerce0, float %x.coerce1) +// CHECK: ret %struct.two_floats +two_floats return_two_floats(two_floats x) { + return x; +} + +typedef struct two_ints { + int a; + int b; +} two_ints; + +// CHECK-LABEL: define{{.*}} %struct.two_ints @return_two_ints(i32 %x.coerce0, i32 %x.coerce1) +// CHECK: ret %struct.two_ints +two_ints return_two_ints(two_ints x) { + return x; +} + +// Structs <= 32 bits should still be coerced to i32 for return value +typedef struct small_struct { + short a; + short b; +} small_struct; + +// CHECK-LABEL: define{{.*}} i32 @return_small_struct(i16 %x.coerce0, i16 %x.coerce1) +small_struct return_small_struct(small_struct x) { + return x; +} + +// Structs <= 16 bits should still be coerced to i16 for return value +typedef struct tiny_struct { + char a; + char b; +} tiny_struct; + +// CHECK-LABEL: define{{.*}} i16 @return_tiny_struct(i8 %x.coerce0, i8 %x.coerce1) +tiny_struct return_tiny_struct(tiny_struct x) { + return x; +} >From 6cd1099ec2e06c33fd5d7092206e778a1e8ba58a Mon Sep 17 00:00:00 2001 From: addmisol <[email protected]> Date: Sat, 7 Mar 2026 00:00:29 +0530 Subject: [PATCH 04/17] Update amdgcn-openmp-device-math-complex.c --- clang/test/Headers/amdgcn-openmp-device-math-complex.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/clang/test/Headers/amdgcn-openmp-device-math-complex.c b/clang/test/Headers/amdgcn-openmp-device-math-complex.c index b347cf4716df29..34c05e2974a64e 100644 --- a/clang/test/Headers/amdgcn-openmp-device-math-complex.c +++ b/clang/test/Headers/amdgcn-openmp-device-math-complex.c @@ -30,8 +30,8 @@ void test_complex_f32(float _Complex a) { // CHECK-LABEL: define {{.*}}test_complex_f32 #pragma omp target { - // CHECK: call [2 x i32] @__divsc3 - // CHECK: call [2 x i32] @__mulsc3 + // CHECK: call { float, float } @__divsc3 + // CHECK: call { float, float } @__mulsc3 (void)(a * (a / a)); } } >From a67bcdb1baecf786c7714a07d05306b614634ce5 Mon Sep 17 00:00:00 2001 From: addmisol <[email protected]> Date: Sat, 7 Mar 2026 00:11:07 +0530 Subject: [PATCH 05/17] Update amdgpu-abi-struct-coerce.cl --- .../CodeGenOpenCL/amdgpu-abi-struct-coerce.cl | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl index 06d3cdb01deb25..a13f8e8bbe1199 100644 --- a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl +++ b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl @@ -288,16 +288,16 @@ void func_struct_arg(struct_arg_t arg1) { } // CHECK: void @func_struct_padding_arg(i8 %arg1.coerce0, i64 %arg1.coerce1) void func_struct_padding_arg(struct_padding_arg arg1) { } -// CHECK: define{{.*}} void @func_struct_char_x8([2 x i32] %arg.coerce) +// CHECK: define{{.*}} void @func_struct_char_x8(i8 %arg.coerce0, i8 %arg.coerce1, i8 %arg.coerce2, i8 %arg.coerce3, i8 %arg.coerce4, i8 %arg.coerce5, i8 %arg.coerce6, i8 %arg.coerce7) void func_struct_char_x8(struct_char_x8 arg) { } -// CHECK: define{{.*}} void @func_struct_char_x4(i32 %arg.coerce) +// CHECK: define{{.*}} void @func_struct_char_x4(i8 %arg.coerce0, i8 %arg.coerce1, i8 %arg.coerce2, i8 %arg.coerce3) void func_struct_char_x4(struct_char_x4 arg) { } -// CHECK: define{{.*}} void @func_struct_char_x3(i32 %arg.coerce) +// CHECK: define{{.*}} void @func_struct_char_x3(i8 %arg.coerce0, i8 %arg.coerce1, i8 %arg.coerce2) void func_struct_char_x3(struct_char_x3 arg) { } -// CHECK: define{{.*}} void @func_struct_char_x2(i16 %arg.coerce) +// CHECK: define{{.*}} void @func_struct_char_x2(i8 %arg.coerce0, i8 %arg.coerce1) void func_struct_char_x2(struct_char_x2 arg) { } // CHECK: define{{.*}} void @func_struct_char_x1(i8 %arg.coerce) @@ -363,8 +363,10 @@ struct_padding_arg func_struct_padding_ret() return s; } -// CHECK: define{{.*}} [2 x i32] @func_struct_char_x8_ret() -// CHECK: ret [2 x i32] zeroinitializer +// CHECK: define{{.*}} %struct.struct_char_x8 @func_struct_char_x8_ret() +// CHECK: ret %struct.struct_char_x8 zeroinitializer + struct_char_x8 func_struct_char_x8_ret() + { struct_char_x8 func_struct_char_x8_ret() { struct_char_x8 s = { 0 }; @@ -525,5 +527,5 @@ void v2i8_reg_count(char2 arg0, char2 arg1, char2 arg2, char2 arg3, void v2i8_reg_count_over(char2 arg0, char2 arg1, char2 arg2, char2 arg3, char2 arg4, char2 arg5, int arg6, struct_4regs arg7) { } -// CHECK: define{{.*}} void @num_regs_left_64bit_aggregate(<4 x i32> noundef %arg0, <4 x i32> noundef %arg1, <4 x i32> noundef %arg2, <3 x i32> noundef %arg3, [2 x i32] %arg4.coerce, i32 noundef %arg5) +// CHECK: define{{.}} void @num_regs_left_64bit_aggregate(<4 x i32> noundef %arg0, <4 x i32> noundef %arg1, <4 x i32> noundef %arg2, <3 x i32> noundef %arg3, ptr addrspace(5) noundef readnone byref(%struct.struct_char_x8) align 1 captures(none) %{{.}}, i32 noundef %arg5) void num_regs_left_64bit_aggregate(int4 arg0, int4 arg1, int4 arg2, int3 arg3, struct_char_x8 arg4, int arg5) { } >From c299160a68b48335ff616aa586098403a9bb81b3 Mon Sep 17 00:00:00 2001 From: addmisol <[email protected]> Date: Sat, 7 Mar 2026 00:13:09 +0530 Subject: [PATCH 06/17] Update AMDGPU.cpp --- clang/lib/CodeGen/Targets/AMDGPU.cpp | 22 ---------------------- 1 file changed, 22 deletions(-) diff --git a/clang/lib/CodeGen/Targets/AMDGPU.cpp b/clang/lib/CodeGen/Targets/AMDGPU.cpp index 4ac7f42289d6d7..f3c4b5ad0837b7 100644 --- a/clang/lib/CodeGen/Targets/AMDGPU.cpp +++ b/clang/lib/CodeGen/Targets/AMDGPU.cpp @@ -163,11 +163,6 @@ ABIArgInfo AMDGPUABIInfo::classifyReturnType(QualType RetTy) const { if (Size <= 32) return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext())); - if (Size <= 64) { - llvm::Type *I32Ty = llvm::Type::getInt32Ty(getVMContext()); - return ABIArgInfo::getDirect(llvm::ArrayType::get(I32Ty, 2)); - } - if (numRegsForType(RetTy) <= MaxNumRegsForArgsRet) return ABIArgInfo::getDirect(); } @@ -246,23 +241,6 @@ ABIArgInfo AMDGPUABIInfo::classifyArgumentType(QualType Ty, bool Variadic, RD && RD->hasFlexibleArrayMember()) return DefaultABIInfo::classifyArgumentType(Ty); - // Pack aggregates <= 8 bytes into single VGPR or pair. - uint64_t Size = getContext().getTypeSize(Ty); - if (Size <= 64) { - unsigned NumRegs = (Size + 31) / 32; - NumRegsLeft -= std::min(NumRegsLeft, NumRegs); - - if (Size <= 16) - return ABIArgInfo::getDirect(llvm::Type::getInt16Ty(getVMContext())); - - if (Size <= 32) - return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext())); - - // XXX: Should this be i64 instead, and should the limit increase? - llvm::Type *I32Ty = llvm::Type::getInt32Ty(getVMContext()); - return ABIArgInfo::getDirect(llvm::ArrayType::get(I32Ty, 2)); - } - if (NumRegsLeft > 0) { uint64_t NumRegs = numRegsForType(Ty); if (NumRegsLeft >= NumRegs) { >From 3c87855bcfb0874e8abad1f3735350bb56e369c7 Mon Sep 17 00:00:00 2001 From: addmisol <[email protected]> Date: Sat, 7 Mar 2026 00:31:52 +0530 Subject: [PATCH 07/17] Update amdgpu-abi-struct-coerce.cl --- clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl | 2 -- 1 file changed, 2 deletions(-) diff --git a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl index a13f8e8bbe1199..fb5ba69c86c6d2 100644 --- a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl +++ b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl @@ -365,8 +365,6 @@ struct_padding_arg func_struct_padding_ret() // CHECK: define{{.*}} %struct.struct_char_x8 @func_struct_char_x8_ret() // CHECK: ret %struct.struct_char_x8 zeroinitializer - struct_char_x8 func_struct_char_x8_ret() - { struct_char_x8 func_struct_char_x8_ret() { struct_char_x8 s = { 0 }; >From cafbf0012a50ab060420db2f7833b8a6ef2dd299 Mon Sep 17 00:00:00 2001 From: addmisol <[email protected]> Date: Sat, 7 Mar 2026 01:35:56 +0530 Subject: [PATCH 08/17] Update amdgpu-abi-struct-coerce.cl --- clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl index fb5ba69c86c6d2..3e4506b88aac6a 100644 --- a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl +++ b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl @@ -525,5 +525,5 @@ void v2i8_reg_count(char2 arg0, char2 arg1, char2 arg2, char2 arg3, void v2i8_reg_count_over(char2 arg0, char2 arg1, char2 arg2, char2 arg3, char2 arg4, char2 arg5, int arg6, struct_4regs arg7) { } -// CHECK: define{{.}} void @num_regs_left_64bit_aggregate(<4 x i32> noundef %arg0, <4 x i32> noundef %arg1, <4 x i32> noundef %arg2, <3 x i32> noundef %arg3, ptr addrspace(5) noundef readnone byref(%struct.struct_char_x8) align 1 captures(none) %{{.}}, i32 noundef %arg5) +// CHECK: define{{.*}} void @num_regs_left_64bit_aggregate(<4 x i32> noundef %arg0, <4 x i32> noundef %arg1, <4 x i32> noundef %arg2, <3 x i32> noundef %arg3, ptr addrspace(5) noundef readnone byref(%struct.struct_char_x8) align 1 captures(none) %{{.*}}, i32 noundef %arg5) void num_regs_left_64bit_aggregate(int4 arg0, int4 arg1, int4 arg2, int3 arg3, struct_char_x8 arg4, int arg5) { } >From 2188c6fba42dd483d670bd22b75b533f5f27067c Mon Sep 17 00:00:00 2001 From: addmisol <[email protected]> Date: Sat, 7 Mar 2026 01:37:44 +0530 Subject: [PATCH 09/17] Update amdgpu-abi-struct-coerce.cl >From 457f683653b6b0ed8165fad5b955c6bbda34670b Mon Sep 17 00:00:00 2001 From: addmisol <[email protected]> Date: Sat, 7 Mar 2026 01:43:50 +0530 Subject: [PATCH 10/17] Update amdgpu-variadic-call.c --- clang/test/CodeGen/amdgpu-variadic-call.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/clang/test/CodeGen/amdgpu-variadic-call.c b/clang/test/CodeGen/amdgpu-variadic-call.c index 17eda215211a2a..22402118d862fe 100644 --- a/clang/test/CodeGen/amdgpu-variadic-call.c +++ b/clang/test/CodeGen/amdgpu-variadic-call.c @@ -217,10 +217,9 @@ typedef union } union_f32_i32; // CHECK-LABEL: define {{[^@]+}}@one_pair_union_f32_i32 -// CHECK-SAME: (i32 noundef [[F0:%.*]], double noundef [[F1:%.*]], i32 [[V0_COERCE:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-SAME: (i32 noundef [[F0:%.*]], double noundef [[F1:%.*]], float [[V0_COERCE:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast i32 [[V0_COERCE]] to float -// CHECK-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[UNION_UNION_F32_I32:%.*]] poison, float [[TMP0]], 0 +// CHECK-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[UNION_UNION_F32_I32:%.*]] poison, float [[V0_COERCE]], 0 // CHECK-NEXT: tail call void (...) @sink_0([[UNION_UNION_F32_I32]] [[DOTFCA_0_INSERT]]) #[[ATTR2]] // CHECK-NEXT: tail call void (i32, ...) @sink_1(i32 noundef [[F0]], [[UNION_UNION_F32_I32]] [[DOTFCA_0_INSERT]]) #[[ATTR2]] // CHECK-NEXT: tail call void (double, i32, ...) @sink_2(double noundef [[F1]], i32 noundef [[F0]], [[UNION_UNION_F32_I32]] [[DOTFCA_0_INSERT]]) #[[ATTR2]] @@ -273,13 +272,12 @@ void multiple_one(int f0, double f1, int v0, double v1) } // CHECK-LABEL: define {{[^@]+}}@multiple_two -// CHECK-SAME: (i32 noundef [[F0:%.*]], double noundef [[F1:%.*]], double [[V0_COERCE0:%.*]], double [[V0_COERCE1:%.*]], float noundef [[V1:%.*]], i32 [[V2_COERCE:%.*]], i32 noundef [[V3:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-SAME: (i32 noundef [[F0:%.*]], double noundef [[F1:%.*]], double [[V0_COERCE0:%.*]], double [[V0_COERCE1:%.*]], float noundef [[V1:%.*]], float [[V2_COERCE:%.*]], i32 noundef [[V3:%.*]]) local_unnamed_addr #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast i32 [[V2_COERCE]] to float // CHECK-NEXT: [[CONV:%.*]] = fpext float [[V1]] to double // CHECK-NEXT: [[DOTFCA_0_INSERT16:%.*]] = insertvalue [[STRUCT_PAIR_F64:%.*]] poison, double [[V0_COERCE0]], 0 // CHECK-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [[STRUCT_PAIR_F64]] [[DOTFCA_0_INSERT16]], double [[V0_COERCE1]], 1 -// CHECK-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[UNION_UNION_F32_I32:%.*]] poison, float [[TMP0]], 0 +// CHECK-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [[UNION_UNION_F32_I32:%.*]] poison, float [[V2_COERCE]], 0 // CHECK-NEXT: tail call void (...) @sink_0([[STRUCT_PAIR_F64]] [[DOTFCA_1_INSERT]], double noundef [[CONV]], [[UNION_UNION_F32_I32]] [[DOTFCA_0_INSERT]], i32 noundef [[V3]]) #[[ATTR2]] // CHECK-NEXT: tail call void (i32, ...) @sink_1(i32 noundef [[F0]], [[STRUCT_PAIR_F64]] [[DOTFCA_1_INSERT]], double noundef [[CONV]], [[UNION_UNION_F32_I32]] [[DOTFCA_0_INSERT]], i32 noundef [[V3]]) #[[ATTR2]] // CHECK-NEXT: tail call void (double, i32, ...) @sink_2(double noundef [[F1]], i32 noundef [[F0]], [[STRUCT_PAIR_F64]] [[DOTFCA_1_INSERT]], double noundef [[CONV]], [[UNION_UNION_F32_I32]] [[DOTFCA_0_INSERT]], i32 noundef [[V3]]) #[[ATTR2]] >From 3da0a3310411fd65310faea9d8d364d961ea02e7 Mon Sep 17 00:00:00 2001 From: Addmisol <[email protected]> Date: Sun, 15 Mar 2026 21:00:33 +0530 Subject: [PATCH 11/17] Update amdgpu-abi-struct-coerce.cl --- .../test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl index 3e4506b88aac6a..06d3cdb01deb25 100644 --- a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl +++ b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl @@ -288,16 +288,16 @@ void func_struct_arg(struct_arg_t arg1) { } // CHECK: void @func_struct_padding_arg(i8 %arg1.coerce0, i64 %arg1.coerce1) void func_struct_padding_arg(struct_padding_arg arg1) { } -// CHECK: define{{.*}} void @func_struct_char_x8(i8 %arg.coerce0, i8 %arg.coerce1, i8 %arg.coerce2, i8 %arg.coerce3, i8 %arg.coerce4, i8 %arg.coerce5, i8 %arg.coerce6, i8 %arg.coerce7) +// CHECK: define{{.*}} void @func_struct_char_x8([2 x i32] %arg.coerce) void func_struct_char_x8(struct_char_x8 arg) { } -// CHECK: define{{.*}} void @func_struct_char_x4(i8 %arg.coerce0, i8 %arg.coerce1, i8 %arg.coerce2, i8 %arg.coerce3) +// CHECK: define{{.*}} void @func_struct_char_x4(i32 %arg.coerce) void func_struct_char_x4(struct_char_x4 arg) { } -// CHECK: define{{.*}} void @func_struct_char_x3(i8 %arg.coerce0, i8 %arg.coerce1, i8 %arg.coerce2) +// CHECK: define{{.*}} void @func_struct_char_x3(i32 %arg.coerce) void func_struct_char_x3(struct_char_x3 arg) { } -// CHECK: define{{.*}} void @func_struct_char_x2(i8 %arg.coerce0, i8 %arg.coerce1) +// CHECK: define{{.*}} void @func_struct_char_x2(i16 %arg.coerce) void func_struct_char_x2(struct_char_x2 arg) { } // CHECK: define{{.*}} void @func_struct_char_x1(i8 %arg.coerce) @@ -363,8 +363,8 @@ struct_padding_arg func_struct_padding_ret() return s; } -// CHECK: define{{.*}} %struct.struct_char_x8 @func_struct_char_x8_ret() -// CHECK: ret %struct.struct_char_x8 zeroinitializer +// CHECK: define{{.*}} [2 x i32] @func_struct_char_x8_ret() +// CHECK: ret [2 x i32] zeroinitializer struct_char_x8 func_struct_char_x8_ret() { struct_char_x8 s = { 0 }; @@ -525,5 +525,5 @@ void v2i8_reg_count(char2 arg0, char2 arg1, char2 arg2, char2 arg3, void v2i8_reg_count_over(char2 arg0, char2 arg1, char2 arg2, char2 arg3, char2 arg4, char2 arg5, int arg6, struct_4regs arg7) { } -// CHECK: define{{.*}} void @num_regs_left_64bit_aggregate(<4 x i32> noundef %arg0, <4 x i32> noundef %arg1, <4 x i32> noundef %arg2, <3 x i32> noundef %arg3, ptr addrspace(5) noundef readnone byref(%struct.struct_char_x8) align 1 captures(none) %{{.*}}, i32 noundef %arg5) +// CHECK: define{{.*}} void @num_regs_left_64bit_aggregate(<4 x i32> noundef %arg0, <4 x i32> noundef %arg1, <4 x i32> noundef %arg2, <3 x i32> noundef %arg3, [2 x i32] %arg4.coerce, i32 noundef %arg5) void num_regs_left_64bit_aggregate(int4 arg0, int4 arg1, int4 arg2, int3 arg3, struct_char_x8 arg4, int arg5) { } >From d67e84dbfbc0a1d2f0f80e5c3008942107058829 Mon Sep 17 00:00:00 2001 From: Addmisol <[email protected]> Date: Sun, 15 Mar 2026 21:01:14 +0530 Subject: [PATCH 12/17] Update amdgpu-abi-struct-coerce.c --- clang/test/CodeGen/amdgpu-abi-struct-coerce.c | 55 ++++++++++++++++--- 1 file changed, 48 insertions(+), 7 deletions(-) diff --git a/clang/test/CodeGen/amdgpu-abi-struct-coerce.c b/clang/test/CodeGen/amdgpu-abi-struct-coerce.c index 2399630ff797b2..f827978a8cd183 100644 --- a/clang/test/CodeGen/amdgpu-abi-struct-coerce.c +++ b/clang/test/CodeGen/amdgpu-abi-struct-coerce.c @@ -1,8 +1,12 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -emit-llvm -o - %s | FileCheck %s -// Check that structs containing mixed float and int types are not coerced -// to integer arrays. They should preserve the original struct type and -// individual field types. +// Check that structs containing floats or full-sized integers (i32, i64) are +// NOT coerced to integer arrays. They should preserve their original types. +// However, structs containing only sub-32-bit integer types (char, short) +// should still be packed into i32 registers. + +// === Structs with floats - should NOT be coerced to integers === typedef struct fp_int_pair { float f; @@ -37,6 +41,8 @@ two_floats return_two_floats(two_floats x) { return x; } +// === Structs with full-sized integers - should NOT be coerced === + typedef struct two_ints { int a; int b; @@ -48,24 +54,59 @@ two_ints return_two_ints(two_ints x) { return x; } -// Structs <= 32 bits should still be coerced to i32 for return value +// === Structs with only sub-32-bit integers - SHOULD be coerced === + +// Structs of small integers <= 32 bits should be coerced to i32 typedef struct small_struct { short a; short b; } small_struct; -// CHECK-LABEL: define{{.*}} i32 @return_small_struct(i16 %x.coerce0, i16 %x.coerce1) +// CHECK-LABEL: define{{.*}} i32 @return_small_struct(i32 %x.coerce) small_struct return_small_struct(small_struct x) { return x; } -// Structs <= 16 bits should still be coerced to i16 for return value +// Structs of small integers <= 16 bits should be coerced to i16 typedef struct tiny_struct { char a; char b; } tiny_struct; -// CHECK-LABEL: define{{.*}} i16 @return_tiny_struct(i8 %x.coerce0, i8 %x.coerce1) +// CHECK-LABEL: define{{.*}} i16 @return_tiny_struct(i16 %x.coerce) tiny_struct return_tiny_struct(tiny_struct x) { return x; } + +// Struct of 8 chars (64 bits) should be coerced to [2 x i32] +typedef struct eight_chars { + char a, b, c, d, e, f, g, h; +} eight_chars; + +// CHECK-LABEL: define{{.*}} [2 x i32] @return_eight_chars([2 x i32] %x.coerce) +eight_chars return_eight_chars(eight_chars x) { + return x; +} + +// Struct of 4 chars (32 bits) should be coerced to i32 +typedef struct four_chars { + char a, b, c, d; +} four_chars; + +// CHECK-LABEL: define{{.*}} i32 @return_four_chars(i32 %x.coerce) +four_chars return_four_chars(four_chars x) { + return x; +} + +// === Mixed tests - floats prevent coercion even with small integers === + +typedef struct char_and_float { + char c; + float f; +} char_and_float; + +// CHECK-LABEL: define{{.*}} %struct.char_and_float @return_char_and_float(i8 %x.coerce0, float %x.coerce1) +// CHECK: ret %struct.char_and_float +char_and_float return_char_and_float(char_and_float x) { + return x; +} >From e28dc49ff83911534b561ecf23a96a4b3446eecf Mon Sep 17 00:00:00 2001 From: Addmisol <[email protected]> Date: Sun, 15 Mar 2026 21:01:59 +0530 Subject: [PATCH 13/17] Update AMDGPU.cpp --- clang/lib/CodeGen/Targets/AMDGPU.cpp | 95 ++++++++++++++++++++++++++-- 1 file changed, 90 insertions(+), 5 deletions(-) diff --git a/clang/lib/CodeGen/Targets/AMDGPU.cpp b/clang/lib/CodeGen/Targets/AMDGPU.cpp index f3c4b5ad0837b7..9e0ca7b77ecdd3 100644 --- a/clang/lib/CodeGen/Targets/AMDGPU.cpp +++ b/clang/lib/CodeGen/Targets/AMDGPU.cpp @@ -77,6 +77,54 @@ bool AMDGPUABIInfo::isHomogeneousAggregateSmallEnough( return Members * NumRegs <= MaxNumRegsForArgsRet; } +/// Check if all fields in an aggregate type contain only sub-32-bit integer +/// types. Such aggregates should be packed into i32 registers rather than +/// passed as individual elements. Aggregates containing floats or full-sized +/// integer types (i32, i64) should preserve their original types. +static bool containsOnlyPackableIntegerTypes(const RecordDecl *RD, + const ASTContext &Context) { + for (const FieldDecl *Field : RD->fields()) { + QualType FieldTy = Field->getType(); + + // Recursively check nested structs + if (const auto *NestedRD = FieldTy->getAsRecordDecl()) { + if (!containsOnlyPackableIntegerTypes(NestedRD, Context)) + return false; + continue; + } + + // Arrays - check the element type + if (const auto *AT = Context.getAsConstantArrayType(FieldTy)) { + QualType EltTy = AT->getElementType(); + if (const auto *NestedRD = EltTy->getAsRecordDecl()) { + if (!containsOnlyPackableIntegerTypes(NestedRD, Context)) + return false; + continue; + } + // For non-struct array elements, check if they're packable integers + if (!EltTy->isIntegerType()) + return false; + uint64_t EltSize = Context.getTypeSize(EltTy); + if (EltSize >= 32) + return false; + continue; + } + + // Floating point types should not be packed into integers + if (FieldTy->isFloatingType()) + return false; + + // Only integer types that are smaller than 32 bits should be packed + if (!FieldTy->isIntegerType()) + return false; + + uint64_t FieldSize = Context.getTypeSize(FieldTy); + if (FieldSize >= 32) + return false; + } + return true; +} + /// Estimate number of registers the type will use when passed in registers. uint64_t AMDGPUABIInfo::numRegsForType(QualType Ty) const { uint64_t NumRegs = 0; @@ -155,13 +203,26 @@ ABIArgInfo AMDGPUABIInfo::classifyReturnType(QualType RetTy) const { RD && RD->hasFlexibleArrayMember()) return DefaultABIInfo::classifyReturnType(RetTy); - // Pack aggregates <= 4 bytes into single VGPR or pair. + // Pack aggregates <= 8 bytes into single VGPR or pair, but only if they + // contain sub-32-bit integer types. Aggregates with floats or full-sized + // integers should preserve their original types. uint64_t Size = getContext().getTypeSize(RetTy); - if (Size <= 16) - return ABIArgInfo::getDirect(llvm::Type::getInt16Ty(getVMContext())); + if (Size <= 64) { + const auto *RD = RetTy->getAsRecordDecl(); + bool ShouldPackToInt = + RD && containsOnlyPackableIntegerTypes(RD, getContext()); - if (Size <= 32) - return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext())); + if (ShouldPackToInt) { + if (Size <= 16) + return ABIArgInfo::getDirect(llvm::Type::getInt16Ty(getVMContext())); + + if (Size <= 32) + return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext())); + + llvm::Type *I32Ty = llvm::Type::getInt32Ty(getVMContext()); + return ABIArgInfo::getDirect(llvm::ArrayType::get(I32Ty, 2)); + } + } if (numRegsForType(RetTy) <= MaxNumRegsForArgsRet) return ABIArgInfo::getDirect(); @@ -241,6 +302,30 @@ ABIArgInfo AMDGPUABIInfo::classifyArgumentType(QualType Ty, bool Variadic, RD && RD->hasFlexibleArrayMember()) return DefaultABIInfo::classifyArgumentType(Ty); + // Pack aggregates <= 8 bytes into single VGPR or pair, but only if they + // contain sub-32-bit integer types. Aggregates with floats or full-sized + // integers (i32, i64) should preserve their original types. + uint64_t Size = getContext().getTypeSize(Ty); + if (Size <= 64) { + const auto *RD = Ty->getAsRecordDecl(); + bool ShouldPackToInt = + RD && containsOnlyPackableIntegerTypes(RD, getContext()); + + if (ShouldPackToInt) { + unsigned NumRegs = (Size + 31) / 32; + NumRegsLeft -= std::min(NumRegsLeft, NumRegs); + + if (Size <= 16) + return ABIArgInfo::getDirect(llvm::Type::getInt16Ty(getVMContext())); + + if (Size <= 32) + return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext())); + + llvm::Type *I32Ty = llvm::Type::getInt32Ty(getVMContext()); + return ABIArgInfo::getDirect(llvm::ArrayType::get(I32Ty, 2)); + } + } + if (NumRegsLeft > 0) { uint64_t NumRegs = numRegsForType(Ty); if (NumRegsLeft >= NumRegs) { >From fd6274476d41f42bf696f557cf2378140720d2c8 Mon Sep 17 00:00:00 2001 From: Addmisol <[email protected]> Date: Sun, 15 Mar 2026 21:15:11 +0530 Subject: [PATCH 14/17] Update AMDGPU.cpp --- clang/lib/CodeGen/Targets/AMDGPU.cpp | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/clang/lib/CodeGen/Targets/AMDGPU.cpp b/clang/lib/CodeGen/Targets/AMDGPU.cpp index 829500383a34a0..4918bdcd8111b5 100644 --- a/clang/lib/CodeGen/Targets/AMDGPU.cpp +++ b/clang/lib/CodeGen/Targets/AMDGPU.cpp @@ -214,10 +214,12 @@ ABIArgInfo AMDGPUABIInfo::classifyReturnType(QualType RetTy) const { if (ShouldPackToInt) { if (Size <= 16) - return ABIArgInfo::getDirect(llvm::Type::getInt16Ty(getVMContext())); + return ABIArgInfo::getDirect( + llvm::Type::getInt16Ty(getVMContext())); if (Size <= 32) - return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext())); + return ABIArgInfo::getDirect( + llvm::Type::getInt32Ty(getVMContext())); llvm::Type *I32Ty = llvm::Type::getInt32Ty(getVMContext()); return ABIArgInfo::getDirect(llvm::ArrayType::get(I32Ty, 2)); @@ -316,10 +318,12 @@ ABIArgInfo AMDGPUABIInfo::classifyArgumentType(QualType Ty, bool Variadic, NumRegsLeft -= std::min(NumRegsLeft, NumRegs); if (Size <= 16) - return ABIArgInfo::getDirect(llvm::Type::getInt16Ty(getVMContext())); + return ABIArgInfo::getDirect( + llvm::Type::getInt16Ty(getVMContext())); if (Size <= 32) - return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext())); + return ABIArgInfo::getDirect( + llvm::Type::getInt32Ty(getVMContext())); llvm::Type *I32Ty = llvm::Type::getInt32Ty(getVMContext()); return ABIArgInfo::getDirect(llvm::ArrayType::get(I32Ty, 2)); >From f25324bb2304449aa95d79a620b910b11869ae2a Mon Sep 17 00:00:00 2001 From: Addmisol <[email protected]> Date: Sun, 15 Mar 2026 21:28:52 +0530 Subject: [PATCH 15/17] Update AMDGPU.cpp --- clang/lib/CodeGen/Targets/AMDGPU.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/clang/lib/CodeGen/Targets/AMDGPU.cpp b/clang/lib/CodeGen/Targets/AMDGPU.cpp index 4918bdcd8111b5..06b066de590557 100644 --- a/clang/lib/CodeGen/Targets/AMDGPU.cpp +++ b/clang/lib/CodeGen/Targets/AMDGPU.cpp @@ -318,12 +318,10 @@ ABIArgInfo AMDGPUABIInfo::classifyArgumentType(QualType Ty, bool Variadic, NumRegsLeft -= std::min(NumRegsLeft, NumRegs); if (Size <= 16) - return ABIArgInfo::getDirect( - llvm::Type::getInt16Ty(getVMContext())); + return ABIArgInfo::getDirect(llvm::Type::getInt16Ty(getVMContext())); if (Size <= 32) - return ABIArgInfo::getDirect( - llvm::Type::getInt32Ty(getVMContext())); + return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext())); llvm::Type *I32Ty = llvm::Type::getInt32Ty(getVMContext()); return ABIArgInfo::getDirect(llvm::ArrayType::get(I32Ty, 2)); >From 804a5a538e79e4e19d952d0d5a00269431fceb54 Mon Sep 17 00:00:00 2001 From: Addmisol <[email protected]> Date: Sun, 15 Mar 2026 21:53:13 +0530 Subject: [PATCH 16/17] Update amdgpu-abi-struct-coerce.cl --- clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl index 06d3cdb01deb25..e9cdb7f5da32a9 100644 --- a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl +++ b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl @@ -431,8 +431,8 @@ struct_char_arr32 func_ret_struct_char_arr32() return s; } -// CHECK: define{{.*}} i32 @func_transparent_union_ret() local_unnamed_addr #[[ATTR1:[0-9]+]] { -// CHECK: ret i32 0 +// CHECK: define{{.*}} %union.transparent_u @func_transparent_union_ret() local_unnamed_addr #[[ATTR1:[0-9]+]] { +// CHECK: ret %union.transparent_u zeroinitializer transparent_u func_transparent_union_ret() { transparent_u u = { 0 }; >From 928aa4ed1558e7e2d52461df83d0f80004d317e0 Mon Sep 17 00:00:00 2001 From: Addmisol <[email protected]> Date: Sun, 15 Mar 2026 21:58:43 +0530 Subject: [PATCH 17/17] Update amdgpu-abi-struct-coerce.cl --- .../CodeGenOpenCL/amdgpu-abi-struct-coerce.cl | 785 +++++++----------- 1 file changed, 319 insertions(+), 466 deletions(-) diff --git a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl index e9cdb7f5da32a9..7857d01f431c81 100644 --- a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl +++ b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl @@ -1,529 +1,382 @@ -// REQUIRES: amdgpu-registered-target -// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -emit-llvm -o - %s | FileCheck %s -// RUN: %clang_cc1 -triple r600-unknown-unknown -emit-llvm -o - %s | FileCheck %s - -typedef __attribute__(( ext_vector_type(2) )) char char2; -typedef __attribute__(( ext_vector_type(3) )) char char3; -typedef __attribute__(( ext_vector_type(4) )) char char4; - -typedef __attribute__(( ext_vector_type(2) )) short short2; -typedef __attribute__(( ext_vector_type(3) )) short short3; -typedef __attribute__(( ext_vector_type(4) )) short short4; - -typedef __attribute__(( ext_vector_type(2) )) int int2; -typedef __attribute__(( ext_vector_type(3) )) int int3; -typedef __attribute__(( ext_vector_type(4) )) int int4; -typedef __attribute__(( ext_vector_type(16) )) int int16; -typedef __attribute__(( ext_vector_type(32) )) int int32; - -// CHECK: %struct.empty_struct = type {} -typedef struct empty_struct -{ -} empty_struct; - -// CHECK-NOT: %struct.single_element_struct_arg -typedef struct single_element_struct_arg -{ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -emit-llvm -o - %s | FileCheck %s + +// Test AMDGPU ABI struct coercion behavior: +// - Structs containing ONLY sub-32-bit integers (char, short) should be packed into i32 registers +// - Structs containing floats or full-sized integers (i32, i64) should preserve their original types +// +// This tests the fix for the issue where structs like {float, int} were incorrectly +// coerced to [2 x i32], losing float type information. + +// ============================================================================ +// SECTION 1: Structs with floats - should NOT be coerced to integers +// ============================================================================ + +typedef struct fp_int_pair { + float f; + int i; +} fp_int_pair; + +// CHECK-LABEL: define{{.*}} %struct.fp_int_pair @return_fp_int_pair(float %x.coerce0, i32 %x.coerce1) +// CHECK: ret %struct.fp_int_pair +fp_int_pair return_fp_int_pair(fp_int_pair x) { + return x; +} + +typedef struct int_fp_pair { int i; -} single_element_struct_arg_t; - -// CHECK-NOT: %struct.nested_single_element_struct_arg -typedef struct nested_single_element_struct_arg -{ - single_element_struct_arg_t i; -} nested_single_element_struct_arg_t; - -// CHECK: %struct.struct_arg = type { i32, float, i32 } -typedef struct struct_arg -{ - int i1; float f; - int i2; -} struct_arg_t; - -// CHECK: %struct.struct_padding_arg = type { i8, i64 } -typedef struct struct_padding_arg -{ - char i1; - long f; -} struct_padding_arg; - -// CHECK: %struct.struct_of_arrays_arg = type { [2 x i32], float, [4 x i32], [3 x float], i32 } -typedef struct struct_of_arrays_arg -{ - int i1[2]; - float f1; - int i2[4]; - float f2[3]; - int i3; -} struct_of_arrays_arg_t; - -// CHECK: %struct.struct_of_structs_arg = type { i32, float, %struct.struct_arg, i32 } -typedef struct struct_of_structs_arg -{ - int i1; - float f1; - struct_arg_t s1; - int i2; -} struct_of_structs_arg_t; - -typedef union -{ - int b1; - float b2; -} transparent_u __attribute__((__transparent_union__)); - -// CHECK: %struct.single_array_element_struct_arg = type { [4 x i32] } -typedef struct single_array_element_struct_arg -{ - int i[4]; -} single_array_element_struct_arg_t; - -// CHECK: %struct.single_struct_element_struct_arg = type { %struct.inner } -// CHECK: %struct.inner = type { i32, i64 } -typedef struct single_struct_element_struct_arg -{ - struct inner { - int a; - long b; - } s; -} single_struct_element_struct_arg_t; - -// CHECK: %struct.different_size_type_pair -typedef struct different_size_type_pair { - long l; - int i; -} different_size_type_pair; - -// CHECK: %struct.flexible_array = type { i32, [0 x i32] } -typedef struct flexible_array -{ - int i; - int flexible[]; -} flexible_array; - -// CHECK: %struct.struct_arr16 = type { [16 x i32] } -typedef struct struct_arr16 -{ - int arr[16]; -} struct_arr16; - -// CHECK: %struct.struct_arr32 = type { [32 x i32] } -typedef struct struct_arr32 -{ - int arr[32]; -} struct_arr32; - -// CHECK: %struct.struct_arr33 = type { [33 x i32] } -typedef struct struct_arr33 -{ - int arr[33]; -} struct_arr33; - -// CHECK: %struct.struct_char_arr32 = type { [32 x i8] } -typedef struct struct_char_arr32 -{ - char arr[32]; -} struct_char_arr32; - -// CHECK-NOT: %struct.struct_char_x8 -typedef struct struct_char_x8 { - char x, y, z, w; - char a, b, c, d; -} struct_char_x8; - -// CHECK-NOT: %struct.struct_char_x4 -typedef struct struct_char_x4 { - char x, y, z, w; -} struct_char_x4; - -// CHECK-NOT: %struct.struct_char_x3 -typedef struct struct_char_x3 { - char x, y, z; -} struct_char_x3; - -// CHECK-NOT: %struct.struct_char_x2 -typedef struct struct_char_x2 { - char x, y; -} struct_char_x2; - -// CHECK-NOT: %struct.struct_char_x1 -typedef struct struct_char_x1 { - char x; -} struct_char_x1; - -// 4 registers from fields, 5 if padding included. -// CHECK: %struct.nested = type { i8, i64 } -// CHECK: %struct.num_regs_nested_struct = type { i32, %struct.nested } -typedef struct num_regs_nested_struct { - int x; - struct nested { - char z; - long y; - } inner; -} num_regs_nested_struct; - -// CHECK: %struct.double_nested = type { %struct.inner_inner } -// CHECK: %struct.inner_inner = type { i8, i32, i8 } -// CHECK: %struct.double_nested_struct = type { i32, %struct.double_nested, i16 } -typedef struct double_nested_struct { - int x; - struct double_nested { - struct inner_inner { - char y; - int q; - char z; - } inner_inner; - } inner; - - short w; -} double_nested_struct; - -// This is a large struct, but uses fewer registers than the limit. -// CHECK: %struct.large_struct_padding = type { i8, i32, i8, i32, i8, i8, i16, i16, [3 x i8], i64, i32, i8, i32, i16, i8 } -typedef struct large_struct_padding { - char e0; - int e1; - char e2; - int e3; - char e4; - char e5; - short e6; - short e7; - char e8[3]; - long e9; - int e10; - char e11; - int e12; - short e13; - char e14; -} large_struct_padding; - -// The number of registers computed should be 6, not 8. -typedef struct int3_pair { - int3 dx; - int3 dy; -} int3_pair; - -// CHECK: %struct.struct_4regs = type { i32, i32, i32, i32 } -typedef struct struct_4regs -{ - int x; - int y; - int z; - int w; -} struct_4regs; - -// CHECK: void @kernel_empty_struct_arg(ptr addrspace(4) noundef readnone byref(%struct.empty_struct) align 1 captures(none) {{%.+}}) -// CHECK: void @__clang_ocl_kern_imp_kernel_empty_struct_arg() -__kernel void kernel_empty_struct_arg(empty_struct s) { } - -// CHECK: void @kernel_single_element_struct_arg(i32 %arg1.coerce) -__kernel void kernel_single_element_struct_arg(single_element_struct_arg_t arg1) { } - -// CHECK: void @kernel_nested_single_element_struct_arg(i32 %arg1.coerce) -__kernel void kernel_nested_single_element_struct_arg(nested_single_element_struct_arg_t arg1) { } - -// CHECK: void @kernel_struct_arg(ptr addrspace(4) noundef readonly byref(%struct.struct_arg) align 4 captures(none) {{%.+}}) -// CHECK: void @__clang_ocl_kern_imp_kernel_struct_arg(i32 %arg1.coerce0, float %arg1.coerce1, i32 %arg1.coerce2) -__kernel void kernel_struct_arg(struct_arg_t arg1) { } - -// CHECK: void @kernel_struct_padding_arg(ptr addrspace(4) noundef readonly byref(%struct.struct_padding_arg) align 8 captures(none) {{%.+}}) -// CHECK: void @__clang_ocl_kern_imp_kernel_struct_padding_arg(i8 %arg1.coerce0, i64 %arg1.coerce1) -__kernel void kernel_struct_padding_arg(struct_padding_arg arg1) { } - -// CHECK: void @kernel_test_struct_of_arrays_arg(ptr addrspace(4) noundef readonly byref(%struct.struct_of_arrays_arg) align 4 captures(none) {{%.+}}) -// CHECK: void @__clang_ocl_kern_imp_kernel_test_struct_of_arrays_arg([2 x i32] %arg1.coerce0, float %arg1.coerce1, [4 x i32] %arg1.coerce2, [3 x float] %arg1.coerce3, i32 %arg1.coerce4) -__kernel void kernel_test_struct_of_arrays_arg(struct_of_arrays_arg_t arg1) { } - -// CHECK: void @kernel_struct_of_structs_arg(ptr addrspace(4) noundef readonly byref(%struct.struct_of_structs_arg) align 4 captures(none) {{%.+}}) -// CHECK: void @__clang_ocl_kern_imp_kernel_struct_of_structs_arg(i32 %arg1.coerce0, float %arg1.coerce1, %struct.struct_arg %arg1.coerce2, i32 %arg1.coerce3) -__kernel void kernel_struct_of_structs_arg(struct_of_structs_arg_t arg1) { } - -// CHECK: void @test_kernel_transparent_union_arg(i32 %u.coerce) -__kernel void test_kernel_transparent_union_arg(transparent_u u) { } - -// CHECK: void @kernel_single_array_element_struct_arg(ptr addrspace(4) noundef readonly byref(%struct.single_array_element_struct_arg) align 4 captures(none) {{%.+}}) -// CHECK: void @__clang_ocl_kern_imp_kernel_single_array_element_struct_arg([4 x i32] %arg1.coerce) -__kernel void kernel_single_array_element_struct_arg(single_array_element_struct_arg_t arg1) { } - -// CHECK: void @kernel_single_struct_element_struct_arg(ptr addrspace(4) noundef readonly byref(%struct.single_struct_element_struct_arg) align 8 captures(none) {{%.+}}) -// CHECK: void @__clang_ocl_kern_imp_kernel_single_struct_element_struct_arg(%struct.inner %arg1.coerce) -__kernel void kernel_single_struct_element_struct_arg(single_struct_element_struct_arg_t arg1) { } - -// CHECK: void @kernel_different_size_type_pair_arg(ptr addrspace(4) noundef readonly byref(%struct.different_size_type_pair) align 8 captures(none) {{%.+}}) -// CHECK: void @__clang_ocl_kern_imp_kernel_different_size_type_pair_arg(i64 %arg1.coerce0, i32 %arg1.coerce1) -__kernel void kernel_different_size_type_pair_arg(different_size_type_pair arg1) { } - -// CHECK: define{{.*}} void @func_f32_arg(float noundef %arg) -void func_f32_arg(float arg) { } - -// CHECK: define{{.*}} void @func_v2i16_arg(<2 x i16> noundef %arg) -void func_v2i16_arg(short2 arg) { } - -// CHECK: define{{.*}} void @func_v3i32_arg(<3 x i32> noundef %arg) -void func_v3i32_arg(int3 arg) { } - -// CHECK: define{{.*}} void @func_v4i32_arg(<4 x i32> noundef %arg) -void func_v4i32_arg(int4 arg) { } - -// CHECK: define{{.*}} void @func_v16i32_arg(<16 x i32> noundef %arg) -void func_v16i32_arg(int16 arg) { } - -// CHECK: define{{.*}} void @func_v32i32_arg(<32 x i32> noundef %arg) -void func_v32i32_arg(int32 arg) { } +} int_fp_pair; -// CHECK: define{{.*}} void @func_empty_struct_arg() -void func_empty_struct_arg(empty_struct empty) { } +// CHECK-LABEL: define{{.*}} %struct.int_fp_pair @return_int_fp_pair(i32 %x.coerce0, float %x.coerce1) +// CHECK: ret %struct.int_fp_pair +int_fp_pair return_int_fp_pair(int_fp_pair x) { + return x; +} -// CHECK: void @func_single_element_struct_arg(i32 %arg1.coerce) -void func_single_element_struct_arg(single_element_struct_arg_t arg1) { } +typedef struct two_floats { + float a; + float b; +} two_floats; -// CHECK: void @func_nested_single_element_struct_arg(i32 %arg1.coerce) -void func_nested_single_element_struct_arg(nested_single_element_struct_arg_t arg1) { } +// CHECK-LABEL: define{{.*}} %struct.two_floats @return_two_floats(float %x.coerce0, float %x.coerce1) +// CHECK: ret %struct.two_floats +two_floats return_two_floats(two_floats x) { + return x; +} -// CHECK: void @func_struct_arg(i32 %arg1.coerce0, float %arg1.coerce1, i32 %arg1.coerce2) -void func_struct_arg(struct_arg_t arg1) { } +// Double precision floats +typedef struct double_struct { + double d; +} double_struct; -// CHECK: void @func_struct_padding_arg(i8 %arg1.coerce0, i64 %arg1.coerce1) -void func_struct_padding_arg(struct_padding_arg arg1) { } +// CHECK-LABEL: define{{.*}} double @return_double_struct(double %x.coerce) +double_struct return_double_struct(double_struct x) { + return x; +} -// CHECK: define{{.*}} void @func_struct_char_x8([2 x i32] %arg.coerce) -void func_struct_char_x8(struct_char_x8 arg) { } +// ============================================================================ +// SECTION 2: Structs with full-sized integers - should NOT be coerced +// ============================================================================ -// CHECK: define{{.*}} void @func_struct_char_x4(i32 %arg.coerce) -void func_struct_char_x4(struct_char_x4 arg) { } +typedef struct two_ints { + int a; + int b; +} two_ints; -// CHECK: define{{.*}} void @func_struct_char_x3(i32 %arg.coerce) -void func_struct_char_x3(struct_char_x3 arg) { } +// CHECK-LABEL: define{{.*}} %struct.two_ints @return_two_ints(i32 %x.coerce0, i32 %x.coerce1) +// CHECK: ret %struct.two_ints +two_ints return_two_ints(two_ints x) { + return x; +} -// CHECK: define{{.*}} void @func_struct_char_x2(i16 %arg.coerce) -void func_struct_char_x2(struct_char_x2 arg) { } +typedef struct single_int { + int a; +} single_int; -// CHECK: define{{.*}} void @func_struct_char_x1(i8 %arg.coerce) -void func_struct_char_x1(struct_char_x1 arg) { } +// CHECK-LABEL: define{{.*}} i32 @return_single_int(i32 %x.coerce) +single_int return_single_int(single_int x) { + return x; +} -// CHECK: void @func_transparent_union_arg(i32 %u.coerce) -void func_transparent_union_arg(transparent_u u) { } +typedef struct int64_struct { + long long a; +} int64_struct; -// CHECK: void @func_single_array_element_struct_arg([4 x i32] %arg1.coerce) -void func_single_array_element_struct_arg(single_array_element_struct_arg_t arg1) { } +// CHECK-LABEL: define{{.*}} i64 @return_int64_struct(i64 %x.coerce) +int64_struct return_int64_struct(int64_struct x) { + return x; +} -// CHECK: void @func_single_struct_element_struct_arg(%struct.inner %arg1.coerce) -void func_single_struct_element_struct_arg(single_struct_element_struct_arg_t arg1) { } +// ============================================================================ +// SECTION 3: Structs with ONLY sub-32-bit integers - SHOULD be coerced +// ============================================================================ -// CHECK: void @func_different_size_type_pair_arg(i64 %arg1.coerce0, i32 %arg1.coerce1) -void func_different_size_type_pair_arg(different_size_type_pair arg1) { } +// Structs of small integers <= 32 bits should be coerced to i32 +typedef struct small_struct { + short a; + short b; +} small_struct; -// CHECK: void @func_flexible_array_arg(ptr addrspace(5) noundef readnone byval(%struct.flexible_array) align 4 captures(none) %arg) -void func_flexible_array_arg(flexible_array arg) { } +// CHECK-LABEL: define{{.*}} i32 @return_small_struct(i32 %x.coerce) +small_struct return_small_struct(small_struct x) { + return x; +} -// CHECK: define{{.*}} float @func_f32_ret() -float func_f32_ret() -{ - return 0.0f; +// Structs of small integers <= 16 bits should be coerced to i16 +typedef struct tiny_struct { + char a; + char b; +} tiny_struct; + +// CHECK-LABEL: define{{.*}} i16 @return_tiny_struct(i16 %x.coerce) +tiny_struct return_tiny_struct(tiny_struct x) { + return x; } -// CHECK: define{{.*}} void @func_empty_struct_ret() -empty_struct func_empty_struct_ret() -{ - empty_struct s = {}; - return s; +// Struct of 8 chars (64 bits) should be coerced to [2 x i32] +typedef struct eight_chars { + char a, b, c, d, e, f, g, h; +} eight_chars; + +// CHECK-LABEL: define{{.*}} [2 x i32] @return_eight_chars([2 x i32] %x.coerce) +eight_chars return_eight_chars(eight_chars x) { + return x; } -// CHECK: define{{.*}} i32 @single_element_struct_ret() -// CHECK: ret i32 0 -single_element_struct_arg_t single_element_struct_ret() -{ - single_element_struct_arg_t s = { 0 }; - return s; +// Struct of 4 chars (32 bits) should be coerced to i32 +typedef struct four_chars { + char a, b, c, d; +} four_chars; + +// CHECK-LABEL: define{{.*}} i32 @return_four_chars(i32 %x.coerce) +four_chars return_four_chars(four_chars x) { + return x; } -// CHECK: define{{.*}} i32 @nested_single_element_struct_ret() -// CHECK: ret i32 0 -nested_single_element_struct_arg_t nested_single_element_struct_ret() -{ - nested_single_element_struct_arg_t s = { 0 }; - return s; +// Struct of 4 shorts (64 bits) should be coerced to [2 x i32] +typedef struct four_shorts { + short a, b, c, d; +} four_shorts; + +// CHECK-LABEL: define{{.*}} [2 x i32] @return_four_shorts([2 x i32] %x.coerce) +four_shorts return_four_shorts(four_shorts x) { + return x; } -// CHECK: define{{.*}} %struct.struct_arg @func_struct_ret() -// CHECK: ret %struct.struct_arg zeroinitializer -struct_arg_t func_struct_ret() -{ - struct_arg_t s = { 0 }; - return s; +// ============================================================================ +// SECTION 4: Mixed types - floats prevent coercion even with small integers +// ============================================================================ + +typedef struct char_and_float { + char c; + float f; +} char_and_float; + +// CHECK-LABEL: define{{.*}} %struct.char_and_float @return_char_and_float(i8 %x.coerce0, float %x.coerce1) +// CHECK: ret %struct.char_and_float +char_and_float return_char_and_float(char_and_float x) { + return x; } -// CHECK: define{{.*}} %struct.struct_padding_arg @func_struct_padding_ret() -// CHECK: ret %struct.struct_padding_arg zeroinitializer -struct_padding_arg func_struct_padding_ret() -{ - struct_padding_arg s = { 0 }; - return s; +typedef struct short_and_float { + short s; + float f; +} short_and_float; + +// CHECK-LABEL: define{{.*}} %struct.short_and_float @return_short_and_float(i16 %x.coerce0, float %x.coerce1) +// CHECK: ret %struct.short_and_float +short_and_float return_short_and_float(short_and_float x) { + return x; } -// CHECK: define{{.*}} [2 x i32] @func_struct_char_x8_ret() -// CHECK: ret [2 x i32] zeroinitializer -struct_char_x8 func_struct_char_x8_ret() -{ - struct_char_x8 s = { 0 }; - return s; +// Small int + full-sized int should NOT be coerced +typedef struct char_and_int { + char c; + int i; +} char_and_int; + +// CHECK-LABEL: define{{.*}} %struct.char_and_int @return_char_and_int(i8 %x.coerce0, i32 %x.coerce1) +// CHECK: ret %struct.char_and_int +char_and_int return_char_and_int(char_and_int x) { + return x; } -// CHECK: define{{.*}} i32 @func_struct_char_x4_ret() -// CHECK: ret i32 0 -struct_char_x4 func_struct_char_x4_ret() -{ - struct_char_x4 s = { 0 }; - return s; +// ============================================================================ +// SECTION 5: Exotic/Complex aggregates (per reviewer request) +// ============================================================================ + +// --- Nested structs --- + +typedef struct inner_chars { + char a, b; +} inner_chars; + +typedef struct outer_with_inner_chars { + inner_chars inner; + char c, d; +} outer_with_inner_chars; + +// All chars, 32 bits total - should be coerced to i32 +// CHECK-LABEL: define{{.*}} i32 @return_nested_chars(i32 %x.coerce) +outer_with_inner_chars return_nested_chars(outer_with_inner_chars x) { + return x; } -// CHECK: define{{.*}} i32 @func_struct_char_x3_ret() -// CHECK: ret i32 0 -struct_char_x3 func_struct_char_x3_ret() -{ - struct_char_x3 s = { 0 }; - return s; +typedef struct inner_with_float { + char c; + float f; +} inner_with_float; + +typedef struct outer_with_float_inner { + inner_with_float inner; +} outer_with_float_inner; + +// Nested struct contains float - should NOT be coerced +// CHECK-LABEL: define{{.*}} %struct.outer_with_float_inner @return_nested_with_float(%struct.inner_with_float %x.coerce) +// CHECK: ret %struct.outer_with_float_inner +outer_with_float_inner return_nested_with_float(outer_with_float_inner x) { + return x; } -// CHECK: define{{.*}} i16 @func_struct_char_x2_ret() -struct_char_x2 func_struct_char_x2_ret() -{ - struct_char_x2 s = { 0 }; - return s; +// --- Arrays within structs --- + +typedef struct char_array_struct { + char arr[4]; +} char_array_struct; + +// Array of 4 chars = 32 bits, all small ints - should be coerced to i32 +// CHECK-LABEL: define{{.*}} i32 @return_char_array(i32 %x.coerce) +char_array_struct return_char_array(char_array_struct x) { + return x; } -// CHECK: define{{.*}} i8 @func_struct_char_x1_ret() -// CHECK: ret i8 0 -struct_char_x1 func_struct_char_x1_ret() -{ - struct_char_x1 s = { 0 }; - return s; +typedef struct short_array_struct { + short arr[2]; +} short_array_struct; + +// Array of 2 shorts = 32 bits, all small ints - should be coerced to i32 +// CHECK-LABEL: define{{.*}} i32 @return_short_array(i32 %x.coerce) +short_array_struct return_short_array(short_array_struct x) { + return x; } -// CHECK: define{{.*}} %struct.struct_arr16 @func_ret_struct_arr16() -// CHECK: ret %struct.struct_arr16 zeroinitializer -struct_arr16 func_ret_struct_arr16() -{ - struct_arr16 s = { 0 }; - return s; +typedef struct int_array_struct { + int arr[2]; +} int_array_struct; + +// Array of 2 ints = 64 bits, but ints are full-sized - should NOT be coerced +// CHECK-LABEL: define{{.*}} %struct.int_array_struct @return_int_array([2 x i32] %x.coerce) +// CHECK: ret %struct.int_array_struct +int_array_struct return_int_array(int_array_struct x) { + return x; } -// CHECK: define{{.*}} void @func_ret_struct_arr32(ptr addrspace(5) dead_on_unwind noalias writable writeonly sret(%struct.struct_arr32) align 4 captures(none) initializes((0, 128)) %agg.result) -struct_arr32 func_ret_struct_arr32() -{ - struct_arr32 s = { 0 }; - return s; +typedef struct float_array_struct { + float arr[2]; +} float_array_struct; + +// Array of 2 floats - should NOT be coerced +// CHECK-LABEL: define{{.*}} %struct.float_array_struct @return_float_array([2 x float] %x.coerce) +// CHECK: ret %struct.float_array_struct +float_array_struct return_float_array(float_array_struct x) { + return x; } -// CHECK: define{{.*}} void @func_ret_struct_arr33(ptr addrspace(5) dead_on_unwind noalias writable writeonly sret(%struct.struct_arr33) align 4 captures(none) initializes((0, 132)) %agg.result) -struct_arr33 func_ret_struct_arr33() -{ - struct_arr33 s = { 0 }; - return s; +// --- Complex combinations --- + +typedef struct mixed_nested { + struct { + char a; + char b; + } inner; + short s; +} mixed_nested; + +// All small integers (nested anonymous struct + short) = 32 bits - should be coerced +// CHECK-LABEL: define{{.*}} i32 @return_mixed_nested(i32 %x.coerce) +mixed_nested return_mixed_nested(mixed_nested x) { + return x; } -// CHECK: define{{.*}} %struct.struct_char_arr32 @func_ret_struct_char_arr32() -struct_char_arr32 func_ret_struct_char_arr32() -{ - struct_char_arr32 s = { 0 }; - return s; +typedef struct deeply_nested_chars { + struct { + struct { + char a, b; + } level2; + char c, d; + } level1; +} deeply_nested_chars; + +// Deeply nested, but all chars = 32 bits - should be coerced +// CHECK-LABEL: define{{.*}} i32 @return_deeply_nested(i32 %x.coerce) +deeply_nested_chars return_deeply_nested(deeply_nested_chars x) { + return x; } -// CHECK: define{{.*}} %union.transparent_u @func_transparent_union_ret() local_unnamed_addr #[[ATTR1:[0-9]+]] { -// CHECK: ret %union.transparent_u zeroinitializer -transparent_u func_transparent_union_ret() -{ - transparent_u u = { 0 }; - return u; +typedef struct deeply_nested_with_float { + struct { + struct { + char a; + float f; // Float buried deep + } level2; + } level1; +} deeply_nested_with_float; + +// Float buried in nested struct - should NOT be coerced +// CHECK-LABEL: define{{.*}} %struct.deeply_nested_with_float @return_deeply_nested_float +// CHECK: ret %struct.deeply_nested_with_float +deeply_nested_with_float return_deeply_nested_float(deeply_nested_with_float x) { + return x; } -// CHECK: define{{.*}} %struct.different_size_type_pair @func_different_size_type_pair_ret() -different_size_type_pair func_different_size_type_pair_ret() -{ - different_size_type_pair s = { 0 }; - return s; +// --- Edge cases --- + +// Single char +typedef struct single_char { + char c; +} single_char; + +// CHECK-LABEL: define{{.*}} i8 @return_single_char(i8 %x.coerce) +single_char return_single_char(single_char x) { + return x; } -// CHECK: define{{.*}} void @func_flexible_array_ret(ptr addrspace(5) dead_on_unwind noalias writable writeonly sret(%struct.flexible_array) align 4 captures(none) initializes((0, 4)) %agg.result) -flexible_array func_flexible_array_ret() -{ - flexible_array s = { 0 }; - return s; +// Three chars (24 bits, rounds up to 32) +typedef struct three_chars { + char a, b, c; +} three_chars; + +// CHECK-LABEL: define{{.*}} i32 @return_three_chars(i32 %x.coerce) +three_chars return_three_chars(three_chars x) { + return x; } -// CHECK: define{{.*}} void @func_reg_state_lo(<4 x i32> noundef %arg0, <4 x i32> noundef %arg1, <4 x i32> noundef %arg2, i32 noundef %arg3, i32 %s.coerce0, float %s.coerce1, i32 %s.coerce2) -void func_reg_state_lo(int4 arg0, int4 arg1, int4 arg2, int arg3, struct_arg_t s) { } +// Five chars (40 bits, rounds up to 64) +typedef struct five_chars { + char a, b, c, d, e; +} five_chars; -// CHECK: define{{.*}} void @func_reg_state_hi(<4 x i32> noundef %arg0, <4 x i32> noundef %arg1, <4 x i32> noundef %arg2, i32 noundef %arg3, i32 noundef %arg4, ptr addrspace(5) noundef readnone byref(%struct.struct_arg) align 4 captures(none) %{{.*}}) -void func_reg_state_hi(int4 arg0, int4 arg1, int4 arg2, int arg3, int arg4, struct_arg_t s) { } +// CHECK-LABEL: define{{.*}} [2 x i32] @return_five_chars([2 x i32] %x.coerce) +five_chars return_five_chars(five_chars x) { + return x; +} -// XXX - Why don't the inner structs flatten? -// CHECK: define{{.*}} void @func_reg_state_num_regs_nested_struct(<4 x i32> noundef %arg0, i32 noundef %arg1, i32 %arg2.coerce0, %struct.nested %arg2.coerce1, i32 %arg3.coerce0, %struct.nested %arg3.coerce1, ptr addrspace(5) noundef readnone byref(%struct.num_regs_nested_struct) align 8 captures(none) %{{.*}}) -void func_reg_state_num_regs_nested_struct(int4 arg0, int arg1, num_regs_nested_struct arg2, num_regs_nested_struct arg3, num_regs_nested_struct arg4) { } +// --- Union tests --- -// CHECK: define{{.*}} void @func_double_nested_struct_arg(<4 x i32> noundef %arg0, i32 noundef %arg1, i32 %arg2.coerce0, %struct.double_nested %arg2.coerce1, i16 %arg2.coerce2) -void func_double_nested_struct_arg(int4 arg0, int arg1, double_nested_struct arg2) { } +typedef union char_int_union { + char c; + int i; +} char_int_union; -// CHECK: define{{.*}} %struct.double_nested_struct @func_double_nested_struct_ret(<4 x i32> noundef %arg0, i32 noundef %arg1) -double_nested_struct func_double_nested_struct_ret(int4 arg0, int arg1) { - double_nested_struct s = { 0 }; - return s; +// Union with int - preserves union type +// CHECK-LABEL: define{{.*}} %union.char_int_union @return_char_int_union(i32 %x.coerce) +char_int_union return_char_int_union(char_int_union x) { + return x; } -// CHECK: define{{.*}} void @func_large_struct_padding_arg_direct(i8 %arg.coerce0, i32 %arg.coerce1, i8 %arg.coerce2, i32 %arg.coerce3, i8 %arg.coerce4, i8 %arg.coerce5, i16 %arg.coerce6, i16 %arg.coerce7, [3 x i8] %arg.coerce8, i64 %arg.coerce9, i32 %arg.coerce10, i8 %arg.coerce11, i32 %arg.coerce12, i16 %arg.coerce13, i8 %arg.coerce14) -void func_large_struct_padding_arg_direct(large_struct_padding arg) { } +typedef union float_int_union { + float f; + int i; +} float_int_union; -// CHECK: define{{.*}} void @func_large_struct_padding_arg_store(ptr addrspace(1) noundef writeonly captures(none) initializes((0, 56)) %out, ptr addrspace(5) noundef readonly byref(%struct.large_struct_padding) align 8 captures(none) %{{.*}}) -void func_large_struct_padding_arg_store(global large_struct_padding* out, large_struct_padding arg) { - *out = arg; +// Union with float - preserves union type +// CHECK-LABEL: define{{.*}} %union.float_int_union @return_float_int_union(float %x.coerce) +float_int_union return_float_int_union(float_int_union x) { + return x; } -// CHECK: define{{.*}} void @v3i32_reg_count(<3 x i32> noundef %arg1, <3 x i32> noundef %arg2, <3 x i32> noundef %arg3, <3 x i32> noundef %arg4, i32 %arg5.coerce0, float %arg5.coerce1, i32 %arg5.coerce2) -void v3i32_reg_count(int3 arg1, int3 arg2, int3 arg3, int3 arg4, struct_arg_t arg5) { } - -// Function signature from blender, nothing should be passed byval. The v3i32 -// should not count as 4 passed registers. -// CHECK: define{{.*}} void @v3i32_pair_reg_count(ptr addrspace(5) noundef readnone captures(none) %arg0, <3 x i32> %arg1.coerce0, <3 x i32> %arg1.coerce1, <3 x i32> noundef %arg2, <3 x i32> %arg3.coerce0, <3 x i32> %arg3.coerce1, <3 x i32> noundef %arg4, float noundef %arg5) -void v3i32_pair_reg_count(int3_pair *arg0, int3_pair arg1, int3 arg2, int3_pair arg3, int3 arg4, float arg5) { } - -// Each short4 should fit pack into 2 registers. -// CHECK: define{{.*}} void @v4i16_reg_count(<4 x i16> noundef %arg0, <4 x i16> noundef %arg1, <4 x i16> noundef %arg2, <4 x i16> noundef %arg3, <4 x i16> noundef %arg4, <4 x i16> noundef %arg5, i32 %arg6.coerce0, i32 %arg6.coerce1, i32 %arg6.coerce2, i32 %arg6.coerce3) -void v4i16_reg_count(short4 arg0, short4 arg1, short4 arg2, short4 arg3, - short4 arg4, short4 arg5, struct_4regs arg6) { } - -// CHECK: define{{.*}} void @v4i16_pair_reg_count_over(<4 x i16> noundef %arg0, <4 x i16> noundef %arg1, <4 x i16> noundef %arg2, <4 x i16> noundef %arg3, <4 x i16> noundef %arg4, <4 x i16> noundef %arg5, <4 x i16> noundef %arg6, ptr addrspace(5) noundef readnone byref(%struct.struct_4regs) align 4 captures(none) %{{.*}}) -void v4i16_pair_reg_count_over(short4 arg0, short4 arg1, short4 arg2, short4 arg3, - short4 arg4, short4 arg5, short4 arg6, struct_4regs arg7) { } - -// CHECK: define{{.*}} void @v3i16_reg_count(<3 x i16> noundef %arg0, <3 x i16> noundef %arg1, <3 x i16> noundef %arg2, <3 x i16> noundef %arg3, <3 x i16> noundef %arg4, <3 x i16> noundef %arg5, i32 %arg6.coerce0, i32 %arg6.coerce1, i32 %arg6.coerce2, i32 %arg6.coerce3) -void v3i16_reg_count(short3 arg0, short3 arg1, short3 arg2, short3 arg3, - short3 arg4, short3 arg5, struct_4regs arg6) { } - -// CHECK: define{{.*}} void @v3i16_reg_count_over(<3 x i16> noundef %arg0, <3 x i16> noundef %arg1, <3 x i16> noundef %arg2, <3 x i16> noundef %arg3, <3 x i16> noundef %arg4, <3 x i16> noundef %arg5, <3 x i16> noundef %arg6, ptr addrspace(5) noundef readnone byref(%struct.struct_4regs) align 4 captures(none) %{{.*}}) -void v3i16_reg_count_over(short3 arg0, short3 arg1, short3 arg2, short3 arg3, - short3 arg4, short3 arg5, short3 arg6, struct_4regs arg7) { } - -// CHECK: define{{.*}} void @v2i16_reg_count(<2 x i16> noundef %arg0, <2 x i16> noundef %arg1, <2 x i16> noundef %arg2, <2 x i16> noundef %arg3, <2 x i16> noundef %arg4, <2 x i16> noundef %arg5, <2 x i16> noundef %arg6, <2 x i16> noundef %arg7, <2 x i16> noundef %arg8, <2 x i16> noundef %arg9, <2 x i16> noundef %arg10, <2 x i16> noundef %arg11, i32 %arg13.coerce0, i32 %arg13.coerce1, i32 %arg13.coerce2, i32 %arg13.coerce3) -void v2i16_reg_count(short2 arg0, short2 arg1, short2 arg2, short2 arg3, - short2 arg4, short2 arg5, short2 arg6, short2 arg7, - short2 arg8, short2 arg9, short2 arg10, short2 arg11, - struct_4regs arg13) { } - -// CHECK: define{{.*}} void @v2i16_reg_count_over(<2 x i16> noundef %arg0, <2 x i16> noundef %arg1, <2 x i16> noundef %arg2, <2 x i16> noundef %arg3, <2 x i16> noundef %arg4, <2 x i16> noundef %arg5, <2 x i16> noundef %arg6, <2 x i16> noundef %arg7, <2 x i16> noundef %arg8, <2 x i16> noundef %arg9, <2 x i16> noundef %arg10, <2 x i16> noundef %arg11, <2 x i16> noundef %arg12, ptr addrspace(5) noundef readnone byref(%struct.struct_4regs) align 4 captures(none) %{{.*}}) -void v2i16_reg_count_over(short2 arg0, short2 arg1, short2 arg2, short2 arg3, - short2 arg4, short2 arg5, short2 arg6, short2 arg7, - short2 arg8, short2 arg9, short2 arg10, short2 arg11, - short2 arg12, struct_4regs arg13) { } - -// CHECK: define{{.*}} void @v2i8_reg_count(<2 x i8> noundef %arg0, <2 x i8> noundef %arg1, <2 x i8> noundef %arg2, <2 x i8> noundef %arg3, <2 x i8> noundef %arg4, <2 x i8> noundef %arg5, i32 %arg6.coerce0, i32 %arg6.coerce1, i32 %arg6.coerce2, i32 %arg6.coerce3) -void v2i8_reg_count(char2 arg0, char2 arg1, char2 arg2, char2 arg3, - char2 arg4, char2 arg5, struct_4regs arg6) { } - -// CHECK: define{{.*}} void @v2i8_reg_count_over(<2 x i8> noundef %arg0, <2 x i8> noundef %arg1, <2 x i8> noundef %arg2, <2 x i8> noundef %arg3, <2 x i8> noundef %arg4, <2 x i8> noundef %arg5, i32 noundef %arg6, ptr addrspace(5) noundef readnone byref(%struct.struct_4regs) align 4 captures(none) %{{.*}}) -void v2i8_reg_count_over(char2 arg0, char2 arg1, char2 arg2, char2 arg3, - char2 arg4, char2 arg5, int arg6, struct_4regs arg7) { } - -// CHECK: define{{.*}} void @num_regs_left_64bit_aggregate(<4 x i32> noundef %arg0, <4 x i32> noundef %arg1, <4 x i32> noundef %arg2, <3 x i32> noundef %arg3, [2 x i32] %arg4.coerce, i32 noundef %arg5) -void num_regs_left_64bit_aggregate(int4 arg0, int4 arg1, int4 arg2, int3 arg3, struct_char_x8 arg4, int arg5) { } +// --- Padding scenarios --- + +typedef struct char_with_padding { + char c; + // 3 bytes padding + int i; +} char_with_padding; + +// Has int, should NOT be coerced even though small + padding +// CHECK-LABEL: define{{.*}} %struct.char_with_padding @return_char_with_padding(i8 %x.coerce0, i32 %x.coerce1) +// CHECK: ret %struct.char_with_padding +char_with_padding return_char_with_padding(char_with_padding x) { + return x; +} _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
