[PATCH] D40806: CodeGen: Fix invalid bitcasts for memcpy
rjmccall added inline comments. Comment at: cfe/trunk/lib/CodeGen/CGCall.cpp:1238 + Address Casted = CGF.Builder.CreateBitCast(Tmp, CGF.AllocaInt8PtrTy); + Address SrcCasted = CGF.Builder.CreateBitCast(Src, CGF.AllocaInt8PtrTy); CGF.Builder.CreateMemCpy(Casted, SrcCasted, There is a CreateElementBitCast method on CGBuilderTy that preserves the source address space; that seems like the obviously correct fix here instead of assuming any specific address space on either operand. Comment at: cfe/trunk/lib/CodeGen/CGCall.cpp:1320 +Address Casted = CGF.Builder.CreateBitCast(Tmp, CGF.AllocaInt8PtrTy); +Address DstCasted = CGF.Builder.CreateBitCast(Dst, CGF.AllocaInt8PtrTy); CGF.Builder.CreateMemCpy(DstCasted, Casted, Same. Repository: rL LLVM https://reviews.llvm.org/D40806 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D40806: CodeGen: Fix invalid bitcasts for memcpy
This revision was automatically updated to reflect the committed changes. Closed by commit rL32: CodeGen: Fix invalid bitcasts for memcpy (authored by yaxunl). Changed prior to commit: https://reviews.llvm.org/D40806?vs=125417&id=125863#toc Repository: rL LLVM https://reviews.llvm.org/D40806 Files: cfe/trunk/lib/CodeGen/CGCall.cpp cfe/trunk/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl Index: cfe/trunk/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl === --- cfe/trunk/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl +++ cfe/trunk/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl @@ -1,6 +1,6 @@ // REQUIRES: amdgpu-registered-target -// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -S -emit-llvm -o - %s | FileCheck %s -// RUN: %clang_cc1 -triple r600-unknown-unknown -S -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown-amdgiz -S -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK,AMDGCN %s +// RUN: %clang_cc1 -triple r600-unknown-unknown -S -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK,R600 %s typedef __attribute__(( ext_vector_type(2) )) char char2; typedef __attribute__(( ext_vector_type(3) )) char char3; @@ -309,7 +309,8 @@ // CHECK: void @func_different_size_type_pair_arg(i64 %arg1.coerce0, i32 %arg1.coerce1) void func_different_size_type_pair_arg(different_size_type_pair arg1) { } -// CHECK: void @func_flexible_array_arg(%struct.flexible_array* byval nocapture align 4 %arg) +// AMDGCN: void @func_flexible_array_arg(%struct.flexible_array addrspace(5)* byval nocapture align 4 %arg) +// R600: void @func_flexible_array_arg(%struct.flexible_array* byval nocapture align 4 %arg) void func_flexible_array_arg(flexible_array arg) { } // CHECK: define float @func_f32_ret() @@ -404,14 +405,16 @@ return s; } -// CHECK: define void @func_ret_struct_arr32(%struct.struct_arr32* noalias nocapture sret %agg.result) +// AMDGCN: define void @func_ret_struct_arr32(%struct.struct_arr32 addrspace(5)* noalias nocapture sret %agg.result) +// R600: 
define void @func_ret_struct_arr32(%struct.struct_arr32* noalias nocapture sret %agg.result) struct_arr32 func_ret_struct_arr32() { struct_arr32 s = { 0 }; return s; } -// CHECK: define void @func_ret_struct_arr33(%struct.struct_arr33* noalias nocapture sret %agg.result) +// AMDGCN: define void @func_ret_struct_arr33(%struct.struct_arr33 addrspace(5)* noalias nocapture sret %agg.result) +// R600: define void @func_ret_struct_arr33(%struct.struct_arr33* noalias nocapture sret %agg.result) struct_arr33 func_ret_struct_arr33() { struct_arr33 s = { 0 }; @@ -440,7 +443,8 @@ return s; } -// CHECK: define void @func_flexible_array_ret(%struct.flexible_array* noalias nocapture sret %agg.result) +// AMDGCN: define void @func_flexible_array_ret(%struct.flexible_array addrspace(5)* noalias nocapture sret %agg.result) +// R600: define void @func_flexible_array_ret(%struct.flexible_array* noalias nocapture sret %agg.result) flexible_array func_flexible_array_ret() { flexible_array s = { 0 }; @@ -450,11 +454,13 @@ // CHECK: define void @func_reg_state_lo(<4 x i32> %arg0, <4 x i32> %arg1, <4 x i32> %arg2, i32 %arg3, i32 %s.coerce0, float %s.coerce1, i32 %s.coerce2) void func_reg_state_lo(int4 arg0, int4 arg1, int4 arg2, int arg3, struct_arg_t s) { } -// CHECK: define void @func_reg_state_hi(<4 x i32> %arg0, <4 x i32> %arg1, <4 x i32> %arg2, i32 %arg3, i32 %arg4, %struct.struct_arg* byval nocapture align 4 %s) +// AMDGCN: define void @func_reg_state_hi(<4 x i32> %arg0, <4 x i32> %arg1, <4 x i32> %arg2, i32 %arg3, i32 %arg4, %struct.struct_arg addrspace(5)* byval nocapture align 4 %s) +// R600: define void @func_reg_state_hi(<4 x i32> %arg0, <4 x i32> %arg1, <4 x i32> %arg2, i32 %arg3, i32 %arg4, %struct.struct_arg* byval nocapture align 4 %s) void func_reg_state_hi(int4 arg0, int4 arg1, int4 arg2, int arg3, int arg4, struct_arg_t s) { } // XXX - Why don't the inner structs flatten? 
-// CHECK: define void @func_reg_state_num_regs_nested_struct(<4 x i32> %arg0, i32 %arg1, i32 %arg2.coerce0, %struct.nested %arg2.coerce1, i32 %arg3.coerce0, %struct.nested %arg3.coerce1, %struct.num_regs_nested_struct* byval nocapture align 8 %arg4) +// AMDGCN: define void @func_reg_state_num_regs_nested_struct(<4 x i32> %arg0, i32 %arg1, i32 %arg2.coerce0, %struct.nested %arg2.coerce1, i32 %arg3.coerce0, %struct.nested %arg3.coerce1, %struct.num_regs_nested_struct addrspace(5)* byval nocapture align 8 %arg4) +// R600: define void @func_reg_state_num_regs_nested_struct(<4 x i32> %arg0, i32 %arg1, i32 %arg2.coerce0, %struct.nested %arg2.coerce1, i32 %arg3.coerce0, %struct.nested %arg3.coerce1, %struct.num_regs_nested_struct* byval nocapture align 8 %arg4) void func_reg_state_num_regs_nested_struct(int4 arg0, int arg1, num_regs_nested_struct arg2, num_regs_nested_struct arg3, num_regs_nested_struct arg4) { } // CHECK: define void
[PATCH] D40806: CodeGen: Fix invalid bitcasts for memcpy
arsenm accepted this revision. arsenm added a comment. This revision is now accepted and ready to land. LGTM https://reviews.llvm.org/D40806 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D40806: CodeGen: Fix invalid bitcasts for memcpy
yaxunl created this revision. Herald added a subscriber: nhaehnle. CreateCoercedLoad/CreateCoercedStore assume that the pointer arguments of memcpy are in address space 0, which is not correct and causes invalid bitcasts for the triple amdgcn---amdgiz. This is fixed by using the alloca address space instead. https://reviews.llvm.org/D40806 Files: lib/CodeGen/CGCall.cpp test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl Index: test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl === --- test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl +++ test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl @@ -1,6 +1,6 @@ // REQUIRES: amdgpu-registered-target -// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -S -emit-llvm -o - %s | FileCheck %s -// RUN: %clang_cc1 -triple r600-unknown-unknown -S -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown-amdgiz -S -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK,AMDGCN %s +// RUN: %clang_cc1 -triple r600-unknown-unknown -S -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK,R600 %s typedef __attribute__(( ext_vector_type(2) )) char char2; typedef __attribute__(( ext_vector_type(3) )) char char3; @@ -309,7 +309,8 @@ // CHECK: void @func_different_size_type_pair_arg(i64 %arg1.coerce0, i32 %arg1.coerce1) void func_different_size_type_pair_arg(different_size_type_pair arg1) { } -// CHECK: void @func_flexible_array_arg(%struct.flexible_array* byval nocapture align 4 %arg) +// AMDGCN: void @func_flexible_array_arg(%struct.flexible_array addrspace(5)* byval nocapture align 4 %arg) +// R600: void @func_flexible_array_arg(%struct.flexible_array* byval nocapture align 4 %arg) void func_flexible_array_arg(flexible_array arg) { } // CHECK: define float @func_f32_ret() @@ -404,14 +405,16 @@ return s; } -// CHECK: define void @func_ret_struct_arr32(%struct.struct_arr32* noalias nocapture sret %agg.result) +// AMDGCN: define void @func_ret_struct_arr32(%struct.struct_arr32 addrspace(5)* noalias nocapture sret %agg.result) +// R600: define void 
@func_ret_struct_arr32(%struct.struct_arr32* noalias nocapture sret %agg.result) struct_arr32 func_ret_struct_arr32() { struct_arr32 s = { 0 }; return s; } -// CHECK: define void @func_ret_struct_arr33(%struct.struct_arr33* noalias nocapture sret %agg.result) +// AMDGCN: define void @func_ret_struct_arr33(%struct.struct_arr33 addrspace(5)* noalias nocapture sret %agg.result) +// R600: define void @func_ret_struct_arr33(%struct.struct_arr33* noalias nocapture sret %agg.result) struct_arr33 func_ret_struct_arr33() { struct_arr33 s = { 0 }; @@ -440,7 +443,8 @@ return s; } -// CHECK: define void @func_flexible_array_ret(%struct.flexible_array* noalias nocapture sret %agg.result) +// AMDGCN: define void @func_flexible_array_ret(%struct.flexible_array addrspace(5)* noalias nocapture sret %agg.result) +// R600: define void @func_flexible_array_ret(%struct.flexible_array* noalias nocapture sret %agg.result) flexible_array func_flexible_array_ret() { flexible_array s = { 0 }; @@ -450,11 +454,13 @@ // CHECK: define void @func_reg_state_lo(<4 x i32> %arg0, <4 x i32> %arg1, <4 x i32> %arg2, i32 %arg3, i32 %s.coerce0, float %s.coerce1, i32 %s.coerce2) void func_reg_state_lo(int4 arg0, int4 arg1, int4 arg2, int arg3, struct_arg_t s) { } -// CHECK: define void @func_reg_state_hi(<4 x i32> %arg0, <4 x i32> %arg1, <4 x i32> %arg2, i32 %arg3, i32 %arg4, %struct.struct_arg* byval nocapture align 4 %s) +// AMDGCN: define void @func_reg_state_hi(<4 x i32> %arg0, <4 x i32> %arg1, <4 x i32> %arg2, i32 %arg3, i32 %arg4, %struct.struct_arg addrspace(5)* byval nocapture align 4 %s) +// R600: define void @func_reg_state_hi(<4 x i32> %arg0, <4 x i32> %arg1, <4 x i32> %arg2, i32 %arg3, i32 %arg4, %struct.struct_arg* byval nocapture align 4 %s) void func_reg_state_hi(int4 arg0, int4 arg1, int4 arg2, int arg3, int arg4, struct_arg_t s) { } // XXX - Why don't the inner structs flatten? 
-// CHECK: define void @func_reg_state_num_regs_nested_struct(<4 x i32> %arg0, i32 %arg1, i32 %arg2.coerce0, %struct.nested %arg2.coerce1, i32 %arg3.coerce0, %struct.nested %arg3.coerce1, %struct.num_regs_nested_struct* byval nocapture align 8 %arg4) +// AMDGCN: define void @func_reg_state_num_regs_nested_struct(<4 x i32> %arg0, i32 %arg1, i32 %arg2.coerce0, %struct.nested %arg2.coerce1, i32 %arg3.coerce0, %struct.nested %arg3.coerce1, %struct.num_regs_nested_struct addrspace(5)* byval nocapture align 8 %arg4) +// R600: define void @func_reg_state_num_regs_nested_struct(<4 x i32> %arg0, i32 %arg1, i32 %arg2.coerce0, %struct.nested %arg2.coerce1, i32 %arg3.coerce0, %struct.nested %arg3.coerce1, %struct.num_regs_nested_struct* byval nocapture align 8 %arg4) void func_reg_state_num_regs_nested_struct(int4 arg0, int arg1, num_regs_nested_struct arg2, num_regs_nested_struct arg3, num_regs_nested_struct arg4) { } // CHECK: define void @func_double_nested_struct_arg(<4 x i32> %arg0, i32