Author: yaxunl Date: Thu Jun 29 11:47:45 2017 New Revision: 306721 URL: http://llvm.org/viewvc/llvm-project?rev=306721&view=rev Log: CodeGen: Fix invalid bitcast for coerced function argument
Clang assumes that a coerced function argument is in address space 0, which is not always true and results in invalid bitcasts. This patch fixes a failure in the OpenCL conformance test api/get_kernel_arg_info with the amdgcn---amdgizcl triple, where a non-zero alloca address space is used. Differential Revision: https://reviews.llvm.org/D34777 Modified: cfe/trunk/lib/CodeGen/CGCall.cpp cfe/trunk/test/CodeGenOpenCL/addr-space-struct-arg.cl Modified: cfe/trunk/lib/CodeGen/CGCall.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGCall.cpp?rev=306721&r1=306720&r2=306721&view=diff ============================================================================== --- cfe/trunk/lib/CodeGen/CGCall.cpp (original) +++ cfe/trunk/lib/CodeGen/CGCall.cpp Thu Jun 29 11:47:45 2017 @@ -1297,7 +1297,7 @@ static void CreateCoercedStore(llvm::Val // If store is legal, just bitcast the src pointer. if (SrcSize <= DstSize) { - Dst = CGF.Builder.CreateBitCast(Dst, llvm::PointerType::getUnqual(SrcTy)); + Dst = CGF.Builder.CreateElementBitCast(Dst, SrcTy); BuildAggStore(CGF, Src, Dst, DstIsVolatile); } else { // Otherwise do coercion through memory. 
This is stupid, but @@ -2412,8 +2412,7 @@ void CodeGenFunction::EmitFunctionProlog Address AddrToStoreInto = Address::invalid(); if (SrcSize <= DstSize) { - AddrToStoreInto = - Builder.CreateBitCast(Ptr, llvm::PointerType::getUnqual(STy)); + AddrToStoreInto = Builder.CreateElementBitCast(Ptr, STy); } else { AddrToStoreInto = CreateTempAlloca(STy, Alloca.getAlignment(), "coerce"); Modified: cfe/trunk/test/CodeGenOpenCL/addr-space-struct-arg.cl URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenOpenCL/addr-space-struct-arg.cl?rev=306721&r1=306720&r2=306721&view=diff ============================================================================== --- cfe/trunk/test/CodeGenOpenCL/addr-space-struct-arg.cl (original) +++ cfe/trunk/test/CodeGenOpenCL/addr-space-struct-arg.cl Thu Jun 29 11:47:45 2017 @@ -1,4 +1,5 @@ -// RUN: %clang_cc1 %s -emit-llvm -o - -O0 -ffake-address-space-map -triple i686-pc-darwin | FileCheck %s +// RUN: %clang_cc1 %s -emit-llvm -o - -O0 -finclude-default-header -ffake-address-space-map -triple i686-pc-darwin | FileCheck -check-prefixes=COM,X86 %s +// RUN: %clang_cc1 %s -emit-llvm -o - -O0 -finclude-default-header -triple amdgcn-amdhsa-amd-amdgizcl | FileCheck -check-prefixes=COM,AMD %s typedef struct { int cells[9]; @@ -8,16 +9,57 @@ typedef struct { int cells[16]; } Mat4X4; +struct StructOneMember { + int2 x; +}; + +struct StructTwoMember { + int2 x; + int2 y; +}; + +// COM-LABEL: define void @foo Mat4X4 __attribute__((noinline)) foo(Mat3X3 in) { Mat4X4 out; return out; } +// COM-LABEL: define {{.*}} void @ker +// Expect two mem copies: one for the argument "in", and one for +// the return value. 
+// X86: call void @llvm.memcpy.p0i8.p1i8.i32(i8* +// X86: call void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)* +// AMD: call void @llvm.memcpy.p5i8.p1i8.i64(i8 addrspace(5)* +// AMD: call void @llvm.memcpy.p1i8.p5i8.i64(i8 addrspace(1)* kernel void ker(global Mat3X3 *in, global Mat4X4 *out) { out[0] = foo(in[1]); } -// Expect two mem copies: one for the argument "in", and one for -// the return value. -// CHECK: call void @llvm.memcpy.p0i8.p1i8.i32(i8* -// CHECK: call void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)* +// AMD-LABEL: define void @FuncOneMember(%struct.StructOneMember addrspace(5)* byval align 8 %u) +void FuncOneMember(struct StructOneMember u) { + u.x = (int2)(0, 0); +} + +// AMD-LABEL: define amdgpu_kernel void @KernelOneMember +// AMD-SAME: (<2 x i32> %[[u_coerce:.*]]) +// AMD: %[[u:.*]] = alloca %struct.StructOneMember, align 8, addrspace(5) +// AMD: %[[coerce_dive:.*]] = getelementptr inbounds %struct.StructOneMember, %struct.StructOneMember addrspace(5)* %[[u]], i32 0, i32 0 +// AMD: store <2 x i32> %[[u_coerce]], <2 x i32> addrspace(5)* %[[coerce_dive]] +// AMD: call void @FuncOneMember(%struct.StructOneMember addrspace(5)* byval align 8 %[[u]]) +kernel void KernelOneMember(struct StructOneMember u) { + FuncOneMember(u); +} + +// AMD-LABEL: define void @FuncTwoMember(%struct.StructTwoMember addrspace(5)* byval align 8 %u) +void FuncTwoMember(struct StructTwoMember u) { + u.x = (int2)(0, 0); +} + +// AMD-LABEL: define amdgpu_kernel void @KernelTwoMember +// AMD-SAME: (%struct.StructTwoMember %[[u_coerce:.*]]) +// AMD: %[[u:.*]] = alloca %struct.StructTwoMember, align 8, addrspace(5) +// AMD: store %struct.StructTwoMember %[[u_coerce]], %struct.StructTwoMember addrspace(5)* %[[u]] +// AMD: call void @FuncTwoMember(%struct.StructTwoMember addrspace(5)* byval align 8 %[[u]]) +kernel void KernelTwoMember(struct StructTwoMember u) { + FuncTwoMember(u); +} _______________________________________________ cfe-commits mailing list 
cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits