Author: yaxunl
Date: Thu Jun 29 11:47:45 2017
New Revision: 306721

URL: http://llvm.org/viewvc/llvm-project?rev=306721&view=rev
Log:
CodeGen: Fix invalid bitcast for coerced function argument

Clang assumes that a coerced function argument is in address space 0. This is
not always true, and when it is not, the assumption results in invalid bitcasts.

This patch fixes a failure in the OpenCL conformance test
api/get_kernel_arg_info with the amdgcn---amdgizcl triple, where a non-zero
alloca address space is used.

Differential Revision: https://reviews.llvm.org/D34777

Modified:
    cfe/trunk/lib/CodeGen/CGCall.cpp
    cfe/trunk/test/CodeGenOpenCL/addr-space-struct-arg.cl

Modified: cfe/trunk/lib/CodeGen/CGCall.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGCall.cpp?rev=306721&r1=306720&r2=306721&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGCall.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGCall.cpp Thu Jun 29 11:47:45 2017
@@ -1297,7 +1297,7 @@ static void CreateCoercedStore(llvm::Val
 
   // If store is legal, just bitcast the src pointer.
   if (SrcSize <= DstSize) {
-    Dst = CGF.Builder.CreateBitCast(Dst, llvm::PointerType::getUnqual(SrcTy));
+    Dst = CGF.Builder.CreateElementBitCast(Dst, SrcTy);
     BuildAggStore(CGF, Src, Dst, DstIsVolatile);
   } else {
     // Otherwise do coercion through memory. This is stupid, but
@@ -2412,8 +2412,7 @@ void CodeGenFunction::EmitFunctionProlog
 
         Address AddrToStoreInto = Address::invalid();
         if (SrcSize <= DstSize) {
-          AddrToStoreInto =
-            Builder.CreateBitCast(Ptr, llvm::PointerType::getUnqual(STy));
+          AddrToStoreInto = Builder.CreateElementBitCast(Ptr, STy);
         } else {
           AddrToStoreInto =
             CreateTempAlloca(STy, Alloca.getAlignment(), "coerce");

Modified: cfe/trunk/test/CodeGenOpenCL/addr-space-struct-arg.cl
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenOpenCL/addr-space-struct-arg.cl?rev=306721&r1=306720&r2=306721&view=diff
==============================================================================
--- cfe/trunk/test/CodeGenOpenCL/addr-space-struct-arg.cl (original)
+++ cfe/trunk/test/CodeGenOpenCL/addr-space-struct-arg.cl Thu Jun 29 11:47:45 
2017
@@ -1,4 +1,5 @@
-// RUN: %clang_cc1 %s -emit-llvm -o - -O0 -ffake-address-space-map -triple 
i686-pc-darwin | FileCheck %s
+// RUN: %clang_cc1 %s -emit-llvm -o - -O0 -finclude-default-header 
-ffake-address-space-map -triple i686-pc-darwin | FileCheck 
-check-prefixes=COM,X86 %s
+// RUN: %clang_cc1 %s -emit-llvm -o - -O0 -finclude-default-header -triple 
amdgcn-amdhsa-amd-amdgizcl | FileCheck -check-prefixes=COM,AMD %s
 
 typedef struct {
   int cells[9];
@@ -8,16 +9,57 @@ typedef struct {
   int cells[16];
 } Mat4X4;
 
+struct StructOneMember {
+  int2 x;
+};
+
+struct StructTwoMember {
+  int2 x;
+  int2 y;
+};
+
+// COM-LABEL: define void @foo
 Mat4X4 __attribute__((noinline)) foo(Mat3X3 in) {
   Mat4X4 out;
   return out;
 }
 
+// COM-LABEL: define {{.*}} void @ker
+// Expect two mem copies: one for the argument "in", and one for
+// the return value.
+// X86: call void @llvm.memcpy.p0i8.p1i8.i32(i8*
+// X86: call void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)*
+// AMD: call void @llvm.memcpy.p5i8.p1i8.i64(i8 addrspace(5)*
+// AMD: call void @llvm.memcpy.p1i8.p5i8.i64(i8 addrspace(1)*
 kernel void ker(global Mat3X3 *in, global Mat4X4 *out) {
   out[0] = foo(in[1]);
 }
 
-// Expect two mem copies: one for the argument "in", and one for
-// the return value.
-// CHECK: call void @llvm.memcpy.p0i8.p1i8.i32(i8*
-// CHECK: call void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)*
+// AMD-LABEL: define void @FuncOneMember(%struct.StructOneMember addrspace(5)* 
byval align 8 %u)
+void FuncOneMember(struct StructOneMember u) {
+  u.x = (int2)(0, 0);
+}
+
+// AMD-LABEL: define amdgpu_kernel void @KernelOneMember
+// AMD-SAME:  (<2 x i32> %[[u_coerce:.*]])
+// AMD:  %[[u:.*]] = alloca %struct.StructOneMember, align 8, addrspace(5)
+// AMD:  %[[coerce_dive:.*]] = getelementptr inbounds %struct.StructOneMember, 
%struct.StructOneMember addrspace(5)* %[[u]], i32 0, i32 0
+// AMD:  store <2 x i32> %[[u_coerce]], <2 x i32> addrspace(5)* 
%[[coerce_dive]]
+// AMD:  call void @FuncOneMember(%struct.StructOneMember addrspace(5)* byval 
align 8 %[[u]])
+kernel void KernelOneMember(struct StructOneMember u) {
+  FuncOneMember(u);
+}
+
+// AMD-LABEL: define void @FuncTwoMember(%struct.StructTwoMember addrspace(5)* 
byval align 8 %u)
+void FuncTwoMember(struct StructTwoMember u) {
+  u.x = (int2)(0, 0);
+}
+
+// AMD-LABEL: define amdgpu_kernel void @KernelTwoMember
+// AMD-SAME:  (%struct.StructTwoMember %[[u_coerce:.*]])
+// AMD:  %[[u:.*]] = alloca %struct.StructTwoMember, align 8, addrspace(5)
+// AMD:  store %struct.StructTwoMember %[[u_coerce]], %struct.StructTwoMember 
addrspace(5)* %[[u]]
+// AMD:  call void @FuncTwoMember(%struct.StructTwoMember addrspace(5)* byval 
align 8 %[[u]])
+kernel void KernelTwoMember(struct StructTwoMember u) {
+  FuncTwoMember(u);
+}


_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to