[PATCH] D34777: CodeGen: Fix invalid bitcast for coerced function argument

2017-06-29 Thread Yaxun Liu via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rL306721: CodeGen: Fix invalid bitcast for coerced function 
argument (authored by yaxunl).

Changed prior to commit:
  https://reviews.llvm.org/D34777?vs=104505=104711#toc

Repository:
  rL LLVM

https://reviews.llvm.org/D34777

Files:
  cfe/trunk/lib/CodeGen/CGCall.cpp
  cfe/trunk/test/CodeGenOpenCL/addr-space-struct-arg.cl


Index: cfe/trunk/test/CodeGenOpenCL/addr-space-struct-arg.cl
===
--- cfe/trunk/test/CodeGenOpenCL/addr-space-struct-arg.cl
+++ cfe/trunk/test/CodeGenOpenCL/addr-space-struct-arg.cl
@@ -1,4 +1,5 @@
-// RUN: %clang_cc1 %s -emit-llvm -o - -O0 -ffake-address-space-map -triple 
i686-pc-darwin | FileCheck %s
+// RUN: %clang_cc1 %s -emit-llvm -o - -O0 -finclude-default-header 
-ffake-address-space-map -triple i686-pc-darwin | FileCheck 
-check-prefixes=COM,X86 %s
+// RUN: %clang_cc1 %s -emit-llvm -o - -O0 -finclude-default-header -triple 
amdgcn-amdhsa-amd-amdgizcl | FileCheck -check-prefixes=COM,AMD %s
 
 typedef struct {
   int cells[9];
@@ -8,16 +9,57 @@
   int cells[16];
 } Mat4X4;
 
+struct StructOneMember {
+  int2 x;
+};
+
+struct StructTwoMember {
+  int2 x;
+  int2 y;
+};
+
+// COM-LABEL: define void @foo
 Mat4X4 __attribute__((noinline)) foo(Mat3X3 in) {
   Mat4X4 out;
   return out;
 }
 
+// COM-LABEL: define {{.*}} void @ker
+// Expect two mem copies: one for the argument "in", and one for
+// the return value.
+// X86: call void @llvm.memcpy.p0i8.p1i8.i32(i8*
+// X86: call void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)*
+// AMD: call void @llvm.memcpy.p5i8.p1i8.i64(i8 addrspace(5)*
+// AMD: call void @llvm.memcpy.p1i8.p5i8.i64(i8 addrspace(1)*
 kernel void ker(global Mat3X3 *in, global Mat4X4 *out) {
   out[0] = foo(in[1]);
 }
 
-// Expect two mem copies: one for the argument "in", and one for
-// the return value.
-// CHECK: call void @llvm.memcpy.p0i8.p1i8.i32(i8*
-// CHECK: call void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)*
+// AMD-LABEL: define void @FuncOneMember(%struct.StructOneMember addrspace(5)* 
byval align 8 %u)
+void FuncOneMember(struct StructOneMember u) {
+  u.x = (int2)(0, 0);
+}
+
+// AMD-LABEL: define amdgpu_kernel void @KernelOneMember
+// AMD-SAME:  (<2 x i32> %[[u_coerce:.*]])
+// AMD:  %[[u:.*]] = alloca %struct.StructOneMember, align 8, addrspace(5)
+// AMD:  %[[coerce_dive:.*]] = getelementptr inbounds %struct.StructOneMember, 
%struct.StructOneMember addrspace(5)* %[[u]], i32 0, i32 0
+// AMD:  store <2 x i32> %[[u_coerce]], <2 x i32> addrspace(5)* 
%[[coerce_dive]]
+// AMD:  call void @FuncOneMember(%struct.StructOneMember addrspace(5)* byval 
align 8 %[[u]])
+kernel void KernelOneMember(struct StructOneMember u) {
+  FuncOneMember(u);
+}
+
+// AMD-LABEL: define void @FuncTwoMember(%struct.StructTwoMember addrspace(5)* 
byval align 8 %u)
+void FuncTwoMember(struct StructTwoMember u) {
+  u.x = (int2)(0, 0);
+}
+
+// AMD-LABEL: define amdgpu_kernel void @KernelTwoMember
+// AMD-SAME:  (%struct.StructTwoMember %[[u_coerce:.*]])
+// AMD:  %[[u:.*]] = alloca %struct.StructTwoMember, align 8, addrspace(5)
+// AMD:  store %struct.StructTwoMember %[[u_coerce]], %struct.StructTwoMember 
addrspace(5)* %[[u]]
+// AMD:  call void @FuncTwoMember(%struct.StructTwoMember addrspace(5)* byval 
align 8 %[[u]])
+kernel void KernelTwoMember(struct StructTwoMember u) {
+  FuncTwoMember(u);
+}
Index: cfe/trunk/lib/CodeGen/CGCall.cpp
===
--- cfe/trunk/lib/CodeGen/CGCall.cpp
+++ cfe/trunk/lib/CodeGen/CGCall.cpp
@@ -1297,7 +1297,7 @@
 
   // If store is legal, just bitcast the src pointer.
   if (SrcSize <= DstSize) {
-Dst = CGF.Builder.CreateBitCast(Dst, llvm::PointerType::getUnqual(SrcTy));
+Dst = CGF.Builder.CreateElementBitCast(Dst, SrcTy);
 BuildAggStore(CGF, Src, Dst, DstIsVolatile);
   } else {
 // Otherwise do coercion through memory. This is stupid, but
@@ -2412,8 +2412,7 @@
 
 Address AddrToStoreInto = Address::invalid();
 if (SrcSize <= DstSize) {
-  AddrToStoreInto =
-Builder.CreateBitCast(Ptr, llvm::PointerType::getUnqual(STy));
+  AddrToStoreInto = Builder.CreateElementBitCast(Ptr, STy);
 } else {
   AddrToStoreInto =
 CreateTempAlloca(STy, Alloca.getAlignment(), "coerce");


Index: cfe/trunk/test/CodeGenOpenCL/addr-space-struct-arg.cl
===
--- cfe/trunk/test/CodeGenOpenCL/addr-space-struct-arg.cl
+++ cfe/trunk/test/CodeGenOpenCL/addr-space-struct-arg.cl
@@ -1,4 +1,5 @@
-// RUN: %clang_cc1 %s -emit-llvm -o - -O0 -ffake-address-space-map -triple i686-pc-darwin | FileCheck %s
+// RUN: %clang_cc1 %s -emit-llvm -o - -O0 -finclude-default-header -ffake-address-space-map -triple i686-pc-darwin | FileCheck -check-prefixes=COM,X86 %s
+// RUN: %clang_cc1 %s -emit-llvm -o - -O0 -finclude-default-header 

[PATCH] D34777: CodeGen: Fix invalid bitcast for coerced function argument

2017-06-29 Thread John McCall via Phabricator via cfe-commits
rjmccall accepted this revision.
rjmccall added a comment.
This revision is now accepted and ready to land.

LGTM.


https://reviews.llvm.org/D34777



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D34777: CodeGen: Fix invalid bitcast for coerced function argument

2017-06-28 Thread Yaxun Liu via Phabricator via cfe-commits
yaxunl created this revision.
Herald added a subscriber: Anastasia.

Clang assumes coerced function argument is in address space 0, which is not 
always true and results in invalid bitcasts.

This patch fixes failure in OpenCL conformance test api/get_kernel_arg_info 
with amdgcn---amdgizcl triple, where non-zero alloca address space is used.


https://reviews.llvm.org/D34777

Files:
  lib/CodeGen/CGCall.cpp
  test/CodeGenOpenCL/addr-space-struct-arg.cl


Index: test/CodeGenOpenCL/addr-space-struct-arg.cl
===
--- test/CodeGenOpenCL/addr-space-struct-arg.cl
+++ test/CodeGenOpenCL/addr-space-struct-arg.cl
@@ -1,4 +1,5 @@
-// RUN: %clang_cc1 %s -emit-llvm -o - -O0 -ffake-address-space-map -triple 
i686-pc-darwin | FileCheck %s
+// RUN: %clang_cc1 %s -emit-llvm -o - -O0 -finclude-default-header 
-ffake-address-space-map -triple i686-pc-darwin | FileCheck 
-check-prefixes=COM,X86 %s
+// RUN: %clang_cc1 %s -emit-llvm -o - -O0 -finclude-default-header -triple 
amdgcn-amdhsa-amd-amdgizcl | FileCheck -check-prefixes=COM,AMD %s
 
 typedef struct {
   int cells[9];
@@ -8,16 +9,57 @@
   int cells[16];
 } Mat4X4;
 
+struct StructOneMember {
+  int2 x;
+};
+
+struct StructTwoMember {
+  int2 x;
+  int2 y;
+};
+
+// COM-LABEL: define void @foo
 Mat4X4 __attribute__((noinline)) foo(Mat3X3 in) {
   Mat4X4 out;
   return out;
 }
 
+// COM-LABEL: define {{.*}} void @ker
+// Expect two mem copies: one for the argument "in", and one for
+// the return value.
+// X86: call void @llvm.memcpy.p0i8.p1i8.i32(i8*
+// X86: call void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)*
+// AMD: call void @llvm.memcpy.p5i8.p1i8.i64(i8 addrspace(5)*
+// AMD: call void @llvm.memcpy.p1i8.p5i8.i64(i8 addrspace(1)*
 kernel void ker(global Mat3X3 *in, global Mat4X4 *out) {
   out[0] = foo(in[1]);
 }
 
-// Expect two mem copies: one for the argument "in", and one for
-// the return value.
-// CHECK: call void @llvm.memcpy.p0i8.p1i8.i32(i8*
-// CHECK: call void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)*
+// AMD-LABEL: define void @FuncOneMember(%struct.StructOneMember addrspace(5)* 
byval align 8 %u)
+void FuncOneMember(struct StructOneMember u) {
+  u.x = (int2)(0, 0);
+}
+
+// AMD-LABEL: define amdgpu_kernel void @KernelOneMember
+// AMD-SAME:  (<2 x i32> %[[u_coerce:.*]])
+// AMD:  %[[u:.*]] = alloca %struct.StructOneMember, align 8, addrspace(5)
+// AMD:  %[[coerce_dive:.*]] = getelementptr inbounds %struct.StructOneMember, 
%struct.StructOneMember addrspace(5)* %[[u]], i32 0, i32 0
+// AMD:  store <2 x i32> %[[u_coerce]], <2 x i32> addrspace(5)* 
%[[coerce_dive]]
+// AMD:  call void @FuncOneMember(%struct.StructOneMember addrspace(5)* byval 
align 8 %[[u]])
+kernel void KernelOneMember(struct StructOneMember u) {
+  FuncOneMember(u);
+}
+
+// AMD-LABEL: define void @FuncTwoMember(%struct.StructTwoMember addrspace(5)* 
byval align 8 %u)
+void FuncTwoMember(struct StructTwoMember u) {
+  u.x = (int2)(0, 0);
+}
+
+// AMD-LABEL: define amdgpu_kernel void @KernelTwoMember
+// AMD-SAME:  (%struct.StructTwoMember %[[u_coerce:.*]])
+// AMD:  %[[u:.*]] = alloca %struct.StructTwoMember, align 8, addrspace(5)
+// AMD:  store %struct.StructTwoMember %[[u_coerce]], %struct.StructTwoMember 
addrspace(5)* %[[u]]
+// AMD:  call void @FuncTwoMember(%struct.StructTwoMember addrspace(5)* byval 
align 8 %[[u]])
+kernel void KernelTwoMember(struct StructTwoMember u) {
+  FuncTwoMember(u);
+}
Index: lib/CodeGen/CGCall.cpp
===
--- lib/CodeGen/CGCall.cpp
+++ lib/CodeGen/CGCall.cpp
@@ -1297,7 +1297,7 @@
 
   // If store is legal, just bitcast the src pointer.
   if (SrcSize <= DstSize) {
-Dst = CGF.Builder.CreateBitCast(Dst, llvm::PointerType::getUnqual(SrcTy));
+Dst = CGF.Builder.CreateElementBitCast(Dst, SrcTy);
 BuildAggStore(CGF, Src, Dst, DstIsVolatile);
   } else {
 // Otherwise do coercion through memory. This is stupid, but
@@ -2412,8 +2412,7 @@
 
 Address AddrToStoreInto = Address::invalid();
 if (SrcSize <= DstSize) {
-  AddrToStoreInto =
-Builder.CreateBitCast(Ptr, llvm::PointerType::getUnqual(STy));
+  AddrToStoreInto = Builder.CreateElementBitCast(Ptr, STy);
 } else {
   AddrToStoreInto =
 CreateTempAlloca(STy, Alloca.getAlignment(), "coerce");


Index: test/CodeGenOpenCL/addr-space-struct-arg.cl
===
--- test/CodeGenOpenCL/addr-space-struct-arg.cl
+++ test/CodeGenOpenCL/addr-space-struct-arg.cl
@@ -1,4 +1,5 @@
-// RUN: %clang_cc1 %s -emit-llvm -o - -O0 -ffake-address-space-map -triple i686-pc-darwin | FileCheck %s
+// RUN: %clang_cc1 %s -emit-llvm -o - -O0 -finclude-default-header -ffake-address-space-map -triple i686-pc-darwin | FileCheck -check-prefixes=COM,X86 %s
+// RUN: %clang_cc1 %s -emit-llvm -o - -O0 -finclude-default-header -triple amdgcn-amdhsa-amd-amdgizcl | FileCheck