[PATCH] D69818: [HIP] Fix pointer type kernel arg for amdgpu

2019-11-04 Thread Yaxun Liu via Phabricator via cfe-commits
yaxunl abandoned this revision.
yaxunl added a comment.

Please review Michael's patch https://reviews.llvm.org/D69826 which supersedes 
this one. Thanks.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D69818/new/

https://reviews.llvm.org/D69818



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D69818: [HIP] Fix pointer type kernel arg for amdgpu

2019-11-04 Thread Yaxun Liu via Phabricator via cfe-commits
yaxunl marked an inline comment as done.
yaxunl added a comment.

BTW Michael Liao will create another patch which will supersede this patch. 
That patch contains similar changes and also handles pointers in a byval struct 
or array.




Comment at: clang/lib/CodeGen/CGCall.cpp:1172
 if (isa(Ty))
-  return CGF.Builder.CreateBitCast(Val, Ty, "coerce.val");
+  return CGF.Builder.CreatePointerCast(Val, Ty, "coerce.val");
 

tra wrote:
> What's the purpose of this change?
this is supposed to cast the coerced type to the original type. Previously, no 
coerced type changes the address space, therefore a bitcast is sufficient. Now 
we coerce a type to different address space, therefore we need a pointer cast.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D69818/new/

https://reviews.llvm.org/D69818



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D69818: [HIP] Fix pointer type kernel arg for amdgpu

2019-11-04 Thread Artem Belevich via Phabricator via cfe-commits
tra added inline comments.



Comment at: clang/lib/CodeGen/CGCall.cpp:1172
 if (isa(Ty))
-  return CGF.Builder.CreateBitCast(Val, Ty, "coerce.val");
+  return CGF.Builder.CreatePointerCast(Val, Ty, "coerce.val");
 

What's the purpose of this change?



Comment at: clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu:15-16
+// CHECK: define amdgpu_kernel void  @_Z7kernel3PU3AS2iPU3AS1i(i32 
addrspace(2)* %x, i32 addrspace(1)* %y)
+__global__ void kernel3(__attribute__((address_space(2))) int *x,
+__attribute__((address_space(1))) int *y) {
+  y[0] = x[0];

Interesting. Clang used to crash on explicit address space attribute in the 
past. I'm glad it works now.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D69818/new/

https://reviews.llvm.org/D69818



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D69818: [HIP] Fix pointer type kernel arg for amdgpu

2019-11-04 Thread Yaxun Liu via Phabricator via cfe-commits
yaxunl updated this revision to Diff 227764.
yaxunl added a comment.

add a test for non-kernel function.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D69818/new/

https://reviews.llvm.org/D69818

Files:
  clang/lib/CodeGen/CGCall.cpp
  clang/lib/CodeGen/TargetInfo.cpp
  clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu


Index: clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu
===
--- /dev/null
+++ clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu
@@ -0,0 +1,23 @@
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device \
+// RUN: -emit-llvm -x hip %s -o - | FileCheck %s
+#include "Inputs/cuda.h"
+// CHECK: define amdgpu_kernel void  @_Z7kernel1Pi(i32 addrspace(1)* %x.coerce)
+__global__ void kernel1(int *x) {
+  x[0]++;
+}
+
+// CHECK: define amdgpu_kernel void  @_Z7kernel2Ri(i32 addrspace(1)* 
dereferenceable(4) %x.coerce)
+__global__ void kernel2(int &x) {
+  x++;
+}
+
+// CHECK: define amdgpu_kernel void  @_Z7kernel3PU3AS2iPU3AS1i(i32 
addrspace(2)* %x, i32 addrspace(1)* %y)
+__global__ void kernel3(__attribute__((address_space(2))) int *x,
+__attribute__((address_space(1))) int *y) {
+  y[0] = x[0];
+}
+
+// CHECK: define void @_Z4funcPi(i32* %x)
+__device__ void func(int *x) {
+  x[0]++;
+}
Index: clang/lib/CodeGen/TargetInfo.cpp
===
--- clang/lib/CodeGen/TargetInfo.cpp
+++ clang/lib/CodeGen/TargetInfo.cpp
@@ -7816,6 +7816,27 @@
   if (const Type *SeltTy = isSingleElementStruct(Ty, getContext()))
 return ABIArgInfo::getDirect(CGT.ConvertType(QualType(SeltTy, 0)));
 
+  // Coerce pointer type kernel arguments in default address space to
+  // device address space for HIP.
+  QualType PointeeTy;
+  if (getContext().getLangOpts().HIP) {
+if (auto *PT = Ty->getAs()) {
+  if (PT->getPointeeType().getAddressSpace() == LangAS::Default) {
+PointeeTy = PT->getPointeeType();
+  }
+} else if (auto *RT = Ty->getAs()) {
+  if (RT->getPointeeType().getAddressSpace() == LangAS::Default) {
+PointeeTy = RT->getPointeeType();
+  }
+}
+
+if (PointeeTy != QualType()) {
+  return ABIArgInfo::getDirect(
+CGT.ConvertType(PointeeTy)
+->getPointerTo(
+getContext().getTargetAddressSpace(LangAS::cuda_device)));
+}
+  }
   // If we set CanBeFlattened to true, CodeGen will expand the struct to its
   // individual elements, which confuses the Clover OpenCL backend; therefore 
we
   // have to set it to false here. Other args of getDirect() are just defaults.
Index: clang/lib/CodeGen/CGCall.cpp
===
--- clang/lib/CodeGen/CGCall.cpp
+++ clang/lib/CodeGen/CGCall.cpp
@@ -1169,7 +1169,7 @@
   if (isa(Val->getType())) {
 // If this is Pointer->Pointer avoid conversion to and from int.
 if (isa(Ty))
-  return CGF.Builder.CreateBitCast(Val, Ty, "coerce.val");
+  return CGF.Builder.CreatePointerCast(Val, Ty, "coerce.val");
 
 // Convert the pointer to an integer so we can play with its width.
 Val = CGF.Builder.CreatePtrToInt(Val, CGF.IntPtrTy, "coerce.val.pi");


Index: clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu
===
--- /dev/null
+++ clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu
@@ -0,0 +1,23 @@
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device \
+// RUN: -emit-llvm -x hip %s -o - | FileCheck %s
+#include "Inputs/cuda.h"
+// CHECK: define amdgpu_kernel void  @_Z7kernel1Pi(i32 addrspace(1)* %x.coerce)
+__global__ void kernel1(int *x) {
+  x[0]++;
+}
+
+// CHECK: define amdgpu_kernel void  @_Z7kernel2Ri(i32 addrspace(1)* dereferenceable(4) %x.coerce)
+__global__ void kernel2(int &x) {
+  x++;
+}
+
+// CHECK: define amdgpu_kernel void  @_Z7kernel3PU3AS2iPU3AS1i(i32 addrspace(2)* %x, i32 addrspace(1)* %y)
+__global__ void kernel3(__attribute__((address_space(2))) int *x,
+__attribute__((address_space(1))) int *y) {
+  y[0] = x[0];
+}
+
+// CHECK: define void @_Z4funcPi(i32* %x)
+__device__ void func(int *x) {
+  x[0]++;
+}
Index: clang/lib/CodeGen/TargetInfo.cpp
===
--- clang/lib/CodeGen/TargetInfo.cpp
+++ clang/lib/CodeGen/TargetInfo.cpp
@@ -7816,6 +7816,27 @@
   if (const Type *SeltTy = isSingleElementStruct(Ty, getContext()))
 return ABIArgInfo::getDirect(CGT.ConvertType(QualType(SeltTy, 0)));
 
+  // Coerce pointer type kernel arguments in default address space to
+  // device address space for HIP.
+  QualType PointeeTy;
+  if (getContext().getLangOpts().HIP) {
+if (auto *PT = Ty->getAs()) {
+  if (PT->getPointeeType().getAddressSpace() == LangAS::Default) {
+PointeeTy = PT->getPointeeType();
+  }
+} else if (auto *RT = Ty->getAs()) {
+  if (RT

[PATCH] D69818: [HIP] Fix pointer type kernel arg for amdgpu

2019-11-04 Thread Yaxun Liu via Phabricator via cfe-commits
yaxunl created this revision.
yaxunl added reviewers: tra, rjmccall.
Herald added subscribers: t-tye, tpr, dstuttard, nhaehnle, wdng, jvesely, 
kzhuravl.

amdgpu target prefers pointer type kernel arg in default address space
to be coerced to device address space for better performance.

  

This patch fixes that.


https://reviews.llvm.org/D69818

Files:
  clang/lib/CodeGen/CGCall.cpp
  clang/lib/CodeGen/TargetInfo.cpp
  clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu


Index: clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu
===
--- /dev/null
+++ clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu
@@ -0,0 +1,18 @@
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device \
+// RUN: -emit-llvm -x hip %s -o - | FileCheck %s
+#include "Inputs/cuda.h"
+// CHECK: define amdgpu_kernel void  @_Z7kernel1Pi(i32 addrspace(1)* %x.coerce)
+__global__ void kernel1(int *x) {
+  x[0]++;
+}
+
+// CHECK: define amdgpu_kernel void  @_Z7kernel2Ri(i32 addrspace(1)* 
dereferenceable(4) %x.coerce)
+__global__ void kernel2(int &x) {
+  x++;
+}
+
+// CHECK: define amdgpu_kernel void  @_Z7kernel3PU3AS2iPU3AS1i(i32 
addrspace(2)* %x, i32 addrspace(1)* %y)
+__global__ void kernel3(__attribute__((address_space(2))) int *x,
+__attribute__((address_space(1))) int *y) {
+  y[0] = x[0];
+}
Index: clang/lib/CodeGen/TargetInfo.cpp
===
--- clang/lib/CodeGen/TargetInfo.cpp
+++ clang/lib/CodeGen/TargetInfo.cpp
@@ -7816,6 +7816,27 @@
   if (const Type *SeltTy = isSingleElementStruct(Ty, getContext()))
 return ABIArgInfo::getDirect(CGT.ConvertType(QualType(SeltTy, 0)));
 
+  // Coerce pointer type kernel arguments in default address space to
+  // device address space for HIP.
+  QualType PointeeTy;
+  if (getContext().getLangOpts().HIP) {
+if (auto *PT = Ty->getAs()) {
+  if (PT->getPointeeType().getAddressSpace() == LangAS::Default) {
+PointeeTy = PT->getPointeeType();
+  }
+} else if (auto *RT = Ty->getAs()) {
+  if (RT->getPointeeType().getAddressSpace() == LangAS::Default) {
+PointeeTy = RT->getPointeeType();
+  }
+}
+
+if (PointeeTy != QualType()) {
+  return ABIArgInfo::getDirect(
+CGT.ConvertType(PointeeTy)
+->getPointerTo(
+getContext().getTargetAddressSpace(LangAS::cuda_device)));
+}
+  }
   // If we set CanBeFlattened to true, CodeGen will expand the struct to its
   // individual elements, which confuses the Clover OpenCL backend; therefore 
we
   // have to set it to false here. Other args of getDirect() are just defaults.
Index: clang/lib/CodeGen/CGCall.cpp
===
--- clang/lib/CodeGen/CGCall.cpp
+++ clang/lib/CodeGen/CGCall.cpp
@@ -1169,7 +1169,7 @@
   if (isa(Val->getType())) {
 // If this is Pointer->Pointer avoid conversion to and from int.
 if (isa(Ty))
-  return CGF.Builder.CreateBitCast(Val, Ty, "coerce.val");
+  return CGF.Builder.CreatePointerCast(Val, Ty, "coerce.val");
 
 // Convert the pointer to an integer so we can play with its width.
 Val = CGF.Builder.CreatePtrToInt(Val, CGF.IntPtrTy, "coerce.val.pi");


Index: clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu
===
--- /dev/null
+++ clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu
@@ -0,0 +1,18 @@
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device \
+// RUN: -emit-llvm -x hip %s -o - | FileCheck %s
+#include "Inputs/cuda.h"
+// CHECK: define amdgpu_kernel void  @_Z7kernel1Pi(i32 addrspace(1)* %x.coerce)
+__global__ void kernel1(int *x) {
+  x[0]++;
+}
+
+// CHECK: define amdgpu_kernel void  @_Z7kernel2Ri(i32 addrspace(1)* dereferenceable(4) %x.coerce)
+__global__ void kernel2(int &x) {
+  x++;
+}
+
+// CHECK: define amdgpu_kernel void  @_Z7kernel3PU3AS2iPU3AS1i(i32 addrspace(2)* %x, i32 addrspace(1)* %y)
+__global__ void kernel3(__attribute__((address_space(2))) int *x,
+__attribute__((address_space(1))) int *y) {
+  y[0] = x[0];
+}
Index: clang/lib/CodeGen/TargetInfo.cpp
===
--- clang/lib/CodeGen/TargetInfo.cpp
+++ clang/lib/CodeGen/TargetInfo.cpp
@@ -7816,6 +7816,27 @@
   if (const Type *SeltTy = isSingleElementStruct(Ty, getContext()))
 return ABIArgInfo::getDirect(CGT.ConvertType(QualType(SeltTy, 0)));
 
+  // Coerce pointer type kernel arguments in default address space to
+  // device address space for HIP.
+  QualType PointeeTy;
+  if (getContext().getLangOpts().HIP) {
+if (auto *PT = Ty->getAs()) {
+  if (PT->getPointeeType().getAddressSpace() == LangAS::Default) {
+PointeeTy = PT->getPointeeType();
+  }
+} else if (auto *RT = Ty->getAs()) {
+  if (RT->getPointeeType().getAddressSpace