hliao created this revision.
hliao added reviewers: tra, rjmccall, yaxunl.
Herald added subscribers: cfe-commits, nhaehnle, jvesely.
Herald added a project: clang.
hliao added a comment.

It happens that Sam has a similar patch of this one. After discussion, we 
agreed that this patch addresses more cases found in the workloads. Thank Sam 
for the test case.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D69826

Files:
  clang/lib/CodeGen/CGCall.cpp
  clang/lib/CodeGen/TargetInfo.cpp
  clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu

Index: clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu
===================================================================
--- /dev/null
+++ clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu
@@ -0,0 +1,23 @@
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device \
+// RUN:     -emit-llvm -x hip %s -o - | FileCheck %s
+#include "Inputs/cuda.h"
+// CHECK: define amdgpu_kernel void  @_Z7kernel1Pi(i32 addrspace(1)* %x.coerce)
+__global__ void kernel1(int *x) {
+  x[0]++;
+}
+
+// CHECK: define amdgpu_kernel void  @_Z7kernel2Ri(i32 addrspace(1)* dereferenceable(4) %x.coerce)
+__global__ void kernel2(int &x) {
+  x++;
+}
+
+// CHECK: define amdgpu_kernel void  @_Z7kernel3PU3AS2iPU3AS1i(i32 addrspace(2)* %x, i32 addrspace(1)* %y)
+__global__ void kernel3(__attribute__((address_space(2))) int *x,
+                        __attribute__((address_space(1))) int *y) {
+  y[0] = x[0];
+}
+
+// CHECK: define void @_Z4funcPi(i32* %x)
+__device__ void func(int *x) {
+  x[0]++;
+}
Index: clang/lib/CodeGen/TargetInfo.cpp
===================================================================
--- clang/lib/CodeGen/TargetInfo.cpp
+++ clang/lib/CodeGen/TargetInfo.cpp
@@ -7685,6 +7685,53 @@
   bool isHomogeneousAggregateSmallEnough(const Type *Base,
                                          uint64_t Members) const override;
 
+  // Coercion type builder for lower HIP pointer argument from generic pointer
+  // to global pointer.
+  class CoerceGenericPointerTypeBuilder {
+    llvm::LLVMContext &Context;
+    unsigned DefaultAS;
+    unsigned GlobalAS;
+
+  public:
+    CoerceGenericPointerTypeBuilder(llvm::LLVMContext &VMCtx, unsigned DAS,
+                                    unsigned GAS)
+        : Context(VMCtx), DefaultAS(DAS), GlobalAS(GAS) {}
+
+    llvm::Type *coerce(llvm::Type *Ty) {
+      // Structure types.
+      if (auto STy = dyn_cast<llvm::StructType>(Ty)) {
+        SmallVector<llvm::Type *, 8> EltTys;
+        bool Changed = false;
+        for (auto T : STy->elements()) {
+          auto NT = coerce(T);
+          EltTys.push_back(NT);
+          Changed |= (NT != T);
+        }
+        // Skip if there is no change in element types.
+        if (!Changed)
+          return STy;
+        if (STy->hasName())
+          return llvm::StructType::create(
+              EltTys, (STy->getName() + ".coerce").str(), STy->isPacked());
+        return llvm::StructType::get(Context, EltTys, STy->isPacked());
+      }
+      // Arrary types.
+      if (auto ATy = dyn_cast<llvm::ArrayType>(Ty)) {
+        auto T = ATy->getElementType();
+        auto NT = coerce(T);
+        // Skip if there is no change in that element type.
+        if (NT == T)
+          return ATy;
+        return llvm::ArrayType::get(NT, ATy->getNumElements());
+      }
+      // Single value types.
+      if (Ty->isPointerTy() && Ty->getPointerAddressSpace() == DefaultAS)
+        return llvm::PointerType::get(
+            cast<llvm::PointerType>(Ty)->getElementType(), GlobalAS);
+      return Ty;
+    }
+  };
+
 public:
   explicit AMDGPUABIInfo(CodeGen::CodeGenTypes &CGT) :
     DefaultABIInfo(CGT) {}
@@ -7812,14 +7859,23 @@
 
   // TODO: Can we omit empty structs?
 
-  // Coerce single element structs to its element.
+  llvm::Type *LTy = nullptr;
   if (const Type *SeltTy = isSingleElementStruct(Ty, getContext()))
-    return ABIArgInfo::getDirect(CGT.ConvertType(QualType(SeltTy, 0)));
+    LTy = CGT.ConvertType(QualType(SeltTy, 0));
+
+  if (getContext().getLangOpts().HIP) {
+    if (!LTy)
+      LTy = CGT.ConvertType(Ty);
+    CoerceGenericPointerTypeBuilder Builder(getVMContext(),
+        getContext().getTargetAddressSpace(LangAS::Default),
+        getContext().getTargetAddressSpace(LangAS::cuda_device));
+    LTy = Builder.coerce(LTy);
+  }
 
   // If we set CanBeFlattened to true, CodeGen will expand the struct to its
   // individual elements, which confuses the Clover OpenCL backend; therefore we
   // have to set it to false here. Other args of getDirect() are just defaults.
-  return ABIArgInfo::getDirect(nullptr, 0, nullptr, false);
+  return ABIArgInfo::getDirect(LTy, 0, nullptr, false);
 }
 
 ABIArgInfo AMDGPUABIInfo::classifyArgumentType(QualType Ty,
Index: clang/lib/CodeGen/CGCall.cpp
===================================================================
--- clang/lib/CodeGen/CGCall.cpp
+++ clang/lib/CodeGen/CGCall.cpp
@@ -1305,6 +1305,14 @@
     DstTy = Dst.getType()->getElementType();
   }
 
+  if (isa<llvm::PointerType>(SrcTy) &&
+      isa<llvm::PointerType>(DstTy) &&
+      SrcTy->getPointerAddressSpace() != DstTy->getPointerAddressSpace()) {
+    Src = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Src, DstTy);
+    CGF.Builder.CreateStore(Src, Dst, DstIsVolatile);
+    return;
+  }
+
   // If the source and destination are integer or pointer types, just do an
   // extension or truncation to the desired type.
   if ((isa<llvm::IntegerType>(SrcTy) || isa<llvm::PointerType>(SrcTy)) &&
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to