yaxunl created this revision.
yaxunl added a reviewer: rjmccall.
Herald added subscribers: kerbowa, tpr, nhaehnle, jvesely.
yaxunl requested review of this revision.

sret is returned through temporary variables allocated on stack,
therefore it should use alloca address space.

Currently clang use default address space for sret pointers. This
causes inefficient code generated for AMDGPU backend since
alloca address space is 32 bit whereas generic pointer is 64 bit.
It also causes assertions where alloca address space is expected.

This patch uses alloca address space for sret pointers. It is NFC
for targets alloca address space of which is default address space.


https://reviews.llvm.org/D101389

Files:
  clang/lib/CodeGen/CGCall.cpp
  clang/test/CodeGenCUDA/amdgpu-sret.cu

Index: clang/test/CodeGenCUDA/amdgpu-sret.cu
===================================================================
--- /dev/null
+++ clang/test/CodeGenCUDA/amdgpu-sret.cu
@@ -0,0 +1,67 @@
+// REQUIRES: amdgpu-registered-target
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -target-cpu gfx906 \
+// RUN:   -aux-triple x86_64-unknown-linux-gnu -fcuda-is-device \
+// RUN:   -emit-llvm -o - -x hip %s \
+// RUN:   | FileCheck %s
+
+// Check no assertion with debug info.
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -target-cpu gfx906 \
+// RUN:   -aux-triple x86_64-unknown-linux-gnu -fcuda-is-device \
+// RUN:   -S -o %t.s -x hip %s \
+// RUN:   -debug-info-kind=limited
+
+#include "Inputs/cuda.h"
+ 
+struct A {
+  int x[100];
+};
+
+__device__ A a;
+
+// CHECK-LABEL: @_Z5func1v(%struct.A addrspace(5)* noalias sret(%struct.A) align 4 
+__device__ A func1() {
+  A a;
+  return a;
+}
+
+// Check returning the return value again.
+
+// CHECK-LABEL: @_Z5func2v(
+// CHECK-SAME: %struct.A addrspace(5)* noalias sret(%struct.A) align 4 %[[ARG:.*]])
+// CHECK: call void @_Z5func1v(%struct.A addrspace(5)* sret(%struct.A) align 4 %[[ARG]])
+__device__ A func2() {
+  A a = func1();
+  return a;
+}
+
+// Check assigning the return value to a global variable.
+
+// CHECK-LABEL: @_Z5func3v(
+// CHECK: %[[RET:.*]] = alloca %struct.A, align 4, addrspace(5)
+// CHECK: %[[CAST1:.*]] = addrspacecast %struct.A addrspace(5)* %[[RET]] to %struct.A*
+// CHECK: %[[CAST2:.*]] = addrspacecast %struct.A* %[[CAST1]] to %struct.A addrspace(5)*
+// CHECK: call void @_Z5func1v(%struct.A addrspace(5)* sret(%struct.A) align 4 %[[CAST2]]
+// CHECK: %[[CAST3:.*]] = bitcast %struct.A* %[[CAST1]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64{{.*}}@a{{.*}}%[[CAST3]]
+__device__ void func3() {
+  a = func1();
+}
+
+// Check assigning the return value to a temporary variable.
+
+// CHECK-LABEL: @_Z5func4v(
+// CHECK: %[[TMP:.*]] = alloca %struct.A, align 4, addrspace(5)
+// CHECK: %[[TMP_CAST1:.*]] = addrspacecast %struct.A addrspace(5)* %[[TMP]] to %struct.A*
+// CHECK: %[[RET:.*]] = alloca %struct.A, align 4, addrspace(5)
+// CHECK: %[[RET_CAST1:.*]] = addrspacecast %struct.A addrspace(5)* %[[RET]] to %struct.A*
+// CHECK: %[[RET_CAST2:.*]] = addrspacecast %struct.A* %[[RET_CAST1]] to %struct.A addrspace(5)*
+// CHECK: call void @_Z5func1v(%struct.A addrspace(5)* sret(%struct.A) align 4 %[[RET_CAST2]]
+// CHECK: %[[TMP_CAST2:.*]] = bitcast %struct.A* %[[TMP_CAST1]] to i8*
+// CHECK: %[[RET_CAST3:.*]] = bitcast %struct.A* %[[RET_CAST1]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64{{.*}}%[[TMP_CAST2]]{{.*}}%[[RET_CAST3]]
+__device__ void func4() {
+  A a;
+  a = func1();
+}
Index: clang/lib/CodeGen/CGCall.cpp
===================================================================
--- clang/lib/CodeGen/CGCall.cpp
+++ clang/lib/CodeGen/CGCall.cpp
@@ -1624,7 +1624,8 @@
   if (IRFunctionArgs.hasSRetArg()) {
     QualType Ret = FI.getReturnType();
     llvm::Type *Ty = ConvertType(Ret);
-    unsigned AddressSpace = Context.getTargetAddressSpace(Ret);
+    unsigned AddressSpace =
+        Context.getTargetAddressSpace(CGM.getASTAllocaAddressSpace());
     ArgTypes[IRFunctionArgs.getSRetArgNo()] =
         llvm::PointerType::get(Ty, AddressSpace);
   }
@@ -4671,7 +4672,17 @@
       }
     }
     if (IRFunctionArgs.hasSRetArg()) {
-      IRCallArgs[IRFunctionArgs.getSRetArgNo()] = SRetPtr.getPointer();
+      IRCallArgs[IRFunctionArgs.getSRetArgNo()] =
+          getTargetHooks().performAddrSpaceCast(
+              *this, SRetPtr.getPointer(), LangAS::Default,
+              getASTAllocaAddressSpace(),
+              SRetPtr.getPointer()
+                  ->getType()
+                  ->getPointerElementType()
+                  ->getPointerTo(getContext().getTargetAddressSpace(
+                      getASTAllocaAddressSpace())),
+              /*non-null*/ true);
+
     } else if (RetAI.isInAlloca()) {
       Address Addr =
           Builder.CreateStructGEP(ArgMemory, RetAI.getInAllocaFieldIndex());
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to