yaxunl updated this revision to Diff 113443.
yaxunl added a comment.

Revised by John's comments.


https://reviews.llvm.org/D34367

Files:
  lib/CodeGen/CGCall.cpp
  lib/CodeGen/CGCall.h
  lib/CodeGen/CGDecl.cpp
  test/CodeGenCXX/amdgcn-func-arg.cpp
  test/CodeGenOpenCL/addr-space-struct-arg.cl

Index: test/CodeGenOpenCL/addr-space-struct-arg.cl
===================================================================
--- test/CodeGenOpenCL/addr-space-struct-arg.cl
+++ test/CodeGenOpenCL/addr-space-struct-arg.cl
@@ -1,5 +1,6 @@
 // RUN: %clang_cc1 %s -emit-llvm -o - -O0 -finclude-default-header -ffake-address-space-map -triple i686-pc-darwin | FileCheck -enable-var-scope -check-prefixes=COM,X86 %s
 // RUN: %clang_cc1 %s -emit-llvm -o - -O0 -finclude-default-header -triple amdgcn-amdhsa-amd-amdgizcl | FileCheck -enable-var-scope -check-prefixes=COM,AMD %s
+// RUN: %clang_cc1 %s -emit-llvm -o - -cl-std=CL2.0 -O0 -finclude-default-header -triple amdgcn-amdhsa-amd-amdgizcl | FileCheck -enable-var-scope -check-prefixes=COM,AMD,AMD20 %s
 
 typedef struct {
   int cells[9];
@@ -35,6 +36,9 @@
   int2 y[20];
 };
 
+#if __OPENCL_C_VERSION__ >= 200
+struct LargeStructOneMember g_s;
+#endif
 
 // X86-LABEL: define void @foo(%struct.Mat4X4* noalias sret %agg.result, %struct.Mat3X3* byval align 4 %in)
 // AMD-LABEL: define %struct.Mat4X4 @foo([9 x i32] %in.coerce)
@@ -80,10 +84,42 @@
 }
 
 // AMD-LABEL: define void @FuncOneLargeMember(%struct.LargeStructOneMember addrspace(5)* byval align 8 %u)
+// AMD-NOT: addrspacecast
+// AMD:   store <2 x i32> %{{.*}}, <2 x i32> addrspace(5)*
 void FuncOneLargeMember(struct LargeStructOneMember u) {
   u.x[0] = (int2)(0, 0);
 }
 
+// AMD20-LABEL: define void @test_indirect_arg_globl()
+// AMD20:  %[[byval_temp:.*]] = alloca %struct.LargeStructOneMember, align 8, addrspace(5)
+// AMD20:  %[[r0:.*]] = bitcast %struct.LargeStructOneMember addrspace(5)* %[[byval_temp]] to i8 addrspace(5)*
+// AMD20:  call void @llvm.memcpy.p5i8.p1i8.i64(i8 addrspace(5)* %[[r0]], i8 addrspace(1)* bitcast (%struct.LargeStructOneMember addrspace(1)* @g_s to i8 addrspace(1)*), i64 800, i32 8, i1 false)
+// AMD20:  call void @FuncOneLargeMember(%struct.LargeStructOneMember addrspace(5)* byval align 8 %[[byval_temp]])
+#if __OPENCL_C_VERSION__ >= 200
+void test_indirect_arg_globl(void) {
+  FuncOneLargeMember(g_s);
+}
+#endif
+
+// AMD-LABEL: define amdgpu_kernel void @test_indirect_arg_local()
+// AMD: %[[byval_temp:.*]] = alloca %struct.LargeStructOneMember, align 8, addrspace(5)
+// AMD: %[[r0:.*]] = bitcast %struct.LargeStructOneMember addrspace(5)* %[[byval_temp]] to i8 addrspace(5)*
+// AMD: call void @llvm.memcpy.p5i8.p3i8.i64(i8 addrspace(5)* %[[r0]], i8 addrspace(3)* bitcast (%struct.LargeStructOneMember addrspace(3)* @test_indirect_arg_local.l_s to i8 addrspace(3)*), i64 800, i32 8, i1 false)
+// AMD: call void @FuncOneLargeMember(%struct.LargeStructOneMember addrspace(5)* byval align 8 %[[byval_temp]])
+kernel void test_indirect_arg_local(void) {
+  local struct LargeStructOneMember l_s;
+  FuncOneLargeMember(l_s);
+}
+
+// AMD-LABEL: define void @test_indirect_arg_private()
+// AMD: %[[p_s:.*]] = alloca %struct.LargeStructOneMember, align 8, addrspace(5)
+// AMD-NOT: @llvm.memcpy
+// AMD-NEXT: call void @FuncOneLargeMember(%struct.LargeStructOneMember addrspace(5)* byval align 8 %[[p_s]])
+void test_indirect_arg_private(void) {
+  struct LargeStructOneMember p_s;
+  FuncOneLargeMember(p_s);
+}
+
 // AMD-LABEL: define amdgpu_kernel void @KernelOneMember
 // AMD-SAME:  (<2 x i32> %[[u_coerce:.*]])
 // AMD:  %[[u:.*]] = alloca %struct.StructOneMember, align 8, addrspace(5)
@@ -112,7 +148,6 @@
   u.y[0] = (int2)(0, 0);
 }
 
-
 // AMD-LABEL: define amdgpu_kernel void @KernelTwoMember
 // AMD-SAME:  (%struct.StructTwoMember %[[u_coerce:.*]])
 // AMD:  %[[u:.*]] = alloca %struct.StructTwoMember, align 8, addrspace(5)
Index: test/CodeGenCXX/amdgcn-func-arg.cpp
===================================================================
--- /dev/null
+++ test/CodeGenCXX/amdgcn-func-arg.cpp
@@ -0,0 +1,94 @@
+// RUN: %clang_cc1 -O0 -triple amdgcn---amdgiz -emit-llvm %s -o - | FileCheck %s
+
+class A {
+public:
+  int x;
+  A():x(0) {}
+  ~A() {}
+};
+
+class B {
+int x[100];
+};
+
+A g_a;
+B g_b;
+
+void func_with_ref_arg(A &a);
+void func_with_ref_arg(B &b);
+
+// CHECK-LABEL: define void @_Z22func_with_indirect_arg1A(%class.A addrspace(5)* %a)
+// CHECK:  %p = alloca %class.A*, align 8, addrspace(5)
+// CHECK:  %[[r1:.+]] = addrspacecast %class.A* addrspace(5)* %p to %class.A**
+// CHECK:  %[[r0:.+]] = addrspacecast %class.A addrspace(5)* %a to %class.A*
+// CHECK:  store %class.A* %[[r0]], %class.A** %[[r1]], align 8
+void func_with_indirect_arg(A a) {
+  A *p = &a;
+}
+
+// CHECK-LABEL: define void @_Z22test_indirect_arg_autov()
+// CHECK:  %a = alloca %class.A, align 4, addrspace(5)
+// CHECK:  %[[r0:.+]] = addrspacecast %class.A addrspace(5)* %a to %class.A*
+// CHECK:  %agg.tmp = alloca %class.A, align 4, addrspace(5)
+// CHECK:  %[[r1:.+]] = addrspacecast %class.A addrspace(5)* %agg.tmp to %class.A*
+// CHECK:  call void @_ZN1AC1Ev(%class.A* %[[r0]])
+// CHECK:  call void @llvm.memcpy.p0i8.p0i8.i64
+// CHECK:  %[[r4:.+]] = addrspacecast %class.A* %[[r1]] to %class.A addrspace(5)*
+// CHECK:  call void @_Z22func_with_indirect_arg1A(%class.A addrspace(5)* %[[r4]])
+// CHECK:  call void @_ZN1AD1Ev(%class.A* %[[r1]])
+// CHECK:  call void @_Z17func_with_ref_argR1A(%class.A* dereferenceable(4) %[[r0]])
+// CHECK:  call void @_ZN1AD1Ev(%class.A* %[[r0]])
+void test_indirect_arg_auto() {
+  A a;
+  func_with_indirect_arg(a);
+  func_with_ref_arg(a);
+}
+
+// CHECK: define void @_Z24test_indirect_arg_globalv()
+// CHECK:  %agg.tmp = alloca %class.A, align 4, addrspace(5)
+// CHECK:  %[[r0:.+]] = addrspacecast %class.A addrspace(5)* %agg.tmp to %class.A*
+// CHECK:  call void @llvm.memcpy.p0i8.p0i8.i64
+// CHECK:  %[[r2:.+]] = addrspacecast %class.A* %[[r0]] to %class.A addrspace(5)*
+// CHECK:  call void @_Z22func_with_indirect_arg1A(%class.A addrspace(5)* %[[r2]])
+// CHECK:  call void @_ZN1AD1Ev(%class.A* %[[r0]])
+// CHECK:  call void @_Z17func_with_ref_argR1A(%class.A* dereferenceable(4) addrspacecast (%class.A addrspace(1)* @g_a to %class.A*))
+void test_indirect_arg_global() {
+  func_with_indirect_arg(g_a);
+  func_with_ref_arg(g_a);
+}
+
+// CHECK-LABEL: define void @_Z19func_with_byval_arg1B(%class.B addrspace(5)* byval align 4 %b)
+// CHECK:  %p = alloca %class.B*, align 8, addrspace(5)
+// CHECK:  %[[r1:.+]] = addrspacecast %class.B* addrspace(5)* %p to %class.B**
+// CHECK:  %[[r0:.+]] = addrspacecast %class.B addrspace(5)* %b to %class.B*
+// CHECK:  store %class.B* %[[r0]], %class.B** %[[r1]], align 8
+void func_with_byval_arg(B b) {
+  B *p = &b;
+}
+
+// CHECK-LABEL: define void @_Z19test_byval_arg_autov()
+// CHECK:  %b = alloca %class.B, align 4, addrspace(5)
+// CHECK:  %[[r0:.+]] = addrspacecast %class.B addrspace(5)* %b to %class.B*
+// CHECK:  %agg.tmp = alloca %class.B, align 4, addrspace(5)
+// CHECK:  %[[r1:.+]] = addrspacecast %class.B addrspace(5)* %agg.tmp to %class.B*
+// CHECK:  call void @llvm.memcpy.p0i8.p0i8.i64
+// CHECK:  %[[r4:.+]] = addrspacecast %class.B* %[[r1]] to %class.B addrspace(5)*
+// CHECK:  call void @_Z19func_with_byval_arg1B(%class.B addrspace(5)* byval align 4 %[[r4]])
+// CHECK:  call void @_Z17func_with_ref_argR1B(%class.B* dereferenceable(400) %[[r0]])
+void test_byval_arg_auto() {
+  B b;
+  func_with_byval_arg(b);
+  func_with_ref_arg(b);
+}
+
+// CHECK-LABEL: define void @_Z21test_byval_arg_globalv()
+// CHECK:  %agg.tmp = alloca %class.B, align 4, addrspace(5)
+// CHECK:  %[[r0:.+]] = addrspacecast %class.B addrspace(5)* %agg.tmp to %class.B*
+// CHECK:  call void @llvm.memcpy.p0i8.p0i8.i64
+// CHECK:  %[[r2:.+]] = addrspacecast %class.B* %[[r0]] to %class.B addrspace(5)*
+// CHECK:  call void @_Z19func_with_byval_arg1B(%class.B addrspace(5)* byval align 4 %[[r2]])
+// CHECK:  call void @_Z17func_with_ref_argR1B(%class.B* dereferenceable(400) addrspacecast (%class.B addrspace(1)* @g_b to %class.B*))
+void test_byval_arg_global() {
+  func_with_byval_arg(g_b);
+  func_with_ref_arg(g_b);
+}
Index: lib/CodeGen/CGDecl.cpp
===================================================================
--- lib/CodeGen/CGDecl.cpp
+++ lib/CodeGen/CGDecl.cpp
@@ -1821,6 +1821,23 @@
     llvm::Type *IRTy = ConvertTypeForMem(Ty)->getPointerTo(AS);
     if (DeclPtr.getType() != IRTy)
       DeclPtr = Builder.CreateBitCast(DeclPtr, IRTy, D.getName());
+    // Indirect argument is in alloca address space, which may be different
+    // from the default address space.
+    auto AllocaAS = CGM.getASTAllocaAddressSpace();
+    auto *V = DeclPtr.getPointer();
+    // TODO: After adding LangAS::opencl_private, change SrcLangAS and
+    // DestLangAS to opencl_private for OpenCL.
+    auto SrcLangAS = getLangOpts().OpenCL ? LangAS::Default : AllocaAS;
+    auto DestLangAS = LangAS::Default;
+    if (SrcLangAS != DestLangAS) {
+      assert(getContext().getTargetAddressSpace(SrcLangAS) ==
+             CGM.getDataLayout().getAllocaAddrSpace());
+      auto DestAS = getContext().getTargetAddressSpace(DestLangAS);
+      auto *T = V->getType()->getPointerElementType()->getPointerTo(DestAS);
+      DeclPtr = Address(getTargetHooks().performAddrSpaceCast(
+                            *this, V, SrcLangAS, DestLangAS, T, true),
+                        DeclPtr.getAlignment());
+    }
 
     // Push a destructor cleanup for this parameter if the ABI requires it.
     // Don't push a cleanup in a thunk for a method that will also emit a
Index: lib/CodeGen/CGCall.h
===================================================================
--- lib/CodeGen/CGCall.h
+++ lib/CodeGen/CGCall.h
@@ -172,9 +172,10 @@
     RValue RV;
     QualType Ty;
     bool NeedsCopy;
-    CallArg(RValue rv, QualType ty, bool needscopy)
-    : RV(rv), Ty(ty), NeedsCopy(needscopy)
-    { }
+    unsigned AS; /* Address space of indirect argument */
+    CallArg(RValue rv, QualType ty, bool needscopy,
+            unsigned _AS = LangAS::Default)
+        : RV(rv), Ty(ty), NeedsCopy(needscopy), AS(_AS) {}
   };
 
   /// CallArgList - Type for representing both the value and type of
@@ -204,8 +205,9 @@
       llvm::Instruction *IsActiveIP;
     };
 
-    void add(RValue rvalue, QualType type, bool needscopy = false) {
-      push_back(CallArg(rvalue, type, needscopy));
+    void add(RValue rvalue, QualType type, bool needscopy = false,
+             unsigned AS = LangAS::Default) {
+      push_back(CallArg(rvalue, type, needscopy, AS));
     }
 
     /// Add all the arguments from another CallArgList to this one. After doing
Index: lib/CodeGen/CGCall.cpp
===================================================================
--- lib/CodeGen/CGCall.cpp
+++ lib/CodeGen/CGCall.cpp
@@ -3454,6 +3454,12 @@
 void CodeGenFunction::EmitCallArg(CallArgList &args, const Expr *E,
                                   QualType type) {
   DisableDebugLocationUpdates Dis(*this, E);
+  unsigned AS = 0;
+  if (isa<ImplicitCastExpr>(E) &&
+      cast<CastExpr>(E)->getCastKind() == CK_LValueToRValue) {
+    AS = cast<CastExpr>(E)->getSubExpr()->getType().getAddressSpace();
+  }
+
   if (const ObjCIndirectCopyRestoreExpr *CRE
         = dyn_cast<ObjCIndirectCopyRestoreExpr>(E)) {
     assert(getLangOpts().ObjCAutoRefCount);
@@ -3465,7 +3471,7 @@
 
   if (E->isGLValue()) {
     assert(E->getObjectKind() == OK_Ordinary);
-    return args.add(EmitReferenceBindingToExpr(E), type);
+    return args.add(EmitReferenceBindingToExpr(E), type, false, AS);
   }
 
   bool HasAggregateEvalKind = hasAggregateEvaluationKind(type);
@@ -3492,7 +3498,7 @@
 
     EmitAggExpr(E, Slot);
     RValue RV = Slot.asRValue();
-    args.add(RV, type);
+    args.add(RV, type, false, AS);
 
     if (DestroyedInCallee) {
       // Create a no-op GEP between the placeholder and the cleanup so we can
@@ -3512,18 +3518,18 @@
     LValue L = EmitLValue(cast<CastExpr>(E)->getSubExpr());
     assert(L.isSimple());
     if (L.getAlignment() >= getContext().getTypeAlignInChars(type)) {
-      args.add(L.asAggregateRValue(), type, /*NeedsCopy*/true);
+      args.add(L.asAggregateRValue(), type, /*NeedsCopy*/ true, AS);
     } else {
       // We can't represent a misaligned lvalue in the CallArgList, so copy
       // to an aligned temporary now.
       Address tmp = CreateMemTemp(type);
       EmitAggregateCopy(tmp, L.getAddress(), type, L.isVolatile());
-      args.add(RValue::getAggregate(tmp), type);
+      args.add(RValue::getAggregate(tmp), type, false, AS);
     }
     return;
   }
 
-  args.add(EmitAnyExprToTemp(E), type);
+  args.add(EmitAnyExprToTemp(E), type, false, AS);
 }
 
 QualType CodeGenFunction::getVarArgType(const Expr *Arg) {
@@ -3819,6 +3825,12 @@
     }
 
     case ABIArgInfo::Indirect: {
+      auto CastToAllocaAddrSpace = [&](llvm::Value *V) {
+        auto *T = V->getType()->getPointerElementType()->getPointerTo(
+            CGM.getDataLayout().getAllocaAddrSpace());
+        return getTargetHooks().performAddrSpaceCast(
+            *this, V, LangAS::Default, CGM.getASTAllocaAddressSpace(), T, true);
+      };
       assert(NumIRArgs == 1);
       if (RV.isScalar() || RV.isComplex()) {
         // Make a temporary alloca to pass the argument.
@@ -3836,29 +3848,29 @@
         // 2. If the argument is byval, RV is not sufficiently aligned, and
         //    we cannot force it to be sufficiently aligned.
         // 3. If the argument is byval, but RV is located in an address space
-        //    different than that of the argument (0).
+        //    different than that of the argument (alloca address space).
         Address Addr = RV.getAggregateAddress();
         CharUnits Align = ArgInfo.getIndirectAlign();
         const llvm::DataLayout *TD = &CGM.getDataLayout();
-        const unsigned RVAddrSpace = Addr.getType()->getAddressSpace();
-        const unsigned ArgAddrSpace =
-            (FirstIRArg < IRFuncTy->getNumParams()
-                 ? IRFuncTy->getParamType(FirstIRArg)->getPointerAddressSpace()
-                 : 0);
+        const unsigned RVAddrSpace = I->AS;
+        assert((FirstIRArg >= IRFuncTy->getNumParams() ||
+             IRFuncTy->getParamType(FirstIRArg)->getPointerAddressSpace() ==
+             TD->getAllocaAddrSpace()) && "indirect argument must be in alloca address space");
         if ((!ArgInfo.getIndirectByVal() && I->NeedsCopy) ||
             (ArgInfo.getIndirectByVal() && Addr.getAlignment() < Align &&
              llvm::getOrEnforceKnownAlignment(Addr.getPointer(),
-                                              Align.getQuantity(), *TD)
-               < Align.getQuantity()) ||
-            (ArgInfo.getIndirectByVal() && (RVAddrSpace != ArgAddrSpace))) {
+                                              Align.getQuantity(),
+                                              *TD) < Align.getQuantity()) ||
+            (ArgInfo.getIndirectByVal() && RVAddrSpace != LangAS::Default &&
+             RVAddrSpace != CGM.getASTAllocaAddressSpace())) {
           // Create an aligned temporary, and copy to it.
           Address AI = CreateMemTemp(I->Ty, ArgInfo.getIndirectAlign(),
                                      "byval-temp", false);
           IRCallArgs[FirstIRArg] = AI.getPointer();
           EmitAggregateCopy(AI, Addr, I->Ty, RV.isVolatileQualified());
         } else {
           // Skip the extra memcpy call.
-          IRCallArgs[FirstIRArg] = Addr.getPointer();
+          IRCallArgs[FirstIRArg] = CastToAllocaAddrSpace(Addr.getPointer());
         }
       }
       break;
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to