[PATCH] D44985: Remove initializer for CUDA shared varirable

2018-03-29 Thread Yaxun Liu via Phabricator via cfe-commits
yaxunl updated this revision to Diff 140310.
yaxunl added a comment.

Revised by John's comments. Also simplified the test by Artem's comments.


https://reviews.llvm.org/D44985

Files:
  lib/CodeGen/CGDecl.cpp
  test/CodeGenCUDA/address-spaces.cu
  test/CodeGenCUDA/device-var-init.cu

Index: test/CodeGenCUDA/device-var-init.cu
===
--- test/CodeGenCUDA/device-var-init.cu
+++ test/CodeGenCUDA/device-var-init.cu
@@ -1,10 +1,14 @@
 // REQUIRES: nvptx-registered-target
+// REQUIRES: amdgpu-registered-target
 
 // Make sure we don't allow dynamic initialization for device
 // variables, but accept empty constructors allowed by CUDA.
 
 // RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fcuda-is-device -std=c++11 \
-// RUN: -fno-threadsafe-statics -emit-llvm -o - %s | FileCheck %s
+// RUN: -fno-threadsafe-statics -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK,NVPTX %s
+
+// RUN: %clang_cc1 -triple amdgcn -fcuda-is-device -std=c++11 \
+// RUN: -fno-threadsafe-statics -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK,AMDGCN %s
 
 #ifdef __clang__
 #include "Inputs/cuda.h"
@@ -105,68 +109,120 @@
 __constant__ EC_I_EC c_ec_i_ec;
 // CHECK: @c_ec_i_ec = addrspace(4) externally_initialized global %struct.EC_I_EC zeroinitializer,
 
+// CHECK: @_ZZ2dfvE4s_ec = internal addrspace(3) global %struct.EC undef
+// CHECK: @_ZZ2dfvE5s_etc = internal addrspace(3) global %struct.ETC undef
+
 // We should not emit global initializers for device-side variables.
 // CHECK-NOT: @__cxx_global_var_init
 
 // Make sure that initialization restrictions do not apply to local
 // variables.
 __device__ void df() {
+  // NVPTX:  %[[ec:.*]] = alloca %struct.EC
+  // NVPTX:  %[[ed:.*]] = alloca %struct.ED
+  // NVPTX:  %[[ecd:.*]] = alloca %struct.ECD
+  // NVPTX:  %[[etc:.*]] = alloca %struct.ETC
+  // NVPTX:  %[[uc:.*]] = alloca %struct.UC
+  // NVPTX:  %[[ud:.*]] = alloca %struct.UD
+  // NVPTX:  %[[eci:.*]] = alloca %struct.ECI
+  // NVPTX:  %[[nec:.*]] = alloca %struct.NEC
+  // NVPTX:  %[[ned:.*]] = alloca %struct.NED
+  // NVPTX:  %[[ncv:.*]] = alloca %struct.NCV
+  // NVPTX:  %[[vd:.*]] = alloca %struct.VD
+  // NVPTX:  %[[ncf:.*]] = alloca %struct.NCF
+  // NVPTX:  %[[ncfs:.*]] = alloca %struct.NCFS
+  // NVPTX:  %[[utc:.*]] = alloca %struct.UTC
+  // NVPTX:  %[[netc:.*]] = alloca %struct.NETC
+  // NVPTX:  %[[ec_i_ec:.*]] = alloca %struct.EC_I_EC
+  // NVPTX:  %[[ec_i_ec1:.*]] = alloca %struct.EC_I_EC1
+  // NVPTX:  %[[t_v_t:.*]] = alloca %struct.T_V_T
+  // NVPTX:  %[[t_b_nec:.*]] = alloca %struct.T_B_NEC
+  // NVPTX:  %[[t_f_nec:.*]] = alloca %struct.T_F_NEC
+  // NVPTX:  %[[t_fa_nec:.*]] = alloca %struct.T_FA_NEC
+  // NVPTX:  %[[t_b_ned:.*]] = alloca %struct.T_B_NED
+  // NVPTX:  %[[t_f_ned:.*]] = alloca %struct.T_F_NED
+  // NVPTX:  %[[t_fa_ned:.*]] = alloca %struct.T_FA_NED
+  // AMDGCN:  %[[ec:.*]] = addrspacecast %struct.EC addrspace(5)* %ec to %struct.EC*
+  // AMDGCN:  %[[ed:.*]] = addrspacecast %struct.ED addrspace(5)* %ed to %struct.ED*
+  // AMDGCN:  %[[ecd:.*]] = addrspacecast %struct.ECD addrspace(5)* %ecd to %struct.ECD*
+  // AMDGCN:  %[[etc:.*]] = addrspacecast %struct.ETC addrspace(5)* %etc to %struct.ETC*
+  // AMDGCN:  %[[uc:.*]] = addrspacecast %struct.UC addrspace(5)* %uc to %struct.UC*
+  // AMDGCN:  %[[ud:.*]] = addrspacecast %struct.UD addrspace(5)* %ud to %struct.UD*
+  // AMDGCN:  %[[eci:.*]] = addrspacecast %struct.ECI addrspace(5)* %eci to %struct.ECI*
+  // AMDGCN:  %[[nec:.*]] = addrspacecast %struct.NEC addrspace(5)* %nec to %struct.NEC*
+  // AMDGCN:  %[[ned:.*]] = addrspacecast %struct.NED addrspace(5)* %ned to %struct.NED*
+  // AMDGCN:  %[[ncv:.*]] = addrspacecast %struct.NCV addrspace(5)* %ncv to %struct.NCV*
+  // AMDGCN:  %[[vd:.*]] = addrspacecast %struct.VD addrspace(5)* %vd to %struct.VD*
+  // AMDGCN:  %[[ncf:.*]] = addrspacecast %struct.NCF addrspace(5)* %ncf to %struct.NCF*
+  // AMDGCN:  %[[ncfs:.*]] = addrspacecast %struct.NCFS addrspace(5)* %ncfs to %struct.NCFS*
+  // AMDGCN:  %[[utc:.*]] = addrspacecast %struct.UTC addrspace(5)* %utc to %struct.UTC*
+  // AMDGCN:  %[[netc:.*]] = addrspacecast %struct.NETC addrspace(5)* %netc to %struct.NETC*
+  // AMDGCN:  %[[ec_i_ec:.*]] = addrspacecast %struct.EC_I_EC addrspace(5)* %ec_i_ec to %struct.EC_I_EC*
+  // AMDGCN:  %[[ec_i_ec1:.*]] = addrspacecast %struct.EC_I_EC1 addrspace(5)* %ec_i_ec1 to %struct.EC_I_EC1*
+  // AMDGCN:  %[[t_v_t:.*]] = addrspacecast %struct.T_V_T addrspace(5)* %t_v_t to %struct.T_V_T*
+  // AMDGCN:  %[[t_b_nec:.*]] = addrspacecast %struct.T_B_NEC addrspace(5)* %t_b_nec to %struct.T_B_NEC*
+  // AMDGCN:  %[[t_f_nec:.*]] = addrspacecast %struct.T_F_NEC addrspace(5)* %t_f_nec to %struct.T_F_NEC*
+  // AMDGCN:  %[[t_fa_nec:.*]] = addrspacecast %struct.T_FA_NEC addrspace(5)* %t_fa_nec to %struct.T_FA_NEC*
+  // AMDGCN:  %[[t_b_ned:.*]] = addrspacecast %struct.T_B_NED addrspace(5)* %t_b_ned to %struct.T_B_NED*
+  // AMDGCN:  %[[t_f_ned:.*]] 

[PATCH] D44985: Remove initializer for CUDA shared varirable

2018-03-29 Thread John McCall via Phabricator via cfe-commits
rjmccall added a comment.

In https://reviews.llvm.org/D44985#1051840, @yaxunl wrote:

> In https://reviews.llvm.org/D44985#1050876, @rjmccall wrote:
>
> > In https://reviews.llvm.org/D44985#1050674, @yaxunl wrote:
> >
> > > In https://reviews.llvm.org/D44985#1050670, @rjmccall wrote:
> > >
> > > > What exactly are you trying to express here?  Are you just trying to 
> > > > make these external declarations when compiling for the device because 
> > > > `__shared__` variables are actually defined on the host?  That should 
> > > > be handled by the frontend by setting up the AST so that these 
> > > > declarations are not definitions.
> > >
> > >
> > > No. These variables are not like external symbols defined on the host. 
> > > They behave like global variables in the kernel code but never 
> > > initialized. Currently no targets are able to initialize them and it is 
> > > users' responsibility to initialize them explicitly.
> > >
> > > Giving them an initial value will cause error in some backends since they 
> > > cannot handle them, therefore put undef as initializer.
> >
> >
> > So undef is being used as a special marker to the backends that it's okay 
> > not to try to initialize these variables?
>
>
> I think undef as the initializer tells the llvm passes and backend that this 
> global variable contains undefined value. I am not sure if this is better 
> than without an initializer. I saw code in 
> CodeGenModule::getOrCreateStaticVarDecl
>
>   // Local address space cannot have an initializer.
>   llvm::Constant *Init = nullptr;
>   if (Ty.getAddressSpace() != LangAS::opencl_local)
> Init = EmitNullConstant(Ty);
>   else
> Init = llvm::UndefValue::get(LTy);
>   
>   
>
> which means OpenCL static variable in local address space (equivalent to CUDA 
> shared address space) gets an undef initializer.
>
> For CUDA shared variable, in CodeGenFunction::EmitStaticVarDecl, it first 
> goes through call of CodeGenModule::getOrCreateStaticVarDecl and gets a 
> zeroinitializer, then it reaches line 400
>
>   // Whatever initializer such variable may have when it gets here is
> // a no-op and should not be emitted.
> bool isCudaSharedVar = getLangOpts().CUDA && getLangOpts().CUDAIsDevice &&
>D.hasAttr();
> // If this value has an initializer, emit it.
> if (D.getInit() && !isCudaSharedVar)
>   var = AddInitializerToStaticVarDecl(D, var);
>  
>
>
> Although this disables adding initializer from D, var already has a 
> zeroinitializer from CodeGenModule::getOrCreateStaticVarDecl, therefore its 
> initializer needs to be overwritten by undef.
>
> Probably a better solution would be do it in  
> CodeGenModule::getOrCreateStaticVarDecl, side by side by the OpenCL code.


Yes, I agree, just updating the condition to trigger if either language mode is 
set is the right fix.


https://reviews.llvm.org/D44985



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D44985: Remove initializer for CUDA shared varirable

2018-03-29 Thread Yaxun Liu via Phabricator via cfe-commits
yaxunl added a comment.

In https://reviews.llvm.org/D44985#1050876, @rjmccall wrote:

> In https://reviews.llvm.org/D44985#1050674, @yaxunl wrote:
>
> > In https://reviews.llvm.org/D44985#1050670, @rjmccall wrote:
> >
> > > What exactly are you trying to express here?  Are you just trying to make 
> > > these external declarations when compiling for the device because 
> > > `__shared__` variables are actually defined on the host?  That should be 
> > > handled by the frontend by setting up the AST so that these declarations 
> > > are not definitions.
> >
> >
> > No. These variables are not like external symbols defined on the host. They 
> > behave like global variables in the kernel code but never initialized. 
> > Currently no targets are able to initialize them and it is users' 
> > responsibility to initialize them explicitly.
> >
> > Giving them an initial value will cause error in some backends since they 
> > cannot handle them, therefore put undef as initializer.
>
>
> So undef is being used as a special marker to the backends that it's okay not 
> to try to initialize these variables?


I think undef as the initializer tells the llvm passes and backend that this 
global variable contains undefined value. I am not sure if this is better than 
without an initializer. I saw code in CodeGenModule::getOrCreateStaticVarDecl

  // Local address space cannot have an initializer.
  llvm::Constant *Init = nullptr;
  if (Ty.getAddressSpace() != LangAS::opencl_local)
Init = EmitNullConstant(Ty);
  else
Init = llvm::UndefValue::get(LTy);

which means OpenCL static variable in local address space (equivalent to CUDA 
shared address space) gets an undef initializer.

For CUDA shared variable, in CodeGenFunction::EmitStaticVarDecl, it first goes 
through call of CodeGenModule::getOrCreateStaticVarDecl and gets a 
zeroinitializer, then it reaches line 400

  // Whatever initializer such variable may have when it gets here is
// a no-op and should not be emitted.
bool isCudaSharedVar = getLangOpts().CUDA && getLangOpts().CUDAIsDevice &&
   D.hasAttr();
// If this value has an initializer, emit it.
if (D.getInit() && !isCudaSharedVar)
  var = AddInitializerToStaticVarDecl(D, var);

Although this disables adding initializer from D, var already has a 
zeroinitializer from CodeGenModule::getOrCreateStaticVarDecl, therefore its 
initializer needs to be overwritten by undef.

Probably a better solution would be do it in  
CodeGenModule::getOrCreateStaticVarDecl, side by side by the OpenCL code.


https://reviews.llvm.org/D44985



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D44985: Remove initializer for CUDA shared varirable

2018-03-28 Thread John McCall via Phabricator via cfe-commits
rjmccall added a comment.

In https://reviews.llvm.org/D44985#1050674, @yaxunl wrote:

> In https://reviews.llvm.org/D44985#1050670, @rjmccall wrote:
>
> > What exactly are you trying to express here?  Are you just trying to make 
> > these external declarations when compiling for the device because 
> > `__shared__` variables are actually defined on the host?  That should be 
> > handled by the frontend by setting up the AST so that these declarations 
> > are not definitions.
>
>
> No. These variables are not like external symbols defined on the host. They 
> behave like global variables in the kernel code but never initialized. 
> Currently no targets are able to initialize them and it is users' 
> responsibility to initialize them explicitly.
>
> Giving them an initial value will cause error in some backends since they 
> cannot handle them, therefore put undef as initializer.


So undef is being used as a special marker to the backends that it's okay not 
to try to initialize these variables?


https://reviews.llvm.org/D44985



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D44985: Remove initializer for CUDA shared varirable

2018-03-28 Thread Yaxun Liu via Phabricator via cfe-commits
yaxunl added a comment.

In https://reviews.llvm.org/D44985#1050670, @rjmccall wrote:

> What exactly are you trying to express here?  Are you just trying to make 
> these external declarations when compiling for the device because 
> `__shared__` variables are actually defined on the host?  That should be 
> handled by the frontend by setting up the AST so that these declarations are 
> not definitions.


No. These variables are not like external symbols defined on the host. They 
behave like global variables in the kernel code but never initialized. 
Currently no targets are able to initialize them and it is users' 
responsibility to initialize them explicitly.

Giving them an initial value will cause error in some backends since they 
cannot handle them, therefore put undef as initializer.


https://reviews.llvm.org/D44985



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D44985: Remove initializer for CUDA shared varirable

2018-03-28 Thread Artem Belevich via Phabricator via cfe-commits
tra added a comment.

In https://reviews.llvm.org/D44985#1050670, @rjmccall wrote:

> What exactly are you trying to express here?  Are you just trying to make 
> these external declarations when compiling for the device because 
> `__shared__` variables are actually defined on the host?  That should be 
> handled by the frontend by setting up the AST so that these declarations are 
> not definitions.


__shared__ vars (at least in CUDA) are weird. Local-scoped ones are implicitly 
static (which compiler will attempt to zero-init) but in CUDA __shared__ 
variables can't have static initializers and we don't know the value of such 
vars when we launch the kernel.


https://reviews.llvm.org/D44985



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D44985: Remove initializer for CUDA shared varirable

2018-03-28 Thread John McCall via Phabricator via cfe-commits
rjmccall added a comment.

What exactly are you trying to express here?  Are you just trying to make these 
external declarations when compiling for the device because `__shared__` 
variables are actually defined on the host?  That should be handled by the 
frontend by setting up the AST so that these declarations are not definitions.


https://reviews.llvm.org/D44985



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D44985: Remove initializer for CUDA shared varirable

2018-03-28 Thread Yaxun Liu via Phabricator via cfe-commits
yaxunl added inline comments.



Comment at: test/CodeGenCUDA/device-var-init.cu:121
 __device__ void df() {
+  // AMDGCN:  %[[ec:.*]] = addrspacecast %struct.EC addrspace(5)* %ec to 
%struct.EC*
+  // AMDGCN:  %[[ed:.*]] = addrspacecast %struct.ED addrspace(5)* %ed to 
%struct.ED*

tra wrote:
> Perhaps it would make sense to capture there names for NVPTX as well and 
> avoid duplicating all the checks below.
will do when committing.


https://reviews.llvm.org/D44985



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D44985: Remove initializer for CUDA shared varirable

2018-03-28 Thread Artem Belevich via Phabricator via cfe-commits
tra added inline comments.



Comment at: test/CodeGenCUDA/device-var-init.cu:121
 __device__ void df() {
+  // AMDGCN:  %[[ec:.*]] = addrspacecast %struct.EC addrspace(5)* %ec to 
%struct.EC*
+  // AMDGCN:  %[[ed:.*]] = addrspacecast %struct.ED addrspace(5)* %ed to 
%struct.ED*

Perhaps it would make sense to capture there names for NVPTX as well and avoid 
duplicating all the checks below.


https://reviews.llvm.org/D44985



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D44985: Remove initializer for CUDA shared varirable

2018-03-28 Thread Yaxun Liu via Phabricator via cfe-commits
yaxunl updated this revision to Diff 140110.
yaxunl retitled this revision from "Disable zeroinitializer for CUDA shared 
varirable for amdgcn target" to "Remove initializer for CUDA shared varirable".
yaxunl edited the summary of this revision.
yaxunl added a reviewer: tra.
yaxunl added a comment.

Revised by Artem's comments.


https://reviews.llvm.org/D44985

Files:
  lib/CodeGen/CGDecl.cpp
  test/CodeGenCUDA/address-spaces.cu
  test/CodeGenCUDA/device-var-init.cu

Index: test/CodeGenCUDA/device-var-init.cu
===
--- test/CodeGenCUDA/device-var-init.cu
+++ test/CodeGenCUDA/device-var-init.cu
@@ -1,10 +1,14 @@
 // REQUIRES: nvptx-registered-target
+// REQUIRES: amdgpu-registered-target
 
 // Make sure we don't allow dynamic initialization for device
 // variables, but accept empty constructors allowed by CUDA.
 
 // RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fcuda-is-device -std=c++11 \
-// RUN: -fno-threadsafe-statics -emit-llvm -o - %s | FileCheck %s
+// RUN: -fno-threadsafe-statics -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK,NVPTX %s
+
+// RUN: %clang_cc1 -triple amdgcn -fcuda-is-device -std=c++11 \
+// RUN: -fno-threadsafe-statics -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK,AMDGCN %s
 
 #ifdef __clang__
 #include "Inputs/cuda.h"
@@ -105,68 +109,114 @@
 __constant__ EC_I_EC c_ec_i_ec;
 // CHECK: @c_ec_i_ec = addrspace(4) externally_initialized global %struct.EC_I_EC zeroinitializer,
 
+// CHECK: @_ZZ2dfvE4s_ec = internal addrspace(3) global %struct.EC undef
+// CHECK: @_ZZ2dfvE5s_etc = internal addrspace(3) global %struct.ETC undef
+
 // We should not emit global initializers for device-side variables.
 // CHECK-NOT: @__cxx_global_var_init
 
 // Make sure that initialization restrictions do not apply to local
 // variables.
 __device__ void df() {
+  // AMDGCN:  %[[ec:.*]] = addrspacecast %struct.EC addrspace(5)* %ec to %struct.EC*
+  // AMDGCN:  %[[ed:.*]] = addrspacecast %struct.ED addrspace(5)* %ed to %struct.ED*
+  // AMDGCN:  %[[ecd:.*]] = addrspacecast %struct.ECD addrspace(5)* %ecd to %struct.ECD*
+  // AMDGCN:  %[[etc:.*]] = addrspacecast %struct.ETC addrspace(5)* %etc to %struct.ETC*
+  // AMDGCN:  %[[uc:.*]] = addrspacecast %struct.UC addrspace(5)* %uc to %struct.UC*
+  // AMDGCN:  %[[ud:.*]] = addrspacecast %struct.UD addrspace(5)* %ud to %struct.UD*
+  // AMDGCN:  %[[eci:.*]] = addrspacecast %struct.ECI addrspace(5)* %eci to %struct.ECI*
+  // AMDGCN:  %[[nec:.*]] = addrspacecast %struct.NEC addrspace(5)* %nec to %struct.NEC*
+  // AMDGCN:  %[[ned:.*]] = addrspacecast %struct.NED addrspace(5)* %ned to %struct.NED*
+  // AMDGCN:  %[[ncv:.*]] = addrspacecast %struct.NCV addrspace(5)* %ncv to %struct.NCV*
+  // AMDGCN:  %[[vd:.*]] = addrspacecast %struct.VD addrspace(5)* %vd to %struct.VD*
+  // AMDGCN:  %[[ncf:.*]] = addrspacecast %struct.NCF addrspace(5)* %ncf to %struct.NCF*
+  // AMDGCN:  %[[ncfs:.*]] = addrspacecast %struct.NCFS addrspace(5)* %ncfs to %struct.NCFS*
+  // AMDGCN:  %[[utc:.*]] = addrspacecast %struct.UTC addrspace(5)* %utc to %struct.UTC*
+  // AMDGCN:  %[[netc:.*]] = addrspacecast %struct.NETC addrspace(5)* %netc to %struct.NETC*
+  // AMDGCN:  %[[ec_i_ec:.*]] = addrspacecast %struct.EC_I_EC addrspace(5)* %ec_i_ec to %struct.EC_I_EC*
+  // AMDGCN:  %[[ec_i_ec1:.*]] = addrspacecast %struct.EC_I_EC1 addrspace(5)* %ec_i_ec1 to %struct.EC_I_EC1*
+  // AMDGCN:  %[[t_v_t:.*]] = addrspacecast %struct.T_V_T addrspace(5)* %t_v_t to %struct.T_V_T*
+  // AMDGCN:  %[[t_b_nec:.*]] = addrspacecast %struct.T_B_NEC addrspace(5)* %t_b_nec to %struct.T_B_NEC*
+  // AMDGCN:  %[[t_f_nec:.*]] = addrspacecast %struct.T_F_NEC addrspace(5)* %t_f_nec to %struct.T_F_NEC*
+  // AMDGCN:  %[[t_fa_nec:.*]] = addrspacecast %struct.T_FA_NEC addrspace(5)* %t_fa_nec to %struct.T_FA_NEC*
+  // AMDGCN:  %[[t_b_ned:.*]] = addrspacecast %struct.T_B_NED addrspace(5)* %t_b_ned to %struct.T_B_NED*
+  // AMDGCN:  %[[t_f_ned:.*]] = addrspacecast %struct.T_F_NED addrspace(5)* %t_f_ned to %struct.T_F_NED*
+  // AMDGCN:  %[[t_fa_ned:.*]] = addrspacecast %struct.T_FA_NED addrspace(5)* %t_fa_ned to %struct.T_FA_NED*
+
   T t;
   // CHECK-NOT: call
   EC ec;
-  // CHECK:   call void @_ZN2ECC1Ev(%struct.EC* %ec)
+  // NVPTX:   call void @_ZN2ECC1Ev(%struct.EC* %ec)
+  // AMDGCN:  call void @_ZN2ECC1Ev(%struct.EC* %[[ec]])
   ED ed;
   // CHECK-NOT: call
   ECD ecd;
-  // CHECK:   call void @_ZN3ECDC1Ev(%struct.ECD* %ecd)
+  // NVPTX:   call void @_ZN3ECDC1Ev(%struct.ECD* %ecd)
+  // AMDGCN:  call void @_ZN3ECDC1Ev(%struct.ECD* %[[ecd]])
   ETC etc;
-  // CHECK:   call void @_ZN3ETCC1IJEEEDpT_(%struct.ETC* %etc)
+  // NVPTX:   call void @_ZN3ETCC1IJEEEDpT_(%struct.ETC* %etc)
+  // AMDGCN:  call void @_ZN3ETCC1IJEEEDpT_(%struct.ETC* %[[etc]])
   UC uc;
   // undefined constructor -- not allowed
-  // CHECK:   call void @_ZN2UCC1Ev(%struct.UC* %uc)
+  // NVPTX:   call void @_ZN2UCC1Ev(%struct.UC* %uc)
+  // AMDGCN:  call void