Re: [PATCH] D15305: [CUDA] Do not allow dynamic initialization of global device side variables.
tra added inline comments. Comment at: lib/Sema/SemaCUDA.cpp:429-430 @@ +428,4 @@ + CXXConstructorDecl *CD) { + if (!CD->isDefined() && CD->isTemplateInstantiation()) +InstantiateFunctionDefinition(VarLoc, CD->getFirstDecl()); + rsmith wrote: > The function might still not be defined after this (if the template is not > defined); you should presumably return `false` here in that case. I don't think it's needed. If it's still not definied, it will be caught by hasTrivialBody() check below. Comment at: lib/Sema/SemaDecl.cpp:10191-10198 @@ +10190,10 @@ + bool AllowedInit = false; + if (const CXXConstructExpr *CE = dyn_cast(Init)) +AllowedInit = +isEmptyCudaConstructor(VD->getLocation(), CE->getConstructor()); + else if ((VD->hasAttr() || +VD->hasAttr()) && + VD->getInit()->isConstantInitializer( + Context, VD->getType()->isReferenceType())) +AllowedInit = true; + rsmith wrote: > What should happen if the init is a constant initializer that is a > `CXXConstructExpr`, but it uses a constructor that is not empty from CUDA's > perspective? Such as: > > struct X { constexpr X() { int n = 0; } }; > __device__ X x; > > I would assume this should be valid, but I think you'll reject it. Maybe > change `else if (` to `if (!AllowedInit &&`? NVCC produces an error (probably because it does not support c++14): zz.cu(1): error: statement may not appear in a constexpr constructor clang w/ this patch indeed considers it to be a non-empty initializer and produces an error. I agree that allowing constant initializer is the right thing to do. Your example requires c++14, so there's no direct comparison with nvcc, but I think allowing it is indeed the right thing to do here. Repository: rL LLVM http://reviews.llvm.org/D15305 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D15305: [CUDA] Do not allow dynamic initialization of global device side variables.
This revision was automatically updated to reflect the committed changes. Closed by commit rL259592: [CUDA] Do not allow dynamic initialization of global device side variables. (authored by tra). Changed prior to commit: http://reviews.llvm.org/D15305?vs=46696&id=46707#toc Repository: rL LLVM http://reviews.llvm.org/D15305 Files: cfe/trunk/include/clang/Basic/DiagnosticSemaKinds.td cfe/trunk/include/clang/Sema/Sema.h cfe/trunk/lib/CodeGen/CGDeclCXX.cpp cfe/trunk/lib/CodeGen/CodeGenModule.cpp cfe/trunk/lib/Sema/SemaCUDA.cpp cfe/trunk/lib/Sema/SemaDecl.cpp cfe/trunk/test/CodeGenCUDA/device-var-init.cu Index: cfe/trunk/test/CodeGenCUDA/device-var-init.cu === --- cfe/trunk/test/CodeGenCUDA/device-var-init.cu +++ cfe/trunk/test/CodeGenCUDA/device-var-init.cu @@ -0,0 +1,393 @@ +// REQUIRES: nvptx-registered-target + +// Make sure we don't allow dynamic initialization for device +// variables, but accept empty constructors allowed by CUDA. + +// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fcuda-is-device -std=c++11 \ +// RUN: -fno-threadsafe-statics -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fcuda-is-device -std=c++11 \ +// RUN: -emit-llvm -DERROR_CASE -verify -o /dev/null %s + +#ifdef __clang__ +#include "Inputs/cuda.h" +#endif + +// Base classes with different initializer variants. + +// trivial constructor -- allowed +struct T { + int t; +}; + +// empty constructor +struct EC { + int ec; + __device__ EC() {} // -- allowed + __device__ EC(int) {} // -- not allowed +}; + +// empty templated constructor -- allowed with no arguments +struct ETC { + template __device__ ETC(T...) {} +}; + +// undefined constructor -- not allowed +struct UC { + int uc; + __device__ UC(); +}; + +// empty constructor w/ initializer list -- not allowed +struct ECI { + int eci; + __device__ ECI() : eci(1) {} +}; + +// non-empty constructor -- not allowed +struct NEC { + int nec; + __device__ NEC() { nec = 1; } +}; + +// no-constructor, virtual method -- not allowed +struct NCV { + int ncv; + __device__ virtual void vm() {} +}; + +// dynamic in-class field initializer -- not allowed +__device__ int f(); +struct NCF { + int ncf = f(); +}; + +// static in-class field initializer. NVCC does not allow it, but +// clang generates static initializer for this, so we'll accept it. +struct NCFS { + int ncfs = 3; +}; + +// undefined templated constructor -- not allowed +struct UTC { + template __device__ UTC(T...); +}; + +// non-empty templated constructor -- not allowed +struct NETC { + int netc; + template __device__ NETC(T...) { netc = 1; } +}; + +__device__ int d_v; +// CHECK: @d_v = addrspace(1) externally_initialized global i32 0, +__shared__ int s_v; +// CHECK: @s_v = addrspace(3) global i32 undef, +__constant__ int c_v; +// CHECK: addrspace(4) externally_initialized global i32 0, + +__device__ int d_v_i = 1; +// CHECK: @d_v_i = addrspace(1) externally_initialized global i32 1, +#ifdef ERROR_CASE +__shared__ int s_v_i = 1; +// expected-error@-1 {{initialization is not supported for __shared__ variables.}} +#endif +__constant__ int c_v_i = 1; +// CHECK: @c_v_i = addrspace(4) externally_initialized global i32 1, + +#ifdef ERROR_CASE +__device__ int d_v_f = f(); +// expected-error@-1 {{dynamic initialization is not supported for __device__, __constant__, and __shared__ variables.}} +__shared__ int s_v_f = f(); +// expected-error@-1 {{initialization is not supported for __shared__ variables.}} +__constant__ int c_v_f = f(); +// expected-error@-1 {{dynamic initialization is not supported for __device__, __constant__, and __shared__ variables.}} +#endif + +__device__ T d_t; +// CHECK: @d_t = addrspace(1) externally_initialized global %struct.T zeroinitializer +__shared__ T s_t; +// CHECK: @s_t = addrspace(3) global %struct.T undef, +__constant__ T c_t; +// CHECK: @c_t = addrspace(4) externally_initialized global %struct.T zeroinitializer, + +__device__ T d_t_i = {2}; +// CHECKL @d_t_i = addrspace(1) externally_initialized global %struct.T { i32 2 }, +#ifdef ERROR_CASE +__shared__ T s_t_i = {2}; +// expected-error@-1 {{initialization is not supported for __shared__ variables.}} +#endif +__constant__ T c_t_i = {2}; +// CHECK: @c_t_i = addrspace(4) externally_initialized global %struct.T { i32 2 }, + +__device__ EC d_ec; +// CHECK: @d_ec = addrspace(1) externally_initialized global %struct.EC zeroinitializer, +__shared__ EC s_ec; +// CHECK: @s_ec = addrspace(3) global %struct.EC undef, +__constant__ EC c_ec; +// CHECK: @c_ec = addrspace(4) externally_initialized global %struct.EC zeroinitializer, + +#if ERROR_CASE +__device__ EC d_ec_i(3); +// expected-error@-1 {{dynamic initialization is not supported for __device__, __constant__, and __shared__ variables.}} +__shared__ EC s_ec_i(3); +// expected-error@-1 {{initialization is not supported for __shared__ variables.}} +__constant__ EC c_ec
Re: [PATCH] D15305: [CUDA] Do not allow dynamic initialization of global device side variables.
tra updated this revision to Diff 46696. tra marked 8 inline comments as done. tra added a comment. Addressed Richard's comments. Relaxed restrictions a bit to allow constant initializers even those CUDA would not considered to be empty. Updated test case accordingly. http://reviews.llvm.org/D15305 Files: include/clang/Basic/DiagnosticSemaKinds.td include/clang/Sema/Sema.h lib/CodeGen/CGDeclCXX.cpp lib/CodeGen/CodeGenModule.cpp lib/Sema/SemaCUDA.cpp lib/Sema/SemaDecl.cpp test/CodeGenCUDA/device-var-init.cu Index: test/CodeGenCUDA/device-var-init.cu === --- /dev/null +++ test/CodeGenCUDA/device-var-init.cu @@ -0,0 +1,393 @@ +// REQUIRES: nvptx-registered-target + +// Make sure we don't allow dynamic initialization for device +// variables, but accept empty constructors allowed by CUDA. + +// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fcuda-is-device -std=c++11 \ +// RUN: -fno-threadsafe-statics -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fcuda-is-device -std=c++11 \ +// RUN: -emit-llvm -DERROR_CASE -verify -o /dev/null %s + +#ifdef __clang__ +#include "Inputs/cuda.h" +#endif + +// Base classes with different initializer variants. + +// trivial constructor -- allowed +struct T { + int t; +}; + +// empty constructor +struct EC { + int ec; + __device__ EC() {} // -- allowed + __device__ EC(int) {} // -- not allowed +}; + +// empty templated constructor -- allowed with no arguments +struct ETC { + template __device__ ETC(T...) {} +}; + +// undefined constructor -- not allowed +struct UC { + int uc; + __device__ UC(); +}; + +// empty constructor w/ initializer list -- not allowed +struct ECI { + int eci; + __device__ ECI() : eci(1) {} +}; + +// non-empty constructor -- not allowed +struct NEC { + int nec; + __device__ NEC() { nec = 1; } +}; + +// no-constructor, virtual method -- not allowed +struct NCV { + int ncv; + __device__ virtual void vm() {} +}; + +// dynamic in-class field initializer -- not allowed +__device__ int f(); +struct NCF { + int ncf = f(); +}; + +// static in-class field initializer. NVCC does not allow it, but +// clang generates static initializer for this, so we'll accept it. +struct NCFS { + int ncfs = 3; +}; + +// undefined templated constructor -- not allowed +struct UTC { + template __device__ UTC(T...); +}; + +// non-empty templated constructor -- not allowed +struct NETC { + int netc; + template __device__ NETC(T...) { netc = 1; } +}; + +__device__ int d_v; +// CHECK: @d_v = addrspace(1) externally_initialized global i32 0, +__shared__ int s_v; +// CHECK: @s_v = addrspace(3) global i32 undef, +__constant__ int c_v; +// CHECK: addrspace(4) externally_initialized global i32 0, + +__device__ int d_v_i = 1; +// CHECK: @d_v_i = addrspace(1) externally_initialized global i32 1, +#ifdef ERROR_CASE +__shared__ int s_v_i = 1; +// expected-error@-1 {{initialization is not supported for __shared__ variables.}} +#endif +__constant__ int c_v_i = 1; +// CHECK: @c_v_i = addrspace(4) externally_initialized global i32 1, + +#ifdef ERROR_CASE +__device__ int d_v_f = f(); +// expected-error@-1 {{dynamic initialization is not supported for __device__, __constant__, and __shared__ variables.}} +__shared__ int s_v_f = f(); +// expected-error@-1 {{initialization is not supported for __shared__ variables.}} +__constant__ int c_v_f = f(); +// expected-error@-1 {{dynamic initialization is not supported for __device__, __constant__, and __shared__ variables.}} +#endif + +__device__ T d_t; +// CHECK: @d_t = addrspace(1) externally_initialized global %struct.T zeroinitializer +__shared__ T s_t; +// CHECK: @s_t = addrspace(3) global %struct.T undef, +__constant__ T c_t; +// CHECK: @c_t = addrspace(4) externally_initialized global %struct.T zeroinitializer, + +__device__ T d_t_i = {2}; +// CHECKL @d_t_i = addrspace(1) externally_initialized global %struct.T { i32 2 }, +#ifdef ERROR_CASE +__shared__ T s_t_i = {2}; +// expected-error@-1 {{initialization is not supported for __shared__ variables.}} +#endif +__constant__ T c_t_i = {2}; +// CHECK: @c_t_i = addrspace(4) externally_initialized global %struct.T { i32 2 }, + +__device__ EC d_ec; +// CHECK: @d_ec = addrspace(1) externally_initialized global %struct.EC zeroinitializer, +__shared__ EC s_ec; +// CHECK: @s_ec = addrspace(3) global %struct.EC undef, +__constant__ EC c_ec; +// CHECK: @c_ec = addrspace(4) externally_initialized global %struct.EC zeroinitializer, + +#if ERROR_CASE +__device__ EC d_ec_i(3); +// expected-error@-1 {{dynamic initialization is not supported for __device__, __constant__, and __shared__ variables.}} +__shared__ EC s_ec_i(3); +// expected-error@-1 {{initialization is not supported for __shared__ variables.}} +__constant__ EC c_ec_i(3); +// expected-error@-1 {{dynamic initialization is not supported for __device__, __constant__, and __shared__ variables.}} + +__device__ EC d_ec_i2 =
Re: [PATCH] D15305: [CUDA] Do not allow dynamic initialization of global device side variables.
rsmith accepted this revision. rsmith added a comment. This revision is now accepted and ready to land. Some minor things, but feel free to commit after addressing them. I agree that we should figure out what to do about the zero/undef initialization separately. Comment at: lib/Sema/SemaCUDA.cpp:429-430 @@ +428,4 @@ + CXXConstructorDecl *CD) { + if (!CD->isDefined() && CD->isTemplateInstantiation()) +InstantiateFunctionDefinition(VarLoc, CD->getFirstDecl()); + The function might still not be defined after this (if the template is not defined); you should presumably return `false` here in that case. Comment at: lib/Sema/SemaCUDA.cpp:442 @@ +441,3 @@ + // and the function body is an empty compound statement. + if (!(CD->isDefined() && CD->getNumParams() == 0 && CD->hasTrivialBody())) +return false; Please do remove the `isDefined` check here. Including it makes a reader wonder what case it's trying to handle. Comment at: lib/Sema/SemaCUDA.cpp:455-457 @@ +454,5 @@ + + // Its class has no virtual functions and no virtual base classes. + if (CD->getParent()->isDynamicClass()) +return false; + Maybe reorder this before the `CXXCtorInitializer` check? It's a much cheaper test. Comment at: lib/Sema/SemaDecl.cpp:10191-10198 @@ +10190,10 @@ + bool AllowedInit = false; + if (const CXXConstructExpr *CE = dyn_cast(Init)) +AllowedInit = +isEmptyCudaConstructor(VD->getLocation(), CE->getConstructor()); + else if ((VD->hasAttr() || +VD->hasAttr()) && + VD->getInit()->isConstantInitializer( + Context, VD->getType()->isReferenceType())) +AllowedInit = true; + What should happen if the init is a constant initializer that is a `CXXConstructExpr`, but it uses a constructor that is not empty from CUDA's perspective? Such as: struct X { constexpr X() { int n = 0; } }; __device__ X x; I would assume this should be valid, but I think you'll reject it. Maybe change `else if (` to `if (!AllowedInit &&`? Comment at: lib/Sema/SemaDecl.cpp:10196-10198 @@ +10195,5 @@ +VD->hasAttr()) && + VD->getInit()->isConstantInitializer( + Context, VD->getType()->isReferenceType())) +AllowedInit = true; + Might be clearer as if (__device__ || __constant__) AllowedInit = isConstantInitializer(...) http://reviews.llvm.org/D15305 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D15305: [CUDA] Do not allow dynamic initialization of global device side variables.
Richard, On Fri, Jan 15, 2016 at 5:32 PM, Richard Smith wrote: > On Fri, Jan 15, 2016 at 5:29 PM, Richard Smith > wrote: > > On Fri, Jan 15, 2016 at 4:22 PM, Artem Belevich wrote: > >> tra added inline comments. > >> > >> > >> Comment at: lib/CodeGen/CodeGenModule.cpp:2334 > >> @@ -2339,1 +2333,3 @@ > >> + D->hasAttr()) > >> Init = llvm::UndefValue::get(getTypes().ConvertType(ASTTy)); > >> + else if (!InitExpr) { > >> > >> rsmith wrote: > >>> As this is a global variable, it should presumably still be statically > zero-initialized. > >> There is no way to initialize __shared__ variables. They are rough > equivalent of local variables, only in this case CUDA allocates them per > kernel invocation from a shared buffer with no guarantees regarding its > contents. > >> > >> They used to be zero-initialized by compiler, but that was > intentionally changed to undef in r245786 / http://reviews.llvm.org/D12241 > > > > That doesn't seem right. C++ guarantees zero-initialization for all > > globals, prior to performing any other initialization. > > It looks like the problem being fixed by D12241 was probably caused by > the __shared__ variables having the wrong linkage. > I'll take a look at this separately as it's unrelated to this patch. I believe current patch addresses your other comments. --Artem -- --Artem Belevich ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D15305: [CUDA] Do not allow dynamic initialization of global device side variables.
jpienaar added a comment. @jlebar: We defer it to your and Richard's approval. Thanks http://reviews.llvm.org/D15305 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D15305: [CUDA] Do not allow dynamic initialization of global device side variables.
jlebar added a comment. jingyue/jpienaar/rsmith - friendly ping? Without this, -O0 builds don't work, because they emit empty global initializers that don't get optimized out. http://reviews.llvm.org/D15305 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D15305: [CUDA] Do not allow dynamic initialization of global device side variables.
tra marked 3 inline comments as done. Comment at: lib/Sema/SemaCUDA.cpp:436 @@ +435,3 @@ + if (CD->isTrivial()) +return true; + jlebar wrote: > The test passes if I comment out this if statement. I'm not sure if that's > expected; this may or may not be entirely covered below. According to [[ http://en.cppreference.com/w/cpp/language/default_constructor#Trivial_default_constructor | CPP reference ]] trivial constructor will pass all other checks below. Comment at: lib/Sema/SemaCUDA.cpp:442 @@ +441,3 @@ + // and the function body is an empty compound statement. + if (!(CD->isDefined() && CD->getNumParams() == 0 && CD->hasTrivialBody())) +return false; jlebar wrote: > Tests pass if I comment out the isDefined check. hasTrivialBody() would only return true if we have a body which only happens if function is defined. isDefined() is mostly for readability here. Comment at: lib/Sema/SemaDecl.cpp:10186 @@ +10185,3 @@ + const bool IsGlobal = VD->hasGlobalStorage() && !VD->isStaticLocal(); + if (Init && IsGlobal && getLangOpts().CUDA && getLangOpts().CUDAIsDevice && + (VD->hasAttr() || VD->hasAttr() || jlebar wrote: > Test passes if I comment out IsGlobal or CUDAIsDevice. (I'm not sure if you > care to test the latter, but the former seems important.) IsGlobal -- all test cases were using either global or local variables. I've added a static __shared__ variable in the device function. Now IsGlobal check (or, rather !isStaticLocal() part of it) is required in order for the tests to succeed. CUDAIsDevice is not triggered because all test cases are run with -fcuda-is-device. It's hard to run host-side test with -verify here because I'd have to put #ifdef around every 'expected-error' http://reviews.llvm.org/D15305 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D15305: [CUDA] Do not allow dynamic initialization of global device side variables.
tra updated this revision to Diff 45312. tra marked 2 inline comments as done. tra added a comment. Addressed Justin's comments. http://reviews.llvm.org/D15305 Files: include/clang/Basic/DiagnosticSemaKinds.td include/clang/Sema/Sema.h lib/CodeGen/CGDeclCXX.cpp lib/CodeGen/CodeGenModule.cpp lib/Sema/SemaCUDA.cpp lib/Sema/SemaDecl.cpp test/CodeGenCUDA/device-var-init.cu Index: test/CodeGenCUDA/device-var-init.cu === --- /dev/null +++ test/CodeGenCUDA/device-var-init.cu @@ -0,0 +1,389 @@ +// REQUIRES: nvptx-registered-target + +// Make sure we don't allow dynamic initialization for device +// variables, but accept empty constructors allowed by CUDA. + +// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fcuda-is-device -std=c++11 \ +// RUN: -fno-threadsafe-statics -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fcuda-is-device -std=c++11 \ +// RUN: -emit-llvm -DERROR_CASE -verify -o /dev/null %s + +#ifdef __clang__ +#include "Inputs/cuda.h" +#endif + +// Base classes with different initializer variants. + +// trivial constructor -- allowed +struct T { + int t; +}; + +// empty constructor +struct EC { + int ec; + __device__ EC() {} // -- allowed + __device__ EC(int) {} // -- not allowed +}; + +// empty templated constructor -- allowed with no arguments +struct ETC { + template __device__ ETC(T...) {} +}; + +// undefined constructor -- not allowed +struct UC { + int uc; + __device__ UC(); +}; + +// empty constructor w/ initializer list -- not allowed +struct ECI { + int eci; + __device__ ECI() : eci(1) {} +}; + +// non-empty constructor -- not allowed +struct NEC { + int nec; + __device__ NEC() { nec = 1; } +}; + +// no-constructor, virtual method -- not allowed +struct NCV { + int ncv; + __device__ virtual void vm() {} +}; + +// dynamic in-class field initializer -- not allowed +__device__ int f(); +struct NCF { + int ncf = f(); +}; + +// static in-class field initializer. +struct NCFS { + int ncfs = 3; +}; + +// undefined templated constructor -- not allowed +struct UTC { + template __device__ UTC(T...); +}; + +// non-empty templated constructor -- not allowed +struct NETC { + int netc; + template __device__ NETC(T...) { netc = 1; } +}; + +__device__ int d_v; +// CHECK: @d_v = addrspace(1) externally_initialized global i32 0, +__shared__ int s_v; +// CHECK: @s_v = addrspace(3) global i32 undef, +__constant__ int c_v; +// CHECK: addrspace(4) externally_initialized global i32 0, + +__device__ int d_v_i = 1; +// CHECK: @d_v_i = addrspace(1) externally_initialized global i32 1, +#ifdef ERROR_CASE +__shared__ int s_v_i = 1; +// expected-error@-1 {{initialization is not supported for __shared__ variables.}} +#endif +__constant__ int c_v_i = 1; +// CHECK: @c_v_i = addrspace(4) externally_initialized global i32 1, + +#ifdef ERROR_CASE +__device__ int d_v_f = f(); +// expected-error@-1 {{dynamic initialization is not supported for __device__, __constant__, and __shared__ variables.}} +__shared__ int s_v_f = f(); +// expected-error@-1 {{initialization is not supported for __shared__ variables.}} +__constant__ int c_v_f = f(); +// expected-error@-1 {{dynamic initialization is not supported for __device__, __constant__, and __shared__ variables.}} +#endif + +__device__ T d_t; +// CHECK: @d_t = addrspace(1) externally_initialized global %struct.T zeroinitializer +__shared__ T s_t; +// CHECK: @s_t = addrspace(3) global %struct.T undef, +__constant__ T c_t; +// CHECK: @c_t = addrspace(4) externally_initialized global %struct.T zeroinitializer, + +__device__ T d_t_i = {2}; +// CHECKL @d_t_i = addrspace(1) externally_initialized global %struct.T { i32 2 }, +#ifdef ERROR_CASE +__shared__ T s_t_i = {2}; +// expected-error@-1 {{initialization is not supported for __shared__ variables.}} +#endif +__constant__ T c_t_i = {2}; +// CHECK: @c_t_i = addrspace(4) externally_initialized global %struct.T { i32 2 }, + +__device__ EC d_ec; +// CHECK: @d_ec = addrspace(1) externally_initialized global %struct.EC zeroinitializer, +__shared__ EC s_ec; +// CHECK: @s_ec = addrspace(3) global %struct.EC undef, +__constant__ EC c_ec; +// CHECK: @c_ec = addrspace(4) externally_initialized global %struct.EC zeroinitializer, + +#if ERROR_CASE +__device__ EC d_ec_i(3); +// expected-error@-1 {{dynamic initialization is not supported for __device__, __constant__, and __shared__ variables.}} +__shared__ EC s_ec_i(3); +// expected-error@-1 {{initialization is not supported for __shared__ variables.}} +__constant__ EC c_ec_i(3); +// expected-error@-1 {{dynamic initialization is not supported for __device__, __constant__, and __shared__ variables.}} + +__device__ EC d_ec_i2 = {3}; +// expected-error@-1 {{dynamic initialization is not supported for __device__, __constant__, and __shared__ variables.}} +__shared__ EC s_ec_i2 = {3}; +// expected-error@-1 {{initialization is not supported for __shared__ variables.
Re: [PATCH] D15305: [CUDA] Do not allow dynamic initialization of global device side variables.
jlebar added a comment. tra asked me to check for coverage. Looks pretty good in that respect. Comment at: include/clang/Basic/DiagnosticSemaKinds.td:6419 @@ +6418,3 @@ +"dynamic initialization is not supported for " +"__device__, __constant__ and __shared__ variables.">; +def err_shared_var_init : Error< Nit, but, since we're all language nerds here, suggest adding an Oxford comma. Comment at: lib/Sema/SemaCUDA.cpp:436 @@ +435,3 @@ + if (CD->isTrivial()) +return true; + The test passes if I comment out this if statement. I'm not sure if that's expected; this may or may not be entirely covered below. Comment at: lib/Sema/SemaCUDA.cpp:442 @@ +441,3 @@ + // and the function body is an empty compound statement. + if (!(CD->isDefined() && CD->getNumParams() == 0 && CD->hasTrivialBody())) +return false; Tests pass if I comment out the isDefined check. Comment at: lib/Sema/SemaDecl.cpp:10183 @@ +10182,3 @@ + // 7.5). We also allow constant initializers for __constant__ and + // __device__ variables. + const Expr *Init = VD->getInit(); > We also allow constant initializers for __constant__ and __device__ variables. Consider rephrasing this -- it sounds like this is a clang extension, but I just checked and it does not appear to be. Comment at: lib/Sema/SemaDecl.cpp:10186 @@ +10185,3 @@ + const bool IsGlobal = VD->hasGlobalStorage() && !VD->isStaticLocal(); + if (Init && IsGlobal && getLangOpts().CUDA && getLangOpts().CUDAIsDevice && + (VD->hasAttr() || VD->hasAttr() || Test passes if I comment out IsGlobal or CUDAIsDevice. (I'm not sure if you care to test the latter, but the former seems important.) http://reviews.llvm.org/D15305 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D15305: [CUDA] Do not allow dynamic initialization of global device side variables.
On Fri, Jan 15, 2016 at 5:29 PM, Richard Smith wrote: > On Fri, Jan 15, 2016 at 4:22 PM, Artem Belevich wrote: >> tra added inline comments. >> >> >> Comment at: lib/CodeGen/CodeGenModule.cpp:2334 >> @@ -2339,1 +2333,3 @@ >> + D->hasAttr()) >> Init = llvm::UndefValue::get(getTypes().ConvertType(ASTTy)); >> + else if (!InitExpr) { >> >> rsmith wrote: >>> As this is a global variable, it should presumably still be statically >>> zero-initialized. >> There is no way to initialize __shared__ variables. They are rough >> equivalent of local variables, only in this case CUDA allocates them per >> kernel invocation from a shared buffer with no guarantees regarding its >> contents. >> >> They used to be zero-initialized by compiler, but that was intentionally >> changed to undef in r245786 / http://reviews.llvm.org/D12241 > > That doesn't seem right. C++ guarantees zero-initialization for all > globals, prior to performing any other initialization. It looks like the problem being fixed by D12241 was probably caused by the __shared__ variables having the wrong linkage. ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D15305: [CUDA] Do not allow dynamic initialization of global device side variables.
On Fri, Jan 15, 2016 at 4:22 PM, Artem Belevich wrote: > tra added inline comments. > > > Comment at: lib/CodeGen/CodeGenModule.cpp:2334 > @@ -2339,1 +2333,3 @@ > + D->hasAttr()) > Init = llvm::UndefValue::get(getTypes().ConvertType(ASTTy)); > + else if (!InitExpr) { > > rsmith wrote: >> As this is a global variable, it should presumably still be statically >> zero-initialized. > There is no way to initialize __shared__ variables. They are rough equivalent > of local variables, only in this case CUDA allocates them per kernel > invocation from a shared buffer with no guarantees regarding its contents. > > They used to be zero-initialized by compiler, but that was intentionally > changed to undef in r245786 / http://reviews.llvm.org/D12241 That doesn't seem right. C++ guarantees zero-initialization for all globals, prior to performing any other initialization. ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D15305: [CUDA] Do not allow dynamic initialization of global device side variables.
tra updated this revision to Diff 45051. tra marked an inline comment as done. tra added a comment. Typo fix. http://reviews.llvm.org/D15305 Files: include/clang/Basic/DiagnosticSemaKinds.td include/clang/Sema/Sema.h lib/CodeGen/CGDeclCXX.cpp lib/CodeGen/CodeGenModule.cpp lib/Sema/SemaCUDA.cpp lib/Sema/SemaDecl.cpp test/CodeGenCUDA/device-var-init.cu Index: test/CodeGenCUDA/device-var-init.cu === --- /dev/null +++ test/CodeGenCUDA/device-var-init.cu @@ -0,0 +1,387 @@ +// REQUIRES: nvptx-registered-target + +// Make sure we don't allow dynamic initialization for device +// variables, but accept empty constructors allowed by CUDA. + +// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fcuda-is-device -std=c++11 \ +// RUN: -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fcuda-is-device -std=c++11 \ +// RUN: -emit-llvm -DERROR_CASE -verify -o /dev/null %s + +#ifdef __clang__ +#include "Inputs/cuda.h" +#endif + +// Base classes with different initializer variants. + +// trivial constructor -- allowed +struct T { + int t; +}; + +// empty constructor +struct EC { + int ec; + __device__ EC() {} // -- allowed + __device__ EC(int) {} // -- not allowed +}; + +// empty templated constructor -- allowed with no arguments +struct ETC { + template __device__ ETC(T...) {} +}; + +// undefined constructor -- not allowed +struct UC { + int uc; + __device__ UC(); +}; + +// empty constructor w/ initializer list -- not allowed +struct ECI { + int eci; + __device__ ECI() : eci(1) {} +}; + +// non-empty constructor -- not allowed +struct NEC { + int nec; + __device__ NEC() { nec = 1; } +}; + +// no-constructor, virtual method -- not allowed +struct NCV { + int ncv; + __device__ virtual void vm() {} +}; + +// dynamic in-class field initializer -- not allowed +__device__ int f(); +struct NCF { + int ncf = f(); +}; + +// static in-class field initializer. +struct NCFS { + int ncfs = 3; +}; + +// undefined templated constructor -- not allowed +struct UTC { + template __device__ UTC(T...); +}; + +// non-empty templated constructor -- not allowed +struct NETC { + int netc; + template __device__ NETC(T...) { netc = 1; } +}; + +__device__ int d_v; +// CHECK: @d_v = addrspace(1) externally_initialized global i32 0, +__shared__ int s_v; +// CHECK: @s_v = addrspace(3) global i32 undef, +__constant__ int c_v; +// CHECK: addrspace(4) externally_initialized global i32 0, + +__device__ int d_v_i = 1; +// CHECK: @d_v_i = addrspace(1) externally_initialized global i32 1, +#ifdef ERROR_CASE +__shared__ int s_v_i = 1; +// expected-error@-1 {{initialization is not supported for __shared__ variables.}} +#endif +__constant__ int c_v_i = 1; +// CHECK: @c_v_i = addrspace(4) externally_initialized global i32 1, + +#ifdef ERROR_CASE +__device__ int d_v_f = f(); +// expected-error@-1 {{dynamic initialization is not supported for __device__, __constant__ and __shared__ variables.}} +__shared__ int s_v_f = f(); +// expected-error@-1 {{initialization is not supported for __shared__ variables.}} +__constant__ int c_v_f = f(); +// expected-error@-1 {{dynamic initialization is not supported for __device__, __constant__ and __shared__ variables.}} +#endif + +__device__ T d_t; +// CHECK: @d_t = addrspace(1) externally_initialized global %struct.T zeroinitializer +__shared__ T s_t; +// CHECK: @s_t = addrspace(3) global %struct.T undef, +__constant__ T c_t; +// CHECK: @c_t = addrspace(4) externally_initialized global %struct.T zeroinitializer, + +__device__ T d_t_i = {2}; +// CHECKL @d_t_i = addrspace(1) externally_initialized global %struct.T { i32 2 }, +#ifdef ERROR_CASE +__shared__ T s_t_i = {2}; +// expected-error@-1 {{initialization is not supported for __shared__ variables.}} +#endif +__constant__ T c_t_i = {2}; +// CHECK: @c_t_i = addrspace(4) externally_initialized global %struct.T { i32 2 }, + +__device__ EC d_ec; +// CHECK: @d_ec = addrspace(1) externally_initialized global %struct.EC zeroinitializer, +__shared__ EC s_ec; +// CHECK: @s_ec = addrspace(3) global %struct.EC undef, +__constant__ EC c_ec; +// CHECK: @c_ec = addrspace(4) externally_initialized global %struct.EC zeroinitializer, + +#if ERROR_CASE +__device__ EC d_ec_i(3); +// expected-error@-1 {{dynamic initialization is not supported for __device__, __constant__ and __shared__ variables.}} +__shared__ EC s_ec_i(3); +// expected-error@-1 {{initialization is not supported for __shared__ variables.}} +__constant__ EC c_ec_i(3); +// expected-error@-1 {{dynamic initialization is not supported for __device__, __constant__ and __shared__ variables.}} + +__device__ EC d_ec_i2 = {3}; +// expected-error@-1 {{dynamic initialization is not supported for __device__, __constant__ and __shared__ variables.}} +__shared__ EC s_ec_i2 = {3}; +// expected-error@-1 {{initialization is not supported for __shared__ variables.}} +__constant__ EC c_ec_i2 = {3}; +// expected-
Re: [PATCH] D15305: [CUDA] Do not allow dynamic initialization of global device side variables.
tra added inline comments. Comment at: lib/CodeGen/CodeGenModule.cpp:2334 @@ -2339,1 +2333,3 @@ + D->hasAttr()) Init = llvm::UndefValue::get(getTypes().ConvertType(ASTTy)); + else if (!InitExpr) { rsmith wrote: > As this is a global variable, it should presumably still be statically > zero-initialized. There is no way to initialize __shared__ variables. They are rough equivalent of local variables, only in this case CUDA allocates them per kernel invocation from a shared buffer with no guarantees regarding its contents. They used to be zero-initialized by compiler, but that was intentionally changed to undef in r245786 / http://reviews.llvm.org/D12241 http://reviews.llvm.org/D15305 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D15305: [CUDA] Do not allow dynamic initialization of global device side variables.
rsmith added inline comments. Comment at: lib/CodeGen/CGDeclCXX.cpp:312 @@ +311,3 @@ + // the checks have been done in Sema by now. Whatever initializers + // areallowed are empty and we just need to ignore them here. + if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice && areallowed -> are allowed Comment at: lib/CodeGen/CodeGenModule.cpp:2334 @@ -2339,1 +2333,3 @@ + D->hasAttr()) Init = llvm::UndefValue::get(getTypes().ConvertType(ASTTy)); + else if (!InitExpr) { As this is a global variable, it should presumably still be statically zero-initialized. http://reviews.llvm.org/D15305 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D15305: [CUDA] Do not allow dynamic initialization of global device side variables.
tra marked an inline comment as done. tra added a comment. In http://reviews.llvm.org/D15305#327226, @rsmith wrote: > I think you missed this from my previous review: > > > This should be checked and diagnosed in Sema, not in CodeGen. > Done. http://reviews.llvm.org/D15305 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D15305: [CUDA] Do not allow dynamic initialization of global device side variables.
tra added a reviewer: jlebar. tra updated this revision to Diff 45044. tra added a comment. Moved initializer checks from CodeGen to Sema. Added test cases for initializers of non-class variables. http://reviews.llvm.org/D15305 Files: include/clang/Basic/DiagnosticSemaKinds.td include/clang/Sema/Sema.h lib/CodeGen/CGDeclCXX.cpp lib/CodeGen/CodeGenModule.cpp lib/Sema/SemaCUDA.cpp lib/Sema/SemaDecl.cpp test/CodeGenCUDA/device-var-init.cu Index: test/CodeGenCUDA/device-var-init.cu === --- /dev/null +++ test/CodeGenCUDA/device-var-init.cu @@ -0,0 +1,387 @@ +// REQUIRES: nvptx-registered-target + +// Make sure we don't allow dynamic initialization for device +// variables, but accept empty constructors allowed by CUDA. + +// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fcuda-is-device -std=c++11 \ +// RUN: -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fcuda-is-device -std=c++11 \ +// RUN: -emit-llvm -DERROR_CASE -verify -o /dev/null %s + +#ifdef __clang__ +#include "Inputs/cuda.h" +#endif + +// Base classes with different initializer variants. + +// trivial constructor -- allowed +struct T { + int t; +}; + +// empty constructor +struct EC { + int ec; + __device__ EC() {} // -- allowed + __device__ EC(int) {} // -- not allowed +}; + +// empty templated constructor -- allowed with no arguments +struct ETC { + template __device__ ETC(T...) {} +}; + +// undefined constructor -- not allowed +struct UC { + int uc; + __device__ UC(); +}; + +// empty constructor w/ initializer list -- not allowed +struct ECI { + int eci; + __device__ ECI() : eci(1) {} +}; + +// non-empty constructor -- not allowed +struct NEC { + int nec; + __device__ NEC() { nec = 1; } +}; + +// no-constructor, virtual method -- not allowed +struct NCV { + int ncv; + __device__ virtual void vm() {} +}; + +// dynamic in-class field initializer -- not allowed +__device__ int f(); +struct NCF { + int ncf = f(); +}; + +// static in-class field initializer. +struct NCFS { + int ncfs = 3; +}; + +// undefined templated constructor -- not allowed +struct UTC { + template __device__ UTC(T...); +}; + +// non-empty templated constructor -- not allowed +struct NETC { + int netc; + template __device__ NETC(T...) { netc = 1; } +}; + +__device__ int d_v; +// CHECK: @d_v = addrspace(1) externally_initialized global i32 0, +__shared__ int s_v; +// CHECK: @s_v = addrspace(3) global i32 undef, +__constant__ int c_v; +// CHECK: addrspace(4) externally_initialized global i32 0, + +__device__ int d_v_i = 1; +// CHECK: @d_v_i = addrspace(1) externally_initialized global i32 1, +#ifdef ERROR_CASE +__shared__ int s_v_i = 1; +// expected-error@-1 {{initialization is not supported for __shared__ variables.}} +#endif +__constant__ int c_v_i = 1; +// CHECK: @c_v_i = addrspace(4) externally_initialized global i32 1, + +#ifdef ERROR_CASE +__device__ int d_v_f = f(); +// expected-error@-1 {{dynamic initialization is not supported for __device__, __constant__ and __shared__ variables.}} +__shared__ int s_v_f = f(); +// expected-error@-1 {{initialization is not supported for __shared__ variables.}} +__constant__ int c_v_f = f(); +// expected-error@-1 {{dynamic initialization is not supported for __device__, __constant__ and __shared__ variables.}} +#endif + +__device__ T d_t; +// CHECK: @d_t = addrspace(1) externally_initialized global %struct.T zeroinitializer +__shared__ T s_t; +// CHECK: @s_t = addrspace(3) global %struct.T undef, +__constant__ T c_t; +// CHECK: @c_t = addrspace(4) externally_initialized global %struct.T zeroinitializer, + +__device__ T d_t_i = {2}; +// CHECKL @d_t_i = addrspace(1) externally_initialized global %struct.T { i32 2 }, +#ifdef ERROR_CASE +__shared__ T s_t_i = {2}; +// expected-error@-1 {{initialization is not supported for __shared__ variables.}} +#endif +__constant__ T c_t_i = {2}; +// CHECK: @c_t_i = addrspace(4) externally_initialized global %struct.T { i32 2 }, + +__device__ EC d_ec; +// CHECK: @d_ec = addrspace(1) externally_initialized global %struct.EC zeroinitializer, +__shared__ EC s_ec; +// CHECK: @s_ec = addrspace(3) global %struct.EC undef, +__constant__ EC c_ec; +// CHECK: @c_ec = addrspace(4) externally_initialized global %struct.EC zeroinitializer, + +#if ERROR_CASE +__device__ EC d_ec_i(3); +// expected-error@-1 {{dynamic initialization is not supported for __device__, __constant__ and __shared__ variables.}} +__shared__ EC s_ec_i(3); +// expected-error@-1 {{initialization is not supported for __shared__ variables.}} +__constant__ EC c_ec_i(3); +// expected-error@-1 {{dynamic initialization is not supported for __device__, __constant__ and __shared__ variables.}} + +__device__ EC d_ec_i2 = {3}; +// expected-error@-1 {{dynamic initialization is not supported for __device__, __constant__ and __shared__ variables.}} +__shared__ EC s_ec_i2 = {3}; +// expected-error@-1 {{initialization is
Re: [PATCH] D15305: [CUDA] Do not allow dynamic initialization of global device side variables.
rsmith added a comment. I think you missed this from my previous review: > This should be checked and diagnosed in Sema, not in CodeGen. Comment at: lib/CodeGen/CGDeclCXX.cpp:333-337 @@ +332,7 @@ + [](const CXXMethodDecl *Method) { return Method->isVirtual(); })) +return false; + + // .. and no virtual base classes. + if (RD->getNumVBases() != 0) +return false; + You can check these conditions with `RD->isDynamicClass()`. http://reviews.llvm.org/D15305 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D15305: [CUDA] Do not allow dynamic initialization of global device side variables.
tra added a comment. Richard, I've updated the patch as you've suggested -- it indeed simplifies things quite a bit and handles the corner cases you've mentioned. Comment at: lib/CodeGen/CGDeclCXX.cpp:323-324 @@ +322,4 @@ + + // The constructor function has no parameters, + if (CD->getNumParams() != 0) +return false; rsmith wrote: > What if the constructor is a C-style varargs function: > > struct X { X(...) {} }; > > ? CUDA does not support varargs on device side. nvcc fails with an error: > error: a "device" function cannot have ellipsis That's another thing I'll need to fix (as a separate patch) as clang currently accepts varargs everywhere. This patch will ignore number of arguments passed to varargs constructor, but the checks for empty body still do apply. Comment at: lib/CodeGen/CGDeclCXX.cpp:329 @@ +328,3 @@ + for (const CXXCtorInitializer *CI: CD->inits()) +if (CI->isAnyMemberInitializer() && CI->isWritten()) + return false; rsmith wrote: > tra wrote: > > @rsmith: is this a good way to find member initializer list items? > > > > ``` > > struct S { > > int a,b,c; > > S() : a(1),b(2),c(3) {} > > }; > > ``` > > I'm looking for a(),b(),c() which is what I think CUDA spec wants to check > > for, but CD->inits() appears to have other initializers on the list as well. > You shouldn't need to check `isAnyMemberInitializer`: if there's any written > inits, the constructor violates the rules. As it turns out, the rules don't apply to all written initializers. For instance, nvcc allows empty constructor on init list: ``` struct A { __device__ A(){}; }; struct B { __device__ B(){}; }; struct C : A { B b; __device__ C() : A(), b() {} }; __device__ C c; ``` I've simplified the patch so that in only checks for constructor's 'emptiness', but disregards how that constructor gets to be executed. Comment at: lib/CodeGen/CGDeclCXX.cpp:333 @@ +332,3 @@ + // and the function body is an empty compound statement. + // That does not always work. + if (!CD->hasTrivialBody()) rsmith wrote: > What doesn't always work? It was leftover from early patch variant that didn't defer emitting global vars. If I don't defer and need to emit a global var before constructor definition is available, hasTrivialBody() returns false and triggers diagnostics. Comment at: lib/CodeGen/CGDeclCXX.cpp:347-367 @@ +346,23 @@ + + // The default constructors of all base classes of its class can be + // considered empty. + for (auto &Base : RD->bases()) +if (hasNonEmptyDefaultConstructors(*this, + Base.getType()->getAsCXXRecordDecl())) + return false; + + // For all the nonstatic data members of its class that are of class type + // (or array thereof), the default constructors can be considered empty. + for (const auto *I : RD->decls()) +if (const FieldDecl *V = dyn_cast(I)) { + QualType T = V->getType(); + + if (const ArrayType *Ty = dyn_cast(T)) +while ((Ty = dyn_cast(T))) + T = Ty->getElementType(); + + if (const CXXRecordDecl *R = T->getAsCXXRecordDecl()) +if (hasNonEmptyDefaultConstructors(*this, R)) + return false; +} + rsmith wrote: > Rather than checking these properties this way, I'd suggest you check the > initialization expression in each `CXXCtorInitializer` only contains > `CXXConstructExpr`s for empty constructors (or any other whitelisted > constructs). Your current approach will miss a couple of cases which the CUDA > spec misses but presumably meant to exclude: > > 1) Default member initializers > > int f(); > struct X { int n = f(); X() {} }; > > 2) Cases where a constructor other than a default constructor is implicitly > invoked > > struct A { template A(T...); }; > struct B : A { B() {} }; Nice. This has simplified the checks a lot. Comment at: lib/CodeGen/CodeGenModule.cpp:1347-1351 @@ -1346,2 +1346,7 @@ return false; + // Delay codegen for device-side CUDA variables. We need to have all + // constructor definitions available before we can determine whether + // we can skip them or produce an error. + if (LangOpts.CUDA && LangOpts.CUDAIsDevice && isa(Global)) +return false; rsmith wrote: > According to the quoted specification, you're supposed to check whether the > constructor can be considered empty at the point in the translation unit > where the definition of the variable occurs, so I don't think you need to > delay anything. I guess it's a bug in their guide as nvcc accepts following code with constructor definition appearing *after* the variable: ``` struct S { S(); }; __device__ S s; S::S() {} ``` http://reviews.llvm.org/D15305 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.
Re: [PATCH] D15305: [CUDA] Do not allow dynamic initialization of global device side variables.
tra updated this revision to Diff 44687. tra added a comment. Check all variable initializers and only allow 'empty constructors' as Richard has suggested. Changed test structure so that we test for allowed/disallowed constructors separately from testing how we handle initialization of base classes or member fields. http://reviews.llvm.org/D15305 Files: lib/CodeGen/CGDeclCXX.cpp lib/CodeGen/CodeGenModule.cpp lib/CodeGen/CodeGenModule.h test/CodeGenCUDA/device-var-init.cu Index: test/CodeGenCUDA/device-var-init.cu === --- /dev/null +++ test/CodeGenCUDA/device-var-init.cu @@ -0,0 +1,364 @@ +// REQUIRES: nvptx-registered-target + +// Make sure we don't allow dynamic initialization for device +// variables, but accept empty constructors allowed by CUDA. + +// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fcuda-is-device -std=c++11 \ +// RUN: -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fcuda-is-device -std=c++11 \ +// RUN: -emit-llvm -DERROR_CASE -verify -o /dev/null %s + +#ifdef __clang__ +#include "Inputs/cuda.h" +#endif + +// Base classes with different initializer variants. + +// trivial constructor -- allowed +struct T { + int t; +}; + +// empty constructor +struct EC { + int ec; + __device__ EC() {} // -- allowed + __device__ EC(int) {} // -- not allowed +}; + +// empty templated constructor -- allowed with no arguments +struct ETC { + template __device__ ETC(T...) {} +}; + +// undefined constructor -- not allowed +struct UC { + int uc; + __device__ UC(); +}; + +// empty constructor w/ initializer list -- not allowed +struct ECI { + int eci; + __device__ ECI() : eci(1) {} +}; + +// non-empty constructor -- not allowed +struct NEC { + int nec; + __device__ NEC() { nec = 1; } +}; + +// no-constructor, virtual method -- not allowed +struct NCV { + int ncv; + __device__ virtual void vm() {} +}; + +// dynamic in-class field initializer -- not allowed +__device__ int f(); +struct NCF { + int ncf = f(); +}; + +// static in-class field initializer -- not allowed by nvcc. +// NOTE: clang does generate statically initalized field here. +// So in practice it could be supported. +struct NCFS { + int ncfs = 3; +}; + +// undefined templated constructor -- not allowed +struct UTC { + template __device__ UTC(T...); +}; + +// non-empty templated constructor -- not allowed +struct NETC { + int netc; + template __device__ NETC(T...) { netc = 1; } +}; + +__device__ T d_t; +// CHECK: @d_t = addrspace(1) externally_initialized global %struct.T zeroinitializer +__shared__ T s_t; +// CHECK: @s_t = addrspace(3) global %struct.T undef, +__constant__ T c_t; +// CHECK: @c_t = addrspace(4) externally_initialized global %struct.T zeroinitializer, + +__device__ T d_t_i = {2}; +// CHECKL @d_t_i = addrspace(1) externally_initialized global %struct.T { i32 2 }, +#ifdef ERROR_CASE +__shared__ T s_t_i = {2}; +// expected-error@-1 {{initialization is not supported for __shared__ variables.}} +#endif +__constant__ T c_t_i = {2}; +// CHECK: @c_t_i = addrspace(4) externally_initialized global %struct.T { i32 2 }, + +__device__ EC d_ec; +// CHECK: @d_ec = addrspace(1) externally_initialized global %struct.EC zeroinitializer, +__shared__ EC s_ec; +// CHECK: @s_ec = addrspace(3) global %struct.EC undef, +__constant__ EC c_ec; +// CHECK: @c_ec = addrspace(4) externally_initialized global %struct.EC zeroinitializer, + +#if ERROR_CASE +__device__ EC d_ec_i(3); +// expected-error@-1 {{dynamic initialization is not supported for __device__, __constant__ and __shared__ variables.}} +__shared__ EC s_ec_i(3); +// expected-error@-1 {{initialization is not supported for __shared__ variables.}} +__constant__ EC c_ec_i(3); +// expected-error@-1 {{dynamic initialization is not supported for __device__, __constant__ and __shared__ variables.}} + +__device__ EC d_ec_i2 = {3}; +// expected-error@-1 {{dynamic initialization is not supported for __device__, __constant__ and __shared__ variables.}} +__shared__ EC s_ec_i2 = {3}; +// expected-error@-1 {{initialization is not supported for __shared__ variables.}} +__constant__ EC c_ec_i2 = {3}; +// expected-error@-1 {{dynamic initialization is not supported for __device__, __constant__ and __shared__ variables.}} +#endif + +__device__ ETC d_etc; +// CHETCK: @d_etc = addrspace(1) externally_initialized global %struct.ETC zeroinitializer, +__shared__ ETC s_etc; +// CHETCK: @s_etc = addrspace(3) global %struct.ETC undef, +__constant__ ETC c_etc; +// CHETCK: @c_etc = addrspace(4) externally_initialized global %struct.ETC zeroinitializer, + +#if ERROR_CASE +__device__ ETC d_etc_i(3); +// expected-error@-1 {{dynamic initialization is not supported for __device__, __constant__ and __shared__ variables.}} +__shared__ ETC s_etc_i(3); +// expected-error@-1 {{initialization is not supported for __shared__ variables.}} +__constant__ ETC c_etc_i(3); +// expected-error@-1 {{dynami
Re: [PATCH] D15305: [CUDA] Do not allow dynamic initialization of global device side variables.
rsmith added a comment. This should be checked and diagnosed in Sema, not in CodeGen. Comment at: lib/CodeGen/CGDeclCXX.cpp:323-324 @@ +322,4 @@ + + // The constructor function has no parameters, + if (CD->getNumParams() != 0) +return false; What if the constructor is a C-style varargs function: struct X { X(...) {} }; ? Comment at: lib/CodeGen/CGDeclCXX.cpp:329 @@ +328,3 @@ + for (const CXXCtorInitializer *CI: CD->inits()) +if (CI->isAnyMemberInitializer() && CI->isWritten()) + return false; tra wrote: > @rsmith: is this a good way to find member initializer list items? > > ``` > struct S { > int a,b,c; > S() : a(1),b(2),c(3) {} > }; > ``` > I'm looking for a(),b(),c() which is what I think CUDA spec wants to check > for, but CD->inits() appears to have other initializers on the list as well. You shouldn't need to check `isAnyMemberInitializer`: if there's any written inits, the constructor violates the rules. Comment at: lib/CodeGen/CGDeclCXX.cpp:333 @@ +332,3 @@ + // and the function body is an empty compound statement. + // That does not always work. + if (!CD->hasTrivialBody()) What doesn't always work? Comment at: lib/CodeGen/CGDeclCXX.cpp:347-367 @@ +346,23 @@ + + // The default constructors of all base classes of its class can be + // considered empty. + for (auto &Base : RD->bases()) +if (hasNonEmptyDefaultConstructors(*this, + Base.getType()->getAsCXXRecordDecl())) + return false; + + // For all the nonstatic data members of its class that are of class type + // (or array thereof), the default constructors can be considered empty. + for (const auto *I : RD->decls()) +if (const FieldDecl *V = dyn_cast(I)) { + QualType T = V->getType(); + + if (const ArrayType *Ty = dyn_cast(T)) +while ((Ty = dyn_cast(T))) + T = Ty->getElementType(); + + if (const CXXRecordDecl *R = T->getAsCXXRecordDecl()) +if (hasNonEmptyDefaultConstructors(*this, R)) + return false; +} + Rather than checking these properties this way, I'd suggest you check the initialization expression in each `CXXCtorInitializer` only contains `CXXConstructExpr`s for empty constructors (or any other whitelisted constructs). Your current approach will miss a couple of cases which the CUDA spec misses but presumably meant to exclude: 1) Default member initializers int f(); struct X { int n = f(); X() {} }; 2) Cases where a constructor other than a default constructor is implicitly invoked struct A { template A(T...); }; struct B : A { B() {} }; Comment at: lib/CodeGen/CodeGenModule.cpp:1347-1351 @@ -1346,2 +1346,7 @@ return false; + // Delay codegen for device-side CUDA variables. We need to have all + // constructor definitions available before we can determine whether + // we can skip them or produce an error. + if (LangOpts.CUDA && LangOpts.CUDAIsDevice && isa(Global)) +return false; According to the quoted specification, you're supposed to check whether the constructor can be considered empty at the point in the translation unit where the definition of the variable occurs, so I don't think you need to delay anything. http://reviews.llvm.org/D15305 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D15305: [CUDA] Do not allow dynamic initialization of global device side variables.
tra added a comment. ping. http://reviews.llvm.org/D15305 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D15305: [CUDA] Do not allow dynamic initialization of global device side variables.
tra added a comment. Ping. @rsmith -- Richard, can you take a look? http://reviews.llvm.org/D15305 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D15305: [CUDA] Do not allow dynamic initialization of global device side variables.
tra added inline comments. Comment at: lib/CodeGen/CGDeclCXX.cpp:329 @@ +328,3 @@ + for (const CXXCtorInitializer *CI: CD->inits()) +if (CI->isAnyMemberInitializer() && CI->isWritten()) + return false; @rsmith: is this a good way to find member initializer list items? ``` struct S { int a,b,c; S() : a(1),b(2),c(3) {} }; ``` I'm looking for a(),b(),c() which is what I think CUDA spec wants to check for, but CD->inits() appears to have other initializers on the list as well. http://reviews.llvm.org/D15305 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D15305: [CUDA] Do not allow dynamic initialization of global device side variables.
tra created this revision. tra added reviewers: rsmith, jingyue, jpienaar. tra added a subscriber: cfe-commits. In general CUDA does not allow dynamic initialization of global device-side variables except for records with empty constructors as described in section [[ http://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#device-memory-qualifiers | E.2.3.1 of CUDA 7.5 Programming guide ]]: > __device__, __constant__ and __shared__ variables defined in namespace scope, > that are of class type, cannot have a non-empty constructor or a non-empty > destructor. > A constructor for a class type is considered empty at a point in the > translation unit, > if it is either a trivial constructor or it satisfies all of the following > conditions: > * The constructor function has been defined. > * The constructor function has no parameters, the initializer list is empty > and the function body is an empty compound statement. > * Its class has no virtual functions and no virtual base classes. > * The default constructors of all base classes of its class can be considered > empty. > * For all the nonstatic data members of its class that are of class type (or > array thereof), the default constructors can be considered empty. Clang is already enforcing no-initializers for __shared__ variables, but currently allows dynamic initialization for __device__ and __constant__ variables. This patch applies initializer checks for all device-side variables. Empty constructors are accepted, but no code is generated for them. http://reviews.llvm.org/D15305 Files: lib/CodeGen/CGDeclCXX.cpp lib/CodeGen/CodeGenModule.cpp lib/CodeGen/CodeGenModule.h test/CodeGenCUDA/device-var-init.cu Index: test/CodeGenCUDA/device-var-init.cu === --- /dev/null +++ test/CodeGenCUDA/device-var-init.cu @@ -0,0 +1,371 @@ +// REQUIRES: nvptx-registered-target + +// Make sure we don't allow dynamic initialization for device +// variables, but accept empty constructors allowed by CUDA. + +// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fcuda-is-device -emit-llvm -o - %s \ +// RUN: | FileCheck %s +// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fcuda-is-device -emit-llvm \ +// RUN: -DERROR_CASE -verify -o /dev/null %s + +#include "Inputs/cuda.h" + +// no-constructor +struct NC { + int nc; +}; + +// empty constructor +struct EC { + int ec; + __device__ EC() {} +}; + +// empty constructor w/ initializer list +struct ECI { + int eci; + __device__ ECI() : eci(1) {} +}; + +// non-empty constructor +struct NEC { + int nec; + __device__ NEC() { nec = 1; } +}; + +// no-constructor, virtual method +struct NCV { + virtual void vm() {} +}; + +// no-constructor, no-constructor base class +struct NC_B_NC : NC { + int nc_b_nc; +}; + +// no-constructor, empty-constructor base class +struct NC_B_EC : EC { + int nc_b_ec; +}; + +// no-constructor, base class w/ constructor+init list. +struct NC_B_ECI : ECI { +}; + +// no-constructor, non-empty-constructor base class +struct NC_B_NEC : NEC { + int nc_b_nec; +}; + +// no-constructor, base class w/ virtual method +struct NC_B_NCV : NCV { + int nc_b_ncv; +}; + +// empty constructor, no-constructor base class +struct EC_B_NC : NC { + __device__ EC_B_NC() {} +}; + +// empty constructor, empty-constructor base class +struct EC_B_EC : EC { + __device__ EC_B_EC() {} +}; + +// empty constructor, base class w/ constructor+init list. +struct EC_B_ECI : ECI { + __device__ EC_B_ECI() {} +}; + +// empty constructor, non-empty-constructor base class +struct EC_B_NEC : NEC { + __device__ EC_B_NEC() {} +}; + +// empty constructor, non-empty-constructor base class +struct EC_B_NCV : NCV { + __device__ EC_B_NCV() {} +}; + +// no-constructor, no-constructor virtual base class +struct NC_V_NC : virtual NC { +}; + +// no-constructor, empty constructor virtual base class +struct NC_V_EC : virtual EC { +}; + +// empty constructor, no-constructor virtual base class +struct EC_V_NC : virtual NC { + __device__ EC_V_NC() {} +}; + +// empty constructor, empty constructor virtual base class +struct EC_V_EC : virtual EC { + __device__ EC_V_EC() {} +}; + +// no-constructor, no-constructor field +struct NC_F_NC { + NC nc_f_nc; +}; + +// no-constructor, empty-constructor field +struct NC_F_EC{ + EC nc_f_ec; +}; + +// no-constructor, empty-constructor+initializer field +struct NC_F_ECI{ + ECI nc_f_ec; +}; + +// no-constructor, non-empty-constructor field +struct NC_F_NEC { + NEC nc_f_nec; +}; + +// no-constructor, field w/ virtual method +struct NC_F_NCV { + NCV nc_f_ncv; +}; + +// no-constructor, no-constructor field +struct NC_FA_NC { + NC nc_fa_nc[2]; +}; + +// no-constructor, empty-constructor field +struct NC_FA_EC{ + EC nc_fa_ec[2]; +}; + +// no-constructor, non-empty-constructor field +struct NC_FA_NEC { + NEC nc_fa_nec[2]; +}; + +// no-constructor, field w/ virtual method +struct NC_FA_NCV { + NCV nc_fa_ncv[2]; +}; + +