Author: tra Date: Mon Aug 10 15:57:02 2015 New Revision: 244501 URL: http://llvm.org/viewvc/llvm-project?rev=244501&view=rev Log: [CUDA] Add implicit __attribute__((used)) to all __global__ functions.
This allows emitting kernels that were instantiated from the host code and which would never be explicitly referenced otherwise. Differential Revision: http://reviews.llvm.org/D11666 Modified: cfe/trunk/lib/Sema/SemaDeclAttr.cpp cfe/trunk/test/CodeGenCUDA/ptx-kernels.cu Modified: cfe/trunk/lib/Sema/SemaDeclAttr.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaDeclAttr.cpp?rev=244501&r1=244500&r2=244501&view=diff ============================================================================== --- cfe/trunk/lib/Sema/SemaDeclAttr.cpp (original) +++ cfe/trunk/lib/Sema/SemaDeclAttr.cpp Mon Aug 10 15:57:02 2015 @@ -3350,6 +3350,10 @@ static void handleGlobalAttr(Sema &S, De D->addAttr(::new (S.Context) CUDAGlobalAttr(Attr.getRange(), S.Context, Attr.getAttributeSpellingListIndex())); + + // Add implicit attribute((used)) so we don't eliminate kernels + // because there is nothing referencing them on device side. + D->addAttr(UsedAttr::CreateImplicit(S.Context)); } static void handleGNUInlineAttr(Sema &S, Decl *D, const AttributeList &Attr) { Modified: cfe/trunk/test/CodeGenCUDA/ptx-kernels.cu URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenCUDA/ptx-kernels.cu?rev=244501&r1=244500&r2=244501&view=diff ============================================================================== --- cfe/trunk/test/CodeGenCUDA/ptx-kernels.cu (original) +++ cfe/trunk/test/CodeGenCUDA/ptx-kernels.cu Mon Aug 10 15:57:02 2015 @@ -1,7 +1,16 @@ +// Make sure that __global__ functions are emitted along with correct +// annotations and are added to @llvm.used to prevent their elimination. +// REQUIRES: nvptx-registered-target +// // RUN: %clang_cc1 %s -triple nvptx-unknown-unknown -fcuda-is-device -emit-llvm -o - | FileCheck %s #include "Inputs/cuda.h" +// Make sure that all __global__ functiona are added to @llvm.used +// CHECK: @llvm.used = appending global +// CHECK-SAME: @global_function +// CHECK-SAME: @_Z16templated_kernelIiEvT_ + // CHECK-LABEL: define void @device_function extern "C" __device__ void device_function() {} @@ -13,4 +22,10 @@ __global__ void global_function() { device_function(); } +// Make sure host-instantiated kernels are preserved on device side. +template <typename T> __global__ void templated_kernel(T param) {} +// CHECK-LABEL: define linkonce_odr void @_Z16templated_kernelIiEvT_ +void host_function() { templated_kernel<<<0,0>>>(0); } + // CHECK: !{{[0-9]+}} = !{void ()* @global_function, !"kernel", i32 1} +// CHECK: !{{[0-9]+}} = !{void (i32)* @_Z16templated_kernelIiEvT_, !"kernel", i32 1} _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits