[PATCH] D57488: [CUDA] add support for the new kernel launch API in CUDA-9.2+.
This revision was automatically updated to reflect the committed changes. Closed by commit rC352799: [CUDA] add support for the new kernel launch API in CUDA-9.2+. (authored by tra, committed by ). Changed prior to commit: https://reviews.llvm.org/D57488?vs=184592=184598#toc Repository: rC Clang CHANGES SINCE LAST ACTION https://reviews.llvm.org/D57488/new/ https://reviews.llvm.org/D57488 Files: include/clang/Basic/DiagnosticSemaKinds.td include/clang/Sema/Sema.h lib/CodeGen/CGCUDANV.cpp lib/Headers/__clang_cuda_runtime_wrapper.h lib/Sema/SemaCUDA.cpp lib/Sema/SemaDecl.cpp test/CodeGenCUDA/Inputs/cuda.h test/CodeGenCUDA/device-stub.cu test/CodeGenCUDA/kernel-args-alignment.cu test/CodeGenCUDA/kernel-call.cu test/Driver/cuda-simple.cu test/SemaCUDA/Inputs/cuda.h test/SemaCUDA/config-type.cu unittests/ASTMatchers/ASTMatchersTest.h Index: lib/Headers/__clang_cuda_runtime_wrapper.h === --- lib/Headers/__clang_cuda_runtime_wrapper.h +++ lib/Headers/__clang_cuda_runtime_wrapper.h @@ -426,5 +426,15 @@ #pragma pop_macro("__USE_FAST_MATH__") #pragma pop_macro("__CUDA_INCLUDE_COMPILER_INTERNAL_HEADERS__") +// CUDA runtime uses this undocumented function to access kernel launch +// configuration. The declaration is in crt/device_functions.h but that file +// includes a lot of other stuff we don't want. Instead, we'll provide our own +// declaration for it here. 
+#if CUDA_VERSION >= 9020 +extern "C" unsigned __cudaPushCallConfiguration(dim3 gridDim, dim3 blockDim, +size_t sharedMem = 0, +void *stream = 0); +#endif + #endif // __CUDA__ #endif // __CLANG_CUDA_RUNTIME_WRAPPER_H__ Index: lib/CodeGen/CGCUDANV.cpp === --- lib/CodeGen/CGCUDANV.cpp +++ lib/CodeGen/CGCUDANV.cpp @@ -15,6 +15,8 @@ #include "CodeGenFunction.h" #include "CodeGenModule.h" #include "clang/AST/Decl.h" +#include "clang/Basic/Cuda.h" +#include "clang/CodeGen/CodeGenABITypes.h" #include "clang/CodeGen/ConstantInitBuilder.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constants.h" @@ -102,7 +104,8 @@ return DummyFunc; } - void emitDeviceStubBody(CodeGenFunction , FunctionArgList ); + void emitDeviceStubBodyLegacy(CodeGenFunction , FunctionArgList ); + void emitDeviceStubBodyNew(CodeGenFunction , FunctionArgList ); public: CGNVCUDARuntime(CodeGenModule ); @@ -187,11 +190,110 @@ void CGNVCUDARuntime::emitDeviceStub(CodeGenFunction , FunctionArgList ) { EmittedKernels.push_back(CGF.CurFn); - emitDeviceStubBody(CGF, Args); + if (CudaFeatureEnabled(CGM.getTarget().getSDKVersion(), + CudaFeature::CUDA_USES_NEW_LAUNCH)) +emitDeviceStubBodyNew(CGF, Args); + else +emitDeviceStubBodyLegacy(CGF, Args); +} + +// CUDA 9.0+ uses new way to launch kernels. Parameters are packed in a local +// array and kernels are launched using cudaLaunchKernel(). +void CGNVCUDARuntime::emitDeviceStubBodyNew(CodeGenFunction , +FunctionArgList ) { + // Build the shadow stack entry at the very start of the function. + + // Calculate amount of space we will need for all arguments. If we have no + // args, allocate a single pointer so we still have a valid pointer to the + // argument array that we can pass to runtime, even if it will be unused. + Address KernelArgs = CGF.CreateTempAlloca( + VoidPtrTy, CharUnits::fromQuantity(16), "kernel_args", + llvm::ConstantInt::get(SizeTy, std::max(1, Args.size(; + // Store pointers to the arguments in a locally allocated launch_args. 
+ for (unsigned i = 0; i < Args.size(); ++i) { +llvm::Value* VarPtr = CGF.GetAddrOfLocalVar(Args[i]).getPointer(); +llvm::Value *VoidVarPtr = CGF.Builder.CreatePointerCast(VarPtr, VoidPtrTy); +CGF.Builder.CreateDefaultAlignedStore( +VoidVarPtr, CGF.Builder.CreateConstGEP1_32(KernelArgs.getPointer(), i)); + } + + llvm::BasicBlock *EndBlock = CGF.createBasicBlock("setup.end"); + + // Lookup cudaLaunchKernel function. + // cudaError_t cudaLaunchKernel(const void *func, dim3 gridDim, dim3 blockDim, + // void **args, size_t sharedMem, + // cudaStream_t stream); + TranslationUnitDecl *TUDecl = CGM.getContext().getTranslationUnitDecl(); + DeclContext *DC = TranslationUnitDecl::castToDeclContext(TUDecl); + IdentifierInfo = + CGM.getContext().Idents.get("cudaLaunchKernel"); + FunctionDecl *cudaLaunchKernelFD = nullptr; + for (const auto : DC->lookup()) { +if (FunctionDecl *FD = dyn_cast(Result)) + cudaLaunchKernelFD = FD; + } + + if (cudaLaunchKernelFD == nullptr) { +CGM.Error(CGF.CurFuncDecl->getLocation(), + "Can't find declaration for cudaLaunchKernel()"); +
[PATCH] D57488: [CUDA] add support for the new kernel launch API in CUDA-9.2+.
tra updated this revision to Diff 184592. tra added a comment. Updated ASTMatchers unit test. CHANGES SINCE LAST ACTION https://reviews.llvm.org/D57488/new/ https://reviews.llvm.org/D57488 Files: clang/include/clang/Basic/DiagnosticSemaKinds.td clang/include/clang/Sema/Sema.h clang/lib/CodeGen/CGCUDANV.cpp clang/lib/Headers/__clang_cuda_runtime_wrapper.h clang/lib/Sema/SemaCUDA.cpp clang/lib/Sema/SemaDecl.cpp clang/test/CodeGenCUDA/Inputs/cuda.h clang/test/CodeGenCUDA/device-stub.cu clang/test/CodeGenCUDA/kernel-args-alignment.cu clang/test/CodeGenCUDA/kernel-call.cu clang/test/Driver/cuda-simple.cu clang/test/SemaCUDA/Inputs/cuda.h clang/test/SemaCUDA/config-type.cu clang/unittests/ASTMatchers/ASTMatchersTest.h Index: clang/unittests/ASTMatchers/ASTMatchersTest.h === --- clang/unittests/ASTMatchers/ASTMatchersTest.h +++ clang/unittests/ASTMatchers/ASTMatchersTest.h @@ -183,7 +183,9 @@ "typedef struct cudaStream *cudaStream_t;" "int cudaConfigureCall(dim3 gridSize, dim3 blockSize," " size_t sharedSize = 0," - " cudaStream_t stream = 0);"; + " cudaStream_t stream = 0);" + "extern \"C\" unsigned __cudaPushCallConfiguration(" + "dim3 gridDim, dim3 blockDim, size_t sharedMem = 0, void *stream = 0);"; bool Found = false, DynamicFound = false; MatchFinder Finder; Index: clang/test/SemaCUDA/config-type.cu === --- clang/test/SemaCUDA/config-type.cu +++ clang/test/SemaCUDA/config-type.cu @@ -1,3 +1,7 @@ -// RUN: %clang_cc1 -fsyntax-only -verify %s +// RUN: %clang_cc1 -target-sdk-version=8.0 -fsyntax-only -verify=legacy-launch %s +// RUN: %clang_cc1 -target-sdk-version=9.2 -fsyntax-only -verify=new-launch %s -void cudaConfigureCall(unsigned gridSize, unsigned blockSize); // expected-error {{must have scalar return type}} +// legacy-launch-error@+1 {{must have scalar return type}} +void cudaConfigureCall(unsigned gridSize, unsigned blockSize); +// new-launch-error@+1 {{must have scalar return type}} +void __cudaPushCallConfiguration(unsigned gridSize, unsigned blockSize); 
Index: clang/test/SemaCUDA/Inputs/cuda.h === --- clang/test/SemaCUDA/Inputs/cuda.h +++ clang/test/SemaCUDA/Inputs/cuda.h @@ -18,9 +18,17 @@ }; typedef struct cudaStream *cudaStream_t; +typedef enum cudaError {} cudaError_t; -int cudaConfigureCall(dim3 gridSize, dim3 blockSize, size_t sharedSize = 0, - cudaStream_t stream = 0); +extern "C" int cudaConfigureCall(dim3 gridSize, dim3 blockSize, + size_t sharedSize = 0, + cudaStream_t stream = 0); +extern "C" int __cudaPushCallConfiguration(dim3 gridSize, dim3 blockSize, + size_t sharedSize = 0, + cudaStream_t stream = 0); +extern "C" cudaError_t cudaLaunchKernel(const void *func, dim3 gridDim, +dim3 blockDim, void **args, +size_t sharedMem, cudaStream_t stream); // Host- and device-side placement new overloads. void *operator new(__SIZE_TYPE__, void *p) { return p; } Index: clang/test/Driver/cuda-simple.cu === --- clang/test/Driver/cuda-simple.cu +++ clang/test/Driver/cuda-simple.cu @@ -2,7 +2,7 @@ // http://llvm.org/PR22936 // RUN: %clang -nocudainc -nocudalib -Werror -fsyntax-only -c %s // -// Verify that we pass -x cuda-cpp-output to compiler after +// Verify that we pass -x cuda-cpp-output to compiler after // preprocessing a CUDA file // RUN: %clang -Werror -### -save-temps -c %s 2>&1 | FileCheck %s // CHECK: "-cc1" @@ -14,7 +14,9 @@ // Verify that compiler accepts CUDA syntax with "-x cuda-cpp-output". 
// RUN: %clang -Werror -fsyntax-only -x cuda-cpp-output -c %s -int cudaConfigureCall(int, int); +extern "C" int cudaConfigureCall(int, int); +extern "C" int __cudaPushCallConfiguration(int, int); + __attribute__((global)) void kernel() {} void func() { Index: clang/test/CodeGenCUDA/kernel-call.cu === --- clang/test/CodeGenCUDA/kernel-call.cu +++ clang/test/CodeGenCUDA/kernel-call.cu @@ -1,5 +1,9 @@ -// RUN: %clang_cc1 -emit-llvm %s -o - | FileCheck %s --check-prefixes=CUDA,CHECK -// RUN: %clang_cc1 -x hip -emit-llvm %s -o - | FileCheck %s --check-prefixes=HIP,CHECK +// RUN: %clang_cc1 -target-sdk-version=8.0 -emit-llvm %s -o - \ +// RUN: | FileCheck %s --check-prefixes=CUDA-OLD,CHECK +// RUN: %clang_cc1 -target-sdk-version=9.2 -emit-llvm %s -o - \ +// RUN: | FileCheck %s --check-prefixes=CUDA-NEW,CHECK +// RUN: %clang_cc1 -x hip -emit-llvm %s -o - \ +// RUN: |
[PATCH] D57488: [CUDA] add support for the new kernel launch API in CUDA-9.2+.
tra added inline comments. Comment at: clang/lib/CodeGen/CGCUDANV.cpp:239 +CGM.Error(CGF.CurFuncDecl->getLocation(), + "Can't find declaration for cudaLaunchKernel()"); // FIXME. +return; jlebar wrote: > Unfixed FIXME? Fixed the comment. :-) There's not much we can do if we have no declaration for cudaLaunchKernel, so throwing the error here is the best we can do. Comment at: clang/lib/CodeGen/CGCUDANV.cpp:260 + /*isVarArg=*/false), + "__cudaPopCallConfiguration"); + jlebar wrote: > I see lots of references to `__cudaPushCallConfiguration`, but this is the > only reference I see to `__cudaPopCallConfiguration`. Is this a typo? Also > are we supposed to emit matching push and pop function calls? Kind of weird > to do one without the other... The `pop` part is indeed used only here. `Push` is something that takes user-specified parameters, so we get Sema to check them. `Pop` is much simpler and does not have any direct user exposure, so we can just create and use it here. As for matching, it is balanced. `Push` is called at the kernel launch site with the parameters of `<<<>>>`. `Pop` is done in the host-side kernel stub where we retrieve those parameters and pass them to the CUDA runtime. Essentially, push/pop are poor names for these functions, as the nesting is never more than one level deep. We could've just stashed the arguments in a fixed buffer somewhere. CHANGES SINCE LAST ACTION https://reviews.llvm.org/D57488/new/ https://reviews.llvm.org/D57488 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D57488: [CUDA] add support for the new kernel launch API in CUDA-9.2+.
tra updated this revision to Diff 184543. tra marked 8 inline comments as done. tra edited the summary of this revision. tra added a comment. Addressed Justin's comments. CHANGES SINCE LAST ACTION https://reviews.llvm.org/D57488/new/ https://reviews.llvm.org/D57488 Files: clang/include/clang/Basic/DiagnosticSemaKinds.td clang/include/clang/Sema/Sema.h clang/lib/CodeGen/CGCUDANV.cpp clang/lib/Headers/__clang_cuda_runtime_wrapper.h clang/lib/Sema/SemaCUDA.cpp clang/lib/Sema/SemaDecl.cpp clang/test/CodeGenCUDA/Inputs/cuda.h clang/test/CodeGenCUDA/device-stub.cu clang/test/CodeGenCUDA/kernel-args-alignment.cu clang/test/CodeGenCUDA/kernel-call.cu clang/test/Driver/cuda-simple.cu clang/test/SemaCUDA/Inputs/cuda.h clang/test/SemaCUDA/config-type.cu Index: clang/test/SemaCUDA/config-type.cu === --- clang/test/SemaCUDA/config-type.cu +++ clang/test/SemaCUDA/config-type.cu @@ -1,3 +1,7 @@ -// RUN: %clang_cc1 -fsyntax-only -verify %s +// RUN: %clang_cc1 -fno-cuda-new-launch -fsyntax-only -verify=legacy-launch %s +// RUN: %clang_cc1 -fcuda-new-launch -fsyntax-only -verify=new-launch %s -void cudaConfigureCall(unsigned gridSize, unsigned blockSize); // expected-error {{must have scalar return type}} +// legacy-launch-error@+1 {{must have scalar return type}} +void cudaConfigureCall(unsigned gridSize, unsigned blockSize); +// new-launch-error@+1 {{must have scalar return type}} +void __cudaPushCallConfiguration(unsigned gridSize, unsigned blockSize); Index: clang/test/SemaCUDA/Inputs/cuda.h === --- clang/test/SemaCUDA/Inputs/cuda.h +++ clang/test/SemaCUDA/Inputs/cuda.h @@ -18,9 +18,17 @@ }; typedef struct cudaStream *cudaStream_t; +typedef enum cudaError {} cudaError_t; -int cudaConfigureCall(dim3 gridSize, dim3 blockSize, size_t sharedSize = 0, - cudaStream_t stream = 0); +extern "C" int cudaConfigureCall(dim3 gridSize, dim3 blockSize, + size_t sharedSize = 0, + cudaStream_t stream = 0); +extern "C" int __cudaPushCallConfiguration(dim3 gridSize, dim3 blockSize, + size_t 
sharedSize = 0, + cudaStream_t stream = 0); +extern "C" cudaError_t cudaLaunchKernel(const void *func, dim3 gridDim, +dim3 blockDim, void **args, +size_t sharedMem, cudaStream_t stream); // Host- and device-side placement new overloads. void *operator new(__SIZE_TYPE__, void *p) { return p; } Index: clang/test/Driver/cuda-simple.cu === --- clang/test/Driver/cuda-simple.cu +++ clang/test/Driver/cuda-simple.cu @@ -2,7 +2,7 @@ // http://llvm.org/PR22936 // RUN: %clang -nocudainc -nocudalib -Werror -fsyntax-only -c %s // -// Verify that we pass -x cuda-cpp-output to compiler after +// Verify that we pass -x cuda-cpp-output to compiler after // preprocessing a CUDA file // RUN: %clang -Werror -### -save-temps -c %s 2>&1 | FileCheck %s // CHECK: "-cc1" @@ -14,7 +14,9 @@ // Verify that compiler accepts CUDA syntax with "-x cuda-cpp-output". // RUN: %clang -Werror -fsyntax-only -x cuda-cpp-output -c %s -int cudaConfigureCall(int, int); +extern "C" int cudaConfigureCall(int, int); +extern "C" int __cudaPushCallConfiguration(int, int); + __attribute__((global)) void kernel() {} void func() { Index: clang/test/CodeGenCUDA/kernel-call.cu === --- clang/test/CodeGenCUDA/kernel-call.cu +++ clang/test/CodeGenCUDA/kernel-call.cu @@ -1,5 +1,9 @@ -// RUN: %clang_cc1 -emit-llvm %s -o - | FileCheck %s --check-prefixes=CUDA,CHECK -// RUN: %clang_cc1 -x hip -emit-llvm %s -o - | FileCheck %s --check-prefixes=HIP,CHECK +// RUN: %clang_cc1 -target-sdk-version=8.0 -emit-llvm %s -o - \ +// RUN: | FileCheck %s --check-prefixes=CUDA-OLD,CHECK +// RUN: %clang_cc1 -target-sdk-version=9.2 -emit-llvm %s -o - \ +// RUN: | FileCheck %s --check-prefixes=CUDA-NEW,CHECK +// RUN: %clang_cc1 -x hip -emit-llvm %s -o - \ +// RUN: | FileCheck %s --check-prefixes=HIP,CHECK #include "Inputs/cuda.h" @@ -7,14 +11,17 @@ // CHECK-LABEL: define{{.*}}g1 // HIP: call{{.*}}hipSetupArgument // HIP: call{{.*}}hipLaunchByPtr -// CUDA: call{{.*}}cudaSetupArgument -// CUDA: call{{.*}}cudaLaunch +// CUDA-OLD: 
call{{.*}}cudaSetupArgument +// CUDA-OLD: call{{.*}}cudaLaunch +// CUDA-NEW: call{{.*}}__cudaPopCallConfiguration +// CUDA-NEW: call{{.*}}cudaLaunchKernel __global__ void g1(int x) {} // CHECK-LABEL: define{{.*}}main int main(void) { // HIP: call{{.*}}hipConfigureCall - // CUDA: call{{.*}}cudaConfigureCall + // CUDA-OLD: call{{.*}}cudaConfigureCall + // CUDA-NEW: call{{.*}}__cudaPushCallConfiguration // CHECK: icmp //
[PATCH] D57488: [CUDA] add support for the new kernel launch API in CUDA-9.2+.
jlebar accepted this revision. jlebar added a comment. This revision is now accepted and ready to land. LGTM, mostly nits. Comment at: clang/include/clang/Sema/Sema.h:10316 + /// Returns the name of the launch configuration function. + std::string getCudaConfigureFuncName() const; Could we be a little less vague: what exactly is the launch-configuration function? (Could be as simple as adding `e.g. cudaFooBar()`.) Comment at: clang/lib/CodeGen/CGCUDANV.cpp:201 -void CGNVCUDARuntime::emitDeviceStubBody(CodeGenFunction , - FunctionArgList ) { +// CUDA 9.0+ uses new way to launch kernels. Parameters are packed in local +// array and kernels are launched using cudaLaunchKernel(). nit `in a local array` Comment at: clang/lib/CodeGen/CGCUDANV.cpp:212 + VoidPtrTy, CharUnits::fromQuantity(16), "kernel_args", + llvm::ConstantInt::get(SizeTy, std::max(1UL, Args.size())); + // Store pointers to the arguments in a locally allocated launch_args. Nit, s/`1UL`/`uint64{1}`/ or size_t, whatever this function takes. As-is we're baking in the assumption that unsigned long is the same as the type returned by Args.size(), which isn't necessarily true. As an alternative, you could do `std::max<size_t>(1, Args.size())` or whatever the appropriate type is. Comment at: clang/lib/CodeGen/CGCUDANV.cpp:239 +CGM.Error(CGF.CurFuncDecl->getLocation(), + "Can't find declaration for cudaLaunchKernel()"); // FIXME. +return; Unfixed FIXME? Comment at: clang/lib/CodeGen/CGCUDANV.cpp:260 + /*isVarArg=*/false), + "__cudaPopCallConfiguration"); + I see lots of references to `__cudaPushCallConfiguration`, but this is the only reference I see to `__cudaPopCallConfiguration`. Is this a typo? Also are we supposed to emit matching push and pop function calls? Kind of weird to do one without the other... 
Comment at: clang/lib/CodeGen/CGCUDANV.cpp:266 + // Emit the call to cudaLaunch + + llvm::Value *Kernel = CGF.Builder.CreatePointerCast(CGF.CurFn, VoidPtrTy); Whitespace nit, maybe move this whitespace line before the comment? Comment at: clang/lib/Headers/__clang_cuda_runtime_wrapper.h:429 +// CUDA runtime uses undocumented function to access kernel launch +// configuration. We need to provide our own declaration for it here. s/undocumented function/this undocumented function/? CHANGES SINCE LAST ACTION https://reviews.llvm.org/D57488/new/ https://reviews.llvm.org/D57488 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D57488: [CUDA] add support for the new kernel launch API in CUDA-9.2+.
tra created this revision. tra added a reviewer: jlebar. Herald added subscribers: bixia, sanjoy. Instead of calling the CUDA runtime to arrange function arguments, the new API constructs arguments in a local array and the kernels are launched with cudaLaunchKernel(). The old API has been deprecated and is expected to go away in the next CUDA release. https://reviews.llvm.org/D57488 Files: clang/include/clang/Basic/DiagnosticSemaKinds.td clang/include/clang/Sema/Sema.h clang/lib/CodeGen/CGCUDANV.cpp clang/lib/Headers/__clang_cuda_runtime_wrapper.h clang/lib/Sema/SemaCUDA.cpp clang/lib/Sema/SemaDecl.cpp clang/test/CodeGenCUDA/Inputs/cuda.h clang/test/CodeGenCUDA/device-stub.cu clang/test/CodeGenCUDA/kernel-args-alignment.cu clang/test/CodeGenCUDA/kernel-call.cu clang/test/Driver/cuda-simple.cu clang/test/SemaCUDA/Inputs/cuda.h clang/test/SemaCUDA/config-type.cu Index: clang/test/SemaCUDA/config-type.cu === --- clang/test/SemaCUDA/config-type.cu +++ clang/test/SemaCUDA/config-type.cu @@ -1,3 +1,7 @@ -// RUN: %clang_cc1 -fsyntax-only -verify %s +// RUN: %clang_cc1 -fno-cuda-new-launch -fsyntax-only -verify=legacy-launch %s +// RUN: %clang_cc1 -fcuda-new-launch -fsyntax-only -verify=new-launch %s -void cudaConfigureCall(unsigned gridSize, unsigned blockSize); // expected-error {{must have scalar return type}} +// legacy-launch-error@+1 {{must have scalar return type}} +void cudaConfigureCall(unsigned gridSize, unsigned blockSize); +// new-launch-error@+1 {{must have scalar return type}} +void __cudaPushCallConfiguration(unsigned gridSize, unsigned blockSize); Index: clang/test/SemaCUDA/Inputs/cuda.h === --- clang/test/SemaCUDA/Inputs/cuda.h +++ clang/test/SemaCUDA/Inputs/cuda.h @@ -18,9 +18,17 @@ }; typedef struct cudaStream *cudaStream_t; +typedef enum cudaError {} cudaError_t; -int cudaConfigureCall(dim3 gridSize, dim3 blockSize, size_t sharedSize = 0, - cudaStream_t stream = 0); +extern "C" int cudaConfigureCall(dim3 gridSize, dim3 blockSize, + size_t sharedSize = 0, 
+ cudaStream_t stream = 0); +extern "C" int __cudaPushCallConfiguration(dim3 gridSize, dim3 blockSize, + size_t sharedSize = 0, + cudaStream_t stream = 0); +extern "C" cudaError_t cudaLaunchKernel(const void *func, dim3 gridDim, +dim3 blockDim, void **args, +size_t sharedMem, cudaStream_t stream); // Host- and device-side placement new overloads. void *operator new(__SIZE_TYPE__, void *p) { return p; } Index: clang/test/Driver/cuda-simple.cu === --- clang/test/Driver/cuda-simple.cu +++ clang/test/Driver/cuda-simple.cu @@ -2,7 +2,7 @@ // http://llvm.org/PR22936 // RUN: %clang -nocudainc -nocudalib -Werror -fsyntax-only -c %s // -// Verify that we pass -x cuda-cpp-output to compiler after +// Verify that we pass -x cuda-cpp-output to compiler after // preprocessing a CUDA file // RUN: %clang -Werror -### -save-temps -c %s 2>&1 | FileCheck %s // CHECK: "-cc1" @@ -14,7 +14,9 @@ // Verify that compiler accepts CUDA syntax with "-x cuda-cpp-output". // RUN: %clang -Werror -fsyntax-only -x cuda-cpp-output -c %s -int cudaConfigureCall(int, int); +extern "C" int cudaConfigureCall(int, int); +extern "C" int __cudaPushCallConfiguration(int, int); + __attribute__((global)) void kernel() {} void func() { Index: clang/test/CodeGenCUDA/kernel-call.cu === --- clang/test/CodeGenCUDA/kernel-call.cu +++ clang/test/CodeGenCUDA/kernel-call.cu @@ -1,5 +1,9 @@ -// RUN: %clang_cc1 -emit-llvm %s -o - | FileCheck %s --check-prefixes=CUDA,CHECK -// RUN: %clang_cc1 -x hip -emit-llvm %s -o - | FileCheck %s --check-prefixes=HIP,CHECK +// RUN: %clang_cc1 -target-sdk-version=8.0 -emit-llvm %s -o - \ +// RUN: | FileCheck %s --check-prefixes=CUDA-OLD,CHECK +// RUN: %clang_cc1 -target-sdk-version=9.2 -emit-llvm %s -o - \ +// RUN: | FileCheck %s --check-prefixes=CUDA-NEW,CHECK +// RUN: %clang_cc1 -x hip -emit-llvm %s -o - \ +// RUN: | FileCheck %s --check-prefixes=HIP,CHECK #include "Inputs/cuda.h" @@ -7,14 +11,17 @@ // CHECK-LABEL: define{{.*}}g1 // HIP: call{{.*}}hipSetupArgument // HIP: 
call{{.*}}hipLaunchByPtr -// CUDA: call{{.*}}cudaSetupArgument -// CUDA: call{{.*}}cudaLaunch +// CUDA-OLD: call{{.*}}cudaSetupArgument +// CUDA-OLD: call{{.*}}cudaLaunch +// CUDA-NEW: call{{.*}}__cudaPopCallConfiguration +// CUDA-NEW: call{{.*}}cudaLaunchKernel __global__ void g1(int x) {} // CHECK-LABEL: define{{.*}}main int main(void) { // HIP: call{{.*}}hipConfigureCall - // CUDA: call{{.*}}cudaConfigureCall + //