llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-clang-modules
Author: None (darkbuck)
<details>
<summary>Changes</summary>
- CUDA's dynamic parallelism extension allows device-side kernel launches,
which share the same syntax as host-side launches, e.g.,
    kernel<<<Dg, Db, Ns, S>>>(arguments);
but differ in code generation (a complete example follows this list). A
device-side kernel launch is eventually translated into the following
sequence:
    config = cudaGetParameterBuffer(alignment, size);
    // Set up arguments by copying them into `config`.
    cudaLaunchDevice(func, config, Dg, Db, Ns, S);
- To support device-side kernel launches, 'CUDAKernelCallExpr' is reused, but
its config expression is set to a call to 'cudaLaunchDevice'. During code
generation, 'CUDAKernelCallExpr' is expanded into the sequence above (see the
sketch after this list).
- As device-side kernel launches require the source to be compiled as
relocatable device code and linked with '-lcudadevrt', the linker wrappers are
changed to pass the relevant link options to 'nvlink' (a sample invocation
follows this list).
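
For illustration, a minimal dynamic-parallelism example (the kernel names are
hypothetical, not taken from this patch):

```cuda
// Hypothetical example: a parent kernel launching a child kernel from the
// device, with the same <<<>>> syntax as a host-side launch. Building it
// requires relocatable device code and linking with -lcudadevrt.
__global__ void child(int *out, int value) { out[threadIdx.x] = value; }

__global__ void parent(int *out) {
  // With this patch, Clang lowers this device-side launch to
  // cudaGetParameterBuffer + cudaLaunchDevice instead of the host-side
  // configuration call.
  child<<<1, 32>>>(out, 42);
}
```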
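
And a rough C++-style sketch of what codegen emits for such a launch, based on
'EmitCUDADeviceKernelCallExpr' in this patch ('paramsSize' stands in for the
computed aligned size of the kernel arguments; the labels match the basic
blocks created by the patch):

```cuda
// Sketch only, not the actual codegen API. The parameter buffer is requested
// with 64-byte alignment; the launch is skipped entirely if allocation fails.
void *config = cudaGetParameterBuffer(/*alignment=*/64, paramsSize);
if (config != nullptr) { // dkcall.configok
  // Copy each kernel argument into `config` at its naturally aligned offset.
  cudaLaunchDevice((void *)child, config, Dg, Db, Ns, S);
} // dkcall.end
```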
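
As a usage note, building such code would look roughly like
`clang++ -x cuda --offload-arch=sm_61 -fgpu-rdc ... -lcudadevrt` (invocation
illustrative); without `-fgpu-rdc`, the new diagnostic rejects the device-side
launch.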
---
Patch is 33.18 KiB, truncated to 20.00 KiB below, full version:
https://github.com/llvm/llvm-project/pull/165519.diff
20 Files Affected:
- (modified) clang/include/clang/AST/ASTContext.h (+16)
- (modified) clang/include/clang/Basic/DiagnosticSemaKinds.td (+6)
- (modified) clang/include/clang/Sema/SemaCUDA.h (+5)
- (modified) clang/include/clang/Serialization/ASTReader.h (+1-1)
- (modified) clang/lib/CodeGen/CGCUDARuntime.cpp (+110)
- (modified) clang/lib/CodeGen/CGCUDARuntime.h (+4)
- (modified) clang/lib/CodeGen/CGExprCXX.cpp (+6)
- (modified) clang/lib/Sema/SemaCUDA.cpp (+86-4)
- (modified) clang/lib/Sema/SemaDecl.cpp (+24-8)
- (modified) clang/lib/Serialization/ASTReader.cpp (+6-2)
- (modified) clang/lib/Serialization/ASTWriter.cpp (+23-14)
- (modified) clang/test/CodeGenCUDA/Inputs/cuda.h (+6-1)
- (added) clang/test/CodeGenCUDA/device-kernel-call.cu (+17)
- (modified) clang/test/SemaCUDA/Inputs/cuda.h (+6)
- (modified) clang/test/SemaCUDA/call-kernel-from-kernel.cu (+4-1)
- (modified) clang/test/SemaCUDA/function-overload.cu (+8-18)
- (modified) clang/test/SemaCUDA/function-target.cu (+2-2)
- (modified) clang/test/SemaCUDA/reference-to-kernel-fn.cu (+2-2)
- (modified) clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp (+10)
- (modified) clang/tools/clang-nvlink-wrapper/ClangNVLinkWrapper.cpp (+7-1)
``````````diff
diff --git a/clang/include/clang/AST/ASTContext.h b/clang/include/clang/AST/ASTContext.h
index 33aa2d343aa7a..f64e29be3205f 100644
--- a/clang/include/clang/AST/ASTContext.h
+++ b/clang/include/clang/AST/ASTContext.h
@@ -488,6 +488,10 @@ class ASTContext : public RefCountedBase<ASTContext> {
/// Declaration for the CUDA cudaConfigureCall function.
FunctionDecl *cudaConfigureCallDecl = nullptr;
+ /// Declaration for the CUDA cudaGetParameterBuffer function.
+ FunctionDecl *cudaGetParameterBufferDecl = nullptr;
+ /// Declaration for the CUDA cudaLaunchDevice function.
+ FunctionDecl *cudaLaunchDeviceDecl = nullptr;
/// Keeps track of all declaration attributes.
///
@@ -1641,6 +1645,18 @@ class ASTContext : public RefCountedBase<ASTContext> {
return cudaConfigureCallDecl;
}
+ void setcudaGetParameterBufferDecl(FunctionDecl *FD) {
+ cudaGetParameterBufferDecl = FD;
+ }
+
+ FunctionDecl *getcudaGetParameterBufferDecl() {
+ return cudaGetParameterBufferDecl;
+ }
+
+ void setcudaLaunchDeviceDecl(FunctionDecl *FD) { cudaLaunchDeviceDecl = FD; }
+
+ FunctionDecl *getcudaLaunchDeviceDecl() { return cudaLaunchDeviceDecl; }
+
/// Returns true iff we need copy/dispose helpers for the given type.
bool BlockRequiresCopying(QualType Ty, const VarDecl *D);
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 4e369be0bbb92..5e010cb52954d 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -9499,6 +9499,8 @@ def err_kern_is_nonstatic_method : Error<
"kernel function %0 must be a free function or static member function">;
def err_config_scalar_return : Error<
"CUDA special function '%0' must have scalar return type">;
+def err_config_pointer_return
+ : Error<"CUDA special function '%0' must have pointer return type">;
def err_kern_call_not_global_function : Error<
"kernel call to non-global function %0">;
def err_global_call_not_config : Error<
@@ -13690,4 +13692,8 @@ def err_amdgcn_load_lds_size_invalid_value :
Error<"invalid size value">;
def note_amdgcn_load_lds_size_valid_value : Note<"size must be %select{1, 2, or 4|1, 2, 4, 12 or 16}0">;
def err_amdgcn_coop_atomic_invalid_as : Error<"cooperative atomic requires a global or generic pointer">;
+
+def err_cuda_device_kernel_launch_require_rdc
+ : Error<"kernel launch from __device__ or __global__ function requires "
+ "relocatable device code, also known as separate compilation
mode">;
} // end of sema component.
diff --git a/clang/include/clang/Sema/SemaCUDA.h b/clang/include/clang/Sema/SemaCUDA.h
index dbc1432860d89..dbb4290f5d149 100644
--- a/clang/include/clang/Sema/SemaCUDA.h
+++ b/clang/include/clang/Sema/SemaCUDA.h
@@ -273,6 +273,11 @@ class SemaCUDA : public SemaBase {
/// of the function that will be called to configure kernel call, with the
/// parameters specified via <<<>>>.
std::string getConfigureFuncName() const;
+ /// Return the name of the parameter buffer allocation function for the
+ /// device kernel launch.
+ std::string getGetParameterBufferFuncName() const;
+ /// Return the name of the device kernel launch function.
+ std::string getLaunchDeviceFuncName() const;
/// Record variables that are potentially ODR-used in CUDA/HIP.
void recordPotentialODRUsedVariable(MultiExprArg Args,
diff --git a/clang/include/clang/Serialization/ASTReader.h b/clang/include/clang/Serialization/ASTReader.h
index af856a8097ab1..a65f7fd2d1d43 100644
--- a/clang/include/clang/Serialization/ASTReader.h
+++ b/clang/include/clang/Serialization/ASTReader.h
@@ -1013,7 +1013,7 @@ class ASTReader
///
/// The AST context tracks a few important decls, currently cudaConfigureCall,
/// directly.
- SmallVector<GlobalDeclID, 2> CUDASpecialDeclRefs;
+ SmallVector<GlobalDeclID, 4> CUDASpecialDeclRefs;
/// The floating point pragma option settings.
SmallVector<uint64_t, 1> FPPragmaOptions;
diff --git a/clang/lib/CodeGen/CGCUDARuntime.cpp b/clang/lib/CodeGen/CGCUDARuntime.cpp
index 121a481213396..cd1476ebd6754 100644
--- a/clang/lib/CodeGen/CGCUDARuntime.cpp
+++ b/clang/lib/CodeGen/CGCUDARuntime.cpp
@@ -22,6 +22,116 @@ using namespace CodeGen;
CGCUDARuntime::~CGCUDARuntime() {}
+static llvm::Value *emitGetParamBuf(CodeGenFunction &CGF,
+ const CUDAKernelCallExpr *E) {
+ auto *GetParamBuf = CGF.getContext().getcudaGetParameterBufferDecl();
+ const FunctionProtoType *GetParamBufProto =
+ GetParamBuf->getType()->getAs<FunctionProtoType>();
+
+ DeclRefExpr *DRE = DeclRefExpr::Create(
+ CGF.getContext(), {}, {}, GetParamBuf,
+ /*RefersToEnclosingVariableOrCapture=*/false, GetParamBuf->getNameInfo(),
+ GetParamBuf->getType(), VK_PRValue);
+ auto *ImpCast = ImplicitCastExpr::Create(
+      CGF.getContext(), CGF.getContext().getPointerType(GetParamBuf->getType()),
+      CK_FunctionToPointerDecay, DRE, nullptr, VK_PRValue, FPOptionsOverride());
+
+ CGCallee Callee = CGF.EmitCallee(ImpCast);
+ CallArgList Args;
+ // Use 64B alignment.
+ Args.add(RValue::get(CGF.CGM.getSize(CharUnits::fromQuantity(64))),
+ CGF.getContext().getSizeType());
+ // Calculate parameter sizes.
+ const PointerType *PT = E->getCallee()->getType()->getAs<PointerType>();
+ const FunctionProtoType *FTP =
+ PT->getPointeeType()->getAs<FunctionProtoType>();
+ CharUnits Offset = CharUnits::Zero();
+ for (auto ArgTy : FTP->getParamTypes()) {
+ auto TInfo = CGF.CGM.getContext().getTypeInfoInChars(ArgTy);
+ Offset = Offset.alignTo(TInfo.Align);
+ Offset += TInfo.Width;
+ }
+ Args.add(RValue::get(CGF.CGM.getSize(Offset)),
+ CGF.getContext().getSizeType());
+ const CGFunctionInfo &CallInfo = CGF.CGM.getTypes().arrangeFreeFunctionCall(
+ Args, GetParamBufProto, /*ChainCall=*/false);
+ auto Ret = CGF.EmitCall(CallInfo, Callee, /*ReturnValue=*/{}, Args);
+
+ return Ret.getScalarVal();
+}
+
+RValue CGCUDARuntime::EmitCUDADeviceKernelCallExpr(
+ CodeGenFunction &CGF, const CUDAKernelCallExpr *E,
+ ReturnValueSlot ReturnValue, llvm::CallBase **CallOrInvoke) {
+ ASTContext &Ctx = CGM.getContext();
+ assert(Ctx.getcudaLaunchDeviceDecl() == E->getConfig()->getDirectCallee());
+
+ llvm::BasicBlock *ConfigOKBlock = CGF.createBasicBlock("dkcall.configok");
+ llvm::BasicBlock *ContBlock = CGF.createBasicBlock("dkcall.end");
+
+ llvm::Value *Config = emitGetParamBuf(CGF, E);
+ CGF.Builder.CreateCondBr(
+ CGF.Builder.CreateICmpNE(Config,
+                               llvm::Constant::getNullValue(Config->getType())),
+ ConfigOKBlock, ContBlock);
+
+ CodeGenFunction::ConditionalEvaluation eval(CGF);
+
+ eval.begin(CGF);
+ CGF.EmitBlock(ConfigOKBlock);
+
+ QualType KernelCalleeFuncTy =
+ E->getCallee()->getType()->getAs<PointerType>()->getPointeeType();
+ CGCallee KernelCallee = CGF.EmitCallee(E->getCallee());
+ // Emit kernel arguments.
+ CallArgList KernelCallArgs;
+ CGF.EmitCallArgs(
+ KernelCallArgs,
+ dyn_cast<FunctionProtoType>(KernelCalleeFuncTy->castAs<FunctionType>()),
+ E->arguments(), E->getDirectCallee());
+ // Copy emitted kernel arguments into that parameter buffer.
+ RawAddress CfgBase(Config, CGM.Int8Ty,
+ /*Alignment=*/CharUnits::fromQuantity(64));
+ CharUnits Offset = CharUnits::Zero();
+ for (auto &Arg : KernelCallArgs) {
+ auto TInfo = CGM.getContext().getTypeInfoInChars(Arg.getType());
+ Offset = Offset.alignTo(TInfo.Align);
+ Address Addr =
+ CGF.Builder.CreateConstInBoundsGEP(CfgBase, Offset.getQuantity());
+ Arg.copyInto(CGF, Addr);
+ Offset += TInfo.Width;
+ }
+  // Emit the `cudaLaunchDevice` call, i.e., E->getConfig().
+ const CallExpr *LaunchCall = E->getConfig();
+ QualType LaunchCalleeFuncTy = LaunchCall->getCallee()
+ ->getType()
+ ->getAs<PointerType>()
+ ->getPointeeType();
+ CGCallee LaunchCallee = CGF.EmitCallee(LaunchCall->getCallee());
+ CallArgList LaunchCallArgs;
+ CGF.EmitCallArgs(
+ LaunchCallArgs,
+ dyn_cast<FunctionProtoType>(LaunchCalleeFuncTy->castAs<FunctionType>()),
+ LaunchCall->arguments(), LaunchCall->getDirectCallee());
+  // Replace the func and parameter-buffer arguments.
+ LaunchCallArgs[0] = CallArg(RValue::get(KernelCallee.getFunctionPointer()),
+ CGM.getContext().VoidPtrTy);
+ LaunchCallArgs[1] = CallArg(RValue::get(Config), CGM.getContext().VoidPtrTy);
+ const CGFunctionInfo &LaunchCallInfo =
CGM.getTypes().arrangeFreeFunctionCall(
+ LaunchCallArgs,
+ dyn_cast<FunctionProtoType>(LaunchCalleeFuncTy->castAs<FunctionType>()),
+ /*ChainCall=*/false);
+ CGF.EmitCall(LaunchCallInfo, LaunchCallee, ReturnValue, LaunchCallArgs,
+ CallOrInvoke,
+ /*IsMustTail=*/false, E->getExprLoc());
+ CGF.EmitBranch(ContBlock);
+
+ CGF.EmitBlock(ContBlock);
+ eval.end(CGF);
+
+ return RValue::get(nullptr);
+}
+
RValue CGCUDARuntime::EmitCUDAKernelCallExpr(CodeGenFunction &CGF,
const CUDAKernelCallExpr *E,
ReturnValueSlot ReturnValue,
diff --git a/clang/lib/CodeGen/CGCUDARuntime.h b/clang/lib/CodeGen/CGCUDARuntime.h
index 86f776004ee7c..64fb9a31422e0 100644
--- a/clang/lib/CodeGen/CGCUDARuntime.h
+++ b/clang/lib/CodeGen/CGCUDARuntime.h
@@ -88,6 +88,10 @@ class CGCUDARuntime {
ReturnValueSlot ReturnValue,
llvm::CallBase **CallOrInvoke = nullptr);
+ virtual RValue EmitCUDADeviceKernelCallExpr(
+ CodeGenFunction &CGF, const CUDAKernelCallExpr *E,
+ ReturnValueSlot ReturnValue, llvm::CallBase **CallOrInvoke = nullptr);
+
/// Emits a kernel launch stub.
virtual void emitDeviceStub(CodeGenFunction &CGF, FunctionArgList &Args) = 0;
diff --git a/clang/lib/CodeGen/CGExprCXX.cpp b/clang/lib/CodeGen/CGExprCXX.cpp
index 14d8db32bafc6..0c01933790100 100644
--- a/clang/lib/CodeGen/CGExprCXX.cpp
+++ b/clang/lib/CodeGen/CGExprCXX.cpp
@@ -503,6 +503,12 @@ RValue CodeGenFunction::EmitCXXOperatorMemberCallExpr(
RValue CodeGenFunction::EmitCUDAKernelCallExpr(const CUDAKernelCallExpr *E,
ReturnValueSlot ReturnValue,
llvm::CallBase **CallOrInvoke) {
+ auto *FD = E->getConfig()->getDirectCallee();
+  // Emit as a device kernel call if the config expression is a call to
+  // 'cudaLaunchDevice'.
+ if (FD && CGM.getContext().getcudaLaunchDeviceDecl() == FD)
+ return CGM.getCUDARuntime().EmitCUDADeviceKernelCallExpr(
+ *this, E, ReturnValue, CallOrInvoke);
return CGM.getCUDARuntime().EmitCUDAKernelCallExpr(*this, E, ReturnValue,
CallOrInvoke);
}
diff --git a/clang/lib/Sema/SemaCUDA.cpp b/clang/lib/Sema/SemaCUDA.cpp
index 31735a0f5feb3..a60a32dcb9e4c 100644
--- a/clang/lib/Sema/SemaCUDA.cpp
+++ b/clang/lib/Sema/SemaCUDA.cpp
@@ -52,16 +52,85 @@ bool SemaCUDA::PopForceHostDevice() {
ExprResult SemaCUDA::ActOnExecConfigExpr(Scope *S, SourceLocation LLLLoc,
MultiExprArg ExecConfig,
SourceLocation GGGLoc) {
- FunctionDecl *ConfigDecl = getASTContext().getcudaConfigureCallDecl();
+ bool IsDeviceKernelCall = false;
+ switch (CurrentTarget()) {
+ case CUDAFunctionTarget::Global:
+ case CUDAFunctionTarget::Device:
+ IsDeviceKernelCall = true;
+ break;
+ case CUDAFunctionTarget::HostDevice:
+ if (getLangOpts().CUDAIsDevice) {
+      // In device compilation, a config call in an HD function should be
+      // treated as a device kernel call. But for implicit HD functions (such
+      // as lambdas), we need to check whether RDC is enabled.
+ IsDeviceKernelCall = true;
+ if (!getLangOpts().GPURelocatableDeviceCode) {
+        FunctionDecl *Caller = SemaRef.getCurFunctionDecl(/*AllowLambda=*/true);
+ if (Caller && isImplicitHostDeviceFunction(Caller))
+ IsDeviceKernelCall = false;
+ }
+ }
+ break;
+ default:
+ break;
+ }
+
+ if (IsDeviceKernelCall && !getLangOpts().GPURelocatableDeviceCode)
+ return ExprError(
+ Diag(LLLLoc, diag::err_cuda_device_kernel_launch_require_rdc));
+
+ FunctionDecl *ConfigDecl = IsDeviceKernelCall
+ ? getASTContext().getcudaLaunchDeviceDecl()
+ : getASTContext().getcudaConfigureCallDecl();
if (!ConfigDecl)
return ExprError(Diag(LLLLoc, diag::err_undeclared_var_use)
- << getConfigureFuncName());
+ << (IsDeviceKernelCall ? getLaunchDeviceFuncName()
+ : getConfigureFuncName()));
+ // Additional check on the launch function if it's a device kernel call.
+ if (IsDeviceKernelCall) {
+ auto *GetParamBuf = getASTContext().getcudaGetParameterBufferDecl();
+ if (!GetParamBuf)
+ return ExprError(Diag(LLLLoc, diag::err_undeclared_var_use)
+ << getGetParameterBufferFuncName());
+ }
+
QualType ConfigQTy = ConfigDecl->getType();
DeclRefExpr *ConfigDR = new (getASTContext()) DeclRefExpr(
getASTContext(), ConfigDecl, false, ConfigQTy, VK_LValue, LLLLoc);
SemaRef.MarkFunctionReferenced(LLLLoc, ConfigDecl);
+ if (IsDeviceKernelCall) {
+ SmallVector<Expr *> Args;
+ // Use a null pointer as the kernel function, which may not be resolvable
+ // here. For example, resolving that kernel function may need additional
+ // kernel arguments.
+ llvm::APInt Zero(SemaRef.Context.getTypeSize(SemaRef.Context.IntTy), 0);
+ Args.push_back(IntegerLiteral::Create(SemaRef.Context, Zero,
+ SemaRef.Context.IntTy, LLLLoc));
+    // Use a null pointer as the parameter buffer, which will be allocated
+    // during codegen.
+ Args.push_back(IntegerLiteral::Create(SemaRef.Context, Zero,
+ SemaRef.Context.IntTy, LLLLoc));
+ // Add the original config arguments.
+ llvm::append_range(Args, ExecConfig);
+ // Add the default blockDim if it's missing.
+ if (Args.size() < 4) {
+ llvm::APInt One(SemaRef.Context.getTypeSize(SemaRef.Context.IntTy), 1);
+ Args.push_back(IntegerLiteral::Create(SemaRef.Context, One,
+ SemaRef.Context.IntTy, LLLLoc));
+ }
+ // Add the default sharedMemSize if it's missing.
+ if (Args.size() < 5)
+ Args.push_back(IntegerLiteral::Create(SemaRef.Context, Zero,
+ SemaRef.Context.IntTy, LLLLoc));
+ // Add the default stream if it's missing.
+ if (Args.size() < 6)
+ Args.push_back(IntegerLiteral::Create(SemaRef.Context, Zero,
+ SemaRef.Context.IntTy, LLLLoc));
+ return SemaRef.BuildCallExpr(S, ConfigDR, LLLLoc, Args, GGGLoc, nullptr,
+ /*IsExecConfig=*/true);
+ }
  return SemaRef.BuildCallExpr(S, ConfigDR, LLLLoc, ExecConfig, GGGLoc, nullptr,
                               /*IsExecConfig=*/true);
}
@@ -251,7 +320,7 @@ SemaCUDA::IdentifyPreference(const FunctionDecl *Caller,
if (CalleeTarget == CUDAFunctionTarget::Global &&
(CallerTarget == CUDAFunctionTarget::Global ||
CallerTarget == CUDAFunctionTarget::Device))
- return CFP_Never;
+ return CFP_Native;
// (b) Calling HostDevice is OK for everyone.
if (CalleeTarget == CUDAFunctionTarget::HostDevice)
@@ -279,7 +348,8 @@ SemaCUDA::IdentifyPreference(const FunctionDecl *Caller,
if (CallerTarget == CUDAFunctionTarget::HostDevice) {
// It's OK to call a compilation-mode matching function from an HD one.
if ((getLangOpts().CUDAIsDevice &&
- CalleeTarget == CUDAFunctionTarget::Device) ||
+ (CalleeTarget == CUDAFunctionTarget::Device ||
+ CalleeTarget == CUDAFunctionTarget::Global)) ||
(!getLangOpts().CUDAIsDevice &&
(CalleeTarget == CUDAFunctionTarget::Host ||
CalleeTarget == CUDAFunctionTarget::Global)))
@@ -1103,6 +1173,18 @@ std::string SemaCUDA::getConfigureFuncName() const {
return "cudaConfigureCall";
}
+std::string SemaCUDA::getGetParameterBufferFuncName() const {
+ // FIXME: Use the API from CUDA programming guide. Add V2 support when
+ // necessary.
+ return "cudaGetParameterBuffer";
+}
+
+std::string SemaCUDA::getLaunchDeviceFuncName() const {
+ // FIXME: Use the API from CUDA programming guide. Add V2 support when
+ // necessary.
+ return "cudaLaunchDevice";
+}
+
// Record any local constexpr variables that are passed one way on the host
// and another on the device.
void SemaCUDA::recordPotentialODRUsedVariable(
diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index fc3aabf5741ca..1e39bfb5e42cd 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -11050,14 +11050,30 @@ Sema::ActOnFunctionDeclarator(Scope *S, Declarator &D, DeclContext *DC,
}
if (getLangOpts().CUDA) {
- IdentifierInfo *II = NewFD->getIdentifier();
- if (II && II->isStr(CUDA().getConfigureFuncName()) &&
- !NewFD->isInvalidDecl() &&
- NewFD->getDeclContext()->getRedeclContext()->isTranslationUnit()) {
- if (!R->castAs<FunctionType>()->getReturnType()->isScalarType())
- Diag(NewFD->getLocation(), diag::err_config_scalar_return)
- << CUDA().getConfigureFuncName();
- Context.setcudaConfigureCallDecl(NewFD);
+ if (IdentifierInfo *II = NewFD->getIdentifier()) {
+ if (II->isStr(CUDA().getConfigureFuncName()) && !NewFD->isInvalidDecl()
&&
+ NewFD->getDeclContext()->getRedeclContext()->isTranslationUnit()) {
+ if (!R->castAs<FunctionType>()->getReturnType()->isScalarType())
+ Diag(NewFD->getLocation(), diag::err_config_scalar_return)
+ << CUDA().getConfigureFuncName();
+ Context.setcudaConfigureCallDecl(NewFD);
+ }
+ if (II->isStr(CUDA().getGetParameterBufferFuncName()) &&
+ !NewFD->isInvalidDecl() &&
+ NewFD->getDeclContext()->getRedeclContext()->isTranslationUnit()) {
+ if (!R->castAs<FunctionType>()->getReturnType()->isPointerType())
+ Diag(NewFD->getLocation(), diag::err_config_pointer_return)
+ << CUDA().getConfigureFuncName();
+ Context.setcudaGetParameterBufferDecl(NewFD);
+ }
+ if (II->isStr(CUDA().getLaunchDeviceFuncName()) &&
+ !NewFD->isInvalidDecl() &&
+ NewFD->getDeclContext()->getRedeclContext()->isTranslationUnit()) {
+ if (!R->castAs<FunctionType>()->getReturnType()->isScalarType())
+ Diag(NewFD->getLocation(), diag::err_config_scalar_return)
+ << CUDA().getConfigureFuncName();
+ Context.setcudaLaunchDeviceDecl(NewFD);
+ }
}
}
diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp
index c1b5cb730e4a4..e415d5816ab01 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -5588,9 +5588,13 @@ void ASTReader::InitializeContext() {
// If there were any CUDA special declarations, deserialize them.
if (!CUDASpecialDeclRefs.empty()) {
- assert(CUDASpecialDeclRefs.size() == 1 && "More decl refs than expected!");
+ assert(CUDASpecialDeclRefs.size() == 3 && "More decl refs than expected!");
Context.setcudaConfigureCallDecl(
-        cast<FunctionDecl>(GetDecl(CUDASpecialDeclRefs[0])));
+ cast_or_null<FunctionDecl>(GetDecl(CUDASpecialDeclRefs[0])));
+ Context.setcudaGetParameterBufferDecl(
+ cast_or_null<FunctionDecl>(GetDecl(CUDASpecialDeclRefs[1])));
+ Context.setcudaLaunchDeviceDecl(
+ cast_or_null<FunctionDecl>(GetDecl(CUDASpecialDeclRefs[2])));
}
// Re-export any modules that were imported by a non-module AST file.
diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp
index 377e3966874f3..8e527db972fb0 100644
--- a/clang/lib/Serialization/ASTWriter.cpp
+++ b/clang/lib/Serialization/ASTWriter.cpp
@@ -5714,8 +5714,13 @@ void ASTWriter::PrepareWritingSpecialDecls(Sema &Sem...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/165519