dhruvachak created this revision. Herald added a subscriber: hiraditya. Herald added a project: All. dhruvachak requested review of this revision. Herald added a reviewer: jdoerfert. Herald added subscribers: llvm-commits, cfe-commits, sstefan1. Herald added projects: clang, LLVM.
If an inlined kernel is called in a loop, the alloca emitted at the kernel launch point is executed on every iteration, so stack usage increases every time the kernel is invoked. This could make the application run out of stack space and crash. This problem is fixed by emitting the alloca in the entry block of the enclosing function instead. Fixes https://github.com/llvm/llvm-project/issues/60602 Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D145820 Files: clang/lib/CodeGen/CGOpenMPRuntime.cpp llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp Index: llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp =================================================================== --- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -815,12 +815,18 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitTargetKernel( const LocationDescription &Loc, Value *&Return, Value *Ident, Value *DeviceID, Value *NumTeams, Value *NumThreads, Value *HostPtr, - ArrayRef<Value *> KernelArgs) { + ArrayRef<Value *> KernelArgs, llvm::BasicBlock *EntryBlock) { if (!updateToLocation(Loc)) return Loc.IP; - auto *KernelArgsPtr = - Builder.CreateAlloca(OpenMPIRBuilder::KernelArgs, nullptr, "kernel_args"); + // Insert the alloca at the start of the function entry block. + AllocaInst *KernelArgsPtr = nullptr; + { + IRBuilder<>::InsertPointGuard IPG(Builder); + Builder.SetInsertPoint(EntryBlock, EntryBlock->begin()); + KernelArgsPtr = Builder.CreateAlloca(OpenMPIRBuilder::KernelArgs, nullptr, + "kernel_args"); + } for (unsigned I = 0, Size = KernelArgs.size(); I != Size; ++I) { llvm::Value *Arg = Builder.CreateStructGEP(OpenMPIRBuilder::KernelArgs, KernelArgsPtr, I); Index: llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h =================================================================== --- llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -995,10 +995,12 @@ /// \param NumThreads Number of threads via the 'thread_limit' clause. 
/// \param HostPtr Pointer to the host-side pointer of the target kernel. /// \param KernelArgs Array of arguments to the kernel. + /// \param EntryBlock Entry basic block of the enclosing function. InsertPointTy emitTargetKernel(const LocationDescription &Loc, Value *&Return, Value *Ident, Value *DeviceID, Value *NumTeams, Value *NumThreads, Value *HostPtr, - ArrayRef<Value *> KernelArgs); + ArrayRef<Value *> KernelArgs, + llvm::BasicBlock *EntryBlock); /// Generate a barrier runtime call. /// Index: clang/lib/CodeGen/CGOpenMPRuntime.cpp =================================================================== --- clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -9967,7 +9967,7 @@ // Check the error code and execute the host version if required. CGF.Builder.restoreIP(OMPBuilder.emitTargetKernel( CGF.Builder, Return, RTLoc, DeviceID, NumTeams, NumThreads, - OutlinedFnID, KernelArgs)); + OutlinedFnID, KernelArgs, &CGF.CurFn->getEntryBlock())); llvm::BasicBlock *OffloadFailedBlock = CGF.createBasicBlock("omp_offload.failed");
Index: llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp =================================================================== --- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -815,12 +815,18 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitTargetKernel( const LocationDescription &Loc, Value *&Return, Value *Ident, Value *DeviceID, Value *NumTeams, Value *NumThreads, Value *HostPtr, - ArrayRef<Value *> KernelArgs) { + ArrayRef<Value *> KernelArgs, llvm::BasicBlock *EntryBlock) { if (!updateToLocation(Loc)) return Loc.IP; - auto *KernelArgsPtr = - Builder.CreateAlloca(OpenMPIRBuilder::KernelArgs, nullptr, "kernel_args"); + // Insert the alloca at the start of the function entry block. + AllocaInst *KernelArgsPtr = nullptr; + { + IRBuilder<>::InsertPointGuard IPG(Builder); + Builder.SetInsertPoint(EntryBlock, EntryBlock->begin()); + KernelArgsPtr = Builder.CreateAlloca(OpenMPIRBuilder::KernelArgs, nullptr, + "kernel_args"); + } for (unsigned I = 0, Size = KernelArgs.size(); I != Size; ++I) { llvm::Value *Arg = Builder.CreateStructGEP(OpenMPIRBuilder::KernelArgs, KernelArgsPtr, I); Index: llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h =================================================================== --- llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -995,10 +995,12 @@ /// \param NumThreads Number of threads via the 'thread_limit' clause. /// \param HostPtr Pointer to the host-side pointer of the target kernel. /// \param KernelArgs Array of arguments to the kernel. + /// \param EntryBlock Entry basic block of the enclosing function. InsertPointTy emitTargetKernel(const LocationDescription &Loc, Value *&Return, Value *Ident, Value *DeviceID, Value *NumTeams, Value *NumThreads, Value *HostPtr, - ArrayRef<Value *> KernelArgs); + ArrayRef<Value *> KernelArgs, + llvm::BasicBlock *EntryBlock); /// Generate a barrier runtime call. 
/// Index: clang/lib/CodeGen/CGOpenMPRuntime.cpp =================================================================== --- clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -9967,7 +9967,7 @@ // Check the error code and execute the host version if required. CGF.Builder.restoreIP(OMPBuilder.emitTargetKernel( CGF.Builder, Return, RTLoc, DeviceID, NumTeams, NumThreads, - OutlinedFnID, KernelArgs)); + OutlinedFnID, KernelArgs, &CGF.CurFn->getEntryBlock())); llvm::BasicBlock *OffloadFailedBlock = CGF.createBasicBlock("omp_offload.failed");
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits