https://github.com/sushgokh updated https://github.com/llvm/llvm-project/pull/96386
>From af4dc96c25f32b477337cedaeb0a696f75840ac0 Mon Sep 17 00:00:00 2001 From: sgokhale <sgokh...@nvidia.com> Date: Sat, 22 Jun 2024 17:16:24 +0530 Subject: [PATCH] [OpenMP] Fix stack corruption due to argument mismatch While lowering (#pragma omp target update from), clang's generated .omp_task_entry. is setting up 9 arguments while calling __tgt_target_data_update_nowait_mapper. At the same time, in __tgt_target_data_update_nowait_mapper, call to targetData<TaskAsyncInfoWrapperTy>() is converted to a sibcall assuming it has the argument count listed in the signature. AARCH64 asm sequence for this is as follows (removed unrelated insns): .omp_task_entry..108: sub sp, sp, #32 stp x29, x30, sp, #16 // 16-byte Folded Spill add x29, sp, #16 str x8, sp, #8. // stack canary str xzr, [sp] bl __tgt_target_data_update_nowait_mapper __tgt_target_data_update_nowait_mapper: sub sp, sp, #32 stp x29, x30, sp, #16 // 16-byte Folded Spill add x29, sp, #16 str x8, sp, #8 // stack canary // Sibcall argument setup adrp x8, :got:_Z16targetDataUpdateP7ident_tR8DeviceTyiPPvS4_PlS5_S4_S4_R11AsyncInfoTyb ldr x8, [x8, :got_lo12:_Z16targetDataUpdateP7ident_tR8DeviceTyiPPvS4_PlS5_S4_S4_R11AsyncInfoTyb] stp x9, x8, x29, #16 adrp x8, .L.str.8 add x8, x8, :lo12:.L.str.8 str x8, x29, #32. <==. This is the insn that erases $fp ldp x29, x30, sp, #16 // 16-byte Folded Reload add sp, sp, #32 // Sibcall b ZL10targetDataI22TaskAsyncInfoWrapperTyEvP7ident_tliPPvS4_PlS5_S4_S4_PFiS2_R8DeviceTyiS4_S4_S5_S5_S4_S4_R11AsyncInfoTybEPKcSD On AArch64, call to __tgt_target_data_update_nowait_mapper in .omp_task_entry. sets up only single space on stack and this results in ovewriting $fp and subsequent stack corruption. This issue can be credited to discrepancy of __tgt_target_data_update_nowait_mapper signature in openmp/libomptarget/include/omptarget.h taking 13 arguments while clang/lib/CodeGen/CGOpenMPRuntime.cpp and llvm/include/llvm/Frontend/OpenMP/OMPKinds.def taking only 9 arguments. This patch modifies __tgt_target_data_update_nowait_mapper signature to match .omp_task_entry usage(and other 2 files mentioned above). Co-authored-by: Kugan Vivekanandarajah <kvivekana...@nvidia.com> --- clang/lib/CodeGen/CGOpenMPRuntime.cpp | 28 +++++++++++++++-- .../include/llvm/Frontend/OpenMP/OMPKinds.def | 30 ++++++++++++------- 2 files changed, 44 insertions(+), 14 deletions(-) diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index f6d12d46cfc07..fc3ad533666ca 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -10343,6 +10343,23 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall( MapNamesArray, InputInfo.MappersArray.emitRawPointer(CGF)}; + // Nowait calls have header declarations that take 13 arguments. Hence, the + // divergence from the OffloadingArgs definition. + llvm::Value *NowaitOffloadingArgs[] = { + RTLoc, + DeviceID, + PointerNum, + InputInfo.BasePointersArray.emitRawPointer(CGF), + InputInfo.PointersArray.emitRawPointer(CGF), + InputInfo.SizesArray.emitRawPointer(CGF), + MapTypesArray, + MapNamesArray, + InputInfo.MappersArray.emitRawPointer(CGF), + llvm::Constant::getNullValue(CGF.Int32Ty), + llvm::Constant::getNullValue(CGF.VoidPtrTy), + llvm::Constant::getNullValue(CGF.Int32Ty), + llvm::Constant::getNullValue(CGF.VoidPtrTy)}; + // Select the right runtime function call for each standalone // directive. const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>(); @@ -10430,9 +10447,14 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall( llvm_unreachable("Unexpected standalone target data directive."); break; } - CGF.EmitRuntimeCall( - OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn), - OffloadingArgs); + if (HasNowait) + CGF.EmitRuntimeCall( + OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn), + NowaitOffloadingArgs); + else + CGF.EmitRuntimeCall( + OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn), + OffloadingArgs); }; auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray, diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def index fe09bb8177c28..ebd928470109a 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def +++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def @@ -438,19 +438,22 @@ __OMP_RTL(__tgt_target_kernel_nowait, false, Int32, IdentPtr, Int64, Int32, Int32, VoidPtr, KernelArgsPtr, Int32, VoidPtr, Int32, VoidPtr) __OMP_RTL(__tgt_target_data_begin_mapper, false, Void, IdentPtr, Int64, Int32, VoidPtrPtr, VoidPtrPtr, Int64Ptr, Int64Ptr, VoidPtrPtr, VoidPtrPtr) -__OMP_RTL(__tgt_target_data_begin_nowait_mapper, false, Void, IdentPtr, Int64, Int32, - VoidPtrPtr, VoidPtrPtr, Int64Ptr, Int64Ptr, VoidPtrPtr, VoidPtrPtr) +__OMP_RTL(__tgt_target_data_begin_nowait_mapper, false, Void, IdentPtr, Int64, + Int32, VoidPtrPtr, VoidPtrPtr, Int64Ptr, Int64Ptr, VoidPtrPtr, + VoidPtrPtr, Int32, VoidPtr, Int32, VoidPtr) __OMP_RTL(__tgt_target_data_begin_mapper_issue, false, Void, IdentPtr, Int64, Int32, VoidPtrPtr, VoidPtrPtr, Int64Ptr, Int64Ptr, VoidPtrPtr, VoidPtrPtr, AsyncInfoPtr) __OMP_RTL(__tgt_target_data_begin_mapper_wait, false, Void, Int64, AsyncInfoPtr) __OMP_RTL(__tgt_target_data_end_mapper, false, Void, IdentPtr, Int64, Int32, VoidPtrPtr, VoidPtrPtr, Int64Ptr, Int64Ptr, VoidPtrPtr, VoidPtrPtr) -__OMP_RTL(__tgt_target_data_end_nowait_mapper, false, Void, IdentPtr, Int64, Int32, - VoidPtrPtr, VoidPtrPtr, Int64Ptr, Int64Ptr, VoidPtrPtr, VoidPtrPtr) +__OMP_RTL(__tgt_target_data_end_nowait_mapper, false, Void, IdentPtr, Int64, + Int32, VoidPtrPtr, VoidPtrPtr, Int64Ptr, Int64Ptr, VoidPtrPtr, + VoidPtrPtr, Int32, VoidPtr, Int32, VoidPtr) __OMP_RTL(__tgt_target_data_update_mapper, false, Void, IdentPtr, Int64, Int32, VoidPtrPtr, VoidPtrPtr, Int64Ptr, Int64Ptr, VoidPtrPtr, VoidPtrPtr) -__OMP_RTL(__tgt_target_data_update_nowait_mapper, false, Void, IdentPtr, Int64, Int32, - VoidPtrPtr, VoidPtrPtr, Int64Ptr, Int64Ptr, VoidPtrPtr, VoidPtrPtr) +__OMP_RTL(__tgt_target_data_update_nowait_mapper, false, Void, IdentPtr, Int64, + Int32, VoidPtrPtr, VoidPtrPtr, Int64Ptr, Int64Ptr, VoidPtrPtr, + VoidPtrPtr, Int32, VoidPtr, Int32, VoidPtr) __OMP_RTL(__tgt_mapper_num_components, false, Int64, VoidPtr) __OMP_RTL(__tgt_push_mapper_component, false, Void, VoidPtr, VoidPtr, VoidPtr, Int64, Int64, VoidPtr) @@ -1026,10 +1029,12 @@ __OMP_RTL_ATTRS(__tgt_target_kernel_nowait, ForkAttrs, SExt, SExt)) __OMP_RTL_ATTRS(__tgt_target_data_begin_mapper, ForkAttrs, AttributeSet(), ParamAttrs(AttributeSet(), AttributeSet(), SExt)) -__OMP_RTL_ATTRS(__tgt_target_data_begin_nowait_mapper, ForkAttrs, AttributeSet(), +__OMP_RTL_ATTRS(__tgt_target_data_begin_nowait_mapper, ForkAttrs, + AttributeSet(), ParamAttrs(AttributeSet(), AttributeSet(), SExt, AttributeSet(), AttributeSet(), AttributeSet(), AttributeSet(), - AttributeSet(), AttributeSet())) + AttributeSet(), AttributeSet(), SExt, AttributeSet(), + SExt, AttributeSet())) __OMP_RTL_ATTRS(__tgt_target_data_begin_mapper_issue, AttributeSet(), AttributeSet(), ParamAttrs(AttributeSet(), AttributeSet(), SExt)) @@ -1038,13 +1043,16 @@ __OMP_RTL_ATTRS(__tgt_target_data_end_mapper, ForkAttrs, AttributeSet(), __OMP_RTL_ATTRS(__tgt_target_data_end_nowait_mapper, ForkAttrs, AttributeSet(), ParamAttrs(AttributeSet(), AttributeSet(), SExt, AttributeSet(), AttributeSet(), AttributeSet(), AttributeSet(), - AttributeSet(), AttributeSet())) + AttributeSet(), AttributeSet(), SExt, AttributeSet(), + SExt, AttributeSet())) __OMP_RTL_ATTRS(__tgt_target_data_update_mapper, ForkAttrs, AttributeSet(), ParamAttrs(AttributeSet(), AttributeSet(), SExt)) -__OMP_RTL_ATTRS(__tgt_target_data_update_nowait_mapper, ForkAttrs, AttributeSet(), +__OMP_RTL_ATTRS(__tgt_target_data_update_nowait_mapper, ForkAttrs, + AttributeSet(), ParamAttrs(AttributeSet(), AttributeSet(), SExt, AttributeSet(), AttributeSet(), AttributeSet(), AttributeSet(), - AttributeSet(), AttributeSet())) + AttributeSet(), AttributeSet(), SExt, AttributeSet(), + SExt, AttributeSet())) __OMP_RTL_ATTRS(__tgt_mapper_num_components, ForkAttrs, AttributeSet(), ParamAttrs()) __OMP_RTL_ATTRS(__tgt_push_mapper_component, ForkAttrs, AttributeSet(), _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits