https://github.com/tblah created https://github.com/llvm/llvm-project/pull/125304
This still doesn't fix the memory safety issues because the stack allocations created here for the private variables might go out of scope. I will add a more complete lit test later in this patch series. >From c5f10057ff564a4fdc64673f73474ae20f2ea574 Mon Sep 17 00:00:00 2001 From: Tom Eccles <tom.ecc...@arm.com> Date: Fri, 24 Jan 2025 16:00:53 +0000 Subject: [PATCH] [mlir][OpenMP] initialize (first)private variables before task exec This still doesn't fix the memory safety issues because the stack allocations created here for the private variables might go out of scope. I will add a more complete lit test later in this patch series. --- .../OpenMP/OpenMPToLLVMIRTranslation.cpp | 89 +++++++++++++++---- mlir/test/Target/LLVMIR/openmp-llvm.mlir | 48 +++++----- 2 files changed, 96 insertions(+), 41 deletions(-) diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index ea044fe0c8c196c..8a9a69cefad8ee1 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -1750,25 +1750,80 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder, for (mlir::Value privateVar : taskOp.getPrivateVars()) mlirPrivateVars.push_back(privateVar); - auto bodyCB = [&](InsertPointTy allocaIP, - InsertPointTy codegenIP) -> llvm::Error { - // Save the alloca insertion point on ModuleTranslation stack for use in - // nested regions. - LLVM::ModuleTranslation::SaveStack<OpenMPAllocaStackFrame> frame( - moduleTranslation, allocaIP); + // Allocate and copy private variables before creating the task. This avoids + // accessing invalid memory if (after this scope ends) the private variables + // are initialized from host variables or if the variables are copied into + // from host variables (firstprivate). The insertion point is just before + // where the code for creating and scheduling the task will go. That puts this + // code outside of the outlined task region, which is what we want because + // this way the initialization and copy regions are executed immediately while + // the host variable data are still live. - llvm::Expected<llvm::BasicBlock *> afterAllocas = - allocateAndInitPrivateVars(builder, moduleTranslation, privateBlockArgs, - privateDecls, mlirPrivateVars, - llvmPrivateVars, allocaIP); - if (handleError(afterAllocas, *taskOp).failed()) - return llvm::make_error<PreviouslyReportedError>(); + llvm::OpenMPIRBuilder::InsertPointTy allocaIP = + findAllocaInsertPoint(builder, moduleTranslation); - if (failed(initFirstPrivateVars(builder, moduleTranslation, mlirPrivateVars, - llvmPrivateVars, privateDecls, - afterAllocas.get()))) - return llvm::make_error<PreviouslyReportedError>(); + // Not using splitBB() because that requires the current block to have a + // terminator. + assert(builder.GetInsertPoint() == builder.GetInsertBlock()->end()); + llvm::BasicBlock *taskStartBlock = llvm::BasicBlock::Create( + builder.getContext(), "omp.task.start", + /*Parent=*/builder.GetInsertBlock()->getParent()); + llvm::Instruction *branchToTaskStartBlock = builder.CreateBr(taskStartBlock); + builder.SetInsertPoint(branchToTaskStartBlock); + + // Now do this again to make the initialization and copy blocks + llvm::BasicBlock *copyBlock = + splitBB(builder, /*CreateBranch=*/true, "omp.private.copy"); + llvm::BasicBlock *initBlock = + splitBB(builder, /*CreateBranch=*/true, "omp.private.init"); + + // Now the control flow graph should look like + // starter_block: + // <---- where we started when convertOmpTaskOp was called + // br %omp.private.init + // omp.private.init: + // br %omp.private.copy + // omp.private.copy: + // br %omp.task.start + // omp.task.start: + // <---- where we want the insertion point to be when we call createTask() + + // Save the alloca insertion point on ModuleTranslation stack for use in + // nested regions. + LLVM::ModuleTranslation::SaveStack<OpenMPAllocaStackFrame> frame( + moduleTranslation, allocaIP); + + // Allocate and initialize private variables + // TODO: package private variables up in a structure + builder.SetInsertPoint(initBlock->getTerminator()); + for (auto [privDecl, mlirPrivVar, blockArg] : + llvm::zip_equal(privateDecls, mlirPrivateVars, privateBlockArgs)) { + llvm::Type *llvmAllocType = + moduleTranslation.convertType(privDecl.getType()); + // Allocations: + builder.SetInsertPoint(allocaIP.getBlock()->getTerminator()); + llvm::Value *llvmPrivateVar = builder.CreateAlloca( + llvmAllocType, /*ArraySize=*/nullptr, "omp.private.alloc"); + + // builder.SetInsertPoint(initBlock->getTerminator()); + auto err = + initPrivateVar(builder, moduleTranslation, privDecl, mlirPrivVar, + blockArg, llvmPrivateVar, llvmPrivateVars, initBlock); + if (err) + return handleError(std::move(err), *taskOp.getOperation()); + } + + // firstprivate copy region + if (failed(initFirstPrivateVars(builder, moduleTranslation, mlirPrivateVars, + llvmPrivateVars, privateDecls, copyBlock))) + return llvm::failure(); + + // Set up for call to createTask() + builder.SetInsertPoint(taskStartBlock); + + auto bodyCB = [&](InsertPointTy allocaIP, + InsertPointTy codegenIP) -> llvm::Error { // translate the body of the task: builder.restoreIP(codegenIP); auto continuationBlockOrError = convertOmpOpRegions( @@ -1789,8 +1844,6 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder, buildDependData(taskOp.getDependKinds(), taskOp.getDependVars(), moduleTranslation, dds); - llvm::OpenMPIRBuilder::InsertPointTy allocaIP = - findAllocaInsertPoint(builder, moduleTranslation); llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP = moduleTranslation.getOpenMPBuilder()->createTask( diff --git a/mlir/test/Target/LLVMIR/openmp-llvm.mlir b/mlir/test/Target/LLVMIR/openmp-llvm.mlir index 8a95793b96fd531..b4f0dfc46471a53 100644 --- a/mlir/test/Target/LLVMIR/openmp-llvm.mlir +++ b/mlir/test/Target/LLVMIR/openmp-llvm.mlir @@ -2790,11 +2790,14 @@ llvm.func @par_task_(%arg0: !llvm.ptr {fir.bindc_name = "a"}) { } // CHECK-LABEL: @par_task_ +// CHECK: %[[ARG_ALLOC:.*]] = alloca { ptr }, align 8 // CHECK: %[[TASK_ALLOC:.*]] = call ptr @__kmpc_omp_task_alloc({{.*}}ptr @[[task_outlined_fn:.+]]) // CHECK: call i32 @__kmpc_omp_task({{.*}}, ptr %[[TASK_ALLOC]]) -// CHECK: define internal void @[[task_outlined_fn]] -// CHECK: %[[ARG_ALLOC:.*]] = alloca { ptr }, align 8 -// CHECK: call void ({{.*}}) @__kmpc_fork_call({{.*}}, ptr @[[parallel_outlined_fn:.+]], ptr %[[ARG_ALLOC]]) +// CHECK: define internal void @[[task_outlined_fn]](i32 %[[GLOBAL_TID_VAL:.*]], ptr %[[STRUCT_ARG:.*]]) +// CHECK: %[[LOADED_STRUCT_PTR:.*]] = load ptr, ptr %[[STRUCT_ARG]], align 8 +// CHECK: %[[GEP_STRUCTARG:.*]] = getelementptr { ptr, ptr }, ptr %[[LOADED_STRUCT_PTR]], i32 0, i32 0 +// CHECK: %[[LOADGEP_STRUCTARG:.*]] = load ptr, ptr %[[GEP_STRUCTARG]], align 8 +// CHECK: call void ({{.*}}) @__kmpc_fork_call({{.*}}, ptr @[[parallel_outlined_fn:.+]], ptr %[[LOADGEP_STRUCTARG]]) // CHECK: define internal void @[[parallel_outlined_fn]] // ----- @@ -2820,27 +2823,18 @@ llvm.func @task(%arg0 : !llvm.ptr) { llvm.return } // CHECK-LABEL: @task..omp_par -// CHECK: task.alloca: -// CHECK: %[[VAL_11:.*]] = load ptr, ptr %[[VAL_12:.*]], align 8 -// CHECK: %[[VAL_13:.*]] = getelementptr { ptr }, ptr %[[VAL_11]], i32 0, i32 0 +// CHECK: task.alloca: +// CHECK: %[[VAL_12:.*]] = load ptr, ptr %[[STRUCT_ARG:.*]], align 8 +// CHECK: %[[VAL_13:.*]] = getelementptr { ptr }, ptr %[[VAL_12]], i32 0, i32 0 // CHECK: %[[VAL_14:.*]] = load ptr, ptr %[[VAL_13]], align 8 -// CHECK: %[[VAL_15:.*]] = alloca i32, align 4 -// CHECK: br label %omp.private.init -// CHECK: omp.private.init: ; preds = %task.alloca -// CHECK: br label %omp.private.copy -// CHECK: omp.private.copy: ; preds = %omp.private.init -// CHECK: %[[VAL_19:.*]] = load i32, ptr %[[VAL_14]], align 4 -// CHECK: store i32 %[[VAL_19]], ptr %[[VAL_15]], align 4 -// CHECK: br label %[[VAL_20:.*]] -// CHECK: [[VAL_20]]: // CHECK: br label %task.body -// CHECK: task.body: ; preds = %[[VAL_20]] +// CHECK: task.body: ; preds = %task.alloca // CHECK: br label %omp.task.region // CHECK: omp.task.region: ; preds = %task.body -// CHECK: call void @foo(ptr %[[VAL_15]]) +// CHECK: call void @foo(ptr %[[VAL_14]]) // CHECK: br label %omp.region.cont // CHECK: omp.region.cont: ; preds = %omp.task.region -// CHECK: call void @destroy(ptr %[[VAL_15]]) +// CHECK: call void @destroy(ptr %[[VAL_14]]) // CHECK: br label %task.exit.exitStub // CHECK: task.exit.exitStub: ; preds = %omp.region.cont // CHECK: ret void @@ -2910,6 +2904,19 @@ llvm.func @omp_taskgroup_task(%x: i32, %y: i32, %zaddr: !llvm.ptr) { // CHECK: br label %[[omp_region_cont:[^,]+]] // CHECK: [[omp_taskgroup_region]]: // CHECK: %{{.+}} = alloca i8, align 1 +// CHECK: br label %[[omp_private_init:[^,]+]] +// CHECK: [[omp_private_init]]: +// CHECK: br label %[[omp_private_copy:[^,]+]] +// CHECK: [[omp_private_copy]]: +// CHECK: br label %[[omp_task_start:[^,]+]] + +// CHECK: [[omp_region_cont:[^,]+]]: +// CHECK: br label %[[taskgroup_exit:[^,]+]] +// CHECK: [[taskgroup_exit]]: +// CHECK: call void @__kmpc_end_taskgroup(ptr @{{.+}}, i32 %[[omp_global_thread_num]]) +// CHECK: ret void + +// CHECK: [[omp_task_start]]: // CHECK: br label %[[codeRepl:[^,]+]] // CHECK: [[codeRepl]]: // CHECK: %[[omp_global_thread_num_t1:.+]] = call i32 @__kmpc_global_thread_num(ptr @{{.+}}) @@ -2933,11 +2940,6 @@ llvm.func @omp_taskgroup_task(%x: i32, %y: i32, %zaddr: !llvm.ptr) { // CHECK: br label %[[task_exit3:[^,]+]] // CHECK: [[task_exit3]]: // CHECK: br label %[[omp_taskgroup_region1]] -// CHECK: [[omp_region_cont]]: -// CHECK: br label %[[taskgroup_exit:[^,]+]] -// CHECK: [[taskgroup_exit]]: -// CHECK: call void @__kmpc_end_taskgroup(ptr @{{.+}}, i32 %[[omp_global_thread_num]]) -// CHECK: ret void // CHECK: } // ----- _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits