llvmorg-github-actions[bot] wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-clang-codegen @llvm/pr-subscribers-flang-openmp Author: Julian Brown (jtb20) <details> <summary>Changes</summary> Build on the per-task relocation infrastructure introduced for 'omp task' in the previous patch and apply the same scheme to taskloop tasks recorded inside a taskgraph. Without this, by-reference captures inside a recorded taskloop iteration still point at the original recording's stack on each replay. On the compiler side (CGOpenMPRuntime.cpp), emit a relocation thunk for the taskloop's captured statement via the shared emitTaskRelocationFunction helper and pass it as the new trailing argument of __kmpc_taskgraph_taskloop. The now-unused 'shareds' and 'sizeof_shareds' parameters are dropped from the call and from TGTaskLoopArgs. On the runtime side, update __kmpc_taskgraph_taskloop to match the new ABI (drop the dead 'shareds' / 'sizeof_shareds' parameters, add the trailing kmp_task_relocate_t parameter) and plumb the callback through the taskgraph variant of __kmp_taskloop and __kmp_taskloop_linear so every recorded subtask gets node->relocate = reloc, mirroring the explicit-task path. Non-taskgraph callers pass a default nullptr and are unaffected. Add taskloop counterparts of the omp-task runtime tests added in the previous patch. 
Assisted-By: Claude Opus 4.7 --- Patch is 31.66 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/200405.diff 14 Files Affected: - (modified) clang/lib/CodeGen/CGOpenMPRuntime.cpp (+17-13) - (modified) llvm/include/llvm/Frontend/OpenMP/OMPKinds.def (+2-2) - (modified) openmp/runtime/src/kmp.h (+1-2) - (modified) openmp/runtime/src/kmp_tasking.cpp (+13-10) - (added) openmp/runtime/test/taskgraph/taskgraph_replayable_lexical_shared_mixed_capture_taskloop.cpp (+43) - (added) openmp/runtime/test/taskgraph/taskgraph_replayable_lexical_shared_nontrivial_type_recursive_taskloop.cpp (+85) - (added) openmp/runtime/test/taskgraph/taskgraph_replayable_lexical_shared_nontrivial_type_taskloop.cpp (+57) - (added) openmp/runtime/test/taskgraph/taskgraph_replayable_lexical_shared_pointer_recursive_frameid_taskloop.cpp (+83) - (added) openmp/runtime/test/taskgraph/taskgraph_replayable_lexical_shared_pointer_taskloop.cpp (+42) - (added) openmp/runtime/test/taskgraph/taskgraph_replayable_lexical_shared_recursive_taskloop.cpp (+70) - (added) openmp/runtime/test/taskgraph/taskgraph_replayable_lexical_shared_works_taskloop.cpp (+41) - (added) openmp/runtime/test/taskgraph/taskgraph_replayable_nonlexical_shared_fails_1_taskloop.cpp (+46) - (added) openmp/runtime/test/taskgraph/taskgraph_replayable_nonlexical_shared_fails_2_taskloop.cpp (+59) - (added) openmp/runtime/test/taskgraph/taskgraph_replayable_saved_stack_depth_taskloop.cpp (+126) ``````````diff diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index 9f342038f2285..ee8583a9f5519 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -5152,7 +5152,7 @@ void CGOpenMPRuntime::emitTaskLoopCall( PrePostActionTy &) { llvm::Value *ThreadId = getThreadID(CGF, Loc); llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); - std::array<llvm::Value *, 16> TGTaskLoopArgs; + std::array<llvm::Value *, 14> TGTaskLoopArgs; // 
This is all copy/pasted from below. Refactor! LValue LBLVal = CGF.EmitLValueForField( @@ -5197,30 +5197,34 @@ void CGOpenMPRuntime::emitTaskLoopCall( TGTaskLoopArgs[1] = ThreadId; TGTaskLoopArgs[2] = TaskInitResult.NewTask; TGTaskLoopArgs[3] = TaskAllocArgs[0]; // TaskFlags - TGTaskLoopArgs[4] = TaskAllocArgs[1]; // KmpTaskTWithPrivatesTySize - TGTaskLoopArgs[5] = Shareds.emitRawPointer(CGF); - TGTaskLoopArgs[6] = TaskAllocArgs[2]; // SharedsSize - TGTaskLoopArgs[7] = IfVal; - TGTaskLoopArgs[8] = LBLVal.getPointer(CGF); - TGTaskLoopArgs[9] = UBLVal.getPointer(CGF); - TGTaskLoopArgs[10] = CGF.EmitLoadOfScalar(StLVal, Loc); - TGTaskLoopArgs[11] = + TGTaskLoopArgs[4] = IfVal; + TGTaskLoopArgs[5] = LBLVal.getPointer(CGF); + TGTaskLoopArgs[6] = UBLVal.getPointer(CGF); + TGTaskLoopArgs[7] = CGF.EmitLoadOfScalar(StLVal, Loc); + TGTaskLoopArgs[8] = llvm::ConstantInt::getSigned(CGF.IntTy, Data.Nogroup ? 1 : 0); - TGTaskLoopArgs[12] = llvm::ConstantInt::getSigned( + TGTaskLoopArgs[9] = llvm::ConstantInt::getSigned( CGF.IntTy, Data.Schedule.getPointer() ? Data.Schedule.getInt() ? NumTasks : Grainsize : NoSchedule); - TGTaskLoopArgs[13] = + TGTaskLoopArgs[10] = Data.Schedule.getPointer() ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty, /*isSigned=*/false) : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0); - TGTaskLoopArgs[14] = + TGTaskLoopArgs[11] = llvm::ConstantInt::getSigned(CGF.IntTy, Data.HasModifier ? 1 : 0); - TGTaskLoopArgs[15] = TaskInitResult.TaskDupFn + TGTaskLoopArgs[12] = TaskInitResult.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( TaskInitResult.TaskDupFn, CGF.VoidPtrTy) : llvm::ConstantPointerNull::get(CGF.VoidPtrTy); + const auto *CS = cast<CapturedStmt>(D.getAssociatedStmt()); + llvm::Function *RelocFn = + emitTaskRelocationFunction(CGM, Loc, *CS, CGF.CapturedStmtInfo, Data); + TGTaskLoopArgs[13] = RelocFn + ? 
CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + RelocFn, CGM.VoidPtrTy) + : llvm::ConstantPointerNull::get(CGF.VoidPtrTy); CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( CGM.getModule(), OMPRTL___kmpc_taskgraph_taskloop), TGTaskLoopArgs); diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def index e32308df74cae..02e3e1f98e969 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def +++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def @@ -362,8 +362,8 @@ __OMP_RTL(__kmpc_taskgraph, false, Void, IdentPtr, Int32, VoidPtrPtr, SizeTy, __OMP_RTL(__kmpc_taskgraph_task, false, Int32, IdentPtr, Int32, VoidPtr, Int32, SizeTy, SizeTy, Int32, VoidPtr, VoidPtr) __OMP_RTL(__kmpc_taskgraph_taskloop, false, Int32, IdentPtr, Int32, VoidPtr, - Int32, SizeTy, VoidPtr, SizeTy, Int32, Int64Ptr, Int64Ptr, Int64, - Int32, Int32, Int64, Int32, VoidPtr) + Int32, Int32, Int64Ptr, Int64Ptr, Int64, + Int32, Int32, Int64, Int32, VoidPtr, VoidPtr) __OMP_RTL(__kmpc_taskgraph_taskwait, false, Void, IdentPtr, Int32, Int32, VoidPtr, Int32) __OMP_RTL(__kmpc_taskgraph_taskred_init, false, /* kmp_taskgroup */ VoidPtr, diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h index befca12786e70..9ebb7e6f654bc 100644 --- a/openmp/runtime/src/kmp.h +++ b/openmp/runtime/src/kmp.h @@ -4514,10 +4514,9 @@ KMP_EXPORT kmp_uint32 __kmpc_taskgraph_task( kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_task_relocate_t reloc); KMP_EXPORT kmp_uint32 __kmpc_taskgraph_taskloop( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *new_task, kmp_int32 flags, - size_t sizeof_kmp_task_t, void *shareds, size_t sizeof_shareds, kmp_int32 if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, kmp_int32 nogroup, kmp_int32 sched, kmp_uint64 grainsize, - kmp_int32 modifier, void *task_dup); + kmp_int32 modifier, void *task_dup, kmp_task_relocate_t reloc); KMP_EXPORT void __kmpc_taskgraph_taskwait(ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps, 
kmp_depend_info_t *dep_list, diff --git a/openmp/runtime/src/kmp_tasking.cpp b/openmp/runtime/src/kmp_tasking.cpp index d595c555a72c0..7b3f4b04fbd16 100644 --- a/openmp/runtime/src/kmp_tasking.cpp +++ b/openmp/runtime/src/kmp_tasking.cpp @@ -5074,7 +5074,8 @@ __kmp_taskloop_linear(ident_t *loc, int gtid, kmp_task_t *task, kmp_uint64 *lb, void *task_dup #if OMP_TASKGRAPH_EXPERIMENTAL , - kmp_taskgraph_record_t *taskgraph_rec = nullptr + kmp_taskgraph_record_t *taskgraph_rec = nullptr, + kmp_task_relocate_t relocate = nullptr #endif ) { KMP_COUNT_BLOCK(OMP_TASKLOOP); @@ -5170,6 +5171,7 @@ __kmp_taskloop_linear(ident_t *loc, int gtid, kmp_task_t *task, kmp_uint64 *lb, taskgroup->taskgraph.reduce_input = nullptr; } node->taskloop_task = true; + node->relocate = relocate; next_taskdata->owning_taskgraph = taskgraph_rec; // FIXME: These dependency fields might be back-filled by the as-yet // unimplemented task_iteration subsidiary directive. We'll need a way @@ -5462,7 +5464,8 @@ static void __kmp_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val, int modifier, void *task_dup #if OMP_TASKGRAPH_EXPERIMENTAL , - kmp_taskgraph_record_t *taskgraph_rec = nullptr + kmp_taskgraph_record_t *taskgraph_rec = nullptr, + kmp_task_relocate_t relocate = nullptr #endif ) { kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task); @@ -5596,7 +5599,7 @@ static void __kmp_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val, #if OMPT_SUPPORT OMPT_GET_RETURN_ADDRESS(0), #endif - task_dup, taskgraph_rec); + task_dup, taskgraph_rec, relocate); // check if clause value next // Also require GOMP_taskloop to reduce to linear // (taskdata->td_flags.native) @@ -6121,12 +6124,12 @@ void __kmpc_taskgraph_taskwait(ident_t *loc_ref, kmp_int32 gtid, kmp_uint32 __kmpc_taskgraph_taskloop(ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *new_task, kmp_int32 flags, - size_t sizeof_kmp_task_t, void *shareds, - size_t sizeof_shareds, kmp_int32 if_val, - kmp_uint64 *lb, kmp_uint64 *ub, - 
kmp_int64 st, kmp_int32 nogroup, - kmp_int32 sched, kmp_uint64 grainsize, - kmp_int32 modifier, void *task_dup) { + kmp_int32 if_val, kmp_uint64 *lb, + kmp_uint64 *ub, kmp_int64 st, + kmp_int32 nogroup, kmp_int32 sched, + kmp_uint64 grainsize, kmp_int32 modifier, + void *task_dup, + kmp_task_relocate_t relocate) { kmp_info_t *thread = __kmp_threads[gtid]; kmp_taskgroup_t *taskgroup = thread->th.th_current_task->td_taskgroup; kmp_taskgraph_record_t *rec = __kmp_taskgraph_or_parent_recording(taskgroup); @@ -6135,7 +6138,7 @@ kmp_uint32 __kmpc_taskgraph_taskloop(ident_t *loc_ref, kmp_int32 gtid, kmp_taskgraph_status_t status = KMP_ATOMIC_LD_ACQ(&rec->status); if (status == KMP_TDG_RECORDING) __kmp_taskloop(loc_ref, gtid, new_task, if_val, lb, ub, st, nogroup, - sched, grainsize, modifier, task_dup, rec); + sched, grainsize, modifier, task_dup, rec, relocate); else if (status == KMP_TDG_READY) { #ifdef DEBUG_TASKGRAPH fprintf(stderr, "non-taskgraph taskloop entry point for taskloop in " diff --git a/openmp/runtime/test/taskgraph/taskgraph_replayable_lexical_shared_mixed_capture_taskloop.cpp b/openmp/runtime/test/taskgraph/taskgraph_replayable_lexical_shared_mixed_capture_taskloop.cpp new file mode 100644 index 0000000000000..a6b473069d50c --- /dev/null +++ b/openmp/runtime/test/taskgraph/taskgraph_replayable_lexical_shared_mixed_capture_taskloop.cpp @@ -0,0 +1,43 @@ +// clang-format off +// RUN: %clangXX %flags %openmp_flags -fopenmp-version=60 %s -o %t +// RUN: env OMP_NUM_THREADS=4 %libomp-run 2>&1 | FileCheck %s +// REQUIRES: omp_taskgraph_experimental +// clang-format on + +#include <cstdio> + +__attribute__((noinline)) static int run_taskgraph_mixed_capture(int seed) { + int x = seed; + int y = seed * 2; + int fp = 7; + int res = 0; + +#pragma omp taskgraph graph_id(612) + { +#pragma omp taskloop replayable num_tasks(8) shared(x, y) firstprivate(fp) reduction(+ : res) + for (int i = 0; i < 16; ++i) { + res += x + y + fp + i; + } + } + + return res; +} + +int 
main() { + const int first = run_taskgraph_mixed_capture(1); + const int second = run_taskgraph_mixed_capture(100); + + if (first != 280 || second != 5032) { + std::fprintf(stderr, + "FAIL lexical mixed capture taskloop replay first=%d second=%d expected=280/5032\n", + first, second); + return 1; + } + + std::fprintf(stderr, + "PASS lexical mixed capture taskloop replay first=%d second=%d\n", + first, second); + return 0; +} + +// CHECK: PASS lexical mixed capture taskloop replay first=280 second=5032 diff --git a/openmp/runtime/test/taskgraph/taskgraph_replayable_lexical_shared_nontrivial_type_recursive_taskloop.cpp b/openmp/runtime/test/taskgraph/taskgraph_replayable_lexical_shared_nontrivial_type_recursive_taskloop.cpp new file mode 100644 index 0000000000000..5adb743c5a007 --- /dev/null +++ b/openmp/runtime/test/taskgraph/taskgraph_replayable_lexical_shared_nontrivial_type_recursive_taskloop.cpp @@ -0,0 +1,85 @@ +// clang-format off +// RUN: %clangXX %flags %openmp_flags -fopenmp-version=60 %s -o %t +// RUN: env OMP_NUM_THREADS=4 %libomp-run 2>&1 | FileCheck %s +// REQUIRES: omp_taskgraph_experimental +// clang-format on + +#include <cstdio> + +struct Tracker { + static int Ctors; + static int Dtors; + + int Value; + + explicit Tracker(int V) : Value(V) { ++Ctors; } + ~Tracker() { ++Dtors; } + + void bump(int Delta) { Value += Delta; } +}; + +int Tracker::Ctors = 0; +int Tracker::Dtors = 0; + +__attribute__((noinline)) static int expected_recursive(int depth, int seed, + int run_tag) { + int local = 16 * (seed + (depth + 1) * 5 + run_tag) + 120; + if (depth == 0) + return local; + return local + expected_recursive(depth - 1, seed + 9, run_tag); +} + +__attribute__((noinline)) static int run_recursive_nontrivial(int depth, int seed, + int run_tag) { + Tracker Obj(seed); + int res = 0; + + int gid = 620 + depth; +#pragma omp taskgraph graph_id(gid) + { +#pragma omp taskloop replayable num_tasks(8) shared(Obj, depth, run_tag) reduction(+ : res) + for (int i = 0; i 
< 16; ++i) { + res += Obj.Value + (depth + 1) * 5 + run_tag + i; + } + } + + if (depth == 0) + return res; + return res + run_recursive_nontrivial(depth - 1, seed + 9, run_tag); +} + +int main() { + const int depth = 3; + int total_actual = 0; + int total_expected = 0; + + for (int run = 0; run < 3; ++run) { + const int seed = 100 * run + 1; + const int actual = run_recursive_nontrivial(depth, seed, run); + const int expected = expected_recursive(depth, seed, run); + + if (actual != expected) { + std::fprintf(stderr, + "FAIL recursive nontrivial taskloop run=%d actual=%d expected=%d\n", + run, actual, expected); + return 1; + } + + total_actual += actual; + total_expected += expected; + } + + if (Tracker::Ctors != Tracker::Dtors || Tracker::Ctors < 12) { + std::fprintf(stderr, + "FAIL recursive nontrivial taskloop lifetime ctors=%d dtors=%d total=%d expected=%d\n", + Tracker::Ctors, Tracker::Dtors, total_actual, total_expected); + return 1; + } + + std::fprintf(stderr, + "PASS recursive nontrivial taskloop total=%d expected=%d ctors=%d dtors=%d\n", + total_actual, total_expected, Tracker::Ctors, Tracker::Dtors); + return 0; +} + +// CHECK: PASS recursive nontrivial taskloop total= diff --git a/openmp/runtime/test/taskgraph/taskgraph_replayable_lexical_shared_nontrivial_type_taskloop.cpp b/openmp/runtime/test/taskgraph/taskgraph_replayable_lexical_shared_nontrivial_type_taskloop.cpp new file mode 100644 index 0000000000000..df846e80e0b9f --- /dev/null +++ b/openmp/runtime/test/taskgraph/taskgraph_replayable_lexical_shared_nontrivial_type_taskloop.cpp @@ -0,0 +1,57 @@ +// clang-format off +// RUN: %clangXX %flags %openmp_flags -fopenmp-version=60 %s -o %t +// RUN: env OMP_NUM_THREADS=4 %libomp-run 2>&1 | FileCheck %s +// REQUIRES: omp_taskgraph_experimental +// clang-format on + +#include <cstdio> + +struct Tracker { + static int Ctors; + static int Dtors; + + int Value; + + explicit Tracker(int V) : Value(V) { ++Ctors; } + ~Tracker() { ++Dtors; } + + void bump(int 
Delta) { Value += Delta; } +}; + +int Tracker::Ctors = 0; +int Tracker::Dtors = 0; + +__attribute__((noinline)) static int run_taskgraph_nontrivial(int seed) { + Tracker Obj(seed); + int res = 0; + +#pragma omp taskgraph graph_id(614) + { +#pragma omp taskloop replayable num_tasks(8) shared(Obj) reduction(+ : res) + for (int i = 0; i < 16; ++i) { + res += Obj.Value + i; + } + } + + return res; +} + +int main() { + const int first = run_taskgraph_nontrivial(1); + const int second = run_taskgraph_nontrivial(100); + + if (first != 136 || second != 1720 || Tracker::Ctors < 2 || + Tracker::Dtors < 2 || Tracker::Ctors != Tracker::Dtors) { + std::fprintf(stderr, + "FAIL lexical nontrivial taskloop replay first=%d second=%d ctors=%d dtors=%d\n", + first, second, Tracker::Ctors, Tracker::Dtors); + return 1; + } + + std::fprintf(stderr, + "PASS lexical nontrivial taskloop replay first=%d second=%d ctors=%d dtors=%d\n", + first, second, Tracker::Ctors, Tracker::Dtors); + return 0; +} + +// CHECK: PASS lexical nontrivial taskloop replay first=136 second=1720 diff --git a/openmp/runtime/test/taskgraph/taskgraph_replayable_lexical_shared_pointer_recursive_frameid_taskloop.cpp b/openmp/runtime/test/taskgraph/taskgraph_replayable_lexical_shared_pointer_recursive_frameid_taskloop.cpp new file mode 100644 index 0000000000000..5c5aada2222e8 --- /dev/null +++ b/openmp/runtime/test/taskgraph/taskgraph_replayable_lexical_shared_pointer_recursive_frameid_taskloop.cpp @@ -0,0 +1,83 @@ +// clang-format off +// RUN: %clangXX %flags %openmp_flags -fopenmp-version=60 %s -o %t +// RUN: env OMP_NUM_THREADS=4 %libomp-run 2>&1 | FileCheck %s +// REQUIRES: omp_taskgraph_experimental +// XFAIL: * +// clang-format on + +#include <cstdio> +#include <cstdint> + +__attribute__((noinline)) static int expected_recursive(int depth, int seed, + int run_tag) { + int value = 16 * (seed + (depth + 1) * 3 + run_tag) + 120; + if (depth == 0) + return value; + return value + expected_recursive(depth - 1, seed + 
7, run_tag); +} + +__attribute__((noinline)) static int run_recursive_frameid(int depth, int seed, + int run_tag) { + int value = seed; + int *ptr = &value; + int *&ptr_ref = ptr; + int sum_delta = 0; + uintptr_t frame_gid = reinterpret_cast<uintptr_t>(__builtin_frame_address(0)); + + // Typically, if captured pointers refer to locations on the stack, that + // would not be safe for taskgraph record/replay because in general we + // cannot rewrite such pointers to point to the current (live) stack frame. + // + // This is one possible way around that though: we keep a taskgraph record + // per stack-depth, each of which may refer to the local stack frame. + // + // I probably wouldn't recommend use of this technique in production code. +#pragma omp taskgraph graph_id(frame_gid) + { +#pragma omp taskloop replayable num_tasks(8) shared(ptr_ref, depth, run_tag) reduction(+ : sum_delta) + for (int i = 0; i < 16; ++i) { + int delta = (depth + 1) * 3 + run_tag + i; + __atomic_fetch_add(ptr_ref, delta, __ATOMIC_RELAXED); + sum_delta += delta; + } + } + + int local = value * 17 + sum_delta; + + if (depth == 0) + return local; + return local + run_recursive_frameid(depth - 1, seed + 7, run_tag); +} + +int main() { + const int depth = 3; + int recorded_sum = 0; + int replayed_sum = 0; + + for (int run = 0; run < 3; ++run) { + int seed = 100 * run + 1; + int val = run_recursive_frameid(depth, seed, run); + if (run == 0) + recorded_sum = val; + else + replayed_sum += val; + } + + // With missing relocation for taskloop replay, recursive invocations that + // mutate through shared-block pointers are expected to diverge from the + // expected replay behavior. 
+ const int expected_replayed = 2 * recorded_sum; + if (replayed_sum == expected_replayed) { + std::fprintf(stderr, + "UNEXPECTED SUCCESS recursive pointer taskloop replay recorded=%d replayed_total=%d expected_total=%d\n", + recorded_sum, replayed_sum, expected_replayed); + return 0; + } + + std::fprintf(stderr, + "EXPECTED FAILURE recursive pointer taskloop replay recorded=%d replayed_total=%d expected_total=%d\n", + recorded_sum, replayed_sum, expected_replayed); + return 1; +} + +// CHECK: EXPECTED FAILURE recursive pointer taskloop replay diff --git a/openmp/runtime/test/taskgraph/taskgraph_replayable_lexical_shared_pointer_taskloop.cpp b/openmp/runtime/test/taskgraph/taskgraph_replayable_lexical_shared_pointer_taskloop.cpp new file mode 100644 index 0000000000000..8e8f98f4d86b6 --- /dev/null +++ b/openmp/runtime/test/taskgraph/taskgraph_replayable_lexical_shared_pointer_taskloop.cpp @@ -0,0 +1,42 @@ +// clang-format off +// RUN: %clangXX %flags %openmp_flags -fopenmp-version=60 %s -o %t +// RUN: env OMP_NUM_THREADS=4 %libomp-run 2>&1 | FileCheck %s +// REQUIRES: omp_taskgraph_experimental +// clang-format on + +#include <cstdio> + +__attribute__((noinline)) static int run_taskgraph_pointer_shared(int seed) { + int value = seed; + int *ptr = &value; + int res = 0; + +#pragma omp taskgraph graph_id(613) + { +#pragma omp taskloop replayable num_tasks(8) shared(ptr) reduction(+ : res) + for (int i = 0; i < 16; ++i) { + res += *ptr + i; + } + } + + return res; +} + +int main() { + const int first = run_taskgraph_pointer_shared(1); + const int second = run_taskgraph_pointer_shared(100); + + if (first != 136 || second != 1720) { + std::fprintf(stderr, + "FAIL lexical pointer shared taskloop replay first=%d second=%d expected=136/1720\n", + first, second); + return 1; + } + + std::fprintf(stderr, + "PASS lexical pointer shared taskloop replay first=%d second=%d\n", + first, second); +... 
[truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/200405 _______________________________________________ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
