llvmorg-github-actions[bot] wrote:

<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-clang-codegen

@llvm/pr-subscribers-flang-openmp

Author: Julian Brown (jtb20)

<details>
<summary>Changes</summary>

Build on the per-task relocation infrastructure introduced for
'omp task' in the previous patch and apply the same scheme to
taskloop tasks recorded inside a taskgraph.  Without this,
by-reference captures inside a recorded taskloop iteration still
point at the original recording's stack on each replay.

On the compiler side (CGOpenMPRuntime.cpp), emit a relocation
thunk for the taskloop's captured statement via the shared
emitTaskRelocationFunction helper and pass it as the new trailing
argument of __kmpc_taskgraph_taskloop.  The now-unused 'shareds'
and 'sizeof_shareds' parameters are dropped from the call and from
TGTaskLoopArgs.

On the runtime side, update __kmpc_taskgraph_taskloop to match
the new ABI (drop the dead 'shareds' / 'sizeof_shareds'
parameters, add the trailing kmp_task_relocate_t parameter) and
plumb the callback through the taskgraph variant of
__kmp_taskloop and __kmp_taskloop_linear so every recorded
subtask gets node->relocate = relocate, mirroring the explicit-task
path.  Non-taskgraph callers pass a default nullptr and are
unaffected.

Add taskloop counterparts of the omp-task runtime tests added in
the previous patch.

Assisted-By: Claude Opus 4.7


---

Patch is 31.66 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/200405.diff


14 Files Affected:

- (modified) clang/lib/CodeGen/CGOpenMPRuntime.cpp (+17-13) 
- (modified) llvm/include/llvm/Frontend/OpenMP/OMPKinds.def (+2-2) 
- (modified) openmp/runtime/src/kmp.h (+1-2) 
- (modified) openmp/runtime/src/kmp_tasking.cpp (+13-10) 
- (added) 
openmp/runtime/test/taskgraph/taskgraph_replayable_lexical_shared_mixed_capture_taskloop.cpp
 (+43) 
- (added) 
openmp/runtime/test/taskgraph/taskgraph_replayable_lexical_shared_nontrivial_type_recursive_taskloop.cpp
 (+85) 
- (added) 
openmp/runtime/test/taskgraph/taskgraph_replayable_lexical_shared_nontrivial_type_taskloop.cpp
 (+57) 
- (added) 
openmp/runtime/test/taskgraph/taskgraph_replayable_lexical_shared_pointer_recursive_frameid_taskloop.cpp
 (+83) 
- (added) 
openmp/runtime/test/taskgraph/taskgraph_replayable_lexical_shared_pointer_taskloop.cpp
 (+42) 
- (added) 
openmp/runtime/test/taskgraph/taskgraph_replayable_lexical_shared_recursive_taskloop.cpp
 (+70) 
- (added) 
openmp/runtime/test/taskgraph/taskgraph_replayable_lexical_shared_works_taskloop.cpp
 (+41) 
- (added) 
openmp/runtime/test/taskgraph/taskgraph_replayable_nonlexical_shared_fails_1_taskloop.cpp
 (+46) 
- (added) 
openmp/runtime/test/taskgraph/taskgraph_replayable_nonlexical_shared_fails_2_taskloop.cpp
 (+59) 
- (added) 
openmp/runtime/test/taskgraph/taskgraph_replayable_saved_stack_depth_taskloop.cpp
 (+126) 


``````````diff
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp 
b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index 9f342038f2285..ee8583a9f5519 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -5152,7 +5152,7 @@ void CGOpenMPRuntime::emitTaskLoopCall(
                                                      PrePostActionTy &) {
     llvm::Value *ThreadId = getThreadID(CGF, Loc);
     llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
-    std::array<llvm::Value *, 16> TGTaskLoopArgs;
+    std::array<llvm::Value *, 14> TGTaskLoopArgs;
 
     // This is all copy/pasted from below. Refactor!
     LValue LBLVal = CGF.EmitLValueForField(
@@ -5197,30 +5197,34 @@ void CGOpenMPRuntime::emitTaskLoopCall(
     TGTaskLoopArgs[1] = ThreadId;
     TGTaskLoopArgs[2] = TaskInitResult.NewTask;
     TGTaskLoopArgs[3] = TaskAllocArgs[0]; // TaskFlags
-    TGTaskLoopArgs[4] = TaskAllocArgs[1]; // KmpTaskTWithPrivatesTySize
-    TGTaskLoopArgs[5] = Shareds.emitRawPointer(CGF);
-    TGTaskLoopArgs[6] = TaskAllocArgs[2]; // SharedsSize
-    TGTaskLoopArgs[7] = IfVal;
-    TGTaskLoopArgs[8] = LBLVal.getPointer(CGF);
-    TGTaskLoopArgs[9] = UBLVal.getPointer(CGF);
-    TGTaskLoopArgs[10] = CGF.EmitLoadOfScalar(StLVal, Loc);
-    TGTaskLoopArgs[11] =
+    TGTaskLoopArgs[4] = IfVal;
+    TGTaskLoopArgs[5] = LBLVal.getPointer(CGF);
+    TGTaskLoopArgs[6] = UBLVal.getPointer(CGF);
+    TGTaskLoopArgs[7] = CGF.EmitLoadOfScalar(StLVal, Loc);
+    TGTaskLoopArgs[8] =
         llvm::ConstantInt::getSigned(CGF.IntTy, Data.Nogroup ? 1 : 0);
-    TGTaskLoopArgs[12] = llvm::ConstantInt::getSigned(
+    TGTaskLoopArgs[9] = llvm::ConstantInt::getSigned(
         CGF.IntTy, Data.Schedule.getPointer()
                        ? Data.Schedule.getInt() ? NumTasks : Grainsize
                        : NoSchedule);
-    TGTaskLoopArgs[13] =
+    TGTaskLoopArgs[10] =
         Data.Schedule.getPointer()
             ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), 
CGF.Int64Ty,
                                         /*isSigned=*/false)
             : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0);
-    TGTaskLoopArgs[14] =
+    TGTaskLoopArgs[11] =
         llvm::ConstantInt::getSigned(CGF.IntTy, Data.HasModifier ? 1 : 0);
-    TGTaskLoopArgs[15] = TaskInitResult.TaskDupFn
+    TGTaskLoopArgs[12] = TaskInitResult.TaskDupFn
                              ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                                    TaskInitResult.TaskDupFn, CGF.VoidPtrTy)
                              : llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
+    const auto *CS = cast<CapturedStmt>(D.getAssociatedStmt());
+    llvm::Function *RelocFn =
+        emitTaskRelocationFunction(CGM, Loc, *CS, CGF.CapturedStmtInfo, Data);
+    TGTaskLoopArgs[13] = RelocFn
+                             ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+                                   RelocFn, CGM.VoidPtrTy)
+                             : llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                             CGM.getModule(), OMPRTL___kmpc_taskgraph_taskloop),
                         TGTaskLoopArgs);
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def 
b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
index e32308df74cae..02e3e1f98e969 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
@@ -362,8 +362,8 @@ __OMP_RTL(__kmpc_taskgraph, false, Void, IdentPtr, Int32, 
VoidPtrPtr, SizeTy,
 __OMP_RTL(__kmpc_taskgraph_task, false, Int32, IdentPtr, Int32, VoidPtr, Int32,
           SizeTy, SizeTy, Int32, VoidPtr, VoidPtr)
 __OMP_RTL(__kmpc_taskgraph_taskloop, false, Int32, IdentPtr, Int32, VoidPtr,
-          Int32, SizeTy, VoidPtr, SizeTy, Int32, Int64Ptr, Int64Ptr, Int64,
-          Int32, Int32, Int64, Int32, VoidPtr)
+          Int32, Int32, Int64Ptr, Int64Ptr, Int64,
+          Int32, Int32, Int64, Int32, VoidPtr, VoidPtr)
 __OMP_RTL(__kmpc_taskgraph_taskwait, false, Void, IdentPtr, Int32, Int32,
           VoidPtr, Int32)
 __OMP_RTL(__kmpc_taskgraph_taskred_init, false, /* kmp_taskgroup */ VoidPtr,
diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h
index befca12786e70..9ebb7e6f654bc 100644
--- a/openmp/runtime/src/kmp.h
+++ b/openmp/runtime/src/kmp.h
@@ -4514,10 +4514,9 @@ KMP_EXPORT kmp_uint32 __kmpc_taskgraph_task(
     kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_task_relocate_t reloc);
 KMP_EXPORT kmp_uint32 __kmpc_taskgraph_taskloop(
     ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *new_task, kmp_int32 flags,
-    size_t sizeof_kmp_task_t, void *shareds, size_t sizeof_shareds,
     kmp_int32 if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
     kmp_int32 nogroup, kmp_int32 sched, kmp_uint64 grainsize,
-    kmp_int32 modifier, void *task_dup);
+    kmp_int32 modifier, void *task_dup, kmp_task_relocate_t reloc);
 KMP_EXPORT void __kmpc_taskgraph_taskwait(ident_t *loc_ref, kmp_int32 gtid,
                                           kmp_int32 ndeps,
                                           kmp_depend_info_t *dep_list,
diff --git a/openmp/runtime/src/kmp_tasking.cpp 
b/openmp/runtime/src/kmp_tasking.cpp
index d595c555a72c0..7b3f4b04fbd16 100644
--- a/openmp/runtime/src/kmp_tasking.cpp
+++ b/openmp/runtime/src/kmp_tasking.cpp
@@ -5074,7 +5074,8 @@ __kmp_taskloop_linear(ident_t *loc, int gtid, kmp_task_t 
*task, kmp_uint64 *lb,
                       void *task_dup
 #if OMP_TASKGRAPH_EXPERIMENTAL
                       ,
-                      kmp_taskgraph_record_t *taskgraph_rec = nullptr
+                      kmp_taskgraph_record_t *taskgraph_rec = nullptr,
+                      kmp_task_relocate_t relocate = nullptr
 #endif
 ) {
   KMP_COUNT_BLOCK(OMP_TASKLOOP);
@@ -5170,6 +5171,7 @@ __kmp_taskloop_linear(ident_t *loc, int gtid, kmp_task_t 
*task, kmp_uint64 *lb,
         taskgroup->taskgraph.reduce_input = nullptr;
       }
       node->taskloop_task = true;
+      node->relocate = relocate;
       next_taskdata->owning_taskgraph = taskgraph_rec;
       // FIXME: These dependency fields might be back-filled by the as-yet
       // unimplemented task_iteration subsidiary directive.  We'll need a way
@@ -5462,7 +5464,8 @@ static void __kmp_taskloop(ident_t *loc, int gtid, 
kmp_task_t *task, int if_val,
                            int modifier, void *task_dup
 #if OMP_TASKGRAPH_EXPERIMENTAL
                            ,
-                           kmp_taskgraph_record_t *taskgraph_rec = nullptr
+                           kmp_taskgraph_record_t *taskgraph_rec = nullptr,
+                           kmp_task_relocate_t relocate = nullptr
 #endif
 ) {
   kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
@@ -5596,7 +5599,7 @@ static void __kmp_taskloop(ident_t *loc, int gtid, 
kmp_task_t *task, int if_val,
 #if OMPT_SUPPORT
                           OMPT_GET_RETURN_ADDRESS(0),
 #endif
-                          task_dup, taskgraph_rec);
+                          task_dup, taskgraph_rec, relocate);
     // check if clause value next
     // Also require GOMP_taskloop to reduce to linear
     // (taskdata->td_flags.native)
@@ -6121,12 +6124,12 @@ void __kmpc_taskgraph_taskwait(ident_t *loc_ref, 
kmp_int32 gtid,
 
 kmp_uint32 __kmpc_taskgraph_taskloop(ident_t *loc_ref, kmp_int32 gtid,
                                      kmp_task_t *new_task, kmp_int32 flags,
-                                     size_t sizeof_kmp_task_t, void *shareds,
-                                     size_t sizeof_shareds, kmp_int32 if_val,
-                                     kmp_uint64 *lb, kmp_uint64 *ub,
-                                     kmp_int64 st, kmp_int32 nogroup,
-                                     kmp_int32 sched, kmp_uint64 grainsize,
-                                     kmp_int32 modifier, void *task_dup) {
+                                     kmp_int32 if_val, kmp_uint64 *lb,
+                                     kmp_uint64 *ub, kmp_int64 st,
+                                     kmp_int32 nogroup, kmp_int32 sched,
+                                     kmp_uint64 grainsize, kmp_int32 modifier,
+                                     void *task_dup,
+                                     kmp_task_relocate_t relocate) {
   kmp_info_t *thread = __kmp_threads[gtid];
   kmp_taskgroup_t *taskgroup = thread->th.th_current_task->td_taskgroup;
   kmp_taskgraph_record_t *rec = __kmp_taskgraph_or_parent_recording(taskgroup);
@@ -6135,7 +6138,7 @@ kmp_uint32 __kmpc_taskgraph_taskloop(ident_t *loc_ref, 
kmp_int32 gtid,
     kmp_taskgraph_status_t status = KMP_ATOMIC_LD_ACQ(&rec->status);
     if (status == KMP_TDG_RECORDING)
       __kmp_taskloop(loc_ref, gtid, new_task, if_val, lb, ub, st, nogroup,
-                     sched, grainsize, modifier, task_dup, rec);
+                     sched, grainsize, modifier, task_dup, rec, relocate);
     else if (status == KMP_TDG_READY) {
 #ifdef DEBUG_TASKGRAPH
       fprintf(stderr, "non-taskgraph taskloop entry point for taskloop in "
diff --git 
a/openmp/runtime/test/taskgraph/taskgraph_replayable_lexical_shared_mixed_capture_taskloop.cpp
 
b/openmp/runtime/test/taskgraph/taskgraph_replayable_lexical_shared_mixed_capture_taskloop.cpp
new file mode 100644
index 0000000000000..a6b473069d50c
--- /dev/null
+++ 
b/openmp/runtime/test/taskgraph/taskgraph_replayable_lexical_shared_mixed_capture_taskloop.cpp
@@ -0,0 +1,43 @@
+// clang-format off
+// RUN: %clangXX %flags %openmp_flags -fopenmp-version=60 %s -o %t
+// RUN: env OMP_NUM_THREADS=4 %libomp-run 2>&1 | FileCheck %s
+// REQUIRES: omp_taskgraph_experimental
+// clang-format on
+
+#include <cstdio>
+
+__attribute__((noinline)) static int run_taskgraph_mixed_capture(int seed) {
+  int x = seed;
+  int y = seed * 2;
+  int fp = 7;
+  int res = 0;
+
+#pragma omp taskgraph graph_id(612)
+  {
+#pragma omp taskloop replayable num_tasks(8) shared(x, y) firstprivate(fp) 
reduction(+ : res)
+    for (int i = 0; i < 16; ++i) {
+      res += x + y + fp + i;
+    }
+  }
+
+  return res;
+}
+
+int main() {
+  const int first = run_taskgraph_mixed_capture(1);
+  const int second = run_taskgraph_mixed_capture(100);
+
+  if (first != 280 || second != 5032) {
+    std::fprintf(stderr,
+                 "FAIL lexical mixed capture taskloop replay first=%d 
second=%d expected=280/5032\n",
+                 first, second);
+    return 1;
+  }
+
+  std::fprintf(stderr,
+               "PASS lexical mixed capture taskloop replay first=%d 
second=%d\n",
+               first, second);
+  return 0;
+}
+
+// CHECK: PASS lexical mixed capture taskloop replay first=280 second=5032
diff --git 
a/openmp/runtime/test/taskgraph/taskgraph_replayable_lexical_shared_nontrivial_type_recursive_taskloop.cpp
 
b/openmp/runtime/test/taskgraph/taskgraph_replayable_lexical_shared_nontrivial_type_recursive_taskloop.cpp
new file mode 100644
index 0000000000000..5adb743c5a007
--- /dev/null
+++ 
b/openmp/runtime/test/taskgraph/taskgraph_replayable_lexical_shared_nontrivial_type_recursive_taskloop.cpp
@@ -0,0 +1,85 @@
+// clang-format off
+// RUN: %clangXX %flags %openmp_flags -fopenmp-version=60 %s -o %t
+// RUN: env OMP_NUM_THREADS=4 %libomp-run 2>&1 | FileCheck %s
+// REQUIRES: omp_taskgraph_experimental
+// clang-format on
+
+#include <cstdio>
+
+struct Tracker {
+  static int Ctors;
+  static int Dtors;
+
+  int Value;
+
+  explicit Tracker(int V) : Value(V) { ++Ctors; }
+  ~Tracker() { ++Dtors; }
+
+  void bump(int Delta) { Value += Delta; }
+};
+
+int Tracker::Ctors = 0;
+int Tracker::Dtors = 0;
+
+__attribute__((noinline)) static int expected_recursive(int depth, int seed,
+                                                        int run_tag) {
+  int local = 16 * (seed + (depth + 1) * 5 + run_tag) + 120;
+  if (depth == 0)
+    return local;
+  return local + expected_recursive(depth - 1, seed + 9, run_tag);
+}
+
+__attribute__((noinline)) static int run_recursive_nontrivial(int depth, int 
seed,
+                                                              int run_tag) {
+  Tracker Obj(seed);
+  int res = 0;
+
+  int gid = 620 + depth;
+#pragma omp taskgraph graph_id(gid)
+  {
+#pragma omp taskloop replayable num_tasks(8) shared(Obj, depth, run_tag) 
reduction(+ : res)
+    for (int i = 0; i < 16; ++i) {
+      res += Obj.Value + (depth + 1) * 5 + run_tag + i;
+    }
+  }
+
+  if (depth == 0)
+    return res;
+  return res + run_recursive_nontrivial(depth - 1, seed + 9, run_tag);
+}
+
+int main() {
+  const int depth = 3;
+  int total_actual = 0;
+  int total_expected = 0;
+
+  for (int run = 0; run < 3; ++run) {
+    const int seed = 100 * run + 1;
+    const int actual = run_recursive_nontrivial(depth, seed, run);
+    const int expected = expected_recursive(depth, seed, run);
+
+    if (actual != expected) {
+      std::fprintf(stderr,
+                   "FAIL recursive nontrivial taskloop run=%d actual=%d 
expected=%d\n",
+                   run, actual, expected);
+      return 1;
+    }
+
+    total_actual += actual;
+    total_expected += expected;
+  }
+
+  if (Tracker::Ctors != Tracker::Dtors || Tracker::Ctors < 12) {
+    std::fprintf(stderr,
+                 "FAIL recursive nontrivial taskloop lifetime ctors=%d 
dtors=%d total=%d expected=%d\n",
+                 Tracker::Ctors, Tracker::Dtors, total_actual, total_expected);
+    return 1;
+  }
+
+  std::fprintf(stderr,
+               "PASS recursive nontrivial taskloop total=%d expected=%d 
ctors=%d dtors=%d\n",
+               total_actual, total_expected, Tracker::Ctors, Tracker::Dtors);
+  return 0;
+}
+
+// CHECK: PASS recursive nontrivial taskloop total=
diff --git 
a/openmp/runtime/test/taskgraph/taskgraph_replayable_lexical_shared_nontrivial_type_taskloop.cpp
 
b/openmp/runtime/test/taskgraph/taskgraph_replayable_lexical_shared_nontrivial_type_taskloop.cpp
new file mode 100644
index 0000000000000..df846e80e0b9f
--- /dev/null
+++ 
b/openmp/runtime/test/taskgraph/taskgraph_replayable_lexical_shared_nontrivial_type_taskloop.cpp
@@ -0,0 +1,57 @@
+// clang-format off
+// RUN: %clangXX %flags %openmp_flags -fopenmp-version=60 %s -o %t
+// RUN: env OMP_NUM_THREADS=4 %libomp-run 2>&1 | FileCheck %s
+// REQUIRES: omp_taskgraph_experimental
+// clang-format on
+
+#include <cstdio>
+
+struct Tracker {
+  static int Ctors;
+  static int Dtors;
+
+  int Value;
+
+  explicit Tracker(int V) : Value(V) { ++Ctors; }
+  ~Tracker() { ++Dtors; }
+
+  void bump(int Delta) { Value += Delta; }
+};
+
+int Tracker::Ctors = 0;
+int Tracker::Dtors = 0;
+
+__attribute__((noinline)) static int run_taskgraph_nontrivial(int seed) {
+  Tracker Obj(seed);
+  int res = 0;
+
+#pragma omp taskgraph graph_id(614)
+  {
+#pragma omp taskloop replayable num_tasks(8) shared(Obj) reduction(+ : res)
+    for (int i = 0; i < 16; ++i) {
+      res += Obj.Value + i;
+    }
+  }
+
+  return res;
+}
+
+int main() {
+  const int first = run_taskgraph_nontrivial(1);
+  const int second = run_taskgraph_nontrivial(100);
+
+  if (first != 136 || second != 1720 || Tracker::Ctors < 2 ||
+      Tracker::Dtors < 2 || Tracker::Ctors != Tracker::Dtors) {
+    std::fprintf(stderr,
+                 "FAIL lexical nontrivial taskloop replay first=%d second=%d 
ctors=%d dtors=%d\n",
+                 first, second, Tracker::Ctors, Tracker::Dtors);
+    return 1;
+  }
+
+  std::fprintf(stderr,
+               "PASS lexical nontrivial taskloop replay first=%d second=%d 
ctors=%d dtors=%d\n",
+               first, second, Tracker::Ctors, Tracker::Dtors);
+  return 0;
+}
+
+// CHECK: PASS lexical nontrivial taskloop replay first=136 second=1720
diff --git 
a/openmp/runtime/test/taskgraph/taskgraph_replayable_lexical_shared_pointer_recursive_frameid_taskloop.cpp
 
b/openmp/runtime/test/taskgraph/taskgraph_replayable_lexical_shared_pointer_recursive_frameid_taskloop.cpp
new file mode 100644
index 0000000000000..5c5aada2222e8
--- /dev/null
+++ 
b/openmp/runtime/test/taskgraph/taskgraph_replayable_lexical_shared_pointer_recursive_frameid_taskloop.cpp
@@ -0,0 +1,83 @@
+// clang-format off
+// RUN: %clangXX %flags %openmp_flags -fopenmp-version=60 %s -o %t
+// RUN: env OMP_NUM_THREADS=4 %libomp-run 2>&1 | FileCheck %s
+// REQUIRES: omp_taskgraph_experimental
+// XFAIL: *
+// clang-format on
+
+#include <cstdio>
+#include <cstdint>
+
+__attribute__((noinline)) static int expected_recursive(int depth, int seed,
+                                                        int run_tag) {
+  int value = 16 * (seed + (depth + 1) * 3 + run_tag) + 120;
+  if (depth == 0)
+    return value;
+  return value + expected_recursive(depth - 1, seed + 7, run_tag);
+}
+
+__attribute__((noinline)) static int run_recursive_frameid(int depth, int seed,
+                                                           int run_tag) {
+  int value = seed;
+  int *ptr = &value;
+  int *&ptr_ref = ptr;
+  int sum_delta = 0;
+  uintptr_t frame_gid = 
reinterpret_cast<uintptr_t>(__builtin_frame_address(0));
+
+  // Typically, if captured pointers refer to locations on the stack, that
+  // would not be safe for taskgraph record/replay because in general we
+  // cannot rewrite such pointers to point to the current (live) stack frame.
+  //
+  // This is one possible way around that though: we keep a taskgraph record
+  // per stack-depth, each of which may refer to the local stack frame.
+  //
+  // I probably wouldn't recommend use of this technique in production code.
+#pragma omp taskgraph graph_id(frame_gid)
+  {
+#pragma omp taskloop replayable num_tasks(8) shared(ptr_ref, depth, run_tag) 
reduction(+ : sum_delta)
+    for (int i = 0; i < 16; ++i) {
+      int delta = (depth + 1) * 3 + run_tag + i;
+      __atomic_fetch_add(ptr_ref, delta, __ATOMIC_RELAXED);
+      sum_delta += delta;
+    }
+  }
+
+  int local = value * 17 + sum_delta;
+
+  if (depth == 0)
+    return local;
+  return local + run_recursive_frameid(depth - 1, seed + 7, run_tag);
+}
+
+int main() {
+  const int depth = 3;
+  int recorded_sum = 0;
+  int replayed_sum = 0;
+
+  for (int run = 0; run < 3; ++run) {
+    int seed = 100 * run + 1;
+    int val = run_recursive_frameid(depth, seed, run);
+    if (run == 0)
+      recorded_sum = val;
+    else
+      replayed_sum += val;
+  }
+
+  // With missing relocation for taskloop replay, recursive invocations that
+  // mutate through shared-block pointers are expected to diverge from the
+  // expected replay behavior.
+  const int expected_replayed = 2 * recorded_sum;
+  if (replayed_sum == expected_replayed) {
+    std::fprintf(stderr,
+                 "UNEXPECTED SUCCESS recursive pointer taskloop replay 
recorded=%d replayed_total=%d expected_total=%d\n",
+                 recorded_sum, replayed_sum, expected_replayed);
+    return 0;
+  }
+
+  std::fprintf(stderr,
+               "EXPECTED FAILURE recursive pointer taskloop replay recorded=%d 
replayed_total=%d expected_total=%d\n",
+               recorded_sum, replayed_sum, expected_replayed);
+  return 1;
+}
+
+// CHECK: EXPECTED FAILURE recursive pointer taskloop replay
diff --git 
a/openmp/runtime/test/taskgraph/taskgraph_replayable_lexical_shared_pointer_taskloop.cpp
 
b/openmp/runtime/test/taskgraph/taskgraph_replayable_lexical_shared_pointer_taskloop.cpp
new file mode 100644
index 0000000000000..8e8f98f4d86b6
--- /dev/null
+++ 
b/openmp/runtime/test/taskgraph/taskgraph_replayable_lexical_shared_pointer_taskloop.cpp
@@ -0,0 +1,42 @@
+// clang-format off
+// RUN: %clangXX %flags %openmp_flags -fopenmp-version=60 %s -o %t
+// RUN: env OMP_NUM_THREADS=4 %libomp-run 2>&1 | FileCheck %s
+// REQUIRES: omp_taskgraph_experimental
+// clang-format on
+
+#include <cstdio>
+
+__attribute__((noinline)) static int run_taskgraph_pointer_shared(int seed) {
+  int value = seed;
+  int *ptr = &value;
+  int res = 0;
+
+#pragma omp taskgraph graph_id(613)
+  {
+#pragma omp taskloop replayable num_tasks(8) shared(ptr) reduction(+ : res)
+    for (int i = 0; i < 16; ++i) {
+      res += *ptr + i;
+    }
+  }
+
+  return res;
+}
+
+int main() {
+  const int first = run_taskgraph_pointer_shared(1);
+  const int second = run_taskgraph_pointer_shared(100);
+
+  if (first != 136 || second != 1720) {
+    std::fprintf(stderr,
+                 "FAIL lexical pointer shared taskloop replay first=%d 
second=%d expected=136/1720\n",
+                 first, second);
+    return 1;
+  }
+
+  std::fprintf(stderr,
+               "PASS lexical pointer shared taskloop replay first=%d 
second=%d\n",
+               first, second);
+...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/200405
_______________________________________________
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

Reply via email to