https://github.com/Thyre updated https://github.com/llvm/llvm-project/pull/194168
From ee9378b676af90002b84ee20a202cc18606772e3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Andr=C3=A9=20Reuter?= <[email protected]> Date: Wed, 29 Apr 2026 14:11:57 +0200 Subject: [PATCH] [OMPT] Add callback for `omp_target_memset` calls MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit OpenMP v6.0 added new enum values to `ompt_callback_target_data_op_t` for communicating `memset` events. Add a corresponding callback invocation to `omp_target_memset` calls. Expand the tests to ensure correct behavior. Signed-off-by: Jan André Reuter <[email protected]> --- clang/docs/OpenMPSupport.rst | 1 + offload/include/OpenMP/OMPT/Interface.h | 13 +++ offload/libomptarget/OpenMP/API.cpp | 5 ++ offload/libomptarget/OpenMP/OMPT/Callback.cpp | 27 +++++++ offload/test/ompt/target_memset.c | 76 ++++++++++++++++++ offload/test/ompt/target_memset_async.c | 79 +++++++++++++++++++ offload/test/ompt/target_memset_emi.c | 78 ++++++++++++++++++ 7 files changed, 279 insertions(+) create mode 100644 offload/test/ompt/target_memset.c create mode 100644 offload/test/ompt/target_memset_async.c create mode 100644 offload/test/ompt/target_memset_emi.c diff --git a/clang/docs/OpenMPSupport.rst b/clang/docs/OpenMPSupport.rst index 962fc717bc496..c026f5a30a192 100644 --- a/clang/docs/OpenMPSupport.rst +++ b/clang/docs/OpenMPSupport.rst @@ -620,6 +620,7 @@ implementation. 
| OMPT: ompt_target_data_transfer(_async) | :part:`partial` | :good:`N/A` | Enum: https://github.com/llvm/llvm-project/pull/195829 | +-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ | OMPT: ompt_target_data_memset(_async) | :part:`partial` | :good:`N/A` | Enum: https://github.com/llvm/llvm-project/pull/195829 | +| | | | Callbacks: https://github.com/llvm/llvm-project/pull/194168 | +-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ | OMPT: workdistribute work callback enum | :part:`partial` | :good:`N/A` | Enum: https://github.com/llvm/llvm-project/pull/195829 | +-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+ diff --git a/offload/include/OpenMP/OMPT/Interface.h b/offload/include/OpenMP/OMPT/Interface.h index 43fb193bc75a6..6961641769b76 100644 --- a/offload/include/OpenMP/OMPT/Interface.h +++ b/offload/include/OpenMP/OMPT/Interface.h @@ -126,6 +126,14 @@ class Interface { void endTargetDisassociatePointer(int64_t DeviceId, void *HstPtrBegin, void *TgtPtrBegin, size_t Size, void *Code); + /// Top-level function for invoking callback before target memset API + void beginTargetMemset(int64_t DeviceId, void *HostPtrBegin, + void *TgtPtrBegin, size_t Size, void *Code); + + /// Top-level function for invoking callback after target memset API + void endTargetMemset(int64_t DeviceId, void *HostPtrBegin, void *TgtPtrBegin, + size_t Size, void *Code); + // Target kernel callbacks /// Top-level function for invoking callback before target construct @@ -166,6 +174,11 @@ class Interface { std::mem_fn(&Interface::beginTargetDisassociatePointer), 
std::mem_fn(&Interface::endTargetDisassociatePointer)); + if constexpr (OpType == ompt_target_data_memset || + OpType == ompt_target_data_memset_async) + return std::make_pair(std::mem_fn(&Interface::beginTargetMemset), + std::mem_fn(&Interface::endTargetMemset)); + llvm_unreachable("Unhandled target data operation type!"); } diff --git a/offload/libomptarget/OpenMP/API.cpp b/offload/libomptarget/OpenMP/API.cpp index 6dcd94e48e987..dc4bccd01dfea 100644 --- a/offload/libomptarget/OpenMP/API.cpp +++ b/offload/libomptarget/OpenMP/API.cpp @@ -477,6 +477,11 @@ EXTERN void *omp_target_memset(void *Ptr, int ByteVal, size_t NumBytes, ODBG(ODT_Interface) << "filling memory on host via memset"; memset(Ptr, ByteVal, NumBytes); // ignore return value, memset() cannot fail } else { + OMPT_IF_BUILT(InterfaceRAII TargetMemsetRAII( + RegionInterface.getCallbacks<ompt_target_data_memset>(), DeviceNum, + nullptr, const_cast<void *>(Ptr), NumBytes, + __builtin_return_address(0))); + // TODO: replace the omp_target_memset() slow path with the fast path. // That will require the ability to execute a kernel from within // libomptarget.so (which we do not have at the moment). 
diff --git a/offload/libomptarget/OpenMP/OMPT/Callback.cpp b/offload/libomptarget/OpenMP/OMPT/Callback.cpp index c107fa00ce291..1e03f1455d1b2 100644 --- a/offload/libomptarget/OpenMP/OMPT/Callback.cpp +++ b/offload/libomptarget/OpenMP/OMPT/Callback.cpp @@ -387,6 +387,33 @@ void Interface::endTargetDisassociatePointer(int64_t DeviceId, } } +void Interface::beginTargetMemset(int64_t DeviceId, void *HostPtrBegin, + void *TgtPtrBegin, size_t Size, void *Code) { + beginTargetDataOperation(); + if (ompt_callback_target_data_op_emi_fn) { + ompt_callback_target_data_op_emi_fn( + ompt_scope_begin, TargetTaskData, &TargetData, &HostOpId, + ompt_target_data_memset, HostPtrBegin, omp_initial_device, TgtPtrBegin, + DeviceId, Size, Code); + } else if (ompt_callback_target_data_op_fn) { + HostOpId = createOpId(); + ompt_callback_target_data_op_fn( + TargetData.value, HostOpId, ompt_target_data_memset, HostPtrBegin, + omp_initial_device, TgtPtrBegin, DeviceId, Size, Code); + } +} + +void Interface::endTargetMemset(int64_t DeviceId, void *HostPtrBegin, + void *TgtPtrBegin, size_t Size, void *Code) { + if (ompt_callback_target_data_op_emi_fn) { + ompt_callback_target_data_op_emi_fn( + ompt_scope_end, TargetTaskData, &TargetData, &HostOpId, + ompt_target_data_memset, HostPtrBegin, omp_initial_device, TgtPtrBegin, + DeviceId, Size, Code); + } + endTargetDataOperation(); +} + void Interface::beginTarget(int64_t DeviceId, void *Code) { beginTargetRegion(); if (ompt_callback_target_emi_fn) { diff --git a/offload/test/ompt/target_memset.c b/offload/test/ompt/target_memset.c new file mode 100644 index 0000000000000..9f7730a6d4fbb --- /dev/null +++ b/offload/test/ompt/target_memset.c @@ -0,0 +1,76 @@ +// clang-format off +// RUN: %libomptarget-compile-run-and-check-generic +// REQUIRES: ompt +// REQUIRES: gpu +// clang-format on + +/* + * Verify that for the target OpenMP APIs, the return address is non-null and + * distinct. 
+ */ + +#include <omp.h> +#include <stdlib.h> + +#include "callbacks.h" +#include "register_non_emi.h" + +int main() { + int d = omp_get_default_device(); + int id = omp_get_initial_device(); + int q[128], i; + void *p; + void *result; + + if (d < 0 || d >= omp_get_num_devices()) + d = id; + + p = omp_target_alloc(130 * sizeof(int), d); + if (p == NULL) + return 0; + + for (i = 0; i < 128; i++) + q[i] = i; + + result = omp_target_memset(p, 0, 130 * sizeof(int), d); + if (result != p) { + abort(); + } + + int q2[128]; + for (i = 0; i < 128; ++i) + q2[i] = i; + if (omp_target_memcpy_async(q2, p, 128 * sizeof(int), 0, sizeof(int), id, d, + 0, NULL)) + abort(); + +#pragma omp taskwait + + for (i = 0; i < 128; ++i) + if (q2[i] != 0) + abort(); + + omp_target_free(p, d); + + return 0; +} + +// clang-format off +/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=ompt_target_data_alloc +/// CHECK-SAME: src_device_num=[[HOST:-1]] +/// CHECK-SAME: dest_device_num=[[DEVICE:[0-9]+]] +/// CHECK-NOT: code=(nil) +/// CHECK: code=[[CODE1:0x[0-f]+]] +/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=ompt_target_data_memset +/// CHECK-SAME: src_device_num=[[HOST]] {{.+}} dest_device_num=[[DEVICE]] +/// CHECK-NOT: code=(nil) +/// CHECK-NOT: code=[[CODE1]] +/// CHECK: code=[[CODE2:0x[0-f]+]] +/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=ompt_target_data_transfer_from_device +/// CHECK-SAME: src_device_num=[[DEVICE]] {{.+}} dest_device_num=[[HOST]] +/// CHECK-NOT: code=(nil) +/// CHECK-NOT: code=[[CODE2]] +/// CHECK: code=[[CODE3:0x[0-f]+]] +/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=ompt_target_data_delete +/// CHECK-NOT: code=(nil) +/// CHECK-NOT: code=[[CODE3]] diff --git a/offload/test/ompt/target_memset_async.c b/offload/test/ompt/target_memset_async.c new file mode 100644 
index 0000000000000..a57353848145a --- /dev/null +++ b/offload/test/ompt/target_memset_async.c @@ -0,0 +1,79 @@ +// clang-format off +// RUN: %libomptarget-compile-run-and-check-generic +// REQUIRES: ompt +// REQUIRES: gpu +// clang-format on + +/* + * Verify that for the target OpenMP APIs, the return address is non-null and + * distinct. + */ + +#include <omp.h> +#include <stdlib.h> + +#include "callbacks.h" +#include "register_non_emi.h" + +int main() { + int d = omp_get_default_device(); + int id = omp_get_initial_device(); + int q[128], i; + void *p; + void *result; + + if (d < 0 || d >= omp_get_num_devices()) + d = id; + + p = omp_target_alloc(130 * sizeof(int), d); + if (p == NULL) + return 0; + + for (i = 0; i < 128; i++) + q[i] = i; + + result = omp_target_memset_async(p, 0, 130 * sizeof(int), d, 0, NULL); + +#pragma omp taskwait + + if (result != p) { + abort(); + } + + int q2[128]; + for (i = 0; i < 128; ++i) + q2[i] = i; + if (omp_target_memcpy_async(q2, p, 128 * sizeof(int), 0, sizeof(int), id, d, + 0, NULL)) + abort(); + +#pragma omp taskwait + + for (i = 0; i < 128; ++i) + if (q2[i] != 0) + abort(); + + omp_target_free(p, d); + + return 0; +} + +// clang-format off +/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=ompt_target_data_alloc +/// CHECK-SAME: src_device_num=[[HOST:-1]] +/// CHECK-SAME: dest_device_num=[[DEVICE:[0-9]+]] +/// CHECK-NOT: code=(nil) +/// CHECK: code=[[CODE1:0x[0-f]+]] +/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=ompt_target_data_memset +/// CHECK-SAME: src_device_num=[[HOST]] {{.+}} dest_device_num=[[DEVICE]] +/// CHECK-NOT: code=(nil) +/// CHECK-NOT: code=[[CODE1]] +/// CHECK: code=[[CODE2:0x[0-f]+]] +/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=ompt_target_data_transfer_from_device +/// CHECK-SAME: src_device_num=[[DEVICE]] {{.+}} dest_device_num=[[HOST]] +/// 
CHECK-NOT: code=(nil) +/// CHECK-NOT: code=[[CODE2]] +/// CHECK: code=[[CODE3:0x[0-f]+]] +/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=ompt_target_data_delete +/// CHECK-NOT: code=(nil) +/// CHECK-NOT: code=[[CODE3]] diff --git a/offload/test/ompt/target_memset_emi.c b/offload/test/ompt/target_memset_emi.c new file mode 100644 index 0000000000000..9161a84aa5360 --- /dev/null +++ b/offload/test/ompt/target_memset_emi.c @@ -0,0 +1,78 @@ +// clang-format off +// RUN: %libomptarget-compile-run-and-check-generic +// REQUIRES: ompt +// REQUIRES: gpu +// clang-format on + +/* + * Verify correct callback sequence for memset API call. + */ + +#include <omp.h> +#include <stdio.h> +#include <stdlib.h> + +#include "callbacks.h" +#include "register_emi.h" + +int main() { + int d = omp_get_default_device(); + int id = omp_get_initial_device(); + int q[128], i; + void *p; + void *result; + + if (d < 0 || d >= omp_get_num_devices()) + d = id; + + p = omp_target_alloc(130 * sizeof(int), d); + if (p == NULL) + return 0; + + for (i = 0; i < 128; i++) + q[i] = i; + + result = omp_target_memset(p, 0, 130 * sizeof(int), d); + if (result != p) { + abort(); + } + + int q2[128]; + for (i = 0; i < 128; ++i) + q2[i] = i; + if (omp_target_memcpy_async(q2, p, 128 * sizeof(int), 0, sizeof(int), id, d, + 0, NULL)) + abort(); + +#pragma omp taskwait + + for (i = 0; i < 128; ++i) + if (q2[i] != 0) + abort(); + + omp_target_free(p, d); + + return 0; +} + +// clang-format off + +/// CHECK: Callback Init: + +/// CHECK: Callback DataOp EMI: endpoint=ompt_scope_begin optype=ompt_target_data_alloc +/// CHECK-SAME: src_device_num=[[HOST:-1]] +/// CHECK-SAME: dest_device_num=[[DEVICE:[0-9]+]] +/// CHECK: Callback DataOp EMI: endpoint=ompt_scope_end optype=ompt_target_data_alloc {{.+}} src_device_num=[[HOST]] {{.+}} dest_device_num=[[DEVICE]] + +/// CHECK: Callback DataOp EMI: endpoint=ompt_scope_begin optype=ompt_target_data_memset {{.+}} 
src_device_num=[[HOST]] {{.+}} dest_device_num=[[DEVICE]] +/// CHECK: Callback DataOp EMI: endpoint=ompt_scope_end optype=ompt_target_data_memset {{.+}} src_device_num=[[HOST]] {{.+}} dest_device_num=[[DEVICE]] + +/// CHECK: Callback DataOp EMI: endpoint=ompt_scope_begin optype=ompt_target_data_transfer_from_device {{.+}} src_device_num=[[DEVICE]] {{.+}} dest_device_num=[[HOST]] +/// CHECK: Callback DataOp EMI: endpoint=ompt_scope_end optype=ompt_target_data_transfer_from_device {{.+}} src_device_num=[[DEVICE]] {{.+}} dest_device_num=[[HOST]] + +/// CHECK: Callback DataOp EMI: endpoint=ompt_scope_begin optype=ompt_target_data_delete {{.+}} src_device_num=[[DEVICE]] +/// CHECK: Callback DataOp EMI: endpoint=ompt_scope_end optype=ompt_target_data_delete {{.+}} src_device_num=[[DEVICE]] + +/// CHECK: Callback Fini: + +// clang-format on _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
