https://github.com/nicebert updated https://github.com/llvm/llvm-project/pull/138294
>From 025d36ef4386bf017e3a8db4f42076a6350ff4ff Mon Sep 17 00:00:00 2001 From: Nicole Aschenbrenner <nicole.aschenbren...@amd.com> Date: Fri, 2 May 2025 09:58:23 -0400 Subject: [PATCH 1/6] [OpenMP] Adds omp_target_is_accessible routine Adds implementation of omp_target_is_accessible routine with 5.1 behaviour, checking if a host pointer is acccessible from a device without running on the device (from the host). --- clang/docs/OpenMPSupport.rst | 2 +- offload/include/device.h | 3 ++ offload/include/omptarget.h | 1 + offload/libomptarget/OpenMP/API.cpp | 45 ++++++++++++++++++- offload/libomptarget/device.cpp | 4 ++ offload/libomptarget/exports | 1 + offload/plugins-nextgen/amdgpu/src/rtl.cpp | 2 + .../common/include/PluginInterface.h | 7 +++ .../common/src/PluginInterface.cpp | 8 ++++ offload/test/mapping/is_accessible.cpp | 40 +++++++++++++++++ 10 files changed, 111 insertions(+), 2 deletions(-) create mode 100644 offload/test/mapping/is_accessible.cpp diff --git a/clang/docs/OpenMPSupport.rst b/clang/docs/OpenMPSupport.rst index 58cd10ad4d8fa..838d329041baa 100644 --- a/clang/docs/OpenMPSupport.rst +++ b/clang/docs/OpenMPSupport.rst @@ -256,7 +256,7 @@ implementation. +------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ | device | device-specific environment variables | :none:`unclaimed` | | +------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ -| device | omp_target_is_accessible routine | :none:`unclaimed` | | +| device | omp_target_is_accessible routine | :part:`worked on` | https://github.com/llvm/llvm-project/pull/138294 | +------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ | device | omp_get_mapped_ptr routine | :good:`done` | D141545 | +------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+ diff --git a/offload/include/device.h b/offload/include/device.h index f4b10abbaa3fd..c82d947de6891 100644 --- a/offload/include/device.h +++ b/offload/include/device.h @@ -152,6 +152,9 @@ struct DeviceTy { /// Ask the device whether the runtime should use auto zero-copy. bool useAutoZeroCopy(); + /// Ask the device whether it supports unified memory. + bool supportsUnifiedMemory(); + /// Check if there are pending images for this device. bool hasPendingImages() const { return HasPendingImages; } diff --git a/offload/include/omptarget.h b/offload/include/omptarget.h index 6971780c7bdb5..8af8c4f659b35 100644 --- a/offload/include/omptarget.h +++ b/offload/include/omptarget.h @@ -280,6 +280,7 @@ int omp_get_initial_device(void); void *omp_target_alloc(size_t Size, int DeviceNum); void omp_target_free(void *DevicePtr, int DeviceNum); int omp_target_is_present(const void *Ptr, int DeviceNum); +int omp_target_is_accessible(const void *Ptr, size_t Size, int DeviceNum); int omp_target_memcpy(void *Dst, const void *Src, size_t Length, size_t DstOffset, size_t SrcOffset, int DstDevice, int SrcDevice); diff --git a/offload/libomptarget/OpenMP/API.cpp b/offload/libomptarget/OpenMP/API.cpp index 4576f9bd06121..3ad54220c1135 100644 --- a/offload/libomptarget/OpenMP/API.cpp +++ b/offload/libomptarget/OpenMP/API.cpp @@ -93,7 +93,8 @@ EXTERN int omp_get_device_num(void) { EXTERN int omp_get_initial_device(void) { TIMESCOPE(); OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); - int HostDevice = omp_get_num_devices(); + int NumDevices = omp_get_num_devices(); + int HostDevice = NumDevices == 0 ? -1 : NumDevices; DP("Call to omp_get_initial_device returning %d\n", HostDevice); return HostDevice; } @@ -195,6 +196,48 @@ EXTERN int omp_target_is_present(const void *Ptr, int DeviceNum) { return Rc; } +/// Check whether a pointer is accessible from a device. +/// the functionality is available in OpenMP 5.1 and later +/// OpenMP 5.1 +/// omp_target_is_accessible checks whether a host pointer is accessible from a +/// device OpenMP 6.0 removes restriction on pointer, allowing any pointer +/// interpreted as a pointer in the address space of the given device. +EXTERN int omp_target_is_accessible(const void *Ptr, size_t Size, + int DeviceNum) { + TIMESCOPE(); + OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); + DP("Call to omp_target_is_accessible for device %d, address " DPxMOD + ", size %zu\n", + DeviceNum, DPxPTR(Ptr), Size); + + if (!Ptr) { + DP("Call to omp_target_is_accessible with NULL ptr returning false\n"); + return false; + } + + if (DeviceNum == omp_get_initial_device()) { + DP("Call to omp_target_is_accessible on host, returning true\n"); + return true; + } + + // the device number must refer to a valid device + auto DeviceOrErr = PM->getDevice(DeviceNum); + if (!DeviceOrErr) + FATAL_MESSAGE(DeviceNum, "%s", toString(DeviceOrErr.takeError()).c_str()); + + // for OpenMP 5.1 the routine checks whether a host pointer is accessible from + // the device this requires for the device to support unified shared memory + if (DeviceOrErr->supportsUnifiedMemory()) { + DP("Device %d supports unified memory, returning true\n", DeviceNum); + return true; + } + + // functionality to check whether a device pointer is accessible from a device + // (OpenMP 6.0) from the host might not be possible + DP("Device %d does not support unified memory, returning false\n", DeviceNum); + return false; +} + EXTERN int omp_target_memcpy(void *Dst, const void *Src, size_t Length, size_t DstOffset, size_t SrcOffset, int DstDevice, int SrcDevice) { diff --git a/offload/libomptarget/device.cpp b/offload/libomptarget/device.cpp index f88e30ae9e76b..584c967c1a178 100644 --- a/offload/libomptarget/device.cpp +++ b/offload/libomptarget/device.cpp @@ -281,3 +281,7 @@ bool DeviceTy::useAutoZeroCopy() { return false; return RTL->use_auto_zero_copy(RTLDeviceID); } + +bool DeviceTy::supportsUnifiedMemory() { + return RTL->supports_unified_memory(RTLDeviceID); +} diff --git a/offload/libomptarget/exports b/offload/libomptarget/exports index 2406776c1fb5f..94be34b2fbf77 100644 --- a/offload/libomptarget/exports +++ b/offload/libomptarget/exports @@ -42,6 +42,7 @@ VERS1.0 { omp_get_initial_device; omp_target_alloc; omp_target_free; + omp_target_is_accessible; omp_target_is_present; omp_target_memcpy; omp_target_memcpy_rect; diff --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp b/offload/plugins-nextgen/amdgpu/src/rtl.cpp index f8db9bf0ae739..bdccb988e8d9e 100644 --- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp +++ b/offload/plugins-nextgen/amdgpu/src/rtl.cpp @@ -2821,6 +2821,8 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy { return ((IsAPU || OMPX_ApuMaps) && IsXnackEnabled); } + bool supportsUnifiedMemoryImpl() override { return IsXnackEnabled; } + /// Getters and setters for stack and heap sizes. Error getDeviceStackSize(uint64_t &Value) override { Value = StackSize; diff --git a/offload/plugins-nextgen/common/include/PluginInterface.h b/offload/plugins-nextgen/common/include/PluginInterface.h index 8c17a2ee07047..d0c350c2cf50b 100644 --- a/offload/plugins-nextgen/common/include/PluginInterface.h +++ b/offload/plugins-nextgen/common/include/PluginInterface.h @@ -1003,6 +1003,10 @@ struct GenericDeviceTy : public DeviceAllocatorTy { bool useAutoZeroCopy(); virtual bool useAutoZeroCopyImpl() { return false; } + /// Returns true if the device has unified memory capabilities + bool supportsUnifiedMemory(); + virtual bool supportsUnifiedMemoryImpl() { return false; } + /// Allocate and construct a kernel object. virtual Expected<GenericKernelTy &> constructKernel(const char *Name) = 0; @@ -1402,6 +1406,9 @@ struct GenericPluginTy { /// Returns if the plugin can support automatic copy. int32_t use_auto_zero_copy(int32_t DeviceId); + /// Returns if the the device supports unified memory. + int32_t supports_unified_memory(int32_t DeviceId); + /// Look up a global symbol in the given binary. int32_t get_global(__tgt_device_binary Binary, uint64_t Size, const char *Name, void **DevicePtr); diff --git a/offload/plugins-nextgen/common/src/PluginInterface.cpp b/offload/plugins-nextgen/common/src/PluginInterface.cpp index 94a050b559efe..315c522e7dccb 100644 --- a/offload/plugins-nextgen/common/src/PluginInterface.cpp +++ b/offload/plugins-nextgen/common/src/PluginInterface.cpp @@ -1629,6 +1629,10 @@ Error GenericDeviceTy::syncEvent(void *EventPtr) { bool GenericDeviceTy::useAutoZeroCopy() { return useAutoZeroCopyImpl(); } +bool GenericDeviceTy::supportsUnifiedMemory() { + return supportsUnifiedMemoryImpl(); +} + Error GenericPluginTy::init() { if (Initialized) return Plugin::success(); @@ -2181,6 +2185,10 @@ int32_t GenericPluginTy::use_auto_zero_copy(int32_t DeviceId) { return getDevice(DeviceId).useAutoZeroCopy(); } +int32_t GenericPluginTy::supports_unified_memory(int32_t DeviceId) { + return getDevice(DeviceId).supportsUnifiedMemory(); +} + int32_t GenericPluginTy::get_global(__tgt_device_binary Binary, uint64_t Size, const char *Name, void **DevicePtr) { assert(Binary.handle && "Invalid device binary handle"); diff --git a/offload/test/mapping/is_accessible.cpp b/offload/test/mapping/is_accessible.cpp new file mode 100644 index 0000000000000..6d6a0048e01f3 --- /dev/null +++ b/offload/test/mapping/is_accessible.cpp @@ -0,0 +1,40 @@ +// RUN: %libomptarget-compilexx-generic +// RUN: env HSA_XNACK=1 %libomptarget-run-generic 2>&1 \ +// RUN: | %fcheck-generic + +// RUN: %libomptarget-compilexx-generic +// RUN: env HSA_XNACK=0 %libomptarget-run-generic 2>&1 \ +// RUN: | %fcheck-generic -check-prefix=NO_USM + +// REQUIRES: unified_shared_memory +// REQUIRES: amdgpu + +// CHECK: SUCCESS +// NO_USM: Not accessible + +#include <assert.h> +#include <iostream> +#include <omp.h> +#include <stdio.h> + +int main() { + int n = 10000; + int *a = new int[n]; + int err = 0; + + // program must be executed with HSA_XNACK=1 + if (!omp_target_is_accessible(a, n * sizeof(int), /*device_num=*/0)) + printf("Not accessible\n"); + else { +#pragma omp target teams distribute parallel for + for (int i = 0; i < n; i++) + a[i] = i; + + for (int i = 0; i < n; i++) + if (a[i] != i) + err++; + } + + printf("%s\n", err == 0 ? "SUCCESS" : "FAIL"); + return err; +} >From b33b27ea15da8885f5db7871fdbfe7593c9bd5ec Mon Sep 17 00:00:00 2001 From: nicebert <110385235+niceb...@users.noreply.github.com> Date: Mon, 28 Jul 2025 16:49:21 +0200 Subject: [PATCH 2/6] Update offload/libomptarget/OpenMP/API.cpp Co-authored-by: Shilei Tian <i...@tianshilei.me> --- offload/libomptarget/OpenMP/API.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/offload/libomptarget/OpenMP/API.cpp b/offload/libomptarget/OpenMP/API.cpp index 3ad54220c1135..2958779397946 100644 --- a/offload/libomptarget/OpenMP/API.cpp +++ b/offload/libomptarget/OpenMP/API.cpp @@ -220,7 +220,7 @@ EXTERN int omp_target_is_accessible(const void *Ptr, size_t Size, return true; } - // the device number must refer to a valid device + // The device number must refer to a valid device auto DeviceOrErr = PM->getDevice(DeviceNum); if (!DeviceOrErr) FATAL_MESSAGE(DeviceNum, "%s", toString(DeviceOrErr.takeError()).c_str()); >From bf01578259844418a3f0d5f3eeb48dba6c57aad3 Mon Sep 17 00:00:00 2001 From: nicebert <110385235+niceb...@users.noreply.github.com> Date: Mon, 28 Jul 2025 16:49:57 +0200 Subject: [PATCH 3/6] Fix comment spelling Co-authored-by: Shilei Tian <i...@tianshilei.me> --- offload/libomptarget/OpenMP/API.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/offload/libomptarget/OpenMP/API.cpp b/offload/libomptarget/OpenMP/API.cpp index 2958779397946..5dc294381d545 100644 --- a/offload/libomptarget/OpenMP/API.cpp +++ b/offload/libomptarget/OpenMP/API.cpp @@ -225,7 +225,7 @@ EXTERN int omp_target_is_accessible(const void *Ptr, size_t Size, if (!DeviceOrErr) FATAL_MESSAGE(DeviceNum, "%s", toString(DeviceOrErr.takeError()).c_str()); - // for OpenMP 5.1 the routine checks whether a host pointer is accessible from + // For OpenMP 5.1 the routine checks whether a host pointer is accessible from // the device this requires for the device to support unified shared memory if (DeviceOrErr->supportsUnifiedMemory()) { DP("Device %d supports unified memory, returning true\n", DeviceNum); >From d20f4d5c171852ca42115d0dddd8f329dde6db80 Mon Sep 17 00:00:00 2001 From: Nicole Aschenbrenner <nicole.aschenbren...@amd.com> Date: Fri, 2 May 2025 09:58:23 -0400 Subject: [PATCH 4/6] [OpenMP] Adds omp_target_is_accessible routine Adds implementation of omp_target_is_accessible routine with 5.1 behaviour, checking if a host pointer is acccessible from a device without running on the device (from the host). --- offload/libomptarget/OpenMP/API.cpp | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/offload/libomptarget/OpenMP/API.cpp b/offload/libomptarget/OpenMP/API.cpp index 5dc294381d545..2b92d2c9c3f2d 100644 --- a/offload/libomptarget/OpenMP/API.cpp +++ b/offload/libomptarget/OpenMP/API.cpp @@ -220,18 +220,26 @@ EXTERN int omp_target_is_accessible(const void *Ptr, size_t Size, return true; } - // The device number must refer to a valid device + // the device number must refer to a valid device auto DeviceOrErr = PM->getDevice(DeviceNum); if (!DeviceOrErr) FATAL_MESSAGE(DeviceNum, "%s", toString(DeviceOrErr.takeError()).c_str()); - // For OpenMP 5.1 the routine checks whether a host pointer is accessible from + // TODO: Add check for openmp version compatability + + // for OpenMP 5.1 the routine checks whether a host pointer is accessible from // the device this requires for the device to support unified shared memory if (DeviceOrErr->supportsUnifiedMemory()) { DP("Device %d supports unified memory, returning true\n", DeviceNum); return true; } + // TODO: Provide stubs & implementation to check whether a pointer is accessible from a given device + // using hsa_amd_pointer_info for AMDGPU implementation + // for OpenMP 6.x the specification is required to return true if + // the accessibility of the pointer can be determined otherwise it's allowed to return false + // the specification will be clarified from the current wording + // functionality to check whether a device pointer is accessible from a device // (OpenMP 6.0) from the host might not be possible DP("Device %d does not support unified memory, returning false\n", DeviceNum); >From cb872425f16aa0c4de1905a2fb7e9a6971ef5d88 Mon Sep 17 00:00:00 2001 From: Nicole Aschenbrenner <nicole.aschenbren...@amd.com> Date: Fri, 2 May 2025 09:58:23 -0400 Subject: [PATCH 5/6] [OpenMP] Adds omp_target_is_accessible routine Adds implementation of omp_target_is_accessible routine with 5.1 behaviour, checking if a host pointer is acccessible from a device without running on the device (from the host). --- offload/libomptarget/OpenMP/API.cpp | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/offload/libomptarget/OpenMP/API.cpp b/offload/libomptarget/OpenMP/API.cpp index 5dc294381d545..4d804934c47ed 100644 --- a/offload/libomptarget/OpenMP/API.cpp +++ b/offload/libomptarget/OpenMP/API.cpp @@ -220,18 +220,27 @@ EXTERN int omp_target_is_accessible(const void *Ptr, size_t Size, return true; } - // The device number must refer to a valid device + // the device number must refer to a valid device auto DeviceOrErr = PM->getDevice(DeviceNum); if (!DeviceOrErr) FATAL_MESSAGE(DeviceNum, "%s", toString(DeviceOrErr.takeError()).c_str()); - // For OpenMP 5.1 the routine checks whether a host pointer is accessible from + // TODO: Add check for openmp version compatability + + // for OpenMP 5.1 the routine checks whether a host pointer is accessible from // the device this requires for the device to support unified shared memory if (DeviceOrErr->supportsUnifiedMemory()) { DP("Device %d supports unified memory, returning true\n", DeviceNum); return true; } + // TODO: Provide stubs & implementation to check whether a pointer is + // accessible from a given device using hsa_amd_pointer_info for AMDGPU + // implementation for OpenMP 6.x the specification is required to return true + // if the accessibility of the pointer can be determined otherwise it's + // allowed to return false the specification will be clarified from the + // current wording + // functionality to check whether a device pointer is accessible from a device // (OpenMP 6.0) from the host might not be possible DP("Device %d does not support unified memory, returning false\n", DeviceNum); >From 95ab6fe61af091a0bbf8b3337ba8073adb01aad2 Mon Sep 17 00:00:00 2001 From: Nicole Aschenbrenner <nicole.aschenbren...@amd.com> Date: Fri, 1 Aug 2025 04:55:50 -0500 Subject: [PATCH 6/6] [OpenMP] Reverts omp_get_initial_device changes --- offload/libomptarget/OpenMP/API.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/offload/libomptarget/OpenMP/API.cpp b/offload/libomptarget/OpenMP/API.cpp index 4d804934c47ed..80bfb7f689a75 100644 --- a/offload/libomptarget/OpenMP/API.cpp +++ b/offload/libomptarget/OpenMP/API.cpp @@ -93,8 +93,7 @@ EXTERN int omp_get_device_num(void) { EXTERN int omp_get_initial_device(void) { TIMESCOPE(); OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); - int NumDevices = omp_get_num_devices(); - int HostDevice = NumDevices == 0 ? -1 : NumDevices; + int HostDevice = omp_get_num_devices(); DP("Call to omp_get_initial_device returning %d\n", HostDevice); return HostDevice; } _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits