[llvm-branch-commits] [llvm] [Offload] Add GenericPluginTy::get_mem_info (PR #157484)

Ross Brunton via llvm-branch-commits Sat, 13 Sep 2025 01:28:26 -0700

https://github.com/RossBrunton updated 
https://github.com/llvm/llvm-project/pull/157484


>From 7bf7fe1df8a873964df2ebc17328d9bef00f1347 Mon Sep 17 00:00:00 2001
From: Ross Brunton <r...@codeplay.com>
Date: Mon, 8 Sep 2025 10:45:42 +0100
Subject: [PATCH 1/6] [Offload] Add GenericPluginTy::get_mem_info

This takes a pointer allocated by the plugin, and returns a struct
containing important information about it. This is now used in
`olMemFree` instead of using a map to track allocation info.
---
 offload/include/omptarget.h                   |   2 +
 offload/liboffload/src/OffloadImpl.cpp        |  27 +--
 .../amdgpu/dynamic_hsa/hsa.cpp                |   1 +
 .../amdgpu/dynamic_hsa/hsa_ext_amd.h          |   3 +
 offload/plugins-nextgen/amdgpu/src/rtl.cpp    |  31 ++-
 .../common/include/PluginInterface.h          |  13 ++
 offload/plugins-nextgen/cuda/src/rtl.cpp      | 216 +++++++++++-------
 offload/plugins-nextgen/host/src/rtl.cpp      |   5 +
 8 files changed, 193 insertions(+), 105 deletions(-)

diff --git a/offload/include/omptarget.h b/offload/include/omptarget.h
index 8fd722bb15022..197cbd3806d91 100644
--- a/offload/include/omptarget.h
+++ b/offload/include/omptarget.h
@@ -96,6 +96,8 @@ enum OpenMPOffloadingDeclareTargetFlags {
   OMP_REGISTER_REQUIRES = 0x10,
 };
 
+// Note: This type should be no larger than 3 bits, as the amdgpu platform uses
+// the lower 3 bits of a pointer to store it
 enum TargetAllocTy : int32_t {
   TARGET_ALLOC_DEVICE = 0,
   TARGET_ALLOC_HOST,
diff --git a/offload/liboffload/src/OffloadImpl.cpp 
b/offload/liboffload/src/OffloadImpl.cpp
index fef3a5669e0d5..9620c35ac5c10 100644
--- a/offload/liboffload/src/OffloadImpl.cpp
+++ b/offload/liboffload/src/OffloadImpl.cpp
@@ -201,8 +201,6 @@ struct OffloadContext {
 
   bool TracingEnabled = false;
   bool ValidationEnabled = true;
-  DenseMap<void *, AllocInfo> AllocInfoMap{};
-  std::mutex AllocInfoMapMutex{};
   SmallVector<ol_platform_impl_t, 4> Platforms{};
   size_t RefCount;
 
@@ -624,32 +622,15 @@ Error olMemAlloc_impl(ol_device_handle_t Device, 
ol_alloc_type_t Type,
     return Alloc.takeError();
 
   *AllocationOut = *Alloc;
-  {
-    std::lock_guard<std::mutex> Lock(OffloadContext::get().AllocInfoMapMutex);
-    OffloadContext::get().AllocInfoMap.insert_or_assign(
-        *Alloc, AllocInfo{Device, Type});
-  }
   return Error::success();
 }
 
 Error olMemFree_impl(ol_platform_handle_t Platform, void *Address) {
-  ol_device_handle_t Device;
-  ol_alloc_type_t Type;
-  {
-    std::lock_guard<std::mutex> Lock(OffloadContext::get().AllocInfoMapMutex);
-    if (!OffloadContext::get().AllocInfoMap.contains(Address))
-      return createOffloadError(ErrorCode::INVALID_ARGUMENT,
-                                "address is not a known allocation");
-
-    auto AllocInfo = OffloadContext::get().AllocInfoMap.at(Address);
-    Device = AllocInfo.Device;
-    Type = AllocInfo.Type;
-    OffloadContext::get().AllocInfoMap.erase(Address);
-  }
-  assert(Platform == Device->Platform);
+  auto MemInfo = Platform->Plugin->get_memory_info(Address);
+  if (auto Err = MemInfo.takeError())
+    return Err;
 
-  if (auto Res =
-          Device->Device->dataDelete(Address, convertOlToPluginAllocTy(Type)))
+  if (auto Res = MemInfo->Device->dataDelete(Address, MemInfo->Type))
     return Res;
 
   return Error::success();
diff --git a/offload/plugins-nextgen/amdgpu/dynamic_hsa/hsa.cpp 
b/offload/plugins-nextgen/amdgpu/dynamic_hsa/hsa.cpp
index bc92f4a46a5c0..7f0e75cb9b500 100644
--- a/offload/plugins-nextgen/amdgpu/dynamic_hsa/hsa.cpp
+++ b/offload/plugins-nextgen/amdgpu/dynamic_hsa/hsa.cpp
@@ -68,6 +68,7 @@ DLWRAP(hsa_amd_register_system_event_handler, 2)
 DLWRAP(hsa_amd_signal_create, 5)
 DLWRAP(hsa_amd_signal_async_handler, 5)
 DLWRAP(hsa_amd_pointer_info, 5)
+DLWRAP(hsa_amd_pointer_info_set_userdata, 2)
 DLWRAP(hsa_code_object_reader_create_from_memory, 3)
 DLWRAP(hsa_code_object_reader_destroy, 1)
 DLWRAP(hsa_executable_load_agent_code_object, 5)
diff --git a/offload/plugins-nextgen/amdgpu/dynamic_hsa/hsa_ext_amd.h 
b/offload/plugins-nextgen/amdgpu/dynamic_hsa/hsa_ext_amd.h
index 29cfe78082dbb..5c2fbd127c86d 100644
--- a/offload/plugins-nextgen/amdgpu/dynamic_hsa/hsa_ext_amd.h
+++ b/offload/plugins-nextgen/amdgpu/dynamic_hsa/hsa_ext_amd.h
@@ -160,6 +160,7 @@ typedef struct hsa_amd_pointer_info_s {
   void* agentBaseAddress;
   void* hostBaseAddress;
   size_t sizeInBytes;
+  void *userData;
 } hsa_amd_pointer_info_t;
 
 hsa_status_t hsa_amd_pointer_info(const void* ptr,
@@ -168,6 +169,8 @@ hsa_status_t hsa_amd_pointer_info(const void* ptr,
                                           uint32_t* num_agents_accessible,
                                           hsa_agent_t** accessible);
 
+hsa_status_t hsa_amd_pointer_info_set_userdata(const void *ptr, void 
*userdata);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp 
b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
index c26cfe961aa0e..90d9ca9f787e7 100644
--- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp
+++ b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -96,6 +96,8 @@ struct AMDGPUMemoryPoolTy;
 
 namespace hsa_utils {
 
+using UserDataPair = PointerIntPair<AMDGPUDeviceTy *, 3>;
+
 /// Iterate elements using an HSA iterate function. Do not use this function
 /// directly but the specialized ones below instead.
 template <typename ElemTy, typename IterFuncTy, typename CallbackTy>
@@ -2032,7 +2034,8 @@ struct AMDHostDeviceTy : public AMDGenericDeviceTy {
 
 /// Class implementing the AMDGPU device functionalities which derives from the
 /// generic device class.
-struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
+struct alignas(8) alignas(void *) AMDGPUDeviceTy : public GenericDeviceTy,
+                                                   AMDGenericDeviceTy {
   // Create an AMDGPU device with a device id and default AMDGPU grid values.
   AMDGPUDeviceTy(GenericPluginTy &Plugin, int32_t DeviceId, int32_t NumDevices,
                  AMDHostDeviceTy &HostDevice, hsa_agent_t Agent)
@@ -3561,6 +3564,26 @@ struct AMDGPUPluginTy final : public GenericPluginTy {
     return KernelAgents;
   }
 
+  Expected<MemoryInfoTy> get_memory_info(const void *TgtPtr) override {
+    hsa_amd_pointer_info_t Info;
+    if (auto Err = Plugin::check(
+            hsa_amd_pointer_info(TgtPtr, &Info, nullptr, nullptr, nullptr),
+            "error in hsa_amd_pointer_info: %s"))
+      return Err;
+
+    // The pointer info struct contains an "agent" field, but that doesn't
+    // necessarily map to the device that created it
+    MemoryInfoTy ToReturn;
+    ToReturn.Base = Info.agentBaseAddress;
+    ToReturn.Size = Info.size;
+    auto UserData = hsa_utils::UserDataPair::getFromOpaqueValue(Info.userData);
+    ToReturn.Type = static_cast<TargetAllocTy>(UserData.getInt());
+    ToReturn.Device =
+        reinterpret_cast<GenericDeviceTy *>(UserData.getPointer());
+
+    return ToReturn;
+  }
+
 private:
   /// Event handler that will be called by ROCr if an event is detected.
   static hsa_status_t eventHandler(const hsa_amd_event_t *Event,
@@ -3882,6 +3905,12 @@ void *AMDGPUDeviceTy::allocate(size_t Size, void *, 
TargetAllocTy Kind) {
       REPORT("%s\n", toString(std::move(Err)).data());
       return nullptr;
     }
+
+    auto UserData = hsa_utils::UserDataPair(this, Kind);
+    if (auto Err = Plugin::check(
+            hsa_amd_pointer_info_set_userdata(Alloc, 
UserData.getOpaqueValue()),
+            "error in hsa_amd_pointer_info_set_userdata: %s"))
+      REPORT("%s\n", toString(std::move(Err)).data());
   }
 
   return Alloc;
diff --git a/offload/plugins-nextgen/common/include/PluginInterface.h 
b/offload/plugins-nextgen/common/include/PluginInterface.h
index f0c05a1b90716..ce270b3f1ab9a 100644
--- a/offload/plugins-nextgen/common/include/PluginInterface.h
+++ b/offload/plugins-nextgen/common/include/PluginInterface.h
@@ -1271,6 +1271,15 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
   DeviceMemoryPoolTrackingTy DeviceMemoryPoolTracking = {0, 0, ~0U, 0};
 };
 
+struct MemoryInfoTy {
+  void *Base;
+  size_t Size;
+  TargetAllocTy Type;
+  GenericDeviceTy *Device;
+
+  void *limit() const { return reinterpret_cast<char *>(Base) + Size; }
+};
+
 /// Class implementing common functionalities of offload plugins. Each plugin
 /// should define the specific plugin class, derive from this generic one, and
 /// implement the necessary virtual function members.
@@ -1567,6 +1576,10 @@ struct GenericPluginTy {
   /// object and return immediately.
   int32_t async_barrier(omp_interop_val_t *Interop);
 
+  /// Given a pointer previously allocated by GenericDeviceTy::Allocate, return
+  /// information about that allocation
+  virtual Expected<MemoryInfoTy> get_memory_info(const void *TgtPtr) = 0;
+
 private:
   /// Indicates if the platform runtime has been fully initialized.
   bool Initialized = false;
diff --git a/offload/plugins-nextgen/cuda/src/rtl.cpp 
b/offload/plugins-nextgen/cuda/src/rtl.cpp
index af3c74636bff3..f04049cd8bdfd 100644
--- a/offload/plugins-nextgen/cuda/src/rtl.cpp
+++ b/offload/plugins-nextgen/cuda/src/rtl.cpp
@@ -566,89 +566,10 @@ struct CUDADeviceTy : public GenericDeviceTy {
   }
 
   /// Allocate memory on the device or related to the device.
-  void *allocate(size_t Size, void *, TargetAllocTy Kind) override {
-    if (Size == 0)
-      return nullptr;
-
-    if (auto Err = setContext()) {
-      REPORT("Failure to alloc memory: %s\n", toString(std::move(Err)).data());
-      return nullptr;
-    }
-
-    void *MemAlloc = nullptr;
-    CUdeviceptr DevicePtr;
-    CUresult Res;
-
-    switch (Kind) {
-    case TARGET_ALLOC_DEFAULT:
-    case TARGET_ALLOC_DEVICE:
-      Res = cuMemAlloc(&DevicePtr, Size);
-      MemAlloc = (void *)DevicePtr;
-      break;
-    case TARGET_ALLOC_HOST:
-      Res = cuMemAllocHost(&MemAlloc, Size);
-      break;
-    case TARGET_ALLOC_SHARED:
-      Res = cuMemAllocManaged(&DevicePtr, Size, CU_MEM_ATTACH_GLOBAL);
-      MemAlloc = (void *)DevicePtr;
-      break;
-    case TARGET_ALLOC_DEVICE_NON_BLOCKING: {
-      CUstream Stream;
-      if ((Res = cuStreamCreate(&Stream, CU_STREAM_NON_BLOCKING)))
-        break;
-      if ((Res = cuMemAllocAsync(&DevicePtr, Size, Stream)))
-        break;
-      cuStreamSynchronize(Stream);
-      Res = cuStreamDestroy(Stream);
-      MemAlloc = (void *)DevicePtr;
-    }
-    }
-
-    if (auto Err =
-            Plugin::check(Res, "error in cuMemAlloc[Host|Managed]: %s")) {
-      REPORT("Failure to alloc memory: %s\n", toString(std::move(Err)).data());
-      return nullptr;
-    }
-    return MemAlloc;
-  }
+  void *allocate(size_t Size, void *, TargetAllocTy Kind) override;
 
   /// Deallocate memory on the device or related to the device.
-  int free(void *TgtPtr, TargetAllocTy Kind) override {
-    if (TgtPtr == nullptr)
-      return OFFLOAD_SUCCESS;
-
-    if (auto Err = setContext()) {
-      REPORT("Failure to free memory: %s\n", toString(std::move(Err)).data());
-      return OFFLOAD_FAIL;
-    }
-
-    CUresult Res;
-    switch (Kind) {
-    case TARGET_ALLOC_DEFAULT:
-    case TARGET_ALLOC_DEVICE:
-    case TARGET_ALLOC_SHARED:
-      Res = cuMemFree((CUdeviceptr)TgtPtr);
-      break;
-    case TARGET_ALLOC_HOST:
-      Res = cuMemFreeHost(TgtPtr);
-      break;
-    case TARGET_ALLOC_DEVICE_NON_BLOCKING: {
-      CUstream Stream;
-      if ((Res = cuStreamCreate(&Stream, CU_STREAM_NON_BLOCKING)))
-        break;
-      cuMemFreeAsync(reinterpret_cast<CUdeviceptr>(TgtPtr), Stream);
-      cuStreamSynchronize(Stream);
-      if ((Res = cuStreamDestroy(Stream)))
-        break;
-    }
-    }
-
-    if (auto Err = Plugin::check(Res, "error in cuMemFree[Host]: %s")) {
-      REPORT("Failure to free memory: %s\n", toString(std::move(Err)).data());
-      return OFFLOAD_FAIL;
-    }
-    return OFFLOAD_SUCCESS;
-  }
+  int free(void *TgtPtr, TargetAllocTy Kind) override;
 
   /// Synchronize current thread with the pending operations on the async info.
   Error synchronizeImpl(__tgt_async_info &AsyncInfo,
@@ -1612,8 +1533,141 @@ struct CUDAPluginTy final : public GenericPluginTy {
     // revision.
     return Major == ImageMajor && Minor >= ImageMinor;
   }
+
+  Expected<MemoryInfoTy> get_memory_info(const void *TgtPtr) override {
+    std::lock_guard<std::mutex> Lock(MemoryInfoMutex);
+
+    // TODO: Is it possible to query this information using CUDA directly?
+
+    // Fast case, we have been given the base pointer directly
+    if (MemoryInfo.contains(TgtPtr))
+      return MemoryInfo[TgtPtr];
+
+    // Slower case, we need to look up the base pointer first
+    auto Loc = std::lower_bound(MemoryBases.begin(), MemoryBases.end(), TgtPtr,
+                                [&](const void *Iter, const void *Val) {
+                                  return MemoryInfo[Iter].limit() < Val;
+                                });
+    if (Loc == MemoryBases.end() || TgtPtr > MemoryInfo[*Loc].limit())
+      return Plugin::error(ErrorCode::NOT_FOUND,
+                           "allocated memory information not found");
+    return MemoryInfo[*Loc];
+  }
+
+private:
+  friend CUDADeviceTy;
+
+  std::mutex MemoryInfoMutex;
+  llvm::SmallVector<const void *> MemoryBases;
+  llvm::DenseMap<const void *, MemoryInfoTy> MemoryInfo;
 };
 
+void *CUDADeviceTy::allocate(size_t Size, void *, TargetAllocTy Kind) {
+  if (Size == 0)
+    return nullptr;
+
+  if (auto Err = setContext()) {
+    REPORT("Failure to alloc memory: %s\n", toString(std::move(Err)).data());
+    return nullptr;
+  }
+
+  void *MemAlloc = nullptr;
+  CUdeviceptr DevicePtr;
+  CUresult Res;
+
+  switch (Kind) {
+  case TARGET_ALLOC_DEFAULT:
+  case TARGET_ALLOC_DEVICE:
+    Res = cuMemAlloc(&DevicePtr, Size);
+    MemAlloc = (void *)DevicePtr;
+    break;
+  case TARGET_ALLOC_HOST:
+    Res = cuMemAllocHost(&MemAlloc, Size);
+    break;
+  case TARGET_ALLOC_SHARED:
+    Res = cuMemAllocManaged(&DevicePtr, Size, CU_MEM_ATTACH_GLOBAL);
+    MemAlloc = (void *)DevicePtr;
+    break;
+  case TARGET_ALLOC_DEVICE_NON_BLOCKING: {
+    CUstream Stream;
+    if ((Res = cuStreamCreate(&Stream, CU_STREAM_NON_BLOCKING)))
+      break;
+    if ((Res = cuMemAllocAsync(&DevicePtr, Size, Stream)))
+      break;
+    cuStreamSynchronize(Stream);
+    Res = cuStreamDestroy(Stream);
+    MemAlloc = (void *)DevicePtr;
+  }
+  }
+
+  if (auto Err = Plugin::check(Res, "error in cuMemAlloc[Host|Managed]: %s")) {
+    REPORT("Failure to alloc memory: %s\n", toString(std::move(Err)).data());
+    return nullptr;
+  }
+
+  auto CudaPlugin = reinterpret_cast<CUDAPluginTy *>(&Plugin);
+  {
+    std::lock_guard<std::mutex> Lock(CudaPlugin->MemoryInfoMutex);
+    CudaPlugin->MemoryBases.insert(
+        std::lower_bound(CudaPlugin->MemoryBases.begin(),
+                         CudaPlugin->MemoryBases.end(), MemAlloc),
+        MemAlloc);
+    CudaPlugin->MemoryInfo[MemAlloc] = MemoryInfoTy{
+        /*Base=*/MemAlloc,
+        /*Size=*/Size,
+        /*Type=*/Kind,
+        /*Device=*/this,
+    };
+  }
+  return MemAlloc;
+}
+
+int CUDADeviceTy::free(void *TgtPtr, TargetAllocTy Kind) {
+  if (TgtPtr == nullptr)
+    return OFFLOAD_SUCCESS;
+
+  if (auto Err = setContext()) {
+    REPORT("Failure to free memory: %s\n", toString(std::move(Err)).data());
+    return OFFLOAD_FAIL;
+  }
+
+  CUresult Res;
+  switch (Kind) {
+  case TARGET_ALLOC_DEFAULT:
+  case TARGET_ALLOC_DEVICE:
+  case TARGET_ALLOC_SHARED:
+    Res = cuMemFree((CUdeviceptr)TgtPtr);
+    break;
+  case TARGET_ALLOC_HOST:
+    Res = cuMemFreeHost(TgtPtr);
+    break;
+  case TARGET_ALLOC_DEVICE_NON_BLOCKING: {
+    CUstream Stream;
+    if ((Res = cuStreamCreate(&Stream, CU_STREAM_NON_BLOCKING)))
+      break;
+    cuMemFreeAsync(reinterpret_cast<CUdeviceptr>(TgtPtr), Stream);
+    cuStreamSynchronize(Stream);
+    if ((Res = cuStreamDestroy(Stream)))
+      break;
+  }
+  }
+
+  if (auto Err = Plugin::check(Res, "error in cuMemFree[Host]: %s")) {
+    REPORT("Failure to free memory: %s\n", toString(std::move(Err)).data());
+    return OFFLOAD_FAIL;
+  }
+
+  auto CudaPlugin = reinterpret_cast<CUDAPluginTy *>(&Plugin);
+  {
+    std::lock_guard<std::mutex> Lock(CudaPlugin->MemoryInfoMutex);
+    CudaPlugin->MemoryBases.erase(std::find(CudaPlugin->MemoryBases.begin(),
+                                            CudaPlugin->MemoryBases.end(),
+                                            TgtPtr));
+    assert(CudaPlugin->MemoryInfo.erase(TgtPtr));
+  }
+  return OFFLOAD_SUCCESS;
+}
+
 Error CUDADeviceTy::dataExchangeImpl(const void *SrcPtr,
                                      GenericDeviceTy &DstGenericDevice,
                                      void *DstPtr, int64_t Size,
diff --git a/offload/plugins-nextgen/host/src/rtl.cpp 
b/offload/plugins-nextgen/host/src/rtl.cpp
index f440ebaf17fe4..0f57043da448c 100644
--- a/offload/plugins-nextgen/host/src/rtl.cpp
+++ b/offload/plugins-nextgen/host/src/rtl.cpp
@@ -517,6 +517,11 @@ struct GenELF64PluginTy final : public GenericPluginTy {
 #endif
   }
 
+  Expected<MemoryInfoTy> get_memory_info(const void *TgtPtr) override {
+    return Plugin::error(ErrorCode::UNIMPLEMENTED,
+                         "get_memory_info not implemented for host device");
+  }
+
   const char *getName() const override { return GETNAME(TARGET_NAME); }
 };
 

>From dfc89b4699156c0f82841cf0681405377ebc5763 Mon Sep 17 00:00:00 2001
From: Ross Brunton <r...@codeplay.com>
Date: Tue, 9 Sep 2025 11:11:58 +0100
Subject: [PATCH 2/6] Don't use auto for MemInfo

---
 offload/liboffload/src/OffloadImpl.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/offload/liboffload/src/OffloadImpl.cpp 
b/offload/liboffload/src/OffloadImpl.cpp
index 9620c35ac5c10..f399a74989036 100644
--- a/offload/liboffload/src/OffloadImpl.cpp
+++ b/offload/liboffload/src/OffloadImpl.cpp
@@ -626,7 +626,7 @@ Error olMemAlloc_impl(ol_device_handle_t Device, 
ol_alloc_type_t Type,
 }
 
 Error olMemFree_impl(ol_platform_handle_t Platform, void *Address) {
-  auto MemInfo = Platform->Plugin->get_memory_info(Address);
+  Expected<MemoryInfoTy> MemInfo = Platform->Plugin->get_memory_info(Address);
   if (auto Err = MemInfo.takeError())
     return Err;
 

>From 4bc13339d32b36834e47b9a09dc821fb5593af03 Mon Sep 17 00:00:00 2001
From: Ross Brunton <r...@codeplay.com>
Date: Tue, 9 Sep 2025 12:04:42 +0100
Subject: [PATCH 3/6] Use proper size in amdgpu

---
 offload/plugins-nextgen/amdgpu/src/rtl.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp 
b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
index 90d9ca9f787e7..b7028b2a9d4c3 100644
--- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp
+++ b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -3575,7 +3575,7 @@ struct AMDGPUPluginTy final : public GenericPluginTy {
     // necessarily map to the device that created it
     MemoryInfoTy ToReturn;
     ToReturn.Base = Info.agentBaseAddress;
-    ToReturn.Size = Info.size;
+    ToReturn.Size = Info.sizeInBytes;
     auto UserData = hsa_utils::UserDataPair::getFromOpaqueValue(Info.userData);
     ToReturn.Type = static_cast<TargetAllocTy>(UserData.getInt());
     ToReturn.Device =

>From f7a0354d13a321ebf49c8e8a7e5c34f3318f9c37 Mon Sep 17 00:00:00 2001
From: Ross Brunton <r...@codeplay.com>
Date: Tue, 9 Sep 2025 12:29:43 +0100
Subject: [PATCH 4/6] Rename get_memory_info -> getMemInfo

---
 offload/liboffload/src/OffloadImpl.cpp                   | 2 +-
 offload/plugins-nextgen/amdgpu/src/rtl.cpp               | 2 +-
 offload/plugins-nextgen/common/include/PluginInterface.h | 2 +-
 offload/plugins-nextgen/cuda/src/rtl.cpp                 | 2 +-
 offload/plugins-nextgen/host/src/rtl.cpp                 | 4 ++--
 5 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/offload/liboffload/src/OffloadImpl.cpp 
b/offload/liboffload/src/OffloadImpl.cpp
index f399a74989036..00d1b83e91b05 100644
--- a/offload/liboffload/src/OffloadImpl.cpp
+++ b/offload/liboffload/src/OffloadImpl.cpp
@@ -626,7 +626,7 @@ Error olMemAlloc_impl(ol_device_handle_t Device, 
ol_alloc_type_t Type,
 }
 
 Error olMemFree_impl(ol_platform_handle_t Platform, void *Address) {
-  Expected<MemoryInfoTy> MemInfo = Platform->Plugin->get_memory_info(Address);
+  Expected<MemoryInfoTy> MemInfo = Platform->Plugin->getMemoryInfo(Address);
   if (auto Err = MemInfo.takeError())
     return Err;
 
diff --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp 
b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
index b7028b2a9d4c3..b4eb55506927f 100644
--- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp
+++ b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -3564,7 +3564,7 @@ struct AMDGPUPluginTy final : public GenericPluginTy {
     return KernelAgents;
   }
 
-  Expected<MemoryInfoTy> get_memory_info(const void *TgtPtr) override {
+  Expected<MemoryInfoTy> getMemoryInfo(const void *TgtPtr) override {
     hsa_amd_pointer_info_t Info;
     if (auto Err = Plugin::check(
             hsa_amd_pointer_info(TgtPtr, &Info, nullptr, nullptr, nullptr),
diff --git a/offload/plugins-nextgen/common/include/PluginInterface.h 
b/offload/plugins-nextgen/common/include/PluginInterface.h
index ce270b3f1ab9a..163b02ac124b8 100644
--- a/offload/plugins-nextgen/common/include/PluginInterface.h
+++ b/offload/plugins-nextgen/common/include/PluginInterface.h
@@ -1578,7 +1578,7 @@ struct GenericPluginTy {
 
   /// Given a pointer previously allocated by GenericDeviceTy::Allocate, return
   /// information about that allocation
-  virtual Expected<MemoryInfoTy> get_memory_info(const void *TgtPtr) = 0;
+  virtual Expected<MemoryInfoTy> getMemoryInfo(const void *TgtPtr) = 0;
 
 private:
   /// Indicates if the platform runtime has been fully initialized.
diff --git a/offload/plugins-nextgen/cuda/src/rtl.cpp 
b/offload/plugins-nextgen/cuda/src/rtl.cpp
index f04049cd8bdfd..2765453504ae9 100644
--- a/offload/plugins-nextgen/cuda/src/rtl.cpp
+++ b/offload/plugins-nextgen/cuda/src/rtl.cpp
@@ -1534,7 +1534,7 @@ struct CUDAPluginTy final : public GenericPluginTy {
     return Major == ImageMajor && Minor >= ImageMinor;
   }
 
-  Expected<MemoryInfoTy> get_memory_info(const void *TgtPtr) override {
+  Expected<MemoryInfoTy> getMemoryInfo(const void *TgtPtr) override {
     std::lock_guard<std::mutex> Lock(MemoryInfoMutex);
 
     // TODO: Is it possible to query this information using CUDA directly?
diff --git a/offload/plugins-nextgen/host/src/rtl.cpp 
b/offload/plugins-nextgen/host/src/rtl.cpp
index 0f57043da448c..aad537e355872 100644
--- a/offload/plugins-nextgen/host/src/rtl.cpp
+++ b/offload/plugins-nextgen/host/src/rtl.cpp
@@ -517,9 +517,9 @@ struct GenELF64PluginTy final : public GenericPluginTy {
 #endif
   }
 
-  Expected<MemoryInfoTy> get_memory_info(const void *TgtPtr) override {
+  Expected<MemoryInfoTy> getMemoryInfo(const void *TgtPtr) override {
     return Plugin::error(ErrorCode::UNIMPLEMENTED,
-                         "get_memory_info not implemented for host device");
+                         "getMemoryInfo not implemented for host device");
   }
 
   const char *getName() const override { return GETNAME(TARGET_NAME); }

>From d2a99ef7081ed92ae4ed334ede1d8bec193daa8d Mon Sep 17 00:00:00 2001
From: Ross Brunton <r...@codeplay.com>
Date: Tue, 9 Sep 2025 12:32:59 +0100
Subject: [PATCH 5/6] Properly support "NOT_FOUND" in amdgpu

---
 offload/plugins-nextgen/amdgpu/src/rtl.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp 
b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
index b4eb55506927f..e06e4c63c9cda 100644
--- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp
+++ b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -3570,6 +3570,8 @@ struct AMDGPUPluginTy final : public GenericPluginTy {
             hsa_amd_pointer_info(TgtPtr, &Info, nullptr, nullptr, nullptr),
             "error in hsa_amd_pointer_info: %s"))
       return Err;
+    if (Info.type == HSA_EXT_POINTER_TYPE_UNKNOWN)
+      return Plugin::error(ErrorCode::NOT_FOUND, "could not find allocation");
 
     // The pointer info struct contains an "agent" field, but that doesn't
     // necessarily map to the device that created it

>From e3678fc63d13698153d8448eabb4421bc2703ebc Mon Sep 17 00:00:00 2001
From: Ross Brunton <bruntonr...@protonmail.com>
Date: Fri, 12 Sep 2025 11:25:50 +0100
Subject: [PATCH 6/6] Respond to PR feedback

---
 offload/include/omptarget.h                   |  2 +-
 offload/plugins-nextgen/amdgpu/src/rtl.cpp    | 12 ++++--------
 .../common/include/PluginInterface.h          |  4 ++++
 offload/plugins-nextgen/cuda/src/rtl.cpp      | 19 +++++++++----------
 4 files changed, 18 insertions(+), 19 deletions(-)

diff --git a/offload/include/omptarget.h b/offload/include/omptarget.h
index 197cbd3806d91..4818ff67afefa 100644
--- a/offload/include/omptarget.h
+++ b/offload/include/omptarget.h
@@ -98,7 +98,7 @@ enum OpenMPOffloadingDeclareTargetFlags {
 
 // Note: This type should be no larger than 3 bits, as the amdgpu platform uses
 // the lower 3 bits of a pointer to store it
-enum TargetAllocTy : int32_t {
+enum TargetAllocTy : uint8_t {
   TARGET_ALLOC_DEVICE = 0,
   TARGET_ALLOC_HOST,
   TARGET_ALLOC_SHARED,
diff --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp 
b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
index e06e4c63c9cda..7e6078d834d25 100644
--- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp
+++ b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -3575,15 +3575,11 @@ struct AMDGPUPluginTy final : public GenericPluginTy {
 
     // The pointer info struct contains an "agent" field, but that doesn't
     // necessarily map to the device that created it
-    MemoryInfoTy ToReturn;
-    ToReturn.Base = Info.agentBaseAddress;
-    ToReturn.Size = Info.sizeInBytes;
     auto UserData = hsa_utils::UserDataPair::getFromOpaqueValue(Info.userData);
-    ToReturn.Type = static_cast<TargetAllocTy>(UserData.getInt());
-    ToReturn.Device =
-        reinterpret_cast<GenericDeviceTy *>(UserData.getPointer());
-
-    return ToReturn;
+    return MemoryInfoTy(
+        /*Base=*/Info.agentBaseAddress, /*Size=*/Info.sizeInBytes,
+        /*Type=*/static_cast<TargetAllocTy>(UserData.getInt()),
+        /*Device=*/reinterpret_cast<GenericDeviceTy *>(UserData.getPointer()));
   }
 
 private:
diff --git a/offload/plugins-nextgen/common/include/PluginInterface.h 
b/offload/plugins-nextgen/common/include/PluginInterface.h
index 163b02ac124b8..5f9d125d8fe74 100644
--- a/offload/plugins-nextgen/common/include/PluginInterface.h
+++ b/offload/plugins-nextgen/common/include/PluginInterface.h
@@ -1277,6 +1277,10 @@ struct MemoryInfoTy {
   TargetAllocTy Type;
   GenericDeviceTy *Device;
 
+  MemoryInfoTy(void *Base, size_t Size, TargetAllocTy Type,
+               GenericDeviceTy *Device)
+      : Base(Base), Size(Size), Type(Type), Device(Device) {}
+
   void *limit() const { return reinterpret_cast<char *>(Base) + Size; }
 };
 
diff --git a/offload/plugins-nextgen/cuda/src/rtl.cpp 
b/offload/plugins-nextgen/cuda/src/rtl.cpp
index 2765453504ae9..f0343f5e79af1 100644
--- a/offload/plugins-nextgen/cuda/src/rtl.cpp
+++ b/offload/plugins-nextgen/cuda/src/rtl.cpp
@@ -1541,17 +1541,17 @@ struct CUDAPluginTy final : public GenericPluginTy {
 
     // Fast case, we have been given the base pointer directly
     if (MemoryInfo.contains(TgtPtr))
-      return MemoryInfo[TgtPtr];
+      return MemoryInfo.at(TgtPtr);
 
     // Slower case, we need to look up the base pointer first
     auto Loc = std::lower_bound(MemoryBases.begin(), MemoryBases.end(), TgtPtr,
                                 [&](const void *Iter, const void *Val) {
-                                  return MemoryInfo[Iter].limit() < Val;
+                                  return MemoryInfo.at(Iter).limit() < Val;
                                 });
-    if (Loc == MemoryBases.end() || TgtPtr > MemoryInfo[*Loc].limit())
+    if (Loc == MemoryBases.end() || TgtPtr > MemoryInfo.at(*Loc).limit())
       return Plugin::error(ErrorCode::NOT_FOUND,
                            "allocated memory information not found");
-    return MemoryInfo[*Loc];
+    return MemoryInfo.at(*Loc);
   }
 
 private:
@@ -1612,12 +1612,11 @@ void *CUDADeviceTy::allocate(size_t Size, void *, 
TargetAllocTy Kind) {
         std::lower_bound(CudaPlugin->MemoryBases.begin(),
                          CudaPlugin->MemoryBases.end(), MemAlloc),
         MemAlloc);
-    CudaPlugin->MemoryInfo[MemAlloc] = MemoryInfoTy{
-        /*Base=*/MemAlloc,
-        /*Size=*/Size,
-        /*Type=*/Kind,
-        /*Device=*/this,
-    };
+    CudaPlugin->MemoryInfo.emplace_or_assign(MemAlloc,
+                                             /*Base=*/MemAlloc,
+                                             /*Size=*/Size,
+                                             /*Type=*/Kind,
+                                             /*Device=*/this);
   }
   return MemAlloc;
 }

_______________________________________________
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [Offload] Add GenericPluginTy::get_mem_info (PR #157484)

Reply via email to