================
@@ -0,0 +1,565 @@
+//===- InstrProfilingPlatformROCm.c - Profile data ROCm platform ---------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "InstrProfiling.h"
+#include "InstrProfilingInternal.h"
+#include "InstrProfilingPort.h"
+#include <dlfcn.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+static int ProcessDeviceOffloadPrf(void *DeviceOffloadPrf, int TUIndex);
+
+static int IsVerboseMode() {
+  static int IsVerbose = -1;
+  if (IsVerbose == -1)
+    IsVerbose = getenv("LLVM_PROFILE_VERBOSE") != NULL;
+  return IsVerbose;
+}
+
+/* -------------------------------------------------------------------------- 
*/
+/*  Dynamic loading of HIP runtime symbols                                   */
+/* -------------------------------------------------------------------------- 
*/
+
+typedef int (*hipMemcpyFromSymbolTy)(void *, const void *, size_t, size_t, 
int);
+typedef int (*hipGetSymbolAddressTy)(void **, const void *);
+typedef int (*hipMemcpyTy)(void *, void *, size_t, int);
+typedef int (*hipModuleGetGlobalTy)(void **, size_t *, void *, const char *);
+
+static hipMemcpyFromSymbolTy pHipMemcpyFromSymbol = NULL;
+static hipGetSymbolAddressTy pHipGetSymbolAddress = NULL;
+static hipMemcpyTy pHipMemcpy = NULL;
+static hipModuleGetGlobalTy pHipModuleGetGlobal = NULL;
+
+/* -------------------------------------------------------------------------- 
*/
+/*  Device-to-host copies                                                     
*/
+/*  Keep HIP-only to avoid an HSA dependency.                                 
*/
+/* -------------------------------------------------------------------------- 
*/
+
+static void EnsureHipLoaded(void) {
+  static int Initialized = 0;
+  if (Initialized)
+    return;
+  Initialized = 1;
+
+  void *Handle = dlopen("libamdhip64.so", RTLD_LAZY | RTLD_LOCAL);
+  if (!Handle) {
+    fprintf(stderr, "compiler-rt: failed to open libamdhip64.so: %s\n",
+            dlerror());
+    return;
+  }
+
+  pHipMemcpyFromSymbol =
+      (hipMemcpyFromSymbolTy)dlsym(Handle, "hipMemcpyFromSymbol");
+  pHipGetSymbolAddress =
+      (hipGetSymbolAddressTy)dlsym(Handle, "hipGetSymbolAddress");
+  pHipMemcpy = (hipMemcpyTy)dlsym(Handle, "hipMemcpy");
+  pHipModuleGetGlobal =
+      (hipModuleGetGlobalTy)dlsym(Handle, "hipModuleGetGlobal");
+}
+
+/* -------------------------------------------------------------------------- 
*/
+/*  Public wrappers that forward to the loaded HIP symbols                   */
+/* -------------------------------------------------------------------------- 
*/
+
+static int hipMemcpyFromSymbol(void *dst, const void *symbol, size_t sizeBytes,
+                               size_t offset, int kind) {
+  EnsureHipLoaded();
+  return pHipMemcpyFromSymbol
+             ? pHipMemcpyFromSymbol(dst, symbol, sizeBytes, offset, kind)
+             : -1;
+}
+
+static int hipGetSymbolAddress(void **devPtr, const void *symbol) {
+  EnsureHipLoaded();
+  return pHipGetSymbolAddress ? pHipGetSymbolAddress(devPtr, symbol) : -1;
+}
+
+static int hipMemcpy(void *dest, void *src, size_t len, int kind /*2=DToH*/) {
+  EnsureHipLoaded();
+  return pHipMemcpy ? pHipMemcpy(dest, src, len, kind) : -1;
+}
+
+/* Copy from device to host using HIP.
+ * This requires that the device section symbols are registered with CLR,
+ * otherwise hipMemcpy may attempt a CPU path and crash. */
+static int memcpyDeviceToHost(void *Dst, void *Src, size_t Size) {
+  return hipMemcpy(Dst, Src, Size, 2 /* DToH */);
+}
+
+static int hipModuleGetGlobal(void **DevPtr, size_t *Bytes, void *Module,
+                              const char *Name) {
+  EnsureHipLoaded();
+  return pHipModuleGetGlobal ? pHipModuleGetGlobal(DevPtr, Bytes, Module, Name)
+                             : -1;
+}
+
+/* -------------------------------------------------------------------------- 
*/
+/*  Dynamic module tracking                                                   
*/
+/* -------------------------------------------------------------------------- 
*/
+
+#define MAX_DYNAMIC_MODULES 256
+
+typedef struct {
+  void *ModulePtr; /* hipModule_t returned by hipModuleLoad            */
+  void *DeviceVar; /* address of __llvm_offload_prf in this module     */
+  int Processed;   /* 0 = not yet collected, 1 = data already copied   */
+} HipDynamicModuleInfo;
+
+static HipDynamicModuleInfo DynamicModules[MAX_DYNAMIC_MODULES];
+static int NumDynamicModules = 0;
+
+/* -------------------------------------------------------------------------- 
*/
+/*  Registration / un-registration helpers                                   */
+/* -------------------------------------------------------------------------- 
*/
+
+void __llvm_profile_hip_register_dynamic_module(int ModuleLoadRc, void **Ptr) {
+  if (IsVerboseMode())
+    PROF_NOTE("Registering loaded module %d: rc=%d, module=%p\n",
+              NumDynamicModules, ModuleLoadRc, *Ptr);
+
+  if (ModuleLoadRc)
+    return;
+
+  if (NumDynamicModules >= MAX_DYNAMIC_MODULES) {
+    PROF_ERR("Too many dynamic modules registered. Maximum is %d.\n",
+             MAX_DYNAMIC_MODULES);
+    return;
+  }
+
+  HipDynamicModuleInfo *Info = &DynamicModules[NumDynamicModules++];
+  Info->ModulePtr = *Ptr;
+  Info->DeviceVar = NULL;
+  Info->Processed = 0;
+
+  size_t Bytes = 0;
+  if (hipModuleGetGlobal(&Info->DeviceVar, &Bytes, *Ptr,
+                         "__llvm_offload_prf") != 0) {
+    PROF_WARN("Failed to get symbol __llvm_offload_prf for module %p\n", *Ptr);
+    /* Leave DeviceVar NULL so later code can recognise the failure */
+    return;
+  }
+
+  if (IsVerboseMode())
+    PROF_NOTE("Module %p: Device profile var %p\n", *Ptr, Info->DeviceVar);
+}
+
+void __llvm_profile_hip_unregister_dynamic_module(void *Ptr) {
+  for (int i = 0; i < NumDynamicModules; ++i) {
+    HipDynamicModuleInfo *Info = &DynamicModules[i];
+
+    if (Info->ModulePtr == Ptr) {
+      if (IsVerboseMode())
+        PROF_NOTE("Unregistering module %p (DeviceVar=%p, Processed=%d)\n",
+                  Info->ModulePtr, Info->DeviceVar, Info->Processed);
+
+      if (Info->Processed) {
+        PROF_WARN("Module %p has already been unregistered or processed\n",
+                  Ptr);
+        return;
+      }
+
+      if (Info->DeviceVar) {
+        // Use module index as TU index for dynamic modules
+        // to ensure each module gets a unique profile file
+        if (ProcessDeviceOffloadPrf(Info->DeviceVar, i) == 0)
+          Info->Processed = 1;
+        else
+          PROF_WARN(
+              "Failed to process profile data for module %p on unregister\n",
+              Ptr);
+      } else {
+        PROF_WARN("Module %p has no device profile variable to process\n", 
Ptr);
+      }
+      return;
+    }
+  }
+
+  if (IsVerboseMode())
+    PROF_WARN("Unregister called for unknown module %p\n", Ptr);
+}
+
+#define MAX_SHADOW_VARIABLES 256
+static void *HipShadowVariables[MAX_SHADOW_VARIABLES];
+static int NumShadowVariables = 0;
+
+void __llvm_profile_hip_register_shadow_variable(void *ptr) {
+  if (NumShadowVariables >= MAX_SHADOW_VARIABLES) {
+    PROF_ERR("Too many shadow variables registered. Maximum is %d.\n",
+             MAX_SHADOW_VARIABLES);
+    return;
+  }
+  if (IsVerboseMode())
+    PROF_NOTE("Registering shadow variable %d: %p\n", NumShadowVariables, ptr);
+  HipShadowVariables[NumShadowVariables++] = ptr;
+}
+
+#define MAX_SECTION_SHADOW_VARIABLES 1024
+static void *HipSectionShadowVariables[MAX_SECTION_SHADOW_VARIABLES];
+static int NumSectionShadowVariables = 0;
+
+void __llvm_profile_hip_register_section_shadow_variable(void *ptr) {
+  if (NumSectionShadowVariables >= MAX_SECTION_SHADOW_VARIABLES) {
+    PROF_ERR("Too many section shadow variables registered. Maximum is %d.\n",
+             MAX_SECTION_SHADOW_VARIABLES);
+    return;
+  }
+  if (IsVerboseMode())
+    PROF_NOTE("Registering section shadow variable %d: %p\n",
+              NumSectionShadowVariables, ptr);
+  HipSectionShadowVariables[NumSectionShadowVariables++] = ptr;
+}
+
+static int ProcessDeviceOffloadPrf(void *DeviceOffloadPrf, int TUIndex) {
+  void *HostOffloadPrf[8];
+
+  if (IsVerboseMode())
+    PROF_NOTE("HostOffloadPrf buffer size: %zu bytes\n",
+              sizeof(HostOffloadPrf));
+
+  if (hipMemcpy(HostOffloadPrf, DeviceOffloadPrf, sizeof(HostOffloadPrf),
+                2 /*DToH*/) != 0) {
+    PROF_ERR("%s\n", "Failed to copy offload prf structure from device");
+    return -1;
+  }
+
+  void *DevCntsBegin = HostOffloadPrf[0];
+  void *DevDataBegin = HostOffloadPrf[1];
+  void *DevNamesBegin = HostOffloadPrf[2];
+  void *DevUniformCntsBegin = HostOffloadPrf[3];
+  void *DevCntsEnd = HostOffloadPrf[4];
+  void *DevDataEnd = HostOffloadPrf[5];
+  void *DevNamesEnd = HostOffloadPrf[6];
+  void *DevUniformCntsEnd = HostOffloadPrf[7];
+
+  if (IsVerboseMode()) {
+    PROF_NOTE("%s", "Device Profile Pointers:\n");
+    PROF_NOTE("  Counters:        %p - %p\n", DevCntsBegin, DevCntsEnd);
+    PROF_NOTE("  Data:            %p - %p\n", DevDataBegin, DevDataEnd);
+    PROF_NOTE("  Names:           %p - %p\n", DevNamesBegin, DevNamesEnd);
+    PROF_NOTE("  UniformCounters: %p - %p\n", DevUniformCntsBegin,
+              DevUniformCntsEnd);
+  }
+
+  size_t CountersSize = (char *)DevCntsEnd - (char *)DevCntsBegin;
+  size_t DataSize = (char *)DevDataEnd - (char *)DevDataBegin;
+  size_t NamesSize = (char *)DevNamesEnd - (char *)DevNamesBegin;
+  size_t UniformCountersSize =
+      (char *)DevUniformCntsEnd - (char *)DevUniformCntsBegin;
+
+  if (IsVerboseMode()) {
+    PROF_NOTE("Section sizes: Counters=%zu, Data=%zu, Names=%zu, "
+              "UniformCounters=%zu\n",
+              CountersSize, DataSize, NamesSize, UniformCountersSize);
+  }
+
+  if (CountersSize == 0 || DataSize == 0) {
+    if (IsVerboseMode())
+      PROF_NOTE("%s\n", "Counters or Data section has zero size. No profile "
+                        "data to collect.");
+    return 0;
+  }
+
+  // Pre-register device section symbols with CLR memory tracking.
+  // This makes the section base pointers (and sub-pointers) safe for 
hipMemcpy.
+  if (IsVerboseMode())
+    PROF_NOTE("Pre-registering %d section symbols\n",
+              NumSectionShadowVariables);
+  for (int i = 0; i < NumSectionShadowVariables; ++i) {
+    void *DevPtr = NULL;
+    (void)hipGetSymbolAddress(&DevPtr, HipSectionShadowVariables[i]);
+  }
+
+  char *DeviceFilename = NULL;
+  FILE *File = NULL;
+  int ret = -1;
+
+  // Allocate host memory for the device sections
+  char *HostCountersBegin = (char *)malloc(CountersSize);
+  char *HostDataBegin = (char *)malloc(DataSize);
+  char *HostNamesBegin = (char *)malloc(NamesSize);
+  char *HostUniformCountersBegin =
+      (UniformCountersSize > 0) ? (char *)malloc(UniformCountersSize) : NULL;
+
+  if (!HostCountersBegin || !HostDataBegin ||
+      (NamesSize > 0 && !HostNamesBegin) ||
+      (UniformCountersSize > 0 && !HostUniformCountersBegin)) {
+    PROF_ERR("%s\n", "Failed to allocate host memory for device sections");
+    goto cleanup;
+  }
+
+  // Copy data from device to host using HIP.
+  if (memcpyDeviceToHost(HostCountersBegin, DevCntsBegin, CountersSize) != 0 ||
+      memcpyDeviceToHost(HostDataBegin, DevDataBegin, DataSize) != 0 ||
+      (NamesSize > 0 &&
+       memcpyDeviceToHost(HostNamesBegin, DevNamesBegin, NamesSize) != 0) ||
+      (UniformCountersSize > 0 &&
+       memcpyDeviceToHost(HostUniformCountersBegin, DevUniformCntsBegin,
+                          UniformCountersSize) != 0)) {
+    PROF_ERR("%s\n", "Failed to copy profile sections from device");
+    goto cleanup;
+  }
+
+  if (IsVerboseMode())
+    PROF_NOTE("Copied device sections: Counters=%zu, Data=%zu, Names=%zu, "
+              "UniformCounters=%zu\n",
+              CountersSize, DataSize, NamesSize, UniformCountersSize);
+
+  if (IsVerboseMode() && UniformCountersSize > 0) {
+    PROF_NOTE("Successfully copied %zu bytes of uniform counters from 
device\n",
+              UniformCountersSize);
+  }
+
+  // Construct the device-specific filename
+  // Format: <base>.<target>[.<TUIndex>].<ext>
+  // TUIndex is included when >= 0 to support multi-TU programs
+  char *BaseFilename = (char *)__llvm_profile_get_filename();
+  if (!BaseFilename) {
+    PROF_ERR("%s\n", "Failed to get base profile filename");
+    goto cleanup;
+  }
+  if (IsVerboseMode())
+    PROF_NOTE("Base profile filename: %s\n", BaseFilename);
+
+  const char *TargetInfix = "amdgcn-amd-amdhsa";
+  const char *Extension = strrchr(BaseFilename, '.');
+  char TUIndexStr[16] = "";
+  if (TUIndex >= 0) {
+    snprintf(TUIndexStr, sizeof(TUIndexStr), ".%d", TUIndex);
+  }
+
+  if (Extension) {
+    size_t BaseLen = Extension - BaseFilename;
+    size_t InfixLen = strlen(TargetInfix);
+    size_t TUIndexLen = strlen(TUIndexStr);
+    size_t ExtLen = strlen(Extension);
+    DeviceFilename =
+        (char *)malloc(BaseLen + 1 + InfixLen + TUIndexLen + ExtLen + 1);
+    strncpy(DeviceFilename, BaseFilename, BaseLen);
+    DeviceFilename[BaseLen] = '\0';
+    strcat(DeviceFilename, ".");
+    strcat(DeviceFilename, TargetInfix);
+    strcat(DeviceFilename, TUIndexStr);
+    strcat(DeviceFilename, Extension);
+  } else {
+    DeviceFilename =
+        (char *)malloc(strlen(BaseFilename) + 1 + strlen(TargetInfix) +
+                       strlen(TUIndexStr) + 1);
+    strcpy(DeviceFilename, BaseFilename);
+    strcat(DeviceFilename, ".");
+    strcat(DeviceFilename, TargetInfix);
+    strcat(DeviceFilename, TUIndexStr);
+  }
+  free(BaseFilename);
+
+  if (IsVerboseMode())
+    PROF_NOTE("Device profile filename: %s\n", DeviceFilename);
+
+  // Manually write the profile data with a proper header
+  File = fopen(DeviceFilename, "w");
+  if (!File) {
+    PROF_ERR("Failed to open %s for writing\n", DeviceFilename);
+    goto cleanup;
+  }
+
+  __llvm_profile_header Header;
+  const uint64_t NumData = DataSize / sizeof(__llvm_profile_data);
+  const uint64_t NumCounters = CountersSize / sizeof(uint64_t);
+  const uint64_t NumBitmapBytes = 0;
+  const uint64_t VTableSectionSize = 0;
+  const uint64_t VNamesSize = 0;
+  uint64_t PaddingBytesBeforeCounters, PaddingBytesAfterCounters,
+      PaddingBytesAfterBitmapBytes, PaddingBytesAfterNames,
+      PaddingBytesAfterVTable, PaddingBytesAfterVNames;
+
+  if (__llvm_profile_get_padding_sizes_for_counters(
+          DataSize, CountersSize, NumBitmapBytes, NamesSize, VTableSectionSize,
+          VNamesSize, &PaddingBytesBeforeCounters, &PaddingBytesAfterCounters,
+          &PaddingBytesAfterBitmapBytes, &PaddingBytesAfterNames,
+          &PaddingBytesAfterVTable, &PaddingBytesAfterVNames) != 0) {
+    PROF_ERR("%s\n", "Failed to get padding sizes");
+    goto cleanup;
+  }
+
+  // Relocate pointers
+  __llvm_profile_data *RelocatedData = (__llvm_profile_data *)HostDataBegin;
+  for (uint64_t i = 0; i < NumData; ++i) {
+    if (RelocatedData[i].CounterPtr) {
+      ptrdiff_t DeviceCounterPtrOffset = 
(ptrdiff_t)RelocatedData[i].CounterPtr;
+      void *DeviceDataStructAddr =
+          (char *)DevDataBegin + (i * sizeof(__llvm_profile_data));
+      void *DeviceCountersAddr =
+          (char *)DeviceDataStructAddr + DeviceCounterPtrOffset;
+      ptrdiff_t OffsetIntoCountersSection =
+          (char *)DeviceCountersAddr - (char *)DevCntsBegin;
+
+      ptrdiff_t NewRelativeOffset = DataSize + PaddingBytesBeforeCounters +
+                                    OffsetIntoCountersSection -
+                                    (i * sizeof(__llvm_profile_data));
+      memcpy(&RelocatedData[i].CounterPtr, &NewRelativeOffset,
+             sizeof(NewRelativeOffset));
+    }
+    uint64_t Zero = 0;
+    memcpy(&RelocatedData[i].BitmapPtr, &Zero, sizeof(Zero));
+    memcpy(&RelocatedData[i].FunctionPointer, &Zero, sizeof(Zero));
+    memcpy(&RelocatedData[i].Values, &Zero, sizeof(Zero));
----------------
arsenm wrote:

memset? 

https://github.com/llvm/llvm-project/pull/177665
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to