================
@@ -845,10 +1128,41 @@ extern "C" int 
__llvm_profile_hip_collect_device_data(void) {
   return Ret;
 }
 
-/* Interceptors for hipModuleLoad* / hipModuleUnload. Linux only. */
+/* Linux HIP interceptors. */
 
 #if defined(__linux__) && !defined(_WIN32)
 
+typedef struct {
+  unsigned int x;
+  unsigned int y;
+  unsigned int z;
+} HipDim3;
+
+typedef void *HipFunction;
+typedef void *HipStream;
+
+static int recordHipLaunchResult(int Rc) {
+  if (Rc == 0)
+    markCurrentDeviceUsed();
+  return Rc;
+}
+
----------------
yxsamliu wrote:

Good point. I added tracking for the other HIP launch entry points, including 
extended, cooperative, graph, stream-per-thread, and module launch APIs.

For launches with a stream, the runtime now uses hipStreamGetDevice to mark the 
launched device. If that is not available, it falls back to the current device. 
For multi-device launch APIs, it marks the device of each launch parameter's stream.

I also tested this on a multi-GPU machine with all devices visible. The runtime 
collected from the launched device and skipped the unused device.

https://github.com/llvm/llvm-project/pull/202095
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to