Author: Yaxun (Sam) Liu
Date: 2026-03-13T23:16:41-04:00
New Revision: a63a548b5ba6eeb3adf55eefcfa08ec742af8550

URL: 
https://github.com/llvm/llvm-project/commit/a63a548b5ba6eeb3adf55eefcfa08ec742af8550
DIFF: 
https://github.com/llvm/llvm-project/commit/a63a548b5ba6eeb3adf55eefcfa08ec742af8550.diff

LOG: [Driver] Enable -ftime-trace for CUDA/HIP device compilation (#179701)

Previously, -ftime-trace only generated trace files for host compilation
when compiling CUDA/HIP code. Device compilation was excluded because
the OffloadingPrefix was non-empty, causing handleTimeTrace() to be
skipped.

This patch enables -ftime-trace for offload device compilation by:
1. Passing the offloading prefix to handleTimeTrace()
2. Including the bound architecture in the trace filename
3. Deriving the trace output directory from the -o option for device
   compilation (since the device output is a temp file)

Trace files are now generated for each offload target:
- Host: output.json
- Device: output-hip-amdgcn-amd-amdhsa-gfx906.json

Note: When using --save-temps, multiple compilation phases (preprocess,
compile, codegen) write to the same trace file, with each phase
overwriting the previous. This is pre-existing behavior that also
affects regular C++ compilation and is not addressed by this patch.

This addresses a long-standing limitation noted in D150282.

Added: 
    clang/test/Driver/ftime-trace-offload.cpp

Modified: 
    clang/lib/Driver/Driver.cpp

Removed: 
    


################################################################################
diff  --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp
index 1a80aedfd8f0a..b7f65b7b74401 100644
--- a/clang/lib/Driver/Driver.cpp
+++ b/clang/lib/Driver/Driver.cpp
@@ -5904,20 +5904,50 @@ static void handleTimeTrace(Compilation &C, const ArgList &Args,
       Args.getLastArg(options::OPT_ftime_trace, options::OPT_ftime_trace_EQ);
   if (!A)
     return;
+
+  SmallString<64> OffloadingPrefix;
+  if (JA->getOffloadingDeviceKind() != Action::OFK_None) {
+    const ToolChain *TC = JA->getOffloadingToolChain();
+    OffloadingPrefix = Action::GetOffloadingFileNamePrefix(
+        JA->getOffloadingDeviceKind(), TC ? TC->getTriple().normalize() : "",
+        /*CreatePrefixForHost=*/false);
+    if (const char *Arch = JA->getOffloadingArch()) {
+      OffloadingPrefix += "-";
+      OffloadingPrefix += Arch;
+    }
+  } else if (JA->getOffloadingHostActiveKinds() != Action::OFK_None &&
+             C.getDriver().isSaveTempsEnabled()) {
+    OffloadingPrefix = Action::GetOffloadingFileNamePrefix(
+        Action::OFK_None, C.getDefaultToolChain().getTriple().normalize(),
+        /*CreatePrefixForHost=*/true);
+  }
+
   SmallString<128> Path;
   if (A->getOption().matches(options::OPT_ftime_trace_EQ)) {
     Path = A->getValue();
     if (llvm::sys::fs::is_directory(Path)) {
-      SmallString<128> Tmp(Result.getFilename());
-      llvm::sys::path::replace_extension(Tmp, "json");
-      llvm::sys::path::append(Path, llvm::sys::path::filename(Tmp));
+      SmallString<128> Tmp(OffloadingPrefix.empty()
+                               ? llvm::sys::path::stem(Result.getFilename())
+                               : llvm::sys::path::stem(BaseInput));
+      Tmp += OffloadingPrefix;
+      Tmp += ".json";
+      llvm::sys::path::append(Path, Tmp);
     }
   } else {
     if (Arg *DumpDir = Args.getLastArgNoClaim(options::OPT_dumpdir)) {
-      // The trace file is ${dumpdir}${basename}.json. Note that dumpdir may not
-      // end with a path separator.
+      // The trace file is ${dumpdir}${basename}${offloadprefix}.json. Note
+      // that dumpdir may not end with a path separator.
       Path = DumpDir->getValue();
-      Path += llvm::sys::path::filename(BaseInput);
+      Path += llvm::sys::path::stem(BaseInput);
+      Path += OffloadingPrefix;
+    } else if (!OffloadingPrefix.empty()) {
+      // For offloading, derive path from -o output directory combined with
+      // the input filename and offload prefix.
+      SmallString<128> TraceName(llvm::sys::path::stem(BaseInput));
+      TraceName += OffloadingPrefix;
+      if (Arg *FinalOutput = Args.getLastArg(options::OPT_o))
+        Path = llvm::sys::path::parent_path(FinalOutput->getValue());
+      llvm::sys::path::append(Path, TraceName);
     } else {
       Path = Result.getFilename();
     }
@@ -6178,7 +6208,7 @@ InputInfoList Driver::BuildJobsForActionNoCache(
                                              AtTopLevel, MultipleArchs,
                                              OffloadingPrefix),
                        BaseInput);
-    if (T->canEmitIR() && OffloadingPrefix.empty())
+    if (T->canEmitIR())
       handleTimeTrace(C, Args, JA, BaseInput, Result);
   }
 

diff  --git a/clang/test/Driver/ftime-trace-offload.cpp b/clang/test/Driver/ftime-trace-offload.cpp
new file mode 100644
index 0000000000000..2305d7bf4e04d
--- /dev/null
+++ b/clang/test/Driver/ftime-trace-offload.cpp
@@ -0,0 +1,37 @@
+// RUN: rm -rf %t && mkdir -p %t && cd %t
+// RUN: mkdir d e f && cp %s d/a.cpp
+
+/// Test HIP offloading: -ftime-trace should generate traces for both host and device.
+// RUN: %clang -### -ftime-trace -ftime-trace-granularity=0 -x hip d/a.cpp --offload-arch=gfx906 --offload-arch=gfx90a \
+// RUN:   -nogpulib -nogpuinc -c -o e/a.o --target=x86_64-linux-gnu 2>&1 \
+// RUN:   | FileCheck %s --check-prefix=HIP
+// HIP: -cc1{{.*}} "-triple" "amdgcn-amd-amdhsa"{{.*}} "-ftime-trace=e{{/|\\\\}}a-hip-amdgcn-amd-amdhsa-gfx906.json"
+// HIP: -cc1{{.*}} "-triple" "amdgcn-amd-amdhsa"{{.*}} "-ftime-trace=e{{/|\\\\}}a-hip-amdgcn-amd-amdhsa-gfx90a.json"
+// HIP: -cc1{{.*}} "-triple" "x86_64{{.*}}"{{.*}} "-ftime-trace=e{{/|\\\\}}a.json"
+
+/// Test HIP offloading with new driver: same output as above.
+// RUN: %clang -### -ftime-trace -ftime-trace-granularity=0 -x hip d/a.cpp --offload-arch=gfx906 --offload-arch=gfx90a \
+// RUN:   -nogpulib -nogpuinc -c -o e/a.o --target=x86_64-linux-gnu --offload-new-driver 2>&1 \
+// RUN:   | FileCheck %s --check-prefix=HIP
+
+/// Test HIP offloading with -ftime-trace=<dir>: traces go to specified directory.
+// RUN: %clang -### -ftime-trace=f -ftime-trace-granularity=0 -x hip d/a.cpp --offload-arch=gfx906 \
+// RUN:   -nogpulib -nogpuinc -c -o e/a.o --target=x86_64-linux-gnu 2>&1 \
+// RUN:   | FileCheck %s --check-prefix=HIP-DIR
+// HIP-DIR: -cc1{{.*}} "-triple" "amdgcn-amd-amdhsa"{{.*}} "-ftime-trace=f{{/|\\\\}}a-hip-amdgcn-amd-amdhsa-gfx906.json"
+// HIP-DIR: -cc1{{.*}} "-triple" "x86_64{{.*}}"{{.*}} "-ftime-trace=f{{/|\\\\}}a.json"
+
+/// Test HIP offloading with --save-temps: both host and device get unique trace files.
+// RUN: %clang -### -ftime-trace -ftime-trace-granularity=0 -x hip d/a.cpp --offload-arch=gfx906 \
+// RUN:   -nogpulib -nogpuinc -c -o e/a.o --target=x86_64-linux-gnu --save-temps 2>&1 \
+// RUN:   | FileCheck %s --check-prefix=HIP-SAVE-TEMPS
+// HIP-SAVE-TEMPS: -cc1{{.*}} "-triple" "amdgcn-amd-amdhsa"{{.*}} "-ftime-trace=e{{/|\\\\}}a-hip-amdgcn-amd-amdhsa-gfx906.json"
+// HIP-SAVE-TEMPS: -cc1{{.*}} "-triple" "x86_64{{.*}}"{{.*}} "-ftime-trace=e{{/|\\\\}}a-host-x86_64-unknown-linux-gnu.json"
+
+/// Test CUDA offloading: -ftime-trace should generate traces for both host and device.
+// RUN: %clang -### -ftime-trace -ftime-trace-granularity=0 -x cuda d/a.cpp --offload-arch=sm_70 --offload-arch=sm_80 \
+// RUN:   -c -o e/a.o --target=x86_64-linux-gnu --cuda-path=%S/Inputs/CUDA_102/usr/local/cuda 2>&1 \
+// RUN:   | FileCheck %s --check-prefix=CUDA
+// CUDA: -cc1{{.*}} "-triple" "nvptx64-nvidia-cuda"{{.*}} "-ftime-trace=e{{/|\\\\}}a-cuda-nvptx64-nvidia-cuda-sm_70.json"
+// CUDA: -cc1{{.*}} "-triple" "nvptx64-nvidia-cuda"{{.*}} "-ftime-trace=e{{/|\\\\}}a-cuda-nvptx64-nvidia-cuda-sm_80.json"
+// CUDA: -cc1{{.*}} "-triple" "x86_64{{.*}}"{{.*}} "-ftime-trace=e{{/|\\\\}}a.json"


        
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to