https://github.com/yxsamliu updated https://github.com/llvm/llvm-project/pull/179701
>From 5efb56479a8cd5e2e32592e8fd6da2c96d6a6a95 Mon Sep 17 00:00:00 2001 From: "Yaxun (Sam) Liu" <[email protected]> Date: Sat, 28 Feb 2026 21:09:59 -0500 Subject: [PATCH 1/2] [Driver] Enable -ftime-trace for CUDA/HIP device compilation --- clang/lib/Driver/Driver.cpp | 50 +++++++++++++++++++---- clang/test/Driver/ftime-trace-offload.cpp | 37 +++++++++++++++++ 2 files changed, 80 insertions(+), 7 deletions(-) create mode 100644 clang/test/Driver/ftime-trace-offload.cpp diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index e4ec28753c594..7f7ead445c3a4 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -5858,20 +5858,56 @@ static void handleTimeTrace(Compilation &C, const ArgList &Args, Args.getLastArg(options::OPT_ftime_trace, options::OPT_ftime_trace_EQ); if (!A) return; + + std::string OffloadingPrefix; + if (JA->getOffloadingDeviceKind() != Action::OFK_None) { + const ToolChain *TC = JA->getOffloadingToolChain(); + OffloadingPrefix = Action::GetOffloadingFileNamePrefix( + JA->getOffloadingDeviceKind(), TC ? TC->getTriple().normalize() : "", + /*CreatePrefixForHost=*/false); + if (const char *Arch = JA->getOffloadingArch()) { + OffloadingPrefix += "-"; + OffloadingPrefix += Arch; + } + } else if (JA->getOffloadingHostActiveKinds() != Action::OFK_None && + C.getDriver().isSaveTempsEnabled()) { + OffloadingPrefix = Action::GetOffloadingFileNamePrefix( + Action::OFK_None, C.getDefaultToolChain().getTriple().normalize(), + /*CreatePrefixForHost=*/true); + } + SmallString<128> Path; if (A->getOption().matches(options::OPT_ftime_trace_EQ)) { Path = A->getValue(); if (llvm::sys::fs::is_directory(Path)) { - SmallString<128> Tmp(Result.getFilename()); - llvm::sys::path::replace_extension(Tmp, "json"); - llvm::sys::path::append(Path, llvm::sys::path::filename(Tmp)); + // When -ftime-trace=<dir> and it's a directory: + // - For host/non-offload: use the output filename stem + // - For offload: use input filename stem + offloading prefix + SmallString<128> Tmp; + if (OffloadingPrefix.empty()) { + Tmp = llvm::sys::path::stem(Result.getFilename()); + } else { + Tmp = llvm::sys::path::stem(BaseInput); + Tmp += OffloadingPrefix; + } + Tmp += ".json"; + llvm::sys::path::append(Path, Tmp); } } else { if (Arg *DumpDir = Args.getLastArgNoClaim(options::OPT_dumpdir)) { - // The trace file is ${dumpdir}${basename}.json. Note that dumpdir may not - // end with a path separator. + // The trace file is ${dumpdir}${basename}${offloadprefix}.json. Note + // that dumpdir may not end with a path separator. Path = DumpDir->getValue(); - Path += llvm::sys::path::filename(BaseInput); + Path += llvm::sys::path::stem(BaseInput); + Path += OffloadingPrefix; + } else if (!OffloadingPrefix.empty()) { + // For offloading, derive path from -o output directory combined with + // the input filename and offload prefix. + SmallString<128> TraceName(llvm::sys::path::stem(BaseInput)); + TraceName += OffloadingPrefix; + if (Arg *FinalOutput = Args.getLastArg(options::OPT_o)) + Path = llvm::sys::path::parent_path(FinalOutput->getValue()); + llvm::sys::path::append(Path, TraceName); } else { Path = Result.getFilename(); } @@ -6132,7 +6168,7 @@ InputInfoList Driver::BuildJobsForActionNoCache( AtTopLevel, MultipleArchs, OffloadingPrefix), BaseInput); - if (T->canEmitIR() && OffloadingPrefix.empty()) + if (T->canEmitIR()) handleTimeTrace(C, Args, JA, BaseInput, Result); } diff --git a/clang/test/Driver/ftime-trace-offload.cpp b/clang/test/Driver/ftime-trace-offload.cpp new file mode 100644 index 0000000000000..224a21ca2173a --- /dev/null +++ b/clang/test/Driver/ftime-trace-offload.cpp @@ -0,0 +1,37 @@ +// RUN: rm -rf %t && mkdir -p %t && cd %t +// RUN: mkdir d e f && cp %s d/a.cpp + +/// Test HIP offloading: -ftime-trace should generate traces for both host and device. +// RUN: %clang -### -ftime-trace -ftime-trace-granularity=0 -x hip d/a.cpp --offload-arch=gfx906 --offload-arch=gfx90a \ +// RUN: -nogpulib -nogpuinc -c -o e/a.o --target=x86_64-linux-gnu 2>&1 \ +// RUN: | FileCheck %s --check-prefix=HIP +// HIP: -cc1{{.*}} "-triple" "amdgcn-amd-amdhsa"{{.*}} "-ftime-trace=e/a-hip-amdgcn-amd-amdhsa-gfx906.json" +// HIP: -cc1{{.*}} "-triple" "amdgcn-amd-amdhsa"{{.*}} "-ftime-trace=e/a-hip-amdgcn-amd-amdhsa-gfx90a.json" +// HIP: -cc1{{.*}} "-triple" "x86_64{{.*}}"{{.*}} "-ftime-trace=e/a.json" + +/// Test HIP offloading with new driver: same output as above. +// RUN: %clang -### -ftime-trace -ftime-trace-granularity=0 -x hip d/a.cpp --offload-arch=gfx906 --offload-arch=gfx90a \ +// RUN: -nogpulib -nogpuinc -c -o e/a.o --target=x86_64-linux-gnu --offload-new-driver 2>&1 \ +// RUN: | FileCheck %s --check-prefix=HIP + +/// Test HIP offloading with -ftime-trace=<dir>: traces go to specified directory. +// RUN: %clang -### -ftime-trace=f -ftime-trace-granularity=0 -x hip d/a.cpp --offload-arch=gfx906 \ +// RUN: -nogpulib -nogpuinc -c -o e/a.o --target=x86_64-linux-gnu 2>&1 \ +// RUN: | FileCheck %s --check-prefix=HIP-DIR +// HIP-DIR: -cc1{{.*}} "-triple" "amdgcn-amd-amdhsa"{{.*}} "-ftime-trace=f{{/|\\\\}}a-hip-amdgcn-amd-amdhsa-gfx906.json" +// HIP-DIR: -cc1{{.*}} "-triple" "x86_64{{.*}}"{{.*}} "-ftime-trace=f{{/|\\\\}}a.json" + +/// Test HIP offloading with --save-temps: both host and device get unique trace files. +// RUN: %clang -### -ftime-trace -ftime-trace-granularity=0 -x hip d/a.cpp --offload-arch=gfx906 \ +// RUN: -nogpulib -nogpuinc -c -o e/a.o --target=x86_64-linux-gnu --save-temps 2>&1 \ +// RUN: | FileCheck %s --check-prefix=HIP-SAVE-TEMPS +// HIP-SAVE-TEMPS: -cc1{{.*}} "-triple" "amdgcn-amd-amdhsa"{{.*}} "-ftime-trace=e/a-hip-amdgcn-amd-amdhsa-gfx906.json" +// HIP-SAVE-TEMPS: -cc1{{.*}} "-triple" "x86_64{{.*}}"{{.*}} "-ftime-trace=e/a-host-x86_64-unknown-linux-gnu.json" + +/// Test CUDA offloading: -ftime-trace should generate traces for both host and device. +// RUN: %clang -### -ftime-trace -ftime-trace-granularity=0 -x cuda d/a.cpp --offload-arch=sm_70 --offload-arch=sm_80 \ +// RUN: -c -o e/a.o --target=x86_64-linux-gnu --cuda-path=%S/Inputs/CUDA_102/usr/local/cuda 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CUDA +// CUDA: -cc1{{.*}} "-triple" "nvptx64-nvidia-cuda"{{.*}} "-ftime-trace=e/a-cuda-nvptx64-nvidia-cuda-sm_70.json" +// CUDA: -cc1{{.*}} "-triple" "nvptx64-nvidia-cuda"{{.*}} "-ftime-trace=e/a-cuda-nvptx64-nvidia-cuda-sm_80.json" +// CUDA: -cc1{{.*}} "-triple" "x86_64{{.*}}"{{.*}} "-ftime-trace=e/a.json" >From 76c5606a0b2cdc987676bc7b850cfa227b6b3a21 Mon Sep 17 00:00:00 2001 From: "Yaxun (Sam) Liu" <[email protected]> Date: Sun, 1 Mar 2026 09:53:30 -0500 Subject: [PATCH 2/2] Fix Windows path separator in ftime-trace-offload test Use {{/|\\\\}} pattern to match both forward slash (Linux) and backslash (Windows) in FileCheck patterns, consistent with the HIP-DIR checks that already handled this correctly. --- clang/test/Driver/ftime-trace-offload.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/clang/test/Driver/ftime-trace-offload.cpp b/clang/test/Driver/ftime-trace-offload.cpp index 224a21ca2173a..2305d7bf4e04d 100644 --- a/clang/test/Driver/ftime-trace-offload.cpp +++ b/clang/test/Driver/ftime-trace-offload.cpp @@ -5,9 +5,9 @@ // RUN: %clang -### -ftime-trace -ftime-trace-granularity=0 -x hip d/a.cpp --offload-arch=gfx906 --offload-arch=gfx90a \ // RUN: -nogpulib -nogpuinc -c -o e/a.o --target=x86_64-linux-gnu 2>&1 \ // RUN: | FileCheck %s --check-prefix=HIP -// HIP: -cc1{{.*}} "-triple" "amdgcn-amd-amdhsa"{{.*}} "-ftime-trace=e/a-hip-amdgcn-amd-amdhsa-gfx906.json" -// HIP: -cc1{{.*}} "-triple" "amdgcn-amd-amdhsa"{{.*}} "-ftime-trace=e/a-hip-amdgcn-amd-amdhsa-gfx90a.json" -// HIP: -cc1{{.*}} "-triple" "x86_64{{.*}}"{{.*}} "-ftime-trace=e/a.json" +// HIP: -cc1{{.*}} "-triple" "amdgcn-amd-amdhsa"{{.*}} "-ftime-trace=e{{/|\\\\}}a-hip-amdgcn-amd-amdhsa-gfx906.json" +// HIP: -cc1{{.*}} "-triple" "amdgcn-amd-amdhsa"{{.*}} "-ftime-trace=e{{/|\\\\}}a-hip-amdgcn-amd-amdhsa-gfx90a.json" +// HIP: -cc1{{.*}} "-triple" "x86_64{{.*}}"{{.*}} "-ftime-trace=e{{/|\\\\}}a.json" /// Test HIP offloading with new driver: same output as above. // RUN: %clang -### -ftime-trace -ftime-trace-granularity=0 -x hip d/a.cpp --offload-arch=gfx906 --offload-arch=gfx90a \ @@ -25,13 +25,13 @@ // RUN: %clang -### -ftime-trace -ftime-trace-granularity=0 -x hip d/a.cpp --offload-arch=gfx906 \ // RUN: -nogpulib -nogpuinc -c -o e/a.o --target=x86_64-linux-gnu --save-temps 2>&1 \ // RUN: | FileCheck %s --check-prefix=HIP-SAVE-TEMPS -// HIP-SAVE-TEMPS: -cc1{{.*}} "-triple" "amdgcn-amd-amdhsa"{{.*}} "-ftime-trace=e/a-hip-amdgcn-amd-amdhsa-gfx906.json" -// HIP-SAVE-TEMPS: -cc1{{.*}} "-triple" "x86_64{{.*}}"{{.*}} "-ftime-trace=e/a-host-x86_64-unknown-linux-gnu.json" +// HIP-SAVE-TEMPS: -cc1{{.*}} "-triple" "amdgcn-amd-amdhsa"{{.*}} "-ftime-trace=e{{/|\\\\}}a-hip-amdgcn-amd-amdhsa-gfx906.json" +// HIP-SAVE-TEMPS: -cc1{{.*}} "-triple" "x86_64{{.*}}"{{.*}} "-ftime-trace=e{{/|\\\\}}a-host-x86_64-unknown-linux-gnu.json" /// Test CUDA offloading: -ftime-trace should generate traces for both host and device. // RUN: %clang -### -ftime-trace -ftime-trace-granularity=0 -x cuda d/a.cpp --offload-arch=sm_70 --offload-arch=sm_80 \ // RUN: -c -o e/a.o --target=x86_64-linux-gnu --cuda-path=%S/Inputs/CUDA_102/usr/local/cuda 2>&1 \ // RUN: | FileCheck %s --check-prefix=CUDA -// CUDA: -cc1{{.*}} "-triple" "nvptx64-nvidia-cuda"{{.*}} "-ftime-trace=e/a-cuda-nvptx64-nvidia-cuda-sm_70.json" -// CUDA: -cc1{{.*}} "-triple" "nvptx64-nvidia-cuda"{{.*}} "-ftime-trace=e/a-cuda-nvptx64-nvidia-cuda-sm_80.json" -// CUDA: -cc1{{.*}} "-triple" "x86_64{{.*}}"{{.*}} "-ftime-trace=e/a.json" +// CUDA: -cc1{{.*}} "-triple" "nvptx64-nvidia-cuda"{{.*}} "-ftime-trace=e{{/|\\\\}}a-cuda-nvptx64-nvidia-cuda-sm_70.json" +// CUDA: -cc1{{.*}} "-triple" "nvptx64-nvidia-cuda"{{.*}} "-ftime-trace=e{{/|\\\\}}a-cuda-nvptx64-nvidia-cuda-sm_80.json" +// CUDA: -cc1{{.*}} "-triple" "x86_64{{.*}}"{{.*}} "-ftime-trace=e{{/|\\\\}}a.json" _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
