Author: Yaxun (Sam) Liu
Date: 2026-03-13T23:16:41-04:00
New Revision: a63a548b5ba6eeb3adf55eefcfa08ec742af8550
URL: https://github.com/llvm/llvm-project/commit/a63a548b5ba6eeb3adf55eefcfa08ec742af8550
DIFF: https://github.com/llvm/llvm-project/commit/a63a548b5ba6eeb3adf55eefcfa08ec742af8550.diff

LOG: [Driver] Enable -ftime-trace for CUDA/HIP device compilation (#179701)

Previously, -ftime-trace only generated trace files for host compilation when compiling CUDA/HIP code. Device compilation was excluded because the OffloadingPrefix was non-empty, causing handleTimeTrace() to be skipped.

This patch enables -ftime-trace for offload device compilation by:
1. Passing the offloading prefix to handleTimeTrace()
2. Including the bound architecture in the trace filename
3. Deriving the trace output directory from the -o option for device compilation (since the device output is a temp file)

Trace files are now generated for each offload target:
- Host: output.json
- Device: output-hip-amdgcn-amd-amdhsa-gfx906.json

Note: When using --save-temps, multiple compilation phases (preprocess, compile, codegen) write to the same trace file, with each phase overwriting the previous. This is pre-existing behavior that also affects regular C++ compilation and is not addressed by this patch.

This addresses a long-standing limitation noted in D150282.

Added: clang/test/Driver/ftime-trace-offload.cpp Modified: clang/lib/Driver/Driver.cpp Removed: ################################################################################ diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 1a80aedfd8f0a..b7f65b7b74401 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -5904,20 +5904,50 @@ static void handleTimeTrace(Compilation &C, const ArgList &Args, Args.getLastArg(options::OPT_ftime_trace, options::OPT_ftime_trace_EQ); if (!A) return; + + SmallString<64> OffloadingPrefix; + if (JA->getOffloadingDeviceKind() != Action::OFK_None) { + const ToolChain *TC = JA->getOffloadingToolChain(); + OffloadingPrefix = Action::GetOffloadingFileNamePrefix( + JA->getOffloadingDeviceKind(), TC ? TC->getTriple().normalize() : "", + /*CreatePrefixForHost=*/false); + if (const char *Arch = JA->getOffloadingArch()) { + OffloadingPrefix += "-"; + OffloadingPrefix += Arch; + } + } else if (JA->getOffloadingHostActiveKinds() != Action::OFK_None && + C.getDriver().isSaveTempsEnabled()) { + OffloadingPrefix = Action::GetOffloadingFileNamePrefix( + Action::OFK_None, C.getDefaultToolChain().getTriple().normalize(), + /*CreatePrefixForHost=*/true); + } + SmallString<128> Path; if (A->getOption().matches(options::OPT_ftime_trace_EQ)) { Path = A->getValue(); if (llvm::sys::fs::is_directory(Path)) { - SmallString<128> Tmp(Result.getFilename()); - llvm::sys::path::replace_extension(Tmp, "json"); - llvm::sys::path::append(Path, llvm::sys::path::filename(Tmp)); + SmallString<128> Tmp(OffloadingPrefix.empty() + ? llvm::sys::path::stem(Result.getFilename()) + : llvm::sys::path::stem(BaseInput)); + Tmp += OffloadingPrefix; + Tmp += ".json"; + llvm::sys::path::append(Path, Tmp); } } else { if (Arg *DumpDir = Args.getLastArgNoClaim(options::OPT_dumpdir)) { - // The trace file is ${dumpdir}${basename}.json. Note that dumpdir may not - // end with a path separator. 
+ // The trace file is ${dumpdir}${basename}${offloadprefix}.json. Note + // that dumpdir may not end with a path separator. Path = DumpDir->getValue(); - Path += llvm::sys::path::filename(BaseInput); + Path += llvm::sys::path::stem(BaseInput); + Path += OffloadingPrefix; + } else if (!OffloadingPrefix.empty()) { + // For offloading, derive path from -o output directory combined with + // the input filename and offload prefix. + SmallString<128> TraceName(llvm::sys::path::stem(BaseInput)); + TraceName += OffloadingPrefix; + if (Arg *FinalOutput = Args.getLastArg(options::OPT_o)) + Path = llvm::sys::path::parent_path(FinalOutput->getValue()); + llvm::sys::path::append(Path, TraceName); } else { Path = Result.getFilename(); } @@ -6178,7 +6208,7 @@ InputInfoList Driver::BuildJobsForActionNoCache( AtTopLevel, MultipleArchs, OffloadingPrefix), BaseInput); - if (T->canEmitIR() && OffloadingPrefix.empty()) + if (T->canEmitIR()) handleTimeTrace(C, Args, JA, BaseInput, Result); } diff --git a/clang/test/Driver/ftime-trace-offload.cpp b/clang/test/Driver/ftime-trace-offload.cpp new file mode 100644 index 0000000000000..2305d7bf4e04d --- /dev/null +++ b/clang/test/Driver/ftime-trace-offload.cpp @@ -0,0 +1,37 @@ +// RUN: rm -rf %t && mkdir -p %t && cd %t +// RUN: mkdir d e f && cp %s d/a.cpp + +/// Test HIP offloading: -ftime-trace should generate traces for both host and device. 
+// RUN: %clang -### -ftime-trace -ftime-trace-granularity=0 -x hip d/a.cpp --offload-arch=gfx906 --offload-arch=gfx90a \ +// RUN: -nogpulib -nogpuinc -c -o e/a.o --target=x86_64-linux-gnu 2>&1 \ +// RUN: | FileCheck %s --check-prefix=HIP +// HIP: -cc1{{.*}} "-triple" "amdgcn-amd-amdhsa"{{.*}} "-ftime-trace=e{{/|\\\\}}a-hip-amdgcn-amd-amdhsa-gfx906.json" +// HIP: -cc1{{.*}} "-triple" "amdgcn-amd-amdhsa"{{.*}} "-ftime-trace=e{{/|\\\\}}a-hip-amdgcn-amd-amdhsa-gfx90a.json" +// HIP: -cc1{{.*}} "-triple" "x86_64{{.*}}"{{.*}} "-ftime-trace=e{{/|\\\\}}a.json" + +/// Test HIP offloading with new driver: same output as above. +// RUN: %clang -### -ftime-trace -ftime-trace-granularity=0 -x hip d/a.cpp --offload-arch=gfx906 --offload-arch=gfx90a \ +// RUN: -nogpulib -nogpuinc -c -o e/a.o --target=x86_64-linux-gnu --offload-new-driver 2>&1 \ +// RUN: | FileCheck %s --check-prefix=HIP + +/// Test HIP offloading with -ftime-trace=<dir>: traces go to specified directory. +// RUN: %clang -### -ftime-trace=f -ftime-trace-granularity=0 -x hip d/a.cpp --offload-arch=gfx906 \ +// RUN: -nogpulib -nogpuinc -c -o e/a.o --target=x86_64-linux-gnu 2>&1 \ +// RUN: | FileCheck %s --check-prefix=HIP-DIR +// HIP-DIR: -cc1{{.*}} "-triple" "amdgcn-amd-amdhsa"{{.*}} "-ftime-trace=f{{/|\\\\}}a-hip-amdgcn-amd-amdhsa-gfx906.json" +// HIP-DIR: -cc1{{.*}} "-triple" "x86_64{{.*}}"{{.*}} "-ftime-trace=f{{/|\\\\}}a.json" + +/// Test HIP offloading with --save-temps: both host and device get unique trace files. 
+// RUN: %clang -### -ftime-trace -ftime-trace-granularity=0 -x hip d/a.cpp --offload-arch=gfx906 \ +// RUN: -nogpulib -nogpuinc -c -o e/a.o --target=x86_64-linux-gnu --save-temps 2>&1 \ +// RUN: | FileCheck %s --check-prefix=HIP-SAVE-TEMPS +// HIP-SAVE-TEMPS: -cc1{{.*}} "-triple" "amdgcn-amd-amdhsa"{{.*}} "-ftime-trace=e{{/|\\\\}}a-hip-amdgcn-amd-amdhsa-gfx906.json" +// HIP-SAVE-TEMPS: -cc1{{.*}} "-triple" "x86_64{{.*}}"{{.*}} "-ftime-trace=e{{/|\\\\}}a-host-x86_64-unknown-linux-gnu.json" + +/// Test CUDA offloading: -ftime-trace should generate traces for both host and device. +// RUN: %clang -### -ftime-trace -ftime-trace-granularity=0 -x cuda d/a.cpp --offload-arch=sm_70 --offload-arch=sm_80 \ +// RUN: -c -o e/a.o --target=x86_64-linux-gnu --cuda-path=%S/Inputs/CUDA_102/usr/local/cuda 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CUDA +// CUDA: -cc1{{.*}} "-triple" "nvptx64-nvidia-cuda"{{.*}} "-ftime-trace=e{{/|\\\\}}a-cuda-nvptx64-nvidia-cuda-sm_70.json" +// CUDA: -cc1{{.*}} "-triple" "nvptx64-nvidia-cuda"{{.*}} "-ftime-trace=e{{/|\\\\}}a-cuda-nvptx64-nvidia-cuda-sm_80.json" +// CUDA: -cc1{{.*}} "-triple" "x86_64{{.*}}"{{.*}} "-ftime-trace=e{{/|\\\\}}a.json" _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
