[clang] [Driver] Enable -ftime-trace for CUDA/HIP device compilation (PR #179701)

2026-03-09 Thread Joseph Huber via cfe-commits

https://github.com/jhuber6 approved this pull request.


https://github.com/llvm/llvm-project/pull/179701
___
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Driver] Enable -ftime-trace for CUDA/HIP device compilation (PR #179701)

2026-03-09 Thread Yaxun Liu via cfe-commits

yxsamliu wrote:

Gentle ping — all comments addressed. Thanks!

https://github.com/llvm/llvm-project/pull/179701
___
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Driver] Enable -ftime-trace for CUDA/HIP device compilation (PR #179701)

2026-03-01 Thread Yaxun Liu via cfe-commits

https://github.com/yxsamliu updated 
https://github.com/llvm/llvm-project/pull/179701

>From 5efb56479a8cd5e2e32592e8fd6da2c96d6a6a95 Mon Sep 17 00:00:00 2001
From: "Yaxun (Sam) Liu" 
Date: Sat, 28 Feb 2026 21:09:59 -0500
Subject: [PATCH 1/2] [Driver] Enable -ftime-trace for CUDA/HIP device
 compilation

---
 clang/lib/Driver/Driver.cpp   | 50 +++
 clang/test/Driver/ftime-trace-offload.cpp | 37 +
 2 files changed, 80 insertions(+), 7 deletions(-)
 create mode 100644 clang/test/Driver/ftime-trace-offload.cpp

diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp
index e4ec28753c594..7f7ead445c3a4 100644
--- a/clang/lib/Driver/Driver.cpp
+++ b/clang/lib/Driver/Driver.cpp
@@ -5858,20 +5858,56 @@ static void handleTimeTrace(Compilation &C, const 
ArgList &Args,
   Args.getLastArg(options::OPT_ftime_trace, options::OPT_ftime_trace_EQ);
   if (!A)
 return;
+
+  std::string OffloadingPrefix;
+  if (JA->getOffloadingDeviceKind() != Action::OFK_None) {
+const ToolChain *TC = JA->getOffloadingToolChain();
+OffloadingPrefix = Action::GetOffloadingFileNamePrefix(
+JA->getOffloadingDeviceKind(), TC ? TC->getTriple().normalize() : "",
+/*CreatePrefixForHost=*/false);
+if (const char *Arch = JA->getOffloadingArch()) {
+  OffloadingPrefix += "-";
+  OffloadingPrefix += Arch;
+}
+  } else if (JA->getOffloadingHostActiveKinds() != Action::OFK_None &&
+ C.getDriver().isSaveTempsEnabled()) {
+OffloadingPrefix = Action::GetOffloadingFileNamePrefix(
+Action::OFK_None, C.getDefaultToolChain().getTriple().normalize(),
+/*CreatePrefixForHost=*/true);
+  }
+
   SmallString<128> Path;
   if (A->getOption().matches(options::OPT_ftime_trace_EQ)) {
 Path = A->getValue();
 if (llvm::sys::fs::is_directory(Path)) {
-  SmallString<128> Tmp(Result.getFilename());
-  llvm::sys::path::replace_extension(Tmp, "json");
-  llvm::sys::path::append(Path, llvm::sys::path::filename(Tmp));
+  // When -ftime-trace= and it's a directory:
+  // - For host/non-offload: use the output filename stem
+  // - For offload: use input filename stem + offloading prefix
+  SmallString<128> Tmp;
+  if (OffloadingPrefix.empty()) {
+Tmp = llvm::sys::path::stem(Result.getFilename());
+  } else {
+Tmp = llvm::sys::path::stem(BaseInput);
+Tmp += OffloadingPrefix;
+  }
+  Tmp += ".json";
+  llvm::sys::path::append(Path, Tmp);
 }
   } else {
 if (Arg *DumpDir = Args.getLastArgNoClaim(options::OPT_dumpdir)) {
-  // The trace file is ${dumpdir}${basename}.json. Note that dumpdir may 
not
-  // end with a path separator.
+  // The trace file is ${dumpdir}${basename}${offloadprefix}.json. Note
+  // that dumpdir may not end with a path separator.
   Path = DumpDir->getValue();
-  Path += llvm::sys::path::filename(BaseInput);
+  Path += llvm::sys::path::stem(BaseInput);
+  Path += OffloadingPrefix;
+} else if (!OffloadingPrefix.empty()) {
+  // For offloading, derive path from -o output directory combined with
+  // the input filename and offload prefix.
+  SmallString<128> TraceName(llvm::sys::path::stem(BaseInput));
+  TraceName += OffloadingPrefix;
+  if (Arg *FinalOutput = Args.getLastArg(options::OPT_o))
+Path = llvm::sys::path::parent_path(FinalOutput->getValue());
+  llvm::sys::path::append(Path, TraceName);
 } else {
   Path = Result.getFilename();
 }
@@ -6132,7 +6168,7 @@ InputInfoList Driver::BuildJobsForActionNoCache(
  AtTopLevel, MultipleArchs,
  OffloadingPrefix),
BaseInput);
-if (T->canEmitIR() && OffloadingPrefix.empty())
+if (T->canEmitIR())
   handleTimeTrace(C, Args, JA, BaseInput, Result);
   }
 
diff --git a/clang/test/Driver/ftime-trace-offload.cpp 
b/clang/test/Driver/ftime-trace-offload.cpp
new file mode 100644
index 0..224a21ca2173a
--- /dev/null
+++ b/clang/test/Driver/ftime-trace-offload.cpp
@@ -0,0 +1,37 @@
+// RUN: rm -rf %t && mkdir -p %t && cd %t
+// RUN: mkdir d e f && cp %s d/a.cpp
+
+/// Test HIP offloading: -ftime-trace should generate traces for both host and 
device.
+// RUN: %clang -### -ftime-trace -ftime-trace-granularity=0 -x hip d/a.cpp 
--offload-arch=gfx906 --offload-arch=gfx90a \
+// RUN:   -nogpulib -nogpuinc -c -o e/a.o --target=x86_64-linux-gnu 2>&1 \
+// RUN:   | FileCheck %s --check-prefix=HIP
+// HIP: -cc1{{.*}} "-triple" "amdgcn-amd-amdhsa"{{.*}} 
"-ftime-trace=e/a-hip-amdgcn-amd-amdhsa-gfx906.json"
+// HIP: -cc1{{.*}} "-triple" "amdgcn-amd-amdhsa"{{.*}} 
"-ftime-trace=e/a-hip-amdgcn-amd-amdhsa-gfx90a.json"
+// HIP: -cc1{{.*}} "-triple" "x86_64{{.*}}"{{.*}} "-ftime-trace=e/a.json"
+
+/// Test HIP offloading with new driver: same output as above.
+// RUN: %clang -### -ftime-trace -ftim

[clang] [Driver] Enable -ftime-trace for CUDA/HIP device compilation (PR #179701)

2026-02-28 Thread Yaxun Liu via cfe-commits

https://github.com/yxsamliu updated 
https://github.com/llvm/llvm-project/pull/179701

>From 5efb56479a8cd5e2e32592e8fd6da2c96d6a6a95 Mon Sep 17 00:00:00 2001
From: "Yaxun (Sam) Liu" 
Date: Sat, 28 Feb 2026 21:09:59 -0500
Subject: [PATCH] [Driver] Enable -ftime-trace for CUDA/HIP device compilation

---
 clang/lib/Driver/Driver.cpp   | 50 +++
 clang/test/Driver/ftime-trace-offload.cpp | 37 +
 2 files changed, 80 insertions(+), 7 deletions(-)
 create mode 100644 clang/test/Driver/ftime-trace-offload.cpp

diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp
index e4ec28753c594..7f7ead445c3a4 100644
--- a/clang/lib/Driver/Driver.cpp
+++ b/clang/lib/Driver/Driver.cpp
@@ -5858,20 +5858,56 @@ static void handleTimeTrace(Compilation &C, const 
ArgList &Args,
   Args.getLastArg(options::OPT_ftime_trace, options::OPT_ftime_trace_EQ);
   if (!A)
 return;
+
+  std::string OffloadingPrefix;
+  if (JA->getOffloadingDeviceKind() != Action::OFK_None) {
+const ToolChain *TC = JA->getOffloadingToolChain();
+OffloadingPrefix = Action::GetOffloadingFileNamePrefix(
+JA->getOffloadingDeviceKind(), TC ? TC->getTriple().normalize() : "",
+/*CreatePrefixForHost=*/false);
+if (const char *Arch = JA->getOffloadingArch()) {
+  OffloadingPrefix += "-";
+  OffloadingPrefix += Arch;
+}
+  } else if (JA->getOffloadingHostActiveKinds() != Action::OFK_None &&
+ C.getDriver().isSaveTempsEnabled()) {
+OffloadingPrefix = Action::GetOffloadingFileNamePrefix(
+Action::OFK_None, C.getDefaultToolChain().getTriple().normalize(),
+/*CreatePrefixForHost=*/true);
+  }
+
   SmallString<128> Path;
   if (A->getOption().matches(options::OPT_ftime_trace_EQ)) {
 Path = A->getValue();
 if (llvm::sys::fs::is_directory(Path)) {
-  SmallString<128> Tmp(Result.getFilename());
-  llvm::sys::path::replace_extension(Tmp, "json");
-  llvm::sys::path::append(Path, llvm::sys::path::filename(Tmp));
+  // When -ftime-trace= and it's a directory:
+  // - For host/non-offload: use the output filename stem
+  // - For offload: use input filename stem + offloading prefix
+  SmallString<128> Tmp;
+  if (OffloadingPrefix.empty()) {
+Tmp = llvm::sys::path::stem(Result.getFilename());
+  } else {
+Tmp = llvm::sys::path::stem(BaseInput);
+Tmp += OffloadingPrefix;
+  }
+  Tmp += ".json";
+  llvm::sys::path::append(Path, Tmp);
 }
   } else {
 if (Arg *DumpDir = Args.getLastArgNoClaim(options::OPT_dumpdir)) {
-  // The trace file is ${dumpdir}${basename}.json. Note that dumpdir may 
not
-  // end with a path separator.
+  // The trace file is ${dumpdir}${basename}${offloadprefix}.json. Note
+  // that dumpdir may not end with a path separator.
   Path = DumpDir->getValue();
-  Path += llvm::sys::path::filename(BaseInput);
+  Path += llvm::sys::path::stem(BaseInput);
+  Path += OffloadingPrefix;
+} else if (!OffloadingPrefix.empty()) {
+  // For offloading, derive path from -o output directory combined with
+  // the input filename and offload prefix.
+  SmallString<128> TraceName(llvm::sys::path::stem(BaseInput));
+  TraceName += OffloadingPrefix;
+  if (Arg *FinalOutput = Args.getLastArg(options::OPT_o))
+Path = llvm::sys::path::parent_path(FinalOutput->getValue());
+  llvm::sys::path::append(Path, TraceName);
 } else {
   Path = Result.getFilename();
 }
@@ -6132,7 +6168,7 @@ InputInfoList Driver::BuildJobsForActionNoCache(
  AtTopLevel, MultipleArchs,
  OffloadingPrefix),
BaseInput);
-if (T->canEmitIR() && OffloadingPrefix.empty())
+if (T->canEmitIR())
   handleTimeTrace(C, Args, JA, BaseInput, Result);
   }
 
diff --git a/clang/test/Driver/ftime-trace-offload.cpp 
b/clang/test/Driver/ftime-trace-offload.cpp
new file mode 100644
index 0..224a21ca2173a
--- /dev/null
+++ b/clang/test/Driver/ftime-trace-offload.cpp
@@ -0,0 +1,37 @@
+// RUN: rm -rf %t && mkdir -p %t && cd %t
+// RUN: mkdir d e f && cp %s d/a.cpp
+
+/// Test HIP offloading: -ftime-trace should generate traces for both host and 
device.
+// RUN: %clang -### -ftime-trace -ftime-trace-granularity=0 -x hip d/a.cpp 
--offload-arch=gfx906 --offload-arch=gfx90a \
+// RUN:   -nogpulib -nogpuinc -c -o e/a.o --target=x86_64-linux-gnu 2>&1 \
+// RUN:   | FileCheck %s --check-prefix=HIP
+// HIP: -cc1{{.*}} "-triple" "amdgcn-amd-amdhsa"{{.*}} 
"-ftime-trace=e/a-hip-amdgcn-amd-amdhsa-gfx906.json"
+// HIP: -cc1{{.*}} "-triple" "amdgcn-amd-amdhsa"{{.*}} 
"-ftime-trace=e/a-hip-amdgcn-amd-amdhsa-gfx90a.json"
+// HIP: -cc1{{.*}} "-triple" "x86_64{{.*}}"{{.*}} "-ftime-trace=e/a.json"
+
+/// Test HIP offloading with new driver: same output as above.
+// RUN: %clang -### -ftime-trace -ftime-tra

[clang] [Driver] Enable -ftime-trace for CUDA/HIP device compilation (PR #179701)

2026-02-19 Thread Joseph Huber via cfe-commits


@@ -5858,22 +5858,67 @@ static void handleTimeTrace(Compilation &C, const 
ArgList &Args,
   Args.getLastArg(options::OPT_ftime_trace, options::OPT_ftime_trace_EQ);
   if (!A)
 return;
+
+  std::string OffloadingPrefix;
+  if (JA->getOffloadingDeviceKind() != Action::OFK_None) {
+const ToolChain *TC = JA->getOffloadingToolChain();
+OffloadingPrefix = Action::GetOffloadingFileNamePrefix(
+JA->getOffloadingDeviceKind(),
+TC ? TC->getTriple().normalize() : "",
+/*CreatePrefixForHost=*/false);
+if (const char *Arch = JA->getOffloadingArch()) {
+  OffloadingPrefix += "-";
+  OffloadingPrefix += Arch;
+}
+  } else if (JA->getOffloadingHostActiveKinds() != Action::OFK_None &&
+ C.getDriver().isSaveTempsEnabled()) {
+OffloadingPrefix = Action::GetOffloadingFileNamePrefix(
+Action::OFK_None,

jhuber6 wrote:

Why do we pass in OFK_None here if save temps is enabled? I'm also wondering 
why just calling this is insufficient for our case, since it's supposed to make 
a valid prefix. I thought that included the arch already.

https://github.com/llvm/llvm-project/pull/179701
___
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Driver] Enable -ftime-trace for CUDA/HIP device compilation (PR #179701)

2026-02-19 Thread Joseph Huber via cfe-commits


@@ -5858,22 +5858,67 @@ static void handleTimeTrace(Compilation &C, const 
ArgList &Args,
   Args.getLastArg(options::OPT_ftime_trace, options::OPT_ftime_trace_EQ);
   if (!A)
 return;
+
+  std::string OffloadingPrefix;
+  if (JA->getOffloadingDeviceKind() != Action::OFK_None) {
+const ToolChain *TC = JA->getOffloadingToolChain();
+OffloadingPrefix = Action::GetOffloadingFileNamePrefix(
+JA->getOffloadingDeviceKind(),
+TC ? TC->getTriple().normalize() : "",
+/*CreatePrefixForHost=*/false);
+if (const char *Arch = JA->getOffloadingArch()) {
+  OffloadingPrefix += "-";
+  OffloadingPrefix += Arch;
+}
+  } else if (JA->getOffloadingHostActiveKinds() != Action::OFK_None &&
+ C.getDriver().isSaveTempsEnabled()) {
+OffloadingPrefix = Action::GetOffloadingFileNamePrefix(
+Action::OFK_None,
+C.getDefaultToolChain().getTriple().normalize(),
+/*CreatePrefixForHost=*/true);
+  }
+
   SmallString<128> Path;
   if (A->getOption().matches(options::OPT_ftime_trace_EQ)) {
 Path = A->getValue();
 if (llvm::sys::fs::is_directory(Path)) {
-  SmallString<128> Tmp(Result.getFilename());
-  llvm::sys::path::replace_extension(Tmp, "json");
-  llvm::sys::path::append(Path, llvm::sys::path::filename(Tmp));
+  // When -ftime-trace= and it's a directory:
+  // - For host/non-offload: use the output filename stem
+  // - For offload: use input filename stem + offloading prefix
+  SmallString<128> Tmp;
+  if (OffloadingPrefix.empty()) {

jhuber6 wrote:

Why does this portion need to be aware of the OffloadingPrefix? Couldn't we 
just replace the value of `Result` or something?

https://github.com/llvm/llvm-project/pull/179701
___
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Driver] Enable -ftime-trace for CUDA/HIP device compilation (PR #179701)

2026-02-19 Thread Joseph Huber via cfe-commits


@@ -5858,22 +5858,67 @@ static void handleTimeTrace(Compilation &C, const 
ArgList &Args,
   Args.getLastArg(options::OPT_ftime_trace, options::OPT_ftime_trace_EQ);
   if (!A)
 return;
+
+  std::string OffloadingPrefix;

jhuber6 wrote:

Should this be a small string?

https://github.com/llvm/llvm-project/pull/179701
___
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Driver] Enable -ftime-trace for CUDA/HIP device compilation (PR #179701)

2026-02-19 Thread Joseph Huber via cfe-commits


@@ -5858,22 +5858,67 @@ static void handleTimeTrace(Compilation &C, const 
ArgList &Args,
   Args.getLastArg(options::OPT_ftime_trace, options::OPT_ftime_trace_EQ);
   if (!A)
 return;
+
+  std::string OffloadingPrefix;
+  if (JA->getOffloadingDeviceKind() != Action::OFK_None) {

jhuber6 wrote:

Does this mean this applies for both the host and device phases?

https://github.com/llvm/llvm-project/pull/179701
___
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Driver] Enable -ftime-trace for CUDA/HIP device compilation (PR #179701)

2026-02-11 Thread via cfe-commits

github-actions[bot] wrote:




:warning: C/C++ code formatter, clang-format found issues in your code. 
:warning:



You can test this locally with the following command:


```bash
git-clang-format --diff origin/main HEAD --extensions cpp -- clang/lib/Driver/Driver.cpp clang/test/Driver/ftime-trace.cpp --diff_from_common_commit
```

:warning:
The reproduction instructions above might return results for more than one PR
in a stack if you are using a stacked PR workflow. You can limit the results by
changing `origin/main` to the base branch/commit you want to compare against.
:warning:





View the diff from clang-format here.


```diff
diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp
index 67d9e11bc..6720ba16f 100644
--- a/clang/lib/Driver/Driver.cpp
+++ b/clang/lib/Driver/Driver.cpp
@@ -5855,11 +5855,9 @@ static void handleTimeTrace(Compilation &C, const ArgList &Args,
   // offloading), this produces e.g. "-host-x86_64-unknown-linux-gnu".
   // For top-level host actions, no prefix is generated.
   std::string OffloadingPrefix = Action::GetOffloadingFileNamePrefix(
-  JA->getOffloadingDeviceKind(),
-  TC ? TC->getTriple().normalize() : "",
+  JA->getOffloadingDeviceKind(), TC ? TC->getTriple().normalize() : "",
   /*CreatePrefixForHost=*/
-  !(JA->getOffloadingHostActiveKinds() == Action::OFK_None ||
-AtTopLevel));
+  !(JA->getOffloadingHostActiveKinds() == Action::OFK_None || AtTopLevel));
   if (!OffloadingPrefix.empty() && !BoundArch.empty()) {
 OffloadingPrefix += "-";
 OffloadingPrefix += BoundArch;

```




https://github.com/llvm/llvm-project/pull/179701
___
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Driver] Enable -ftime-trace for CUDA/HIP device compilation (PR #179701)

2026-02-11 Thread Yaxun Liu via cfe-commits


@@ -5821,7 +5821,8 @@ InputInfoList Driver::BuildJobsForAction(
 
 static void handleTimeTrace(Compilation &C, const ArgList &Args,
 const JobAction *JA, const char *BaseInput,
-const InputInfo &Result) {
+const InputInfo &Result,
+StringRef OffloadingPrefix = "") {

yxsamliu wrote:

will do

https://github.com/llvm/llvm-project/pull/179701
___
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Driver] Enable -ftime-trace for CUDA/HIP device compilation (PR #179701)

2026-02-11 Thread Yaxun Liu via cfe-commits


@@ -63,6 +63,41 @@
 // UNUSED-NEXT: warning: argument unused during compilation: 
'-ftime-trace-verbose'
 // UNUSED-NOT:  warning:
 
+/// Test HIP offloading: -ftime-trace should generate traces for both host and 
device.
+// RUN: %clang -### -ftime-trace -ftime-trace-granularity=0 -x hip d/a.cpp 
--offload-arch=gfx906 --offload-arch=gfx90a \
+// RUN:   -c -o e/a.o --target=x86_64-linux-gnu 2>&1 \
+// RUN:   | FileCheck %s --check-prefix=HIP
+// HIP-DAG: -cc1{{.*}} "-triple" "amdgcn-amd-amdhsa"{{.*}} 
"-ftime-trace=e/a-hip-amdgcn-amd-amdhsa-gfx906.json"

yxsamliu wrote:

will do

https://github.com/llvm/llvm-project/pull/179701
___
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Driver] Enable -ftime-trace for CUDA/HIP device compilation (PR #179701)

2026-02-11 Thread Yaxun Liu via cfe-commits

https://github.com/yxsamliu updated 
https://github.com/llvm/llvm-project/pull/179701

>From 7713b806be79a6e15aab1dcfb9a8b6f8b96db579 Mon Sep 17 00:00:00 2001
From: "Yaxun (Sam) Liu" 
Date: Wed, 4 Feb 2026 09:50:54 -0500
Subject: [PATCH] [Driver] Enable -ftime-trace for CUDA/HIP device compilation

Previously, -ftime-trace only generated trace files for host compilation
when compiling CUDA/HIP code. Device compilation was excluded because
handleTimeTrace() had no offloading context.

This patch enables -ftime-trace for offload device compilation by having
handleTimeTrace() derive the offloading prefix internally from the
JobAction, ToolChain, and BoundArch, following the same pattern as
GetOffloadingFileNamePrefix used for output filenames. The bound
architecture is appended to ensure unique trace files per offload target.
For device compilation, the trace output directory is derived from the
-o option since the device output is a temp file.

Trace files are now generated for each offload target:
- Host: output.json
- Device: output-hip-amdgcn-amd-amdhsa-gfx906.json

Note: When using --save-temps, multiple compilation phases (preprocess,
compile, codegen) write to the same trace file, with each phase
overwriting the previous. This is pre-existing behavior that also
affects regular C++ compilation and is not addressed by this patch.

This addresses a long-standing limitation noted in D150282.
---
 clang/lib/Driver/Driver.cpp   | 64 ++-
 clang/test/Driver/ftime-trace.cpp | 35 +
 2 files changed, 89 insertions(+), 10 deletions(-)

diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp
index a55c5033b57cf..67d9e11bc420d 100644
--- a/clang/lib/Driver/Driver.cpp
+++ b/clang/lib/Driver/Driver.cpp
@@ -5842,27 +5842,70 @@ InputInfoList Driver::BuildJobsForAction(
 
 static void handleTimeTrace(Compilation &C, const ArgList &Args,
 const JobAction *JA, const char *BaseInput,
-const InputInfo &Result) {
+const InputInfo &Result, const ToolChain *TC,
+StringRef BoundArch, bool AtTopLevel) {
   Arg *A =
   Args.getLastArg(options::OPT_ftime_trace, options::OPT_ftime_trace_EQ);
   if (!A)
 return;
+
+  // Build the offloading prefix from the job action's offloading context.
+  // For device actions, this produces e.g. "-hip-amdgcn-amd-amdhsa-gfx906".
+  // For host actions that are not at top level (e.g. --save-temps with
+  // offloading), this produces e.g. "-host-x86_64-unknown-linux-gnu".
+  // For top-level host actions, no prefix is generated.
+  std::string OffloadingPrefix = Action::GetOffloadingFileNamePrefix(
+  JA->getOffloadingDeviceKind(),
+  TC ? TC->getTriple().normalize() : "",
+  /*CreatePrefixForHost=*/
+  !(JA->getOffloadingHostActiveKinds() == Action::OFK_None ||
+AtTopLevel));
+  if (!OffloadingPrefix.empty() && !BoundArch.empty()) {
+OffloadingPrefix += "-";
+OffloadingPrefix += BoundArch;
+  }
+
   SmallString<128> Path;
   if (A->getOption().matches(options::OPT_ftime_trace_EQ)) {
 Path = A->getValue();
 if (llvm::sys::fs::is_directory(Path)) {
-  SmallString<128> Tmp(Result.getFilename());
-  llvm::sys::path::replace_extension(Tmp, "json");
-  llvm::sys::path::append(Path, llvm::sys::path::filename(Tmp));
+  // When -ftime-trace= and it's a directory:
+  // - For host/non-offload: use the output filename stem
+  // - For offload: use input filename stem + offloading prefix
+  SmallString<128> Tmp;
+  if (OffloadingPrefix.empty()) {
+Tmp = llvm::sys::path::stem(Result.getFilename());
+  } else {
+Tmp = llvm::sys::path::stem(BaseInput);
+Tmp += OffloadingPrefix;
+  }
+  Tmp += ".json";
+  llvm::sys::path::append(Path, Tmp);
 }
   } else {
 if (Arg *DumpDir = Args.getLastArgNoClaim(options::OPT_dumpdir)) {
-  // The trace file is ${dumpdir}${basename}.json. Note that dumpdir may 
not
-  // end with a path separator.
+  // The trace file is ${dumpdir}${basename}${offloadprefix}.json. Note
+  // that dumpdir may not end with a path separator.
   Path = DumpDir->getValue();
-  Path += llvm::sys::path::filename(BaseInput);
+  Path += llvm::sys::path::stem(BaseInput);
+  Path += OffloadingPrefix;
+} else if (!OffloadingPrefix.empty()) {
+  // For offloading, derive path from -o option or use current directory.
+  // The Result filename may be a temp file, so we use the -o output
+  // directory combined with the input filename and offload prefix.
+  if (Arg *FinalOutput = Args.getLastArg(options::OPT_o)) {
+Path = llvm::sys::path::parent_path(FinalOutput->getValue());
+if (!Path.empty())
+  Path += llvm::sys::path::get_separator();
+  }
+  Path += llvm::sys::path::stem(BaseInput);
+  Path += OffloadingPrefix;

[clang] [Driver] Enable -ftime-trace for CUDA/HIP device compilation (PR #179701)

2026-02-04 Thread Joseph Huber via cfe-commits


@@ -63,6 +63,41 @@
 // UNUSED-NEXT: warning: argument unused during compilation: 
'-ftime-trace-verbose'
 // UNUSED-NOT:  warning:
 
+/// Test HIP offloading: -ftime-trace should generate traces for both host and 
device.
+// RUN: %clang -### -ftime-trace -ftime-trace-granularity=0 -x hip d/a.cpp 
--offload-arch=gfx906 --offload-arch=gfx90a \
+// RUN:   -c -o e/a.o --target=x86_64-linux-gnu 2>&1 \
+// RUN:   | FileCheck %s --check-prefix=HIP
+// HIP-DAG: -cc1{{.*}} "-triple" "amdgcn-amd-amdhsa"{{.*}} 
"-ftime-trace=e/a-hip-amdgcn-amd-amdhsa-gfx906.json"

jhuber6 wrote:

+1, I've had a nightmare of a time trying to update tests that try to be 
'smart' with DAGs and reusing check lines in the past. It's much nicer to have 
one block of checks for one test.

https://github.com/llvm/llvm-project/pull/179701
___
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Driver] Enable -ftime-trace for CUDA/HIP device compilation (PR #179701)

2026-02-04 Thread Joseph Huber via cfe-commits


@@ -5821,7 +5821,8 @@ InputInfoList Driver::BuildJobsForAction(
 
 static void handleTimeTrace(Compilation &C, const ArgList &Args,
 const JobAction *JA, const char *BaseInput,
-const InputInfo &Result) {
+const InputInfo &Result,
+StringRef OffloadingPrefix = "") {

jhuber6 wrote:

Why do we need to pass this in? We have access to the compilation so I figured 
that we'd just generate some name if the current job action is offloading.

https://github.com/llvm/llvm-project/pull/179701
___
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Driver] Enable -ftime-trace for CUDA/HIP device compilation (PR #179701)

2026-02-04 Thread Fangrui Song via cfe-commits


@@ -63,6 +63,41 @@
 // UNUSED-NEXT: warning: argument unused during compilation: 
'-ftime-trace-verbose'
 // UNUSED-NOT:  warning:
 
+/// Test HIP offloading: -ftime-trace should generate traces for both host and 
device.
+// RUN: %clang -### -ftime-trace -ftime-trace-granularity=0 -x hip d/a.cpp 
--offload-arch=gfx906 --offload-arch=gfx90a \
+// RUN:   -c -o e/a.o --target=x86_64-linux-gnu 2>&1 \
+// RUN:   | FileCheck %s --check-prefix=HIP
+// HIP-DAG: -cc1{{.*}} "-triple" "amdgcn-amd-amdhsa"{{.*}} 
"-ftime-trace=e/a-hip-amdgcn-amd-amdhsa-gfx906.json"

MaskRay wrote:

`-DAG` check lines are quite difficult to debug. Prefer to remove `-DAG` since 
the order should be deterministic.

https://github.com/llvm/llvm-project/pull/179701
___
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Driver] Enable -ftime-trace for CUDA/HIP device compilation (PR #179701)

2026-02-04 Thread Yanzuo Liu via cfe-commits

https://github.com/zwuis edited https://github.com/llvm/llvm-project/pull/179701
___
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Driver] Enable -ftime-trace for CUDA/HIP device compilation (PR #179701)

2026-02-04 Thread via cfe-commits

github-actions[bot] wrote:


# :window: Windows x64 Test Results

* 52368 tests passed
* 935 tests skipped
* 1 test failed

## Failed Tests
(click on a test name to see its output)

### Clang

Clang.Driver/ftime-trace.cpp

```
Exit Code: 1

Command Output (stdout):
--
# RUN: at line 1
rm -rf 
C:\_work\llvm-project\llvm-project\build\tools\clang\test\Driver\Output\ftime-trace.cpp.tmp
 && mkdir -p 
C:\_work\llvm-project\llvm-project\build\tools\clang\test\Driver\Output\ftime-trace.cpp.tmp
 && cd 
C:\_work\llvm-project\llvm-project\build\tools\clang\test\Driver\Output\ftime-trace.cpp.tmp
# executed command: rm -rf 
'C:\_work\llvm-project\llvm-project\build\tools\clang\test\Driver\Output\ftime-trace.cpp.tmp'
# note: command had no output on stdout or stderr
# executed command: mkdir -p 
'C:\_work\llvm-project\llvm-project\build\tools\clang\test\Driver\Output\ftime-trace.cpp.tmp'
# note: command had no output on stdout or stderr
# executed command: cd 
'C:\_work\llvm-project\llvm-project\build\tools\clang\test\Driver\Output\ftime-trace.cpp.tmp'
# note: command had no output on stdout or stderr
# RUN: at line 2
c:\_work\llvm-project\llvm-project\build\bin\clang.exe --driver-mode=g++ -S 
-no-canonical-prefixes -ftime-trace -ftime-trace-granularity=0 
-ftime-trace-verbose -o out 
C:\_work\llvm-project\llvm-project\clang\test\Driver\ftime-trace.cpp
# executed command: 'c:\_work\llvm-project\llvm-project\build\bin\clang.exe' 
--driver-mode=g++ -S -no-canonical-prefixes -ftime-trace 
-ftime-trace-granularity=0 -ftime-trace-verbose -o out 
'C:\_work\llvm-project\llvm-project\clang\test\Driver\ftime-trace.cpp'
# note: command had no output on stdout or stderr
# RUN: at line 3
cat out.json| "C:\Python312\python.exe" -c 'import json, sys; 
json.dump(json.loads(sys.stdin.read()), sys.stdout, sort_keys=True, indent=2)'  
  | c:\_work\llvm-project\llvm-project\build\bin\filecheck.exe 
C:\_work\llvm-project\llvm-project\clang\test\Driver\ftime-trace.cpp
# executed command: cat out.json
# note: command had no output on stdout or stderr
# executed command: 'C:\Python312\python.exe' -c 'import json, sys; 
json.dump(json.loads(sys.stdin.read()), sys.stdout, sort_keys=True, indent=2)'
# note: command had no output on stdout or stderr
# executed command: 
'c:\_work\llvm-project\llvm-project\build\bin\filecheck.exe' 
'C:\_work\llvm-project\llvm-project\clang\test\Driver\ftime-trace.cpp'
# note: command had no output on stdout or stderr
# RUN: at line 6
c:\_work\llvm-project\llvm-project\build\bin\clang.exe --driver-mode=g++ -S 
-no-canonical-prefixes -ftime-trace=new-name.json -ftime-trace-granularity=0 
-ftime-trace-verbose -o out 
C:\_work\llvm-project\llvm-project\clang\test\Driver\ftime-trace.cpp
# executed command: 'c:\_work\llvm-project\llvm-project\build\bin\clang.exe' 
--driver-mode=g++ -S -no-canonical-prefixes -ftime-trace=new-name.json 
-ftime-trace-granularity=0 -ftime-trace-verbose -o out 
'C:\_work\llvm-project\llvm-project\clang\test\Driver\ftime-trace.cpp'
# note: command had no output on stdout or stderr
# RUN: at line 7
cat new-name.json| "C:\Python312\python.exe" -c 'import json, sys; 
json.dump(json.loads(sys.stdin.read()), sys.stdout, sort_keys=True, indent=2)'  
  | c:\_work\llvm-project\llvm-project\build\bin\filecheck.exe 
C:\_work\llvm-project\llvm-project\clang\test\Driver\ftime-trace.cpp
# executed command: cat new-name.json
# note: command had no output on stdout or stderr
# executed command: 'C:\Python312\python.exe' -c 'import json, sys; 
json.dump(json.loads(sys.stdin.read()), sys.stdout, sort_keys=True, indent=2)'
# note: command had no output on stdout or stderr
# executed command: 
'c:\_work\llvm-project\llvm-project\build\bin\filecheck.exe' 
'C:\_work\llvm-project\llvm-project\clang\test\Driver\ftime-trace.cpp'
# note: command had no output on stdout or stderr
# RUN: at line 10
mkdir dir1 dir2
# executed command: mkdir dir1 dir2
# note: command had no output on stdout or stderr
# RUN: at line 11
c:\_work\llvm-project\llvm-project\build\bin\clang.exe --driver-mode=g++ -S 
-no-canonical-prefixes -ftime-trace=dir1 -ftime-trace-granularity=0 
-ftime-trace-verbose -o out 
C:\_work\llvm-project\llvm-project\clang\test\Driver\ftime-trace.cpp
# executed command: 'c:\_work\llvm-project\llvm-project\build\bin\clang.exe' 
--driver-mode=g++ -S -no-canonical-prefixes -ftime-trace=dir1 
-ftime-trace-granularity=0 -ftime-trace-verbose -o out 
'C:\_work\llvm-project\llvm-project\clang\test\Driver\ftime-trace.cpp'
# note: command had no output on stdout or stderr
# RUN: at line 12
cat dir1/out.json| "C:\Python312\python.exe" -c 'import json, sys; 
json.dump(json.loads(sys.stdin.read()), sys.stdout, sort_keys=True, indent=2)'  
  | c:\_work\llvm-project\llvm-project\build\bin\filecheck.exe 
C:\_work\llvm-project\llvm-project\clang\test\Driver\ftime-trace.cpp
# executed command: cat dir1/out.json
# note: command had no output on stdout or stderr
# executed command: 'C:\Python312\python.exe' -c 'import j

[clang] [Driver] Enable -ftime-trace for CUDA/HIP device compilation (PR #179701)

2026-02-04 Thread via cfe-commits

github-actions[bot] wrote:


# :penguin: Linux x64 Test Results

* 86008 tests passed
* 1241 tests skipped
* 1 test failed

## Failed Tests
(click on a test name to see its output)

### Clang

Clang.Driver/ftime-trace.cpp

```
Exit Code: 1

Command Output (stdout):
--
# RUN: at line 1
rm -rf 
/home/gha/actions-runner/_work/llvm-project/llvm-project/build/tools/clang/test/Driver/Output/ftime-trace.cpp.tmp
 && mkdir -p 
/home/gha/actions-runner/_work/llvm-project/llvm-project/build/tools/clang/test/Driver/Output/ftime-trace.cpp.tmp
 && cd 
/home/gha/actions-runner/_work/llvm-project/llvm-project/build/tools/clang/test/Driver/Output/ftime-trace.cpp.tmp
# executed command: rm -rf 
/home/gha/actions-runner/_work/llvm-project/llvm-project/build/tools/clang/test/Driver/Output/ftime-trace.cpp.tmp
# note: command had no output on stdout or stderr
# executed command: mkdir -p 
/home/gha/actions-runner/_work/llvm-project/llvm-project/build/tools/clang/test/Driver/Output/ftime-trace.cpp.tmp
# note: command had no output on stdout or stderr
# executed command: cd 
/home/gha/actions-runner/_work/llvm-project/llvm-project/build/tools/clang/test/Driver/Output/ftime-trace.cpp.tmp
# note: command had no output on stdout or stderr
# RUN: at line 2
/home/gha/actions-runner/_work/llvm-project/llvm-project/build/bin/clang 
--driver-mode=g++ -S -no-canonical-prefixes -ftime-trace 
-ftime-trace-granularity=0 -ftime-trace-verbose -o out 
/home/gha/actions-runner/_work/llvm-project/llvm-project/clang/test/Driver/ftime-trace.cpp
# executed command: 
/home/gha/actions-runner/_work/llvm-project/llvm-project/build/bin/clang 
--driver-mode=g++ -S -no-canonical-prefixes -ftime-trace 
-ftime-trace-granularity=0 -ftime-trace-verbose -o out 
/home/gha/actions-runner/_work/llvm-project/llvm-project/clang/test/Driver/ftime-trace.cpp
# note: command had no output on stdout or stderr
# RUN: at line 3
cat out.json| "/usr/bin/python3" -c 'import json, sys; 
json.dump(json.loads(sys.stdin.read()), sys.stdout, sort_keys=True, indent=2)'  
  | 
/home/gha/actions-runner/_work/llvm-project/llvm-project/build/bin/FileCheck 
/home/gha/actions-runner/_work/llvm-project/llvm-project/clang/test/Driver/ftime-trace.cpp
# executed command: cat out.json
# note: command had no output on stdout or stderr
# executed command: /usr/bin/python3 -c 'import json, sys; 
json.dump(json.loads(sys.stdin.read()), sys.stdout, sort_keys=True, indent=2)'
# note: command had no output on stdout or stderr
# executed command: 
/home/gha/actions-runner/_work/llvm-project/llvm-project/build/bin/FileCheck 
/home/gha/actions-runner/_work/llvm-project/llvm-project/clang/test/Driver/ftime-trace.cpp
# note: command had no output on stdout or stderr
# RUN: at line 6
/home/gha/actions-runner/_work/llvm-project/llvm-project/build/bin/clang 
--driver-mode=g++ -S -no-canonical-prefixes -ftime-trace=new-name.json 
-ftime-trace-granularity=0 -ftime-trace-verbose -o out 
/home/gha/actions-runner/_work/llvm-project/llvm-project/clang/test/Driver/ftime-trace.cpp
# executed command: 
/home/gha/actions-runner/_work/llvm-project/llvm-project/build/bin/clang 
--driver-mode=g++ -S -no-canonical-prefixes -ftime-trace=new-name.json 
-ftime-trace-granularity=0 -ftime-trace-verbose -o out 
/home/gha/actions-runner/_work/llvm-project/llvm-project/clang/test/Driver/ftime-trace.cpp
# note: command had no output on stdout or stderr
# RUN: at line 7
cat new-name.json| "/usr/bin/python3" -c 'import json, sys; 
json.dump(json.loads(sys.stdin.read()), sys.stdout, sort_keys=True, indent=2)'  
  | 
/home/gha/actions-runner/_work/llvm-project/llvm-project/build/bin/FileCheck 
/home/gha/actions-runner/_work/llvm-project/llvm-project/clang/test/Driver/ftime-trace.cpp
# executed command: cat new-name.json
# note: command had no output on stdout or stderr
# executed command: /usr/bin/python3 -c 'import json, sys; 
json.dump(json.loads(sys.stdin.read()), sys.stdout, sort_keys=True, indent=2)'
# note: command had no output on stdout or stderr
# executed command: 
/home/gha/actions-runner/_work/llvm-project/llvm-project/build/bin/FileCheck 
/home/gha/actions-runner/_work/llvm-project/llvm-project/clang/test/Driver/ftime-trace.cpp
# note: command had no output on stdout or stderr
# RUN: at line 10
mkdir dir1 dir2
# executed command: mkdir dir1 dir2
# note: command had no output on stdout or stderr
# RUN: at line 11
/home/gha/actions-runner/_work/llvm-project/llvm-project/build/bin/clang 
--driver-mode=g++ -S -no-canonical-prefixes -ftime-trace=dir1 
-ftime-trace-granularity=0 -ftime-trace-verbose -o out 
/home/gha/actions-runner/_work/llvm-project/llvm-project/clang/test/Driver/ftime-trace.cpp
# executed command: 
/home/gha/actions-runner/_work/llvm-project/llvm-project/build/bin/clang 
--driver-mode=g++ -S -no-canonical-prefixes -ftime-trace=dir1 
-ftime-trace-granularity=0 -ftime-trace-verbose -o out 
/home/gha/actions-runner/_work/llvm-project/llvm-project/clang/test/Driver/ftime-trace.cpp
# note: command had n

[clang] [Driver] Enable -ftime-trace for CUDA/HIP device compilation (PR #179701)

2026-02-04 Thread via cfe-commits

llvmbot wrote:




@llvm/pr-subscribers-clang-driver

Author: Yaxun (Sam) Liu (yxsamliu)


Changes

[Driver] Enable -ftime-trace for CUDA/HIP device compilation

Previously, -ftime-trace only generated trace files for host compilation
when compiling CUDA/HIP code. Device compilation was excluded because
the OffloadingPrefix was non-empty, causing handleTimeTrace() to be
skipped.

This patch enables -ftime-trace for offload device compilation by:
1. Passing the offloading prefix to handleTimeTrace()
2. Including the bound architecture in the trace filename
3. Deriving the trace output directory from the -o option for device
   compilation (since the device output is a temp file)

Trace files are now generated for each offload target:
- Host: output.json
- Device: output-hip-amdgcn-amd-amdhsa-gfx906.json

Note: When using --save-temps, multiple compilation phases (preprocess,
compile, codegen) write to the same trace file, with each phase
overwriting the previous. This is pre-existing behavior that also
affects regular C++ compilation and is not addressed by this patch.

This addresses a long-standing limitation noted in D150282.



---
Full diff: https://github.com/llvm/llvm-project/pull/179701.diff


2 Files Affected:

- (modified) clang/lib/Driver/Driver.cpp (+42-10) 
- (modified) clang/test/Driver/ftime-trace.cpp (+35) 


```diff
diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp
index eb3f9cbea2845..4df11efab5967 100644
--- a/clang/lib/Driver/Driver.cpp
+++ b/clang/lib/Driver/Driver.cpp
@@ -5821,7 +5821,8 @@ InputInfoList Driver::BuildJobsForAction(
 
 static void handleTimeTrace(Compilation &C, const ArgList &Args,
 const JobAction *JA, const char *BaseInput,
-const InputInfo &Result) {
+const InputInfo &Result,
+StringRef OffloadingPrefix = "") {
   Arg *A =
   Args.getLastArg(options::OPT_ftime_trace, options::OPT_ftime_trace_EQ);
   if (!A)
@@ -5830,18 +5831,43 @@ static void handleTimeTrace(Compilation &C, const ArgList &Args,
   if (A->getOption().matches(options::OPT_ftime_trace_EQ)) {
 Path = A->getValue();
 if (llvm::sys::fs::is_directory(Path)) {
-  SmallString<128> Tmp(Result.getFilename());
-  llvm::sys::path::replace_extension(Tmp, "json");
-  llvm::sys::path::append(Path, llvm::sys::path::filename(Tmp));
+  // When -ftime-trace= and it's a directory:
+  // - For host/non-offload: use the output filename stem
+  // - For offload: use input filename stem + offloading prefix
+  SmallString<128> Tmp;
+  if (OffloadingPrefix.empty()) {
+Tmp = llvm::sys::path::stem(Result.getFilename());
+  } else {
+Tmp = llvm::sys::path::stem(BaseInput);
+Tmp += OffloadingPrefix;
+  }
+  Tmp += ".json";
+  llvm::sys::path::append(Path, Tmp);
 }
   } else {
 if (Arg *DumpDir = Args.getLastArgNoClaim(options::OPT_dumpdir)) {
-  // The trace file is ${dumpdir}${basename}.json. Note that dumpdir may not
-  // end with a path separator.
+  // The trace file is ${dumpdir}${basename}${offloadprefix}.json. Note
+  // that dumpdir may not end with a path separator.
   Path = DumpDir->getValue();
-  Path += llvm::sys::path::filename(BaseInput);
+  Path += llvm::sys::path::stem(BaseInput);
+  Path += OffloadingPrefix;
+} else if (!OffloadingPrefix.empty()) {
+  // For offloading, derive path from -o option or use current directory.
+  // The Result filename may be a temp file, so we use the -o output
+  // directory combined with the input filename and offload prefix.
+  if (Arg *FinalOutput = Args.getLastArg(options::OPT_o)) {
+Path = llvm::sys::path::parent_path(FinalOutput->getValue());
+if (!Path.empty())
+  Path += llvm::sys::path::get_separator();
+  }
+  Path += llvm::sys::path::stem(BaseInput);
+  Path += OffloadingPrefix;
 } else {
-  Path = Result.getFilename();
+  // Use the output filename stem for the trace file.
+  Path = llvm::sys::path::parent_path(Result.getFilename());
+  if (!Path.empty())
+Path += llvm::sys::path::get_separator();
+  Path += llvm::sys::path::stem(Result.getFilename());
 }
 llvm::sys::path::replace_extension(Path, "json");
   }
@@ -6100,8 +6126,14 @@ InputInfoList Driver::BuildJobsForActionNoCache(
  AtTopLevel, MultipleArchs,
  OffloadingPrefix),
BaseInput);
-if (T->canEmitIR() && OffloadingPrefix.empty())
-  handleTimeTrace(C, Args, JA, BaseInput, Result);
+if (T->canEmitIR()) {
+  // For time trace, include the bound arch in the prefix to ensure unique
+  // trace files for each offload target.
+  std::string TimeTracePrefix = OffloadingPrefix;
+  if (!OffloadingPrefix.empty() && !BoundArch

[clang] [Driver] Enable -ftime-trace for CUDA/HIP device compilation (PR #179701)

2026-02-04 Thread Yaxun Liu via cfe-commits

https://github.com/yxsamliu created 
https://github.com/llvm/llvm-project/pull/179701

[Driver] Enable -ftime-trace for CUDA/HIP device compilation

Previously, -ftime-trace only generated trace files for host compilation
when compiling CUDA/HIP code. Device compilation was excluded because
the OffloadingPrefix was non-empty, causing handleTimeTrace() to be
skipped.

This patch enables -ftime-trace for offload device compilation by:
1. Passing the offloading prefix to handleTimeTrace()
2. Including the bound architecture in the trace filename
3. Deriving the trace output directory from the -o option for device
   compilation (since the device output is a temp file)

Trace files are now generated for each offload target:
- Host: output.json
- Device: output-hip-amdgcn-amd-amdhsa-gfx906.json

Note: When using --save-temps, multiple compilation phases (preprocess,
compile, codegen) write to the same trace file, with each phase
overwriting the previous. This is pre-existing behavior that also
affects regular C++ compilation and is not addressed by this patch.

This addresses a long-standing limitation noted in D150282.



>From 0548ff5a891c047765429614d6a8c3ee266d3fff Mon Sep 17 00:00:00 2001
From: "Yaxun (Sam) Liu" 
Date: Wed, 4 Feb 2026 09:50:54 -0500
Subject: [PATCH] [Driver] Enable -ftime-trace for CUDA/HIP device compilation

Previously, -ftime-trace only generated trace files for host compilation
when compiling CUDA/HIP code. Device compilation was excluded because
the OffloadingPrefix was non-empty, causing handleTimeTrace() to be
skipped.

This patch enables -ftime-trace for offload device compilation by:
1. Passing the offloading prefix to handleTimeTrace()
2. Including the bound architecture in the trace filename
3. Deriving the trace output directory from the -o option for device
   compilation (since the device output is a temp file)

Trace files are now generated for each offload target:
- Host: output.json
- Device: output-hip-amdgcn-amd-amdhsa-gfx906.json

Note: When using --save-temps, multiple compilation phases (preprocess,
compile, codegen) write to the same trace file, with each phase
overwriting the previous. This is pre-existing behavior that also
affects regular C++ compilation and is not addressed by this patch.

This addresses a long-standing limitation noted in D150282.
---
 clang/lib/Driver/Driver.cpp   | 52 +--
 clang/test/Driver/ftime-trace.cpp | 35 +
 2 files changed, 77 insertions(+), 10 deletions(-)

diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp
index eb3f9cbea2845..4df11efab5967 100644
--- a/clang/lib/Driver/Driver.cpp
+++ b/clang/lib/Driver/Driver.cpp
@@ -5821,7 +5821,8 @@ InputInfoList Driver::BuildJobsForAction(
 
 static void handleTimeTrace(Compilation &C, const ArgList &Args,
 const JobAction *JA, const char *BaseInput,
-const InputInfo &Result) {
+const InputInfo &Result,
+StringRef OffloadingPrefix = "") {
   Arg *A =
   Args.getLastArg(options::OPT_ftime_trace, options::OPT_ftime_trace_EQ);
   if (!A)
@@ -5830,18 +5831,43 @@ static void handleTimeTrace(Compilation &C, const ArgList &Args,
   if (A->getOption().matches(options::OPT_ftime_trace_EQ)) {
 Path = A->getValue();
 if (llvm::sys::fs::is_directory(Path)) {
-  SmallString<128> Tmp(Result.getFilename());
-  llvm::sys::path::replace_extension(Tmp, "json");
-  llvm::sys::path::append(Path, llvm::sys::path::filename(Tmp));
+  // When -ftime-trace= and it's a directory:
+  // - For host/non-offload: use the output filename stem
+  // - For offload: use input filename stem + offloading prefix
+  SmallString<128> Tmp;
+  if (OffloadingPrefix.empty()) {
+Tmp = llvm::sys::path::stem(Result.getFilename());
+  } else {
+Tmp = llvm::sys::path::stem(BaseInput);
+Tmp += OffloadingPrefix;
+  }
+  Tmp += ".json";
+  llvm::sys::path::append(Path, Tmp);
 }
   } else {
 if (Arg *DumpDir = Args.getLastArgNoClaim(options::OPT_dumpdir)) {
-  // The trace file is ${dumpdir}${basename}.json. Note that dumpdir may not
-  // end with a path separator.
+  // The trace file is ${dumpdir}${basename}${offloadprefix}.json. Note
+  // that dumpdir may not end with a path separator.
   Path = DumpDir->getValue();
-  Path += llvm::sys::path::filename(BaseInput);
+  Path += llvm::sys::path::stem(BaseInput);
+  Path += OffloadingPrefix;
+} else if (!OffloadingPrefix.empty()) {
+  // For offloading, derive path from -o option or use current directory.
+  // The Result filename may be a temp file, so we use the -o output
+  // directory combined with the input filename and offload prefix.
+  if (Arg *FinalOutput = Args.getLastArg(options::OPT_o)) {
+Path = llvm::sys::path::parent_path(FinalOutput->getValue());
+