https://github.com/jhuber6 updated 
https://github.com/llvm/llvm-project/pull/204186

>From 2cb1c18647d0a3eb858094f0b5bca16e106e22bb Mon Sep 17 00:00:00 2001
From: Joseph Huber <[email protected]>
Date: Tue, 16 Jun 2026 09:45:45 -0500
Subject: [PATCH 1/2] [ClangLinkerWrapper] Use discrete steps in verbose mode

Summary:
One persistent problem with the linker wrapper flow is that it was more
difficult to reuse as a script than the previous flow. This is because
it did a lot of work internally. In the past we moved a lot of this into
dedicated LLVM tools, so now it's possible to simply use these tools
instead.

This PR changes the verbose mode handling to defer steps to tools rather
than doing them internally. This lets users enable verbose printing and
copy/paste the printed commands to re-run the steps.

comment

Save temps only

Update
---
 .../linker-wrapper-verbose.c                  |  94 ++++++++
 .../ClangLinkerWrapper.cpp                    | 203 +++++++++++++++++-
 2 files changed, 288 insertions(+), 9 deletions(-)
 create mode 100644 
clang/test/OffloadTools/clang-linker-wrapper/linker-wrapper-verbose.c

diff --git 
a/clang/test/OffloadTools/clang-linker-wrapper/linker-wrapper-verbose.c 
b/clang/test/OffloadTools/clang-linker-wrapper/linker-wrapper-verbose.c
new file mode 100644
index 0000000000000..54bd856746fd3
--- /dev/null
+++ b/clang/test/OffloadTools/clang-linker-wrapper/linker-wrapper-verbose.c
@@ -0,0 +1,94 @@
+// REQUIRES: x86-registered-target
+
+// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.elf.o
+
+//
+// For OpenMP everything goes through the LLVM offloading binary type.
+//
+// RUN: llvm-offload-binary -o %t.out \
+// RUN:   
--image=file=%t.elf.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70 \
+// RUN:   
--image=file=%t.elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx90a
+// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o 
-fembed-offload-object=%t.out
+// RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu 
--wrapper-verbose --save-temps --dry-run \
+// RUN:   --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s 
--check-prefix=OPENMP
+
+// OPENMP: llvm-offload-binary{{.*}} {{.*}}.o 
--image=kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70,file={{.*}}.o 
--image=kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx90a,file={{.*}}.o
+// OPENMP: clang{{.*}} --target=nvptx64-nvidia-cuda -march=sm_70
+// OPENMP: clang{{.*}} --target=amdgcn-amd-amdhsa -mcpu=gfx90a
+// OPENMP: llvm-offload-binary{{.*}} -o {{.*}}.offload 
--image=file={{.*}}.img,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70
+// OPENMP: llvm-offload-binary{{.*}} -o {{.*}}.offload 
--image=file={{.*}}.img,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx90a
+// OPENMP: llvm-offload-wrapper{{.*}} --kind=openmp 
--triple=x86_64-unknown-linux-gnu -o [[BC:.*]].bc {{.*}}.offload {{.*}}.offload
+// OPENMP: clang{{.*}} --no-default-config --target=x86_64-unknown-linux-gnu 
-c -fPIC -o {{.*}}.openmp.image.wrapper{{.*}}.o [[BC]].bc
+
+//
+// The '--relocatable' flag is forwarded to the wrapper tool for OpenMP.
+//
+// RUN: llvm-offload-binary -o %t.out \
+// RUN:   
--image=file=%t.elf.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70
+// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o 
-fembed-offload-object=%t.out
+// RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu 
--wrapper-verbose --save-temps --dry-run \
+// RUN:   --linker-path=/usr/bin/ld -r %t.o -o a.out 2>&1 | FileCheck %s 
--check-prefix=RELOCATABLE
+
+// RELOCATABLE: llvm-offload-wrapper{{.*}} --kind=openmp 
--triple=x86_64-unknown-linux-gnu -o {{.*}}.bc --relocatable {{.*}}.offload
+
+//
+// For CUDA the device images are combined with 'fatbinary'.
+//
+// RUN: llvm-offload-binary -o %t.out \
+// RUN:   
--image=file=%t.elf.o,kind=cuda,triple=nvptx64-nvidia-cuda,arch=sm_70 \
+// RUN:   --image=file=%t.elf.o,kind=cuda,triple=nvptx64-nvidia-cuda,arch=sm_52
+// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o 
-fembed-offload-object=%t.out
+// RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu 
--wrapper-verbose --save-temps --dry-run \
+// RUN:   --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s 
--check-prefix=CUDA
+
+// CUDA: llvm-offload-binary{{.*}} {{.*}}.o 
--image=kind=cuda,triple=nvptx64-nvidia-cuda,arch=sm_70,file={{.*}}.o 
--image=kind=cuda,triple=nvptx64-nvidia-cuda,arch=sm_52,file={{.*}}.o
+// CUDA: clang{{.*}} --target=nvptx64-nvidia-cuda -march=sm_70
+// CUDA: clang{{.*}} --target=nvptx64-nvidia-cuda -march=sm_52
+// CUDA: fatbinary{{.*}}--create [[FB:.*]].fatbin 
{{.*}}--image3=kind=elf,sm=70{{.*}}--image3=kind=elf,sm=52
+// CUDA: llvm-offload-wrapper{{.*}} --kind=cuda 
--triple=x86_64-unknown-linux-gnu -o [[BC:.*]].bc [[FB]].fatbin
+// CUDA: clang{{.*}} --no-default-config --target=x86_64-unknown-linux-gnu -c 
-fPIC -o {{.*}}.cuda.image.wrapper{{.*}}.o [[BC]].bc
+
+//
+// For HIP the device images are combined with 'clang-offload-bundler'.
+//
+// RUN: llvm-offload-binary -o %t.out \
+// RUN:   --image=file=%t.elf.o,kind=hip,triple=amdgcn-amd-amdhsa,arch=gfx90a \
+// RUN:   --image=file=%t.elf.o,kind=hip,triple=amdgcn-amd-amdhsa,arch=gfx908
+// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o 
-fembed-offload-object=%t.out
+// RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu 
--wrapper-verbose --save-temps --dry-run \
+// RUN:   --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s 
--check-prefix=HIP
+
+// HIP: llvm-offload-binary{{.*}} {{.*}}.o 
--image=kind=hip,triple=amdgcn-amd-amdhsa,arch=gfx90a,file={{.*}}.o 
--image=kind=hip,triple=amdgcn-amd-amdhsa,arch=gfx908,file={{.*}}.o
+// HIP: clang{{.*}} --target=amdgcn-amd-amdhsa -mcpu=gfx90a
+// HIP: clang{{.*}} --target=amdgcn-amd-amdhsa -mcpu=gfx908
+// HIP: 
clang-offload-bundler{{.*}}-targets=host-x86_64-unknown-linux-gnu,hip-amdgcn-amd-amdhsa--gfx90a,hip-amdgcn-amd-amdhsa--gfx908{{.*}}-output=[[FB:.*]].hipfb
+// HIP: llvm-offload-wrapper{{.*}} --kind=hip 
--triple=x86_64-unknown-linux-gnu -o [[BC:.*]].bc [[FB]].hipfb
+// HIP: clang{{.*}} --no-default-config --target=x86_64-unknown-linux-gnu -c 
-fPIC -o {{.*}}.hip.image.wrapper{{.*}}.o [[BC]].bc
+
+//
+// For SYCL the device image is linked with 'clang --sycl-link' and wrapped
+// directly with 'llvm-offload-wrapper --kind=sycl'.
+//
+// RUN: llvm-offload-binary -o %t.out \
+// RUN:   
--image=file=%t.elf.o,kind=sycl,triple=spirv64-unknown-unknown,arch=generic
+// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o 
-fembed-offload-object=%t.out
+// RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu 
--wrapper-verbose --save-temps --dry-run \
+// RUN:   --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s 
--check-prefix=SYCL
+
+// SYCL: llvm-offload-binary{{.*}} {{.*}}.o 
--image=kind=sycl,triple=spirv64-unknown-unknown,arch=generic,file={{.*}}.o
+// SYCL: clang{{.*}} --target=spirv64-unknown-unknown {{.*}} --sycl-link 
{{.*}}-triple=spirv64-unknown-unknown{{.*}}-arch=
+// SYCL: llvm-offload-wrapper{{.*}} --kind=sycl 
--triple=x86_64-unknown-linux-gnu -o [[BC:.*]].bc {{.*}}.img
+// SYCL: clang{{.*}} --no-default-config --target=x86_64-unknown-linux-gnu -c 
-fPIC -o {{.*}}.sycl.image.wrapper{{.*}}.o [[BC]].bc
+
+//
+// Images pulled from a static archive are extracted from the archive path and
+// singled out by their member name so the replayed command is reproducible.
+//
+// RUN: llvm-offload-binary -o %t.out \
+// RUN:   
--image=file=%t.elf.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70
+// RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o 
-fembed-offload-object=%t.out
+// RUN: rm -f %t.a && llvm-ar rcs %t.a %t.o
+// RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu 
--wrapper-verbose --save-temps --dry-run \
+// RUN:   --should-extract=sm_70 --linker-path=/usr/bin/ld %t.a -o a.out 2>&1 
| FileCheck %s --check-prefix=ARCHIVE
+
+// ARCHIVE: llvm-offload-binary{{.*}} {{.*}}.a 
--image=kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70,member={{.*}}.o,file={{.*}}.o
diff --git a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp 
b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
index a4a67eed7d47f..61c6de2963b90 100644
--- a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
+++ b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
@@ -723,6 +723,67 @@ Expected<StringRef> compileModule(Module &M, OffloadKind 
Kind) {
   return *TempFileOrErr;
 }
 
+/// Wraps \p Buffers by invoking 'llvm-offload-wrapper' and then 'clang' as
+/// discrete commands, returning the path of the compiled wrapper object.
+Expected<StringRef>
+wrapDeviceImagesVerbose(ArrayRef<std::unique_ptr<MemoryBuffer>> Buffers,
+                        const ArgList &Args, OffloadKind Kind) {
+  Expected<std::string> WrapperPath = findProgram(
+      "llvm-offload-wrapper", {getExecutableDir("llvm-offload-wrapper")});
+  if (!WrapperPath)
+    return WrapperPath.takeError();
+
+  llvm::Triple Triple(
+      Args.getLastArgValue(OPT_host_triple_EQ, sys::getDefaultTargetTriple()));
+
+  // Create the '.bc' file that 'llvm-offload-wrapper' is asked to write.
+  auto BitcodeOrErr = createOutputFile(
+      ExecutableName + "." + getOffloadKindName(Kind) + ".image.wrapper", 
"bc");
+  if (!BitcodeOrErr)
+    return BitcodeOrErr.takeError();
+
+  SmallVector<StringRef> WrapperArgs = {
+      *WrapperPath,
+      Args.MakeArgString("--kind=" + getOffloadKindName(Kind)),
+      Args.MakeArgString("--triple=" + Triple.getTriple()),
+      "-o",
+      *BitcodeOrErr,
+  };
+  if (Kind == OFK_OpenMP && Args.hasArg(OPT_relocatable))
+    WrapperArgs.push_back("--relocatable");
+  for (const auto &Buffer : Buffers)
+    WrapperArgs.push_back(Buffer->getBufferIdentifier());
+
+  if (Error Err = executeCommands(*WrapperPath, WrapperArgs))
+    return std::move(Err);
+
+  // Compile the bitcode file into a '-fPIC' object for the same triple.
+  Expected<std::string> ClangPath =
+      findProgram("clang", {getExecutableDir("clang")});
+  if (!ClangPath)
+    return ClangPath.takeError();
+
+  auto ObjectOrErr = createOutputFile(
+      ExecutableName + "." + getOffloadKindName(Kind) + ".image.wrapper", "o");
+  if (!ObjectOrErr)
+    return ObjectOrErr.takeError();
+
+  const StringRef ClangArgs[] = {
+      *ClangPath,
+      "--no-default-config",
+      Args.MakeArgString("--target=" + Triple.getTriple()),
+      "-c",
+      "-fPIC",
+      "-o",
+      *ObjectOrErr,
+      *BitcodeOrErr,
+  };
+  if (Error Err = executeCommands(*ClangPath, ClangArgs))
+    return std::move(Err);
+
+  return *ObjectOrErr;
+}
+
 /// Creates the object file containing the device image and runtime
 /// registration code from the device images stored in \p Images.
 Expected<StringRef>
@@ -730,6 +791,10 @@ wrapDeviceImages(ArrayRef<std::unique_ptr<MemoryBuffer>> 
Buffers,
                  const ArgList &Args, OffloadKind Kind) {
   llvm::TimeTraceScope TimeScope("Wrap bundled images");
 
+  // We use the discrete tools if we are in verbose mode with '--save-temps'.
+  if (Verbose && SaveTemps && !Args.hasArg(OPT_print_wrapped_module))
+    return wrapDeviceImagesVerbose(Buffers, Args, Kind);
+
   SmallVector<ArrayRef<char>, 4> BuffersToWrap;
   for (const auto &Buffer : Buffers)
     BuffersToWrap.emplace_back(
@@ -792,6 +857,50 @@ wrapDeviceImages(ArrayRef<std::unique_ptr<MemoryBuffer>> 
Buffers,
   return *FileOrErr;
 }
 
+/// Bundle every OpenMP image with a separate 'llvm-offload-binary' command.
+Expected<SmallVector<std::unique_ptr<MemoryBuffer>>>
+bundleOpenMPVerbose(ArrayRef<OffloadingImage> Images) {
+  Expected<std::string> OffloadBinaryPath = findProgram(
+      "llvm-offload-binary", {getExecutableDir("llvm-offload-binary")});
+  if (!OffloadBinaryPath)
+    return OffloadBinaryPath.takeError();
+
+  BumpPtrAllocator Alloc;
+  StringSaver Saver(Alloc);
+  SmallVector<std::unique_ptr<MemoryBuffer>> Buffers;
+  for (const OffloadingImage &Image : Images) {
+    StringRef ImageFile = Image.Image->getBufferIdentifier();
+    auto BinaryOrErr =
+        createOutputFile(sys::path::stem(ImageFile) + "." +
+                             getOffloadKindName(Image.TheOffloadKind),
+                         "offload");
+    if (!BinaryOrErr)
+      return BinaryOrErr.takeError();
+
+    std::string ImageArg = ("--image=file=" + ImageFile +
+                            ",kind=" + 
getOffloadKindName(Image.TheOffloadKind))
+                               .str();
+    for (const auto &[Key, Value] : Image.StringData)
+      ImageArg += ("," + Key + "=" + Value).str();
+
+    SmallVector<StringRef> CmdArgs = {*OffloadBinaryPath, "-o", *BinaryOrErr,
+                                      Saver.save(ImageArg)};
+    if (Error Err = executeCommands(*OffloadBinaryPath, CmdArgs))
+      return std::move(Err);
+
+    auto BufferOrErr = MemoryBuffer::getFileOrSTDIN(*BinaryOrErr);
+    if (std::error_code EC = BufferOrErr.getError()) {
+      if (DryRun)
+        BufferOrErr = MemoryBuffer::getMemBuffer("", *BinaryOrErr);
+      else
+        return createFileError(*BinaryOrErr, EC);
+    }
+    Buffers.emplace_back(std::move(*BufferOrErr));
+  }
+
+  return std::move(Buffers);
+}
+
 Expected<SmallVector<std::unique_ptr<MemoryBuffer>>>
 bundleOpenMP(ArrayRef<OffloadingImage> Images) {
   SmallVector<std::unique_ptr<MemoryBuffer>> Buffers;
@@ -809,7 +918,8 @@ bundleSYCL(ArrayRef<OffloadingImage> Images) {
     // clang-sycl-linker packs outputs into one binary blob. Therefore, it is
     // passed to Offload Wrapper as is.
     StringRef S(Image.Image->getBufferStart(), Image.Image->getBufferSize());
-    Buffers.emplace_back(MemoryBuffer::getMemBufferCopy(S));
+    Buffers.emplace_back(
+        MemoryBuffer::getMemBufferCopy(S, Image.Image->getBufferIdentifier()));
   }
 
   return std::move(Buffers);
@@ -830,8 +940,12 @@ bundleCuda(ArrayRef<OffloadingImage> Images, const ArgList 
&Args) {
       llvm::MemoryBuffer::getFileOrSTDIN(*FileOrErr);
 
   SmallVector<std::unique_ptr<MemoryBuffer>> Buffers;
-  if (std::error_code EC = ImageOrError.getError())
-    return createFileError(*FileOrErr, EC);
+  if (std::error_code EC = ImageOrError.getError()) {
+    if (DryRun)
+      ImageOrError = MemoryBuffer::getMemBuffer("", *FileOrErr);
+    else
+      return createFileError(*FileOrErr, EC);
+  }
   Buffers.emplace_back(std::move(*ImageOrError));
 
   return std::move(Buffers);
@@ -853,8 +967,12 @@ bundleHIP(ArrayRef<OffloadingImage> Images, const ArgList 
&Args) {
       llvm::MemoryBuffer::getFileOrSTDIN(*FileOrErr);
 
   SmallVector<std::unique_ptr<MemoryBuffer>> Buffers;
-  if (std::error_code EC = ImageOrError.getError())
-    return createFileError(*FileOrErr, EC);
+  if (std::error_code EC = ImageOrError.getError()) {
+    if (DryRun)
+      ImageOrError = MemoryBuffer::getMemBuffer("", *FileOrErr);
+    else
+      return createFileError(*FileOrErr, EC);
+  }
   Buffers.emplace_back(std::move(*ImageOrError));
 
   return std::move(Buffers);
@@ -868,7 +986,8 @@ bundleLinkedOutput(ArrayRef<OffloadingImage> Images, const 
ArgList &Args,
   llvm::TimeTraceScope TimeScope("Bundle linked output");
   switch (Kind) {
   case OFK_OpenMP:
-    return bundleOpenMP(Images);
+    return (Verbose && SaveTemps) ? bundleOpenMPVerbose(Images)
+                                  : bundleOpenMP(Images);
   case OFK_SYCL:
     return bundleSYCL(Images);
   case OFK_Cuda:
@@ -1040,7 +1159,7 @@ linkAndWrapDeviceFiles(ArrayRef<SmallVector<OffloadFile>> 
LinkerInputFiles,
           llvm::MemoryBuffer::getFileOrSTDIN(*OutputOrErr);
       if (std::error_code EC = FileOrErr.getError()) {
         if (DryRun)
-          FileOrErr = MemoryBuffer::getMemBuffer("");
+          FileOrErr = MemoryBuffer::getMemBuffer("", *OutputOrErr);
         else
           return createFileError(*OutputOrErr, EC);
       }
@@ -1164,6 +1283,63 @@ std::optional<std::string> searchLibrary(StringRef 
Input, StringRef Root,
   return searchLibraryBaseName(Input, Root, SearchPaths);
 }
 
+/// In verbose mode we need to replay the extracted files so the user can
+/// reproduce the generated files. This only prints the steps that result in 
the
+/// same output files given the input.
+Error emitExtractCommands(
+    ArrayRef<SmallVector<OffloadFile>> InputsForTarget,
+    const DenseMap<StringRef, StringRef> &SourceForImage) {
+  Expected<std::string> OffloadBinaryPath = findProgram(
+      "llvm-offload-binary", {getExecutableDir("llvm-offload-binary")});
+  if (!OffloadBinaryPath)
+    return OffloadBinaryPath.takeError();
+
+  BumpPtrAllocator Alloc;
+  StringSaver Saver(Alloc);
+  MapVector<StringRef, SmallVector<StringRef>> Commands;
+  DenseSet<StringRef> Seen;
+  for (const auto &Input : InputsForTarget) {
+    for (const OffloadFile &File : Input) {
+      const OffloadBinary &Binary = *File.getBinary();
+      StringRef Identifier = Binary.getMemoryBufferRef().getBufferIdentifier();
+      StringRef Source = SourceForImage.lookup(Identifier);
+      if (Source.empty())
+        Source = Identifier;
+
+      auto OutputOrErr =
+          createOutputFile(sys::path::stem(Identifier) + "-" +
+                               Binary.getTriple() + "-" + Binary.getArch(),
+                           "o");
+      if (!OutputOrErr)
+        return OutputOrErr.takeError();
+
+      std::string ImageArg =
+          ("--image=kind=" + getOffloadKindName(Binary.getOffloadKind()) +
+           ",triple=" + Binary.getTriple())
+              .str();
+      if (!Binary.getArch().empty())
+        ImageArg += (",arch=" + Binary.getArch()).str();
+      file_magic Magic;
+      if (!identify_magic(Source, Magic) && Magic == file_magic::archive)
+        ImageArg += (",member=" + sys::path::filename(Identifier)).str();
+      ImageArg += (",file=" + *OutputOrErr).str();
+
+      // Skip a (source, image argument) pair that was already recorded.
+      StringRef SavedImage = Saver.save(ImageArg);
+      if (!Seen.insert(Saver.save(Source + "\x01" + SavedImage)).second)
+        continue;
+      Commands[Source].push_back(SavedImage);
+    }
+  }
+
+  for (const auto &[Source, Images] : Commands) {
+    SmallVector<StringRef> CmdArgs = {*OffloadBinaryPath, Source};
+    llvm::append_range(CmdArgs, Images);
+    printCommands(CmdArgs);
+  }
+  return Error::success();
+}
+
 /// Search the input files and libraries for embedded device offloading code
 /// and add it to the list of files to be linked. Files coming from static
 /// libraries are only added to the input if they are used by an existing
@@ -1188,6 +1364,7 @@ getDeviceInput(const ArgList &Args) {
   bool WholeArchive = Args.hasArg(OPT_wholearchive_flag);
   SmallVector<OffloadFile> ObjectFilesToExtract;
   SmallVector<OffloadFile> ArchiveFilesToExtract;
+  DenseMap<StringRef, StringRef> SourceForImage;
   for (const opt::Arg *Arg : Args.filtered(
            OPT_INPUT, OPT_library, OPT_whole_archive, OPT_no_whole_archive)) {
     if (Arg->getOption().matches(OPT_whole_archive) ||
@@ -1222,6 +1399,10 @@ getDeviceInput(const ArgList &Args) {
       return std::move(Err);
 
     for (auto &Binary : Binaries) {
+      if (Verbose && SaveTemps)
+        SourceForImage.try_emplace(
+            Binary.getBinary()->getMemoryBufferRef().getBufferIdentifier(),
+            Saver.save(StringRef(*Filename)));
       if (identify_magic(Buffer.getBuffer()) == file_magic::archive &&
           !WholeArchive)
         ArchiveFilesToExtract.emplace_back(std::move(Binary));
@@ -1251,8 +1432,8 @@ getDeviceInput(const ArgList &Args) {
   }
 
   llvm::DenseSet<StringRef> ShouldExtract;
-  for (auto &Arg : Args.getAllArgValues(OPT_should_extract))
-    ShouldExtract.insert(Arg);
+  for (StringRef Arg : Args.getAllArgValues(OPT_should_extract))
+    ShouldExtract.insert(Saver.save(Arg));
 
   // We only extract archive members from the fat binary if we find a used or
   // requested target. Unlike normal static archive handling, we just extract
@@ -1284,6 +1465,10 @@ getDeviceInput(const ArgList &Args) {
   for (auto &[ID, Input] : InputFiles)
     InputsForTarget.emplace_back(std::move(Input));
 
+  if (Verbose && SaveTemps)
+    if (Error Err = emitExtractCommands(InputsForTarget, SourceForImage))
+      return std::move(Err);
+
   return std::move(InputsForTarget);
 }
 

>From c27717213c5973986bc6e78e8a45b5e84be50461 Mon Sep 17 00:00:00 2001
From: Joseph Huber <[email protected]>
Date: Tue, 23 Jun 2026 11:25:06 -0500
Subject: [PATCH 2/2] Add Docs

---
 clang/docs/ClangLinkerWrapper.rst | 51 ++++++++++++++++++-------------
 1 file changed, 30 insertions(+), 21 deletions(-)

diff --git a/clang/docs/ClangLinkerWrapper.rst 
b/clang/docs/ClangLinkerWrapper.rst
index 3637bdb848273..555d123372f29 100644
--- a/clang/docs/ClangLinkerWrapper.rst
+++ b/clang/docs/ClangLinkerWrapper.rst
@@ -57,7 +57,9 @@ only for the linker wrapper will be forwarded to the wrapped 
linker job.
     --save-temps           Save intermediate results
     --sysroot<value>       Set the system root
     --verbose              Verbose output from tools
-    --v                    Display the version number and exit
+    -v
+    --wrapper-verbose      Verbose output from the linker-wrapper
+    --version              Display the version number and exit
     --                     The separator for the wrapped linker arguments
 
 The linker wrapper will generate the appropriate runtime calls to register the
@@ -97,34 +99,41 @@ The linker wrapper performs a lot of steps internally, such 
as input matching,
 symbol resolution, and image registration. This makes it difficult to debug in
 some scenarios. The behavior of the linker-wrapper is controlled mostly through
 metadata, described in `clang documentation
-<https://clang.llvm.org/docs/OffloadingDesign.html>`_. Intermediate output can
-be obtained from the linker-wrapper using the ``--save-temps`` flag. These 
files
-can then be modified.
+<https://clang.llvm.org/docs/OffloadingDesign.html>`_.
+
+The individual tool invocations the wrapper performs can be printed with the
+``--wrapper-verbose`` flag, and the intermediate files they operate on can be
+kept with ``--save-temps``. When both are enabled the wrapper emits a
+self-contained sequence of commands that reproduce its output. The example 
below
+shows the sequence for a single OpenMP image.
 
 .. code-block:: sh
 
   $> clang openmp.c -fopenmp --offload-arch=gfx90a -c
-  $> clang openmp.o -fopenmp --offload-arch=gfx90a -Wl,--save-temps
-  $> ; Modify temp files.
-  $> llvm-objcopy --update-section=.llvm.offloading=out.bc openmp.o
+  $> clang openmp.o -fopenmp --offload-arch=gfx90a -Wl,--wrapper-verbose 
-Wl,--save-temps
 
-Doing this will allow you to override one of the input files by replacing its
-embedded offloading metadata with a user-modified version. However, this will 
be
-more difficult when there are multiple input files. For a very large hammer, 
the
-``--override-image=<kind>=<file>`` flag can be used.
+  # 1. Extract each embedded device image from the host object.
+  llvm-offload-binary openmp.o 
--image=kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx90a,file=openmp.gfx90a.o
 
-In the following example, we use the ``--save-temps`` to obtain the LLVM-IR 
just
-before running the backend. We then modify it to test altered behavior, and 
then
-compile it to a binary. This can then be passed to the linker-wrapper which 
will
-then ignore all embedded metadata and use the provided image as if it were the
-result of the device linking phase.
+  # 2. Link the extracted image for the device target.
+  clang --target=amdgcn-amd-amdhsa -mcpu=gfx90a openmp.gfx90a.o -o 
openmp.gfx90a.img <...>
 
-.. code-block:: sh
+  # 3. Bundle the linked image back into the offloading binary format.
+  llvm-offload-binary -o openmp.gfx90a.offload 
--image=file=openmp.gfx90a.img,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx90a
+
+  # 4. Generate the host runtime registration code for the bundled images.
+  llvm-offload-wrapper --kind=openmp --triple=x86_64-unknown-linux-gnu -o 
openmp.wrapper.bc openmp.gfx90a.offload
+
+  # 5. Compile the registration code into a host object.
+  clang --target=x86_64-unknown-linux-gnu -c -fPIC -o openmp.wrapper.o 
openmp.wrapper.bc
+
+  # 6. Link the host objects with the registration code into the executable.
+  ld.lld openmp.host.o openmp.wrapper.o -o a.out <...>
+
+To replace the output of a single stage, edit the relevant intermediate file 
and
+re-run the remaining commands. To bypass the device link entirely and 
substitute
+a pre-built image, use the ``--override-image=<kind>=<file>`` flag.
 
-  $> clang openmp.c -fopenmp --offload-arch=gfx90a -Wl,--save-temps
-  $> ; Modify temp files.
-  $> clang --target=amdgcn-amd-amdhsa -mcpu=gfx90a -nogpulib out.bc -o a.out
-  $> clang openmp.c -fopenmp --offload-arch=gfx90a 
-Wl,--override-image=openmp=a.out
 
 Example
 =======

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to