https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/190369
Previously there was a vector of toolchains, but a number of places assumed there was only a single toolchain. I'm also not sure how you were supposed to identify which toolchain to use from this array. Make this parallel to the stored GpuArches. For the fat binary cases, we still need to pick a toolchain so that still just picks the first one; it probably should use the most neutral available triple. This also doesn't feel like a complete fix. The various Actions all contain a reference to an OffloadingToolChain, which seems to frequently be missing and isn't set at construction time. >From 6f48ec2579360623d879a61a535f807d05945780 Mon Sep 17 00:00:00 2001 From: Matt Arsenault <[email protected]> Date: Fri, 3 Apr 2026 11:11:50 +0200 Subject: [PATCH] clang: Stop assuming one toolchain covers all GPUArchs Previously there was a vector of toolchains, but a number of places assumed there was only a single toolchain. I'm also not sure how you were supposed to identify which toolchain to use from this array. Make this parallel to the stored GpuArches. For the fat binary cases, we still need to pick a toolchain so that still just picks the first one; it probably should use the most neutral available triple. This also doesn't feel like a complete fix. The various Actions all contain a reference to an OffloadingToolChain, which seems to frequently be missing and isn't set at construction time. --- clang/lib/Driver/Driver.cpp | 40 ++++++++++++++++++++----------------- 1 file changed, 22 insertions(+), 18 deletions(-) diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 0686a89d42faf..f4cbc443aec36 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -3296,7 +3296,9 @@ class OffloadingActionBuilder final { /// Tool chains associated with this builder. The same programming /// model may have associated one or more tool chains. + /// There should be one entry for each TargetID. SmallVector<const ToolChain *, 2> ToolChains; + const ToolChain *FatBinaryToolChain = nullptr; /// The derived arguments associated with this builder. DerivedArgList &Args; @@ -3478,9 +3480,9 @@ class OffloadingActionBuilder final { llvm::sys::path::extension(FileName) == LibFileExt)) return ABRT_Inactive; - for (auto Arch : GpuArchList) { + for (auto [Arch, ToolChain] : llvm::zip(GpuArchList, ToolChains)) { CudaDeviceActions.push_back(UA); - UA->registerDependentActionInfo(ToolChains[0], Arch, + UA->registerDependentActionInfo(ToolChain, Arch, AssociatedOffloadKind); } IsActive = true; @@ -3492,15 +3494,16 @@ class OffloadingActionBuilder final { void appendTopLevelActions(ActionList &AL) override { // Utility to append actions to the top level list. - auto AddTopLevel = [&](Action *A, TargetID TargetID) { + auto AddTopLevel = [&](Action *A, TargetID TargetID, + const ToolChain *TC) { OffloadAction::DeviceDependences Dep; - Dep.add(*A, *ToolChains.front(), TargetID, AssociatedOffloadKind); + Dep.add(*A, *TC, TargetID, AssociatedOffloadKind); AL.push_back(C.MakeAction<OffloadAction>(Dep, A->getType())); }; // If we have a fat binary, add it to the list. if (CudaFatBinary) { - AddTopLevel(CudaFatBinary, OffloadArch::Unused); + AddTopLevel(CudaFatBinary, OffloadArch::Unused, FatBinaryToolChain); CudaDeviceActions.clear(); CudaFatBinary = nullptr; return; @@ -3514,10 +3517,10 @@ class OffloadingActionBuilder final { // architecture. assert(CudaDeviceActions.size() == GpuArchList.size() && "Expecting one action per GPU architecture."); - assert(ToolChains.size() == 1 && - "Expecting to have a single CUDA toolchain."); + assert(ToolChains.size() == GpuArchList.size() && + "Expecting to have a toolchain per GPU architecture"); for (unsigned I = 0, E = GpuArchList.size(); I != E; ++I) - AddTopLevel(CudaDeviceActions[I], GpuArchList[I]); + AddTopLevel(CudaDeviceActions[I], GpuArchList[I], ToolChains[I]); CudaDeviceActions.clear(); } @@ -3550,20 +3553,21 @@ class OffloadingActionBuilder final { return true; } - std::set<StringRef> GpuArchs; + std::set<std::pair<StringRef, const ToolChain *>> GpuArchs; for (Action::OffloadKind Kind : {Action::OFK_Cuda, Action::OFK_HIP}) { for (auto &I : llvm::make_range(C.getOffloadToolChains(Kind))) { - ToolChains.push_back(I.second); - for (auto Arch : C.getDriver().getOffloadArchs(C, C.getArgs(), Kind, *I.second)) - GpuArchs.insert(Arch); + GpuArchs.insert({Arch, I.second}); } } - for (auto Arch : GpuArchs) + for (auto [Arch, TC] : GpuArchs) { GpuArchList.push_back(Arch.data()); + ToolChains.push_back(TC); + } + FatBinaryToolChain = ToolChains.front(); CompileHostOnly = C.getDriver().offloadHostOnly(); EmitLLVM = Args.getLastArg(options::OPT_emit_llvm); EmitAsm = Args.getLastArg(options::OPT_S); @@ -3647,7 +3651,7 @@ class OffloadingActionBuilder final { for (auto &A : {AssembleAction, BackendAction}) { OffloadAction::DeviceDependences DDep; - DDep.add(*A, *ToolChains.front(), GpuArchList[I], Action::OFK_Cuda); + DDep.add(*A, *ToolChains[I], GpuArchList[I], Action::OFK_Cuda); DeviceActions.push_back( C.MakeAction<OffloadAction>(DDep, A->getType())); } @@ -3822,7 +3826,7 @@ class OffloadingActionBuilder final { // device arch of the next action being propagated to the above link // action. OffloadAction::DeviceDependences DDep; - DDep.add(*CudaDeviceActions[I], *ToolChains.front(), GpuArchList[I], + DDep.add(*CudaDeviceActions[I], *ToolChains[I], GpuArchList[I], AssociatedOffloadKind); CudaDeviceActions[I] = C.MakeAction<OffloadAction>( DDep, CudaDeviceActions[I]->getType()); @@ -3877,7 +3881,7 @@ class OffloadingActionBuilder final { *BundleOutput) { for (unsigned I = 0, E = GpuArchList.size(); I != E; ++I) { OffloadAction::DeviceDependences DDep; - DDep.add(*CudaDeviceActions[I], *ToolChains.front(), GpuArchList[I], + DDep.add(*CudaDeviceActions[I], *ToolChains[I], GpuArchList[I], AssociatedOffloadKind); CudaDeviceActions[I] = C.MakeAction<OffloadAction>( DDep, CudaDeviceActions[I]->getType()); @@ -3915,8 +3919,8 @@ class OffloadingActionBuilder final { // Linking all inputs for the current GPU arch. // LI contains all the inputs for the linker. OffloadAction::DeviceDependences DeviceLinkDeps; - DeviceLinkDeps.add(*DeviceLinkAction, *ToolChains[0], - GpuArchList[I], AssociatedOffloadKind); + DeviceLinkDeps.add(*DeviceLinkAction, *ToolChains[I], GpuArchList[I], + AssociatedOffloadKind); Actions.push_back(C.MakeAction<OffloadAction>( DeviceLinkDeps, DeviceLinkAction->getType())); ++I; _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
