yaxunl created this revision.
yaxunl added a reviewer: tra.
Herald added a subscriber: jdoerfert.
Herald added a project: clang.

When -fno-gpu-rdc is set, device code is compiled, linked, and assembled into 
fat binary
and embedded as string in object files. The object files are normal object 
files which
can be linked by host linker. In the linking stage, the object files should not 
be unbundled
when -fno-gpu-rdc is set since they are normal object files, not bundles. The 
object files
only need to be unbundled when -fgpu-rdc is set.

Currently clang always unbundles object files, disregarding -fgpu-rdc option.

This patch fixes that.


Repository:
  rC Clang

https://reviews.llvm.org/D58917

Files:
  lib/Driver/Driver.cpp
  test/Driver/hip-binding.hip
  test/Driver/hip-link-shared-library.hip

Index: test/Driver/hip-link-shared-library.hip
===================================================================
--- test/Driver/hip-link-shared-library.hip
+++ test/Driver/hip-link-shared-library.hip
@@ -1,7 +1,7 @@
 // RUN: touch %t.o
 // RUN: %clang --hip-link -ccc-print-bindings -target x86_64-linux-gnu \
 // RUN:   --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %t.o %S/Inputs/in.so \
-// RUN: 2>&1 | FileCheck %s
+// RUN:   -fgpu-rdc 2>&1 | FileCheck %s
 
 // CHECK: # "amdgcn-amd-amdhsa" - "offload bundler", inputs: ["[[IN:.*o]]"], outputs: ["[[OBJ1:.*o]]", "[[OBJ2:.*o]]", "[[OBJ3:.*o]]"]
 // CHECK: # "amdgcn-amd-amdhsa" - "AMDGCN::Linker", inputs: ["[[OBJ2]]"], output: "[[IMG2:.*out]]"
Index: test/Driver/hip-binding.hip
===================================================================
--- test/Driver/hip-binding.hip
+++ test/Driver/hip-binding.hip
@@ -4,7 +4,7 @@
 
 // RUN: touch %t.o
 // RUN: %clang --hip-link -ccc-print-bindings -target x86_64-linux-gnu \
-// RUN:   --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %t.o\
+// RUN:   --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 -fgpu-rdc %t.o\
 // RUN: 2>&1 | FileCheck %s
 
 // CHECK: # "amdgcn-amd-amdhsa" - "offload bundler", inputs: ["[[IN:.*o]]"], outputs: ["[[OBJ1:.*o]]", "[[OBJ2:.*o]]", "[[OBJ3:.*o]]"] 
@@ -13,3 +13,10 @@
 // CHECK: # "amdgcn-amd-amdhsa" - "AMDGCN::Linker", inputs: ["[[OBJ3]]"], output: "[[IMG3:.*out]]"
 // CHECK-NOT: offload bundler
 // CHECK: # "x86_64-unknown-linux-gnu" - "GNU::Linker", inputs: ["[[OBJ1]]", "[[IMG2]]", "[[IMG3]]"], output: "a.out"
+
+// RUN: %clang --hip-link -ccc-print-bindings -target x86_64-linux-gnu \
+// RUN:   --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %t.o\
+// RUN: 2>&1 | FileCheck -check-prefix=NORDC %s
+
+// NORDC-NOT: offload bundler
+// NORDC: # "x86_64-unknown-linux-gnu" - "GNU::Linker", inputs: ["{{.*o}}"], output: "a.out"
Index: lib/Driver/Driver.cpp
===================================================================
--- lib/Driver/Driver.cpp
+++ lib/Driver/Driver.cpp
@@ -2293,11 +2293,15 @@
 
     /// Flag that is set to true if this builder acted on the current input.
     bool IsActive = false;
+
+    /// Flag for -fgpu-rdc.
+    bool Relocatable;
   public:
     CudaActionBuilderBase(Compilation &C, DerivedArgList &Args,
                           const Driver::InputList &Inputs,
                           Action::OffloadKind OFKind)
-        : DeviceActionBuilder(C, Args, Inputs, OFKind) {}
+        : DeviceActionBuilder(C, Args, Inputs, OFKind),
+          Relocatable(false) {}
 
     ActionBuilderReturnCode addDeviceDepences(Action *HostAction) override {
       // While generating code for CUDA, we only depend on the host input action
@@ -2338,6 +2342,12 @@
 
       // If this is an unbundling action use it as is for each CUDA toolchain.
       if (auto *UA = dyn_cast<OffloadUnbundlingJobAction>(HostAction)) {
+
+        // If -fgpu-rdc is disabled, should not unbundle since there is no
+        // device code to link.
+        if (!Relocatable)
+          return ABRT_Inactive;
+
         CudaDeviceActions.clear();
         auto *IA = cast<InputAction>(UA->getInputs().back());
         std::string FileName = IA->getInputArg().getAsString(Args);
@@ -2409,6 +2419,9 @@
           !C.hasOffloadToolChain<Action::OFK_HIP>())
         return false;
 
+      Relocatable = Args.hasFlag(options::OPT_fgpu_rdc,
+          options::OPT_fno_gpu_rdc, /*Default=*/false);
+
       const ToolChain *HostTC = C.getSingleOffloadToolChain<Action::OFK_Host>();
       assert(HostTC && "No toolchain for host compilation.");
       if (HostTC->getTriple().isNVPTX() ||
@@ -2594,13 +2607,11 @@
   class HIPActionBuilder final : public CudaActionBuilderBase {
     /// The linker inputs obtained for each device arch.
     SmallVector<ActionList, 8> DeviceLinkerInputs;
-    bool Relocatable;
 
   public:
     HIPActionBuilder(Compilation &C, DerivedArgList &Args,
                      const Driver::InputList &Inputs)
-        : CudaActionBuilderBase(C, Args, Inputs, Action::OFK_HIP),
-          Relocatable(false) {}
+        : CudaActionBuilderBase(C, Args, Inputs, Action::OFK_HIP) {}
 
     bool canUseBundlerUnbundler() const override { return true; }
 
@@ -2705,13 +2716,6 @@
         ++I;
       }
     }
-
-    bool initialize() override {
-      Relocatable = Args.hasFlag(options::OPT_fgpu_rdc,
-          options::OPT_fno_gpu_rdc, /*Default=*/false);
-
-      return CudaActionBuilderBase::initialize();
-    }
   };
 
   /// OpenMP action builder. The host bitcode is passed to the device frontend
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to