yaxunl created this revision.
yaxunl added reviewers: tra, rsmith, rjmccall.
Herald added subscribers: jfb, mgorny.

It is observed that device code compilation takes most of the compilation time
when clang compiles CUDA/HIP programs, since device code usually contains
complicated computation code. Such code is often highly coupled, which results
in a few large source files that become bottlenecks for the whole project.
Things become worse when such code is compiled for multiple GPU archs, since
clang compiles for each GPU arch sequentially. In practice, it is common to
compile for more than 5 GPU archs.

To alleviate this issue, this patch implements a simple scheduler which lets
the clang driver compile independent jobs in parallel.

This patch tries to minimize the impact on the existing clang driver. There are
no changes to the action builder or the tool chains. It introduces a driver
option, -parallel-jobs=n, to control the number of parallel jobs to launch. The
default is 1, in which case the patch is NFC (no functional change) with
respect to clang driver behavior.
If llvm/clang is built with LLVM_ENABLE_THREADS off, this change is also NFC.

The basic design of the scheduler is to find the dependences among the jobs and
to use a thread to launch a job once the jobs it depends on are done.


https://reviews.llvm.org/D69582

Files:
  clang/include/clang/Basic/OptionUtils.h
  clang/include/clang/Driver/Driver.h
  clang/include/clang/Driver/Job.h
  clang/include/clang/Driver/Options.td
  clang/include/clang/Frontend/Utils.h
  clang/lib/Basic/CMakeLists.txt
  clang/lib/Basic/OptionUtils.cpp
  clang/lib/Driver/Compilation.cpp
  clang/lib/Driver/Driver.cpp
  clang/lib/Driver/Job.cpp
  clang/lib/Frontend/CompilerInvocation.cpp

Index: clang/lib/Frontend/CompilerInvocation.cpp
===================================================================
--- clang/lib/Frontend/CompilerInvocation.cpp
+++ clang/lib/Frontend/CompilerInvocation.cpp
@@ -3622,35 +3622,8 @@
   return llvm::APInt(64, code).toString(36, /*Signed=*/false);
 }
 
-template<typename IntTy>
-static IntTy getLastArgIntValueImpl(const ArgList &Args, OptSpecifier Id,
-                                    IntTy Default,
-                                    DiagnosticsEngine *Diags) {
-  IntTy Res = Default;
-  if (Arg *A = Args.getLastArg(Id)) {
-    if (StringRef(A->getValue()).getAsInteger(10, Res)) {
-      if (Diags)
-        Diags->Report(diag::err_drv_invalid_int_value) << A->getAsString(Args)
-                                                       << A->getValue();
-    }
-  }
-  return Res;
-}
-
 namespace clang {
 
-// Declared in clang/Frontend/Utils.h.
-int getLastArgIntValue(const ArgList &Args, OptSpecifier Id, int Default,
-                       DiagnosticsEngine *Diags) {
-  return getLastArgIntValueImpl<int>(Args, Id, Default, Diags);
-}
-
-uint64_t getLastArgUInt64Value(const ArgList &Args, OptSpecifier Id,
-                               uint64_t Default,
-                               DiagnosticsEngine *Diags) {
-  return getLastArgIntValueImpl<uint64_t>(Args, Id, Default, Diags);
-}
-
 IntrusiveRefCntPtr<llvm::vfs::FileSystem>
 createVFSFromCompilerInvocation(const CompilerInvocation &CI,
                                 DiagnosticsEngine &Diags) {
Index: clang/lib/Driver/Job.cpp
===================================================================
--- clang/lib/Driver/Job.cpp
+++ clang/lib/Driver/Job.cpp
@@ -39,9 +39,11 @@
                  ArrayRef<InputInfo> Inputs)
     : Source(Source), Creator(Creator), Executable(Executable),
       Arguments(Arguments) {
-  for (const auto &II : Inputs)
+  for (const auto &II : Inputs) {
     if (II.isFilename())
       InputFilenames.push_back(II.getFilename());
+    DependentActions.push_back(II.getAction());
+  }
 }
 
 /// Check if the compiler flag in question should be skipped when
Index: clang/lib/Driver/Driver.cpp
===================================================================
--- clang/lib/Driver/Driver.cpp
+++ clang/lib/Driver/Driver.cpp
@@ -37,13 +37,14 @@
 #include "ToolChains/NaCl.h"
 #include "ToolChains/NetBSD.h"
 #include "ToolChains/OpenBSD.h"
-#include "ToolChains/PS4CPU.h"
 #include "ToolChains/PPCLinux.h"
+#include "ToolChains/PS4CPU.h"
 #include "ToolChains/RISCVToolchain.h"
 #include "ToolChains/Solaris.h"
 #include "ToolChains/TCE.h"
 #include "ToolChains/WebAssembly.h"
 #include "ToolChains/XCore.h"
+#include "clang/Basic/OptionUtils.h"
 #include "clang/Basic/Version.h"
 #include "clang/Config/config.h"
 #include "clang/Driver/Action.h"
@@ -54,6 +55,7 @@
 #include "clang/Driver/SanitizerArgs.h"
 #include "clang/Driver/Tool.h"
 #include "clang/Driver/ToolChain.h"
+#include "clang/Driver/Util.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallSet.h"
@@ -129,7 +131,7 @@
       CCLogDiagnostics(false), CCGenDiagnostics(false),
       TargetTriple(TargetTriple), CCCGenericGCCName(""), Saver(Alloc),
       CheckInputsExist(true), GenReproducer(false),
-      SuppressMissingInputWarning(false) {
+      SuppressMissingInputWarning(false), NumParallelJobs(1) {
 
   // Provide a sane fallback if no VFS is specified.
   if (!this->VFS)
@@ -1094,6 +1096,9 @@
       BitcodeEmbed = static_cast<BitcodeEmbedMode>(Model);
   }
 
+  setNumberOfParallelJobs(
+      getLastArgIntValue(Args, options::OPT_parallel_jobs_EQ, 1, Diags));
+
   std::unique_ptr<llvm::opt::InputArgList> UArgs =
       std::make_unique<InputArgList>(std::move(Args));
 
Index: clang/lib/Driver/Compilation.cpp
===================================================================
--- clang/lib/Driver/Compilation.cpp
+++ clang/lib/Driver/Compilation.cpp
@@ -15,6 +15,7 @@
 #include "clang/Driver/Options.h"
 #include "clang/Driver/ToolChain.h"
 #include "clang/Driver/Util.h"
+#include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/None.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallVector.h"
@@ -25,8 +26,11 @@
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/raw_ostream.h"
 #include <cassert>
+#include <functional>
+#include <mutex>
 #include <string>
 #include <system_error>
+#include <thread>
 #include <utility>
 
 using namespace clang;
@@ -220,22 +224,134 @@
   return !ActionFailed(&C.getSource(), FailingCommands);
 }
 
+namespace {
+class JobScheduler {
+public:
+  enum JobState { JS_WAIT, JS_RUN, JS_DONE, JS_FAIL };
+  JobScheduler(const JobList &Jobs, size_t NJobs = 1)
+      : Jobs(Jobs), NumJobs(NJobs) {
+#if !LLVM_ENABLE_THREADS
+    NumJobs = 1;
+#endif
+    for (auto &Job : Jobs) {
+      JState[&Job] = JS_WAIT;
+      for (const auto *AI : Job.getDependentActions()) {
+        for (const auto *CI : ActToCmds[AI]) {
+          DependentCmds[&Job].push_back(CI);
+        }
+      }
+      for (const auto *CI : ActToCmds[&Job.getSource()]) {
+        DependentCmds[&Job].push_back(CI);
+      }
+      ActToCmds[&Job.getSource()].push_back(&Job);
+    }
+  }
+  /// \return true if all jobs are done. Otherwise, \p Next is set to the next
+  /// job ready to be executed, or to null when no job can be started yet
+  /// (remaining jobs are running or waiting, or the job limit is reached).
+  bool IsDone(const Command *&Next) {
+    std::lock_guard<std::mutex> lock(Mutex);
+    Next = nullptr;
+    unsigned Done = 0;
+    unsigned Running = 0;
+    for (auto &Cmd : Jobs) {
+      switch (JState[&Cmd]) {
+      case JS_RUN:
+        ++Running;
+        break;
+      case JS_DONE:
+      case JS_FAIL:
+        ++Done;
+        break;
+      case JS_WAIT: {
+        bool InputsReady = true;
+        for (const auto *CI : DependentCmds[&Cmd]) {
+          if (JState[CI] == JS_FAIL) {
+            JState[&Cmd] = JS_FAIL;
+            ++Done;
+            InputsReady = false;
+            break;
+          }
+          if (JState[CI] != JS_DONE) {
+            InputsReady = false;
+            break;
+          }
+        }
+        if (!Next && InputsReady) {
+          Next = &Cmd;
+        }
+        break;
+      }
+      }
+    }
+    if (Running >= NumJobs)
+      Next = nullptr;
+    return Done == Jobs.size();
+  }
+
+  void setJobState(const Command *Cmd, JobState JS) {
+    std::lock_guard<std::mutex> lock(Mutex);
+    JState[Cmd] = JS;
+  }
+
+  void launch(std::function<void()> Work) {
+#if LLVM_ENABLE_THREADS
+    if (NumJobs == 1) {
+      Work();
+      return;
+    }
+    std::thread Th(Work);
+    Th.detach();
+#else
+    Work();
+#endif
+  }
+
+private:
+  std::mutex Mutex;
+  const JobList &Jobs;
+  llvm::DenseMap<const Command *, JobState> JState;
+  llvm::DenseMap<const Action *, llvm::SmallVector<const Command *, 4>>
+      ActToCmds;
+  llvm::DenseMap<const Command *, llvm::SmallVector<const Command *, 4>>
+      DependentCmds;
+  size_t NumJobs; // Number of parallel jobs to run
+};
+} // namespace
 void Compilation::ExecuteJobs(const JobList &Jobs,
                               FailingCommandList &FailingCommands) const {
   // According to UNIX standard, driver need to continue compiling all the
   // inputs on the command line even one of them failed.
   // In all but CLMode, execute all the jobs unless the necessary inputs for the
   // job is missing due to previous failures.
-  for (const auto &Job : Jobs) {
-    if (!InputsOk(Job, FailingCommands))
+  JobScheduler JS(Jobs, getDriver().getNumberOfParallelJobs());
+
+  const Command *Next = nullptr;
+  while (!JS.IsDone(Next)) {
+    if (!Next) {
+      std::this_thread::yield();
       continue;
-    const Command *FailingCommand = nullptr;
-    if (int Res = ExecuteCommand(Job, FailingCommand)) {
-      FailingCommands.push_back(std::make_pair(Res, FailingCommand));
+    }
+
+    if (!InputsOk(*Next, FailingCommands)) {
+      JS.setJobState(Next, JobScheduler::JS_FAIL);
       // Bail as soon as one command fails in cl driver mode.
       if (TheDriver.IsCLMode())
         return;
+      continue;
     }
+
+    JS.setJobState(Next, JobScheduler::JS_RUN);
+    auto Work = [&, Next]() {
+      const Command *FailingCommand = nullptr;
+      if (int Res = ExecuteCommand(*Next, FailingCommand)) {
+        JS.setJobState(Next, JobScheduler::JS_FAIL);
+        FailingCommands.push_back(std::make_pair(Res, FailingCommand));
+      } else {
+        JS.setJobState(Next, JobScheduler::JS_DONE);
+      }
+    };
+    JS.launch(Work);
   }
 }
 
Index: clang/lib/Basic/OptionUtils.cpp
===================================================================
--- /dev/null
+++ clang/lib/Basic/OptionUtils.cpp
@@ -0,0 +1,44 @@
+//===--- OptionUtils.cpp - Utilities for command line arguments -----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Basic/OptionUtils.h"
+#include "clang/Basic/Diagnostic.h"
+#include "clang/Basic/DiagnosticDriver.h"
+#include "llvm/Option/ArgList.h"
+
+using namespace clang;
+using namespace llvm::opt;
+
+template <typename IntTy>
+static IntTy getLastArgIntValueImpl(const ArgList &Args, OptSpecifier Id,
+                                    IntTy Default, DiagnosticsEngine *Diags) {
+  IntTy Res = Default;
+  if (Arg *A = Args.getLastArg(Id)) {
+    if (StringRef(A->getValue()).getAsInteger(10, Res)) {
+      if (Diags)
+        Diags->Report(diag::err_drv_invalid_int_value)
+            << A->getAsString(Args) << A->getValue();
+    }
+  }
+  return Res;
+}
+
+namespace clang {
+
+// Declared in clang/Basic/OptionUtils.h.
+int getLastArgIntValue(const ArgList &Args, OptSpecifier Id, int Default,
+                       DiagnosticsEngine *Diags) {
+  return getLastArgIntValueImpl<int>(Args, Id, Default, Diags);
+}
+
+uint64_t getLastArgUInt64Value(const ArgList &Args, OptSpecifier Id,
+                               uint64_t Default, DiagnosticsEngine *Diags) {
+  return getLastArgIntValueImpl<uint64_t>(Args, Id, Default, Diags);
+}
+
+} // namespace clang
Index: clang/lib/Basic/CMakeLists.txt
===================================================================
--- clang/lib/Basic/CMakeLists.txt
+++ clang/lib/Basic/CMakeLists.txt
@@ -55,6 +55,7 @@
   ObjCRuntime.cpp
   OpenMPKinds.cpp
   OperatorPrecedence.cpp
+  OptionUtils.cpp
   SanitizerBlacklist.cpp
   SanitizerSpecialCaseList.cpp
   Sanitizers.cpp
Index: clang/include/clang/Frontend/Utils.h
===================================================================
--- clang/include/clang/Frontend/Utils.h
+++ clang/include/clang/Frontend/Utils.h
@@ -15,6 +15,7 @@
 
 #include "clang/Basic/Diagnostic.h"
 #include "clang/Basic/LLVM.h"
+#include "clang/Basic/OptionUtils.h"
 #include "clang/Frontend/DependencyOutputOptions.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/IntrusiveRefCntPtr.h"
@@ -34,12 +35,6 @@
 
 class Triple;
 
-namespace opt {
-
-class ArgList;
-
-} // namespace opt
-
 } // namespace llvm
 
 namespace clang {
@@ -226,29 +221,6 @@
     IntrusiveRefCntPtr<llvm::vfs::FileSystem> VFS = nullptr,
     bool ShouldRecoverOnErrors = false);
 
-/// Return the value of the last argument as an integer, or a default. If Diags
-/// is non-null, emits an error if the argument is given, but non-integral.
-int getLastArgIntValue(const llvm::opt::ArgList &Args,
-                       llvm::opt::OptSpecifier Id, int Default,
-                       DiagnosticsEngine *Diags = nullptr);
-
-inline int getLastArgIntValue(const llvm::opt::ArgList &Args,
-                              llvm::opt::OptSpecifier Id, int Default,
-                              DiagnosticsEngine &Diags) {
-  return getLastArgIntValue(Args, Id, Default, &Diags);
-}
-
-uint64_t getLastArgUInt64Value(const llvm::opt::ArgList &Args,
-                               llvm::opt::OptSpecifier Id, uint64_t Default,
-                               DiagnosticsEngine *Diags = nullptr);
-
-inline uint64_t getLastArgUInt64Value(const llvm::opt::ArgList &Args,
-                                      llvm::opt::OptSpecifier Id,
-                                      uint64_t Default,
-                                      DiagnosticsEngine &Diags) {
-  return getLastArgUInt64Value(Args, Id, Default, &Diags);
-}
-
 // Frontend timing utils
 
 /// If the user specifies the -ftime-report argument on an Clang command line
Index: clang/include/clang/Driver/Options.td
===================================================================
--- clang/include/clang/Driver/Options.td
+++ clang/include/clang/Driver/Options.td
@@ -401,6 +401,8 @@
 def Ofast : Joined<["-"], "Ofast">, Group<O_Group>, Flags<[CC1Option]>;
 def P : Flag<["-"], "P">, Flags<[CC1Option]>, Group<Preprocessor_Group>,
   HelpText<"Disable linemarker output in -E mode">;
+def parallel_jobs_EQ : Joined<["-"], "parallel-jobs=">, Flags<[DriverOption]>,
+  HelpText<"Number of parallel jobs">;
 def Qy : Flag<["-"], "Qy">, Flags<[CC1Option]>,
   HelpText<"Emit metadata containing compiler name and version">;
 def Qn : Flag<["-"], "Qn">, Flags<[CC1Option]>,
Index: clang/include/clang/Driver/Job.h
===================================================================
--- clang/include/clang/Driver/Job.h
+++ clang/include/clang/Driver/Job.h
@@ -73,6 +73,9 @@
   /// See Command::setEnvironment
   std::vector<const char *> Environment;
 
+  /// Actions of this command's inputs; used to find the jobs it depends on.
+  llvm::SmallVector<const Action *, 4> DependentActions;
+
   /// When a response file is needed, we try to put most arguments in an
   /// exclusive file, while others remains as regular command line arguments.
   /// This functions fills a vector with the regular command line arguments,
@@ -130,6 +133,10 @@
 
   /// Set whether to print the input filenames when executing.
   void setPrintInputFilenames(bool P) { PrintInputFilenames = P; }
+
+  const llvm::SmallVector<const Action *, 4> &getDependentActions() const {
+    return DependentActions;
+  }
 };
 
 /// Like Command, but with a fallback which is executed in case
Index: clang/include/clang/Driver/Driver.h
===================================================================
--- clang/include/clang/Driver/Driver.h
+++ clang/include/clang/Driver/Driver.h
@@ -242,6 +242,9 @@
   /// stored in it, and will clean them up when torn down.
   mutable llvm::StringMap<std::unique_ptr<ToolChain>> ToolChains;
 
+  /// Number of parallel jobs.
+  unsigned NumParallelJobs;
+
 private:
   /// TranslateInputArgs - Create a new derived argument list from the input
   /// arguments, after applying the standard argument translations.
@@ -540,6 +543,12 @@
   /// Get the specific kind of LTO being performed.
   LTOKind getLTOMode() const { return LTOMode; }
 
+  /// Get the number of parallel jobs.
+  unsigned getNumberOfParallelJobs() const { return NumParallelJobs; }
+
+  /// Set the number of parallel jobs.
+  void setNumberOfParallelJobs(unsigned N) { NumParallelJobs = N; }
+
 private:
 
   /// Tries to load options from configuration file.
Index: clang/include/clang/Basic/OptionUtils.h
===================================================================
--- /dev/null
+++ clang/include/clang/Basic/OptionUtils.h
@@ -0,0 +1,56 @@
+//===- OptionUtils.h - Utilities for command line arguments -----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+//  This header contains utilities for command line arguments.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_BASIC_OPTIONUTILS_H
+#define LLVM_CLANG_BASIC_OPTIONUTILS_H
+
+#include "clang/Basic/Diagnostic.h"
+#include "clang/Basic/LLVM.h"
+#include "llvm/Option/OptSpecifier.h"
+
+namespace llvm {
+
+namespace opt {
+
+class ArgList;
+
+} // namespace opt
+
+} // namespace llvm
+
+namespace clang {
+/// Return the value of the last argument as an integer, or a default. If Diags
+/// is non-null, emits an error if the argument is given, but non-integral.
+int getLastArgIntValue(const llvm::opt::ArgList &Args,
+                       llvm::opt::OptSpecifier Id, int Default,
+                       DiagnosticsEngine *Diags = nullptr);
+
+inline int getLastArgIntValue(const llvm::opt::ArgList &Args,
+                              llvm::opt::OptSpecifier Id, int Default,
+                              DiagnosticsEngine &Diags) {
+  return getLastArgIntValue(Args, Id, Default, &Diags);
+}
+
+uint64_t getLastArgUInt64Value(const llvm::opt::ArgList &Args,
+                               llvm::opt::OptSpecifier Id, uint64_t Default,
+                               DiagnosticsEngine *Diags = nullptr);
+
+inline uint64_t getLastArgUInt64Value(const llvm::opt::ArgList &Args,
+                                      llvm::opt::OptSpecifier Id,
+                                      uint64_t Default,
+                                      DiagnosticsEngine &Diags) {
+  return getLastArgUInt64Value(Args, Id, Default, &Diags);
+}
+
+} // namespace clang
+
+#endif // LLVM_CLANG_BASIC_OPTIONUTILS_H
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to