yaxunl created this revision. yaxunl added reviewers: tra, rsmith, rjmccall. Herald added subscribers: jfb, mgorny.
It is observed that device code compilation takes most of the compilation time when clang compiles CUDA/HIP programs, since device code usually contains complicated computation code. Oftentimes such code is highly coupled, which results in a few large source files that become bottlenecks for the whole project. Things become worse when such code is compiled for multiple GPU archs, since clang compiles for each GPU arch sequentially. In practice, it is common to compile for more than 5 GPU archs. To alleviate this issue, this patch implements a simple scheduler which lets the clang driver compile independent jobs in parallel. This patch tries to minimize the impact on the existing clang driver: there are no changes to the action builder or the tool chains. It introduces a driver option -parallel-jobs=n to control the number of parallel jobs to launch. By default it is 1, in which case the patch is NFC with respect to clang driver behavior. If llvm/clang is built with LLVM_ENABLE_THREADS off, this change is also NFC. The basic design of the scheduler is to find the dependences among the jobs and to use a thread to launch a job once the jobs it depends on are done. https://reviews.llvm.org/D69582 Files: clang/include/clang/Basic/OptionUtils.h clang/include/clang/Driver/Driver.h clang/include/clang/Driver/Job.h clang/include/clang/Driver/Options.td clang/include/clang/Frontend/Utils.h clang/lib/Basic/CMakeLists.txt clang/lib/Basic/OptionUtils.cpp clang/lib/Driver/Compilation.cpp clang/lib/Driver/Driver.cpp clang/lib/Driver/Job.cpp clang/lib/Frontend/CompilerInvocation.cpp
Index: clang/lib/Frontend/CompilerInvocation.cpp =================================================================== --- clang/lib/Frontend/CompilerInvocation.cpp +++ clang/lib/Frontend/CompilerInvocation.cpp @@ -3622,35 +3622,8 @@ return llvm::APInt(64, code).toString(36, /*Signed=*/false); } -template<typename IntTy> -static IntTy getLastArgIntValueImpl(const ArgList &Args, OptSpecifier Id, - IntTy Default, - DiagnosticsEngine *Diags) { - IntTy Res = Default; - if (Arg *A = Args.getLastArg(Id)) { - if (StringRef(A->getValue()).getAsInteger(10, Res)) { - if (Diags) - Diags->Report(diag::err_drv_invalid_int_value) << A->getAsString(Args) - << A->getValue(); - } - } - return Res; -} - namespace clang { -// Declared in clang/Frontend/Utils.h. -int getLastArgIntValue(const ArgList &Args, OptSpecifier Id, int Default, - DiagnosticsEngine *Diags) { - return getLastArgIntValueImpl<int>(Args, Id, Default, Diags); -} - -uint64_t getLastArgUInt64Value(const ArgList &Args, OptSpecifier Id, - uint64_t Default, - DiagnosticsEngine *Diags) { - return getLastArgIntValueImpl<uint64_t>(Args, Id, Default, Diags); -} - IntrusiveRefCntPtr<llvm::vfs::FileSystem> createVFSFromCompilerInvocation(const CompilerInvocation &CI, DiagnosticsEngine &Diags) { Index: clang/lib/Driver/Job.cpp =================================================================== --- clang/lib/Driver/Job.cpp +++ clang/lib/Driver/Job.cpp @@ -39,9 +39,11 @@ ArrayRef<InputInfo> Inputs) : Source(Source), Creator(Creator), Executable(Executable), Arguments(Arguments) { - for (const auto &II : Inputs) + for (const auto &II : Inputs) { if (II.isFilename()) InputFilenames.push_back(II.getFilename()); + DependentActions.push_back(II.getAction()); + } } /// Check if the compiler flag in question should be skipped when Index: clang/lib/Driver/Driver.cpp =================================================================== --- clang/lib/Driver/Driver.cpp +++ clang/lib/Driver/Driver.cpp @@ -37,13 +37,14 @@ #include 
"ToolChains/NaCl.h" #include "ToolChains/NetBSD.h" #include "ToolChains/OpenBSD.h" -#include "ToolChains/PS4CPU.h" #include "ToolChains/PPCLinux.h" +#include "ToolChains/PS4CPU.h" #include "ToolChains/RISCVToolchain.h" #include "ToolChains/Solaris.h" #include "ToolChains/TCE.h" #include "ToolChains/WebAssembly.h" #include "ToolChains/XCore.h" +#include "clang/Basic/OptionUtils.h" #include "clang/Basic/Version.h" #include "clang/Config/config.h" #include "clang/Driver/Action.h" @@ -54,6 +55,7 @@ #include "clang/Driver/SanitizerArgs.h" #include "clang/Driver/Tool.h" #include "clang/Driver/ToolChain.h" +#include "clang/Driver/Util.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallSet.h" @@ -129,7 +131,7 @@ CCLogDiagnostics(false), CCGenDiagnostics(false), TargetTriple(TargetTriple), CCCGenericGCCName(""), Saver(Alloc), CheckInputsExist(true), GenReproducer(false), - SuppressMissingInputWarning(false) { + SuppressMissingInputWarning(false), NumParallelJobs(1) { // Provide a sane fallback if no VFS is specified. 
if (!this->VFS) @@ -1094,6 +1096,9 @@ BitcodeEmbed = static_cast<BitcodeEmbedMode>(Model); } + setNumberOfParallelJobs( + getLastArgIntValue(Args, options::OPT_parallel_jobs_EQ, 1, Diags)); + std::unique_ptr<llvm::opt::InputArgList> UArgs = std::make_unique<InputArgList>(std::move(Args)); Index: clang/lib/Driver/Compilation.cpp =================================================================== --- clang/lib/Driver/Compilation.cpp +++ clang/lib/Driver/Compilation.cpp @@ -15,6 +15,7 @@ #include "clang/Driver/Options.h" #include "clang/Driver/ToolChain.h" #include "clang/Driver/Util.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/None.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" @@ -25,8 +26,11 @@ #include "llvm/Support/FileSystem.h" #include "llvm/Support/raw_ostream.h" #include <cassert> +#include <functional> +#include <mutex> #include <string> #include <system_error> +#include <thread> #include <utility> using namespace clang; @@ -220,22 +224,134 @@ return !ActionFailed(&C.getSource(), FailingCommands); } +namespace { +class JobScheduler { +public: + enum JobState { JS_WAIT, JS_RUN, JS_DONE, JS_FAIL }; + JobScheduler(const JobList &Jobs, size_t NJobs = 1) + : Jobs(Jobs), NumJobs(NJobs) { +#if !LLVM_ENABLE_THREADS + NumJobs = 1; +#endif + for (auto &Job : Jobs) { + JState[&Job] = JS_WAIT; + for (const auto *AI : Job.getDependentActions()) { + for (const auto *CI : ActToCmds[AI]) { + DependentCmds[&Job].push_back(CI); + } + } + for (const auto *CI : ActToCmds[&Job.getSource()]) { + DependentCmds[&Job].push_back(CI); + } + ActToCmds[&Job.getSource()].push_back(&Job); + } + } + /// \return true if all jobs are done. Otherwise, \p Next contains the + /// the next job ready to be executed if it is not null pointer. Otherwise + /// all jobs are running or waiting. 
+ bool IsDone(const Command *&Next) { + std::lock_guard<std::mutex> lock(Mutex); + Next = nullptr; + unsigned Done = 0; + unsigned Running = 0; + for (auto &Cmd : Jobs) { + switch (JState[&Cmd]) { + case JS_RUN: + ++Running; + break; + case JS_DONE: + case JS_FAIL: + ++Done; + break; + case JS_WAIT: { + bool InputsReady = true; + for (const auto *CI : DependentCmds[&Cmd]) { + if (JState[CI] == JS_FAIL) { + JState[&Cmd] = JS_FAIL; + ++Done; + InputsReady = false; + break; + } + if (JState[CI] != JS_DONE) { + InputsReady = false; + break; + } + } + if (!Next && InputsReady) { + Next = &Cmd; + } + break; + } + } + } + if (Running >= NumJobs) + Next = nullptr; + return Done == Jobs.size(); + } + + void setJobState(const Command *Cmd, JobState JS) { + std::lock_guard<std::mutex> lock(Mutex); + JState[Cmd] = JS; + } + + void launch(std::function<void()> Work) { +#if LLVM_ENABLE_THREADS + if (NumJobs == 1) { + Work(); + return; + } + std::thread Th(Work); + Th.detach(); +#else + Work(); +#endif + } + +private: + std::mutex Mutex; + const JobList &Jobs; + llvm::DenseMap<const Command *, JobState> JState; + llvm::DenseMap<const Action *, llvm::SmallVector<const Command *, 4>> + ActToCmds; + llvm::DenseMap<const Command *, llvm::SmallVector<const Command *, 4>> + DependentCmds; + size_t NumJobs; // Number of parallel jobs to run +}; +} // namespace void Compilation::ExecuteJobs(const JobList &Jobs, FailingCommandList &FailingCommands) const { // According to UNIX standard, driver need to continue compiling all the // inputs on the command line even one of them failed. // In all but CLMode, execute all the jobs unless the necessary inputs for the // job is missing due to previous failures. 
- for (const auto &Job : Jobs) { - if (!InputsOk(Job, FailingCommands)) + JobScheduler JS(Jobs, getDriver().getNumberOfParallelJobs()); + + const Command *Next = nullptr; + while (!JS.IsDone(Next)) { + if (!Next) { + std::this_thread::yield(); continue; - const Command *FailingCommand = nullptr; - if (int Res = ExecuteCommand(Job, FailingCommand)) { - FailingCommands.push_back(std::make_pair(Res, FailingCommand)); + } + + if (!InputsOk(*Next, FailingCommands)) { + JS.setJobState(Next, JobScheduler::JS_FAIL); // Bail as soon as one command fails in cl driver mode. if (TheDriver.IsCLMode()) return; + continue; } + + JS.setJobState(Next, JobScheduler::JS_RUN); + auto Work = [&, Next]() { + const Command *FailingCommand = nullptr; + if (int Res = ExecuteCommand(*Next, FailingCommand)) { + JS.setJobState(Next, JobScheduler::JS_FAIL); + FailingCommands.push_back(std::make_pair(Res, FailingCommand)); + } else { + JS.setJobState(Next, JobScheduler::JS_DONE); + } + }; + JS.launch(Work); } } Index: clang/lib/Basic/OptionUtils.cpp =================================================================== --- /dev/null +++ clang/lib/Basic/OptionUtils.cpp @@ -0,0 +1,44 @@ +//===--- OptionUtils.cpp - Utilities for command line arguments -----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clang/Basic/OptionUtils.h" +#include "clang/Basic/Diagnostic.h" +#include "clang/Basic/DiagnosticDriver.h" +#include "llvm/Option/ArgList.h" + +using namespace clang; +using namespace llvm::opt; + +template <typename IntTy> +static IntTy getLastArgIntValueImpl(const ArgList &Args, OptSpecifier Id, + IntTy Default, DiagnosticsEngine *Diags) { + IntTy Res = Default; + if (Arg *A = Args.getLastArg(Id)) { + if (StringRef(A->getValue()).getAsInteger(10, Res)) { + if (Diags) + Diags->Report(diag::err_drv_invalid_int_value) + << A->getAsString(Args) << A->getValue(); + } + } + return Res; +} + +namespace clang { + +// Declared in clang/Frontend/Utils.h. +int getLastArgIntValue(const ArgList &Args, OptSpecifier Id, int Default, + DiagnosticsEngine *Diags) { + return getLastArgIntValueImpl<int>(Args, Id, Default, Diags); +} + +uint64_t getLastArgUInt64Value(const ArgList &Args, OptSpecifier Id, + uint64_t Default, DiagnosticsEngine *Diags) { + return getLastArgIntValueImpl<uint64_t>(Args, Id, Default, Diags); +} + +} // namespace clang Index: clang/lib/Basic/CMakeLists.txt =================================================================== --- clang/lib/Basic/CMakeLists.txt +++ clang/lib/Basic/CMakeLists.txt @@ -55,6 +55,7 @@ ObjCRuntime.cpp OpenMPKinds.cpp OperatorPrecedence.cpp + OptionUtils.cpp SanitizerBlacklist.cpp SanitizerSpecialCaseList.cpp Sanitizers.cpp Index: clang/include/clang/Frontend/Utils.h =================================================================== --- clang/include/clang/Frontend/Utils.h +++ clang/include/clang/Frontend/Utils.h @@ -15,6 +15,7 @@ #include "clang/Basic/Diagnostic.h" #include "clang/Basic/LLVM.h" +#include "clang/Basic/OptionUtils.h" #include "clang/Frontend/DependencyOutputOptions.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/IntrusiveRefCntPtr.h" @@ -34,12 +35,6 @@ 
class Triple; -namespace opt { - -class ArgList; - -} // namespace opt - } // namespace llvm namespace clang { @@ -226,29 +221,6 @@ IntrusiveRefCntPtr<llvm::vfs::FileSystem> VFS = nullptr, bool ShouldRecoverOnErrors = false); -/// Return the value of the last argument as an integer, or a default. If Diags -/// is non-null, emits an error if the argument is given, but non-integral. -int getLastArgIntValue(const llvm::opt::ArgList &Args, - llvm::opt::OptSpecifier Id, int Default, - DiagnosticsEngine *Diags = nullptr); - -inline int getLastArgIntValue(const llvm::opt::ArgList &Args, - llvm::opt::OptSpecifier Id, int Default, - DiagnosticsEngine &Diags) { - return getLastArgIntValue(Args, Id, Default, &Diags); -} - -uint64_t getLastArgUInt64Value(const llvm::opt::ArgList &Args, - llvm::opt::OptSpecifier Id, uint64_t Default, - DiagnosticsEngine *Diags = nullptr); - -inline uint64_t getLastArgUInt64Value(const llvm::opt::ArgList &Args, - llvm::opt::OptSpecifier Id, - uint64_t Default, - DiagnosticsEngine &Diags) { - return getLastArgUInt64Value(Args, Id, Default, &Diags); -} - // Frontend timing utils /// If the user specifies the -ftime-report argument on an Clang command line Index: clang/include/clang/Driver/Options.td =================================================================== --- clang/include/clang/Driver/Options.td +++ clang/include/clang/Driver/Options.td @@ -401,6 +401,8 @@ def Ofast : Joined<["-"], "Ofast">, Group<O_Group>, Flags<[CC1Option]>; def P : Flag<["-"], "P">, Flags<[CC1Option]>, Group<Preprocessor_Group>, HelpText<"Disable linemarker output in -E mode">; +def parallel_jobs_EQ : Joined<["-"], "parallel-jobs=">, Flags<[DriverOption]>, + HelpText<"Number of parallel jobs">; def Qy : Flag<["-"], "Qy">, Flags<[CC1Option]>, HelpText<"Emit metadata containing compiler name and version">; def Qn : Flag<["-"], "Qn">, Flags<[CC1Option]>, Index: clang/include/clang/Driver/Job.h =================================================================== --- 
clang/include/clang/Driver/Job.h +++ clang/include/clang/Driver/Job.h @@ -73,6 +73,9 @@ /// See Command::setEnvironment std::vector<const char *> Environment; + /// Dependent actions + llvm::SmallVector<const Action *, 4> DependentActions; + /// When a response file is needed, we try to put most arguments in an /// exclusive file, while others remains as regular command line arguments. /// This functions fills a vector with the regular command line arguments, @@ -130,6 +133,10 @@ /// Set whether to print the input filenames when executing. void setPrintInputFilenames(bool P) { PrintInputFilenames = P; } + + const llvm::SmallVector<const Action *, 4> &getDependentActions() const { + return DependentActions; + } }; /// Like Command, but with a fallback which is executed in case Index: clang/include/clang/Driver/Driver.h =================================================================== --- clang/include/clang/Driver/Driver.h +++ clang/include/clang/Driver/Driver.h @@ -242,6 +242,9 @@ /// stored in it, and will clean them up when torn down. mutable llvm::StringMap<std::unique_ptr<ToolChain>> ToolChains; + /// Number of parallel jobs. + unsigned NumParallelJobs; + private: /// TranslateInputArgs - Create a new derived argument list from the input /// arguments, after applying the standard argument translations. @@ -540,6 +543,12 @@ /// Get the specific kind of LTO being performed. LTOKind getLTOMode() const { return LTOMode; } + /// Get the number of parallel jobs. + unsigned getNumberOfParallelJobs() const { return NumParallelJobs; } + + /// Set the number of parallel jobs. + void setNumberOfParallelJobs(unsigned N) { NumParallelJobs = N; } + private: /// Tries to load options from configuration file. 
Index: clang/include/clang/Basic/OptionUtils.h =================================================================== --- /dev/null +++ clang/include/clang/Basic/OptionUtils.h @@ -0,0 +1,56 @@ +//===- OptionUtils.h - Utilities for command line arguments -----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This header contains utilities for command line arguments. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_BASIC_OPTIONUTILS_H +#define LLVM_CLANG_BASIC_OPTIONUTILS_H + +#include "clang/Basic/Diagnostic.h" +#include "clang/Basic/LLVM.h" +#include "llvm/Option/OptSpecifier.h" + +namespace llvm { + +namespace opt { + +class ArgList; + +} // namespace opt + +} // namespace llvm + +namespace clang { +/// Return the value of the last argument as an integer, or a default. If Diags +/// is non-null, emits an error if the argument is given, but non-integral. +int getLastArgIntValue(const llvm::opt::ArgList &Args, + llvm::opt::OptSpecifier Id, int Default, + DiagnosticsEngine *Diags = nullptr); + +inline int getLastArgIntValue(const llvm::opt::ArgList &Args, + llvm::opt::OptSpecifier Id, int Default, + DiagnosticsEngine &Diags) { + return getLastArgIntValue(Args, Id, Default, &Diags); +} + +uint64_t getLastArgUInt64Value(const llvm::opt::ArgList &Args, + llvm::opt::OptSpecifier Id, uint64_t Default, + DiagnosticsEngine *Diags = nullptr); + +inline uint64_t getLastArgUInt64Value(const llvm::opt::ArgList &Args, + llvm::opt::OptSpecifier Id, + uint64_t Default, + DiagnosticsEngine &Diags) { + return getLastArgUInt64Value(Args, Id, Default, &Diags); +} + +} // namespace clang + +#endif // LLVM_CLANG_BASIC_OPTIONUTILS_H
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits