https://github.com/ChuanqiXu9 updated https://github.com/llvm/llvm-project/pull/106683
>From 15aa6af1f5d22e4b837e8e2fd49469310ffbe7f1 Mon Sep 17 00:00:00 2001 From: Chuanqi Xu <yedeng...@linux.alibaba.com> Date: Fri, 30 Aug 2024 15:11:07 +0800 Subject: [PATCH 1/3] [clangd] [Modules] Support Reusable Modules Builder --- clang-tools-extra/clangd/ModulesBuilder.cpp | 375 ++++++++++++++---- clang-tools-extra/clangd/ModulesBuilder.h | 8 +- .../unittests/PrerequisiteModulesTest.cpp | 36 ++ 3 files changed, 344 insertions(+), 75 deletions(-) diff --git a/clang-tools-extra/clangd/ModulesBuilder.cpp b/clang-tools-extra/clangd/ModulesBuilder.cpp index 97f67ddf5495a2..c4fb3d4010d370 100644 --- a/clang-tools-extra/clangd/ModulesBuilder.cpp +++ b/clang-tools-extra/clangd/ModulesBuilder.cpp @@ -13,6 +13,7 @@ #include "clang/Frontend/FrontendActions.h" #include "clang/Serialization/ASTReader.h" #include "clang/Serialization/InMemoryModuleCache.h" +#include "llvm/ADT/ScopeExit.h" namespace clang { namespace clangd { @@ -192,33 +193,27 @@ bool IsModuleFilesUpToDate( }); } -// StandalonePrerequisiteModules - stands for PrerequisiteModules for which all +// ReusablePrerequisiteModules - stands for PrerequisiteModules for which all // the required modules are built successfully. All the module files -// are owned by the StandalonePrerequisiteModules class. -// -// Any of the built module files won't be shared with other instances of the -// class. So that we can avoid worrying thread safety. -// -// We don't need to worry about duplicated module names here since the standard -// guarantees the module names should be unique to a program. -class StandalonePrerequisiteModules : public PrerequisiteModules { +// are owned by the modules builder. +class ReusablePrerequisiteModules : public PrerequisiteModules { public: - StandalonePrerequisiteModules() = default; + ReusablePrerequisiteModules() = default; - StandalonePrerequisiteModules(const StandalonePrerequisiteModules &) = delete; - StandalonePrerequisiteModules - operator=(const StandalonePrerequisiteModules &) = delete; - StandalonePrerequisiteModules(StandalonePrerequisiteModules &&) = delete; - StandalonePrerequisiteModules - operator=(StandalonePrerequisiteModules &&) = delete; + ReusablePrerequisiteModules(const ReusablePrerequisiteModules &) = delete; + ReusablePrerequisiteModules + operator=(const ReusablePrerequisiteModules &) = delete; + ReusablePrerequisiteModules(ReusablePrerequisiteModules &&) = delete; + ReusablePrerequisiteModules + operator=(ReusablePrerequisiteModules &&) = delete; - ~StandalonePrerequisiteModules() override = default; + ~ReusablePrerequisiteModules() override = default; void adjustHeaderSearchOptions(HeaderSearchOptions &Options) const override { // Appending all built module files. for (auto &RequiredModule : RequiredModules) Options.PrebuiltModuleFiles.insert_or_assign( - RequiredModule.ModuleName, RequiredModule.ModuleFilePath); + RequiredModule->ModuleName, RequiredModule->ModuleFilePath); } bool canReuse(const CompilerInvocation &CI, @@ -228,54 +223,30 @@ class StandalonePrerequisiteModules : public PrerequisiteModules { return BuiltModuleNames.contains(ModuleName); } - void addModuleFile(llvm::StringRef ModuleName, - llvm::StringRef ModuleFilePath) { - RequiredModules.emplace_back(ModuleName, ModuleFilePath); - BuiltModuleNames.insert(ModuleName); + void addModuleFile(std::shared_ptr<ModuleFile> BMI) { + BuiltModuleNames.insert(BMI->ModuleName); + RequiredModules.emplace_back(std::move(BMI)); } private: - llvm::SmallVector<ModuleFile, 8> RequiredModules; + mutable llvm::SmallVector<std::shared_ptr<ModuleFile>, 8> RequiredModules; // A helper class to speedup the query if a module is built. llvm::StringSet<> BuiltModuleNames; }; -// Build a module file for module with `ModuleName`. The information of built -// module file are stored in \param BuiltModuleFiles. -llvm::Error buildModuleFile(llvm::StringRef ModuleName, - const GlobalCompilationDatabase &CDB, - const ThreadsafeFS &TFS, ProjectModules &MDB, - PathRef ModuleFilesPrefix, - StandalonePrerequisiteModules &BuiltModuleFiles) { - if (BuiltModuleFiles.isModuleUnitBuilt(ModuleName)) - return llvm::Error::success(); - - PathRef ModuleUnitFileName = MDB.getSourceForModuleName(ModuleName); - // It is possible that we're meeting third party modules (modules whose - // source are not in the project. e.g, the std module may be a third-party - // module for most projects) or something wrong with the implementation of - // ProjectModules. - // FIXME: How should we treat third party modules here? If we want to ignore - // third party modules, we should return true instead of false here. - // Currently we simply bail out. - if (ModuleUnitFileName.empty()) - return llvm::createStringError("Failed to get the primary source"); - +/// Build a module file for module with `ModuleName`. The information of built +/// module file are stored in \param BuiltModuleFiles. +llvm::Expected<ModuleFile> +buildModuleFile(llvm::StringRef ModuleName, PathRef ModuleUnitFileName, + const GlobalCompilationDatabase &CDB, const ThreadsafeFS &TFS, + PathRef ModuleFilesPrefix, + const ReusablePrerequisiteModules &BuiltModuleFiles) { // Try cheap operation earlier to boil-out cheaply if there are problems. auto Cmd = CDB.getCompileCommand(ModuleUnitFileName); if (!Cmd) return llvm::createStringError( llvm::formatv("No compile command for {0}", ModuleUnitFileName)); - for (auto &RequiredModuleName : MDB.getRequiredModules(ModuleUnitFileName)) { - // Return early if there are errors building the module file. - if (llvm::Error Err = buildModuleFile(RequiredModuleName, CDB, TFS, MDB, - ModuleFilesPrefix, BuiltModuleFiles)) - return llvm::createStringError( - llvm::formatv("Failed to build dependency {0}: {1}", - RequiredModuleName, llvm::toString(std::move(Err)))); - } - Cmd->Output = getModuleFilePath(ModuleName, ModuleFilesPrefix); ParseInputs Inputs; @@ -316,15 +287,282 @@ llvm::Error buildModuleFile(llvm::StringRef ModuleName, if (Clang->getDiagnostics().hasErrorOccurred()) return llvm::createStringError("Compilation failed"); - BuiltModuleFiles.addModuleFile(ModuleName, Inputs.CompileCommand.Output); - return llvm::Error::success(); + return ModuleFile{ModuleName, Inputs.CompileCommand.Output}; +} + +bool ReusablePrerequisiteModules::canReuse( + const CompilerInvocation &CI, + llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> VFS) const { + if (RequiredModules.empty()) + return true; + + SmallVector<StringRef> BMIPaths; + for (auto &MF : RequiredModules) + BMIPaths.push_back(MF->ModuleFilePath); + return IsModuleFilesUpToDate(BMIPaths, *this, VFS); } } // namespace +class ModulesBuilder::ModuleFileCache { +public: + ModuleFileCache(const GlobalCompilationDatabase &CDB) : CDB(CDB) {} + + llvm::Error + getOrBuildModuleFile(StringRef ModuleName, const ThreadsafeFS &TFS, + ProjectModules &MDB, + ReusablePrerequisiteModules &RequiredModules); + const GlobalCompilationDatabase &getCDB() const { return CDB; } + +private: + std::shared_ptr<ModuleFile> + getValidModuleFile(StringRef ModuleName, ProjectModules &MDB, + const ThreadsafeFS &TFS, + PrerequisiteModules &BuiltModuleFiles); + + /// This should only be called by getValidModuleFile. This is unlocked version + /// of getValidModuleFile. The function is extracted to avoid dead locks when + /// recursing. + std::shared_ptr<ModuleFile> + isValidModuleFileUnlocked(StringRef ModuleName, ProjectModules &MDB, + const ThreadsafeFS &TFS, + PrerequisiteModules &BuiltModuleFiles); + + void startBuildingModule(StringRef ModuleName) { + std::lock_guard<std::mutex> _(ModulesBuildingMutex); + BuildingModules.insert(ModuleName); + } + void endBuildingModule(StringRef ModuleName) { + std::lock_guard<std::mutex> _(ModulesBuildingMutex); + BuildingModules.erase(ModuleName); + } + bool isBuildingModule(StringRef ModuleName) { + std::lock_guard<std::mutex> _(ModulesBuildingMutex); + return BuildingModules.contains(ModuleName); + } + + const GlobalCompilationDatabase &CDB; + + llvm::StringMap<std::shared_ptr<ModuleFile>> ModuleFiles; + // Mutex to guard accesses to ModuleFiles. + std::mutex ModuleFilesMutex; + + // We should only build a unique module at most at the same time. + // When we want to build a module, use the mutex to lock it and use the + // condition variable to notify other threads the status of the build results. + // + // Store the mutex and the condition_variable in shared_ptr since they may be + // accessed by many threads. + llvm::StringMap<std::shared_ptr<std::mutex>> BuildingModuleMutexes; + llvm::StringMap<std::shared_ptr<std::condition_variable>> BuildingModuleCVs; + // The building modules set. A successed built module or a failed module or + // an unbuilt module shouldn't be in this set. + // This set is helpful to control the behavior of the condition variables. + llvm::StringSet<> BuildingModules; + // Lock when we access BuildingModules, BuildingModuleMutexes and + // BuildingModuleCVs. + std::mutex ModulesBuildingMutex; + + /// An RAII object to guard the process to build a specific module. + struct ModuleBuildingSharedOwner { + public: + ModuleBuildingSharedOwner(StringRef ModuleName, + std::shared_ptr<std::mutex> &Mutex, + std::shared_ptr<std::condition_variable> &CV, + ModuleFileCache &Cache) + : ModuleName(ModuleName), Mutex(Mutex), CV(CV), Cache(Cache) { + IsFirstTask = (Mutex.use_count() == 2); + } + + ~ModuleBuildingSharedOwner(); + + bool isUniqueBuildingOwner() { return IsFirstTask; } + + std::mutex &getMutex() { return *Mutex; } + + std::condition_variable &getCV() { return *CV; } + + private: + StringRef ModuleName; + std::shared_ptr<std::mutex> Mutex; + std::shared_ptr<std::condition_variable> CV; + ModuleFileCache &Cache; + bool IsFirstTask; + }; + + ModuleBuildingSharedOwner + getOrCreateModuleBuildingOwner(StringRef ModuleName); +}; + +ModulesBuilder::ModuleFileCache::ModuleBuildingSharedOwner:: + ~ModuleBuildingSharedOwner() { + std::lock_guard<std::mutex> _(Cache.ModulesBuildingMutex); + + Mutex.reset(); + CV.reset(); + + // Try to release the memory in builder if possible. + if (auto Iter = Cache.BuildingModuleCVs.find(ModuleName); + Iter != Cache.BuildingModuleCVs.end() && + Iter->getValue().use_count() == 1) + Cache.BuildingModuleCVs.erase(Iter); + + if (auto Iter = Cache.BuildingModuleMutexes.find(ModuleName); + Iter != Cache.BuildingModuleMutexes.end() && + Iter->getValue().use_count() == 1) + Cache.BuildingModuleMutexes.erase(Iter); +} + +std::shared_ptr<ModuleFile> +ModulesBuilder::ModuleFileCache::isValidModuleFileUnlocked( + StringRef ModuleName, ProjectModules &MDB, const ThreadsafeFS &TFS, + PrerequisiteModules &BuiltModuleFiles) { + auto Iter = ModuleFiles.find(ModuleName); + if (Iter != ModuleFiles.end()) { + if (!IsModuleFileUpToDate(Iter->second->ModuleFilePath, BuiltModuleFiles, + TFS.view(std::nullopt))) { + log("Found not-up-date module file {0} for module {1} in cache", + Iter->second->ModuleFilePath, ModuleName); + ModuleFiles.erase(Iter); + return nullptr; + } + + if (llvm::any_of( + MDB.getRequiredModules(MDB.getSourceForModuleName(ModuleName)), + [&MDB, &TFS, &BuiltModuleFiles, this](auto &&RequiredModuleName) { + return !isValidModuleFileUnlocked(RequiredModuleName, MDB, TFS, + BuiltModuleFiles); + })) { + ModuleFiles.erase(Iter); + return nullptr; + } + + return Iter->second; + } + + log("Don't find {0} in cache", ModuleName); + + return nullptr; +} + +std::shared_ptr<ModuleFile> ModulesBuilder::ModuleFileCache::getValidModuleFile( + StringRef ModuleName, ProjectModules &MDB, const ThreadsafeFS &TFS, + PrerequisiteModules &BuiltModuleFiles) { + std::lock_guard<std::mutex> _(ModuleFilesMutex); + + return isValidModuleFileUnlocked(ModuleName, MDB, TFS, BuiltModuleFiles); +} + +ModulesBuilder::ModuleFileCache::ModuleBuildingSharedOwner +ModulesBuilder::ModuleFileCache::getOrCreateModuleBuildingOwner( + StringRef ModuleName) { + std::lock_guard<std::mutex> _(ModulesBuildingMutex); + + auto MutexIter = BuildingModuleMutexes.find(ModuleName); + if (MutexIter == BuildingModuleMutexes.end()) + MutexIter = BuildingModuleMutexes + .try_emplace(ModuleName, std::make_shared<std::mutex>()) + .first; + + auto CVIter = BuildingModuleCVs.find(ModuleName); + if (CVIter == BuildingModuleCVs.end()) + CVIter = BuildingModuleCVs + .try_emplace(ModuleName, + std::make_shared<std::condition_variable>()) + .first; + + return ModuleBuildingSharedOwner(ModuleName, MutexIter->getValue(), + CVIter->getValue(), *this); +} + +llvm::Error ModulesBuilder::ModuleFileCache::getOrBuildModuleFile( + StringRef ModuleName, const ThreadsafeFS &TFS, ProjectModules &MDB, + ReusablePrerequisiteModules &BuiltModuleFiles) { + if (BuiltModuleFiles.isModuleUnitBuilt(ModuleName)) + return llvm::Error::success(); + + PathRef ModuleUnitFileName = MDB.getSourceForModuleName(ModuleName); + /// It is possible that we're meeting third party modules (modules whose + /// source are not in the project. e.g, the std module may be a third-party + /// module for most project) or something wrong with the implementation of + /// ProjectModules. + /// FIXME: How should we treat third party modules here? If we want to ignore + /// third party modules, we should return true instead of false here. + /// Currently we simply bail out. + if (ModuleUnitFileName.empty()) + return llvm::createStringError( + llvm::formatv("Don't get the module unit for module {0}", ModuleName)); + + for (auto &RequiredModuleName : MDB.getRequiredModules(ModuleUnitFileName)) + // Return early if there are errors building the module file. + if (!getOrBuildModuleFile(RequiredModuleName, TFS, MDB, BuiltModuleFiles)) + return llvm::createStringError( + llvm::formatv("Failed to build module {0}", RequiredModuleName)); + + if (std::shared_ptr<ModuleFile> Cached = + getValidModuleFile(ModuleName, MDB, TFS, BuiltModuleFiles)) { + log("Reusing module {0} from {1}", ModuleName, Cached->ModuleFilePath); + BuiltModuleFiles.addModuleFile(Cached); + return llvm::Error::success(); + } + + ModuleBuildingSharedOwner ModuleBuildingOwner = + getOrCreateModuleBuildingOwner(ModuleName); + + std::condition_variable &CV = ModuleBuildingOwner.getCV(); + std::unique_lock<std::mutex> lk(ModuleBuildingOwner.getMutex()); + if (!ModuleBuildingOwner.isUniqueBuildingOwner()) { + log("Waiting other task for module {0}", ModuleName); + CV.wait(lk, [this, ModuleName] { return !isBuildingModule(ModuleName); }); + + // Try to access the built module files from other threads manually. + // We don't call getValidModuleFile here since it may be too heavy. + std::lock_guard<std::mutex> _(ModuleFilesMutex); + auto Iter = ModuleFiles.find(ModuleName); + if (Iter != ModuleFiles.end()) { + log("Got module file from other task building {0}", ModuleName); + BuiltModuleFiles.addModuleFile(Iter->second); + return llvm::Error::success(); + } + + // If the module file is not in the cache, it indicates that the building + // from other thread failed, so we give up earlier in this case to avoid + // wasting time. + return llvm::createStringError(llvm::formatv( + "The module file {0} may be failed to build in other thread.", + ModuleName)); + } + + log("Building module {0}", ModuleName); + startBuildingModule(ModuleName); + + auto _ = llvm::make_scope_exit([&]() { + endBuildingModule(ModuleName); + CV.notify_all(); + }); + + llvm::SmallString<256> ModuleFilesPrefix = + getUniqueModuleFilesPath(ModuleUnitFileName); + + llvm::Expected<ModuleFile> MF = + buildModuleFile(ModuleName, ModuleUnitFileName, CDB, TFS, + ModuleFilesPrefix, BuiltModuleFiles); + if (llvm::Error Err = MF.takeError()) + return Err; + + log("Built module {0} to {1}", ModuleName, MF->ModuleFilePath); + + std::lock_guard<std::mutex> __(ModuleFilesMutex); + auto BuiltModuleFile = std::make_shared<ModuleFile>(std::move(*MF)); + ModuleFiles.insert_or_assign(ModuleName, BuiltModuleFile); + BuiltModuleFiles.addModuleFile(std::move(BuiltModuleFile)); + + return llvm::Error::success(); +} std::unique_ptr<PrerequisiteModules> ModulesBuilder::buildPrerequisiteModulesFor(PathRef File, - const ThreadsafeFS &TFS) const { - std::unique_ptr<ProjectModules> MDB = CDB.getProjectModules(File); + const ThreadsafeFS &TFS) { + std::unique_ptr<ProjectModules> MDB = + MFCache->getCDB().getProjectModules(File); if (!MDB) { elog("Failed to get Project Modules information for {0}", File); return std::make_unique<FailedPrerequisiteModules>(); @@ -332,20 +570,19 @@ ModulesBuilder::buildPrerequisiteModulesFor(PathRef File, std::vector<std::string> RequiredModuleNames = MDB->getRequiredModules(File); if (RequiredModuleNames.empty()) - return std::make_unique<StandalonePrerequisiteModules>(); + return std::make_unique<ReusablePrerequisiteModules>(); llvm::SmallString<256> ModuleFilesPrefix = getUniqueModuleFilesPath(File); log("Trying to build required modules for {0} in {1}", File, ModuleFilesPrefix); - auto RequiredModules = std::make_unique<StandalonePrerequisiteModules>(); + auto RequiredModules = std::make_unique<ReusablePrerequisiteModules>(); for (llvm::StringRef RequiredModuleName : RequiredModuleNames) { // Return early if there is any error. - if (llvm::Error Err = - buildModuleFile(RequiredModuleName, CDB, TFS, *MDB.get(), - ModuleFilesPrefix, *RequiredModules.get())) { + if (llvm::Error Err = MFCache->getOrBuildModuleFile( + RequiredModuleName, TFS, *MDB.get(), *RequiredModules.get())) { elog("Failed to build module {0}; due to {1}", RequiredModuleName, toString(std::move(Err))); return std::make_unique<FailedPrerequisiteModules>(); @@ -357,17 +594,11 @@ ModulesBuilder::buildPrerequisiteModulesFor(PathRef File, return std::move(RequiredModules); } -bool StandalonePrerequisiteModules::canReuse( - const CompilerInvocation &CI, - llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> VFS) const { - if (RequiredModules.empty()) - return true; - - SmallVector<StringRef> BMIPaths; - for (auto &MF : RequiredModules) - BMIPaths.push_back(MF.ModuleFilePath); - return IsModuleFilesUpToDate(BMIPaths, *this, VFS); +ModulesBuilder::ModulesBuilder(const GlobalCompilationDatabase &CDB) { + MFCache = std::make_unique<ModuleFileCache>(CDB); } +ModulesBuilder::~ModulesBuilder() {} + } // namespace clangd } // namespace clang diff --git a/clang-tools-extra/clangd/ModulesBuilder.h b/clang-tools-extra/clangd/ModulesBuilder.h index 0514e7486475d0..2aae441747a898 100644 --- a/clang-tools-extra/clangd/ModulesBuilder.h +++ b/clang-tools-extra/clangd/ModulesBuilder.h @@ -85,7 +85,8 @@ class PrerequisiteModules { /// different versions and different source files. class ModulesBuilder { public: - ModulesBuilder(const GlobalCompilationDatabase &CDB) : CDB(CDB) {} + ModulesBuilder(const GlobalCompilationDatabase &CDB); + ~ModulesBuilder(); ModulesBuilder(const ModulesBuilder &) = delete; ModulesBuilder(ModulesBuilder &&) = delete; @@ -94,10 +95,11 @@ class ModulesBuilder { ModulesBuilder &operator=(ModulesBuilder &&) = delete; std::unique_ptr<PrerequisiteModules> - buildPrerequisiteModulesFor(PathRef File, const ThreadsafeFS &TFS) const; + buildPrerequisiteModulesFor(PathRef File, const ThreadsafeFS &TFS); private: - const GlobalCompilationDatabase &CDB; + class ModuleFileCache; + std::unique_ptr<ModuleFileCache> MFCache; }; } // namespace clangd diff --git a/clang-tools-extra/clangd/unittests/PrerequisiteModulesTest.cpp b/clang-tools-extra/clangd/unittests/PrerequisiteModulesTest.cpp index 691a93e7acd0af..9577935c48b5ec 100644 --- a/clang-tools-extra/clangd/unittests/PrerequisiteModulesTest.cpp +++ b/clang-tools-extra/clangd/unittests/PrerequisiteModulesTest.cpp @@ -482,6 +482,42 @@ void func() { EXPECT_EQ(Result.signatures[0].parameters[0].labelString, "int a"); } +TEST_F(PrerequisiteModulesTests, ReusablePrerequisiteModulesTest) { + MockDirectoryCompilationDatabase CDB(TestDir, FS); + + CDB.addFile("M.cppm", R"cpp( +export module M; +export int M = 43; + )cpp"); + CDB.addFile("A.cppm", R"cpp( +export module A; +import M; +export int A = 43 + M; + )cpp"); + CDB.addFile("B.cppm", R"cpp( +export module B; +import M; +export int B = 44 + M; + )cpp"); + + ModulesBuilder Builder(CDB); + + auto AInfo = Builder.buildPrerequisiteModulesFor(getFullPath("A.cppm"), FS); + EXPECT_TRUE(AInfo); + auto BInfo = Builder.buildPrerequisiteModulesFor(getFullPath("B.cppm"), FS); + EXPECT_TRUE(BInfo); + HeaderSearchOptions HSOptsA(TestDir); + HeaderSearchOptions HSOptsB(TestDir); + AInfo->adjustHeaderSearchOptions(HSOptsA); + BInfo->adjustHeaderSearchOptions(HSOptsB); + + EXPECT_FALSE(HSOptsA.PrebuiltModuleFiles.empty()); + EXPECT_FALSE(HSOptsB.PrebuiltModuleFiles.empty()); + + // Check that we're reusing the module files. + EXPECT_EQ(HSOptsA.PrebuiltModuleFiles, HSOptsB.PrebuiltModuleFiles); +} + } // namespace } // namespace clang::clangd >From 4ed7bf5fadb8a3879fc9862537a24a4d9990a055 Mon Sep 17 00:00:00 2001 From: Chuanqi Xu <yedeng...@linux.alibaba.com> Date: Thu, 31 Oct 2024 13:35:59 +0800 Subject: [PATCH 2/3] Remove unique builder --- clang-tools-extra/clangd/ModulesBuilder.cpp | 133 -------------------- 1 file changed, 133 deletions(-) diff --git a/clang-tools-extra/clangd/ModulesBuilder.cpp b/clang-tools-extra/clangd/ModulesBuilder.cpp index c4fb3d4010d370..e1c0cc743097f5 100644 --- a/clang-tools-extra/clangd/ModulesBuilder.cpp +++ b/clang-tools-extra/clangd/ModulesBuilder.cpp @@ -327,91 +327,13 @@ class ModulesBuilder::ModuleFileCache { const ThreadsafeFS &TFS, PrerequisiteModules &BuiltModuleFiles); - void startBuildingModule(StringRef ModuleName) { - std::lock_guard<std::mutex> _(ModulesBuildingMutex); - BuildingModules.insert(ModuleName); - } - void endBuildingModule(StringRef ModuleName) { - std::lock_guard<std::mutex> _(ModulesBuildingMutex); - BuildingModules.erase(ModuleName); - } - bool isBuildingModule(StringRef ModuleName) { - std::lock_guard<std::mutex> _(ModulesBuildingMutex); - return BuildingModules.contains(ModuleName); - } - const GlobalCompilationDatabase &CDB; llvm::StringMap<std::shared_ptr<ModuleFile>> ModuleFiles; // Mutex to guard accesses to ModuleFiles. std::mutex ModuleFilesMutex; - - // We should only build a unique module at most at the same time. - // When we want to build a module, use the mutex to lock it and use the - // condition variable to notify other threads the status of the build results. - // - // Store the mutex and the condition_variable in shared_ptr since they may be - // accessed by many threads. - llvm::StringMap<std::shared_ptr<std::mutex>> BuildingModuleMutexes; - llvm::StringMap<std::shared_ptr<std::condition_variable>> BuildingModuleCVs; - // The building modules set. A successed built module or a failed module or - // an unbuilt module shouldn't be in this set. - // This set is helpful to control the behavior of the condition variables. - llvm::StringSet<> BuildingModules; - // Lock when we access BuildingModules, BuildingModuleMutexes and - // BuildingModuleCVs. - std::mutex ModulesBuildingMutex; - - /// An RAII object to guard the process to build a specific module. - struct ModuleBuildingSharedOwner { - public: - ModuleBuildingSharedOwner(StringRef ModuleName, - std::shared_ptr<std::mutex> &Mutex, - std::shared_ptr<std::condition_variable> &CV, - ModuleFileCache &Cache) - : ModuleName(ModuleName), Mutex(Mutex), CV(CV), Cache(Cache) { - IsFirstTask = (Mutex.use_count() == 2); - } - - ~ModuleBuildingSharedOwner(); - - bool isUniqueBuildingOwner() { return IsFirstTask; } - - std::mutex &getMutex() { return *Mutex; } - - std::condition_variable &getCV() { return *CV; } - - private: - StringRef ModuleName; - std::shared_ptr<std::mutex> Mutex; - std::shared_ptr<std::condition_variable> CV; - ModuleFileCache &Cache; - bool IsFirstTask; - }; - - ModuleBuildingSharedOwner - getOrCreateModuleBuildingOwner(StringRef ModuleName); }; -ModulesBuilder::ModuleFileCache::ModuleBuildingSharedOwner:: - ~ModuleBuildingSharedOwner() { - std::lock_guard<std::mutex> _(Cache.ModulesBuildingMutex); - - Mutex.reset(); - CV.reset(); - - // Try to release the memory in builder if possible. - if (auto Iter = Cache.BuildingModuleCVs.find(ModuleName); - Iter != Cache.BuildingModuleCVs.end() && - Iter->getValue().use_count() == 1) - Cache.BuildingModuleCVs.erase(Iter); - - if (auto Iter = Cache.BuildingModuleMutexes.find(ModuleName); - Iter != Cache.BuildingModuleMutexes.end() && - Iter->getValue().use_count() == 1) - Cache.BuildingModuleMutexes.erase(Iter); -} - std::shared_ptr<ModuleFile> ModulesBuilder::ModuleFileCache::isValidModuleFileUnlocked( StringRef ModuleName, ProjectModules &MDB, const ThreadsafeFS &TFS, @@ -452,28 +374,6 @@ std::shared_ptr<ModuleFile> ModulesBuilder::ModuleFileCache::getValidModuleFile( return isValidModuleFileUnlocked(ModuleName, MDB, TFS, BuiltModuleFiles); } -ModulesBuilder::ModuleFileCache::ModuleBuildingSharedOwner -ModulesBuilder::ModuleFileCache::getOrCreateModuleBuildingOwner( - StringRef ModuleName) { - std::lock_guard<std::mutex> _(ModulesBuildingMutex); - - auto MutexIter = BuildingModuleMutexes.find(ModuleName); - if (MutexIter == BuildingModuleMutexes.end()) - MutexIter = BuildingModuleMutexes - .try_emplace(ModuleName, std::make_shared<std::mutex>()) - .first; - - auto CVIter = BuildingModuleCVs.find(ModuleName); - if (CVIter == BuildingModuleCVs.end()) - CVIter = BuildingModuleCVs - .try_emplace(ModuleName, - std::make_shared<std::condition_variable>()) - .first; - - return ModuleBuildingSharedOwner(ModuleName, MutexIter->getValue(), - CVIter->getValue(), *this); -} - llvm::Error ModulesBuilder::ModuleFileCache::getOrBuildModuleFile( StringRef ModuleName, const ThreadsafeFS &TFS, ProjectModules &MDB, ReusablePrerequisiteModules &BuiltModuleFiles) { @@ -505,40 +405,7 @@ llvm::Error ModulesBuilder::ModuleFileCache::getOrBuildModuleFile( return llvm::Error::success(); } - ModuleBuildingSharedOwner ModuleBuildingOwner = - getOrCreateModuleBuildingOwner(ModuleName); - - std::condition_variable &CV = ModuleBuildingOwner.getCV(); - std::unique_lock<std::mutex> lk(ModuleBuildingOwner.getMutex()); - if (!ModuleBuildingOwner.isUniqueBuildingOwner()) { - log("Waiting other task for module {0}", ModuleName); - CV.wait(lk, [this, ModuleName] { return !isBuildingModule(ModuleName); }); - - // Try to access the built module files from other threads manually. - // We don't call getValidModuleFile here since it may be too heavy. - std::lock_guard<std::mutex> _(ModuleFilesMutex); - auto Iter = ModuleFiles.find(ModuleName); - if (Iter != ModuleFiles.end()) { - log("Got module file from other task building {0}", ModuleName); - BuiltModuleFiles.addModuleFile(Iter->second); - return llvm::Error::success(); - } - - // If the module file is not in the cache, it indicates that the building - // from other thread failed, so we give up earlier in this case to avoid - // wasting time. - return llvm::createStringError(llvm::formatv( - "The module file {0} may be failed to build in other thread.", - ModuleName)); - } - log("Building module {0}", ModuleName); - startBuildingModule(ModuleName); - - auto _ = llvm::make_scope_exit([&]() { - endBuildingModule(ModuleName); - CV.notify_all(); - }); llvm::SmallString<256> ModuleFilesPrefix = getUniqueModuleFilesPath(ModuleUnitFileName); >From dcdd18acd9b5d30e5607f0dffc18843e06adbfd6 Mon Sep 17 00:00:00 2001 From: Chuanqi Xu <yedeng...@linux.alibaba.com> Date: Mon, 4 Nov 2024 14:26:12 +0800 Subject: [PATCH 3/3] Update --- clang-tools-extra/clangd/ModulesBuilder.cpp | 151 +++++++++++------- clang-tools-extra/clangd/ModulesBuilder.h | 4 +- .../unittests/PrerequisiteModulesTest.cpp | 36 +++++ 3 files changed, 135 insertions(+), 56 deletions(-) diff --git a/clang-tools-extra/clangd/ModulesBuilder.cpp b/clang-tools-extra/clangd/ModulesBuilder.cpp index e1c0cc743097f5..73186f3b5ce7fc 100644 --- a/clang-tools-extra/clangd/ModulesBuilder.cpp +++ b/clang-tools-extra/clangd/ModulesBuilder.cpp @@ -14,6 +14,7 @@ #include "clang/Serialization/ASTReader.h" #include "clang/Serialization/InMemoryModuleCache.h" #include "llvm/ADT/ScopeExit.h" +#include <queue> namespace clang { namespace clangd { @@ -125,6 +126,11 @@ struct ModuleFile { llvm::sys::fs::remove(ModuleFilePath); } + StringRef getModuleName() const { return ModuleName; } + + StringRef getModuleFilePath() const { return ModuleFilePath; } + +private: std::string ModuleName; std::string ModuleFilePath; }; @@ -213,7 +219,8 @@ class ReusablePrerequisiteModules : public PrerequisiteModules { // Appending all built module files. for (auto &RequiredModule : RequiredModules) Options.PrebuiltModuleFiles.insert_or_assign( - RequiredModule->ModuleName, RequiredModule->ModuleFilePath); + RequiredModule->getModuleName().str(), + RequiredModule->getModuleFilePath().str()); } bool canReuse(const CompilerInvocation &CI, @@ -224,12 +231,12 @@ class ReusablePrerequisiteModules : public PrerequisiteModules { } void addModuleFile(std::shared_ptr<ModuleFile> BMI) { - BuiltModuleNames.insert(BMI->ModuleName); + BuiltModuleNames.insert(BMI->getModuleName()); RequiredModules.emplace_back(std::move(BMI)); } private: - mutable llvm::SmallVector<std::shared_ptr<ModuleFile>, 8> RequiredModules; + llvm::SmallVector<std::shared_ptr<ModuleFile>, 8> RequiredModules; // A helper class to speedup the query if a module is built. llvm::StringSet<> BuiltModuleNames; }; @@ -298,12 +305,11 @@ bool ReusablePrerequisiteModules::canReuse( SmallVector<StringRef> BMIPaths; for (auto &MF : RequiredModules) - BMIPaths.push_back(MF->ModuleFilePath); + BMIPaths.push_back(MF->getModuleFilePath()); return IsModuleFilesUpToDate(BMIPaths, *this, VFS); } -} // namespace -class ModulesBuilder::ModuleFileCache { +class ModuleFileCache { public: ModuleFileCache(const GlobalCompilationDatabase &CDB) : CDB(CDB) {} @@ -313,20 +319,18 @@ class ModulesBuilder::ModuleFileCache { ReusablePrerequisiteModules &RequiredModules); const GlobalCompilationDatabase &getCDB() const { return CDB; } -private: std::shared_ptr<ModuleFile> getValidModuleFile(StringRef ModuleName, ProjectModules &MDB, const ThreadsafeFS &TFS, PrerequisiteModules &BuiltModuleFiles); - /// This should only be called by getValidModuleFile. This is unlocked version - /// of getValidModuleFile. The function is extracted to avoid dead locks when - /// recursing. - std::shared_ptr<ModuleFile> - isValidModuleFileUnlocked(StringRef ModuleName, ProjectModules &MDB, - const ThreadsafeFS &TFS, - PrerequisiteModules &BuiltModuleFiles); + void add(StringRef ModuleName, std::shared_ptr<ModuleFile> ModuleFile) { + std::lock_guard<std::mutex> _(ModuleFilesMutex); + + ModuleFiles.insert_or_assign(ModuleName, ModuleFile); + } +private: const GlobalCompilationDatabase &CDB; llvm::StringMap<std::shared_ptr<ModuleFile>> ModuleFiles; @@ -334,47 +338,88 @@ class ModulesBuilder::ModuleFileCache { std::mutex ModuleFilesMutex; }; +/// Collect the directly and indirectly required module names for \param +/// ModuleName. The \param ModuleName is guaranteed to be the first element in +/// \param ModuleNames. +void getAllRequiredModules(ProjectModules &MDB, StringRef ModuleName, + llvm::SmallVector<StringRef> &ModuleNames) { + std::queue<StringRef> Worklist; + llvm::StringSet<> ModuleNamesSet; + Worklist.push(ModuleName); + + while (!Worklist.empty()) { + StringRef CurrentModule = Worklist.front(); + Worklist.pop(); + + if (!ModuleNamesSet.insert(CurrentModule).second) + continue; + + ModuleNames.push_back(CurrentModule); + + for (StringRef RequiredModuleName : + MDB.getRequiredModules(MDB.getSourceForModuleName(CurrentModule))) + if (!ModuleNamesSet.contains(RequiredModuleName)) + Worklist.push(RequiredModuleName); + } +} + std::shared_ptr<ModuleFile> -ModulesBuilder::ModuleFileCache::isValidModuleFileUnlocked( - StringRef ModuleName, ProjectModules &MDB, const ThreadsafeFS &TFS, - PrerequisiteModules &BuiltModuleFiles) { - auto Iter = ModuleFiles.find(ModuleName); - if (Iter != ModuleFiles.end()) { - if (!IsModuleFileUpToDate(Iter->second->ModuleFilePath, BuiltModuleFiles, - TFS.view(std::nullopt))) { - log("Found not-up-date module file {0} for module {1} in cache", - Iter->second->ModuleFilePath, ModuleName); - ModuleFiles.erase(Iter); - return nullptr; - } +ModuleFileCache::getValidModuleFile(StringRef ModuleName, ProjectModules &MDB, + const ThreadsafeFS &TFS, + PrerequisiteModules &BuiltModuleFiles) { + { + std::lock_guard<std::mutex> _(ModuleFilesMutex); - if (llvm::any_of( - MDB.getRequiredModules(MDB.getSourceForModuleName(ModuleName)), - [&MDB, &TFS, &BuiltModuleFiles, this](auto &&RequiredModuleName) { - return !isValidModuleFileUnlocked(RequiredModuleName, MDB, TFS, - BuiltModuleFiles); - })) { - ModuleFiles.erase(Iter); + if (ModuleFiles.find(ModuleName) == ModuleFiles.end()) return nullptr; - } + } + + llvm::SmallVector<StringRef> ModuleNames; + getAllRequiredModules(MDB, ModuleName, ModuleNames); + + llvm::SmallVector<std::shared_ptr<ModuleFile>> RequiredModuleFiles; + + { + std::lock_guard<std::mutex> _(ModuleFilesMutex); + + for (StringRef ModuleName : ModuleNames) { + auto Iter = ModuleFiles.find(ModuleName); + if (Iter == ModuleFiles.end()) + return nullptr; - return Iter->second; + RequiredModuleFiles.push_back(Iter->second); + } } - log("Don't find {0} in cache", ModuleName); + if (RequiredModuleFiles.empty()) + return nullptr; + + if (llvm::all_of(RequiredModuleFiles, [&](auto MF) { + return IsModuleFileUpToDate(MF->getModuleFilePath(), BuiltModuleFiles, + TFS.view(std::nullopt)); + })) + return RequiredModuleFiles[0]; return nullptr; } +} // namespace -std::shared_ptr<ModuleFile> ModulesBuilder::ModuleFileCache::getValidModuleFile( - StringRef ModuleName, ProjectModules &MDB, const ThreadsafeFS &TFS, - PrerequisiteModules &BuiltModuleFiles) { - std::lock_guard<std::mutex> _(ModuleFilesMutex); +class ModulesBuilder::ModulesBuilderImpl { +public: + ModulesBuilderImpl(const GlobalCompilationDatabase &CDB) : Cache(CDB) {} - return isValidModuleFileUnlocked(ModuleName, MDB, TFS, BuiltModuleFiles); -} + const GlobalCompilationDatabase &getCDB() const { return Cache.getCDB(); } -llvm::Error ModulesBuilder::ModuleFileCache::getOrBuildModuleFile( + llvm::Error + getOrBuildModuleFile(StringRef ModuleName, const ThreadsafeFS &TFS, + ProjectModules &MDB, + ReusablePrerequisiteModules &BuiltModuleFiles); + +private: + ModuleFileCache Cache; +}; + +llvm::Error ModulesBuilder::ModulesBuilderImpl::getOrBuildModuleFile( StringRef ModuleName, const ThreadsafeFS &TFS, ProjectModules &MDB, ReusablePrerequisiteModules &BuiltModuleFiles) { if (BuiltModuleFiles.isModuleUnitBuilt(ModuleName)) @@ -399,8 +444,8 @@ llvm::Error ModulesBuilder::ModuleFileCache::getOrBuildModuleFile( llvm::formatv("Failed to build module {0}", RequiredModuleName)); if (std::shared_ptr<ModuleFile> Cached = - getValidModuleFile(ModuleName, MDB, TFS, BuiltModuleFiles)) { - log("Reusing module {0} from {1}", ModuleName, Cached->ModuleFilePath); + Cache.getValidModuleFile(ModuleName, MDB, TFS, BuiltModuleFiles)) { + log("Reusing module {0} from {1}", ModuleName, Cached->getModuleFilePath()); BuiltModuleFiles.addModuleFile(Cached); return llvm::Error::success(); } @@ -411,25 +456,23 @@ llvm::Error ModulesBuilder::ModuleFileCache::getOrBuildModuleFile( getUniqueModuleFilesPath(ModuleUnitFileName); llvm::Expected<ModuleFile> MF = - buildModuleFile(ModuleName, ModuleUnitFileName, CDB, TFS, + buildModuleFile(ModuleName, ModuleUnitFileName, getCDB(), TFS, ModuleFilesPrefix, BuiltModuleFiles); if (llvm::Error Err = MF.takeError()) return Err; - log("Built module {0} to {1}", ModuleName, MF->ModuleFilePath); - - std::lock_guard<std::mutex> __(ModuleFilesMutex); + log("Built module {0} to {1}", ModuleName, MF->getModuleFilePath()); auto BuiltModuleFile = std::make_shared<ModuleFile>(std::move(*MF)); - ModuleFiles.insert_or_assign(ModuleName, BuiltModuleFile); + Cache.add(ModuleName, BuiltModuleFile); BuiltModuleFiles.addModuleFile(std::move(BuiltModuleFile)); return llvm::Error::success(); } + std::unique_ptr<PrerequisiteModules> ModulesBuilder::buildPrerequisiteModulesFor(PathRef File, const ThreadsafeFS &TFS) { - std::unique_ptr<ProjectModules> MDB = - MFCache->getCDB().getProjectModules(File); + std::unique_ptr<ProjectModules> MDB = Impl->getCDB().getProjectModules(File); if (!MDB) { elog("Failed to get Project Modules information for {0}", File); return std::make_unique<FailedPrerequisiteModules>(); @@ -448,7 +491,7 @@ ModulesBuilder::buildPrerequisiteModulesFor(PathRef File, for (llvm::StringRef RequiredModuleName : RequiredModuleNames) { // Return early if there is any error. - if (llvm::Error Err = MFCache->getOrBuildModuleFile( + if (llvm::Error Err = Impl->getOrBuildModuleFile( RequiredModuleName, TFS, *MDB.get(), *RequiredModules.get())) { elog("Failed to build module {0}; due to {1}", RequiredModuleName, toString(std::move(Err))); @@ -462,7 +505,7 @@ ModulesBuilder::buildPrerequisiteModulesFor(PathRef File, } ModulesBuilder::ModulesBuilder(const GlobalCompilationDatabase &CDB) { - MFCache = std::make_unique<ModuleFileCache>(CDB); + Impl = std::make_unique<ModulesBuilderImpl>(CDB); } ModulesBuilder::~ModulesBuilder() {} diff --git a/clang-tools-extra/clangd/ModulesBuilder.h b/clang-tools-extra/clangd/ModulesBuilder.h index 2aae441747a898..f40a9006e91690 100644 --- a/clang-tools-extra/clangd/ModulesBuilder.h +++ b/clang-tools-extra/clangd/ModulesBuilder.h @@ -98,8 +98,8 @@ class ModulesBuilder { buildPrerequisiteModulesFor(PathRef File, const ThreadsafeFS &TFS); private: - class ModuleFileCache; - std::unique_ptr<ModuleFileCache> MFCache; + class ModulesBuilderImpl; + std::unique_ptr<ModulesBuilderImpl> Impl; }; } // namespace clangd diff --git a/clang-tools-extra/clangd/unittests/PrerequisiteModulesTest.cpp b/clang-tools-extra/clangd/unittests/PrerequisiteModulesTest.cpp index 9577935c48b5ec..1bb8e19cce23e0 100644 --- a/clang-tools-extra/clangd/unittests/PrerequisiteModulesTest.cpp +++ b/clang-tools-extra/clangd/unittests/PrerequisiteModulesTest.cpp @@ -516,6 +516,42 @@ export int B = 44 + M; // Check that we're reusing the module files. EXPECT_EQ(HSOptsA.PrebuiltModuleFiles, HSOptsB.PrebuiltModuleFiles); + + // Update M.cppm to check if the modules builder can update correctly. + CDB.addFile("M.cppm", R"cpp( +export module M; +export constexpr int M = 43; + )cpp"); + + ParseInputs AUse = getInputs("A.cppm", CDB); + AUse.ModulesManager = &Builder; + std::unique_ptr<CompilerInvocation> AInvocation = + buildCompilerInvocation(AUse, DiagConsumer); + EXPECT_FALSE(AInfo->canReuse(*AInvocation, FS.view(TestDir))); + + ParseInputs BUse = getInputs("B.cppm", CDB); + AUse.ModulesManager = &Builder; + std::unique_ptr<CompilerInvocation> BInvocation = + buildCompilerInvocation(BUse, DiagConsumer); + EXPECT_FALSE(BInfo->canReuse(*BInvocation, FS.view(TestDir))); + + auto NewAInfo = + Builder.buildPrerequisiteModulesFor(getFullPath("A.cppm"), FS); + auto NewBInfo = + Builder.buildPrerequisiteModulesFor(getFullPath("B.cppm"), FS); + EXPECT_TRUE(NewAInfo); + EXPECT_TRUE(NewBInfo); + HeaderSearchOptions NewHSOptsA(TestDir); + HeaderSearchOptions NewHSOptsB(TestDir); + NewAInfo->adjustHeaderSearchOptions(NewHSOptsA); + NewBInfo->adjustHeaderSearchOptions(NewHSOptsB); + + EXPECT_FALSE(NewHSOptsA.PrebuiltModuleFiles.empty()); + EXPECT_FALSE(NewHSOptsB.PrebuiltModuleFiles.empty()); + + EXPECT_EQ(NewHSOptsA.PrebuiltModuleFiles, NewHSOptsB.PrebuiltModuleFiles); + // Check that we didn't reuse the old and stale module files. + EXPECT_NE(NewHSOptsA.PrebuiltModuleFiles, HSOptsA.PrebuiltModuleFiles); } } // namespace _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits