Author: kadircet Date: Fri Nov 16 01:03:56 2018 New Revision: 347038 URL: http://llvm.org/viewvc/llvm-project?rev=347038&view=rev Log: Introduce shard storage to auto-index.
Reviewers: sammccall, ioeric Reviewed By: sammccall Subscribers: llvm-commits, mgorny, Eugene.Zelenko, ilya-biryukov, jkorous, arphaman, cfe-commits Differential Revision: https://reviews.llvm.org/D54269 Added: clang-tools-extra/trunk/clangd/index/BackgroundIndexStorage.cpp Modified: clang-tools-extra/trunk/clangd/CMakeLists.txt clang-tools-extra/trunk/clangd/index/Background.cpp clang-tools-extra/trunk/clangd/index/Background.h clang-tools-extra/trunk/unittests/clangd/BackgroundIndexTests.cpp Modified: clang-tools-extra/trunk/clangd/CMakeLists.txt URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/CMakeLists.txt?rev=347038&r1=347037&r2=347038&view=diff ============================================================================== --- clang-tools-extra/trunk/clangd/CMakeLists.txt (original) +++ clang-tools-extra/trunk/clangd/CMakeLists.txt Fri Nov 16 01:03:56 2018 @@ -38,6 +38,7 @@ add_clang_library(clangDaemon XRefs.cpp index/Background.cpp + index/BackgroundIndexStorage.cpp index/CanonicalIncludes.cpp index/FileIndex.cpp index/Index.cpp Modified: clang-tools-extra/trunk/clangd/index/Background.cpp URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/index/Background.cpp?rev=347038&r1=347037&r2=347038&view=diff ============================================================================== --- clang-tools-extra/trunk/clangd/index/Background.cpp (original) +++ clang-tools-extra/trunk/clangd/index/Background.cpp Fri Nov 16 01:03:56 2018 @@ -24,6 +24,9 @@ #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/SHA1.h" + +#include <memory> +#include <queue> #include <random> #include <string> @@ -31,21 +34,22 @@ using namespace llvm; namespace clang { namespace clangd { -BackgroundIndex::BackgroundIndex(Context BackgroundContext, - StringRef ResourceDir, - const FileSystemProvider &FSProvider, - ArrayRef<std::string> URISchemes, - size_t ThreadPoolSize) +BackgroundIndex::BackgroundIndex( + Context 
BackgroundContext, StringRef ResourceDir, + const FileSystemProvider &FSProvider, ArrayRef<std::string> URISchemes, + BackgroundIndexStorage::Factory IndexStorageFactory, size_t ThreadPoolSize) : SwapIndex(make_unique<MemIndex>()), ResourceDir(ResourceDir), FSProvider(FSProvider), BackgroundContext(std::move(BackgroundContext)), - URISchemes(URISchemes) { + URISchemes(URISchemes), + IndexStorageFactory(std::move(IndexStorageFactory)) { assert(ThreadPoolSize > 0 && "Thread pool size can't be zero."); + assert(this->IndexStorageFactory && "Storage factory can not be null!"); while (ThreadPoolSize--) { ThreadPool.emplace_back([this] { run(); }); // Set priority to low, since background indexing is a long running task we // do not want to eat up cpu when there are any other high priority threads. // FIXME: In the future we might want a more general way of handling this to - // support a tasks with various priorities. + // support tasks with various priorities. setThreadPriority(ThreadPool.back(), ThreadPriority::Low); } } @@ -97,9 +101,10 @@ void BackgroundIndex::blockUntilIdleForT void BackgroundIndex::enqueue(StringRef Directory, tooling::CompileCommand Cmd) { + BackgroundIndexStorage *IndexStorage = IndexStorageFactory(Directory); { std::lock_guard<std::mutex> Lock(QueueMu); - enqueueLocked(std::move(Cmd)); + enqueueLocked(std::move(Cmd), IndexStorage); } QueueCV.notify_all(); } @@ -110,6 +115,7 @@ void BackgroundIndex::enqueueAll(StringR // FIXME: this function may be slow. Perhaps enqueue a task to re-read the CDB // from disk and enqueue the commands asynchronously? 
auto Cmds = CDB.getAllCompileCommands(); + BackgroundIndexStorage *IndexStorage = IndexStorageFactory(Directory); SPAN_ATTACH(Tracer, "commands", int64_t(Cmds.size())); std::mt19937 Generator(std::random_device{}()); std::shuffle(Cmds.begin(), Cmds.end(), Generator); @@ -117,17 +123,18 @@ void BackgroundIndex::enqueueAll(StringR { std::lock_guard<std::mutex> Lock(QueueMu); for (auto &Cmd : Cmds) - enqueueLocked(std::move(Cmd)); + enqueueLocked(std::move(Cmd), IndexStorage); } QueueCV.notify_all(); } -void BackgroundIndex::enqueueLocked(tooling::CompileCommand Cmd) { +void BackgroundIndex::enqueueLocked(tooling::CompileCommand Cmd, + BackgroundIndexStorage *IndexStorage) { Queue.push_back(Bind( - [this](tooling::CompileCommand Cmd) { + [this, IndexStorage](tooling::CompileCommand Cmd) { std::string Filename = Cmd.Filename; Cmd.CommandLine.push_back("-resource-dir=" + ResourceDir); - if (auto Error = index(std::move(Cmd))) + if (auto Error = index(std::move(Cmd), IndexStorage)) log("Indexing {0} failed: {1}", Filename, std::move(Error)); }, std::move(Cmd))); @@ -179,7 +186,8 @@ private: /// Given index results from a TU, only update files in \p FilesToUpdate. void BackgroundIndex::update(StringRef MainFile, SymbolSlab Symbols, RefSlab Refs, - const StringMap<FileDigest> &FilesToUpdate) { + const StringMap<FileDigest> &FilesToUpdate, + BackgroundIndexStorage *IndexStorage) { // Partition symbols/references into files. struct File { DenseSet<const Symbol *> Symbols; @@ -227,20 +235,35 @@ void BackgroundIndex::update(StringRef M for (const auto *R : F.second.Refs) Refs.insert(RefToIDs[R], *R); + auto SS = llvm::make_unique<SymbolSlab>(std::move(Syms).build()); + auto RS = llvm::make_unique<RefSlab>(std::move(Refs).build()); + + auto Hash = FilesToUpdate.lookup(Path); + // We need to store shards before updating the index, since the latter + // consumes slabs. + // FIXME: Store Hash in the Shard. 
+ if (IndexStorage) { + IndexFileOut Shard; + Shard.Symbols = SS.get(); + Shard.Refs = RS.get(); + if (auto Error = IndexStorage->storeShard(Path, Shard)) + elog("Failed to write background-index shard for file {0}: {1}", Path, + std::move(Error)); + } + std::lock_guard<std::mutex> Lock(DigestsMu); // This can override a newer version that is added in another thread, // if this thread sees the older version but finishes later. This should be // rare in practice. - IndexedFileDigests[Path] = FilesToUpdate.lookup(Path); - IndexedSymbols.update(Path, - make_unique<SymbolSlab>(std::move(Syms).build()), - make_unique<RefSlab>(std::move(Refs).build())); + IndexedFileDigests[Path] = Hash; + IndexedSymbols.update(Path, std::move(SS), std::move(RS)); } } // Creates a filter to not collect index results from files with unchanged // digests. -// \p FileDigests contains file digests for the current indexed files, and all changed files will be added to \p FilesToUpdate. +// \p FileDigests contains file digests for the current indexed files, and all +// changed files will be added to \p FilesToUpdate. 
decltype(SymbolCollector::Options::FileFilter) createFileFilter( const llvm::StringMap<BackgroundIndex::FileDigest> &FileDigests, llvm::StringMap<BackgroundIndex::FileDigest> &FilesToUpdate) { @@ -269,7 +292,8 @@ decltype(SymbolCollector::Options::FileF }; } -Error BackgroundIndex::index(tooling::CompileCommand Cmd) { +Error BackgroundIndex::index(tooling::CompileCommand Cmd, + BackgroundIndexStorage *IndexStorage) { trace::Span Tracer("BackgroundIndex"); SPAN_ATTACH(Tracer, "file", Cmd.Filename); SmallString<128> AbsolutePath; @@ -342,7 +366,8 @@ Error BackgroundIndex::index(tooling::Co Symbols.size(), Refs.numRefs()); SPAN_ATTACH(Tracer, "symbols", int(Symbols.size())); SPAN_ATTACH(Tracer, "refs", int(Refs.numRefs())); - update(AbsolutePath, std::move(Symbols), std::move(Refs), FilesToUpdate); + update(AbsolutePath, std::move(Symbols), std::move(Refs), FilesToUpdate, + IndexStorage); { // Make sure hash for the main file is always updated even if there is no // index data in it. Modified: clang-tools-extra/trunk/clangd/index/Background.h URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/index/Background.h?rev=347038&r1=347037&r2=347038&view=diff ============================================================================== --- clang-tools-extra/trunk/clangd/index/Background.h (original) +++ clang-tools-extra/trunk/clangd/index/Background.h Fri Nov 16 01:03:56 2018 @@ -14,6 +14,7 @@ #include "FSProvider.h" #include "index/FileIndex.h" #include "index/Index.h" +#include "index/Serialization.h" #include "clang/Tooling/CompilationDatabase.h" #include "llvm/ADT/StringMap.h" #include "llvm/Support/SHA1.h" @@ -27,6 +28,34 @@ namespace clang { namespace clangd { +// Handles storage and retrieval of index shards. Both store and load +// operations can be called from multiple threads concurrently. 
+class BackgroundIndexStorage { +public: + // Implementations are destroyed through pointers to this base class. + virtual ~BackgroundIndexStorage() = default; + + // Shards of the index are stored and retrieved independently, keyed by shard + // identifier - in practice this is a source file name. + virtual llvm::Error storeShard(llvm::StringRef ShardIdentifier, + IndexFileOut Shard) const = 0; + + // Tries to load shard with given identifier, returns nullptr if shard + // couldn't be loaded. + virtual std::unique_ptr<IndexFileIn> + loadShard(llvm::StringRef ShardIdentifier) const = 0; + + // The factory provides storage for each CDB. + // It keeps ownership of the storage instances, and should manage caching + // itself. Factory must be threadsafe and never returns nullptr. + using Factory = + llvm::unique_function<BackgroundIndexStorage *(llvm::StringRef)>; + + // Creates an Index Storage that saves shards into disk. Index storage uses + // CDBDirectory + ".clangd-index/" as the folder to save shards. + static Factory createDiskBackedStorageFactory(); +}; + // Builds an in-memory index by by running the static indexer action over // all commands in a compilation database. Indexing happens in the background. // FIXME: it should also persist its state on disk for fast start. @@ -34,8 +63,9 @@ namespace clangd { class BackgroundIndex : public SwapIndex { public: // FIXME: resource-dir injection should be hoisted somewhere common. - BackgroundIndex(Context BackgroundContext, StringRef ResourceDir, + BackgroundIndex(Context BackgroundContext, llvm::StringRef ResourceDir, const FileSystemProvider &, ArrayRef<std::string> URISchemes, + BackgroundIndexStorage::Factory IndexStorageFactory, size_t ThreadPoolSize = llvm::hardware_concurrency()); ~BackgroundIndex(); // Blocks while the current task finishes. @@ -59,7 +89,8 @@ public: private: /// Given index results from a TU, only update files in \p FilesToUpdate. 
void update(llvm::StringRef MainFile, SymbolSlab Symbols, RefSlab Refs, - const llvm::StringMap<FileDigest> &FilesToUpdate); + const llvm::StringMap<FileDigest> &FilesToUpdate, + BackgroundIndexStorage *IndexStorage); // configuration std::string ResourceDir; @@ -68,16 +99,20 @@ private: std::vector<std::string> URISchemes; // index state - llvm::Error index(tooling::CompileCommand); + llvm::Error index(tooling::CompileCommand, + BackgroundIndexStorage *IndexStorage); FileSymbols IndexedSymbols; llvm::StringMap<FileDigest> IndexedFileDigests; // Key is absolute file path. std::mutex DigestsMu; + BackgroundIndexStorage::Factory IndexStorageFactory; + // queue management using Task = std::function<void()>; void run(); // Main loop executed by Thread. Runs tasks from Queue. - void enqueueLocked(tooling::CompileCommand Cmd); + void enqueueLocked(tooling::CompileCommand Cmd, + BackgroundIndexStorage *IndexStorage); std::mutex QueueMu; unsigned NumActiveTasks = 0; // Only idle when queue is empty *and* no tasks. std::condition_variable QueueCV; Added: clang-tools-extra/trunk/clangd/index/BackgroundIndexStorage.cpp URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/index/BackgroundIndexStorage.cpp?rev=347038&view=auto ============================================================================== --- clang-tools-extra/trunk/clangd/index/BackgroundIndexStorage.cpp (added) +++ clang-tools-extra/trunk/clangd/index/BackgroundIndexStorage.cpp Fri Nov 16 01:03:56 2018 @@ -0,0 +1,112 @@ +//== BackgroundIndexStorage.cpp - Provide caching support to BackgroundIndex ==/ +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "Logger.h" +#include "index/Background.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/SHA1.h" + +namespace clang { +namespace clangd { +namespace { + +using FileDigest = decltype(llvm::SHA1::hash({})); + +static FileDigest digest(StringRef Content) { + return llvm::SHA1::hash({(const uint8_t *)Content.data(), Content.size()}); +} + +std::string getShardPathFromFilePath(llvm::StringRef ShardRoot, + llvm::StringRef FilePath) { + llvm::SmallString<128> ShardRootSS(ShardRoot); + llvm::sys::path::append(ShardRootSS, llvm::sys::path::filename(FilePath) + + "." + llvm::toHex(digest(FilePath)) + + ".idx"); + return ShardRootSS.str(); +} + +// Uses disk as a storage for index shards. Creates a directory called +// ".clangd-index/" under the path provided during construction. +class DiskBackedIndexStorage : public BackgroundIndexStorage { + std::string DiskShardRoot; + +public: + // Sets DiskShardRoot to (Directory + ".clangd-index/") which is the base + // directory for all shard files. 
+ DiskBackedIndexStorage(llvm::StringRef Directory) { + llvm::SmallString<128> CDBDirectory(Directory); + llvm::sys::path::append(CDBDirectory, ".clangd-index/"); + DiskShardRoot = CDBDirectory.str(); + std::error_code OK; + std::error_code EC = llvm::sys::fs::create_directory(DiskShardRoot); + if (EC != OK) { + elog("Failed to create directory {0} for index storage: {1}", + DiskShardRoot, EC.message()); + } + } + + std::unique_ptr<IndexFileIn> + loadShard(llvm::StringRef ShardIdentifier) const override { + const std::string ShardPath = + getShardPathFromFilePath(DiskShardRoot, ShardIdentifier); + auto Buffer = llvm::MemoryBuffer::getFile(ShardPath); + if (!Buffer) + return nullptr; + if (auto I = readIndexFile(Buffer->get()->getBuffer())) + return llvm::make_unique<IndexFileIn>(std::move(*I)); + else + elog("Error while reading shard {0}: {1}", ShardIdentifier, + I.takeError()); + return nullptr; + } + + llvm::Error storeShard(llvm::StringRef ShardIdentifier, + IndexFileOut Shard) const override { + auto ShardPath = getShardPathFromFilePath(DiskShardRoot, ShardIdentifier); + std::error_code EC; + llvm::raw_fd_ostream OS(ShardPath, EC); + if (EC) + return llvm::errorCodeToError(EC); + OS << Shard; + OS.close(); + return llvm::errorCodeToError(OS.error()); + } +}; + +// Creates and owns IndexStorages for multiple CDBs. +class DiskBackedIndexStorageManager { +public: + DiskBackedIndexStorageManager() + : IndexStorageMapMu(llvm::make_unique<std::mutex>()) {} + + // Creates or fetches the storage from cache for the specified CDB. 
+ BackgroundIndexStorage *operator()(llvm::StringRef CDBDirectory) { + std::lock_guard<std::mutex> Lock(*IndexStorageMapMu); + auto &IndexStorage = IndexStorageMap[CDBDirectory]; + if (!IndexStorage) + IndexStorage = llvm::make_unique<DiskBackedIndexStorage>(CDBDirectory); + return IndexStorage.get(); + } +
+private: + llvm::StringMap<std::unique_ptr<BackgroundIndexStorage>> IndexStorageMap; + std::unique_ptr<std::mutex> IndexStorageMapMu; +}; + +} // namespace + +BackgroundIndexStorage::Factory +BackgroundIndexStorage::createDiskBackedStorageFactory() { + return DiskBackedIndexStorageManager(); +} + +} // namespace clangd +} // namespace clang Modified: clang-tools-extra/trunk/unittests/clangd/BackgroundIndexTests.cpp URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/unittests/clangd/BackgroundIndexTests.cpp?rev=347038&r1=347037&r2=347038&view=diff ============================================================================== --- clang-tools-extra/trunk/unittests/clangd/BackgroundIndexTests.cpp (original) +++ clang-tools-extra/trunk/unittests/clangd/BackgroundIndexTests.cpp Fri Nov 16 01:03:56 2018 @@ -1,6 +1,7 @@ #include "SyncAPI.h" #include "TestFS.h" #include "index/Background.h" +#include "llvm/Support/ScopedPrinter.h" #include "gmock/gmock.h" #include "gtest/gtest.h" @@ -24,6 +25,37 @@ RefsAre(std::vector<testing::Matcher<Ref return ElementsAre(testing::Pair(_, UnorderedElementsAreArray(Matchers))); } +class MemoryShardStorage : public BackgroundIndexStorage { + mutable std::mutex StorageMu; + llvm::StringMap<std::string> &Storage; + size_t &CacheHits; + +public: + MemoryShardStorage(llvm::StringMap<std::string> &Storage, size_t &CacheHits) + : Storage(Storage), CacheHits(CacheHits) {} + llvm::Error storeShard(llvm::StringRef ShardIdentifier, + IndexFileOut Shard) const override { + std::lock_guard<std::mutex> Lock(StorageMu); + Storage[ShardIdentifier] = llvm::to_string(Shard); + return llvm::Error::success(); + } + std::unique_ptr<IndexFileIn> + loadShard(llvm::StringRef ShardIdentifier) const override { + std::lock_guard<std::mutex> Lock(StorageMu); + if (Storage.find(ShardIdentifier) == Storage.end()) { + return nullptr; + } + auto IndexFile = 
readIndexFile(Storage[ShardIdentifier]); + if (!IndexFile) { + ADD_FAILURE() << "Error while reading " << ShardIdentifier << ':' + << IndexFile.takeError(); + return nullptr; + } + CacheHits++; + return llvm::make_unique<IndexFileIn>(std::move(*IndexFile)); + } +}; + TEST(BackgroundIndexTest, IndexTwoFiles) { MockFSProvider FS; // a.h yields different symbols when included by A.cc vs B.cc. @@ -45,7 +77,11 @@ TEST(BackgroundIndexTest, IndexTwoFiles) void f_b() { (void)common; })cpp"; - BackgroundIndex Idx(Context::empty(), "", FS, /*URISchemes=*/{"unittest"}); + llvm::StringMap<std::string> Storage; + size_t CacheHits = 0; + MemoryShardStorage MSS(Storage, CacheHits); + BackgroundIndex Idx(Context::empty(), "", FS, /*URISchemes=*/{"unittest"}, + [&](llvm::StringRef) { return &MSS; }); tooling::CompileCommand Cmd; Cmd.Filename = testPath("root/A.cc"); @@ -78,5 +114,49 @@ TEST(BackgroundIndexTest, IndexTwoFiles) FileURI("unittest:///root/B.cc")})); } +TEST(BackgroundIndexTest, ShardStorageWriteTest) { + MockFSProvider FS; + FS.Files[testPath("root/A.h")] = R"cpp( + void common(); + void f_b(); + class A_CC {}; + )cpp"; + FS.Files[testPath("root/A.cc")] = + "#include \"A.h\"\nvoid g() { (void)common; }"; + + llvm::StringMap<std::string> Storage; + size_t CacheHits = 0; + MemoryShardStorage MSS(Storage, CacheHits); + + tooling::CompileCommand Cmd; + Cmd.Filename = testPath("root/A.cc"); + Cmd.Directory = testPath("root"); + Cmd.CommandLine = {"clang++", testPath("root/A.cc")}; + // Check nothing is loaded from Storage, but A.cc and A.h have been stored. 
+ { + BackgroundIndex Idx(Context::empty(), "", FS, /*URISchemes=*/{"unittest"}, + [&](llvm::StringRef) { return &MSS; }); + Idx.enqueue(testPath("root"), Cmd); + Idx.blockUntilIdleForTest(); + } + EXPECT_EQ(CacheHits, 0U); + EXPECT_EQ(Storage.size(), 2U); + + auto ShardHeader = MSS.loadShard(testPath("root/A.h")); + ASSERT_NE(ShardHeader, nullptr); + EXPECT_THAT( + *ShardHeader->Symbols, + UnorderedElementsAre(Named("common"), Named("A_CC"), + AllOf(Named("f_b"), Declared(), Not(Defined())))); + for (const auto &Ref : *ShardHeader->Refs) + EXPECT_THAT(Ref.second, + UnorderedElementsAre(FileURI("unittest:///root/A.h"))); + + auto ShardSource = MSS.loadShard(testPath("root/A.cc")); + ASSERT_NE(ShardSource, nullptr); + EXPECT_THAT(*ShardSource->Symbols, UnorderedElementsAre()); + EXPECT_THAT(*ShardSource->Refs, RefsAre({FileURI("unittest:///root/A.cc")})); +} + } // namespace clangd } // namespace clang _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits