friss updated this revision to Diff 489914.
friss added a comment.

The pre-commit CI showed some test failures on Windows. Try to address these.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D136651/new/

https://reviews.llvm.org/D136651

Files:
  clang/include/clang/Basic/DiagnosticFrontendKinds.td
  clang/include/clang/Driver/Options.td
  clang/include/clang/Frontend/CompilerInvocation.h
  clang/include/clang/Lex/HeaderSearchOptions.h
  clang/lib/Frontend/ASTUnit.cpp
  clang/lib/Frontend/CompilerInvocation.cpp
  clang/test/CMakeLists.txt
  clang/test/Driver/vfsstatcache.c
  clang/test/clang-stat-cache/cache-effects.c
  clang/test/clang-stat-cache/errors.test
  clang/tools/CMakeLists.txt
  clang/tools/clang-stat-cache/CMakeLists.txt
  clang/tools/clang-stat-cache/clang-stat-cache.cpp
  llvm/include/llvm/Support/StatCacheFileSystem.h
  llvm/lib/Support/CMakeLists.txt
  llvm/lib/Support/StatCacheFileSystem.cpp
  llvm/unittests/Support/VirtualFileSystemTest.cpp

Index: llvm/unittests/Support/VirtualFileSystemTest.cpp
===================================================================
--- llvm/unittests/Support/VirtualFileSystemTest.cpp
+++ llvm/unittests/Support/VirtualFileSystemTest.cpp
@@ -14,9 +14,11 @@
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/Path.h"
 #include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/StatCacheFileSystem.h"
 #include "llvm/Testing/Support/SupportHelpers.h"
 #include "gmock/gmock.h"
 #include "gtest/gtest.h"
+#include <list>
 #include <map>
 #include <string>
 
@@ -3228,3 +3230,306 @@
             "  DummyFileSystem (RecursiveContents)\n",
             Output);
 }
+
+class StatCacheFileSystemTest : public ::testing::Test {
+public:
+  void SetUp() override {}
+
+  template <typename StringCollection>
+  void createStatCacheFileSystem(
+      StringRef OutputFile, StringRef BaseDir, bool IsCaseSensitive,
+      IntrusiveRefCntPtr<vfs::StatCacheFileSystem> &Result,
+      StringCollection &Filenames,
+      IntrusiveRefCntPtr<vfs::FileSystem> Lower = new ErrorDummyFileSystem(),
+      uint64_t ValidityToken = 0) {
+    sys::fs::file_status s;
+    status(BaseDir, s);
+    vfs::StatCacheFileSystem::StatCacheWriter Generator(
+        BaseDir, s, IsCaseSensitive, ValidityToken);
+    std::error_code ErrorCode;
+
+    Result.reset();
+
+    // Base path should be present in the stat cache.
+    Filenames.push_back(std::string(BaseDir));
+
+    for (sys::fs::recursive_directory_iterator I(BaseDir, ErrorCode), E;
+         I != E && !ErrorCode; I.increment(ErrorCode)) {
+      Filenames.push_back(I->path());
+      StringRef Path(Filenames.back().c_str());
+      status(Path, s);
+      Generator.addEntry(Path, s);
+    }
+
+    {
+      raw_fd_ostream StatCacheFile(OutputFile, ErrorCode);
+      ASSERT_FALSE(ErrorCode);
+      Generator.writeStatCache(StatCacheFile);
+    }
+
+    loadCacheFile(OutputFile, ValidityToken, Lower, Result);
+  }
+
+  void loadCacheFile(StringRef OutputFile, uint64_t ExpectedValidityToken,
+                     IntrusiveRefCntPtr<vfs::FileSystem> Lower,
+                     IntrusiveRefCntPtr<vfs::StatCacheFileSystem> &Result) {
+    auto ErrorOrBuffer = MemoryBuffer::getFile(OutputFile);
+    ASSERT_TRUE(ErrorOrBuffer); // Dereferenced below; stop here on failure.
+    StringRef CacheBaseDir;
+    bool IsCaseSensitive;
+    bool VersionMatch;
+    uint64_t FileValidityToken;
+    auto E = vfs::StatCacheFileSystem::validateCacheFile(
+        (*ErrorOrBuffer)->getMemBufferRef(), CacheBaseDir, IsCaseSensitive,
+        VersionMatch, FileValidityToken);
+    ASSERT_FALSE(E);
+    EXPECT_TRUE(VersionMatch);
+    EXPECT_EQ(FileValidityToken, ExpectedValidityToken);
+    auto ExpectedCache =
+        vfs::StatCacheFileSystem::create(std::move(*ErrorOrBuffer), Lower);
+    ASSERT_FALSE(ExpectedCache.takeError());
+    Result = *ExpectedCache; // Hand the loaded cache back through \p Result.
+  }
+
+  template <typename StringCollection>
+  void
+  compareStatCacheToRealFS(IntrusiveRefCntPtr<vfs::StatCacheFileSystem> CacheFS,
+                           const StringCollection &Files) {
+    IntrusiveRefCntPtr<vfs::FileSystem> RealFS = vfs::getRealFileSystem();
+
+    for (auto &File : Files) {
+      auto ErrorOrStatus1 = RealFS->status(File);
+      auto ErrorOrStatus2 = CacheFS->status(File);
+
+      EXPECT_EQ((bool)ErrorOrStatus1, (bool)ErrorOrStatus2);
+      if (!ErrorOrStatus1 || !ErrorOrStatus2)
+        continue;
+
+      vfs::Status s1 = *ErrorOrStatus1, s2 = *ErrorOrStatus2;
+      EXPECT_EQ(s1.getName(), s2.getName());
+      EXPECT_EQ(s1.getType(), s2.getType());
+      EXPECT_EQ(s1.getPermissions(), s2.getPermissions());
+      EXPECT_EQ(s1.getLastModificationTime(), s2.getLastModificationTime());
+      EXPECT_EQ(s1.getUniqueID(), s2.getUniqueID());
+      EXPECT_EQ(s1.getUser(), s2.getUser());
+      EXPECT_EQ(s1.getGroup(), s2.getGroup());
+      EXPECT_EQ(s1.getSize(), s2.getSize());
+    }
+  }
+};
+
+TEST_F(StatCacheFileSystemTest, Basic) {
+  TempDir TestDirectory("virtual-file-system-test", /*Unique*/ true);
+  TempDir _a(TestDirectory.path("a"));
+  TempFile _ab(TestDirectory.path("a/b"));
+  TempDir _ac(TestDirectory.path("a/c"));
+  TempFile _acd(TestDirectory.path("a/c/d"), "", "Dummy contents");
+  TempFile _ace(TestDirectory.path("a/c/e"));
+  TempFile _acf(TestDirectory.path("a/c/f"), "", "More dummy contents");
+  TempDir _ag(TestDirectory.path("a/g"));
+  TempFile _agh(TestDirectory.path("a/g/h"));
+
+  StringRef BaseDir(_a.path());
+
+  SmallVector<std::string, 10> Filenames;
+  IntrusiveRefCntPtr<vfs::StatCacheFileSystem> StatCacheFS;
+  createStatCacheFileSystem(TestDirectory.path("stat.cache"), BaseDir,
+                            /* IsCaseSensitive= */ true, StatCacheFS,
+                            Filenames);
+  ASSERT_TRUE(StatCacheFS);
+  compareStatCacheToRealFS(StatCacheFS, Filenames);
+}
+
+TEST_F(StatCacheFileSystemTest, CaseSensitivity) {
+  TempDir TestDirectory("virtual-file-system-test", /*Unique*/ true);
+  TempDir _a(TestDirectory.path("a"));
+  TempDir _ac(TestDirectory.path("a/c"));
+  TempFile _acd(TestDirectory.path("a/c/d"), "", "Dummy contents");
+  TempDir _b(TestDirectory.path("B"));
+  TempDir _bc(TestDirectory.path("B/c"));
+  TempFile _bcd(TestDirectory.path("B/c/D"), "", "Dummy contents");
+
+  StringRef BaseDir(TestDirectory.path());
+  SmallVector<std::string, 10> Filenames;
+  IntrusiveRefCntPtr<vfs::StatCacheFileSystem> StatCacheFS;
+  createStatCacheFileSystem(TestDirectory.path("stat.cache"), BaseDir,
+                            /* IsCaseSensitive= */ true, StatCacheFS,
+                            Filenames);
+  ASSERT_TRUE(StatCacheFS);
+
+  auto ErrorOrStatus = StatCacheFS->status(_acd.path());
+  EXPECT_TRUE(ErrorOrStatus);
+  ErrorOrStatus = StatCacheFS->status(_bcd.path());
+  EXPECT_TRUE(ErrorOrStatus);
+  ErrorOrStatus = StatCacheFS->status(TestDirectory.path("a/C/d"));
+  EXPECT_FALSE(ErrorOrStatus);
+  ErrorOrStatus = StatCacheFS->status(TestDirectory.path("A/C/d"));
+  EXPECT_FALSE(ErrorOrStatus);
+  ErrorOrStatus = StatCacheFS->status(TestDirectory.path("a/c/D"));
+  EXPECT_FALSE(ErrorOrStatus);
+  ErrorOrStatus = StatCacheFS->status(TestDirectory.path("b/c/d"));
+  EXPECT_FALSE(ErrorOrStatus);
+  ErrorOrStatus = StatCacheFS->status(TestDirectory.path("b/C/d"));
+  EXPECT_FALSE(ErrorOrStatus);
+  ErrorOrStatus = StatCacheFS->status(TestDirectory.path("B/C/D"));
+  EXPECT_FALSE(ErrorOrStatus);
+
+  createStatCacheFileSystem(TestDirectory.path("stat.cache"), BaseDir,
+                            /* IsCaseSensitive= */ false, StatCacheFS,
+                            Filenames);
+  ASSERT_TRUE(StatCacheFS);
+  ErrorOrStatus = StatCacheFS->status(_acd.path());
+  EXPECT_TRUE(ErrorOrStatus);
+  ErrorOrStatus = StatCacheFS->status(_bcd.path());
+  EXPECT_TRUE(ErrorOrStatus);
+  ErrorOrStatus = StatCacheFS->status(TestDirectory.path("a/C/d"));
+  EXPECT_TRUE(ErrorOrStatus);
+  ErrorOrStatus = StatCacheFS->status(TestDirectory.path("A/C/d"));
+  EXPECT_TRUE(ErrorOrStatus);
+  ErrorOrStatus = StatCacheFS->status(TestDirectory.path("a/c/D"));
+  EXPECT_TRUE(ErrorOrStatus);
+  ErrorOrStatus = StatCacheFS->status(TestDirectory.path("b/c/d"));
+  EXPECT_TRUE(ErrorOrStatus);
+  ErrorOrStatus = StatCacheFS->status(TestDirectory.path("b/C/d"));
+  EXPECT_TRUE(ErrorOrStatus);
+  ErrorOrStatus = StatCacheFS->status(TestDirectory.path("B/C/D"));
+  EXPECT_TRUE(ErrorOrStatus);
+}
+
+TEST_F(StatCacheFileSystemTest, DotDot) {
+  TempDir TestDirectory("virtual-file-system-test", /*Unique*/ true);
+  TempDir _a(TestDirectory.path("a"));
+  TempDir _ab(TestDirectory.path("a/b"));
+  TempFile _abd(TestDirectory.path("a/b/d"));
+  TempDir _ac(TestDirectory.path("a/c"));
+  TempFile _acd(TestDirectory.path("a/c/d"));
+
+  StringRef BaseDir(_a.path());
+  SmallVector<std::string, 10> Filenames;
+  IntrusiveRefCntPtr<vfs::StatCacheFileSystem> StatCacheFS;
+  auto RealFS = vfs::getRealFileSystem();
+  createStatCacheFileSystem(TestDirectory.path("stat.cache"), BaseDir,
+                            /* IsCaseSensitive= */ true, StatCacheFS, Filenames,
+                            RealFS);
+  ASSERT_TRUE(StatCacheFS);
+
+  // Create a file in the cached prefix after the cache was created.
+  TempFile _abe(TestDirectory.path("a/b/e"));
+  // Verify the cache is kicking in.
+  ASSERT_FALSE(StatCacheFS->status(_abe.path()));
+  // We can access the new file using a ".." because the StatCache will
+  // just pass that request to the FileSystem below it.
+  const SmallString<128> PathsToTest[] = {
+      TestDirectory.path("a/b/../e"),
+      TestDirectory.path("a/b/../c/d"),
+      TestDirectory.path("a/b/.."),
+  };
+  compareStatCacheToRealFS(StatCacheFS, PathsToTest);
+}
+
+#ifdef LLVM_ON_UNIX
+TEST_F(StatCacheFileSystemTest, Links) {
+  TempDir TestDirectory("virtual-file-system-test", /*Unique*/ true);
+  TempDir _a(TestDirectory.path("a"));
+  TempLink _ab("d", TestDirectory.path("a/b"));
+  TempFile _ac(TestDirectory.path("a/c"));
+  TempDir _ad(TestDirectory.path("a/d"));
+  TempFile _add(TestDirectory.path("a/d/d"), "", "Dummy contents");
+  TempFile _ade(TestDirectory.path("a/d/e"));
+  TempFile _adf(TestDirectory.path("a/d/f"), "", "More dummy contents");
+  TempLink _adg(_ad.path(), TestDirectory.path("a/d/g"));
+  TempDir _ah(TestDirectory.path("a/h"));
+  TempLink _ahi(_ad.path(), TestDirectory.path("a/h/i"));
+  TempLink _ahj("no_such_file", TestDirectory.path("a/h/j"));
+
+  StringRef BaseDir(_a.path());
+
+  SmallVector<std::string, 10> Filenames;
+  IntrusiveRefCntPtr<vfs::StatCacheFileSystem> StatCacheFS;
+  createStatCacheFileSystem(TestDirectory.path("stat.cache"), BaseDir,
+                            /* IsCaseSensitive= */ true, StatCacheFS,
+                            Filenames);
+  ASSERT_TRUE(StatCacheFS);
+  EXPECT_NE(std::find(Filenames.begin(), Filenames.end(),
+                      TestDirectory.path("a/d/g/g")),
+            Filenames.end());
+  EXPECT_NE(std::find(Filenames.begin(), Filenames.end(),
+                      TestDirectory.path("a/b/e")),
+            Filenames.end());
+  EXPECT_NE(std::find(Filenames.begin(), Filenames.end(),
+                      TestDirectory.path("a/h/i/f")),
+            Filenames.end());
+  EXPECT_NE(std::find(Filenames.begin(), Filenames.end(),
+                      TestDirectory.path("a/h/j")),
+            Filenames.end());
+  compareStatCacheToRealFS(StatCacheFS, Filenames);
+
+  createStatCacheFileSystem(TestDirectory.path("stat.cache"), BaseDir,
+                            /* IsCaseSensitive= */ true, StatCacheFS, Filenames,
+                            vfs::getRealFileSystem());
+  const SmallString<128> PathsToTest[] = {
+      TestDirectory.path("a/h/i/../c"),
+      TestDirectory.path("a/b/../d"),
+      TestDirectory.path("a/g/g/../c"),
+      TestDirectory.path("a/b/.."),
+  };
+  compareStatCacheToRealFS(StatCacheFS, PathsToTest);
+}
+#endif
+
+TEST_F(StatCacheFileSystemTest, Canonical) {
+  TempDir TestDirectory("virtual-file-system-test", /*Unique*/ true);
+  TempDir _a(TestDirectory.path("a"));
+  TempFile _ab(TestDirectory.path("a/b"));
+  TempDir _ac(TestDirectory.path("a/c"));
+  TempFile _acd(TestDirectory.path("a/c/d"), "", "Dummy contents");
+
+  StringRef BaseDir(_a.path());
+  SmallVector<std::string, 10> Filenames;
+  IntrusiveRefCntPtr<vfs::StatCacheFileSystem> StatCacheFS;
+  createStatCacheFileSystem(TestDirectory.path("stat.cache"), BaseDir,
+                            /* IsCaseSensitive= */ true, StatCacheFS,
+                            Filenames);
+  ASSERT_TRUE(StatCacheFS);
+
+  const SmallString<128> PathsToTest[] = {
+      TestDirectory.path("./a/b"),        TestDirectory.path("a//./b"),
+      TestDirectory.path("a///b"),        TestDirectory.path("a//c//d"),
+      TestDirectory.path("a//c/./d"),     TestDirectory.path("a/./././b"),
+      TestDirectory.path("a/.//.//.//b"),
+  };
+  compareStatCacheToRealFS(StatCacheFS, PathsToTest);
+}
+
+TEST_F(StatCacheFileSystemTest, ValidityToken) {
+  TempDir TestDirectory("virtual-file-system-test", /*Unique*/ true);
+  TempDir _a(TestDirectory.path("a"));
+  TempFile _ab(TestDirectory.path("a/b"));
+  TempDir _ac(TestDirectory.path("a/c"));
+  TempFile _acd(TestDirectory.path("a/c/d"), "", "Dummy contents");
+
+  StringRef BaseDir(_a.path());
+  IntrusiveRefCntPtr<vfs::StatCacheFileSystem> StatCacheFS;
+  {
+    SmallVector<std::string, 10> Filenames;
+    uint64_t ValidityToken = 0x1234567890abcfef;
+    createStatCacheFileSystem(TestDirectory.path("stat.cache"), BaseDir,
+                              /* IsCaseSensitive= */ true, StatCacheFS,
+                              Filenames, new DummyFileSystem(), ValidityToken);
+    ASSERT_TRUE(StatCacheFS);
+  }
+
+  uint64_t UpdatedValidityToken = 0xabcdef0123456789;
+  {
+    std::error_code EC;
+    raw_fd_ostream CacheFile(TestDirectory.path("stat.cache"), EC,
+                             sys::fs::CD_OpenAlways);
+    ASSERT_FALSE(EC);
+    vfs::StatCacheFileSystem::updateValidityToken(CacheFile,
+                                                  UpdatedValidityToken);
+  }
+
+  loadCacheFile(TestDirectory.path("stat.cache"), UpdatedValidityToken,
+                new DummyFileSystem(), StatCacheFS);
+  EXPECT_TRUE(StatCacheFS);
+}
Index: llvm/lib/Support/StatCacheFileSystem.cpp
===================================================================
--- /dev/null
+++ llvm/lib/Support/StatCacheFileSystem.cpp
@@ -0,0 +1,299 @@
+//===- StatCacheFileSystem.cpp - Status Caching Proxy File System ---------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/StatCacheFileSystem.h"
+
+#include "llvm/ADT/IntrusiveRefCntPtr.h"
+#include "llvm/Support/ErrorOr.h"
+#include "llvm/Support/OnDiskHashTable.h"
+
+namespace llvm {
+namespace vfs {
+
+class StatCacheFileSystem::StatCacheLookupInfo {
+public:
+  typedef StringRef external_key_type;
+  typedef StringRef internal_key_type;
+  typedef llvm::sys::fs::file_status data_type;
+  typedef uint32_t hash_value_type;
+  typedef uint32_t offset_type;
+
+  static bool EqualKey(const internal_key_type &a, const internal_key_type &b) {
+    return a == b;
+  }
+
+  static hash_value_type ComputeHash(const internal_key_type &a) {
+    return hash_value(a);
+  }
+
+  static std::pair<unsigned, unsigned>
+  ReadKeyDataLength(const unsigned char *&d) {
+    using namespace llvm::support;
+    unsigned KeyLen = endian::readNext<uint16_t, little, unaligned>(d);
+    unsigned DataLen = endian::readNext<uint16_t, little, unaligned>(d);
+    return std::make_pair(KeyLen, DataLen);
+  }
+
+  static const internal_key_type &GetInternalKey(const external_key_type &x) {
+    return x;
+  }
+
+  static const external_key_type &GetExternalKey(const internal_key_type &x) {
+    return x;
+  }
+
+  static internal_key_type ReadKey(const unsigned char *d, unsigned n) {
+    return StringRef((const char *)d, n);
+  }
+
+  static data_type ReadData(const internal_key_type &k, const unsigned char *d,
+                            unsigned DataLen) {
+    data_type Result;
+    memcpy(&Result, d, sizeof(Result));
+    return Result;
+  }
+};
+
+class StatCacheFileSystem::StatCacheGenerationInfo {
+public:
+  typedef StringRef key_type;
+  typedef const StringRef &key_type_ref;
+  typedef sys::fs::file_status data_type;
+  typedef const sys::fs::file_status &data_type_ref;
+  typedef uint32_t hash_value_type;
+  typedef uint32_t offset_type;
+
+  /// Calculate the hash for Key
+  static hash_value_type ComputeHash(key_type_ref Key) {
+    return static_cast<size_t>(hash_value(Key));
+  }
+
+  /// Emit and return the lengths, in bytes, of the given Key/Data pair.
+  static std::pair<unsigned, unsigned>
+  EmitKeyDataLength(raw_ostream &Out, key_type_ref Key, data_type_ref Data) {
+    using namespace llvm::support;
+    endian::Writer LE(Out, little);
+    unsigned KeyLen = Key.size();
+    unsigned DataLen = sizeof(Data);
+    LE.write<uint16_t>(KeyLen);
+    LE.write<uint16_t>(DataLen);
+    return std::make_pair(KeyLen, DataLen);
+  }
+
+  static void EmitKey(raw_ostream &Out, key_type_ref Key, unsigned KeyLen) {
+    Out.write(Key.data(), KeyLen);
+  }
+
+  /// Write Data to Out. Len is the data length from EmitKeyDataLength.
+  static void EmitData(raw_ostream &Out, key_type_ref Key, data_type_ref Data,
+                       unsigned Len) {
+    Out.write((const char *)&Data, Len);
+  }
+
+  static bool EqualKey(key_type_ref Key1, key_type_ref Key2) {
+    return Key1 == Key2;
+  }
+};
+
+// The format of the stat cache is (pseudo-code):
+//  struct stat_cache {
+//    char     Magic[4];       // "STAT" or "Stat"
+//    uint32_t BucketOffset;   // See BucketOffset in OnDiskHashTable.h
+//    uint64_t ValidityToken;  // Platform-specific data used to check
+//                             // whether the cache is up-to-date.
+//    uint32_t Version;        // The stat cache format version.
+//    char     BaseDir[N];     // Zero terminated path to the base directory
+//    < OnDiskHashtable Data > // Data for the hash table. The keys are the
+//                             // relative paths under BaseDir. The data is
+//                             // llvm::sys::fs::file_status structures.
+//  };
+
+#define MAGIC_CASE_SENSITIVE "Stat"
+#define MAGIC_CASE_INSENSITIVE "STAT"
+#define STAT_CACHE_VERSION 1
+
+namespace {
+struct StatCacheHeader {
+  char Magic[4];
+  uint32_t BucketOffset;
+  uint64_t ValidityToken;
+  uint32_t Version;
+  char BaseDir[1];
+};
+} // namespace
+
+StatCacheFileSystem::StatCacheFileSystem(
+    std::unique_ptr<MemoryBuffer> CacheFile, IntrusiveRefCntPtr<FileSystem> FS,
+    bool IsCaseSensitive)
+    : ProxyFileSystem(std::move(FS)), StatCacheFile(std::move(CacheFile)),
+      IsCaseSensitive(IsCaseSensitive) {
+  const char *CacheFileStart = StatCacheFile->getBufferStart();
+  auto *Header = reinterpret_cast<const StatCacheHeader *>(CacheFileStart);
+
+  uint32_t BucketOffset = Header->BucketOffset;
+  StatCachePrefix = StringRef(Header->BaseDir);
+  // HashTableStart points at the beginning of the data emitted by the
+  // OnDiskHashTable.
+  const unsigned char *HashTableStart = (const unsigned char *)CacheFileStart +
+                                        StatCachePrefix.size() +
+                                        sizeof(StatCacheHeader);
+  StatCache.reset(StatCacheType::Create(
+      (const unsigned char *)CacheFileStart + BucketOffset, HashTableStart,
+      (const unsigned char *)CacheFileStart));
+}
+
+Expected<IntrusiveRefCntPtr<StatCacheFileSystem>>
+StatCacheFileSystem::create(std::unique_ptr<MemoryBuffer> CacheBuffer,
+                            IntrusiveRefCntPtr<FileSystem> FS) {
+  StringRef BaseDir;
+  bool IsCaseSensitive;
+  bool VersionMatch;
+  uint64_t ValidityToken;
+  if (auto E = validateCacheFile(*CacheBuffer, BaseDir, IsCaseSensitive,
+                                 VersionMatch, ValidityToken))
+    return E;
+  if (!VersionMatch) {
+    return createStringError(inconvertibleErrorCode(),
+                             CacheBuffer->getBufferIdentifier() +
+                                 ": Mismatched cache file version");
+  }
+  return new StatCacheFileSystem(std::move(CacheBuffer), FS, IsCaseSensitive);
+}
+
+ErrorOr<Status> StatCacheFileSystem::status(const Twine &Path) {
+  SmallString<180> StringPath;
+  Path.toVector(StringPath);
+  // If the cache is not case sensitive, do all operations on lower-cased paths.
+  if (!IsCaseSensitive)
+    std::transform(StringPath.begin(), StringPath.end(), StringPath.begin(),
+                   toLower);
+
+  // Canonicalize the path. This removes single dot path components,
+  // but it also gets rid of repeating separators.
+  llvm::sys::path::remove_dots(StringPath);
+
+  // If on Windows, canonicalize separators.
+  llvm::sys::path::make_preferred(StringPath);
+
+  // Check if the requested path falls into the cache.
+  StringRef SuffixPath(StringPath);
+  if (!SuffixPath.consume_front(StatCachePrefix))
+    return ProxyFileSystem::status(Path);
+
+  auto It = StatCache->find(SuffixPath);
+  if (It == StatCache->end()) {
+    // We didn't find the file in the cache even though it started with the
+    // cache prefix. It could be that the file doesn't exist, or the spelling
+    // of the path is different. `remove_dots` canonicalizes the path by
+    // removing `.` and excess separators, but leaves `..` since removing it
+    // does not preserve semantics in the presence of symlinks. If the path
+    // does not contain '..' we can safely say it doesn't exist.
+    if (std::find(sys::path::begin(SuffixPath), sys::path::end(SuffixPath),
+                  "..") == sys::path::end(SuffixPath)) {
+      return llvm::errc::no_such_file_or_directory;
+    }
+    return ProxyFileSystem::status(Path);
+  }
+
+  // clang-stat-cache will record entries for broken symlinks with a default-
+  // constructed Status. This will have a default-constructed UniqueID.
+  if ((*It).getUniqueID() == llvm::sys::fs::UniqueID())
+    return llvm::errc::no_such_file_or_directory;
+
+  return llvm::vfs::Status::copyWithNewName(*It, Path);
+}
+
+StatCacheFileSystem::StatCacheWriter::StatCacheWriter(
+    StringRef BaseDir, const sys::fs::file_status &Status, bool IsCaseSensitive,
+    uint64_t ValidityToken)
+    : BaseDir(IsCaseSensitive ? BaseDir.str() : BaseDir.lower()),
+      IsCaseSensitive(IsCaseSensitive), ValidityToken(ValidityToken),
+      Generator(new StatCacheGeneratorType()) {
+  addEntry(BaseDir, Status);
+  // If on Windows, canonicalize separators.
+  llvm::sys::path::make_preferred(this->BaseDir);
+}
+
+StatCacheFileSystem::StatCacheWriter::~StatCacheWriter() { delete Generator; }
+
+void StatCacheFileSystem::StatCacheWriter::addEntry(
+    StringRef Path, const sys::fs::file_status &Status) {
+  std::string StoredPath;
+  if (!IsCaseSensitive) {
+    StoredPath = Path.lower();
+    Path = StoredPath;
+  }
+
+  LLVM_ATTRIBUTE_UNUSED bool Consumed = Path.consume_front(BaseDir);
+  assert(Consumed && "Path does not start with expected prefix.");
+
+  PathStorage.emplace_back(Path.str());
+  Generator->insert(PathStorage.back(), Status);
+}
+
+size_t
+StatCacheFileSystem::StatCacheWriter::writeStatCache(raw_fd_ostream &Out) {
+  const uint32_t Version = STAT_CACHE_VERSION;
+  // Magic value.
+  if (IsCaseSensitive)
+    Out.write(MAGIC_CASE_SENSITIVE, 4);
+  else
+    Out.write(MAGIC_CASE_INSENSITIVE, 4);
+  // Placeholder for BucketOffset, filled in below.
+  Out.write("\0\0\0\0", 4);
+  // Write out the validity token.
+  Out.write((const char *)&ValidityToken, sizeof(ValidityToken));
+  // Write out the version.
+  Out.write((const char *)&Version, sizeof(Version));
+  // Write out the base directory for the cache.
+  Out.write(BaseDir.c_str(), BaseDir.size() + 1);
+  // Write out the hashtable data.
+  uint32_t BucketOffset = Generator->Emit(Out);
+  const size_t Size = Out.tell(); // Keep the returned type; int would narrow.
+  // Move back to right after the Magic to insert BucketOffset
+  Out.seek(4);
+  Out.write((const char *)&BucketOffset, sizeof(BucketOffset));
+  return Size;
+}
+
+Error StatCacheFileSystem::validateCacheFile(MemoryBufferRef Buffer,
+                                             StringRef &BaseDir,
+                                             bool &IsCaseSensitive,
+                                             bool &VersionMatch,
+                                             uint64_t &ValidityToken) {
+  auto *Header =
+      reinterpret_cast<const StatCacheHeader *>(Buffer.getBufferStart());
+  if (Buffer.getBufferSize() < sizeof(StatCacheHeader) ||
+      (memcmp(Header->Magic, MAGIC_CASE_INSENSITIVE, sizeof(Header->Magic)) &&
+       memcmp(Header->Magic, MAGIC_CASE_SENSITIVE, sizeof(Header->Magic))) ||
+      Header->BucketOffset > Buffer.getBufferSize())
+    return createStringError(inconvertibleErrorCode(), "Invalid cache file");
+
+  // No NUL within the buffer (strnlen hit its limit) means a corrupt file.
+  size_t MaxLen = Buffer.getBufferSize() - offsetof(StatCacheHeader, BaseDir);
+  size_t PathLen = strnlen(Header->BaseDir, MaxLen);
+  if (PathLen == MaxLen)
+    return createStringError(inconvertibleErrorCode(), "Invalid cache file");
+
+  IsCaseSensitive = Header->Magic[1] == MAGIC_CASE_SENSITIVE[1];
+  VersionMatch = Header->Version == STAT_CACHE_VERSION;
+  BaseDir = StringRef(Header->BaseDir, PathLen);
+  ValidityToken = Header->ValidityToken;
+
+  return ErrorSuccess();
+}
+
+void StatCacheFileSystem::updateValidityToken(raw_fd_ostream &CacheFile,
+                                              uint64_t ValidityToken) {
+  CacheFile.pwrite(reinterpret_cast<char *>(&ValidityToken),
+                   sizeof(ValidityToken),
+                   offsetof(StatCacheHeader, ValidityToken));
+}
+
+} // namespace vfs
+} // namespace llvm
Index: llvm/lib/Support/CMakeLists.txt
===================================================================
--- llvm/lib/Support/CMakeLists.txt
+++ llvm/lib/Support/CMakeLists.txt
@@ -212,6 +212,7 @@
   SmallVector.cpp
   SourceMgr.cpp
   SpecialCaseList.cpp
+  StatCacheFileSystem.cpp
   Statistic.cpp
   StringExtras.cpp
   StringMap.cpp
Index: llvm/include/llvm/Support/StatCacheFileSystem.h
===================================================================
--- /dev/null
+++ llvm/include/llvm/Support/StatCacheFileSystem.h
@@ -0,0 +1,110 @@
+//===- StatCacheFileSystem.h - Status Caching Proxy File System -*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_STATCACHEFILESYSTEM_H
+#define LLVM_SUPPORT_STATCACHEFILESYSTEM_H
+
+#include "llvm/Support/VirtualFileSystem.h"
+
+#include <list>
+
+namespace llvm {
+template <typename T> class OnDiskIterableChainedHashTable;
+template <typename T> class OnDiskChainedHashTableGenerator;
+
+namespace vfs {
+
+/// A ProxyFileSystem using cached information for status() rather than going to
+/// the underlying filesystem.
+///
+/// When dealing with a huge tree of (mostly) immutable filesystem content
+/// like an SDK, it can be very costly to ask the underlying filesystem for
+/// `stat` data. Even when caching the `stat`s internally, having many
+/// concurrent Clangs accessing the same tree in a similar way causes
+/// contention. As SDK files are mostly immutable, we can pre-compute the status
+/// information using clang-stat-cache and use that information directly without
+/// accessing the real filesystem until Clang needs to open a file. This can
+/// speed up module verification and HeaderSearch by significant amounts.
+class StatCacheFileSystem : public llvm::vfs::ProxyFileSystem {
+  class StatCacheLookupInfo;
+  using StatCacheType =
+      llvm::OnDiskIterableChainedHashTable<StatCacheLookupInfo>;
+
+  class StatCacheGenerationInfo;
+  using StatCacheGeneratorType =
+      llvm::OnDiskChainedHashTableGenerator<StatCacheGenerationInfo>;
+
+  explicit StatCacheFileSystem(std::unique_ptr<llvm::MemoryBuffer> CacheFile,
+                               IntrusiveRefCntPtr<FileSystem> FS,
+                               bool IsCaseSensitive);
+
+public:
+  /// Create a StatCacheFileSystem from the passed \a CacheBuffer, a
+  /// MemoryBuffer holding the contents of a stat cache file. The returned
+  /// filesystem will be overlaid on top of \a FS.
+  static Expected<IntrusiveRefCntPtr<StatCacheFileSystem>>
+  create(std::unique_ptr<llvm::MemoryBuffer> CacheBuffer,
+         IntrusiveRefCntPtr<FileSystem> FS);
+
+  /// The status override which will consult the cache if \a Path is in the
+  /// cached filesystem tree.
+  llvm::ErrorOr<llvm::vfs::Status> status(const Twine &Path) override;
+
+public:
+  /// A helper class to generate stat caches.
+  class StatCacheWriter {
+    llvm::SmallString<128> BaseDir;
+    bool IsCaseSensitive;
+    uint64_t ValidityToken;
+    StatCacheGeneratorType *Generator;
+    std::list<std::string> PathStorage;
+
+  public:
+    /// Create a StatCacheWriter
+    ///
+    /// \param BaseDir The base directory for the path. Every filename passed to
+    ///                addEntry() needs to start with this base directory.
+    /// \param Status The status entry for the base directory.
+    /// \param IsCaseSensitive Whether the cache is case sensitive.
+    /// \param ValidityToken A 64 bits token that gets embedded in the cache and
+    ///                      can be used by generator tools to check for the
+    ///                      cache validity in a platform-specific way.
+    StatCacheWriter(StringRef BaseDir, const sys::fs::file_status &Status,
+                    bool IsCaseSensitive, uint64_t ValidityToken = 0);
+    ~StatCacheWriter();
+
+    /// Add a cache entry storing \a Status for the file at \a Path.
+    void addEntry(StringRef Path, const sys::fs::file_status &Status);
+
+    /// Write the cache file to \a Out.
+    size_t writeStatCache(raw_fd_ostream &Out);
+  };
+
+public:
+  /// Validate that the file content in \a Buffer is a valid stat cache file.
+  /// \a BaseDir, \a IsCaseSensitive, \a VersionMatch and \a ValidityToken are
+  /// output parameters that get populated by this call.
+  static Error validateCacheFile(llvm::MemoryBufferRef Buffer,
+                                 StringRef &BaseDir, bool &IsCaseSensitive,
+                                 bool &VersionMatch, uint64_t &ValidityToken);
+
+  /// Update the ValidityToken data in \a CacheFile.
+  static void updateValidityToken(raw_fd_ostream &CacheFile,
+                                  uint64_t ValidityToken);
+
+private:
+  std::unique_ptr<llvm::MemoryBuffer> StatCacheFile;
+  llvm::StringRef StatCachePrefix;
+  std::unique_ptr<StatCacheType> StatCache;
+  bool IsCaseSensitive = true;
+};
+
+} // namespace vfs
+} // namespace llvm
+
+#endif // LLVM_SUPPORT_STATCACHEFILESYSTEM_H
Index: clang/tools/clang-stat-cache/clang-stat-cache.cpp
===================================================================
--- /dev/null
+++ clang/tools/clang-stat-cache/clang-stat-cache.cpp
@@ -0,0 +1,312 @@
+//===- clang-stat-cache.cpp -----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/StatCacheFileSystem.h"
+#include "llvm/Support/Timer.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include <assert.h>
+
+#ifdef __APPLE__
+#include <CoreServices/CoreServices.h>
+
+#include <sys/mount.h>
+#include <sys/param.h>
+#endif // __APPLE__
+
+// The clang-stat-cache utility creates an on-disk cache for the stat data
+// of a file-system tree which is expected to be immutable during a build.
+
+using namespace llvm;
+using llvm::vfs::StatCacheFileSystem;
+
+// Every option of the tool is registered in this category so that they are
+// grouped together in the -help output.
+static cl::OptionCategory StatCacheCategory("clang-stat-cache options");
+
+// Path of the cache file to create or update.
+static cl::opt<std::string> OutputFilename("o", cl::Required,
+                                           cl::desc("Specify output filename"),
+                                           cl::value_desc("filename"),
+                                           cl::cat(StatCacheCategory));
+
+// Root of the file-system tree whose stat data gets cached.
+static cl::opt<std::string> TargetDirectory(cl::Positional, cl::Required,
+                                            cl::value_desc("dirname"),
+                                            cl::cat(StatCacheCategory));
+
+static cl::opt<bool> Verbose("v", cl::desc("More verbose output"),
+                             cl::cat(StatCacheCategory));
+static cl::opt<bool> Force("f", cl::desc("Force cache generation"),
+                           cl::cat(StatCacheCategory));
+
+#ifdef __APPLE__
+// State shared between checkContentsValidity and the FSEvents callback it
+// registers. See below.
+struct CallbackInfo {
+  // Set by the callback as soon as it sees an event which is not the
+  // end-of-history marker.
+  bool SeenChanges = false;
+};
+
+// Stream callback installed by checkContentsValidity (see below). It records
+// in the CallbackInfo passed as CtxInfo whether any file system event was
+// delivered before the end of the historical events.
+static void FSEventsCallback(ConstFSEventStreamRef streamRef, void *CtxInfo,
+                             size_t numEvents, void *eventPaths,
+                             const FSEventStreamEventFlags *eventFlags,
+                             const FSEventStreamEventId *eventIds) {
+  auto *State = static_cast<CallbackInfo *>(CtxInfo);
+  for (size_t Idx = 0; Idx != numEvents; ++Idx) {
+    // The kFSEventStreamEventFlagHistoryDone flag is set on the last
+    // 'historical' event passed to the callback. This means it is delivered
+    // after the callback has seen all the relevant activity between the
+    // StartEvent of the stream and the point the stream was created.
+    const bool IsHistoryDone =
+        (eventFlags[Idx] & kFSEventStreamEventFlagHistoryDone) != 0;
+
+    if (!IsHistoryDone) {
+      // Any event other than the kFSEventStreamEventFlagHistoryDone one means
+      // there have been changes to the target directory.
+      State->SeenChanges = true;
+      continue;
+    }
+
+    // History is exhausted. If no other event was seen, the target directory
+    // hierarchy was not altered and the cache contents are still up-to-date.
+    // Either way, stop the main queue and go back to our non-queue code.
+    CFRunLoopStop(CFRunLoopGetCurrent());
+    return;
+  }
+}
+
+// FSEvents-based check for cache contents validity. We store the latest
+// FSEventStreamEventId in the cache as a ValidityToken and check if any
+// file system events affected the base directory since the cache was
+// generated.
+//
+// On entry ValidityToken is the event id read from the existing cache; on
+// return it holds the current latest event id. Returns true when no change to
+// the target directory was observed. A failure to set up the event stream is
+// reported and treated as "cache up-to-date".
+static bool checkContentsValidity(uint64_t &ValidityToken) {
+  // Start at the latest event stored in the cache.
+  const FSEventStreamEventId StartEvent = ValidityToken;
+  // Update the Validity token with the current latest event.
+  ValidityToken = FSEventsGetCurrentEventId();
+
+  auto ReportStreamFailure = [] {
+    errs() << "Failed to create FS event stream. "
+           << "Considering the cache up-to-date.\n";
+  };
+
+  // The CFString borrows the storage of TargetDirectory (no copy and no
+  // deallocator). Paths are not guaranteed to be pure ASCII, decode them as
+  // UTF-8 so that the string creation does not fail on such paths.
+  CFStringRef TargetDir = CFStringCreateWithCStringNoCopy(
+      kCFAllocatorDefault, TargetDirectory.c_str(), kCFStringEncodingUTF8,
+      kCFAllocatorNull);
+  if (!TargetDir) {
+    ReportStreamFailure();
+    return true;
+  }
+
+  // Let the array retain its element so that object lifetimes do not depend
+  // on the release order below.
+  CFArrayRef PathsToWatch = CFArrayCreate(
+      kCFAllocatorDefault, reinterpret_cast<const void **>(&TargetDir), 1,
+      &kCFTypeArrayCallBacks);
+  if (!PathsToWatch) {
+    CFRelease(TargetDir);
+    ReportStreamFailure();
+    return true;
+  }
+
+  CallbackInfo Info;
+  FSEventStreamContext Ctx = {0, &Info, nullptr, nullptr, nullptr};
+  const CFAbsoluteTime Latency = 0; // Latency in seconds. Do not wait.
+
+  // Create the stream
+  FSEventStreamRef Stream = FSEventStreamCreate(
+      kCFAllocatorDefault, &FSEventsCallback, &Ctx, PathsToWatch, StartEvent,
+      Latency, kFSEventStreamCreateFlagNone);
+
+  bool UpToDate = true;
+  if (!Stream) {
+    ReportStreamFailure();
+  } else {
+    // Associate the stream with the main queue.
+    FSEventStreamSetDispatchQueue(Stream, dispatch_get_main_queue());
+    // Start the stream (needs the queue to run to do anything).
+    if (FSEventStreamStart(Stream)) {
+      // Start the main queue. It will be exited by our callback when it got
+      // confirmed it processed all events.
+      CFRunLoopRun();
+      UpToDate = !Info.SeenChanges;
+      FSEventStreamStop(Stream);
+    } else {
+      ReportStreamFailure();
+    }
+    // A stream which was scheduled on a queue has to be invalidated before
+    // its final release.
+    FSEventStreamInvalidate(Stream);
+    FSEventStreamRelease(Stream);
+  }
+
+  CFRelease(PathsToWatch);
+  CFRelease(TargetDir);
+  return UpToDate;
+}
+
+#else // __APPLE__
+
+// Fallback used where no platform-specific validity check is implemented
+// (there is no cross-platform way to do it): the cache contents are always
+// reported as valid and ValidityToken is left untouched. On such platforms,
+// whatever drives cache generation has to know when a refresh is needed.
+static bool checkContentsValidity(uint64_t &ValidityToken) {
+  (void)ValidityToken;
+  return true;
+}
+
+#endif // __APPLE__
+
+// Walk the file-system tree rooted at BasePath and hand the status of every
+// entry found to Generator. Returns the error reported by the directory
+// iteration, if any; the walk stops at the first such error.
+static std::error_code
+populateHashTable(StringRef BasePath,
+                  StatCacheFileSystem::StatCacheWriter &Generator) {
+  namespace fs = llvm::sys::fs;
+
+  std::error_code IterationEC;
+
+  // The recursive iterator follows symlinks, so the status of one file can
+  // end up being stored several times under different names.
+  fs::recursive_directory_iterator Entry(BasePath, IterationEC), End;
+  while (Entry != End && !IterationEC) {
+    StringRef EntryPath = Entry->path();
+
+    // The result of the lookup is deliberately not checked: it can fail (for
+    // example on a broken symlink), in which case the entry is still recorded
+    // with whatever the failed lookup left in the status. The reader knows
+    // this.
+    fs::file_status EntryStatus;
+    fs::status(EntryPath, EntryStatus);
+
+    Generator.addEntry(EntryPath, EntryStatus);
+    Entry.increment(IterationEC);
+  }
+
+  return IterationEC;
+}
+
+// Decide whether the cache file opened on FD can be kept as is.
+//
+// FD is the descriptor of the output file, which is empty when it has just
+// been created. Out writes to the same file and is only used to refresh the
+// validity token of a cache that is still up-to-date. ValidityToken is filled
+// in from the existing cache and/or the platform validity check, so that the
+// caller can embed it in a regenerated cache.
+//
+// Returns true when the existing cache can be kept, false when it has to be
+// (re)generated. Exits the process with status 1 when the file exists but
+// cannot be recognized as a stat cache and -f was not given.
+static bool checkCacheValid(int FD, raw_fd_ostream &Out,
+                            uint64_t &ValidityToken) {
+  sys::fs::file_status Status;
+  if (std::error_code EC = sys::fs::status(FD, Status)) {
+    llvm::errs() << "fstat failed: "
+                 << llvm::toString(llvm::errorCodeToError(EC)) << "\n";
+    return false;
+  }
+
+  const uint64_t Size = Status.getSize();
+  if (Size == 0) {
+    // New file.
+#ifdef __APPLE__
+    // Get the current (global) FSEvent id and use this as ValidityToken.
+    ValidityToken = FSEventsGetCurrentEventId();
+#endif
+    return false;
+  }
+
+  // Common tail of the diagnostics emitted when the existing file cannot be
+  // trusted: refuse to overwrite it unless the user forced the generation.
+  auto AbortUnlessForced = [] {
+    if (!Force) {
+      llvm::errs() << " Aborting.\n";
+      exit(1);
+    }
+    llvm::errs() << " Forced update.\n";
+  };
+
+  auto ErrorOrBuffer = MemoryBuffer::getOpenFile(
+      sys::fs::convertFDToNativeFile(FD), OutputFilename, Size);
+  if (!ErrorOrBuffer) {
+    // Without the contents there is no way to tell whether this is a stat
+    // cache, so treat the file like an invalid cache.
+    llvm::errs() << "Failed to read the existing cache file: "
+                 << ErrorOrBuffer.getError().message() << ".";
+    AbortUnlessForced();
+    return false;
+  }
+
+  // Refuse to write to this cache file if it exists but its contents do
+  // not look like a valid cache file.
+  StringRef BaseDir;
+  bool IsCaseSensitive;
+  bool VersionMatch;
+  if (auto E = StatCacheFileSystem::validateCacheFile(
+          (*ErrorOrBuffer)->getMemBufferRef(), BaseDir, IsCaseSensitive,
+          VersionMatch, ValidityToken)) {
+    consumeError(std::move(E));
+    llvm::errs() << "The output cache file exists and is not a valid stat "
+                    "cache.";
+    AbortUnlessForced();
+    return false;
+  }
+
+  if (BaseDir != TargetDirectory &&
+      (IsCaseSensitive || !BaseDir.equals_insensitive(TargetDirectory))) {
+    llvm::errs() << "Existing cache has different directory. Regenerating...\n";
+    return false;
+  }
+
+  if (!VersionMatch) {
+    llvm::errs()
+        << "Existing cache has different version number. Regenerating...\n";
+    return false;
+  }
+
+  // Basic structure checks have passed. Lets see if we can prove that the cache
+  // contents are still valid.
+  const bool IsValid = checkContentsValidity(ValidityToken);
+  if (IsValid) {
+    // The cache is valid, but we might have gotten an updated ValidityToken.
+    // Update the cache with it as clang-stat-cache is just going to exit after
+    // returning from this function.
+    StatCacheFileSystem::updateValidityToken(Out, ValidityToken);
+  }
+  return IsValid && !Force;
+}
+
+// Entry point: (re)generate the stat cache for the target directory into the
+// output file, unless the existing cache is found to be up-to-date.
+// Returns 0 on success (including when nothing had to be done) and 1 when the
+// cache could not be generated.
+int main(int argc, char *argv[]) {
+  cl::ParseCommandLineOptions(argc, argv);
+
+  // Remove extraneous separators from the end of the basename.
+  while (!TargetDirectory.empty() &&
+         sys::path::is_separator(TargetDirectory.back()))
+    TargetDirectory.pop_back();
+  StringRef Dirname(TargetDirectory);
+
+  int FD;
+  if (std::error_code EC = sys::fs::openFileForReadWrite(
+          OutputFilename, FD, llvm::sys::fs::CD_OpenAlways,
+          llvm::sys::fs::OF_None)) {
+    llvm::errs() << "Failed to open cache file: "
+                 << toString(llvm::createFileError(OutputFilename, EC)) << "\n";
+    return 1;
+  }
+
+  raw_fd_ostream Out(FD, /* ShouldClose=*/true);
+
+  uint64_t ValidityToken = 0;
+  // Check if the cache is valid and up-to-date.
+  if (checkCacheValid(FD, Out, ValidityToken)) {
+    if (Verbose)
+      outs() << "Cache up-to-date, exiting\n";
+    return 0;
+  }
+
+  if (Verbose)
+    outs() << "Building a stat cache for '" << TargetDirectory << "' into '"
+           << OutputFilename << "'\n";
+
+  // Do not generate a cache for NFS. Iterating huge directory hierarchies
+  // over NFS will be very slow. Better to let the compiler search only the
+  // pieces that it needs than use a cache that takes ages to populate.
+  bool IsLocal;
+  if (std::error_code EC = sys::fs::is_local(Dirname, IsLocal)) {
+    errs() << "Failed to stat the target directory: "
+           << llvm::toString(llvm::errorCodeToError(EC)) << "\n";
+    return 1;
+  }
+
+  if (!IsLocal && !Force) {
+    errs() << "Target directory is not a local filesystem. "
+           << "Not populating the cache.\n";
+    return 0;
+  }
+
+  sys::fs::file_status BaseDirStatus;
+  if (std::error_code EC = sys::fs::status(Dirname, BaseDirStatus)) {
+    errs() << "Failed to stat the target directory: "
+           << llvm::toString(llvm::errorCodeToError(EC)) << "\n";
+    return 1;
+  }
+
+  // Check if the filesystem hosting the target directory is case sensitive.
+  bool IsCaseSensitive = true;
+#ifdef _PC_CASE_SENSITIVE
+  IsCaseSensitive =
+      ::pathconf(TargetDirectory.c_str(), _PC_CASE_SENSITIVE) == 1;
+#endif
+  StatCacheFileSystem::StatCacheWriter Generator(
+      Dirname, BaseDirStatus, IsCaseSensitive, ValidityToken);
+
+  // Populate the cache.
+  auto StartTime = llvm::TimeRecord::getCurrentTime();
+  const std::error_code PopulateEC = populateHashTable(Dirname, Generator);
+  auto Duration = llvm::TimeRecord::getCurrentTime();
+  Duration -= StartTime;
+
+  // The directory walk stops at the first error. Writing what has been
+  // collected so far would produce a cache silently missing entries, so give
+  // up before touching the contents of the output file.
+  if (PopulateEC) {
+    errs() << "Failed to scan the target directory: "
+           << toString(llvm::createFileError(Dirname, PopulateEC)) << "\n";
+    return 1;
+  }
+
+  if (Verbose)
+    errs() << "populateHashTable took: " << Duration.getWallTime() << "s\n";
+
+  // Write the cache to disk.
+  StartTime = llvm::TimeRecord::getCurrentTime();
+  const uint64_t Size = Generator.writeStatCache(Out);
+  Duration = llvm::TimeRecord::getCurrentTime();
+  Duration -= StartTime;
+
+  if (Verbose)
+    errs() << "writeStatCache took: " << Duration.getWallTime() << "s\n";
+
+  // Push any buffered data to the file before adjusting its size, and make
+  // sure that the write actually succeeded.
+  Out.flush();
+  if (Out.has_error()) {
+    errs() << "Failed to write cache file: "
+           << toString(llvm::createFileError(OutputFilename, Out.error()))
+           << "\n";
+    Out.clear_error();
+    return 1;
+  }
+
+  // We might have opened a pre-existing cache which was bigger.
+  if (std::error_code EC = llvm::sys::fs::resize_file(FD, Size)) {
+    errs() << "Failed to resize cache file: "
+           << toString(llvm::createFileError(OutputFilename, EC)) << "\n";
+    return 1;
+  }
+
+  return 0;
+}
Index: clang/tools/clang-stat-cache/CMakeLists.txt
===================================================================
--- /dev/null
+++ clang/tools/clang-stat-cache/CMakeLists.txt
@@ -0,0 +1,19 @@
+set(LLVM_LINK_COMPONENTS
+  Core
+  Support
+  )
+
+add_clang_tool(clang-stat-cache
+  clang-stat-cache.cpp
+  )
+
+# The FSEvents-based validity check in clang-stat-cache.cpp needs the
+# CoreServices framework. Link it with plain target_link_libraries():
+# clang_target_link_libraries() is meant for clang component libraries and, as
+# far as its definition in AddClang.cmake goes, replaces its arguments with
+# clang-cpp when CLANG_LINK_CLANG_DYLIB is enabled, which would drop the
+# framework from the link line.
+if(APPLE)
+  target_link_libraries(clang-stat-cache
+    PRIVATE
+    "-framework CoreServices"
+    )
+endif()
Index: clang/tools/CMakeLists.txt
===================================================================
--- clang/tools/CMakeLists.txt
+++ clang/tools/CMakeLists.txt
@@ -15,6 +15,7 @@
 if(HAVE_CLANG_REPL_SUPPORT)
   add_clang_subdirectory(clang-repl)
 endif()
+add_clang_subdirectory(clang-stat-cache)
 
 add_clang_subdirectory(c-index-test)
 
Index: clang/test/clang-stat-cache/errors.test
===================================================================
--- /dev/null
+++ clang/test/clang-stat-cache/errors.test
@@ -0,0 +1,42 @@
+RUN: rm -rf %t
+RUN: mkdir -p %t
+
+RUN: not clang-stat-cache %t/not-there -o %t/stat.cache 2>&1 | FileCheck --check-prefix=NO-SUCH-DIR %s
+NO-SUCH-DIR: Failed to stat the target directory: {{[Nn]}}o such file or directory
+
+RUN: not clang-stat-cache %t -o %t/not-there/stat.cache 2>&1 | FileCheck --check-prefix=NO-SUCH-FILE %s
+NO-SUCH-FILE: Failed to open cache file: '{{.*}}': {{[Nn]}}o such file or directory
+
+# Use mixed-case directories to exercise the case insensitive implementation.
+RUN: mkdir -p %t/Dir
+RUN: mkdir -p %t/Dir2
+
+# Try to overwrite a few invalid caches
+RUN: echo "Not a stat cache" > %t/stat.cache
+RUN: not clang-stat-cache %t/Dir -o %t/stat.cache 2>&1 | FileCheck --check-prefix=INVALID-CACHE %s
+RUN: echo "Not a stat cache, but bigger than the stat cache header" > %t/stat.cache
+RUN: not clang-stat-cache %t/Dir -o %t/stat.cache 2>&1 | FileCheck --check-prefix=INVALID-CACHE %s
+RUN: echo "STAT. This has the correct MAGIC and is bigger than the header." > %t/stat.cache
+RUN: not clang-stat-cache %t/Dir -o %t/stat.cache 2>&1 | FileCheck --check-prefix=INVALID-CACHE %s
+
+INVALID-CACHE: The output cache file exists and is not a valid stat cache. Aborting.
+
+# Test the force flag
+RUN: echo "STAT. This has the correct MAGIC and is bigger than the header." > %t/stat.cache
+RUN: clang-stat-cache %t/Dir -f -o %t/stat.cache 2>&1 | FileCheck --check-prefix=INVALID-CACHE-FORCE %s
+INVALID-CACHE-FORCE: The output cache file exists and is not a valid stat cache. Forced update.
+
+# Generate a valid cache for dir
+RUN: rm %t/stat.cache
+RUN: clang-stat-cache %t/Dir -o %t/stat.cache
+RUN: cp %t/stat.cache %t/stat.cache.save
+
+# Try with same base directory but with extraneous separators
+RUN: clang-stat-cache %t/Dir/// -v -o %t/stat.cache | FileCheck --check-prefix=EXTRA-SEP %s
+EXTRA-SEP-NOT: Existing cache has different directory. Regenerating...
+EXTRA-SEP: Cache up-to-date, exiting
+
+# Rewrite the cache with a different base directory
+RUN: clang-stat-cache %t/Dir2 -o %t/stat.cache 2>&1 | FileCheck --check-prefix=OTHER-DIR %s
+OTHER-DIR: Existing cache has different directory. Regenerating...
+
Index: clang/test/clang-stat-cache/cache-effects.c
===================================================================
--- /dev/null
+++ clang/test/clang-stat-cache/cache-effects.c
@@ -0,0 +1,63 @@
+#include "foo.h"
+
+// Testing the effects of a cache is tricky, because it's just supposed to speed
+// things up, not change the behavior. In this test, we are using an outdated
+// cache to trick HeaderSearch into finding the wrong module and show that it is
+// being used.
+
+// Clear the module cache.
+// RUN: rm -rf %t
+// RUN: mkdir -p %t/Inputs
+// RUN: mkdir -p %t/Inputs/Foo1
+// RUN: mkdir -p %t/Inputs/Foo2
+// RUN: mkdir -p %t/modules-to-compare
+
+// ===
+// Create a Foo module in the Foo1 directory.
+// RUN: echo 'void meow(void);' > %t/Inputs/Foo1/foo.h
+// RUN: echo 'module Foo { header "foo.h" }' > %t/Inputs/Foo1/module.map
+
+// ===
+// Compile the module. Note that the compiler has 2 header search paths:
+// Foo2 and Foo1 in that order. The module has been created in Foo1, and
+// it is the only version available now.
+// RUN: %clang_cc1 -cc1 -fmodules -fimplicit-module-maps -fdisable-module-hash -fmodules-cache-path=%t/modules-cache -fsyntax-only -I %t/Inputs/Foo2 -I %t/Inputs/Foo1 -Rmodule-build  %s 2>&1
+// RUN: cp %t/modules-cache/Foo.pcm %t/modules-to-compare/Foo-before.pcm
+
+// ===
+// Create a stat cache for our inputs directory
+// RUN: clang-stat-cache %t/Inputs -o %t/stat.cache
+
+// ===
+// As a sanity check, re-run the same compilation with the cache and check that
+// the module does not change.
+// RUN: %clang_cc1 -cc1 -fmodules -fimplicit-module-maps -fdisable-module-hash -fmodules-cache-path=%t/modules-cache -fsyntax-only -I %t/Inputs/Foo2 -I %t/Inputs/Foo1 -ivfsstatcache %t/stat.cache %s -Rmodule-build 2>&1
+// RUN: cp %t/modules-cache/Foo.pcm %t/modules-to-compare/Foo-after.pcm
+
+// RUN: diff %t/modules-to-compare/Foo-before.pcm %t/modules-to-compare/Foo-after.pcm
+
+// ===
+// Now introduce a different Foo module in the Foo2 directory which is before
+// Foo1 in the search paths.
+// RUN: echo 'void meow2(void);' > %t/Inputs/Foo2/foo.h
+// RUN: echo 'module Foo { header "foo.h" }' > %t/Inputs/Foo2/module.map
+
+// ===
+// Because we're using the (now-outdated) stat cache, this compilation
+// should still be using the first module. It will not see the new one
+// which is earlier in the search paths.
+// RUN: %clang_cc1 -cc1 -fmodules -fimplicit-module-maps -fdisable-module-hash -fmodules-cache-path=%t/modules-cache -fsyntax-only -I %t/Inputs/Foo2 -I %t/Inputs/Foo1  -ivfsstatcache %t/stat.cache -Rmodule-build -Rmodule-import %s 2>&1
+// RUN: cp %t/modules-cache/Foo.pcm %t/modules-to-compare/Foo-after.pcm
+
+// RUN: diff %t/modules-to-compare/Foo-before.pcm %t/modules-to-compare/Foo-after.pcm
+
+// ===
+// Regenerate the stat cache for our Inputs directory
+// RUN: clang-stat-cache -f %t/Inputs -o %t/stat.cache 2>&1
+
+// ===
+// Use the module and now see that we are recompiling the new one.
+// RUN: %clang_cc1 -cc1 -fmodules -fimplicit-module-maps -fdisable-module-hash -fmodules-cache-path=%t/modules-cache -fsyntax-only -I %t/Inputs/Foo2 -I %t/Inputs/Foo1 -ivfsstatcache %t/stat.cache -Rmodule-build %s 2>&1
+// RUN: cp %t/modules-cache/Foo.pcm %t/modules-to-compare/Foo-after.pcm
+
+// RUN: not diff %t/modules-to-compare/Foo-before.pcm %t/modules-to-compare/Foo-after.pcm
Index: clang/test/Driver/vfsstatcache.c
===================================================================
--- /dev/null
+++ clang/test/Driver/vfsstatcache.c
@@ -0,0 +1,5 @@
+// RUN: %clang -ivfsstatcache foo.h -### %s 2>&1 | FileCheck %s
+// CHECK: "-ivfsstatcache" "foo.h"
+
+// RUN: not %clang -ivfsstatcache foo.h %s 2>&1 | FileCheck -check-prefix=CHECK-MISSING %s
+// CHECK-MISSING: stat cache file 'foo.h' not found
Index: clang/test/CMakeLists.txt
===================================================================
--- clang/test/CMakeLists.txt
+++ clang/test/CMakeLists.txt
@@ -71,6 +71,7 @@
   clang-refactor
   clang-diff
   clang-scan-deps
+  clang-stat-cache
   diagtool
   hmaptool
   )
Index: clang/lib/Frontend/CompilerInvocation.cpp
===================================================================
--- clang/lib/Frontend/CompilerInvocation.cpp
+++ clang/lib/Frontend/CompilerInvocation.cpp
@@ -83,6 +83,7 @@
 #include "llvm/Support/Path.h"
 #include "llvm/Support/Process.h"
 #include "llvm/Support/Regex.h"
+#include "llvm/Support/StatCacheFileSystem.h"
 #include "llvm/Support/VersionTuple.h"
 #include "llvm/Support/VirtualFileSystem.h"
 #include "llvm/Support/raw_ostream.h"
@@ -3084,6 +3085,9 @@
     GenerateArg(Args, Opt, P.Prefix, SA);
   }
 
+  for (const std::string &F : Opts.VFSStatCacheFiles)
+    GenerateArg(Args, OPT_ivfsstatcache, F, SA);
+
   for (const std::string &F : Opts.VFSOverlayFiles)
     GenerateArg(Args, OPT_ivfsoverlay, F, SA);
 }
@@ -3217,6 +3221,9 @@
     Opts.AddSystemHeaderPrefix(
         A->getValue(), A->getOption().matches(OPT_system_header_prefix));
 
+  for (const auto *A : Args.filtered(OPT_ivfsstatcache))
+    Opts.AddVFSStatCacheFile(A->getValue());
+
   for (const auto *A : Args.filtered(OPT_ivfsoverlay))
     Opts.AddVFSOverlayFile(A->getValue());
 
@@ -4747,12 +4754,31 @@
     const CompilerInvocation &CI, DiagnosticsEngine &Diags,
     IntrusiveRefCntPtr<llvm::vfs::FileSystem> BaseFS) {
   return createVFSFromOverlayFiles(CI.getHeaderSearchOpts().VFSOverlayFiles,
+                                   CI.getHeaderSearchOpts().VFSStatCacheFiles,
                                    Diags, std::move(BaseFS));
 }
 
 IntrusiveRefCntPtr<llvm::vfs::FileSystem> clang::createVFSFromOverlayFiles(
-    ArrayRef<std::string> VFSOverlayFiles, DiagnosticsEngine &Diags,
+    ArrayRef<std::string> VFSOverlayFiles,
+    ArrayRef<std::string> VFSStatCacheFiles, DiagnosticsEngine &Diags,
     IntrusiveRefCntPtr<llvm::vfs::FileSystem> BaseFS) {
+  for (const auto &File : VFSStatCacheFiles) {
+    llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> Buffer =
+        BaseFS->getBufferForFile(File);
+    if (!Buffer) {
+      Diags.Report(diag::err_missing_vfs_stat_cache_file) << File;
+      continue;
+    }
+
+    auto StatCache =
+        llvm::vfs::StatCacheFileSystem::create(std::move(*Buffer), BaseFS);
+
+    if (errorToBool(StatCache.takeError()))
+      Diags.Report(diag::err_invalid_vfs_stat_cache) << File;
+    else
+      BaseFS = std::move(*StatCache);
+  }
+
   if (VFSOverlayFiles.empty())
     return BaseFS;
 
Index: clang/lib/Frontend/ASTUnit.cpp
===================================================================
--- clang/lib/Frontend/ASTUnit.cpp
+++ clang/lib/Frontend/ASTUnit.cpp
@@ -574,7 +574,7 @@
     // performs the initialization too late (once both target and language
     // options are read).
     PP.getFileManager().setVirtualFileSystem(createVFSFromOverlayFiles(
-        HSOpts.VFSOverlayFiles, PP.getDiagnostics(),
+        HSOpts.VFSOverlayFiles, HSOpts.VFSStatCacheFiles, PP.getDiagnostics(),
         PP.getFileManager().getVirtualFileSystemPtr()));
 
     InitializedHeaderSearchPaths = true;
Index: clang/include/clang/Lex/HeaderSearchOptions.h
===================================================================
--- clang/include/clang/Lex/HeaderSearchOptions.h
+++ clang/include/clang/Lex/HeaderSearchOptions.h
@@ -181,6 +181,9 @@
   /// of computing the module hash.
   llvm::SmallSetVector<llvm::CachedHashString, 16> ModulesIgnoreMacros;
 
+  /// The set of user-provided stat cache files.
+  std::vector<std::string> VFSStatCacheFiles;
+
   /// The set of user-provided virtual filesystem overlay files.
   std::vector<std::string> VFSOverlayFiles;
 
@@ -250,6 +253,10 @@
     SystemHeaderPrefixes.emplace_back(Prefix, IsSystemHeader);
   }
 
+  /// Append \p Name to the list of user-provided stat cache files.
+  void AddVFSStatCacheFile(StringRef Name) {
+    VFSStatCacheFiles.emplace_back(Name.str());
+  }
+
   void AddVFSOverlayFile(StringRef Name) {
     VFSOverlayFiles.push_back(std::string(Name));
   }
Index: clang/include/clang/Frontend/CompilerInvocation.h
===================================================================
--- clang/include/clang/Frontend/CompilerInvocation.h
+++ clang/include/clang/Frontend/CompilerInvocation.h
@@ -296,6 +296,7 @@
 
 IntrusiveRefCntPtr<llvm::vfs::FileSystem>
 createVFSFromOverlayFiles(ArrayRef<std::string> VFSOverlayFiles,
+                          ArrayRef<std::string> VFSStatCacheFiles,
                           DiagnosticsEngine &Diags,
                           IntrusiveRefCntPtr<llvm::vfs::FileSystem> BaseFS);
 
Index: clang/include/clang/Driver/Options.td
===================================================================
--- clang/include/clang/Driver/Options.td
+++ clang/include/clang/Driver/Options.td
@@ -3357,6 +3357,8 @@
   HelpText<"Add directory to SYSTEM include search path, "
            "absolute paths are relative to -isysroot">, MetaVarName<"<directory>">,
   Flags<[CC1Option]>;
+// Stat cache file to consult for file status queries. Accepted by the driver
+// and by cc1; may be given several times.
+def ivfsstatcache : JoinedOrSeparate<["-"], "ivfsstatcache">, Group<clang_i_Group>, Flags<[CC1Option]>,
+  HelpText<"Use the stat data cached in file instead of doing filesystem syscalls. See clang-stat-cache utility.">;
 def ivfsoverlay : JoinedOrSeparate<["-"], "ivfsoverlay">, Group<clang_i_Group>, Flags<[CC1Option]>,
   HelpText<"Overlay the virtual filesystem described by file over the real file system">;
 def imultilib : Separate<["-"], "imultilib">, Group<gfortran_Group>;
Index: clang/include/clang/Basic/DiagnosticFrontendKinds.td
===================================================================
--- clang/include/clang/Basic/DiagnosticFrontendKinds.td
+++ clang/include/clang/Basic/DiagnosticFrontendKinds.td
@@ -256,6 +256,11 @@
   "test module file extension '%0' has different version (%1.%2) than expected "
   "(%3.%4)">;
 
+// Reported for a file passed to -ivfsstatcache which cannot be read, or which
+// cannot be loaded as a stat cache.
+def err_missing_vfs_stat_cache_file : Error<
+  "stat cache file '%0' not found">, DefaultFatal;
+def err_invalid_vfs_stat_cache : Error<
+  "invalid stat cache file '%0'">, DefaultFatal;
+
 def err_missing_vfs_overlay_file : Error<
   "virtual filesystem overlay file '%0' not found">, DefaultFatal;
 def err_invalid_vfs_overlay : Error<
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to