[PATCH] D63907: [clang-scan-deps] Implementation of dependency scanner over minimized sources

2019-08-06 Thread Alex Lorenz via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rL368086: [clang-scan-deps] Implementation of dependency 
scanner over minimized sources (authored by arphaman, committed by ).
Herald added a project: LLVM.
Herald added a subscriber: llvm-commits.

Changed prior to commit:
  https://reviews.llvm.org/D63907?vs=213664=213704#toc

Repository:
  rL LLVM

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D63907/new/

https://reviews.llvm.org/D63907

Files:
  cfe/trunk/include/clang/Basic/FileManager.h
  
cfe/trunk/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
  cfe/trunk/include/clang/Tooling/DependencyScanning/DependencyScanningService.h
  cfe/trunk/include/clang/Tooling/DependencyScanning/DependencyScanningWorker.h
  cfe/trunk/lib/Tooling/DependencyScanning/CMakeLists.txt
  cfe/trunk/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp
  cfe/trunk/lib/Tooling/DependencyScanning/DependencyScanningService.cpp
  cfe/trunk/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp
  
cfe/trunk/test/ClangScanDeps/Inputs/frameworks/Framework.framework/Headers/Framework.h
  
cfe/trunk/test/ClangScanDeps/Inputs/frameworks/Framework.framework/PrivateHeaders/PrivateHeader.h
  cfe/trunk/test/ClangScanDeps/Inputs/header_stat_before_open_cdb.json
  cfe/trunk/test/ClangScanDeps/Inputs/vfsoverlay.yaml
  cfe/trunk/test/ClangScanDeps/Inputs/vfsoverlay_cdb.json
  cfe/trunk/test/ClangScanDeps/header_stat_before_open.m
  cfe/trunk/test/ClangScanDeps/regular_cdb.cpp
  cfe/trunk/test/ClangScanDeps/vfsoverlay.cpp
  cfe/trunk/tools/clang-scan-deps/ClangScanDeps.cpp

Index: cfe/trunk/include/clang/Basic/FileManager.h
===
--- cfe/trunk/include/clang/Basic/FileManager.h
+++ cfe/trunk/include/clang/Basic/FileManager.h
@@ -231,6 +231,10 @@
 
   llvm::vfs::FileSystem () const { return *FS; }
 
+  void setVirtualFileSystem(IntrusiveRefCntPtr FS) {
+this->FS = std::move(FS);
+  }
+
   /// Retrieve a file entry for a "virtual" file that acts as
   /// if there were a file with the given name on disk.
   ///
Index: cfe/trunk/include/clang/Tooling/DependencyScanning/DependencyScanningWorker.h
===
--- cfe/trunk/include/clang/Tooling/DependencyScanning/DependencyScanningWorker.h
+++ cfe/trunk/include/clang/Tooling/DependencyScanning/DependencyScanningWorker.h
@@ -10,6 +10,7 @@
 #define LLVM_CLANG_TOOLING_DEPENDENCY_SCANNING_WORKER_H
 
 #include "clang/Basic/DiagnosticOptions.h"
+#include "clang/Basic/FileManager.h"
 #include "clang/Basic/LLVM.h"
 #include "clang/Frontend/PCHContainerOperations.h"
 #include "clang/Tooling/CompilationDatabase.h"
@@ -21,6 +22,9 @@
 namespace tooling {
 namespace dependencies {
 
+class DependencyScanningService;
+class DependencyScanningWorkerFilesystem;
+
 /// An individual dependency scanning worker that is able to run on its own
 /// thread.
 ///
@@ -29,7 +33,7 @@
 /// using the regular processing run.
 class DependencyScanningWorker {
 public:
-  DependencyScanningWorker();
+  DependencyScanningWorker(DependencyScanningService );
 
   /// Print out the dependency information into a string using the dependency
   /// file format that is specified in the options (-MD is the default) and
@@ -45,10 +49,11 @@
   IntrusiveRefCntPtr DiagOpts;
   std::shared_ptr PCHContainerOps;
 
+  llvm::IntrusiveRefCntPtr RealFS;
   /// The file system that is used by each worker when scanning for
   /// dependencies. This filesystem persists accross multiple compiler
   /// invocations.
-  llvm::IntrusiveRefCntPtr WorkerFS;
+  llvm::IntrusiveRefCntPtr DepFS;
 };
 
 } // end namespace dependencies
Index: cfe/trunk/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
===
--- cfe/trunk/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
+++ cfe/trunk/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
@@ -0,0 +1,168 @@
+//===- DependencyScanningFilesystem.h - clang-scan-deps fs ===---*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+
+#ifndef LLVM_CLANG_TOOLING_DEPENDENCY_SCANNING_FILESYSTEM_H
+#define LLVM_CLANG_TOOLING_DEPENDENCY_SCANNING_FILESYSTEM_H
+
+#include "clang/Basic/LLVM.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringSet.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/ErrorOr.h"
+#include "llvm/Support/VirtualFileSystem.h"
+#include 
+
+namespace clang {
+namespace tooling {
+namespace dependencies {
+
+/// An in-memory representation of a file system entity that is of interest to
+/// the dependency 

[PATCH] D63907: [clang-scan-deps] Implementation of dependency scanner over minimized sources

2019-08-06 Thread Alex Lorenz via Phabricator via cfe-commits
arphaman marked an inline comment as done.
arphaman added inline comments.



Comment at: 
clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp:76
+CachedFileSystemEntry
+CachedFileSystemEntry::createDirectoryEntry(llvm::vfs::Status Stat) {
+  assert(Stat.isDirectory() && "not a directory!");

aganea wrote:
> arphaman wrote:
> > aganea wrote:
> > > `llvm::vfs::Status &` to avoid a copy?
> > The copy should already be avoided, as I move the argument when passing in, 
> > and when it's assigned to the result.
> If `Stat` is not a rvalue reference, wouldn't the `std::move` in the call 
> site end up as a copy? See [[ https://godbolt.org/z/Rr7cdM | this ]]. If you 
> change `int test(A a)` to `int test(A &)` you can see the difference in the 
> asm output. However if the function is inlined, the extra copy will probably 
> be optimized out. Not a big deal - the OS calls like stat() will most likely 
> dominate here.
Isn't the difference in the output caused by the details of the calling 
convention (pass the structure on the stack by value)? The move constructor 
should still be called for the stat, it will not copy its members. We can 
optimize this though and pass by ref directly, I agree, so let me do that.


Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D63907/new/

https://reviews.llvm.org/D63907



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D63907: [clang-scan-deps] Implementation of dependency scanner over minimized sources

2019-08-06 Thread Alexandre Ganea via Phabricator via cfe-commits
aganea accepted this revision.
aganea added a comment.
This revision is now accepted and ready to land.

LGTM, thank you!

In D63907#1617417 , @arphaman wrote:

> Just for reference, this patch still doesn't reuse the FileManager across 
> invocations in a thread. We expect to get even better performance once we 
> reuse it, but I'm going have to improve its API first.


Can't wait! @SamChaps is already testing this patch. He found some minor 
edge-cases with the minimizer (most likely unrelated to this), I'll post a 
patch.




Comment at: 
clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp:76
+CachedFileSystemEntry
+CachedFileSystemEntry::createDirectoryEntry(llvm::vfs::Status Stat) {
+  assert(Stat.isDirectory() && "not a directory!");

arphaman wrote:
> aganea wrote:
> > `llvm::vfs::Status &` to avoid a copy?
> The copy should already be avoided, as I move the argument when passing in, 
> and when it's assigned to the result.
If `Stat` is not a rvalue reference, wouldn't the `std::move` in the call site 
end up as a copy? See [[ https://godbolt.org/z/Rr7cdM | this ]]. If you change 
`int test(A a)` to `int test(A &)` you can see the difference in the asm 
output. However if the function is inlined, the extra copy will probably be 
optimized out. Not a big deal - the OS calls like stat() will most likely 
dominate here.


Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D63907/new/

https://reviews.llvm.org/D63907



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D63907: [clang-scan-deps] Implementation of dependency scanner over minimized sources

2019-08-06 Thread Alex Lorenz via Phabricator via cfe-commits
arphaman added a comment.

Just for reference, this patch still doesn't reuse the FileManager across 
invocations in a thread. We expect to get even better performance once we reuse 
it, but I'm going have to improve its API first.


Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D63907/new/

https://reviews.llvm.org/D63907



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D63907: [clang-scan-deps] Implementation of dependency scanner over minimized sources

2019-08-06 Thread Alex Lorenz via Phabricator via cfe-commits
arphaman added inline comments.



Comment at: 
clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h:117
+std::mutex CacheLock;
+llvm::StringMap> Cache;
+  };

aganea wrote:
> Why not use a bump allocator here? (it would be per-thread) On Windows the 
> heap is always thread-safe, which induce a lock in `malloc` for each new 
> entry. You could also avoid the usage of `unique_ptr` by the same occasion:
> 
> `llvm::StringMap SpecificBumpPtrAllocator> Cache;`
> 
> //(unless you're planning on removing entries from the cache later on?)//
Good idea, I switched to a bump pointer allocator (I don't think I can use a 
specific one, as that would cause the values to be destroyed twice). I'm not 
planning on removing entries from the cache, no.



Comment at: 
clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp:76
+CachedFileSystemEntry
+CachedFileSystemEntry::createDirectoryEntry(llvm::vfs::Status Stat) {
+  assert(Stat.isDirectory() && "not a directory!");

aganea wrote:
> `llvm::vfs::Status &` to avoid a copy?
The copy should already be avoided, as I move the argument when passing in, and 
when it's assigned to the result.


Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D63907/new/

https://reviews.llvm.org/D63907



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D63907: [clang-scan-deps] Implementation of dependency scanner over minimized sources

2019-08-06 Thread Alex Lorenz via Phabricator via cfe-commits
arphaman updated this revision to Diff 213664.
arphaman marked 6 inline comments as done.
arphaman added a comment.

Address review comments.


Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D63907/new/

https://reviews.llvm.org/D63907

Files:
  clang/include/clang/Basic/FileManager.h
  clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
  clang/include/clang/Tooling/DependencyScanning/DependencyScanningService.h
  clang/include/clang/Tooling/DependencyScanning/DependencyScanningWorker.h
  clang/lib/Tooling/DependencyScanning/CMakeLists.txt
  clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp
  clang/lib/Tooling/DependencyScanning/DependencyScanningService.cpp
  clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp
  
clang/test/ClangScanDeps/Inputs/frameworks/Framework.framework/Headers/Framework.h
  
clang/test/ClangScanDeps/Inputs/frameworks/Framework.framework/PrivateHeaders/PrivateHeader.h
  clang/test/ClangScanDeps/Inputs/header_stat_before_open_cdb.json
  clang/test/ClangScanDeps/Inputs/vfsoverlay.yaml
  clang/test/ClangScanDeps/Inputs/vfsoverlay_cdb.json
  clang/test/ClangScanDeps/header_stat_before_open.m
  clang/test/ClangScanDeps/regular_cdb.cpp
  clang/test/ClangScanDeps/vfsoverlay.cpp
  clang/tools/clang-scan-deps/ClangScanDeps.cpp

Index: clang/tools/clang-scan-deps/ClangScanDeps.cpp
===
--- clang/tools/clang-scan-deps/ClangScanDeps.cpp
+++ clang/tools/clang-scan-deps/ClangScanDeps.cpp
@@ -8,6 +8,7 @@
 
 #include "clang/Frontend/CompilerInstance.h"
 #include "clang/Tooling/CommonOptionsParser.h"
+#include "clang/Tooling/DependencyScanning/DependencyScanningService.h"
 #include "clang/Tooling/DependencyScanning/DependencyScanningWorker.h"
 #include "clang/Tooling/JSONCompilationDatabase.h"
 #include "llvm/Support/InitLLVM.h"
@@ -45,9 +46,10 @@
   ///
   /// \param Compilations The reference to the compilation database that's
   /// used by the clang tool.
-  DependencyScanningTool(const tooling::CompilationDatabase ,
+  DependencyScanningTool(DependencyScanningService ,
+ const tooling::CompilationDatabase ,
  SharedStream , SharedStream )
-  : Compilations(Compilations), OS(OS), Errs(Errs) {}
+  : Worker(Service), Compilations(Compilations), OS(OS), Errs(Errs) {}
 
   /// Computes the dependencies for the given file and prints them out.
   ///
@@ -80,6 +82,20 @@
 
 llvm::cl::OptionCategory DependencyScannerCategory("Tool options");
 
+static llvm::cl::opt ScanMode(
+"mode",
+llvm::cl::desc("The preprocessing mode used to compute the dependencies"),
+llvm::cl::values(
+clEnumValN(ScanningMode::MinimizedSourcePreprocessing,
+   "preprocess-minimized-sources",
+   "The set of dependencies is computed by preprocessing the "
+   "source files that were minimized to only include the "
+   "contents that might affect the dependencies"),
+clEnumValN(ScanningMode::CanonicalPreprocessing, "preprocess",
+   "The set of dependencies is computed by preprocessing the "
+   "unmodified source files")),
+llvm::cl::init(ScanningMode::MinimizedSourcePreprocessing));
+
 llvm::cl::opt
 NumThreads("j", llvm::cl::Optional,
llvm::cl::desc("Number of worker threads to use (default: use "
@@ -136,12 +152,14 @@
   SharedStream Errs(llvm::errs());
   // Print out the dependency results to STDOUT by default.
   SharedStream DependencyOS(llvm::outs());
+
+  DependencyScanningService Service(ScanMode);
   unsigned NumWorkers =
   NumThreads == 0 ? llvm::hardware_concurrency() : NumThreads;
   std::vector> WorkerTools;
   for (unsigned I = 0; I < NumWorkers; ++I)
 WorkerTools.push_back(llvm::make_unique(
-*AdjustingCompilations, DependencyOS, Errs));
+Service, *AdjustingCompilations, DependencyOS, Errs));
 
   std::vector WorkerThreads;
   std::atomic HadErrors(false);
Index: clang/test/ClangScanDeps/vfsoverlay.cpp
===
--- /dev/null
+++ clang/test/ClangScanDeps/vfsoverlay.cpp
@@ -0,0 +1,17 @@
+// RUN: rm -rf %t.dir
+// RUN: rm -rf %t.cdb
+// RUN: mkdir -p %t.dir
+// RUN: cp %s %t.dir/vfsoverlay.cpp
+// RUN: sed -e "s|DIR|%/t.dir|g" %S/Inputs/vfsoverlay.yaml > %t.dir/vfsoverlay.yaml
+// RUN: mkdir %t.dir/Inputs
+// RUN: cp %S/Inputs/header.h %t.dir/Inputs/header.h
+// RUN: sed -e "s|DIR|%/t.dir|g" %S/Inputs/vfsoverlay_cdb.json > %t.cdb
+//
+// RUN: clang-scan-deps -compilation-database %t.cdb -j 1 | \
+// RUN:   FileCheck %s
+
+#include "not_real.h"
+
+// CHECK: clang-scan-deps dependency
+// CHECK-NEXT: vfsoverlay.cpp
+// CHECK-NEXT: Inputs{{/|\\}}header.h
Index: clang/test/ClangScanDeps/regular_cdb.cpp
===
--- 

[PATCH] D63907: [clang-scan-deps] Implementation of dependency scanner over minimized sources

2019-08-06 Thread Alexandre Ganea via Phabricator via cfe-commits
aganea added a comment.

Thanks for the update Alex! Just a few more comments and we should be good to 
go:




Comment at: 
clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h:117
+std::mutex CacheLock;
+llvm::StringMap> Cache;
+  };

Why not use a bump allocator here? (it would be per-thread) On Windows the heap 
is always thread-safe, which induce a lock in `malloc` for each new entry. You 
could also avoid the usage of `unique_ptr` by the same occasion:

`llvm::StringMap> Cache;`

//(unless you're planning on removing entries from the cache later on?)//



Comment at: 
clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp:76
+CachedFileSystemEntry
+CachedFileSystemEntry::createDirectoryEntry(llvm::vfs::Status Stat) {
+  assert(Stat.isDirectory() && "not a directory!");

`llvm::vfs::Status &` to avoid a copy?



Comment at: 
clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp:103
+  auto It =
+  Shard.Cache.try_emplace(Key, std::unique_ptr());
+  auto  = It.first->getValue();

`Shard.Cache.try_emplace(Key)` will provide a default constructed value if it's 
not there.



Comment at: 
clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp:117
+  // Check the local cache first.
+  if (const auto *Entry = getCachedEntry(Filename))
+return Entry->getStatus();

`CachedFileSystemEntry *Entry` ?



Comment at: 
clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp:198
+  // Check the local cache first.
+  if (const auto *Entry = getCachedEntry(Filename))
+return createFile(Entry);

CachedFileSystemEntry *Entry ?



Comment at: 
clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp:148
+  RealFS = new ProxyFileSystemWithoutChdir(llvm::vfs::getRealFileSystem());
+  if (Service.getMode() == ScanningMode::MinimizedSourcePreprocessing)
+DepFS = new DependencyScanningFilesystem(Service.getSharedCache(), RealFS);

arphaman wrote:
> aganea wrote:
> > arphaman wrote:
> > > aganea wrote:
> > > > Can we not use caching all the time?
> > > We want to have a mode where it's as close to the regular `clang -E` 
> > > invocation as possible for correctness CI and testing. We also haven't 
> > > evaluated if the cost of keeping non-minimized sources in memory, so it 
> > > might be too expensive for practical use? I can add a third option that 
> > > caches but doesn't minimize though as a follow-up patch though 
> > > 
> > Yes that would be nice. As for the size taken in RAM, it would be only .H 
> > files, right? For Clang+LLVM+LLD I'm counting about 40 MB. But indeed with 
> > a large project, that would be about 1.5 GB of .H. Although I doubt all 
> > these files will be loaded at once in memory (I'll check)
> > 
> > As for the usage: Fastbuild works like distcc (plain mode, not pump) so we 
> > were also planning on extracting the fully preprocessed output, not only 
> > the dependencies. There's one use-case where we need to preprocess locally, 
> > then send the preprocessed output remotely for compilation. And another 
> > use-case where we only want to extract the dependency list, compute a 
> > digest, to retrieve the OBJ from a network cache.
> Right now it doesn't differentiate between .H and other files, but we could 
> teach it do have a header only filter for the cache.
No worries, I was simply wondering about the size in memory.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D63907/new/

https://reviews.llvm.org/D63907



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D63907: [clang-scan-deps] Implementation of dependency scanner over minimized sources

2019-08-05 Thread Alex Lorenz via Phabricator via cfe-commits
arphaman added inline comments.



Comment at: 
clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp:156
+/// this subclass.
+class MinimizedVFSFile final : public llvm::vfs::File {
+public:

aganea wrote:
> This makes only a short-lived objects, is that right? Just during the call to 
> `CachedFileSystemEntry::createFileEntry`?
 Yes, these VFS buffer files are only alive for a particular invocation.



Comment at: 
clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp:148
+  RealFS = new ProxyFileSystemWithoutChdir(llvm::vfs::getRealFileSystem());
+  if (Service.getMode() == ScanningMode::MinimizedSourcePreprocessing)
+DepFS = new DependencyScanningFilesystem(Service.getSharedCache(), RealFS);

aganea wrote:
> arphaman wrote:
> > aganea wrote:
> > > Can we not use caching all the time?
> > We want to have a mode where it's as close to the regular `clang -E` 
> > invocation as possible for correctness CI and testing. We also haven't 
> > evaluated if the cost of keeping non-minimized sources in memory, so it 
> > might be too expensive for practical use? I can add a third option that 
> > caches but doesn't minimize though as a follow-up patch though 
> > 
> Yes that would be nice. As for the size taken in RAM, it would be only .H 
> files, right? For Clang+LLVM+LLD I'm counting about 40 MB. But indeed with a 
> large project, that would be about 1.5 GB of .H. Although I doubt all these 
> files will be loaded at once in memory (I'll check)
> 
> As for the usage: Fastbuild works like distcc (plain mode, not pump) so we 
> were also planning on extracting the fully preprocessed output, not only the 
> dependencies. There's one use-case where we need to preprocess locally, then 
> send the preprocessed output remotely for compilation. And another use-case 
> where we only want to extract the dependency list, compute a digest, to 
> retrieve the OBJ from a network cache.
Right now it doesn't differentiate between .H and other files, but we could 
teach it do have a header only filter for the cache.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D63907/new/

https://reviews.llvm.org/D63907



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D63907: [clang-scan-deps] Implementation of dependency scanner over minimized sources

2019-08-05 Thread Alex Lorenz via Phabricator via cfe-commits
arphaman updated this revision to Diff 213511.
arphaman marked 3 inline comments as done.
arphaman added a comment.

Fix comment typo


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D63907/new/

https://reviews.llvm.org/D63907

Files:
  clang/include/clang/Basic/FileManager.h
  clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
  clang/include/clang/Tooling/DependencyScanning/DependencyScanningService.h
  clang/include/clang/Tooling/DependencyScanning/DependencyScanningWorker.h
  clang/lib/Tooling/DependencyScanning/CMakeLists.txt
  clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp
  clang/lib/Tooling/DependencyScanning/DependencyScanningService.cpp
  clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp
  
clang/test/ClangScanDeps/Inputs/frameworks/Framework.framework/Headers/Framework.h
  
clang/test/ClangScanDeps/Inputs/frameworks/Framework.framework/PrivateHeaders/PrivateHeader.h
  clang/test/ClangScanDeps/Inputs/header_stat_before_open_cdb.json
  clang/test/ClangScanDeps/Inputs/vfsoverlay.yaml
  clang/test/ClangScanDeps/Inputs/vfsoverlay_cdb.json
  clang/test/ClangScanDeps/header_stat_before_open.m
  clang/test/ClangScanDeps/regular_cdb.cpp
  clang/test/ClangScanDeps/vfsoverlay.cpp
  clang/tools/clang-scan-deps/ClangScanDeps.cpp

Index: clang/tools/clang-scan-deps/ClangScanDeps.cpp
===
--- clang/tools/clang-scan-deps/ClangScanDeps.cpp
+++ clang/tools/clang-scan-deps/ClangScanDeps.cpp
@@ -8,6 +8,7 @@
 
 #include "clang/Frontend/CompilerInstance.h"
 #include "clang/Tooling/CommonOptionsParser.h"
+#include "clang/Tooling/DependencyScanning/DependencyScanningService.h"
 #include "clang/Tooling/DependencyScanning/DependencyScanningWorker.h"
 #include "clang/Tooling/JSONCompilationDatabase.h"
 #include "llvm/Support/InitLLVM.h"
@@ -45,9 +46,10 @@
   ///
   /// \param Compilations The reference to the compilation database that's
   /// used by the clang tool.
-  DependencyScanningTool(const tooling::CompilationDatabase ,
+  DependencyScanningTool(DependencyScanningService ,
+ const tooling::CompilationDatabase ,
  SharedStream , SharedStream )
-  : Compilations(Compilations), OS(OS), Errs(Errs) {}
+  : Worker(Service), Compilations(Compilations), OS(OS), Errs(Errs) {}
 
   /// Computes the dependencies for the given file and prints them out.
   ///
@@ -80,6 +82,20 @@
 
 llvm::cl::OptionCategory DependencyScannerCategory("Tool options");
 
+static llvm::cl::opt ScanMode(
+"mode",
+llvm::cl::desc("The preprocessing mode used to compute the dependencies"),
+llvm::cl::values(
+clEnumValN(ScanningMode::MinimizedSourcePreprocessing,
+   "preprocess-minimized-sources",
+   "The set of dependencies is computed by preprocessing the "
+   "source files that were minimized to only include the "
+   "contents that might affect the dependencies"),
+clEnumValN(ScanningMode::CanonicalPreprocessing, "preprocess",
+   "The set of dependencies is computed by preprocessing the "
+   "unmodified source files")),
+llvm::cl::init(ScanningMode::MinimizedSourcePreprocessing));
+
 llvm::cl::opt
 NumThreads("j", llvm::cl::Optional,
llvm::cl::desc("Number of worker threads to use (default: use "
@@ -136,12 +152,14 @@
   SharedStream Errs(llvm::errs());
   // Print out the dependency results to STDOUT by default.
   SharedStream DependencyOS(llvm::outs());
+
+  DependencyScanningService Service(ScanMode);
   unsigned NumWorkers =
   NumThreads == 0 ? llvm::hardware_concurrency() : NumThreads;
   std::vector> WorkerTools;
   for (unsigned I = 0; I < NumWorkers; ++I)
 WorkerTools.push_back(llvm::make_unique(
-*AdjustingCompilations, DependencyOS, Errs));
+Service, *AdjustingCompilations, DependencyOS, Errs));
 
   std::vector WorkerThreads;
   std::atomic HadErrors(false);
Index: clang/test/ClangScanDeps/vfsoverlay.cpp
===
--- /dev/null
+++ clang/test/ClangScanDeps/vfsoverlay.cpp
@@ -0,0 +1,17 @@
+// RUN: rm -rf %t.dir
+// RUN: rm -rf %t.cdb
+// RUN: mkdir -p %t.dir
+// RUN: cp %s %t.dir/vfsoverlay.cpp
+// RUN: sed -e "s|DIR|%/t.dir|g" %S/Inputs/vfsoverlay.yaml > %t.dir/vfsoverlay.yaml
+// RUN: mkdir %t.dir/Inputs
+// RUN: cp %S/Inputs/header.h %t.dir/Inputs/header.h
+// RUN: sed -e "s|DIR|%/t.dir|g" %S/Inputs/vfsoverlay_cdb.json > %t.cdb
+//
+// RUN: clang-scan-deps -compilation-database %t.cdb -j 1 | \
+// RUN:   FileCheck %s
+
+#include "not_real.h"
+
+// CHECK: clang-scan-deps dependency
+// CHECK-NEXT: vfsoverlay.cpp
+// CHECK-NEXT: Inputs{{/|\\}}header.h
Index: clang/test/ClangScanDeps/regular_cdb.cpp
===
--- clang/test/ClangScanDeps/regular_cdb.cpp

[PATCH] D63907: [clang-scan-deps] Implementation of dependency scanner over minimized sources

2019-08-01 Thread Alexandre Ganea via Phabricator via cfe-commits
aganea added inline comments.



Comment at: 
clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp:84
+DependencyScanningFilesystemSharedCache() {
+  NumShards = 8;
+  CacheShards = llvm::make_unique(NumShards);

arphaman wrote:
> aganea wrote:
> > This line needs a comment. Is this value based on empirical results across 
> > different hardwares? (I can't recall your answer at the LLVM conf) I am 
> > wondering what would be the good strategy here? The more cores/HT in your 
> > PC, the more chances that you'll hit the same shard, thus locking. But the 
> > bigger we make this value, the more `StringMaps` we will have, and more 
> > cache misses possibly.
> > Should it be something like `llvm::hardware_concurrency() / some_fudge`? 
> > It'd be interesting to subsequently profile on high core count machines (or 
> > maybe you have done that).
> I rewrote it to use a heuristic we settled on after doing empirical testing 
> on an 18 core / 36 thread machine ( max(2, thread_count / 4) ), and added a 
> comment. This was the number `9` (36 / 4) I mentioned at the talk, so we only 
> got to it because of a heuristic. I think now after some SDK/OS updates the 
> effect from adding more shards is less pronounced, so it mostly flatlines 
> past some number between 5-10. A better heuristic would probably be OS 
> specific to take the cost of lock contention into account.
> 
> Note that the increased number of shards does not increase the number of 
> cache misses, because the shard # is determined by the filename (we don't 
> actually have global cache misses, as the cache is designed to have only one 
> miss per file when it's first accessed)! It's just that an increased number 
> of shards doesn't improve performance after hitting some specific limit, so 
> we want to find a point where we can taper it off.
> 
> It would still be definitely interesting to profile it on other high core 
> machines with different OSes to see if it's a reasonable heuristic for those 
> scenarios too.
I'll give it a try on Windows 10, one project here has a large codebase and 
some wild machines to test on.



Comment at: 
clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp:156
+/// this subclass.
+class MinimizedVFSFile final : public llvm::vfs::File {
+public:

This makes only a short-lived objects, is that right? Just during the call to 
`CachedFileSystemEntry::createFileEntry`?



Comment at: clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp:94
+  // Add any filenames that were explicity passed in the build settings and
+  // that might be might be opened, as we want to ensure we don't run 
source
+  // minimization on them.

"might be" twice.



Comment at: 
clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp:148
+  RealFS = new ProxyFileSystemWithoutChdir(llvm::vfs::getRealFileSystem());
+  if (Service.getMode() == ScanningMode::MinimizedSourcePreprocessing)
+DepFS = new DependencyScanningFilesystem(Service.getSharedCache(), RealFS);

arphaman wrote:
> aganea wrote:
> > Can we not use caching all the time?
> We want to have a mode where it's as close to the regular `clang -E` 
> invocation as possible for correctness CI and testing. We also haven't 
> evaluated if the cost of keeping non-minimized sources in memory, so it might 
> be too expensive for practical use? I can add a third option that caches but 
> doesn't minimize though as a follow-up patch though 
> 
Yes that would be nice. As for the size taken in RAM, it would be only .H 
files, right? For Clang+LLVM+LLD I'm counting about 40 MB. But indeed with a 
large project, that would be about 1.5 GB of .H. Although I doubt all these 
files will be loaded at once in memory (I'll check)

As for the usage: Fastbuild works like distcc (plain mode, not pump) so we were 
also planning on extracting the fully preprocessed output, not only the 
dependencies. There's one use-case where we need to preprocess locally, then 
send the preprocessed output remotely for compilation. And another use-case 
where we only want to extract the dependency list, compute a digest, to 
retrieve the OBJ from a network cache.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D63907/new/

https://reviews.llvm.org/D63907



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D63907: [clang-scan-deps] Implementation of dependency scanner over minimized sources

2019-07-30 Thread Alex Lorenz via Phabricator via cfe-commits
arphaman updated this revision to Diff 212442.

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D63907/new/

https://reviews.llvm.org/D63907

Files:
  clang/include/clang/Basic/FileManager.h
  clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
  clang/include/clang/Tooling/DependencyScanning/DependencyScanningService.h
  clang/include/clang/Tooling/DependencyScanning/DependencyScanningWorker.h
  clang/lib/Tooling/DependencyScanning/CMakeLists.txt
  clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp
  clang/lib/Tooling/DependencyScanning/DependencyScanningService.cpp
  clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp
  
clang/test/ClangScanDeps/Inputs/frameworks/Framework.framework/Headers/Framework.h
  
clang/test/ClangScanDeps/Inputs/frameworks/Framework.framework/PrivateHeaders/PrivateHeader.h
  clang/test/ClangScanDeps/Inputs/header_stat_before_open_cdb.json
  clang/test/ClangScanDeps/Inputs/vfsoverlay.yaml
  clang/test/ClangScanDeps/Inputs/vfsoverlay_cdb.json
  clang/test/ClangScanDeps/header_stat_before_open.m
  clang/test/ClangScanDeps/regular_cdb.cpp
  clang/test/ClangScanDeps/vfsoverlay.cpp
  clang/tools/clang-scan-deps/ClangScanDeps.cpp

Index: clang/tools/clang-scan-deps/ClangScanDeps.cpp
===
--- clang/tools/clang-scan-deps/ClangScanDeps.cpp
+++ clang/tools/clang-scan-deps/ClangScanDeps.cpp
@@ -8,6 +8,7 @@
 
 #include "clang/Frontend/CompilerInstance.h"
 #include "clang/Tooling/CommonOptionsParser.h"
+#include "clang/Tooling/DependencyScanning/DependencyScanningService.h"
 #include "clang/Tooling/DependencyScanning/DependencyScanningWorker.h"
 #include "clang/Tooling/JSONCompilationDatabase.h"
 #include "llvm/Support/InitLLVM.h"
@@ -45,9 +46,10 @@
   ///
   /// \param Compilations The reference to the compilation database that's
   /// used by the clang tool.
-  DependencyScanningTool(const tooling::CompilationDatabase ,
+  DependencyScanningTool(DependencyScanningService ,
+ const tooling::CompilationDatabase ,
  SharedStream , SharedStream )
-  : Compilations(Compilations), OS(OS), Errs(Errs) {}
+  : Worker(Service), Compilations(Compilations), OS(OS), Errs(Errs) {}
 
   /// Computes the dependencies for the given file and prints them out.
   ///
@@ -80,6 +82,20 @@
 
 llvm::cl::OptionCategory DependencyScannerCategory("Tool options");
 
+static llvm::cl::opt ScanMode(
+"mode",
+llvm::cl::desc("The preprocessing mode used to compute the dependencies"),
+llvm::cl::values(
+clEnumValN(ScanningMode::MinimizedSourcePreprocessing,
+   "preprocess-minimized-sources",
+   "The set of dependencies is computed by preprocessing the "
+   "source files that were minimized to only include the "
+   "contents that might affect the dependencies"),
+clEnumValN(ScanningMode::CanonicalPreprocessing, "preprocess",
+   "The set of dependencies is computed by preprocessing the "
+   "unmodified source files")),
+llvm::cl::init(ScanningMode::MinimizedSourcePreprocessing));
+
 llvm::cl::opt
 NumThreads("j", llvm::cl::Optional,
llvm::cl::desc("Number of worker threads to use (default: use "
@@ -136,12 +152,14 @@
   SharedStream Errs(llvm::errs());
   // Print out the dependency results to STDOUT by default.
   SharedStream DependencyOS(llvm::outs());
+
+  DependencyScanningService Service(ScanMode);
   unsigned NumWorkers =
   NumThreads == 0 ? llvm::hardware_concurrency() : NumThreads;
   std::vector> WorkerTools;
   for (unsigned I = 0; I < NumWorkers; ++I)
 WorkerTools.push_back(llvm::make_unique(
-*AdjustingCompilations, DependencyOS, Errs));
+Service, *AdjustingCompilations, DependencyOS, Errs));
 
   std::vector WorkerThreads;
   std::atomic HadErrors(false);
Index: clang/test/ClangScanDeps/vfsoverlay.cpp
===
--- /dev/null
+++ clang/test/ClangScanDeps/vfsoverlay.cpp
@@ -0,0 +1,17 @@
+// RUN: rm -rf %t.dir
+// RUN: rm -rf %t.cdb
+// RUN: mkdir -p %t.dir
+// RUN: cp %s %t.dir/vfsoverlay.cpp
+// RUN: sed -e "s|DIR|%/t.dir|g" %S/Inputs/vfsoverlay.yaml > %t.dir/vfsoverlay.yaml
+// RUN: mkdir %t.dir/Inputs
+// RUN: cp %S/Inputs/header.h %t.dir/Inputs/header.h
+// RUN: sed -e "s|DIR|%/t.dir|g" %S/Inputs/vfsoverlay_cdb.json > %t.cdb
+//
+// RUN: clang-scan-deps -compilation-database %t.cdb -j 1 | \
+// RUN:   FileCheck %s
+
+#include "not_real.h"
+
+// CHECK: clang-scan-deps dependency
+// CHECK-NEXT: vfsoverlay.cpp
+// CHECK-NEXT: Inputs{{/|\\}}header.h
Index: clang/test/ClangScanDeps/regular_cdb.cpp
===
--- clang/test/ClangScanDeps/regular_cdb.cpp
+++ clang/test/ClangScanDeps/regular_cdb.cpp
@@ -8,7 +8,9 @@
 // RUN: cp 

[PATCH] D63907: [clang-scan-deps] Implementation of dependency scanner over minimized sources

2019-07-30 Thread Alex Lorenz via Phabricator via cfe-commits
arphaman added inline comments.



Comment at: 
clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h:1
+//===- DependencyScanningFilesystem.h - clang-scan-deps fs ===---*- C++ 
-*-===//
+//

aganea wrote:
> General comment for this file and the implementation: it seems there's 
> nothing specific to the dependency scanning, except for replacing the content 
> with minimized content? This cached FS could be very well used to compile a 
> project with parallel workers. Could this be part of the `VirtualFileSystem` 
> infrastructure? ( `llvm/include/llvm/Support/VirtualFileSystem.h`) If yes, 
> can you possibly create a separate patch for this? @JDevlieghere @sammccall 
I think it could be possible to separate out the cache, but we definitely need 
a subclass of a VFS to handle some Clang specific logic for how to determine 
how to deal with module files for instance. I wouldn't be opposed to factoring 
it out if people are interested. I think that can be done as a follow-up if 
there's an interest though.




Comment at: 
clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp:84
+DependencyScanningFilesystemSharedCache() {
+  NumShards = 8;
+  CacheShards = llvm::make_unique(NumShards);

aganea wrote:
> This line needs a comment. Is this value based on empirical results across 
> different hardwares? (I can't recall your answer at the LLVM conf) I am 
> wondering what would be the good strategy here? The more cores/HT in your PC, 
> the more chances that you'll hit the same shard, thus locking. But the bigger 
> we make this value, the more `StringMaps` we will have, and more cache misses 
> possibly.
> Should it be something like `llvm::hardware_concurrency() / some_fudge`? It'd 
> be interesting to subsequently profile on high core count machines (or maybe 
> you have done that).
I rewrote it to use a heuristic we settled on after doing empirical testing on 
an 18 core / 36 thread machine ( max(2, thread_count / 4) ), and added a 
comment. This was the number `9` (36 / 4) I mentioned at the talk, so we only 
got to it because of a heuristic. I think now after some SDK/OS updates the 
effect from adding more shards is less pronounced, so it mostly flatlines past 
some number between 5-10. A better heuristic would probably be OS specific to 
take the cost of lock contention into account.

Note that the increased number of shards does not increase the number of cache 
misses, because the shard # is determined by the filename (we don't actually 
have global cache misses, as the cache is designed to have only one miss per 
file when it's first accessed)! It's just that an increased number of shards 
doesn't improve performance after hitting some specific limit, so we want to 
find a point where we can taper it off.

It would still be definitely interesting to profile it on other high core 
machines with different OSes to see if it's a reasonable heuristic for those 
scenarios too.



Comment at: 
clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp:148
+  RealFS = new ProxyFileSystemWithoutChdir(llvm::vfs::getRealFileSystem());
+  if (Service.getMode() == ScanningMode::MinimizedSourcePreprocessing)
+DepFS = new DependencyScanningFilesystem(Service.getSharedCache(), RealFS);

aganea wrote:
> Can we not use caching all the time?
We want to have a mode where it's as close to the regular `clang -E` invocation 
as possible for correctness CI and testing. We also haven't evaluated if the 
cost of keeping non-minimized sources in memory, so it might be too expensive 
for practical use? I can add a third option that caches but doesn't minimize 
though as a follow-up patch though 



Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D63907/new/

https://reviews.llvm.org/D63907



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D63907: [clang-scan-deps] Implementation of dependency scanner over minimized sources

2019-07-30 Thread Alex Lorenz via Phabricator via cfe-commits
arphaman updated this revision to Diff 212435.
arphaman marked 7 inline comments as done.

Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D63907/new/

https://reviews.llvm.org/D63907

Files:
  clang/include/clang/Basic/FileManager.h
  clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
  clang/include/clang/Tooling/DependencyScanning/DependencyScanningService.h
  clang/include/clang/Tooling/DependencyScanning/DependencyScanningWorker.h
  clang/lib/Tooling/DependencyScanning/CMakeLists.txt
  clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp
  clang/lib/Tooling/DependencyScanning/DependencyScanningService.cpp
  clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp
  
clang/test/ClangScanDeps/Inputs/frameworks/Framework.framework/Headers/Framework.h
  
clang/test/ClangScanDeps/Inputs/frameworks/Framework.framework/PrivateHeaders/PrivateHeader.h
  clang/test/ClangScanDeps/Inputs/header_stat_before_open_cdb.json
  clang/test/ClangScanDeps/Inputs/vfsoverlay.yaml
  clang/test/ClangScanDeps/Inputs/vfsoverlay_cdb.json
  clang/test/ClangScanDeps/header_stat_before_open.m
  clang/test/ClangScanDeps/regular_cdb.cpp
  clang/test/ClangScanDeps/vfsoverlay.cpp
  clang/tools/clang-scan-deps/ClangScanDeps.cpp

Index: clang/tools/clang-scan-deps/ClangScanDeps.cpp
===
--- clang/tools/clang-scan-deps/ClangScanDeps.cpp
+++ clang/tools/clang-scan-deps/ClangScanDeps.cpp
@@ -8,6 +8,7 @@
 
 #include "clang/Frontend/CompilerInstance.h"
 #include "clang/Tooling/CommonOptionsParser.h"
+#include "clang/Tooling/DependencyScanning/DependencyScanningService.h"
 #include "clang/Tooling/DependencyScanning/DependencyScanningWorker.h"
 #include "clang/Tooling/JSONCompilationDatabase.h"
 #include "llvm/Support/InitLLVM.h"
@@ -45,9 +46,10 @@
   ///
   /// \param Compilations The reference to the compilation database that's
   /// used by the clang tool.
-  DependencyScanningTool(const tooling::CompilationDatabase ,
+  DependencyScanningTool(DependencyScanningService ,
+ const tooling::CompilationDatabase ,
  SharedStream , SharedStream )
-  : Compilations(Compilations), OS(OS), Errs(Errs) {}
+  : Worker(Service), Compilations(Compilations), OS(OS), Errs(Errs) {}
 
   /// Computes the dependencies for the given file and prints them out.
   ///
@@ -80,6 +82,20 @@
 
 llvm::cl::OptionCategory DependencyScannerCategory("Tool options");
 
+static llvm::cl::opt ScanMode(
+"mode",
+llvm::cl::desc("The preprocessing mode used to compute the dependencies"),
+llvm::cl::values(
+clEnumValN(ScanningMode::MinimizedSourcePreprocessing,
+   "preprocess-minimized-sources",
+   "The set of dependencies is computed by preprocessing the "
+   "source files that were minimized to only include the "
+   "contents that might affect the dependencies"),
+clEnumValN(ScanningMode::CanonicalPreprocessing, "preprocess",
+   "The set of dependencies is computed by preprocessing the "
+   "unmodified source files")),
+llvm::cl::init(ScanningMode::MinimizedSourcePreprocessing));
+
 llvm::cl::opt
 NumThreads("j", llvm::cl::Optional,
llvm::cl::desc("Number of worker threads to use (default: use "
@@ -136,12 +152,14 @@
   SharedStream Errs(llvm::errs());
   // Print out the dependency results to STDOUT by default.
   SharedStream DependencyOS(llvm::outs());
+
+  DependencyScanningService Service(ScanMode);
   unsigned NumWorkers =
   NumThreads == 0 ? llvm::hardware_concurrency() : NumThreads;
   std::vector> WorkerTools;
   for (unsigned I = 0; I < NumWorkers; ++I)
 WorkerTools.push_back(llvm::make_unique(
-*AdjustingCompilations, DependencyOS, Errs));
+Service, *AdjustingCompilations, DependencyOS, Errs));
 
   std::vector WorkerThreads;
   std::atomic HadErrors(false);
Index: clang/test/ClangScanDeps/vfsoverlay.cpp
===
--- /dev/null
+++ clang/test/ClangScanDeps/vfsoverlay.cpp
@@ -0,0 +1,17 @@
+// RUN: rm -rf %t.dir
+// RUN: rm -rf %t.cdb
+// RUN: mkdir -p %t.dir
+// RUN: cp %s %t.dir/vfsoverlay.cpp
+// RUN: sed -e "s|DIR|%/t.dir|g" %S/Inputs/vfsoverlay.yaml > %t.dir/vfsoverlay.yaml
+// RUN: mkdir %t.dir/Inputs
+// RUN: cp %S/Inputs/header.h %t.dir/Inputs/header.h
+// RUN: sed -e "s|DIR|%/t.dir|g" %S/Inputs/vfsoverlay_cdb.json > %t.cdb
+//
+// RUN: clang-scan-deps -compilation-database %t.cdb -j 1 | \
+// RUN:   FileCheck %s
+
+#include "not_real.h"
+
+// CHECK: clang-scan-deps dependency
+// CHECK-NEXT: vfsoverlay.cpp
+// CHECK-NEXT: Inputs{{/|\\}}header.h
Index: clang/test/ClangScanDeps/regular_cdb.cpp
===
--- clang/test/ClangScanDeps/regular_cdb.cpp
+++ 

[PATCH] D63907: [clang-scan-deps] Implementation of dependency scanner over minimized sources

2019-07-30 Thread Alexandre Ganea via Phabricator via cfe-commits
aganea added subscribers: sammccall, JDevlieghere.
aganea added inline comments.



Comment at: 
clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h:1
+//===- DependencyScanningFilesystem.h - clang-scan-deps fs ===---*- C++ 
-*-===//
+//

General comment for this file and the implementation: it seems there's nothing 
specific to the dependency scanning, except for replacing the content with 
minimized content? This cached FS could be very well used to compile a project 
with parallel workers. Could this be part of the `VirtualFileSystem` 
infrastructure? ( `llvm/include/llvm/Support/VirtualFileSystem.h`) If yes, can 
you possibly create a separate patch for this? @JDevlieghere @sammccall 



Comment at: 
clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h:101
+public:
+  /// A \c CachedFileSystemEntry with a lock.
+  struct SharedFileSystemEntry {

The struct is self-explanatory, not sure the comment is needed here?



Comment at: 
clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h:103
+  struct SharedFileSystemEntry {
+std::mutex Lock;
+CachedFileSystemEntry Value;

Would you mind renaming this to `ValueLock` so it is easier to search for? (and 
to remain consistent with `CacheLock` below)



Comment at: 
clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h:129
+/// computation.
+class DependencyScanningFilesystem : public llvm::vfs::ProxyFileSystem {
+public:

Maybe worth mentioning this is NOT a global, thread-safe, class like 
`DependencyScanningFilesystemSharedCache`, but rather meant to be used as 
per-thread instances?

I am also wondering if there could be a better name to signify at first sight 
that this is a per-thread class. `DependencyScanningLocalFilesystem`? 
`DependencyScanningWorkerFilesystem`?



Comment at: 
clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp:84
+DependencyScanningFilesystemSharedCache() {
+  NumShards = 8;
+  CacheShards = llvm::make_unique(NumShards);

This line needs a comment. Is this value based on empirical results across 
different hardwares? (I can't recall your answer at the LLVM conf) I am 
wondering what would be the good strategy here? The more cores/HT in your PC, 
the more chances that you'll hit the same shard, thus locking. But the bigger 
we make this value, the more `StringMaps` we will have, and more cache misses 
possibly.
Should it be something like `llvm::hardware_concurrency() / some_fudge`? It'd 
be interesting to subsequently profile on high core count machines (or maybe 
you have done that).



Comment at: 
clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp:210
+  bool KeepOriginalSource = IgnoredFiles.count(Filename);
+  auto  = SharedCache.get(Filename);
+  const CachedFileSystemEntry *Result;

Don't use `auto` when the type is not obvious.



Comment at: 
clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp:148
+  RealFS = new ProxyFileSystemWithoutChdir(llvm::vfs::getRealFileSystem());
+  if (Service.getMode() == ScanningMode::MinimizedSourcePreprocessing)
+DepFS = new DependencyScanningFilesystem(Service.getSharedCache(), RealFS);

Can we not use caching all the time?


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D63907/new/

https://reviews.llvm.org/D63907



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D63907: [clang-scan-deps] Implementation of dependency scanner over minimized sources

2019-07-23 Thread Alexandre Ganea via Phabricator via cfe-commits
aganea added a comment.

I will take a look next week when I get back!


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D63907/new/

https://reviews.llvm.org/D63907



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D63907: [clang-scan-deps] Implementation of dependency scanner over minimized sources

2019-07-23 Thread Alex Lorenz via Phabricator via cfe-commits
arphaman updated this revision to Diff 211385.
arphaman added a comment.

Fix a bug for empty minimized files where null terminator lookup by the lexer 
was an out of bounds read


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D63907/new/

https://reviews.llvm.org/D63907

Files:
  clang/include/clang/Basic/FileManager.h
  clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
  clang/include/clang/Tooling/DependencyScanning/DependencyScanningService.h
  clang/include/clang/Tooling/DependencyScanning/DependencyScanningWorker.h
  clang/lib/Tooling/DependencyScanning/CMakeLists.txt
  clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp
  clang/lib/Tooling/DependencyScanning/DependencyScanningService.cpp
  clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp
  
clang/test/ClangScanDeps/Inputs/frameworks/Framework.framework/Headers/Framework.h
  
clang/test/ClangScanDeps/Inputs/frameworks/Framework.framework/PrivateHeaders/PrivateHeader.h
  clang/test/ClangScanDeps/Inputs/header_stat_before_open_cdb.json
  clang/test/ClangScanDeps/Inputs/vfsoverlay.yaml
  clang/test/ClangScanDeps/Inputs/vfsoverlay_cdb.json
  clang/test/ClangScanDeps/header_stat_before_open.m
  clang/test/ClangScanDeps/regular_cdb.cpp
  clang/test/ClangScanDeps/vfsoverlay.cpp
  clang/tools/clang-scan-deps/ClangScanDeps.cpp

Index: clang/tools/clang-scan-deps/ClangScanDeps.cpp
===
--- clang/tools/clang-scan-deps/ClangScanDeps.cpp
+++ clang/tools/clang-scan-deps/ClangScanDeps.cpp
@@ -8,6 +8,7 @@
 
 #include "clang/Frontend/CompilerInstance.h"
 #include "clang/Tooling/CommonOptionsParser.h"
+#include "clang/Tooling/DependencyScanning/DependencyScanningService.h"
 #include "clang/Tooling/DependencyScanning/DependencyScanningWorker.h"
 #include "clang/Tooling/JSONCompilationDatabase.h"
 #include "llvm/Support/InitLLVM.h"
@@ -45,9 +46,10 @@
   ///
   /// \param Compilations The reference to the compilation database that's
   /// used by the clang tool.
-  DependencyScanningTool(const tooling::CompilationDatabase ,
+  DependencyScanningTool(DependencyScanningService ,
+ const tooling::CompilationDatabase ,
  SharedStream , SharedStream )
-  : Compilations(Compilations), OS(OS), Errs(Errs) {}
+  : Worker(Service), Compilations(Compilations), OS(OS), Errs(Errs) {}
 
   /// Computes the dependencies for the given file and prints them out.
   ///
@@ -80,6 +82,20 @@
 
 llvm::cl::OptionCategory DependencyScannerCategory("Tool options");
 
+static llvm::cl::opt ScanMode(
+"mode",
+llvm::cl::desc("The preprocessing mode used to compute the dependencies"),
+llvm::cl::values(
+clEnumValN(ScanningMode::MinimizedSourcePreprocessing,
+   "preprocess-minimized-sources",
+   "The set of dependencies is computed by preprocessing the "
+   "source files that were minimized to only include the "
+   "contents that might affect the dependencies"),
+clEnumValN(ScanningMode::CanonicalPreprocessing, "preprocess",
+   "The set of dependencies is computed by preprocessing the "
+   "unmodified source files")),
+llvm::cl::init(ScanningMode::MinimizedSourcePreprocessing));
+
 llvm::cl::opt
 NumThreads("j", llvm::cl::Optional,
llvm::cl::desc("Number of worker threads to use (default: use "
@@ -136,12 +152,14 @@
   SharedStream Errs(llvm::errs());
   // Print out the dependency results to STDOUT by default.
   SharedStream DependencyOS(llvm::outs());
+
+  DependencyScanningService Service(ScanMode);
   unsigned NumWorkers =
   NumThreads == 0 ? llvm::hardware_concurrency() : NumThreads;
   std::vector> WorkerTools;
   for (unsigned I = 0; I < NumWorkers; ++I)
 WorkerTools.push_back(llvm::make_unique(
-*AdjustingCompilations, DependencyOS, Errs));
+Service, *AdjustingCompilations, DependencyOS, Errs));
 
   std::vector WorkerThreads;
   std::atomic HadErrors(false);
Index: clang/test/ClangScanDeps/vfsoverlay.cpp
===
--- /dev/null
+++ clang/test/ClangScanDeps/vfsoverlay.cpp
@@ -0,0 +1,17 @@
+// RUN: rm -rf %t.dir
+// RUN: rm -rf %t.cdb
+// RUN: mkdir -p %t.dir
+// RUN: cp %s %t.dir/vfsoverlay.cpp
+// RUN: sed -e "s|DIR|%/t.dir|g" %S/Inputs/vfsoverlay.yaml > %t.dir/vfsoverlay.yaml
+// RUN: mkdir %t.dir/Inputs
+// RUN: cp %S/Inputs/header.h %t.dir/Inputs/header.h
+// RUN: sed -e "s|DIR|%/t.dir|g" %S/Inputs/vfsoverlay_cdb.json > %t.cdb
+//
+// RUN: clang-scan-deps -compilation-database %t.cdb -j 1 | \
+// RUN:   FileCheck %s
+
+#include "not_real.h"
+
+// CHECK: clang-scan-deps dependency
+// CHECK-NEXT: vfsoverlay.cpp
+// CHECK-NEXT: Inputs{{/|\\}}header.h
Index: clang/test/ClangScanDeps/regular_cdb.cpp
===

[PATCH] D63907: [clang-scan-deps] Implementation of dependency scanner over minimized sources

2019-06-27 Thread Alex Lorenz via Phabricator via cfe-commits
arphaman created this revision.
arphaman added reviewers: Bigcheese, aganea.
Herald added subscribers: tschuett, dexonsmith, jkorous, mgorny.
Herald added a project: clang.

This patch implements the fast dependency scanning mode in `clang-scan-deps`: 
the preprocessing is done on files that are minimized using the dependency 
directives source minimizer.

A shared file system cache is used to ensure that the file system requests and 
source minimization is performed only once. The cache assumes that the 
underlying filesystem won't change during the course of the scan (or if it 
will, it will not affect the output), and it can't be evicted. This means that 
the service and workers can be used for a single run of a dependency scanner, 
and can't be reused across multiple, incremental runs. This is something that 
we'll most likely support in the future though. Note that the driver still 
utilizes the underlying real filesystem.

This patch is also still missing the fast skipped PP block skipping 
optimization that I mentioned at EuroLLVM talk.


Repository:
  rC Clang

https://reviews.llvm.org/D63907

Files:
  clang/include/clang/Basic/FileManager.h
  clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
  clang/include/clang/Tooling/DependencyScanning/DependencyScanningService.h
  clang/include/clang/Tooling/DependencyScanning/DependencyScanningWorker.h
  clang/lib/Tooling/DependencyScanning/CMakeLists.txt
  clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp
  clang/lib/Tooling/DependencyScanning/DependencyScanningService.cpp
  clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp
  
clang/test/ClangScanDeps/Inputs/frameworks/Framework.framework/Headers/Framework.h
  
clang/test/ClangScanDeps/Inputs/frameworks/Framework.framework/PrivateHeaders/PrivateHeader.h
  clang/test/ClangScanDeps/Inputs/header_stat_before_open_cdb.json
  clang/test/ClangScanDeps/Inputs/vfsoverlay.yaml
  clang/test/ClangScanDeps/Inputs/vfsoverlay_cdb.json
  clang/test/ClangScanDeps/header_stat_before_open.m
  clang/test/ClangScanDeps/regular_cdb.cpp
  clang/test/ClangScanDeps/vfsoverlay.cpp
  clang/tools/clang-scan-deps/ClangScanDeps.cpp

Index: clang/tools/clang-scan-deps/ClangScanDeps.cpp
===
--- clang/tools/clang-scan-deps/ClangScanDeps.cpp
+++ clang/tools/clang-scan-deps/ClangScanDeps.cpp
@@ -8,6 +8,7 @@
 
 #include "clang/Frontend/CompilerInstance.h"
 #include "clang/Tooling/CommonOptionsParser.h"
+#include "clang/Tooling/DependencyScanning/DependencyScanningService.h"
 #include "clang/Tooling/DependencyScanning/DependencyScanningWorker.h"
 #include "clang/Tooling/JSONCompilationDatabase.h"
 #include "llvm/Support/InitLLVM.h"
@@ -45,9 +46,10 @@
   ///
   /// \param Compilations The reference to the compilation database that's
   /// used by the clang tool.
-  DependencyScanningTool(const tooling::CompilationDatabase ,
+  DependencyScanningTool(DependencyScanningService ,
+ const tooling::CompilationDatabase ,
  SharedStream , SharedStream )
-  : Compilations(Compilations), OS(OS), Errs(Errs) {}
+  : Worker(Service), Compilations(Compilations), OS(OS), Errs(Errs) {}
 
   /// Computes the dependencies for the given file and prints them out.
   ///
@@ -80,6 +82,20 @@
 
 llvm::cl::OptionCategory DependencyScannerCategory("Tool options");
 
+static llvm::cl::opt ScanMode(
+"mode",
+llvm::cl::desc("The preprocessing mode used to compute the dependencies"),
+llvm::cl::values(
+clEnumValN(ScanningMode::MinimizedSourcePreprocessing,
+   "preprocess-minimized-sources",
+   "The set of dependencies is computed by preprocessing the "
+   "source files that were minimized to only include the "
+   "contents that might affect the dependencies"),
+clEnumValN(ScanningMode::CanonicalPreprocessing, "preprocess",
+   "The set of dependencies is computed by preprocessing the "
+   "unmodified source files")),
+llvm::cl::init(ScanningMode::MinimizedSourcePreprocessing));
+
 llvm::cl::opt
 NumThreads("j", llvm::cl::Optional,
llvm::cl::desc("Number of worker threads to use (default: use "
@@ -135,12 +151,14 @@
   SharedStream Errs(llvm::errs());
   // Print out the dependency results to STDOUT by default.
   SharedStream DependencyOS(llvm::outs());
+
+  DependencyScanningService Service(ScanMode);
   unsigned NumWorkers =
   NumThreads == 0 ? llvm::hardware_concurrency() : NumThreads;
   std::vector> WorkerTools;
   for (unsigned I = 0; I < NumWorkers; ++I)
 WorkerTools.push_back(llvm::make_unique(
-*AdjustingCompilations, DependencyOS, Errs));
+Service, *AdjustingCompilations, DependencyOS, Errs));
 
   std::vector WorkerThreads;
   std::atomic HadErrors(false);
Index: