[PATCH] D83621: [clang][Tooling] Try to avoid file system access if there is no record for the file in compile_commads.json

2020-07-17 Thread Sam McCall via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rGd19f0666bcd8: [clang][Tooling] Try to avoid file system 
access if there is no record for theā€¦ (authored by ArcsinX, committed by 
sammccall).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D83621/new/

https://reviews.llvm.org/D83621

Files:
  clang/lib/Tooling/FileMatchTrie.cpp
  clang/unittests/Tooling/CompilationDatabaseTest.cpp


Index: clang/unittests/Tooling/CompilationDatabaseTest.cpp
===
--- clang/unittests/Tooling/CompilationDatabaseTest.cpp
+++ clang/unittests/Tooling/CompilationDatabaseTest.cpp
@@ -281,6 +281,15 @@
   EXPECT_EQ("Cannot resolve relative paths", Error);
 }
 
+TEST_F(FileMatchTrieTest, SingleFile) {
+  Trie.insert("/root/RootFile.cc");
+  EXPECT_EQ("", find("/root/rootfile.cc"));
+  // Add subpath to avoid `if (Children.empty())` special case
+  // which we hit at previous `find()`.
+  Trie.insert("/root/otherpath/OtherFile.cc");
+  EXPECT_EQ("", find("/root/rootfile.cc"));
+}
+
 TEST(findCompileArgsInJsonDatabase, FindsNothingIfEmpty) {
   std::string ErrorMessage;
   CompileCommand NotFound = findCompileArgsInJsonDatabase(
Index: clang/lib/Tooling/FileMatchTrie.cpp
===
--- clang/lib/Tooling/FileMatchTrie.cpp
+++ clang/lib/Tooling/FileMatchTrie.cpp
@@ -105,8 +105,13 @@
StringRef FileName,
bool ,
unsigned ConsumedLength = 0) const {
+// Note: we support only directory symlinks for performance reasons.
 if (Children.empty()) {
-  if (Comparator.equivalent(StringRef(Path), FileName))
+  // As far as we do not support file symlinks, compare
+  // basenames here to avoid request to file system.
+  if (llvm::sys::path::filename(Path) ==
+  llvm::sys::path::filename(FileName) &&
+  Comparator.equivalent(StringRef(Path), FileName))
 return StringRef(Path);
   return {};
 }
@@ -121,6 +126,13 @@
   if (!Result.empty() || IsAmbiguous)
 return Result;
 }
+
+// If `ConsumedLength` is zero, this is the root and we have no filename
+// match. Give up in this case, we don't try to find symlinks with
+// different names.
+if (ConsumedLength == 0)
+  return {};
+
 std::vector AllChildren;
 getAll(AllChildren, MatchingChild);
 StringRef Result;


Index: clang/unittests/Tooling/CompilationDatabaseTest.cpp
===
--- clang/unittests/Tooling/CompilationDatabaseTest.cpp
+++ clang/unittests/Tooling/CompilationDatabaseTest.cpp
@@ -281,6 +281,15 @@
   EXPECT_EQ("Cannot resolve relative paths", Error);
 }
 
+TEST_F(FileMatchTrieTest, SingleFile) {
+  Trie.insert("/root/RootFile.cc");
+  EXPECT_EQ("", find("/root/rootfile.cc"));
+  // Add subpath to avoid `if (Children.empty())` special case
+  // which we hit at previous `find()`.
+  Trie.insert("/root/otherpath/OtherFile.cc");
+  EXPECT_EQ("", find("/root/rootfile.cc"));
+}
+
 TEST(findCompileArgsInJsonDatabase, FindsNothingIfEmpty) {
   std::string ErrorMessage;
   CompileCommand NotFound = findCompileArgsInJsonDatabase(
Index: clang/lib/Tooling/FileMatchTrie.cpp
===
--- clang/lib/Tooling/FileMatchTrie.cpp
+++ clang/lib/Tooling/FileMatchTrie.cpp
@@ -105,8 +105,13 @@
StringRef FileName,
bool ,
unsigned ConsumedLength = 0) const {
+// Note: we support only directory symlinks for performance reasons.
 if (Children.empty()) {
-  if (Comparator.equivalent(StringRef(Path), FileName))
+  // As far as we do not support file symlinks, compare
+  // basenames here to avoid request to file system.
+  if (llvm::sys::path::filename(Path) ==
+  llvm::sys::path::filename(FileName) &&
+  Comparator.equivalent(StringRef(Path), FileName))
 return StringRef(Path);
   return {};
 }
@@ -121,6 +126,13 @@
   if (!Result.empty() || IsAmbiguous)
 return Result;
 }
+
+// If `ConsumedLength` is zero, this is the root and we have no filename
+// match. Give up in this case, we don't try to find symlinks with
+// different names.
+if (ConsumedLength == 0)
+  return {};
+
 std::vector AllChildren;
 getAll(AllChildren, MatchingChild);
 StringRef Result;
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D83621: [clang][Tooling] Try to avoid file system access if there is no record for the file in compile_commads.json

2020-07-17 Thread Sam McCall via Phabricator via cfe-commits
sammccall added a comment.

In D83621#2153711 , @ArcsinX wrote:

> As far as I do not have commit access, could you please commit for me?
>  Aleksandr Platonov 


Oops, missed this! Committed as d19f0666bcd8f7d26aaf4019244c3ed91e47b1b7 
 :-)


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D83621/new/

https://reviews.llvm.org/D83621



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D83621: [clang][Tooling] Try to avoid file system access if there is no record for the file in compile_commads.json

2020-07-15 Thread Aleksandr Platonov via Phabricator via cfe-commits
ArcsinX marked 3 inline comments as done.
ArcsinX added a comment.

As far as I do not have commit access, could you please commit for me?
Aleksandr Platonov 




Comment at: clang/lib/Tooling/FileMatchTrie.cpp:111
+  // As far as we do not support file symlinks we compare
+  // basenames here to avoid expensive request to file system.
+  if (llvm::sys::path::filename(Path) ==

sammccall wrote:
> Here it's really just for consistency - we have a single candidate, and 
> calling equivalent() on a single file isn't expensive. I'd be happy with or 
> without this check, but the comment should mention consistency.
Removed "expensive" word.



Comment at: clang/lib/Tooling/FileMatchTrie.cpp:131
+// We failed to find the file with `Children.find()`.
+// If `ConsumedLength` is equal to 0 then we have tried to find the file
+// with it's basename. Thus, we do not have files with the same

sammccall wrote:
> nit: This is quite a lot of words to come to "we do not support file 
> symlinks" - I think it could be a bit shorter and also get into motivation:
> 
> "If `ConsumedLength` is zero, this is the root and we have no filename match. 
> Give up in this case, we don't try to find symlinks with different names".
Fixed.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D83621/new/

https://reviews.llvm.org/D83621



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D83621: [clang][Tooling] Try to avoid file system access if there is no record for the file in compile_commads.json

2020-07-15 Thread Aleksandr Platonov via Phabricator via cfe-commits
ArcsinX updated this revision to Diff 278230.
ArcsinX added a comment.

Update comments.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D83621/new/

https://reviews.llvm.org/D83621

Files:
  clang/lib/Tooling/FileMatchTrie.cpp
  clang/unittests/Tooling/CompilationDatabaseTest.cpp


Index: clang/unittests/Tooling/CompilationDatabaseTest.cpp
===
--- clang/unittests/Tooling/CompilationDatabaseTest.cpp
+++ clang/unittests/Tooling/CompilationDatabaseTest.cpp
@@ -281,6 +281,15 @@
   EXPECT_EQ("Cannot resolve relative paths", Error);
 }
 
+TEST_F(FileMatchTrieTest, SingleFile) {
+  Trie.insert("/root/RootFile.cc");
+  EXPECT_EQ("", find("/root/rootfile.cc"));
+  // Add subpath to avoid `if (Children.empty())` special case
+  // which we hit at previous `find()`.
+  Trie.insert("/root/otherpath/OtherFile.cc");
+  EXPECT_EQ("", find("/root/rootfile.cc"));
+}
+
 TEST(findCompileArgsInJsonDatabase, FindsNothingIfEmpty) {
   std::string ErrorMessage;
   CompileCommand NotFound = findCompileArgsInJsonDatabase(
Index: clang/lib/Tooling/FileMatchTrie.cpp
===
--- clang/lib/Tooling/FileMatchTrie.cpp
+++ clang/lib/Tooling/FileMatchTrie.cpp
@@ -105,8 +105,13 @@
StringRef FileName,
bool ,
unsigned ConsumedLength = 0) const {
+// Note: we support only directory symlinks for performance reasons.
 if (Children.empty()) {
-  if (Comparator.equivalent(StringRef(Path), FileName))
+  // As far as we do not support file symlinks, compare
+  // basenames here to avoid request to file system.
+  if (llvm::sys::path::filename(Path) ==
+  llvm::sys::path::filename(FileName) &&
+  Comparator.equivalent(StringRef(Path), FileName))
 return StringRef(Path);
   return {};
 }
@@ -121,6 +126,13 @@
   if (!Result.empty() || IsAmbiguous)
 return Result;
 }
+
+// If `ConsumedLength` is zero, this is the root and we have no filename
+// match. Give up in this case, we don't try to find symlinks with
+// different names.
+if (ConsumedLength == 0)
+  return {};
+
 std::vector AllChildren;
 getAll(AllChildren, MatchingChild);
 StringRef Result;


Index: clang/unittests/Tooling/CompilationDatabaseTest.cpp
===
--- clang/unittests/Tooling/CompilationDatabaseTest.cpp
+++ clang/unittests/Tooling/CompilationDatabaseTest.cpp
@@ -281,6 +281,15 @@
   EXPECT_EQ("Cannot resolve relative paths", Error);
 }
 
+TEST_F(FileMatchTrieTest, SingleFile) {
+  Trie.insert("/root/RootFile.cc");
+  EXPECT_EQ("", find("/root/rootfile.cc"));
+  // Add subpath to avoid `if (Children.empty())` special case
+  // which we hit at previous `find()`.
+  Trie.insert("/root/otherpath/OtherFile.cc");
+  EXPECT_EQ("", find("/root/rootfile.cc"));
+}
+
 TEST(findCompileArgsInJsonDatabase, FindsNothingIfEmpty) {
   std::string ErrorMessage;
   CompileCommand NotFound = findCompileArgsInJsonDatabase(
Index: clang/lib/Tooling/FileMatchTrie.cpp
===
--- clang/lib/Tooling/FileMatchTrie.cpp
+++ clang/lib/Tooling/FileMatchTrie.cpp
@@ -105,8 +105,13 @@
StringRef FileName,
bool ,
unsigned ConsumedLength = 0) const {
+// Note: we support only directory symlinks for performance reasons.
 if (Children.empty()) {
-  if (Comparator.equivalent(StringRef(Path), FileName))
+  // As far as we do not support file symlinks, compare
+  // basenames here to avoid request to file system.
+  if (llvm::sys::path::filename(Path) ==
+  llvm::sys::path::filename(FileName) &&
+  Comparator.equivalent(StringRef(Path), FileName))
 return StringRef(Path);
   return {};
 }
@@ -121,6 +126,13 @@
   if (!Result.empty() || IsAmbiguous)
 return Result;
 }
+
+// If `ConsumedLength` is zero, this is the root and we have no filename
+// match. Give up in this case, we don't try to find symlinks with
+// different names.
+if (ConsumedLength == 0)
+  return {};
+
 std::vector AllChildren;
 getAll(AllChildren, MatchingChild);
 StringRef Result;
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D83621: [clang][Tooling] Try to avoid file system access if there is no record for the file in compile_commads.json

2020-07-15 Thread Sam McCall via Phabricator via cfe-commits
sammccall accepted this revision.
sammccall added a comment.
This revision is now accepted and ready to land.

Hmm, there is actually a case the old behavior may have been papering over: 
case-insensitive filesystems. If the real file is foo.cc and we query Foo.cc, 
then the trie would (inefficiently) return the correct file.
It may be a good idea to make the trie case-insensitive for this purpose, but 
maybe we should wait until someone complains. That's a different patch, anyway.




Comment at: clang/lib/Tooling/FileMatchTrie.cpp:111
+  // As far as we do not support file symlinks we compare
+  // basenames here to avoid expensive request to file system.
+  if (llvm::sys::path::filename(Path) ==

Here it's really just for consistency - we have a single candidate, and calling 
equivalent() on a single file isn't expensive. I'd be happy with or without 
this check, but the comment should mention consistency.



Comment at: clang/lib/Tooling/FileMatchTrie.cpp:131
+// We failed to find the file with `Children.find()`.
+// If `ConsumedLength` is equal to 0 then we have tried to find the file
+// with it's basename. Thus, we do not have files with the same

nit: This is quite a lot of words to come to "we do not support file symlinks" 
- I think it could be a bit shorter and also get into motivation:

"If `ConsumedLength` is zero, this is the root and we have no filename match. 
Give up in this case, we don't try to find symlinks with different names".


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D83621/new/

https://reviews.llvm.org/D83621



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D83621: [clang][Tooling] Try to avoid file system access if there is no record for the file in compile_commads.json

2020-07-15 Thread Aleksandr Platonov via Phabricator via cfe-commits
ArcsinX updated this revision to Diff 278161.
ArcsinX added a comment.

Support only directory simlinks.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D83621/new/

https://reviews.llvm.org/D83621

Files:
  clang/lib/Tooling/FileMatchTrie.cpp
  clang/unittests/Tooling/CompilationDatabaseTest.cpp


Index: clang/unittests/Tooling/CompilationDatabaseTest.cpp
===
--- clang/unittests/Tooling/CompilationDatabaseTest.cpp
+++ clang/unittests/Tooling/CompilationDatabaseTest.cpp
@@ -281,6 +281,15 @@
   EXPECT_EQ("Cannot resolve relative paths", Error);
 }
 
+TEST_F(FileMatchTrieTest, SingleFile) {
+  Trie.insert("/root/RootFile.cc");
+  EXPECT_EQ("", find("/root/rootfile.cc"));
+  // Add subpath to avoid `if (Children.empty())` special case
+  // which we hit at previous `find()`.
+  Trie.insert("/root/otherpath/OtherFile.cc");
+  EXPECT_EQ("", find("/root/rootfile.cc"));
+}
+
 TEST(findCompileArgsInJsonDatabase, FindsNothingIfEmpty) {
   std::string ErrorMessage;
   CompileCommand NotFound = findCompileArgsInJsonDatabase(
Index: clang/lib/Tooling/FileMatchTrie.cpp
===
--- clang/lib/Tooling/FileMatchTrie.cpp
+++ clang/lib/Tooling/FileMatchTrie.cpp
@@ -105,8 +105,13 @@
StringRef FileName,
bool ,
unsigned ConsumedLength = 0) const {
+// Note: we support only directory symlinks for performance reasons.
 if (Children.empty()) {
-  if (Comparator.equivalent(StringRef(Path), FileName))
+  // As far as we do not support file symlinks we compare
+  // basenames here to avoid expensive request to file system.
+  if (llvm::sys::path::filename(Path) ==
+  llvm::sys::path::filename(FileName) &&
+  Comparator.equivalent(StringRef(Path), FileName))
 return StringRef(Path);
   return {};
 }
@@ -121,6 +126,15 @@
   if (!Result.empty() || IsAmbiguous)
 return Result;
 }
+
+// We failed to find the file with `Children.find()`.
+// If `ConsumedLength` is equal to 0 then we have tried to find the file
+// with it's basename. Thus, we do not have files with the same
+// basename and could return empty result here as far as we
+// do not support file symlinks.
+if (ConsumedLength == 0)
+  return {};
+
 std::vector AllChildren;
 getAll(AllChildren, MatchingChild);
 StringRef Result;


Index: clang/unittests/Tooling/CompilationDatabaseTest.cpp
===
--- clang/unittests/Tooling/CompilationDatabaseTest.cpp
+++ clang/unittests/Tooling/CompilationDatabaseTest.cpp
@@ -281,6 +281,15 @@
   EXPECT_EQ("Cannot resolve relative paths", Error);
 }
 
+TEST_F(FileMatchTrieTest, SingleFile) {
+  Trie.insert("/root/RootFile.cc");
+  EXPECT_EQ("", find("/root/rootfile.cc"));
+  // Add subpath to avoid `if (Children.empty())` special case
+  // which we hit at previous `find()`.
+  Trie.insert("/root/otherpath/OtherFile.cc");
+  EXPECT_EQ("", find("/root/rootfile.cc"));
+}
+
 TEST(findCompileArgsInJsonDatabase, FindsNothingIfEmpty) {
   std::string ErrorMessage;
   CompileCommand NotFound = findCompileArgsInJsonDatabase(
Index: clang/lib/Tooling/FileMatchTrie.cpp
===
--- clang/lib/Tooling/FileMatchTrie.cpp
+++ clang/lib/Tooling/FileMatchTrie.cpp
@@ -105,8 +105,13 @@
StringRef FileName,
bool ,
unsigned ConsumedLength = 0) const {
+// Note: we support only directory symlinks for performance reasons.
 if (Children.empty()) {
-  if (Comparator.equivalent(StringRef(Path), FileName))
+  // As far as we do not support file symlinks we compare
+  // basenames here to avoid expensive request to file system.
+  if (llvm::sys::path::filename(Path) ==
+  llvm::sys::path::filename(FileName) &&
+  Comparator.equivalent(StringRef(Path), FileName))
 return StringRef(Path);
   return {};
 }
@@ -121,6 +126,15 @@
   if (!Result.empty() || IsAmbiguous)
 return Result;
 }
+
+// We failed to find the file with `Children.find()`.
+// If `ConsumedLength` is equal to 0 then we have tried to find the file
+// with it's basename. Thus, we do not have files with the same
+// basename and could return empty result here as far as we
+// do not support file symlinks.
+if (ConsumedLength == 0)
+  return {};
+
 std::vector AllChildren;
 getAll(AllChildren, MatchingChild);
 StringRef Result;
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D83621: [clang][Tooling] Try to avoid file system access if there is no record for the file in compile_commads.json

2020-07-15 Thread Sam McCall via Phabricator via cfe-commits
sammccall added a subscriber: chandlerc.
sammccall added a comment.

In D83621#2152618 , @ArcsinX wrote:

> In D83621#2151716 , @sammccall wrote:
>
> > In D83621#2146750 , @ArcsinX wrote:
> >
> > > > - don't scan for equivalences if the set of candidates exceeds some 
> > > > size threshold (10 or so)
> > > > - don't scan for equivalences if the node we'd scan under is the root
> > >
> > > After such fixes `JSONCompilationDatabase::getCompileCommands()` will 
> > > return other results than before in some cases.
> >
> >
> > Can you elaborate on this - do you think there are reasonable cases where 
> > it will give the wrong answer?
> >  I can certainly imagine cases where this changes behaviour, e.g. 
> > project/foo.cc is a symlink to project/bar.cc, compile_commands contains an 
> > entry for foo.cc, we query for bar.cc.
> >  But I don't think this was ever really intended to handle arbitrary 
> > symlinks that change the names of files, and I do think this is OK to break.
>
>
> If breaking `project/foo.cc <=> project/bar.cc` is OK, then could we also 
> break `some/path/here/foo.cc <=> some/other/path/bar.cc`?


Yes. We discussed offline a bit with @chandlerc who doesn't think identifying 
cases where symlinked files have different names was a motivating case or 
likely to be important.

> In that case we can just prevent `Comparator.equivalent(FileA,FileB)` calls 
> when `llvm::sys::path::filename(FileA) != llvm::sys::path::filename(FileB)`.

You can - but this is equivalent to not doing the `getAll()` path when the node 
is the root (i.e. when ConsumedLength == 0).
So I think `if (ConsumedLength == 0) return {}` around line 124 in 
FileMatchTrie with an appropriate comment would also do the trick, and avoid 
pointless traversal/copy/string comparisons (much cheaper than IO but not free).

> I tried this locally:
> 
> - it works fast.
> - does not break tests.
> - easy to add unit test for `FileMatchTrie`.
> 
>   What do you think?

+1, happy with this behavior.
I think we should try the implementation that skips the traversal first - it's 
no more code and should be a bit more efficient.
But either way works, really.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D83621/new/

https://reviews.llvm.org/D83621



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D83621: [clang][Tooling] Try to avoid file system access if there is no record for the file in compile_commads.json

2020-07-15 Thread Aleksandr Platonov via Phabricator via cfe-commits
ArcsinX added a comment.

In D83621#2151716 , @sammccall wrote:

> In D83621#2146750 , @ArcsinX wrote:
>
> > > - don't scan for equivalences if the set of candidates exceeds some size 
> > > threshold (10 or so)
> > > - don't scan for equivalences if the node we'd scan under is the root
> >
> > After such fixes `JSONCompilationDatabase::getCompileCommands()` will 
> > return other results than before in some cases.
>
>
> Can you elaborate on this - do you think there are reasonable cases where it 
> will give the wrong answer?
>  I can certainly imagine cases where this changes behaviour, e.g. 
> project/foo.cc is a symlink to project/bar.cc, compile_commands contains an 
> entry for foo.cc, we query for bar.cc.
>  But I don't think this was ever really intended to handle arbitrary symlinks 
> that change the names of files, and I do think this is OK to break.


If breaking `project/foo.cc <=> project/bar.cc` is OK, then could we also break 
`some/path/here/foo.cc <=> some/other/path/bar.cc`?

In that case we can just prevent `Comparator.equivalent(FileA,FileB)` calls 
when `llvm::sys::path::filename(FileA) != llvm::sys::path::filename(FileB)`.
I tried this locally:

- it works fast.
- does not break tests.
- easy to add unit test for `FileMatchTrie`.

What do you think?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D83621/new/

https://reviews.llvm.org/D83621



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D83621: [clang][Tooling] Try to avoid file system access if there is no record for the file in compile_commads.json

2020-07-14 Thread Sam McCall via Phabricator via cfe-commits
sammccall added a comment.

In D83621#2146750 , @ArcsinX wrote:

> > - don't scan for equivalences if the set of candidates exceeds some size 
> > threshold (10 or so)
> > - don't scan for equivalences if the node we'd scan under is the root
>
> After such fixes `JSONCompilationDatabase::getCompileCommands()` will return 
> other results than before in some cases.


Can you elaborate on this - do you think there are reasonable cases where it 
will give the wrong answer?
I can certainly imagine cases where this changes behaviour, e.g. project/foo.cc 
is a symlink to project/bar.cc, compile_commands contains an entry for foo.cc, 
we query for bar.cc.
But I don't think this was ever really intended to handle arbitrary symlinks 
that change the names of files, and I do think this is OK to break.

>   For my experience InterpolatingCompilationDatabase works pretty well for 
> symlinks cases.

This sounds reasonable but is a bit anecdotal - people use different build 
systems/source layouts, some of them use symlinks heavily and some not at all.
InterpolatingCompilationDatabase often works well but it's very 
fuzzy/heuristic, and there are layouts where it really doesn't work well.
(At the same time I feel uncomfortable because I'm not sure what would be 
sufficient evidence to remove the filematchtrie)


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D83621/new/

https://reviews.llvm.org/D83621



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D83621: [clang][Tooling] Try to avoid file system access if there is no record for the file in compile_commads.json

2020-07-13 Thread Aleksandr Platonov via Phabricator via cfe-commits
ArcsinX added a comment.

In D83621#2146746 , @klimek wrote:

> IIRC the symlink checking was there because some part of the system on linux 
> tends to give us realpaths (with CMake builds), and that leads to not finding 
> anything if there are symlinks involved.


That was before `InterpolatingCompilationDatabase` introduction in 
https://reviews.llvm.org/D45006.
For my experience `InterpolatingCompilationDatabase` works pretty well for 
symlinks cases.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D83621/new/

https://reviews.llvm.org/D83621



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D83621: [clang][Tooling] Try to avoid file system access if there is no record for the file in compile_commads.json

2020-07-13 Thread Aleksandr Platonov via Phabricator via cfe-commits
ArcsinX added a comment.

In D83621#2146706 , @sammccall wrote:

> Wow, yeah, this seems pretty bad! I'm not very familiar with this code. I'm 
> curious, for "it tooks more than 1 second" - what OS is this on?


Windows.
I faced this problem on a huge project and it takes 7 seconds for 
`MatchTrie.findEquivalent()` to return.

> My guess is that way back when, the performance of looking up a CDB entry 
> that didn't exist wasn't a big deal, or windows support wasn't a big thing, 
> or this was just an oversight.
> 
> I think it would be slightly cleaner to fix this in the trie itself, either:

Firstly, I wanted to fix this problem in `FileMatchTrie`, but for caching 
solution we need to update cache at every `instert()` call, but for caching 
solution inside `JSONCompilationDatabase` we can avoid this, because all 
inserts already done at `JSONCompilationDatabase::parse()`.

> - don't scan for equivalences if the set of candidates exceeds some size 
> threshold (10 or so)
> - don't scan for equivalences if the node we'd scan under is the root

After such fixes `JSONCompilationDatabase::getCompileCommands()` will return 
other results than before in some cases.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D83621/new/

https://reviews.llvm.org/D83621



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D83621: [clang][Tooling] Try to avoid file system access if there is no record for the file in compile_commads.json

2020-07-13 Thread Manuel Klimek via Phabricator via cfe-commits
klimek added a subscriber: djasper.
klimek added a comment.

@djasper wrote this iirc, but I doubt he'll have much time to look into this :)

IIRC the symlink checking was there because some part of the system on linux 
tends to give us realpaths (with CMake builds), and that leads to not finding 
anything if there are symlinks involved.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D83621/new/

https://reviews.llvm.org/D83621



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D83621: [clang][Tooling] Try to avoid file system access if there is no record for the file in compile_commads.json

2020-07-13 Thread Sam McCall via Phabricator via cfe-commits
sammccall added a comment.

> But I am not sure, is it safe to completely remove FileMatchTrie?

Missed this... honestly I don't know. It does seem a little overly defensive to 
me, but any change in symlink behaviour tends to break *someone*.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D83621/new/

https://reviews.llvm.org/D83621



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D83621: [clang][Tooling] Try to avoid file system access if there is no record for the file in compile_commads.json

2020-07-13 Thread Sam McCall via Phabricator via cfe-commits
sammccall added a subscriber: klimek.
sammccall added a comment.

Wow, yeah, this seems pretty bad! I'm not very familiar with this code. I'm 
curious, for "it tooks more than 1 second" - what OS is this on?

My guess is that way back when, the performance of looking up a CDB entry that 
didn't exist wasn't a big deal, or windows support wasn't a big thing, or this 
was just an oversight.

My reading of the code is that any TrieNode is a set of candidates that match a 
suffix of the path equally well, and we're trying to walk down the tree making 
the suffix longer and the candidate set more restrictive.
When we reach the node whether the path stops matching, we scan all the 
candidates to see if they're symlink-equivalent.
This is expensive if the set is too large, which is trivially the case if the 
filename is unique in the project (so we never get past the root node, and stat 
every file in the project).

I think it would be slightly cleaner to fix this in the trie itself, either:

- don't scan for equivalences if the set of candidates exceeds some size 
threshold (10 or so)
- don't scan for equivalences if the node we'd scan under is the root

@klimek In case you have context or thoughts here. Always fun to find a 9 year 
old bug :-)


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D83621/new/

https://reviews.llvm.org/D83621



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D83621: [clang][Tooling] Try to avoid file system access if there is no record for the file in compile_commads.json

2020-07-11 Thread Aleksandr Platonov via Phabricator via cfe-commits
ArcsinX added a comment.

Also I think that `FileMatchTrie` introduced in https://reviews.llvm.org/D30 is 
not efficient solution to fight with symlinks and for most cases 
`InterpolatingCompilationDatabase` will be accurate enough. But I am not sure, 
is it safe to completely remove `FileMatchTrie`? What do you think?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D83621/new/

https://reviews.llvm.org/D83621



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D83621: [clang][Tooling] Try to avoid file system access if there is no record for the file in compile_commads.json

2020-07-11 Thread Aleksandr Platonov via Phabricator via cfe-commits
ArcsinX created this revision.
Herald added subscribers: cfe-commits, usaxena95, kadircet, ilya-biryukov.
Herald added a project: clang.

If there is no record in compile_commands.json, we try to find suitable record 
with `MatchTrie.findEquivalent()` call.
This is very expensive operation with a lot of `llvm::sys::fs::equivalent()` 
calls in some cases.

This patch adds caching for `MatchTrie.findEquivalent()` call result.

Example scenario without this patch:

- compile_commands.json generated at clangd build (contains ~3000 files)..
- it tooks more than 1 second to get compile command for newly created file in 
the root folder of LLVM project.
- we wait for 1 second every time when clangd requests compile command for this 
file (at file change).


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D83621

Files:
  clang/include/clang/Tooling/JSONCompilationDatabase.h
  clang/lib/Tooling/JSONCompilationDatabase.cpp


Index: clang/lib/Tooling/JSONCompilationDatabase.cpp
===
--- clang/lib/Tooling/JSONCompilationDatabase.cpp
+++ clang/lib/Tooling/JSONCompilationDatabase.cpp
@@ -230,14 +230,28 @@
   SmallString<128> NativeFilePath;
   llvm::sys::path::native(FilePath, NativeFilePath);
 
-  std::string Error;
-  llvm::raw_string_ostream ES(Error);
-  StringRef Match = MatchTrie.findEquivalent(NativeFilePath, ES);
-  if (Match.empty())
-return {};
-  const auto CommandsRefI = IndexByFile.find(Match);
-  if (CommandsRefI == IndexByFile.end())
-return {};
+  // Avoid usage of `MatchTrie` if possible.
+  auto CommandsRefI = IndexByFile.find(NativeFilePath);
+  if (CommandsRefI == IndexByFile.end()) {
+llvm::StringRef Match;
+// Try to get cached value.
+auto MatchIt = MatchCache.find(NativeFilePath);
+if (MatchIt == MatchCache.end()) {
+  std::string Error;
+  llvm::raw_string_ostream ES(Error);
+  Match = MatchTrie.findEquivalent(NativeFilePath, ES);
+  // Save into cache even if the match result is empty.
+  MatchCache[NativeFilePath] = Match;
+} else {
+  // Cached value.
+  Match = MatchIt->second;
+}
+if (Match.empty())
+  return {};
+CommandsRefI = IndexByFile.find(Match);
+if (CommandsRefI == IndexByFile.end())
+  return {};
+  }
   std::vector Commands;
   getCommands(CommandsRefI->getValue(), Commands);
   return Commands;
Index: clang/include/clang/Tooling/JSONCompilationDatabase.h
===
--- clang/include/clang/Tooling/JSONCompilationDatabase.h
+++ clang/include/clang/Tooling/JSONCompilationDatabase.h
@@ -129,6 +129,8 @@
   std::vector AllCommands;
 
   FileMatchTrie MatchTrie;
+  // Cache for `MatchTrie`.
+  mutable llvm::StringMap MatchCache;
 
   std::unique_ptr Database;
   JSONCommandLineSyntax Syntax;


Index: clang/lib/Tooling/JSONCompilationDatabase.cpp
===
--- clang/lib/Tooling/JSONCompilationDatabase.cpp
+++ clang/lib/Tooling/JSONCompilationDatabase.cpp
@@ -230,14 +230,28 @@
   SmallString<128> NativeFilePath;
   llvm::sys::path::native(FilePath, NativeFilePath);
 
-  std::string Error;
-  llvm::raw_string_ostream ES(Error);
-  StringRef Match = MatchTrie.findEquivalent(NativeFilePath, ES);
-  if (Match.empty())
-return {};
-  const auto CommandsRefI = IndexByFile.find(Match);
-  if (CommandsRefI == IndexByFile.end())
-return {};
+  // Avoid usage of `MatchTrie` if possible.
+  auto CommandsRefI = IndexByFile.find(NativeFilePath);
+  if (CommandsRefI == IndexByFile.end()) {
+llvm::StringRef Match;
+// Try to get cached value.
+auto MatchIt = MatchCache.find(NativeFilePath);
+if (MatchIt == MatchCache.end()) {
+  std::string Error;
+  llvm::raw_string_ostream ES(Error);
+  Match = MatchTrie.findEquivalent(NativeFilePath, ES);
+  // Save into cache even if the match result is empty.
+  MatchCache[NativeFilePath] = Match;
+} else {
+  // Cached value.
+  Match = MatchIt->second;
+}
+if (Match.empty())
+  return {};
+CommandsRefI = IndexByFile.find(Match);
+if (CommandsRefI == IndexByFile.end())
+  return {};
+  }
   std::vector Commands;
   getCommands(CommandsRefI->getValue(), Commands);
   return Commands;
Index: clang/include/clang/Tooling/JSONCompilationDatabase.h
===
--- clang/include/clang/Tooling/JSONCompilationDatabase.h
+++ clang/include/clang/Tooling/JSONCompilationDatabase.h
@@ -129,6 +129,8 @@
   std::vector AllCommands;
 
   FileMatchTrie MatchTrie;
+  // Cache for `MatchTrie`.
+  mutable llvm::StringMap MatchCache;
 
   std::unique_ptr Database;
   JSONCommandLineSyntax Syntax;
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits