[PATCH] D59481: [clangd] Count number of references while merging RefSlabs inside FileIndex

2019-05-09 Thread Kadir Cetinkaya via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rCTE360344: [clangd] Count number of references while merging 
RefSlabs inside FileIndex (authored by kadircet, committed by ).

Changed prior to commit:
  https://reviews.llvm.org/D59481?vs=198816=198818#toc

Repository:
  rCTE Clang Tools Extra

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59481/new/

https://reviews.llvm.org/D59481

Files:
  clangd/index/Background.cpp
  clangd/index/FileIndex.cpp
  clangd/index/FileIndex.h
  clangd/index/IndexAction.cpp
  clangd/indexer/IndexerMain.cpp
  clangd/unittests/BackgroundIndexTests.cpp
  clangd/unittests/FileIndexTests.cpp

Index: clangd/unittests/BackgroundIndexTests.cpp
===
--- clangd/unittests/BackgroundIndexTests.cpp
+++ clangd/unittests/BackgroundIndexTests.cpp
@@ -32,6 +32,7 @@
   return !arg.IsTU && !arg.URI.empty() && arg.Digest == FileDigest{{0}} &&
  arg.DirectIncludes.empty();
 }
+MATCHER_P(NumReferences, N, "") { return arg.References == N; }
 
 class MemoryShardStorage : public BackgroundIndexStorage {
   mutable std::mutex StorageMu;
@@ -112,6 +113,9 @@
   #include "A.h"
   void f_b() {
 (void)common;
+(void)common;
+(void)common;
+(void)common;
   })cpp";
   llvm::StringMap Storage;
   size_t CacheHits = 0;
@@ -127,20 +131,25 @@
   CDB.setCompileCommand(testPath("root/A.cc"), Cmd);
 
   ASSERT_TRUE(Idx.blockUntilIdleForTest());
-  EXPECT_THAT(
-  runFuzzyFind(Idx, ""),
-  UnorderedElementsAre(Named("common"), Named("A_CC"), Named("g"),
-   AllOf(Named("f_b"), Declared(), Not(Defined();
+  EXPECT_THAT(runFuzzyFind(Idx, ""),
+  UnorderedElementsAre(AllOf(Named("common"), NumReferences(1U)),
+   AllOf(Named("A_CC"), NumReferences(0U)),
+   AllOf(Named("g"), NumReferences(0U)),
+   AllOf(Named("f_b"), Declared(),
+ Not(Defined()), NumReferences(0U;
 
   Cmd.Filename = testPath("root/B.cc");
   Cmd.CommandLine = {"clang++", Cmd.Filename};
-  CDB.setCompileCommand(testPath("root/A.cc"), Cmd);
+  CDB.setCompileCommand(testPath("root/B.cc"), Cmd);
 
   ASSERT_TRUE(Idx.blockUntilIdleForTest());
   // B_CC is dropped as we don't collect symbols from A.h in this compilation.
   EXPECT_THAT(runFuzzyFind(Idx, ""),
-  UnorderedElementsAre(Named("common"), Named("A_CC"), Named("g"),
-   AllOf(Named("f_b"), Declared(), Defined(;
+  UnorderedElementsAre(AllOf(Named("common"), NumReferences(5U)),
+   AllOf(Named("A_CC"), NumReferences(0U)),
+   AllOf(Named("g"), NumReferences(0U)),
+   AllOf(Named("f_b"), Declared(), Defined(),
+ NumReferences(1U;
 
   auto Syms = runFuzzyFind(Idx, "common");
   EXPECT_THAT(Syms, UnorderedElementsAre(Named("common")));
@@ -148,6 +157,9 @@
   EXPECT_THAT(getRefs(Idx, Common.ID),
   RefsAre({FileURI("unittest:///root/A.h"),
FileURI("unittest:///root/A.cc"),
+   FileURI("unittest:///root/B.cc"),
+   FileURI("unittest:///root/B.cc"),
+   FileURI("unittest:///root/B.cc"),
FileURI("unittest:///root/B.cc")}));
 }
 
Index: clangd/unittests/FileIndexTests.cpp
===
--- clangd/unittests/FileIndexTests.cpp
+++ clangd/unittests/FileIndexTests.cpp
@@ -45,6 +45,7 @@
   return llvm::StringRef(arg.Definition.FileURI) == U;
 }
 MATCHER_P(QName, N, "") { return (arg.Scope + arg.Name).str() == N; }
+MATCHER_P(NumReferences, N, "") { return arg.References == N; }
 
 namespace clang {
 namespace clangd {
@@ -81,7 +82,7 @@
   FileSymbols FS;
   EXPECT_THAT(runFuzzyFind(*FS.buildIndex(IndexType::Light), ""), IsEmpty());
 
-  FS.update("f1", numSlab(1, 3), refSlab(SymbolID("1"), "f1.cc"));
+  FS.update("f1", numSlab(1, 3), refSlab(SymbolID("1"), "f1.cc"), false);
   EXPECT_THAT(runFuzzyFind(*FS.buildIndex(IndexType::Light), ""),
   UnorderedElementsAre(QName("1"), QName("2"), QName("3")));
   EXPECT_THAT(getRefs(*FS.buildIndex(IndexType::Light), SymbolID("1")),
@@ -90,8 +91,8 @@
 
 TEST(FileSymbolsTest, Overlap) {
   FileSymbols FS;
-  FS.update("f1", numSlab(1, 3), nullptr);
-  FS.update("f2", numSlab(3, 5), nullptr);
+  FS.update("f1", numSlab(1, 3), nullptr, false);
+  FS.update("f2", numSlab(3, 5), nullptr, false);
   for (auto Type : {IndexType::Light, IndexType::Heavy})
 EXPECT_THAT(runFuzzyFind(*FS.buildIndex(Type), ""),
 UnorderedElementsAre(QName("1"), QName("2"), QName("3"),
@@ -110,8 +111,8 @@
   auto X2 = symbol("x");
   

[PATCH] D59481: [clangd] Count number of references while merging RefSlabs inside FileIndex

2019-05-09 Thread Kadir Cetinkaya via Phabricator via cfe-commits
kadircet updated this revision to Diff 198816.
kadircet marked 2 inline comments as done.
kadircet added a comment.

- Address comments


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59481/new/

https://reviews.llvm.org/D59481

Files:
  clang-tools-extra/clangd/index/Background.cpp
  clang-tools-extra/clangd/index/FileIndex.cpp
  clang-tools-extra/clangd/index/FileIndex.h
  clang-tools-extra/clangd/index/IndexAction.cpp
  clang-tools-extra/clangd/indexer/IndexerMain.cpp
  clang-tools-extra/clangd/unittests/BackgroundIndexTests.cpp
  clang-tools-extra/clangd/unittests/FileIndexTests.cpp

Index: clang-tools-extra/clangd/unittests/FileIndexTests.cpp
===
--- clang-tools-extra/clangd/unittests/FileIndexTests.cpp
+++ clang-tools-extra/clangd/unittests/FileIndexTests.cpp
@@ -45,6 +45,7 @@
   return llvm::StringRef(arg.Definition.FileURI) == U;
 }
 MATCHER_P(QName, N, "") { return (arg.Scope + arg.Name).str() == N; }
+MATCHER_P(NumReferences, N, "") { return arg.References == N; }
 
 namespace clang {
 namespace clangd {
@@ -81,7 +82,7 @@
   FileSymbols FS;
   EXPECT_THAT(runFuzzyFind(*FS.buildIndex(IndexType::Light), ""), IsEmpty());
 
-  FS.update("f1", numSlab(1, 3), refSlab(SymbolID("1"), "f1.cc"));
+  FS.update("f1", numSlab(1, 3), refSlab(SymbolID("1"), "f1.cc"), false);
   EXPECT_THAT(runFuzzyFind(*FS.buildIndex(IndexType::Light), ""),
   UnorderedElementsAre(QName("1"), QName("2"), QName("3")));
   EXPECT_THAT(getRefs(*FS.buildIndex(IndexType::Light), SymbolID("1")),
@@ -90,8 +91,8 @@
 
 TEST(FileSymbolsTest, Overlap) {
   FileSymbols FS;
-  FS.update("f1", numSlab(1, 3), nullptr);
-  FS.update("f2", numSlab(3, 5), nullptr);
+  FS.update("f1", numSlab(1, 3), nullptr, false);
+  FS.update("f2", numSlab(3, 5), nullptr, false);
   for (auto Type : {IndexType::Light, IndexType::Heavy})
 EXPECT_THAT(runFuzzyFind(*FS.buildIndex(Type), ""),
 UnorderedElementsAre(QName("1"), QName("2"), QName("3"),
@@ -110,8 +111,8 @@
   auto X2 = symbol("x");
   X2.Definition.FileURI = "file:///x2";
 
-  FS.update("f1", OneSymboSlab(X1), nullptr);
-  FS.update("f2", OneSymboSlab(X2), nullptr);
+  FS.update("f1", OneSymboSlab(X1), nullptr, false);
+  FS.update("f2", OneSymboSlab(X2), nullptr, false);
   for (auto Type : {IndexType::Light, IndexType::Heavy})
 EXPECT_THAT(
 runFuzzyFind(*FS.buildIndex(Type, DuplicateHandling::Merge), "x"),
@@ -123,14 +124,14 @@
   FileSymbols FS;
 
   SymbolID ID("1");
-  FS.update("f1", numSlab(1, 3), refSlab(ID, "f1.cc"));
+  FS.update("f1", numSlab(1, 3), refSlab(ID, "f1.cc"), false);
 
   auto Symbols = FS.buildIndex(IndexType::Light);
   EXPECT_THAT(runFuzzyFind(*Symbols, ""),
   UnorderedElementsAre(QName("1"), QName("2"), QName("3")));
   EXPECT_THAT(getRefs(*Symbols, ID), RefsAre({FileURI("f1.cc")}));
 
-  FS.update("f1", nullptr, nullptr);
+  FS.update("f1", nullptr, nullptr, false);
   auto Empty = FS.buildIndex(IndexType::Light);
   EXPECT_THAT(runFuzzyFind(*Empty, ""), IsEmpty());
   EXPECT_THAT(getRefs(*Empty, ID), ElementsAre());
@@ -366,6 +367,33 @@
   RefsAre({RefRange(Main.range())}));
 }
 
+TEST(FileSymbolsTest, CountReferencesNoRefSlabs) {
+  FileSymbols FS;
+  FS.update("f1", numSlab(1, 3), nullptr, true);
+  FS.update("f2", numSlab(1, 3), nullptr, false);
+  EXPECT_THAT(
+  runFuzzyFind(*FS.buildIndex(IndexType::Light, DuplicateHandling::Merge),
+   ""),
+  UnorderedElementsAre(AllOf(QName("1"), NumReferences(0u)),
+   AllOf(QName("2"), NumReferences(0u)),
+   AllOf(QName("3"), NumReferences(0u;
+}
+
+TEST(FileSymbolsTest, CountReferencesWithRefSlabs) {
+  FileSymbols FS;
+  FS.update("f1cpp", numSlab(1, 3), refSlab(SymbolID("1"), "f1.cpp"), true);
+  FS.update("f1h", numSlab(1, 3), refSlab(SymbolID("1"), "f1.h"), false);
+  FS.update("f2cpp", numSlab(1, 3), refSlab(SymbolID("2"), "f2.cpp"), true);
+  FS.update("f2h", numSlab(1, 3), refSlab(SymbolID("2"), "f2.h"), false);
+  FS.update("f3cpp", numSlab(1, 3), refSlab(SymbolID("3"), "f3.cpp"), true);
+  FS.update("f3h", numSlab(1, 3), refSlab(SymbolID("3"), "f3.h"), false);
+  EXPECT_THAT(
+  runFuzzyFind(*FS.buildIndex(IndexType::Light, DuplicateHandling::Merge),
+   ""),
+  UnorderedElementsAre(AllOf(QName("1"), NumReferences(1u)),
+   AllOf(QName("2"), NumReferences(1u)),
+   AllOf(QName("3"), NumReferences(1u;
+}
 } // namespace
 } // namespace clangd
 } // namespace clang
Index: clang-tools-extra/clangd/unittests/BackgroundIndexTests.cpp
===
--- clang-tools-extra/clangd/unittests/BackgroundIndexTests.cpp
+++ clang-tools-extra/clangd/unittests/BackgroundIndexTests.cpp
@@ -32,6 +32,7 @@
   return !arg.IsTU && !arg.URI.empty() && 

[PATCH] D59481: [clangd] Count number of references while merging RefSlabs inside FileIndex

2019-05-09 Thread Ilya Biryukov via Phabricator via cfe-commits
ilya-biryukov accepted this revision.
ilya-biryukov added a comment.
This revision is now accepted and ready to land.

LGTM




Comment at: clang-tools-extra/clangd/index/FileIndex.cpp:101
 FileToRefs.erase(Path);
-  else
-FileToRefs[Path] = std::move(Refs);
+  else {
+RefSlabAndCountReferences Item;

NIT: maybe simplify to the following?
```
if (!Refs) {
  FileToRefs.erase(Path);
  return;
}
...
```

up to you.



Comment at: clang-tools-extra/clangd/index/FileIndex.h:63
   /// If either is nullptr, corresponding data for \p Path will be removed.
+  /// If CountReferences is true, Refs will be used for counting References
+  /// during merging.

NIT: use `\p Refs`


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59481/new/

https://reviews.llvm.org/D59481



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D59481: [clangd] Count number of references while merging RefSlabs inside FileIndex

2019-05-09 Thread Kadir Cetinkaya via Phabricator via cfe-commits
kadircet updated this revision to Diff 198770.
kadircet marked 7 inline comments as done.
kadircet added a comment.

- Address comments


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59481/new/

https://reviews.llvm.org/D59481

Files:
  clang-tools-extra/clangd/index/Background.cpp
  clang-tools-extra/clangd/index/FileIndex.cpp
  clang-tools-extra/clangd/index/FileIndex.h
  clang-tools-extra/clangd/index/IndexAction.cpp
  clang-tools-extra/clangd/indexer/IndexerMain.cpp
  clang-tools-extra/clangd/unittests/BackgroundIndexTests.cpp
  clang-tools-extra/clangd/unittests/FileIndexTests.cpp

Index: clang-tools-extra/clangd/unittests/FileIndexTests.cpp
===
--- clang-tools-extra/clangd/unittests/FileIndexTests.cpp
+++ clang-tools-extra/clangd/unittests/FileIndexTests.cpp
@@ -45,6 +45,7 @@
   return llvm::StringRef(arg.Definition.FileURI) == U;
 }
 MATCHER_P(QName, N, "") { return (arg.Scope + arg.Name).str() == N; }
+MATCHER_P(NumReferences, N, "") { return arg.References == N; }
 
 namespace clang {
 namespace clangd {
@@ -81,7 +82,7 @@
   FileSymbols FS;
   EXPECT_THAT(runFuzzyFind(*FS.buildIndex(IndexType::Light), ""), IsEmpty());
 
-  FS.update("f1", numSlab(1, 3), refSlab(SymbolID("1"), "f1.cc"));
+  FS.update("f1", numSlab(1, 3), refSlab(SymbolID("1"), "f1.cc"), false);
   EXPECT_THAT(runFuzzyFind(*FS.buildIndex(IndexType::Light), ""),
   UnorderedElementsAre(QName("1"), QName("2"), QName("3")));
   EXPECT_THAT(getRefs(*FS.buildIndex(IndexType::Light), SymbolID("1")),
@@ -90,8 +91,8 @@
 
 TEST(FileSymbolsTest, Overlap) {
   FileSymbols FS;
-  FS.update("f1", numSlab(1, 3), nullptr);
-  FS.update("f2", numSlab(3, 5), nullptr);
+  FS.update("f1", numSlab(1, 3), nullptr, false);
+  FS.update("f2", numSlab(3, 5), nullptr, false);
   for (auto Type : {IndexType::Light, IndexType::Heavy})
 EXPECT_THAT(runFuzzyFind(*FS.buildIndex(Type), ""),
 UnorderedElementsAre(QName("1"), QName("2"), QName("3"),
@@ -110,8 +111,8 @@
   auto X2 = symbol("x");
   X2.Definition.FileURI = "file:///x2";
 
-  FS.update("f1", OneSymboSlab(X1), nullptr);
-  FS.update("f2", OneSymboSlab(X2), nullptr);
+  FS.update("f1", OneSymboSlab(X1), nullptr, false);
+  FS.update("f2", OneSymboSlab(X2), nullptr, false);
   for (auto Type : {IndexType::Light, IndexType::Heavy})
 EXPECT_THAT(
 runFuzzyFind(*FS.buildIndex(Type, DuplicateHandling::Merge), "x"),
@@ -123,14 +124,14 @@
   FileSymbols FS;
 
   SymbolID ID("1");
-  FS.update("f1", numSlab(1, 3), refSlab(ID, "f1.cc"));
+  FS.update("f1", numSlab(1, 3), refSlab(ID, "f1.cc"), false);
 
   auto Symbols = FS.buildIndex(IndexType::Light);
   EXPECT_THAT(runFuzzyFind(*Symbols, ""),
   UnorderedElementsAre(QName("1"), QName("2"), QName("3")));
   EXPECT_THAT(getRefs(*Symbols, ID), RefsAre({FileURI("f1.cc")}));
 
-  FS.update("f1", nullptr, nullptr);
+  FS.update("f1", nullptr, nullptr, false);
   auto Empty = FS.buildIndex(IndexType::Light);
   EXPECT_THAT(runFuzzyFind(*Empty, ""), IsEmpty());
   EXPECT_THAT(getRefs(*Empty, ID), ElementsAre());
@@ -366,6 +367,33 @@
   RefsAre({RefRange(Main.range())}));
 }
 
+TEST(FileSymbolsTest, CountReferencesNoRefSlabs) {
+  FileSymbols FS;
+  FS.update("f1", numSlab(1, 3), nullptr, true);
+  FS.update("f2", numSlab(1, 3), nullptr, false);
+  EXPECT_THAT(
+  runFuzzyFind(*FS.buildIndex(IndexType::Light, DuplicateHandling::Merge),
+   ""),
+  UnorderedElementsAre(AllOf(QName("1"), NumReferences(0u)),
+   AllOf(QName("2"), NumReferences(0u)),
+   AllOf(QName("3"), NumReferences(0u;
+}
+
+TEST(FileSymbolsTest, CountReferencesWithRefSlabs) {
+  FileSymbols FS;
+  FS.update("f1cpp", numSlab(1, 3), refSlab(SymbolID("1"), "f1.cpp"), true);
+  FS.update("f1h", numSlab(1, 3), refSlab(SymbolID("1"), "f1.h"), false);
+  FS.update("f2cpp", numSlab(1, 3), refSlab(SymbolID("2"), "f2.cpp"), true);
+  FS.update("f2h", numSlab(1, 3), refSlab(SymbolID("2"), "f2.h"), false);
+  FS.update("f3cpp", numSlab(1, 3), refSlab(SymbolID("3"), "f3.cpp"), true);
+  FS.update("f3h", numSlab(1, 3), refSlab(SymbolID("3"), "f3.h"), false);
+  EXPECT_THAT(
+  runFuzzyFind(*FS.buildIndex(IndexType::Light, DuplicateHandling::Merge),
+   ""),
+  UnorderedElementsAre(AllOf(QName("1"), NumReferences(1u)),
+   AllOf(QName("2"), NumReferences(1u)),
+   AllOf(QName("3"), NumReferences(1u;
+}
 } // namespace
 } // namespace clangd
 } // namespace clang
Index: clang-tools-extra/clangd/unittests/BackgroundIndexTests.cpp
===
--- clang-tools-extra/clangd/unittests/BackgroundIndexTests.cpp
+++ clang-tools-extra/clangd/unittests/BackgroundIndexTests.cpp
@@ -32,6 +32,7 @@
   return !arg.IsTU && !arg.URI.empty() && 

[PATCH] D59481: [clangd] Count number of references while merging RefSlabs inside FileIndex

2019-05-08 Thread Ilya Biryukov via Phabricator via cfe-commits
ilya-biryukov added inline comments.



Comment at: clang-tools-extra/clangd/index/Background.cpp:482
 FileDigest Digest;
+bool IsMainFile;
   };

NIT: maybe initialize with `=false` to avoid potential UB.
`Digest` seems uninitialized too, could also `={}` for it.

Sorry if this was discussed before and I'm repeating myself, I vaguely remember 
something similar but not sure what the conclusion was.



Comment at: clang-tools-extra/clangd/index/FileIndex.cpp:116
+  RefSlabs.push_back(FileAndRefs.second.first);
+  if (FileAndRefs.second.second)
+MainFileRefs.push_back(RefSlabs.back().get());

Let's use a named struct here? `.second.second` is super-hard to read



Comment at: clang-tools-extra/clangd/index/FileIndex.cpp:125
+// Keeps index to symbol in SymsStorage.
+llvm::DenseMap Merged;
 for (const auto  : SymbolSlabs) {

Could we avoid changing the merge logic?
I.e. the proposal is to keep the merging code the same and add a loop that 
counts references at the end.



Comment at: clang-tools-extra/clangd/index/FileIndex.h:60
+///
+/// Will count Symbol::References based on number of references in the main
+/// files, while building the index with DuplicateHandling::Merge option.

NIT: this comment seems more appropriate at `buildIndex`, maybe move it there?
(It has the `DuplicateHandle` parameter, so it's closer to the context.



Comment at: clang-tools-extra/clangd/index/FileIndex.h:66
   /// If either is nullptr, corresponding data for \p Path will be removed.
+  /// If IsMainFile is true, Refs will be used for counting References during
+  /// merging.

Maybe call the parameter `CountReferences`?
It would narrow down what the code can do with it and make the code easier to 
understand



Comment at: clang-tools-extra/clangd/unittests/FileIndexTests.cpp:49
+MATCHER_P(NumReferences, N, "") {
+  return arg.References == static_cast(N);
+}

NIT: to avoid warnings, we could use the corresponding suffix at the callsite 
(e.g. `1u`) instead of casts
It's probably ok either way here, but having casts can lead to surprises if one 
passes negative numbers there


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59481/new/

https://reviews.llvm.org/D59481



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D59481: [clangd] Count number of references while merging RefSlabs inside FileIndex

2019-04-30 Thread Kadir Cetinkaya via Phabricator via cfe-commits
kadircet updated this revision to Diff 197288.
kadircet added a comment.

- Change logic to count references from only main files


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59481/new/

https://reviews.llvm.org/D59481

Files:
  clang-tools-extra/clangd/index/Background.cpp
  clang-tools-extra/clangd/index/FileIndex.cpp
  clang-tools-extra/clangd/index/FileIndex.h
  clang-tools-extra/clangd/index/IndexAction.cpp
  clang-tools-extra/clangd/indexer/IndexerMain.cpp
  clang-tools-extra/clangd/unittests/BackgroundIndexTests.cpp
  clang-tools-extra/clangd/unittests/FileIndexTests.cpp

Index: clang-tools-extra/clangd/unittests/FileIndexTests.cpp
===
--- clang-tools-extra/clangd/unittests/FileIndexTests.cpp
+++ clang-tools-extra/clangd/unittests/FileIndexTests.cpp
@@ -45,6 +45,9 @@
   return llvm::StringRef(arg.Definition.FileURI) == U;
 }
 MATCHER_P(QName, N, "") { return (arg.Scope + arg.Name).str() == N; }
+MATCHER_P(NumReferences, N, "") {
+  return arg.References == static_cast(N);
+}
 
 namespace clang {
 namespace clangd {
@@ -81,7 +84,7 @@
   FileSymbols FS;
   EXPECT_THAT(runFuzzyFind(*FS.buildIndex(IndexType::Light), ""), IsEmpty());
 
-  FS.update("f1", numSlab(1, 3), refSlab(SymbolID("1"), "f1.cc"));
+  FS.update("f1", numSlab(1, 3), refSlab(SymbolID("1"), "f1.cc"), false);
   EXPECT_THAT(runFuzzyFind(*FS.buildIndex(IndexType::Light), ""),
   UnorderedElementsAre(QName("1"), QName("2"), QName("3")));
   EXPECT_THAT(getRefs(*FS.buildIndex(IndexType::Light), SymbolID("1")),
@@ -90,8 +93,8 @@
 
 TEST(FileSymbolsTest, Overlap) {
   FileSymbols FS;
-  FS.update("f1", numSlab(1, 3), nullptr);
-  FS.update("f2", numSlab(3, 5), nullptr);
+  FS.update("f1", numSlab(1, 3), nullptr, false);
+  FS.update("f2", numSlab(3, 5), nullptr, false);
   for (auto Type : {IndexType::Light, IndexType::Heavy})
 EXPECT_THAT(runFuzzyFind(*FS.buildIndex(Type), ""),
 UnorderedElementsAre(QName("1"), QName("2"), QName("3"),
@@ -110,8 +113,8 @@
   auto X2 = symbol("x");
   X2.Definition.FileURI = "file:///x2";
 
-  FS.update("f1", OneSymboSlab(X1), nullptr);
-  FS.update("f2", OneSymboSlab(X2), nullptr);
+  FS.update("f1", OneSymboSlab(X1), nullptr, false);
+  FS.update("f2", OneSymboSlab(X2), nullptr, false);
   for (auto Type : {IndexType::Light, IndexType::Heavy})
 EXPECT_THAT(
 runFuzzyFind(*FS.buildIndex(Type, DuplicateHandling::Merge), "x"),
@@ -123,14 +126,14 @@
   FileSymbols FS;
 
   SymbolID ID("1");
-  FS.update("f1", numSlab(1, 3), refSlab(ID, "f1.cc"));
+  FS.update("f1", numSlab(1, 3), refSlab(ID, "f1.cc"), false);
 
   auto Symbols = FS.buildIndex(IndexType::Light);
   EXPECT_THAT(runFuzzyFind(*Symbols, ""),
   UnorderedElementsAre(QName("1"), QName("2"), QName("3")));
   EXPECT_THAT(getRefs(*Symbols, ID), RefsAre({FileURI("f1.cc")}));
 
-  FS.update("f1", nullptr, nullptr);
+  FS.update("f1", nullptr, nullptr, false);
   auto Empty = FS.buildIndex(IndexType::Light);
   EXPECT_THAT(runFuzzyFind(*Empty, ""), IsEmpty());
   EXPECT_THAT(getRefs(*Empty, ID), ElementsAre());
@@ -366,6 +369,33 @@
   RefsAre({RefRange(Main.range())}));
 }
 
+TEST(FileSymbolsTest, CountReferencesNoRefSlabs) {
+  FileSymbols FS;
+  FS.update("f1", numSlab(1, 3), nullptr, true);
+  FS.update("f2", numSlab(1, 3), nullptr, false);
+  EXPECT_THAT(
+  runFuzzyFind(*FS.buildIndex(IndexType::Light, DuplicateHandling::Merge),
+   ""),
+  UnorderedElementsAre(AllOf(QName("1"), NumReferences(0)),
+   AllOf(QName("2"), NumReferences(0)),
+   AllOf(QName("3"), NumReferences(0;
+}
+
+TEST(FileSymbolsTest, CountReferencesWithRefSlabs) {
+  FileSymbols FS;
+  FS.update("f1cpp", numSlab(1, 3), refSlab(SymbolID("1"), "f1.cpp"), true);
+  FS.update("f1h", numSlab(1, 3), refSlab(SymbolID("1"), "f1.h"), false);
+  FS.update("f2cpp", numSlab(1, 3), refSlab(SymbolID("2"), "f2.cpp"), true);
+  FS.update("f2h", numSlab(1, 3), refSlab(SymbolID("2"), "f2.h"), false);
+  FS.update("f3cpp", numSlab(1, 3), refSlab(SymbolID("3"), "f3.cpp"), true);
+  FS.update("f3h", numSlab(1, 3), refSlab(SymbolID("3"), "f3.h"), false);
+  EXPECT_THAT(
+  runFuzzyFind(*FS.buildIndex(IndexType::Light, DuplicateHandling::Merge),
+   ""),
+  UnorderedElementsAre(AllOf(QName("1"), NumReferences(1)),
+   AllOf(QName("2"), NumReferences(1)),
+   AllOf(QName("3"), NumReferences(1;
+}
 } // namespace
 } // namespace clangd
 } // namespace clang
Index: clang-tools-extra/clangd/unittests/BackgroundIndexTests.cpp
===
--- clang-tools-extra/clangd/unittests/BackgroundIndexTests.cpp
+++ clang-tools-extra/clangd/unittests/BackgroundIndexTests.cpp
@@ -32,6 +32,7 @@
   return !arg.IsTU && !arg.URI.empty() && 

[PATCH] D59481: [clangd] Count number of references while merging RefSlabs inside FileIndex

2019-04-29 Thread Ilya Biryukov via Phabricator via cfe-commits
ilya-biryukov added inline comments.



Comment at: clang-tools-extra/clangd/index/Background.h:113
 
+  // Note that FileSymbols counts References by incrementing it per each file
+  // mentioning the symbol, including headers. This contradicts with the

kadircet wrote:
> ilya-biryukov wrote:
> > We should agree on and stick to a single behavior in both cases.
> > I suggest keeping the current behavior for now (translation units). Why 
> > can't we implement that?
> Because `FileSymbols` only knows about file names and has no idea about 
> whether a given file is TU or not. We might add some heuristics on the file 
> extensions, or change SymbolCollector to count number of files(this would 
> require maintaining a cache to de-duplicate references coming from the same 
> files but originating from a different TU).
Update from the offline conversation:
It should be possible to produce the same counts as static-indexer, will wait 
for an updated revision.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59481/new/

https://reviews.llvm.org/D59481



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D59481: [clangd] Count number of references while merging RefSlabs inside FileIndex

2019-04-25 Thread Kadir Cetinkaya via Phabricator via cfe-commits
kadircet marked 2 inline comments as done.
kadircet added a comment.

It has been a long time since I've proposed that change and even I forgot some 
of the high level details. Therefore, I wanted to sum up the state again so 
that we can decide on how to move forward.

Currently we have two different on-disk formats for index in clangd:

- Static index:
  - Merged single file, since merging is done before persistence, the data 
on-disk contains correct reference counts.
  - Produced by running SymbolCollector on each TU. Collector doesn't keep 
state between different TUs, merging is done on the caller-site.
  - No need to count references when loading, doesn't interact with 
`FileSymbols`.

- Background index:
  - Sharded multiple files.
  - Produced by running SymbolCollector on each TU, but instead of merging we 
separate those symbols into distinct files. Merging is done later on within 
`FileSymbols` which has no information about TUs.

FileSymbols is currently being used by BackgroundIndex and FileIndex(Dynamic 
Idx):

- In the case of BackgroundIndex, a symbol occurs at most twice(once in the 
header declaring it and once in the implementation file, if they are distinct), 
therefore when merging is done symbol references doesn't go above that.
- FileIndex doesn't perform de-duplication before-hand therefore it can perform 
reference counting while performing the merge. But currently it doesn't perform 
any merging.

As a result, changing `FileSymbols` would only effect `BackgroundIndex` and 
nothing else. Since merging occurs using the information in `RefSlabs` this can 
also be extended to benefit `FileIndex` and will most definitely be used
by any sort of indexing that performs merging over multiple files.

For unification purposes I would rather delete the `References` counting logic 
in `SymbolCollector` and perform it only at mergers(clangd-indexer and 
FileSymbols)




Comment at: clang-tools-extra/clangd/index/Background.h:113
 
+  // Note that FileSymbols counts References by incrementing it per each file
+  // mentioning the symbol, including headers. This contradicts with the

ilya-biryukov wrote:
> We should agree on and stick to a single behavior in both cases.
> I suggest keeping the current behavior for now (translation units). Why can't 
> we implement that?
Because `FileSymbols` only knows about file names and has no idea about whether 
a given file is TU or not. We might add some heuristics on the file extensions, 
or change SymbolCollector to count number of files(this would require 
maintaining a cache to de-duplicate references coming from the same files but 
originating from a different TU).



Comment at: clang-tools-extra/clangd/index/FileIndex.cpp:52
+  continue;
+// If this is the first time we see references to a symbol, reset its
+// Reference count, since filesymbols re-count number of references

ilya-biryukov wrote:
> The comment suggests there's something cheesy going on.
> Why would FileSymbols recompute the number of references? Can we avoid this 
> complicated logic?
This was the idea behind https://github.com/clangd/clangd/issues/23. Please see 
the main comment for a higher level discussion.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59481/new/

https://reviews.llvm.org/D59481



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D59481: [clangd] Count number of references while merging RefSlabs inside FileIndex

2019-04-25 Thread Ilya Biryukov via Phabricator via cfe-commits
ilya-biryukov added inline comments.



Comment at: clang-tools-extra/clangd/index/Background.h:113
 
+  // Note that FileSymbols counts References by incrementing it per each file
+  // mentioning the symbol, including headers. This contradicts with the

We should agree on and stick to a single behavior in both cases.
I suggest keeping the current behavior for now (translation units). Why can't 
we implement that?



Comment at: clang-tools-extra/clangd/index/FileIndex.cpp:52
+  continue;
+// If this is the first time we see references to a symbol, reset its
+// Reference count, since filesymbols re-count number of references

The comment suggests there's something cheesy going on.
Why would FileSymbols recompute the number of references? Can we avoid this 
complicated logic?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59481/new/

https://reviews.llvm.org/D59481



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D59481: [clangd] Count number of references while merging RefSlabs inside FileIndex

2019-03-22 Thread Kadir Cetinkaya via Phabricator via cfe-commits
kadircet added inline comments.



Comment at: clang-tools-extra/clangd/index/FileIndex.cpp:46
+auto It = MergedSyms->find(Sym.first);
+assert(It != MergedSyms->end() && "Reference to unknown symbol!");
+// Note that we increment References per each file mentioning the

ioeric wrote:
> I think we can trigger this assertion with an incomplete background index. 
> For example, we've seen references of a symbol in some files but we've not 
> parsed the file that contains the decls.
You are right, thanks for pointing this out!



Comment at: clang-tools-extra/clangd/index/FileIndex.cpp:50
+// FileIndex keeps only oneslab per file.
+// This contradicts with behavior inside clangd-indexer, it only counts
+// translation units since it processes each header multiple times.

ioeric wrote:
> kadircet wrote:
> > ioeric wrote:
> > > this is a bit of a code smell. FileIndex is not supposed to know anything 
> > > about clangd-indexer etc.
> > I've actually just left the comment for review so that we can decide what 
> > to do about discrepancy(with my reasoning). It is not like FileIndex is 
> > somehow tied to clangd-indexer now?
> the comment is useful. I just think it's in the wrong place. If we define the 
> reference counting behavior for `FileSymbols`, `FileSymbols`wouldn't need to 
> know about clangd-indexer or background-index. This comment can then live in 
> background index instead.
Moved into index/background.h



Comment at: clang-tools-extra/clangd/index/FileIndex.cpp:189
   AllSymbols.push_back();
+mergeRefs(RefSlabs, RefsStorage, AllRefs);
 break;

ioeric wrote:
> any reason not to count references for `PickOne`?
Because PickOne is not populating SymsStorage and is merely passing along 
pointers to SymbolSlabs. Counting references in that case would imply also 
making a new copy per each unique symbol. 

Do you think it is worth it?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59481/new/

https://reviews.llvm.org/D59481



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D59481: [clangd] Count number of references while merging RefSlabs inside FileIndex

2019-03-22 Thread Kadir Cetinkaya via Phabricator via cfe-commits
kadircet updated this revision to Diff 191844.
kadircet marked 11 inline comments as done.
kadircet added a comment.

- Address comments


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59481/new/

https://reviews.llvm.org/D59481

Files:
  clang-tools-extra/clangd/index/Background.h
  clang-tools-extra/clangd/index/FileIndex.cpp
  clang-tools-extra/clangd/index/FileIndex.h
  clang-tools-extra/unittests/clangd/BackgroundIndexTests.cpp
  clang-tools-extra/unittests/clangd/FileIndexTests.cpp

Index: clang-tools-extra/unittests/clangd/FileIndexTests.cpp
===
--- clang-tools-extra/unittests/clangd/FileIndexTests.cpp
+++ clang-tools-extra/unittests/clangd/FileIndexTests.cpp
@@ -46,6 +46,9 @@
   return llvm::StringRef(arg.Definition.FileURI) == U;
 }
 MATCHER_P(QName, N, "") { return (arg.Scope + arg.Name).str() == N; }
+MATCHER_P(NumReferences, N, "") {
+  return arg.References == static_cast(N);
+}
 
 namespace clang {
 namespace clangd {
@@ -59,6 +62,7 @@
   Symbol Sym;
   Sym.ID = SymbolID(ID);
   Sym.Name = ID;
+  Sym.References = 1;
   return Sym;
 }
 
@@ -417,6 +421,31 @@
   EXPECT_THAT(getRefs(Index, Foo.ID), RefsAre({RefRange(Main.range())}));
 }
 
+TEST(FileSymbolsTest, CountReferencesNoRefs) {
+  FileSymbols FS;
+  FS.update("f1", numSlab(1, 3), nullptr);
+  FS.update("f2", numSlab(1, 3), nullptr);
+  FS.update("f3", numSlab(1, 3), nullptr);
+  EXPECT_THAT(
+  runFuzzyFind(*FS.buildIndex(IndexType::Light, DuplicateHandling::Merge),
+   ""),
+  UnorderedElementsAre(AllOf(QName("1"), NumReferences(3)),
+   AllOf(QName("2"), NumReferences(3)),
+   AllOf(QName("3"), NumReferences(3;
+}
+
+TEST(FileSymbolsTest, CountReferencesWithRefs) {
+  FileSymbols FS;
+  FS.update("f1", numSlab(1, 3), refSlab(SymbolID("1"), "f1.cpp"));
+  FS.update("f2", numSlab(1, 3), refSlab(SymbolID("2"), "f2.cpp"));
+  FS.update("f3", numSlab(1, 3), refSlab(SymbolID("3"), "f3.cpp"));
+  EXPECT_THAT(
+  runFuzzyFind(*FS.buildIndex(IndexType::Light, DuplicateHandling::Merge),
+   ""),
+  UnorderedElementsAre(AllOf(QName("1"), NumReferences(1)),
+   AllOf(QName("2"), NumReferences(1)),
+   AllOf(QName("3"), NumReferences(1;
+}
 } // namespace
 } // namespace clangd
 } // namespace clang
Index: clang-tools-extra/unittests/clangd/BackgroundIndexTests.cpp
===
--- clang-tools-extra/unittests/clangd/BackgroundIndexTests.cpp
+++ clang-tools-extra/unittests/clangd/BackgroundIndexTests.cpp
@@ -32,6 +32,7 @@
   return !arg.IsTU && !arg.URI.empty() && arg.Digest == FileDigest{{0}} &&
  arg.DirectIncludes.empty();
 }
+MATCHER_P(NumReferences, N, "") { return arg.References == N; }
 
 class MemoryShardStorage : public BackgroundIndexStorage {
   mutable std::mutex StorageMu;
@@ -127,20 +128,25 @@
   CDB.setCompileCommand(testPath("root/A.cc"), Cmd);
 
   ASSERT_TRUE(Idx.blockUntilIdleForTest());
-  EXPECT_THAT(
-  runFuzzyFind(Idx, ""),
-  UnorderedElementsAre(Named("common"), Named("A_CC"), Named("g"),
-   AllOf(Named("f_b"), Declared(), Not(Defined();
+  EXPECT_THAT(runFuzzyFind(Idx, ""),
+  UnorderedElementsAre(AllOf(Named("common"), NumReferences(2U)),
+   AllOf(Named("A_CC"), NumReferences(1U)),
+   AllOf(Named("g"), NumReferences(0U)),
+   AllOf(Named("f_b"), Declared(),
+ Not(Defined()), NumReferences(1U;
 
   Cmd.Filename = testPath("root/B.cc");
   Cmd.CommandLine = {"clang++", Cmd.Filename};
-  CDB.setCompileCommand(testPath("root/A.cc"), Cmd);
+  CDB.setCompileCommand(testPath("root/B.cc"), Cmd);
 
   ASSERT_TRUE(Idx.blockUntilIdleForTest());
   // B_CC is dropped as we don't collect symbols from A.h in this compilation.
   EXPECT_THAT(runFuzzyFind(Idx, ""),
-  UnorderedElementsAre(Named("common"), Named("A_CC"), Named("g"),
-   AllOf(Named("f_b"), Declared(), Defined(;
+  UnorderedElementsAre(AllOf(Named("common"), NumReferences(3U)),
+   AllOf(Named("A_CC"), NumReferences(1U)),
+   AllOf(Named("g"), NumReferences(0U)),
+   AllOf(Named("f_b"), Declared(), Defined(),
+ NumReferences(2U;
 
   auto Syms = runFuzzyFind(Idx, "common");
   EXPECT_THAT(Syms, UnorderedElementsAre(Named("common")));
Index: clang-tools-extra/clangd/index/FileIndex.h
===
--- clang-tools-extra/clangd/index/FileIndex.h
+++ clang-tools-extra/clangd/index/FileIndex.h
@@ -56,6 +56,10 @@
 ///
 /// 

[PATCH] D59481: [clangd] Count number of references while merging RefSlabs inside FileIndex

2019-03-20 Thread Ilya Biryukov via Phabricator via cfe-commits
ilya-biryukov added inline comments.



Comment at: clang-tools-extra/clangd/index/FileIndex.cpp:170
   I.first->second = mergeSymbol(I.first->second, Sym);
+// We re-count number of references while merging refs from scratch.
+I.first->getSecond().References = 0;

ioeric wrote:
> kadircet wrote:
> > ioeric wrote:
> > > kadircet wrote:
> > > > ioeric wrote:
> > > > > note that references might not always exist. SymbolCollector can be 
> > > > > set up to not collect references.
> > > > I thought we wouldn't make any promises about `References` in such a 
> > > > case, do we?
> > > Is this documented somewhere?
> > > 
> > > `References` and `RefSlab` have been two independent things. `References` 
> > > existed before `RefSlab`. Now might be a good time to unify them, but I 
> > > think we should think about how this is reflected in their definitions 
> > > (documentation etc) and other producers and consumers of the structures.
> > Both of them are only produced by SymbolCollector, which has an option 
> > `CountReferences` with a comment saying `// Populate the Symbol.References 
> > field.`.
> > Unfortunately that's not what it does, instead it always sets 
> > `Symol.References` field to `1` for every symbol collected during indexing 
> > of a single TU.
> > Then it is up to the consumers to merge those `1`s. So even though we say:
> > ```
> > /// The number of translation units that reference this symbol from their 
> > main
> > /// file. This number is only meaningful if aggregated in an index.```
> > in the comments of `Symbol::References` it is totally up-to-the consumer 
> > who performs the merging.
> > 
> > The option controls whether Symbol Occurences should be collected or not.
> > 
> > I see two possible options:
> >  - Un-tie SymbolCollector from Symbol::References and rather define it as:
> > ```
> > /// This field is for the convenience of an aggregated symbol index
> > ```
> >  - Rather change Index structure's(consumers of this information) to 
> > store(and build) this information internally if they plan to make use of it 
> > ?
> > 
> > WDYT?
> > Unfortunately that's not what it does, instead it always sets 
> > Symol.References field to 1 for every symbol collected during indexing of a 
> > single TU.
> Looking at the code, I think `Symol.References` is indeed only populated 
> (i.e. set to 1) when `CountReferences` is enabled. Honestly, we should 
> probably get rid of `CountReferences`; I don't see a reason not to count 
> references. 
> 
> But the statement `count one per TU` still stands in SymbolCollector, as it 
> always only handles one TU at a time. 
> 
> Actually, all I'm asking in the initial comment is to define the `References` 
> counting behavior when there is no reference ;) I think a reasonable solution 
> is to keep the `references` untouched when there is no reference, so that 
> `References` aggregation via `mergeSymbol` can still work (for 
> one-TU-per-Slab use cases). 
> 
> And we should probably also document the new behavior e.g. 
> `DuplicateHandling` in FileIndex.h seems to be a good place.
> Honestly, we should probably get rid of CountReferences; I don't see a reason 
> not to count references
We introduced it to avoid getting partial counts in dynamic index. In the 
absence of the static index, they would skew ranking towards symbols used 
inside the files you have open, which is not really a good signal in the first 
place.

I'm not sure if that actually hurts much in practice, though. It could very 
possibly be the case that the added complexity isn't worth it.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59481/new/

https://reviews.llvm.org/D59481



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D59481: [clangd] Count number of references while merging RefSlabs inside FileIndex

2019-03-20 Thread Eric Liu via Phabricator via cfe-commits
ioeric added inline comments.



Comment at: clang-tools-extra/clangd/index/FileIndex.cpp:33
+// If MergedSyms is provided, increments a symbols Reference count once for
+// every file it was mentioned in.
+void mergeRefs(llvm::ArrayRef> RefSlabs,

nit: s/every file/every slab/?

There is no "file" in this context.



Comment at: clang-tools-extra/clangd/index/FileIndex.cpp:46
+auto It = MergedSyms->find(Sym.first);
+assert(It != MergedSyms->end() && "Reference to unknown symbol!");
+// Note that we increment References per each file mentioning the

I think we can trigger this assertion with an incomplete background index. For 
example, we've seen references of a symbol in some files but we've not parsed 
the file that contains the decls.



Comment at: clang-tools-extra/clangd/index/FileIndex.cpp:50
+// FileIndex keeps only oneslab per file.
+// This contradicts with behavior inside clangd-indexer, it only counts
+// translation units since it processes each header multiple times.

kadircet wrote:
> ioeric wrote:
> > this is a bit of a code smell. FileIndex is not supposed to know anything 
> > about clangd-indexer etc.
> I've actually just left the comment for review so that we can decide what to 
> do about discrepancy(with my reasoning). It is not like FileIndex is somehow 
> tied to clangd-indexer now?
the comment is useful. I just think it's in the wrong place. If we define the 
reference counting behavior for `FileSymbols`, `FileSymbols`wouldn't need to 
know about clangd-indexer or background-index. This comment can then live in 
background index instead.



Comment at: clang-tools-extra/clangd/index/FileIndex.cpp:170
   I.first->second = mergeSymbol(I.first->second, Sym);
+// We re-count number of references while merging refs from scratch.
+I.first->getSecond().References = 0;

kadircet wrote:
> ioeric wrote:
> > kadircet wrote:
> > > ioeric wrote:
> > > > note that references might not always exist. SymbolCollector can be set 
> > > > up to not collect references.
> > > I thought we wouldn't make any promises about `References` in such a 
> > > case, do we?
> > Is this documented somewhere?
> > 
> > `References` and `RefSlab` have been two independent things. `References` 
> > existed before `RefSlab`. Now might be a good time to unify them, but I 
> > think we should think about how this is reflected in their definitions 
> > (documentation etc) and other producers and consumers of the structures.
> Both of them are only produced by SymbolCollector, which has an option 
> `CountReferences` with a comment saying `// Populate the Symbol.References 
> field.`.
> Unfortunately that's not what it does, instead it always sets 
> `Symol.References` field to `1` for every symbol collected during indexing of 
> a single TU.
> Then it is up to the consumers to merge those `1`s. So even though we say:
> ```
> /// The number of translation units that reference this symbol from their main
> /// file. This number is only meaningful if aggregated in an index.```
> in the comments of `Symbol::References` it is totally up-to-the consumer who 
> performs the merging.
> 
> The option controls whether Symbol Occurences should be collected or not.
> 
> I see two possible options:
>  - Un-tie SymbolCollector from Symbol::References and rather define it as:
> ```
> /// This field is for the convenience of an aggregated symbol index
> ```
>  - Rather change Index structure's(consumers of this information) to 
> store(and build) this information internally if they plan to make use of it ?
> 
> WDYT?
> Unfortunately that's not what it does, instead it always sets 
> Symol.References field to 1 for every symbol collected during indexing of a 
> single TU.
Looking at the code, I think `Symol.References` is indeed only populated (i.e. 
set to 1) when `CountReferences` is enabled. Honestly, we should probably get 
rid of `CountReferences`; I don't see a reason not to count references. 

But the statement `count one per TU` still stands in SymbolCollector, as it 
always only handles one TU at a time. 

Actually, all I'm asking in the initial comment is to define the `References` 
counting behavior when there is no reference ;) I think a reasonable solution 
is to keep the `references` untouched when there is no reference, so that 
`References` aggregation via `mergeSymbol` can still work (for one-TU-per-Slab 
use cases). 

And we should probably also document the new behavior e.g. `DuplicateHandling` 
in FileIndex.h seems to be a good place.



Comment at: clang-tools-extra/clangd/index/FileIndex.cpp:180
 }
 // FIXME: aggregate symbol reference count based on references.
 break;

this FIXME can be removed?



Comment at: clang-tools-extra/clangd/index/FileIndex.cpp:189

[PATCH] D59481: [clangd] Count number of references while merging RefSlabs inside FileIndex

2019-03-19 Thread Kadir Cetinkaya via Phabricator via cfe-commits
kadircet marked an inline comment as done.
kadircet added inline comments.



Comment at: clang-tools-extra/clangd/index/FileIndex.cpp:170
   I.first->second = mergeSymbol(I.first->second, Sym);
+// We re-count number of references while merging refs from scratch.
+I.first->getSecond().References = 0;

ioeric wrote:
> kadircet wrote:
> > ioeric wrote:
> > > note that references might not always exist. SymbolCollector can be set 
> > > up to not collect references.
> > I thought we wouldn't make any promises about `References` in such a case, 
> > do we?
> Is this documented somewhere?
> 
> `References` and `RefSlab` have been two independent things. `References` 
> existed before `RefSlab`. Now might be a good time to unify them, but I think 
> we should think about how this is reflected in their definitions 
> (documentation etc) and other producers and consumers of the structures.
Both of them are only produced by SymbolCollector, which has an option 
`CountReferences` with a comment saying `// Populate the Symbol.References 
field.`.
Unfortunately that's not what it does, instead it always sets 
`Symol.References` field to `1` for every symbol collected during indexing of a 
single TU.
Then it is up to the consumers to merge those `1`s. So even though we say:
```
/// The number of translation units that reference this symbol from their main
/// file. This number is only meaningful if aggregated in an index.```
in the comments of `Symbol::References` it is totally up-to-the consumer who 
performs the merging.

The option controls whether Symbol Occurences should be collected or not.

I see two possible options:
 - Un-tie SymbolCollector from Symbol::References and rather define it as:
```
/// This field is for the convenience of an aggregated symbol index
```
 - Rather change Index structure's(consumers of this information) to store(and 
build) this information internally if they plan to make use of it ?

WDYT?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59481/new/

https://reviews.llvm.org/D59481



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D59481: [clangd] Count number of references while merging RefSlabs inside FileIndex

2019-03-19 Thread Eric Liu via Phabricator via cfe-commits
ioeric added a comment.

> I don't follow why this can over-count, FileIndex keeps only one RefSlab per 
> each file, so we can't over-count? Did you mean it will be more than #TUs ?

Yes. This is how `Symbol::References` is described in its documentation. If we 
want to change/expand the semantic, we need to make it clear in the 
documentation as well.




Comment at: clang-tools-extra/clangd/index/FileIndex.cpp:170
   I.first->second = mergeSymbol(I.first->second, Sym);
+// We re-count number of references while merging refs from scratch.
+I.first->getSecond().References = 0;

kadircet wrote:
> ioeric wrote:
> > note that references might not always exist. SymbolCollector can be set up 
> > to not collect references.
> I thought we wouldn't make any promises about `References` in such a case, do 
> we?
Is this documented somewhere?

`References` and `RefSlab` have been two independent things. `References` 
existed before `RefSlab`. Now might be a good time to unify them, but I think 
we should think about how this is reflected in their definitions (documentation 
etc) and other producers and consumers of the structures.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59481/new/

https://reviews.llvm.org/D59481



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D59481: [clangd] Count number of references while merging RefSlabs inside FileIndex

2019-03-19 Thread Kadir Cetinkaya via Phabricator via cfe-commits
kadircet marked 2 inline comments as done.
kadircet added a comment.

In D59481#1432881 , @ioeric wrote:

> I'm not sure if FileIndex is the correct layer to make decision about how 
> References is calculated. Currently, we have two use cases in clangd 1) one 
> slab per TU and 2) one slab per file. It seems to me that we would want 
> different strategies for the two use cases, so it might make sense to make 
> this configurable in FileIndex.  And in one case, we have `References` ~= # 
> of TUs, and in the other case, we would have `References` ~= # of files.


`References` ~= # of TUs case has nothing to do with FileIndex actually, it 
only happens if references are counted beforehand(which is currently only 
happening in clangd-indexer).
FileIndex has two ways to handle duplicates during an index merge:

- `pickone` which doesn't count references at all (this is the oneslab per TU 
case)
- `merge` which counts references as number of files this symbol has occured 
(this is the oneslab per File case)

> This can over-count `References`  in the second case. It might be fine as an 
> approximation (if there is no better solution), but we should definitely 
> document it (e.g. in `BackgroundIndex`).

I don't follow why this can over-count, FileIndex keeps only one RefSlab per 
each file, so we can't over-count? Did you mean it will be more than #TUs ?




Comment at: clang-tools-extra/clangd/index/FileIndex.cpp:50
+// FileIndex keeps only oneslab per file.
+// This contradicts with behavior inside clangd-indexer, it only counts
+// translation units since it processes each header multiple times.

ioeric wrote:
> this is a bit of a code smell. FileIndex is not supposed to know anything 
> about clangd-indexer etc.
I've actually just left the comment for review so that we can decide what to do 
about discrepancy(with my reasoning). It is not like FileIndex is somehow tied 
to clangd-indexer now?



Comment at: clang-tools-extra/clangd/index/FileIndex.cpp:170
   I.first->second = mergeSymbol(I.first->second, Sym);
+// We re-count number of references while merging refs from scratch.
+I.first->getSecond().References = 0;

ioeric wrote:
> note that references might not always exist. SymbolCollector can be set up to 
> not collect references.
I thought we wouldn't make any promises about `References` in such a case, do 
we?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59481/new/

https://reviews.llvm.org/D59481



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D59481: [clangd] Count number of references while merging RefSlabs inside FileIndex

2019-03-18 Thread Eric Liu via Phabricator via cfe-commits
ioeric added a comment.

I'm not sure if FileIndex is the correct layer to make decision about how 
References is calculated. Currently, we have two use cases in clangd 1) one 
slab per TU and 2) one slab per file. It seems to me that we would want 
different strategies for the two use cases, so it might make sense to make this 
configurable in FileIndex.  And in one case, we have `References` ~= # of TUs, 
and in the other case, we would have `References` ~= # of files. This can 
over-count `References`  in the second case. It might be fine as an 
approximation (if there is no better solution), but we should definitely 
document it (e.g. in `BackgroundIndex`).




Comment at: clang-tools-extra/clangd/index/FileIndex.cpp:50
+// FileIndex keeps only oneslab per file.
+// This contradicts with behavior inside clangd-indexer, it only counts
+// translation units since it processes each header multiple times.

this is a bit of a code smell. FileIndex is not supposed to know anything about 
clangd-indexer etc.



Comment at: clang-tools-extra/clangd/index/FileIndex.cpp:170
   I.first->second = mergeSymbol(I.first->second, Sym);
+// We re-count number of references while merging refs from scratch.
+I.first->getSecond().References = 0;

note that references might not always exist. SymbolCollector can be set up to 
not collect references.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D59481/new/

https://reviews.llvm.org/D59481



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D59481: [clangd] Count number of references while merging RefSlabs inside FileIndex

2019-03-18 Thread Kadir Cetinkaya via Phabricator via cfe-commits
kadircet created this revision.
kadircet added a reviewer: ilya-biryukov.
Herald added subscribers: cfe-commits, jdoerfert, arphaman, mgrang, jkorous, 
MaskRay, ioeric.
Herald added a project: clang.

For counting number of references clangd was relying on merging every
duplication of a symbol. Unfortunately this does not apply to FileIndex(and one
of its users' BackgroundIndex), since we get rid of duplication by simply
dropping symbols coming from non-canonical locations. So only one or two(coming
from canonical declaration header and defined source file, if exists)
replications of the same symbol reaches merging step.

This patch changes reference counting logic to rather count number of different
RefSlabs a given SymbolID exists.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D59481

Files:
  clang-tools-extra/clangd/index/FileIndex.cpp
  clang-tools-extra/unittests/clangd/BackgroundIndexTests.cpp

Index: clang-tools-extra/unittests/clangd/BackgroundIndexTests.cpp
===
--- clang-tools-extra/unittests/clangd/BackgroundIndexTests.cpp
+++ clang-tools-extra/unittests/clangd/BackgroundIndexTests.cpp
@@ -32,6 +32,7 @@
   return !arg.IsTU && !arg.URI.empty() && arg.Digest == FileDigest{{0}} &&
  arg.DirectIncludes.empty();
 }
+MATCHER_P(NumReferences, N, "") { return arg.References == N; }
 
 class MemoryShardStorage : public BackgroundIndexStorage {
   mutable std::mutex StorageMu;
@@ -127,20 +128,25 @@
   CDB.setCompileCommand(testPath("root/A.cc"), Cmd);
 
   ASSERT_TRUE(Idx.blockUntilIdleForTest());
-  EXPECT_THAT(
-  runFuzzyFind(Idx, ""),
-  UnorderedElementsAre(Named("common"), Named("A_CC"), Named("g"),
-   AllOf(Named("f_b"), Declared(), Not(Defined();
+  EXPECT_THAT(runFuzzyFind(Idx, ""),
+  UnorderedElementsAre(AllOf(Named("common"), NumReferences(2U)),
+   AllOf(Named("A_CC"), NumReferences(1U)),
+   AllOf(Named("g"), NumReferences(0U)),
+   AllOf(Named("f_b"), Declared(),
+ Not(Defined()), NumReferences(1U;
 
   Cmd.Filename = testPath("root/B.cc");
   Cmd.CommandLine = {"clang++", Cmd.Filename};
-  CDB.setCompileCommand(testPath("root/A.cc"), Cmd);
+  CDB.setCompileCommand(testPath("root/B.cc"), Cmd);
 
   ASSERT_TRUE(Idx.blockUntilIdleForTest());
   // B_CC is dropped as we don't collect symbols from A.h in this compilation.
   EXPECT_THAT(runFuzzyFind(Idx, ""),
-  UnorderedElementsAre(Named("common"), Named("A_CC"), Named("g"),
-   AllOf(Named("f_b"), Declared(), Defined(;
+  UnorderedElementsAre(AllOf(Named("common"), NumReferences(3U)),
+   AllOf(Named("A_CC"), NumReferences(1U)),
+   AllOf(Named("g"), NumReferences(0U)),
+   AllOf(Named("f_b"), Declared(), Defined(),
+ NumReferences(2U;
 
   auto Syms = runFuzzyFind(Idx, "common");
   EXPECT_THAT(Syms, UnorderedElementsAre(Named("common")));
Index: clang-tools-extra/clangd/index/FileIndex.cpp
===
--- clang-tools-extra/clangd/index/FileIndex.cpp
+++ clang-tools-extra/clangd/index/FileIndex.cpp
@@ -27,6 +27,47 @@
 
 namespace clang {
 namespace clangd {
+namespace {
+// Puts all refs for the same symbol into RefsStorage in a contigious way.
+// If MergedSyms is provided, increments a symbols Reference count once for
+// every file it was mentioned in.
+void mergeRefs(llvm::ArrayRef> RefSlabs,
+   std::vector ,
+   llvm::DenseMap> ,
+   llvm::DenseMap *MergedSyms = nullptr) {
+  llvm::DenseMap> MergedRefs;
+  size_t Count = 0;
+  for (const auto  : RefSlabs) {
+for (const auto  : *RefSlab) {
+  MergedRefs[Sym.first].append(Sym.second.begin(), Sym.second.end());
+  Count += Sym.second.size();
+  if (MergedSyms) {
+auto It = MergedSyms->find(Sym.first);
+assert(It != MergedSyms->end() && "Reference to unknown symbol!");
+// Note that we increment References per each file mentioning the
+// symbol, including headers. This only happens once for each file since
+// FileIndex keeps only oneslab per file.
+// This contradicts with behavior inside clangd-indexer, it only counts
+// translation units since it processes each header multiple times.
+It->getSecond().References++;
+  }
+}
+  }
+  RefsStorage.reserve(Count);
+  AllRefs.reserve(MergedRefs.size());
+  for (auto  : MergedRefs) {
+auto  = Sym.second;
+// Sorting isn't required, but yields more stable results over rebuilds.
+llvm::sort(SymRefs);
+llvm::copy(SymRefs, back_inserter(RefsStorage));
+AllRefs.try_emplace(
+