[PATCH] D104348: [index] Fix performance regression with indexing macros

2021-06-16 Thread Ben Langmuir via Phabricator via cfe-commits
benlangmuir closed this revision.
benlangmuir added a comment.

Pushed rG773ad55a393f 



Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D104348/new/

https://reviews.llvm.org/D104348

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D104348: [index] Fix performance regression with indexing macros

2021-06-16 Thread Alex Lorenz via Phabricator via cfe-commits
arphaman accepted this revision.
arphaman added a comment.
This revision is now accepted and ready to land.

Thanks, LGTM!


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D104348/new/

https://reviews.llvm.org/D104348

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D104348: [index] Fix performance regression with indexing macros

2021-06-15 Thread Ben Langmuir via Phabricator via cfe-commits
benlangmuir created this revision.
benlangmuir added a reviewer: arphaman.
benlangmuir requested review of this revision.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.

When using FileIndexRecord with macros, symbol references can be seen out of 
source order, which was causing a regression to insert the symbols into a 
vector. Instead, we now lazily sort the vector. The impact is small on most 
code, but in very large files with many macro references (M) near the beginning 
of the file followed by many decl references (D) it was O(M*D). A particularly 
bad protobuf-generated header was observed with a 100% regression in practice.

  

rdar://78628133


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D104348

Files:
  clang/lib/Index/FileIndexRecord.cpp
  clang/lib/Index/FileIndexRecord.h


Index: clang/lib/Index/FileIndexRecord.h
===
--- clang/lib/Index/FileIndexRecord.h
+++ clang/lib/Index/FileIndexRecord.h
@@ -27,14 +27,13 @@
 private:
   FileID FID;
   bool IsSystem;
-  std::vector Decls;
+  mutable bool IsSorted = false;
+  mutable std::vector Decls;
 
 public:
   FileIndexRecord(FileID FID, bool IsSystem) : FID(FID), IsSystem(IsSystem) {}
 
-  ArrayRef getDeclOccurrencesSortedByOffset() const {
-return Decls;
-  }
+  ArrayRef getDeclOccurrencesSortedByOffset() const;
 
   FileID getFileID() const { return FID; }
   bool isSystem() const { return IsSystem; }
Index: clang/lib/Index/FileIndexRecord.cpp
===
--- clang/lib/Index/FileIndexRecord.cpp
+++ clang/lib/Index/FileIndexRecord.cpp
@@ -17,23 +17,16 @@
 using namespace clang;
 using namespace clang::index;
 
-static void addOccurrence(std::vector ,
-  DeclOccurrence Info) {
-  auto IsNextOccurence = [&]() -> bool {
-if (Decls.empty())
-  return true;
-auto  = Decls.back();
-return Last.Offset < Info.Offset;
-  };
-
-  if (IsNextOccurence()) {
-Decls.push_back(std::move(Info));
-return;
+ArrayRef
+FileIndexRecord::getDeclOccurrencesSortedByOffset() const {
+  if (!IsSorted) {
+llvm::stable_sort(Decls,
+  [](const DeclOccurrence , const DeclOccurrence ) {
+return A.Offset < B.Offset;
+  });
+IsSorted = true;
   }
-
-  // We keep Decls in order as we need to access them in this order in all 
cases.
-  auto It = llvm::upper_bound(Decls, Info);
-  Decls.insert(It, std::move(Info));
+  return Decls;
 }
 
 void FileIndexRecord::addDeclOccurence(SymbolRoleSet Roles, unsigned Offset,
@@ -41,13 +34,15 @@
ArrayRef Relations) {
   assert(D->isCanonicalDecl() &&
  "Occurrences should be associated with their canonical decl");
-  addOccurrence(Decls, DeclOccurrence(Roles, Offset, D, Relations));
+  IsSorted = false;
+  Decls.emplace_back(Roles, Offset, D, Relations);
 }
 
 void FileIndexRecord::addMacroOccurence(SymbolRoleSet Roles, unsigned Offset,
 const IdentifierInfo *Name,
 const MacroInfo *MI) {
-  addOccurrence(Decls, DeclOccurrence(Roles, Offset, Name, MI));
+  IsSorted = false;
+  Decls.emplace_back(Roles, Offset, Name, MI);
 }
 
 void FileIndexRecord::removeHeaderGuardMacros() {


Index: clang/lib/Index/FileIndexRecord.h
===
--- clang/lib/Index/FileIndexRecord.h
+++ clang/lib/Index/FileIndexRecord.h
@@ -27,14 +27,13 @@
 private:
   FileID FID;
   bool IsSystem;
-  std::vector Decls;
+  mutable bool IsSorted = false;
+  mutable std::vector Decls;
 
 public:
   FileIndexRecord(FileID FID, bool IsSystem) : FID(FID), IsSystem(IsSystem) {}
 
-  ArrayRef getDeclOccurrencesSortedByOffset() const {
-return Decls;
-  }
+  ArrayRef getDeclOccurrencesSortedByOffset() const;
 
   FileID getFileID() const { return FID; }
   bool isSystem() const { return IsSystem; }
Index: clang/lib/Index/FileIndexRecord.cpp
===
--- clang/lib/Index/FileIndexRecord.cpp
+++ clang/lib/Index/FileIndexRecord.cpp
@@ -17,23 +17,16 @@
 using namespace clang;
 using namespace clang::index;
 
-static void addOccurrence(std::vector ,
-  DeclOccurrence Info) {
-  auto IsNextOccurence = [&]() -> bool {
-if (Decls.empty())
-  return true;
-auto  = Decls.back();
-return Last.Offset < Info.Offset;
-  };
-
-  if (IsNextOccurence()) {
-Decls.push_back(std::move(Info));
-return;
+ArrayRef
+FileIndexRecord::getDeclOccurrencesSortedByOffset() const {
+  if (!IsSorted) {
+llvm::stable_sort(Decls,
+  [](const DeclOccurrence , const DeclOccurrence ) {
+return A.Offset < B.Offset;
+  });
+IsSorted = true;
   }
-
-  // We keep Decls in order as