kbobyrev updated this revision to Diff 165224.
kbobyrev added a comment.

Add documentation, don't expose `PostingList` interface implementations in the 
public API (similarly to `Iterator`).


https://reviews.llvm.org/D51982

Files:
  clang-tools-extra/clangd/CMakeLists.txt
  clang-tools-extra/clangd/index/dex/Dex.cpp
  clang-tools-extra/clangd/index/dex/Dex.h
  clang-tools-extra/clangd/index/dex/Iterator.cpp
  clang-tools-extra/clangd/index/dex/Iterator.h
  clang-tools-extra/clangd/index/dex/PostingList.cpp
  clang-tools-extra/clangd/index/dex/PostingList.h
  clang-tools-extra/unittests/clangd/DexTests.cpp

Index: clang-tools-extra/unittests/clangd/DexTests.cpp
===================================================================
--- clang-tools-extra/unittests/clangd/DexTests.cpp
+++ clang-tools-extra/unittests/clangd/DexTests.cpp
@@ -47,8 +47,8 @@
 }
 
 TEST(DexIterators, DocumentIterator) {
-  const PostingList L = {4, 7, 8, 20, 42, 100};
-  auto DocIterator = create(L);
+  const auto L = buildSparsePostingList({4, 7, 8, 20, 42, 100});
+  auto DocIterator = L->iterator();
 
   EXPECT_EQ(DocIterator->peek(), 4U);
   EXPECT_FALSE(DocIterator->reachedEnd());
@@ -70,28 +70,28 @@
 }
 
 TEST(DexIterators, AndWithEmpty) {
-  const PostingList L0;
-  const PostingList L1 = {0, 5, 7, 10, 42, 320, 9000};
+  const auto L0 = buildSparsePostingList({});
+  const auto L1 = buildSparsePostingList({0, 5, 7, 10, 42, 320, 9000});
 
-  auto AndEmpty = createAnd(create(L0));
+  auto AndEmpty = createAnd(L0->iterator());
   EXPECT_TRUE(AndEmpty->reachedEnd());
 
-  auto AndWithEmpty = createAnd(create(L0), create(L1));
+  auto AndWithEmpty = createAnd(L0->iterator(), L1->iterator());
   EXPECT_TRUE(AndWithEmpty->reachedEnd());
 
   EXPECT_THAT(consumeIDs(*AndWithEmpty), ElementsAre());
 }
 
 TEST(DexIterators, AndTwoLists) {
-  const PostingList L0 = {0, 5, 7, 10, 42, 320, 9000};
-  const PostingList L1 = {0, 4, 7, 10, 30, 60, 320, 9000};
+  const auto L0 = buildSparsePostingList({0, 5, 7, 10, 42, 320, 9000});
+  const auto L1 = buildSparsePostingList({0, 4, 7, 10, 30, 60, 320, 9000});
 
-  auto And = createAnd(create(L1), create(L0));
+  auto And = createAnd(L1->iterator(), L0->iterator());
 
   EXPECT_FALSE(And->reachedEnd());
   EXPECT_THAT(consumeIDs(*And), ElementsAre(0U, 7U, 10U, 320U, 9000U));
 
-  And = createAnd(create(L0), create(L1));
+  And = createAnd(L0->iterator(), L1->iterator());
 
   And->advanceTo(0);
   EXPECT_EQ(And->peek(), 0U);
@@ -107,11 +107,11 @@
 }
 
 TEST(DexIterators, AndThreeLists) {
-  const PostingList L0 = {0, 5, 7, 10, 42, 320, 9000};
-  const PostingList L1 = {0, 4, 7, 10, 30, 60, 320, 9000};
-  const PostingList L2 = {1, 4, 7, 11, 30, 60, 320, 9000};
+  const auto L0 = buildSparsePostingList({0, 5, 7, 10, 42, 320, 9000});
+  const auto L1 = buildSparsePostingList({0, 4, 7, 10, 30, 60, 320, 9000});
+  const auto L2 = buildSparsePostingList({1, 4, 7, 11, 30, 60, 320, 9000});
 
-  auto And = createAnd(create(L0), create(L1), create(L2));
+  auto And = createAnd(L0->iterator(), L1->iterator(), L2->iterator());
   EXPECT_EQ(And->peek(), 7U);
   And->advanceTo(300);
   EXPECT_EQ(And->peek(), 320U);
@@ -121,24 +121,24 @@
 }
 
 TEST(DexIterators, OrWithEmpty) {
-  const PostingList L0;
-  const PostingList L1 = {0, 5, 7, 10, 42, 320, 9000};
+  const auto L0 = buildSparsePostingList({});
+  const auto L1 = buildSparsePostingList({0, 5, 7, 10, 42, 320, 9000});
 
-  auto OrEmpty = createOr(create(L0));
+  auto OrEmpty = createOr(L0->iterator());
   EXPECT_TRUE(OrEmpty->reachedEnd());
 
-  auto OrWithEmpty = createOr(create(L0), create(L1));
+  auto OrWithEmpty = createOr(L0->iterator(), L1->iterator());
   EXPECT_FALSE(OrWithEmpty->reachedEnd());
 
   EXPECT_THAT(consumeIDs(*OrWithEmpty),
               ElementsAre(0U, 5U, 7U, 10U, 42U, 320U, 9000U));
 }
 
 TEST(DexIterators, OrTwoLists) {
-  const PostingList L0 = {0, 5, 7, 10, 42, 320, 9000};
-  const PostingList L1 = {0, 4, 7, 10, 30, 60, 320, 9000};
+  const auto L0 = buildSparsePostingList({0, 5, 7, 10, 42, 320, 9000});
+  const auto L1 = buildSparsePostingList({0, 4, 7, 10, 30, 60, 320, 9000});
 
-  auto Or = createOr(create(L0), create(L1));
+  auto Or = createOr(L0->iterator(), L1->iterator());
 
   EXPECT_FALSE(Or->reachedEnd());
   EXPECT_EQ(Or->peek(), 0U);
@@ -161,18 +161,18 @@
   Or->advanceTo(9001);
   EXPECT_TRUE(Or->reachedEnd());
 
-  Or = createOr(create(L0), create(L1));
+  Or = createOr(L0->iterator(), L1->iterator());
 
   EXPECT_THAT(consumeIDs(*Or),
               ElementsAre(0U, 4U, 5U, 7U, 10U, 30U, 42U, 60U, 320U, 9000U));
 }
 
 TEST(DexIterators, OrThreeLists) {
-  const PostingList L0 = {0, 5, 7, 10, 42, 320, 9000};
-  const PostingList L1 = {0, 4, 7, 10, 30, 60, 320, 9000};
-  const PostingList L2 = {1, 4, 7, 11, 30, 60, 320, 9000};
+  const auto L0 = buildSparsePostingList({0, 5, 7, 10, 42, 320, 9000});
+  const auto L1 = buildSparsePostingList({0, 4, 7, 10, 30, 60, 320, 9000});
+  const auto L2 = buildSparsePostingList({1, 4, 7, 11, 30, 60, 320, 9000});
 
-  auto Or = createOr(create(L0), create(L1), create(L2));
+  auto Or = createOr(L0->iterator(), L1->iterator(), L2->iterator());
 
   EXPECT_FALSE(Or->reachedEnd());
   EXPECT_EQ(Or->peek(), 0U);
@@ -221,19 +221,19 @@
   //                  |1, 5, 7, 9|                      |1, 5|    |0, 3, 5|
   //                  +----------+                      +----+    +-------+
   //
-  const PostingList L0 = {1, 3, 5, 8, 9};
-  const PostingList L1 = {1, 5, 7, 9};
-  const PostingList L3;
-  const PostingList L4 = {1, 5};
-  const PostingList L5 = {0, 3, 5};
+  const auto L0 = buildSparsePostingList({1, 3, 5, 8, 9});
+  const auto L1 = buildSparsePostingList({1, 5, 7, 9});
+  const auto L3 = buildSparsePostingList({});
+  const auto L4 = buildSparsePostingList({1, 5});
+  const auto L5 = buildSparsePostingList({0, 3, 5});
 
   // Root of the query tree: [1, 5]
   auto Root = createAnd(
       // Lower And Iterator: [1, 5, 9]
-      createAnd(create(L0), createBoost(create(L1), 2U)),
+      createAnd(L0->iterator(), createBoost(L1->iterator(), 2U)),
       // Lower Or Iterator: [0, 1, 5]
-      createOr(create(L3), createBoost(create(L4), 3U),
-               createBoost(create(L5), 4U)));
+      createOr(L3->iterator(), createBoost(L4->iterator(), 3U),
+               createBoost(L5->iterator(), 4U)));
 
   EXPECT_FALSE(Root->reachedEnd());
   EXPECT_EQ(Root->peek(), 1U);
@@ -255,52 +255,51 @@
 }
 
 TEST(DexIterators, StringRepresentation) {
-  const PostingList L0 = {4, 7, 8, 20, 42, 100};
-  const PostingList L1 = {1, 3, 5, 8, 9};
-  const PostingList L2 = {1, 5, 7, 9};
-  const PostingList L3 = {0, 5};
-  const PostingList L4 = {0, 1, 5};
-  const PostingList L5;
+  const auto L0 = buildSparsePostingList({4, 7, 8, 20, 42, 100});
+  const auto L1 = buildSparsePostingList({1, 3, 5, 8, 9});
+  const auto L2 = buildSparsePostingList({1, 5, 7, 9});
+  const auto L3 = buildSparsePostingList({0, 5});
+  const auto L4 = buildSparsePostingList({0, 1, 5});
+  const auto L5 = buildSparsePostingList({});
 
-  EXPECT_EQ(llvm::to_string(*(create(L0))), "[{4}, 7, 8, 20, 42, 100, END]");
+  EXPECT_EQ(llvm::to_string(*(L0->iterator())), "[4]");
 
-  auto Nested = createAnd(createAnd(create(L1), create(L2)),
-                          createOr(create(L3), create(L4), create(L5)));
+  auto Nested =
+      createAnd(createAnd(L1->iterator(), L2->iterator()),
+                createOr(L3->iterator(), L4->iterator(), L5->iterator()));
 
-  EXPECT_EQ(llvm::to_string(*Nested),
-            "(& (| [0, {5}, END] [0, {1}, 5, END] [{END}]) (& [{1}, 5, 7, 9, "
-            "END] [{1}, 3, 5, 8, 9, END]))");
+  EXPECT_EQ(llvm::to_string(*Nested), "(& (| [5] [1] [END]) (& [1] [1]))");
 }
 
 TEST(DexIterators, Limit) {
-  const PostingList L0 = {3, 6, 7, 20, 42, 100};
-  const PostingList L1 = {1, 3, 5, 6, 7, 30, 100};
-  const PostingList L2 = {0, 3, 5, 7, 8, 100};
+  const auto L0 = buildSparsePostingList({3, 6, 7, 20, 42, 100});
+  const auto L1 = buildSparsePostingList({1, 3, 5, 6, 7, 30, 100});
+  const auto L2 = buildSparsePostingList({0, 3, 5, 7, 8, 100});
 
-  auto DocIterator = createLimit(create(L0), 42);
+  auto DocIterator = createLimit(L0->iterator(), 42);
   EXPECT_THAT(consumeIDs(*DocIterator), ElementsAre(3, 6, 7, 20, 42, 100));
 
-  DocIterator = createLimit(create(L0), 3);
+  DocIterator = createLimit(L0->iterator(), 3);
   EXPECT_THAT(consumeIDs(*DocIterator), ElementsAre(3, 6, 7));
 
-  DocIterator = createLimit(create(L0), 0);
+  DocIterator = createLimit(L0->iterator(), 0);
   EXPECT_THAT(consumeIDs(*DocIterator), ElementsAre());
 
-  auto AndIterator =
-      createAnd(createLimit(createTrue(9000), 343), createLimit(create(L0), 2),
-                createLimit(create(L1), 3), createLimit(create(L2), 42));
+  auto AndIterator = createAnd(
+      createLimit(createTrue(9000), 343), createLimit(L0->iterator(), 2),
+      createLimit(L1->iterator(), 3), createLimit(L2->iterator(), 42));
   EXPECT_THAT(consumeIDs(*AndIterator), ElementsAre(3, 7));
 }
 
 TEST(DexIterators, True) {
   auto TrueIterator = createTrue(0U);
   EXPECT_TRUE(TrueIterator->reachedEnd());
   EXPECT_THAT(consumeIDs(*TrueIterator), ElementsAre());
 
-  PostingList L0 = {1, 2, 5, 7};
+  const auto L0 = buildSparsePostingList({1, 2, 5, 7});
   TrueIterator = createTrue(7U);
   EXPECT_THAT(TrueIterator->peek(), 0);
-  auto AndIterator = createAnd(create(L0), move(TrueIterator));
+  auto AndIterator = createAnd(L0->iterator(), move(TrueIterator));
   EXPECT_FALSE(AndIterator->reachedEnd());
   EXPECT_THAT(consumeIDs(*AndIterator), ElementsAre(1, 2, 5));
 }
@@ -311,10 +310,10 @@
   auto ElementBoost = BoostIterator->consume();
   EXPECT_THAT(ElementBoost, 42U);
 
-  const PostingList L0 = {2, 4};
-  const PostingList L1 = {1, 4};
-  auto Root = createOr(createTrue(5U), createBoost(create(L0), 2U),
-                       createBoost(create(L1), 3U));
+  const auto L0 = buildSparsePostingList({2, 4});
+  const auto L1 = buildSparsePostingList({1, 4});
+  auto Root = createOr(createTrue(5U), createBoost(L0->iterator(), 2U),
+                       createBoost(L1->iterator(), 3U));
 
   ElementBoost = Root->consume();
   EXPECT_THAT(ElementBoost, Iterator::DEFAULT_BOOST_SCORE);
@@ -453,10 +452,10 @@
 }
 
 TEST(Dex, FuzzyFind) {
-  auto Index = Dex::build(
-      generateSymbols({"ns::ABC", "ns::BCD", "::ABC", "ns::nested::ABC",
-                       "other::ABC", "other::A"}),
-      URISchemes);
+  auto Index =
+      Dex::build(generateSymbols({"ns::ABC", "ns::BCD", "::ABC",
+                                  "ns::nested::ABC", "other::ABC", "other::A"}),
+                 URISchemes);
   FuzzyFindRequest Req;
   Req.Query = "ABC";
   Req.Scopes = {"ns::"};
@@ -524,16 +523,14 @@
 }
 
 TEST(DexTest, MatchQualifiedNamesWithoutSpecificScope) {
-  auto I =
-      Dex::build(generateSymbols({"a::y1", "b::y2", "y3"}), URISchemes);
+  auto I = Dex::build(generateSymbols({"a::y1", "b::y2", "y3"}), URISchemes);
   FuzzyFindRequest Req;
   Req.Query = "y";
   EXPECT_THAT(match(*I, Req), UnorderedElementsAre("a::y1", "b::y2", "y3"));
 }
 
 TEST(DexTest, MatchQualifiedNamesWithGlobalScope) {
-  auto I =
-      Dex::build(generateSymbols({"a::y1", "b::y2", "y3"}), URISchemes);
+  auto I = Dex::build(generateSymbols({"a::y1", "b::y2", "y3"}), URISchemes);
   FuzzyFindRequest Req;
   Req.Query = "y";
   Req.Scopes = {""};
Index: clang-tools-extra/clangd/index/dex/PostingList.h
===================================================================
--- /dev/null
+++ clang-tools-extra/clangd/index/dex/PostingList.h
@@ -0,0 +1,67 @@
+//===--- PostingList.h - Symbol identifiers storage interface  --*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This defines posting list interface: a storage for identifiers of symbols
+// which can be characterized by a specific feature (such as fuzzy-find trigram,
+// scope, type or any other Search Token). Posting lists are values for
+// inverted index, which maps search tokens to corresponding posting lists.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_POSTINGLIST_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_POSTINGLIST_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include <cstdint>
+#include <vector>
+
+namespace clang {
+namespace clangd {
+namespace dex {
+
+/// Symbol position in the list of all index symbols sorted by a pre-computed
+/// symbol quality.
+using DocID = uint32_t;
+
+class Iterator;
+
+/// PostingList is an interface for the storage of DocIDs which can be inserted
+/// to the Query Tree as a leaf by constructing Iterator over given PostingList.
+/// Depending on posting list density, users can decide to utilize more
+/// efficient representations to balance the space vs. performance trade-off.
+/// For example, dense posting lists might benefit from various compression
+/// techniques, but require decompression and thus are the cause of performance
+/// overhead.
+// FIXME(kbobyrev): Implement dense posting list implementation using VByte
+// encoding.
+class PostingList {
+public:
+  /// Returns Query Tree leaf iterator over given PostingList.
+  virtual std::unique_ptr<Iterator> iterator() const = 0;
+
+  /// Returns size of underlying storage.
+  virtual size_t bytes() const = 0;
+
+  virtual ~PostingList() {}
+};
+
+/// Returns posting list with sparse in-memory representation. This is useful
+/// for small size posting lists with huge gaps between subsequent DocIDs.
+/// Sparse posting list stores an array of DocIDs and requires
+/// List.size() * sizeof(DocID) memory. It does not induce performance overhead
+/// for data (de)compression, but can consume a lot of memory when given large
+/// dense posting list.
+std::unique_ptr<PostingList>
+buildSparsePostingList(const std::vector<DocID> &&Documents);
+
+} // namespace dex
+} // namespace clangd
+} // namespace clang
+
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_POSTINGLIST_H
Index: clang-tools-extra/clangd/index/dex/PostingList.cpp
===================================================================
--- /dev/null
+++ clang-tools-extra/clangd/index/dex/PostingList.cpp
@@ -0,0 +1,106 @@
+//===--- PostingList.cpp - Symbol identifiers storage interface -----------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PostingList.h"
+#include "Iterator.h"
+
+namespace clang {
+namespace clangd {
+namespace dex {
+
+namespace {
+
+/// SparsePostingList owns underlying data represented as std::vector<DocID>.
+/// It is effective where gaps between subsequent DocIDs are expected to take
+/// the whole sizeof(DocID) filled with significant bits, because it doesn't
+/// compress underlying data.
+class SparsePostingList : public PostingList {
+public:
+  explicit SparsePostingList(const std::vector<DocID> &&Documents)
+      : Documents(std::move(Documents)) {}
+
+  virtual std::unique_ptr<Iterator> iterator() const override {
+    return llvm::make_unique<SparseIterator>(move(Documents));
+  }
+
+  virtual size_t bytes() const override {
+    return Documents.size() * sizeof(DocID);
+  }
+
+private:
+  /// Implements Iterator over SparsePostingList. This is the most basic
+  /// iterator and is simply a wrapper around
+  /// std::vector<DocID>::const_iterator.
+  class SparseIterator : public Iterator {
+  public:
+    explicit SparseIterator(llvm::ArrayRef<DocID> Documents)
+        : Documents(Documents), Index(std::begin(Documents)) {}
+
+    bool reachedEnd() const override { return Index == std::end(Documents); }
+
+    /// Advances cursor to the next item.
+    void advance() override {
+      assert(!reachedEnd() &&
+             "Sparse Posting List iterator can't advance() at the end.");
+      ++Index;
+    }
+
+    /// Applies binary search to advance cursor to the next item with DocID
+    /// equal or higher than the given one.
+    void advanceTo(DocID ID) override {
+      assert(!reachedEnd() &&
+             "Sparse Posting List iterator can't advance() at the end.");
+      // If current ID is beyond requested one, iterator is already in the right
+      // state.
+      if (peek() < ID)
+        Index = std::lower_bound(Index, std::end(Documents), ID);
+    }
+
+    DocID peek() const override {
+      assert(!reachedEnd() &&
+             "Sparse Posting List iterator can't peek() at the end.");
+      return *Index;
+    }
+
+    float consume() override {
+      assert(!reachedEnd() &&
+             "Sparse Posting List iterator can't consume() at the end.");
+      return DEFAULT_BOOST_SCORE;
+    }
+
+    size_t estimateSize() const override { return Documents.size(); }
+
+  private:
+    llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override {
+      OS << '[';
+      if (Index != std::end(Documents))
+        OS << *Index;
+      else
+        OS << "END";
+      OS << ']';
+      return OS;
+    }
+
+    llvm::ArrayRef<DocID> Documents;
+    llvm::ArrayRef<DocID>::const_iterator Index;
+  };
+
+  const std::vector<DocID> Documents;
+};
+
+} // namespace
+
+std::unique_ptr<PostingList>
+buildSparsePostingList(const std::vector<DocID> &&Documents) {
+  return llvm::make_unique<SparsePostingList>(move(Documents));
+}
+
+} // namespace dex
+} // namespace clangd
+} // namespace clang
Index: clang-tools-extra/clangd/index/dex/Iterator.h
===================================================================
--- clang-tools-extra/clangd/index/dex/Iterator.h
+++ clang-tools-extra/clangd/index/dex/Iterator.h
@@ -36,29 +36,12 @@
 #include <algorithm>
 #include <memory>
 #include <vector>
+#include "PostingList.h"
 
 namespace clang {
 namespace clangd {
 namespace dex {
 
-/// Symbol position in the list of all index symbols sorted by a pre-computed
-/// symbol quality.
-using DocID = uint32_t;
-/// Contains sorted sequence of DocIDs all of which belong to symbols matching
-/// certain criteria, i.e. containing a Search Token. PostingLists are values
-/// for the inverted index.
-// FIXME(kbobyrev): Posting lists for incomplete trigrams (one/two symbols) are
-// likely to be very dense and hence require special attention so that the index
-// doesn't use too much memory. Possible solution would be to construct
-// compressed posting lists which consist of ranges of DocIDs instead of
-// distinct DocIDs. A special case would be the empty query: for that case
-// TrueIterator should be implemented - an iterator which doesn't actually store
-// any PostingList within itself, but "contains" all DocIDs in range
-// [0, IndexSize).
-using PostingList = std::vector<DocID>;
-/// Immutable reference to PostingList object.
-using PostingListRef = llvm::ArrayRef<DocID>;
-
 /// Iterator is the interface for Query Tree node. The simplest type of Iterator
 /// is DocumentIterator which is simply a wrapper around PostingList iterator
 /// and serves as the Query Tree leaf. More sophisticated examples of iterators
@@ -131,11 +114,6 @@
 /// to acquire preliminary scores of requested items.
 std::vector<std::pair<DocID, float>> consume(Iterator &It);
 
-/// Returns a document iterator over given PostingList.
-///
-/// DocumentIterator returns DEFAULT_BOOST_SCORE for each processed item.
-std::unique_ptr<Iterator> create(PostingListRef Documents);
-
 /// Returns AND Iterator which performs the intersection of the PostingLists of
 /// its children.
 ///
Index: clang-tools-extra/clangd/index/dex/Iterator.cpp
===================================================================
--- clang-tools-extra/clangd/index/dex/Iterator.cpp
+++ clang-tools-extra/clangd/index/dex/Iterator.cpp
@@ -8,6 +8,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "Iterator.h"
+#include "PostingList.h"
 #include <algorithm>
 #include <cassert>
 #include <numeric>
@@ -18,69 +19,6 @@
 
 namespace {
 
-/// Implements Iterator over a PostingList. DocumentIterator is the most basic
-/// iterator: it doesn't have any children (hence it is the leaf of iterator
-/// tree) and is simply a wrapper around PostingList::const_iterator.
-class DocumentIterator : public Iterator {
-public:
-  explicit DocumentIterator(PostingListRef Documents)
-      : Documents(Documents), Index(std::begin(Documents)) {}
-
-  bool reachedEnd() const override { return Index == std::end(Documents); }
-
-  /// Advances cursor to the next item.
-  void advance() override {
-    assert(!reachedEnd() && "DOCUMENT iterator can't advance() at the end.");
-    ++Index;
-  }
-
-  /// Applies binary search to advance cursor to the next item with DocID equal
-  /// or higher than the given one.
-  void advanceTo(DocID ID) override {
-    assert(!reachedEnd() && "DOCUMENT iterator can't advance() at the end.");
-    // If current ID is beyond requested one, iterator is already in the right
-    // state.
-    if (peek() < ID)
-      Index = std::lower_bound(Index, std::end(Documents), ID);
-  }
-
-  DocID peek() const override {
-    assert(!reachedEnd() && "DOCUMENT iterator can't peek() at the end.");
-    return *Index;
-  }
-
-  float consume() override {
-    assert(!reachedEnd() && "DOCUMENT iterator can't consume() at the end.");
-    return DEFAULT_BOOST_SCORE;
-  }
-
-  size_t estimateSize() const override { return Documents.size(); }
-
-private:
-  llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override {
-    OS << '[';
-    auto Separator = "";
-    for (auto It = std::begin(Documents); It != std::end(Documents); ++It) {
-      OS << Separator;
-      if (It == Index)
-        OS << '{' << *It << '}';
-      else
-        OS << *It;
-      Separator = ", ";
-    }
-    OS << Separator;
-    if (Index == std::end(Documents))
-      OS << "{END}";
-    else
-      OS << "END";
-    OS << ']';
-    return OS;
-  }
-
-  PostingListRef Documents;
-  PostingListRef::const_iterator Index;
-};
-
 /// Implements Iterator over the intersection of other iterators.
 ///
 /// AndIterator iterates through common items among all children. It becomes
@@ -399,10 +337,6 @@
   return Result;
 }
 
-std::unique_ptr<Iterator> create(PostingListRef Documents) {
-  return llvm::make_unique<DocumentIterator>(Documents);
-}
-
 std::unique_ptr<Iterator>
 createAnd(std::vector<std::unique_ptr<Iterator>> Children) {
   return llvm::make_unique<AndIterator>(move(Children));
Index: clang-tools-extra/clangd/index/dex/Dex.h
===================================================================
--- clang-tools-extra/clangd/index/dex/Dex.h
+++ clang-tools-extra/clangd/index/dex/Dex.h
@@ -21,6 +21,7 @@
 #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_DEX_H
 
 #include "Iterator.h"
+#include "PostingList.h"
 #include "Token.h"
 #include "Trigram.h"
 #include "index/Index.h"
@@ -98,7 +99,7 @@
   /// posting list would contain all indices of symbols defined in namespace
   /// std. Inverted index is used to retrieve posting lists which are processed
   /// during the fuzzyFind process.
-  llvm::DenseMap<Token, PostingList> InvertedIndex;
+  llvm::DenseMap<Token, std::unique_ptr<PostingList>> InvertedIndex;
   std::shared_ptr<void> KeepAlive; // poor man's move-only std::any
   // Size of memory retained by KeepAlive.
   size_t BackingDataSize = 0;
Index: clang-tools-extra/clangd/index/dex/Dex.cpp
===================================================================
--- clang-tools-extra/clangd/index/dex/Dex.cpp
+++ clang-tools-extra/clangd/index/dex/Dex.cpp
@@ -46,7 +46,7 @@
 std::vector<std::unique_ptr<Iterator>> createFileProximityIterators(
     llvm::ArrayRef<std::string> ProximityPaths,
     llvm::ArrayRef<std::string> URISchemes,
-    const llvm::DenseMap<Token, PostingList> &InvertedIndex) {
+    const llvm::DenseMap<Token, std::unique_ptr<PostingList>> &InvertedIndex) {
   std::vector<std::unique_ptr<Iterator>> BoostingIterators;
   // Deduplicate parent URIs extracted from the ProximityPaths.
   llvm::StringSet<> ParentURIs;
@@ -82,7 +82,7 @@
       // FIXME(kbobyrev): Append LIMIT on top of every BOOST iterator.
       PathProximitySignals.SymbolURI = ParentURI;
       BoostingIterators.push_back(
-          createBoost(create(It->second), PathProximitySignals.evaluate()));
+          createBoost(It->second->iterator(), PathProximitySignals.evaluate()));
     }
   }
   return BoostingIterators;
@@ -112,13 +112,20 @@
     Symbols[I] = ScoredSymbols[I].second;
   }
 
-  // Populate TempInvertedIndex with posting lists for index symbols.
+  // Populate TempInvertedIndex with lists for index symbols.
+  llvm::DenseMap<Token, std::vector<DocID>> TempInvertedIndex;
   for (DocID SymbolRank = 0; SymbolRank < Symbols.size(); ++SymbolRank) {
     const auto *Sym = Symbols[SymbolRank];
     for (const auto &Token : generateSearchTokens(*Sym))
-      InvertedIndex[Token].push_back(SymbolRank);
+      TempInvertedIndex[Token].push_back(SymbolRank);
   }
 
+  // Convert lists of items to posting lists.
+  for (const auto &TokenToPostingList : TempInvertedIndex)
+    InvertedIndex.insert(
+        {TokenToPostingList.first,
+         buildSparsePostingList(move(TokenToPostingList.second))});
+
   vlog("Built Dex with estimated memory usage {0} bytes.",
        estimateMemoryUsage());
 }
@@ -142,7 +149,7 @@
   for (const auto &Trigram : TrigramTokens) {
     const auto It = InvertedIndex.find(Trigram);
     if (It != InvertedIndex.end())
-      TrigramIterators.push_back(create(It->second));
+      TrigramIterators.push_back(It->second->iterator());
   }
   if (!TrigramIterators.empty())
     TopLevelChildren.push_back(createAnd(move(TrigramIterators)));
@@ -152,7 +159,7 @@
   for (const auto &Scope : Req.Scopes) {
     const auto It = InvertedIndex.find(Token(Token::Kind::Scope, Scope));
     if (It != InvertedIndex.end())
-      ScopeIterators.push_back(create(It->second));
+      ScopeIterators.push_back(It->second->iterator());
   }
   // Add OR iterator for scopes if there are any Scope Iterators.
   if (!ScopeIterators.empty())
@@ -232,7 +239,7 @@
   Bytes += LookupTable.getMemorySize();
   Bytes += InvertedIndex.getMemorySize();
   for (const auto &P : InvertedIndex)
-    Bytes += P.second.size() * sizeof(DocID);
+    Bytes += P.second->bytes();
   return Bytes + BackingDataSize;
 }
 
Index: clang-tools-extra/clangd/CMakeLists.txt
===================================================================
--- clang-tools-extra/clangd/CMakeLists.txt
+++ clang-tools-extra/clangd/CMakeLists.txt
@@ -48,6 +48,7 @@
 
   index/dex/Dex.cpp
   index/dex/Iterator.cpp
+  index/dex/PostingList.cpp
   index/dex/Trigram.cpp
 
   LINK_LIBS
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to