kbobyrev updated this revision to Diff 162341.
kbobyrev marked 7 inline comments as done.
kbobyrev added a comment.

Address a round of comments & simplify code.


https://reviews.llvm.org/D51029

Files:
  clang-tools-extra/clangd/index/dex/DexIndex.cpp
  clang-tools-extra/clangd/index/dex/Iterator.cpp
  clang-tools-extra/clangd/index/dex/Iterator.h
  clang-tools-extra/unittests/clangd/DexIndexTests.cpp

Index: clang-tools-extra/unittests/clangd/DexIndexTests.cpp
===================================================================
--- clang-tools-extra/unittests/clangd/DexIndexTests.cpp
+++ clang-tools-extra/unittests/clangd/DexIndexTests.cpp
@@ -30,8 +30,10 @@
 namespace {
 
 std::vector<DocID>
-consumeIDs(Iterator &It, size_t Limit = std::numeric_limits<size_t>::max()) {
-  auto IDAndScore = consume(It, Limit);
+consumeIDs(std::unique_ptr<Iterator> It,
+           size_t Limit = std::numeric_limits<size_t>::max()) {
+  auto Root = createLimit(move(It), Limit);
+  auto IDAndScore = consume(*Root);
   std::vector<DocID> IDs(IDAndScore.size());
   for (size_t I = 0; I < IDAndScore.size(); ++I)
     IDs[I] = IDAndScore[I].first;
@@ -71,7 +73,7 @@
   auto AndWithEmpty = createAnd(create(L0), create(L1));
   EXPECT_TRUE(AndWithEmpty->reachedEnd());
 
-  EXPECT_THAT(consumeIDs(*AndWithEmpty), ElementsAre());
+  EXPECT_THAT(consumeIDs(move(AndWithEmpty)), ElementsAre());
 }
 
 TEST(DexIndexIterators, AndTwoLists) {
@@ -81,7 +83,7 @@
   auto And = createAnd(create(L1), create(L0));
 
   EXPECT_FALSE(And->reachedEnd());
-  EXPECT_THAT(consumeIDs(*And), ElementsAre(0U, 7U, 10U, 320U, 9000U));
+  EXPECT_THAT(consumeIDs(move(And)), ElementsAre(0U, 7U, 10U, 320U, 9000U));
 
   And = createAnd(create(L0), create(L1));
 
@@ -122,7 +124,7 @@
   auto OrWithEmpty = createOr(create(L0), create(L1));
   EXPECT_FALSE(OrWithEmpty->reachedEnd());
 
-  EXPECT_THAT(consumeIDs(*OrWithEmpty),
+  EXPECT_THAT(consumeIDs(move(OrWithEmpty)),
               ElementsAre(0U, 5U, 7U, 10U, 42U, 320U, 9000U));
 }
 
@@ -155,7 +157,7 @@
 
   Or = createOr(create(L0), create(L1));
 
-  EXPECT_THAT(consumeIDs(*Or),
+  EXPECT_THAT(consumeIDs(move(Or)),
               ElementsAre(0U, 4U, 5U, 7U, 10U, 30U, 42U, 60U, 320U, 9000U));
 }
 
@@ -234,13 +236,13 @@
   Root->advanceTo(1);
   Root->advanceTo(0);
   EXPECT_EQ(Root->peek(), 1U);
-  auto ElementBoost = Root->consume(Root->peek());
+  auto ElementBoost = Root->consume();
   EXPECT_THAT(ElementBoost, 6);
   Root->advance();
   EXPECT_EQ(Root->peek(), 5U);
   Root->advanceTo(5);
   EXPECT_EQ(Root->peek(), 5U);
-  ElementBoost = Root->consume(Root->peek());
+  ElementBoost = Root->consume();
   EXPECT_THAT(ElementBoost, 8);
   Root->advanceTo(9000);
   EXPECT_TRUE(Root->reachedEnd());
@@ -265,64 +267,67 @@
 }
 
 TEST(DexIndexIterators, Limit) {
-  const PostingList L0 = {4, 7, 8, 20, 42, 100};
-  const PostingList L1 = {1, 3, 5, 8, 9};
-  const PostingList L2 = {1, 5, 7, 9};
-  const PostingList L3 = {0, 5};
-  const PostingList L4 = {0, 1, 5};
-  const PostingList L5;
+  const PostingList L0 = {3, 6, 7, 20, 42, 100};
+  const PostingList L1 = {1, 3, 5, 6, 7, 30, 100};
+  const PostingList L2 = {0, 3, 5, 7, 8, 100};
 
   auto DocIterator = create(L0);
-  EXPECT_THAT(consumeIDs(*DocIterator, 42), ElementsAre(4, 7, 8, 20, 42, 100));
+  EXPECT_THAT(consumeIDs(move(DocIterator), 42),
+              ElementsAre(3, 6, 7, 20, 42, 100));
 
   DocIterator = create(L0);
-  EXPECT_THAT(consumeIDs(*DocIterator), ElementsAre(4, 7, 8, 20, 42, 100));
+  EXPECT_THAT(consumeIDs(move(DocIterator)), ElementsAre(3, 6, 7, 20, 42, 100));
 
   DocIterator = create(L0);
-  EXPECT_THAT(consumeIDs(*DocIterator, 3), ElementsAre(4, 7, 8));
+  EXPECT_THAT(consumeIDs(move(DocIterator), 3), ElementsAre(3, 6, 7));
 
   DocIterator = create(L0);
-  EXPECT_THAT(consumeIDs(*DocIterator, 0), ElementsAre());
+  EXPECT_THAT(consumeIDs(move(DocIterator), 0), ElementsAre());
+
+  auto AndIterator =
+      createAnd(createLimit(createTrue(9000), 343), createLimit(create(L0), 2),
+                createLimit(create(L1), 3), createLimit(create(L2), 42));
+  EXPECT_THAT(consumeIDs(move(AndIterator)), ElementsAre(3, 7));
 }
 
 TEST(DexIndexIterators, True) {
   auto TrueIterator = createTrue(0U);
   EXPECT_TRUE(TrueIterator->reachedEnd());
-  EXPECT_THAT(consumeIDs(*TrueIterator), ElementsAre());
+  EXPECT_THAT(consumeIDs(move(TrueIterator)), ElementsAre());
 
   PostingList L0 = {1, 2, 5, 7};
   TrueIterator = createTrue(7U);
   EXPECT_THAT(TrueIterator->peek(), 0);
   auto AndIterator = createAnd(create(L0), move(TrueIterator));
   EXPECT_FALSE(AndIterator->reachedEnd());
-  EXPECT_THAT(consumeIDs(*AndIterator), ElementsAre(1, 2, 5));
+  EXPECT_THAT(consumeIDs(move(AndIterator)), ElementsAre(1, 2, 5));
 }
 
 TEST(DexIndexIterators, Boost) {
   auto BoostIterator = createBoost(createTrue(5U), 42U);
   EXPECT_FALSE(BoostIterator->reachedEnd());
-  auto ElementBoost = BoostIterator->consume(BoostIterator->peek());
+  auto ElementBoost = BoostIterator->consume();
   EXPECT_THAT(ElementBoost, 42U);
 
   const PostingList L0 = {2, 4};
   const PostingList L1 = {1, 4};
   auto Root = createOr(createTrue(5U), createBoost(create(L0), 2U),
                        createBoost(create(L1), 3U));
 
-  ElementBoost = Root->consume(Root->peek());
+  ElementBoost = Root->consume();
   EXPECT_THAT(ElementBoost, Iterator::DEFAULT_BOOST_SCORE);
   Root->advance();
   EXPECT_THAT(Root->peek(), 1U);
-  ElementBoost = Root->consume(Root->peek());
+  ElementBoost = Root->consume();
   EXPECT_THAT(ElementBoost, 3);
 
   Root->advance();
   EXPECT_THAT(Root->peek(), 2U);
-  ElementBoost = Root->consume(Root->peek());
+  ElementBoost = Root->consume();
   EXPECT_THAT(ElementBoost, 2);
 
   Root->advanceTo(4);
-  ElementBoost = Root->consume(Root->peek());
+  ElementBoost = Root->consume();
   EXPECT_THAT(ElementBoost, 3);
 }
 
Index: clang-tools-extra/clangd/index/dex/Iterator.h
===================================================================
--- clang-tools-extra/clangd/index/dex/Iterator.h
+++ clang-tools-extra/clangd/index/dex/Iterator.h
@@ -87,13 +87,14 @@
   ///
   /// Note: reachedEnd() must be false.
   virtual DocID peek() const = 0;
-  /// Retrieves boosting score. Query tree root should pass Root->peek() to this
-  /// function, the parameter is needed to propagate through the tree. Given ID
-  /// should be compared against BOOST iterator peek()s: some of the iterators
-  /// would not point to the item which was propagated to the top of the query
-  /// tree (e.g. if these iterators are branches of OR iterator) and hence
-  /// shouldn't apply any boosting to the consumed item.
-  virtual float consume(DocID ID) = 0;
+  /// Informs the iterator that the current document was consumed, and returns
+  /// its boost.
+  ///
+  /// Note: If this iterator has any child iterators that contain the document,
+  /// consume() should be called on those and their boosts incorporated.
+  /// consume() must *not* be called on children that don't contain the current
+  /// doc.
+  virtual float consume() = 0;
 
   virtual ~Iterator() {}
 
@@ -118,17 +119,15 @@
   virtual llvm::raw_ostream &dump(llvm::raw_ostream &OS) const = 0;
 };
 
-/// Advances the iterator until it is either exhausted or the number of
-/// requested items is reached. Returns pairs of document IDs with the
-/// corresponding boosting score.
+/// Advances the iterator until it is exhausted. Returns pairs of document IDs
+/// with the corresponding boosting score.
 ///
 /// Boosting can be seen as a compromise between retrieving too many items and
 /// calculating finals score for each of them (which might be very expensive)
 /// and not retrieving enough items so that items with very high final score
 /// would not be processed. Boosting score is a computationally efficient way
 /// to acquire preliminary scores of requested items.
-std::vector<std::pair<DocID, float>>
-consume(Iterator &It, size_t Limit = std::numeric_limits<size_t>::max());
+std::vector<std::pair<DocID, float>> consume(Iterator &It);
 
 /// Returns a document iterator over given PostingList.
 ///
@@ -165,6 +164,13 @@
 std::unique_ptr<Iterator> createBoost(std::unique_ptr<Iterator> Child,
                                       float Factor);
 
+/// Returns LIMIT iterator, which yields up to N elements of its child iterator.
+/// Elements only count towards the limit if they are part of the final result
+/// set. Therefore the following iterator (AND (2) (LIMIT (1 2) 1)) yields (2),
+/// not ().
+std::unique_ptr<Iterator> createLimit(std::unique_ptr<Iterator> Child,
+                                      size_t Limit);
+
 /// This allows createAnd(create(...), create(...)) syntax.
 template <typename... Args> std::unique_ptr<Iterator> createAnd(Args... args) {
   std::vector<std::unique_ptr<Iterator>> Children;
Index: clang-tools-extra/clangd/index/dex/Iterator.cpp
===================================================================
--- clang-tools-extra/clangd/index/dex/Iterator.cpp
+++ clang-tools-extra/clangd/index/dex/Iterator.cpp
@@ -46,7 +46,7 @@
     return *Index;
   }
 
-  float consume(DocID ID) override { return DEFAULT_BOOST_SCORE; }
+  float consume() override { return DEFAULT_BOOST_SCORE; }
 
 private:
   llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override {
@@ -105,16 +105,12 @@
 
   DocID peek() const override { return Children.front()->peek(); }
 
-  // If not exhausted and points to the given item, consume() returns the
-  // product of Children->consume(ID). Otherwise, DEFAULT_BOOST_SCORE is
-  // returned.
-  float consume(DocID ID) override {
-    if (reachedEnd() || peek() != ID)
-      return DEFAULT_BOOST_SCORE;
+  float consume() override {
+    assert(!reachedEnd() && "AndIterator can't consume() at the end.");
     return std::accumulate(
         begin(Children), end(Children), DEFAULT_BOOST_SCORE,
         [&](float Current, const std::unique_ptr<Iterator> &Child) {
-          return Current * Child->consume(ID);
+          return Current * Child->consume();
         });
   }
 
@@ -226,15 +222,16 @@
 
   // Returns the maximum boosting score among all Children when iterator is not
   // exhausted and points to the given ID, DEFAULT_BOOST_SCORE otherwise.
-  float consume(DocID ID) override {
-    if (reachedEnd() || peek() != ID)
-      return DEFAULT_BOOST_SCORE;
+  float consume() override {
+    assert(!reachedEnd() &&
+           "OrIterator can't consume() after it reached the end.");
+    const DocID ID = peek();
     return std::accumulate(
         begin(Children), end(Children), DEFAULT_BOOST_SCORE,
-        [&](float Current, const std::unique_ptr<Iterator> &Child) {
+        [&](float Boost, const std::unique_ptr<Iterator> &Child) {
           return (!Child->reachedEnd() && Child->peek() == ID)
-                     ? std::max(Current, Child->consume(ID))
-                     : Current;
+                     ? std::max(Boost, Child->consume())
+                     : Boost;
         });
   }
 
@@ -278,7 +275,7 @@
     return Index;
   }
 
-  float consume(DocID) override { return DEFAULT_BOOST_SCORE; }
+  float consume() override { return DEFAULT_BOOST_SCORE; }
 
 private:
   llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override {
@@ -306,7 +303,7 @@
 
   DocID peek() const override { return Child->peek(); }
 
-  float consume(DocID ID) override { return Child->consume(ID) * Factor; }
+  float consume() override { return Child->consume() * Factor; }
 
 private:
   llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override {
@@ -318,15 +315,50 @@
   float Factor;
 };
 
+/// This iterator limits the number of items retrieved from the child iterator
+/// on top of the query tree. To ensure that query tree with LIMIT iterators
+/// inside works correctly, users have to call Root->consume(Root->peek()) each
+/// time item is retrieved at the root of query tree.
+class LimitIterator : public Iterator {
+public:
+  LimitIterator(std::unique_ptr<Iterator> Child, size_t Limit)
+      : Child(move(Child)), Limit(Limit), ItemsLeft(Limit) {}
+
+  bool reachedEnd() const override {
+    return ItemsLeft == 0 || Child->reachedEnd();
+  }
+
+  void advance() override { Child->advance(); }
+
+  void advanceTo(DocID ID) override { Child->advanceTo(ID); }
+
+  DocID peek() const override { return Child->peek(); }
+
+  /// Decreases the limit in case the element consumed at top of the query tree
+  /// comes from the underlying iterator.
+  float consume() override {
+    assert(!reachedEnd() && "LimitIterator can't consume at the end.");
+    --ItemsLeft;
+    return Child->consume();
+  }
+
+private:
+  llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override {
+    OS << "(LIMIT " << Limit << '(' << ItemsLeft << ") " << *Child << ')';
+    return OS;
+  }
+
+  std::unique_ptr<Iterator> Child;
+  size_t Limit;
+  size_t ItemsLeft;
+};
+
 } // end namespace
 
-std::vector<std::pair<DocID, float>> consume(Iterator &It, size_t Limit) {
+std::vector<std::pair<DocID, float>> consume(Iterator &It) {
   std::vector<std::pair<DocID, float>> Result;
-  for (size_t Retrieved = 0; !It.reachedEnd() && Retrieved < Limit;
-       It.advance(), ++Retrieved) {
-    DocID Document = It.peek();
-    Result.push_back(std::make_pair(Document, It.consume(Document)));
-  }
+  for (; !It.reachedEnd(); It.advance())
+    Result.push_back(std::make_pair(It.peek(), It.consume()));
   return Result;
 }
 
@@ -353,6 +385,11 @@
   return llvm::make_unique<BoostIterator>(move(Child), Factor);
 }
 
+std::unique_ptr<Iterator> createLimit(std::unique_ptr<Iterator> Child,
+                                      size_t Size) {
+  return llvm::make_unique<LimitIterator>(move(Child), Size);
+}
+
 } // namespace dex
 } // namespace clangd
 } // namespace clang
Index: clang-tools-extra/clangd/index/dex/DexIndex.cpp
===================================================================
--- clang-tools-extra/clangd/index/dex/DexIndex.cpp
+++ clang-tools-extra/clangd/index/dex/DexIndex.cpp
@@ -125,10 +125,10 @@
     // using 100x of the requested number might not be good in practice, e.g.
     // when the requested number of items is small.
     const unsigned ItemsToRetrieve = 100 * Req.MaxCandidateCount;
+    auto Root = createLimit(move(QueryIterator), ItemsToRetrieve);
     // FIXME(kbobyrev): Add boosting to the query and utilize retrieved
     // boosting scores.
-    std::vector<std::pair<DocID, float>> SymbolDocIDs =
-        consume(*QueryIterator, ItemsToRetrieve);
+    std::vector<std::pair<DocID, float>> SymbolDocIDs = consume(*Root);
 
     // Retrieve top Req.MaxCandidateCount items.
     std::priority_queue<std::pair<float, const Symbol *>> Top;
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to