kbobyrev updated this revision to Diff 162341.
kbobyrev marked 7 inline comments as done.
kbobyrev added a comment.
Address a round of comments & simplify code.
https://reviews.llvm.org/D51029
Files:
clang-tools-extra/clangd/index/dex/DexIndex.cpp
clang-tools-extra/clangd/index/dex/Iterator.cpp
clang-tools-extra/clangd/index/dex/Iterator.h
clang-tools-extra/unittests/clangd/DexIndexTests.cpp
Index: clang-tools-extra/unittests/clangd/DexIndexTests.cpp
===================================================================
--- clang-tools-extra/unittests/clangd/DexIndexTests.cpp
+++ clang-tools-extra/unittests/clangd/DexIndexTests.cpp
@@ -30,8 +30,10 @@
namespace {
std::vector<DocID>
-consumeIDs(Iterator &It, size_t Limit = std::numeric_limits<size_t>::max()) {
- auto IDAndScore = consume(It, Limit);
+consumeIDs(std::unique_ptr<Iterator> It,
+ size_t Limit = std::numeric_limits<size_t>::max()) {
+ auto Root = createLimit(move(It), Limit);
+ auto IDAndScore = consume(*Root);
std::vector<DocID> IDs(IDAndScore.size());
for (size_t I = 0; I < IDAndScore.size(); ++I)
IDs[I] = IDAndScore[I].first;
@@ -71,7 +73,7 @@
auto AndWithEmpty = createAnd(create(L0), create(L1));
EXPECT_TRUE(AndWithEmpty->reachedEnd());
- EXPECT_THAT(consumeIDs(*AndWithEmpty), ElementsAre());
+ EXPECT_THAT(consumeIDs(move(AndWithEmpty)), ElementsAre());
}
TEST(DexIndexIterators, AndTwoLists) {
@@ -81,7 +83,7 @@
auto And = createAnd(create(L1), create(L0));
EXPECT_FALSE(And->reachedEnd());
- EXPECT_THAT(consumeIDs(*And), ElementsAre(0U, 7U, 10U, 320U, 9000U));
+ EXPECT_THAT(consumeIDs(move(And)), ElementsAre(0U, 7U, 10U, 320U, 9000U));
And = createAnd(create(L0), create(L1));
@@ -122,7 +124,7 @@
auto OrWithEmpty = createOr(create(L0), create(L1));
EXPECT_FALSE(OrWithEmpty->reachedEnd());
- EXPECT_THAT(consumeIDs(*OrWithEmpty),
+ EXPECT_THAT(consumeIDs(move(OrWithEmpty)),
ElementsAre(0U, 5U, 7U, 10U, 42U, 320U, 9000U));
}
@@ -155,7 +157,7 @@
Or = createOr(create(L0), create(L1));
- EXPECT_THAT(consumeIDs(*Or),
+ EXPECT_THAT(consumeIDs(move(Or)),
ElementsAre(0U, 4U, 5U, 7U, 10U, 30U, 42U, 60U, 320U, 9000U));
}
@@ -234,13 +236,13 @@
Root->advanceTo(1);
Root->advanceTo(0);
EXPECT_EQ(Root->peek(), 1U);
- auto ElementBoost = Root->consume(Root->peek());
+ auto ElementBoost = Root->consume();
EXPECT_THAT(ElementBoost, 6);
Root->advance();
EXPECT_EQ(Root->peek(), 5U);
Root->advanceTo(5);
EXPECT_EQ(Root->peek(), 5U);
- ElementBoost = Root->consume(Root->peek());
+ ElementBoost = Root->consume();
EXPECT_THAT(ElementBoost, 8);
Root->advanceTo(9000);
EXPECT_TRUE(Root->reachedEnd());
@@ -265,64 +267,67 @@
}
TEST(DexIndexIterators, Limit) {
- const PostingList L0 = {4, 7, 8, 20, 42, 100};
- const PostingList L1 = {1, 3, 5, 8, 9};
- const PostingList L2 = {1, 5, 7, 9};
- const PostingList L3 = {0, 5};
- const PostingList L4 = {0, 1, 5};
- const PostingList L5;
+ const PostingList L0 = {3, 6, 7, 20, 42, 100};
+ const PostingList L1 = {1, 3, 5, 6, 7, 30, 100};
+ const PostingList L2 = {0, 3, 5, 7, 8, 100};
auto DocIterator = create(L0);
- EXPECT_THAT(consumeIDs(*DocIterator, 42), ElementsAre(4, 7, 8, 20, 42, 100));
+ EXPECT_THAT(consumeIDs(move(DocIterator), 42),
+ ElementsAre(3, 6, 7, 20, 42, 100));
DocIterator = create(L0);
- EXPECT_THAT(consumeIDs(*DocIterator), ElementsAre(4, 7, 8, 20, 42, 100));
+ EXPECT_THAT(consumeIDs(move(DocIterator)), ElementsAre(3, 6, 7, 20, 42, 100));
DocIterator = create(L0);
- EXPECT_THAT(consumeIDs(*DocIterator, 3), ElementsAre(4, 7, 8));
+ EXPECT_THAT(consumeIDs(move(DocIterator), 3), ElementsAre(3, 6, 7));
DocIterator = create(L0);
- EXPECT_THAT(consumeIDs(*DocIterator, 0), ElementsAre());
+ EXPECT_THAT(consumeIDs(move(DocIterator), 0), ElementsAre());
+
+ auto AndIterator =
+ createAnd(createLimit(createTrue(9000), 343), createLimit(create(L0), 2),
+ createLimit(create(L1), 3), createLimit(create(L2), 42));
+ EXPECT_THAT(consumeIDs(move(AndIterator)), ElementsAre(3, 7));
}
TEST(DexIndexIterators, True) {
auto TrueIterator = createTrue(0U);
EXPECT_TRUE(TrueIterator->reachedEnd());
- EXPECT_THAT(consumeIDs(*TrueIterator), ElementsAre());
+ EXPECT_THAT(consumeIDs(move(TrueIterator)), ElementsAre());
PostingList L0 = {1, 2, 5, 7};
TrueIterator = createTrue(7U);
EXPECT_THAT(TrueIterator->peek(), 0);
auto AndIterator = createAnd(create(L0), move(TrueIterator));
EXPECT_FALSE(AndIterator->reachedEnd());
- EXPECT_THAT(consumeIDs(*AndIterator), ElementsAre(1, 2, 5));
+ EXPECT_THAT(consumeIDs(move(AndIterator)), ElementsAre(1, 2, 5));
}
TEST(DexIndexIterators, Boost) {
auto BoostIterator = createBoost(createTrue(5U), 42U);
EXPECT_FALSE(BoostIterator->reachedEnd());
- auto ElementBoost = BoostIterator->consume(BoostIterator->peek());
+ auto ElementBoost = BoostIterator->consume();
EXPECT_THAT(ElementBoost, 42U);
const PostingList L0 = {2, 4};
const PostingList L1 = {1, 4};
auto Root = createOr(createTrue(5U), createBoost(create(L0), 2U),
createBoost(create(L1), 3U));
- ElementBoost = Root->consume(Root->peek());
+ ElementBoost = Root->consume();
EXPECT_THAT(ElementBoost, Iterator::DEFAULT_BOOST_SCORE);
Root->advance();
EXPECT_THAT(Root->peek(), 1U);
- ElementBoost = Root->consume(Root->peek());
+ ElementBoost = Root->consume();
EXPECT_THAT(ElementBoost, 3);
Root->advance();
EXPECT_THAT(Root->peek(), 2U);
- ElementBoost = Root->consume(Root->peek());
+ ElementBoost = Root->consume();
EXPECT_THAT(ElementBoost, 2);
Root->advanceTo(4);
- ElementBoost = Root->consume(Root->peek());
+ ElementBoost = Root->consume();
EXPECT_THAT(ElementBoost, 3);
}
Index: clang-tools-extra/clangd/index/dex/Iterator.h
===================================================================
--- clang-tools-extra/clangd/index/dex/Iterator.h
+++ clang-tools-extra/clangd/index/dex/Iterator.h
@@ -87,13 +87,14 @@
///
/// Note: reachedEnd() must be false.
virtual DocID peek() const = 0;
- /// Retrieves boosting score. Query tree root should pass Root->peek() to this
- /// function, the parameter is needed to propagate through the tree. Given ID
- /// should be compared against BOOST iterator peek()s: some of the iterators
- /// would not point to the item which was propagated to the top of the query
- /// tree (e.g. if these iterators are branches of OR iterator) and hence
- /// shouldn't apply any boosting to the consumed item.
- virtual float consume(DocID ID) = 0;
+ /// Informs the iterator that the current document was consumed, and returns
+ /// its boost.
+ ///
+ /// Note: If this iterator has any child iterators that contain the document,
+ /// consume() should be called on those and their boosts incorporated.
+ /// consume() must *not* be called on children that don't contain the current
+ /// doc.
+ virtual float consume() = 0;
virtual ~Iterator() {}
@@ -118,17 +119,15 @@
virtual llvm::raw_ostream &dump(llvm::raw_ostream &OS) const = 0;
};
-/// Advances the iterator until it is either exhausted or the number of
-/// requested items is reached. Returns pairs of document IDs with the
-/// corresponding boosting score.
+/// Advances the iterator until it is exhausted. Returns pairs of document IDs
+/// with the corresponding boosting score.
///
/// Boosting can be seen as a compromise between retrieving too many items and
/// calculating finals score for each of them (which might be very expensive)
/// and not retrieving enough items so that items with very high final score
/// would not be processed. Boosting score is a computationally efficient way
/// to acquire preliminary scores of requested items.
-std::vector<std::pair<DocID, float>>
-consume(Iterator &It, size_t Limit = std::numeric_limits<size_t>::max());
+std::vector<std::pair<DocID, float>> consume(Iterator &It);
/// Returns a document iterator over given PostingList.
///
@@ -165,6 +164,13 @@
std::unique_ptr<Iterator> createBoost(std::unique_ptr<Iterator> Child,
float Factor);
+/// Returns LIMIT iterator, which yields up to N elements of its child iterator.
+/// Elements only count towards the limit if they are part of the final result
+/// set. Therefore the following iterator (AND (2) (LIMIT (1 2) 1)) yields (2),
+/// not ().
+std::unique_ptr<Iterator> createLimit(std::unique_ptr<Iterator> Child,
+ size_t Limit);
+
/// This allows createAnd(create(...), create(...)) syntax.
template <typename... Args> std::unique_ptr<Iterator> createAnd(Args... args) {
std::vector<std::unique_ptr<Iterator>> Children;
Index: clang-tools-extra/clangd/index/dex/Iterator.cpp
===================================================================
--- clang-tools-extra/clangd/index/dex/Iterator.cpp
+++ clang-tools-extra/clangd/index/dex/Iterator.cpp
@@ -46,7 +46,7 @@
return *Index;
}
- float consume(DocID ID) override { return DEFAULT_BOOST_SCORE; }
+ float consume() override { return DEFAULT_BOOST_SCORE; }
private:
llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override {
@@ -105,16 +105,12 @@
DocID peek() const override { return Children.front()->peek(); }
- // If not exhausted and points to the given item, consume() returns the
- // product of Children->consume(ID). Otherwise, DEFAULT_BOOST_SCORE is
- // returned.
- float consume(DocID ID) override {
- if (reachedEnd() || peek() != ID)
- return DEFAULT_BOOST_SCORE;
+ float consume() override {
+ assert(!reachedEnd() && "AndIterator can't consume() at the end.");
return std::accumulate(
begin(Children), end(Children), DEFAULT_BOOST_SCORE,
[&](float Current, const std::unique_ptr<Iterator> &Child) {
- return Current * Child->consume(ID);
+ return Current * Child->consume();
});
}
@@ -226,15 +222,16 @@
// Returns the maximum boosting score among all Children when iterator is not
// exhausted and points to the given ID, DEFAULT_BOOST_SCORE otherwise.
- float consume(DocID ID) override {
- if (reachedEnd() || peek() != ID)
- return DEFAULT_BOOST_SCORE;
+ float consume() override {
+ assert(!reachedEnd() &&
+ "OrIterator can't consume() after it reached the end.");
+ const DocID ID = peek();
return std::accumulate(
begin(Children), end(Children), DEFAULT_BOOST_SCORE,
- [&](float Current, const std::unique_ptr<Iterator> &Child) {
+ [&](float Boost, const std::unique_ptr<Iterator> &Child) {
return (!Child->reachedEnd() && Child->peek() == ID)
- ? std::max(Current, Child->consume(ID))
- : Current;
+ ? std::max(Boost, Child->consume())
+ : Boost;
});
}
@@ -278,7 +275,7 @@
return Index;
}
- float consume(DocID) override { return DEFAULT_BOOST_SCORE; }
+ float consume() override { return DEFAULT_BOOST_SCORE; }
private:
llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override {
@@ -306,7 +303,7 @@
DocID peek() const override { return Child->peek(); }
- float consume(DocID ID) override { return Child->consume(ID) * Factor; }
+ float consume() override { return Child->consume() * Factor; }
private:
llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override {
@@ -318,15 +315,50 @@
float Factor;
};
+/// This iterator limits the number of items retrieved from the child iterator
+/// on top of the query tree. To ensure that query tree with LIMIT iterators
+/// inside works correctly, users have to call Root->consume(Root->peek()) each
+/// time item is retrieved at the root of query tree.
+class LimitIterator : public Iterator {
+public:
+ LimitIterator(std::unique_ptr<Iterator> Child, size_t Limit)
+ : Child(move(Child)), Limit(Limit), ItemsLeft(Limit) {}
+
+ bool reachedEnd() const override {
+ return ItemsLeft == 0 || Child->reachedEnd();
+ }
+
+ void advance() override { Child->advance(); }
+
+ void advanceTo(DocID ID) override { Child->advanceTo(ID); }
+
+ DocID peek() const override { return Child->peek(); }
+
+ /// Decreases the limit in case the element consumed at top of the query tree
+ /// comes from the underlying iterator.
+ float consume() override {
+ assert(!reachedEnd() && "LimitIterator can't consume at the end.");
+ --ItemsLeft;
+ return Child->consume();
+ }
+
+private:
+ llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override {
+ OS << "(LIMIT " << Limit << '(' << ItemsLeft << ") " << *Child << ')';
+ return OS;
+ }
+
+ std::unique_ptr<Iterator> Child;
+ size_t Limit;
+ size_t ItemsLeft;
+};
+
} // end namespace
-std::vector<std::pair<DocID, float>> consume(Iterator &It, size_t Limit) {
+std::vector<std::pair<DocID, float>> consume(Iterator &It) {
std::vector<std::pair<DocID, float>> Result;
- for (size_t Retrieved = 0; !It.reachedEnd() && Retrieved < Limit;
- It.advance(), ++Retrieved) {
- DocID Document = It.peek();
- Result.push_back(std::make_pair(Document, It.consume(Document)));
- }
+ for (; !It.reachedEnd(); It.advance())
+ Result.push_back(std::make_pair(It.peek(), It.consume()));
return Result;
}
@@ -353,6 +385,11 @@
return llvm::make_unique<BoostIterator>(move(Child), Factor);
}
+std::unique_ptr<Iterator> createLimit(std::unique_ptr<Iterator> Child,
+ size_t Size) {
+ return llvm::make_unique<LimitIterator>(move(Child), Size);
+}
+
} // namespace dex
} // namespace clangd
} // namespace clang
Index: clang-tools-extra/clangd/index/dex/DexIndex.cpp
===================================================================
--- clang-tools-extra/clangd/index/dex/DexIndex.cpp
+++ clang-tools-extra/clangd/index/dex/DexIndex.cpp
@@ -125,10 +125,10 @@
// using 100x of the requested number might not be good in practice, e.g.
// when the requested number of items is small.
const unsigned ItemsToRetrieve = 100 * Req.MaxCandidateCount;
+ auto Root = createLimit(move(QueryIterator), ItemsToRetrieve);
// FIXME(kbobyrev): Add boosting to the query and utilize retrieved
// boosting scores.
- std::vector<std::pair<DocID, float>> SymbolDocIDs =
- consume(*QueryIterator, ItemsToRetrieve);
+ std::vector<std::pair<DocID, float>> SymbolDocIDs = consume(*Root);
// Retrieve top Req.MaxCandidateCount items.
std::priority_queue<std::pair<float, const Symbol *>> Top;
_______________________________________________
cfe-commits mailing list
[email protected]
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits