kaivalnp commented on code in PR #951: URL: https://github.com/apache/lucene/pull/951#discussion_r906442078
########## lucene/core/src/java/org/apache/lucene/search/KnnVectorQuery.java: ########## @@ -121,36 +120,50 @@ public Query rewrite(IndexReader reader) throws IOException { return createRewrittenQuery(reader, topK); } - private TopDocs searchLeaf(LeafReaderContext ctx, BitSetCollector filterCollector) - throws IOException { + private TopDocs searchLeaf(LeafReaderContext ctx, Weight filterWeight) throws IOException { + Bits liveDocs = ctx.reader().getLiveDocs(); + int maxDoc = ctx.reader().maxDoc(); - if (filterCollector == null) { - Bits acceptDocs = ctx.reader().getLiveDocs(); - return approximateSearch(ctx, acceptDocs, Integer.MAX_VALUE); + if (filterWeight == null) { + return approximateSearch(ctx, liveDocs, Integer.MAX_VALUE); } else { - BitSetIterator filterIterator = filterCollector.getIterator(ctx.ord); - if (filterIterator == null || filterIterator.cost() == 0) { + Scorer scorer = filterWeight.scorer(ctx); + if (scorer == null) { return NO_RESULTS; - } + } else { + BitSetIterator filterIterator = + cacheIntoBitSetIterator(scorer.iterator(), liveDocs, maxDoc); - if (filterIterator.cost() <= k) { - // If there are <= k possible matches, short-circuit and perform exact search, since HNSW - // must always visit at least k documents - return exactSearch(ctx, filterIterator); + if (filterIterator.cost() <= k) { + return exactSearch(ctx, filterIterator); + } + TopDocs results = + approximateSearch(ctx, filterIterator.getBitSet(), (int) filterIterator.cost()); + if (results.totalHits.relation == TotalHits.Relation.EQUAL_TO) { + return results; + } else { + return exactSearch(ctx, filterIterator); + } } + } + } - // Perform the approximate kNN search - Bits acceptDocs = - filterIterator.getBitSet(); // The filter iterator already incorporates live docs - int visitedLimit = (int) filterIterator.cost(); - TopDocs results = approximateSearch(ctx, acceptDocs, visitedLimit); - if (results.totalHits.relation == TotalHits.Relation.EQUAL_TO) { - return results; - } else { - 
// We stopped the kNN search because it visited too many nodes, so fall back to exact search - return exactSearch(ctx, filterIterator); - } + private BitSetIterator cacheIntoBitSetIterator( Review Comment: Sure, we can return a `BitSet` for clarity. I think using `cost` there was justified, since it was calculated and set during hit collection itself (and was accurate). -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org