kaivalnp commented on code in PR #951:
URL: https://github.com/apache/lucene/pull/951#discussion_r906442078


##########
lucene/core/src/java/org/apache/lucene/search/KnnVectorQuery.java:
##########
@@ -121,36 +120,50 @@ public Query rewrite(IndexReader reader) throws 
IOException {
     return createRewrittenQuery(reader, topK);
   }
 
-  private TopDocs searchLeaf(LeafReaderContext ctx, BitSetCollector 
filterCollector)
-      throws IOException {
+  private TopDocs searchLeaf(LeafReaderContext ctx, Weight filterWeight) 
throws IOException {
+    Bits liveDocs = ctx.reader().getLiveDocs();
+    int maxDoc = ctx.reader().maxDoc();
 
-    if (filterCollector == null) {
-      Bits acceptDocs = ctx.reader().getLiveDocs();
-      return approximateSearch(ctx, acceptDocs, Integer.MAX_VALUE);
+    if (filterWeight == null) {
+      return approximateSearch(ctx, liveDocs, Integer.MAX_VALUE);
     } else {
-      BitSetIterator filterIterator = filterCollector.getIterator(ctx.ord);
-      if (filterIterator == null || filterIterator.cost() == 0) {
+      Scorer scorer = filterWeight.scorer(ctx);
+      if (scorer == null) {
         return NO_RESULTS;
-      }
+      } else {
+        BitSetIterator filterIterator =
+            cacheIntoBitSetIterator(scorer.iterator(), liveDocs, maxDoc);
 
-      if (filterIterator.cost() <= k) {
-        // If there are <= k possible matches, short-circuit and perform exact 
search, since HNSW
-        // must always visit at least k documents
-        return exactSearch(ctx, filterIterator);
+        if (filterIterator.cost() <= k) {
+          return exactSearch(ctx, filterIterator);
+        }
+        TopDocs results =
+            approximateSearch(ctx, filterIterator.getBitSet(), (int) 
filterIterator.cost());
+        if (results.totalHits.relation == TotalHits.Relation.EQUAL_TO) {
+          return results;
+        } else {
+          return exactSearch(ctx, filterIterator);
+        }
       }
+    }
+  }
 
-      // Perform the approximate kNN search
-      Bits acceptDocs =
-          filterIterator.getBitSet(); // The filter iterator already 
incorporates live docs
-      int visitedLimit = (int) filterIterator.cost();
-      TopDocs results = approximateSearch(ctx, acceptDocs, visitedLimit);
-      if (results.totalHits.relation == TotalHits.Relation.EQUAL_TO) {
-        return results;
-      } else {
-        // We stopped the kNN search because it visited too many nodes, so 
fall back to exact search
-        return exactSearch(ctx, filterIterator);
-      }
+  private BitSetIterator cacheIntoBitSetIterator(

Review Comment:
   Sure, we can return a `BitSet` for clarity.
   I think using `cost` there was justified, since it was calculated and set 
during hit collection itself (and was accurate).



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org
For additional commands, e-mail: issues-h...@lucene.apache.org

Reply via email to