michaeljmarshall commented on code in PR #4353:
URL: https://github.com/apache/cassandra/pull/4353#discussion_r2674069635


##########
src/java/org/apache/cassandra/index/sai/disk/v1/segment/VectorIndexSegmentSearcher.java:
##########
@@ -245,35 +316,42 @@ public KeyRangeIterator limitToTopKResults(QueryContext 
context, List<PrimaryKey
                         break;
 
                     int segmentRowId = metadata.toSegmentRowId(sstableRowId);
-                    rowIds.add(segmentRowId);
                     // VSTODO now that we know the size of keys evaluated, is 
it worth doing the brute
                     // force check eagerly to potentially skip the PK to 
sstable row id to ordinal lookup?
                     int ordinal = 
ordinalsView.getOrdinalForRowId(segmentRowId);
                     if (ordinal >= 0)
-                        bits.set(ordinal);
+                        segmentOrdinalPairs.add(segmentRowId, ordinal);
                 }
             }
 
-            if (shouldUseBruteForce(topK, limit, rowIds.size()))
-                return toPrimaryKeyIterator(new 
IntArrayPostingList(rowIds.toIntArray()), context);
+            int topK = optimizeFor.topKFor(limit);
+            float[] queryVector = 
index.termType().decomposeVector(expression.lower().value.raw.duplicate());
+
+            if (shouldUseBruteForce(topK, limit, segmentOrdinalPairs.size()))
+            {
+                return toScoreSortedIterator(orderByBruteForce(queryVector, 
segmentOrdinalPairs, limit, topK));
+            }
 
+            SparseFixedBitSet bits = bitSetForSearch();
+            segmentOrdinalPairs.forEachOrdinal(bits::set);
             // else ask the index to perform a search limited to the bits we 
created
-            float[] queryVector = 
index.termType().decomposeVector(expression.lower().value.raw.duplicate());
-            VectorPostingList results = graph.search(queryVector, topK, limit, 
bits);
-            updateExpectedNodes(results.getVisitedCount(), 
expectedNodesVisited(topK, maxSegmentRowId, graph.size()));
-            return toPrimaryKeyIterator(results, context);
+            int expectedNodesVisited = expectedNodesVisited(limit, 
segmentOrdinalPairs.size(), graph.size());
+            IntConsumer nodesVisitedConsumer = nodesVisited -> 
updateExpectedNodes(nodesVisited, expectedNodesVisited);
+            CloseableIterator<RowIdWithScore> result = 
graph.search(queryVector, topK, limit, bits, nodesVisitedConsumer);
+            return toScoreSortedIterator(result);
         }
     }
 
     private boolean shouldUseBruteForce(int topK, int limit, int numRows)
     {
         // if we have a small number of results then let TopK processor do 
exact NN computation
-        int maxBruteForceRows = min(globalBruteForceRows, 
maxBruteForceRows(topK, numRows, graph.size()));
+        int maxBruteForceRows = maxBruteForceRows(topK, numRows, graph.size());
         logger.trace("SAI materialized {} rows; max brute force rows is {} for 
sstable index with {} nodes, LIMIT {}",
                      numRows, maxBruteForceRows, graph.size(), limit);
         Tracing.trace("SAI materialized {} rows; max brute force rows is {} 
for sstable index with {} nodes, LIMIT {}",
                       numRows, maxBruteForceRows, graph.size(), limit);
-        return numRows <= maxBruteForceRows;
+        return FORCE_BRUTE_FORCE_ANN == null ? numRows <= maxBruteForceRows
+                                             : FORCE_BRUTE_FORCE_ANN;
     }
 
     private int maxBruteForceRows(int limit, int nPermittedOrdinals, int 
graphSize)

Review Comment:
   Correct, though I'm not sure when this changed. We rely on the vector graph 
having a copy of each row's vectors.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to