maedhroz commented on code in PR #4353:
URL: https://github.com/apache/cassandra/pull/4353#discussion_r2710981338


##########
src/java/org/apache/cassandra/index/sai/disk/v1/segment/VectorIndexSegmentSearcher.java:
##########
@@ -245,45 +311,54 @@ public KeyRangeIterator limitToTopKResults(QueryContext context, List<PrimaryKey
                         break;
 
                     int segmentRowId = metadata.toSegmentRowId(sstableRowId);
-                    rowIds.add(segmentRowId);
                     // VSTODO now that we know the size of keys evaluated, is it worth doing the brute
                     // force check eagerly to potentially skip the PK to sstable row id to ordinal lookup?
                     int ordinal = ordinalsView.getOrdinalForRowId(segmentRowId);
                     if (ordinal >= 0)
-                        bits.set(ordinal);
+                        segmentOrdinalPairs.add(segmentRowId, ordinal);
                 }
             }
 
-            if (shouldUseBruteForce(topK, limit, rowIds.size()))
-                return toPrimaryKeyIterator(new IntArrayPostingList(rowIds.toIntArray()), context);
+            int topK = optimizeFor.topKFor(limit);
+            float[] queryVector = index.termType().decomposeVector(orderer.lower().value.raw.duplicate());
+
+            if (shouldUseBruteForce(topK, limit, segmentOrdinalPairs.size()))
+            {
+                return toScoreSortedIterator(orderByBruteForce(queryVector, segmentOrdinalPairs, limit, topK));
+            }
 
+            SparseFixedBitSet bits = bitSetForSearch();
+            segmentOrdinalPairs.forEachOrdinal(bits::set);
             // else ask the index to perform a search limited to the bits we created
-            float[] queryVector = index.termType().decomposeVector(expression.lower().value.raw.duplicate());
-            VectorPostingList results = graph.search(queryVector, topK, limit, bits);
-            updateExpectedNodes(results.getVisitedCount(), expectedNodesVisited(topK, maxSegmentRowId, graph.size()));
-            return toPrimaryKeyIterator(results, context);
+            int expectedNodesVisited = expectedNodesVisited(limit, segmentOrdinalPairs.size(), graph.size());
+            IntConsumer nodesVisitedConsumer = nodesVisited -> updateExpectedNodes(nodesVisited, expectedNodesVisited);
+            CloseableIterator<RowIdWithScore> result = graph.search(queryVector, topK, limit, bits, nodesVisitedConsumer);
+            return toScoreSortedIterator(result);
         }
     }
 
     private boolean shouldUseBruteForce(int topK, int limit, int numRows)
     {
         // if we have a small number of results then let TopK processor do exact NN computation
-        int maxBruteForceRows = min(globalBruteForceRows, maxBruteForceRows(topK, numRows, graph.size()));
+        int maxBruteForceRows = maxBruteForceRows(topK, numRows, graph.size());
         logger.trace("SAI materialized {} rows; max brute force rows is {} for sstable index with {} nodes, LIMIT {}",
                      numRows, maxBruteForceRows, graph.size(), limit);
         Tracing.trace("SAI materialized {} rows; max brute force rows is {} for sstable index with {} nodes, LIMIT {}",
                       numRows, maxBruteForceRows, graph.size(), limit);
-        return numRows <= maxBruteForceRows;
+        return FORCE_BRUTE_FORCE_ANN == null ? numRows <= maxBruteForceRows
+                                             : FORCE_BRUTE_FORCE_ANN;
     }
 
     private int maxBruteForceRows(int limit, int nPermittedOrdinals, int graphSize)
     {
-        int expectedNodes = expectedNodesVisited(limit, nPermittedOrdinals, graphSize);
-        // ANN index will do a bunch of extra work besides the full comparisons (performing PQ similarity for each edge);
-        // brute force from sstable will also do a bunch of extra work (going through trie index to look up row).
-        // VSTODO I'm not sure which one is more expensive (and it depends on things like sstable chunk cache hit ratio)
-        // so I'm leaving it as a 1:1 ratio for now.
-        return max(limit, expectedNodes);
+        int expectedNodesVisited = expectedNodesVisited(limit, nPermittedOrdinals, graphSize);
+        int expectedComparisons = index.indexWriterConfig().getMaximumNodeConnections() * expectedNodesVisited;
+        // in-memory comparisons are cheaper than pulling a row off disk and then comparing
+        // VSTODO this is dramatically oversimplified
+        // larger dimension should increase this, because comparisons are more expensive
+        // lower chunk cache hit ratio should decrease this, because loading rows is more expensive

Review Comment:
   Still trying to make sense of this...so the comparison here is between sorting already materialized `SegmentRowIdOrdinalPairs` and searching the on-disk graph? ...or does it still have to do with pulling rows?
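
(Aside: a minimal, hypothetical illustration of what sorting the already materialized `SegmentRowIdOrdinalPairs` amounts to on the brute-force path; the types and the dot-product scoring below are stand-ins, not the SAI classes.)

```java
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;

// Hypothetical miniature of the brute-force path; stand-in types, not the SAI classes.
final class MaterializedPairsSortSketch
{
    // Stand-in for a (segmentRowId, graph ordinal) pair as collected in the diff's loop.
    record Pair(int segmentRowId, int ordinal) {}

    // Brute-force path: score every materialized pair's vector against the query and
    // sort by score descending; exactly pairs.size() comparisons, no graph traversal.
    static List<Pair> orderByBruteForce(float[] query, List<Pair> pairs, float[][] vectorsByOrdinal, int topK)
    {
        List<Pair> sorted = new ArrayList<>(pairs);
        sorted.sort(Comparator.comparingDouble((Pair p) -> -dot(query, vectorsByOrdinal[p.ordinal()])));
        return sorted.subList(0, Math.min(topK, sorted.size()));
    }

    // The graph path is not reproduced here: it would set one bit per ordinal and ask the
    // on-disk graph for the topK nearest neighbours restricted to those bits.

    static float dot(float[] a, float[] b)
    {
        float sum = 0;
        for (int i = 0; i < a.length; i++)
            sum += a[i] * b[i];
        return sum;
    }

    public static void main(String[] args)
    {
        float[][] vectorsByOrdinal = { { 1, 0 }, { 0, 1 }, { 0.7f, 0.7f } };
        List<Pair> pairs = List.of(new Pair(10, 0), new Pair(11, 1), new Pair(12, 2));
        // Query (1, 0): ordinals 0 and 2 score highest, so segment row ids 10 and 12 come back.
        System.out.println(orderByBruteForce(new float[] { 1, 0 }, pairs, vectorsByOrdinal, 2));
    }
}
```

Whether the heuristic is weighing that sort against the graph traversal or against the cost of pulling rows is the question raised above.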



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

