This is an automated email from the ASF dual-hosted git repository.

airborne pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 58662209efc [opt](ann index) Introduce IDSelectorRoaring for a faster 
roaring_to_faiss_selector (#58095)
58662209efc is described below

commit 58662209efc0dbda1ebe8eabab8accc3d38b5513
Author: zhiqiang <[email protected]>
AuthorDate: Thu Nov 20 14:13:47 2025 +0800

    [opt](ann index) Introduce IDSelectorRoaring for a faster 
roaring_to_faiss_selector (#58095)
    
    ### What problem does this PR solve?
    
    Previous, roaring_to_faiss_selector is slow since we need to iterator
    through all elements of roaring to get an IDSelector, so we introduce a
    simple IDSelectorRoaring which holds a pointer to the original roaring
    bitmap to avoid costing too much in roaring_to_faiss_selector.
---
 .../segment_v2/ann_index/faiss_ann_index.cpp       | 28 ++++++++----
 .../olap/vector_search/faiss_vector_index_test.cpp | 50 ++++++++++++++++++++++
 2 files changed, 69 insertions(+), 9 deletions(-)

diff --git a/be/src/olap/rowset/segment_v2/ann_index/faiss_ann_index.cpp 
b/be/src/olap/rowset/segment_v2/ann_index/faiss_ann_index.cpp
index 140fc800482..5c29384b876 100644
--- a/be/src/olap/rowset/segment_v2/ann_index/faiss_ann_index.cpp
+++ b/be/src/olap/rowset/segment_v2/ann_index/faiss_ann_index.cpp
@@ -117,18 +117,28 @@ private:
     std::string _previous_name;
 };
 
+class IDSelectorRoaring : public faiss::IDSelector {
+public:
+    explicit IDSelectorRoaring(const roaring::Roaring* roaring) : 
_roaring(roaring) {
+        DCHECK(_roaring != nullptr);
+    }
+
+    bool is_member(faiss::idx_t id) const final {
+        if (id < 0) {
+            return false;
+        }
+        return _roaring->contains(cast_set<vectorized::UInt32>(id));
+    }
+
+private:
+    const roaring::Roaring* _roaring;
+};
+
 } // namespace
 std::unique_ptr<faiss::IDSelector> FaissVectorIndex::roaring_to_faiss_selector(
         const roaring::Roaring& roaring) {
-    std::vector<faiss::idx_t> ids;
-    ids.resize(roaring.cardinality());
-
-    size_t i = 0;
-    for (roaring::Roaring::const_iterator it = roaring.begin(); it != 
roaring.end(); ++it, ++i) {
-        ids[i] = cast_set<faiss::idx_t>(*it);
-    }
-    // construct derived and wrap into base unique_ptr explicitly
-    return std::unique_ptr<faiss::IDSelector>(new 
faiss::IDSelectorBatch(ids.size(), ids.data()));
+    // Wrap the roaring bitmap directly to avoid copying ids into an 
intermediate buffer.
+    return std::unique_ptr<faiss::IDSelector>(new IDSelectorRoaring(&roaring));
 }
 
 void FaissVectorIndex::update_roaring(const faiss::idx_t* labels, const size_t 
n,
diff --git a/be/test/olap/vector_search/faiss_vector_index_test.cpp 
b/be/test/olap/vector_search/faiss_vector_index_test.cpp
index 60da7e939ee..82869837c59 100644
--- a/be/test/olap/vector_search/faiss_vector_index_test.cpp
+++ b/be/test/olap/vector_search/faiss_vector_index_test.cpp
@@ -21,6 +21,7 @@
 
 #include <algorithm>
 #include <cstddef>
+#include <limits>
 #include <memory>
 #include <random>
 #include <string>
@@ -916,6 +917,55 @@ TEST_F(VectorSearchTest, TestIdSelectorWithEmptyRoaring) {
     }
 }
 
+TEST_F(VectorSearchTest, TestIdSelectorRoaringBasicMembership) {
+    auto roaring = std::make_unique<roaring::Roaring>();
+    for (uint32_t i : {0u, 2u, 4u, 1000u}) {
+        roaring->add(i);
+    }
+    auto sel = FaissVectorIndex::roaring_to_faiss_selector(*roaring);
+    for (uint32_t i : {0u, 2u, 4u, 1000u}) {
+        ASSERT_TRUE(sel->is_member(static_cast<faiss::idx_t>(i)))
+                << "Expected id " << i << " present";
+    }
+    for (uint32_t i : {1u, 3u, 5u, 999u, 1001u}) {
+        ASSERT_FALSE(sel->is_member(static_cast<faiss::idx_t>(i)))
+                << "Unexpected id " << i << " present";
+    }
+    ASSERT_FALSE(sel->is_member(-1)) << "Negative ids should never match";
+}
+
+TEST_F(VectorSearchTest, TestIdSelectorRoaringReflectsBitmapUpdates) {
+    auto roaring = std::make_unique<roaring::Roaring>();
+    roaring->add(10);
+    auto sel = FaissVectorIndex::roaring_to_faiss_selector(*roaring);
+
+    ASSERT_TRUE(sel->is_member(10));
+    ASSERT_FALSE(sel->is_member(20));
+
+    roaring->add(20);
+    roaring->remove(10);
+
+    ASSERT_FALSE(sel->is_member(10)) << "Selector should track removals";
+    ASSERT_TRUE(sel->is_member(20)) << "Selector should track additions";
+}
+
+TEST_F(VectorSearchTest, TestIdSelectorRoaringHandlesUInt32Max) {
+    auto roaring = std::make_unique<roaring::Roaring>();
+    constexpr uint32_t kMax = std::numeric_limits<uint32_t>::max();
+    roaring->add(kMax);
+    auto sel = FaissVectorIndex::roaring_to_faiss_selector(*roaring);
+
+    ASSERT_TRUE(sel->is_member(static_cast<faiss::idx_t>(kMax)))
+            << "Expected uint32_t max to be present";
+    bool exception_thrown = false;
+    try {
+        sel->is_member(static_cast<faiss::idx_t>(kMax) + 1);
+    } catch (const std::exception& e) {
+        exception_thrown = true;
+    }
+    ASSERT_TRUE(exception_thrown) << "Expected exception for value beyond 
uint32_t max";
+}
+
 // New tests: radius == 0 or < 0
 TEST_F(VectorSearchTest, L2RangeSearchZeroAndNegativeRadius) {
     const int dim = 32;


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to