This is an automated email from the ASF dual-hosted git repository.
airborne pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 58662209efc [opt](ann index) Introduce IDSelectorRoaring for a faster
roaring_to_faiss_selector (#58095)
58662209efc is described below
commit 58662209efc0dbda1ebe8eabab8accc3d38b5513
Author: zhiqiang <[email protected]>
AuthorDate: Thu Nov 20 14:13:47 2025 +0800
[opt](ann index) Introduce IDSelectorRoaring for a faster
roaring_to_faiss_selector (#58095)
### What problem does this PR solve?
Previous, roaring_to_faiss_selector is slow since we need to iterator
through all elements of roaring to get an IDSelector, so we introduce a
simple IDSelectorRoaring which holds a pointer to the original roaring
bitmap to avoid costing too much in roaring_to_faiss_selector.
---
.../segment_v2/ann_index/faiss_ann_index.cpp | 28 ++++++++----
.../olap/vector_search/faiss_vector_index_test.cpp | 50 ++++++++++++++++++++++
2 files changed, 69 insertions(+), 9 deletions(-)
diff --git a/be/src/olap/rowset/segment_v2/ann_index/faiss_ann_index.cpp
b/be/src/olap/rowset/segment_v2/ann_index/faiss_ann_index.cpp
index 140fc800482..5c29384b876 100644
--- a/be/src/olap/rowset/segment_v2/ann_index/faiss_ann_index.cpp
+++ b/be/src/olap/rowset/segment_v2/ann_index/faiss_ann_index.cpp
@@ -117,18 +117,28 @@ private:
std::string _previous_name;
};
+class IDSelectorRoaring : public faiss::IDSelector {
+public:
+ explicit IDSelectorRoaring(const roaring::Roaring* roaring) :
_roaring(roaring) {
+ DCHECK(_roaring != nullptr);
+ }
+
+ bool is_member(faiss::idx_t id) const final {
+ if (id < 0) {
+ return false;
+ }
+ return _roaring->contains(cast_set<vectorized::UInt32>(id));
+ }
+
+private:
+ const roaring::Roaring* _roaring;
+};
+
} // namespace
std::unique_ptr<faiss::IDSelector> FaissVectorIndex::roaring_to_faiss_selector(
const roaring::Roaring& roaring) {
- std::vector<faiss::idx_t> ids;
- ids.resize(roaring.cardinality());
-
- size_t i = 0;
- for (roaring::Roaring::const_iterator it = roaring.begin(); it !=
roaring.end(); ++it, ++i) {
- ids[i] = cast_set<faiss::idx_t>(*it);
- }
- // construct derived and wrap into base unique_ptr explicitly
- return std::unique_ptr<faiss::IDSelector>(new
faiss::IDSelectorBatch(ids.size(), ids.data()));
+ // Wrap the roaring bitmap directly to avoid copying ids into an
intermediate buffer.
+ return std::unique_ptr<faiss::IDSelector>(new IDSelectorRoaring(&roaring));
}
void FaissVectorIndex::update_roaring(const faiss::idx_t* labels, const size_t
n,
diff --git a/be/test/olap/vector_search/faiss_vector_index_test.cpp
b/be/test/olap/vector_search/faiss_vector_index_test.cpp
index 60da7e939ee..82869837c59 100644
--- a/be/test/olap/vector_search/faiss_vector_index_test.cpp
+++ b/be/test/olap/vector_search/faiss_vector_index_test.cpp
@@ -21,6 +21,7 @@
#include <algorithm>
#include <cstddef>
+#include <limits>
#include <memory>
#include <random>
#include <string>
@@ -916,6 +917,55 @@ TEST_F(VectorSearchTest, TestIdSelectorWithEmptyRoaring) {
}
}
+TEST_F(VectorSearchTest, TestIdSelectorRoaringBasicMembership) {
+ auto roaring = std::make_unique<roaring::Roaring>();
+ for (uint32_t i : {0u, 2u, 4u, 1000u}) {
+ roaring->add(i);
+ }
+ auto sel = FaissVectorIndex::roaring_to_faiss_selector(*roaring);
+ for (uint32_t i : {0u, 2u, 4u, 1000u}) {
+ ASSERT_TRUE(sel->is_member(static_cast<faiss::idx_t>(i)))
+ << "Expected id " << i << " present";
+ }
+ for (uint32_t i : {1u, 3u, 5u, 999u, 1001u}) {
+ ASSERT_FALSE(sel->is_member(static_cast<faiss::idx_t>(i)))
+ << "Unexpected id " << i << " present";
+ }
+ ASSERT_FALSE(sel->is_member(-1)) << "Negative ids should never match";
+}
+
+TEST_F(VectorSearchTest, TestIdSelectorRoaringReflectsBitmapUpdates) {
+ auto roaring = std::make_unique<roaring::Roaring>();
+ roaring->add(10);
+ auto sel = FaissVectorIndex::roaring_to_faiss_selector(*roaring);
+
+ ASSERT_TRUE(sel->is_member(10));
+ ASSERT_FALSE(sel->is_member(20));
+
+ roaring->add(20);
+ roaring->remove(10);
+
+ ASSERT_FALSE(sel->is_member(10)) << "Selector should track removals";
+ ASSERT_TRUE(sel->is_member(20)) << "Selector should track additions";
+}
+
+TEST_F(VectorSearchTest, TestIdSelectorRoaringHandlesUInt32Max) {
+ auto roaring = std::make_unique<roaring::Roaring>();
+ constexpr uint32_t kMax = std::numeric_limits<uint32_t>::max();
+ roaring->add(kMax);
+ auto sel = FaissVectorIndex::roaring_to_faiss_selector(*roaring);
+
+ ASSERT_TRUE(sel->is_member(static_cast<faiss::idx_t>(kMax)))
+ << "Expected uint32_t max to be present";
+ bool exception_thrown = false;
+ try {
+ sel->is_member(static_cast<faiss::idx_t>(kMax) + 1);
+ } catch (const std::exception& e) {
+ exception_thrown = true;
+ }
+ ASSERT_TRUE(exception_thrown) << "Expected exception for value beyond
uint32_t max";
+}
+
// New tests: radius == 0 or < 0
TEST_F(VectorSearchTest, L2RangeSearchZeroAndNegativeRadius) {
const int dim = 32;
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]