This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch branch-4.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-4.0 by this push:
new 0cf45d18e51 branch-4.0: [opt](ann index) Introduce IDSelectorRoaring
for a faster roaring_to_faiss_selector #58095 (#58177)
0cf45d18e51 is described below
commit 0cf45d18e51ca36022d5b2cbbcdc85e55a404e48
Author: github-actions[bot]
<41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Thu Nov 20 18:39:26 2025 +0800
branch-4.0: [opt](ann index) Introduce IDSelectorRoaring for a faster
roaring_to_faiss_selector #58095 (#58177)
Cherry-picked from #58095
Co-authored-by: zhiqiang <[email protected]>
---
.../segment_v2/ann_index/faiss_ann_index.cpp | 28 ++++++++----
.../olap/vector_search/faiss_vector_index_test.cpp | 50 ++++++++++++++++++++++
2 files changed, 69 insertions(+), 9 deletions(-)
diff --git a/be/src/olap/rowset/segment_v2/ann_index/faiss_ann_index.cpp
b/be/src/olap/rowset/segment_v2/ann_index/faiss_ann_index.cpp
index 140fc800482..5c29384b876 100644
--- a/be/src/olap/rowset/segment_v2/ann_index/faiss_ann_index.cpp
+++ b/be/src/olap/rowset/segment_v2/ann_index/faiss_ann_index.cpp
@@ -117,18 +117,28 @@ private:
std::string _previous_name;
};
+class IDSelectorRoaring : public faiss::IDSelector {
+public:
+ explicit IDSelectorRoaring(const roaring::Roaring* roaring) :
_roaring(roaring) {
+ DCHECK(_roaring != nullptr);
+ }
+
+ bool is_member(faiss::idx_t id) const final {
+ if (id < 0) {
+ return false;
+ }
+ return _roaring->contains(cast_set<vectorized::UInt32>(id));
+ }
+
+private:
+ const roaring::Roaring* _roaring;
+};
+
} // namespace
std::unique_ptr<faiss::IDSelector> FaissVectorIndex::roaring_to_faiss_selector(
const roaring::Roaring& roaring) {
- std::vector<faiss::idx_t> ids;
- ids.resize(roaring.cardinality());
-
- size_t i = 0;
- for (roaring::Roaring::const_iterator it = roaring.begin(); it !=
roaring.end(); ++it, ++i) {
- ids[i] = cast_set<faiss::idx_t>(*it);
- }
- // construct derived and wrap into base unique_ptr explicitly
- return std::unique_ptr<faiss::IDSelector>(new
faiss::IDSelectorBatch(ids.size(), ids.data()));
+ // Wrap the roaring bitmap directly to avoid copying ids into an
intermediate buffer.
+ return std::unique_ptr<faiss::IDSelector>(new IDSelectorRoaring(&roaring));
}
void FaissVectorIndex::update_roaring(const faiss::idx_t* labels, const size_t
n,
diff --git a/be/test/olap/vector_search/faiss_vector_index_test.cpp
b/be/test/olap/vector_search/faiss_vector_index_test.cpp
index 60da7e939ee..82869837c59 100644
--- a/be/test/olap/vector_search/faiss_vector_index_test.cpp
+++ b/be/test/olap/vector_search/faiss_vector_index_test.cpp
@@ -21,6 +21,7 @@
#include <algorithm>
#include <cstddef>
+#include <limits>
#include <memory>
#include <random>
#include <string>
@@ -916,6 +917,55 @@ TEST_F(VectorSearchTest, TestIdSelectorWithEmptyRoaring) {
}
}
+TEST_F(VectorSearchTest, TestIdSelectorRoaringBasicMembership) {
+ auto roaring = std::make_unique<roaring::Roaring>();
+ for (uint32_t i : {0u, 2u, 4u, 1000u}) {
+ roaring->add(i);
+ }
+ auto sel = FaissVectorIndex::roaring_to_faiss_selector(*roaring);
+ for (uint32_t i : {0u, 2u, 4u, 1000u}) {
+ ASSERT_TRUE(sel->is_member(static_cast<faiss::idx_t>(i)))
+ << "Expected id " << i << " present";
+ }
+ for (uint32_t i : {1u, 3u, 5u, 999u, 1001u}) {
+ ASSERT_FALSE(sel->is_member(static_cast<faiss::idx_t>(i)))
+ << "Unexpected id " << i << " present";
+ }
+ ASSERT_FALSE(sel->is_member(-1)) << "Negative ids should never match";
+}
+
+TEST_F(VectorSearchTest, TestIdSelectorRoaringReflectsBitmapUpdates) {
+ auto roaring = std::make_unique<roaring::Roaring>();
+ roaring->add(10);
+ auto sel = FaissVectorIndex::roaring_to_faiss_selector(*roaring);
+
+ ASSERT_TRUE(sel->is_member(10));
+ ASSERT_FALSE(sel->is_member(20));
+
+ roaring->add(20);
+ roaring->remove(10);
+
+ ASSERT_FALSE(sel->is_member(10)) << "Selector should track removals";
+ ASSERT_TRUE(sel->is_member(20)) << "Selector should track additions";
+}
+
+TEST_F(VectorSearchTest, TestIdSelectorRoaringHandlesUInt32Max) {
+ auto roaring = std::make_unique<roaring::Roaring>();
+ constexpr uint32_t kMax = std::numeric_limits<uint32_t>::max();
+ roaring->add(kMax);
+ auto sel = FaissVectorIndex::roaring_to_faiss_selector(*roaring);
+
+ ASSERT_TRUE(sel->is_member(static_cast<faiss::idx_t>(kMax)))
+ << "Expected uint32_t max to be present";
+ bool exception_thrown = false;
+ try {
+ sel->is_member(static_cast<faiss::idx_t>(kMax) + 1);
+ } catch (const std::exception& e) {
+ exception_thrown = true;
+ }
+ ASSERT_TRUE(exception_thrown) << "Expected exception for value beyond
uint32_t max";
+}
+
// New tests: radius == 0 or < 0
TEST_F(VectorSearchTest, L2RangeSearchZeroAndNegativeRadius) {
const int dim = 32;
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]