This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-4.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-4.1 by this push:
     new f1cc4a373fd [fix](ann-index) Fix ANN range search state leakage and incorrect slot index tracking. (#63965)
f1cc4a373fd is described below

commit f1cc4a373fdc7303fcfa7af4481ed400ef47269b
Author: Qi Chen <[email protected]>
AuthorDate: Tue Jun 2 12:05:39 2026 +0800

    [fix](ann-index) Fix ANN range search state leakage and incorrect slot index tracking. (#63965)
    
    ### What problem does this PR solve?
    
    Issue Number: close #xxx
    
    Related PR: #xxx
    
    Problem Summary:
    
    ### Release note
    
    Cherrypick #63666
    
    ### Check List (For Author)
    
    - Test <!-- At least one of them must be included. -->
        - [ ] Regression test
        - [ ] Unit Test
        - [ ] Manual test (add detailed scripts or steps below)
        - [ ] No need to test or manual test. Explain why:
            - [ ] This is a refactor/code format and no logic has been changed.
            - [ ] Previous test can cover this change.
            - [ ] No code files have been changed.
            - [ ] Other reason <!-- Add your reason?  -->
    
    - Behavior changed:
        - [ ] No.
        - [ ] Yes. <!-- Explain the behavior change -->
    
    - Does this need documentation?
        - [ ] No.
        - [ ] Yes. <!-- Add document PR link here. eg:
    https://github.com/apache/doris-website/pull/1214 -->
    
    ### Check List (For Reviewer who merge this PR)
    
    - [ ] Confirm the release note
    - [ ] Confirm test cases
    - [ ] Confirm document
    - [ ] Add branch pick label <!-- Add branch pick label that this PR
    should merge into -->
---
 be/src/exprs/vectorized_fn_call.cpp                |  19 +-
 be/src/exprs/vectorized_fn_call.h                  |   3 +-
 be/src/exprs/vexpr.cpp                             |  12 +-
 be/src/exprs/vexpr.h                               |  23 ++-
 be/src/exprs/vexpr_context.cpp                     |  37 +++-
 be/src/exprs/vexpr_context.h                       |   3 +-
 be/src/exprs/virtual_slot_ref.cpp                  |   7 +-
 be/src/exprs/virtual_slot_ref.h                    |   5 +-
 be/src/storage/segment/segment_iterator.cpp        |  15 +-
 .../storage/index/ann/ann_range_search_test.cpp    | 210 ++++++++++++++++++++-
 .../ann_range_search_pushdown_regression.groovy    | 141 ++++++++++++++
 ...ge_search_source_index_status_regression.groovy |  84 +++++++++
 12 files changed, 500 insertions(+), 59 deletions(-)

diff --git a/be/src/exprs/vectorized_fn_call.cpp 
b/be/src/exprs/vectorized_fn_call.cpp
index a190891bdad..e28c08422db 100644
--- a/be/src/exprs/vectorized_fn_call.cpp
+++ b/be/src/exprs/vectorized_fn_call.cpp
@@ -535,7 +535,9 @@ Status VectorizedFnCall::evaluate_ann_range_search(
         const std::vector<std::unique_ptr<segment_v2::IndexIterator>>& 
cid_to_index_iterators,
         const std::vector<ColumnId>& idx_to_cid,
         const std::vector<std::unique_ptr<segment_v2::ColumnIterator>>& 
column_iterators,
-        roaring::Roaring& row_bitmap, segment_v2::AnnIndexStats& 
ann_index_stats) {
+        roaring::Roaring& row_bitmap, segment_v2::AnnIndexStats& 
ann_index_stats,
+        AnnRangeSearchEvaluationResult& evaluation_result) {
+    evaluation_result = {};
     if (range_search_runtime.is_ann_range_search == false) {
         return Status::OK();
     }
@@ -544,8 +546,8 @@ Status VectorizedFnCall::evaluate_ann_range_search(
                               range_search_runtime.to_string());
     size_t origin_num = row_bitmap.cardinality();
 
-    int idx_in_block = static_cast<int>(range_search_runtime.src_col_idx);
-    DCHECK(idx_in_block < idx_to_cid.size())
+    const auto idx_in_block = range_search_runtime.src_col_idx;
+    DCHECK_LT(idx_in_block, idx_to_cid.size())
             << "idx_in_block: " << idx_in_block << ", idx_to_cid.size(): " << 
idx_to_cid.size();
 
     ColumnId src_col_cid = idx_to_cid[idx_in_block];
@@ -626,6 +628,7 @@ Status VectorizedFnCall::evaluate_ann_range_search(
     row_bitmap = *result.roaring;
 
     // Process virtual column
+    bool dist_fulfilled = false;
     if (range_search_runtime.dst_col_idx >= 0) {
         // Prepare materialization if we can use result from index.
         // Typical situation: range search and operator is LE or LT.
@@ -649,7 +652,7 @@ Status VectorizedFnCall::evaluate_ann_range_search(
             }
             
virtual_column_iterator->prepare_materialization(std::move(distance_col),
                                                              
std::move(result.row_ids));
-            _virtual_column_is_fulfilled = true;
+            dist_fulfilled = true;
         } else {
             // Whether the ANN index should have produced distance depends on 
metric and operator:
             //  - L2: distance is produced for LE/LT; not produced for GE/GT
@@ -663,17 +666,17 @@ Status VectorizedFnCall::evaluate_ann_range_search(
             // If we expected distance but didn't get it, assert in debug to 
catch logic errors.
             DCHECK(!should_have_distance) << "Expected distance from ANN index 
but got none";
 #endif
-            _virtual_column_is_fulfilled = false;
         }
     } else {
         // Dest is not virtual column.
-        _virtual_column_is_fulfilled = true;
+        dist_fulfilled = true;
     }
 
-    _has_been_executed = true;
+    evaluation_result.executed = true;
+    evaluation_result.dist_fulfilled = dist_fulfilled;
     VLOG_DEBUG << fmt::format(
             "Ann range search filtered {} rows, origin {} rows, virtual column 
is full-filled: {}",
-            origin_num - row_bitmap.cardinality(), origin_num, 
_virtual_column_is_fulfilled);
+            origin_num - row_bitmap.cardinality(), origin_num, dist_fulfilled);
 
     ann_index_stats = *stats;
     return Status::OK();
diff --git a/be/src/exprs/vectorized_fn_call.h 
b/be/src/exprs/vectorized_fn_call.h
index 979a0d5c1f8..f90206c6d81 100644
--- a/be/src/exprs/vectorized_fn_call.h
+++ b/be/src/exprs/vectorized_fn_call.h
@@ -92,7 +92,8 @@ public:
             const std::vector<std::unique_ptr<segment_v2::IndexIterator>>& 
cid_to_index_iterators,
             const std::vector<ColumnId>& idx_to_cid,
             const std::vector<std::unique_ptr<segment_v2::ColumnIterator>>& 
column_iterators,
-            roaring::Roaring& row_bitmap, segment_v2::AnnIndexStats& 
ann_index_stats) override;
+            roaring::Roaring& row_bitmap, segment_v2::AnnIndexStats& 
ann_index_stats,
+            AnnRangeSearchEvaluationResult& result) override;
 
     void prepare_ann_range_search(const doris::VectorSearchUserParams& params,
                                   segment_v2::AnnRangeSearchRuntime& runtime,
diff --git a/be/src/exprs/vexpr.cpp b/be/src/exprs/vexpr.cpp
index 7ab49cb4349..5c80aecede0 100644
--- a/be/src/exprs/vexpr.cpp
+++ b/be/src/exprs/vexpr.cpp
@@ -1017,7 +1017,9 @@ Status VExpr::evaluate_ann_range_search(
         const std::vector<std::unique_ptr<segment_v2::IndexIterator>>& 
index_iterators,
         const std::vector<ColumnId>& idx_to_cid,
         const std::vector<std::unique_ptr<segment_v2::ColumnIterator>>& 
column_iterators,
-        roaring::Roaring& row_bitmap, AnnIndexStats& ann_index_stats) {
+        roaring::Roaring& row_bitmap, AnnIndexStats& ann_index_stats,
+        AnnRangeSearchEvaluationResult& result) {
+    result = {};
     return Status::OK();
 }
 
@@ -1035,14 +1037,6 @@ void VExpr::prepare_ann_range_search(const 
doris::VectorSearchUserParams& params
     }
 }
 
-bool VExpr::ann_range_search_executedd() {
-    return _has_been_executed;
-}
-
-bool VExpr::ann_dist_is_fulfilled() const {
-    return _virtual_column_is_fulfilled;
-}
-
 Status VExpr::execute_filter(VExprContext* context, const Block* block,
                              uint8_t* __restrict result_filter_data, size_t 
rows, bool accept_null,
                              bool* can_filter_all) const {
diff --git a/be/src/exprs/vexpr.h b/be/src/exprs/vexpr.h
index 011c3a40dda..196ffd1b082 100644
--- a/be/src/exprs/vexpr.h
+++ b/be/src/exprs/vexpr.h
@@ -80,6 +80,15 @@ struct AnnRangeSearchRuntime;
 
 using Selector = IColumn::Selector;
 
+struct AnnRangeSearchEvaluationResult {
+    // Indicates whether the expr row_bitmap has been updated.
+    bool executed = false;
+    // Indicates whether the virtual column is fulfilled.
+    // NOTE, if there is no virtual column in the expr tree, and expr
+    // is evaluated by ann index, this flag is still true.
+    bool dist_fulfilled = false;
+};
+
 class VExpr {
 public:
     // resize inserted param column to make sure column size equal to 
block.rows() and return param column index
@@ -343,7 +352,8 @@ public:
             const std::vector<std::unique_ptr<segment_v2::IndexIterator>>& 
cid_to_index_iterators,
             const std::vector<ColumnId>& idx_to_cid,
             const std::vector<std::unique_ptr<segment_v2::ColumnIterator>>& 
column_iterators,
-            roaring::Roaring& row_bitmap, segment_v2::AnnIndexStats& 
ann_index_stats);
+            roaring::Roaring& row_bitmap, segment_v2::AnnIndexStats& 
ann_index_stats,
+            AnnRangeSearchEvaluationResult& result);
 
     // Prepare the runtime for ANN range search.
     // AnnRangeSearchRuntime is used to store the runtime information of ann 
range search.
@@ -353,10 +363,6 @@ public:
                                           segment_v2::AnnRangeSearchRuntime& 
range_search_runtime,
                                           bool& suitable_for_ann_index);
 
-    bool ann_range_search_executedd();
-
-    bool ann_dist_is_fulfilled() const;
-
     virtual uint64_t get_digest(uint64_t seed) const;
 
 protected:
@@ -439,13 +445,6 @@ protected:
     // ensuring uniqueness during index traversal
     uint32_t _index_unique_id = 0;
     bool _enable_inverted_index_query = true;
-
-    // Indicates whether the expr row_bitmap has been updated.
-    bool _has_been_executed = false;
-    // Indicates whether the virtual column is fulfilled.
-    // NOTE, if there is no virtual column in the expr tree, and expr
-    // is evaluated by ann index, this flag is still true.
-    bool _virtual_column_is_fulfilled = false;
 };
 
 // NOLINTBEGIN(readability-function-size)
diff --git a/be/src/exprs/vexpr_context.cpp b/be/src/exprs/vexpr_context.cpp
index b6e5c2a08e8..29e0b25cf49 100644
--- a/be/src/exprs/vexpr_context.cpp
+++ b/be/src/exprs/vexpr_context.cpp
@@ -433,41 +433,58 @@ Status VExprContext::evaluate_ann_range_search(
         const std::vector<std::unique_ptr<segment_v2::ColumnIterator>>& 
column_iterators,
         const std::unordered_map<VExprContext*, std::unordered_map<ColumnId, 
VExpr*>>&
                 common_expr_to_slotref_map,
-        roaring::Roaring& row_bitmap, segment_v2::AnnIndexStats& 
ann_index_stats) {
+        roaring::Roaring& row_bitmap, segment_v2::AnnIndexStats& 
ann_index_stats,
+        bool* ann_range_search_executed) {
+    if (ann_range_search_executed != nullptr) {
+        *ann_range_search_executed = false;
+    }
     if (_root == nullptr) {
         return Status::OK();
     }
 
+    AnnRangeSearchEvaluationResult evaluation_result;
     RETURN_IF_ERROR(_root->evaluate_ann_range_search(
             _ann_range_search_runtime, cid_to_index_iterators, idx_to_cid, 
column_iterators,
-            row_bitmap, ann_index_stats));
+            row_bitmap, ann_index_stats, evaluation_result));
 
-    if (!_root->ann_range_search_executedd()) {
+    if (!evaluation_result.executed) {
         return Status::OK();
     }
+    if (ann_range_search_executed != nullptr) {
+        *ann_range_search_executed = true;
+    }
+
+    DCHECK(_index_context != nullptr);
+    _index_context->set_index_result_for_expr(
+            _root.get(),
+            
segment_v2::InvertedIndexResultBitmap(std::make_shared<roaring::Roaring>(row_bitmap),
+                                                  
std::make_shared<roaring::Roaring>()));
 
-    if (!_root->ann_dist_is_fulfilled()) {
+    if (!evaluation_result.dist_fulfilled) {
         // Do not perform index scan in this case.
         return Status::OK();
     }
 
-    auto src_col_idx = _ann_range_search_runtime.src_col_idx;
+    DCHECK_LT(_ann_range_search_runtime.src_col_idx, idx_to_cid.size());
+    const auto src_col_idx = 
cast_set<int>(_ann_range_search_runtime.src_col_idx);
+    const auto src_col_key = 
cast_set<ColumnId>(_ann_range_search_runtime.src_col_idx);
     auto slot_ref_map_it = common_expr_to_slotref_map.find(this);
     if (slot_ref_map_it == common_expr_to_slotref_map.end()) {
         return Status::OK();
     }
     auto& slot_ref_map = slot_ref_map_it->second;
-    ColumnId cid = idx_to_cid[src_col_idx];
-    if (slot_ref_map.find(cid) == slot_ref_map.end()) {
+    auto slot_ref_it = slot_ref_map.find(src_col_key);
+    if (slot_ref_it == slot_ref_map.end()) {
         return Status::OK();
     }
-    const VExpr* slot_ref_expr_addr = slot_ref_map.find(cid)->second;
-    _index_context->set_true_for_index_status(slot_ref_expr_addr, 
idx_to_cid[cid]);
+    const VExpr* slot_ref_expr_addr = slot_ref_it->second;
+    _index_context->set_true_for_index_status(slot_ref_expr_addr, src_col_idx);
 
     VLOG_DEBUG << fmt::format(
             "Evaluate ann range search for expr {}, src_col_idx {}, cid {}, 
row_bitmap "
             "cardinality {}",
-            _root->debug_string(), src_col_idx, cid, row_bitmap.cardinality());
+            _root->debug_string(), src_col_idx, 
idx_to_cid[_ann_range_search_runtime.src_col_idx],
+            row_bitmap.cardinality());
     return Status::OK();
 }
 
diff --git a/be/src/exprs/vexpr_context.h b/be/src/exprs/vexpr_context.h
index fd4c42fe85a..ccd385d3a93 100644
--- a/be/src/exprs/vexpr_context.h
+++ b/be/src/exprs/vexpr_context.h
@@ -395,7 +395,8 @@ public:
             const std::vector<std::unique_ptr<segment_v2::ColumnIterator>>& 
column_iterators,
             const std::unordered_map<VExprContext*, 
std::unordered_map<ColumnId, VExpr*>>&
                     common_expr_to_slotref_map,
-            roaring::Roaring& row_bitmap, segment_v2::AnnIndexStats& 
ann_index_stats);
+            roaring::Roaring& row_bitmap, segment_v2::AnnIndexStats& 
ann_index_stats,
+            bool* ann_range_search_executed);
 
     uint64_t get_digest(uint64_t seed) const;
 
diff --git a/be/src/exprs/virtual_slot_ref.cpp 
b/be/src/exprs/virtual_slot_ref.cpp
index f7f767192ac..66448b39af1 100644
--- a/be/src/exprs/virtual_slot_ref.cpp
+++ b/be/src/exprs/virtual_slot_ref.cpp
@@ -234,12 +234,11 @@ Status VirtualSlotRef::evaluate_ann_range_search(
         const std::vector<std::unique_ptr<segment_v2::IndexIterator>>& 
cid_to_index_iterators,
         const std::vector<ColumnId>& idx_to_cid,
         const std::vector<std::unique_ptr<segment_v2::ColumnIterator>>& 
column_iterators,
-        roaring::Roaring& row_bitmap, segment_v2::AnnIndexStats& 
ann_index_stats) {
+        roaring::Roaring& row_bitmap, segment_v2::AnnIndexStats& 
ann_index_stats,
+        AnnRangeSearchEvaluationResult& result) {
     return _virtual_column_expr->evaluate_ann_range_search(
             range_search_runtime, cid_to_index_iterators, idx_to_cid, 
column_iterators, row_bitmap,
-            ann_index_stats);
-
-    return Status::OK();
+            ann_index_stats, result);
 }
 #include "common/compile_check_end.h"
 } // namespace doris
diff --git a/be/src/exprs/virtual_slot_ref.h b/be/src/exprs/virtual_slot_ref.h
index e604d593376..382a3b45e34 100644
--- a/be/src/exprs/virtual_slot_ref.h
+++ b/be/src/exprs/virtual_slot_ref.h
@@ -107,7 +107,8 @@ public:
             const std::vector<std::unique_ptr<segment_v2::IndexIterator>>& 
cid_to_index_iterators,
             const std::vector<ColumnId>& idx_to_cid,
             const std::vector<std::unique_ptr<segment_v2::ColumnIterator>>& 
column_iterators,
-            roaring::Roaring& row_bitmap, segment_v2::AnnIndexStats& 
ann_index_stats) override;
+            roaring::Roaring& row_bitmap, segment_v2::AnnIndexStats& 
ann_index_stats,
+            AnnRangeSearchEvaluationResult& result) override;
 
 #ifdef BE_TEST
     // Test-only setter methods for unit testing
@@ -130,4 +131,4 @@ private:
     DataTypePtr _column_data_type;               ///< Data type of the column
 };
 #include "common/compile_check_end.h"
-} // namespace doris
\ No newline at end of file
+} // namespace doris
diff --git a/be/src/storage/segment/segment_iterator.cpp 
b/be/src/storage/segment/segment_iterator.cpp
index 3a9c37aafcd..fb336f8b25b 100644
--- a/be/src/storage/segment/segment_iterator.cpp
+++ b/be/src/storage/segment/segment_iterator.cpp
@@ -1240,9 +1240,14 @@ Status SegmentIterator::_apply_index_expr() {
     for (const auto& expr_ctx : _common_expr_ctxs_push_down) {
         segment_v2::AnnIndexStats ann_index_stats;
         size_t origin_rows = _row_bitmap.cardinality();
+        bool ann_range_search_executed = false;
         RETURN_IF_ERROR(expr_ctx->evaluate_ann_range_search(
                 _index_iterators, _schema->column_ids(), _column_iterators,
-                _common_expr_to_slotref_map, _row_bitmap, ann_index_stats));
+                _common_expr_to_slotref_map, _row_bitmap, ann_index_stats,
+                &ann_range_search_executed));
+        if (ann_range_search_executed) {
+            _opts.stats->ann_index_range_search_cnt++;
+        }
         _opts.stats->rows_ann_index_range_filtered += (origin_rows - 
_row_bitmap.cardinality());
         _opts.stats->ann_index_load_ns += 
ann_index_stats.load_index_costs_ns.value();
         _opts.stats->ann_index_range_search_ns += 
ann_index_stats.search_costs_ns.value();
@@ -1258,14 +1263,6 @@ Status SegmentIterator::_apply_index_expr() {
         _opts.stats->ann_fall_back_brute_force_cnt += 
ann_index_stats.fall_back_brute_force_cnt;
     }
 
-    for (auto it = _common_expr_ctxs_push_down.begin(); it != 
_common_expr_ctxs_push_down.end();) {
-        if ((*it)->root()->ann_range_search_executedd()) {
-            _opts.stats->ann_index_range_search_cnt++;
-            it = _common_expr_ctxs_push_down.erase(it);
-        } else {
-            ++it;
-        }
-    }
     return Status::OK();
 }
 
diff --git a/be/test/storage/index/ann/ann_range_search_test.cpp 
b/be/test/storage/index/ann/ann_range_search_test.cpp
index 0cbf9ee6e9e..400e822695c 100644
--- a/be/test/storage/index/ann/ann_range_search_test.cpp
+++ b/be/test/storage/index/ann/ann_range_search_test.cpp
@@ -23,7 +23,9 @@
 #include <cassert>
 #include <cstdint>
 #include <iostream>
+#include <map>
 #include <memory>
+#include <unordered_map>
 #include <vector>
 
 #include "common/object_pool.h"
@@ -54,6 +56,18 @@ const std::string ann_range_search_thrift =
 const std::string thrift_table_desc =
         
R"xxx({"1":{"lst":["rec",7,{"1":{"i32":0},"2":{"i32":0},"3":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5},"5":{"i32":0}}}}]},"3":{"i64":-1}}},"4":{"i32":-1},"5":{"i32":-1},"6":{"i32":0},"7":{"i32":-1},"8":{"str":"id"},"9":{"i32":0},"10":{"tf":1},"11":{"i32":0},"12":{"tf":1},"13":{"tf":1},"14":{"tf":0},"17":{"i32":5}},{"1":{"i32":1},"2":{"i32":0},"3":{"rec":{"1":{"lst":["rec",2,{"1":{"i32":1},"4":{"tf":1},"5":{"lst":["tf",1,1]}},{"1":{"i32":0},"2":{"rec":{"1
 [...]
 
+static std::shared_ptr<IndexExecContext> create_index_context(
+        const std::vector<ColumnId>& col_ids,
+        const std::vector<std::unique_ptr<segment_v2::IndexIterator>>& 
index_iterators,
+        std::vector<IndexFieldNameAndTypePair>& storage_name_and_type,
+        std::unordered_map<ColumnId, std::unordered_map<const VExpr*, bool>>&
+                common_expr_index_status) {
+    segment_v2::ColumnIteratorOptions column_iter_opts;
+    return std::make_shared<IndexExecContext>(col_ids, index_iterators, 
storage_name_and_type,
+                                              common_expr_index_status, 
nullptr, nullptr,
+                                              column_iter_opts);
+}
+
 TEST_F(VectorSearchTest, TestPrepareAnnRangeSearch) {
     TExpr texpr = read_from_json<TExpr>(ann_range_search_thrift);
     // std::cout << "range_search thrift:\n" << 
apache::thrift::ThriftDebugString(texpr) << std::endl;
@@ -166,11 +180,17 @@ TEST_F(VectorSearchTest, TestEvaluateAnnRangeSearch) {
     segment_v2::AnnIndexStats stats;
     std::unordered_map<VExprContext*, std::unordered_map<ColumnId, VExpr*>>
             common_expr_to_slotref_map;
+    std::vector<IndexFieldNameAndTypePair> storage_name_and_type(4);
+    std::unordered_map<ColumnId, std::unordered_map<const VExpr*, bool>> 
common_expr_index_status;
+    range_search_ctx->set_index_context(create_index_context(
+            idx_to_cid, cid_to_index_iterators, storage_name_and_type, 
common_expr_index_status));
+    bool ann_range_search_executed = false;
     ASSERT_TRUE(range_search_ctx
                         ->evaluate_ann_range_search(cid_to_index_iterators, 
idx_to_cid,
                                                     column_iterators, 
common_expr_to_slotref_map,
-                                                    row_bitmap, stats)
+                                                    row_bitmap, stats, 
&ann_range_search_executed)
                         .ok());
+    EXPECT_TRUE(ann_range_search_executed);
 
     doris::segment_v2::VirtualColumnIterator* virtual_column_iter =
             
dynamic_cast<doris::segment_v2::VirtualColumnIterator*>(column_iterators[3].get());
@@ -263,11 +283,17 @@ TEST_F(VectorSearchTest, TestEvaluateAnnRangeSearch2) {
     segment_v2::AnnIndexStats stats;
     std::unordered_map<VExprContext*, std::unordered_map<ColumnId, VExpr*>>
             common_expr_to_slotref_map;
+    std::vector<IndexFieldNameAndTypePair> storage_name_and_type(4);
+    std::unordered_map<ColumnId, std::unordered_map<const VExpr*, bool>> 
common_expr_index_status;
+    range_search_ctx->set_index_context(create_index_context(
+            idx_to_cid, cid_to_index_iterators, storage_name_and_type, 
common_expr_index_status));
+    bool ann_range_search_executed = false;
     ASSERT_TRUE(range_search_ctx
                         ->evaluate_ann_range_search(cid_to_index_iterators, 
idx_to_cid,
                                                     column_iterators, 
common_expr_to_slotref_map,
-                                                    row_bitmap, stats)
+                                                    row_bitmap, stats, 
&ann_range_search_executed)
                         .ok());
+    EXPECT_TRUE(ann_range_search_executed);
 
     doris::segment_v2::VirtualColumnIterator* virtual_column_iter =
             
dynamic_cast<doris::segment_v2::VirtualColumnIterator*>(column_iterators[3].get());
@@ -284,6 +310,184 @@ TEST_F(VectorSearchTest, TestEvaluateAnnRangeSearch2) {
     EXPECT_EQ(get_row_id_to_idx.size(), 10);
 }
 
+TEST_F(VectorSearchTest, 
TestEvaluateAnnRangeSearchStateDoesNotLeakAcrossClones) {
+    TExpr texpr = read_from_json<TExpr>(ann_range_search_thrift);
+    TDescriptorTable table1 = 
read_from_json<TDescriptorTable>(thrift_table_desc);
+    std::unique_ptr<doris::ObjectPool> pool = 
std::make_unique<doris::ObjectPool>();
+    auto desc_tbl = std::make_unique<DescriptorTbl>();
+    DescriptorTbl* desc_tbl_ptr = desc_tbl.get();
+    ASSERT_TRUE(DescriptorTbl::create(pool.get(), table1, 
&(desc_tbl_ptr)).ok());
+    RowDescriptor row_desc = RowDescriptor(*desc_tbl_ptr, {0}, {false});
+    std::unique_ptr<doris::RuntimeState> state = 
std::make_unique<doris::RuntimeState>();
+    state->set_desc_tbl(desc_tbl_ptr);
+
+    VExprContextSPtr range_search_ctx;
+    ASSERT_TRUE(VExpr::create_expr_tree(texpr, range_search_ctx).ok());
+    ASSERT_TRUE(range_search_ctx->prepare(state.get(), row_desc).ok());
+    ASSERT_TRUE(range_search_ctx->open(state.get()).ok());
+    doris::VectorSearchUserParams user_params;
+    range_search_ctx->prepare_ann_range_search(user_params);
+
+    VExprContextSPtr segment_with_ann_ctx;
+    ASSERT_TRUE(range_search_ctx->clone(state.get(), 
segment_with_ann_ctx).ok());
+    VExprContextSPtr segment_without_ann_ctx;
+    ASSERT_TRUE(range_search_ctx->clone(state.get(), 
segment_without_ann_ctx).ok());
+    ASSERT_EQ(segment_with_ann_ctx->root().get(), 
segment_without_ann_ctx->root().get());
+
+    std::vector<ColumnId> idx_to_cid = {0, 1, 2, 3};
+    std::vector<std::unique_ptr<segment_v2::IndexIterator>> 
ann_index_iterators(4);
+    ann_index_iterators[1] = 
std::make_unique<doris::vector_search_utils::MockAnnIndexIterator>();
+    auto* mock_ann_index_iter = 
dynamic_cast<doris::vector_search_utils::MockAnnIndexIterator*>(
+            ann_index_iterators[1].get());
+    std::map<std::string, std::string> properties;
+    properties["index_type"] = "hnsw";
+    properties["metric_type"] = "l2_distance";
+    properties["dim"] = "8";
+    auto pair = vector_search_utils::create_tmp_ann_index_reader(properties);
+    mock_ann_index_iter->_ann_reader = pair.second;
+
+    std::vector<std::unique_ptr<segment_v2::ColumnIterator>> 
ann_column_iterators(4);
+    ann_column_iterators[3] = 
std::make_unique<doris::segment_v2::VirtualColumnIterator>();
+    std::vector<IndexFieldNameAndTypePair> ann_storage_name_and_type(4);
+    std::unordered_map<ColumnId, std::unordered_map<const VExpr*, bool>> 
ann_index_status;
+    segment_with_ann_ctx->set_index_context(create_index_context(
+            idx_to_cid, ann_index_iterators, ann_storage_name_and_type, 
ann_index_status));
+
+    EXPECT_CALL(*mock_ann_index_iter, range_search(testing::_, testing::_, 
testing::_, testing::_))
+            .WillOnce(testing::Invoke([](const 
doris::segment_v2::AnnRangeSearchParams& params,
+                                         const doris::VectorSearchUserParams& 
custom_params,
+                                         
doris::segment_v2::AnnRangeSearchResult* result,
+                                         doris::segment_v2::AnnIndexStats* 
stats) {
+                result->roaring = std::make_shared<roaring::Roaring>();
+                result->roaring->add(1);
+                result->roaring->add(3);
+                result->row_ids = nullptr;
+                result->distance = nullptr;
+                return Status::OK();
+            }));
+
+    roaring::Roaring ann_row_bitmap;
+    segment_v2::AnnIndexStats ann_stats;
+    std::unordered_map<VExprContext*, std::unordered_map<ColumnId, VExpr*>>
+            common_expr_to_slotref_map;
+    bool ann_range_search_executed = false;
+    ASSERT_TRUE(segment_with_ann_ctx
+                        ->evaluate_ann_range_search(ann_index_iterators, 
idx_to_cid,
+                                                    ann_column_iterators,
+                                                    
common_expr_to_slotref_map, ann_row_bitmap,
+                                                    ann_stats, 
&ann_range_search_executed)
+                        .ok());
+    EXPECT_TRUE(ann_range_search_executed);
+    const auto* ann_result = 
segment_with_ann_ctx->get_index_context()->get_index_result_for_expr(
+            segment_with_ann_ctx->root().get());
+    ASSERT_NE(ann_result, nullptr);
+    ASSERT_NE(ann_result->get_data_bitmap(), nullptr);
+    EXPECT_EQ(ann_result->get_data_bitmap()->cardinality(), 2);
+
+    std::vector<std::unique_ptr<segment_v2::IndexIterator>> 
no_ann_index_iterators(4);
+    std::vector<std::unique_ptr<segment_v2::ColumnIterator>> 
no_ann_column_iterators(4);
+    no_ann_column_iterators[3] = 
std::make_unique<doris::segment_v2::VirtualColumnIterator>();
+    std::vector<IndexFieldNameAndTypePair> no_ann_storage_name_and_type(4);
+    std::unordered_map<ColumnId, std::unordered_map<const VExpr*, bool>> 
no_ann_index_status;
+    segment_without_ann_ctx->set_index_context(create_index_context(
+            idx_to_cid, no_ann_index_iterators, no_ann_storage_name_and_type, 
no_ann_index_status));
+
+    roaring::Roaring no_ann_row_bitmap;
+    segment_v2::AnnIndexStats no_ann_stats;
+    bool no_ann_range_search_executed = true;
+    ASSERT_TRUE(segment_without_ann_ctx
+                        ->evaluate_ann_range_search(no_ann_index_iterators, 
idx_to_cid,
+                                                    no_ann_column_iterators,
+                                                    
common_expr_to_slotref_map, no_ann_row_bitmap,
+                                                    no_ann_stats, 
&no_ann_range_search_executed)
+                        .ok());
+    EXPECT_FALSE(no_ann_range_search_executed);
+    
EXPECT_FALSE(segment_without_ann_ctx->get_index_context()->has_index_result_for_expr(
+            segment_without_ann_ctx->root().get()));
+}
+
+TEST_F(VectorSearchTest, 
TestEvaluateAnnRangeSearchUsesSourceColumnIndexForSlotMap) {
+    TExpr texpr = read_from_json<TExpr>(ann_range_search_thrift);
+    TExprNode& opNode = texpr.nodes[0];
+    opNode.opcode = TExprOpcode::LT;
+    opNode.fn.name.function_name = doris::NameLess::name;
+    TDescriptorTable table1 = 
read_from_json<TDescriptorTable>(thrift_table_desc);
+    std::unique_ptr<doris::ObjectPool> pool = 
std::make_unique<doris::ObjectPool>();
+    auto desc_tbl = std::make_unique<DescriptorTbl>();
+    DescriptorTbl* desc_tbl_ptr = desc_tbl.get();
+    ASSERT_TRUE(DescriptorTbl::create(pool.get(), table1, 
&(desc_tbl_ptr)).ok());
+    RowDescriptor row_desc = RowDescriptor(*desc_tbl_ptr, {0}, {false});
+    std::unique_ptr<doris::RuntimeState> state = 
std::make_unique<doris::RuntimeState>();
+    state->set_desc_tbl(desc_tbl_ptr);
+
+    VExprContextSPtr range_search_ctx;
+    ASSERT_TRUE(VExpr::create_expr_tree(texpr, range_search_ctx).ok());
+    ASSERT_TRUE(range_search_ctx->prepare(state.get(), row_desc).ok());
+    ASSERT_TRUE(range_search_ctx->open(state.get()).ok());
+    doris::VectorSearchUserParams user_params;
+    range_search_ctx->prepare_ann_range_search(user_params);
+    ASSERT_EQ(range_search_ctx->_ann_range_search_runtime.src_col_idx, 1);
+    ASSERT_EQ(range_search_ctx->_ann_range_search_runtime.dst_col_idx, 3);
+
+    std::vector<ColumnId> idx_to_cid = {0, 5, 6, 7};
+    std::vector<std::unique_ptr<segment_v2::IndexIterator>> 
cid_to_index_iterators(8);
+    cid_to_index_iterators[5] =
+            
std::make_unique<doris::vector_search_utils::MockAnnIndexIterator>();
+    auto* mock_ann_index_iter = 
dynamic_cast<doris::vector_search_utils::MockAnnIndexIterator*>(
+            cid_to_index_iterators[5].get());
+    std::map<std::string, std::string> properties;
+    properties["index_type"] = "hnsw";
+    properties["metric_type"] = "l2_distance";
+    properties["dim"] = "8";
+    auto pair = vector_search_utils::create_tmp_ann_index_reader(properties);
+    mock_ann_index_iter->_ann_reader = pair.second;
+
+    std::vector<std::unique_ptr<segment_v2::ColumnIterator>> 
column_iterators(8);
+    column_iterators[7] = 
std::make_unique<doris::segment_v2::VirtualColumnIterator>();
+    std::vector<IndexFieldNameAndTypePair> storage_name_and_type(8);
+    std::unordered_map<ColumnId, std::unordered_map<const VExpr*, bool>> 
common_expr_index_status;
+    common_expr_index_status[5][range_search_ctx->root().get()] = false;
+    range_search_ctx->set_index_context(create_index_context(
+            idx_to_cid, cid_to_index_iterators, storage_name_and_type, 
common_expr_index_status));
+
+    std::unordered_map<VExprContext*, std::unordered_map<ColumnId, VExpr*>>
+            common_expr_to_slotref_map;
+    common_expr_to_slotref_map[range_search_ctx.get()][1] = 
range_search_ctx->root().get();
+
+    EXPECT_CALL(*mock_ann_index_iter, range_search(testing::_, testing::_, 
testing::_, testing::_))
+            .WillOnce(testing::Invoke([](const 
doris::segment_v2::AnnRangeSearchParams& params,
+                                         const doris::VectorSearchUserParams& 
custom_params,
+                                         
doris::segment_v2::AnnRangeSearchResult* result,
+                                         doris::segment_v2::AnnIndexStats* 
stats) {
+                constexpr size_t num_results = 3;
+                result->roaring = std::make_shared<roaring::Roaring>();
+                result->row_ids = std::make_shared<std::vector<uint64_t>>();
+                result->distance = std::shared_ptr<float[]>(new 
float[num_results]);
+                for (size_t i = 0; i < num_results; ++i) {
+                    result->roaring->add(i * 2);
+                    result->row_ids->push_back(i * 2);
+                    result->distance[i] = static_cast<float>(i);
+                }
+                return Status::OK();
+            }));
+
+    roaring::Roaring row_bitmap;
+    segment_v2::AnnIndexStats stats;
+    bool ann_range_search_executed = false;
+    ASSERT_TRUE(range_search_ctx
+                        ->evaluate_ann_range_search(cid_to_index_iterators, 
idx_to_cid,
+                                                    column_iterators, 
common_expr_to_slotref_map,
+                                                    row_bitmap, stats, 
&ann_range_search_executed)
+                        .ok());
+    EXPECT_TRUE(ann_range_search_executed);
+    EXPECT_TRUE(common_expr_index_status[5][range_search_ctx->root().get()]);
+    const auto* result = 
range_search_ctx->get_index_context()->get_index_result_for_expr(
+            range_search_ctx->root().get());
+    ASSERT_NE(result, nullptr);
+    ASSERT_NE(result->get_data_bitmap(), nullptr);
+    EXPECT_EQ(result->get_data_bitmap()->cardinality(), 3);
+}
+
 TEST_F(VectorSearchTest, TestRangeSearchRuntimeInfoToString) {
     // Test default constructor
     doris::segment_v2::AnnRangeSearchRuntime runtime_info;
@@ -664,7 +868,7 @@ TEST_F(VectorSearchTest, 
TestEvaluateAnnRangeSearch_DimensionMismatch) {
 
     auto st = range_search_ctx->evaluate_ann_range_search(
             cid_to_index_iterators, idx_to_cid, column_iterators, 
common_expr_to_slotref_map,
-            row_bitmap, stats);
+            row_bitmap, stats, nullptr);
     EXPECT_FALSE(st.ok());
     EXPECT_TRUE(st.is<doris::ErrorCode::INVALID_ARGUMENT>());
 }
diff --git 
a/regression-test/suites/ann_index_p0/ann_range_search_pushdown_regression.groovy
 
b/regression-test/suites/ann_index_p0/ann_range_search_pushdown_regression.groovy
new file mode 100644
index 00000000000..c3122b9d1e6
--- /dev/null
+++ 
b/regression-test/suites/ann_index_p0/ann_range_search_pushdown_regression.groovy
@@ -0,0 +1,141 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import groovy.json.JsonSlurper
+
+// Fetches the FE query-profile listing (GET /rest/v1/query_profile) and returns
+// the raw response body as text. The request uses HTTP Basic auth built from the
+// configured FE HTTP user and password; a null password is sent as empty.
+def getProfileList = {
+    // Build the base URL in a single expression: a statement terminator after
+    // the scheme would leave `dst` without the host part.
+    def dst = "http://" + context.config.feHttpAddress
+    def conn = new URL(dst + "/rest/v1/query_profile").openConnection()
+    conn.setRequestMethod("GET")
+    def encoding = Base64.getEncoder().encodeToString((context.config.feHttpUser + ":" +
+            (context.config.feHttpPassword == null ? "" : context.config.feHttpPassword))
+            .getBytes("UTF-8"))
+    conn.setRequestProperty("Authorization", "Basic ${encoding}")
+    return conn.getInputStream().getText()
+}
+
+// Fetches the text profile of one query (GET /api/profile/text/?query_id=<id>)
+// and returns the raw response body. The request uses HTTP Basic auth built from
+// the configured FE HTTP user and password; a null password is sent as empty.
+//
+// id: profile/query id; it is interpolated into the query string as-is.
+def getProfile = { id ->
+    // Build the base URL in a single expression: a statement terminator after
+    // the scheme would leave `dst` without the host part.
+    def dst = "http://" + context.config.feHttpAddress
+    def conn = new URL(dst + "/api/profile/text/?query_id=$id").openConnection()
+    conn.setRequestMethod("GET")
+    def encoding = Base64.getEncoder().encodeToString((context.config.feHttpUser + ":" +
+            (context.config.feHttpPassword == null ? "" : context.config.feHttpPassword))
+            .getBytes("UTF-8"))
+    conn.setRequestProperty("Authorization", "Basic ${encoding}")
+    return conn.getInputStream().getText()
+}
+
+// Scans a profile dump line by line and returns, as a string, the first numeric
+// value (integer or decimal) that follows "<counterName>:" on a line.
+// Returns null when no line carries such a value.
+def extractCounterValue = { String profileText, String counterName ->
+    def marker = counterName + ":"
+    for (def profileLine : profileText.split("\n")) {
+        // Substring pre-check: skip lines that cannot match before using the regex.
+        if (!profileLine.contains(marker)) {
+            continue
+        }
+        def matcher = (profileLine =~ /${java.util.regex.Pattern.quote(counterName)}:\s*([0-9]+(?:\.[0-9]+)?)/)
+        if (matcher.find()) {
+            return matcher.group(1)
+        }
+    }
+    return null
+}
+
+// Regression suite: ANN range-search pushdown over a scan that mixes segments
+// with and without an ANN index. Checks both the query result and the
+// AnnIndexRangeSearchCnt counter reported in the query profile.
+suite("ann_range_search_pushdown_regression", "nonConcurrent") {
+    // Polls the FE profile list (at most 10 attempts, 300 ms apart) for the
+    // query whose SQL text contains `token`, asserts that one was found, and
+    // returns that query's profile as text.
+    def getProfileWithToken = { token ->
+        String profileId = ""
+        int attempts = 0
+        while (attempts < 10 && (profileId == null || profileId == "")) {
+            List profileData = new JsonSlurper().parseText(getProfileList()).data.rows
+            for (def profileItem in profileData) {
+                if (profileItem["Sql Statement"].toString().contains(token)) {
+                    profileId = profileItem["Profile ID"].toString()
+                    break
+                }
+            }
+            if (profileId == null || profileId == "") {
+                Thread.sleep(300)
+            }
+            attempts++
+        }
+        assertTrue(profileId != null && profileId != "")
+        // NOTE(review): presumably gives the FE time to finish collecting the
+        // profile before it is fetched - confirm.
+        Thread.sleep(800)
+        return getProfile(profileId).toString()
+    }
+
+    sql "unset variable all;"
+    sql "set enable_common_expr_pushdown=true;"
+    sql "set experimental_enable_virtual_slot_for_cse=true;"
+    sql "set enable_no_need_read_data_opt=true;"
+    sql "set enable_profile=true;"
+    sql "set profile_level=2;"
+    sql "set parallel_pipeline_task_num=1;"
+    sql "set enable_sql_cache=false;"
+    sql "set enable_condition_cache=false;"
+
+    // Case 1: one rowset has an IVF ANN index, while the surrounding small
+    // rowsets skip ANN index building because they have fewer rows than nlist.
+    // This creates a single scan with mixed indexed/non-indexed segments. The
+    // ANN range search execution state must be per segment instead of stored on
+    // the shared expression root.
+    sql "drop table if exists ann_range_mixed_segment_index"
+    sql """
+        create table ann_range_mixed_segment_index (
+            id int not null,
+            embedding array<float> not null,
+            index idx_embedding(`embedding`) using ann properties(
+                "index_type"="ivf",
+                "metric_type"="l2_distance",
+                "dim"="3",
+                "nlist"="2"
+            )
+        ) duplicate key(id)
+        distributed by hash(id) buckets 1
+        properties(
+            "replication_num"="1",
+            "disable_auto_compaction"="true"
+        );
+    """
+
+    sql "set ivf_nprobe=2;"
+    // Three separate inserts: a one-row load, a four-row load, and another
+    // one-row load, matching the small/indexed/small layout described above.
+    sql """
+        insert into ann_range_mixed_segment_index values
+        (100, [10.0, 10.0, 10.0]);
+    """
+    sql """
+        insert into ann_range_mixed_segment_index values
+        (1, [0.0, 0.0, 0.0]),
+        (2, [0.1, 0.0, 0.0]),
+        (3, [0.2, 0.0, 0.0]),
+        (4, [0.3, 0.0, 0.0]);
+    """
+    sql """
+        insert into ann_range_mixed_segment_index values
+        (101, [10.0, 10.0, 10.0]);
+    """
+
+    // The random token is selected as a literal column so the query's profile
+    // can be located by its SQL text afterwards.
+    def tokenMixed = UUID.randomUUID().toString()
+    def mixedRows = sql """
+        select id, "${tokenMixed}"
+        from ann_range_mixed_segment_index
+        where l2_distance_approximate(embedding, [0.0, 0.0, 0.0]) < 1.0
+        order by id;
+    """
+    assertEquals([1, 2, 3, 4], mixedRows.collect { it[0] })
+
+    def mixedProfile = getProfileWithToken(tokenMixed)
+    def rangeSearchCnt = extractCounterValue(mixedProfile, "AnnIndexRangeSearchCnt")
+    logger.info("Mixed indexed/non-indexed segment AnnIndexRangeSearchCnt=${rangeSearchCnt}")
+    // Exactly one range search is expected to be counted for this query.
+    assertEquals("1", rangeSearchCnt)
+
+}
diff --git 
a/regression-test/suites/ann_index_p0/ann_range_search_source_index_status_regression.groovy
 
b/regression-test/suites/ann_index_p0/ann_range_search_source_index_status_regression.groovy
new file mode 100644
index 00000000000..f4a881e75ce
--- /dev/null
+++ 
b/regression-test/suites/ann_index_p0/ann_range_search_source_index_status_regression.groovy
@@ -0,0 +1,84 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// Regression suite: ANN range search on a table where the embedding column's
+// position in the scan block differs from its storage ColumnId. Runs the same
+// predicate once without and once with the embedding column projected.
+suite("ann_range_search_source_index_status_regression", "nonConcurrent") {
+    // Session setup, applied in order.
+    for (String sessionStmt : [
+            "unset variable all;",
+            "set enable_common_expr_pushdown=true;",
+            "set experimental_enable_virtual_slot_for_cse=true;",
+            "set enable_no_need_read_data_opt=true;",
+            "set parallel_pipeline_task_num=1;",
+            "set enable_sql_cache=false;",
+            "set enable_condition_cache=false;"
+    ]) {
+        sql(sessionStmt)
+    }
+
+    // The source column's index in the scan block differs from its storage
+    // ColumnId. ANN range search should mark the common expression status by
+    // scan-block source column index, so the embedding column can be skipped
+    // when it is only used by the satisfied ANN predicate.
+    sql "drop table if exists ann_range_source_index_status"
+    sql """
+        create table ann_range_source_index_status (
+            id int not null,
+            pad_int int not null,
+            pad_text string not null,
+            embedding array<float> not null,
+            value int not null,
+            index idx_embedding(`embedding`) using ann properties(
+                "index_type"="hnsw",
+                "metric_type"="l2_distance",
+                "dim"="3"
+            )
+        ) duplicate key(id)
+        distributed by hash(id) buckets 1
+        properties("replication_num"="1");
+    """
+
+    sql """
+        insert into ann_range_source_index_status values
+        (1, 10, 'a', [0.0, 0.0, 0.0], 100),
+        (2, 20, 'b', [0.1, 0.0, 0.0], 200),
+        (3, 30, 'c', [0.2, 0.0, 0.0], 300),
+        (4, 40, 'd', [0.3, 0.0, 0.0], 400),
+        (5, 50, 'e', [0.4, 0.0, 0.0], 500),
+        (6, 60, 'f', [0.5, 0.0, 0.0], 600),
+        (7, 70, 'g', [0.6, 0.0, 0.0], 700),
+        (8, 80, 'h', [0.7, 0.0, 0.0], 800),
+        (9, 90, 'i', [0.8, 0.0, 0.0], 900),
+        (10, 100, 'j', [0.9, 0.0, 0.0], 1000);
+    """
+
+    // All ten rows lie within distance 1.0 of the origin, so both queries
+    // below must return every id.
+    def expectedIds = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
+    def debugPointName = "segment_iterator._read_columns_by_index"
+
+    // Index-only query: the embedding column is used only by the ANN predicate.
+    // NOTE(review): the debug point presumably makes the BE fail if the named
+    // column is actually read - confirm against the BE debug-point definition.
+    try {
+        GetDebugPoint().enableDebugPointForAllBEs(debugPointName, [column_name: "embedding"])
+        def idsWithoutEmbeddingRead = sql """
+            select id
+            from ann_range_source_index_status
+            where l2_distance_approximate(embedding, [0.0, 0.0, 0.0]) < 1.0
+            order by id;
+        """
+        assertEquals(expectedIds, idsWithoutEmbeddingRead.collect { row -> row[0] })
+    } finally {
+        // Always switch the debug point off again, even when the assertion fails.
+        GetDebugPoint().disableDebugPointForAllBEs(debugPointName)
+    }
+
+    // Same predicate with the embedding column projected, run without the
+    // debug point.
+    def idsWithEmbeddingRead = sql """
+        select id, embedding
+        from ann_range_source_index_status
+        where l2_distance_approximate(embedding, [0.0, 0.0, 0.0]) < 1.0
+        order by id;
+    """
+    assertEquals(expectedIds, idsWithEmbeddingRead.collect { row -> row[0] })
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]


Reply via email to