This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-4.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-4.0 by this push:
     new cc01c04f4c9 branch-4.0: [fix](ann range search)  range search prepare 
failed on NULL literal #60564 (#60821)
cc01c04f4c9 is described below

commit cc01c04f4c928fdb5fcea42107d732984e6ca8ec
Author: zhiqiang <[email protected]>
AuthorDate: Thu Feb 26 09:48:44 2026 +0800

    branch-4.0: [fix](ann range search)  range search prepare failed on NULL 
literal #60564 (#60821)
    
    cherry pick from #60564
---
 be/src/vec/exprs/vectorized_fn_call.cpp            | 105 +++++----
 .../ann_range_search_nullable_literal.out          |  54 +++++
 .../ann_range_search_nullable_literal.groovy       | 237 +++++++++++++++++++++
 3 files changed, 352 insertions(+), 44 deletions(-)

diff --git a/be/src/vec/exprs/vectorized_fn_call.cpp 
b/be/src/vec/exprs/vectorized_fn_call.cpp
index 3ebaa8b1245..d2892b671ca 100644
--- a/be/src/vec/exprs/vectorized_fn_call.cpp
+++ b/be/src/vec/exprs/vectorized_fn_call.cpp
@@ -391,31 +391,7 @@ void VectorizedFnCall::prepare_ann_range_search(
     auto left_child = get_child(0);
     auto right_child = get_child(1);
 
-    auto right_literal = std::dynamic_pointer_cast<VLiteral>(right_child);
-    if (right_literal == nullptr) {
-        suitable_for_ann_index = false;
-        return;
-    }
-
-    auto right_col = 
right_literal->get_column_ptr()->convert_to_full_column_if_const();
-    auto right_type = right_literal->get_data_type();
-
-    PrimitiveType right_primitive = right_type->get_primitive_type();
-    const bool float32_literal = right_primitive == PrimitiveType::TYPE_FLOAT;
-    const bool float64_literal = right_primitive == PrimitiveType::TYPE_DOUBLE;
-    if (!float32_literal && !float64_literal) {
-        mark_unsuitable("Right child is not a Float32Literal or 
Float64Literal.");
-        return;
-    }
-
-    if (float32_literal) {
-        const ColumnFloat32* cf32_right = assert_cast<const 
ColumnFloat32*>(right_col.get());
-        range_search_runtime.radius = cf32_right->get_data()[0];
-    } else if (float64_literal) {
-        const ColumnFloat64* cf64_right = assert_cast<const 
ColumnFloat64*>(right_col.get());
-        range_search_runtime.radius = 
static_cast<float>(cf64_right->get_data()[0]);
-    }
-
+    // ========== Step 1: Check left child - must be a distance function 
==========
     auto get_virtual_expr = [&](const VExprSPtr& expr,
                                 std::shared_ptr<VirtualSlotRef>& slot_ref) -> 
VExprSPtr {
         auto virtual_ref = std::dynamic_pointer_cast<VirtualSlotRef>(expr);
@@ -430,22 +406,20 @@ void VectorizedFnCall::prepare_ann_range_search(
     std::shared_ptr<VirtualSlotRef> vir_slot_ref;
     auto normalized_left = get_virtual_expr(left_child, vir_slot_ref);
 
-    std::shared_ptr<VectorizedFnCall> function_call;
-    if (float32_literal) {
-        function_call = 
std::dynamic_pointer_cast<VectorizedFnCall>(normalized_left);
-        if (function_call == nullptr) {
-            mark_unsuitable("Left child is not a function call.");
-            return;
-        }
-    } else {
-        auto cast_float_to_double = 
std::dynamic_pointer_cast<VCastExpr>(normalized_left);
-        if (cast_float_to_double == nullptr) {
-            mark_unsuitable("Left child is not a cast expression.");
+    // Try to find the distance function call; it may be wrapped in a 
Cast(Float->Double)
+    std::shared_ptr<VectorizedFnCall> function_call =
+            std::dynamic_pointer_cast<VectorizedFnCall>(normalized_left);
+    bool has_float_to_double_cast = false;
+
+    if (function_call == nullptr) {
+        // Check if it's a Cast expression wrapping a function call
+        auto cast_expr = std::dynamic_pointer_cast<VCastExpr>(normalized_left);
+        if (cast_expr == nullptr) {
+            mark_unsuitable("Left child is neither a function call nor a cast 
expression.");
             return;
         }
-
-        auto normalized_cast_child =
-                get_virtual_expr(cast_float_to_double->get_child(0), 
vir_slot_ref);
+        has_float_to_double_cast = true;
+        auto normalized_cast_child = get_virtual_expr(cast_expr->get_child(0), 
vir_slot_ref);
         function_call = 
std::dynamic_pointer_cast<VectorizedFnCall>(normalized_cast_child);
         if (function_call == nullptr) {
             mark_unsuitable("Left child of cast is not a function call.");
@@ -453,17 +427,19 @@ void VectorizedFnCall::prepare_ann_range_search(
         }
     }
 
+    // Check if it's a supported distance function
     if (DISTANCE_FUNCS.find(function_call->_function_name) == 
DISTANCE_FUNCS.end()) {
         mark_unsuitable(fmt::format("Left child is not a supported distance 
function: {}",
                                     function_call->_function_name));
         return;
-    } else {
-        // Strip the _approximate suffix.
-        std::string metric_name = function_call->_function_name;
-        metric_name = metric_name.substr(0, metric_name.size() - 12);
-        range_search_runtime.metric_type = 
segment_v2::string_to_metric(metric_name);
     }
 
+    // Strip the _approximate suffix to get metric type
+    std::string metric_name = function_call->_function_name;
+    metric_name = metric_name.substr(0, metric_name.size() - 12);
+    range_search_runtime.metric_type = 
segment_v2::string_to_metric(metric_name);
+
+    // ========== Step 2: Validate distance function arguments ==========
     // Identify the slot ref child and the constant query array child 
(ArrayLiteral or CAST to array)
     Int32 idx_of_slot_ref = -1;
     Int32 idx_of_array_expr = -1;
@@ -502,6 +478,47 @@ void VectorizedFnCall::prepare_ann_range_search(
     }
     range_search_runtime.query_value = extract_result.value();
     range_search_runtime.dim = range_search_runtime.query_value->size();
+
+    // ========== Step 3: Check right child - must be a float/double literal 
==========
+    auto right_literal = std::dynamic_pointer_cast<VLiteral>(right_child);
+    if (right_literal == nullptr) {
+        mark_unsuitable("Right child is not a literal.");
+        return;
+    }
+
+    // Handle nullable literal gracefully - just mark as unsuitable instead of 
crashing
+    if (right_literal->is_nullable()) {
+        mark_unsuitable("Right literal is nullable, not supported for ANN 
range search.");
+        return;
+    }
+
+    auto right_type = right_literal->get_data_type();
+    PrimitiveType right_primitive = right_type->get_primitive_type();
+    const bool float32_literal = right_primitive == PrimitiveType::TYPE_FLOAT;
+    const bool float64_literal = right_primitive == PrimitiveType::TYPE_DOUBLE;
+
+    if (!float32_literal && !float64_literal) {
+        mark_unsuitable("Right child is not a Float32Literal or 
Float64Literal.");
+        return;
+    }
+
+    // Validate consistency: if we have Cast(Float->Double), the right child must be 
a double literal
+    if (has_float_to_double_cast && !float64_literal) {
+        mark_unsuitable("Cast expression expects double literal on right 
side.");
+        return;
+    }
+
+    // Extract radius value
+    auto right_col = 
right_literal->get_column_ptr()->convert_to_full_column_if_const();
+    if (float32_literal) {
+        const ColumnFloat32* cf32_right = assert_cast<const 
ColumnFloat32*>(right_col.get());
+        range_search_runtime.radius = cf32_right->get_data()[0];
+    } else {
+        const ColumnFloat64* cf64_right = assert_cast<const 
ColumnFloat64*>(right_col.get());
+        range_search_runtime.radius = 
static_cast<float>(cf64_right->get_data()[0]);
+    }
+
+    // ========== Done: Mark as suitable for ANN range search ==========
     range_search_runtime.is_ann_range_search = true;
     range_search_runtime.user_params = user_params;
     VLOG_DEBUG << fmt::format("Ann range search params: {}", 
range_search_runtime.to_string());
diff --git 
a/regression-test/data/ann_index_p0/ann_range_search_nullable_literal.out 
b/regression-test/data/ann_index_p0/ann_range_search_nullable_literal.out
new file mode 100644
index 00000000000..a44d8ae0530
--- /dev/null
+++ b/regression-test/data/ann_index_p0/ann_range_search_nullable_literal.out
@@ -0,0 +1,54 @@
+-- This file is automatically generated. You should know what you did if you 
want to edit this
+-- !nullable_subquery_empty --
+
+-- !nullable_subquery_empty_ge --
+
+-- !nullable_subquery_all_null --
+
+-- !nullable_subquery_normal --
+0      [1, 2, 3, 4]
+1      [2, 3, 4, 5]
+2      [3, 4, 5, 6]
+
+-- !nullable_subquery_normal_max --
+0      [1, 2, 3, 4]
+1      [2, 3, 4, 5]
+2      [3, 4, 5, 6]
+3      [4, 5, 6, 7]
+4      [5, 6, 7, 8]
+
+-- !coalesce_with_null --
+0      [1, 2, 3, 4]
+1      [2, 3, 4, 5]
+2      [3, 4, 5, 6]
+
+-- !case_nullable --
+0      [1, 2, 3, 4]
+1      [2, 3, 4, 5]
+2      [3, 4, 5, 6]
+
+-- !normal_literal --
+0      [1, 2, 3, 4]
+1      [2, 3, 4, 5]
+2      [3, 4, 5, 6]
+
+-- !ip_nullable_subquery --
+
+-- !non_dist_nullable_empty --
+
+-- !non_dist_nullable_all_null --
+
+-- !non_dist_nullable_normal --
+0      [1, 2, 3, 4]
+1      [2, 3, 4, 5]
+
+-- !non_dist_func_nullable --
+
+-- !arithmetic_nullable --
+
+-- !mixed_dist_and_regular_nullable --
+
+-- !dist_normal_regular_nullable --
+
+-- !or_condition_nullable --
+
diff --git 
a/regression-test/suites/ann_index_p0/ann_range_search_nullable_literal.groovy 
b/regression-test/suites/ann_index_p0/ann_range_search_nullable_literal.groovy
new file mode 100644
index 00000000000..e4d86a6c745
--- /dev/null
+++ 
b/regression-test/suites/ann_index_p0/ann_range_search_nullable_literal.groovy
@@ -0,0 +1,237 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// Test case for fix: Handle nullable literal gracefully in ANN range search
+// When the right-side literal of comparison is nullable (e.g., from scalar 
subquery
+// returning NULL), the query should not crash but fall back to normal 
execution.
+
+suite("ann_range_search_nullable_literal") {
+    sql "drop table if exists ann_nullable_test"
+    sql "drop table if exists ann_nullable_threshold"
+
+    // Main table with ANN index
+    sql """
+        create table ann_nullable_test (
+            id int not null,
+            embedding array<float> not null,
+            value double null,
+            INDEX ann_embedding(`embedding`) USING ANN 
PROPERTIES("index_type"="hnsw","metric_type"="l2_distance","dim"="4")
+        ) duplicate key (`id`) 
+        distributed by hash(`id`) buckets 1
+        properties("replication_num"="1");
+    """
+
+    // Auxiliary table for threshold values (can be empty to produce NULL from 
MIN/MAX)
+    sql """
+        create table ann_nullable_threshold (
+            id int not null,
+            threshold double null
+        ) duplicate key (`id`) 
+        distributed by hash(`id`) buckets 1
+        properties("replication_num"="1");
+    """
+
+    // Insert test data into main table
+    sql """
+        INSERT INTO ann_nullable_test (id, embedding, value) VALUES
+            (0, [1.0, 2.0, 3.0, 4.0], 10.5),
+            (1, [2.0, 3.0, 4.0, 5.0], 20.5),
+            (2, [3.0, 4.0, 5.0, 6.0], 30.5),
+            (3, [4.0, 5.0, 6.0, 7.0], 40.5),
+            (4, [5.0, 6.0, 7.0, 8.0], 50.5);
+    """
+
+    // Test 1: Scalar subquery returning NULL (empty table case)
+    // When threshold table is empty, MIN(threshold) returns NULL
+    // This should not crash; it should return an empty result (a comparison with 
NULL evaluates to NULL, which the WHERE clause treats as false)
+    qt_nullable_subquery_empty """
+        select id, embedding from ann_nullable_test 
+        where l2_distance_approximate(embedding, [1.0, 2.0, 3.0, 4.0]) < 
(select min(threshold) from ann_nullable_threshold)
+        order by id;
+    """
+
+    qt_nullable_subquery_empty_ge """
+        select id, embedding from ann_nullable_test 
+        where l2_distance_approximate(embedding, [1.0, 2.0, 3.0, 4.0]) >= 
(select max(threshold) from ann_nullable_threshold)
+        order by id;
+    """
+
+    // Insert some data with NULL values
+    sql """
+        INSERT INTO ann_nullable_threshold (id, threshold) VALUES
+            (1, NULL),
+            (2, NULL);
+    """
+
+    // Test 2: Scalar subquery returning NULL (all values are NULL case)
+    qt_nullable_subquery_all_null """
+        select id, embedding from ann_nullable_test 
+        where l2_distance_approximate(embedding, [1.0, 2.0, 3.0, 4.0]) < 
(select min(threshold) from ann_nullable_threshold)
+        order by id;
+    """
+
+    // Insert some non-NULL values
+    sql """
+        INSERT INTO ann_nullable_threshold (id, threshold) VALUES
+            (3, 5.0),
+            (4, 10.0);
+    """
+
+    // Test 3: Scalar subquery returning non-NULL value - should work normally
+    qt_nullable_subquery_normal """
+        select id, embedding from ann_nullable_test 
+        where l2_distance_approximate(embedding, [1.0, 2.0, 3.0, 4.0]) < 
(select min(threshold) from ann_nullable_threshold)
+        order by id;
+    """
+
+    qt_nullable_subquery_normal_max """
+        select id, embedding from ann_nullable_test 
+        where l2_distance_approximate(embedding, [1.0, 2.0, 3.0, 4.0]) < 
(select max(threshold) from ann_nullable_threshold)
+        order by id;
+    """
+
+    // Test 4: COALESCE with NULL - the result type might still be nullable
+    qt_coalesce_with_null """
+        select id, embedding from ann_nullable_test 
+        where l2_distance_approximate(embedding, [1.0, 2.0, 3.0, 4.0]) < 
coalesce((select min(threshold) from ann_nullable_threshold where id = 1), 5.0)
+        order by id;
+    """
+
+    // Test 5: CASE expression that might return NULL
+    qt_case_nullable """
+        select id, embedding from ann_nullable_test 
+        where l2_distance_approximate(embedding, [1.0, 2.0, 3.0, 4.0]) < 
+            (case when (select count(*) from ann_nullable_threshold where 
threshold is not null) > 0 
+                  then (select min(threshold) from ann_nullable_threshold 
where threshold is not null)
+                  else null end)
+        order by id;
+    """
+
+    // Test 6: Normal literal (not nullable) - should use ANN index
+    qt_normal_literal """
+        select id, embedding from ann_nullable_test 
+        where l2_distance_approximate(embedding, [1.0, 2.0, 3.0, 4.0]) < 5.0
+        order by id;
+    """
+
+    // Test 7: Inner product with nullable subquery
+    sql "drop table if exists ann_nullable_ip_test"
+    sql """
+        create table ann_nullable_ip_test (
+            id int not null,
+            embedding array<float> not null,
+            INDEX ann_embedding(`embedding`) USING ANN 
PROPERTIES("index_type"="hnsw","metric_type"="inner_product","dim"="4")
+        ) duplicate key (`id`) 
+        distributed by hash(`id`) buckets 1
+        properties("replication_num"="1");
+    """
+
+    sql """
+        INSERT INTO ann_nullable_ip_test (id, embedding) VALUES
+            (0, [1.0, 2.0, 3.0, 4.0]),
+            (1, [2.0, 3.0, 4.0, 5.0]),
+            (2, [3.0, 4.0, 5.0, 6.0]);
+    """
+
+    // Empty subquery returns NULL for inner_product comparison
+    qt_ip_nullable_subquery """
+        select id, embedding from ann_nullable_ip_test 
+        where inner_product_approximate(embedding, [1.0, 2.0, 3.0, 4.0]) > 
(select min(threshold) from ann_nullable_threshold where id = 999)
+        order by id;
+    """
+
+    // ========== Test 8-12: Non-distance function comparisons with nullable 
literals ==========
+    // These tests ensure that when left child is NOT a distance function,
+    // the nullable literal on right side does not cause any issues.
+    // The query should execute normally without crashing.
+
+    // Test 8: Regular column comparison with nullable subquery (empty table)
+    sql "truncate table ann_nullable_threshold"
+    qt_non_dist_nullable_empty """
+        select id, embedding from ann_nullable_test 
+        where value < (select min(threshold) from ann_nullable_threshold)
+        order by id;
+    """
+
+    // Test 9: Regular column comparison with nullable subquery (all NULL 
values)
+    sql """
+        INSERT INTO ann_nullable_threshold (id, threshold) VALUES (1, NULL), 
(2, NULL);
+    """
+    qt_non_dist_nullable_all_null """
+        select id, embedding from ann_nullable_test 
+        where value < (select min(threshold) from ann_nullable_threshold)
+        order by id;
+    """
+
+    // Test 10: Regular column comparison with nullable subquery (has non-NULL 
values)
+    sql """
+        INSERT INTO ann_nullable_threshold (id, threshold) VALUES (3, 25.0), 
(4, 35.0);
+    """
+    qt_non_dist_nullable_normal """
+        select id, embedding from ann_nullable_test 
+        where value < (select min(threshold) from ann_nullable_threshold)
+        order by id;
+    """
+
+    // Test 11: Non-distance function (abs, sqrt, etc.) with nullable literal
+    qt_non_dist_func_nullable """
+        select id, embedding from ann_nullable_test 
+        where abs(value) < (select min(threshold) from ann_nullable_threshold 
where id = 999)
+        order by id;
+    """
+
+    // Test 12: Arithmetic expression with nullable literal
+    qt_arithmetic_nullable """
+        select id, embedding from ann_nullable_test 
+        where (value + 10) < (select min(threshold) from 
ann_nullable_threshold where id = 999)
+        order by id;
+    """
+
+    // ========== Test 13-15: Mixed scenarios ==========
+    // Test 13: Distance function AND regular comparison, both with nullable
+    sql "truncate table ann_nullable_threshold"
+    qt_mixed_dist_and_regular_nullable """
+        select id, embedding from ann_nullable_test 
+        where l2_distance_approximate(embedding, [1.0, 2.0, 3.0, 4.0]) < 
(select min(threshold) from ann_nullable_threshold)
+          and value < (select max(threshold) from ann_nullable_threshold)
+        order by id;
+    """
+
+    // Test 14: Distance function with non-nullable, regular with nullable
+    sql """
+        INSERT INTO ann_nullable_threshold (id, threshold) VALUES (1, 5.0);
+    """
+    qt_dist_normal_regular_nullable """
+        select id, embedding from ann_nullable_test 
+        where l2_distance_approximate(embedding, [1.0, 2.0, 3.0, 4.0]) < 5.0
+          and value < (select min(threshold) from ann_nullable_threshold where 
id = 999)
+        order by id;
+    """
+
+    // Test 15: OR condition with nullable literals
+    qt_or_condition_nullable """
+        select id, embedding from ann_nullable_test 
+        where l2_distance_approximate(embedding, [1.0, 2.0, 3.0, 4.0]) < 
(select min(threshold) from ann_nullable_threshold where id = 999)
+           or value < (select max(threshold) from ann_nullable_threshold where 
id = 999)
+        order by id;
+    """
+
+    // Cleanup
+    sql "drop table if exists ann_nullable_test"
+    sql "drop table if exists ann_nullable_threshold"
+    sql "drop table if exists ann_nullable_ip_test"
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to