This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch branch-4.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-4.0 by this push:
new cc01c04f4c9 branch-4.0: [fix](ann range search) range search prepare
failed on NULL literal #60564 (#60821)
cc01c04f4c9 is described below
commit cc01c04f4c928fdb5fcea42107d732984e6ca8ec
Author: zhiqiang <[email protected]>
AuthorDate: Thu Feb 26 09:48:44 2026 +0800
branch-4.0: [fix](ann range search) range search prepare failed on NULL
literal #60564 (#60821)
cherry pick from #60564
---
be/src/vec/exprs/vectorized_fn_call.cpp | 105 +++++----
.../ann_range_search_nullable_literal.out | 54 +++++
.../ann_range_search_nullable_literal.groovy | 237 +++++++++++++++++++++
3 files changed, 352 insertions(+), 44 deletions(-)
diff --git a/be/src/vec/exprs/vectorized_fn_call.cpp
b/be/src/vec/exprs/vectorized_fn_call.cpp
index 3ebaa8b1245..d2892b671ca 100644
--- a/be/src/vec/exprs/vectorized_fn_call.cpp
+++ b/be/src/vec/exprs/vectorized_fn_call.cpp
@@ -391,31 +391,7 @@ void VectorizedFnCall::prepare_ann_range_search(
auto left_child = get_child(0);
auto right_child = get_child(1);
- auto right_literal = std::dynamic_pointer_cast<VLiteral>(right_child);
- if (right_literal == nullptr) {
- suitable_for_ann_index = false;
- return;
- }
-
- auto right_col =
right_literal->get_column_ptr()->convert_to_full_column_if_const();
- auto right_type = right_literal->get_data_type();
-
- PrimitiveType right_primitive = right_type->get_primitive_type();
- const bool float32_literal = right_primitive == PrimitiveType::TYPE_FLOAT;
- const bool float64_literal = right_primitive == PrimitiveType::TYPE_DOUBLE;
- if (!float32_literal && !float64_literal) {
- mark_unsuitable("Right child is not a Float32Literal or
Float64Literal.");
- return;
- }
-
- if (float32_literal) {
- const ColumnFloat32* cf32_right = assert_cast<const
ColumnFloat32*>(right_col.get());
- range_search_runtime.radius = cf32_right->get_data()[0];
- } else if (float64_literal) {
- const ColumnFloat64* cf64_right = assert_cast<const
ColumnFloat64*>(right_col.get());
- range_search_runtime.radius =
static_cast<float>(cf64_right->get_data()[0]);
- }
-
+ // ========== Step 1: Check left child - must be a distance function
==========
auto get_virtual_expr = [&](const VExprSPtr& expr,
std::shared_ptr<VirtualSlotRef>& slot_ref) ->
VExprSPtr {
auto virtual_ref = std::dynamic_pointer_cast<VirtualSlotRef>(expr);
@@ -430,22 +406,20 @@ void VectorizedFnCall::prepare_ann_range_search(
std::shared_ptr<VirtualSlotRef> vir_slot_ref;
auto normalized_left = get_virtual_expr(left_child, vir_slot_ref);
- std::shared_ptr<VectorizedFnCall> function_call;
- if (float32_literal) {
- function_call =
std::dynamic_pointer_cast<VectorizedFnCall>(normalized_left);
- if (function_call == nullptr) {
- mark_unsuitable("Left child is not a function call.");
- return;
- }
- } else {
- auto cast_float_to_double =
std::dynamic_pointer_cast<VCastExpr>(normalized_left);
- if (cast_float_to_double == nullptr) {
- mark_unsuitable("Left child is not a cast expression.");
+ // Try to find the distance function call, it may be wrapped in a
Cast(Float->Double)
+ std::shared_ptr<VectorizedFnCall> function_call =
+ std::dynamic_pointer_cast<VectorizedFnCall>(normalized_left);
+ bool has_float_to_double_cast = false;
+
+ if (function_call == nullptr) {
+ // Check if it's a Cast expression wrapping a function call
+ auto cast_expr = std::dynamic_pointer_cast<VCastExpr>(normalized_left);
+ if (cast_expr == nullptr) {
+ mark_unsuitable("Left child is neither a function call nor a cast
expression.");
return;
}
-
- auto normalized_cast_child =
- get_virtual_expr(cast_float_to_double->get_child(0),
vir_slot_ref);
+ has_float_to_double_cast = true;
+ auto normalized_cast_child = get_virtual_expr(cast_expr->get_child(0),
vir_slot_ref);
function_call =
std::dynamic_pointer_cast<VectorizedFnCall>(normalized_cast_child);
if (function_call == nullptr) {
mark_unsuitable("Left child of cast is not a function call.");
@@ -453,17 +427,19 @@ void VectorizedFnCall::prepare_ann_range_search(
}
}
+ // Check if it's a supported distance function
if (DISTANCE_FUNCS.find(function_call->_function_name) ==
DISTANCE_FUNCS.end()) {
mark_unsuitable(fmt::format("Left child is not a supported distance
function: {}",
function_call->_function_name));
return;
- } else {
- // Strip the _approximate suffix.
- std::string metric_name = function_call->_function_name;
- metric_name = metric_name.substr(0, metric_name.size() - 12);
- range_search_runtime.metric_type =
segment_v2::string_to_metric(metric_name);
}
+ // Strip the _approximate suffix to get metric type
+ std::string metric_name = function_call->_function_name;
+ metric_name = metric_name.substr(0, metric_name.size() - 12);
+ range_search_runtime.metric_type =
segment_v2::string_to_metric(metric_name);
+
+ // ========== Step 2: Validate distance function arguments ==========
// Identify the slot ref child and the constant query array child
(ArrayLiteral or CAST to array)
Int32 idx_of_slot_ref = -1;
Int32 idx_of_array_expr = -1;
@@ -502,6 +478,47 @@ void VectorizedFnCall::prepare_ann_range_search(
}
range_search_runtime.query_value = extract_result.value();
range_search_runtime.dim = range_search_runtime.query_value->size();
+
+ // ========== Step 3: Check right child - must be a float/double literal
==========
+ auto right_literal = std::dynamic_pointer_cast<VLiteral>(right_child);
+ if (right_literal == nullptr) {
+ mark_unsuitable("Right child is not a literal.");
+ return;
+ }
+
+ // Handle nullable literal gracefully - just mark as unsuitable instead of
crashing
+ if (right_literal->is_nullable()) {
+ mark_unsuitable("Right literal is nullable, not supported for ANN
range search.");
+ return;
+ }
+
+ auto right_type = right_literal->get_data_type();
+ PrimitiveType right_primitive = right_type->get_primitive_type();
+ const bool float32_literal = right_primitive == PrimitiveType::TYPE_FLOAT;
+ const bool float64_literal = right_primitive == PrimitiveType::TYPE_DOUBLE;
+
+ if (!float32_literal && !float64_literal) {
+ mark_unsuitable("Right child is not a Float32Literal or
Float64Literal.");
+ return;
+ }
+
+ // Validate consistency: if we have Cast(Float->Double), right must be
double literal
+ if (has_float_to_double_cast && !float64_literal) {
+ mark_unsuitable("Cast expression expects double literal on right
side.");
+ return;
+ }
+
+ // Extract radius value
+ auto right_col =
right_literal->get_column_ptr()->convert_to_full_column_if_const();
+ if (float32_literal) {
+ const ColumnFloat32* cf32_right = assert_cast<const
ColumnFloat32*>(right_col.get());
+ range_search_runtime.radius = cf32_right->get_data()[0];
+ } else {
+ const ColumnFloat64* cf64_right = assert_cast<const
ColumnFloat64*>(right_col.get());
+ range_search_runtime.radius =
static_cast<float>(cf64_right->get_data()[0]);
+ }
+
+ // ========== Done: Mark as suitable for ANN range search ==========
range_search_runtime.is_ann_range_search = true;
range_search_runtime.user_params = user_params;
VLOG_DEBUG << fmt::format("Ann range search params: {}",
range_search_runtime.to_string());
diff --git
a/regression-test/data/ann_index_p0/ann_range_search_nullable_literal.out
b/regression-test/data/ann_index_p0/ann_range_search_nullable_literal.out
new file mode 100644
index 00000000000..a44d8ae0530
--- /dev/null
+++ b/regression-test/data/ann_index_p0/ann_range_search_nullable_literal.out
@@ -0,0 +1,54 @@
+-- This file is automatically generated. You should know what you did if you
want to edit this
+-- !nullable_subquery_empty --
+
+-- !nullable_subquery_empty_ge --
+
+-- !nullable_subquery_all_null --
+
+-- !nullable_subquery_normal --
+0 [1, 2, 3, 4]
+1 [2, 3, 4, 5]
+2 [3, 4, 5, 6]
+
+-- !nullable_subquery_normal_max --
+0 [1, 2, 3, 4]
+1 [2, 3, 4, 5]
+2 [3, 4, 5, 6]
+3 [4, 5, 6, 7]
+4 [5, 6, 7, 8]
+
+-- !coalesce_with_null --
+0 [1, 2, 3, 4]
+1 [2, 3, 4, 5]
+2 [3, 4, 5, 6]
+
+-- !case_nullable --
+0 [1, 2, 3, 4]
+1 [2, 3, 4, 5]
+2 [3, 4, 5, 6]
+
+-- !normal_literal --
+0 [1, 2, 3, 4]
+1 [2, 3, 4, 5]
+2 [3, 4, 5, 6]
+
+-- !ip_nullable_subquery --
+
+-- !non_dist_nullable_empty --
+
+-- !non_dist_nullable_all_null --
+
+-- !non_dist_nullable_normal --
+0 [1, 2, 3, 4]
+1 [2, 3, 4, 5]
+
+-- !non_dist_func_nullable --
+
+-- !arithmetic_nullable --
+
+-- !mixed_dist_and_regular_nullable --
+
+-- !dist_normal_regular_nullable --
+
+-- !or_condition_nullable --
+
diff --git
a/regression-test/suites/ann_index_p0/ann_range_search_nullable_literal.groovy
b/regression-test/suites/ann_index_p0/ann_range_search_nullable_literal.groovy
new file mode 100644
index 00000000000..e4d86a6c745
--- /dev/null
+++
b/regression-test/suites/ann_index_p0/ann_range_search_nullable_literal.groovy
@@ -0,0 +1,237 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// Test case for fix: Handle nullable literal gracefully in ANN range search
+// When the right-side literal of comparison is nullable (e.g., from scalar
subquery
+// returning NULL), the query should not crash but fall back to normal
execution.
+
+suite("ann_range_search_nullable_literal") {
+ sql "drop table if exists ann_nullable_test"
+ sql "drop table if exists ann_nullable_threshold"
+
+ // Main table with ANN index
+ sql """
+ create table ann_nullable_test (
+ id int not null,
+ embedding array<float> not null,
+ value double null,
+ INDEX ann_embedding(`embedding`) USING ANN
PROPERTIES("index_type"="hnsw","metric_type"="l2_distance","dim"="4")
+ ) duplicate key (`id`)
+ distributed by hash(`id`) buckets 1
+ properties("replication_num"="1");
+ """
+
+ // Auxiliary table for threshold values (can be empty to produce NULL from
MIN/MAX)
+ sql """
+ create table ann_nullable_threshold (
+ id int not null,
+ threshold double null
+ ) duplicate key (`id`)
+ distributed by hash(`id`) buckets 1
+ properties("replication_num"="1");
+ """
+
+ // Insert test data into main table
+ sql """
+ INSERT INTO ann_nullable_test (id, embedding, value) VALUES
+ (0, [1.0, 2.0, 3.0, 4.0], 10.5),
+ (1, [2.0, 3.0, 4.0, 5.0], 20.5),
+ (2, [3.0, 4.0, 5.0, 6.0], 30.5),
+ (3, [4.0, 5.0, 6.0, 7.0], 40.5),
+ (4, [5.0, 6.0, 7.0, 8.0], 50.5);
+ """
+
+ // Test 1: Scalar subquery returning NULL (empty table case)
+ // When threshold table is empty, MIN(threshold) returns NULL
+ // This should not crash, just return empty result (since comparing with
NULL yields NULL, which is never true)
+ qt_nullable_subquery_empty """
+ select id, embedding from ann_nullable_test
+ where l2_distance_approximate(embedding, [1.0, 2.0, 3.0, 4.0]) <
(select min(threshold) from ann_nullable_threshold)
+ order by id;
+ """
+
+ qt_nullable_subquery_empty_ge """
+ select id, embedding from ann_nullable_test
+ where l2_distance_approximate(embedding, [1.0, 2.0, 3.0, 4.0]) >=
(select max(threshold) from ann_nullable_threshold)
+ order by id;
+ """
+
+ // Insert some data with NULL values
+ sql """
+ INSERT INTO ann_nullable_threshold (id, threshold) VALUES
+ (1, NULL),
+ (2, NULL);
+ """
+
+ // Test 2: Scalar subquery returning NULL (all values are NULL case)
+ qt_nullable_subquery_all_null """
+ select id, embedding from ann_nullable_test
+ where l2_distance_approximate(embedding, [1.0, 2.0, 3.0, 4.0]) <
(select min(threshold) from ann_nullable_threshold)
+ order by id;
+ """
+
+ // Insert some non-NULL values
+ sql """
+ INSERT INTO ann_nullable_threshold (id, threshold) VALUES
+ (3, 5.0),
+ (4, 10.0);
+ """
+
+ // Test 3: Scalar subquery returning non-NULL value - should work normally
+ qt_nullable_subquery_normal """
+ select id, embedding from ann_nullable_test
+ where l2_distance_approximate(embedding, [1.0, 2.0, 3.0, 4.0]) <
(select min(threshold) from ann_nullable_threshold)
+ order by id;
+ """
+
+ qt_nullable_subquery_normal_max """
+ select id, embedding from ann_nullable_test
+ where l2_distance_approximate(embedding, [1.0, 2.0, 3.0, 4.0]) <
(select max(threshold) from ann_nullable_threshold)
+ order by id;
+ """
+
+ // Test 4: COALESCE with NULL - the result type might still be nullable
+ qt_coalesce_with_null """
+ select id, embedding from ann_nullable_test
+ where l2_distance_approximate(embedding, [1.0, 2.0, 3.0, 4.0]) <
coalesce((select min(threshold) from ann_nullable_threshold where id = 1), 5.0)
+ order by id;
+ """
+
+ // Test 5: CASE expression that might return NULL
+ qt_case_nullable """
+ select id, embedding from ann_nullable_test
+ where l2_distance_approximate(embedding, [1.0, 2.0, 3.0, 4.0]) <
+ (case when (select count(*) from ann_nullable_threshold where
threshold is not null) > 0
+ then (select min(threshold) from ann_nullable_threshold
where threshold is not null)
+ else null end)
+ order by id;
+ """
+
+ // Test 6: Normal literal (not nullable) - should use ANN index
+ qt_normal_literal """
+ select id, embedding from ann_nullable_test
+ where l2_distance_approximate(embedding, [1.0, 2.0, 3.0, 4.0]) < 5.0
+ order by id;
+ """
+
+ // Test 7: Inner product with nullable subquery
+ sql "drop table if exists ann_nullable_ip_test"
+ sql """
+ create table ann_nullable_ip_test (
+ id int not null,
+ embedding array<float> not null,
+ INDEX ann_embedding(`embedding`) USING ANN
PROPERTIES("index_type"="hnsw","metric_type"="inner_product","dim"="4")
+ ) duplicate key (`id`)
+ distributed by hash(`id`) buckets 1
+ properties("replication_num"="1");
+ """
+
+ sql """
+ INSERT INTO ann_nullable_ip_test (id, embedding) VALUES
+ (0, [1.0, 2.0, 3.0, 4.0]),
+ (1, [2.0, 3.0, 4.0, 5.0]),
+ (2, [3.0, 4.0, 5.0, 6.0]);
+ """
+
+ // Empty subquery returns NULL for inner_product comparison
+ qt_ip_nullable_subquery """
+ select id, embedding from ann_nullable_ip_test
+ where inner_product_approximate(embedding, [1.0, 2.0, 3.0, 4.0]) >
(select min(threshold) from ann_nullable_threshold where id = 999)
+ order by id;
+ """
+
+ // ========== Test 8-12: Non-distance function comparisons with nullable
literals ==========
+ // These tests ensure that when left child is NOT a distance function,
+ // the nullable literal on right side does not cause any issues.
+ // The query should execute normally without crashing.
+
+ // Test 8: Regular column comparison with nullable subquery (empty table)
+ sql "truncate table ann_nullable_threshold"
+ qt_non_dist_nullable_empty """
+ select id, embedding from ann_nullable_test
+ where value < (select min(threshold) from ann_nullable_threshold)
+ order by id;
+ """
+
+ // Test 9: Regular column comparison with nullable subquery (all NULL
values)
+ sql """
+ INSERT INTO ann_nullable_threshold (id, threshold) VALUES (1, NULL),
(2, NULL);
+ """
+ qt_non_dist_nullable_all_null """
+ select id, embedding from ann_nullable_test
+ where value < (select min(threshold) from ann_nullable_threshold)
+ order by id;
+ """
+
+ // Test 10: Regular column comparison with nullable subquery (has non-NULL
values)
+ sql """
+ INSERT INTO ann_nullable_threshold (id, threshold) VALUES (3, 25.0),
(4, 35.0);
+ """
+ qt_non_dist_nullable_normal """
+ select id, embedding from ann_nullable_test
+ where value < (select min(threshold) from ann_nullable_threshold)
+ order by id;
+ """
+
+ // Test 11: Non-distance function (abs, sqrt, etc.) with nullable literal
+ qt_non_dist_func_nullable """
+ select id, embedding from ann_nullable_test
+ where abs(value) < (select min(threshold) from ann_nullable_threshold
where id = 999)
+ order by id;
+ """
+
+ // Test 12: Arithmetic expression with nullable literal
+ qt_arithmetic_nullable """
+ select id, embedding from ann_nullable_test
+ where (value + 10) < (select min(threshold) from
ann_nullable_threshold where id = 999)
+ order by id;
+ """
+
+ // ========== Test 13-15: Mixed scenarios ==========
+ // Test 13: Distance function AND regular comparison, both with nullable
+ sql "truncate table ann_nullable_threshold"
+ qt_mixed_dist_and_regular_nullable """
+ select id, embedding from ann_nullable_test
+ where l2_distance_approximate(embedding, [1.0, 2.0, 3.0, 4.0]) <
(select min(threshold) from ann_nullable_threshold)
+ and value < (select max(threshold) from ann_nullable_threshold)
+ order by id;
+ """
+
+ // Test 14: Distance function with non-nullable, regular with nullable
+ sql """
+ INSERT INTO ann_nullable_threshold (id, threshold) VALUES (1, 5.0);
+ """
+ qt_dist_normal_regular_nullable """
+ select id, embedding from ann_nullable_test
+ where l2_distance_approximate(embedding, [1.0, 2.0, 3.0, 4.0]) < 5.0
+ and value < (select min(threshold) from ann_nullable_threshold where
id = 999)
+ order by id;
+ """
+
+ // Test 15: OR condition with nullable literals
+ qt_or_condition_nullable """
+ select id, embedding from ann_nullable_test
+ where l2_distance_approximate(embedding, [1.0, 2.0, 3.0, 4.0]) <
(select min(threshold) from ann_nullable_threshold where id = 999)
+ or value < (select max(threshold) from ann_nullable_threshold where
id = 999)
+ order by id;
+ """
+
+ // Cleanup
+ sql "drop table if exists ann_nullable_test"
+ sql "drop table if exists ann_nullable_threshold"
+ sql "drop table if exists ann_nullable_ip_test"
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]