This is an automated email from the ASF dual-hosted git repository.
eldenmoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new be0d8e552ef [fix](ann range search) range search prepare failed on
NULL literal (#60564)
be0d8e552ef is described below
commit be0d8e552ef47dda5a6b6aa473753559ac458afc
Author: zhiqiang <[email protected]>
AuthorDate: Wed Feb 25 11:12:49 2026 +0800
[fix](ann range search) range search prepare failed on NULL literal
(#60564)
Refactor `prepare_ann_range_search` to check the distance function first,
then validate the literal. This prevents crashes when the right-side
literal has a nullable type.
Previously, when a nullable literal (e.g., from a scalar subquery result)
was passed to ANN range search:
- Debug mode: threw an exception inside an `#ifndef NDEBUG` block
- Release mode: crashed in `assert_cast<ColumnFloat64*>` because the actual
  column type was `ColumnNullable`
Now the function gracefully marks the expression as unsuitable for ANN
index optimization instead of crashing, allowing the query to fall back
to the normal execution path.
Changes:
1. Reorder validation: check the distance function before checking the literal
2. Replace the debug-only exception with mark_unsuitable() for nullable literals
3. Add a consistency check between the Cast expression and the literal type
4. Improve code structure with clear step comments
---
be/src/vec/exprs/vectorized_fn_call.cpp | 110 +++++-----
.../ann_range_search_nullable_literal.out | 54 +++++
.../ann_range_search_nullable_literal.groovy | 237 +++++++++++++++++++++
3 files changed, 352 insertions(+), 49 deletions(-)
diff --git a/be/src/vec/exprs/vectorized_fn_call.cpp
b/be/src/vec/exprs/vectorized_fn_call.cpp
index 58ecde934db..2b7fea40d9b 100644
--- a/be/src/vec/exprs/vectorized_fn_call.cpp
+++ b/be/src/vec/exprs/vectorized_fn_call.cpp
@@ -414,36 +414,7 @@ void VectorizedFnCall::prepare_ann_range_search(
auto left_child = get_child(0);
auto right_child = get_child(1);
- auto right_literal = std::dynamic_pointer_cast<VLiteral>(right_child);
- if (right_literal == nullptr) {
- suitable_for_ann_index = false;
- return;
- }
-#ifndef NDEBUG
- if (right_literal->is_nullable()) {
- throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
- "ANN range search with nullable literal is not
supported");
- }
-#endif
- auto right_col =
right_literal->get_column_ptr()->convert_to_full_column_if_const();
- auto right_type = right_literal->get_data_type();
-
- PrimitiveType right_primitive = right_type->get_primitive_type();
- const bool float32_literal = right_primitive == PrimitiveType::TYPE_FLOAT;
- const bool float64_literal = right_primitive == PrimitiveType::TYPE_DOUBLE;
- if (!float32_literal && !float64_literal) {
- mark_unsuitable("Right child is not a Float32Literal or
Float64Literal.");
- return;
- }
-
- if (float32_literal) {
- const ColumnFloat32* cf32_right = assert_cast<const
ColumnFloat32*>(right_col.get());
- range_search_runtime.radius = cf32_right->get_data()[0];
- } else if (float64_literal) {
- const ColumnFloat64* cf64_right = assert_cast<const
ColumnFloat64*>(right_col.get());
- range_search_runtime.radius =
static_cast<float>(cf64_right->get_data()[0]);
- }
-
+ // ========== Step 1: Check left child - must be a distance function
==========
auto get_virtual_expr = [&](const VExprSPtr& expr,
std::shared_ptr<VirtualSlotRef>& slot_ref) ->
VExprSPtr {
auto virtual_ref = std::dynamic_pointer_cast<VirtualSlotRef>(expr);
@@ -458,22 +429,20 @@ void VectorizedFnCall::prepare_ann_range_search(
std::shared_ptr<VirtualSlotRef> vir_slot_ref;
auto normalized_left = get_virtual_expr(left_child, vir_slot_ref);
- std::shared_ptr<VectorizedFnCall> function_call;
- if (float32_literal) {
- function_call =
std::dynamic_pointer_cast<VectorizedFnCall>(normalized_left);
- if (function_call == nullptr) {
- mark_unsuitable("Left child is not a function call.");
- return;
- }
- } else {
- auto cast_float_to_double =
std::dynamic_pointer_cast<VCastExpr>(normalized_left);
- if (cast_float_to_double == nullptr) {
- mark_unsuitable("Left child is not a cast expression.");
+ // Try to find the distance function call, it may be wrapped in a
Cast(Float->Double)
+ std::shared_ptr<VectorizedFnCall> function_call =
+ std::dynamic_pointer_cast<VectorizedFnCall>(normalized_left);
+ bool has_float_to_double_cast = false;
+
+ if (function_call == nullptr) {
+ // Check if it's a Cast expression wrapping a function call
+ auto cast_expr = std::dynamic_pointer_cast<VCastExpr>(normalized_left);
+ if (cast_expr == nullptr) {
+ mark_unsuitable("Left child is neither a function call nor a cast
expression.");
return;
}
-
- auto normalized_cast_child =
- get_virtual_expr(cast_float_to_double->get_child(0),
vir_slot_ref);
+ has_float_to_double_cast = true;
+ auto normalized_cast_child = get_virtual_expr(cast_expr->get_child(0),
vir_slot_ref);
function_call =
std::dynamic_pointer_cast<VectorizedFnCall>(normalized_cast_child);
if (function_call == nullptr) {
mark_unsuitable("Left child of cast is not a function call.");
@@ -481,17 +450,19 @@ void VectorizedFnCall::prepare_ann_range_search(
}
}
+ // Check if it's a supported distance function
if (DISTANCE_FUNCS.find(function_call->_function_name) ==
DISTANCE_FUNCS.end()) {
mark_unsuitable(fmt::format("Left child is not a supported distance
function: {}",
function_call->_function_name));
return;
- } else {
- // Strip the _approximate suffix.
- std::string metric_name = function_call->_function_name;
- metric_name = metric_name.substr(0, metric_name.size() - 12);
- range_search_runtime.metric_type =
segment_v2::string_to_metric(metric_name);
}
+ // Strip the _approximate suffix to get metric type
+ std::string metric_name = function_call->_function_name;
+ metric_name = metric_name.substr(0, metric_name.size() - 12);
+ range_search_runtime.metric_type =
segment_v2::string_to_metric(metric_name);
+
+ // ========== Step 2: Validate distance function arguments ==========
// Identify the slot ref child and the constant query array child
(ArrayLiteral or CAST to array)
Int32 idx_of_slot_ref = -1;
Int32 idx_of_array_expr = -1;
@@ -530,6 +501,47 @@ void VectorizedFnCall::prepare_ann_range_search(
}
range_search_runtime.query_value = extract_result.value();
range_search_runtime.dim = range_search_runtime.query_value->size();
+
+ // ========== Step 3: Check right child - must be a float/double literal
==========
+ auto right_literal = std::dynamic_pointer_cast<VLiteral>(right_child);
+ if (right_literal == nullptr) {
+ mark_unsuitable("Right child is not a literal.");
+ return;
+ }
+
+ // Handle nullable literal gracefully - just mark as unsuitable instead of
crash
+ if (right_literal->is_nullable()) {
+ mark_unsuitable("Right literal is nullable, not supported for ANN
range search.");
+ return;
+ }
+
+ auto right_type = right_literal->get_data_type();
+ PrimitiveType right_primitive = right_type->get_primitive_type();
+ const bool float32_literal = right_primitive == PrimitiveType::TYPE_FLOAT;
+ const bool float64_literal = right_primitive == PrimitiveType::TYPE_DOUBLE;
+
+ if (!float32_literal && !float64_literal) {
+ mark_unsuitable("Right child is not a Float32Literal or
Float64Literal.");
+ return;
+ }
+
+ // Validate consistency: if we have Cast(Float->Double), right must be
double literal
+ if (has_float_to_double_cast && !float64_literal) {
+ mark_unsuitable("Cast expression expects double literal on right
side.");
+ return;
+ }
+
+ // Extract radius value
+ auto right_col =
right_literal->get_column_ptr()->convert_to_full_column_if_const();
+ if (float32_literal) {
+ const ColumnFloat32* cf32_right = assert_cast<const
ColumnFloat32*>(right_col.get());
+ range_search_runtime.radius = cf32_right->get_data()[0];
+ } else {
+ const ColumnFloat64* cf64_right = assert_cast<const
ColumnFloat64*>(right_col.get());
+ range_search_runtime.radius =
static_cast<float>(cf64_right->get_data()[0]);
+ }
+
+ // ========== Done: Mark as suitable for ANN range search ==========
range_search_runtime.is_ann_range_search = true;
range_search_runtime.user_params = user_params;
VLOG_DEBUG << fmt::format("Ann range search params: {}",
range_search_runtime.to_string());
diff --git
a/regression-test/data/ann_index_p0/ann_range_search_nullable_literal.out
b/regression-test/data/ann_index_p0/ann_range_search_nullable_literal.out
new file mode 100644
index 00000000000..a44d8ae0530
--- /dev/null
+++ b/regression-test/data/ann_index_p0/ann_range_search_nullable_literal.out
@@ -0,0 +1,54 @@
+-- This file is automatically generated. You should know what you did if you
want to edit this
+-- !nullable_subquery_empty --
+
+-- !nullable_subquery_empty_ge --
+
+-- !nullable_subquery_all_null --
+
+-- !nullable_subquery_normal --
+0 [1, 2, 3, 4]
+1 [2, 3, 4, 5]
+2 [3, 4, 5, 6]
+
+-- !nullable_subquery_normal_max --
+0 [1, 2, 3, 4]
+1 [2, 3, 4, 5]
+2 [3, 4, 5, 6]
+3 [4, 5, 6, 7]
+4 [5, 6, 7, 8]
+
+-- !coalesce_with_null --
+0 [1, 2, 3, 4]
+1 [2, 3, 4, 5]
+2 [3, 4, 5, 6]
+
+-- !case_nullable --
+0 [1, 2, 3, 4]
+1 [2, 3, 4, 5]
+2 [3, 4, 5, 6]
+
+-- !normal_literal --
+0 [1, 2, 3, 4]
+1 [2, 3, 4, 5]
+2 [3, 4, 5, 6]
+
+-- !ip_nullable_subquery --
+
+-- !non_dist_nullable_empty --
+
+-- !non_dist_nullable_all_null --
+
+-- !non_dist_nullable_normal --
+0 [1, 2, 3, 4]
+1 [2, 3, 4, 5]
+
+-- !non_dist_func_nullable --
+
+-- !arithmetic_nullable --
+
+-- !mixed_dist_and_regular_nullable --
+
+-- !dist_normal_regular_nullable --
+
+-- !or_condition_nullable --
+
diff --git
a/regression-test/suites/ann_index_p0/ann_range_search_nullable_literal.groovy
b/regression-test/suites/ann_index_p0/ann_range_search_nullable_literal.groovy
new file mode 100644
index 00000000000..e4d86a6c745
--- /dev/null
+++
b/regression-test/suites/ann_index_p0/ann_range_search_nullable_literal.groovy
@@ -0,0 +1,237 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// Test case for fix: Handle nullable literal gracefully in ANN range search
+// When the right-side literal of comparison is nullable (e.g., from scalar
subquery
+// returning NULL), the query should not crash but fall back to normal
execution.
+
+suite("ann_range_search_nullable_literal") {
+ sql "drop table if exists ann_nullable_test"
+ sql "drop table if exists ann_nullable_threshold"
+
+ // Main table with ANN index
+ sql """
+ create table ann_nullable_test (
+ id int not null,
+ embedding array<float> not null,
+ value double null,
+ INDEX ann_embedding(`embedding`) USING ANN
PROPERTIES("index_type"="hnsw","metric_type"="l2_distance","dim"="4")
+ ) duplicate key (`id`)
+ distributed by hash(`id`) buckets 1
+ properties("replication_num"="1");
+ """
+
+ // Auxiliary table for threshold values (can be empty to produce NULL from
MIN/MAX)
+ sql """
+ create table ann_nullable_threshold (
+ id int not null,
+ threshold double null
+ ) duplicate key (`id`)
+ distributed by hash(`id`) buckets 1
+ properties("replication_num"="1");
+ """
+
+ // Insert test data into main table
+ sql """
+ INSERT INTO ann_nullable_test (id, embedding, value) VALUES
+ (0, [1.0, 2.0, 3.0, 4.0], 10.5),
+ (1, [2.0, 3.0, 4.0, 5.0], 20.5),
+ (2, [3.0, 4.0, 5.0, 6.0], 30.5),
+ (3, [4.0, 5.0, 6.0, 7.0], 40.5),
+ (4, [5.0, 6.0, 7.0, 8.0], 50.5);
+ """
+
+ // Test 1: Scalar subquery returning NULL (empty table case)
+ // When threshold table is empty, MIN(threshold) returns NULL
+ // This should not crash, just return empty result (since comparing with
NULL is always false)
+ qt_nullable_subquery_empty """
+ select id, embedding from ann_nullable_test
+ where l2_distance_approximate(embedding, [1.0, 2.0, 3.0, 4.0]) <
(select min(threshold) from ann_nullable_threshold)
+ order by id;
+ """
+
+ qt_nullable_subquery_empty_ge """
+ select id, embedding from ann_nullable_test
+ where l2_distance_approximate(embedding, [1.0, 2.0, 3.0, 4.0]) >=
(select max(threshold) from ann_nullable_threshold)
+ order by id;
+ """
+
+ // Insert some data with NULL values
+ sql """
+ INSERT INTO ann_nullable_threshold (id, threshold) VALUES
+ (1, NULL),
+ (2, NULL);
+ """
+
+ // Test 2: Scalar subquery returning NULL (all values are NULL case)
+ qt_nullable_subquery_all_null """
+ select id, embedding from ann_nullable_test
+ where l2_distance_approximate(embedding, [1.0, 2.0, 3.0, 4.0]) <
(select min(threshold) from ann_nullable_threshold)
+ order by id;
+ """
+
+ // Insert some non-NULL values
+ sql """
+ INSERT INTO ann_nullable_threshold (id, threshold) VALUES
+ (3, 5.0),
+ (4, 10.0);
+ """
+
+ // Test 3: Scalar subquery returning non-NULL value - should work normally
+ qt_nullable_subquery_normal """
+ select id, embedding from ann_nullable_test
+ where l2_distance_approximate(embedding, [1.0, 2.0, 3.0, 4.0]) <
(select min(threshold) from ann_nullable_threshold)
+ order by id;
+ """
+
+ qt_nullable_subquery_normal_max """
+ select id, embedding from ann_nullable_test
+ where l2_distance_approximate(embedding, [1.0, 2.0, 3.0, 4.0]) <
(select max(threshold) from ann_nullable_threshold)
+ order by id;
+ """
+
+ // Test 4: COALESCE with NULL - the result type might still be nullable
+ qt_coalesce_with_null """
+ select id, embedding from ann_nullable_test
+ where l2_distance_approximate(embedding, [1.0, 2.0, 3.0, 4.0]) <
coalesce((select min(threshold) from ann_nullable_threshold where id = 1), 5.0)
+ order by id;
+ """
+
+ // Test 5: CASE expression that might return NULL
+ qt_case_nullable """
+ select id, embedding from ann_nullable_test
+ where l2_distance_approximate(embedding, [1.0, 2.0, 3.0, 4.0]) <
+ (case when (select count(*) from ann_nullable_threshold where
threshold is not null) > 0
+ then (select min(threshold) from ann_nullable_threshold
where threshold is not null)
+ else null end)
+ order by id;
+ """
+
+ // Test 6: Normal literal (not nullable) - should use ANN index
+ qt_normal_literal """
+ select id, embedding from ann_nullable_test
+ where l2_distance_approximate(embedding, [1.0, 2.0, 3.0, 4.0]) < 5.0
+ order by id;
+ """
+
+ // Test 7: Inner product with nullable subquery
+ sql "drop table if exists ann_nullable_ip_test"
+ sql """
+ create table ann_nullable_ip_test (
+ id int not null,
+ embedding array<float> not null,
+ INDEX ann_embedding(`embedding`) USING ANN
PROPERTIES("index_type"="hnsw","metric_type"="inner_product","dim"="4")
+ ) duplicate key (`id`)
+ distributed by hash(`id`) buckets 1
+ properties("replication_num"="1");
+ """
+
+ sql """
+ INSERT INTO ann_nullable_ip_test (id, embedding) VALUES
+ (0, [1.0, 2.0, 3.0, 4.0]),
+ (1, [2.0, 3.0, 4.0, 5.0]),
+ (2, [3.0, 4.0, 5.0, 6.0]);
+ """
+
+ // Empty subquery returns NULL for inner_product comparison
+ qt_ip_nullable_subquery """
+ select id, embedding from ann_nullable_ip_test
+ where inner_product_approximate(embedding, [1.0, 2.0, 3.0, 4.0]) >
(select min(threshold) from ann_nullable_threshold where id = 999)
+ order by id;
+ """
+
+ // ========== Test 8-12: Non-distance function comparisons with nullable
literals ==========
+ // These tests ensure that when left child is NOT a distance function,
+ // the nullable literal on right side does not cause any issues.
+ // The query should execute normally without crashing.
+
+ // Test 8: Regular column comparison with nullable subquery (empty table)
+ sql "truncate table ann_nullable_threshold"
+ qt_non_dist_nullable_empty """
+ select id, embedding from ann_nullable_test
+ where value < (select min(threshold) from ann_nullable_threshold)
+ order by id;
+ """
+
+ // Test 9: Regular column comparison with nullable subquery (all NULL
values)
+ sql """
+ INSERT INTO ann_nullable_threshold (id, threshold) VALUES (1, NULL),
(2, NULL);
+ """
+ qt_non_dist_nullable_all_null """
+ select id, embedding from ann_nullable_test
+ where value < (select min(threshold) from ann_nullable_threshold)
+ order by id;
+ """
+
+ // Test 10: Regular column comparison with nullable subquery (has non-NULL
values)
+ sql """
+ INSERT INTO ann_nullable_threshold (id, threshold) VALUES (3, 25.0),
(4, 35.0);
+ """
+ qt_non_dist_nullable_normal """
+ select id, embedding from ann_nullable_test
+ where value < (select min(threshold) from ann_nullable_threshold)
+ order by id;
+ """
+
+ // Test 11: Non-distance function (abs, sqrt, etc.) with nullable literal
+ qt_non_dist_func_nullable """
+ select id, embedding from ann_nullable_test
+ where abs(value) < (select min(threshold) from ann_nullable_threshold
where id = 999)
+ order by id;
+ """
+
+ // Test 12: Arithmetic expression with nullable literal
+ qt_arithmetic_nullable """
+ select id, embedding from ann_nullable_test
+ where (value + 10) < (select min(threshold) from
ann_nullable_threshold where id = 999)
+ order by id;
+ """
+
+ // ========== Test 13-15: Mixed scenarios ==========
+ // Test 13: Distance function AND regular comparison, both with nullable
+ sql "truncate table ann_nullable_threshold"
+ qt_mixed_dist_and_regular_nullable """
+ select id, embedding from ann_nullable_test
+ where l2_distance_approximate(embedding, [1.0, 2.0, 3.0, 4.0]) <
(select min(threshold) from ann_nullable_threshold)
+ and value < (select max(threshold) from ann_nullable_threshold)
+ order by id;
+ """
+
+ // Test 14: Distance function with non-nullable, regular with nullable
+ sql """
+ INSERT INTO ann_nullable_threshold (id, threshold) VALUES (1, 5.0);
+ """
+ qt_dist_normal_regular_nullable """
+ select id, embedding from ann_nullable_test
+ where l2_distance_approximate(embedding, [1.0, 2.0, 3.0, 4.0]) < 5.0
+ and value < (select min(threshold) from ann_nullable_threshold where
id = 999)
+ order by id;
+ """
+
+ // Test 15: OR condition with nullable literals
+ qt_or_condition_nullable """
+ select id, embedding from ann_nullable_test
+ where l2_distance_approximate(embedding, [1.0, 2.0, 3.0, 4.0]) <
(select min(threshold) from ann_nullable_threshold where id = 999)
+ or value < (select max(threshold) from ann_nullable_threshold where
id = 999)
+ order by id;
+ """
+
+ // Cleanup
+ sql "drop table if exists ann_nullable_test"
+ sql "drop table if exists ann_nullable_threshold"
+ sql "drop table if exists ann_nullable_ip_test"
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]