This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch branch-4.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-4.0 by this push:
new 6f39c467ce4 branch-4.0: [fix](inverted index) fix is null predicate for inverted index evaluate #56964 (#57020)
6f39c467ce4 is described below
commit 6f39c467ce402668a05078b041f948c3222d9e36
Author: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Thu Oct 16 14:48:45 2025 +0800
branch-4.0: [fix](inverted index) fix is null predicate for inverted index evaluate #56964 (#57020)
Cherry-picked from #56964
Co-authored-by: Jack <[email protected]>
---
be/src/vec/functions/is_null.h | 23 ++--
be/test/vec/function/function_is_null_test.cpp | 147 +++++++++++++++++++++
.../inverted_index_p0/test_inverted_is_null.groovy | 72 ++++++++++
3 files changed, 232 insertions(+), 10 deletions(-)
diff --git a/be/src/vec/functions/is_null.h b/be/src/vec/functions/is_null.h
index 682d3bce1e1..46d746e185b 100644
--- a/be/src/vec/functions/is_null.h
+++ b/be/src/vec/functions/is_null.h
@@ -83,18 +83,21 @@ public:
return Status::OK();
}
auto* index_iter = iterators[0];
- if (index_iter->has_null()) {
- segment_v2::InvertedIndexQueryCacheHandle null_bitmap_cache_handle;
- RETURN_IF_ERROR(index_iter->read_null_bitmap(&null_bitmap_cache_handle));
- std::shared_ptr<roaring::Roaring> null_bitmap = null_bitmap_cache_handle.get_bitmap();
- // only inverted index has null bitmap, so we can calculate
- if (null_bitmap) {
- // null_bitmap is null bitmap
- bitmap_result = segment_v2::InvertedIndexResultBitmap(null_bitmap, null_bitmap);
- }
+ if (!index_iter->has_null()) {
+ return Status::OK();
+ }
+ segment_v2::InvertedIndexQueryCacheHandle null_bitmap_cache_handle;
+ RETURN_IF_ERROR(index_iter->read_null_bitmap(&null_bitmap_cache_handle));
+ std::shared_ptr<roaring::Roaring> null_bitmap = null_bitmap_cache_handle.get_bitmap();
+ if (!null_bitmap) {
+ return Status::OK();
}
+ auto data_bitmap = std::make_shared<roaring::Roaring>(*null_bitmap);
+ auto empty_null_bitmap = std::make_shared<roaring::Roaring>();
+ bitmap_result = segment_v2::InvertedIndexResultBitmap(std::move(data_bitmap),
+ std::move(empty_null_bitmap));
return Status::OK();
}
};
-} // namespace doris::vectorized
\ No newline at end of file
+} // namespace doris::vectorized
diff --git a/be/test/vec/function/function_is_null_test.cpp b/be/test/vec/function/function_is_null_test.cpp
index 2f1bead49e2..dc9fd9ddafa 100644
--- a/be/test/vec/function/function_is_null_test.cpp
+++ b/be/test/vec/function/function_is_null_test.cpp
@@ -240,4 +240,151 @@ TEST_F(FunctionIsNullTest, gc_binlogs_test) {
check_result(string_reader.get(), false, 1);
}
}
+
+// Test corner cases for evaluate_inverted_index function
+TEST_F(FunctionIsNullTest, evaluate_inverted_index_corner_cases) {
+ OlapReaderStatistics stats;
+ RuntimeState runtime_state;
+ io::IOContext io_ctx;
+
+ auto context = std::make_shared<segment_v2::IndexQueryContext>();
+ context->io_ctx = &io_ctx;
+ context->stats = &stats;
+ context->runtime_state = &runtime_state;
+
+ ColumnsWithTypeAndName arguments;
+ std::vector<vectorized::IndexFieldNameAndTypePair> data_type_with_names;
+
+ auto is_null_function = std::make_shared<FunctionIsNull>();
+ auto is_not_null_function = std::make_shared<FunctionIsNotNull>();
+
+ // Test case 1: empty iterators
+ {
+ std::vector<segment_v2::IndexIterator*> iterators;
+ segment_v2::InvertedIndexResultBitmap bitmap_result;
+ EXPECT_TRUE(is_null_function
+ ->evaluate_inverted_index(arguments, data_type_with_names, iterators, 3,
+ bitmap_result)
+ .ok());
+ EXPECT_TRUE(bitmap_result.is_empty());
+ }
+ {
+ std::vector<segment_v2::IndexIterator*> iterators;
+ segment_v2::InvertedIndexResultBitmap bitmap_result;
+ EXPECT_TRUE(is_not_null_function
+ ->evaluate_inverted_index(arguments, data_type_with_names, iterators, 3,
+ bitmap_result)
+ .ok());
+ EXPECT_TRUE(bitmap_result.is_empty());
+ }
+
+ // Test case 2: iterators[0] == nullptr
+ {
+ std::vector<segment_v2::IndexIterator*> iterators;
+ iterators.push_back(nullptr);
+ segment_v2::InvertedIndexResultBitmap bitmap_result;
+ EXPECT_TRUE(is_null_function
+ ->evaluate_inverted_index(arguments, data_type_with_names, iterators, 3,
+ bitmap_result)
+ .ok());
+ EXPECT_TRUE(bitmap_result.is_empty());
+ }
+ {
+ std::vector<segment_v2::IndexIterator*> iterators;
+ iterators.push_back(nullptr);
+ segment_v2::InvertedIndexResultBitmap bitmap_result;
+ EXPECT_TRUE(is_not_null_function
+ ->evaluate_inverted_index(arguments, data_type_with_names, iterators, 3,
+ bitmap_result)
+ .ok());
+ EXPECT_TRUE(bitmap_result.is_empty());
+ }
+
+ // Test case 3 & 4: iterator without null bitmap or null_bitmap is nullptr
+ // These cases require creating a mock iterator with has_null() returning false
+ // or read_null_bitmap() returning nullptr
+ // We'll create a simple test using real data but verifying the behavior
+ _schema_pb.set_inverted_index_storage_format(InvertedIndexStorageFormatPB::V2);
+ _tablet_schema.reset(new TabletSchema);
+ _tablet_schema->init_from_pb(_schema_pb);
+ TabletMetaSharedPtr tablet_meta(new TabletMeta(_tablet_schema));
+
+ tablet_meta->set_tablet_uid(TabletUid(30, 30));
+ tablet_meta.get()->_tablet_id = 300;
+ _tablet.reset(new Tablet(*_engine_ref, tablet_meta, _data_dir.get()));
+ EXPECT_TRUE(_tablet->init().ok());
+ EXPECT_TRUE(io::global_local_filesystem()->delete_directory(_tablet->tablet_path()).ok());
+ EXPECT_TRUE(io::global_local_filesystem()->create_directory(_tablet->tablet_path()).ok());
+
+ RowsetSharedPtr rowset;
+ const auto& res =
+ RowsetFactory::create_rowset_writer(*_engine_ref, rowset_writer_context(), false);
+ EXPECT_TRUE(res.has_value()) << res.error();
+ const auto& rowset_writer = res.value();
+
+ Block block = _tablet_schema->create_block();
+ auto columns = block.mutate_columns();
+
+ // Create block with NO null values to test the scenario where
+ // iterator might not have null bitmap or it's nullptr
+ vectorized::Field key1 = vectorized::Field::create_field<TYPE_INT>(10);
+ vectorized::Field key2 = vectorized::Field::create_field<TYPE_INT>(20);
+ vectorized::Field key3 = vectorized::Field::create_field<TYPE_INT>(30);
+ vectorized::Field v1 = vectorized::Field::create_field<TYPE_STRING>("value1");
+ vectorized::Field v2 = vectorized::Field::create_field<TYPE_STRING>("value2");
+ vectorized::Field v3 = vectorized::Field::create_field<TYPE_STRING>("value3");
+
+ columns[0]->insert(key1);
+ columns[0]->insert(key2);
+ columns[0]->insert(key3);
+ columns[1]->insert(v1);
+ columns[1]->insert(v2);
+ columns[1]->insert(v3);
+
+ EXPECT_TRUE(rowset_writer->add_block(&block).ok());
+ EXPECT_TRUE(rowset_writer->flush().ok());
+ EXPECT_TRUE(rowset_writer->build(rowset).ok());
+
+ // Test with data that has no nulls
+ // This will exercise the code path where has_null() might return false
+ // or null_bitmap might be nullptr
+ for (int i = 0; i < rowset->num_segments(); i++) {
+ auto segment_path = rowset->segment_path(i);
+ EXPECT_TRUE(segment_path.has_value());
+ std::string index_prefix = std::string(
+ InvertedIndexDescriptor::get_index_file_path_prefix(segment_path.value()));
+ auto index_file_reader = std::make_shared<IndexFileReader>(
+ io::global_local_filesystem(), index_prefix, InvertedIndexStorageFormatPB::V2);
+ EXPECT_TRUE(index_file_reader->init().ok());
+
+ auto index_metas = _tablet_schema->inverted_indexs(0);
+ EXPECT_FALSE(index_metas.empty());
+ auto index_meta = index_metas[0];
+ auto bkd_reader = BkdIndexReader::create_shared(index_meta, index_file_reader);
+ EXPECT_TRUE(bkd_reader);
+
+ std::unique_ptr<IndexIterator> iter;
+ EXPECT_TRUE(bkd_reader->new_iterator(&iter).ok());
+ EXPECT_TRUE(iter);
+ iter->set_context(context);
+
+ std::vector<segment_v2::IndexIterator*> iterators;
+ iterators.push_back(iter.get());
+
+ segment_v2::InvertedIndexResultBitmap bitmap_result1;
+ EXPECT_TRUE(is_null_function
+ ->evaluate_inverted_index(arguments, data_type_with_names, iterators, 3,
+ bitmap_result1)
+ .ok());
+ // When there's no null data, the result should be empty or have 0 cardinality
+ // depending on whether has_null() returns false
+
+ segment_v2::InvertedIndexResultBitmap bitmap_result2;
+ EXPECT_TRUE(is_not_null_function
+ ->evaluate_inverted_index(arguments, data_type_with_names, iterators, 3,
+ bitmap_result2)
+ .ok());
+ // Similar test for is_not_null
+ }
+}
} // namespace doris
diff --git a/regression-test/suites/inverted_index_p0/test_inverted_is_null.groovy b/regression-test/suites/inverted_index_p0/test_inverted_is_null.groovy
new file mode 100644
index 00000000000..20987910d67
--- /dev/null
+++ b/regression-test/suites/inverted_index_p0/test_inverted_is_null.groovy
@@ -0,0 +1,72 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_inverted_is_null", "p0") {
+ def tableName = "tbl_inverted_is_null"
+ sql """DROP TABLE IF EXISTS ${tableName}"""
+ sql """
+ CREATE TABLE ${tableName} (
+ id INT,
+ dt DATE NULL,
+ str_col STRING NULL,
+ val INT NULL,
+ INDEX idx_dt (dt) USING INVERTED,
+ INDEX idx_str (str_col) USING INVERTED,
+ INDEX idx_val (val) USING INVERTED
+ )
+ DUPLICATE KEY(id)
+ DISTRIBUTED BY HASH(id) BUCKETS 1
+ PROPERTIES(
+ "replication_allocation" = "tag.location.default: 1"
+ )
+ """
+
+ sql """INSERT INTO ${tableName} VALUES
+ (1, NULL, 'foo', 1),
+ (2, NULL, 'bar', -1),
+ (3, '2024-01-01', 'baz', 5),
+ (4, NULL, 'qux', 10)
+ """
+
+ sql "SET enable_common_expr_pushdown=true"
+ sql "SET inverted_index_skip_threshold=0"
+
+ def nullBranchQuery = """
+ SELECT COUNT(*)
+ FROM ${tableName}
+ WHERE (str_col LIKE CONCAT('%', 'no-hit', '%'))
+ OR (dt IS NULL) AND NOT val BETWEEN -9223372036854775808 AND 0
+ """
+
+ def negatedNotNullQuery = """
+ SELECT COUNT(*)
+ FROM ${tableName}
+ WHERE NOT (dt IS NOT NULL)
+ """
+
+ sql "SET enable_inverted_index_query=true"
+ def resultWithIndex = sql(nullBranchQuery)
+ def resultWithIndexNegatedNotNull = sql(negatedNotNullQuery)
+ assertEquals(2, resultWithIndex[0][0]) // previously returned 0 when dt IS NULL relied on inverted index
+ assertEquals(3, resultWithIndexNegatedNotNull[0][0]) // previously returned 0 when NOT (dt IS NOT NULL) was evaluated via inverted index
+
+ sql "SET enable_inverted_index_query=false"
+ def resultWithoutIndex = sql(nullBranchQuery)
+ def resultWithoutIndexNegatedNotNull = sql(negatedNotNullQuery)
+ assertEquals(2, resultWithoutIndex[0][0])
+ assertEquals(3, resultWithoutIndexNegatedNotNull[0][0])
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]