This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-4.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-4.0 by this push:
     new 6f39c467ce4 branch-4.0: [fix](inverted index) fix is null predicate for inverted index evaluate #56964 (#57020)
6f39c467ce4 is described below

commit 6f39c467ce402668a05078b041f948c3222d9e36
Author: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Thu Oct 16 14:48:45 2025 +0800

    branch-4.0: [fix](inverted index) fix is null predicate for inverted index evaluate #56964 (#57020)
    
    Cherry-picked from #56964
    
    Co-authored-by: Jack <[email protected]>
---
 be/src/vec/functions/is_null.h                     |  23 ++--
 be/test/vec/function/function_is_null_test.cpp     | 147 +++++++++++++++++++++
 .../inverted_index_p0/test_inverted_is_null.groovy |  72 ++++++++++
 3 files changed, 232 insertions(+), 10 deletions(-)

diff --git a/be/src/vec/functions/is_null.h b/be/src/vec/functions/is_null.h
index 682d3bce1e1..46d746e185b 100644
--- a/be/src/vec/functions/is_null.h
+++ b/be/src/vec/functions/is_null.h
@@ -83,18 +83,21 @@ public:
             return Status::OK();
         }
         auto* index_iter = iterators[0];
-        if (index_iter->has_null()) {
-            segment_v2::InvertedIndexQueryCacheHandle null_bitmap_cache_handle;
-            
RETURN_IF_ERROR(index_iter->read_null_bitmap(&null_bitmap_cache_handle));
-            std::shared_ptr<roaring::Roaring> null_bitmap = 
null_bitmap_cache_handle.get_bitmap();
-            // only inverted index has null bitmap, so we can calculate
-            if (null_bitmap) {
-                // null_bitmap is null bitmap
-                bitmap_result = 
segment_v2::InvertedIndexResultBitmap(null_bitmap, null_bitmap);
-            }
+        if (!index_iter->has_null()) {
+            return Status::OK();
+        }
+        segment_v2::InvertedIndexQueryCacheHandle null_bitmap_cache_handle;
+        
RETURN_IF_ERROR(index_iter->read_null_bitmap(&null_bitmap_cache_handle));
+        std::shared_ptr<roaring::Roaring> null_bitmap = 
null_bitmap_cache_handle.get_bitmap();
+        if (!null_bitmap) {
+            return Status::OK();
         }
+        auto data_bitmap = std::make_shared<roaring::Roaring>(*null_bitmap);
+        auto empty_null_bitmap = std::make_shared<roaring::Roaring>();
+        bitmap_result = 
segment_v2::InvertedIndexResultBitmap(std::move(data_bitmap),
+                                                              
std::move(empty_null_bitmap));
         return Status::OK();
     }
 };
 
-} // namespace doris::vectorized
\ No newline at end of file
+} // namespace doris::vectorized
diff --git a/be/test/vec/function/function_is_null_test.cpp 
b/be/test/vec/function/function_is_null_test.cpp
index 2f1bead49e2..dc9fd9ddafa 100644
--- a/be/test/vec/function/function_is_null_test.cpp
+++ b/be/test/vec/function/function_is_null_test.cpp
@@ -240,4 +240,151 @@ TEST_F(FunctionIsNullTest, gc_binlogs_test) {
         check_result(string_reader.get(), false, 1);
     }
 }
+
+// Test corner cases for evaluate_inverted_index function
+TEST_F(FunctionIsNullTest, evaluate_inverted_index_corner_cases) {
+    OlapReaderStatistics stats;
+    RuntimeState runtime_state;
+    io::IOContext io_ctx;
+
+    auto context = std::make_shared<segment_v2::IndexQueryContext>();
+    context->io_ctx = &io_ctx;
+    context->stats = &stats;
+    context->runtime_state = &runtime_state;
+
+    ColumnsWithTypeAndName arguments;
+    std::vector<vectorized::IndexFieldNameAndTypePair> data_type_with_names;
+
+    auto is_null_function = std::make_shared<FunctionIsNull>();
+    auto is_not_null_function = std::make_shared<FunctionIsNotNull>();
+
+    // Test case 1: empty iterators
+    {
+        std::vector<segment_v2::IndexIterator*> iterators;
+        segment_v2::InvertedIndexResultBitmap bitmap_result;
+        EXPECT_TRUE(is_null_function
+                            ->evaluate_inverted_index(arguments, 
data_type_with_names, iterators, 3,
+                                                      bitmap_result)
+                            .ok());
+        EXPECT_TRUE(bitmap_result.is_empty());
+    }
+    {
+        std::vector<segment_v2::IndexIterator*> iterators;
+        segment_v2::InvertedIndexResultBitmap bitmap_result;
+        EXPECT_TRUE(is_not_null_function
+                            ->evaluate_inverted_index(arguments, 
data_type_with_names, iterators, 3,
+                                                      bitmap_result)
+                            .ok());
+        EXPECT_TRUE(bitmap_result.is_empty());
+    }
+
+    // Test case 2: iterators[0] == nullptr
+    {
+        std::vector<segment_v2::IndexIterator*> iterators;
+        iterators.push_back(nullptr);
+        segment_v2::InvertedIndexResultBitmap bitmap_result;
+        EXPECT_TRUE(is_null_function
+                            ->evaluate_inverted_index(arguments, 
data_type_with_names, iterators, 3,
+                                                      bitmap_result)
+                            .ok());
+        EXPECT_TRUE(bitmap_result.is_empty());
+    }
+    {
+        std::vector<segment_v2::IndexIterator*> iterators;
+        iterators.push_back(nullptr);
+        segment_v2::InvertedIndexResultBitmap bitmap_result;
+        EXPECT_TRUE(is_not_null_function
+                            ->evaluate_inverted_index(arguments, 
data_type_with_names, iterators, 3,
+                                                      bitmap_result)
+                            .ok());
+        EXPECT_TRUE(bitmap_result.is_empty());
+    }
+
+    // Test case 3 & 4: iterator without null bitmap or null_bitmap is nullptr
+    // These cases require creating a mock iterator with has_null() returning false
+    // or read_null_bitmap() returning nullptr
+    // We'll create a simple test using real data but verifying the behavior
+    
_schema_pb.set_inverted_index_storage_format(InvertedIndexStorageFormatPB::V2);
+    _tablet_schema.reset(new TabletSchema);
+    _tablet_schema->init_from_pb(_schema_pb);
+    TabletMetaSharedPtr tablet_meta(new TabletMeta(_tablet_schema));
+
+    tablet_meta->set_tablet_uid(TabletUid(30, 30));
+    tablet_meta.get()->_tablet_id = 300;
+    _tablet.reset(new Tablet(*_engine_ref, tablet_meta, _data_dir.get()));
+    EXPECT_TRUE(_tablet->init().ok());
+    
EXPECT_TRUE(io::global_local_filesystem()->delete_directory(_tablet->tablet_path()).ok());
+    
EXPECT_TRUE(io::global_local_filesystem()->create_directory(_tablet->tablet_path()).ok());
+
+    RowsetSharedPtr rowset;
+    const auto& res =
+            RowsetFactory::create_rowset_writer(*_engine_ref, 
rowset_writer_context(), false);
+    EXPECT_TRUE(res.has_value()) << res.error();
+    const auto& rowset_writer = res.value();
+
+    Block block = _tablet_schema->create_block();
+    auto columns = block.mutate_columns();
+
+    // Create block with NO null values to test the scenario where
+    // iterator might not have null bitmap or it's nullptr
+    vectorized::Field key1 = vectorized::Field::create_field<TYPE_INT>(10);
+    vectorized::Field key2 = vectorized::Field::create_field<TYPE_INT>(20);
+    vectorized::Field key3 = vectorized::Field::create_field<TYPE_INT>(30);
+    vectorized::Field v1 = 
vectorized::Field::create_field<TYPE_STRING>("value1");
+    vectorized::Field v2 = 
vectorized::Field::create_field<TYPE_STRING>("value2");
+    vectorized::Field v3 = 
vectorized::Field::create_field<TYPE_STRING>("value3");
+
+    columns[0]->insert(key1);
+    columns[0]->insert(key2);
+    columns[0]->insert(key3);
+    columns[1]->insert(v1);
+    columns[1]->insert(v2);
+    columns[1]->insert(v3);
+
+    EXPECT_TRUE(rowset_writer->add_block(&block).ok());
+    EXPECT_TRUE(rowset_writer->flush().ok());
+    EXPECT_TRUE(rowset_writer->build(rowset).ok());
+
+    // Test with data that has no nulls
+    // This will exercise the code path where has_null() might return false
+    // or null_bitmap might be nullptr
+    for (int i = 0; i < rowset->num_segments(); i++) {
+        auto segment_path = rowset->segment_path(i);
+        EXPECT_TRUE(segment_path.has_value());
+        std::string index_prefix = std::string(
+                
InvertedIndexDescriptor::get_index_file_path_prefix(segment_path.value()));
+        auto index_file_reader = std::make_shared<IndexFileReader>(
+                io::global_local_filesystem(), index_prefix, 
InvertedIndexStorageFormatPB::V2);
+        EXPECT_TRUE(index_file_reader->init().ok());
+
+        auto index_metas = _tablet_schema->inverted_indexs(0);
+        EXPECT_FALSE(index_metas.empty());
+        auto index_meta = index_metas[0];
+        auto bkd_reader = BkdIndexReader::create_shared(index_meta, 
index_file_reader);
+        EXPECT_TRUE(bkd_reader);
+
+        std::unique_ptr<IndexIterator> iter;
+        EXPECT_TRUE(bkd_reader->new_iterator(&iter).ok());
+        EXPECT_TRUE(iter);
+        iter->set_context(context);
+
+        std::vector<segment_v2::IndexIterator*> iterators;
+        iterators.push_back(iter.get());
+
+        segment_v2::InvertedIndexResultBitmap bitmap_result1;
+        EXPECT_TRUE(is_null_function
+                            ->evaluate_inverted_index(arguments, 
data_type_with_names, iterators, 3,
+                                                      bitmap_result1)
+                            .ok());
+        // When there's no null data, the result should be empty or have 0 cardinality
+        // depending on whether has_null() returns false
+
+        segment_v2::InvertedIndexResultBitmap bitmap_result2;
+        EXPECT_TRUE(is_not_null_function
+                            ->evaluate_inverted_index(arguments, 
data_type_with_names, iterators, 3,
+                                                      bitmap_result2)
+                            .ok());
+        // Similar test for is_not_null
+    }
+}
 } // namespace doris
diff --git 
a/regression-test/suites/inverted_index_p0/test_inverted_is_null.groovy 
b/regression-test/suites/inverted_index_p0/test_inverted_is_null.groovy
new file mode 100644
index 00000000000..20987910d67
--- /dev/null
+++ b/regression-test/suites/inverted_index_p0/test_inverted_is_null.groovy
@@ -0,0 +1,72 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_inverted_is_null", "p0") {
+    def tableName = "tbl_inverted_is_null"
+    sql """DROP TABLE IF EXISTS ${tableName}"""
+    sql """
+        CREATE TABLE ${tableName} (
+            id INT,
+            dt DATE NULL,
+            str_col STRING NULL,
+            val INT NULL,
+            INDEX idx_dt (dt) USING INVERTED,
+            INDEX idx_str (str_col) USING INVERTED,
+            INDEX idx_val (val) USING INVERTED
+        )
+        DUPLICATE KEY(id)
+        DISTRIBUTED BY HASH(id) BUCKETS 1
+        PROPERTIES(
+            "replication_allocation" = "tag.location.default: 1"
+        )
+    """
+
+    sql """INSERT INTO ${tableName} VALUES
+        (1, NULL, 'foo', 1),
+        (2, NULL, 'bar', -1),
+        (3, '2024-01-01', 'baz', 5),
+        (4, NULL, 'qux', 10)
+    """
+
+    sql "SET enable_common_expr_pushdown=true"
+    sql "SET inverted_index_skip_threshold=0"
+
+    def nullBranchQuery = """
+        SELECT COUNT(*)
+        FROM ${tableName}
+        WHERE (str_col LIKE CONCAT('%', 'no-hit', '%'))
+           OR (dt IS NULL) AND NOT val BETWEEN -9223372036854775808 AND 0
+    """
+
+    def negatedNotNullQuery = """
+        SELECT COUNT(*)
+        FROM ${tableName}
+        WHERE NOT (dt IS NOT NULL)
+    """
+
+    sql "SET enable_inverted_index_query=true"
+    def resultWithIndex = sql(nullBranchQuery)
+    def resultWithIndexNegatedNotNull = sql(negatedNotNullQuery)
+    assertEquals(2, resultWithIndex[0][0])      // previously returned 0 when 
dt IS NULL relied on inverted index
+    assertEquals(3, resultWithIndexNegatedNotNull[0][0]) // previously 
returned 0 when NOT (dt IS NOT NULL) was evaluated via inverted index
+
+    sql "SET enable_inverted_index_query=false"
+    def resultWithoutIndex = sql(nullBranchQuery)
+    def resultWithoutIndexNegatedNotNull = sql(negatedNotNullQuery)
+    assertEquals(2, resultWithoutIndex[0][0])
+    assertEquals(3, resultWithoutIndexNegatedNotNull[0][0])
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to