This is an automated email from the ASF dual-hosted git repository.

airborne pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new a0f4c4f9405 [fix](bloomfilter) fix inlist support for date/datetimev1 in bloomfilter index (#46961)
a0f4c4f9405 is described below

commit a0f4c4f9405e0cc0fc5ef75ecae21c81ccf5ff1c
Author: airborne12 <[email protected]>
AuthorDate: Wed Jan 15 10:56:08 2025 +0800

    [fix](bloomfilter) fix inlist support for date/datetimev1 in bloomfilter index (#46961)
    
    Problem Summary:
    #43351 fixed date/datetime v1 support in the comparison predicate; this PR
    fixes the same problem in the in-list predicate.
---
 be/src/olap/in_list_predicate.h                    |  17 +++
 be/test/olap/date_bloom_filter_test.cpp            | 140 +++++++++++++++++++++
 .../data/bloom_filter_p0/test_bloom_filter.out     |  21 ++++
 .../bloom_filter_p0/test_bloom_filter.groovy       |  35 ++++++
 4 files changed, 213 insertions(+)

diff --git a/be/src/olap/in_list_predicate.h b/be/src/olap/in_list_predicate.h
index deb3f666f0e..872ff8853d8 100644
--- a/be/src/olap/in_list_predicate.h
+++ b/be/src/olap/in_list_predicate.h
@@ -322,6 +322,23 @@ public:
                                 sizeof(decimal12_t))) {
                         return true;
                     }
+                } else if constexpr (Type == PrimitiveType::TYPE_DATE) {
+                    const T* value = (const T*)(iter->get_value());
+                    uint24_t date_value(value->to_olap_date());
+                    if (bf->test_bytes(
+                                const_cast<char*>(reinterpret_cast<const 
char*>(&date_value)),
+                                sizeof(uint24_t))) {
+                        return true;
+                    }
+                    // DatetimeV1 using int64_t in bloom filter
+                } else if constexpr (Type == PrimitiveType::TYPE_DATETIME) {
+                    const T* value = (const T*)(iter->get_value());
+                    int64_t datetime_value(value->to_olap_datetime());
+                    if (bf->test_bytes(
+                                const_cast<char*>(reinterpret_cast<const 
char*>(&datetime_value)),
+                                sizeof(int64_t))) {
+                        return true;
+                    }
                 } else {
                     const T* value = (const T*)(iter->get_value());
                     if (bf->test_bytes(reinterpret_cast<const char*>(value), 
sizeof(*value))) {
diff --git a/be/test/olap/date_bloom_filter_test.cpp 
b/be/test/olap/date_bloom_filter_test.cpp
index 51de4ebd8e7..cf869bf82e3 100644
--- a/be/test/olap/date_bloom_filter_test.cpp
+++ b/be/test/olap/date_bloom_filter_test.cpp
@@ -18,11 +18,13 @@
 #include <gtest/gtest.h>
 
 #include "olap/comparison_predicate.h"
+#include "olap/in_list_predicate.h"
 #include "olap/rowset/beta_rowset.h"
 #include "olap/rowset/beta_rowset_writer.h"
 #include "olap/rowset/rowset_factory.h"
 #include "olap/rowset/segment_v2/bloom_filter_index_reader.h"
 #include "olap/storage_engine.h"
+#include "runtime/define_primitive_type.h"
 #include "util/date_func.h"
 #include "vec/runtime/vdatetime_value.h"
 
@@ -189,4 +191,142 @@ TEST_F(DateBloomFilterTest, query_index_test) {
         test("2024-11-20 09:00:00", false);
     }
 }
+
+TEST_F(DateBloomFilterTest, in_list_predicate_test) {
+    
EXPECT_TRUE(io::global_local_filesystem()->delete_directory(_tablet->tablet_path()).ok());
+    
EXPECT_TRUE(io::global_local_filesystem()->create_directory(_tablet->tablet_path()).ok());
+
+    RowsetSharedPtr rowset;
+    const auto& res =
+            RowsetFactory::create_rowset_writer(*_engine_ref, 
rowset_writer_context(), false);
+    EXPECT_TRUE(res.has_value()) << res.error();
+    const auto& rowset_writer = res.value();
+
+    Block block = _tablet_schema->create_block();
+    auto columns = block.mutate_columns();
+
+    // Insert test data
+    auto date = timestamp_from_date("2024-11-08");
+    auto datetime = timestamp_from_datetime("2024-11-08 09:00:00");
+    uint24_t olap_date_value(date.to_olap_date());
+    uint64_t olap_datetime_value(datetime.to_olap_datetime());
+    columns[0]->insert_many_fix_len_data(reinterpret_cast<const 
char*>(&olap_date_value), 1);
+    columns[1]->insert_many_fix_len_data(reinterpret_cast<const 
char*>(&olap_datetime_value), 1);
+
+    date = timestamp_from_date("2024-11-09");
+    datetime = timestamp_from_datetime("2024-11-09 09:00:00");
+    olap_date_value = date.to_olap_date();
+    olap_datetime_value = datetime.to_olap_datetime();
+    columns[0]->insert_many_fix_len_data(reinterpret_cast<const 
char*>(&olap_date_value), 1);
+    columns[1]->insert_many_fix_len_data(reinterpret_cast<const 
char*>(&olap_datetime_value), 1);
+
+    EXPECT_TRUE(rowset_writer->add_block(&block).ok());
+    EXPECT_TRUE(rowset_writer->flush().ok());
+    EXPECT_TRUE(rowset_writer->build(rowset).ok());
+    EXPECT_TRUE(_tablet->add_rowset(rowset).ok());
+
+    segment_v2::SegmentSharedPtr segment;
+    EXPECT_TRUE(((BetaRowset*)rowset.get())->load_segment(0, &segment).ok());
+    auto st = segment->_create_column_readers(*(segment->_footer_pb));
+    EXPECT_TRUE(st.ok());
+
+    // Test DATE column with IN predicate
+    {
+        const auto& reader = segment->_column_readers[0];
+        std::unique_ptr<BloomFilterIndexIterator> bf_iter;
+        EXPECT_TRUE(reader->_bloom_filter_index->load(true, true, 
nullptr).ok());
+        EXPECT_TRUE(reader->_bloom_filter_index->new_iterator(&bf_iter, 
nullptr).ok());
+        std::unique_ptr<BloomFilter> bf;
+        EXPECT_TRUE(bf_iter->read_bloom_filter(0, &bf).ok());
+
+        // Test positive cases
+        auto test_positive = [&](const std::vector<std::string>& values, bool 
result) {
+            auto hybrid_set = 
std::make_shared<HybridSet<PrimitiveType::TYPE_DATE>>();
+            for (const auto& value : values) {
+                auto v = timestamp_from_date(value);
+                hybrid_set->insert(&v);
+            }
+            std::unique_ptr<InListPredicateBase<TYPE_DATE, 
PredicateType::IN_LIST,
+                                                
HybridSet<PrimitiveType::TYPE_DATE>>>
+                    date_pred(new InListPredicateBase<TYPE_DATE, 
PredicateType::IN_LIST,
+                                                      
HybridSet<PrimitiveType::TYPE_DATE>>(
+                            0, hybrid_set));
+            EXPECT_EQ(date_pred->evaluate_and(bf.get()), result);
+        };
+
+        test_positive({"2024-11-08", "2024-11-09"}, true);
+        test_positive({"2024-11-08"}, true);
+        test_positive({"2024-11-09"}, true);
+
+        auto test_negative = [&](const std::vector<std::string>& values, bool 
result) {
+            auto hybrid_set = 
std::make_shared<HybridSet<PrimitiveType::TYPE_DATE>>();
+
+            for (const auto& value : values) {
+                auto v = timestamp_from_date(value);
+                hybrid_set->insert(&v);
+            }
+
+            std::unique_ptr<InListPredicateBase<TYPE_DATE, 
PredicateType::IN_LIST,
+                                                
HybridSet<PrimitiveType::TYPE_DATE>>>
+                    date_pred(new InListPredicateBase<TYPE_DATE, 
PredicateType::IN_LIST,
+                                                      
HybridSet<PrimitiveType::TYPE_DATE>>(
+                            0, hybrid_set));
+
+            EXPECT_EQ(date_pred->evaluate_and(bf.get()), result);
+        };
+
+        test_negative({"2024-11-20"}, false);
+        test_negative({"2024-11-08", "2024-11-20"}, true);
+        test_negative({"2024-11-20", "2024-11-21"}, false);
+    }
+
+    // Test DATETIME column with IN predicate
+    {
+        const auto& reader = segment->_column_readers[1];
+        std::unique_ptr<BloomFilterIndexIterator> bf_iter;
+        EXPECT_TRUE(reader->_bloom_filter_index->load(true, true, 
nullptr).ok());
+        EXPECT_TRUE(reader->_bloom_filter_index->new_iterator(&bf_iter, 
nullptr).ok());
+        std::unique_ptr<BloomFilter> bf;
+        EXPECT_TRUE(bf_iter->read_bloom_filter(0, &bf).ok());
+
+        // Test positive cases
+        auto test_positive = [&](const std::vector<std::string>& values, bool 
result) {
+            auto hybrid_set = 
std::make_shared<HybridSet<PrimitiveType::TYPE_DATETIME>>();
+            for (const auto& value : values) {
+                auto v = timestamp_from_datetime(value);
+                hybrid_set->insert(&v);
+            }
+            std::unique_ptr<InListPredicateBase<TYPE_DATETIME, 
PredicateType::IN_LIST,
+                                                
HybridSet<PrimitiveType::TYPE_DATETIME>>>
+                    datetime_pred(new InListPredicateBase<TYPE_DATETIME, 
PredicateType::IN_LIST,
+                                                          
HybridSet<PrimitiveType::TYPE_DATETIME>>(
+                            0, hybrid_set));
+            EXPECT_EQ(datetime_pred->evaluate_and(bf.get()), result);
+        };
+
+        test_positive({"2024-11-08 09:00:00", "2024-11-09 09:00:00"}, true);
+        test_positive({"2024-11-08 09:00:00"}, true);
+        test_positive({"2024-11-09 09:00:00"}, true);
+
+        // Test negative cases
+        auto test_negative = [&](const std::vector<std::string>& values, bool 
result) {
+            auto hybrid_set = 
std::make_shared<HybridSet<PrimitiveType::TYPE_DATETIME>>();
+            for (const auto& value : values) {
+                auto v = timestamp_from_datetime(value);
+                hybrid_set->insert(&v);
+            }
+            std::unique_ptr<InListPredicateBase<TYPE_DATETIME, 
PredicateType::IN_LIST,
+                                                
HybridSet<PrimitiveType::TYPE_DATETIME>>>
+                    datetime_pred(new InListPredicateBase<TYPE_DATETIME, 
PredicateType::IN_LIST,
+                                                          
HybridSet<PrimitiveType::TYPE_DATETIME>>(
+                            0, hybrid_set));
+            EXPECT_EQ(datetime_pred->evaluate_and(bf.get()), result);
+        };
+
+        test_negative({"2024-11-20 09:00:00"}, false);
+        test_negative({"2024-11-08 09:00:00", "2024-11-20 09:00:00"}, true);
+        test_negative({"2024-11-20 09:00:00", "2024-11-21 09:00:00"}, false);
+    }
+}
+
 } // namespace doris
diff --git a/regression-test/data/bloom_filter_p0/test_bloom_filter.out 
b/regression-test/data/bloom_filter_p0/test_bloom_filter.out
new file mode 100644
index 00000000000..9425c984f08
--- /dev/null
+++ b/regression-test/data/bloom_filter_p0/test_bloom_filter.out
@@ -0,0 +1,21 @@
+-- This file is automatically generated. You should know what you did if you 
want to edit this
+-- !select_datetime_v1 --
+1      1       1       2024-12-17T20:00        2024-12-17T20:00        
2024-12-17      2024-12-17      3.32    3.32
+1      1       1       2024-12-17T20:00        2024-12-17T20:00        
2024-12-17      2024-12-17      3.32    3.32
+2      2       2       2024-12-18T20:00        2024-12-18T20:00        
2024-12-18      2024-12-18      3.33    3.33
+
+-- !select_datetime_v2 --
+1      1       1       2024-12-17T20:00        2024-12-17T20:00        
2024-12-17      2024-12-17      3.32    3.32
+1      1       1       2024-12-17T20:00        2024-12-17T20:00        
2024-12-17      2024-12-17      3.32    3.32
+2      2       2       2024-12-18T20:00        2024-12-18T20:00        
2024-12-18      2024-12-18      3.33    3.33
+
+-- !select_date_v1 --
+1      1       1       2024-12-17T20:00        2024-12-17T20:00        
2024-12-17      2024-12-17      3.32    3.32
+1      1       1       2024-12-17T20:00        2024-12-17T20:00        
2024-12-17      2024-12-17      3.32    3.32
+2      2       2       2024-12-18T20:00        2024-12-18T20:00        
2024-12-18      2024-12-18      3.33    3.33
+
+-- !select_date_v2 --
+1      1       1       2024-12-17T20:00        2024-12-17T20:00        
2024-12-17      2024-12-17      3.32    3.32
+1      1       1       2024-12-17T20:00        2024-12-17T20:00        
2024-12-17      2024-12-17      3.32    3.32
+2      2       2       2024-12-18T20:00        2024-12-18T20:00        
2024-12-18      2024-12-18      3.33    3.33
+
diff --git a/regression-test/suites/bloom_filter_p0/test_bloom_filter.groovy 
b/regression-test/suites/bloom_filter_p0/test_bloom_filter.groovy
index 23e1c7ed596..ff8710c5998 100644
--- a/regression-test/suites/bloom_filter_p0/test_bloom_filter.groovy
+++ b/regression-test/suites/bloom_filter_p0/test_bloom_filter.groovy
@@ -148,4 +148,39 @@ suite("test_bloom_filter") {
         sql """ALTER TABLE ${test_json_tb} SET("bloom_filter_columns" = 
"k1,j1")"""
         exception "not supported in bloom filter index"
     }
+
+    // bloom filter index for datetime/date/decimal columns
+    def test_datetime_tb = "test_datetime_bloom_filter_tb"
+    sql """DROP TABLE IF EXISTS ${test_datetime_tb}"""
+    sql """ADMIN SET FRONTEND CONFIG ('disable_decimalv2' = 'false')"""
+    sql """ADMIN SET FRONTEND CONFIG ('disable_datev1' = 'false')"""
+    sql """CREATE TABLE IF NOT EXISTS ${test_datetime_tb} (
+            a int,
+            b int,
+            c int,
+            d DATETIMEV1,
+            d2 DATETIMEV2,
+            da DATEv1,
+            dav2 DATEV2,
+            dec decimal(10,2),
+            dec2 decimalv2(10,2)
+        ) ENGINE=OLAP
+        DUPLICATE KEY(a)
+        DISTRIBUTED BY HASH(a) BUCKETS 5
+        PROPERTIES (
+            "replication_num" = "1"
+        )"""
+    sql """INSERT INTO ${test_datetime_tb} VALUES
+        (1,1,1,"2024-12-17 20:00:00", "2024-12-17 20:00:00", "2024-12-17", 
"2024-12-17", "3.32", "3.32"),
+        (1,1,1,"2024-12-17 20:00:00", "2024-12-17 20:00:00", "2024-12-17", 
"2024-12-17", "3.32", "3.32"),
+        (2,2,2,"2024-12-18 20:00:00", "2024-12-18 20:00:00", "2024-12-18", 
"2024-12-18", "3.33", "3.33"),
+        (3,3,3,"2024-12-22 20:00:00", "2024-12-22 20:00:00", "2024-12-22", 
"2024-12-22", "4.33", "4.33")"""
+    sql """ALTER TABLE ${test_datetime_tb} SET ("bloom_filter_columns" = 
"d,d2,da,dav2,dec,dec2")"""
+    Thread.sleep(3000)
+    qt_select_datetime_v1 """SELECT * FROM ${test_datetime_tb} WHERE d IN 
("2024-12-17 20:00:00", "2024-12-18 20:00:00") order by a"""
+    qt_select_datetime_v2 """SELECT * FROM ${test_datetime_tb} WHERE d2 IN 
("2024-12-17 20:00:00", "2024-12-18 20:00:00") order by a"""
+    qt_select_date_v1 """SELECT * FROM ${test_datetime_tb} WHERE da IN 
("2024-12-17", "2024-12-18") order by a"""
+    qt_select_date_v2 """SELECT * FROM ${test_datetime_tb} WHERE dav2 IN 
("2024-12-17", "2024-12-18") order by a"""
+    sql """ADMIN SET FRONTEND CONFIG ('disable_decimalv2' = 'true')"""
+    sql """ADMIN SET FRONTEND CONFIG ('disable_datev1' = 'true')"""
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to