This is an automated email from the ASF dual-hosted git repository.
airborne pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new a0f4c4f9405 [fix](bloomfilter) fix inlist support for date/datetimev1
in bloomfilter index (#46961)
a0f4c4f9405 is described below
commit a0f4c4f9405e0cc0fc5ef75ecae21c81ccf5ff1c
Author: airborne12 <[email protected]>
AuthorDate: Wed Jan 15 10:56:08 2025 +0800
[fix](bloomfilter) fix inlist support for date/datetimev1 in bloomfilter
index (#46961)
Problem Summary:
#43351 fixed date/datetime v1 support in the comparison predicate; this PR
applies the same fix to the in-list predicate.
---
be/src/olap/in_list_predicate.h | 17 +++
be/test/olap/date_bloom_filter_test.cpp | 140 +++++++++++++++++++++
.../data/bloom_filter_p0/test_bloom_filter.out | 21 ++++
.../bloom_filter_p0/test_bloom_filter.groovy | 35 ++++++
4 files changed, 213 insertions(+)
diff --git a/be/src/olap/in_list_predicate.h b/be/src/olap/in_list_predicate.h
index deb3f666f0e..872ff8853d8 100644
--- a/be/src/olap/in_list_predicate.h
+++ b/be/src/olap/in_list_predicate.h
@@ -322,6 +322,23 @@ public:
sizeof(decimal12_t))) {
return true;
}
+ } else if constexpr (Type == PrimitiveType::TYPE_DATE) {
+ const T* value = (const T*)(iter->get_value());
+ uint24_t date_value(value->to_olap_date());
+ if (bf->test_bytes(
+ const_cast<char*>(reinterpret_cast<const
char*>(&date_value)),
+ sizeof(uint24_t))) {
+ return true;
+ }
+ // DatetimeV1 using int64_t in bloom filter
+ } else if constexpr (Type == PrimitiveType::TYPE_DATETIME) {
+ const T* value = (const T*)(iter->get_value());
+ int64_t datetime_value(value->to_olap_datetime());
+ if (bf->test_bytes(
+ const_cast<char*>(reinterpret_cast<const
char*>(&datetime_value)),
+ sizeof(int64_t))) {
+ return true;
+ }
} else {
const T* value = (const T*)(iter->get_value());
if (bf->test_bytes(reinterpret_cast<const char*>(value),
sizeof(*value))) {
diff --git a/be/test/olap/date_bloom_filter_test.cpp
b/be/test/olap/date_bloom_filter_test.cpp
index 51de4ebd8e7..cf869bf82e3 100644
--- a/be/test/olap/date_bloom_filter_test.cpp
+++ b/be/test/olap/date_bloom_filter_test.cpp
@@ -18,11 +18,13 @@
#include <gtest/gtest.h>
#include "olap/comparison_predicate.h"
+#include "olap/in_list_predicate.h"
#include "olap/rowset/beta_rowset.h"
#include "olap/rowset/beta_rowset_writer.h"
#include "olap/rowset/rowset_factory.h"
#include "olap/rowset/segment_v2/bloom_filter_index_reader.h"
#include "olap/storage_engine.h"
+#include "runtime/define_primitive_type.h"
#include "util/date_func.h"
#include "vec/runtime/vdatetime_value.h"
@@ -189,4 +191,142 @@ TEST_F(DateBloomFilterTest, query_index_test) {
test("2024-11-20 09:00:00", false);
}
}
+
+TEST_F(DateBloomFilterTest, in_list_predicate_test) {
+
EXPECT_TRUE(io::global_local_filesystem()->delete_directory(_tablet->tablet_path()).ok());
+
EXPECT_TRUE(io::global_local_filesystem()->create_directory(_tablet->tablet_path()).ok());
+
+ RowsetSharedPtr rowset;
+ const auto& res =
+ RowsetFactory::create_rowset_writer(*_engine_ref,
rowset_writer_context(), false);
+ EXPECT_TRUE(res.has_value()) << res.error();
+ const auto& rowset_writer = res.value();
+
+ Block block = _tablet_schema->create_block();
+ auto columns = block.mutate_columns();
+
+ // Insert test data
+ auto date = timestamp_from_date("2024-11-08");
+ auto datetime = timestamp_from_datetime("2024-11-08 09:00:00");
+ uint24_t olap_date_value(date.to_olap_date());
+ uint64_t olap_datetime_value(datetime.to_olap_datetime());
+ columns[0]->insert_many_fix_len_data(reinterpret_cast<const
char*>(&olap_date_value), 1);
+ columns[1]->insert_many_fix_len_data(reinterpret_cast<const
char*>(&olap_datetime_value), 1);
+
+ date = timestamp_from_date("2024-11-09");
+ datetime = timestamp_from_datetime("2024-11-09 09:00:00");
+ olap_date_value = date.to_olap_date();
+ olap_datetime_value = datetime.to_olap_datetime();
+ columns[0]->insert_many_fix_len_data(reinterpret_cast<const
char*>(&olap_date_value), 1);
+ columns[1]->insert_many_fix_len_data(reinterpret_cast<const
char*>(&olap_datetime_value), 1);
+
+ EXPECT_TRUE(rowset_writer->add_block(&block).ok());
+ EXPECT_TRUE(rowset_writer->flush().ok());
+ EXPECT_TRUE(rowset_writer->build(rowset).ok());
+ EXPECT_TRUE(_tablet->add_rowset(rowset).ok());
+
+ segment_v2::SegmentSharedPtr segment;
+ EXPECT_TRUE(((BetaRowset*)rowset.get())->load_segment(0, &segment).ok());
+ auto st = segment->_create_column_readers(*(segment->_footer_pb));
+ EXPECT_TRUE(st.ok());
+
+ // Test DATE column with IN predicate
+ {
+ const auto& reader = segment->_column_readers[0];
+ std::unique_ptr<BloomFilterIndexIterator> bf_iter;
+ EXPECT_TRUE(reader->_bloom_filter_index->load(true, true,
nullptr).ok());
+ EXPECT_TRUE(reader->_bloom_filter_index->new_iterator(&bf_iter,
nullptr).ok());
+ std::unique_ptr<BloomFilter> bf;
+ EXPECT_TRUE(bf_iter->read_bloom_filter(0, &bf).ok());
+
+ // Test positive cases
+ auto test_positive = [&](const std::vector<std::string>& values, bool
result) {
+ auto hybrid_set =
std::make_shared<HybridSet<PrimitiveType::TYPE_DATE>>();
+ for (const auto& value : values) {
+ auto v = timestamp_from_date(value);
+ hybrid_set->insert(&v);
+ }
+ std::unique_ptr<InListPredicateBase<TYPE_DATE,
PredicateType::IN_LIST,
+
HybridSet<PrimitiveType::TYPE_DATE>>>
+ date_pred(new InListPredicateBase<TYPE_DATE,
PredicateType::IN_LIST,
+
HybridSet<PrimitiveType::TYPE_DATE>>(
+ 0, hybrid_set));
+ EXPECT_EQ(date_pred->evaluate_and(bf.get()), result);
+ };
+
+ test_positive({"2024-11-08", "2024-11-09"}, true);
+ test_positive({"2024-11-08"}, true);
+ test_positive({"2024-11-09"}, true);
+
+ auto test_negative = [&](const std::vector<std::string>& values, bool
result) {
+ auto hybrid_set =
std::make_shared<HybridSet<PrimitiveType::TYPE_DATE>>();
+
+ for (const auto& value : values) {
+ auto v = timestamp_from_date(value);
+ hybrid_set->insert(&v);
+ }
+
+ std::unique_ptr<InListPredicateBase<TYPE_DATE,
PredicateType::IN_LIST,
+
HybridSet<PrimitiveType::TYPE_DATE>>>
+ date_pred(new InListPredicateBase<TYPE_DATE,
PredicateType::IN_LIST,
+
HybridSet<PrimitiveType::TYPE_DATE>>(
+ 0, hybrid_set));
+
+ EXPECT_EQ(date_pred->evaluate_and(bf.get()), result);
+ };
+
+ test_negative({"2024-11-20"}, false);
+ test_negative({"2024-11-08", "2024-11-20"}, true);
+ test_negative({"2024-11-20", "2024-11-21"}, false);
+ }
+
+ // Test DATETIME column with IN predicate
+ {
+ const auto& reader = segment->_column_readers[1];
+ std::unique_ptr<BloomFilterIndexIterator> bf_iter;
+ EXPECT_TRUE(reader->_bloom_filter_index->load(true, true,
nullptr).ok());
+ EXPECT_TRUE(reader->_bloom_filter_index->new_iterator(&bf_iter,
nullptr).ok());
+ std::unique_ptr<BloomFilter> bf;
+ EXPECT_TRUE(bf_iter->read_bloom_filter(0, &bf).ok());
+
+ // Test positive cases
+ auto test_positive = [&](const std::vector<std::string>& values, bool
result) {
+ auto hybrid_set =
std::make_shared<HybridSet<PrimitiveType::TYPE_DATETIME>>();
+ for (const auto& value : values) {
+ auto v = timestamp_from_datetime(value);
+ hybrid_set->insert(&v);
+ }
+ std::unique_ptr<InListPredicateBase<TYPE_DATETIME,
PredicateType::IN_LIST,
+
HybridSet<PrimitiveType::TYPE_DATETIME>>>
+ datetime_pred(new InListPredicateBase<TYPE_DATETIME,
PredicateType::IN_LIST,
+
HybridSet<PrimitiveType::TYPE_DATETIME>>(
+ 0, hybrid_set));
+ EXPECT_EQ(datetime_pred->evaluate_and(bf.get()), result);
+ };
+
+ test_positive({"2024-11-08 09:00:00", "2024-11-09 09:00:00"}, true);
+ test_positive({"2024-11-08 09:00:00"}, true);
+ test_positive({"2024-11-09 09:00:00"}, true);
+
+ // Test negative cases
+ auto test_negative = [&](const std::vector<std::string>& values, bool
result) {
+ auto hybrid_set =
std::make_shared<HybridSet<PrimitiveType::TYPE_DATETIME>>();
+ for (const auto& value : values) {
+ auto v = timestamp_from_datetime(value);
+ hybrid_set->insert(&v);
+ }
+ std::unique_ptr<InListPredicateBase<TYPE_DATETIME,
PredicateType::IN_LIST,
+
HybridSet<PrimitiveType::TYPE_DATETIME>>>
+ datetime_pred(new InListPredicateBase<TYPE_DATETIME,
PredicateType::IN_LIST,
+
HybridSet<PrimitiveType::TYPE_DATETIME>>(
+ 0, hybrid_set));
+ EXPECT_EQ(datetime_pred->evaluate_and(bf.get()), result);
+ };
+
+ test_negative({"2024-11-20 09:00:00"}, false);
+ test_negative({"2024-11-08 09:00:00", "2024-11-20 09:00:00"}, true);
+ test_negative({"2024-11-20 09:00:00", "2024-11-21 09:00:00"}, false);
+ }
+}
+
} // namespace doris
diff --git a/regression-test/data/bloom_filter_p0/test_bloom_filter.out
b/regression-test/data/bloom_filter_p0/test_bloom_filter.out
new file mode 100644
index 00000000000..9425c984f08
--- /dev/null
+++ b/regression-test/data/bloom_filter_p0/test_bloom_filter.out
@@ -0,0 +1,21 @@
+-- This file is automatically generated. You should know what you did if you
want to edit this
+-- !select_datetime_v1 --
+1 1 1 2024-12-17T20:00 2024-12-17T20:00
2024-12-17 2024-12-17 3.32 3.32
+1 1 1 2024-12-17T20:00 2024-12-17T20:00
2024-12-17 2024-12-17 3.32 3.32
+2 2 2 2024-12-18T20:00 2024-12-18T20:00
2024-12-18 2024-12-18 3.33 3.33
+
+-- !select_datetime_v2 --
+1 1 1 2024-12-17T20:00 2024-12-17T20:00
2024-12-17 2024-12-17 3.32 3.32
+1 1 1 2024-12-17T20:00 2024-12-17T20:00
2024-12-17 2024-12-17 3.32 3.32
+2 2 2 2024-12-18T20:00 2024-12-18T20:00
2024-12-18 2024-12-18 3.33 3.33
+
+-- !select_date_v1 --
+1 1 1 2024-12-17T20:00 2024-12-17T20:00
2024-12-17 2024-12-17 3.32 3.32
+1 1 1 2024-12-17T20:00 2024-12-17T20:00
2024-12-17 2024-12-17 3.32 3.32
+2 2 2 2024-12-18T20:00 2024-12-18T20:00
2024-12-18 2024-12-18 3.33 3.33
+
+-- !select_date_v2 --
+1 1 1 2024-12-17T20:00 2024-12-17T20:00
2024-12-17 2024-12-17 3.32 3.32
+1 1 1 2024-12-17T20:00 2024-12-17T20:00
2024-12-17 2024-12-17 3.32 3.32
+2 2 2 2024-12-18T20:00 2024-12-18T20:00
2024-12-18 2024-12-18 3.33 3.33
+
diff --git a/regression-test/suites/bloom_filter_p0/test_bloom_filter.groovy
b/regression-test/suites/bloom_filter_p0/test_bloom_filter.groovy
index 23e1c7ed596..ff8710c5998 100644
--- a/regression-test/suites/bloom_filter_p0/test_bloom_filter.groovy
+++ b/regression-test/suites/bloom_filter_p0/test_bloom_filter.groovy
@@ -148,4 +148,39 @@ suite("test_bloom_filter") {
sql """ALTER TABLE ${test_json_tb} SET("bloom_filter_columns" =
"k1,j1")"""
exception "not supported in bloom filter index"
}
+
+ // bloom filter index for datetime/date/decimal columns
+ def test_datetime_tb = "test_datetime_bloom_filter_tb"
+ sql """DROP TABLE IF EXISTS ${test_datetime_tb}"""
+ sql """ADMIN SET FRONTEND CONFIG ('disable_decimalv2' = 'false')"""
+ sql """ADMIN SET FRONTEND CONFIG ('disable_datev1' = 'false')"""
+ sql """CREATE TABLE IF NOT EXISTS ${test_datetime_tb} (
+ a int,
+ b int,
+ c int,
+ d DATETIMEV1,
+ d2 DATETIMEV2,
+ da DATEv1,
+ dav2 DATEV2,
+ dec decimal(10,2),
+ dec2 decimalv2(10,2)
+ ) ENGINE=OLAP
+ DUPLICATE KEY(a)
+ DISTRIBUTED BY HASH(a) BUCKETS 5
+ PROPERTIES (
+ "replication_num" = "1"
+ )"""
+ sql """INSERT INTO ${test_datetime_tb} VALUES
+ (1,1,1,"2024-12-17 20:00:00", "2024-12-17 20:00:00", "2024-12-17",
"2024-12-17", "3.32", "3.32"),
+ (1,1,1,"2024-12-17 20:00:00", "2024-12-17 20:00:00", "2024-12-17",
"2024-12-17", "3.32", "3.32"),
+ (2,2,2,"2024-12-18 20:00:00", "2024-12-18 20:00:00", "2024-12-18",
"2024-12-18", "3.33", "3.33"),
+ (3,3,3,"2024-12-22 20:00:00", "2024-12-22 20:00:00", "2024-12-22",
"2024-12-22", "4.33", "4.33")"""
+ sql """ALTER TABLE ${test_datetime_tb} SET ("bloom_filter_columns" =
"d,d2,da,dav2,dec,dec2")"""
+ Thread.sleep(3000)
+ qt_select_datetime_v1 """SELECT * FROM ${test_datetime_tb} WHERE d IN
("2024-12-17 20:00:00", "2024-12-18 20:00:00") order by a"""
+ qt_select_datetime_v2 """SELECT * FROM ${test_datetime_tb} WHERE d2 IN
("2024-12-17 20:00:00", "2024-12-18 20:00:00") order by a"""
+ qt_select_date_v1 """SELECT * FROM ${test_datetime_tb} WHERE da IN
("2024-12-17", "2024-12-18") order by a"""
+ qt_select_date_v2 """SELECT * FROM ${test_datetime_tb} WHERE dav2 IN
("2024-12-17", "2024-12-18") order by a"""
+ sql """ADMIN SET FRONTEND CONFIG ('disable_decimalv2' = 'true')"""
+ sql """ADMIN SET FRONTEND CONFIG ('disable_datev1' = 'true')"""
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]