This is an automated email from the ASF dual-hosted git repository.
airborne pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.1 by this push:
new 15f85e2cfb2 [fix] (bloom filter) Fix the bloom filter calculation for
date and datetime (#43351) (#43622)
15f85e2cfb2 is described below
commit 15f85e2cfb2212cfbd6df32eba880e7bcb12bf61
Author: Sun Chenyang <[email protected]>
AuthorDate: Tue Nov 12 10:56:55 2024 +0800
[fix] (bloom filter) Fix the bloom filter calculation for date and datetime
(#43351) (#43622)
pick from master #43351
---------
Co-authored-by: csun5285 <[email protected]>
---
be/src/olap/comparison_predicate.h | 13 ++-
be/src/olap/tablet_meta.h | 5 +
be/test/olap/date_bloom_filter_test.cpp | 195 ++++++++++++++++++++++++++++++++
3 files changed, 212 insertions(+), 1 deletion(-)
diff --git a/be/src/olap/comparison_predicate.h
b/be/src/olap/comparison_predicate.h
index 2e0c4db4ba0..7523e991291 100644
--- a/be/src/olap/comparison_predicate.h
+++ b/be/src/olap/comparison_predicate.h
@@ -202,12 +202,23 @@ public:
return bf->test_bytes(_value.data, _value.size);
} else {
// DecimalV2 using decimal12_t in bloom filter, should convert
value to decimal12_t
- // Datev1/DatetimeV1 using VecDatetimeValue in bloom filter,
NO need to convert.
if constexpr (Type == PrimitiveType::TYPE_DECIMALV2) {
decimal12_t decimal12_t_val(_value.int_value(),
_value.frac_value());
return bf->test_bytes(
const_cast<char*>(reinterpret_cast<const
char*>(&decimal12_t_val)),
sizeof(decimal12_t));
+ // Datev1 using uint24_t in bloom filter
+ } else if constexpr (Type == PrimitiveType::TYPE_DATE) {
+ uint24_t date_value(_value.to_olap_date());
+ return bf->test_bytes(
+ const_cast<char*>(reinterpret_cast<const
char*>(&date_value)),
+ sizeof(uint24_t));
+ // DatetimeV1 using int64_t in bloom filter
+ } else if constexpr (Type == PrimitiveType::TYPE_DATETIME) {
+ int64_t datetime_value(_value.to_olap_datetime());
+ return bf->test_bytes(
+ const_cast<char*>(reinterpret_cast<const
char*>(&datetime_value)),
+ sizeof(int64_t));
} else {
return
bf->test_bytes(const_cast<char*>(reinterpret_cast<const char*>(&_value)),
sizeof(T));
diff --git a/be/src/olap/tablet_meta.h b/be/src/olap/tablet_meta.h
index 77d5554aae2..3c36cad53ab 100644
--- a/be/src/olap/tablet_meta.h
+++ b/be/src/olap/tablet_meta.h
@@ -118,6 +118,11 @@ public:
TabletMeta(const TabletMeta& tablet_meta);
TabletMeta(TabletMeta&& tablet_meta) = delete;
+// Test-only constructor, compiled only when BE_TEST is defined: initializes
+// _schema from the given tablet schema and sets no other member explicitly.
+#ifdef BE_TEST
+ TabletMeta(TabletSchemaSPtr tablet_schema) : _schema(tablet_schema) {}
+#endif
+
// Function create_from_file is used to be compatible with previous
tablet_meta.
// Previous tablet_meta is a physical file in tablet dir, which is not
stored in rocksdb.
Status create_from_file(const std::string& file_path);
diff --git a/be/test/olap/date_bloom_filter_test.cpp
b/be/test/olap/date_bloom_filter_test.cpp
new file mode 100644
index 00000000000..d3839f8c268
--- /dev/null
+++ b/be/test/olap/date_bloom_filter_test.cpp
@@ -0,0 +1,195 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <gtest/gtest.h>
+
+#include "olap/comparison_predicate.h"
+#include "olap/rowset/beta_rowset.h"
+#include "olap/rowset/beta_rowset_writer.h"
+#include "olap/rowset/rowset_factory.h"
+#include "olap/rowset/segment_v2/bloom_filter_index_reader.h"
+#include "olap/storage_engine.h"
+#include "util/date_func.h"
+#include "vec/runtime/vdatetime_value.h"
+
+namespace doris {
+
+using namespace doris::vectorized;
+
+// Size of the buffer that SetUp() passes to getcwd() for the working-directory path.
+constexpr static uint32_t MAX_PATH_LEN = 1024;
+// Relative location of this test's scratch directory; SetUp() appends it to the
+// current working directory to form the absolute path.
+constexpr static std::string_view dest_dir = "./ut_dir/date_bloom_filter";
+// Running counter used by rowset_writer_context() for both the rowset id and the
+// rowset version; it is incremented on every call.
+static int64_t inc_id = 1000;
+
+// Test fixture for bloom filter checks on DATE / DATETIME columns.
+//
+// SetUp() creates a scratch directory under the current working directory, a
+// StorageEngine registered with ExecEnv, a DUP_KEYS tablet schema with two nullable
+// key columns ("date_column": DATE, "datetime_column": DATETIME) that both have
+// bloom filters enabled, and a tablet built from that schema. TearDown() removes
+// the directories and unregisters/destroys the engine.
+class DateBloomFilterTest : public ::testing::Test {
+protected:
+    // Builds the scratch directory, storage engine, tablet schema and tablet.
+    void SetUp() override {
+        // absolute dir
+        char buffer[MAX_PATH_LEN];
+        // Fatal on failure: buffer would be read uninitialized if getcwd() failed.
+        ASSERT_NE(getcwd(buffer, MAX_PATH_LEN), nullptr);
+        _current_dir = std::string(buffer);
+        // The working directory normally has no trailing '/', so insert one before
+        // the relative dest_dir; plain concatenation would yield "<cwd>./ut_dir/...",
+        // i.e. a sibling directory of the working directory.
+        _absolute_dir = _current_dir + "/" + std::string(dest_dir);
+        EXPECT_TRUE(io::global_local_filesystem()->delete_directory(_absolute_dir).ok());
+        EXPECT_TRUE(io::global_local_filesystem()->create_directory(_absolute_dir).ok());
+
+        // storage engine
+        doris::EngineOptions options;
+        auto* engine = new StorageEngine(options);
+        _engine_ref = engine;
+        _data_dir = std::make_unique<DataDir>(_absolute_dir);
+        static_cast<void>(_data_dir->update_capacity());
+        ExecEnv::GetInstance()->set_storage_engine(engine);
+
+        // tablet_schema
+        TabletSchemaPB schema_pb;
+        schema_pb.set_keys_type(KeysType::DUP_KEYS);
+
+        construct_column(schema_pb.add_column(), 0, "DATE", "date_column");
+        construct_column(schema_pb.add_column(), 1, "DATETIME", "datetime_column");
+
+        _tablet_schema.reset(new TabletSchema);
+        _tablet_schema->init_from_pb(schema_pb);
+
+        // tablet
+        TabletMetaSharedPtr tablet_meta(new TabletMeta(_tablet_schema));
+
+        _tablet.reset(new Tablet(*_engine_ref, tablet_meta, _data_dir.get()));
+        EXPECT_TRUE(_tablet->init().ok());
+    }
+
+    // Removes the tablet and scratch directories, then stops and destroys the engine.
+    void TearDown() override {
+        // _tablet / _absolute_dir are still unset if SetUp() aborted early.
+        if (_tablet != nullptr) {
+            EXPECT_TRUE(io::global_local_filesystem()
+                                ->delete_directory(_tablet->tablet_path())
+                                .ok());
+        }
+        if (!_absolute_dir.empty()) {
+            EXPECT_TRUE(io::global_local_filesystem()->delete_directory(_absolute_dir).ok());
+        }
+        if (_engine_ref != nullptr) {
+            _engine_ref->stop();
+            delete _engine_ref;
+            _engine_ref = nullptr;
+            ExecEnv::GetInstance()->set_storage_engine(nullptr);
+        }
+    }
+
+    // Fills column_pb with the given unique id, type and name, and marks the column
+    // as a nullable key column with a bloom filter.
+    void construct_column(ColumnPB* column_pb, int32_t col_unique_id,
+                          const std::string& column_type, const std::string& column_name) {
+        column_pb->set_unique_id(col_unique_id);
+        column_pb->set_name(column_name);
+        column_pb->set_type(column_type);
+        column_pb->set_is_key(true);
+        column_pb->set_is_nullable(true);
+        column_pb->set_is_bf_column(true);
+    }
+
+    // Returns a writer context for a VISIBLE BETA_ROWSET in the fixture's tablet
+    // directory. The current inc_id is used as rowset id and as version
+    // [inc_id, inc_id]; inc_id is then incremented so each call gets a fresh id.
+    RowsetWriterContext rowset_writer_context() {
+        RowsetWriterContext context;
+        RowsetId rowset_id;
+        rowset_id.init(inc_id);
+        context.rowset_id = rowset_id;
+        context.rowset_type = BETA_ROWSET;
+        context.data_dir = _data_dir.get();
+        context.rowset_state = VISIBLE;
+        context.tablet_schema = _tablet_schema;
+        context.rowset_dir = _tablet->tablet_path();
+        context.version = Version(inc_id, inc_id);
+        context.max_rows_per_segment = 200;
+        inc_id++;
+        return context;
+    }
+
+    DateBloomFilterTest() = default;
+    ~DateBloomFilterTest() override = default;
+
+private:
+    TabletSchemaSPtr _tablet_schema = nullptr;
+    // Raw pointer to the engine created in SetUp(); deleted in TearDown().
+    StorageEngine* _engine_ref = nullptr;
+    std::unique_ptr<DataDir> _data_dir = nullptr;
+    TabletSharedPtr _tablet = nullptr;
+    // Scratch directory for this test: working directory + "/" + dest_dir.
+    std::string _absolute_dir;
+    // Working directory captured by getcwd() in SetUp().
+    std::string _current_dir;
+};
+
+// Writes two rows (2024-11-08 / 2024-11-09, with 09:00:00 for the datetime column)
+// into a rowset, loads segment 0 and reads the first bloom filter of each column.
+// EQ predicates on TYPE_DATE and TYPE_DATETIME are then expected to report the two
+// written values as present and 2024-11-20 as absent.
+TEST_F(DateBloomFilterTest, query_index_test) {
+    EXPECT_TRUE(io::global_local_filesystem()->delete_directory(_tablet->tablet_path()).ok());
+    EXPECT_TRUE(io::global_local_filesystem()->create_directory(_tablet->tablet_path()).ok());
+
+    RowsetSharedPtr rowset;
+    std::unique_ptr<RowsetWriter> rowset_writer;
+    const auto& res =
+            RowsetFactory::create_rowset_writer(rowset_writer_context(), false, &rowset_writer);
+    // Stop here on failure rather than dereference a writer that was not created.
+    ASSERT_TRUE(res.ok());
+    ASSERT_NE(rowset_writer, nullptr);
+
+    Block block = _tablet_schema->create_block();
+    auto columns = block.mutate_columns();
+
+    // Row 1: values are inserted in their OLAP storage form (uint24_t date,
+    // 64-bit datetime).
+    auto date = timestamp_from_date("2024-11-08");
+    auto datetime = timestamp_from_datetime("2024-11-08 09:00:00");
+    uint24_t olap_date_value(date.to_olap_date());
+    uint64_t olap_datetime_value(datetime.to_olap_datetime());
+    columns[0]->insert_many_fix_len_data(reinterpret_cast<const char*>(&olap_date_value), 1);
+    columns[1]->insert_many_fix_len_data(reinterpret_cast<const char*>(&olap_datetime_value), 1);
+
+    // Row 2.
+    date = timestamp_from_date("2024-11-09");
+    datetime = timestamp_from_datetime("2024-11-09 09:00:00");
+    olap_date_value = date.to_olap_date();
+    olap_datetime_value = datetime.to_olap_datetime();
+    columns[0]->insert_many_fix_len_data(reinterpret_cast<const char*>(&olap_date_value), 1);
+    columns[1]->insert_many_fix_len_data(reinterpret_cast<const char*>(&olap_datetime_value), 1);
+
+    EXPECT_TRUE(rowset_writer->add_block(&block).ok());
+    EXPECT_TRUE(rowset_writer->flush().ok());
+    // Fatal checks from here on guard the pointers dereferenced right after them.
+    ASSERT_TRUE(rowset_writer->build(rowset).ok());
+    ASSERT_NE(rowset, nullptr);
+    EXPECT_TRUE(_tablet->add_rowset(rowset).ok());
+
+    segment_v2::SegmentSharedPtr segment;
+    ASSERT_TRUE(static_cast<BetaRowset*>(rowset.get())->load_segment(0, &segment).ok());
+    ASSERT_NE(segment, nullptr);
+    auto st = segment->_create_column_readers(*(segment->_footer_pb));
+    ASSERT_TRUE(st.ok());
+
+    // date
+    {
+        const auto& reader = segment->_column_readers[0];
+        std::unique_ptr<BloomFilterIndexIterator> bf_iter;
+        EXPECT_TRUE(reader->_bloom_filter_index->load(true, true).ok());
+        ASSERT_TRUE(reader->_bloom_filter_index->new_iterator(&bf_iter).ok());
+        std::unique_ptr<BloomFilter> bf;
+        ASSERT_TRUE(bf_iter->read_bloom_filter(0, &bf).ok());
+        // Probes the bloom filter through an EQ predicate built from query_string.
+        auto test = [&](const std::string& query_string, bool result) {
+            auto query_date = timestamp_from_date(query_string);
+            auto date_pred =
+                    std::make_unique<ComparisonPredicateBase<TYPE_DATE, PredicateType::EQ>>(
+                            0, query_date);
+            EXPECT_EQ(date_pred->evaluate_and(bf.get()), result);
+        };
+        test("2024-11-08", true);
+        test("2024-11-09", true);
+        test("2024-11-20", false);
+    }
+
+    // datetime
+    {
+        const auto& reader = segment->_column_readers[1];
+        std::unique_ptr<BloomFilterIndexIterator> bf_iter;
+        EXPECT_TRUE(reader->_bloom_filter_index->load(true, true).ok());
+        ASSERT_TRUE(reader->_bloom_filter_index->new_iterator(&bf_iter).ok());
+        std::unique_ptr<BloomFilter> bf;
+        ASSERT_TRUE(bf_iter->read_bloom_filter(0, &bf).ok());
+        // Probes the bloom filter through an EQ predicate built from query_string.
+        auto test = [&](const std::string& query_string, bool result) {
+            auto query_datetime = timestamp_from_datetime(query_string);
+            auto datetime_pred =
+                    std::make_unique<ComparisonPredicateBase<TYPE_DATETIME, PredicateType::EQ>>(
+                            0, query_datetime);
+            EXPECT_EQ(datetime_pred->evaluate_and(bf.get()), result);
+        };
+        test("2024-11-08 09:00:00", true);
+        test("2024-11-09 09:00:00", true);
+        test("2024-11-20 09:00:00", false);
+    }
+}
+} // namespace doris
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]