This is an automated email from the ASF dual-hosted git repository. zhangchen pushed a commit to branch branch-2.0 in repository https://gitbox.apache.org/repos/asf/doris.git
commit 30024bdef11742d686f427524061164c8d444cdd Author: Xin Liao <[email protected]> AuthorDate: Wed Jul 12 19:13:36 2023 +0800 [enhancement](merge-on-write) split delete bitmap from tablet meta (#21456) --- be/src/olap/compaction.cpp | 11 ++++ be/src/olap/data_dir.cpp | 42 +++++++++++++++ be/src/olap/olap_meta.cpp | 23 ++++++++ be/src/olap/olap_meta.h | 7 ++- be/src/olap/storage_engine.cpp | 30 +++++++++++ be/src/olap/storage_engine.h | 2 + be/src/olap/tablet.cpp | 6 +++ be/src/olap/tablet_meta_manager.cpp | 90 +++++++++++++++++++++++++++++++ be/src/olap/tablet_meta_manager.h | 16 ++++++ be/src/olap/txn_manager.cpp | 6 ++- be/test/olap/tablet_meta_manager_test.cpp | 71 ++++++++++++++++++++++++ 11 files changed, 301 insertions(+), 3 deletions(-) diff --git a/be/src/olap/compaction.cpp b/be/src/olap/compaction.cpp index 57ed2e2558..ec4f6a6ff9 100644 --- a/be/src/olap/compaction.cpp +++ b/be/src/olap/compaction.cpp @@ -50,6 +50,7 @@ #include "olap/storage_policy.h" #include "olap/tablet.h" #include "olap/tablet_meta.h" +#include "olap/tablet_meta_manager.h" #include "olap/task/engine_checksum_task.h" #include "olap/txn_manager.h" #include "olap/utils.h" @@ -650,10 +651,20 @@ Status Compaction::modify_rowsets(const Merger::Statistics* stats) { RETURN_IF_ERROR(_tablet->modify_rowsets(output_rowsets, _input_rowsets, true)); } + int64_t cur_max_version = 0; { std::shared_lock rlock(_tablet->get_header_lock()); + cur_max_version = _tablet->max_version_unlocked().second; _tablet->save_meta(); } + if (_tablet->keys_type() == KeysType::UNIQUE_KEYS && + _tablet->enable_unique_key_merge_on_write()) { + auto st = TabletMetaManager::remove_old_version_delete_bitmap( + _tablet->data_dir(), _tablet->tablet_id(), cur_max_version); + if (!st.ok()) { + LOG(WARNING) << "failed to remove old version delete bitmap, st: " << st; + } + } return Status::OK(); } diff --git a/be/src/olap/data_dir.cpp b/be/src/olap/data_dir.cpp index 64cbf128ff..ce244f09a5 100644 --- a/be/src/olap/data_dir.cpp +++ b/be/src/olap/data_dir.cpp @@ -28,6 +28,7 @@ #include <filesystem> #include <memory> #include <new> +#include <roaring/roaring.hh> #include <set> #include <sstream> #include <string> @@ -42,6 +43,7 @@ #include "io/fs/local_file_system.h" #include "io/fs/path.h" #include "io/fs/remote_file_system.h" +#include "olap/olap_common.h" #include "olap/olap_define.h" #include "olap/olap_meta.h" #include "olap/rowset/beta_rowset.h" @@ -538,6 +540,46 @@ Status DataDir::load() { } } + auto load_delete_bitmap_func = [this](int64_t tablet_id, int64_t version, const string& val) { + TabletSharedPtr tablet = _tablet_manager->get_tablet(tablet_id); + if (!tablet) { + return true; + } + const std::vector<RowsetMetaSharedPtr>& all_rowsets = tablet->tablet_meta()->all_rs_metas(); + RowsetIdUnorderedSet rowset_ids; + for (auto& rowset_meta : all_rowsets) { + rowset_ids.insert(rowset_meta->rowset_id()); + } + + DeleteBitmapPB delete_bitmap_pb; + delete_bitmap_pb.ParseFromString(val); + int rst_ids_size = delete_bitmap_pb.rowset_ids_size(); + int seg_ids_size = delete_bitmap_pb.segment_ids_size(); + int seg_maps_size = delete_bitmap_pb.segment_delete_bitmaps_size(); + CHECK(rst_ids_size == seg_ids_size && seg_ids_size == seg_maps_size); + + for (size_t i = 0; i < rst_ids_size; ++i) { + RowsetId rst_id; + rst_id.init(delete_bitmap_pb.rowset_ids(i)); + // only process the rowset in _rs_metas + if (rowset_ids.find(rst_id) == rowset_ids.end()) { + continue; + } + auto seg_id = delete_bitmap_pb.segment_ids(i); + auto iter = tablet->tablet_meta()->delete_bitmap().delete_bitmap.find( + {rst_id, seg_id, version}); + // This version of delete bitmap already exists + if (iter != tablet->tablet_meta()->delete_bitmap().delete_bitmap.end()) { + continue; + } + auto bitmap = delete_bitmap_pb.segment_delete_bitmaps(i).data(); + tablet->tablet_meta()->delete_bitmap().delete_bitmap[{rst_id, seg_id, version}] = + roaring::Roaring::read(bitmap); + } + return true; + }; + TabletMetaManager::traverse_delete_bitmap(_meta, load_delete_bitmap_func); + // At startup, we only count these invalid rowset, but do not actually delete it. // The actual delete operation is in StorageEngine::_clean_unused_rowset_metas, // which is cleaned up uniformly by the background cleanup thread. diff --git a/be/src/olap/olap_meta.cpp b/be/src/olap/olap_meta.cpp index e5403546fd..ef728caaf6 100644 --- a/be/src/olap/olap_meta.cpp +++ b/be/src/olap/olap_meta.cpp @@ -211,6 +211,29 @@ Status OlapMeta::put(const int column_family_index, const std::vector<BatchEntry return Status::OK(); } +Status OlapMeta::put(rocksdb::WriteBatch* batch) { + DorisMetrics::instance()->meta_write_request_total->increment(1); + + rocksdb::Status s; + { + int64_t duration_ns = 0; + Defer defer([&] { + DorisMetrics::instance()->meta_write_request_duration_us->increment(duration_ns / 1000); + }); + SCOPED_RAW_TIMER(&duration_ns); + + WriteOptions write_options; + write_options.sync = config::sync_tablet_meta; + s = _db->Write(write_options, batch); + } + + if (!s.ok()) { + LOG(WARNING) << "rocks db put batch failed, reason:" << s.ToString(); + return Status::Error<META_PUT_ERROR>(); + } + return Status::OK(); +} + Status OlapMeta::remove(const int column_family_index, const std::string& key) { DorisMetrics::instance()->meta_write_request_total->increment(1); auto& handle = _handles[column_family_index]; diff --git a/be/src/olap/olap_meta.h b/be/src/olap/olap_meta.h index 0b5e40045a..174f2d065f 100644 --- a/be/src/olap/olap_meta.h +++ b/be/src/olap/olap_meta.h @@ -27,6 +27,7 @@ namespace rocksdb { class ColumnFamilyHandle; class DB; +class WriteBatch; } // namespace rocksdb namespace doris { @@ -41,7 +42,6 @@ public: : key(key_arg), value(value_arg) {} }; -public: OlapMeta(const std::string& root_path); ~OlapMeta(); @@ -53,6 +53,7 @@ public: Status put(const int column_family_index, const std::string& key, const std::string& value); Status put(const int column_family_index, const std::vector<BatchEntry>& entries); + Status put(rocksdb::WriteBatch* batch); Status remove(const int column_family_index, const std::string& key); Status remove(const int column_family_index, const std::vector<std::string>& keys); @@ -62,6 +63,10 @@ public: std::string get_root_path() const { return _root_path; } + rocksdb::ColumnFamilyHandle* get_handle(const int column_family_index) { + return _handles[column_family_index].get(); + } + private: std::string _root_path; // keep order of _db && _handles, we need destroy _handles before _db diff --git a/be/src/olap/storage_engine.cpp b/be/src/olap/storage_engine.cpp index 6a2b869099..d52353b3cb 100644 --- a/be/src/olap/storage_engine.cpp +++ b/be/src/olap/storage_engine.cpp @@ -42,6 +42,7 @@ #include <random> #include <set> #include <thread> +#include <unordered_set> #include <utility> #include "agent/task_worker_pool.h" @@ -64,6 +65,7 @@ #include "olap/single_replica_compaction.h" #include "olap/tablet_manager.h" #include "olap/tablet_meta.h" +#include "olap/tablet_meta_manager.h" #include "olap/task/engine_task.h" #include "olap/txn_manager.h" #include "runtime/memory/mem_tracker.h" @@ -690,6 +692,9 @@ Status StorageEngine::start_trash_sweep(double* usage, bool ignore_guard) { // clean unused rowset metas in OlapMeta _clean_unused_rowset_metas(); + // cleand unused delete bitmap for deleted tablet + _clean_unused_delete_bitmap(); + // clean unused rowsets in remote storage backends for (auto data_dir : get_stores()) { data_dir->perform_remote_rowset_gc(); @@ -765,6 +770,31 @@ void StorageEngine::_clean_unused_rowset_metas() { } } +void StorageEngine::_clean_unused_delete_bitmap() { + std::unordered_set<int64_t> removed_tablets; + auto clean_delete_bitmap_func = [this, &removed_tablets](int64_t tablet_id, int64_t version, + const std::string& val) -> bool { + TabletSharedPtr tablet = _tablet_manager->get_tablet(tablet_id); + if (tablet == nullptr) { + if (removed_tablets.insert(tablet_id).second) { + LOG(INFO) << "clean ununsed delete bitmap for deleted tablet, tablet_id: " + << tablet_id; + } + } + return true; + }; + auto data_dirs = get_stores(); + for (auto data_dir : data_dirs) { + TabletMetaManager::traverse_delete_bitmap(data_dir->get_meta(), clean_delete_bitmap_func); + for (auto id : removed_tablets) { + TabletMetaManager::remove_old_version_delete_bitmap(data_dir, id, INT64_MAX); + } + LOG(INFO) << "removed invalid delete bitmap from dir: " << data_dir->path() + << ", deleted tablets size: " << removed_tablets.size(); + removed_tablets.clear(); + } +} + void StorageEngine::gc_binlogs(const std::unordered_map<int64_t, int64_t>& gc_tablet_infos) { for (auto [tablet_id, version] : gc_tablet_infos) { LOG(INFO) << fmt::format("start to gc binlogs for tablet_id: {}, version: {}", tablet_id, diff --git a/be/src/olap/storage_engine.h b/be/src/olap/storage_engine.h index 0113101565..d6215586ed 100644 --- a/be/src/olap/storage_engine.h +++ b/be/src/olap/storage_engine.h @@ -247,6 +247,8 @@ private: void _clean_unused_rowset_metas(); + void _clean_unused_delete_bitmap(); + Status _do_sweep(const std::string& scan_root, const time_t& local_tm_now, const int32_t expire); diff --git a/be/src/olap/tablet.cpp b/be/src/olap/tablet.cpp index 8eb2d0fd30..d5652ba6d5 100644 --- a/be/src/olap/tablet.cpp +++ b/be/src/olap/tablet.cpp @@ -101,6 +101,7 @@ #include "olap/storage_policy.h" #include "olap/tablet_manager.h" #include "olap/tablet_meta.h" +#include "olap/tablet_meta_manager.h" #include "olap/tablet_schema.h" #include "olap/txn_manager.h" #include "olap/types.h" @@ -1516,6 +1517,11 @@ bool Tablet::do_tablet_meta_checkpoint() { rs_meta->set_remove_from_rowset_meta(); } + if (keys_type() == UNIQUE_KEYS && enable_unique_key_merge_on_write()) { + TabletMetaManager::remove_old_version_delete_bitmap(_data_dir, tablet_id(), + max_version_unlocked().second); + } + _newly_created_rowset_num = 0; _last_checkpoint_time = UnixMillis(); return true; diff --git a/be/src/olap/tablet_meta_manager.cpp b/be/src/olap/tablet_meta_manager.cpp index 8f6283b83c..20e5747e2f 100644 --- a/be/src/olap/tablet_meta_manager.cpp +++ b/be/src/olap/tablet_meta_manager.cpp @@ -20,6 +20,7 @@ #include <fmt/format.h> #include <gen_cpp/olap_file.pb.h> #include <rocksdb/db.h> +#include <rocksdb/write_batch.h> #include <boost/algorithm/string/trim.hpp> #include <fstream> @@ -28,6 +29,7 @@ #include <vector> #include "common/logging.h" +#include "gutil/endian.h" #include "json2pb/json_to_pb.h" #include "json2pb/pb_to_json.h" #include "olap/data_dir.h" @@ -199,4 +201,92 @@ Status TabletMetaManager::traverse_pending_publish( return status; } +std::string TabletMetaManager::encode_delete_bitmap_key(TTabletId tablet_id, int64_t version) { + std::string key; + key.reserve(20); + key.append(DELETE_BITMAP); + put_fixed64_le(&key, BigEndian::FromHost64(tablet_id)); + put_fixed64_le(&key, BigEndian::FromHost64(version)); + return key; +} + +std::string TabletMetaManager::encode_delete_bitmap_key(TTabletId tablet_id) { + std::string key; + key.reserve(12); + key.append(DELETE_BITMAP); + put_fixed64_le(&key, BigEndian::FromHost64(tablet_id)); + return key; +} + +void TabletMetaManager::decode_delete_bitmap_key(const string& enc_key, TTabletId* tablet_id, + int64_t* version) { + DCHECK_EQ(enc_key.size(), 20); + *tablet_id = BigEndian::ToHost64(UNALIGNED_LOAD64(enc_key.data() + 4)); + *version = BigEndian::ToHost64(UNALIGNED_LOAD64(enc_key.data() + 12)); +} + +Status TabletMetaManager::save_delete_bitmap(DataDir* store, TTabletId tablet_id, + DeleteBitmapPtr delete_bimap, int64_t version) { + VLOG_NOTICE << "save delete bitmap, tablet_id:" << tablet_id << ", version: " << version; + if (delete_bimap->delete_bitmap.empty()) { + return Status::OK(); + } + OlapMeta* meta = store->get_meta(); + DeleteBitmapPB delete_bitmap_pb; + for (auto& [id, bitmap] : delete_bimap->delete_bitmap) { + auto& rowset_id = std::get<0>(id); + int64_t segment_id = std::get<1>(id); + delete_bitmap_pb.add_rowset_ids(rowset_id.to_string()); + delete_bitmap_pb.add_segment_ids(segment_id); + std::string bitmap_data(bitmap.getSizeInBytes(), '\0'); + bitmap.write(bitmap_data.data()); + *(delete_bitmap_pb.add_segment_delete_bitmaps()) = std::move(bitmap_data); + } + std::string key = encode_delete_bitmap_key(tablet_id, version); + std::string val; + bool ok = delete_bitmap_pb.SerializeToString(&val); + if (!ok) { + auto msg = fmt::format("failed to serialize delete bitmap, tablet_id: {}, version: {}", + tablet_id, version); + LOG(WARNING) << msg; + return Status::InternalError(msg); + } + return meta->put(META_COLUMN_FAMILY_INDEX, key, val); +} + +Status TabletMetaManager::traverse_delete_bitmap( + OlapMeta* meta, std::function<bool(int64_t, int64_t, const std::string&)> const& func) { + auto traverse_header_func = [&func](const std::string& key, const std::string& value) -> bool { + TTabletId tablet_id; + int64_t version; + decode_delete_bitmap_key(key, &tablet_id, &version); + VLOG_NOTICE << "traverse delete bitmap, tablet_id: " << tablet_id + << ", version: " << version; + return func(tablet_id, version, value); + }; + return meta->iterate(META_COLUMN_FAMILY_INDEX, DELETE_BITMAP, traverse_header_func); +} + +Status TabletMetaManager::remove_old_version_delete_bitmap(DataDir* store, TTabletId tablet_id, + int64_t version) { + OlapMeta* meta = store->get_meta(); + std::string begin_key = encode_delete_bitmap_key(tablet_id); + std::string end_key = encode_delete_bitmap_key(tablet_id, version); + + std::vector<std::string> remove_keys; + auto get_remove_keys_func = [&](const std::string& key, const std::string& val) -> bool { + // include end_key + if (key > end_key) { + return false; + } + remove_keys.push_back(key); + return true; + }; + LOG(INFO) << "remove old version delete bitmap, tablet_id: " << tablet_id + << " version: " << version << " removed keys size: " << remove_keys.size(); + ; + RETURN_IF_ERROR(meta->iterate(META_COLUMN_FAMILY_INDEX, begin_key, get_remove_keys_func)); + return meta->remove(META_COLUMN_FAMILY_INDEX, remove_keys); +} + } // namespace doris diff --git a/be/src/olap/tablet_meta_manager.h b/be/src/olap/tablet_meta_manager.h index 6ba1d76757..9c0b733888 100644 --- a/be/src/olap/tablet_meta_manager.h +++ b/be/src/olap/tablet_meta_manager.h @@ -36,6 +36,8 @@ const std::string HEADER_PREFIX = "tabletmeta_"; const std::string PENDING_PUBLISH_INFO = "ppi_"; +const std::string DELETE_BITMAP = "dlb_"; + // Helper Class for managing tablet headers of one root path. class TabletMetaManager { public: @@ -69,6 +71,20 @@ public: static Status traverse_pending_publish( OlapMeta* meta, std::function<bool(int64_t, int64_t, const std::string&)> const& func); + + static Status save_delete_bitmap(DataDir* store, TTabletId tablet_id, + DeleteBitmapPtr delete_bimap, int64_t version); + + static Status traverse_delete_bitmap( + OlapMeta* meta, std::function<bool(int64_t, int64_t, const std::string&)> const& func); + + static std::string encode_delete_bitmap_key(TTabletId tablet_id, int64_t version); + static std::string encode_delete_bitmap_key(TTabletId tablet_id); + + static void decode_delete_bitmap_key(const string& enc_key, TTabletId* tablet_id, + int64_t* version); + static Status remove_old_version_delete_bitmap(DataDir* store, TTabletId tablet_id, + int64_t version); }; } // namespace doris diff --git a/be/src/olap/txn_manager.cpp b/be/src/olap/txn_manager.cpp index 432f96e707..e3dc722b0b 100644 --- a/be/src/olap/txn_manager.cpp +++ b/be/src/olap/txn_manager.cpp @@ -42,6 +42,7 @@ #include "olap/storage_engine.h" #include "olap/tablet_manager.h" #include "olap/tablet_meta.h" +#include "olap/tablet_meta_manager.h" #include "olap/task/engine_publish_version_task.h" #include "util/time.h" @@ -381,8 +382,9 @@ Status TxnManager::publish_txn(OlapMeta* meta, TPartitionId partition_id, } stats->partial_update_write_segment_us = MonotonicMicros() - t3; int64_t t4 = MonotonicMicros(); - std::shared_lock rlock(tablet->get_header_lock()); - tablet->save_meta(); + RETURN_IF_ERROR(TabletMetaManager::save_delete_bitmap( + tablet->data_dir(), tablet->tablet_id(), tablet_txn_info.delete_bitmap, + version.second)); stats->save_meta_time_us = MonotonicMicros() - t4; } diff --git a/be/test/olap/tablet_meta_manager_test.cpp b/be/test/olap/tablet_meta_manager_test.cpp index 66964504eb..0ba8bcc53d 100644 --- a/be/test/olap/tablet_meta_manager_test.cpp +++ b/be/test/olap/tablet_meta_manager_test.cpp @@ -20,12 +20,15 @@ #include <gen_cpp/olap_file.pb.h> #include <gtest/gtest-message.h> #include <gtest/gtest-test-part.h> +#include <gtest/gtest.h> #include <json2pb/json_to_pb.h> +#include <stddef.h> #include <filesystem> #include <fstream> #include <memory> #include <new> +#include <roaring/roaring.hh> #include <string> #include "gtest/gtest_pred_impl.h" @@ -108,4 +111,72 @@ TEST_F(TabletMetaManagerTest, TestLoad) { // EXPECT_EQ(_json_header, json_meta_read); } +TEST_F(TabletMetaManagerTest, TestDeleteBimapEncode) { + TTabletId tablet_id = 1234; + int64_t version = 456; + std::string key = TabletMetaManager::encode_delete_bitmap_key(tablet_id, version); + + TTabletId de_tablet_id; + int64_t de_version; + TabletMetaManager::decode_delete_bitmap_key(key, &de_tablet_id, &de_version); + EXPECT_EQ(tablet_id, de_tablet_id); + EXPECT_EQ(version, de_version); +} + +TEST_F(TabletMetaManagerTest, TestSaveDeleteBimap) { + int64_t test_tablet_id = 10086; + std::shared_ptr<DeleteBitmap> dbmp = std::make_shared<DeleteBitmap>(test_tablet_id); + auto gen1 = [&dbmp](int64_t max_rst_id, uint32_t max_seg_id, uint32_t max_row) { + for (int64_t rst = 0; rst < max_rst_id; ++rst) { + for (uint32_t seg = 0; seg < max_seg_id; ++seg) { + for (uint32_t row = 0; row < max_row; ++row) { + dbmp->add({RowsetId {2, 0, 1, rst}, seg, 0}, row); + } + } + } + }; + int64_t max_rst_id = 5; + int64_t max_seg_id = 5; + int64_t max_version = 300; + gen1(max_rst_id, max_seg_id, 10); + for (int64_t ver = 0; ver < max_version; ++ver) { + TabletMetaManager::save_delete_bitmap(_data_dir, test_tablet_id, dbmp, ver); + } + size_t num_keys = 0; + auto load_delete_bitmap_func = [&](int64_t tablet_id, int64_t version, const string& val) { + EXPECT_EQ(tablet_id, test_tablet_id); + DeleteBitmapPB delete_bitmap_pb; + delete_bitmap_pb.ParseFromString(val); + int rst_ids_size = delete_bitmap_pb.rowset_ids_size(); + int seg_ids_size = delete_bitmap_pb.segment_ids_size(); + int seg_maps_size = delete_bitmap_pb.segment_delete_bitmaps_size(); + EXPECT_EQ(rst_ids_size, max_rst_id * max_seg_id); + EXPECT_EQ(seg_ids_size, rst_ids_size); + EXPECT_EQ(seg_maps_size, rst_ids_size); + for (size_t i = 0; i < rst_ids_size; i++) { + auto bitmap = roaring::Roaring::read(delete_bitmap_pb.segment_delete_bitmaps(i).data()); + EXPECT_EQ(bitmap.cardinality(), 10); + } + ++num_keys; + return true; + }; + TabletMetaManager::traverse_delete_bitmap(_data_dir->get_meta(), load_delete_bitmap_func); + EXPECT_EQ(num_keys, max_version); + + num_keys = 0; + TabletMetaManager::remove_old_version_delete_bitmap(_data_dir, test_tablet_id, 100); + TabletMetaManager::traverse_delete_bitmap(_data_dir->get_meta(), load_delete_bitmap_func); + EXPECT_EQ(num_keys, max_version - 101); + + num_keys = 0; + TabletMetaManager::remove_old_version_delete_bitmap(_data_dir, test_tablet_id, 200); + TabletMetaManager::traverse_delete_bitmap(_data_dir->get_meta(), load_delete_bitmap_func); + EXPECT_EQ(num_keys, max_version - 201); + + num_keys = 0; + TabletMetaManager::remove_old_version_delete_bitmap(_data_dir, test_tablet_id, INT64_MAX); + TabletMetaManager::traverse_delete_bitmap(_data_dir->get_meta(), load_delete_bitmap_func); + EXPECT_EQ(num_keys, 0); +} + } // namespace doris --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
