This is an automated email from the ASF dual-hosted git repository.
alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git
The following commit(s) were added to refs/heads/master by this push:
new b770295e7 [bootstrap] Reduce the memory size consumed when bootstrap
b770295e7 is described below
commit b770295e77a9d5592aaf4e79d8cf7ea3ba154992
Author: Yingchun Lai <[email protected]>
AuthorDate: Mon Aug 29 16:11:00 2022 +0800
[bootstrap] Reduce the memory size consumed when bootstrap
When a tserver bootstraps, it loads the metadata of all its tablets
and finds the maximum fs block id it has seen, in order to notify
the block manager so that it avoids generating duplicate ids.
However, a tablet may contain a large number of blocks,
which means a large amount of memory is consumed when
tablets are loaded in parallel during tserver bootstrap.
This patch optimizes the tablet bootstrap to find the
maximum block id for a tablet without storing all the
block identifiers in the memory.
Change-Id: If4df35f63e99a3f8331da51114991515ea4ee496
Reviewed-on: http://gerrit.cloudera.org:8080/18924
Tested-by: Yingchun Lai <[email protected]>
Reviewed-by: Alexey Serbin <[email protected]>
---
src/kudu/tablet/rowset_metadata.cc | 27 ++++++++++++++++++++++++++-
src/kudu/tablet/rowset_metadata.h | 5 ++++-
src/kudu/tablet/tablet_metadata-test.cc | 29 +++++++++++++++++++++++++++--
src/kudu/tablet/tablet_metadata.cc | 17 ++++++++++-------
src/kudu/tablet/tablet_metadata.h | 9 ++++-----
5 files changed, 71 insertions(+), 16 deletions(-)
diff --git a/src/kudu/tablet/rowset_metadata.cc
b/src/kudu/tablet/rowset_metadata.cc
index 181b3710c..718710e31 100644
--- a/src/kudu/tablet/rowset_metadata.cc
+++ b/src/kudu/tablet/rowset_metadata.cc
@@ -298,8 +298,12 @@ int64_t RowSetMetadata::live_row_count() const {
return live_row_count_;
}
-BlockIdContainer RowSetMetadata::GetAllBlocks() {
+BlockIdContainer RowSetMetadata::GetAllBlocks() const {
BlockIdContainer blocks;
+ blocks.reserve(blocks_by_col_id_.size() +
+ undo_delta_blocks_.size() +
+ redo_delta_blocks_.size() +
+ 2); // '2' is reserved for 'adhoc_index_block_' and
'bloom_block_'
std::lock_guard<LockType> l(lock_);
if (!adhoc_index_block_.IsNull()) {
blocks.push_back(adhoc_index_block_);
@@ -316,6 +320,27 @@ BlockIdContainer RowSetMetadata::GetAllBlocks() {
return blocks;
}
+BlockId RowSetMetadata::GetMaxLiveBlockId() const {
+ BlockId max_block_id;
+ std::lock_guard<LockType> l(lock_);
+ if (!adhoc_index_block_.IsNull()) {
+ max_block_id = std::max(max_block_id, adhoc_index_block_);
+ }
+ if (!bloom_block_.IsNull()) {
+ max_block_id = std::max(max_block_id, bloom_block_);
+ }
+ for (const auto& e : blocks_by_col_id_) {
+ max_block_id = std::max(max_block_id, e.second);
+ }
+ for (const auto& block_id : undo_delta_blocks_) {
+ max_block_id = std::max(max_block_id, block_id);
+ }
+ for (const auto& block_id : redo_delta_blocks_) {
+ max_block_id = std::max(max_block_id, block_id);
+ }
+ return max_block_id;
+}
+
RowSetMetadataUpdate::RowSetMetadataUpdate() {
}
diff --git a/src/kudu/tablet/rowset_metadata.h
b/src/kudu/tablet/rowset_metadata.h
index 938ac6bc4..85efd9d6b 100644
--- a/src/kudu/tablet/rowset_metadata.h
+++ b/src/kudu/tablet/rowset_metadata.h
@@ -23,6 +23,7 @@
#include <mutex>
#include <optional>
#include <string>
+#include <type_traits>
#include <utility>
#include <vector>
@@ -226,7 +227,9 @@ class RowSetMetadata {
void ToProtobuf(RowSetDataPB *pb);
- BlockIdContainer GetAllBlocks();
+ BlockIdContainer GetAllBlocks() const;
+
+ BlockId GetMaxLiveBlockId() const;
// Increase the row count.
// Note:
diff --git a/src/kudu/tablet/tablet_metadata-test.cc
b/src/kudu/tablet/tablet_metadata-test.cc
index a6b066ca6..e71182467 100644
--- a/src/kudu/tablet/tablet_metadata-test.cc
+++ b/src/kudu/tablet/tablet_metadata-test.cc
@@ -17,12 +17,14 @@
#include "kudu/tablet/tablet_metadata.h"
+#include <algorithm>
#include <cstdint>
#include <map>
#include <memory>
#include <optional>
#include <ostream>
#include <string>
+#include <type_traits>
#include <unordered_map>
#include <unordered_set>
@@ -45,7 +47,6 @@
#include "kudu/tablet/rowset_metadata.h"
#include "kudu/tablet/tablet-harness.h"
#include "kudu/tablet/tablet-test-util.h"
-#include "kudu/tablet/tablet.h"
#include "kudu/tablet/txn_metadata.h"
#include "kudu/util/pb_util.h"
#include "kudu/util/status.h"
@@ -213,7 +214,7 @@ TEST_F(TestTabletMetadata, BenchmarkCollectBlockIds) {
map<ColumnId, BlockId> block_by_column;
for (int j = 0; j < FLAGS_test_block_count_per_rs; ++j) {
- block_by_column[ColumnId(j)] = BlockId(j);
+ block_by_column[ColumnId(j)] = BlockId(FLAGS_test_block_count_per_rs * i
+ j);
}
meta->SetColumnDataBlocks(block_by_column);
rs_metas.emplace_back(shared_ptr<RowSetMetadata>(meta.release()));
@@ -229,6 +230,30 @@ TEST_F(TestTabletMetadata, BenchmarkCollectBlockIds) {
}
}
+TEST_F(TestTabletMetadata, GetMaxLiveBlockId) {
+ auto* tablet_meta = harness_->tablet()->metadata();
+ RowSetMetadataVector rs_metas;
+ for (int i = 0; i < FLAGS_test_row_set_count; ++i) {
+ unique_ptr<RowSetMetadata> meta;
+ ASSERT_OK(RowSetMetadata::CreateNew(tablet_meta, i, &meta));
+
+ map<ColumnId, BlockId> block_by_column;
+ for (int j = 0; j < FLAGS_test_block_count_per_rs; ++j) {
+ block_by_column[ColumnId(j)] = BlockId(FLAGS_test_block_count_per_rs * i
+ j);
+ }
+ meta->SetColumnDataBlocks(block_by_column);
+ rs_metas.emplace_back(shared_ptr<RowSetMetadata>(meta.release()));
+ }
+ ASSERT_OK(tablet_meta->UpdateAndFlush(RowSetMetadataIds(), rs_metas,
+ TabletMetadata::kNoMrsFlushed));
+
+ BlockIdContainer block_ids = tablet_meta->CollectBlockIds();
+ BlockId max_block_id = tablet_meta->GetMaxLiveBlockId();
+ ASSERT_FALSE(max_block_id.IsNull());
+ const auto expected_max_block_id = std::max_element(block_ids.begin(),
block_ids.end());
+ ASSERT_EQ(*expected_max_block_id, max_block_id);
+}
+
TEST_F(TestTabletMetadata, TestTxnMetadata) {
constexpr const char* kOwner = "txn";
const Timestamp kDummyTimestamp = Timestamp(1337);
diff --git a/src/kudu/tablet/tablet_metadata.cc
b/src/kudu/tablet/tablet_metadata.cc
index 1237d106e..7ef28cad0 100644
--- a/src/kudu/tablet/tablet_metadata.cc
+++ b/src/kudu/tablet/tablet_metadata.cc
@@ -28,7 +28,6 @@
#include <utility>
#include <gflags/gflags.h>
-#include <google/protobuf/stubs/port.h>
#include "kudu/common/common.pb.h"
#include "kudu/common/schema.h"
@@ -214,7 +213,7 @@ vector<BlockIdPB> TabletMetadata::CollectBlockIdPBs(const
TabletSuperBlockPB& su
return block_ids;
}
-BlockIdContainer TabletMetadata::CollectBlockIds() {
+BlockIdContainer TabletMetadata::CollectBlockIds() const {
BlockIdContainer block_ids;
for (const auto& r : rowsets_) {
BlockIdContainer rowset_block_ids = r->GetAllBlocks();
@@ -225,6 +224,14 @@ BlockIdContainer TabletMetadata::CollectBlockIds() {
return block_ids;
}
+BlockId TabletMetadata::GetMaxLiveBlockId() const {
+ BlockId max_block_id;
+ for (const auto& r : rowsets_) {
+ max_block_id = std::max(max_block_id, r->GetMaxLiveBlockId());
+ }
+ return max_block_id;
+}
+
Status TabletMetadata::DeleteTabletData(TabletDataState delete_type,
const optional<OpId>&
last_logged_opid) {
DCHECK(!last_logged_opid || last_logged_opid->IsInitialized());
@@ -448,11 +455,7 @@ Status TabletMetadata::LoadFromSuperBlock(const
TabletSuperBlockPB& superblock)
// Determine the largest block ID known to the tablet metadata so we can
// notify the block manager of blocks it may have missed (e.g. if a data
// directory failed and the blocks on it were not read).
- BlockId max_block_id;
- const auto& block_ids = CollectBlockIds();
- for (BlockId block_id : block_ids) {
- max_block_id = std::max(max_block_id, block_id);
- }
+ BlockId max_block_id = GetMaxLiveBlockId();
for (const BlockIdPB& block_pb : superblock.orphaned_blocks()) {
BlockId orphaned_block_id = BlockId::FromPB(block_pb);
diff --git a/src/kudu/tablet/tablet_metadata.h
b/src/kudu/tablet/tablet_metadata.h
index 2750f2760..b5580c6e6 100644
--- a/src/kudu/tablet/tablet_metadata.h
+++ b/src/kudu/tablet/tablet_metadata.h
@@ -31,6 +31,7 @@
#include "kudu/common/common.pb.h"
#include "kudu/common/partition.h"
#include "kudu/common/schema.h"
+#include "kudu/consensus/opid.pb.h"
#include "kudu/fs/block_id.h"
#include "kudu/gutil/atomicops.h"
#include "kudu/gutil/macros.h"
@@ -47,10 +48,6 @@ class BlockIdPB;
class FsManager;
class Timestamp;
-namespace consensus {
-class OpId;
-} // namespace consensus
-
namespace log {
class MinLogIndexAnchorer;
} // namespace log
@@ -129,7 +126,9 @@ class TabletMetadata : public
RefCountedThreadSafe<TabletMetadata> {
static std::vector<BlockIdPB> CollectBlockIdPBs(
const TabletSuperBlockPB& superblock);
- BlockIdContainer CollectBlockIds();
+ BlockIdContainer CollectBlockIds() const;
+
+ BlockId GetMaxLiveBlockId() const;
const std::string& tablet_id() const {
DCHECK_NE(state_, kNotLoadedYet);