This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git


The following commit(s) were added to refs/heads/master by this push:
     new b770295e7 [bootstrap] Reduce the memory size consumed when bootstrap
b770295e7 is described below

commit b770295e77a9d5592aaf4e79d8cf7ea3ba154992
Author: Yingchun Lai <[email protected]>
AuthorDate: Mon Aug 29 16:11:00 2022 +0800

    [bootstrap] Reduce the memory size consumed when bootstrap
    
    When a tserver bootstraps, it loads all tablets metadata,
    and finds out the maximum fs block id it has seen to notify
    the block manager to avoid generating duplicate ids.
    However, a tablet may contain a large number of blocks,
    which means a large amount of memory is consumed when
    tablets are loaded in parallel during tserver bootstrap.
    
    This patch optimizes the tablet bootstrap to find the
    maximum block id for a tablet without storing all the
    block identifiers in the memory.
    
    Change-Id: If4df35f63e99a3f8331da51114991515ea4ee496
    Reviewed-on: http://gerrit.cloudera.org:8080/18924
    Tested-by: Yingchun Lai <[email protected]>
    Reviewed-by: Alexey Serbin <[email protected]>
---
 src/kudu/tablet/rowset_metadata.cc      | 27 ++++++++++++++++++++++++++-
 src/kudu/tablet/rowset_metadata.h       |  5 ++++-
 src/kudu/tablet/tablet_metadata-test.cc | 29 +++++++++++++++++++++++++++--
 src/kudu/tablet/tablet_metadata.cc      | 17 ++++++++++-------
 src/kudu/tablet/tablet_metadata.h       |  9 ++++-----
 5 files changed, 71 insertions(+), 16 deletions(-)

diff --git a/src/kudu/tablet/rowset_metadata.cc b/src/kudu/tablet/rowset_metadata.cc
index 181b3710c..718710e31 100644
--- a/src/kudu/tablet/rowset_metadata.cc
+++ b/src/kudu/tablet/rowset_metadata.cc
@@ -298,8 +298,12 @@ int64_t RowSetMetadata::live_row_count() const {
   return live_row_count_;
 }
 
-BlockIdContainer RowSetMetadata::GetAllBlocks() {
+BlockIdContainer RowSetMetadata::GetAllBlocks() const {
   BlockIdContainer blocks;
+  blocks.reserve(blocks_by_col_id_.size() +
+                 undo_delta_blocks_.size() +
+                 redo_delta_blocks_.size() +
+                 2);  // '2' is reserved for 'adhoc_index_block_' and 'bloom_block_'
   std::lock_guard<LockType> l(lock_);
   if (!adhoc_index_block_.IsNull()) {
     blocks.push_back(adhoc_index_block_);
@@ -316,6 +320,27 @@ BlockIdContainer RowSetMetadata::GetAllBlocks() {
   return blocks;
 }
 
+BlockId RowSetMetadata::GetMaxLiveBlockId() const {
+  BlockId max_block_id;
+  std::lock_guard<LockType> l(lock_);
+  if (!adhoc_index_block_.IsNull()) {
+    max_block_id = std::max(max_block_id, adhoc_index_block_);
+  }
+  if (!bloom_block_.IsNull()) {
+    max_block_id = std::max(max_block_id, bloom_block_);
+  }
+  for (const auto& e : blocks_by_col_id_) {
+    max_block_id = std::max(max_block_id, e.second);
+  }
+  for (const auto& block_id : undo_delta_blocks_) {
+    max_block_id = std::max(max_block_id, block_id);
+  }
+  for (const auto& block_id : redo_delta_blocks_) {
+    max_block_id = std::max(max_block_id, block_id);
+  }
+  return max_block_id;
+}
+
 RowSetMetadataUpdate::RowSetMetadataUpdate() {
 }
 
diff --git a/src/kudu/tablet/rowset_metadata.h b/src/kudu/tablet/rowset_metadata.h
index 938ac6bc4..85efd9d6b 100644
--- a/src/kudu/tablet/rowset_metadata.h
+++ b/src/kudu/tablet/rowset_metadata.h
@@ -23,6 +23,7 @@
 #include <mutex>
 #include <optional>
 #include <string>
+#include <type_traits>
 #include <utility>
 #include <vector>
 
@@ -226,7 +227,9 @@ class RowSetMetadata {
 
   void ToProtobuf(RowSetDataPB *pb);
 
-  BlockIdContainer GetAllBlocks();
+  BlockIdContainer GetAllBlocks() const;
+
+  BlockId GetMaxLiveBlockId() const;
 
   // Increase the row count.
   // Note:
diff --git a/src/kudu/tablet/tablet_metadata-test.cc b/src/kudu/tablet/tablet_metadata-test.cc
index a6b066ca6..e71182467 100644
--- a/src/kudu/tablet/tablet_metadata-test.cc
+++ b/src/kudu/tablet/tablet_metadata-test.cc
@@ -17,12 +17,14 @@
 
 #include "kudu/tablet/tablet_metadata.h"
 
+#include <algorithm>
 #include <cstdint>
 #include <map>
 #include <memory>
 #include <optional>
 #include <ostream>
 #include <string>
+#include <type_traits>
 #include <unordered_map>
 #include <unordered_set>
 
@@ -45,7 +47,6 @@
 #include "kudu/tablet/rowset_metadata.h"
 #include "kudu/tablet/tablet-harness.h"
 #include "kudu/tablet/tablet-test-util.h"
-#include "kudu/tablet/tablet.h"
 #include "kudu/tablet/txn_metadata.h"
 #include "kudu/util/pb_util.h"
 #include "kudu/util/status.h"
@@ -213,7 +214,7 @@ TEST_F(TestTabletMetadata, BenchmarkCollectBlockIds) {
 
     map<ColumnId, BlockId> block_by_column;
     for (int j = 0; j < FLAGS_test_block_count_per_rs; ++j) {
-      block_by_column[ColumnId(j)] = BlockId(j);
+      block_by_column[ColumnId(j)] = BlockId(FLAGS_test_block_count_per_rs * i + j);
     }
     meta->SetColumnDataBlocks(block_by_column);
     rs_metas.emplace_back(shared_ptr<RowSetMetadata>(meta.release()));
@@ -229,6 +230,30 @@ TEST_F(TestTabletMetadata, BenchmarkCollectBlockIds) {
   }
 }
 
+TEST_F(TestTabletMetadata, GetMaxLiveBlockId) {
+  auto* tablet_meta = harness_->tablet()->metadata();
+  RowSetMetadataVector rs_metas;
+  for (int i = 0; i < FLAGS_test_row_set_count; ++i) {
+    unique_ptr<RowSetMetadata> meta;
+    ASSERT_OK(RowSetMetadata::CreateNew(tablet_meta, i, &meta));
+
+    map<ColumnId, BlockId> block_by_column;
+    for (int j = 0; j < FLAGS_test_block_count_per_rs; ++j) {
+      block_by_column[ColumnId(j)] = BlockId(FLAGS_test_block_count_per_rs * i + j);
+    }
+    meta->SetColumnDataBlocks(block_by_column);
+    rs_metas.emplace_back(shared_ptr<RowSetMetadata>(meta.release()));
+  }
+  ASSERT_OK(tablet_meta->UpdateAndFlush(RowSetMetadataIds(), rs_metas,
+                                        TabletMetadata::kNoMrsFlushed));
+
+  BlockIdContainer block_ids = tablet_meta->CollectBlockIds();
+  BlockId max_block_id = tablet_meta->GetMaxLiveBlockId();
+  ASSERT_FALSE(max_block_id.IsNull());
+  const auto expected_max_block_id = std::max_element(block_ids.begin(), block_ids.end());
+  ASSERT_EQ(*expected_max_block_id, max_block_id);
+}
+
 TEST_F(TestTabletMetadata, TestTxnMetadata) {
   constexpr const char* kOwner = "txn";
   const Timestamp kDummyTimestamp = Timestamp(1337);
diff --git a/src/kudu/tablet/tablet_metadata.cc b/src/kudu/tablet/tablet_metadata.cc
index 1237d106e..7ef28cad0 100644
--- a/src/kudu/tablet/tablet_metadata.cc
+++ b/src/kudu/tablet/tablet_metadata.cc
@@ -28,7 +28,6 @@
 #include <utility>
 
 #include <gflags/gflags.h>
-#include <google/protobuf/stubs/port.h>
 
 #include "kudu/common/common.pb.h"
 #include "kudu/common/schema.h"
@@ -214,7 +213,7 @@ vector<BlockIdPB> TabletMetadata::CollectBlockIdPBs(const TabletSuperBlockPB& su
   return block_ids;
 }
 
-BlockIdContainer TabletMetadata::CollectBlockIds() {
+BlockIdContainer TabletMetadata::CollectBlockIds() const {
   BlockIdContainer block_ids;
   for (const auto& r : rowsets_) {
     BlockIdContainer rowset_block_ids = r->GetAllBlocks();
@@ -225,6 +224,14 @@ BlockIdContainer TabletMetadata::CollectBlockIds() {
   return block_ids;
 }
 
+BlockId TabletMetadata::GetMaxLiveBlockId() const {
+  BlockId max_block_id;
+  for (const auto& r : rowsets_) {
+    max_block_id = std::max(max_block_id, r->GetMaxLiveBlockId());
+  }
+  return max_block_id;
+}
+
 Status TabletMetadata::DeleteTabletData(TabletDataState delete_type,
                                         const optional<OpId>& last_logged_opid) {
   DCHECK(!last_logged_opid || last_logged_opid->IsInitialized());
@@ -448,11 +455,7 @@ Status TabletMetadata::LoadFromSuperBlock(const TabletSuperBlockPB& superblock)
     // Determine the largest block ID known to the tablet metadata so we can
     // notify the block manager of blocks it may have missed (e.g. if a data
     // directory failed and the blocks on it were not read).
-    BlockId max_block_id;
-    const auto& block_ids = CollectBlockIds();
-    for (BlockId block_id : block_ids) {
-      max_block_id = std::max(max_block_id, block_id);
-    }
+    BlockId max_block_id = GetMaxLiveBlockId();
 
     for (const BlockIdPB& block_pb : superblock.orphaned_blocks()) {
       BlockId orphaned_block_id = BlockId::FromPB(block_pb);
diff --git a/src/kudu/tablet/tablet_metadata.h b/src/kudu/tablet/tablet_metadata.h
index 2750f2760..b5580c6e6 100644
--- a/src/kudu/tablet/tablet_metadata.h
+++ b/src/kudu/tablet/tablet_metadata.h
@@ -31,6 +31,7 @@
 #include "kudu/common/common.pb.h"
 #include "kudu/common/partition.h"
 #include "kudu/common/schema.h"
+#include "kudu/consensus/opid.pb.h"
 #include "kudu/fs/block_id.h"
 #include "kudu/gutil/atomicops.h"
 #include "kudu/gutil/macros.h"
@@ -47,10 +48,6 @@ class BlockIdPB;
 class FsManager;
 class Timestamp;
 
-namespace consensus {
-class OpId;
-} // namespace consensus
-
 namespace log {
 class MinLogIndexAnchorer;
 } // namespace log
@@ -129,7 +126,9 @@ class TabletMetadata : public RefCountedThreadSafe<TabletMetadata> {
   static std::vector<BlockIdPB> CollectBlockIdPBs(
       const TabletSuperBlockPB& superblock);
 
-  BlockIdContainer CollectBlockIds();
+  BlockIdContainer CollectBlockIds() const;
+
+  BlockId GetMaxLiveBlockId() const;
 
   const std::string& tablet_id() const {
     DCHECK_NE(state_, kNotLoadedYet);

Reply via email to