Repository: kudu Updated Branches: refs/heads/master 314c9d8c1 -> 854658377
KUDU-1755 Part 1: Improve tablet on disk size metric This adds bloomfile, ad hoc index, and superblock sizes to the on-disk size metric for tablets. It also renames some on-disk size methods, clarifying their meaning and removing the word "estimate" because they are no longer estimates. A follow up will address log segments and cmeta. Change-Id: I32dce598bbb8e18325210a49fc436fd0f7ac68fd Reviewed-on: http://gerrit.cloudera.org:8080/6967 Tested-by: Kudu Jenkins Reviewed-by: Alexey Serbin <[email protected]> Project: http://git-wip-us.apache.org/repos/asf/kudu/repo Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/09543976 Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/09543976 Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/09543976 Branch: refs/heads/master Commit: 09543976c284eb05d93ce7d8c3254f7309d13869 Parents: 314c9d8 Author: Will Berkeley <[email protected]> Authored: Fri May 19 13:22:24 2017 -0700 Committer: Will Berkeley <[email protected]> Committed: Tue May 30 18:19:31 2017 +0000 ---------------------------------------------------------------------- src/kudu/cfile/bloomfile-test.cc | 4 ++- src/kudu/cfile/bloomfile.h | 5 ++++ src/kudu/tablet/cfile_set.cc | 13 +++++++++- src/kudu/tablet/cfile_set.h | 7 +++++- src/kudu/tablet/compaction.cc | 2 +- src/kudu/tablet/compaction_policy-test.cc | 2 +- src/kudu/tablet/delta_tracker.cc | 4 +-- src/kudu/tablet/delta_tracker.h | 8 +++--- src/kudu/tablet/diskrowset-test.cc | 20 +++++++-------- src/kudu/tablet/diskrowset.cc | 26 +++++++++++-------- src/kudu/tablet/diskrowset.h | 18 +++++++------ src/kudu/tablet/memrowset.h | 4 +-- src/kudu/tablet/mock-rowsets.h | 8 +++--- src/kudu/tablet/rowset.cc | 8 +++--- src/kudu/tablet/rowset.h | 10 +++++--- src/kudu/tablet/rowset_info.cc | 2 +- src/kudu/tablet/rowset_info.h | 2 +- src/kudu/tablet/tablet.cc | 12 ++++++--- src/kudu/tablet/tablet.h | 7 ++++-- src/kudu/tablet/tablet_history_gc-test.cc | 4 +-- src/kudu/tablet/tablet_metadata-test.cc | 35 ++++++++++++++++++++++++++ src/kudu/tablet/tablet_metadata.cc | 3 +++ src/kudu/tablet/tablet_metadata.h | 9 +++++++ src/kudu/tablet/tablet_replica.cc | 2 +- src/kudu/tablet/tablet_replica.h | 2 +- src/kudu/util/metrics.h | 2 +- 26 files changed, 153 insertions(+), 66 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/kudu/blob/09543976/src/kudu/cfile/bloomfile-test.cc ---------------------------------------------------------------------- diff --git a/src/kudu/cfile/bloomfile-test.cc b/src/kudu/cfile/bloomfile-test.cc index 6c89249..57e1ee2 100644 --- a/src/kudu/cfile/bloomfile-test.cc +++ b/src/kudu/cfile/bloomfile-test.cc @@ -101,7 +101,8 @@ TEST_F(BloomFileTest, TestLazyInit) { unique_ptr<ReadableBlock> count_block( new CountingReadableBlock(std::move(block), &bytes_read)); - // Lazily opening the bloom file should not trigger any reads. + // Lazily opening the bloom file should not trigger any reads, + // and the file size should be available before Init(). gscoped_ptr<BloomFileReader> reader; ReaderOptions opts; opts.parent_mem_tracker = tracker; @@ -109,6 +110,7 @@ TEST_F(BloomFileTest, TestLazyInit) { ASSERT_EQ(0, bytes_read); int64_t lazy_mem_usage = tracker->consumption(); ASSERT_GT(lazy_mem_usage, initial_mem_usage); + ASSERT_GT(reader->FileSize(), 0); // But initializing it should (only the first time), and the bloom's // memory usage should increase. http://git-wip-us.apache.org/repos/asf/kudu/blob/09543976/src/kudu/cfile/bloomfile.h ---------------------------------------------------------------------- diff --git a/src/kudu/cfile/bloomfile.h b/src/kudu/cfile/bloomfile.h index c88f0cc..5de88d7 100644 --- a/src/kudu/cfile/bloomfile.h +++ b/src/kudu/cfile/bloomfile.h @@ -103,6 +103,11 @@ class BloomFileReader { Status CheckKeyPresent(const BloomKeyProbe &probe, bool *maybe_present); + // Can be called before Init(). + uint64_t FileSize() const { + return reader_->file_size(); + } + private: DISALLOW_COPY_AND_ASSIGN(BloomFileReader); http://git-wip-us.apache.org/repos/asf/kudu/blob/09543976/src/kudu/tablet/cfile_set.cc ---------------------------------------------------------------------- diff --git a/src/kudu/tablet/cfile_set.cc b/src/kudu/tablet/cfile_set.cc index f79fbbd..5ba509c 100644 --- a/src/kudu/tablet/cfile_set.cc +++ b/src/kudu/tablet/cfile_set.cc @@ -195,7 +195,18 @@ Status CFileSet::GetBounds(string* min_encoded_key, return Status::OK(); } -uint64_t CFileSet::EstimateOnDiskSize() const { +uint64_t CFileSet::OnDiskSize() const { + uint64_t ret = OnDiskDataSize(); + if (bloom_reader_) { + ret += bloom_reader_->FileSize(); + } + if (ad_hoc_idx_reader_) { + ret += ad_hoc_idx_reader_->file_size(); + } + return ret; +} + +uint64_t CFileSet::OnDiskDataSize() const { uint64_t ret = 0; for (const auto& e : readers_by_col_id_) { ret += e.second->file_size(); http://git-wip-us.apache.org/repos/asf/kudu/blob/09543976/src/kudu/tablet/cfile_set.h ---------------------------------------------------------------------- diff --git a/src/kudu/tablet/cfile_set.h b/src/kudu/tablet/cfile_set.h index 5cdf21d..34b8c3c 100644 --- a/src/kudu/tablet/cfile_set.h +++ b/src/kudu/tablet/cfile_set.h @@ -68,7 +68,12 @@ class CFileSet : public std::enable_shared_from_this<CFileSet> { virtual Status GetBounds(std::string* min_encoded_key, std::string* max_encoded_key) const; - uint64_t EstimateOnDiskSize() const; + // The total on-disk size of this cfile set, including the ad hoc index and + // bloom files, in bytes. + uint64_t OnDiskSize() const; + + // The size on-disk of this cfile set's data, in bytes. + uint64_t OnDiskDataSize() const; // Determine the index of the given row key. // Sets *idx to boost::none if the row is not found. http://git-wip-us.apache.org/repos/asf/kudu/blob/09543976/src/kudu/tablet/compaction.cc ---------------------------------------------------------------------- diff --git a/src/kudu/tablet/compaction.cc b/src/kudu/tablet/compaction.cc index 00ba3e8..4621b6c 100644 --- a/src/kudu/tablet/compaction.cc +++ b/src/kudu/tablet/compaction.cc @@ -882,7 +882,7 @@ void RowSetsInCompaction::DumpToLog() const { // Dump the selected rowsets to the log, and collect corresponding iterators. for (const shared_ptr<RowSet> &rs : rowsets_) { LOG(INFO) << rs->ToString() << "(current size on disk: ~" - << rs->EstimateOnDiskSize() << " bytes)"; + << rs->OnDiskSize() << " bytes)"; } } http://git-wip-us.apache.org/repos/asf/kudu/blob/09543976/src/kudu/tablet/compaction_policy-test.cc ---------------------------------------------------------------------- diff --git a/src/kudu/tablet/compaction_policy-test.cc b/src/kudu/tablet/compaction_policy-test.cc index b34d1bd..ecc9f3f 100644 --- a/src/kudu/tablet/compaction_policy-test.cc +++ b/src/kudu/tablet/compaction_policy-test.cc @@ -107,7 +107,7 @@ TEST(TestCompactionPolicy, TestYcsbCompaction) { LOG(INFO) << "quality=" << quality; int total_size = 0; for (const auto* rs : picked) { - total_size += rs->EstimateCompactionSize() / 1024 / 1024; + total_size += rs->OnDiskDataSizeNoUndos() / 1024 / 1024; } ASSERT_LE(total_size, budget_mb); qualities.push_back(quality); http://git-wip-us.apache.org/repos/asf/kudu/blob/09543976/src/kudu/tablet/delta_tracker.cc ---------------------------------------------------------------------- diff --git a/src/kudu/tablet/delta_tracker.cc b/src/kudu/tablet/delta_tracker.cc index 1996842..eece75e 100644 --- a/src/kudu/tablet/delta_tracker.cc +++ b/src/kudu/tablet/delta_tracker.cc @@ -734,7 +734,7 @@ size_t DeltaTracker::CountRedoDeltaStores() const { return redo_delta_stores_.size(); } -uint64_t DeltaTracker::EstimateOnDiskSize() const { +uint64_t DeltaTracker::OnDiskSize() const { shared_lock<rw_spinlock> lock(component_lock_); uint64_t size = 0; for (const shared_ptr<DeltaStore>& ds : redo_delta_stores_) { @@ -746,7 +746,7 @@ uint64_t DeltaTracker::EstimateOnDiskSize() const { return size; } -uint64_t DeltaTracker::EstimateRedoDeltaOnDiskSize() const { +uint64_t DeltaTracker::RedoDeltaOnDiskSize() const { shared_lock<rw_spinlock> lock(component_lock_); uint64_t size = 0; for (const shared_ptr<DeltaStore>& ds : redo_delta_stores_) { http://git-wip-us.apache.org/repos/asf/kudu/blob/09543976/src/kudu/tablet/delta_tracker.h ---------------------------------------------------------------------- diff --git a/src/kudu/tablet/delta_tracker.h b/src/kudu/tablet/delta_tracker.h index a71b67c..4caee2c 100644 --- a/src/kudu/tablet/delta_tracker.h +++ b/src/kudu/tablet/delta_tracker.h @@ -216,11 +216,11 @@ class DeltaTracker { // Return the number of redo delta stores, not including the DeltaMemStore. size_t CountRedoDeltaStores() const; - // Estimate the number of bytes on disk of all delta blocks. - uint64_t EstimateOnDiskSize() const; + // Return the size on-disk of all delta blocks, in bytes. + uint64_t OnDiskSize() const; - // Estimate the number of bytes on disk of REDO deltas. - uint64_t EstimateRedoDeltaOnDiskSize() const; + // Return the size on-disk of REDO deltas, in bytes. + uint64_t RedoDeltaOnDiskSize() const; // Retrieves the list of column indexes that currently have updates. void GetColumnIdsWithUpdates(std::vector<ColumnId>* col_ids) const; http://git-wip-us.apache.org/repos/asf/kudu/blob/09543976/src/kudu/tablet/diskrowset-test.cc ---------------------------------------------------------------------- diff --git a/src/kudu/tablet/diskrowset-test.cc b/src/kudu/tablet/diskrowset-test.cc index f7a5ecc..6b9ee3b 100644 --- a/src/kudu/tablet/diskrowset-test.cc +++ b/src/kudu/tablet/diskrowset-test.cc @@ -585,17 +585,17 @@ TEST_F(TestRowSet, TestDiskSizeEstimation) { // The rowset consists of base data and REDO deltas, so // 1. the delta tracker's on-disk estimate should be the same as the on-disk estimate for REDOs. // 2. the rowset's on-disk estimate and the sum of the base data and REDO estimates should equal. - ASSERT_EQ(rs->delta_tracker()->EstimateOnDiskSize(), - rs->delta_tracker()->EstimateRedoDeltaOnDiskSize()); - ASSERT_EQ(rs->EstimateOnDiskSize(), - rs->EstimateBaseDataDiskSize() + rs->EstimateRedoDeltaDiskSize()); + ASSERT_EQ(rs->delta_tracker()->OnDiskSize(), + rs->delta_tracker()->RedoDeltaOnDiskSize()); + ASSERT_EQ(rs->OnDiskSize(), + rs->BaseDataOnDiskSize() + rs->RedoDeltaOnDiskSize()); // Convert the REDO delta to an UNDO delta. // REDO size should be zero, but there should be UNDOs, so the on-disk size of the rowset // should be larger than the base data. ASSERT_OK(rs->MajorCompactDeltaStores(HistoryGcOpts::Disabled())); - ASSERT_EQ(0, rs->EstimateRedoDeltaDiskSize()); - ASSERT_GT(rs->EstimateOnDiskSize(), rs->EstimateBaseDataDiskSize()); + ASSERT_EQ(0, rs->RedoDeltaOnDiskSize()); + ASSERT_GT(rs->OnDiskSize(), rs->BaseDataOnDiskSize()); // Write a second delta file. UpdateExistingRows(rs.get(), FLAGS_update_fraction, nullptr); @@ -603,10 +603,10 @@ TEST_F(TestRowSet, TestDiskSizeEstimation) { // There's base data, REDOs, and UNDOs, so the delta tracker and rowset's sizes should be larger // than estimates counting only base data and REDOs. - ASSERT_GT(rs->delta_tracker()->EstimateOnDiskSize(), - rs->delta_tracker()->EstimateRedoDeltaOnDiskSize()); - ASSERT_GT(rs->EstimateOnDiskSize(), - rs->EstimateBaseDataDiskSize() + rs->EstimateRedoDeltaDiskSize()); + ASSERT_GT(rs->delta_tracker()->OnDiskSize(), + rs->delta_tracker()->RedoDeltaOnDiskSize()); + ASSERT_GT(rs->OnDiskSize(), + rs->BaseDataOnDiskSize() + rs->RedoDeltaOnDiskSize()); } } // namespace tablet http://git-wip-us.apache.org/repos/asf/kudu/blob/09543976/src/kudu/tablet/diskrowset.cc ---------------------------------------------------------------------- diff --git a/src/kudu/tablet/diskrowset.cc b/src/kudu/tablet/diskrowset.cc index 7f7329e..4a33fde 100644 --- a/src/kudu/tablet/diskrowset.cc +++ b/src/kudu/tablet/diskrowset.cc @@ -674,28 +674,34 @@ Status DiskRowSet::GetBounds(std::string* min_encoded_key, return base_data_->GetBounds(min_encoded_key, max_encoded_key); } -uint64_t DiskRowSet::EstimateBaseDataDiskSize() const { +uint64_t DiskRowSet::BaseDataOnDiskSize() const { DCHECK(open_); shared_lock<rw_spinlock> l(component_lock_); - return base_data_->EstimateOnDiskSize(); + return base_data_->OnDiskSize(); } -uint64_t DiskRowSet::EstimateRedoDeltaDiskSize() const { +uint64_t DiskRowSet::BaseDataOnDiskSizeNoMetadata() const { DCHECK(open_); shared_lock<rw_spinlock> l(component_lock_); - return delta_tracker_->EstimateRedoDeltaOnDiskSize(); + return base_data_->OnDiskDataSize(); } -uint64_t DiskRowSet::EstimateOnDiskSize() const { +uint64_t DiskRowSet::RedoDeltaOnDiskSize() const { DCHECK(open_); shared_lock<rw_spinlock> l(component_lock_); - return base_data_->EstimateOnDiskSize() + delta_tracker_->EstimateOnDiskSize(); + return delta_tracker_->RedoDeltaOnDiskSize(); } -uint64_t DiskRowSet::EstimateCompactionSize() const { +uint64_t DiskRowSet::OnDiskSize() const { DCHECK(open_); shared_lock<rw_spinlock> l(component_lock_); - return base_data_->EstimateOnDiskSize() + delta_tracker_->EstimateRedoDeltaOnDiskSize(); + return base_data_->OnDiskSize() + delta_tracker_->OnDiskSize(); +} + +uint64_t DiskRowSet::OnDiskDataSizeNoUndos() const { + DCHECK(open_); + shared_lock<rw_spinlock> l(component_lock_); + return base_data_->OnDiskDataSize() + delta_tracker_->RedoDeltaOnDiskSize(); } size_t DiskRowSet::DeltaMemStoreSize() const { @@ -734,7 +740,7 @@ double DiskRowSet::DeltaStoresCompactionPerfImprovementScore(DeltaCompactionType DCHECK(open_); double perf_improv = 0; size_t store_count = CountDeltaStores(); - uint64_t base_data_size = EstimateBaseDataDiskSize(); + uint64_t base_data_size = BaseDataOnDiskSizeNoMetadata(); if (store_count == 0) { return perf_improv; @@ -745,7 +751,7 @@ double DiskRowSet::DeltaStoresCompactionPerfImprovementScore(DeltaCompactionType delta_tracker_->GetColumnIdsWithUpdates(&col_ids_with_updates); // If we have files but no updates, we don't want to major compact. if (!col_ids_with_updates.empty()) { - double ratio = static_cast<double>(EstimateRedoDeltaDiskSize()) / base_data_size; + double ratio = static_cast<double>(RedoDeltaOnDiskSize()) / base_data_size; if (ratio >= FLAGS_tablet_delta_store_major_compact_min_ratio) { perf_improv = ratio; } http://git-wip-us.apache.org/repos/asf/kudu/blob/09543976/src/kudu/tablet/diskrowset.h ---------------------------------------------------------------------- diff --git a/src/kudu/tablet/diskrowset.h b/src/kudu/tablet/diskrowset.h index ad722b7..59d20eb 100644 --- a/src/kudu/tablet/diskrowset.h +++ b/src/kudu/tablet/diskrowset.h @@ -328,17 +328,19 @@ class DiskRowSet : public RowSet { virtual Status GetBounds(std::string* min_encoded_key, std::string* max_encoded_key) const OVERRIDE; - // Estimate the number of bytes on-disk for the base data. - uint64_t EstimateBaseDataDiskSize() const; + // Estimate the on-disk size of this rowset's cfile set, including bloomfiles + // and the ad hoc index. + uint64_t BaseDataOnDiskSize() const; - // Estimate the number of bytes on-disk of REDO deltas. - uint64_t EstimateRedoDeltaDiskSize() const; + // Estimate the size on-disk of the data in this rowset's cfile set. + uint64_t BaseDataOnDiskSizeNoMetadata() const; - // Estimate the total number of bytes on-disk. Excludes the bloom files and the ad hoc index. - // TODO(wdberkeley) Offer a version that has the real total disk space usage. See KUDU-1755. - uint64_t EstimateOnDiskSize() const OVERRIDE; + // Estimate the size on-disk of this rowset's REDO deltas. + uint64_t RedoDeltaOnDiskSize() const; - uint64_t EstimateCompactionSize() const OVERRIDE; + uint64_t OnDiskSize() const OVERRIDE; + + uint64_t OnDiskDataSizeNoUndos() const OVERRIDE; size_t DeltaMemStoreSize() const OVERRIDE; http://git-wip-us.apache.org/repos/asf/kudu/blob/09543976/src/kudu/tablet/memrowset.h ---------------------------------------------------------------------- diff --git a/src/kudu/tablet/memrowset.h b/src/kudu/tablet/memrowset.h index 8027e30..ae953c3 100644 --- a/src/kudu/tablet/memrowset.h +++ b/src/kudu/tablet/memrowset.h @@ -233,11 +233,11 @@ class MemRowSet : public RowSet, virtual Status GetBounds(std::string *min_encoded_key, std::string *max_encoded_key) const OVERRIDE; - uint64_t EstimateOnDiskSize() const OVERRIDE { + uint64_t OnDiskSize() const OVERRIDE { return 0; } - uint64_t EstimateCompactionSize() const OVERRIDE { + uint64_t OnDiskDataSizeNoUndos() const OVERRIDE { return 0; } http://git-wip-us.apache.org/repos/asf/kudu/blob/09543976/src/kudu/tablet/mock-rowsets.h ---------------------------------------------------------------------- diff --git a/src/kudu/tablet/mock-rowsets.h b/src/kudu/tablet/mock-rowsets.h index 4201508..37c2c8b 100644 --- a/src/kudu/tablet/mock-rowsets.h +++ b/src/kudu/tablet/mock-rowsets.h @@ -75,11 +75,11 @@ class MockRowSet : public RowSet { LOG(FATAL) << "Unimplemented"; return Status::OK(); } - virtual uint64_t EstimateOnDiskSize() const OVERRIDE { + virtual uint64_t OnDiskSize() const OVERRIDE { LOG(FATAL) << "Unimplemented"; return 0; } - virtual uint64_t EstimateCompactionSize() const OVERRIDE { + virtual uint64_t OnDiskDataSizeNoUndos() const OVERRIDE { LOG(FATAL) << "Unimplemented"; return 0; } @@ -164,11 +164,11 @@ class MockDiskRowSet : public MockRowSet { return Status::OK(); } - virtual uint64_t EstimateOnDiskSize() const OVERRIDE { + virtual uint64_t OnDiskSize() const OVERRIDE { return size_; } - virtual uint64_t EstimateCompactionSize() const OVERRIDE { + virtual uint64_t OnDiskDataSizeNoUndos() const OVERRIDE { return size_; } http://git-wip-us.apache.org/repos/asf/kudu/blob/09543976/src/kudu/tablet/rowset.cc ---------------------------------------------------------------------- diff --git a/src/kudu/tablet/rowset.cc b/src/kudu/tablet/rowset.cc index 52681ce..0295f23 100644 --- a/src/kudu/tablet/rowset.cc +++ b/src/kudu/tablet/rowset.cc @@ -207,20 +207,20 @@ Status DuplicatingRowSet::GetBounds(string* min_encoded_key, return Status::OK(); } -uint64_t DuplicatingRowSet::EstimateOnDiskSize() const { +uint64_t DuplicatingRowSet::OnDiskSize() const { uint64_t size = 0; for (const shared_ptr<RowSet> &rs : new_rowsets_) { - size += rs->EstimateOnDiskSize(); + size += rs->OnDiskSize(); } return size; } -uint64_t DuplicatingRowSet::EstimateCompactionSize() const { +uint64_t DuplicatingRowSet::OnDiskDataSizeNoUndos() const { // The actual value of this doesn't matter, since it won't be selected // for compaction. uint64_t size = 0; for (const shared_ptr<RowSet> &rs : new_rowsets_) { - size += rs->EstimateCompactionSize(); + size += rs->OnDiskDataSizeNoUndos(); } return size; } http://git-wip-us.apache.org/repos/asf/kudu/blob/09543976/src/kudu/tablet/rowset.h ---------------------------------------------------------------------- diff --git a/src/kudu/tablet/rowset.h b/src/kudu/tablet/rowset.h index 46bdcb7..f85d2c7 100644 --- a/src/kudu/tablet/rowset.h +++ b/src/kudu/tablet/rowset.h @@ -115,10 +115,10 @@ class RowSet { virtual Status DebugDump(vector<string> *lines = NULL) = 0; // Estimate the number of bytes on-disk - virtual uint64_t EstimateOnDiskSize() const = 0; + virtual uint64_t OnDiskSize() const = 0; // Estimate the number of bytes relevant for compaction. - virtual uint64_t EstimateCompactionSize() const = 0; + virtual uint64_t OnDiskDataSizeNoUndos() const = 0; // Return the lock used for including this DiskRowSet in a compaction. // This prevents multiple compactions and flushes from trying to include @@ -329,9 +329,11 @@ class DuplicatingRowSet : public RowSet { virtual Status GetBounds(std::string* min_encoded_key, std::string* max_encoded_key) const OVERRIDE; - uint64_t EstimateOnDiskSize() const OVERRIDE; + // Return the total size on-disk of this rowset. + uint64_t OnDiskSize() const OVERRIDE; - uint64_t EstimateCompactionSize() const OVERRIDE; + // Return the size of this rowset relevant for merge compactions. + uint64_t OnDiskDataSizeNoUndos() const OVERRIDE; string ToString() const OVERRIDE; http://git-wip-us.apache.org/repos/asf/kudu/blob/09543976/src/kudu/tablet/rowset_info.cc ---------------------------------------------------------------------- diff --git a/src/kudu/tablet/rowset_info.cc b/src/kudu/tablet/rowset_info.cc index 0fa88d5..2b85288 100644 --- a/src/kudu/tablet/rowset_info.cc +++ b/src/kudu/tablet/rowset_info.cc @@ -254,7 +254,7 @@ void RowSetInfo::CollectOrdered(const RowSetTree& tree, RowSetInfo::RowSetInfo(RowSet* rs, double init_cdf) : rowset_(rs), - size_bytes_(rs->EstimateCompactionSize()), + size_bytes_(rs->OnDiskDataSizeNoUndos()), size_mb_(std::max(implicit_cast<int>(size_bytes_ / 1024 / 1024), kMinSizeMb)), cdf_min_key_(init_cdf), cdf_max_key_(init_cdf) { http://git-wip-us.apache.org/repos/asf/kudu/blob/09543976/src/kudu/tablet/rowset_info.h ---------------------------------------------------------------------- diff --git a/src/kudu/tablet/rowset_info.h b/src/kudu/tablet/rowset_info.h index 767b7a6..1ece56f 100644 --- a/src/kudu/tablet/rowset_info.h +++ b/src/kudu/tablet/rowset_info.h @@ -83,7 +83,7 @@ class RowSetInfo { RowSet* const rowset_; - // Cached version of rowset_->EstimateOnDiskSize(). + // Cached version of rowset_->OnDiskDataSize(). const int size_bytes_; // The size in MB, already clamped so that all rowsets have size at least http://git-wip-us.apache.org/repos/asf/kudu/blob/09543976/src/kudu/tablet/tablet.cc ---------------------------------------------------------------------- diff --git a/src/kudu/tablet/tablet.cc b/src/kudu/tablet/tablet.cc index 5503dc3..aaaa72b 100644 --- a/src/kudu/tablet/tablet.cc +++ b/src/kudu/tablet/tablet.cc @@ -206,7 +206,7 @@ Tablet::Tablet(const scoped_refptr<TabletMetadata>& metadata, metric_entity_, Bind(&Tablet::MemRowSetSize, Unretained(this))) ->AutoDetach(&metric_detacher_); METRIC_on_disk_size.InstantiateFunctionGauge( - metric_entity_, Bind(&Tablet::EstimateOnDiskSize, Unretained(this))) + metric_entity_, Bind(&Tablet::OnDiskSize, Unretained(this))) ->AutoDetach(&metric_detacher_); } @@ -1467,7 +1467,7 @@ Status Tablet::DoMergeCompactionOrFlush(const RowSetsInCompaction &input, if (input.num_rowsets() > 1) { MAYBE_FAULT(FLAGS_fault_crash_before_flush_tablet_meta_after_compaction); } else if (input.num_rowsets() == 1 && - input.rowsets()[0]->EstimateCompactionSize() == 0) { + input.rowsets()[0]->OnDiskDataSizeNoUndos() == 0) { MAYBE_FAULT(FLAGS_fault_crash_before_flush_tablet_meta_after_flush_mrs); } @@ -1660,7 +1660,11 @@ size_t Tablet::MemRowSetLogReplaySize(const ReplaySizeMap& replay_size_map) cons return GetReplaySizeForIndex(comps->memrowset->MinUnflushedLogIndex(), replay_size_map); } -size_t Tablet::EstimateOnDiskSize() const { +size_t Tablet::OnDiskSize() const { + return OnDiskDataSize() + metadata()->on_disk_size(); +} + +size_t Tablet::OnDiskDataSize() const { scoped_refptr<TabletComponents> comps; GetComponents(&comps); @@ -1668,7 +1672,7 @@ size_t Tablet::EstimateOnDiskSize() const { size_t ret = 0; for (const shared_ptr<RowSet> &rowset : comps->rowsets->all_rowsets()) { - ret += rowset->EstimateOnDiskSize(); + ret += rowset->OnDiskSize(); } return ret; http://git-wip-us.apache.org/repos/asf/kudu/blob/09543976/src/kudu/tablet/tablet.h ---------------------------------------------------------------------- diff --git a/src/kudu/tablet/tablet.h b/src/kudu/tablet/tablet.h index 4c5b6e5..67e854e 100644 --- a/src/kudu/tablet/tablet.h +++ b/src/kudu/tablet/tablet.h @@ -235,8 +235,11 @@ class Tablet { // the current MRS. size_t MemRowSetLogReplaySize(const ReplaySizeMap& replay_size_map) const; - // Estimate the total on-disk size of this tablet, in bytes. - size_t EstimateOnDiskSize() const; + // Return the total on-disk size of this tablet, in bytes. + size_t OnDiskSize() const; + + // Return the total on-disk size of this tablet's data, in bytes. + size_t OnDiskDataSize() const; // Get the total size of all the DMS size_t DeltaMemStoresSize() const; http://git-wip-us.apache.org/repos/asf/kudu/blob/09543976/src/kudu/tablet/tablet_history_gc-test.cc ---------------------------------------------------------------------- diff --git a/src/kudu/tablet/tablet_history_gc-test.cc b/src/kudu/tablet/tablet_history_gc-test.cc index b071661..da751f8 100644 --- a/src/kudu/tablet/tablet_history_gc-test.cc +++ b/src/kudu/tablet/tablet_history_gc-test.cc @@ -237,7 +237,7 @@ TEST_F(TabletHistoryGcTest, TestNoGenerateUndoOnMRSFlush) { for (const auto& rsmd : tablet()->metadata()->rowsets()) { ASSERT_EQ(0, rsmd->undo_delta_blocks().size()); } - ASSERT_EQ(0, tablet()->EstimateOnDiskSize()); + ASSERT_EQ(0, tablet()->OnDiskDataSize()); // Now check the same thing (flush not generating an UNDO), but without the // delete following the insert. We do it with a single row. @@ -310,7 +310,7 @@ TEST_F(TabletHistoryGcTest, TestRowRemovalGCOnMergeCompaction) { ASSERT_OK(tablet()->Compact(Tablet::FORCE_COMPACT_ALL)); ASSERT_DEBUG_DUMP_ROWS_MATCH(""); NO_FATALS(VerifyTestRowsWithTimestampAndVerifier(kStartRow, 0, prev_time, boost::none)); - ASSERT_EQ(0, tablet()->EstimateOnDiskSize()); + ASSERT_EQ(0, tablet()->OnDiskDataSize()); } // Test that we don't over-aggressively GC history prior to the AHM. http://git-wip-us.apache.org/repos/asf/kudu/blob/09543976/src/kudu/tablet/tablet_metadata-test.cc ---------------------------------------------------------------------- diff --git a/src/kudu/tablet/tablet_metadata-test.cc b/src/kudu/tablet/tablet_metadata-test.cc index 5378921..1935d35 100644 --- a/src/kudu/tablet/tablet_metadata-test.cc +++ b/src/kudu/tablet/tablet_metadata-test.cc @@ -94,6 +94,41 @@ TEST_F(TestTabletMetadata, TestLoadFromSuperBlock) { << SecureDebugString(superblock_pb_1); } +TEST_F(TestTabletMetadata, TestOnDiskSize) { + TabletMetadata* meta = harness_->tablet()->metadata(); + + // The tablet metadata was flushed on creation. + int64_t initial_size = meta->on_disk_size(); + ASSERT_GT(initial_size, 0); + + // Write some data to the tablet and flush. + gscoped_ptr<KuduPartialRow> row; + BuildPartialRow(0, 0, "foo", &row); + writer_->Insert(*row); + ASSERT_OK(harness_->tablet()->Flush()); + + // The tablet metadata grows after flushing a new rowset. + int64_t middle_size = meta->on_disk_size(); + ASSERT_GT(middle_size, initial_size); + + // Create another rowset. + // The on-disk size shouldn't change until after flush. + BuildPartialRow(1, 1, "bar", &row); + writer_->Insert(*row); + ASSERT_EQ(middle_size, meta->on_disk_size()); + ASSERT_OK(harness_->tablet()->Flush()); + int64_t final_size = meta->on_disk_size(); + ASSERT_GT(final_size, middle_size); + + // Shut down the tablet. + harness_->tablet()->Shutdown(); + + // The on-disk size and the size of the superblock PB should agree. + TabletSuperBlockPB superblock_pb; + ASSERT_OK(meta->ToSuperBlock(&superblock_pb)); + ASSERT_EQ(superblock_pb.ByteSize(), final_size); +} + } // namespace tablet } // namespace kudu http://git-wip-us.apache.org/repos/asf/kudu/blob/09543976/src/kudu/tablet/tablet_metadata.cc ---------------------------------------------------------------------- diff --git a/src/kudu/tablet/tablet_metadata.cc b/src/kudu/tablet/tablet_metadata.cc index 501cb7e..24cd33d 100644 --- a/src/kudu/tablet/tablet_metadata.cc +++ b/src/kudu/tablet/tablet_metadata.cc @@ -48,6 +48,7 @@ TAG_FLAG(enable_tablet_orphaned_block_deletion, advanced); TAG_FLAG(enable_tablet_orphaned_block_deletion, hidden); TAG_FLAG(enable_tablet_orphaned_block_deletion, runtime); +using std::memory_order_relaxed; using std::shared_ptr; using base::subtle::Barrier_AtomicIncrement; @@ -369,6 +370,7 @@ Status TabletMetadata::LoadFromSuperBlock(const TabletSuperBlockPB& superblock) DeleteOrphanedBlocks(orphaned_blocks); } + on_disk_size_.store(superblock.ByteSizeLong(), memory_order_relaxed); return Status::OK(); } @@ -533,6 +535,7 @@ Status TabletMetadata::ReplaceSuperBlockUnlocked(const TabletSuperBlockPB &pb) { fs_manager_->env(), path, pb, pb_util::OVERWRITE, pb_util::SYNC), Substitute("Failed to write tablet metadata $0", tablet_id_)); + on_disk_size_.store(pb.ByteSizeLong(), memory_order_relaxed); return Status::OK(); } http://git-wip-us.apache.org/repos/asf/kudu/blob/09543976/src/kudu/tablet/tablet_metadata.h ---------------------------------------------------------------------- diff --git a/src/kudu/tablet/tablet_metadata.h b/src/kudu/tablet/tablet_metadata.h index d295d51..316b390 100644 --- a/src/kudu/tablet/tablet_metadata.h +++ b/src/kudu/tablet/tablet_metadata.h @@ -17,6 +17,7 @@ #ifndef KUDU_TABLET_TABLET_METADATA_H #define KUDU_TABLET_TABLET_METADATA_H +#include <atomic> #include <boost/optional/optional_fwd.hpp> #include <memory> #include <string> @@ -225,6 +226,10 @@ class TabletMetadata : public RefCountedThreadSafe<TabletMetadata> { // Fully replace a superblock (used for bootstrap). Status ReplaceSuperBlock(const TabletSuperBlockPB &pb); + int64_t on_disk_size() const { + return on_disk_size_.load(std::memory_order_relaxed); + } + // ========================================================================== // Stuff used by the tests // ========================================================================== @@ -352,6 +357,10 @@ class TabletMetadata : public RefCountedThreadSafe<TabletMetadata> { // to disk. StatusClosure pre_flush_callback_; + // The on-disk size of the tablet metadata, as of the last successful + // call to Flush() or LoadFromDisk(). + std::atomic<int64_t> on_disk_size_; + DISALLOW_COPY_AND_ASSIGN(TabletMetadata); }; http://git-wip-us.apache.org/repos/asf/kudu/blob/09543976/src/kudu/tablet/tablet_replica.cc ---------------------------------------------------------------------- diff --git a/src/kudu/tablet/tablet_replica.cc b/src/kudu/tablet/tablet_replica.cc index 3914dc9..259ac05 100644 --- a/src/kudu/tablet/tablet_replica.cc +++ b/src/kudu/tablet/tablet_replica.cc @@ -359,7 +359,7 @@ void TabletReplica::GetTabletStatusPB(TabletStatusPB* status_pb_out) const { status_pb_out->set_state(state_); status_pb_out->set_tablet_data_state(meta_->tablet_data_state()); if (tablet_) { - status_pb_out->set_estimated_on_disk_size(tablet_->EstimateOnDiskSize()); + status_pb_out->set_estimated_on_disk_size(tablet_->OnDiskSize()); } } http://git-wip-us.apache.org/repos/asf/kudu/blob/09543976/src/kudu/tablet/tablet_replica.h ---------------------------------------------------------------------- diff --git a/src/kudu/tablet/tablet_replica.h b/src/kudu/tablet/tablet_replica.h index 7f207cb..0dfc5f2 100644 --- a/src/kudu/tablet/tablet_replica.h +++ b/src/kudu/tablet/tablet_replica.h @@ -110,7 +110,7 @@ class TabletReplica : public RefCountedThreadSafe<TabletReplica>, Status WaitUntilConsensusRunning(const MonoDelta& timeout); // Submits a write to a tablet and executes it asynchronously. - // The caller is expected to build and pass a TrasactionContext that points + // The caller is expected to build and pass a TransactionContext that points // to the RPC WriteRequest, WriteResponse, RpcContext and to the tablet's // MvccManager. Status SubmitWrite(std::unique_ptr<WriteTransactionState> tx_state); http://git-wip-us.apache.org/repos/asf/kudu/blob/09543976/src/kudu/util/metrics.h ---------------------------------------------------------------------- diff --git a/src/kudu/util/metrics.h b/src/kudu/util/metrics.h index 8aeaf93..b9b8509 100644 --- a/src/kudu/util/metrics.h +++ b/src/kudu/util/metrics.h @@ -62,7 +62,7 @@ // // MetricEntity instances may also carry a key-value map of string attributes. These // attributes are directly exposed to monitoring systems via the JSON output. Monitoring -// systems may use this information to allow hierarchical aggregation beteween entities, +// systems may use this information to allow hierarchical aggregation between entities, // display them to the user, etc. // // Metric instances
