Repository: kudu Updated Branches: refs/heads/master 6a12ba3f7 -> b1f1388e2
compaction_policy: avoid O(n^2) calls to EstimateOnDiskSize In a cluster workload with a 130GB+ tablet, I found that the maintenance manager scheduler thread was spending tens of seconds inside RowSetInfo::CollectOrdered(), mostly inside calls to EstimateOnDiskSize(). While any individual call is not exceedingly slow, they involve a lot of virtual function calls and potential CPU cache misses, so it appears to add up. I deployed this patch on the cluster and found that the MaintenanceManager 'FindBestOps' call went from ~16 seconds to ~350ms. Change-Id: Ic2949218d7f5fd822571a7b14d1d0b4430aeee1d Reviewed-on: http://gerrit.cloudera.org:8080/4191 Tested-by: Kudu Jenkins Reviewed-by: David Ribeiro Alves <[email protected]> Project: http://git-wip-us.apache.org/repos/asf/kudu/repo Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/859cf31d Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/859cf31d Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/859cf31d Branch: refs/heads/master Commit: 859cf31d13228d5e07d5ed35bdeb6e237dc1701f Parents: 6a12ba3 Author: Todd Lipcon <[email protected]> Authored: Wed Aug 31 15:30:25 2016 -0700 Committer: Todd Lipcon <[email protected]> Committed: Wed Sep 7 06:28:44 2016 +0000 ---------------------------------------------------------------------- src/kudu/tablet/rowset_info.cc | 8 ++++---- src/kudu/tablet/rowset_info.h | 11 +++++++++-- 2 files changed, 13 insertions(+), 6 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/kudu/blob/859cf31d/src/kudu/tablet/rowset_info.cc ---------------------------------------------------------------------- diff --git a/src/kudu/tablet/rowset_info.cc b/src/kudu/tablet/rowset_info.cc index 8ed1028..123266d 100644 --- a/src/kudu/tablet/rowset_info.cc +++ b/src/kudu/tablet/rowset_info.cc @@ -137,7 +137,7 @@ double WidthByDataSize(const Slice& prev, const Slice& next, for (const auto& rs_rsi : active) { double fraction = StringFractionInRange(rs_rsi.second, prev, next); - weight += rs_rsi.first->EstimateOnDiskSize() * fraction; + weight += rs_rsi.second->size_bytes() * fraction; } return weight; @@ -253,8 +253,8 @@ void RowSetInfo::CollectOrdered(const RowSetTree& tree, RowSetInfo::RowSetInfo(RowSet* rs, double init_cdf) : rowset_(rs), - size_mb_(std::max(implicit_cast<int>(rs->EstimateOnDiskSize() / 1024 / 1024), - kMinSizeMb)), + size_bytes_(rs->EstimateOnDiskSize()), + size_mb_(std::max(implicit_cast<int>(size_bytes_ / 1024 / 1024), kMinSizeMb)), cdf_min_key_(init_cdf), cdf_max_key_(init_cdf) { has_bounds_ = rs->GetBounds(&min_key_, &max_key_).ok(); @@ -266,7 +266,7 @@ void RowSetInfo::FinalizeCDFVector(vector<RowSetInfo>* vec, for (RowSetInfo& cdf_rs : *vec) { CHECK_GT(cdf_rs.size_mb_, 0) << "Expected file size to be at least 1MB " << "for RowSet " << cdf_rs.rowset_->ToString() - << ", was " << cdf_rs.rowset_->EstimateOnDiskSize() + << ", was " << cdf_rs.size_bytes() << " bytes."; cdf_rs.cdf_min_key_ /= quot; cdf_rs.cdf_max_key_ /= quot; http://git-wip-us.apache.org/repos/asf/kudu/blob/859cf31d/src/kudu/tablet/rowset_info.h ---------------------------------------------------------------------- diff --git a/src/kudu/tablet/rowset_info.h b/src/kudu/tablet/rowset_info.h index cb315dc..767b7a6 100644 --- a/src/kudu/tablet/rowset_info.h +++ b/src/kudu/tablet/rowset_info.h @@ -41,6 +41,7 @@ class RowSetInfo { std::vector<RowSetInfo>* min_key, std::vector<RowSetInfo>* max_key); + int size_bytes() const { return size_bytes_; } int size_mb() const { return size_mb_; } // Return the value of the CDF at the minimum key of this candidate. @@ -80,8 +81,14 @@ class RowSetInfo { static void FinalizeCDFVector(std::vector<RowSetInfo>* vec, double quot); - RowSet* rowset_; - int size_mb_; + RowSet* const rowset_; + + // Cached version of rowset_->EstimateOnDiskSize(). + const int size_bytes_; + + // The size in MB, already clamped so that all rowsets have size at least + // 1MB. This is cached to avoid the branch during the selection hot path. + const int size_mb_; // True if the RowSet has known bounds. // MemRowSets in particular do not.
