Repository: kudu Updated Branches: refs/heads/master 279b72916 -> e3a8f64b9
Account for REINSERTs in delta stats This makes DeltaStats also account for REINSERTs. This is not actually used outside of tests, but seems like it would be silly to not count this type of delta. In the future it might be useful for selecting undo deltas for minor delta compaction. The protobuf field is optional to preserve data format compatibility. Change-Id: Idd60f6c1c12803d339f5f8d96c6b089fab21b13f Reviewed-on: http://gerrit.cloudera.org:8080/4932 Tested-by: Kudu Jenkins Reviewed-by: Jean-Daniel Cryans <[email protected]> Project: http://git-wip-us.apache.org/repos/asf/kudu/repo Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/3c68deac Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/3c68deac Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/3c68deac Branch: refs/heads/master Commit: 3c68deacc05f6b5a9b82522baa38e046744b1815 Parents: 279b729 Author: David Alves <[email protected]> Authored: Mon Nov 7 21:46:42 2016 -0800 Committer: David Ribeiro Alves <[email protected]> Committed: Tue Nov 8 23:38:14 2016 +0000 ---------------------------------------------------------------------- src/kudu/tablet/delta_stats.cc | 48 ++++++++++++++++++++++++++----------- src/kudu/tablet/delta_stats.h | 9 ++++++- src/kudu/tablet/tablet.proto | 4 ++++ 3 files changed, 46 insertions(+), 15 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/kudu/blob/3c68deac/src/kudu/tablet/delta_stats.cc ---------------------------------------------------------------------- diff --git a/src/kudu/tablet/delta_stats.cc b/src/kudu/tablet/delta_stats.cc index f46becb..25b7ec8 100644 --- a/src/kudu/tablet/delta_stats.cc +++ b/src/kudu/tablet/delta_stats.cc @@ -24,6 +24,8 @@ #include "kudu/tablet/tablet.pb.h" #include "kudu/util/bitmap.h" +using strings::Substitute; + namespace kudu { using std::vector; @@ -32,6 +34,7 @@ namespace tablet { DeltaStats::DeltaStats() : delete_count_(0), + reinsert_count_(0), max_timestamp_(Timestamp::kMin), min_timestamp_(Timestamp::kMax) { } @@ -45,21 +48,35 @@ void DeltaStats::IncrDeleteCount(int64_t delete_count) { delete_count_ += delete_count; } +void DeltaStats::IncrReinsertCount(int64_t reinsert_count) { + reinsert_count_ += reinsert_count; +} + Status DeltaStats::UpdateStats(const Timestamp& timestamp, const RowChangeList& update) { - // Decode the update, incrementing the update count for each of the + // Decode the mutation incrementing the update count for each of the // columns we find present. - RowChangeListDecoder update_decoder(update); - RETURN_NOT_OK(update_decoder.Init()); - if (PREDICT_FALSE(update_decoder.is_delete())) { - IncrDeleteCount(1); - } else if (PREDICT_TRUE(update_decoder.is_update())) { - vector<ColumnId> col_ids; - RETURN_NOT_OK(update_decoder.GetIncludedColumnIds(&col_ids)); - for (ColumnId col_id : col_ids) { - IncrUpdateCount(col_id, 1); + RowChangeListDecoder decoder(update); + RETURN_NOT_OK(decoder.Init()); + switch (decoder.get_type()) { + case RowChangeList::kReinsert: { + IncrReinsertCount(1); + } // FALLTHROUGH INTENDED. REINSERTs contain column updates so we need to account + // for those in the updated column stats. + case RowChangeList::kUpdate: { + vector<ColumnId> col_ids; + RETURN_NOT_OK(decoder.GetIncludedColumnIds(&col_ids)); + for (const ColumnId& col_id : col_ids) { + IncrUpdateCount(col_id, 1); + } + break; + } + case RowChangeList::kDelete: { + IncrDeleteCount(1); + break; } - } // Don't handle re-inserts + default: LOG(FATAL) << "Invalid mutation type: " << decoder.get_type(); + } if (min_timestamp_.CompareTo(timestamp) > 0) { min_timestamp_ = timestamp; @@ -76,6 +93,8 @@ string DeltaStats::ToString() const { "ts range=[$0, $1]", min_timestamp_.ToString(), max_timestamp_.ToString()); + ret.append(Substitute(", delete_count=[$0]", delete_count_)); + ret.append(Substitute(", reinsert_count=[$0]", reinsert_count_)); ret.append(", update_counts_by_col_id=["); ret.append(JoinKeysAndValuesIterator(update_counts_by_col_id_.begin(), update_counts_by_col_id_.end(), @@ -88,8 +107,8 @@ string DeltaStats::ToString() const { void DeltaStats::ToPB(DeltaStatsPB* pb) const { pb->Clear(); pb->set_delete_count(delete_count_); - typedef std::pair<ColumnId, int64_t> entry; - for (const entry& e : update_counts_by_col_id_) { + pb->set_reinsert_count(reinsert_count_); + for (const auto& e : update_counts_by_col_id_) { DeltaStatsPB::ColumnStats* stats = pb->add_column_stats(); stats->set_col_id(e.first); stats->set_update_count(e.second); @@ -101,8 +120,9 @@ void DeltaStats::ToPB(DeltaStatsPB* pb) const { Status DeltaStats::InitFromPB(const DeltaStatsPB& pb) { delete_count_ = pb.delete_count(); + reinsert_count_ = pb.reinsert_count(); update_counts_by_col_id_.clear(); - for (const DeltaStatsPB::ColumnStats stats : pb.column_stats()) { + for (const DeltaStatsPB::ColumnStats& stats : pb.column_stats()) { IncrUpdateCount(ColumnId(stats.col_id()), stats.update_count()); } max_timestamp_.FromUint64(pb.max_timestamp()); http://git-wip-us.apache.org/repos/asf/kudu/blob/3c68deac/src/kudu/tablet/delta_stats.h ---------------------------------------------------------------------- diff --git a/src/kudu/tablet/delta_stats.h b/src/kudu/tablet/delta_stats.h index d10f4b2..a5213c0 100644 --- a/src/kudu/tablet/delta_stats.h +++ b/src/kudu/tablet/delta_stats.h @@ -47,6 +47,9 @@ class DeltaStats { // Increment the per-store delete count by 'delete_count'. void IncrDeleteCount(int64_t delete_count); + // Increment the per-store reinsert count by 'reinsert_count'. + void IncrReinsertCount(int64_t reinsert_count); + // Increment delete and update counts based on changes contained in // 'update'. Status UpdateStats(const Timestamp& timestamp, @@ -55,8 +58,11 @@ class DeltaStats { // Return the number of deletes in the current delta store. int64_t delete_count() const { return delete_count_; } + // Return the number of reinserts in the current delta store. + int64_t reinsert_count() const { return reinsert_count_; } + // Returns number of updates for a given column. - int64_t update_count_for_col_id(ColumnId col_id) const { + int64_t update_count_for_col_id(const ColumnId& col_id) const { return FindWithDefault(update_counts_by_col_id_, col_id, 0); } @@ -95,6 +101,7 @@ class DeltaStats { private: std::unordered_map<ColumnId, int64_t> update_counts_by_col_id_; uint64_t delete_count_; + uint64_t reinsert_count_; Timestamp max_timestamp_; Timestamp min_timestamp_; }; http://git-wip-us.apache.org/repos/asf/kudu/blob/3c68deac/src/kudu/tablet/tablet.proto ---------------------------------------------------------------------- diff --git a/src/kudu/tablet/tablet.proto b/src/kudu/tablet/tablet.proto index 468add3..975c3be 100644 --- a/src/kudu/tablet/tablet.proto +++ b/src/kudu/tablet/tablet.proto @@ -64,6 +64,10 @@ message DeltaStatsPB { // Number of deletes (deletes result in deletion of an entire row) required int64 delete_count = 1; + // Number of reinserts. + // Optional for data format compatibility. + optional int64 reinsert_count = 6 [ default = 0]; + // REMOVED: replaced by column_stats, which maps by column ID, // whereas this older version mapped by index. // repeated int64 per_column_update_count = 2;
