This is an automated email from the ASF dual-hosted git repository.
adar pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git
The following commit(s) were added to refs/heads/master by this push:
new f6f8bbf KUDU-2807 Crash when flush or compaction overlaps with
another compaction
f6f8bbf is described below
commit f6f8bbf35aa33e668f9cc5ce9b1e80d202a7f736
Author: Will Berkeley <[email protected]>
AuthorDate: Tue May 7 09:31:49 2019 -0700
KUDU-2807 Crash when flush or compaction overlaps with another compaction
Commit d3684a7b2add8f06b7189adb9ce9222b8ae1eff5 introduced a metric for
average rowset height. Computing this requires examining the rowsets in
the rowset tree and briefly taking each one's `compact_flush_lock_`.
However, any time a thread takes the `compact_flush_lock_` of a rowset,
it must hold the `compact_select_lock_` of the tablet that rowset
belongs to. This was not happening in two of the three places where the
average height is computed:
1. When opening the tablet.
2. When updating the rowset tree during a flush or compaction.
The first case is benign (as far as I know). The second case could cause
a crash like
F0429 07:26:56.918041 34043 tablet.cc:2268] Check failed: lock.owns_lock()
RowSet(24130) unable to lock compact_flush_lock
MM ops enforced the invariant above by try-locking the
`compact_flush_lock_` and checking that they obtained the lock, while
holding the `compact_select_lock_`. So, if a MM op try-locked a rowset
at the same time as another MM op was holding its `compact_flush_lock_`,
the above crash would result.
This patch fixes the crash by ensuring that the `compact_select_lock_`
is held whenever `ComputeCdfAndCheckOrdered`, which computes the average
rowset height, is called. I also made a small modification to the scope
of a `component_lock_` to avoid having to define a lock order for
`component_lock_` and `compact_select_lock_`.
Change-Id: Ic255f0466aa2c158fa32e8e38428eddfcf901b99
Reviewed-on: http://gerrit.cloudera.org:8080/13264
Reviewed-by: Adar Dembo <[email protected]>
Tested-by: Will Berkeley <[email protected]>
---
src/kudu/tablet/rowset_info.h | 2 ++
src/kudu/tablet/tablet.cc | 57 +++++++++++++++++++++++--------------------
src/kudu/tablet/tablet.h | 5 +++-
3 files changed, 36 insertions(+), 28 deletions(-)
diff --git a/src/kudu/tablet/rowset_info.h b/src/kudu/tablet/rowset_info.h
index af2d07d..f133e3e 100644
--- a/src/kudu/tablet/rowset_info.h
+++ b/src/kudu/tablet/rowset_info.h
@@ -51,6 +51,8 @@ class RowSetInfo {
// rowset tree is set into 'average_height', if it is not nullptr.
// If one of 'info_by_min_key' and 'info_by_max_key' is nullptr, the other
// must be.
+ // Requires holding the compact_select_lock_ for the tablet that the
+ // rowsets in 'tree' references.
static void ComputeCdfAndCollectOrdered(const RowSetTree& tree,
double* average_height,
std::vector<RowSetInfo>*
info_by_min_key,
diff --git a/src/kudu/tablet/tablet.cc b/src/kudu/tablet/tablet.cc
index 7cb1053..daee9a5 100644
--- a/src/kudu/tablet/tablet.cc
+++ b/src/kudu/tablet/tablet.cc
@@ -266,7 +266,6 @@ Status Tablet::Open() {
TRACE_EVENT0("tablet", "Tablet::Open");
RETURN_IF_STOPPED_OR_CHECK_STATE(kInitialized);
- std::lock_guard<rw_spinlock> lock(component_lock_);
CHECK(schema()->has_column_ids());
next_mrs_id_ = metadata_->last_durable_mrs_id() + 1;
@@ -293,15 +292,6 @@ Status Tablet::Open() {
shared_ptr<RowSetTree> new_rowset_tree(new RowSetTree());
CHECK_OK(new_rowset_tree->Reset(rowsets_opened));
- if (metrics_) {
- // Compute the initial average height of the rowset tree.
- double avg_height;
- RowSetInfo::ComputeCdfAndCollectOrdered(*new_rowset_tree,
- &avg_height,
- nullptr,
- nullptr);
- metrics_->average_diskrowset_height->set_value(avg_height);
- }
// Now that the current state is loaded, create the new MemRowSet with the
next id.
shared_ptr<MemRowSet> new_mrs;
@@ -309,7 +299,13 @@ Status Tablet::Open() {
log_anchor_registry_.get(),
mem_trackers_.tablet_tracker,
&new_mrs));
- components_ = new TabletComponents(new_mrs, new_rowset_tree);
+ {
+ std::lock_guard<rw_spinlock> lock(component_lock_);
+ components_ = new TabletComponents(new_mrs, new_rowset_tree);
+ }
+
+ // Compute the initial average rowset height.
+ UpdateAverageRowsetHeight();
{
std::lock_guard<simple_spinlock> l(state_lock_);
@@ -1055,10 +1051,10 @@ void Tablet::ModifyRowSetTree(const RowSetTree&
old_tree,
CHECK_OK(new_tree->Reset(post_swap));
}
-void Tablet::AtomicSwapRowSets(const RowSetVector &old_rowsets,
- const RowSetVector &new_rowsets) {
+void Tablet::AtomicSwapRowSets(const RowSetVector &to_remove,
+ const RowSetVector &to_add) {
std::lock_guard<rw_spinlock> lock(component_lock_);
- AtomicSwapRowSetsUnlocked(old_rowsets, new_rowsets);
+ AtomicSwapRowSetsUnlocked(to_remove, to_add);
}
void Tablet::AtomicSwapRowSetsUnlocked(const RowSetVector &to_remove,
@@ -1070,19 +1066,6 @@ void Tablet::AtomicSwapRowSetsUnlocked(const
RowSetVector &to_remove,
to_remove, to_add, new_tree.get());
components_ = new TabletComponents(components_->memrowset, new_tree);
-
- // Recompute the average rowset height.
- // TODO(wdberkeley): We should be able to cache the computation of the CDF
- // and average height and efficiently recompute it instead of doing it from
- // scratch.
- if (metrics_) {
- double avg_height;
- RowSetInfo::ComputeCdfAndCollectOrdered(*new_tree,
- &avg_height,
- nullptr,
- nullptr);
- metrics_->average_diskrowset_height->set_value(avg_height);
- }
}
Status Tablet::DoMajorDeltaCompaction(const vector<ColumnId>& col_ids,
@@ -1725,6 +1708,7 @@ Status Tablet::DoMergeCompactionOrFlush(const
RowSetsInCompaction &input,
// Replace the compacted rowsets with the new on-disk rowsets, making them
visible now that
// their metadata was written to disk.
AtomicSwapRowSets({ inprogress_rowset }, new_disk_rowsets);
+ UpdateAverageRowsetHeight();
const auto rows_written = drsw.rows_written_count();
const auto drs_written = drsw.drs_written_count();
@@ -1755,9 +1739,28 @@ Status Tablet::HandleEmptyCompactionOrFlush(const
RowSetVector& rowsets,
"Failed to flush new tablet metadata");
AtomicSwapRowSets(rowsets, RowSetVector());
+ UpdateAverageRowsetHeight();
return Status::OK();
}
+void Tablet::UpdateAverageRowsetHeight() {
+ if (!metrics_) {
+ return;
+ }
+ // TODO(wdberkeley): We should be able to cache the computation of the CDF
+ // and average height and efficiently recompute it instead of doing it from
+ // scratch.
+ scoped_refptr<TabletComponents> comps;
+ GetComponents(&comps);
+ std::lock_guard<std::mutex> l(compact_select_lock_);
+ double avg_height;
+ RowSetInfo::ComputeCdfAndCollectOrdered(*comps->rowsets,
+ &avg_height,
+ nullptr,
+ nullptr);
+ metrics_->average_diskrowset_height->set_value(avg_height);
+}
+
Status Tablet::Compact(CompactFlags flags) {
RETURN_IF_STOPPED_OR_CHECK_STATE(kOpen);
diff --git a/src/kudu/tablet/tablet.h b/src/kudu/tablet/tablet.h
index 87448f5..05c7c9b 100644
--- a/src/kudu/tablet/tablet.h
+++ b/src/kudu/tablet/tablet.h
@@ -347,7 +347,6 @@ class Tablet {
// memrowset in the current implementation.
Status CountRows(uint64_t *count) const;
-
// Verbosely dump this entire tablet to the logs. This is only
// really useful when debugging unit tests failures where the tablet
// has a very small number of rows.
@@ -599,6 +598,10 @@ class Tablet {
Status HandleEmptyCompactionOrFlush(const RowSetVector& rowsets,
int mrs_being_flushed);
+ // Updates the average rowset height metric. Acquires the tablet's
+ // compact_select_lock_.
+ void UpdateAverageRowsetHeight();
+
Status FlushMetadata(const RowSetVector& to_remove,
const RowSetMetadataVector& to_add,
int64_t mrs_being_flushed);