Repository: kudu Updated Branches: refs/heads/master 93be1310d -> d13ac79a4
KUDU-1495. Maintenance manager should not schedule new ops during unregister This fixes a bug where deleting a tablet could block for an extended time on compactions being scheduled if the maintenance manager was configured with multiple threads. The issue was that Unregister() waited for the number of running operations to reach 0 before removing the op from the candidate list of ops, but did not prevent new instances of that op from being scheduled. Change-Id: I3675705caf5b73f8a480036b974e4db6c205616a Reviewed-on: http://gerrit.cloudera.org:8080/4295 Reviewed-by: Adar Dembo <[email protected]> Tested-by: Kudu Jenkins Project: http://git-wip-us.apache.org/repos/asf/kudu/repo Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/531e7d99 Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/531e7d99 Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/531e7d99 Branch: refs/heads/master Commit: 531e7d99c68a7767cfc900447179fcc116f32f1c Parents: 93be131 Author: Todd Lipcon <[email protected]> Authored: Thu Sep 1 17:06:59 2016 -0700 Committer: Todd Lipcon <[email protected]> Committed: Fri Sep 2 01:56:12 2016 +0000 ---------------------------------------------------------------------- src/kudu/tablet/tablet.cc | 6 ++++ src/kudu/util/maintenance_manager-test.cc | 47 +++++++++++++++++++++++--- src/kudu/util/maintenance_manager.cc | 9 +++-- src/kudu/util/maintenance_manager.h | 19 +++++++++++ 4 files changed, 74 insertions(+), 7 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/kudu/blob/531e7d99/src/kudu/tablet/tablet.cc ---------------------------------------------------------------------- diff --git a/src/kudu/tablet/tablet.cc b/src/kudu/tablet/tablet.cc index 387d6f5..faf1ae1 100644 --- a/src/kudu/tablet/tablet.cc +++ b/src/kudu/tablet/tablet.cc @@ -1206,6 +1206,12 @@ void Tablet::RegisterMaintenanceOps(MaintenanceManager* maint_mgr) { } void Tablet::UnregisterMaintenanceOps() { + // First cancel all of the operations, so that while we're waiting for one + // operation to finish in Unregister(), a different one can't get re-scheduled. + for (MaintenanceOp* op : maintenance_ops_) { + op->CancelAndDisable(); + } + for (MaintenanceOp* op : maintenance_ops_) { op->Unregister(); } http://git-wip-us.apache.org/repos/asf/kudu/blob/531e7d99/src/kudu/util/maintenance_manager-test.cc ---------------------------------------------------------------------- diff --git a/src/kudu/util/maintenance_manager-test.cc b/src/kudu/util/maintenance_manager-test.cc index 60a9d51..73e11a6 100644 --- a/src/kudu/util/maintenance_manager-test.cc +++ b/src/kudu/util/maintenance_manager-test.cc @@ -85,7 +85,8 @@ class TestMaintenanceOp : public MaintenanceOp { metric_entity_(METRIC_ENTITY_test.Instantiate(&metric_registry_, "test")), maintenance_op_duration_(METRIC_maintenance_op_duration.Instantiate(metric_entity_)), maintenance_ops_running_(METRIC_maintenance_ops_running.Instantiate(metric_entity_, 0)), - remaining_runs_(1) { + remaining_runs_(1), + sleep_time_(MonoDelta::FromSeconds(0)) { } virtual ~TestMaintenanceOp() {} @@ -100,10 +101,14 @@ class TestMaintenanceOp : public MaintenanceOp { } virtual void Perform() OVERRIDE { - DLOG(INFO) << "Performing op " << name(); - std::lock_guard<Mutex> guard(lock_); - CHECK_GE(remaining_runs_, 1); - remaining_runs_--; + { + std::lock_guard<Mutex> guard(lock_); + DLOG(INFO) << "Performing op " << name(); + CHECK_GE(remaining_runs_, 1); + remaining_runs_--; + } + + SleepFor(sleep_time_); } virtual void UpdateStats(MaintenanceOpStats* stats) OVERRIDE { @@ -119,6 +124,11 @@ class TestMaintenanceOp : public MaintenanceOp { remaining_runs_ = runs; } + void set_sleep_time(MonoDelta time) { + std::lock_guard<Mutex> guard(lock_); + sleep_time_ = time; + } + void set_ram_anchored(uint64_t ram_anchored) { std::lock_guard<Mutex> guard(lock_); consumption_.Reset(ram_anchored); @@ -156,6 +166,9 @@ class TestMaintenanceOp : public MaintenanceOp { // The number of remaining times this operation will run before disabling // itself. int remaining_runs_; + + // The amount of time each op invocation will sleep. + MonoDelta sleep_time_; }; // Create an op and wait for it to start running. Unregister it while it is @@ -179,6 +192,30 @@ TEST_F(MaintenanceManagerTest, TestRegisterUnregister) { ThreadJoiner(thread.get()).Join(); } +// Regression test for KUDU-1495: when an operation is being unregistered, +// new instances of that operation should not be scheduled. +TEST_F(MaintenanceManagerTest, TestNewOpsDontGetScheduledDuringUnregister) { + TestMaintenanceOp op1("1", MaintenanceOp::HIGH_IO_USAGE, test_tracker_); + op1.set_ram_anchored(1001); + + // Set the op to run up to 10 times, and each time should sleep for a second. + op1.set_remaining_runs(10); + op1.set_sleep_time(MonoDelta::FromSeconds(1)); + manager_->RegisterOp(&op1); + + // Wait until two instances of the ops start running, since we have two MM threads. + AssertEventually([&]() { + ASSERT_EQ(op1.RunningGauge()->value(), 2); + }); + + // Trigger Unregister while they are running. This should wait for the currently- + // running operations to complete, but no new operations should be scheduled. + manager_->UnregisterOp(&op1); + + // Hence, we should have run only the original two that we saw above. + ASSERT_LE(op1.DurationHistogram()->TotalCount(), 2); +} + // Test that we'll run an operation that doesn't improve performance when memory // pressure gets high. TEST_F(MaintenanceManagerTest, TestMemoryPressure) { http://git-wip-us.apache.org/repos/asf/kudu/blob/531e7d99/src/kudu/util/maintenance_manager.cc ---------------------------------------------------------------------- diff --git a/src/kudu/util/maintenance_manager.cc b/src/kudu/util/maintenance_manager.cc index b596e0e..7628fbb 100644 --- a/src/kudu/util/maintenance_manager.cc +++ b/src/kudu/util/maintenance_manager.cc @@ -72,7 +72,11 @@ void MaintenanceOpStats::Clear() { } MaintenanceOp::MaintenanceOp(std::string name, IOUsage io_usage) - : name_(std::move(name)), running_(0), io_usage_(io_usage) {} + : name_(std::move(name)), + running_(0), + cancel_(false), + io_usage_(io_usage) { +} MaintenanceOp::~MaintenanceOp() { CHECK(!manager_.get()) << "You must unregister the " << name_ @@ -165,6 +169,7 @@ void MaintenanceManager::UnregisterOp(MaintenanceOp* op) { VLOG_AND_TRACE("maintenance", 1) << "Waiting for op " << op->name() << " to finish so " << "we can unregister it."; } + op->CancelAndDisable(); while (iter->first->running_ > 0) { op->cond_->Wait(); iter = ops_.find(op); @@ -270,7 +275,7 @@ MaintenanceOp* MaintenanceManager::FindBestOp() { // Update op stats. stats.Clear(); op->UpdateStats(&stats); - if (!stats.valid() || !stats.runnable()) { + if (op->cancelled() || !stats.valid() || !stats.runnable()) { continue; } if (stats.logs_retained_bytes() > low_io_most_logs_retained_bytes && http://git-wip-us.apache.org/repos/asf/kudu/blob/531e7d99/src/kudu/util/maintenance_manager.h ---------------------------------------------------------------------- diff --git a/src/kudu/util/maintenance_manager.h b/src/kudu/util/maintenance_manager.h index c55d62b..2ae4ccb 100644 --- a/src/kudu/util/maintenance_manager.h +++ b/src/kudu/util/maintenance_manager.h @@ -26,6 +26,7 @@ #include <vector> #include "kudu/gutil/macros.h" +#include "kudu/util/atomic.h" #include "kudu/util/condition_variable.h" #include "kudu/util/maintenance_manager.pb.h" #include "kudu/util/monotime.h" @@ -177,6 +178,19 @@ class MaintenanceOp { IOUsage io_usage() const { return io_usage_; } + // Return true if the operation has been cancelled due to Unregister() pending. + bool cancelled() const { + return cancel_.Load(); + } + + // Cancel this operation, which prevents new instances of it from being scheduled + // regardless of whether the statistics indicate it is runnable. Instances may also + // optionally poll 'cancelled()' on a periodic basis to know if they should abort a + // lengthy operation in the middle of Perform(). + void CancelAndDisable() { + cancel_.Store(true); + } + private: DISALLOW_COPY_AND_ASSIGN(MaintenanceOp); @@ -186,6 +200,11 @@ class MaintenanceOp { // The number of times that this op is currently running. uint32_t running_; + // Set when we are trying to unregister the maintenance operation. + // Ongoing operations could read this boolean and cancel themselves. + // New operations will not be scheduled when this boolean is set. + AtomicBool cancel_; + // Condition variable which the UnregisterOp function can wait on. // // Note: 'cond_' is used with the MaintenanceManager's mutex. As such,
