Repository: kudu
Updated Branches:
  refs/heads/master 93be1310d -> d13ac79a4


KUDU-1495. Maintenance manager should not schedule new ops during unregister

This fixes a bug where deleting a tablet could block for an extended
time on compactions being scheduled if the maintenance manager was
configured with multiple threads.

The issue was that Unregister() waited for the number of running
operations to reach 0 before removing the op from the candidate list of
ops, but did not prevent new instances of that op from being scheduled.

Change-Id: I3675705caf5b73f8a480036b974e4db6c205616a
Reviewed-on: http://gerrit.cloudera.org:8080/4295
Reviewed-by: Adar Dembo <[email protected]>
Tested-by: Kudu Jenkins


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/531e7d99
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/531e7d99
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/531e7d99

Branch: refs/heads/master
Commit: 531e7d99c68a7767cfc900447179fcc116f32f1c
Parents: 93be131
Author: Todd Lipcon <[email protected]>
Authored: Thu Sep 1 17:06:59 2016 -0700
Committer: Todd Lipcon <[email protected]>
Committed: Fri Sep 2 01:56:12 2016 +0000

----------------------------------------------------------------------
 src/kudu/tablet/tablet.cc                 |  6 ++++
 src/kudu/util/maintenance_manager-test.cc | 47 +++++++++++++++++++++++---
 src/kudu/util/maintenance_manager.cc      |  9 +++--
 src/kudu/util/maintenance_manager.h       | 19 +++++++++++
 4 files changed, 74 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/531e7d99/src/kudu/tablet/tablet.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tablet/tablet.cc b/src/kudu/tablet/tablet.cc
index 387d6f5..faf1ae1 100644
--- a/src/kudu/tablet/tablet.cc
+++ b/src/kudu/tablet/tablet.cc
@@ -1206,6 +1206,12 @@ void Tablet::RegisterMaintenanceOps(MaintenanceManager* 
maint_mgr) {
 }
 
 void Tablet::UnregisterMaintenanceOps() {
+  // First cancel all of the operations, so that while we're waiting for one
+  // operation to finish in Unregister(), a different one can't get 
re-scheduled.
+  for (MaintenanceOp* op : maintenance_ops_) {
+    op->CancelAndDisable();
+  }
+
   for (MaintenanceOp* op : maintenance_ops_) {
     op->Unregister();
   }

http://git-wip-us.apache.org/repos/asf/kudu/blob/531e7d99/src/kudu/util/maintenance_manager-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/util/maintenance_manager-test.cc 
b/src/kudu/util/maintenance_manager-test.cc
index 60a9d51..73e11a6 100644
--- a/src/kudu/util/maintenance_manager-test.cc
+++ b/src/kudu/util/maintenance_manager-test.cc
@@ -85,7 +85,8 @@ class TestMaintenanceOp : public MaintenanceOp {
       metric_entity_(METRIC_ENTITY_test.Instantiate(&metric_registry_, 
"test")),
       
maintenance_op_duration_(METRIC_maintenance_op_duration.Instantiate(metric_entity_)),
       
maintenance_ops_running_(METRIC_maintenance_ops_running.Instantiate(metric_entity_,
 0)),
-      remaining_runs_(1) {
+      remaining_runs_(1),
+      sleep_time_(MonoDelta::FromSeconds(0)) {
   }
 
   virtual ~TestMaintenanceOp() {}
@@ -100,10 +101,14 @@ class TestMaintenanceOp : public MaintenanceOp {
   }
 
   virtual void Perform() OVERRIDE {
-    DLOG(INFO) << "Performing op " << name();
-    std::lock_guard<Mutex> guard(lock_);
-    CHECK_GE(remaining_runs_, 1);
-    remaining_runs_--;
+    {
+      std::lock_guard<Mutex> guard(lock_);
+      DLOG(INFO) << "Performing op " << name();
+      CHECK_GE(remaining_runs_, 1);
+      remaining_runs_--;
+    }
+
+    SleepFor(sleep_time_);
   }
 
   virtual void UpdateStats(MaintenanceOpStats* stats) OVERRIDE {
@@ -119,6 +124,11 @@ class TestMaintenanceOp : public MaintenanceOp {
     remaining_runs_ = runs;
   }
 
+  void set_sleep_time(MonoDelta time) {
+    std::lock_guard<Mutex> guard(lock_);
+    sleep_time_ = time;
+  }
+
   void set_ram_anchored(uint64_t ram_anchored) {
     std::lock_guard<Mutex> guard(lock_);
     consumption_.Reset(ram_anchored);
@@ -156,6 +166,9 @@ class TestMaintenanceOp : public MaintenanceOp {
   // The number of remaining times this operation will run before disabling
   // itself.
   int remaining_runs_;
+
+  // The amount of time each op invocation will sleep.
+  MonoDelta sleep_time_;
 };
 
 // Create an op and wait for it to start running.  Unregister it while it is
@@ -179,6 +192,30 @@ TEST_F(MaintenanceManagerTest, TestRegisterUnregister) {
   ThreadJoiner(thread.get()).Join();
 }
 
+// Regression test for KUDU-1495: when an operation is being unregistered,
+// new instances of that operation should not be scheduled.
+TEST_F(MaintenanceManagerTest, TestNewOpsDontGetScheduledDuringUnregister) {
+  TestMaintenanceOp op1("1", MaintenanceOp::HIGH_IO_USAGE, test_tracker_);
+  op1.set_ram_anchored(1001);
+
+  // Set the op to run up to 10 times, and each time should sleep for a second.
+  op1.set_remaining_runs(10);
+  op1.set_sleep_time(MonoDelta::FromSeconds(1));
+  manager_->RegisterOp(&op1);
+
+  // Wait until two instances of the ops start running, since we have two MM 
threads.
+  AssertEventually([&]() {
+      ASSERT_EQ(op1.RunningGauge()->value(), 2);
+    });
+
+  // Trigger Unregister while they are running. This should wait for the 
currently-
+  // running operations to complete, but no new operations should be scheduled.
+  manager_->UnregisterOp(&op1);
+
+  // Hence, we should have run only the original two that we saw above.
+  ASSERT_LE(op1.DurationHistogram()->TotalCount(), 2);
+}
+
 // Test that we'll run an operation that doesn't improve performance when 
memory
 // pressure gets high.
 TEST_F(MaintenanceManagerTest, TestMemoryPressure) {

http://git-wip-us.apache.org/repos/asf/kudu/blob/531e7d99/src/kudu/util/maintenance_manager.cc
----------------------------------------------------------------------
diff --git a/src/kudu/util/maintenance_manager.cc 
b/src/kudu/util/maintenance_manager.cc
index b596e0e..7628fbb 100644
--- a/src/kudu/util/maintenance_manager.cc
+++ b/src/kudu/util/maintenance_manager.cc
@@ -72,7 +72,11 @@ void MaintenanceOpStats::Clear() {
 }
 
 MaintenanceOp::MaintenanceOp(std::string name, IOUsage io_usage)
-    : name_(std::move(name)), running_(0), io_usage_(io_usage) {}
+    : name_(std::move(name)),
+      running_(0),
+      cancel_(false),
+      io_usage_(io_usage) {
+}
 
 MaintenanceOp::~MaintenanceOp() {
   CHECK(!manager_.get()) << "You must unregister the " << name_
@@ -165,6 +169,7 @@ void MaintenanceManager::UnregisterOp(MaintenanceOp* op) {
       VLOG_AND_TRACE("maintenance", 1) << "Waiting for op " << op->name() << " 
to finish so "
             << "we can unregister it.";
     }
+    op->CancelAndDisable();
     while (iter->first->running_ > 0) {
       op->cond_->Wait();
       iter = ops_.find(op);
@@ -270,7 +275,7 @@ MaintenanceOp* MaintenanceManager::FindBestOp() {
     // Update op stats.
     stats.Clear();
     op->UpdateStats(&stats);
-    if (!stats.valid() || !stats.runnable()) {
+    if (op->cancelled() || !stats.valid() || !stats.runnable()) {
       continue;
     }
     if (stats.logs_retained_bytes() > low_io_most_logs_retained_bytes &&

http://git-wip-us.apache.org/repos/asf/kudu/blob/531e7d99/src/kudu/util/maintenance_manager.h
----------------------------------------------------------------------
diff --git a/src/kudu/util/maintenance_manager.h 
b/src/kudu/util/maintenance_manager.h
index c55d62b..2ae4ccb 100644
--- a/src/kudu/util/maintenance_manager.h
+++ b/src/kudu/util/maintenance_manager.h
@@ -26,6 +26,7 @@
 #include <vector>
 
 #include "kudu/gutil/macros.h"
+#include "kudu/util/atomic.h"
 #include "kudu/util/condition_variable.h"
 #include "kudu/util/maintenance_manager.pb.h"
 #include "kudu/util/monotime.h"
@@ -177,6 +178,19 @@ class MaintenanceOp {
 
   IOUsage io_usage() const { return io_usage_; }
 
+  // Return true if the operation has been cancelled due to Unregister() 
pending.
+  bool cancelled() const {
+    return cancel_.Load();
+  }
+
+  // Cancel this operation, which prevents new instances of it from being 
scheduled
+  // regardless of whether the statistics indicate it is runnable. Instances 
may also
+  // optionally poll 'cancelled()' on a periodic basis to know if they should 
abort a
+  // lengthy operation in the middle of Perform().
+  void CancelAndDisable() {
+    cancel_.Store(true);
+  }
+
  private:
   DISALLOW_COPY_AND_ASSIGN(MaintenanceOp);
 
@@ -186,6 +200,11 @@ class MaintenanceOp {
   // The number of times that this op is currently running.
   uint32_t running_;
 
+  // Set when we are trying to unregister the maintenance operation.
+  // Ongoing operations could read this boolean and cancel themselves.
+  // New operations will not be scheduled when this boolean is set.
+  AtomicBool cancel_;
+
   // Condition variable which the UnregisterOp function can wait on.
   //
   // Note: 'cond_' is used with the MaintenanceManager's mutex. As such,

Reply via email to