Repository: kudu
Updated Branches:
refs/heads/master 68962df0e -> 86116e4c5
[ExternalDaemon] return non-OK if no process to pause/resume
Prior to this patch, ExternalDaemon::{Pause,Resume}() methods
returned Status::OK() if there were no process to pause/resume.
This patch addresses that: now both methods return Status::NotFound()
if there is no process to pause/resume. Also, added WARN_UNUSED_RESULT
attribute for both methods and updated the call sites to handle their
return status correspondingly. Besides, replaced gscoped_ptr with
std::unique_ptr in external_minicluster.{cc,h}.
Change-Id: Ibce641a29ffc43391f54c6a8aacf6ba91ae93ae9
Reviewed-on: http://gerrit.cloudera.org:8080/7092
Reviewed-by: Todd Lipcon <[email protected]>
Tested-by: Kudu Jenkins
Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/86116e4c
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/86116e4c
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/86116e4c
Branch: refs/heads/master
Commit: 86116e4c515a9c89e728dd699decaf20d097edac
Parents: 68962df
Author: Alexey Serbin <[email protected]>
Authored: Mon Jun 5 17:46:41 2017 -0700
Committer: Alexey Serbin <[email protected]>
Committed: Tue Jun 6 06:55:18 2017 +0000
----------------------------------------------------------------------
.../integration-tests/client-stress-test.cc | 24 +++++++--------
.../integration-tests/external_mini_cluster.cc | 32 ++++++++++++++------
.../integration-tests/external_mini_cluster.h | 10 +++---
.../integration-tests/master_failover-itest.cc | 10 +++---
.../integration-tests/master_migration-itest.cc | 2 +-
.../integration-tests/raft_consensus-itest.cc | 10 +++---
6 files changed, 50 insertions(+), 38 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/kudu/blob/86116e4c/src/kudu/integration-tests/client-stress-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/integration-tests/client-stress-test.cc
b/src/kudu/integration-tests/client-stress-test.cc
index e909b1a..2718889 100644
--- a/src/kudu/integration-tests/client-stress-test.cc
+++ b/src/kudu/integration-tests/client-stress-test.cc
@@ -181,19 +181,19 @@ TEST_F(ClientStressTest_MultiMaster,
TestLeaderResolutionTimeout) {
work.Start();
- cluster_->tablet_server(0)->Pause();
- cluster_->tablet_server(1)->Pause();
- cluster_->tablet_server(2)->Pause();
- cluster_->master(0)->Pause();
- cluster_->master(1)->Pause();
- cluster_->master(2)->Pause();
+ ASSERT_OK(cluster_->tablet_server(0)->Pause());
+ ASSERT_OK(cluster_->tablet_server(1)->Pause());
+ ASSERT_OK(cluster_->tablet_server(2)->Pause());
+ ASSERT_OK(cluster_->master(0)->Pause());
+ ASSERT_OK(cluster_->master(1)->Pause());
+ ASSERT_OK(cluster_->master(2)->Pause());
SleepFor(MonoDelta::FromMilliseconds(300));
- cluster_->tablet_server(0)->Resume();
- cluster_->tablet_server(1)->Resume();
- cluster_->tablet_server(2)->Resume();
- cluster_->master(0)->Resume();
- cluster_->master(1)->Resume();
- cluster_->master(2)->Resume();
+ ASSERT_OK(cluster_->tablet_server(0)->Resume());
+ ASSERT_OK(cluster_->tablet_server(1)->Resume());
+ ASSERT_OK(cluster_->tablet_server(2)->Resume());
+ ASSERT_OK(cluster_->master(0)->Resume());
+ ASSERT_OK(cluster_->master(1)->Resume());
+ ASSERT_OK(cluster_->master(2)->Resume());
SleepFor(MonoDelta::FromMilliseconds(100));
// Set an explicit timeout. This test has caused deadlocks in the past.
http://git-wip-us.apache.org/repos/asf/kudu/blob/86116e4c/src/kudu/integration-tests/external_mini_cluster.cc
----------------------------------------------------------------------
diff --git a/src/kudu/integration-tests/external_mini_cluster.cc
b/src/kudu/integration-tests/external_mini_cluster.cc
index 85f28dc..bc9ab1b 100644
--- a/src/kudu/integration-tests/external_mini_cluster.cc
+++ b/src/kudu/integration-tests/external_mini_cluster.cc
@@ -19,12 +19,13 @@
#include <algorithm>
#include <functional>
-#include <gtest/gtest.h>
#include <memory>
-#include <rapidjson/document.h>
#include <string>
#include <unordered_set>
+#include <gtest/gtest.h>
+#include <rapidjson/document.h>
+
#include "kudu/client/client.h"
#include "kudu/client/master_rpc.h"
#include "kudu/common/wire_protocol.h"
@@ -33,10 +34,10 @@
#include "kudu/gutil/strings/substitute.h"
#include "kudu/gutil/strings/util.h"
#include "kudu/master/master.proxy.h"
+#include "kudu/rpc/messenger.h"
#include "kudu/server/server_base.pb.h"
#include "kudu/server/server_base.proxy.h"
#include "kudu/tserver/tserver_service.proxy.h"
-#include "kudu/rpc/messenger.h"
#include "kudu/util/async_util.h"
#include "kudu/util/curl_util.h"
#include "kudu/util/env.h"
@@ -48,6 +49,7 @@
#include "kudu/util/net/sockaddr.h"
#include "kudu/util/path_util.h"
#include "kudu/util/pb_util.h"
+#include "kudu/util/status.h"
#include "kudu/util/stopwatch.h"
#include "kudu/util/subprocess.h"
#include "kudu/util/test_util.h"
@@ -741,7 +743,7 @@ Status ExternalDaemon::StartProcess(const vector<string>&
user_flags) {
ignore_result(Env::Default()->DeleteFile(info_path));
// Start the daemon.
- gscoped_ptr<Subprocess> p(new Subprocess(argv));
+ unique_ptr<Subprocess> p(new Subprocess(argv));
p->ShareParentStdout(false);
p->SetEnvVars(extra_env_);
string env_str;
@@ -822,21 +824,31 @@ void ExternalDaemon::SetExePath(string exe) {
}
Status ExternalDaemon::Pause() {
- if (!process_) return Status::OK();
+ if (!process_) {
+ return Status::IllegalState(Substitute(
+ "Request to pause '$0' but the process is not there", exe_));
+ }
VLOG(1) << "Pausing " << exe_ << " with pid " << process_->pid();
+ const Status s = process_->Kill(SIGSTOP);
+ RETURN_NOT_OK(s);
paused_ = true;
- return process_->Kill(SIGSTOP);
+ return s;
}
Status ExternalDaemon::Resume() {
- if (!process_) return Status::OK();
+ if (!process_) {
+ return Status::IllegalState(Substitute(
+ "Request to resume '$0' but the process is not there", exe_));
+ }
VLOG(1) << "Resuming " << exe_ << " with pid " << process_->pid();
+ const Status s = process_->Kill(SIGCONT);
+ RETURN_NOT_OK(s);
paused_ = false;
- return process_->Kill(SIGCONT);
+ return s;
}
bool ExternalDaemon::IsShutdown() const {
- return process_.get() == nullptr;
+ return !process_;
}
bool ExternalDaemon::IsProcessAlive() const {
@@ -1094,7 +1106,7 @@
ScopedResumeExternalDaemon::ScopedResumeExternalDaemon(ExternalDaemon* daemon)
}
ScopedResumeExternalDaemon::~ScopedResumeExternalDaemon() {
- daemon_->Resume();
+ WARN_NOT_OK(daemon_->Resume(), "Could not resume external daemon");
}
//------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/kudu/blob/86116e4c/src/kudu/integration-tests/external_mini_cluster.h
----------------------------------------------------------------------
diff --git a/src/kudu/integration-tests/external_mini_cluster.h
b/src/kudu/integration-tests/external_mini_cluster.h
index e8d25c8..edcc41e 100644
--- a/src/kudu/integration-tests/external_mini_cluster.h
+++ b/src/kudu/integration-tests/external_mini_cluster.h
@@ -14,6 +14,7 @@
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
+
#pragma once
#include <sys/types.h>
@@ -27,7 +28,6 @@
#include <boost/optional.hpp>
#include "kudu/client/client.h"
-#include "kudu/gutil/gscoped_ptr.h"
#include "kudu/gutil/macros.h"
#include "kudu/gutil/ref_counted.h"
#include "kudu/integration-tests/mini_cluster_base.h"
@@ -392,10 +392,10 @@ class ExternalDaemon : public
RefCountedThreadSafe<ExternalDaemon> {
Status EnableKerberos(MiniKdc* kdc, const std::string& bind_host);
// Sends a SIGSTOP signal to the daemon.
- Status Pause();
+ Status Pause() WARN_UNUSED_RESULT;
// Sends a SIGCONT signal to the daemon.
- Status Resume();
+ Status Resume() WARN_UNUSED_RESULT;
// Return true if we have explicitly shut down the process.
bool IsShutdown() const;
@@ -499,12 +499,12 @@ class ExternalDaemon : public
RefCountedThreadSafe<ExternalDaemon> {
std::vector<std::string> extra_flags_;
std::map<std::string, std::string> extra_env_;
- gscoped_ptr<Subprocess> process_;
+ std::unique_ptr<Subprocess> process_;
bool paused_ = false;
std::unique_ptr<Subprocess> perf_record_process_;
- gscoped_ptr<server::ServerStatusPB> status_;
+ std::unique_ptr<server::ServerStatusPB> status_;
std::string rpc_bind_address_;
// These capture the daemons parameters and running ports and
http://git-wip-us.apache.org/repos/asf/kudu/blob/86116e4c/src/kudu/integration-tests/master_failover-itest.cc
----------------------------------------------------------------------
diff --git a/src/kudu/integration-tests/master_failover-itest.cc
b/src/kudu/integration-tests/master_failover-itest.cc
index ed96b7f..5026e75 100644
--- a/src/kudu/integration-tests/master_failover-itest.cc
+++ b/src/kudu/integration-tests/master_failover-itest.cc
@@ -154,7 +154,7 @@ TEST_F(MasterFailoverTest, TestCreateTableSync) {
LOG(INFO) << "Pausing leader master";
int leader_idx;
ASSERT_OK(cluster_->GetLeaderMasterIndex(&leader_idx));
- cluster_->master(leader_idx)->Pause();
+ ASSERT_OK(cluster_->master(leader_idx)->Pause());
ScopedResumeExternalDaemon resume_daemon(cluster_->master(leader_idx));
// As Pause() is asynchronous, the following sequence of events is possible:
@@ -189,7 +189,7 @@ TEST_F(MasterFailoverTest, TestPauseAfterCreateTableIssued)
{
LOG(INFO) << "Pausing leader master";
int leader_idx;
ASSERT_OK(cluster_->GetLeaderMasterIndex(&leader_idx));
- cluster_->master(leader_idx)->Pause();
+ ASSERT_OK(cluster_->master(leader_idx)->Pause());
ScopedResumeExternalDaemon resume_daemon(cluster_->master(leader_idx));
MonoTime deadline = MonoTime::Now() + MonoDelta::FromSeconds(90);
@@ -215,7 +215,7 @@ TEST_F(MasterFailoverTest, TestDeleteTableSync) {
LOG(INFO) << "Pausing leader master";
int leader_idx;
ASSERT_OK(cluster_->GetLeaderMasterIndex(&leader_idx));
- cluster_->master(leader_idx)->Pause();
+ ASSERT_OK(cluster_->master(leader_idx)->Pause());
ScopedResumeExternalDaemon resume_daemon(cluster_->master(leader_idx));
// It's possible for DeleteTable() to delete the table and still return
@@ -249,7 +249,7 @@ TEST_F(MasterFailoverTest, TestRenameTableSync) {
LOG(INFO) << "Pausing leader master";
int leader_idx;
ASSERT_OK(cluster_->GetLeaderMasterIndex(&leader_idx));
- cluster_->master(leader_idx)->Pause();
+ ASSERT_OK(cluster_->master(leader_idx)->Pause());
ScopedResumeExternalDaemon resume_daemon(cluster_->master(leader_idx));
// It's possible for AlterTable() to rename the table and still return
@@ -449,7 +449,7 @@ TEST_F(MasterFailoverTest, TestMasterPermanentFailure) {
// Only in slow mode.
if (AllowSlowTests()) {
for (int j = 0; j < cluster_->num_masters(); j++) {
- cluster_->master(j)->Pause();
+ ASSERT_OK(cluster_->master(j)->Pause());
ScopedResumeExternalDaemon resume_daemon(cluster_->master(j));
string table_name = Substitute("table-$0-$1", i, j);
http://git-wip-us.apache.org/repos/asf/kudu/blob/86116e4c/src/kudu/integration-tests/master_migration-itest.cc
----------------------------------------------------------------------
diff --git a/src/kudu/integration-tests/master_migration-itest.cc
b/src/kudu/integration-tests/master_migration-itest.cc
index 29f2b2d..234f1ce 100644
--- a/src/kudu/integration-tests/master_migration-itest.cc
+++ b/src/kudu/integration-tests/master_migration-itest.cc
@@ -208,7 +208,7 @@ TEST_F(MasterMigrationTest, TestEndToEndMigration) {
// Only in slow mode.
if (AllowSlowTests()) {
for (int i = 0; i < migrated_cluster.num_masters(); i++) {
- migrated_cluster.master(i)->Pause();
+ ASSERT_OK(migrated_cluster.master(i)->Pause());
ScopedResumeExternalDaemon resume_daemon(migrated_cluster.master(i));
ASSERT_OK(client->OpenTable(kTableName, &table));
ASSERT_EQ(0, CountTableRows(table.get()));
http://git-wip-us.apache.org/repos/asf/kudu/blob/86116e4c/src/kudu/integration-tests/raft_consensus-itest.cc
----------------------------------------------------------------------
diff --git a/src/kudu/integration-tests/raft_consensus-itest.cc
b/src/kudu/integration-tests/raft_consensus-itest.cc
index ded7710..1e48a65 100644
--- a/src/kudu/integration-tests/raft_consensus-itest.cc
+++ b/src/kudu/integration-tests/raft_consensus-itest.cc
@@ -1639,7 +1639,7 @@ TEST_F(RaftConsensusITest, TestStepDownWithSlowFollower) {
// Stop both followers.
for (int i = 1; i < 3; i++) {
- cluster_->tablet_server_by_uuid(tservers[i]->uuid())->Pause();
+ ASSERT_OK(cluster_->tablet_server_by_uuid(tservers[i]->uuid())->Pause());
}
// Sleep a little bit of time to make sure that the leader has outstanding
heartbeats
@@ -1838,7 +1838,7 @@ TEST_F(RaftConsensusITest,
TestReplaceChangeConfigOperation) {
ASSERT_TRUE(s.IsTimedOut());
// Pause the leader, and restart the other servers.
- cluster_->tablet_server_by_uuid(tservers[0]->uuid())->Pause();
+ ASSERT_OK(cluster_->tablet_server_by_uuid(tservers[0]->uuid())->Pause());
ASSERT_OK(cluster_->tablet_server_by_uuid(tservers[1]->uuid())->Restart());
ASSERT_OK(cluster_->tablet_server_by_uuid(tservers[2]->uuid())->Restart());
@@ -1852,7 +1852,7 @@ TEST_F(RaftConsensusITest,
TestReplaceChangeConfigOperation) {
// Resume the original leader. Its change-config operation will now be
aborted
// since it was never replicated to the majority, and the new leader will
have
// replaced the operation.
- cluster_->tablet_server_by_uuid(tservers[0]->uuid())->Resume();
+ ASSERT_OK(cluster_->tablet_server_by_uuid(tservers[0]->uuid())->Resume());
// Insert some data and verify that it propagates to all servers.
NO_FATALS(InsertTestRowsRemoteThread(0, 10, 1, vector<CountDownLatch*>()));
@@ -2632,7 +2632,7 @@ TEST_F(RaftConsensusITest,
TestCommitIndexFarBehindAfterLeaderElection) {
// Pause the initial leader and wait for the replica to elect itself. The
third TS participates
// here by voting.
- first_leader_ts->Pause();
+ ASSERT_OK(first_leader_ts->Pause());
ASSERT_OK(WaitUntilLeader(tablet_servers_[second_leader_ts->uuid()],
tablet_id_, kTimeout));
// The voter TS has done its duty. Shut it down to avoid log spam where it
tries to run
@@ -2659,7 +2659,7 @@ TEST_F(RaftConsensusITest,
TestCommitIndexFarBehindAfterLeaderElection) {
// Now, when we resume the original leader, we expect them to recover
properly.
// Previously this triggered KUDU-1469.
- first_leader_ts->Resume();
+ ASSERT_OK(first_leader_ts->Resume());
TabletServerMap active_tservers = tablet_servers_;
active_tservers.erase(only_vote_ts->uuid());