tablet_server-stress-test: limit runtime of this test

This stress test is typically the long pole in the dist-test runtime. For example, in a recent ASAN run[1] the test took around 730 seconds.
This changes the behavior of the test to run for a prescribed amount of time (60 seconds in slow mode, 10 in fast). This should keep relatively good coverage while avoiding such long test runs. [1] http://dist-test.cloudera.org/trace?job_id=jenkins-slave.1473295331.9755 Change-Id: I7441f50bcd4788e3e54a90bd5f782201a7d4c6af Reviewed-on: http://gerrit.cloudera.org:8080/4329 Reviewed-by: Adar Dembo <[email protected]> Tested-by: Kudu Jenkins Project: http://git-wip-us.apache.org/repos/asf/kudu/repo Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/46d9ed7a Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/46d9ed7a Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/46d9ed7a Branch: refs/heads/master Commit: 46d9ed7aa86e7bcd9649ec37af1fbd8369d5c0fe Parents: 2876683 Author: Todd Lipcon <[email protected]> Authored: Wed Sep 7 18:23:52 2016 -0700 Committer: Todd Lipcon <[email protected]> Committed: Thu Sep 8 02:06:54 2016 +0000 ---------------------------------------------------------------------- src/kudu/scripts/benchmarks.sh | 2 +- src/kudu/tserver/tablet_server-stress-test.cc | 39 +++++++++++++++++----- 2 files changed, 32 insertions(+), 9 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/kudu/blob/46d9ed7a/src/kudu/scripts/benchmarks.sh ---------------------------------------------------------------------- diff --git a/src/kudu/scripts/benchmarks.sh b/src/kudu/scripts/benchmarks.sh index 799d7be..be110f1 100755 --- a/src/kudu/scripts/benchmarks.sh +++ b/src/kudu/scripts/benchmarks.sh @@ -240,7 +240,7 @@ run_benchmarks() { # Run multi-threaded TS insert benchmark for i in $(seq 1 $NUM_SAMPLES) ; do KUDU_ALLOW_SLOW_TESTS=1 build/latest/bin/tablet_server-stress-test \ - --num_inserts_per_thread=30000 &> $LOGDIR/${TS_8THREAD_BENCH}$i.log + --num_inserts_per_thread=30000 -runtime_secs=0 &> $LOGDIR/${TS_8THREAD_BENCH}$i.log done # Run full stack scan/insert test using MRS only, ~26s 
each http://git-wip-us.apache.org/repos/asf/kudu/blob/46d9ed7a/src/kudu/tserver/tablet_server-stress-test.cc ---------------------------------------------------------------------- diff --git a/src/kudu/tserver/tablet_server-stress-test.cc b/src/kudu/tserver/tablet_server-stress-test.cc index cfe24b1..87d24fd 100644 --- a/src/kudu/tserver/tablet_server-stress-test.cc +++ b/src/kudu/tserver/tablet_server-stress-test.cc @@ -16,12 +16,22 @@ // under the License. #include "kudu/tserver/tablet_server-test-base.h" +#include <thread> + #include "kudu/gutil/strings/substitute.h" #include "kudu/util/countdown_latch.h" #include "kudu/util/stopwatch.h" +DEFINE_int32(runtime_secs, 10, + "Maximum number of seconds to run. If the threads have not completed " + "inserting by this time, they will stop regardless. Set to 0 to disable " + "the timeout."); DEFINE_int32(num_inserter_threads, 8, "Number of inserter threads to run"); -DEFINE_int32(num_inserts_per_thread, 0, "Number of inserts from each thread"); +DEFINE_int32(num_inserts_per_thread, 100000000, + "Number of inserts from each thread. If 'runtime_secs' is non-zero, threads will " + "exit after that time out even if they have not inserted the desired number. The " + "default is set high so that, typically, the 'runtime_secs' parameter determines " + "how long this test will run."); DECLARE_bool(enable_maintenance_manager); METRIC_DEFINE_histogram(test, insert_latency, @@ -37,11 +47,10 @@ namespace tserver { class TSStressTest : public TabletServerTestBase { public: TSStressTest() - : start_latch_(FLAGS_num_inserter_threads) { + : start_latch_(FLAGS_num_inserter_threads), + stop_latch_(1) { - if (FLAGS_num_inserts_per_thread == 0) { - FLAGS_num_inserts_per_thread = AllowSlowTests() ? 100000 : 1000; - } + OverrideFlagForSlowTests("runtime_secs", "60"); // Re-enable the maintenance manager which is disabled by default // in TS tests. 
We want to stress the whole system including @@ -76,6 +85,7 @@ class TSStressTest : public TabletServerTestBase { protected: scoped_refptr<Histogram> histogram_; CountDownLatch start_latch_; + CountDownLatch stop_latch_; std::vector<scoped_refptr<kudu::Thread> > threads_; }; @@ -87,7 +97,7 @@ void TSStressTest::InserterThread(int thread_idx) { uint64_t max_rows = FLAGS_num_inserts_per_thread; int start_row = thread_idx * max_rows; - for (int i = start_row; i < start_row + max_rows ; i++) { + for (int i = start_row; i < start_row + max_rows && stop_latch_.count() > 0; i++) { MonoTime before = MonoTime::Now(); InsertTestRowsRemote(thread_idx, i, 1); MonoTime after = MonoTime::Now(); @@ -98,23 +108,36 @@ void TSStressTest::InserterThread(int thread_idx) { } TEST_F(TSStressTest, TestMTInserts) { + std::thread timeout_thread; StartThreads(); Stopwatch s(Stopwatch::ALL_THREADS); s.start(); + + // Start a thread to fire 'stop_latch_' after the prescribed number of seconds. + if (FLAGS_runtime_secs > 0) { + timeout_thread = std::thread([&]() { + stop_latch_.WaitFor(MonoDelta::FromSeconds(FLAGS_runtime_secs)); + stop_latch_.CountDown(); + }); + } JoinThreads(); s.stop(); - int num_rows = (FLAGS_num_inserter_threads * FLAGS_num_inserts_per_thread); + + int num_rows = histogram_->TotalCount(); LOG(INFO) << "Inserted " << num_rows << " rows in " << s.elapsed().wall_millis() << " ms"; LOG(INFO) << "Throughput: " << (num_rows * 1000 / s.elapsed().wall_millis()) << " rows/sec"; LOG(INFO) << "CPU efficiency: " << (num_rows / s.elapsed().user_cpu_seconds()) << " rows/cpusec"; - // Generate the JSON. std::stringstream out; JsonWriter writer(&out, JsonWriter::PRETTY); ASSERT_OK(histogram_->WriteAsJson(&writer, MetricJsonOptions())); LOG(INFO) << out.str(); + + // Ensure the timeout thread is stopped before exiting. + stop_latch_.CountDown(); + if (timeout_thread.joinable()) timeout_thread.join(); } } // namespace tserver
