IMPALA-4086: Add benchmark for simple scheduler Change-Id: I89ec1c6c1828bb0b86d1e13ce4dfc5a8df865c2e Reviewed-on: http://gerrit.cloudera.org:8080/4554 Reviewed-by: Lars Volker <[email protected]> Tested-by: Impala Public Jenkins
Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/2ee16067 Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/2ee16067 Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/2ee16067 Branch: refs/heads/master Commit: 2ee160679ca693777ad6764706a251755ad6ece3 Parents: e90afcb Author: Lars Volker <[email protected]> Authored: Fri Sep 9 13:28:05 2016 +0200 Committer: Impala Public Jenkins <[email protected]> Committed: Sat Jul 29 05:43:58 2017 +0000 ---------------------------------------------------------------------- be/src/benchmarks/CMakeLists.txt | 1 + be/src/benchmarks/scheduler-benchmark.cc | 171 ++++++++++++++++++++++++++ be/src/scheduling/scheduler.h | 8 +- be/src/util/debug-util.cc | 2 + be/src/util/debug-util.h | 1 + 5 files changed, 178 insertions(+), 5 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/2ee16067/be/src/benchmarks/CMakeLists.txt ---------------------------------------------------------------------- diff --git a/be/src/benchmarks/CMakeLists.txt b/be/src/benchmarks/CMakeLists.txt index e954583..1d67d45 100644 --- a/be/src/benchmarks/CMakeLists.txt +++ b/be/src/benchmarks/CMakeLists.txt @@ -50,6 +50,7 @@ ADD_BE_BENCHMARK(overflow-benchmark) ADD_BE_BENCHMARK(parse-timestamp-benchmark) ADD_BE_BENCHMARK(rle-benchmark) ADD_BE_BENCHMARK(row-batch-serialize-benchmark) +ADD_BE_BENCHMARK(scheduler-benchmark) ADD_BE_BENCHMARK(status-benchmark) ADD_BE_BENCHMARK(string-benchmark) ADD_BE_BENCHMARK(string-compare-benchmark) http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/2ee16067/be/src/benchmarks/scheduler-benchmark.cc ---------------------------------------------------------------------- diff --git a/be/src/benchmarks/scheduler-benchmark.cc b/be/src/benchmarks/scheduler-benchmark.cc new file mode 100644 index 0000000..df9e981 --- /dev/null +++ b/be/src/benchmarks/scheduler-benchmark.cc @@ -0,0 +1,171 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include <iostream> +#include <string> +#include <vector> + +#include "gutil/strings/substitute.h" +#include "scheduling/scheduler.h" +#include "scheduling/scheduler-test-util.h" +#include "util/benchmark.h" +#include "util/cpu-info.h" +#include "util/debug-util.h" +#include "util/thread.h" + +#include "common/names.h" + +using namespace impala; +using namespace impala::test; + + +// This benchmark exercises the core scheduling method 'ComputeScanRangeAssignment()' of +// the Scheduler class for various cluster and table sizes. It makes the following +// assumptions: +// - All nodes run local impalads. The benchmark includes suites for both DISK_LOCAL and +// REMOTE scheduling preferences. Having datanodes without a local impalad would result +// in a performance somewhere between these two. +// - The plan only scans one table. All logic in the scheduler is built around assigning +// blocks to impalad backends, which scan them. Having multiple tables will merely +// repeat this process. The interesting metric is varying the number of blocks per +// table. +// - Blocks and files are treated as the same thing from the scheduler's perspective. +// Scheduling happens on scan ranges, which are issued based on file blocks by the +// frontend. +// +// Machine Info: Intel(R) Core(TM) i7-4790 CPU @ 3.60GHz +// Cluster Size, DISK_LOCAL: Function iters/ms 10%ile 50%ile 90%ile 10%ile 50%ile 90%ile +// (relative) (relative) (relative) +// --------------------------------------------------------------------------------------------------------- +// 3 Hosts 13.2 13.5 13.5 1X 1X 1X +// 10 Hosts 12.6 12.6 12.8 0.951X 0.937X 0.951X +// 50 Hosts 9.34 9.52 9.53 0.705X 0.706X 0.706X +// 100 Hosts 8.68 8.68 8.68 0.655X 0.643X 0.643X +// 500 Hosts 5.57 5.67 5.7 0.42X 0.421X 0.423X +// 1000 Hosts 4.02 4.1 4.1 0.304X 0.304X 0.304X +// 3000 Hosts 1.85 1.85 1.86 0.14X 0.137X 0.138X +// 10000 Hosts 0.577 0.588 0.588 0.0436X 0.0436X 0.0436X +// +// Cluster Size, REMOTE: Function iters/ms 10%ile 50%ile 90%ile 10%ile 50%ile 90%ile +// (relative) (relative) (relative) +// --------------------------------------------------------------------------------------------------------- +// 3 Hosts 23.8 23.9 24.1 1X 1X 1X +// 10 Hosts 20.4 20.9 21.1 0.854X 0.877X 0.878X +// 50 Hosts 15 15 15.2 0.628X 0.628X 0.632X +// 100 Hosts 12.6 12.7 12.7 0.53X 0.532X 0.528X +// 500 Hosts 7.38 7.55 7.6 0.31X 0.316X 0.316X +// 1000 Hosts 4.9 4.93 4.93 0.206X 0.207X 0.205X +// 3000 Hosts 1.9 1.96 1.96 0.0797X 0.0823X 0.0817X +// 10000 Hosts 0.577 0.588 0.588 0.0242X 0.0246X 0.0245X +// +// Number of Blocks: Function iters/ms 10%ile 50%ile 90%ile 10%ile 50%ile 90%ile +// (relative) (relative) (relative) +// --------------------------------------------------------------------------------------------------------- +// 1 Blocks 74.5 75.2 76.1 1X 1X 1X +// 10 Blocks 44.4 44.6 45.1 0.596X 0.593X 0.593X +// 100 Blocks 8.46 8.46 8.49 0.114X 0.113X 0.112X +// 1000 Blocks 0.981 1 1 0.0132X 0.0133X 0.0131X +// 10000 Blocks 0.1 0.102 0.103 0.00134X 0.00136X 0.00136X + +static const vector<int> CLUSTER_SIZES = {3, 10, 50, 100, 500, 1000, 3000, 10000}; +static const int DEFAULT_CLUSTER_SIZE = 100; +static const vector<int> NUM_BLOCKS_PER_TABLE = {1, 10, 100, 1000, 10000}; +static const int DEFAULT_NUM_BLOCKS_PER_TABLE = 100; + +/// Members of this struct are needed to build the test fixtures and depend on each other. +/// Since their constructors take const references they must be constructed in order, +/// which is why we keep pointers to them here. +struct TestCtx { + std::unique_ptr<Cluster> cluster; + std::unique_ptr<Schema> schema; + std::unique_ptr<Plan> plan; + std::unique_ptr<Result> result; + std::unique_ptr<SchedulerWrapper> scheduler_wrapper; +}; + +/// Initialize a test context for a single benchmark run. +void InitializeTestCtx(int num_hosts, int num_blocks, + TReplicaPreference::type replica_preference, TestCtx* test_ctx) { + test_ctx->cluster.reset(new Cluster()); + test_ctx->cluster->AddHosts(num_hosts, true, true); + + test_ctx->schema.reset(new Schema(*test_ctx->cluster)); + test_ctx->schema->AddMultiBlockTable("T0", num_blocks, ReplicaPlacement::LOCAL_ONLY, 3); + + test_ctx->plan.reset(new Plan(*test_ctx->schema)); + test_ctx->plan->SetReplicaPreference(replica_preference); + test_ctx->plan->SetRandomReplica(true); + test_ctx->plan->AddTableScan("T0"); + + test_ctx->result.reset(new Result(*test_ctx->plan)); + + test_ctx->scheduler_wrapper.reset(new SchedulerWrapper(*test_ctx->plan)); +} + +/// This function is passed to the test framework and executes the scheduling method +/// repeatedly. +void BenchmarkFunction(int num_iterations, void* data) { + TestCtx* test_ctx = static_cast<TestCtx*>(data); + for (int i = 0; i < num_iterations; ++i) { + test_ctx->result->Reset(); + test_ctx->scheduler_wrapper->Compute(test_ctx->result.get()); + } +} + +/// Build and run a benchmark suite for various cluster sizes with the default number of +/// blocks. Scheduling will be done according to the parameter 'replica_preference'. +void RunClusterSizeBenchmark(TReplicaPreference::type replica_preference) { + string suite_name = strings::Substitute( + "Cluster Size, $0", PrintTReplicaPreference(replica_preference)); + Benchmark suite(suite_name, false /* micro_heuristics */); + vector<TestCtx> test_ctx(CLUSTER_SIZES.size()); + + for (int i = 0; i < CLUSTER_SIZES.size(); ++i) { + int cluster_size = CLUSTER_SIZES[i]; + InitializeTestCtx( + cluster_size, DEFAULT_NUM_BLOCKS_PER_TABLE, replica_preference, &test_ctx[i]); + string benchmark_name = strings::Substitute("$0 Hosts", cluster_size); + suite.AddBenchmark(benchmark_name, BenchmarkFunction, &test_ctx[i]); + } + cout << suite.Measure() << endl; +} + +/// Build and run a benchmark suite for various table sizes with the default cluster size. +/// Scheduling will be done according to the parameter 'replica_preference'. +void RunNumBlocksBenchmark(TReplicaPreference::type replica_preference) { + Benchmark suite("Number of Blocks", false /* micro_heuristics */); + vector<TestCtx> test_ctx(NUM_BLOCKS_PER_TABLE.size()); + + for (int i = 0; i < NUM_BLOCKS_PER_TABLE.size(); ++i) { + int num_blocks = NUM_BLOCKS_PER_TABLE[i]; + InitializeTestCtx(DEFAULT_CLUSTER_SIZE, num_blocks, replica_preference, &test_ctx[i]); + string benchmark_name = strings::Substitute("$0 Blocks", num_blocks); + suite.AddBenchmark(benchmark_name, BenchmarkFunction, &test_ctx[i]); + } + cout << suite.Measure() << endl; +} + +int main(int argc, char** argv) { + google::InitGoogleLogging(argv[0]); + CpuInfo::Init(); + impala::InitThreading(); + + cout << Benchmark::GetMachineInfo() << endl; + RunClusterSizeBenchmark(TReplicaPreference::DISK_LOCAL); + RunClusterSizeBenchmark(TReplicaPreference::REMOTE); + RunNumBlocksBenchmark(TReplicaPreference::DISK_LOCAL); +} http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/2ee16067/be/src/scheduling/scheduler.h ---------------------------------------------------------------------- diff --git a/be/src/scheduling/scheduler.h b/be/src/scheduling/scheduler.h index 8d57c32..043deac 100644 --- a/be/src/scheduling/scheduler.h +++ b/be/src/scheduling/scheduler.h @@ -65,11 +65,8 @@ class SchedulerWrapper; /// TODO: Inject global dependencies into the class (for example ExecEnv::GetInstance(), /// RNG used during scheduling, FLAGS_*) /// to make it testable. -/// TODO: Benchmark the performance of the scheduler. The tests need to include setups -/// with: -/// - Small and large number of executors. -/// - Small and large query plans. -/// - Scheduling query plans with concurrent updates to the internal executor +/// TODO: Extend the benchmarks of the scheduler. The tests need to include setups with: +/// - Scheduling query plans with concurrent updates to the internal backend /// configuration. class Scheduler { public: @@ -334,6 +331,7 @@ class Scheduler { /// the schedule's TQueryExecRequest.plan_exec_info. /// Unpartitioned fragments are assigned to the coordinator. Populate the schedule's /// fragment_exec_params_ with the resulting scan range assignment. + /// We have a benchmark for this method in be/src/benchmarks/scheduler-benchmark.cc. Status ComputeScanRangeAssignment(QuerySchedule* schedule); /// Process the list of scan ranges of a single plan node and compute scan range http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/2ee16067/be/src/util/debug-util.cc ---------------------------------------------------------------------- diff --git a/be/src/util/debug-util.cc b/be/src/util/debug-util.cc index 6a94496..48df592 100644 --- a/be/src/util/debug-util.cc +++ b/be/src/util/debug-util.cc @@ -78,6 +78,7 @@ THRIFT_ENUM_OUTPUT_FN(TDdlType); THRIFT_ENUM_OUTPUT_FN(TCatalogOpType); THRIFT_ENUM_OUTPUT_FN(THdfsFileFormat); THRIFT_ENUM_OUTPUT_FN(THdfsCompression); +THRIFT_ENUM_OUTPUT_FN(TReplicaPreference); THRIFT_ENUM_OUTPUT_FN(TSessionType); THRIFT_ENUM_OUTPUT_FN(TStmtType); THRIFT_ENUM_OUTPUT_FN(QueryState); @@ -91,6 +92,7 @@ THRIFT_ENUM_OUTPUT_FN(TImpalaQueryOptions); THRIFT_ENUM_PRINT_FN(TCatalogObjectType); THRIFT_ENUM_PRINT_FN(TDdlType); THRIFT_ENUM_PRINT_FN(TCatalogOpType); +THRIFT_ENUM_PRINT_FN(TReplicaPreference); THRIFT_ENUM_PRINT_FN(TSessionType); THRIFT_ENUM_PRINT_FN(TStmtType); THRIFT_ENUM_PRINT_FN(QueryState); http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/2ee16067/be/src/util/debug-util.h ---------------------------------------------------------------------- diff --git a/be/src/util/debug-util.h b/be/src/util/debug-util.h index fcfb9d6..29fd6bb 100644 --- a/be/src/util/debug-util.h +++ b/be/src/util/debug-util.h @@ -64,6 +64,7 @@ std::string PrintPlanNodeType(const TPlanNodeType::type& type); std::string PrintTCatalogObjectType(const TCatalogObjectType::type& type); std::string PrintTDdlType(const TDdlType::type& type); std::string PrintTCatalogOpType(const TCatalogOpType::type& type); +std::string PrintTReplicaPreference(const TReplicaPreference::type& type); std::string PrintTSessionType(const TSessionType::type& type); std::string PrintTStmtType(const TStmtType::type& type); std::string PrintQueryState(const beeswax::QueryState::type& type);
