Repository: incubator-impala Updated Branches: refs/heads/master bf1d9677f -> 48085274f
IMPALA-4329: Prevent crash in scheduler when no backends are registered The scheduler crashed with a segmentation fault when there were no backends registered: After not being able to find a local backend (none are configured at all) in ComputeScanRangeAssignment(), the previous code would eventually try to return the top of assignment_ctx.assignment_heap in SelectRemoteBackendHost(), but that heap would be empty. Subsequently, when using the IP address of that heap node, a segmentation fault would occur. This change adds a check and aborts scheduling with an error. It also contains a test. Change-Id: I6d93158f34841ea66dc3682290266262c87ea7ff Reviewed-on: http://gerrit.cloudera.org:8080/4776 Reviewed-by: Dan Hecht <[email protected]> Tested-by: Internal Jenkins Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/2fa1633e Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/2fa1633e Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/2fa1633e Branch: refs/heads/master Commit: 2fa1633e4090000e9018c012df81f291a0d7566e Parents: bf1d967 Author: Lars Volker <[email protected]> Authored: Thu Oct 20 14:19:56 2016 -0700 Committer: Internal Jenkins <[email protected]> Committed: Fri Oct 21 03:16:30 2016 +0000 ---------------------------------------------------------------------- be/src/scheduling/simple-scheduler-test-util.cc | 6 +++--- be/src/scheduling/simple-scheduler-test-util.h | 5 +++-- be/src/scheduling/simple-scheduler-test.cc | 19 +++++++++++++++++++ be/src/scheduling/simple-scheduler.cc | 8 +++++++- common/thrift/generate_error_codes.py | 3 +++ 5 files changed, 35 insertions(+), 6 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/2fa1633e/be/src/scheduling/simple-scheduler-test-util.cc ---------------------------------------------------------------------- diff --git a/be/src/scheduling/simple-scheduler-test-util.cc b/be/src/scheduling/simple-scheduler-test-util.cc index 3e14ea7..d3f5584 100644 --- a/be/src/scheduling/simple-scheduler-test-util.cc +++ b/be/src/scheduling/simple-scheduler-test-util.cc @@ -449,13 +449,13 @@ SchedulerWrapper::SchedulerWrapper(const Plan& plan) InitializeScheduler(); } -void SchedulerWrapper::Compute(bool exec_at_coord, Result* result) { +Status SchedulerWrapper::Compute(bool exec_at_coord, Result* result) { DCHECK(scheduler_ != NULL); // Compute Assignment. FragmentScanRangeAssignment* assignment = result->AddAssignment(); - scheduler_->ComputeScanRangeAssignment(*scheduler_->GetBackendConfig(), 0, NULL, false, - plan_.scan_range_locations(), plan_.referenced_datanodes(), exec_at_coord, + return scheduler_->ComputeScanRangeAssignment(*scheduler_->GetBackendConfig(), 0, NULL, + false, plan_.scan_range_locations(), plan_.referenced_datanodes(), exec_at_coord, plan_.query_options(), NULL, assignment); } http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/2fa1633e/be/src/scheduling/simple-scheduler-test-util.h ---------------------------------------------------------------------- diff --git a/be/src/scheduling/simple-scheduler-test-util.h b/be/src/scheduling/simple-scheduler-test-util.h index 85bb1a5..ab46e2a 100644 --- a/be/src/scheduling/simple-scheduler-test-util.h +++ b/be/src/scheduling/simple-scheduler-test-util.h @@ -21,6 +21,7 @@ #include <boost/scoped_ptr.hpp> +#include "common/status.h" #include "gen-cpp/ImpalaInternalService.h" // for TQueryOptions #include "scheduling/query-schedule.h" #include "util/metrics.h" @@ -421,10 +422,10 @@ class SchedulerWrapper { SchedulerWrapper(const Plan& plan); /// Call ComputeScanRangeAssignment() with exec_at_coord set to false. - void Compute(Result* result) { Compute(false, result); } + Status Compute(Result* result) { return Compute(false, result); } /// Call ComputeScanRangeAssignment(). - void Compute(bool exec_at_coord, Result* result); + Status Compute(bool exec_at_coord, Result* result); /// Reset the state of the scheduler by re-creating and initializing it. void Reset() { InitializeScheduler(); } http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/2fa1633e/be/src/scheduling/simple-scheduler-test.cc ---------------------------------------------------------------------- diff --git a/be/src/scheduling/simple-scheduler-test.cc b/be/src/scheduling/simple-scheduler-test.cc index d9964af..5743ff2 100644 --- a/be/src/scheduling/simple-scheduler-test.cc +++ b/be/src/scheduling/simple-scheduler-test.cc @@ -361,6 +361,25 @@ TEST_F(SchedulerTest, TestSendUpdates) { EXPECT_EQ(0, result.NumDiskAssignedBytes(1)); } +/// IMPALA-4329: Test scheduling with no backends. +TEST_F(SchedulerTest, TestEmptyBackendConfig) { + Cluster cluster; + cluster.AddHost(false, true); + + Schema schema(cluster); + schema.AddMultiBlockTable("T", 1, ReplicaPlacement::REMOTE_ONLY, 1); + + Plan plan(schema); + plan.AddTableScan("T"); + + Result result(plan); + SchedulerWrapper scheduler(plan); + Status status = scheduler.Compute(&result); + EXPECT_TRUE(!status.ok()); + EXPECT_EQ( + status.GetDetail(), "Cannot schedule query: no registered backends available.\n"); +} + } // end namespace impala IMPALA_TEST_MAIN(); http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/2fa1633e/be/src/scheduling/simple-scheduler.cc ---------------------------------------------------------------------- diff --git a/be/src/scheduling/simple-scheduler.cc b/be/src/scheduling/simple-scheduler.cc index 9b52d5a..bc500c8 100644 --- a/be/src/scheduling/simple-scheduler.cc +++ b/be/src/scheduling/simple-scheduler.cc @@ -537,6 +537,10 @@ Status SimpleScheduler::ComputeScanRangeAssignment( const vector<TNetworkAddress>& host_list, bool exec_at_coord, const TQueryOptions& query_options, RuntimeProfile::Counter* timer, FragmentScanRangeAssignment* assignment) { + if (backend_config.NumBackends() == 0) { + return Status(TErrorCode::NO_REGISTERED_BACKENDS); + } + SCOPED_TIMER(timer); // We adjust all replicas with memory distance less than base_distance to base_distance // and collect all replicas with equal or better distance as candidates. For a full list @@ -917,6 +921,7 @@ SimpleScheduler::AssignmentCtx::AssignmentCtx( : backend_config_(backend_config), first_unused_backend_idx_(0), total_assignments_(total_assignments), total_local_assignments_(total_local_assignments) { + DCHECK_GT(backend_config.NumBackends(), 0); backend_config.GetAllBackendIps(&random_backend_order_); std::mt19937 g(rand()); std::shuffle(random_backend_order_.begin(), random_backend_order_.end(), g); @@ -965,7 +970,8 @@ const IpAddr* SimpleScheduler::AssignmentCtx::SelectRemoteBackendHost() { } else { // Pick next backend from assignment_heap. All backends must have been inserted into // the heap at this point. - DCHECK(backend_config_.NumBackends() == assignment_heap_.size()); + DCHECK_GT(backend_config_.NumBackends(), 0); + DCHECK_EQ(backend_config_.NumBackends(), assignment_heap_.size()); candidate_ip = &(assignment_heap_.top().ip); } DCHECK(candidate_ip != NULL); http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/2fa1633e/common/thrift/generate_error_codes.py ---------------------------------------------------------------------- diff --git a/common/thrift/generate_error_codes.py b/common/thrift/generate_error_codes.py index 131947a..ae338d5 100755 --- a/common/thrift/generate_error_codes.py +++ b/common/thrift/generate_error_codes.py @@ -289,6 +289,9 @@ error_codes = ( ("PARQUET_ZERO_ROWS_IN_NON_EMPTY_FILE", 93, "File '$0' is corrupt: metadata indicates " "a zero row count but there is at least one non-empty row group."), + + ("NO_REGISTERED_BACKENDS", 94, "Cannot schedule query: no registered backends " + "available."), ) import sys
