IMPALA-4329: Prevent crash in scheduler when no backends are registered

The scheduler crashed with a segmentation fault when there were no
backends registered: After not being able to find a local backend (none
are configured at all) in ComputeScanRangeAssignment(), the previous
code would eventually try to return the top of
assignment_ctx.assignment_heap in SelectRemoteBackendHost(), but that
heap would be empty. Subsequently, when using the IP address of that
heap node, a segmentation fault would occur.

This change adds a check and aborts scheduling with an error. It also
contains a test.

Change-Id: I6d93158f34841ea66dc3682290266262c87ea7ff
Reviewed-on: http://gerrit.cloudera.org:8080/4776
Reviewed-by: Dan Hecht <[email protected]>
Tested-by: Internal Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/2fa1633e
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/2fa1633e
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/2fa1633e

Branch: refs/heads/hadoop-next
Commit: 2fa1633e4090000e9018c012df81f291a0d7566e
Parents: bf1d967
Author: Lars Volker <[email protected]>
Authored: Thu Oct 20 14:19:56 2016 -0700
Committer: Internal Jenkins <[email protected]>
Committed: Fri Oct 21 03:16:30 2016 +0000

----------------------------------------------------------------------
 be/src/scheduling/simple-scheduler-test-util.cc |  6 +++---
 be/src/scheduling/simple-scheduler-test-util.h  |  5 +++--
 be/src/scheduling/simple-scheduler-test.cc      | 19 +++++++++++++++++++
 be/src/scheduling/simple-scheduler.cc           |  8 +++++++-
 common/thrift/generate_error_codes.py           |  3 +++
 5 files changed, 35 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/2fa1633e/be/src/scheduling/simple-scheduler-test-util.cc
----------------------------------------------------------------------
diff --git a/be/src/scheduling/simple-scheduler-test-util.cc 
b/be/src/scheduling/simple-scheduler-test-util.cc
index 3e14ea7..d3f5584 100644
--- a/be/src/scheduling/simple-scheduler-test-util.cc
+++ b/be/src/scheduling/simple-scheduler-test-util.cc
@@ -449,13 +449,13 @@ SchedulerWrapper::SchedulerWrapper(const Plan& plan)
   InitializeScheduler();
 }
 
-void SchedulerWrapper::Compute(bool exec_at_coord, Result* result) {
+Status SchedulerWrapper::Compute(bool exec_at_coord, Result* result) {
   DCHECK(scheduler_ != NULL);
 
   // Compute Assignment.
   FragmentScanRangeAssignment* assignment = result->AddAssignment();
-  scheduler_->ComputeScanRangeAssignment(*scheduler_->GetBackendConfig(), 0, 
NULL, false,
-      plan_.scan_range_locations(), plan_.referenced_datanodes(), 
exec_at_coord,
+  return 
scheduler_->ComputeScanRangeAssignment(*scheduler_->GetBackendConfig(), 0, NULL,
+      false, plan_.scan_range_locations(), plan_.referenced_datanodes(), 
exec_at_coord,
       plan_.query_options(), NULL, assignment);
 }
 

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/2fa1633e/be/src/scheduling/simple-scheduler-test-util.h
----------------------------------------------------------------------
diff --git a/be/src/scheduling/simple-scheduler-test-util.h 
b/be/src/scheduling/simple-scheduler-test-util.h
index 85bb1a5..ab46e2a 100644
--- a/be/src/scheduling/simple-scheduler-test-util.h
+++ b/be/src/scheduling/simple-scheduler-test-util.h
@@ -21,6 +21,7 @@
 
 #include <boost/scoped_ptr.hpp>
 
+#include "common/status.h"
 #include "gen-cpp/ImpalaInternalService.h" // for TQueryOptions
 #include "scheduling/query-schedule.h"
 #include "util/metrics.h"
@@ -421,10 +422,10 @@ class SchedulerWrapper {
   SchedulerWrapper(const Plan& plan);
 
   /// Call ComputeScanRangeAssignment() with exec_at_coord set to false.
-  void Compute(Result* result) { Compute(false, result); }
+  Status Compute(Result* result) { return Compute(false, result); }
 
   /// Call ComputeScanRangeAssignment().
-  void Compute(bool exec_at_coord, Result* result);
+  Status Compute(bool exec_at_coord, Result* result);
 
   /// Reset the state of the scheduler by re-creating and initializing it.
   void Reset() { InitializeScheduler(); }

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/2fa1633e/be/src/scheduling/simple-scheduler-test.cc
----------------------------------------------------------------------
diff --git a/be/src/scheduling/simple-scheduler-test.cc 
b/be/src/scheduling/simple-scheduler-test.cc
index d9964af..5743ff2 100644
--- a/be/src/scheduling/simple-scheduler-test.cc
+++ b/be/src/scheduling/simple-scheduler-test.cc
@@ -361,6 +361,25 @@ TEST_F(SchedulerTest, TestSendUpdates) {
   EXPECT_EQ(0, result.NumDiskAssignedBytes(1));
 }
 
+/// IMPALA-4329: Test scheduling with no backends.
+TEST_F(SchedulerTest, TestEmptyBackendConfig) {
+  Cluster cluster;
+  cluster.AddHost(false, true);
+
+  Schema schema(cluster);
+  schema.AddMultiBlockTable("T", 1, ReplicaPlacement::REMOTE_ONLY, 1);
+
+  Plan plan(schema);
+  plan.AddTableScan("T");
+
+  Result result(plan);
+  SchedulerWrapper scheduler(plan);
+  Status status = scheduler.Compute(&result);
+  EXPECT_TRUE(!status.ok());
+  EXPECT_EQ(
+      status.GetDetail(), "Cannot schedule query: no registered backends 
available.\n");
+}
+
 }  // end namespace impala
 
 IMPALA_TEST_MAIN();

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/2fa1633e/be/src/scheduling/simple-scheduler.cc
----------------------------------------------------------------------
diff --git a/be/src/scheduling/simple-scheduler.cc 
b/be/src/scheduling/simple-scheduler.cc
index 9b52d5a..bc500c8 100644
--- a/be/src/scheduling/simple-scheduler.cc
+++ b/be/src/scheduling/simple-scheduler.cc
@@ -537,6 +537,10 @@ Status SimpleScheduler::ComputeScanRangeAssignment(
     const vector<TNetworkAddress>& host_list, bool exec_at_coord,
     const TQueryOptions& query_options, RuntimeProfile::Counter* timer,
     FragmentScanRangeAssignment* assignment) {
+  if (backend_config.NumBackends() == 0) {
+    return Status(TErrorCode::NO_REGISTERED_BACKENDS);
+  }
+
   SCOPED_TIMER(timer);
   // We adjust all replicas with memory distance less than base_distance to 
base_distance
   // and collect all replicas with equal or better distance as candidates. For 
a full list
@@ -917,6 +921,7 @@ SimpleScheduler::AssignmentCtx::AssignmentCtx(
   : backend_config_(backend_config), first_unused_backend_idx_(0),
     total_assignments_(total_assignments),
     total_local_assignments_(total_local_assignments) {
+  DCHECK_GT(backend_config.NumBackends(), 0);
   backend_config.GetAllBackendIps(&random_backend_order_);
   std::mt19937 g(rand());
   std::shuffle(random_backend_order_.begin(), random_backend_order_.end(), g);
@@ -965,7 +970,8 @@ const IpAddr* 
SimpleScheduler::AssignmentCtx::SelectRemoteBackendHost() {
   } else {
     // Pick next backend from assignment_heap. All backends must have been 
inserted into
     // the heap at this point.
-    DCHECK(backend_config_.NumBackends() == assignment_heap_.size());
+    DCHECK_GT(backend_config_.NumBackends(), 0);
+    DCHECK_EQ(backend_config_.NumBackends(), assignment_heap_.size());
     candidate_ip = &(assignment_heap_.top().ip);
   }
   DCHECK(candidate_ip != NULL);

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/2fa1633e/common/thrift/generate_error_codes.py
----------------------------------------------------------------------
diff --git a/common/thrift/generate_error_codes.py 
b/common/thrift/generate_error_codes.py
index 131947a..ae338d5 100755
--- a/common/thrift/generate_error_codes.py
+++ b/common/thrift/generate_error_codes.py
@@ -289,6 +289,9 @@ error_codes = (
 
   ("PARQUET_ZERO_ROWS_IN_NON_EMPTY_FILE", 93, "File '$0' is corrupt: metadata 
indicates "
    "a zero row count but there is at least one non-empty row group."),
+
+  ("NO_REGISTERED_BACKENDS", 94, "Cannot schedule query: no registered 
backends "
+   "available."),
 )
 
 import sys

Reply via email to