IMPALA-3019: Fix unnecessary resets of iterator

In order to perform round-robin backend selection, the simple
scheduler uses an iterator to the next backend entry to be selected.
This iterator needs to be reset whenever it is invalidated by changes
to the underlying map. The current behavior resets the pointer on
every message of the statestored, even if the message was empty and
thus did not result in any changes to the map.

After every reset of the iterator round-robin selection starts from
the first backend in the scheduler's backend map. As the statestored
sends empty keepalive messages every couple of seconds, this
effectively limits scheduling of remote reads to only a few backends.

This change introduces a check to prevent those unnecessary iterator
resets, which will spread remote reads more evenly over all backends.

Change-Id: I831d485b46c7d9460fb014a302a26864b6bd573e
Reviewed-on: http://gerrit.cloudera.org:8080/2330
Reviewed-by: Lars Volker <[email protected]>
Tested-by: Internal Jenkins
Reviewed-on: http://gerrit.cloudera.org:8080/3031


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/3649ff89
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/3649ff89
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/3649ff89

Branch: refs/heads/master
Commit: 3649ff89e1643bcf3d7889fd57ed3fbd2b17d64f
Parents: 0306dd5
Author: Lars Volker <[email protected]>
Authored: Wed Feb 17 18:09:15 2016 -0800
Committer: Tim Armstrong <[email protected]>
Committed: Fri May 13 15:52:53 2016 -0700

----------------------------------------------------------------------
 be/src/scheduling/simple-scheduler.cc | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/3649ff89/be/src/scheduling/simple-scheduler.cc
----------------------------------------------------------------------
diff --git a/be/src/scheduling/simple-scheduler.cc 
b/be/src/scheduling/simple-scheduler.cc
index 5074071..7abc916 100644
--- a/be/src/scheduling/simple-scheduler.cc
+++ b/be/src/scheduling/simple-scheduler.cc
@@ -268,7 +268,9 @@ void SimpleScheduler::UpdateMembership(
     // backend_ip_map_) in place.
     {
       lock_guard<mutex> lock(backend_map_lock_);
+      bool backend_map_changed = false;
       if (!delta.is_delta) {
+        backend_map_changed = true;
         current_membership_.clear();
         backend_map_.clear();
         backend_ip_map_.clear();
@@ -295,6 +297,7 @@ void SimpleScheduler::UpdateMembership(
                                    << be_desc.address;
         }
 
+        backend_map_changed = true;
         list<TBackendDescriptor>* be_descs = &backend_map_[be_desc.ip_address];
         if (find(be_descs->begin(), be_descs->end(), be_desc) == 
be_descs->end()) {
           backend_map_[be_desc.ip_address].push_back(be_desc);
@@ -305,6 +308,7 @@ void SimpleScheduler::UpdateMembership(
       // Process deletions from the topic
       for (const string& backend_id: delta.topic_deletions) {
         if (current_membership_.find(backend_id) != current_membership_.end()) 
{
+          backend_map_changed = true;
           const TBackendDescriptor& be_desc = current_membership_[backend_id];
           backend_ip_map_.erase(be_desc.address.hostname);
           list<TBackendDescriptor>* be_descs = 
&backend_map_[be_desc.ip_address];
@@ -314,7 +318,8 @@ void SimpleScheduler::UpdateMembership(
           current_membership_.erase(backend_id);
         }
       }
-      next_nonlocal_backend_entry_ = backend_map_.begin();
+      // Update invalidated iterator.
+      if (backend_map_changed) next_nonlocal_backend_entry_ = 
backend_map_.begin();
     }
 
     // If this impalad is not in our view of the membership list, we should 
add it and

Reply via email to