IMPALA-3019: Fix unnecessary resets of iterator In order to perform round-robin backend selection, the simple scheduler uses an iterator to the next backend entry to be selected. This iterator needs to be reset whenever it is invalidated by changes to the underlying map. The current behavior resets the pointer on every message of the statestored, even if the message was empty and thus did not result in any changes to the map.
After every reset of the iterator round-robin selection starts from the first backend in the scheduler's backend map. As the statestored sends empty keepalive messages every couple of seconds, this effectively limits scheduling of remote reads to only a few backends. This change introduces a check to prevent those unnecessary iterator resets, which will spread remote reads more evenly over all backends. Change-Id: I831d485b46c7d9460fb014a302a26864b6bd573e Reviewed-on: http://gerrit.cloudera.org:8080/2330 Reviewed-by: Lars Volker <[email protected]> Tested-by: Internal Jenkins Reviewed-on: http://gerrit.cloudera.org:8080/3031 Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/3649ff89 Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/3649ff89 Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/3649ff89 Branch: refs/heads/master Commit: 3649ff89e1643bcf3d7889fd57ed3fbd2b17d64f Parents: 0306dd5 Author: Lars Volker <[email protected]> Authored: Wed Feb 17 18:09:15 2016 -0800 Committer: Tim Armstrong <[email protected]> Committed: Fri May 13 15:52:53 2016 -0700 ---------------------------------------------------------------------- be/src/scheduling/simple-scheduler.cc | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/3649ff89/be/src/scheduling/simple-scheduler.cc ---------------------------------------------------------------------- diff --git a/be/src/scheduling/simple-scheduler.cc b/be/src/scheduling/simple-scheduler.cc index 5074071..7abc916 100644 --- a/be/src/scheduling/simple-scheduler.cc +++ b/be/src/scheduling/simple-scheduler.cc @@ -268,7 +268,9 @@ void SimpleScheduler::UpdateMembership( // backend_ip_map_) in place. { lock_guard<mutex> lock(backend_map_lock_); + bool backend_map_changed = false; if (!delta.is_delta) { + backend_map_changed = true; current_membership_.clear(); backend_map_.clear(); backend_ip_map_.clear(); @@ -295,6 +297,7 @@ void SimpleScheduler::UpdateMembership( << be_desc.address; } + backend_map_changed = true; list<TBackendDescriptor>* be_descs = &backend_map_[be_desc.ip_address]; if (find(be_descs->begin(), be_descs->end(), be_desc) == be_descs->end()) { backend_map_[be_desc.ip_address].push_back(be_desc); @@ -305,6 +308,7 @@ void SimpleScheduler::UpdateMembership( // Process deletions from the topic for (const string& backend_id: delta.topic_deletions) { if (current_membership_.find(backend_id) != current_membership_.end()) { + backend_map_changed = true; const TBackendDescriptor& be_desc = current_membership_[backend_id]; backend_ip_map_.erase(be_desc.address.hostname); list<TBackendDescriptor>* be_descs = &backend_map_[be_desc.ip_address]; @@ -314,7 +318,8 @@ void SimpleScheduler::UpdateMembership( current_membership_.erase(backend_id); } } - next_nonlocal_backend_entry_ = backend_map_.begin(); + // Update invalidated iterator. + if (backend_map_changed) next_nonlocal_backend_entry_ = backend_map_.begin(); } // If this impalad is not in our view of the membership list, we should add it and
