Repository: hive
Updated Branches:
  refs/heads/branch-2.1 cc48c476e -> 06fd3a95a
  refs/heads/master 146a9183e -> 8cec20d97


HIVE-14608 : LLAP: slow scheduling due to LlapTaskScheduler not removing nodes on kill (Sergey Shelukhin, reviewed by Gopal V)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/cd6c3cdf
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/cd6c3cdf
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/cd6c3cdf

Branch: refs/heads/master
Commit: cd6c3cdf91c466a01cd08b108601f8a654a192a8
Parents: 146a918
Author: Sergey Shelukhin <ser...@apache.org>
Authored: Wed Sep 7 19:00:43 2016 -0700
Committer: Sergey Shelukhin <ser...@apache.org>
Committed: Wed Sep 7 19:00:43 2016 -0700

----------------------------------------------------------------------
 .../tezplugins/LlapTaskSchedulerService.java | 35 ++++++++++----------
 1 file changed, 18 insertions(+), 17 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/cd6c3cdf/llap-tez/src/java/org/apache/hadoop/hive/llap/tezplugins/LlapTaskSchedulerService.java
----------------------------------------------------------------------
diff --git a/llap-tez/src/java/org/apache/hadoop/hive/llap/tezplugins/LlapTaskSchedulerService.java b/llap-tez/src/java/org/apache/hadoop/hive/llap/tezplugins/LlapTaskSchedulerService.java
index 10d9ad1..9fc43b3 100644
--- a/llap-tez/src/java/org/apache/hadoop/hive/llap/tezplugins/LlapTaskSchedulerService.java
+++ b/llap-tez/src/java/org/apache/hadoop/hive/llap/tezplugins/LlapTaskSchedulerService.java
@@ -15,7 +15,9 @@
 package org.apache.hadoop.hive.llap.tezplugins;
 
 import java.io.IOException;
+import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.Collection;
 import java.util.Comparator;
 import java.util.EnumSet;
 import java.util.HashMap;
@@ -781,27 +783,26 @@ public class LlapTaskSchedulerService extends TaskScheduler {
         }
       }
       /* fall through - miss in locality (random scheduling) or no locality-requested */
-      Entry<String, NodeInfo>[] all = instanceToNodeMap.entrySet().toArray(new Entry[0]);
-      // Check again
+      Collection<ServiceInstance> instances = activeInstances.getAll();
+      ArrayList<NodeInfo> all = new ArrayList<>(instances.size());
+      for (ServiceInstance inst : instances) {
+        NodeInfo nodeInfo = instanceToNodeMap.get(inst.getWorkerIdentity());
+        if (nodeInfo != null && nodeInfo.canAcceptTask()) {
+          all.add(nodeInfo);
+        }
+      }
       if (LOG.isDebugEnabled()) {
         LOG.debug("Attempting random allocation for task={}", request.task);
       }
-      if (all.length > 0) {
-        int n = random.nextInt(all.length);
-        // start at random offset and iterate whole list
-        for (int i = 0; i < all.length; i++) {
-          Entry<String, NodeInfo> inst = all[(i + n) % all.length];
-          if (inst.getValue().canAcceptTask()) {
-            LOG.info(
-                "Assigning " + nodeToString(inst.getValue().getServiceInstance(), inst.getValue()) +
-                    " when looking for any host, from #hosts=" + all.length + ", requestedHosts=" +
-                    ((requestedHosts == null || requestedHosts.length == 0) ? "null" :
-                        Arrays.toString(requestedHosts)));
-            return new SelectHostResult(inst.getValue().getServiceInstance(), inst.getValue());
-          }
-        }
+      if (all.isEmpty()) {
+        return SELECT_HOST_RESULT_DELAYED_RESOURCES;
       }
-      return SELECT_HOST_RESULT_DELAYED_RESOURCES;
+      NodeInfo randomNode = all.get(random.nextInt(all.size()));
+      LOG.info("Assigning " + nodeToString(randomNode.getServiceInstance(), randomNode) +
+          " when looking for any host, from #hosts=" + all.size() + ", requestedHosts=" +
+          ((requestedHosts == null || requestedHosts.length == 0)
+              ? "null" : Arrays.toString(requestedHosts)));
+      return new SelectHostResult(randomNode.getServiceInstance(), randomNode);
     } finally {
       readLock.unlock();
     }