[ https://issues.apache.org/jira/browse/GOBBLIN-1072?focusedWorklogId=399454&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-399454 ]
ASF GitHub Bot logged work on GOBBLIN-1072: ------------------------------------------- Author: ASF GitHub Bot Created on: 07/Mar/20 00:22 Start Date: 07/Mar/20 00:22 Worklog Time Spent: 10m Work Description: sv2000 commented on pull request #2912: [GOBBLIN-1072] Being more conservative on leasing containers URL: https://github.com/apache/incubator-gobblin/pull/2912#discussion_r389199173 ########## File path: gobblin-yarn/src/main/java/org/apache/gobblin/yarn/YarnAutoScalingManager.java ########## @@ -189,16 +224,123 @@ void runInternal() { } } + // Find all participants appearing in this cluster. Note that Helix instances can contain cluster-manager + // and potentially replanner-instance. + Set<String> allParticipants = getParticipants(GobblinYarnTaskRunner.class.getSimpleName()); + + // Find all joined participants not in-use for this round of inspection. + // If idle time is beyond tolerance, mark the instance as unused by assigning timestamp as -1. + for (String participant : allParticipants) { + if (!inUseInstances.contains(participant)) { + instanceIdleSinceWhen.putIfAbsent(participant, System.currentTimeMillis()); + if (absenceUnderTolerance(participant)) { + inUseInstances.add(participant); + } + } else { + // An instance that has been previously detected as idle but now back to in-use. + // Remove this instance if existed in the tracking map. + instanceIdleSinceWhen.remove(participant); + } + } + + + // compute the target containers as a ceiling of number of partitions divided by the number of containers // per partition. int numTargetContainers = (int) Math.ceil((double)numPartitions / this.partitionsPerContainer); // adjust the number of target containers based on the configured min and max container values. 
numTargetContainers = Math.max(this.minContainers, Math.min(this.maxContainers, numTargetContainers)); + slidingFixedWindow.add(numTargetContainers); + log.info("There are {} containers being requested", numTargetContainers); - this.yarnService.requestTargetNumberOfContainers(numTargetContainers, inUseInstances); + this.yarnService.requestTargetNumberOfContainers(slidingFixedWindow.getMax(), inUseInstances); + } + + @VisibleForTesting + /** + * Pass a participant if condition hold, where the condition, by default is that if an instance went back to + * active (having partition running on it) within {@link #maxIdleTimeInMinBeforeScalingDown} mins, we will + * not tag that instance as "unused" and have that as the candidate for scaling down. + */ + boolean absenceUnderTolerance(String participant){ + return System.currentTimeMillis() - instanceIdleSinceWhen.get(participant) < + TimeUnit.MINUTES.toMillis(maxIdleTimeInMinBeforeScalingDown); + } + } + + /** + * A FIFO queue with fixed size and returns maxValue among all elements within the queue in constant time. + * This data structure prevent temporary fluctuation in the number of active helix partitions as the size of queue + * grows and will be less sensitive when scaling down is actually required. + * + * The interface for this lass is implemented in a minimal-necessity manner to serve only as a sliding-sized-window Review comment: Typo: drop "lass" ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
For queries about this service, please contact Infrastructure at: users@infra.apache.org Issue Time Tracking ------------------- Worklog Id: (was: 399454) Time Spent: 1h 50m (was: 1h 40m) > Being more conservative on releasing YARN containers > ---------------------------------------------------- > > Key: GOBBLIN-1072 > URL: https://issues.apache.org/jira/browse/GOBBLIN-1072 > Project: Apache Gobblin > Issue Type: Improvement > Reporter: Lei Sun > Priority: Major > Time Spent: 1h 50m > Remaining Estimate: 0h > -- This message was sent by Atlassian Jira (v8.3.4#803005)