Michael Blow has submitted this change and it was merged.

Change subject: [NO ISSUE][CLUS] Ensure Active Jobs Capacity is Released Only Once
......................................................................
[NO ISSUE][CLUS] Ensure Active Jobs Capacity is Released Only Once - user model changes: no - storage format changes: no - interface changes: no Details: - Ensure active jobs capacity is released only once. - Warn if the cluster maximum capacity is exceeded. Change-Id: Ia53c6918a68f7050bd8af482dbe8e161d1315844 Reviewed-on: https://asterix-gerrit.ics.uci.edu/2938 Sonar-Qube: Jenkins <[email protected]> Tested-by: Jenkins <[email protected]> Integration-Tests: Jenkins <[email protected]> Reviewed-by: Murtadha Hubail <[email protected]> Reviewed-by: Michael Blow <[email protected]> --- M asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/job/resource/JobCapacityController.java M hyracks-fullstack/hyracks/hyracks-control/hyracks-control-cc/src/main/java/org/apache/hyracks/control/cc/job/JobManager.java 2 files changed, 21 insertions(+), 6 deletions(-) Approvals: Anon. E. Moose #1000171: Jenkins: Verified; No violations found; Verified Michael Blow: Looks good to me, approved Murtadha Hubail: Looks good to me, but someone else must approve diff --git a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/job/resource/JobCapacityController.java b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/job/resource/JobCapacityController.java index 8ea1fa7..b123a5e 100644 --- a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/job/resource/JobCapacityController.java +++ b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/job/resource/JobCapacityController.java @@ -26,12 +26,15 @@ import org.apache.hyracks.api.job.resource.IJobCapacityController; import org.apache.hyracks.api.job.resource.IReadOnlyClusterCapacity; import org.apache.hyracks.control.cc.scheduler.IResourceManager; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; // To avoid the computation cost for checking the capacity constraint for each node, // currently the admit/allocation decisions are based 
on the aggregated resource information. // TODO(buyingyi): investigate partition-aware resource control. public class JobCapacityController implements IJobCapacityController { + private static final Logger LOGGER = LogManager.getLogger(); private final IResourceManager resourceManager; public JobCapacityController(IResourceManager resourceManager) { @@ -71,6 +74,16 @@ int aggregatedNumCores = currentCapacity.getAggregatedCores(); currentCapacity.setAggregatedMemoryByteSize(aggregatedMemoryByteSize + reqAggregatedMemoryByteSize); currentCapacity.setAggregatedCores(aggregatedNumCores + reqAggregatedNumCores); + ensureMaxCapacity(); } + private void ensureMaxCapacity() { + final IClusterCapacity currentCapacity = resourceManager.getCurrentCapacity(); + final IReadOnlyClusterCapacity maximumCapacity = resourceManager.getMaximumCapacity(); + if (currentCapacity.getAggregatedCores() > maximumCapacity.getAggregatedCores() + || currentCapacity.getAggregatedMemoryByteSize() > maximumCapacity.getAggregatedMemoryByteSize()) { + LOGGER.warn("Current cluster available capacity {} is more than its maximum capacity {}", currentCapacity, + maximumCapacity); + } + } } diff --git a/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-cc/src/main/java/org/apache/hyracks/control/cc/job/JobManager.java b/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-cc/src/main/java/org/apache/hyracks/control/cc/job/JobManager.java index 7dc636c..7e1ca61 100644 --- a/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-cc/src/main/java/org/apache/hyracks/control/cc/job/JobManager.java +++ b/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-cc/src/main/java/org/apache/hyracks/control/cc/job/JobManager.java @@ -229,8 +229,9 @@ } run.setStatus(run.getPendingStatus(), run.getPendingExceptions()); run.setEndTime(System.currentTimeMillis()); - if (activeRunMap.remove(jobId) == null) { - LOGGER.warn("Job {} was not found running but is getting archived and capacity released", 
jobId); + if (activeRunMap.remove(jobId) != null) { + // non-active jobs have zero capacity + releaseJobCapacity(run); } runMapArchive.put(jobId, run); runMapHistory.put(jobId, run.getExceptions()); @@ -246,10 +247,6 @@ caughtException = ExceptionUtils.suppress(caughtException, e); } } - - // Releases cluster capacitys occupied by the job. - JobSpecification job = run.getJobSpecification(); - jobCapacityController.release(job); // Picks the next job to execute. pickJobsToRun(); @@ -347,4 +344,9 @@ throw HyracksException.create(ErrorCode.INVALID_INPUT_PARAMETER); } } + + private void releaseJobCapacity(JobRun jobRun) { + final JobSpecification job = jobRun.getJobSpecification(); + jobCapacityController.release(job); + } } -- To view, visit https://asterix-gerrit.ics.uci.edu/2938 To unsubscribe, visit https://asterix-gerrit.ics.uci.edu/settings Gerrit-MessageType: merged Gerrit-Change-Id: Ia53c6918a68f7050bd8af482dbe8e161d1315844 Gerrit-PatchSet: 4 Gerrit-Project: asterixdb Gerrit-Branch: stabilization-f69489 Gerrit-Owner: Murtadha Hubail <[email protected]> Gerrit-Reviewer: Anon. E. Moose #1000171 Gerrit-Reviewer: Jenkins <[email protected]> Gerrit-Reviewer: Michael Blow <[email protected]> Gerrit-Reviewer: Murtadha Hubail <[email protected]> Gerrit-Reviewer: abdullah alamoudi <[email protected]>
