Repository: asterixdb
Updated Branches:
  refs/heads/master 0d0a113f4 -> 54a507007
[ASTERIXDB-2003][FAIL] Abort jobs failing during job start

- user model changes: no
- storage format changes: no
- interface changes: no

Details:
- Prevent NPE or unmodifiable list in JobCleanupWork and
  JobletCleanupNotificationWork.
- Abort job if a failure happens during job start

Change-Id: If6fe4ed9084270f9f22ee4b4c71936d679c8b883
Reviewed-on: https://asterix-gerrit.ics.uci.edu/1904
Sonar-Qube: Jenkins <[email protected]>
Reviewed-by: Yingyi Bu <[email protected]>
Tested-by: Jenkins <[email protected]>
BAD: Jenkins <[email protected]>
Integration-Tests: Jenkins <[email protected]>

Project: http://git-wip-us.apache.org/repos/asf/asterixdb/repo
Commit: http://git-wip-us.apache.org/repos/asf/asterixdb/commit/54a50700
Tree: http://git-wip-us.apache.org/repos/asf/asterixdb/tree/54a50700
Diff: http://git-wip-us.apache.org/repos/asf/asterixdb/diff/54a50700

Branch: refs/heads/master
Commit: 54a507007e08cd84652774263bd7e1fe9ede8a0f
Parents: 0d0a113
Author: Murtadha Hubail <[email protected]>
Authored: Thu Jul 27 19:28:42 2017 +0300
Committer: Murtadha Hubail <[email protected]>
Committed: Thu Jul 27 13:08:28 2017 -0700

----------------------------------------------------------------------
 .../apache/hyracks/control/cc/executor/JobExecutor.java | 2 +-
 .../org/apache/hyracks/control/cc/job/JobManager.java | 9 ++++++---
 .../apache/hyracks/control/cc/work/JobCleanupWork.java | 9 +++++++--
 .../control/cc/work/JobletCleanupNotificationWork.java | 10 ++++++++--
 4 files changed, 22 insertions(+), 8 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/54a50700/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-cc/src/main/java/org/apache/hyracks/control/cc/executor/JobExecutor.java
----------------------------------------------------------------------
diff --git
a/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-cc/src/main/java/org/apache/hyracks/control/cc/executor/JobExecutor.java b/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-cc/src/main/java/org/apache/hyracks/control/cc/executor/JobExecutor.java index 2150bdd..f18a917 100644 --- a/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-cc/src/main/java/org/apache/hyracks/control/cc/executor/JobExecutor.java +++ b/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-cc/src/main/java/org/apache/hyracks/control/cc/executor/JobExecutor.java @@ -521,7 +521,7 @@ public class JobExecutor { } } - private void abortJob(List<Exception> exceptions) { + public void abortJob(List<Exception> exceptions) { Set<TaskCluster> inProgressTaskClustersCopy = new HashSet<>(inProgressTaskClusters); for (TaskCluster tc : inProgressTaskClustersCopy) { abortTaskCluster(findLastTaskClusterAttempt(tc), TaskClusterAttempt.TaskClusterStatus.ABORTED); http://git-wip-us.apache.org/repos/asf/asterixdb/blob/54a50700/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-cc/src/main/java/org/apache/hyracks/control/cc/job/JobManager.java ---------------------------------------------------------------------- diff --git a/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-cc/src/main/java/org/apache/hyracks/control/cc/job/JobManager.java b/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-cc/src/main/java/org/apache/hyracks/control/cc/job/JobManager.java index c1a7899..abf1d57 100644 --- a/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-cc/src/main/java/org/apache/hyracks/control/cc/job/JobManager.java +++ b/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-cc/src/main/java/org/apache/hyracks/control/cc/job/JobManager.java @@ -45,7 +45,6 @@ import org.apache.hyracks.control.cc.application.CCServiceContext; import org.apache.hyracks.control.cc.cluster.INodeManager; import org.apache.hyracks.control.cc.scheduler.FIFOJobQueue; import 
org.apache.hyracks.control.cc.scheduler.IJobQueue; -import org.apache.hyracks.control.cc.work.JobCleanupWork; import org.apache.hyracks.control.common.controllers.CCConfig; import com.fasterxml.jackson.databind.ObjectMapper; @@ -318,8 +317,12 @@ public class JobManager implements IJobManager { try { run.getExecutor().startJob(); } catch (Exception e) { - ccs.getWorkQueue().schedule(new JobCleanupWork(ccs.getJobManager(), run.getJobId(), JobStatus.FAILURE, - Collections.singletonList(e))); + LOGGER.log(Level.SEVERE, "Aborting " + run.getJobId() + " due to failure during job start", e); + final List<Exception> exceptions = Collections.singletonList(e); + // fail the job then abort it + run.setStatus(JobStatus.FAILURE, exceptions); + // abort job will trigger JobCleanupWork + run.getExecutor().abortJob(exceptions); } } http://git-wip-us.apache.org/repos/asf/asterixdb/blob/54a50700/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-cc/src/main/java/org/apache/hyracks/control/cc/work/JobCleanupWork.java ---------------------------------------------------------------------- diff --git a/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-cc/src/main/java/org/apache/hyracks/control/cc/work/JobCleanupWork.java b/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-cc/src/main/java/org/apache/hyracks/control/cc/work/JobCleanupWork.java index 5f29981..502ac50 100644 --- a/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-cc/src/main/java/org/apache/hyracks/control/cc/work/JobCleanupWork.java +++ b/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-cc/src/main/java/org/apache/hyracks/control/cc/work/JobCleanupWork.java @@ -18,6 +18,7 @@ */ package org.apache.hyracks.control.cc.work; +import java.util.ArrayList; import java.util.List; import java.util.logging.Level; import java.util.logging.Logger; @@ -55,8 +56,12 @@ public class JobCleanupWork extends AbstractWork { } catch (HyracksException e) { // Fail the job with the caught exception 
during final completion. JobRun run = jobManager.get(jobId); - run.getExceptions().add(e); - run.setStatus(JobStatus.FAILURE, run.getExceptions()); + List<Exception> completionException = new ArrayList<>(); + if (run.getExceptions() != null && !run.getExceptions().isEmpty()) { + completionException.addAll(run.getExceptions()); + } + completionException.add(0, e); + run.setStatus(JobStatus.FAILURE, completionException); } } http://git-wip-us.apache.org/repos/asf/asterixdb/blob/54a50700/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-cc/src/main/java/org/apache/hyracks/control/cc/work/JobletCleanupNotificationWork.java ---------------------------------------------------------------------- diff --git a/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-cc/src/main/java/org/apache/hyracks/control/cc/work/JobletCleanupNotificationWork.java b/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-cc/src/main/java/org/apache/hyracks/control/cc/work/JobletCleanupNotificationWork.java index 6a8e631..5bf721b 100644 --- a/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-cc/src/main/java/org/apache/hyracks/control/cc/work/JobletCleanupNotificationWork.java +++ b/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-cc/src/main/java/org/apache/hyracks/control/cc/work/JobletCleanupNotificationWork.java @@ -18,6 +18,8 @@ */ package org.apache.hyracks.control.cc.work; +import java.util.ArrayList; +import java.util.List; import java.util.Set; import java.util.logging.Level; import java.util.logging.Logger; @@ -67,8 +69,12 @@ public class JobletCleanupNotificationWork extends AbstractHeartbeatWork { jobManager.finalComplete(run); } catch (HyracksException e) { // Fail the job with the caught exception during final completion. 
- run.getExceptions().add(e); - run.setStatus(JobStatus.FAILURE, run.getExceptions()); + List<Exception> completionException = new ArrayList<>(); + if (run.getExceptions() != null && !run.getExceptions().isEmpty()) { + completionException.addAll(run.getExceptions()); + } + completionException.add(0, e); + run.setStatus(JobStatus.FAILURE, completionException); } } }
