Repository: oozie Updated Branches: refs/heads/master a5c9829d1 -> 48c1128d3
OOZIE-2142 Changing the JT whitelist causes running Workflows to stay RUNNING forever (rkanter) Project: http://git-wip-us.apache.org/repos/asf/oozie/repo Commit: http://git-wip-us.apache.org/repos/asf/oozie/commit/48c1128d Tree: http://git-wip-us.apache.org/repos/asf/oozie/tree/48c1128d Diff: http://git-wip-us.apache.org/repos/asf/oozie/diff/48c1128d Branch: refs/heads/master Commit: 48c1128d32eda6ecb89834ba5bebd580864a8987 Parents: a5c9829 Author: Robert Kanter <[email protected]> Authored: Mon Mar 9 11:44:25 2015 -0700 Committer: Robert Kanter <[email protected]> Committed: Mon Mar 9 11:44:25 2015 -0700 ---------------------------------------------------------------------- .../oozie/command/wf/ActionCheckXCommand.java | 8 +-- .../command/wf/TestActionCheckXCommand.java | 56 ++++++++++++++++++++ release-log.txt | 1 + 3 files changed, 62 insertions(+), 3 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/oozie/blob/48c1128d/core/src/main/java/org/apache/oozie/command/wf/ActionCheckXCommand.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/oozie/command/wf/ActionCheckXCommand.java b/core/src/main/java/org/apache/oozie/command/wf/ActionCheckXCommand.java index e9488d4..cb944eb 100644 --- a/core/src/main/java/org/apache/oozie/command/wf/ActionCheckXCommand.java +++ b/core/src/main/java/org/apache/oozie/command/wf/ActionCheckXCommand.java @@ -208,13 +208,15 @@ public class ActionCheckXCommand extends ActionXCommand<Void> { wfAction.setErrorInfo(ex.getErrorCode(), ex.getMessage()); switch (ex.getErrorType()) { + case ERROR: + // If allowed to retry, this will handle it; otherwise, we should fall through to FAILED + if (handleUserRetry(wfAction)) { + break; + } case FAILED: failJob(context, wfAction); generateEvent = true; break; - case ERROR: - handleUserRetry(wfAction); - break; case TRANSIENT: // retry N times, then suspend workflow if (!handleTransient(context, executor, WorkflowAction.Status.RUNNING)) { handleNonTransient(context, executor, WorkflowAction.Status.START_MANUAL); http://git-wip-us.apache.org/repos/asf/oozie/blob/48c1128d/core/src/test/java/org/apache/oozie/command/wf/TestActionCheckXCommand.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/org/apache/oozie/command/wf/TestActionCheckXCommand.java b/core/src/test/java/org/apache/oozie/command/wf/TestActionCheckXCommand.java index efde282..f503b1f 100644 --- a/core/src/test/java/org/apache/oozie/command/wf/TestActionCheckXCommand.java +++ b/core/src/test/java/org/apache/oozie/command/wf/TestActionCheckXCommand.java @@ -29,8 +29,11 @@ import org.apache.hadoop.mapred.JobClient; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.JobID; import org.apache.hadoop.mapred.RunningJob; +import org.apache.oozie.ForTestingActionExecutor; import org.apache.oozie.WorkflowActionBean; import org.apache.oozie.WorkflowJobBean; +import org.apache.oozie.action.ActionExecutor; +import org.apache.oozie.action.ActionExecutorException; import org.apache.oozie.action.hadoop.LauncherMapperHelper; import org.apache.oozie.action.hadoop.MapReduceActionExecutor; import org.apache.oozie.action.hadoop.MapperReducerForTest; @@ -43,9 +46,12 @@ import org.apache.oozie.executor.jpa.WorkflowActionGetJPAExecutor; import org.apache.oozie.executor.jpa.WorkflowActionInsertJPAExecutor; import org.apache.oozie.executor.jpa.WorkflowJobGetJPAExecutor; import org.apache.oozie.service.ActionCheckerService; +import org.apache.oozie.service.ActionService; +import org.apache.oozie.service.ConfigurationService; import org.apache.oozie.service.HadoopAccessorService; import org.apache.oozie.service.InstrumentationService; import org.apache.oozie.service.JPAService; +import org.apache.oozie.service.LiteWorkflowStoreService; import org.apache.oozie.service.Services; import org.apache.oozie.service.UUIDService; import org.apache.oozie.test.XDataTestCase; @@ -294,6 +300,55 @@ public class TestActionCheckXCommand extends XDataTestCase { } + private static class ErrorCheckActionExecutor extends ActionExecutor { + public static final String ERROR_CODE = "some_error"; + protected ErrorCheckActionExecutor() { + super("map-reduce"); + } + @Override + public void start(Context context, WorkflowAction action) throws ActionExecutorException {} + + @Override + public void end(Context context, WorkflowAction action) throws ActionExecutorException {} + + @Override + public void check(Context context, WorkflowAction action) throws ActionExecutorException { + throw new ActionExecutorException(ActionExecutorException.ErrorType.ERROR, ERROR_CODE, "check"); + } + + @Override + public void kill(Context context, WorkflowAction action) throws ActionExecutorException {} + + @Override + public boolean isCompleted(String externalStatus) { + return false; + } + } + + public void testActionCheckErrorNoUserRetry() throws Exception { + WorkflowActionBean action = _testActionCheckError(); + assertEquals(WorkflowAction.Status.FAILED, action.getStatus()); + } + + public void testActionCheckErrorUserRetry() throws Exception { + ConfigurationService.set(LiteWorkflowStoreService.CONF_USER_RETRY_ERROR_CODE_EXT, ErrorCheckActionExecutor.ERROR_CODE); + WorkflowActionBean action = _testActionCheckError(); + assertEquals(WorkflowAction.Status.USER_RETRY, action.getStatus()); + } + + private WorkflowActionBean _testActionCheckError() throws Exception { + services.get(ActionService.class).registerAndInitExecutor(ErrorCheckActionExecutor.class); + + JPAService jpaService = Services.get().get(JPAService.class); + WorkflowJobBean job = this.addRecordToWfJobTable(WorkflowJob.Status.RUNNING, WorkflowInstance.Status.RUNNING); + WorkflowActionBean action = this.addRecordToWfActionTable(job.getId(), "1", WorkflowAction.Status.RUNNING); + WorkflowActionGetJPAExecutor wfActionGetCmd = new WorkflowActionGetJPAExecutor(action.getId()); + + new ActionCheckXCommand(action.getId()).call(); + action = jpaService.execute(wfActionGetCmd); + return action; + } + public void testActionCheckTransientDuringLauncher() throws Exception { // When using YARN, skip this test because it relies on shutting down the job tracker, which isn't used in YARN if (createJobConf().get("yarn.resourcemanager.address") != null) { @@ -589,6 +644,7 @@ public class TestActionCheckXCommand extends XDataTestCase { action.setLastCheckTime(new Date()); action.setPending(); action.setExecutionPath("/"); + action.setUserRetryMax(2); Path inputDir = new Path(getFsTestCaseDir(), "input"); Path outputDir = new Path(getFsTestCaseDir(), "output"); http://git-wip-us.apache.org/repos/asf/oozie/blob/48c1128d/release-log.txt ---------------------------------------------------------------------- diff --git a/release-log.txt b/release-log.txt index 9fb9841..1e6e101 100644 --- a/release-log.txt +++ b/release-log.txt @@ -1,5 +1,6 @@ -- Oozie 4.2.0 release (trunk - unreleased) +OOZIE-2142 Changing the JT whitelist causes running Workflows to stay RUNNING forever (rkanter) OOZIE-2164 make master parameterizable in Spark action example (wypoon via rkanter) OOZIE-2155 Incorrect DST Shifts are occurring based on the Database timezone (rkanter) OOZIE-2156 override hive.querylog.location in hive-site.xml (ryota)
