Repository: oozie Updated Branches: refs/heads/master 7a80c6528 -> bf8329c2f
OOZIE-2314 Unable to kill old instance child job by workflow or coord rerun by Launcher (jaydeepvishwakarma via rkanter) Project: http://git-wip-us.apache.org/repos/asf/oozie/repo Commit: http://git-wip-us.apache.org/repos/asf/oozie/commit/bf8329c2 Tree: http://git-wip-us.apache.org/repos/asf/oozie/tree/bf8329c2 Diff: http://git-wip-us.apache.org/repos/asf/oozie/diff/bf8329c2 Branch: refs/heads/master Commit: bf8329c2f47c249c1907581dd25513c755fb2d0e Parents: 7a80c65 Author: Robert Kanter <[email protected]> Authored: Fri Oct 16 17:46:10 2015 -0700 Committer: Robert Kanter <[email protected]> Committed: Fri Oct 16 17:46:10 2015 -0700 ---------------------------------------------------------------------- .../oozie/action/hadoop/JavaActionExecutor.java | 15 ++++++++++++++- .../oozie/action/hadoop/LauncherMapperHelper.java | 5 +++-- .../command/coord/CoordActionStartXCommand.java | 3 +++ .../org/apache/oozie/action/hadoop/TestLauncher.java | 2 +- release-log.txt | 1 + 5 files changed, 22 insertions(+), 4 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/oozie/blob/bf8329c2/core/src/main/java/org/apache/oozie/action/hadoop/JavaActionExecutor.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/oozie/action/hadoop/JavaActionExecutor.java b/core/src/main/java/org/apache/oozie/action/hadoop/JavaActionExecutor.java index c519cb0..e83b3b5 100644 --- a/core/src/main/java/org/apache/oozie/action/hadoop/JavaActionExecutor.java +++ b/core/src/main/java/org/apache/oozie/action/hadoop/JavaActionExecutor.java @@ -58,6 +58,7 @@ import org.apache.oozie.action.ActionExecutor; import org.apache.oozie.action.ActionExecutorException; import org.apache.oozie.client.OozieClient; import org.apache.oozie.client.WorkflowAction; +import org.apache.oozie.command.coord.CoordActionStartXCommand; import org.apache.oozie.command.wf.ActionStartXCommand; import org.apache.oozie.service.ConfigurationService; import org.apache.oozie.service.HadoopAccessorException; @@ -880,7 +881,19 @@ public class JavaActionExecutor extends ActionExecutor { // Properties for when a launcher job's AM gets restarted if (ConfigurationService.getBoolean(HADOOP_YARN_KILL_CHILD_JOBS_ON_AMRESTART)) { - LauncherMapperHelper.setupYarnRestartHandling(launcherJobConf, actionConf, launcherTag); + // launcher time filter is required to prune the search of launcher tag. + // Setting coordinator action nominal time as launcher time as it child job cannot launch before nominal + // time. Workflow created time is good enough when workflow is running independently or workflow is + // rerunning from failed node. + long launcherTime = System.currentTimeMillis(); + String coordActionNominalTime = context.getProtoActionConf() + .get(CoordActionStartXCommand.OOZIE_COORD_ACTION_NOMINAL_TIME); + if (coordActionNominalTime != null) { + launcherTime = Long.parseLong(coordActionNominalTime); + } else if (context.getWorkflow().getCreatedTime() != null) { + launcherTime = context.getWorkflow().getCreatedTime().getTime(); + } + LauncherMapperHelper.setupYarnRestartHandling(launcherJobConf, actionConf, launcherTag, launcherTime); } else { LOG.info(MessageFormat.format("{0} is set to false, not setting YARN restart properties", http://git-wip-us.apache.org/repos/asf/oozie/blob/bf8329c2/core/src/main/java/org/apache/oozie/action/hadoop/LauncherMapperHelper.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/oozie/action/hadoop/LauncherMapperHelper.java b/core/src/main/java/org/apache/oozie/action/hadoop/LauncherMapperHelper.java index e22329d..ef6b99d 100644 --- a/core/src/main/java/org/apache/oozie/action/hadoop/LauncherMapperHelper.java +++ b/core/src/main/java/org/apache/oozie/action/hadoop/LauncherMapperHelper.java @@ -162,9 +162,10 @@ public class LauncherMapperHelper { launcherConf.set("mapred.output.dir", new Path(actionDir, "output").toString()); } - public static void setupYarnRestartHandling(JobConf launcherJobConf, Configuration actionConf, String launcherTag) + public static void setupYarnRestartHandling(JobConf launcherJobConf, Configuration actionConf, String launcherTag, + long launcherTime) throws NoSuchAlgorithmException { - launcherJobConf.setLong(LauncherMainHadoopUtils.OOZIE_JOB_LAUNCH_TIME, System.currentTimeMillis()); + launcherJobConf.setLong(LauncherMainHadoopUtils.OOZIE_JOB_LAUNCH_TIME, launcherTime); // Tags are limited to 100 chars so we need to hash them to make sure (the actionId otherwise doesn't have a max length) String tag = getTag(launcherTag); // keeping the oozie.child.mapreduce.job.tags instead of mapreduce.job.tags to avoid killing launcher itself. http://git-wip-us.apache.org/repos/asf/oozie/blob/bf8329c2/core/src/main/java/org/apache/oozie/command/coord/CoordActionStartXCommand.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/oozie/command/coord/CoordActionStartXCommand.java b/core/src/main/java/org/apache/oozie/command/coord/CoordActionStartXCommand.java index 98f356e..ada8a30 100644 --- a/core/src/main/java/org/apache/oozie/command/coord/CoordActionStartXCommand.java +++ b/core/src/main/java/org/apache/oozie/command/coord/CoordActionStartXCommand.java @@ -67,6 +67,7 @@ public class CoordActionStartXCommand extends CoordinatorXCommand<Void> { public static final String COULD_NOT_START = "COULD_NOT_START"; public static final String START_DATA_MISSING = "START_DATA_MISSING"; public static final String EXEC_DATA_MISSING = "EXEC_DATA_MISSING"; + public static final String OOZIE_COORD_ACTION_NOMINAL_TIME = "oozie.coord.action.nominal_time"; private final XLog log = getLog(); private String actionId = null; @@ -207,6 +208,8 @@ public class CoordActionStartXCommand extends CoordinatorXCommand<Void> { conf.setBoolean(OozieClient.RERUN_FAIL_NODES, true); dagEngine.reRun(coordAction.getExternalId(), conf); } else { + // Pushing the nominal time in conf to use for launcher tag search + conf.set(OOZIE_COORD_ACTION_NOMINAL_TIME,String.valueOf(coordAction.getNominalTime().getTime())); String wfId = dagEngine.submitJobFromCoordinator(conf, actionId); coordAction.setExternalId(wfId); } http://git-wip-us.apache.org/repos/asf/oozie/blob/bf8329c2/core/src/test/java/org/apache/oozie/action/hadoop/TestLauncher.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/org/apache/oozie/action/hadoop/TestLauncher.java b/core/src/test/java/org/apache/oozie/action/hadoop/TestLauncher.java index 7a044ed..aa938d0 100644 --- a/core/src/test/java/org/apache/oozie/action/hadoop/TestLauncher.java +++ b/core/src/test/java/org/apache/oozie/action/hadoop/TestLauncher.java @@ -82,7 +82,7 @@ public class TestLauncher extends XFsTestCase { Configuration actionConf = new XConfiguration(); LauncherMapperHelper.setupLauncherInfo(jobConf, "1", "1@a", actionDir, "1@a-0", actionConf, ""); - LauncherMapperHelper.setupYarnRestartHandling(jobConf, jobConf, "1@a"); + LauncherMapperHelper.setupYarnRestartHandling(jobConf, jobConf, "1@a", System.currentTimeMillis()); assertEquals("1", actionConf.get("oozie.job.id")); assertEquals("1@a", actionConf.get("oozie.action.id")); http://git-wip-us.apache.org/repos/asf/oozie/blob/bf8329c2/release-log.txt ---------------------------------------------------------------------- diff --git a/release-log.txt b/release-log.txt index c8ac6ab..6d61c2d 100644 --- a/release-log.txt +++ b/release-log.txt @@ -1,5 +1,6 @@ -- Oozie 4.3.0 release (trunk - unreleased) +OOZIE-2314 Unable to kill old instance child job by workflow or coord rerun by Launcher (jaydeepvishwakarma via rkanter) OOZIE-2386 org.apache.oozie.util.TestXConfiguration.testSubstituteVar is flakey (rkanter) OOZIE-2385 org.apache.oozie.TestCoordinatorEngineStreamLog.testCoordLogStreaming is flakey (rkanter) OOZIE-2382 org.apache.oozie.action.hadoop.TestPigMain.testPig_withNullExternalID is flakey (rkanter)
