SLIDER-1021 Exceptions raised during AM launch don't trigger exit code AM failures
Project: http://git-wip-us.apache.org/repos/asf/incubator-slider/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-slider/commit/ee6fd4be Tree: http://git-wip-us.apache.org/repos/asf/incubator-slider/tree/ee6fd4be Diff: http://git-wip-us.apache.org/repos/asf/incubator-slider/diff/ee6fd4be Branch: refs/heads/develop Commit: ee6fd4bec831e6dcb9b1f02d66f937161e7289d4 Parents: b2b04f9 Author: Steve Loughran <[email protected]> Authored: Mon Dec 14 15:36:57 2015 +0000 Committer: Steve Loughran <[email protected]> Committed: Mon Dec 14 17:28:07 2015 +0000 ---------------------------------------------------------------------- .../server/appmaster/SliderAppMaster.java | 30 +++++++++++--------- .../appmaster/actions/ActionStopSlider.java | 16 +++++++++-- .../apache/slider/test/SliderTestBase.groovy | 4 +-- 3 files changed, 31 insertions(+), 19 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/ee6fd4be/slider-core/src/main/java/org/apache/slider/server/appmaster/SliderAppMaster.java ---------------------------------------------------------------------- diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/SliderAppMaster.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/SliderAppMaster.java index 415a597..3868920 100644 --- a/slider-core/src/main/java/org/apache/slider/server/appmaster/SliderAppMaster.java +++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/SliderAppMaster.java @@ -983,6 +983,8 @@ public class SliderAppMaster extends AbstractSliderLaunchedService waitForAMCompletionSignal(); } catch(Exception e) { log.error("Exception : {}", e, e); + // call the AM stop command as if it had been queued (but without + // going via the queue, which may not have started onAMStop(new ActionStopSlider(e)); } //shutdown time @@ -1450,8 +1452,10 @@ public class SliderAppMaster extends AbstractSliderLaunchedService /** * trigger the YARN cluster termination process * @return the exit code + * @throws Exception if the stop action contained an Exception which implements + * ExitCodeProvider */ - private synchronized int finish() { + private synchronized int finish() throws Exception { Preconditions.checkNotNull(stopAction, "null stop action"); FinalApplicationStatus appStatus; log.info("Triggering shutdown of the AM: {}", stopAction); @@ -1459,21 +1463,25 @@ public class SliderAppMaster extends AbstractSliderLaunchedService String appMessage = stopAction.getMessage(); //stop the daemon & grab its exit code int exitCode = stopAction.getExitCode(); + Exception exception = stopAction.getEx(); appStatus = stopAction.getFinalApplicationStatus(); if (!spawnedProcessExitedBeforeShutdownTriggered) { //stopped the forked process but don't worry about its exit code - exitCode = stopForkedProcess(); - log.debug("Stopped forked process: exit code={}", exitCode); + int forkedExitCode = stopForkedProcess(); + log.debug("Stopped forked process: exit code={}", forkedExitCode); } // make sure the AM is actually registered. If not, there's no point // trying to unregister it if (amRegistrationData == null) { log.info("Application attempt not yet registered; skipping unregistration"); + if (exception != null) { + throw exception; + } return exitCode; } - + //stop any launches in progress launchService.stop(); @@ -1487,18 +1495,14 @@ public class SliderAppMaster extends AbstractSliderLaunchedService try { log.info("Unregistering AM status={} message={}", appStatus, appMessage); asyncRMClient.unregisterApplicationMaster(appStatus, appMessage, null); -/* JDK7 + } catch (InvalidApplicationMasterRequestException e) { + log.info("Application not found in YARN application list;" + + " it may have been terminated/YARN shutdown in progress: {}", e, e); } catch (YarnException | IOException e) { log.info("Failed to unregister application: " + e, e); } -*/ - } catch (IOException e) { - log.info("Failed to unregister application: {}", e, e); - } catch (InvalidApplicationMasterRequestException e) { - log.info("Application not found in YARN application list;" + - " it may have been terminated/YARN shutdown in progress: {}", e, e); - } catch (YarnException e) { - log.info("Failed to unregister application: {}", e, e); + if (exception != null) { + throw exception; } return exitCode; } http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/ee6fd4be/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/ActionStopSlider.java ---------------------------------------------------------------------- diff --git a/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/ActionStopSlider.java b/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/ActionStopSlider.java index d2f23a2..055cea5 100644 --- a/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/ActionStopSlider.java +++ b/slider-core/src/main/java/org/apache/slider/server/appmaster/actions/ActionStopSlider.java @@ -19,6 +19,7 @@ package org.apache.slider.server.appmaster.actions; import org.apache.hadoop.yarn.api.records.FinalApplicationStatus; +import org.apache.slider.core.exceptions.ExceptionConverter; import org.apache.slider.core.exceptions.TriggerClusterTeardownException; import org.apache.slider.core.main.ExitCodeProvider; import org.apache.slider.core.main.LauncherExitCodes; @@ -35,6 +36,7 @@ public class ActionStopSlider extends AsyncAction { private int exitCode; private FinalApplicationStatus finalApplicationStatus; private String message; + private final Exception ex; /** * Simple constructor @@ -42,8 +44,8 @@ public class ActionStopSlider extends AsyncAction { */ public ActionStopSlider(String name) { super(name); + this.ex = null; } - /** * Stop slider @@ -64,6 +66,7 @@ public class ActionStopSlider extends AsyncAction { this.exitCode = exitCode; this.finalApplicationStatus = finalApplicationStatus; this.message = message; + this.ex = null; } /** @@ -75,16 +78,18 @@ public class ActionStopSlider extends AsyncAction { */ public ActionStopSlider(String name, int exitCode, - FinalApplicationStatus finalApplicationStatus, String message) { + FinalApplicationStatus finalApplicationStatus, + String message) { super(name); this.exitCode = exitCode; this.finalApplicationStatus = finalApplicationStatus; this.message = message; + this.ex = null; } /** * Simple constructor - * @param name action name + * @param ex teardown exception */ public ActionStopSlider(TriggerClusterTeardownException ex) { this("stop", @@ -109,6 +114,7 @@ public class ActionStopSlider extends AsyncAction { } setFinalApplicationStatus(FinalApplicationStatus.FAILED); setMessage(ex.getMessage()); + this.ex = ex; } @Override @@ -149,4 +155,8 @@ public class ActionStopSlider extends AsyncAction { public void setMessage(String message) { this.message = message; } + + public Exception getEx() { + return ex; + } } http://git-wip-us.apache.org/repos/asf/incubator-slider/blob/ee6fd4be/slider-core/src/test/groovy/org/apache/slider/test/SliderTestBase.groovy ---------------------------------------------------------------------- diff --git a/slider-core/src/test/groovy/org/apache/slider/test/SliderTestBase.groovy b/slider-core/src/test/groovy/org/apache/slider/test/SliderTestBase.groovy index 2b75c26..f697ab1 100644 --- a/slider-core/src/test/groovy/org/apache/slider/test/SliderTestBase.groovy +++ b/slider-core/src/test/groovy/org/apache/slider/test/SliderTestBase.groovy @@ -18,7 +18,6 @@ package org.apache.slider.test -import com.codahale.metrics.MetricRegistry import groovy.transform.CompileStatic import org.apache.hadoop.fs.FileUtil import org.apache.slider.common.SliderXMLConfKeysForTesting @@ -32,8 +31,7 @@ import org.junit.rules.TestName /** * Base class for unit tests as well as ones starting mini clusters * -the foundational code and methods - * - * + * */ @CompileStatic
