Repository: reef Updated Branches: refs/heads/master 8ef1b8361 -> 0fb3f8d9a
[REEF-1708] use proper defaults and better logging in YarnContainerManager * Use better (default) parameters when registering and unregistering the AM with YARN; * Implement better logging for the AM registration/unregistration process. This is required for debugging the Unmanaged AM mode and otherwise has no impact on the REEF functionality. JIRA: [REEF-1708](https://issues.apache.org/jira/browse/REEF-1708) Closes #1222 Project: http://git-wip-us.apache.org/repos/asf/reef/repo Commit: http://git-wip-us.apache.org/repos/asf/reef/commit/0fb3f8d9 Tree: http://git-wip-us.apache.org/repos/asf/reef/tree/0fb3f8d9 Diff: http://git-wip-us.apache.org/repos/asf/reef/diff/0fb3f8d9 Branch: refs/heads/master Commit: 0fb3f8d9a885c0431b2a6d01cb69c7ce486149ab Parents: 8ef1b83 Author: Sergiy Matusevych <[email protected]> Authored: Wed Jan 11 18:20:05 2017 -0800 Committer: Yunseong Lee <[email protected]> Committed: Fri Jan 13 23:43:30 2017 +0900 ---------------------------------------------------------------------- .../yarn/driver/YarnContainerManager.java | 36 +++++++++++++++----- 1 file changed, 27 insertions(+), 9 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/reef/blob/0fb3f8d9/lang/java/reef-runtime-yarn/src/main/java/org/apache/reef/runtime/yarn/driver/YarnContainerManager.java ---------------------------------------------------------------------- diff --git a/lang/java/reef-runtime-yarn/src/main/java/org/apache/reef/runtime/yarn/driver/YarnContainerManager.java b/lang/java/reef-runtime-yarn/src/main/java/org/apache/reef/runtime/yarn/driver/YarnContainerManager.java index 41be1e8..e9e189b 100644 --- a/lang/java/reef-runtime-yarn/src/main/java/org/apache/reef/runtime/yarn/driver/YarnContainerManager.java +++ b/lang/java/reef-runtime-yarn/src/main/java/org/apache/reef/runtime/yarn/driver/YarnContainerManager.java @@ -62,6 +62,12 @@ final class YarnContainerManager private static final String 
RUNTIME_NAME = "YARN"; + /** Default hostname to provide in the Application Master registration. */ + private static final String AM_REGISTRATION_HOST = ""; + + /** Default port number to provide in the Application Master registration. */ + private static final int AM_REGISTRATION_PORT = -1; + private final Queue<AMRMClient.ContainerRequest> requestsBeforeSentToRM = new ConcurrentLinkedQueue<>(); private final Queue<AMRMClient.ContainerRequest> requestsAfterSentToRM = new ConcurrentLinkedQueue<>(); private final Map<String, String> nodeIdToRackName = new ConcurrentHashMap<>(); @@ -112,7 +118,7 @@ final class YarnContainerManager this.reefFileNames = reefFileNames; this.progressProvider = progressProvider; - LOG.log(Level.FINEST, "Instantiated YarnContainerManager"); + LOG.log(Level.FINEST, "Instantiated YarnContainerManager: {0}", this.registration); } /** @@ -183,9 +189,8 @@ final class YarnContainerManager return Math.max(Math.min(1, progressProvider.get().getProgress()), 0); } catch (final Exception e) { // An Exception must be caught and logged here because YARN swallows the Exception and fails the job. - LOG.log(Level.WARNING, "An exception occurred in ProgressProvider.getProgress(), with message : " + - e.getMessage() + ". Returning 0 as progress."); - return 0f; + LOG.log(Level.WARNING, "Cannot get the application progress. Will return 0.", e); + return 0; } } @@ -299,17 +304,23 @@ final class YarnContainerManager * This method is called from DriverRuntimeStartHandler via YARNRuntimeStartHandler. 
*/ void onStart() { + + LOG.log(Level.FINEST, "YARN registration: begin"); + this.resourceManager.init(this.yarnConf); this.resourceManager.start(); this.nodeManager.init(this.yarnConf); this.nodeManager.start(); + LOG.log(Level.FINEST, "YARN registration: registered with RM and NM"); + try { - this.registration.setRegistration( - this.resourceManager.registerApplicationMaster("", 0, this.trackingURLProvider.getTrackingUrl())); - LOG.log(Level.FINE, "YARN registration: {0}", this.registration); + this.registration.setRegistration(this.resourceManager.registerApplicationMaster( + AM_REGISTRATION_HOST, AM_REGISTRATION_PORT, this.trackingURLProvider.getTrackingUrl())); + + LOG.log(Level.FINE, "YARN registration: AM registered: {0}", this.registration); final FileSystem fs = FileSystem.get(this.yarnConf); final Path outputFileName = new Path(this.jobSubmissionDirectory, this.reefFileNames.getDriverHttpEndpoint()); @@ -321,6 +332,8 @@ final class YarnContainerManager LOG.log(Level.WARNING, "Unable to register application master.", e); onRuntimeError(e); } + + LOG.log(Level.FINEST, "YARN registration: done: {0}", this.registration); } /** @@ -340,7 +353,8 @@ final class YarnContainerManager this.reefEventHandlers.close(); if (exception == null) { - this.resourceManager.unregisterApplicationMaster(FinalApplicationStatus.SUCCEEDED, null, null); + this.resourceManager.unregisterApplicationMaster( + FinalApplicationStatus.SUCCEEDED, "Success!", this.trackingURLProvider.getTrackingUrl()); } else { // Note: We don't allow RM to restart our applications if it's an application level failure. 
@@ -351,10 +365,13 @@ final class YarnContainerManager final String failureMsg = String.format("Application failed due to:%n%s%n" + "With stack trace:%n%s", exception.getMessage(), ExceptionUtils.getStackTrace(exception)); - this.resourceManager.unregisterApplicationMaster(FinalApplicationStatus.FAILED, failureMsg, null); + this.resourceManager.unregisterApplicationMaster( + FinalApplicationStatus.FAILED, failureMsg, this.trackingURLProvider.getTrackingUrl()); } this.resourceManager.close(); + LOG.log(Level.FINEST, "Container ResourceManager stopped successfully"); + } catch (final Exception e) { LOG.log(Level.WARNING, "Error shutting down YARN application", e); } @@ -363,6 +380,7 @@ final class YarnContainerManager if (this.nodeManager.getServiceState() == Service.STATE.STARTED) { try { this.nodeManager.close(); + LOG.log(Level.FINEST, "Container NodeManager stopped successfully"); } catch (final IOException e) { LOG.log(Level.WARNING, "Error closing YARN Node Manager", e); }
