Repository: spark Updated Branches: refs/heads/master 8bb9414aa -> f8346d2fc
[SPARK-25174][YARN] Limit the size of diagnostic message for am to unregister itself from rm ## What changes were proposed in this pull request? When using older versions of spark releases, a use case generated a huge code-gen file which hit the limitation `Constant pool has grown past JVM limit of 0xFFFF`. In this situation, it should fail immediately. But the diagnosis message sent to RM is too large, the ApplicationMaster suspended and RM's ZKStateStore was crashed. For 2.3 or later spark releases the limitation of code-gen has been removed, but maybe there are still some uncaught exceptions that contain oversized error message will cause such a problem. This PR is aim to cut down the diagnosis message size. ## How was this patch tested? Please review http://spark.apache.org/contributing.html before opening a pull request. Closes #22180 from yaooqinn/SPARK-25174. Authored-by: Kent Yao <[email protected]> Signed-off-by: Marcelo Vanzin <[email protected]> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/f8346d2f Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/f8346d2f Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/f8346d2f Branch: refs/heads/master Commit: f8346d2fc01f1e881e4e3f9c4499bf5f9e3ceb3f Parents: 8bb9414 Author: Kent Yao <[email protected]> Authored: Fri Aug 24 13:44:19 2018 -0700 Committer: Marcelo Vanzin <[email protected]> Committed: Fri Aug 24 13:44:19 2018 -0700 ---------------------------------------------------------------------- .../scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala | 5 +++-- .../src/main/scala/org/apache/spark/deploy/yarn/config.scala | 6 ++++++ 2 files changed, 9 insertions(+), 2 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/f8346d2f/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala ---------------------------------------------------------------------- diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala index 55ed114..8f94e3f 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala @@ -19,7 +19,7 @@ package org.apache.spark.deploy.yarn import java.io.{File, IOException} import java.lang.reflect.{InvocationTargetException, Modifier} -import java.net.{Socket, URI, URL} +import java.net.{URI, URL} import java.security.PrivilegedExceptionAction import java.util.concurrent.{TimeoutException, TimeUnit} @@ -28,6 +28,7 @@ import scala.concurrent.Promise import scala.concurrent.duration.Duration import scala.util.control.NonFatal +import org.apache.commons.lang3.{StringUtils => ComStrUtils} import org.apache.hadoop.fs.{FileSystem, Path} import org.apache.hadoop.util.StringUtils import org.apache.hadoop.yarn.api._ @@ -368,7 +369,7 @@ private[spark] class ApplicationMaster(args: ApplicationMasterArguments) extends } logInfo(s"Final app status: $finalStatus, exitCode: $exitCode" + Option(msg).map(msg => s", (reason: $msg)").getOrElse("")) - finalMsg = msg + finalMsg = ComStrUtils.abbreviate(msg, sparkConf.get(AM_FINAL_MSG_LIMIT).toInt) finished = true if (!inShutdown && Thread.currentThread() != reporterThread && reporterThread != null) { logDebug("shutting down reporter thread") http://git-wip-us.apache.org/repos/asf/spark/blob/f8346d2f/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/config.scala ---------------------------------------------------------------------- diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/config.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/config.scala index 1013fd2..ab8273b 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/config.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/config.scala @@ -192,6 +192,12 @@ package object config { .toSequence .createWithDefault(Nil) + private[spark] val AM_FINAL_MSG_LIMIT = ConfigBuilder("spark.yarn.am.finalMessageLimit") + .doc("The limit size of final diagnostic message for our ApplicationMaster to unregister from" + + " the ResourceManager.") + .bytesConf(ByteUnit.BYTE) + .createWithDefaultString("1m") + /* Client-mode AM configuration. */ private[spark] val AM_CORES = ConfigBuilder("spark.yarn.am.cores") --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
