Repository: spark
Updated Branches:
  refs/heads/master 8bb9414aa -> f8346d2fc


[SPARK-25174][YARN] Limit the size of diagnostic message for am to unregister 
itself from rm

## What changes were proposed in this pull request?

With older Spark releases, a use case generated a huge code-gen file which
hit the limitation `Constant pool has grown past JVM limit of 0xFFFF`. In this
situation, the application should fail immediately. But because the diagnostic
message sent to the RM was too large, the ApplicationMaster was suspended and
the RM's ZKStateStore crashed. For Spark 2.3 and later releases the code-gen
limitation has been removed, but there may still be uncaught exceptions with
oversized error messages that can cause the same problem.

This PR aims to cut down the size of the diagnostic message.

## How was this patch tested?

Please review http://spark.apache.org/contributing.html before opening a pull 
request.

Closes #22180 from yaooqinn/SPARK-25174.

Authored-by: Kent Yao <[email protected]>
Signed-off-by: Marcelo Vanzin <[email protected]>


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/f8346d2f
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/f8346d2f
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/f8346d2f

Branch: refs/heads/master
Commit: f8346d2fc01f1e881e4e3f9c4499bf5f9e3ceb3f
Parents: 8bb9414
Author: Kent Yao <[email protected]>
Authored: Fri Aug 24 13:44:19 2018 -0700
Committer: Marcelo Vanzin <[email protected]>
Committed: Fri Aug 24 13:44:19 2018 -0700

----------------------------------------------------------------------
 .../scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala | 5 +++--
 .../src/main/scala/org/apache/spark/deploy/yarn/config.scala   | 6 ++++++
 2 files changed, 9 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/f8346d2f/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
----------------------------------------------------------------------
diff --git 
a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
 
b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
index 55ed114..8f94e3f 100644
--- 
a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
+++ 
b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
@@ -19,7 +19,7 @@ package org.apache.spark.deploy.yarn
 
 import java.io.{File, IOException}
 import java.lang.reflect.{InvocationTargetException, Modifier}
-import java.net.{Socket, URI, URL}
+import java.net.{URI, URL}
 import java.security.PrivilegedExceptionAction
 import java.util.concurrent.{TimeoutException, TimeUnit}
 
@@ -28,6 +28,7 @@ import scala.concurrent.Promise
 import scala.concurrent.duration.Duration
 import scala.util.control.NonFatal
 
+import org.apache.commons.lang3.{StringUtils => ComStrUtils}
 import org.apache.hadoop.fs.{FileSystem, Path}
 import org.apache.hadoop.util.StringUtils
 import org.apache.hadoop.yarn.api._
@@ -368,7 +369,7 @@ private[spark] class ApplicationMaster(args: 
ApplicationMasterArguments) extends
         }
         logInfo(s"Final app status: $finalStatus, exitCode: $exitCode" +
           Option(msg).map(msg => s", (reason: $msg)").getOrElse(""))
-        finalMsg = msg
+        finalMsg = ComStrUtils.abbreviate(msg, 
sparkConf.get(AM_FINAL_MSG_LIMIT).toInt)
         finished = true
         if (!inShutdown && Thread.currentThread() != reporterThread && 
reporterThread != null) {
           logDebug("shutting down reporter thread")

http://git-wip-us.apache.org/repos/asf/spark/blob/f8346d2f/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/config.scala
----------------------------------------------------------------------
diff --git 
a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/config.scala
 
b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/config.scala
index 1013fd2..ab8273b 100644
--- 
a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/config.scala
+++ 
b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/config.scala
@@ -192,6 +192,12 @@ package object config {
     .toSequence
     .createWithDefault(Nil)
 
+  private[spark] val AM_FINAL_MSG_LIMIT = 
ConfigBuilder("spark.yarn.am.finalMessageLimit")
+    .doc("The limit size of final diagnostic message for our ApplicationMaster 
to unregister from" +
+      " the ResourceManager.")
+    .bytesConf(ByteUnit.BYTE)
+    .createWithDefaultString("1m")
+
   /* Client-mode AM configuration. */
 
   private[spark] val AM_CORES = ConfigBuilder("spark.yarn.am.cores")


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to