Repository: spark Updated Branches: refs/heads/master 358ae1534 -> 5fccb567b
[SPARK-2318] When exiting on a signal, print the signal name first. Author: Reynold Xin <[email protected]> Closes #1260 from rxin/signalhandler1 and squashes the following commits: 8e73552 [Reynold Xin] Uh add Logging back in ApplicationMaster. 0402ba8 [Reynold Xin] Synchronize SignalLogger.register. dc70705 [Reynold Xin] Added SignalLogger to YARN ApplicationMaster. 79a21b4 [Reynold Xin] Added license header. 0da052c [Reynold Xin] Added the SignalLogger itself. e587d2e [Reynold Xin] [SPARK-2318] When exiting on a signal, print the signal name first. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/5fccb567 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/5fccb567 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/5fccb567 Branch: refs/heads/master Commit: 5fccb567b37a94445512c7ec20b830b5e062089f Parents: 358ae15 Author: Reynold Xin <[email protected]> Authored: Mon Jun 30 15:12:38 2014 -0700 Committer: Reynold Xin <[email protected]> Committed: Mon Jun 30 15:12:38 2014 -0700 ---------------------------------------------------------------------- .../org/apache/spark/deploy/master/Master.scala | 5 +- .../org/apache/spark/deploy/worker/Worker.scala | 5 +- .../executor/CoarseGrainedExecutorBackend.scala | 6 +- .../spark/executor/MesosExecutorBackend.scala | 5 +- .../org/apache/spark/util/SignalLogger.scala | 60 ++++++++++++++++++++ .../spark/deploy/yarn/ApplicationMaster.scala | 5 +- .../spark/deploy/yarn/ApplicationMaster.scala | 5 +- 7 files changed, 79 insertions(+), 12 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/5fccb567/core/src/main/scala/org/apache/spark/deploy/master/Master.scala ---------------------------------------------------------------------- diff --git a/core/src/main/scala/org/apache/spark/deploy/master/Master.scala b/core/src/main/scala/org/apache/spark/deploy/master/Master.scala index 33ffcbd..11545b8 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/Master.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/Master.scala @@ -41,7 +41,7 @@ import org.apache.spark.deploy.master.ui.MasterWebUI import org.apache.spark.metrics.MetricsSystem import org.apache.spark.scheduler.{EventLoggingListener, ReplayListenerBus} import org.apache.spark.ui.SparkUI -import org.apache.spark.util.{AkkaUtils, Utils} +import org.apache.spark.util.{AkkaUtils, SignalLogger, Utils} private[spark] class Master( host: String, @@ -755,12 +755,13 @@ private[spark] class Master( } } -private[spark] object Master { +private[spark] object Master extends Logging { val systemName = "sparkMaster" private val actorName = "Master" val sparkUrlRegex = "spark://([^:]+):([0-9]+)".r def main(argStrings: Array[String]) { + SignalLogger.register(log) val conf = new SparkConf val args = new MasterArguments(argStrings, conf) val (actorSystem, _, _) = startSystemAndActor(args.host, args.port, args.webUiPort, conf) http://git-wip-us.apache.org/repos/asf/spark/blob/5fccb567/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala ---------------------------------------------------------------------- diff --git a/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala b/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala index a0ecaf7..ce42544 100755 --- a/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala +++ b/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala @@ -34,7 +34,7 @@ import org.apache.spark.deploy.DeployMessages._ import org.apache.spark.deploy.master.{DriverState, Master} import org.apache.spark.deploy.worker.ui.WorkerWebUI import org.apache.spark.metrics.MetricsSystem -import org.apache.spark.util.{AkkaUtils, Utils} +import org.apache.spark.util.{AkkaUtils, SignalLogger, Utils} /** * @param masterUrls Each url should look like spark://host:port. @@ -365,8 +365,9 @@ private[spark] class Worker( } } -private[spark] object Worker { +private[spark] object Worker extends Logging { def main(argStrings: Array[String]) { + SignalLogger.register(log) val args = new WorkerArguments(argStrings) val (actorSystem, _) = startSystemAndActor(args.host, args.port, args.webUiPort, args.cores, args.memory, args.masters, args.workDir) http://git-wip-us.apache.org/repos/asf/spark/blob/5fccb567/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala ---------------------------------------------------------------------- diff --git a/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala b/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala index b5fd334..8d31bd0 100644 --- a/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala +++ b/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala @@ -31,7 +31,7 @@ import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.deploy.worker.WorkerWatcher import org.apache.spark.scheduler.TaskDescription import org.apache.spark.scheduler.cluster.CoarseGrainedClusterMessages._ -import org.apache.spark.util.{AkkaUtils, Utils} +import org.apache.spark.util.{AkkaUtils, SignalLogger, Utils} private[spark] class CoarseGrainedExecutorBackend( driverUrl: String, @@ -97,10 +97,12 @@ private[spark] class CoarseGrainedExecutorBackend( } } -private[spark] object CoarseGrainedExecutorBackend { +private[spark] object CoarseGrainedExecutorBackend extends Logging { def run(driverUrl: String, executorId: String, hostname: String, cores: Int, workerUrl: Option[String]) { + SignalLogger.register(log) + SparkHadoopUtil.get.runAsSparkUser { () => // Debug code Utils.checkHost(hostname) http://git-wip-us.apache.org/repos/asf/spark/blob/5fccb567/core/src/main/scala/org/apache/spark/executor/MesosExecutorBackend.scala ---------------------------------------------------------------------- diff --git a/core/src/main/scala/org/apache/spark/executor/MesosExecutorBackend.scala b/core/src/main/scala/org/apache/spark/executor/MesosExecutorBackend.scala index 7410049..2232e62 100644 --- a/core/src/main/scala/org/apache/spark/executor/MesosExecutorBackend.scala +++ b/core/src/main/scala/org/apache/spark/executor/MesosExecutorBackend.scala @@ -25,8 +25,8 @@ import org.apache.mesos.Protos.{TaskStatus => MesosTaskStatus, _} import org.apache.spark.{Logging, TaskState} import org.apache.spark.TaskState.TaskState -import org.apache.spark.util.Utils import org.apache.spark.deploy.SparkHadoopUtil +import org.apache.spark.util.{SignalLogger, Utils} private[spark] class MesosExecutorBackend extends MesosExecutor @@ -93,8 +93,9 @@ private[spark] class MesosExecutorBackend /** * Entry point for Mesos executor. */ -private[spark] object MesosExecutorBackend { +private[spark] object MesosExecutorBackend extends Logging { def main(args: Array[String]) { + SignalLogger.register(log) SparkHadoopUtil.get.runAsSparkUser { () => MesosNativeLibrary.load() // Create a new Executor and start it running http://git-wip-us.apache.org/repos/asf/spark/blob/5fccb567/core/src/main/scala/org/apache/spark/util/SignalLogger.scala ---------------------------------------------------------------------- diff --git a/core/src/main/scala/org/apache/spark/util/SignalLogger.scala b/core/src/main/scala/org/apache/spark/util/SignalLogger.scala new file mode 100644 index 0000000..d769b54 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/util/SignalLogger.scala @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.util + +import org.apache.commons.lang.SystemUtils +import org.slf4j.Logger +import sun.misc.{Signal, SignalHandler} + +/** + * Used to log signals received. This can be very useful in debugging crashes or kills. + * + * Inspired by Colin Patrick McCabe's similar class from Hadoop. + */ +private[spark] object SignalLogger { + + private var registered = false + + /** Register a signal handler to log signals on UNIX-like systems. */ + def register(log: Logger): Unit = synchronized { + if (SystemUtils.IS_OS_UNIX) { + require(!registered, "Can't re-install the signal handlers") + registered = true + + val signals = Seq("TERM", "HUP", "INT") + for (signal <- signals) { + try { + new SignalLoggerHandler(signal, log) + } catch { + case e: Exception => log.warn("Failed to register signal handler " + signal, e) + } + } + log.info("Registered signal handlers for [" + signals.mkString(", ") + "]") + } + } +} + +private sealed class SignalLoggerHandler(name: String, log: Logger) extends SignalHandler { + + val prevHandler = Signal.handle(new Signal(name), this) + + override def handle(signal: Signal): Unit = { + log.error("RECEIVED SIGNAL " + signal.getNumber() + ": SIG" + signal.getName()) + prevHandler.handle(signal) + } +} http://git-wip-us.apache.org/repos/asf/spark/blob/5fccb567/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala ---------------------------------------------------------------------- diff --git a/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala b/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala index 1cc9c33..438737f 100644 --- a/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala +++ b/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala @@ -37,7 +37,7 @@ import org.apache.hadoop.yarn.util.{ConverterUtils, Records} import org.apache.spark.{Logging, SecurityManager, SparkConf, SparkContext} import org.apache.spark.deploy.SparkHadoopUtil -import org.apache.spark.util.Utils +import org.apache.spark.util.{SignalLogger, Utils} /** * An application master that runs the users driver program and allocates executors. @@ -409,7 +409,7 @@ class ApplicationMaster(args: ApplicationMasterArguments, conf: Configuration, } -object ApplicationMaster { +object ApplicationMaster extends Logging { // Number of times to wait for the allocator loop to complete. // Each loop iteration waits for 100ms, so maximum of 3 seconds. // This is to ensure that we have reasonable number of containers before we start @@ -487,6 +487,7 @@ object ApplicationMaster { } def main(argStrings: Array[String]) { + SignalLogger.register(log) val args = new ApplicationMasterArguments(argStrings) SparkHadoopUtil.get.runAsSparkUser { () => new ApplicationMaster(args).run() http://git-wip-us.apache.org/repos/asf/spark/blob/5fccb567/yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala ---------------------------------------------------------------------- diff --git a/yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala b/yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala index 6244332..ee1e9c9 100644 --- a/yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala +++ b/yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala @@ -37,7 +37,7 @@ import org.apache.hadoop.yarn.webapp.util.WebAppUtils import org.apache.spark.{Logging, SecurityManager, SparkConf, SparkContext} import org.apache.spark.deploy.SparkHadoopUtil -import org.apache.spark.util.Utils +import org.apache.spark.util.{SignalLogger, Utils} /** @@ -363,7 +363,7 @@ class ApplicationMaster(args: ApplicationMasterArguments, conf: Configuration, } -object ApplicationMaster { +object ApplicationMaster extends Logging { // Number of times to wait for the allocator loop to complete. // Each loop iteration waits for 100ms, so maximum of 3 seconds. // This is to ensure that we have reasonable number of containers before we start @@ -455,6 +455,7 @@ object ApplicationMaster { } def main(argStrings: Array[String]) { + SignalLogger.register(log) val args = new ApplicationMasterArguments(argStrings) SparkHadoopUtil.get.runAsSparkUser { () => new ApplicationMaster(args).run()
