Repository: spark Updated Branches: refs/heads/branch-1.0 822353dc5 -> aef8a4a51
Include stack trace for exceptions thrown by user code. It is very confusing when your code throws an exception, but the only stack trace shown is in the DAGScheduler. This is a simple patch to include the stack trace for the actual failure in the error message. Suggestions on formatting welcome. Before: ``` scala> sc.parallelize(1 :: Nil).map(_ => sys.error("Ahh!")).collect() org.apache.spark.SparkException: Job aborted due to stage failure: Task 0.0:3 failed 1 times (most recent failure: Exception failure in TID 3 on host localhost: java.lang.RuntimeException: Ahh!) at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1055) at org.apache.spark.scheduler.DAGScheduler$$anonfun$org$apache$spark$scheduler$DAGScheduler$$abortStage$1.apply(DAGScheduler.scala:1039) at org.apache.spark.scheduler.DAGScheduler$$anonfun$org$apache$spark$scheduler$DAGScheduler$$abortStage$1.apply(DAGScheduler.scala:1037) ... ``` After: ``` org.apache.spark.SparkException: Job aborted due to stage failure: Task 0.0:3 failed 1 times, most recent failure: Exception failure in TID 3 on host localhost: java.lang.RuntimeException: Ahh! 
scala.sys.package$.error(package.scala:27) $iwC$$iwC$$iwC$$iwC$$anonfun$1.apply(<console>:13) $iwC$$iwC$$iwC$$iwC$$anonfun$1.apply(<console>:13) scala.collection.Iterator$$anon$11.next(Iterator.scala:328) scala.collection.Iterator$class.foreach(Iterator.scala:727) scala.collection.AbstractIterator.foreach(Iterator.scala:1157) scala.collection.generic.Growable$class.$plus$plus$eq(Growable.scala:48) scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:103) scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:47) scala.collection.TraversableOnce$class.to(TraversableOnce.scala:273) scala.collection.AbstractIterator.to(Iterator.scala:1157) scala.collection.TraversableOnce$class.toBuffer(TraversableOnce.scala:265) scala.collection.AbstractIterator.toBuffer(Iterator.scala:1157) scala.collection.TraversableOnce$class.toArray(TraversableOnce.scala:252) scala.collection.AbstractIterator.toArray(Iterator.scala:1157) org.apache.spark.rdd.RDD$$anonfun$6.apply(RDD.scala:676) org.apache.spark.rdd.RDD$$anonfun$6.apply(RDD.scala:676) org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:1048) org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:1048) org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:110) org.apache.spark.scheduler.Task.run(Task.scala:50) org.apache.spark.executor.Executor$TaskRunner$$anonfun$run$1.apply$mcV$sp(Executor.scala:211) org.apache.spark.deploy.SparkHadoopUtil.runAsUser(SparkHadoopUtil.scala:46) org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:176) java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) java.lang.Thread.run(Thread.java:744) Driver stacktrace: at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1055) at 
org.apache.spark.scheduler.DAGScheduler$$anonfun$org$apache$spark$scheduler$DAGScheduler$$abortStage$1.apply(DAGScheduler.scala:1039) at org.apache.spark.scheduler.DAGScheduler$$anonfun$org$apache$spark$scheduler$DAGScheduler$$abortStage$1.apply(DAGScheduler.scala:1037) at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59) at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47) at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$abortStage(DAGScheduler.scala:1037) at org.apache.spark.scheduler.DAGScheduler$$anonfun$processEvent$10.apply(DAGScheduler.scala:614) at org.apache.spark.scheduler.DAGScheduler$$anonfun$processEvent$10.apply(DAGScheduler.scala:614) at scala.Option.foreach(Option.scala:236) at org.apache.spark.scheduler.DAGScheduler.processEvent(DAGScheduler.scala:614) at org.apache.spark.scheduler.DAGScheduler$$anonfun$start$1$$anon$2$$anonfun$receive$1.applyOrElse(DAGScheduler.scala:143) at akka.actor.ActorCell.receiveMessage(ActorCell.scala:498) at akka.actor.ActorCell.invoke(ActorCell.scala:456) at akka.dispatch.Mailbox.processMailbox(Mailbox.scala:237) at akka.dispatch.Mailbox.run(Mailbox.scala:219) at akka.dispatch.ForkJoinExecutorConfigurator$AkkaForkJoinTask.exec(AbstractDispatcher.scala:386) at scala.concurrent.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260) at scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339) at scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979) at scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107) ``` Author: Michael Armbrust <mich...@databricks.com> Closes #409 from marmbrus/stacktraces and squashes the following commits: 3e4eb65 [Michael Armbrust] indent. include header for driver stack trace. 018b06b [Michael Armbrust] Include stack trace for exceptions in user code. 
(cherry picked from commit d4916a8eeb747e748b9fba380e9c9503ed11faed) Signed-off-by: Reynold Xin <r...@apache.org> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/aef8a4a5 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/aef8a4a5 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/aef8a4a5 Branch: refs/heads/branch-1.0 Commit: aef8a4a51c6f7f982e31d7636f66eab595d34f2e Parents: 822353d Author: Michael Armbrust <mich...@databricks.com> Authored: Wed Apr 16 18:12:56 2014 -0700 Committer: Reynold Xin <r...@apache.org> Committed: Wed Apr 16 18:13:06 2014 -0700 ---------------------------------------------------------------------- .../main/scala/org/apache/spark/scheduler/TaskSetManager.scala | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/aef8a4a5/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala ---------------------------------------------------------------------- diff --git a/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala index 86d2050..a81b834 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala @@ -538,8 +538,8 @@ private[spark] class TaskSetManager( return } val key = ef.description - failureReason = "Exception failure in TID %s on host %s: %s".format( - tid, info.host, ef.description) + failureReason = "Exception failure in TID %s on host %s: %s\n%s".format( + tid, info.host, ef.description, ef.stackTrace.map(" " + _).mkString("\n")) val now = clock.getTime() val (printFull, dupCount) = { if (recentExceptions.contains(key)) { @@ -582,7 +582,7 @@ private[spark] class TaskSetManager( if (numFailures(index) >= maxTaskFailures) { logError("Task %s:%d 
failed %d times; aborting job".format( taskSet.id, index, maxTaskFailures)) - abort("Task %s:%d failed %d times (most recent failure: %s)".format( + abort("Task %s:%d failed %d times, most recent failure: %s\nDriver stacktrace:".format( taskSet.id, index, maxTaskFailures, failureReason)) return }