[
https://issues.apache.org/jira/browse/SPARK-6086?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Patrick Wendell updated SPARK-6086:
-----------------------------------
Description:
ClassCastExceptions occur in DAGScheduler.updateAccumulators when the
DAGScheduler is collecting status from tasks. These exceptions happen
occasionally, especially when there are many stages in a job.
Application code:
https://github.com/kai-zeng/spark/blob/accum-bug/examples/src/main/scala/org/apache/spark/examples/sql/hive/SQLSuite.scala
Script used: ./bin/spark-submit --class
org.apache.spark.examples.sql.hive.SQLSuite
examples/target/scala-2.10/spark-examples-1.3.0-SNAPSHOT-hadoop1.0.4.jar
benchmark-cache 6
There are two types of error messages:
{code}
java.lang.ClassCastException: scala.None$ cannot be cast to
scala.collection.TraversableOnce
at
org.apache.spark.GrowableAccumulableParam.addInPlace(Accumulators.scala:188)
at org.apache.spark.Accumulable.$plus$plus$eq(Accumulators.scala:82)
at org.apache.spark.Accumulators$$anonfun$add$2.apply(Accumulators.scala:340)
at org.apache.spark.Accumulators$$anonfun$add$2.apply(Accumulators.scala:335)
at
scala.collection.TraversableLike$WithFilter$$anonfun$foreach$1.apply(TraversableLike.scala:772)
at scala.collection.mutable.HashMap$$anonfun$foreach$1.apply(HashMap.scala:98)
at scala.collection.mutable.HashMap$$anonfun$foreach$1.apply(HashMap.scala:98)
at scala.collection.mutable.HashTable$class.foreachEntry(HashTable.scala:226)
at scala.collection.mutable.HashMap.foreachEntry(HashMap.scala:39)
at scala.collection.mutable.HashMap.foreach(HashMap.scala:98)
at
scala.collection.TraversableLike$WithFilter.foreach(TraversableLike.scala:771)
at org.apache.spark.Accumulators$.add(Accumulators.scala:335)
at
org.apache.spark.scheduler.DAGScheduler.updateAccumulators(DAGScheduler.scala:892)
at
org.apache.spark.scheduler.DAGScheduler.handleTaskCompletion(DAGScheduler.scala:1000)
at
org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1398)
at
org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1362)
at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
{code}
{code}
java.lang.ClassCastException: scala.None$ cannot be cast to java.lang.Integer
at scala.runtime.BoxesRunTime.unboxToInt(BoxesRunTime.java:106)
at
org.apache.spark.AccumulatorParam$IntAccumulatorParam$.addInPlace(Accumulators.scala:263)
at org.apache.spark.Accumulable.$plus$plus$eq(Accumulators.scala:82)
at org.apache.spark.Accumulators$$anonfun$add$2.apply(Accumulators.scala:340)
at org.apache.spark.Accumulators$$anonfun$add$2.apply(Accumulators.scala:335)
at
scala.collection.TraversableLike$WithFilter$$anonfun$foreach$1.apply(TraversableLike.scala:772)
at scala.collection.mutable.HashMap$$anonfun$foreach$1.apply(HashMap.scala:98)
at scala.collection.mutable.HashMap$$anonfun$foreach$1.apply(HashMap.scala:98)
at scala.collection.mutable.HashTable$class.foreachEntry(HashTable.scala:226)
at scala.collection.mutable.HashMap.foreachEntry(HashMap.scala:39)
at scala.collection.mutable.HashMap.foreach(HashMap.scala:98)
at
scala.collection.TraversableLike$WithFilter.foreach(TraversableLike.scala:771)
at org.apache.spark.Accumulators$.add(Accumulators.scala:335)
at
org.apache.spark.scheduler.DAGScheduler.updateAccumulators(DAGScheduler.scala:892)
at
org.apache.spark.scheduler.DAGScheduler.handleTaskCompletion(DAGScheduler.scala:1000)
at
org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1398)
at
org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1362)
at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
{code}
was:
ClassCastExceptions occur in DAGScheduler.updateAccumulators when the
DAGScheduler is collecting status from tasks. These exceptions happen
occasionally, especially when there are many stages in a job.
Application code:
https://github.com/kai-zeng/spark/blob/accum-bug/examples/src/main/scala/org/apache/spark/examples/sql/hive/SQLSuite.scala
Script used: ./bin/spark-submit --class
org.apache.spark.examples.sql.hive.SQLSuite
examples/target/scala-2.10/spark-examples-1.3.0-SNAPSHOT-hadoop1.0.4.jar
benchmark-cache 6
There are two types of error messages:
java.lang.ClassCastException: scala.None$ cannot be cast to
scala.collection.TraversableOnce
at
org.apache.spark.GrowableAccumulableParam.addInPlace(Accumulators.scala:188)
at org.apache.spark.Accumulable.$plus$plus$eq(Accumulators.scala:82)
at org.apache.spark.Accumulators$$anonfun$add$2.apply(Accumulators.scala:340)
at org.apache.spark.Accumulators$$anonfun$add$2.apply(Accumulators.scala:335)
at
scala.collection.TraversableLike$WithFilter$$anonfun$foreach$1.apply(TraversableLike.scala:772)
at scala.collection.mutable.HashMap$$anonfun$foreach$1.apply(HashMap.scala:98)
at scala.collection.mutable.HashMap$$anonfun$foreach$1.apply(HashMap.scala:98)
at scala.collection.mutable.HashTable$class.foreachEntry(HashTable.scala:226)
at scala.collection.mutable.HashMap.foreachEntry(HashMap.scala:39)
at scala.collection.mutable.HashMap.foreach(HashMap.scala:98)
at
scala.collection.TraversableLike$WithFilter.foreach(TraversableLike.scala:771)
at org.apache.spark.Accumulators$.add(Accumulators.scala:335)
at
org.apache.spark.scheduler.DAGScheduler.updateAccumulators(DAGScheduler.scala:892)
at
org.apache.spark.scheduler.DAGScheduler.handleTaskCompletion(DAGScheduler.scala:1000)
at
org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1398)
at
org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1362)
at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
java.lang.ClassCastException: scala.None$ cannot be cast to java.lang.Integer
at scala.runtime.BoxesRunTime.unboxToInt(BoxesRunTime.java:106)
at
org.apache.spark.AccumulatorParam$IntAccumulatorParam$.addInPlace(Accumulators.scala:263)
at org.apache.spark.Accumulable.$plus$plus$eq(Accumulators.scala:82)
at org.apache.spark.Accumulators$$anonfun$add$2.apply(Accumulators.scala:340)
at org.apache.spark.Accumulators$$anonfun$add$2.apply(Accumulators.scala:335)
at
scala.collection.TraversableLike$WithFilter$$anonfun$foreach$1.apply(TraversableLike.scala:772)
at scala.collection.mutable.HashMap$$anonfun$foreach$1.apply(HashMap.scala:98)
at scala.collection.mutable.HashMap$$anonfun$foreach$1.apply(HashMap.scala:98)
at scala.collection.mutable.HashTable$class.foreachEntry(HashTable.scala:226)
at scala.collection.mutable.HashMap.foreachEntry(HashMap.scala:39)
at scala.collection.mutable.HashMap.foreach(HashMap.scala:98)
at
scala.collection.TraversableLike$WithFilter.foreach(TraversableLike.scala:771)
at org.apache.spark.Accumulators$.add(Accumulators.scala:335)
at
org.apache.spark.scheduler.DAGScheduler.updateAccumulators(DAGScheduler.scala:892)
at
org.apache.spark.scheduler.DAGScheduler.handleTaskCompletion(DAGScheduler.scala:1000)
at
org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1398)
at
org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1362)
at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
> Exceptions in DAGScheduler.updateAccumulators
> ---------------------------------------------
>
> Key: SPARK-6086
> URL: https://issues.apache.org/jira/browse/SPARK-6086
> Project: Spark
> Issue Type: Bug
> Components: Scheduler, Spark Core
> Affects Versions: 1.3.0
> Reporter: Kai Zeng
> Priority: Critical
>
> ClassCastExceptions occur in DAGScheduler.updateAccumulators when the
> DAGScheduler is collecting status from tasks. These exceptions happen
> occasionally, especially when there are many stages in a job.
> Application code:
> https://github.com/kai-zeng/spark/blob/accum-bug/examples/src/main/scala/org/apache/spark/examples/sql/hive/SQLSuite.scala
> Script used: ./bin/spark-submit --class
> org.apache.spark.examples.sql.hive.SQLSuite
> examples/target/scala-2.10/spark-examples-1.3.0-SNAPSHOT-hadoop1.0.4.jar
> benchmark-cache 6
> There are two types of error messages:
> {code}
> java.lang.ClassCastException: scala.None$ cannot be cast to
> scala.collection.TraversableOnce
> at
> org.apache.spark.GrowableAccumulableParam.addInPlace(Accumulators.scala:188)
> at org.apache.spark.Accumulable.$plus$plus$eq(Accumulators.scala:82)
> at
> org.apache.spark.Accumulators$$anonfun$add$2.apply(Accumulators.scala:340)
> at
> org.apache.spark.Accumulators$$anonfun$add$2.apply(Accumulators.scala:335)
> at
> scala.collection.TraversableLike$WithFilter$$anonfun$foreach$1.apply(TraversableLike.scala:772)
> at
> scala.collection.mutable.HashMap$$anonfun$foreach$1.apply(HashMap.scala:98)
> at
> scala.collection.mutable.HashMap$$anonfun$foreach$1.apply(HashMap.scala:98)
> at
> scala.collection.mutable.HashTable$class.foreachEntry(HashTable.scala:226)
> at scala.collection.mutable.HashMap.foreachEntry(HashMap.scala:39)
> at scala.collection.mutable.HashMap.foreach(HashMap.scala:98)
> at
> scala.collection.TraversableLike$WithFilter.foreach(TraversableLike.scala:771)
> at org.apache.spark.Accumulators$.add(Accumulators.scala:335)
> at
> org.apache.spark.scheduler.DAGScheduler.updateAccumulators(DAGScheduler.scala:892)
> at
> org.apache.spark.scheduler.DAGScheduler.handleTaskCompletion(DAGScheduler.scala:1000)
> at
> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1398)
> at
> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1362)
> at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
> {code}
> {code}
> java.lang.ClassCastException: scala.None$ cannot be cast to java.lang.Integer
> at scala.runtime.BoxesRunTime.unboxToInt(BoxesRunTime.java:106)
> at
> org.apache.spark.AccumulatorParam$IntAccumulatorParam$.addInPlace(Accumulators.scala:263)
> at org.apache.spark.Accumulable.$plus$plus$eq(Accumulators.scala:82)
> at
> org.apache.spark.Accumulators$$anonfun$add$2.apply(Accumulators.scala:340)
> at
> org.apache.spark.Accumulators$$anonfun$add$2.apply(Accumulators.scala:335)
> at
> scala.collection.TraversableLike$WithFilter$$anonfun$foreach$1.apply(TraversableLike.scala:772)
> at
> scala.collection.mutable.HashMap$$anonfun$foreach$1.apply(HashMap.scala:98)
> at
> scala.collection.mutable.HashMap$$anonfun$foreach$1.apply(HashMap.scala:98)
> at
> scala.collection.mutable.HashTable$class.foreachEntry(HashTable.scala:226)
> at scala.collection.mutable.HashMap.foreachEntry(HashMap.scala:39)
> at scala.collection.mutable.HashMap.foreach(HashMap.scala:98)
> at
> scala.collection.TraversableLike$WithFilter.foreach(TraversableLike.scala:771)
> at org.apache.spark.Accumulators$.add(Accumulators.scala:335)
> at
> org.apache.spark.scheduler.DAGScheduler.updateAccumulators(DAGScheduler.scala:892)
> at
> org.apache.spark.scheduler.DAGScheduler.handleTaskCompletion(DAGScheduler.scala:1000)
> at
> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1398)
> at
> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1362)
> at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
> {code}
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]