[ https://issues.apache.org/jira/browse/SPARK-16173?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Davies Liu updated SPARK-16173: ------------------------------- Assignee: Dongjoon Hyun > Can't join describe() of DataFrame in Scala 2.10 > ------------------------------------------------ > > Key: SPARK-16173 > URL: https://issues.apache.org/jira/browse/SPARK-16173 > Project: Spark > Issue Type: Bug > Components: SQL > Affects Versions: 1.5.2, 1.6.1, 2.0.0 > Reporter: Davies Liu > Assignee: Dongjoon Hyun > Fix For: 1.6.2, 2.0.1 > > > describe() of DataFrame uses Seq() (it's an Iterator actually) to create > another DataFrame, which cannot be serialized in Scala 2.10. > {code} > org.apache.spark.SparkException: Task not serializable > at > org.apache.spark.util.ClosureCleaner$.ensureSerializable(ClosureCleaner.scala:304) > at > org.apache.spark.util.ClosureCleaner$.org$apache$spark$util$ClosureCleaner$$clean(ClosureCleaner.scala:294) > at org.apache.spark.util.ClosureCleaner$.clean(ClosureCleaner.scala:122) > at org.apache.spark.SparkContext.clean(SparkContext.scala:2060) > at > org.apache.spark.rdd.RDD$$anonfun$mapPartitions$1.apply(RDD.scala:707) > at > org.apache.spark.rdd.RDD$$anonfun$mapPartitions$1.apply(RDD.scala:706) > at > org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:150) > at > org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:111) > at org.apache.spark.rdd.RDD.withScope(RDD.scala:316) > at org.apache.spark.rdd.RDD.mapPartitions(RDD.scala:706) > at > org.apache.spark.sql.execution.ConvertToUnsafe.doExecute(rowFormatConverters.scala:38) > at > org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$5.apply(SparkPlan.scala:132) > at > org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$5.apply(SparkPlan.scala:130) > at > org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:150) > at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:130) > at > 
org.apache.spark.sql.execution.joins.BroadcastHashJoin$$anonfun$broadcastFuture$1$$anonfun$apply$1.apply(BroadcastHashJoin.scala:82) > at > org.apache.spark.sql.execution.joins.BroadcastHashJoin$$anonfun$broadcastFuture$1$$anonfun$apply$1.apply(BroadcastHashJoin.scala:79) > at > org.apache.spark.sql.execution.SQLExecution$.withExecutionId(SQLExecution.scala:100) > at > org.apache.spark.sql.execution.joins.BroadcastHashJoin$$anonfun$broadcastFuture$1.apply(BroadcastHashJoin.scala:79) > at > org.apache.spark.sql.execution.joins.BroadcastHashJoin$$anonfun$broadcastFuture$1.apply(BroadcastHashJoin.scala:79) > at > scala.concurrent.impl.Future$PromiseCompletingRunnable.liftedTree1$1(Future.scala:24) > at > scala.concurrent.impl.Future$PromiseCompletingRunnable.run(Future.scala:24) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) > at java.lang.Thread.run(Thread.java:745) > Caused by: java.io.NotSerializableException: > scala.collection.Iterator$$anon$11 > Serialization stack: > - object not serializable (class: scala.collection.Iterator$$anon$11, > value: empty iterator) > - field (class: scala.collection.Iterator$$anonfun$toStream$1, name: > $outer, type: interface scala.collection.Iterator) > - object (class scala.collection.Iterator$$anonfun$toStream$1, > <function0>) > - field (class: scala.collection.immutable.Stream$Cons, name: tl, type: > interface scala.Function0) > - object (class scala.collection.immutable.Stream$Cons, > Stream(WrappedArray(1), WrappedArray(2.0), WrappedArray(NaN), > WrappedArray(2), WrappedArray(2))) > - field (class: scala.collection.immutable.Stream$$anonfun$zip$1, name: > $outer, type: class scala.collection.immutable.Stream) > - object (class scala.collection.immutable.Stream$$anonfun$zip$1, > <function0>) > - field (class: scala.collection.immutable.Stream$Cons, name: tl, type: > interface scala.Function0) > - 
object (class scala.collection.immutable.Stream$Cons, > Stream((WrappedArray(1),(count,<function1>)), > (WrappedArray(2.0),(mean,<function1>)), > (WrappedArray(NaN),(stddev,<function1>)), > (WrappedArray(2),(min,<function1>)), (WrappedArray(2),(max,<function1>)))) > - field (class: scala.collection.immutable.Stream$$anonfun$map$1, name: > $outer, type: class scala.collection.immutable.Stream) > - object (class scala.collection.immutable.Stream$$anonfun$map$1, > <function0>) > - field (class: scala.collection.immutable.Stream$Cons, name: tl, type: > interface scala.Function0) > - object (class scala.collection.immutable.Stream$Cons, > Stream([count,1], [mean,2.0], [stddev,NaN], [min,2], [max,2])) > - field (class: scala.collection.immutable.Stream$$anonfun$map$1, name: > $outer, type: class scala.collection.immutable.Stream) > - object (class scala.collection.immutable.Stream$$anonfun$map$1, > <function0>) > - field (class: scala.collection.immutable.Stream$Cons, name: tl, type: > interface scala.Function0) > - object (class scala.collection.immutable.Stream$Cons, > Stream([count,1], [mean,2.0], [stddev,NaN], [min,2], [max,2])) > - field (class: org.apache.spark.sql.execution.LocalTableScan, name: > rows, type: interface scala.collection.Seq) > - object (class org.apache.spark.sql.execution.LocalTableScan, > LocalTableScan [summary#633,grade#634], > [[count,1],[mean,2.0],[stddev,NaN],[min,2],[max,2]] > ) > - field (class: org.apache.spark.sql.execution.ConvertToUnsafe, name: > child, type: class org.apache.spark.sql.execution.SparkPlan) > - object (class org.apache.spark.sql.execution.ConvertToUnsafe, > ConvertToUnsafe > +- LocalTableScan [summary#633,grade#634], > [[count,1],[mean,2.0],[stddev,NaN],[min,2],[max,2]] > ) > - field (class: > org.apache.spark.sql.execution.ConvertToUnsafe$$anonfun$1, name: $outer, > type: class org.apache.spark.sql.execution.ConvertToUnsafe) > - object (class > org.apache.spark.sql.execution.ConvertToUnsafe$$anonfun$1, <function1>) > 
at > org.apache.spark.serializer.SerializationDebugger$.improveException(SerializationDebugger.scala:40) > at > org.apache.spark.serializer.JavaSerializationStream.writeObject(JavaSerializer.scala:47) > at > org.apache.spark.serializer.JavaSerializerInstance.serialize(JavaSerializer.scala:101) > at > org.apache.spark.util.ClosureCleaner$.ensureSerializable(ClosureCleaner.scala:301) > ... 24 more > {code} > https://databricks-prod-cloudfront.cloud.databricks.com/public/4027ec902e239c93eaaa8714f173bcfc/1953143414968711/3514600112485120/2850462372213371/latest.html -- This message was sent by Atlassian JIRA (v6.3.4#6332) --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org