Hi everyone! I got an exception when I run my script with spark-shell:
I added SPARK_JAVA_OPTS="-Dsun.io.serialization.extendedDebugInfo=true" in spark-env.sh to show the following stack: org.apache.spark.SparkException: Task not serializable at org.apache.spark.util.ClosureCleaner$.ensureSerializable(ClosureCleaner.scala:166) at org.apache.spark.util.ClosureCleaner$.clean(ClosureCleaner.scala:158) at org.apache.spark.SparkContext.clean(SparkContext.scala:1242) at org.apache.spark.rdd.RDD.filter(RDD.scala:282) at org.apache.spark.sql.SchemaRDD.filter(SchemaRDD.scala:460) at $iwC$$iwC$$iwC$$iwC.<init>(<console>:18) at $iwC$$iwC$$iwC.<init>(<console>:23) at $iwC$$iwC.<init>(<console>:25) at $iwC.<init>(<console>:27) at <init>(<console>:29) at .<init>(<console>:33) at .<clinit>(<console>) at .<init>(<console>:7) at .<clinit>(<console>) at $print(<console>) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:601) at org.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:789) at org.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1062) …… Caused by: java.io.NotSerializableException: org.apache.spark.sql.hive.HiveContext$$anon$3 - field (class "org.apache.spark.sql.hive.HiveContext", name: "functionRegistry", type: "class org.apache.spark.sql.hive.HiveFunctionRegistry") - object (class "org.apache.spark.sql.hive.HiveContext", org.apache.spark.sql.hive.HiveContext@4648e685) - field (class "$iwC$$iwC$$iwC$$iwC", name: "hc", type: "class org.apache.spark.sql.hive.HiveContext") - object (class "$iwC$$iwC$$iwC$$iwC", $iwC$$iwC$$iwC$$iwC@23d652ef) - field (class "$iwC$$iwC$$iwC", name: "$iw", type: "class $iwC$$iwC$$iwC$$iwC") - object (class "$iwC$$iwC$$iwC", $iwC$$iwC$$iwC@71cc14f1) - field (class "$iwC$$iwC", name: "$iw", type: "class $iwC$$iwC$$iwC") - object (class "$iwC$$iwC", 
$iwC$$iwC@74eca89e) - field (class "$iwC", name: "$iw", type: "class $iwC$$iwC") - object (class "$iwC", $iwC@685c4cc4) - field (class "$line9.$read", name: "$iw", type: "class $iwC") - object (class "$line9.$read", $line9.$read@519f9aae) - field (class "$iwC$$iwC$$iwC", name: "$VAL7", type: "class $line9.$read") - object (class "$iwC$$iwC$$iwC", $iwC$$iwC$$iwC@4b996858) - field (class "$iwC$$iwC$$iwC$$iwC", name: "$outer", type: "class $iwC$$iwC$$iwC") - object (class "$iwC$$iwC$$iwC$$iwC", $iwC$$iwC$$iwC$$iwC@31d646d4) - field (class "$iwC$$iwC$$iwC$$iwC$$anonfun$1", name: "$outer", type: "class $iwC$$iwC$$iwC$$iwC") - root object (class "$iwC$$iwC$$iwC$$iwC$$anonfun$1", <function1>) at java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1177) at java.io.ObjectOutputStream.defaultWriteFields(ObjectOutputStream.java:1528) I wrote some simple scripts to reproduce this problem. case 1 : val barr1 = sc.broadcast("test") val sret = sc.parallelize(1 to 10, 2) val ret = sret.filter(row => !barr1.equals("test")) ret.collect.foreach(println) It’s working fine with local mode and yarn-client mode. case 2 : val barr1 = sc.broadcast("test") val hc = new org.apache.spark.sql.hive.HiveContext(sc) val sret = hc.sql("show tables") val ret = sret.filter(row => !barr1.equals("test")) ret.collect.foreach(println) It will throw java.io.NotSerializableException: org.apache.spark.sql.hive.HiveContext with local mode and yarn-client mode. But it works fine if I write the same code in a Scala file and run it in IntelliJ IDEA. import org.apache.spark.{SparkConf, SparkContext} object TestBroadcast2 { def main(args: Array[String]) { val sparkConf = new SparkConf().setAppName("Broadcast Test").setMaster("local[3]") val sc = new SparkContext(sparkConf) val barr1 = sc.broadcast("test") val hc = new org.apache.spark.sql.hive.HiveContext(sc) val sret = hc.sql("show tables") val ret = sret.filter(row => !barr1.equals("test")) ret.collect.foreach(println) } }