Hi everyone!

I got an exception when I ran my script with spark-shell:

I added 

SPARK_JAVA_OPTS="-Dsun.io.serialization.extendedDebugInfo=true"

in spark-env.sh to print the following stack trace:


org.apache.spark.SparkException: Task not serializable
        at org.apache.spark.util.ClosureCleaner$.ensureSerializable(ClosureCleaner.scala:166)
        at org.apache.spark.util.ClosureCleaner$.clean(ClosureCleaner.scala:158)
        at org.apache.spark.SparkContext.clean(SparkContext.scala:1242)
        at org.apache.spark.rdd.RDD.filter(RDD.scala:282)
        at org.apache.spark.sql.SchemaRDD.filter(SchemaRDD.scala:460)
        at $iwC$$iwC$$iwC$$iwC.<init>(<console>:18)
        at $iwC$$iwC$$iwC.<init>(<console>:23)
        at $iwC$$iwC.<init>(<console>:25)
        at $iwC.<init>(<console>:27)
        at <init>(<console>:29)
        at .<init>(<console>:33)
        at .<clinit>(<console>)
        at .<init>(<console>:7)
        at .<clinit>(<console>)
        at $print(<console>)
        at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
        at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
        at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
        at java.lang.reflect.Method.invoke(Method.java:601)
        at org.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:789)
        at org.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1062)
……
Caused by: java.io.NotSerializableException: org.apache.spark.sql.hive.HiveContext$$anon$3
        - field (class "org.apache.spark.sql.hive.HiveContext", name: "functionRegistry", type: "class org.apache.spark.sql.hive.HiveFunctionRegistry")
        - object (class "org.apache.spark.sql.hive.HiveContext", org.apache.spark.sql.hive.HiveContext@4648e685)
        - field (class "$iwC$$iwC$$iwC$$iwC", name: "hc", type: "class org.apache.spark.sql.hive.HiveContext")
        - object (class "$iwC$$iwC$$iwC$$iwC", $iwC$$iwC$$iwC$$iwC@23d652ef)
        - field (class "$iwC$$iwC$$iwC", name: "$iw", type: "class $iwC$$iwC$$iwC$$iwC")
        - object (class "$iwC$$iwC$$iwC", $iwC$$iwC$$iwC@71cc14f1)
        - field (class "$iwC$$iwC", name: "$iw", type: "class $iwC$$iwC$$iwC")
        - object (class "$iwC$$iwC", $iwC$$iwC@74eca89e)
        - field (class "$iwC", name: "$iw", type: "class $iwC$$iwC")
        - object (class "$iwC", $iwC@685c4cc4)
        - field (class "$line9.$read", name: "$iw", type: "class $iwC")
        - object (class "$line9.$read", $line9.$read@519f9aae)
        - field (class "$iwC$$iwC$$iwC", name: "$VAL7", type: "class $line9.$read")
        - object (class "$iwC$$iwC$$iwC", $iwC$$iwC$$iwC@4b996858)
        - field (class "$iwC$$iwC$$iwC$$iwC", name: "$outer", type: "class $iwC$$iwC$$iwC")
        - object (class "$iwC$$iwC$$iwC$$iwC", $iwC$$iwC$$iwC$$iwC@31d646d4)
        - field (class "$iwC$$iwC$$iwC$$iwC$$anonfun$1", name: "$outer", type: "class $iwC$$iwC$$iwC$$iwC")
        - root object (class "$iwC$$iwC$$iwC$$iwC$$anonfun$1", <function1>)
        at java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1177)
        at java.io.ObjectOutputStream.defaultWriteFields(ObjectOutputStream.java:1528)
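
If I read the chain above correctly, the extendedDebugInfo output shows the filter closure ($$anonfun$1) keeping a $outer reference to the REPL line wrapper ($iwC), which holds the field "hc", and HiveContext itself is not serializable. Here is a simplified plain-Scala sketch of the same failure mode, without Spark (all names are made up by me; in this sketch the closure references the bad field directly, whereas in spark-shell it is only dragged in through the nested $iwC $outer chain):

import java.io.{ByteArrayOutputStream, ObjectOutputStream}

object CaptureDemo {
  class NotSer                                  // stands in for HiveContext

  class ReplLine extends Serializable {         // stands in for the REPL's $iwC wrapper
    val hc = new NotSer                         // non-serializable field, like "hc" above
    // The closure references hc, so it captures its enclosing ReplLine via $outer.
    val pred: Int => Boolean = x => x > 0 && hc != null
  }

  def main(args: Array[String]): Unit = {
    val out = new ObjectOutputStream(new ByteArrayOutputStream)
    // Serialization walks $outer -> ReplLine -> hc and throws
    // java.io.NotSerializableException: CaptureDemo$NotSer
    out.writeObject(new ReplLine().pred)
  }
}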

I wrote some simple scripts to reproduce this problem.

case 1 :
    val barr1 = sc.broadcast("test")
    val sret = sc.parallelize(1 to 10, 2)
    val ret = sret.filter(row => !barr1.value.equals("test"))
    ret.collect.foreach(println)

It works fine in both local mode and yarn-client mode.

case 2 :
    val barr1 = sc.broadcast("test")
    val hc = new org.apache.spark.sql.hive.HiveContext(sc)
    val sret = hc.sql("show tables")
    val ret = sret.filter(row => !barr1.value.equals("test"))
    ret.collect.foreach(println)

It throws java.io.NotSerializableException: org.apache.spark.sql.hive.HiveContext in both local mode and yarn-client mode.

But the same code works fine if I put it in a Scala file and run it from IntelliJ IDEA:

import org.apache.spark.{SparkConf, SparkContext}

object TestBroadcast2 {
  def main(args: Array[String]) {
    val sparkConf = new SparkConf().setAppName("Broadcast Test").setMaster("local[3]")
    val sc = new SparkContext(sparkConf)
    val barr1 = sc.broadcast("test")
    val hc = new org.apache.spark.sql.hive.HiveContext(sc)
    val sret = hc.sql("show tables")
    val ret = sret.filter(row => !barr1.value.equals("test"))
    ret.collect.foreach(println)
  }
}
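
My guess from the trace is that in a compiled object the closure only captures barr1, while spark-shell wraps every line in the nested $iwC classes, so the closure also drags in the wrapper object that holds hc. Based on that reading, would marking the context @transient in the shell be the right fix, so that closure serialization skips the field? A sketch of what I mean (I have not verified this on this Spark version):

    @transient val hc = new org.apache.spark.sql.hive.HiveContext(sc)
    val barr1 = sc.broadcast("test")
    val sret = hc.sql("show tables")
    val ret = sret.filter(row => !barr1.value.equals("test"))
    ret.collect.foreach(println)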




