This looks like a CDH-specific issue; you might have better luck asking on the Cloudera forum.
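
That said, one thing worth double-checking before you post there is that
the Zeppelin Spark interpreter is actually launching against the Spark 2
parcel rather than the stock CDH Spark 1.5. A minimal sketch of what that
looks like in conf/zeppelin-env.sh (the parcel path below is the default
CDH parcel layout and is an assumption; adjust it to your cluster):

    # conf/zeppelin-env.sh
    # default SPARK2 parcel location -- an assumption, verify on your hosts
    export SPARK_HOME=/opt/cloudera/parcels/SPARK2/lib/spark2
    export HADOOP_CONF_DIR=/etc/hadoop/conf

If SPARK_HOME still resolves to the Spark 1.5 install, the executors will
pick up the old jars no matter what the driver ships. A sketch of the
spark.yarn.archive setup you mention is below your quoted message.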

Rob Anderson <rockclimbings...@gmail.com> wrote on Tue, Mar 7, 2017 at 9:02 AM:

> Hey Everyone,
>
> We're running Zeppelin 0.7.0. We've just cut over to Spark 2, using
> Scala 2.11, via the CDH parcel (SPARK2-2.0.0.cloudera1-1.cdh5.7.0.p0.113931).
>
> Running a simple job throws a "Caused by: java.lang.ClassNotFoundException:
> $anonfun$1". It appears that at execution time on the YARN hosts, the
> native CDH Spark 1.5 jars are loaded before the new Spark 2 jars. I've
> tried using spark.yarn.archive to point at the Spark 2 jars in HDFS, as
> well as other Spark options, none of which seems to make a difference.
>
>
> Any suggestions you can offer are appreciated.
>
> Thanks,
>
> Rob
>
> ------------------------
>
>
> %spark
> val taxonomy = sc.textFile("/user/user1/data/")
>                  .map(l => l.split("\t"))
>
> %spark
> taxonomy.first
>
>
> org.apache.spark.SparkException: Job aborted due to stage failure: Task 0
> in stage 1.0 failed 4 times, most recent failure: Lost task 0.3 in stage
> 1.0 (TID 7, data08.hadoop.prod.ostk.com, executor 2):
> java.lang.ClassNotFoundException: $anonfun$1
> at
> org.apache.spark.repl.ExecutorClassLoader.findClass(ExecutorClassLoader.scala:82)
> at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
> at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
> at java.lang.Class.forName0(Native Method)
> at java.lang.Class.forName(Class.java:348)
> at
> org.apache.spark.serializer.JavaDeserializationStream$$anon$1.resolveClass(JavaSerializer.scala:67)
> at java.io.ObjectInputStream.readNonProxyDesc(ObjectInputStream.java:1613)
> at java.io.ObjectInputStream.readClassDesc(ObjectInputStream.java:1518)
> at
> java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1774)
> at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1351)
> at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2000)
> at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1924)
> at
> java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1801)
> at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1351)
> at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2000)
> at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1924)
> at
> java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1801)
> at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1351)
> at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2000)
> at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1924)
> at
> java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1801)
> at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1351)
> at java.io.ObjectInputStream.readObject(ObjectInputStream.java:371)
> at
> org.apache.spark.serializer.JavaDeserializationStream.readObject(JavaSerializer.scala:75)
> at
> org.apache.spark.serializer.JavaSerializerInstance.deserialize(JavaSerializer.scala:114)
> at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:66)
> at org.apache.spark.scheduler.Task.run(Task.scala:86)
> at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:274)
> at
> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
> at
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
> at java.lang.Thread.run(Thread.java:745)
> Caused by: java.lang.ClassNotFoundException: $anonfun$1
> at java.lang.ClassLoader.findClass(ClassLoader.java:530)
> at
> org.apache.spark.util.ParentClassLoader.findClass(ParentClassLoader.scala:26)
> at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
> at
> org.apache.spark.util.ParentClassLoader.loadClass(ParentClassLoader.scala:34)
> at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
> at
> org.apache.spark.util.ParentClassLoader.loadClass(ParentClassLoader.scala:30)
> at
> org.apache.spark.repl.ExecutorClassLoader.findClass(ExecutorClassLoader.scala:77)
> ... 30 more
> Driver stacktrace:
> at org.apache.spark.scheduler.DAGScheduler.org
> $apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1454)
> at
> org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1442)
> at
> org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1441)
> at
> scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
> at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
> at
> org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1441)
> at
> org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:811)
> at
> org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:811)
> at scala.Option.foreach(Option.scala:257)
> at
> org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:811)
> at
> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1669)
> at
> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1624)
> at
> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1613)
> at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
> at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:632)
> at org.apache.spark.SparkContext.runJob(SparkContext.scala:1893)
> at org.apache.spark.SparkContext.runJob(SparkContext.scala:1906)
> at org.apache.spark.SparkContext.runJob(SparkContext.scala:1919)
> at org.apache.spark.rdd.RDD$$anonfun$take$1.apply(RDD.scala:1318)
> at
> org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
> at
> org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
> at org.apache.spark.rdd.RDD.withScope(RDD.scala:358)
> at org.apache.spark.rdd.RDD.take(RDD.scala:1292)
> at org.apache.spark.rdd.RDD$$anonfun$first$1.apply(RDD.scala:1332)
> at
> org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
> at
> org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
> at org.apache.spark.rdd.RDD.withScope(RDD.scala:358)
> at org.apache.spark.rdd.RDD.first(RDD.scala:1331)
> ... 37 elided
> Caused by: java.lang.ClassNotFoundException: $anonfun$1
> at
> org.apache.spark.repl.ExecutorClassLoader.findClass(ExecutorClassLoader.scala:82)
> at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
> at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
> at java.lang.Class.forName0(Native Method)
> at java.lang.Class.forName(Class.java:348)
> at
> org.apache.spark.serializer.JavaDeserializationStream$$anon$1.resolveClass(JavaSerializer.scala:67)
> at java.io.ObjectInputStream.readNonProxyDesc(ObjectInputStream.java:1613)
> at java.io.ObjectInputStream.readClassDesc(ObjectInputStream.java:1518)
> at
> java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1774)
> at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1351)
> at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2000)
> at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1924)
> at
> java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1801)
> at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1351)
> at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2000)
> at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1924)
> at
> java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1801)
> at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1351)
> at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2000)
> at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1924)
> at
> java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1801)
> at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1351)
> at java.io.ObjectInputStream.readObject(ObjectInputStream.java:371)
> at
> org.apache.spark.serializer.JavaDeserializationStream.readObject(JavaSerializer.scala:75)
> at
> org.apache.spark.serializer.JavaSerializerInstance.deserialize(JavaSerializer.scala:114)
> at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:66)
> at org.apache.spark.scheduler.Task.run(Task.scala:86)
> at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:274)
> at
> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
> at
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
> ... 1 more
> Caused by: java.lang.ClassNotFoundException: $anonfun$1
> at java.lang.ClassLoader.findClass(ClassLoader.java:530)
> at
> org.apache.spark.util.ParentClassLoader.findClass(ParentClassLoader.scala:26)
> at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
> at
> org.apache.spark.util.ParentClassLoader.loadClass(ParentClassLoader.scala:34)
> at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
> at
> org.apache.spark.util.ParentClassLoader.loadClass(ParentClassLoader.scala:30)
> at
> org.apache.spark.repl.ExecutorClassLoader.findClass(ExecutorClassLoader.scala:77)
> ... 30 more
>
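
For reference on the spark.yarn.archive attempt: the property expects an
archive of the Spark 2 jars, with the jars at the root of the archive,
sitting on HDFS. A rough sketch, with placeholder paths that are
assumptions rather than your actual layout:

    # build an archive of the Spark 2 jars and publish it to HDFS
    cd /opt/cloudera/parcels/SPARK2/lib/spark2/jars   # assumed parcel path
    zip -q /tmp/spark2-jars.zip *.jar
    hdfs dfs -mkdir -p /user/spark/share
    hdfs dfs -put /tmp/spark2-jars.zip /user/spark/share/

    # then point the interpreter at it, e.g. in the Zeppelin Spark
    # interpreter settings or conf/spark-defaults.conf:
    # spark.yarn.archive=hdfs:///user/spark/share/spark2-jars.zip

Even with that set, if the CDH node managers prepend the Spark 1.5 jars to
the executor classpath, the old classes would still win, which is why the
Cloudera forum is probably the right place to dig further.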
