[ 
https://issues.apache.org/jira/browse/SPARK-1977?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=14018390#comment-14018390
 ] 

Neville Li commented on SPARK-1977:
-----------------------------------

Our YARN cluster runs Hadoop 2.2.0. We built the spark-assembly and 
spark-examples jars from the 1.0.0 release source using the bundled 
make_distribution.sh. Here's my command:

{code}
spark-submit --master yarn-cluster --class 
org.apache.spark.examples.mllib.MovieLensALS --num-executors 2 
--executor-memory 2g --driver-memory 2g 
dist/lib/spark-examples-1.0.0-hadoop2.2.0.jar --kryo --implicitPrefs 
sample_movielens_data.txt
{code}

Here's the complete list of classpath entries from the environment tab.
{code}
/etc/hadoop/conf
/usr/lib/hadoop-hdfs/hadoop-hdfs-2.2.0.2.0.6.0-76-tests.jar
/usr/lib/hadoop-hdfs/hadoop-hdfs-2.2.0.2.0.6.0-76.jar
/usr/lib/hadoop-hdfs/hadoop-hdfs-nfs-2.2.0.2.0.6.0-76.jar
/usr/lib/hadoop-hdfs/lib/asm-3.2.jar
/usr/lib/hadoop-hdfs/lib/commons-cli-1.2.jar
/usr/lib/hadoop-hdfs/lib/commons-codec-1.4.jar
/usr/lib/hadoop-hdfs/lib/commons-daemon-1.0.13.jar
/usr/lib/hadoop-hdfs/lib/commons-el-1.0.jar
/usr/lib/hadoop-hdfs/lib/commons-io-2.1.jar
/usr/lib/hadoop-hdfs/lib/commons-lang-2.5.jar
/usr/lib/hadoop-hdfs/lib/commons-logging-1.1.1.jar
/usr/lib/hadoop-hdfs/lib/guava-11.0.2.jar
/usr/lib/hadoop-hdfs/lib/jackson-core-asl-1.8.8.jar
/usr/lib/hadoop-hdfs/lib/jackson-mapper-asl-1.8.8.jar
/usr/lib/hadoop-hdfs/lib/jasper-runtime-5.5.23.jar
/usr/lib/hadoop-hdfs/lib/jersey-core-1.9.jar
/usr/lib/hadoop-hdfs/lib/jersey-server-1.9.jar
/usr/lib/hadoop-hdfs/lib/jetty-6.1.26.jar
/usr/lib/hadoop-hdfs/lib/jetty-util-6.1.26.jar
/usr/lib/hadoop-hdfs/lib/jsp-api-2.1.jar
/usr/lib/hadoop-hdfs/lib/jsr305-1.3.9.jar
/usr/lib/hadoop-hdfs/lib/log4j-1.2.17.jar
/usr/lib/hadoop-hdfs/lib/netty-3.6.2.Final.jar
/usr/lib/hadoop-hdfs/lib/protobuf-java-2.5.0.jar
/usr/lib/hadoop-hdfs/lib/servlet-api-2.5.jar
/usr/lib/hadoop-hdfs/lib/xmlenc-0.52.jar
/usr/lib/hadoop-mapreduce/hadoop-archives-2.2.0.2.0.6.0-76.jar
/usr/lib/hadoop-mapreduce/hadoop-datajoin-2.2.0.2.0.6.0-76.jar
/usr/lib/hadoop-mapreduce/hadoop-distcp-2.2.0.2.0.6.0-76.jar
/usr/lib/hadoop-mapreduce/hadoop-extras-2.2.0.2.0.6.0-76.jar
/usr/lib/hadoop-mapreduce/hadoop-gridmix-2.2.0.2.0.6.0-76.jar
/usr/lib/hadoop-mapreduce/hadoop-mapreduce-client-app-2.2.0.2.0.6.0-76.jar
/usr/lib/hadoop-mapreduce/hadoop-mapreduce-client-common-2.2.0.2.0.6.0-76.jar
/usr/lib/hadoop-mapreduce/hadoop-mapreduce-client-core-2.2.0.2.0.6.0-76.jar
/usr/lib/hadoop-mapreduce/hadoop-mapreduce-client-hs-2.2.0.2.0.6.0-76.jar
/usr/lib/hadoop-mapreduce/hadoop-mapreduce-client-hs-plugins-2.2.0.2.0.6.0-76.jar
/usr/lib/hadoop-mapreduce/hadoop-mapreduce-client-jobclient-2.2.0.2.0.6.0-76-tests.jar
/usr/lib/hadoop-mapreduce/hadoop-mapreduce-client-jobclient-2.2.0.2.0.6.0-76.jar
/usr/lib/hadoop-mapreduce/hadoop-mapreduce-client-shuffle-2.2.0.2.0.6.0-76.jar
/usr/lib/hadoop-mapreduce/hadoop-mapreduce-examples-2.2.0.2.0.6.0-76.jar
/usr/lib/hadoop-mapreduce/hadoop-rumen-2.2.0.2.0.6.0-76.jar
/usr/lib/hadoop-mapreduce/hadoop-streaming-2.2.0.2.0.6.0-76.jar
/usr/lib/hadoop-mapreduce/lib/aopalliance-1.0.jar
/usr/lib/hadoop-mapreduce/lib/asm-3.2.jar
/usr/lib/hadoop-mapreduce/lib/avro-1.7.4.jar
/usr/lib/hadoop-mapreduce/lib/commons-compress-1.4.1.jar
/usr/lib/hadoop-mapreduce/lib/commons-io-2.1.jar
/usr/lib/hadoop-mapreduce/lib/guice-3.0.jar
/usr/lib/hadoop-mapreduce/lib/guice-servlet-3.0.jar
/usr/lib/hadoop-mapreduce/lib/hamcrest-core-1.1.jar
/usr/lib/hadoop-mapreduce/lib/jackson-core-asl-1.8.8.jar
/usr/lib/hadoop-mapreduce/lib/jackson-mapper-asl-1.8.8.jar
/usr/lib/hadoop-mapreduce/lib/javax.inject-1.jar
/usr/lib/hadoop-mapreduce/lib/jersey-core-1.9.jar
/usr/lib/hadoop-mapreduce/lib/jersey-guice-1.9.jar
/usr/lib/hadoop-mapreduce/lib/jersey-server-1.9.jar
/usr/lib/hadoop-mapreduce/lib/junit-4.10.jar
/usr/lib/hadoop-mapreduce/lib/log4j-1.2.17.jar
/usr/lib/hadoop-mapreduce/lib/netty-3.6.2.Final.jar
/usr/lib/hadoop-mapreduce/lib/paranamer-2.3.jar
/usr/lib/hadoop-mapreduce/lib/protobuf-java-2.5.0.jar
/usr/lib/hadoop-mapreduce/lib/snappy-java-1.0.4.1.jar
/usr/lib/hadoop-mapreduce/lib/xz-1.0.jar
/usr/lib/hadoop-yarn/hadoop-yarn-api-2.2.0.2.0.6.0-76.jar
/usr/lib/hadoop-yarn/hadoop-yarn-applications-distributedshell-2.2.0.2.0.6.0-76.jar
/usr/lib/hadoop-yarn/hadoop-yarn-applications-unmanaged-am-launcher-2.2.0.2.0.6.0-76.jar
/usr/lib/hadoop-yarn/hadoop-yarn-client-2.2.0.2.0.6.0-76.jar
/usr/lib/hadoop-yarn/hadoop-yarn-common-2.2.0.2.0.6.0-76.jar
/usr/lib/hadoop-yarn/hadoop-yarn-server-common-2.2.0.2.0.6.0-76.jar
/usr/lib/hadoop-yarn/hadoop-yarn-server-nodemanager-2.2.0.2.0.6.0-76.jar
/usr/lib/hadoop-yarn/hadoop-yarn-server-resourcemanager-2.2.0.2.0.6.0-76.jar
/usr/lib/hadoop-yarn/hadoop-yarn-server-tests-2.2.0.2.0.6.0-76.jar
/usr/lib/hadoop-yarn/hadoop-yarn-server-web-proxy-2.2.0.2.0.6.0-76.jar
/usr/lib/hadoop-yarn/hadoop-yarn-site-2.2.0.2.0.6.0-76.jar
/usr/lib/hadoop-yarn/lib/aopalliance-1.0.jar
/usr/lib/hadoop-yarn/lib/asm-3.2.jar
/usr/lib/hadoop-yarn/lib/avro-1.7.4.jar
/usr/lib/hadoop-yarn/lib/commons-compress-1.4.1.jar
/usr/lib/hadoop-yarn/lib/commons-io-2.1.jar
/usr/lib/hadoop-yarn/lib/guice-3.0.jar
/usr/lib/hadoop-yarn/lib/guice-servlet-3.0.jar
/usr/lib/hadoop-yarn/lib/hamcrest-core-1.1.jar
/usr/lib/hadoop-yarn/lib/jackson-core-asl-1.8.8.jar
/usr/lib/hadoop-yarn/lib/jackson-mapper-asl-1.8.8.jar
/usr/lib/hadoop-yarn/lib/javax.inject-1.jar
/usr/lib/hadoop-yarn/lib/jersey-core-1.9.jar
/usr/lib/hadoop-yarn/lib/jersey-guice-1.9.jar
/usr/lib/hadoop-yarn/lib/jersey-server-1.9.jar
/usr/lib/hadoop-yarn/lib/junit-4.10.jar
/usr/lib/hadoop-yarn/lib/log4j-1.2.17.jar
/usr/lib/hadoop-yarn/lib/netty-3.6.2.Final.jar
/usr/lib/hadoop-yarn/lib/paranamer-2.3.jar
/usr/lib/hadoop-yarn/lib/protobuf-java-2.5.0.jar
/usr/lib/hadoop-yarn/lib/snappy-java-1.0.4.1.jar
/usr/lib/hadoop-yarn/lib/xz-1.0.jar
/usr/lib/hadoop/hadoop-annotations-2.2.0.2.0.6.0-76.jar
/usr/lib/hadoop/hadoop-auth-2.2.0.2.0.6.0-76.jar
/usr/lib/hadoop/hadoop-common-2.2.0.2.0.6.0-76-tests.jar
/usr/lib/hadoop/hadoop-common-2.2.0.2.0.6.0-76.jar
/usr/lib/hadoop/hadoop-nfs-2.2.0.2.0.6.0-76.jar
/usr/lib/hadoop/lib/activation-1.1.jar
/usr/lib/hadoop/lib/asm-3.2.jar
/usr/lib/hadoop/lib/avro-1.7.4.jar
/usr/lib/hadoop/lib/commons-beanutils-1.7.0.jar
/usr/lib/hadoop/lib/commons-beanutils-core-1.8.0.jar
/usr/lib/hadoop/lib/commons-cli-1.2.jar
/usr/lib/hadoop/lib/commons-codec-1.4.jar
/usr/lib/hadoop/lib/commons-collections-3.2.1.jar
/usr/lib/hadoop/lib/commons-compress-1.4.1.jar
/usr/lib/hadoop/lib/commons-configuration-1.6.jar
/usr/lib/hadoop/lib/commons-digester-1.8.jar
/usr/lib/hadoop/lib/commons-el-1.0.jar
/usr/lib/hadoop/lib/commons-httpclient-3.1.jar
/usr/lib/hadoop/lib/commons-io-2.1.jar
/usr/lib/hadoop/lib/commons-lang-2.5.jar
/usr/lib/hadoop/lib/commons-logging-1.1.1.jar
/usr/lib/hadoop/lib/commons-math-2.1.jar
/usr/lib/hadoop/lib/commons-net-3.1.jar
/usr/lib/hadoop/lib/guava-11.0.2.jar
/usr/lib/hadoop/lib/jackson-core-asl-1.8.8.jar
/usr/lib/hadoop/lib/jackson-jaxrs-1.8.8.jar
/usr/lib/hadoop/lib/jackson-mapper-asl-1.8.8.jar
/usr/lib/hadoop/lib/jackson-xc-1.8.8.jar
/usr/lib/hadoop/lib/jasper-compiler-5.5.23.jar
/usr/lib/hadoop/lib/jasper-runtime-5.5.23.jar
/usr/lib/hadoop/lib/jaxb-api-2.2.2.jar
/usr/lib/hadoop/lib/jaxb-impl-2.2.3-1.jar
/usr/lib/hadoop/lib/jersey-core-1.9.jar
/usr/lib/hadoop/lib/jersey-json-1.9.jar
/usr/lib/hadoop/lib/jersey-server-1.9.jar
/usr/lib/hadoop/lib/jets3t-0.6.1.jar
/usr/lib/hadoop/lib/jettison-1.1.jar
/usr/lib/hadoop/lib/jetty-6.1.26.jar
/usr/lib/hadoop/lib/jetty-util-6.1.26.jar
/usr/lib/hadoop/lib/jsch-0.1.42.jar
/usr/lib/hadoop/lib/jsp-api-2.1.jar
/usr/lib/hadoop/lib/jsr305-1.3.9.jar
/usr/lib/hadoop/lib/junit-4.8.2.jar
/usr/lib/hadoop/lib/log4j-1.2.17.jar
/usr/lib/hadoop/lib/mockito-all-1.8.5.jar
/usr/lib/hadoop/lib/native/*
/usr/lib/hadoop/lib/netty-3.6.2.Final.jar
/usr/lib/hadoop/lib/paranamer-2.3.jar
/usr/lib/hadoop/lib/protobuf-java-2.5.0.jar
/usr/lib/hadoop/lib/servlet-api-2.5.jar
/usr/lib/hadoop/lib/slf4j-api-1.7.5.jar
/usr/lib/hadoop/lib/slf4j-log4j12-1.7.5.jar
/usr/lib/hadoop/lib/snappy-java-1.0.4.1.jar
/usr/lib/hadoop/lib/stax-api-1.0.1.jar
/usr/lib/hadoop/lib/xmlenc-0.52.jar
/usr/lib/hadoop/lib/xz-1.0.jar
/usr/lib/hadoop/lib/zookeeper-3.4.5.jar
{code}

> mutable.BitSet in ALS not serializable with KryoSerializer
> ----------------------------------------------------------
>
>                 Key: SPARK-1977
>                 URL: https://issues.apache.org/jira/browse/SPARK-1977
>             Project: Spark
>          Issue Type: Bug
>          Components: MLlib
>    Affects Versions: 1.0.0
>            Reporter: Neville Li
>            Priority: Minor
>
> OutLinkBlock in ALS.scala has an Array[mutable.BitSet] member.
> KryoSerializer uses AllScalaRegistrar from Twitter chill but it doesn't 
> register mutable.BitSet.
> Right now we have to register mutable.BitSet manually. A proper fix would be 
> to use immutable.BitSet in ALS or to register mutable.BitSet in upstream chill.
> {code}
> Caused by: org.apache.spark.SparkException: Job aborted due to stage failure: 
> Task 1724.0:9 failed 4 times, most recent failure: Exception failure in TID 
> 68548 on host lon4-hadoopslave-b232.lon4.spotify.net: 
> com.esotericsoftware.kryo.KryoException: java.lang.ArrayStoreException: 
> scala.collection.mutable.HashSet
> Serialization trace:
> shouldSend (org.apache.spark.mllib.recommendation.OutLinkBlock)
>         
> com.esotericsoftware.kryo.serializers.FieldSerializer$ObjectField.read(FieldSerializer.java:626)
>         
> com.esotericsoftware.kryo.serializers.FieldSerializer.read(FieldSerializer.java:221)
>         com.esotericsoftware.kryo.Kryo.readClassAndObject(Kryo.java:732)
>         com.twitter.chill.Tuple2Serializer.read(TupleSerializers.scala:43)
>         com.twitter.chill.Tuple2Serializer.read(TupleSerializers.scala:34)
>         com.esotericsoftware.kryo.Kryo.readClassAndObject(Kryo.java:732)
>         
> org.apache.spark.serializer.KryoDeserializationStream.readObject(KryoSerializer.scala:115)
>         
> org.apache.spark.serializer.DeserializationStream$$anon$1.getNext(Serializer.scala:125)
>         org.apache.spark.util.NextIterator.hasNext(NextIterator.scala:71)
>         
> org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:39)
>         
> org.apache.spark.rdd.CoGroupedRDD$$anonfun$compute$4.apply(CoGroupedRDD.scala:155)
>         
> org.apache.spark.rdd.CoGroupedRDD$$anonfun$compute$4.apply(CoGroupedRDD.scala:154)
>         
> scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
>         scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)
>         org.apache.spark.rdd.CoGroupedRDD.compute(CoGroupedRDD.scala:154)
>         org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:262)
>         org.apache.spark.rdd.RDD.iterator(RDD.scala:229)
>         org.apache.spark.rdd.MappedValuesRDD.compute(MappedValuesRDD.scala:31)
>         org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:262)
>         org.apache.spark.rdd.RDD.iterator(RDD.scala:229)
>         
> org.apache.spark.rdd.FlatMappedValuesRDD.compute(FlatMappedValuesRDD.scala:31)
>         org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:262)
>         org.apache.spark.rdd.RDD.iterator(RDD.scala:229)
>         org.apache.spark.rdd.FlatMappedRDD.compute(FlatMappedRDD.scala:33)
>         org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:262)
>         org.apache.spark.CacheManager.getOrCompute(CacheManager.scala:77)
>         org.apache.spark.rdd.RDD.iterator(RDD.scala:227)
>         org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:111)
>         org.apache.spark.scheduler.Task.run(Task.scala:51)
>         org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:187)
>         
> java.util.concurrent.ThreadPoolExecutor$Worker.runTask(ThreadPoolExecutor.java:886)
>         
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:908)
>         java.lang.Thread.run(Thread.java:662)
> Driver stacktrace:
>       at 
> org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1033)
>       at 
> org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1017)
>       at 
> org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1015)
>       at 
> scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
>       at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)
>       at 
> org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1015)
>       at 
> org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:633)
>       at 
> org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:633)
>       at scala.Option.foreach(Option.scala:236)
>       at 
> org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:633)
>       at 
> org.apache.spark.scheduler.DAGSchedulerEventProcessActor$$anonfun$receive$2.applyOrElse(DAGScheduler.scala:1207)
>       at akka.actor.ActorCell.receiveMessage(ActorCell.scala:498)
>       at akka.actor.ActorCell.invoke(ActorCell.scala:456)
>       at akka.dispatch.Mailbox.processMailbox(Mailbox.scala:237)
>       at akka.dispatch.Mailbox.run(Mailbox.scala:219)
>       at 
> akka.dispatch.ForkJoinExecutorConfigurator$AkkaForkJoinTask.exec(AbstractDispatcher.scala:386)
>       at scala.concurrent.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260)
>       at 
> scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339)
>       at 
> scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979)
>       at 
> scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107)
> {code}



--
This message was sent by Atlassian JIRA
(v6.2#6252)

Reply via email to