[
https://issues.apache.org/jira/browse/SPARK-1977?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=14018390#comment-14018390
]
Neville Li commented on SPARK-1977:
-----------------------------------
Our YARN cluster runs Hadoop 2.2.0. We built the spark-assembly and
spark-examples jars from the 1.0.0 release source using the bundled
make_distribution.sh. Here is the command I ran:
{code}
spark-submit --master yarn-cluster --class
org.apache.spark.examples.mllib.MovieLensALS --num-executors 2
--executor-memory 2g --driver-memory 2g
dist/lib/spark-examples-1.0.0-hadoop2.2.0.jar --kryo --implicitPrefs
sample_movielens_data.txt
{code}
Here's the complete classpath, as listed in the environment tab.
{code}
/etc/hadoop/conf
/usr/lib/hadoop-hdfs/hadoop-hdfs-2.2.0.2.0.6.0-76-tests.jar
/usr/lib/hadoop-hdfs/hadoop-hdfs-2.2.0.2.0.6.0-76.jar
/usr/lib/hadoop-hdfs/hadoop-hdfs-nfs-2.2.0.2.0.6.0-76.jar
/usr/lib/hadoop-hdfs/lib/asm-3.2.jar
/usr/lib/hadoop-hdfs/lib/commons-cli-1.2.jar
/usr/lib/hadoop-hdfs/lib/commons-codec-1.4.jar
/usr/lib/hadoop-hdfs/lib/commons-daemon-1.0.13.jar
/usr/lib/hadoop-hdfs/lib/commons-el-1.0.jar
/usr/lib/hadoop-hdfs/lib/commons-io-2.1.jar
/usr/lib/hadoop-hdfs/lib/commons-lang-2.5.jar
/usr/lib/hadoop-hdfs/lib/commons-logging-1.1.1.jar
/usr/lib/hadoop-hdfs/lib/guava-11.0.2.jar
/usr/lib/hadoop-hdfs/lib/jackson-core-asl-1.8.8.jar
/usr/lib/hadoop-hdfs/lib/jackson-mapper-asl-1.8.8.jar
/usr/lib/hadoop-hdfs/lib/jasper-runtime-5.5.23.jar
/usr/lib/hadoop-hdfs/lib/jersey-core-1.9.jar
/usr/lib/hadoop-hdfs/lib/jersey-server-1.9.jar
/usr/lib/hadoop-hdfs/lib/jetty-6.1.26.jar
/usr/lib/hadoop-hdfs/lib/jetty-util-6.1.26.jar
/usr/lib/hadoop-hdfs/lib/jsp-api-2.1.jar
/usr/lib/hadoop-hdfs/lib/jsr305-1.3.9.jar
/usr/lib/hadoop-hdfs/lib/log4j-1.2.17.jar
/usr/lib/hadoop-hdfs/lib/netty-3.6.2.Final.jar
/usr/lib/hadoop-hdfs/lib/protobuf-java-2.5.0.jar
/usr/lib/hadoop-hdfs/lib/servlet-api-2.5.jar
/usr/lib/hadoop-hdfs/lib/xmlenc-0.52.jar
/usr/lib/hadoop-mapreduce/hadoop-archives-2.2.0.2.0.6.0-76.jar
/usr/lib/hadoop-mapreduce/hadoop-datajoin-2.2.0.2.0.6.0-76.jar
/usr/lib/hadoop-mapreduce/hadoop-distcp-2.2.0.2.0.6.0-76.jar
/usr/lib/hadoop-mapreduce/hadoop-extras-2.2.0.2.0.6.0-76.jar
/usr/lib/hadoop-mapreduce/hadoop-gridmix-2.2.0.2.0.6.0-76.jar
/usr/lib/hadoop-mapreduce/hadoop-mapreduce-client-app-2.2.0.2.0.6.0-76.jar
/usr/lib/hadoop-mapreduce/hadoop-mapreduce-client-common-2.2.0.2.0.6.0-76.jar
/usr/lib/hadoop-mapreduce/hadoop-mapreduce-client-core-2.2.0.2.0.6.0-76.jar
/usr/lib/hadoop-mapreduce/hadoop-mapreduce-client-hs-2.2.0.2.0.6.0-76.jar
/usr/lib/hadoop-mapreduce/hadoop-mapreduce-client-hs-plugins-2.2.0.2.0.6.0-76.jar
/usr/lib/hadoop-mapreduce/hadoop-mapreduce-client-jobclient-2.2.0.2.0.6.0-76-tests.jar
/usr/lib/hadoop-mapreduce/hadoop-mapreduce-client-jobclient-2.2.0.2.0.6.0-76.jar
/usr/lib/hadoop-mapreduce/hadoop-mapreduce-client-shuffle-2.2.0.2.0.6.0-76.jar
/usr/lib/hadoop-mapreduce/hadoop-mapreduce-examples-2.2.0.2.0.6.0-76.jar
/usr/lib/hadoop-mapreduce/hadoop-rumen-2.2.0.2.0.6.0-76.jar
/usr/lib/hadoop-mapreduce/hadoop-streaming-2.2.0.2.0.6.0-76.jar
/usr/lib/hadoop-mapreduce/lib/aopalliance-1.0.jar
/usr/lib/hadoop-mapreduce/lib/asm-3.2.jar
/usr/lib/hadoop-mapreduce/lib/avro-1.7.4.jar
/usr/lib/hadoop-mapreduce/lib/commons-compress-1.4.1.jar
/usr/lib/hadoop-mapreduce/lib/commons-io-2.1.jar
/usr/lib/hadoop-mapreduce/lib/guice-3.0.jar
/usr/lib/hadoop-mapreduce/lib/guice-servlet-3.0.jar
/usr/lib/hadoop-mapreduce/lib/hamcrest-core-1.1.jar
/usr/lib/hadoop-mapreduce/lib/jackson-core-asl-1.8.8.jar
/usr/lib/hadoop-mapreduce/lib/jackson-mapper-asl-1.8.8.jar
/usr/lib/hadoop-mapreduce/lib/javax.inject-1.jar
/usr/lib/hadoop-mapreduce/lib/jersey-core-1.9.jar
/usr/lib/hadoop-mapreduce/lib/jersey-guice-1.9.jar
/usr/lib/hadoop-mapreduce/lib/jersey-server-1.9.jar
/usr/lib/hadoop-mapreduce/lib/junit-4.10.jar
/usr/lib/hadoop-mapreduce/lib/log4j-1.2.17.jar
/usr/lib/hadoop-mapreduce/lib/netty-3.6.2.Final.jar
/usr/lib/hadoop-mapreduce/lib/paranamer-2.3.jar
/usr/lib/hadoop-mapreduce/lib/protobuf-java-2.5.0.jar
/usr/lib/hadoop-mapreduce/lib/snappy-java-1.0.4.1.jar
/usr/lib/hadoop-mapreduce/lib/xz-1.0.jar
/usr/lib/hadoop-yarn/hadoop-yarn-api-2.2.0.2.0.6.0-76.jar
/usr/lib/hadoop-yarn/hadoop-yarn-applications-distributedshell-2.2.0.2.0.6.0-76.jar
/usr/lib/hadoop-yarn/hadoop-yarn-applications-unmanaged-am-launcher-2.2.0.2.0.6.0-76.jar
/usr/lib/hadoop-yarn/hadoop-yarn-client-2.2.0.2.0.6.0-76.jar
/usr/lib/hadoop-yarn/hadoop-yarn-common-2.2.0.2.0.6.0-76.jar
/usr/lib/hadoop-yarn/hadoop-yarn-server-common-2.2.0.2.0.6.0-76.jar
/usr/lib/hadoop-yarn/hadoop-yarn-server-nodemanager-2.2.0.2.0.6.0-76.jar
/usr/lib/hadoop-yarn/hadoop-yarn-server-resourcemanager-2.2.0.2.0.6.0-76.jar
/usr/lib/hadoop-yarn/hadoop-yarn-server-tests-2.2.0.2.0.6.0-76.jar
/usr/lib/hadoop-yarn/hadoop-yarn-server-web-proxy-2.2.0.2.0.6.0-76.jar
/usr/lib/hadoop-yarn/hadoop-yarn-site-2.2.0.2.0.6.0-76.jar
/usr/lib/hadoop-yarn/lib/aopalliance-1.0.jar
/usr/lib/hadoop-yarn/lib/asm-3.2.jar
/usr/lib/hadoop-yarn/lib/avro-1.7.4.jar
/usr/lib/hadoop-yarn/lib/commons-compress-1.4.1.jar
/usr/lib/hadoop-yarn/lib/commons-io-2.1.jar
/usr/lib/hadoop-yarn/lib/guice-3.0.jar
/usr/lib/hadoop-yarn/lib/guice-servlet-3.0.jar
/usr/lib/hadoop-yarn/lib/hamcrest-core-1.1.jar
/usr/lib/hadoop-yarn/lib/jackson-core-asl-1.8.8.jar
/usr/lib/hadoop-yarn/lib/jackson-mapper-asl-1.8.8.jar
/usr/lib/hadoop-yarn/lib/javax.inject-1.jar
/usr/lib/hadoop-yarn/lib/jersey-core-1.9.jar
/usr/lib/hadoop-yarn/lib/jersey-guice-1.9.jar
/usr/lib/hadoop-yarn/lib/jersey-server-1.9.jar
/usr/lib/hadoop-yarn/lib/junit-4.10.jar
/usr/lib/hadoop-yarn/lib/log4j-1.2.17.jar
/usr/lib/hadoop-yarn/lib/netty-3.6.2.Final.jar
/usr/lib/hadoop-yarn/lib/paranamer-2.3.jar
/usr/lib/hadoop-yarn/lib/protobuf-java-2.5.0.jar
/usr/lib/hadoop-yarn/lib/snappy-java-1.0.4.1.jar
/usr/lib/hadoop-yarn/lib/xz-1.0.jar
/usr/lib/hadoop/hadoop-annotations-2.2.0.2.0.6.0-76.jar
/usr/lib/hadoop/hadoop-auth-2.2.0.2.0.6.0-76.jar
/usr/lib/hadoop/hadoop-common-2.2.0.2.0.6.0-76-tests.jar
/usr/lib/hadoop/hadoop-common-2.2.0.2.0.6.0-76.jar
/usr/lib/hadoop/hadoop-nfs-2.2.0.2.0.6.0-76.jar
/usr/lib/hadoop/lib/activation-1.1.jar
/usr/lib/hadoop/lib/asm-3.2.jar
/usr/lib/hadoop/lib/avro-1.7.4.jar
/usr/lib/hadoop/lib/commons-beanutils-1.7.0.jar
/usr/lib/hadoop/lib/commons-beanutils-core-1.8.0.jar
/usr/lib/hadoop/lib/commons-cli-1.2.jar
/usr/lib/hadoop/lib/commons-codec-1.4.jar
/usr/lib/hadoop/lib/commons-collections-3.2.1.jar
/usr/lib/hadoop/lib/commons-compress-1.4.1.jar
/usr/lib/hadoop/lib/commons-configuration-1.6.jar
/usr/lib/hadoop/lib/commons-digester-1.8.jar
/usr/lib/hadoop/lib/commons-el-1.0.jar
/usr/lib/hadoop/lib/commons-httpclient-3.1.jar
/usr/lib/hadoop/lib/commons-io-2.1.jar
/usr/lib/hadoop/lib/commons-lang-2.5.jar
/usr/lib/hadoop/lib/commons-logging-1.1.1.jar
/usr/lib/hadoop/lib/commons-math-2.1.jar
/usr/lib/hadoop/lib/commons-net-3.1.jar
/usr/lib/hadoop/lib/guava-11.0.2.jar
/usr/lib/hadoop/lib/jackson-core-asl-1.8.8.jar
/usr/lib/hadoop/lib/jackson-jaxrs-1.8.8.jar
/usr/lib/hadoop/lib/jackson-mapper-asl-1.8.8.jar
/usr/lib/hadoop/lib/jackson-xc-1.8.8.jar
/usr/lib/hadoop/lib/jasper-compiler-5.5.23.jar
/usr/lib/hadoop/lib/jasper-runtime-5.5.23.jar
/usr/lib/hadoop/lib/jaxb-api-2.2.2.jar
/usr/lib/hadoop/lib/jaxb-impl-2.2.3-1.jar
/usr/lib/hadoop/lib/jersey-core-1.9.jar
/usr/lib/hadoop/lib/jersey-json-1.9.jar
/usr/lib/hadoop/lib/jersey-server-1.9.jar
/usr/lib/hadoop/lib/jets3t-0.6.1.jar
/usr/lib/hadoop/lib/jettison-1.1.jar
/usr/lib/hadoop/lib/jetty-6.1.26.jar
/usr/lib/hadoop/lib/jetty-util-6.1.26.jar
/usr/lib/hadoop/lib/jsch-0.1.42.jar
/usr/lib/hadoop/lib/jsp-api-2.1.jar
/usr/lib/hadoop/lib/jsr305-1.3.9.jar
/usr/lib/hadoop/lib/junit-4.8.2.jar
/usr/lib/hadoop/lib/log4j-1.2.17.jar
/usr/lib/hadoop/lib/mockito-all-1.8.5.jar
/usr/lib/hadoop/lib/native/*
/usr/lib/hadoop/lib/netty-3.6.2.Final.jar
/usr/lib/hadoop/lib/paranamer-2.3.jar
/usr/lib/hadoop/lib/protobuf-java-2.5.0.jar
/usr/lib/hadoop/lib/servlet-api-2.5.jar
/usr/lib/hadoop/lib/slf4j-api-1.7.5.jar
/usr/lib/hadoop/lib/slf4j-log4j12-1.7.5.jar
/usr/lib/hadoop/lib/snappy-java-1.0.4.1.jar
/usr/lib/hadoop/lib/stax-api-1.0.1.jar
/usr/lib/hadoop/lib/xmlenc-0.52.jar
/usr/lib/hadoop/lib/xz-1.0.jar
/usr/lib/hadoop/lib/zookeeper-3.4.5.jar
{code}
> mutable.BitSet in ALS not serializable with KryoSerializer
> ----------------------------------------------------------
>
> Key: SPARK-1977
> URL: https://issues.apache.org/jira/browse/SPARK-1977
> Project: Spark
> Issue Type: Bug
> Components: MLlib
> Affects Versions: 1.0.0
> Reporter: Neville Li
> Priority: Minor
>
> OutLinkBlock in ALS.scala has an Array[mutable.BitSet] member.
> KryoSerializer uses AllScalaRegistrar from Twitter chill but it doesn't
> register mutable.BitSet.
> Right now we have to register mutable.BitSet manually. A proper fix would be
> to use immutable.BitSet in ALS or to register mutable.BitSet in upstream chill.
> {code}
> Caused by: org.apache.spark.SparkException: Job aborted due to stage failure:
> Task 1724.0:9 failed 4 times, most recent failure: Exception failure in TID
> 68548 on host lon4-hadoopslave-b232.lon4.spotify.net:
> com.esotericsoftware.kryo.KryoException: java.lang.ArrayStoreException:
> scala.collection.mutable.HashSet
> Serialization trace:
> shouldSend (org.apache.spark.mllib.recommendation.OutLinkBlock)
>
> com.esotericsoftware.kryo.serializers.FieldSerializer$ObjectField.read(FieldSerializer.java:626)
>
> com.esotericsoftware.kryo.serializers.FieldSerializer.read(FieldSerializer.java:221)
> com.esotericsoftware.kryo.Kryo.readClassAndObject(Kryo.java:732)
> com.twitter.chill.Tuple2Serializer.read(TupleSerializers.scala:43)
> com.twitter.chill.Tuple2Serializer.read(TupleSerializers.scala:34)
> com.esotericsoftware.kryo.Kryo.readClassAndObject(Kryo.java:732)
>
> org.apache.spark.serializer.KryoDeserializationStream.readObject(KryoSerializer.scala:115)
>
> org.apache.spark.serializer.DeserializationStream$$anon$1.getNext(Serializer.scala:125)
> org.apache.spark.util.NextIterator.hasNext(NextIterator.scala:71)
>
> org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:39)
>
> org.apache.spark.rdd.CoGroupedRDD$$anonfun$compute$4.apply(CoGroupedRDD.scala:155)
>
> org.apache.spark.rdd.CoGroupedRDD$$anonfun$compute$4.apply(CoGroupedRDD.scala:154)
>
> scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
> scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)
> org.apache.spark.rdd.CoGroupedRDD.compute(CoGroupedRDD.scala:154)
> org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:262)
> org.apache.spark.rdd.RDD.iterator(RDD.scala:229)
> org.apache.spark.rdd.MappedValuesRDD.compute(MappedValuesRDD.scala:31)
> org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:262)
> org.apache.spark.rdd.RDD.iterator(RDD.scala:229)
>
> org.apache.spark.rdd.FlatMappedValuesRDD.compute(FlatMappedValuesRDD.scala:31)
> org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:262)
> org.apache.spark.rdd.RDD.iterator(RDD.scala:229)
> org.apache.spark.rdd.FlatMappedRDD.compute(FlatMappedRDD.scala:33)
> org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:262)
> org.apache.spark.CacheManager.getOrCompute(CacheManager.scala:77)
> org.apache.spark.rdd.RDD.iterator(RDD.scala:227)
> org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:111)
> org.apache.spark.scheduler.Task.run(Task.scala:51)
> org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:187)
>
> java.util.concurrent.ThreadPoolExecutor$Worker.runTask(ThreadPoolExecutor.java:886)
>
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:908)
> java.lang.Thread.run(Thread.java:662)
> Driver stacktrace:
> at
> org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1033)
> at
> org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1017)
> at
> org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1015)
> at
> scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
> at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)
> at
> org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1015)
> at
> org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:633)
> at
> org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:633)
> at scala.Option.foreach(Option.scala:236)
> at
> org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:633)
> at
> org.apache.spark.scheduler.DAGSchedulerEventProcessActor$$anonfun$receive$2.applyOrElse(DAGScheduler.scala:1207)
> at akka.actor.ActorCell.receiveMessage(ActorCell.scala:498)
> at akka.actor.ActorCell.invoke(ActorCell.scala:456)
> at akka.dispatch.Mailbox.processMailbox(Mailbox.scala:237)
> at akka.dispatch.Mailbox.run(Mailbox.scala:219)
> at
> akka.dispatch.ForkJoinExecutorConfigurator$AkkaForkJoinTask.exec(AbstractDispatcher.scala:386)
> at scala.concurrent.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260)
> at
> scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339)
> at
> scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979)
> at
> scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107)
> {code}
--
This message was sent by Atlassian JIRA
(v6.2#6252)