I'm using Spark 1.4.1, compiled against CDH 5.3.2. When I use ALS.trainImplicit to build a model, I get the following error with rank=40 and iterations=30.
It worked for (rank=10, iteration=10) and (rank=20, iteration=20). What was wrong with (rank=40, iterations=30)? 15/08/13 01:16:40 INFO scheduler.DAGScheduler: Got job 66 (saveAsTextFile at MatrixFactorizationModel.scala:283) with 1 output partitions (allowLocal=false) 15/08/13 01:16:40 INFO scheduler.DAGScheduler: Final stage: ResultStage 2394(saveAsTextFile at MatrixFactorizationModel.scala:283) ... 15/08/13 01:16:41 INFO scheduler.DAGScheduler: Job 66 finished: saveAsTextFile at MatrixFactorizationModel.scala:283, took 0.538016 s ... 15/08/13 01:16:42 INFO parquet.ParquetRelation2: Using default output committer for Parquet: parquet.hadoop.ParquetOutputCommitter 15/08/13 01:16:42 INFO sources.DefaultWriterContainer: Using user defined output committer class parquet.hadoop.ParquetOutputCommitter 15/08/13 01:16:42 INFO spark.SparkContext: Starting job: parquet at MatrixFactorizationModel.scala:284 15/08/13 01:16:42 INFO scheduler.DAGScheduler: Got job 67 (parquet at MatrixFactorizationModel.scala:284) with 432 output partitions (allowLocal=false) 15/08/13 01:16:42 INFO scheduler.DAGScheduler: Final stage: ResultStage 2460(parquet at MatrixFactorizationModel.scala:284) 15/08/13 01:16:42 INFO scheduler.DAGScheduler: Parents of final stage: List(ShuffleMapStage 2459, ShuffleMapStage 2398) 15/08/13 01:16:42 INFO scheduler.DAGScheduler: Missing parents: List() 15/08/13 01:16:42 INFO scheduler.DAGScheduler: Submitting ResultStage 2460 (MapPartitionsRDD[634] at parquet at MatrixFactorizationModel.scala:284), which has no missing parents 15/08/13 01:16:42 INFO cluster.YarnClusterScheduler: Cancelling stage 2460 15/08/13 01:16:42 INFO scheduler.DAGScheduler: ResultStage 2460 (parquet at MatrixFactorizationModel.scala:284) failed in Unknown s 15/08/13 01:16:42 INFO scheduler.DAGScheduler: Job 67 failed: parquet at MatrixFactorizationModel.scala:284, took 0.249275 s 15/08/13 01:16:42 ERROR sources.InsertIntoHadoopFsRelation: Aborting job. 
org.apache.spark.SparkException: Job aborted due to stage failure: Task serialization failed: java.lang.StackOverflowError java.io.ObjectOutputStream.defaultWriteFields(ObjectOutputStream.java:1533) java.io.ObjectOutputStream.writeSerialData(ObjectOutputStream.java:1508) java.io.ObjectOutputStream.writeOrdinaryObject(ObjectOutputStream.java:1431) java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1177) java.io.ObjectOutputStream.defaultWriteFields(ObjectOutputStream.java:1547) java.io.ObjectOutputStream.writeSerialData(ObjectOutputStream.java:1508) java.io.ObjectOutputStream.writeOrdinaryObject(ObjectOutputStream.java:1431) java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1177) java.io.ObjectOutputStream.writeObject(ObjectOutputStream.java:347) scala.collection.immutable.$colon$colon.writeObject(List.scala:379) ... 15/08/13 01:16:42 ERROR yarn.ApplicationMaster: User class threw exception: org.apache.spark.SparkException: Job aborted. org.apache.spark.SparkException: Job aborted. 
at org.apache.spark.sql.sources.InsertIntoHadoopFsRelation.insert(commands.scala:166) at org.apache.spark.sql.sources.InsertIntoHadoopFsRelation.run(commands.scala:139) at org.apache.spark.sql.execution.ExecutedCommand.sideEffectResult$lzycompute(commands.scala:57) at org.apache.spark.sql.execution.ExecutedCommand.sideEffectResult(commands.scala:57) at org.apache.spark.sql.execution.ExecutedCommand.doExecute(commands.scala:68) at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:88) at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:88) at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:147) at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:87) at org.apache.spark.sql.SQLContext$QueryExecution.toRdd$lzycompute(SQLContext.scala:950) at org.apache.spark.sql.SQLContext$QueryExecution.toRdd(SQLContext.scala:950) at org.apache.spark.sql.sources.ResolvedDataSource$.apply(ddl.scala:336) at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:144) at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:135) at org.apache.spark.sql.DataFrameWriter.parquet(DataFrameWriter.scala:281) at org.apache.spark.mllib.recommendation.MatrixFactorizationModel$SaveLoadV1_0$.save(MatrixFactorizationModel.scala:284) at org.apache.spark.mllib.recommendation.MatrixFactorizationModel.save(MatrixFactorizationModel.scala:141) at com.mycompany.recommendation.ModelTrainer.train(ModelTrainer.scala:49) at com.mycompany.recommendation.ModelTrainer$.run(ModelTrainer.scala:96) at com.mycompany.recommendation.ModelTrainer$$anonfun$main$1.apply(ModelTrainer.scala:101) at com.mycompany.recommendation.ModelTrainer$$anonfun$main$1.apply(ModelTrainer.scala:100) at scala.Option.map(Option.scala:145) at com.mycompany.recommendation.ModelTrainer$.main(ModelTrainer.scala:100) at com.mycompany.recommendation.ModelTrainer.main(ModelTrainer.scala) at 
sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:606) at org.apache.spark.deploy.yarn.ApplicationMaster$$anon$2.run(ApplicationMaster.scala:483) Caused by: org.apache.spark.SparkException: Job aborted due to stage failure: Task serialization failed: java.lang.StackOverflowError java.io.ObjectOutputStream.defaultWriteFields(ObjectOutputStream.java:1533) java.io.ObjectOutputStream.writeSerialData(ObjectOutputStream.java:1508) java.io.ObjectOutputStream.writeOrdinaryObject(ObjectOutputStream.java:1431) java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1177) java.io.ObjectOutputStream.defaultWriteFields(ObjectOutputStream.java:1547) java.io.ObjectOutputStream.writeSerialData(ObjectOutputStream.java:1508) java.io.ObjectOutputStream.writeOrdinaryObject(ObjectOutputStream.java:1431) java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1177) java.io.ObjectOutputStream.writeObject(ObjectOutputStream.java:347)