Any idea about this?

 

From: Kürşat Kurt [mailto:kur...@kursatkurt.com] 
Sent: Sunday, October 30, 2016 7:59 AM
To: 'Jörn Franke' <jornfra...@gmail.com>
Cc: 'user@spark.apache.org' <user@spark.apache.org>
Subject: RE: Out Of Memory issue

 

Hi Jörn;

 

I am reading a 300,000-line CSV file. It is “ß”-separated (sample file attached).
The first column is the class name and the second column is the product name.

The Java version is 1.8.108, single node. Furthermore (as you can see in the code) I
tried random forests and that gets OOM too.
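
For reference, the same file could presumably also be loaded with the DataFrame CSV reader instead of textFile + split. A minimal sketch (my assumption: Spark 2.0's built-in csv source accepts the single-character "ß" as the "sep" option, the column order matches the description above, and spark is the SparkSession created in the code below):

// Sketch only: read the ß-separated file via the CSV data source.
val raw = spark.read
  .option("sep", "ß")          // single-character delimiter
  .option("header", "false")
  .csv("hdfs://localhost:54310/SparkWork/classifications.csv")
  .toDF("className", "productName")   // assumed order: class first, product second
raw.printSchema()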

 

 

 

Code:

package main.scala
 
import java.util.Locale
import org.apache.spark.SparkConf
import org.apache.spark.ml.classification.RandomForestClassifier
import org.apache.spark.ml.evaluation.MulticlassClassificationEvaluator
import org.apache.spark.ml.feature.CountVectorizer
import org.apache.spark.ml.feature.IndexToString
import org.apache.spark.ml.feature.StandardScaler
import org.apache.spark.ml.feature.StopWordsRemover
import org.apache.spark.ml.feature.StringIndexer
import org.apache.spark.ml.feature.Tokenizer
import org.apache.spark.sql.SparkSession
import com.hrzafer.reshaturkishstemmer.Resha
import org.apache.spark.ml.feature.VectorIndexer
import org.apache.spark.ml.feature.IDF
import org.apache.spark.ml.Pipeline
import org.apache.spark.ml.feature.NGram
import org.apache.spark.ml.classification.NaiveBayes
import org.apache.spark.ml.classification.DecisionTreeClassificationModel
import org.apache.spark.ml.classification.DecisionTreeClassifier
import org.apache.spark.ml.classification.LogisticRegression
import scala.collection.mutable.ListBuffer
import org.apache.spark.ml.classification.OneVsRest
import org.apache.spark.storage.StorageLevel
 
 
object Test1 {
 
  var num = 50;
  var savePath = "hdfs://localhost:54310/SparkWork/SparkModel/";
  var stemmer = Resha.Instance
 
  var STOP_WORDS: Set[String] = Set();
 
  def cropSentence(s: String) = {
    s.replaceAll("\\([^\\)]*\\)", "")
      .replaceAll("(\\d+)(gb|GB) <file://d+)(gb|GB)> ", "$1 $2")
      .replaceAll(" - ", " ")
      .replaceAll("-", " ")
      .replaceAll(" tr. ", " ")
      .replaceAll("  +", " ")
      .replaceAll(",", " ").trim();
  }
 
  def main(args: Array[String]): Unit = {
 
    val start1 = System.currentTimeMillis();
 
    val sc = new SparkConf().setAppName("Test")    
    .set("spark.hadoop.validateOutputSpecs", "false")
    .set("spark.serializer","org.apache.spark.serializer.KryoSerializer")
 
 
    val spark = SparkSession.builder.appName("Java Spark").config(sc).getOrCreate();
    import spark.implicits._

    val mainDataset = spark.sparkContext.textFile("hdfs://localhost:54310/SparkWork/classifications.csv")
      .map(_.split("ß"))
      .map(tokens => {
        // Crop, lowercase (Turkish locale) and stem the first column, then pair it with the second.
        val list = new ListBuffer[String]();
        val token0 = cropSentence(tokens(0).toLowerCase(Locale.forLanguageTag("TR-tr")));
        token0.split("\\s+").foreach(w => list += stemmer.stem(w))
        (tokens(1), list.toList.mkString(" "))
      }).persist(StorageLevel.MEMORY_AND_DISK).toDF("className", "productName");
 
 
    val classIndexer = new StringIndexer()
      .setInputCol("className")
      .setOutputCol("label");
 
    val classIndexerModel = classIndexer.fit(mainDataset);
    val mainDS = classIndexerModel.transform(mainDataset);
    classIndexerModel.write.overwrite.save(savePath + "ClassIndexer");
    //Tokenizer
    val tokenizer = new Tokenizer()
      .setInputCol("productName")
      .setOutputCol("words_nonfiltered");

    //StopWords
    val remover = new StopWordsRemover()
      .setInputCol("words_nonfiltered")
      .setOutputCol("words")
      .setStopWords(Array[String]("garanti","garantili","resmi","distribütör","cep","tel","-","//"));
 
    //CountVectorizer
    val countVectorizer = new CountVectorizer()
      .setInputCol("words")
      .setOutputCol("features");

    val rfc = new RandomForestClassifier()
      .setLabelCol("label")
      .setNumTrees(3)
      .setMaxDepth(3)
      .setFeatureSubsetStrategy("auto")
      .setFeaturesCol("features")
      .setImpurity("gini")
      .setMaxBins(3);

    val nb = new NaiveBayes()
      .setSmoothing(0.1)
      .setModelType("multinomial")

    val pipeline = new Pipeline().setStages(Array(tokenizer, remover, countVectorizer, nb));
 
 
    val splits = mainDS.randomSplit(Array(0.80, 0.20));
    val train = splits(0);
    //train.show(num,false);
    val test = splits(1);
    //test.show(num,false);

    //mainDataset.show(100,false);
    val model = pipeline.fit(train);
    model.write.overwrite.save(savePath + "RandomForestClassifier");
    //var model=rfc.fit(train);

    val result = model.transform(test);

    val predictionAndLabels = result.select("prediction", "label");
    val evaluator = new MulticlassClassificationEvaluator().setMetricName("accuracy");
    System.out.println("Accuracy = " + evaluator.evaluate(predictionAndLabels));
 
 
  }
}

 

From: Jörn Franke [mailto:jornfra...@gmail.com] 
Sent: Sunday, October 30, 2016 12:44 AM
To: Kürşat Kurt <kur...@kursatkurt.com>
Cc: user@spark.apache.org
Subject: Re: Out Of Memory issue

 

What is the size and format of the input data?

Can you provide more details on your Spark job? RDD? DataFrame? Etc. Java
version? Is this a single node? It seems your executors and OS do not get a lot
of memory.


On 29 Oct 2016, at 22:51, Kürşat Kurt <kur...@kursatkurt.com> wrote:

Hi;

 

While training a NaiveBayes classifier, I am getting an OOM.

What is wrong with these parameters?

Here is the spark-submit command: ./spark-submit --class main.scala.Test1 
--master local[*] --driver-memory 60g  /home/user1/project_2.11-1.0.jar

 

PS: The OS is Ubuntu 14.04 and the system has 64 GB RAM and a 256 GB SSD, running Spark 2.0.1.
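
As a side note, a minimal check (not part of the original job) can confirm how much heap the single local-mode JVM actually received; with --master local[*] the driver and executors share one process, so --driver-memory is the setting that sizes the heap:

// Sketch only: print the effective max heap and the configured driver memory.
val maxHeapGb = Runtime.getRuntime.maxMemory.toDouble / (1024L * 1024 * 1024)
println(f"JVM max heap: $maxHeapGb%.1f GB")
println("spark.driver.memory = " + spark.sparkContext.getConf.get("spark.driver.memory", "<not set>"))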

 

16/10/29 23:32:21 INFO BlockManagerInfo: Removed broadcast_10_piece0 on 
89.*************:35416 in memory (size: 4.0 MB, free: 31.7 GB)

16/10/29 23:32:21 INFO BlockManagerInfo: Removed broadcast_10_piece1 on 
89.*************:35416 in memory (size: 2.4 MB, free: 31.7 GB)

16/10/29 23:33:00 INFO ExternalAppendOnlyMap: Thread 123 spilling in-memory map 
of 31.8 GB to disk (1 time so far)

16/10/29 23:34:42 INFO ExternalAppendOnlyMap: Thread 123 spilling in-memory map 
of 31.8 GB to disk (2 times so far)

16/10/29 23:36:58 INFO ExternalAppendOnlyMap: Thread 123 spilling in-memory map 
of 31.8 GB to disk (3 times so far)

16/10/29 23:41:27 WARN TaskMemoryManager: leak 21.2 GB memory from 
org.apache.spark.util.collection.ExternalAppendOnlyMap@43ab2e76 

16/10/29 23:41:28 ERROR Executor: Exception in task 0.0 in stage 10.0 (TID 31)

java.lang.OutOfMemoryError: Java heap space

        at com.esotericsoftware.kryo.io.Input.readDoubles(Input.java:885)

        at 
com.esotericsoftware.kryo.serializers.DefaultArraySerializers$DoubleArraySerializer.read(DefaultArraySerializers.java:222)

        at 
com.esotericsoftware.kryo.serializers.DefaultArraySerializers$DoubleArraySerializer.read(DefaultArraySerializers.java:205)

        at com.esotericsoftware.kryo.Kryo.readObjectOrNull(Kryo.java:759)

        at 
com.esotericsoftware.kryo.serializers.ObjectField.read(ObjectField.java:132)

        at 
com.esotericsoftware.kryo.serializers.FieldSerializer.read(FieldSerializer.java:551)

        at com.esotericsoftware.kryo.Kryo.readClassAndObject(Kryo.java:790)

        at com.twitter.chill.Tuple2Serializer.read(TupleSerializers.scala:42)

        at com.twitter.chill.Tuple2Serializer.read(TupleSerializers.scala:33)

        at com.esotericsoftware.kryo.Kryo.readClassAndObject(Kryo.java:790)

        at 
org.apache.spark.serializer.KryoDeserializationStream.readObject(KryoSerializer.scala:229)

        at 
org.apache.spark.serializer.DeserializationStream.readValue(Serializer.scala:159)

        at 
org.apache.spark.util.collection.ExternalAppendOnlyMap$DiskMapIterator.readNextItem(ExternalAppendOnlyMap.scala:515)

        at 
org.apache.spark.util.collection.ExternalAppendOnlyMap$DiskMapIterator.hasNext(ExternalAppendOnlyMap.scala:535)

        at scala.collection.Iterator$$anon$1.hasNext(Iterator.scala:1004)

        at 
org.apache.spark.util.collection.ExternalAppendOnlyMap$ExternalIterator.org$apache$spark$util$collection$ExternalAppendOnlyMap$ExternalIterator$$readNextHashCode(ExternalAppendOnlyMap.scala:336)

        at 
org.apache.spark.util.collection.ExternalAppendOnlyMap$ExternalIterator$$anonfun$next$1.apply(ExternalAppendOnlyMap.scala:409)

        at 
org.apache.spark.util.collection.ExternalAppendOnlyMap$ExternalIterator$$anonfun$next$1.apply(ExternalAppendOnlyMap.scala:407)

        at 
scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)

        at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)

        at 
org.apache.spark.util.collection.ExternalAppendOnlyMap$ExternalIterator.next(ExternalAppendOnlyMap.scala:407)

        at 
org.apache.spark.util.collection.ExternalAppendOnlyMap$ExternalIterator.next(ExternalAppendOnlyMap.scala:302)

        at scala.collection.Iterator$class.foreach(Iterator.scala:893)

        at 
org.apache.spark.util.collection.ExternalAppendOnlyMap$ExternalIterator.foreach(ExternalAppendOnlyMap.scala:302)

        at 
scala.collection.generic.Growable$class.$plus$plus$eq(Growable.scala:59)

        at 
scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:104)

        at 
scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:48)

        at scala.collection.TraversableOnce$class.to(TraversableOnce.scala:310)

        at 
org.apache.spark.util.collection.ExternalAppendOnlyMap$ExternalIterator.to(ExternalAppendOnlyMap.scala:302)

        at 
scala.collection.TraversableOnce$class.toBuffer(TraversableOnce.scala:302)

        at 
org.apache.spark.util.collection.ExternalAppendOnlyMap$ExternalIterator.toBuffer(ExternalAppendOnlyMap.scala:302)

        at 
scala.collection.TraversableOnce$class.toArray(TraversableOnce.scala:289)

16/10/29 23:41:28 ERROR SparkUncaughtExceptionHandler: Uncaught exception in 
thread Thread[Executor task launch worker-7,5,main]

java.lang.OutOfMemoryError: Java heap space

        at com.esotericsoftware.kryo.io.Input.readDoubles(Input.java:885)

        at 
com.esotericsoftware.kryo.serializers.DefaultArraySerializers$DoubleArraySerializer.read(DefaultArraySerializers.java:222)

        at 
com.esotericsoftware.kryo.serializers.DefaultArraySerializers$DoubleArraySerializer.read(DefaultArraySerializers.java:205)

        at com.esotericsoftware.kryo.Kryo.readObjectOrNull(Kryo.java:759)

        at 
com.esotericsoftware.kryo.serializers.ObjectField.read(ObjectField.java:132)

        at 
com.esotericsoftware.kryo.serializers.FieldSerializer.read(FieldSerializer.java:551)

        at com.esotericsoftware.kryo.Kryo.readClassAndObject(Kryo.java:790)

        at com.twitter.chill.Tuple2Serializer.read(TupleSerializers.scala:42)

        at com.twitter.chill.Tuple2Serializer.read(TupleSerializers.scala:33)

        at com.esotericsoftware.kryo.Kryo.readClassAndObject(Kryo.java:790)

        at 
org.apache.spark.serializer.KryoDeserializationStream.readObject(KryoSerializer.scala:229)

        at 
org.apache.spark.serializer.DeserializationStream.readValue(Serializer.scala:159)

        at 
org.apache.spark.util.collection.ExternalAppendOnlyMap$DiskMapIterator.readNextItem(ExternalAppendOnlyMap.scala:515)

        at 
org.apache.spark.util.collection.ExternalAppendOnlyMap$DiskMapIterator.hasNext(ExternalAppendOnlyMap.scala:535)

        at scala.collection.Iterator$$anon$1.hasNext(Iterator.scala:1004)

        at 
org.apache.spark.util.collection.ExternalAppendOnlyMap$ExternalIterator.org$apache$spark$util$collection$ExternalAppendOnlyMap$ExternalIterator$$readNextHashCode(ExternalAppendOnlyMap.scala:336)

        at 
org.apache.spark.util.collection.ExternalAppendOnlyMap$ExternalIterator$$anonfun$next$1.apply(ExternalAppendOnlyMap.scala:409)

        at 
org.apache.spark.util.collection.ExternalAppendOnlyMap$ExternalIterator$$anonfun$next$1.apply(ExternalAppendOnlyMap.scala:407)

        at 
scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)

        at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)

        at 
org.apache.spark.util.collection.ExternalAppendOnlyMap$ExternalIterator.next(ExternalAppendOnlyMap.scala:407)

        at 
org.apache.spark.util.collection.ExternalAppendOnlyMap$ExternalIterator.next(ExternalAppendOnlyMap.scala:302)

        at scala.collection.Iterator$class.foreach(Iterator.scala:893)

        at 
org.apache.spark.util.collection.ExternalAppendOnlyMap$ExternalIterator.foreach(ExternalAppendOnlyMap.scala:302)

        at 
scala.collection.generic.Growable$class.$plus$plus$eq(Growable.scala:59)

        at 
scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:104)

        at 
scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:48)

        at scala.collection.TraversableOnce$class.to(TraversableOnce.scala:310)

        at 
org.apache.spark.util.collection.ExternalAppendOnlyMap$ExternalIterator.to(ExternalAppendOnlyMap.scala:302)

        at 
scala.collection.TraversableOnce$class.toBuffer(TraversableOnce.scala:302)

        at 
org.apache.spark.util.collection.ExternalAppendOnlyMap$ExternalIterator.toBuffer(ExternalAppendOnlyMap.scala:302)

        at 
scala.collection.TraversableOnce$class.toArray(TraversableOnce.scala:289)

Oct 29, 2016 11:25:48 PM INFO: org.apache.parquet.hadoop.codec.CodecConfig: 
Compression: SNAPPY

Oct 29, 2016 11:25:48 PM INFO: org.apache.parquet.hadoop.ParquetOutputFormat: 
Parquet block size to 134217728

Oct 29, 2016 11:25:48 PM INFO: org.apache.parquet.hadoop.ParquetOutputFormat: 
Parquet page size to 1048576

Oct 29, 2016 11:25:48 PM INFO: org.apache.parquet.hadoop.ParquetOutputFormat: 
Parquet dictionary page size to 1048576

Oct 29, 2016 11:25:48 PM INFO: org.apache.parquet.hadoop.ParquetOutputFormat: 
Dictionary is on

Oct 29, 2016 11:25:48 PM INFO: org.apache.parquet.hadoop.ParquetOutputFormat: 
Validation is off

Oct 29, 2016 11:25:48 PM INFO: org.apache.parquet.hadoop.ParquetOutputFormat: 
Writer version is: PARQUET_1_0

Oct 29, 2016 11:25:49 PM INFO: 
org.apache.parquet.hadoop.InternalParquetRecordWriter: Flushing mem columnStore 
to file. allocated memory: 4,396,549

Oct 29, 2016 11:25:49 PM INFO: 
org.apache.parquet.hadoop.ColumnChunkPageWriteStore: written 4,157,541B for 
[labels, list, element] BINARY: 142,207 values, 5,600,131B raw, 4,156,878B 
comp, 6 pages, encodings: [PLAIN, RLE]

16/10/29 23:41:28 WARN TaskSetManager: Lost task 0.0 in stage 10.0 (TID 31, 
localhost): java.lang.OutOfMemoryError: Java heap space

        at com.esotericsoftware.kryo.io.Input.readDoubles(Input.java:885)

        at 
com.esotericsoftware.kryo.serializers.DefaultArraySerializers$DoubleArraySerializer.read(DefaultArraySerializers.java:222)

        at 
com.esotericsoftware.kryo.serializers.DefaultArraySerializers$DoubleArraySerializer.read(DefaultArraySerializers.java:205)

        at com.esotericsoftware.kryo.Kryo.readObjectOrNull(Kryo.java:759)

        at 
com.esotericsoftware.kryo.serializers.ObjectField.read(ObjectField.java:132)

        at 
com.esotericsoftware.kryo.serializers.FieldSerializer.read(FieldSerializer.java:551)

        at com.esotericsoftware.kryo.Kryo.readClassAndObject(Kryo.java:790)

        at com.twitter.chill.Tuple2Serializer.read(TupleSerializers.scala:42)

        at com.twitter.chill.Tuple2Serializer.read(TupleSerializers.scala:33)

        at com.esotericsoftware.kryo.Kryo.readClassAndObject(Kryo.java:790)

        at 
org.apache.spark.serializer.KryoDeserializationStream.readObject(KryoSerializer.scala:229)

        at 
org.apache.spark.serializer.DeserializationStream.readValue(Serializer.scala:159)

        at 
org.apache.spark.util.collection.ExternalAppendOnlyMap$DiskMapIterator.readNextItem(ExternalAppendOnlyMap.scala:515)

        at 
org.apache.spark.util.collection.ExternalAppendOnlyMap$DiskMapIterator.hasNext(ExternalAppendOnlyMap.scala:535)

        at scala.collection.Iterator$$anon$1.hasNext(Iterator.scala:1004)

        at 
org.apache.spark.util.collection.ExternalAppendOnlyMap$ExternalIterator.org$apache$spark$util$collection$ExternalAppendOnlyMap$ExternalIterator$$readNextHashCode(ExternalAppendOnlyMap.scala:336)

        at 
org.apache.spark.util.collection.ExternalAppendOnlyMap$ExternalIterator$$anonfun$next$1.apply(ExternalAppendOnlyMap.scala:409)

        at 
org.apache.spark.util.collection.ExternalAppendOnlyMap$ExternalIterator$$anonfun$next$1.apply(ExternalAppendOnlyMap.scala:407)

        at 
scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)

        at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)

        at 
org.apache.spark.util.collection.ExternalAppendOnlyMap$ExternalIterator.next(ExternalAppendOnlyMap.scala:407)

        at 
org.apache.spark.util.collection.ExternalAppendOnlyMap$ExternalIterator.next(ExternalAppendOnlyMap.scala:302)

        at scala.collection.Iterator$class.foreach(Iterator.scala:893)

        at 
org.apache.spark.util.collection.ExternalAppendOnlyMap$ExternalIterator.foreach(ExternalAppendOnlyMap.scala:302)

        at 
scala.collection.generic.Growable$class.$plus$plus$eq(Growable.scala:59)

        at 
scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:104)

        at 
scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:48)

        at scala.collection.TraversableOnce$class.to(TraversableOnce.scala:310)

        at 
org.apache.spark.util.collection.ExternalAppendOnlyMap$ExternalIterator.to(ExternalAppendOnlyMap.scala:302)

        at 
scala.collection.TraversableOnce$class.toBuffer(TraversableOnce.scala:302)

        at 
org.apache.spark.util.collection.ExternalAppendOnlyMap$ExternalIterator.toBuffer(ExternalAppendOnlyMap.scala:302)

        at 
scala.collection.TraversableOnce$class.toArray(TraversableOnce.scala:289)

 

16/10/29 23:41:28 INFO SparkContext: Invoking stop() from shutdown hook

16/10/29 23:41:28 ERROR TaskSetManager: Task 0 in stage 10.0 failed 1 times; 
aborting job

16/10/29 23:41:28 INFO TaskSchedulerImpl: Removed TaskSet 10.0, whose tasks 
have all completed, from pool

16/10/29 23:41:28 INFO TaskSchedulerImpl: Cancelling stage 10

16/10/29 23:41:28 INFO DAGScheduler: ResultStage 10 (collect at 
NaiveBayes.scala:400) failed in 570.233 s

16/10/29 23:41:28 INFO DAGScheduler: Job 5 failed: collect at 
NaiveBayes.scala:400, took 934.966523 s

Exception in thread "main" org.apache.spark.SparkException: Job aborted due to 
stage failure: Task 0 in stage 10.0 failed 1 times, most recent failure: Lost 
task 0.0 in stage 10.0 (TID 31, localhost): java.lang.OutOfMemoryError: Java 
heap space

       at com.esotericsoftware.kryo.io.Input.readDoubles(Input.java:885)

        at 
com.esotericsoftware.kryo.serializers.DefaultArraySerializers$DoubleArraySerializer.read(DefaultArraySerializers.java:222)

        at 
com.esotericsoftware.kryo.serializers.DefaultArraySerializers$DoubleArraySerializer.read(DefaultArraySerializers.java:205)

        at com.esotericsoftware.kryo.Kryo.readObjectOrNull(Kryo.java:759)

        at 
com.esotericsoftware.kryo.serializers.ObjectField.read(ObjectField.java:132)

        at 
com.esotericsoftware.kryo.serializers.FieldSerializer.read(FieldSerializer.java:551)

        at com.esotericsoftware.kryo.Kryo.readClassAndObject(Kryo.java:790)

        at com.twitter.chill.Tuple2Serializer.read(TupleSerializers.scala:42)

        at com.twitter.chill.Tuple2Serializer.read(TupleSerializers.scala:33)

        at com.esotericsoftware.kryo.Kryo.readClassAndObject(Kryo.java:790)

        at 
org.apache.spark.serializer.KryoDeserializationStream.readObject(KryoSerializer.scala:229)

        at 
org.apache.spark.serializer.DeserializationStream.readValue(Serializer.scala:159)

        at 
org.apache.spark.util.collection.ExternalAppendOnlyMap$DiskMapIterator.readNextItem(ExternalAppendOnlyMap.scala:515)

        at 
org.apache.spark.util.collection.ExternalAppendOnlyMap$DiskMapIterator.hasNext(ExternalAppendOnlyMap.scala:535)

        at scala.collection.Iterator$$anon$1.hasNext(Iterator.scala:1004)

        at 
org.apache.spark.util.collection.ExternalAppendOnlyMap$ExternalIterator.org$apache$spark$util$collection$ExternalAppendOnlyMap$ExternalIterator$$readNextHashCode(ExternalAppendOnlyMap.scala:336)

        at 
org.apache.spark.util.collection.ExternalAppendOnlyMap$ExternalIterator$$anonfun$next$1.apply(ExternalAppendOnlyMap.scala:409)

        at 
org.apache.spark.util.collection.ExternalAppendOnlyMap$ExternalIterator$$anonfun$next$1.apply(ExternalAppendOnlyMap.scala:407)

        at 
scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)

        at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)

        at 
org.apache.spark.util.collection.ExternalAppendOnlyMap$ExternalIterator.next(ExternalAppendOnlyMap.scala:407)

        at 
org.apache.spark.util.collection.ExternalAppendOnlyMap$ExternalIterator.next(ExternalAppendOnlyMap.scala:302)

        at scala.collection.Iterator$class.foreach(Iterator.scala:893)

        at 
org.apache.spark.util.collection.ExternalAppendOnlyMap$ExternalIterator.foreach(ExternalAppendOnlyMap.scala:302)

        at 
scala.collection.generic.Growable$class.$plus$plus$eq(Growable.scala:59)

        at 
scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:104)

        at 
scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:48)

        at scala.collection.TraversableOnce$class.to(TraversableOnce.scala:310)

        at 
org.apache.spark.util.collection.ExternalAppendOnlyMap$ExternalIterator.to(ExternalAppendOnlyMap.scala:302)

        at 
scala.collection.TraversableOnce$class.toBuffer(TraversableOnce.scala:302)

        at 
org.apache.spark.util.collection.ExternalAppendOnlyMap$ExternalIterator.toBuffer(ExternalAppendOnlyMap.scala:302)

        at 
scala.collection.TraversableOnce$class.toArray(TraversableOnce.scala:289)

 

Driver stacktrace:

        at 
org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1454)

        at 
org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1442)

        at 
org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1441)

        at 
scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)

        at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)

        at 
org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1441)

        at 
org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:811)

        at 
org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:811)

        at scala.Option.foreach(Option.scala:257)

        at 
org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:811)

        at 
org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1667)

        at 
org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1622)

        at 
org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1611)

        at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)

        at 
org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:632)

        at org.apache.spark.SparkContext.runJob(SparkContext.scala:1890)

        at org.apache.spark.SparkContext.runJob(SparkContext.scala:1903)

        at org.apache.spark.SparkContext.runJob(SparkContext.scala:1916)

        at org.apache.spark.SparkContext.runJob(SparkContext.scala:1930)

        at org.apache.spark.rdd.RDD$$anonfun$collect$1.apply(RDD.scala:912)

        at 
org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)

        at 
org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)

        at org.apache.spark.rdd.RDD.withScope(RDD.scala:358)

        at org.apache.spark.rdd.RDD.collect(RDD.scala:911)

        at 
org.apache.spark.mllib.classification.NaiveBayes.run(NaiveBayes.scala:400)

        at 
org.apache.spark.mllib.classification.NaiveBayes$.train(NaiveBayes.scala:507)

        at 
org.apache.spark.ml.classification.NaiveBayes.train(NaiveBayes.scala:114)

        at 
org.apache.spark.ml.classification.NaiveBayes.train(NaiveBayes.scala:76)

        at org.apache.spark.ml.Predictor.fit(Predictor.scala:90)

        at org.apache.spark.ml.Predictor.fit(Predictor.scala:71)

        at org.apache.spark.ml.Pipeline$$anonfun$fit$2.apply(Pipeline.scala:149)

        at org.apache.spark.ml.Pipeline$$anonfun$fit$2.apply(Pipeline.scala:145)

        at scala.collection.Iterator$class.foreach(Iterator.scala:893)

        at scala.collection.AbstractIterator.foreach(Iterator.scala:1336)

        at 
scala.collection.IterableViewLike$Transformed$class.foreach(IterableViewLike.scala:44)

        at 
scala.collection.SeqViewLike$AbstractTransformed.foreach(SeqViewLike.scala:37)

        at org.apache.spark.ml.Pipeline.fit(Pipeline.scala:145)

        at main.scala.Test1$.main(Test1.scala:172)

        at main.scala.Test1.main(Test1.scala)

        at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)

        at 
sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)

        at 
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)

        at java.lang.reflect.Method.invoke(Method.java:498)

        at 
org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:736)

        at 
org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:185)

        at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:210)

        at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:124)

        at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)

Caused by: java.lang.OutOfMemoryError: Java heap space

        at com.esotericsoftware.kryo.io.Input.readDoubles(Input.java:885)

        at 
com.esotericsoftware.kryo.serializers.DefaultArraySerializers$DoubleArraySerializer.read(DefaultArraySerializers.java:222)

        at 
com.esotericsoftware.kryo.serializers.DefaultArraySerializers$DoubleArraySerializer.read(DefaultArraySerializers.java:205)

        at com.esotericsoftware.kryo.Kryo.readObjectOrNull(Kryo.java:759)

        at 
com.esotericsoftware.kryo.serializers.ObjectField.read(ObjectField.java:132)

        at 
com.esotericsoftware.kryo.serializers.FieldSerializer.read(FieldSerializer.java:551)

        at com.esotericsoftware.kryo.Kryo.readClassAndObject(Kryo.java:790)

        at com.twitter.chill.Tuple2Serializer.read(TupleSerializers.scala:42)

        at com.twitter.chill.Tuple2Serializer.read(TupleSerializers.scala:33)

        at com.esotericsoftware.kryo.Kryo.readClassAndObject(Kryo.java:790)

        at 
org.apache.spark.serializer.KryoDeserializationStream.readObject(KryoSerializer.scala:229)

        at 
org.apache.spark.serializer.DeserializationStream.readValue(Serializer.scala:159)

        at 
org.apache.spark.util.collection.ExternalAppendOnlyMap$DiskMapIterator.readNextItem(ExternalAppendOnlyMap.scala:515)

        at 
org.apache.spark.util.collection.ExternalAppendOnlyMap$DiskMapIterator.hasNext(ExternalAppendOnlyMap.scala:535)

        at scala.collection.Iterator$$anon$1.hasNext(Iterator.scala:1004)

        at 
org.apache.spark.util.collection.ExternalAppendOnlyMap$ExternalIterator.org$apache$spark$util$collection$ExternalAppendOnlyMap$ExternalIterator$$readNextHashCode(ExternalAppendOnlyMap.scala:336)

        at 
org.apache.spark.util.collection.ExternalAppendOnlyMap$ExternalIterator$$anonfun$next$1.apply(ExternalAppendOnlyMap.scala:409)

        at 
org.apache.spark.util.collection.ExternalAppendOnlyMap$ExternalIterator$$anonfun$next$1.apply(ExternalAppendOnlyMap.scala:407)

        at 
scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)

        at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)

        at 
org.apache.spark.util.collection.ExternalAppendOnlyMap$ExternalIterator.next(ExternalAppendOnlyMap.scala:407)

        at 
org.apache.spark.util.collection.ExternalAppendOnlyMap$ExternalIterator.next(ExternalAppendOnlyMap.scala:302)

        at scala.collection.Iterator$class.foreach(Iterator.scala:893)

        at 
org.apache.spark.util.collection.ExternalAppendOnlyMap$ExternalIterator.foreach(ExternalAppendOnlyMap.scala:302)

        at 
scala.collection.generic.Growable$class.$plus$plus$eq(Growable.scala:59)

        at 
scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:104)

        at 
scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:48)

        at scala.collection.TraversableOnce$class.to(TraversableOnce.scala:310)

        at 
org.apache.spark.util.collection.ExternalAppendOnlyMap$ExternalIterator.to(ExternalAppendOnlyMap.scala:302)

        at 
scala.collection.TraversableOnce$class.toBuffer(TraversableOnce.scala:302)

        at 
org.apache.spark.util.collection.ExternalAppendOnlyMap$ExternalIterator.toBuffer(ExternalAppendOnlyMap.scala:302)

        at 
scala.collection.TraversableOnce$class.toArray(TraversableOnce.scala:289)

16/10/29 23:41:28 INFO SparkUI: Stopped Spark web UI at 
http://89.*************:4040

16/10/29 23:41:28 INFO MapOutputTrackerMasterEndpoint: 
MapOutputTrackerMasterEndpoint stopped!

16/10/29 23:41:28 INFO MemoryStore: MemoryStore cleared

16/10/29 23:41:28 INFO BlockManager: BlockManager stopped

16/10/29 23:41:28 INFO BlockManagerMaster: BlockManagerMaster stopped

16/10/29 23:41:28 INFO OutputCommitCoordinator$OutputCommitCoordinatorEndpoint: 
OutputCommitCoordinator stopped!

16/10/29 23:41:28 INFO SparkContext: Successfully stopped SparkContext

16/10/29 23:41:28 INFO ShutdownHookManager: Shutdown hook called

16/10/29 23:41:28 INFO ShutdownHookManager: Deleting directory 
/tmp/spark-15cf14e4-f103-4cbf-aa0f-85828eadbcce
