Hi all, I am running standalone Spark 1.4.1 (PySpark) on my local machine.
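Roughly, this is the relevant part of record_linker_spark.py, written out as a simplified stand-alone sketch (in the real script index_no_elements and print_no_elements are methods on a class, and mathes_grp is built from my record-linkage data, not from parallelize):

    from pyspark import SparkContext

    sc = SparkContext("local[*]", "record_linker_spark")

    def index_no_elements(idx, iterator):
        # Yield (partition_index, number_of_elements) for each partition.
        yield (idx, sum(1 for _ in iterator))

    def print_no_elements(rdd):
        # Collect the per-partition counts on the driver and print them.
        print "<< partition: %s and elements: >>" % \
            rdd.mapPartitionsWithIndex(index_no_elements).collect()

    # Stand-in for my real RDD of grouped match candidates; the real one holds
    # ~36M records, almost all of them in partitions 1 and 4.
    mathes_grp = sc.parallelize(range(1000), 10)

    print_no_elements(mathes_grp)             # shows the skew
    mathes_grp = mathes_grp.repartition(10)   # the shuffle triggered here is what blows up
    print_no_elements(mathes_grp)             # this collect() is line 74 in the traceback below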
I have 10 partitions, with heavy data skew on partitions 1 and 4. The partition-count printout gives:

<< partition: [(0, 0), (1, 15593259), (2, 0), (3, 0), (4, 20695601), (5, 0), (6, 0), (7, 0), (8, 0), (9, 0)] and elements: >>

When I then call rdd.repartition(10) (the repartition call in the sketch above), the job fails with "ERROR Executor: Exception in task 1.0 in stage 10.0 (TID 61) java.lang.OutOfMemoryError: Java heap space", followed by an "ERROR DiskBlockObjectWriter" further down in the log. I also tried raising "spark.executor.memory" to "4g" (see the P.S. at the end of this mail) and got the same errors again. The full log follows:

15/09/02 21:12:43 ERROR Executor: Exception in task 1.0 in stage 10.0 (TID 61)
java.lang.OutOfMemoryError: Java heap space
        at org.apache.spark.api.python.PythonRDD$$anon$1.read(PythonRDD.scala:113)
        at org.apache.spark.api.python.PythonRDD$$anon$1.next(PythonRDD.scala:101)
        at org.apache.spark.api.python.PythonRDD$$anon$1.next(PythonRDD.scala:97)
        at org.apache.spark.InterruptibleIterator.next(InterruptibleIterator.scala:43)
        at scala.collection.Iterator$$anon$11.next(Iterator.scala:328)
        at scala.collection.Iterator$$anon$11.next(Iterator.scala:328)
        at org.apache.spark.util.collection.WritablePartitionedIterator$$anon$3.writeNext(WritablePartitionedPairCollection.scala:105)
        at org.apache.spark.util.collection.ExternalSorter.spillToPartitionFiles(ExternalSorter.scala:375)
        at org.apache.spark.util.collection.ExternalSorter.insertAll(ExternalSorter.scala:208)
        at org.apache.spark.shuffle.sort.SortShuffleWriter.write(SortShuffleWriter.scala:62)
        at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:70)
        at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)
        at org.apache.spark.scheduler.Task.run(Task.scala:70)
        at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:213)
        at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
        at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
        at java.lang.Thread.run(Thread.java:745)
15/09/02 21:12:43 WARN TaskSetManager: Lost task 1.0 in stage 10.0 (TID 61, localhost): java.lang.OutOfMemoryError: Java heap space
        at org.apache.spark.api.python.PythonRDD$$anon$1.read(PythonRDD.scala:113)
        at org.apache.spark.api.python.PythonRDD$$anon$1.next(PythonRDD.scala:101)
        at org.apache.spark.api.python.PythonRDD$$anon$1.next(PythonRDD.scala:97)
        at org.apache.spark.InterruptibleIterator.next(InterruptibleIterator.scala:43)
        at scala.collection.Iterator$$anon$11.next(Iterator.scala:328)
        at scala.collection.Iterator$$anon$11.next(Iterator.scala:328)
        at org.apache.spark.util.collection.WritablePartitionedIterator$$anon$3.writeNext(WritablePartitionedPairCollection.scala:105)
        at org.apache.spark.util.collection.ExternalSorter.spillToPartitionFiles(ExternalSorter.scala:375)
        at org.apache.spark.util.collection.ExternalSorter.insertAll(ExternalSorter.scala:208)
        at org.apache.spark.shuffle.sort.SortShuffleWriter.write(SortShuffleWriter.scala:62)
        at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:70)
        at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)
        at org.apache.spark.scheduler.Task.run(Task.scala:70)
        at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:213)
        at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
        at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
        at java.lang.Thread.run(Thread.java:745)
15/09/02 21:12:43 ERROR TaskSetManager: Task 1 in stage 10.0 failed 1 times; aborting job
15/09/02 21:12:43 ERROR SparkUncaughtExceptionHandler: Uncaught exception in thread
Thread[Executor task launch worker-0,5,main]
java.lang.OutOfMemoryError: Java heap space
        at org.apache.spark.api.python.PythonRDD$$anon$1.read(PythonRDD.scala:113)
        at org.apache.spark.api.python.PythonRDD$$anon$1.next(PythonRDD.scala:101)
        at org.apache.spark.api.python.PythonRDD$$anon$1.next(PythonRDD.scala:97)
        at org.apache.spark.InterruptibleIterator.next(InterruptibleIterator.scala:43)
        at scala.collection.Iterator$$anon$11.next(Iterator.scala:328)
        at scala.collection.Iterator$$anon$11.next(Iterator.scala:328)
        at org.apache.spark.util.collection.WritablePartitionedIterator$$anon$3.writeNext(WritablePartitionedPairCollection.scala:105)
        at org.apache.spark.util.collection.ExternalSorter.spillToPartitionFiles(ExternalSorter.scala:375)
        at org.apache.spark.util.collection.ExternalSorter.insertAll(ExternalSorter.scala:208)
        at org.apache.spark.shuffle.sort.SortShuffleWriter.write(SortShuffleWriter.scala:62)
        at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:70)
        at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)
        at org.apache.spark.scheduler.Task.run(Task.scala:70)
        at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:213)
        at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
        at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
        at java.lang.Thread.run(Thread.java:745)
15/09/02 21:12:43 INFO SparkContext: Invoking stop() from shutdown hook
15/09/02 21:12:43 INFO TaskSchedulerImpl: Cancelling stage 10
15/09/02 21:12:43 INFO Executor: Executor is trying to kill task 4.0 in stage 10.0 (TID 64)
15/09/02 21:12:43 INFO TaskSchedulerImpl: Stage 10 was cancelled
15/09/02 21:12:43 INFO DAGScheduler: ShuffleMapStage 10 (repartition at NativeMethodAccessorImpl.java:-2) failed in 102.132 s
15/09/02 21:12:43 INFO DAGScheduler: Job 4 failed: collect at /Users/shahid/projects/spark_rl/record_linker_spark.py:74, took 102.154710 s
Traceback (most recent call last):
  File "/Users/shahid/projects/spark_rl/record_linker_spark.py", line 121, in <module>
15/09/02 21:12:43 INFO SparkUI: Stopped Spark web UI at http://192.168.1.2:4040
15/09/02 21:12:43 INFO DAGScheduler: Stopping DAGScheduler
15/09/02 21:12:43 INFO MapOutputTrackerMasterEndpoint: MapOutputTrackerMasterEndpoint stopped!
15/09/02 21:12:43 INFO Utils: path = /private/var/folders/5f/3v7mdvrn02lcfczhr9my0ljc0000gn/T/spark-79bb5b4f-9be4-47ed-a722-bfc79880622e/blockmgr-e8e82c2e-ca87-4667-a330-b1edb83aa81f, already present as root for deletion.
15/09/02 21:12:43 INFO MemoryStore: MemoryStore cleared
15/09/02 21:12:43 INFO BlockManager: BlockManager stopped
15/09/02 21:12:43 INFO BlockManagerMaster: BlockManagerMaster stopped
15/09/02 21:12:43 INFO OutputCommitCoordinator$OutputCommitCoordinatorEndpoint: OutputCommitCoordinator stopped!
15/09/02 21:12:43 INFO RemoteActorRefProvider$RemotingTerminator: Shutting down remote daemon.
15/09/02 21:12:43 INFO RemoteActorRefProvider$RemotingTerminator: Remote daemon shut down; proceeding with flushing remote transports.
15/09/02 21:12:43 WARN PythonRDD: Incomplete task interrupted: Attempting to kill Python Worker
15/09/02 21:12:43 INFO RemoteActorRefProvider$RemotingTerminator: Remoting shut down.
    R.print_no_elements(mathes_grp)
  File "/Users/shahid/projects/spark_rl/record_linker_spark.py", line 74, in print_no_elements
    print "\n* * * * * * * * * * * * * * *\n<< partition: %s and elements: >> \n* * * * * * * * * * * * * * *\n" % rdd.mapPartitionsWithIndex(self.index_no_elements).collect()
  File "/Users/shahid/projects/spark-1.4.1-bin-hadoop2.6/python/lib/pyspark.zip/pyspark/rdd.py", line 757, in collect
  File "/Users/shahid/projects/spark-1.4.1-bin-hadoop2.6/python/lib/py4j-0.8.2.1-src.zip/py4j/java_gateway.py", line 538, in __call__
  File "/Users/shahid/projects/spark-1.4.1-bin-hadoop2.6/python/lib/py4j-0.8.2.1-src.zip/py4j/protocol.py", line 300, in get_return_value
py4j.protocol.Py4JJavaError: An error occurred while calling z:org.apache.spark.api.python.PythonRDD.collectAndServe.
: org.apache.spark.SparkException: Job aborted due to stage failure: Task 1 in stage 10.0 failed 1 times, most recent failure: Lost task 1.0 in stage 10.0 (TID 61, localhost): java.lang.OutOfMemoryError: Java heap space
        at org.apache.spark.api.python.PythonRDD$$anon$1.read(PythonRDD.scala:113)
        at org.apache.spark.api.python.PythonRDD$$anon$1.next(PythonRDD.scala:101)
        at org.apache.spark.api.python.PythonRDD$$anon$1.next(PythonRDD.scala:97)
        at org.apache.spark.InterruptibleIterator.next(InterruptibleIterator.scala:43)
        at scala.collection.Iterator$$anon$11.next(Iterator.scala:328)
        at scala.collection.Iterator$$anon$11.next(Iterator.scala:328)
        at org.apache.spark.util.collection.WritablePartitionedIterator$$anon$3.writeNext(WritablePartitionedPairCollection.scala:105)
        at org.apache.spark.util.collection.ExternalSorter.spillToPartitionFiles(ExternalSorter.scala:375)
        at org.apache.spark.util.collection.ExternalSorter.insertAll(ExternalSorter.scala:208)
        at org.apache.spark.shuffle.sort.SortShuffleWriter.write(SortShuffleWriter.scala:62)
        at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:70)
        at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)
        at org.apache.spark.scheduler.Task.run(Task.scala:70)
        at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:213)
        at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
        at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
        at java.lang.Thread.run(Thread.java:745)
Driver stacktrace:
        at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1273)
        at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1264)
        at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1263)
        at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
        at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)
        at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1263)
        at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:730)
        at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:730)
        at scala.Option.foreach(Option.scala:236)
        at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:730)
        at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1457)
        at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1418)
        at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
15/09/02 21:12:43 INFO SparkContext: Successfully stopped SparkContext
15/09/02 21:12:43 INFO Utils: Shutdown hook called
15/09/02 21:12:43 INFO Utils: Deleting directory /private/var/folders/5f/3v7mdvrn02lcfczhr9my0ljc0000gn/T/spark-79bb5b4f-9be4-47ed-a722-bfc79880622e/pyspark-090350a4-e0ab-4692-8360-b6be8d64d1fe
15/09/02 21:12:43 INFO Utils: Deleting directory /private/var/folders/5f/3v7mdvrn02lcfczhr9my0ljc0000gn/T/spark-79bb5b4f-9be4-47ed-a722-bfc79880622e
15/09/02 21:12:45 ERROR DiskBlockObjectWriter: Uncaught exception while reverting partial writes to file /private/var/folders/5f/3v7mdvrn02lcfczhr9my0ljc0000gn/T/spark-79bb5b4f-9be4-47ed-a722-bfc79880622e/blockmgr-e8e82c2e-ca87-4667-a330-b1edb83aa81f/05/temp_shuffle_f80f61c5-4fb6-4d46-a81b-239095ebf20d
java.io.FileNotFoundException: /private/var/folders/5f/3v7mdvrn02lcfczhr9my0ljc0000gn/T/spark-79bb5b4f-9be4-47ed-a722-bfc79880622e/blockmgr-e8e82c2e-ca87-4667-a330-b1edb83aa81f/05/temp_shuffle_f80f61c5-4fb6-4d46-a81b-239095ebf20d (No such file or directory)
        at java.io.FileOutputStream.open0(Native Method)
        at java.io.FileOutputStream.open(FileOutputStream.java:270)
        at java.io.FileOutputStream.<init>(FileOutputStream.java:213)
        at org.apache.spark.storage.DiskBlockObjectWriter.revertPartialWritesAndClose(BlockObjectWriter.scala:189)
        at org.apache.spark.util.collection.ExternalSorter$$anonfun$stop$2.apply(ExternalSorter.scala:807)
        at org.apache.spark.util.collection.ExternalSorter$$anonfun$stop$2.apply(ExternalSorter.scala:806)
        at scala.collection.IndexedSeqOptimized$class.foreach(IndexedSeqOptimized.scala:33)
        at scala.collection.mutable.ArrayOps$ofRef.foreach(ArrayOps.scala:108)
        at org.apache.spark.util.collection.ExternalSorter.stop(ExternalSorter.scala:806)
        at org.apache.spark.shuffle.sort.SortShuffleWriter.stop(SortShuffleWriter.scala:94)
        at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:76)
        at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)
        at org.apache.spark.scheduler.Task.run(Task.scala:70)
        at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:213)
        at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
        at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
        at java.lang.Thread.run(Thread.java:745)
15/09/02 21:12:46 ERROR Utils: Exception while deleting Spark temp dir: /private/var/folders/5f/3v7mdvrn02lcfczhr9my0ljc0000gn/T/spark-79bb5b4f-9be4-47ed-a722-bfc79880622e
java.io.IOException: Failed to delete: /private/var/folders/5f/3v7mdvrn02lcfczhr9my0ljc0000gn/T/spark-79bb5b4f-9be4-47ed-a722-bfc79880622e
        at org.apache.spark.util.Utils$.deleteRecursively(Utils.scala:963)
        at org.apache.spark.util.Utils$$anonfun$1$$anonfun$apply$mcV$sp$5.apply(Utils.scala:204)
        at org.apache.spark.util.Utils$$anonfun$1$$anonfun$apply$mcV$sp$5.apply(Utils.scala:201)
        at scala.collection.mutable.HashSet.foreach(HashSet.scala:79)
        at org.apache.spark.util.Utils$$anonfun$1.apply$mcV$sp(Utils.scala:201)
        at org.apache.spark.util.SparkShutdownHook.run(Utils.scala:2308)
        at org.apache.spark.util.SparkShutdownHookManager$$anonfun$runAll$1$$anonfun$apply$mcV$sp$1.apply$mcV$sp(Utils.scala:2278)
        at org.apache.spark.util.SparkShutdownHookManager$$anonfun$runAll$1$$anonfun$apply$mcV$sp$1.apply(Utils.scala:2278)
        at org.apache.spark.util.SparkShutdownHookManager$$anonfun$runAll$1$$anonfun$apply$mcV$sp$1.apply(Utils.scala:2278)
        at org.apache.spark.util.Utils$.logUncaughtExceptions(Utils.scala:1772)
        at org.apache.spark.util.SparkShutdownHookManager$$anonfun$runAll$1.apply$mcV$sp(Utils.scala:2278)
        at org.apache.spark.util.SparkShutdownHookManager$$anonfun$runAll$1.apply(Utils.scala:2278)
        at org.apache.spark.util.SparkShutdownHookManager$$anonfun$runAll$1.apply(Utils.scala:2278)
        at scala.util.Try$.apply(Try.scala:161)
        at org.apache.spark.util.SparkShutdownHookManager.runAll(Utils.scala:2278)
        at org.apache.spark.util.SparkShutdownHookManager$$anon$6.run(Utils.scala:2260)
        at org.apache.hadoop.util.ShutdownHookManager$1.run(ShutdownHookManager.java:54)

--
With regards,
Shahid Ashraf
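P.S. This is roughly how I set the executor memory when I tried the "4g" setting mentioned above (simplified sketch; the real script builds the context a bit differently, and the setting made no difference to the OOM):

    from pyspark import SparkConf, SparkContext

    conf = (SparkConf()
            .setMaster("local[*]")
            .setAppName("record_linker_spark")
            .set("spark.executor.memory", "4g"))  # same OutOfMemoryError with or without this
    sc = SparkContext(conf=conf)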