[ https://issues.apache.org/jira/browse/SPARK-17430?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16369367#comment-16369367 ]

Furcy Pin commented on SPARK-17430:
-----------------------------------

Same problem here in Spark 2.2.1.

The job hangs forever: the GUI is still responsive, but no task advances and the 
executors remain reserved. 
This can be really annoying, as it creates zombie jobs that eat cluster resources 
for nothing until they are killed manually.

The stack trace in the driver logs shows this error:
{code:java}
WARN util.Utils: Suppressing exception in finally: Java heap space
java.lang.OutOfMemoryError: Java heap space
        at java.util.Arrays.copyOf(Arrays.java:3236)
        at java.io.ByteArrayOutputStream.grow(ByteArrayOutputStream.java:118)
        at java.io.ByteArrayOutputStream.ensureCapacity(ByteArrayOutputStream.java:93)
        at java.io.ByteArrayOutputStream.write(ByteArrayOutputStream.java:153)
        at java.util.zip.DeflaterOutputStream.deflate(DeflaterOutputStream.java:253)
        at java.util.zip.DeflaterOutputStream.write(DeflaterOutputStream.java:211)
        at java.util.zip.GZIPOutputStream.write(GZIPOutputStream.java:145)
        at java.io.ObjectOutputStream$BlockDataOutputStream.writeBlockHeader(ObjectOutputStream.java:1894)
        at java.io.ObjectOutputStream$BlockDataOutputStream.drain(ObjectOutputStream.java:1875)
        at java.io.ObjectOutputStream$BlockDataOutputStream.flush(ObjectOutputStream.java:1822)
        at java.io.ObjectOutputStream.flush(ObjectOutputStream.java:719)
        at java.io.ObjectOutputStream.close(ObjectOutputStream.java:740)
        at org.apache.spark.MapOutputTracker$$anonfun$serializeMapStatuses$2.apply$mcV$sp(MapOutputTracker.scala:620)
        at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1346)
        at org.apache.spark.MapOutputTracker$.serializeMapStatuses(MapOutputTracker.scala:619)
        at org.apache.spark.MapOutputTrackerMaster.getSerializedMapOutputStatuses(MapOutputTracker.scala:562)
        at org.apache.spark.scheduler.DAGScheduler.createShuffleMapStage(DAGScheduler.scala:341)
        at org.apache.spark.scheduler.DAGScheduler$$anonfun$org$apache$spark$scheduler$DAGScheduler$$getOrCreateShuffleMapStage$1.apply(DAGScheduler.scala:312)
        at org.apache.spark.scheduler.DAGScheduler$$anonfun$org$apache$spark$scheduler$DAGScheduler$$getOrCreateShuffleMapStage$1.apply(DAGScheduler.scala:305)
        at scala.collection.Iterator$class.foreach(Iterator.scala:727)
        at scala.collection.AbstractIterator.foreach(Iterator.scala:1157)
        at scala.collection.IterableLike$class.foreach(IterableLike.scala:72)
        at scala.collection.mutable.Stack.foreach(Stack.scala:169)
        at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$getOrCreateShuffleMapStage(DAGScheduler.scala:305)
        at org.apache.spark.scheduler.DAGScheduler$$anonfun$getOrCreateParentStages$1.apply(DAGScheduler.scala:381)
        at org.apache.spark.scheduler.DAGScheduler$$anonfun$getOrCreateParentStages$1.apply(DAGScheduler.scala:380)
        at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:244)
        at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:244)
        at scala.collection.mutable.HashSet.foreach(HashSet.scala:79)
        at scala.collection.TraversableLike$class.map(TraversableLike.scala:244)
        at scala.collection.mutable.AbstractSet.scala$collection$SetLike$$super$map(Set.scala:45)
        at scala.collection.SetLike$class.map(SetLike.scala:93)
Exception in thread "dag-scheduler-event-loop" java.lang.OutOfMemoryError: Java heap space
        at java.util.Arrays.copyOf(Arrays.java:3236)
        at java.io.ByteArrayOutputStream.grow(ByteArrayOutputStream.java:118)
        at java.io.ByteArrayOutputStream.ensureCapacity(ByteArrayOutputStream.java:93)
        at java.io.ByteArrayOutputStream.write(ByteArrayOutputStream.java:153)
        at java.util.zip.DeflaterOutputStream.deflate(DeflaterOutputStream.java:253)
        at java.util.zip.DeflaterOutputStream.write(DeflaterOutputStream.java:211)
        at java.util.zip.GZIPOutputStream.write(GZIPOutputStream.java:145)
        at java.io.ObjectOutputStream$BlockDataOutputStream.writeBlockHeader(ObjectOutputStream.java:1894)
        at java.io.ObjectOutputStream$BlockDataOutputStream.drain(ObjectOutputStream.java:1875)
        at java.io.ObjectOutputStream$BlockDataOutputStream.setBlockDataMode(ObjectOutputStream.java:1786)
        at java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1189)
        at java.io.ObjectOutputStream.writeArray(ObjectOutputStream.java:1378)
        at java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1174)
        at java.io.ObjectOutputStream.writeObject(ObjectOutputStream.java:348)
        at org.apache.spark.MapOutputTracker$$anonfun$serializeMapStatuses$1.apply$mcV$sp(MapOutputTracker.scala:617)
        at org.apache.spark.MapOutputTracker$$anonfun$serializeMapStatuses$1.apply(MapOutputTracker.scala:616)
        at org.apache.spark.MapOutputTracker$$anonfun$serializeMapStatuses$1.apply(MapOutputTracker.scala:616)
        at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1337)
        at org.apache.spark.MapOutputTracker$.serializeMapStatuses(MapOutputTracker.scala:619)
        at org.apache.spark.MapOutputTrackerMaster.getSerializedMapOutputStatuses(MapOutputTracker.scala:562)
        at org.apache.spark.scheduler.DAGScheduler.createShuffleMapStage(DAGScheduler.scala:341)
        at org.apache.spark.scheduler.DAGScheduler$$anonfun$org$apache$spark$scheduler$DAGScheduler$$getOrCreateShuffleMapStage$1.apply(DAGScheduler.scala:312)
        at org.apac
{code}
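
For what it's worth, the hang seems to follow from the OutOfMemoryError killing the dag-scheduler-event-loop thread: the DAGScheduler wraps serialization in a catch of scala.util.control.NonFatal, and NonFatal deliberately excludes VirtualMachineError (and therefore OutOfMemoryError), so the error is never turned into a stage abort and instead escapes to the top of the event-loop thread while the job stays registered as running. Below is a minimal, self-contained sketch of the two behaviors, assuming a hypothetical serializeTasks() stand-in for the serialization calls in the traces above; none of this is actual Spark source.

{code:scala}
import scala.util.control.NonFatal

// Illustrative sketch only, not Spark code. It shows why the OOM escapes:
// NonFatal() does not match VirtualMachineError, so a catch written with
// NonFatal lets an OutOfMemoryError propagate and kill the calling thread.
object OomEscapeDemo {

  // Hypothetical stand-in for the task/map-status serialization that
  // actually throws in the stack traces above.
  def serializeTasks(): Array[Byte] =
    throw new OutOfMemoryError("Java heap space")

  // The NonFatal-style catch: OutOfMemoryError is not matched, so the
  // error falls through to the event-loop thread and the job hangs.
  def submitWithNonFatalCatch(): Unit =
    try {
      serializeTasks()
    } catch {
      case NonFatal(e) => println(s"stage aborted: $e")
    }

  // The patched behavior described in this ticket: catch everything and
  // abort the stage, so the job fails fast and visibly instead of hanging.
  def submitWithThrowableCatch(): Unit =
    try {
      serializeTasks()
    } catch {
      case e: Throwable => println(s"stage aborted: $e")
    }

  def main(args: Array[String]): Unit = {
    submitWithThrowableCatch() // prints: stage aborted: java.lang.OutOfMemoryError: Java heap space
    submitWithNonFatalCatch()  // rethrows: this is the OOM that kills dag-scheduler-event-loop
  }
}
{code}

Catching Throwable around task serialization is the blunt version of the attached abort-task-on-oom-in-dag-scheduler.patch: the job still fails on OOM, but it fails visibly instead of leaving zombie executors reserved.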

> Spark task Hangs after OOM while DAG scheduler tries to serialize a task
> ------------------------------------------------------------------------
>
>                 Key: SPARK-17430
>                 URL: https://issues.apache.org/jira/browse/SPARK-17430
>             Project: Spark
>          Issue Type: Bug
>          Components: Scheduler
>    Affects Versions: 1.6.2
>            Reporter: Mikhail Galanin
>            Priority: Major
>         Attachments: abort-task-on-oom-in-dag-scheduler.patch
>
>
> Hi there,
> We're running Spark on Hadoop 2.7.1 YARN and have hit a problem.
> Sometimes an exception is raised inside JavaSerializer (see the stack trace 
> below). The exception isn't a problem in itself, but after it happens the 
> task hangs. It is still shown as "running" in the Hadoop task list, but no 
> worker is executing it and no more records appear in the Spark job log 
> until somebody kills it.
> We have fixed the issue by patching the Spark code (catching the OOM in 
> submitMissingTasks()), but it looks like the OOM error is deliberately 
> ignored, so there is probably a better solution.
> {noformat}
> Exception in thread "dag-scheduler-event-loop" java.lang.OutOfMemoryError: Java heap space
>       at java.util.Arrays.copyOf(Arrays.java:3332)
>       at java.lang.AbstractStringBuilder.expandCapacity(AbstractStringBuilder.java:137)
>       at java.lang.AbstractStringBuilder.ensureCapacityInternal(AbstractStringBuilder.java:121)
>       at java.lang.AbstractStringBuilder.append(AbstractStringBuilder.java:421)
>       at java.lang.StringBuilder.append(StringBuilder.java:136)
>       at java.io.ObjectOutputStream.writeOrdinaryObject(ObjectOutputStream.java:1421)
>       at java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1178)
>       at java.io.ObjectOutputStream.defaultWriteFields(ObjectOutputStream.java:1548)
>       at java.io.ObjectOutputStream.writeSerialData(ObjectOutputStream.java:1509)
>       at java.io.ObjectOutputStream.writeOrdinaryObject(ObjectOutputStream.java:1432)
>       at java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1178)
>       at java.io.ObjectOutputStream.defaultWriteFields(ObjectOutputStream.java:1548)
>       at java.io.ObjectOutputStream.writeSerialData(ObjectOutputStream.java:1509)
>       at java.io.ObjectOutputStream.writeOrdinaryObject(ObjectOutputStream.java:1432)
>       at java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1178)
>       at java.io.ObjectOutputStream.defaultWriteFields(ObjectOutputStream.java:1548)
>       at java.io.ObjectOutputStream.writeSerialData(ObjectOutputStream.java:1509)
>       at java.io.ObjectOutputStream.writeOrdinaryObject(ObjectOutputStream.java:1432)
>       at java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1178)
>       at java.io.ObjectOutputStream.defaultWriteFields(ObjectOutputStream.java:1548)
>       at java.io.ObjectOutputStream.writeSerialData(ObjectOutputStream.java:1509)
>       at java.io.ObjectOutputStream.writeOrdinaryObject(ObjectOutputStream.java:1432)
>       at java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1178)
>       at java.io.ObjectOutputStream.writeObject(ObjectOutputStream.java:348)
>       at org.apache.spark.serializer.JavaSerializationStream.writeObject(JavaSerializer.scala:44)
>       at org.apache.spark.serializer.JavaSerializerInstance.serialize(JavaSerializer.scala:101)
>       at org.apache.spark.scheduler.DAGScheduler.submitMissingTasks(DAGScheduler.scala:1003)
>       at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$submitStage(DAGScheduler.scala:921)
>       at org.apache.spark.scheduler.DAGScheduler.handleJobSubmitted(DAGScheduler.scala:861)
>       at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1607)
>       at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1599)
>       at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1588)
> {noformat}


