[
https://issues.apache.org/jira/browse/SPARK-32314?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Zhongwei Zhu updated SPARK-32314:
---------------------------------
Description:
Currently, EventLoggingListener writes both "Stack Trace" and "Full Stack
Trace" in the TaskEndReason of ExceptionFailure to the event log. Both fields
contain the same info, and the former one is kept for backward compatibility
with Spark history before version 1.2.0. We can remove the 1st field. This will
help reduce event log size significantly when lots of tasks fail due to
ExceptionFailure.
The sample json of current format as below:
{noformat}
{
"Event": "SparkListenerTaskEnd",
"Stage ID": 1237,
"Stage Attempt ID": 0,
"Task Type": "ShuffleMapTask",
"Task End Reason": {
"Reason": "ExceptionFailure",
"Class Name": "java.io.IOException",
"Description": "org.apache.spark.SparkException: Failed to get
broadcast_1405_piece10 of broadcast_1405",
"Stack Trace": [
{
"Declaring Class": "org.apache.spark.util.Utils$",
"Method Name": "tryOrIOException",
"File Name": "Utils.scala",
"Line Number": 1350
},
{
"Declaring Class": "org.apache.spark.broadcast.TorrentBroadcast",
"Method Name": "readBroadcastBlock",
"File Name": "TorrentBroadcast.scala",
"Line Number": 218
},
{
"Declaring Class": "org.apache.spark.broadcast.TorrentBroadcast",
"Method Name": "getValue",
"File Name": "TorrentBroadcast.scala",
"Line Number": 103
},
{
"Declaring Class": "org.apache.spark.broadcast.Broadcast",
"Method Name": "value",
"File Name": "Broadcast.scala",
"Line Number": 70
},
{
"Declaring Class":
"org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage9",
"Method Name": "wholestagecodegen_init_0_0$",
"File Name": "generated.java",
"Line Number": 466
},
{
"Declaring Class":
"org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage9",
"Method Name": "init",
"File Name": "generated.java",
"Line Number": 33
},
{
"Declaring Class":
"org.apache.spark.sql.execution.WholeStageCodegenExec",
"Method Name": "$anonfun$doExecute$4",
"File Name": "WholeStageCodegenExec.scala",
"Line Number": 750
},
{
"Declaring Class":
"org.apache.spark.sql.execution.WholeStageCodegenExec",
"Method Name": "$anonfun$doExecute$4$adapted",
"File Name": "WholeStageCodegenExec.scala",
"Line Number": 747
},
{
"Declaring Class": "org.apache.spark.rdd.RDD",
"Method Name": "$anonfun$mapPartitionsWithIndex$2",
"File Name": "RDD.scala",
"Line Number": 915
},
{
"Declaring Class": "org.apache.spark.rdd.RDD",
"Method Name": "$anonfun$mapPartitionsWithIndex$2$adapted",
"File Name": "RDD.scala",
"Line Number": 915
},
{
"Declaring Class": "org.apache.spark.rdd.MapPartitionsRDD",
"Method Name": "compute",
"File Name": "MapPartitionsRDD.scala",
"Line Number": 52
},
{
"Declaring Class": "org.apache.spark.rdd.RDD",
"Method Name": "computeOrReadCheckpoint",
"File Name": "RDD.scala",
"Line Number": 373
},
{
"Declaring Class": "org.apache.spark.rdd.RDD",
"Method Name": "iterator",
"File Name": "RDD.scala",
"Line Number": 337
},
{
"Declaring Class": "org.apache.spark.rdd.MapPartitionsRDD",
"Method Name": "compute",
"File Name": "MapPartitionsRDD.scala",
"Line Number": 52
},
{
"Declaring Class": "org.apache.spark.rdd.RDD",
"Method Name": "computeOrReadCheckpoint",
"File Name": "RDD.scala",
"Line Number": 373
},
{
"Declaring Class": "org.apache.spark.rdd.RDD",
"Method Name": "iterator",
"File Name": "RDD.scala",
"Line Number": 337
},
{
"Declaring Class": "org.apache.spark.shuffle.ShuffleWriteProcessor",
"Method Name": "write",
"File Name": "ShuffleWriteProcessor.scala",
"Line Number": 59
},
{
"Declaring Class": "org.apache.spark.scheduler.ShuffleMapTask",
"Method Name": "runTask",
"File Name": "ShuffleMapTask.scala",
"Line Number": 99
},
{
"Declaring Class": "org.apache.spark.scheduler.ShuffleMapTask",
"Method Name": "runTask",
"File Name": "ShuffleMapTask.scala",
"Line Number": 52
},
{
"Declaring Class": "org.apache.spark.scheduler.Task",
"Method Name": "run",
"File Name": "Task.scala",
"Line Number": 127
},
{
"Declaring Class": "org.apache.spark.executor.Executor$TaskRunner",
"Method Name": "$anonfun$run$3",
"File Name": "Executor.scala",
"Line Number": 464
},
{
"Declaring Class": "org.apache.spark.util.Utils$",
"Method Name": "tryWithSafeFinally",
"File Name": "Utils.scala",
"Line Number": 1377
},
{
"Declaring Class": "org.apache.spark.executor.Executor$TaskRunner",
"Method Name": "run",
"File Name": "Executor.scala",
"Line Number": 467
},
{
"Declaring Class": "java.util.concurrent.ThreadPoolExecutor",
"Method Name": "runWorker",
"File Name": "ThreadPoolExecutor.java",
"Line Number": 1142
},
{
"Declaring Class": "java.util.concurrent.ThreadPoolExecutor$Worker",
"Method Name": "run",
"File Name": "ThreadPoolExecutor.java",
"Line Number": 617
},
{
"Declaring Class": "java.lang.Thread",
"Method Name": "run",
"File Name": "Thread.java",
"Line Number": 745
}
],
"Full Stack Trace": "java.io.IOException: org.apache.spark.SparkException:
Failed to get broadcast_1405_piece10 of broadcast_1405\r\n\tat
org.apache.spark.util.Utils$.tryOrIOException(Utils.scala:1350)\r\n\tat
org.apache.spark.broadcast.TorrentBroadcast.readBroadcastBlock(TorrentBroadcast.scala:218)\r\n\tat
org.apache.spark.broadcast.TorrentBroadcast.getValue(TorrentBroadcast.scala:103)\r\n\tat
org.apache.spark.broadcast.Broadcast.value(Broadcast.scala:70)\r\n\tat
org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage9.wholestagecodegen_init_0_0$(generated.java:466)\r\n\tat
org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage9.init(generated.java:33)\r\n\tat
org.apache.spark.sql.execution.WholeStageCodegenExec.$anonfun$doExecute$4(WholeStageCodegenExec.scala:750)\r\n\tat
org.apache.spark.sql.execution.WholeStageCodegenExec.$anonfun$doExecute$4$adapted(WholeStageCodegenExec.scala:747)\r\n\tat
org.apache.spark.rdd.RDD.$anonfun$mapPartitionsWithIndex$2(RDD.scala:915)\r\n\tat
org.apache.spark.rdd.RDD.$anonfun$mapPartitionsWithIndex$2$adapted(RDD.scala:915)\r\n\tat
org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)\r\n\tat
org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:373)\r\n\tat
org.apache.spark.rdd.RDD.iterator(RDD.scala:337)\r\n\tat
org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)\r\n\tat
org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:373)\r\n\tat
org.apache.spark.rdd.RDD.iterator(RDD.scala:337)\r\n\tat
org.apache.spark.shuffle.ShuffleWriteProcessor.write(ShuffleWriteProcessor.scala:59)\r\n\tat
org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:99)\r\n\tat
org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:52)\r\n\tat
org.apache.spark.scheduler.Task.run(Task.scala:127)\r\n\tat
org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:464)\r\n\tat
org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1377)\r\n\tat
org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:467)\r\n\tat
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)\r\n\tat
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)\r\n\tat
java.lang.Thread.run(Thread.java:745)\r\nCaused by:
org.apache.spark.SparkException: Failed to get broadcast_1405_piece10 of
broadcast_1405\r\n\tat
org.apache.spark.broadcast.TorrentBroadcast.$anonfun$readBlocks$1(TorrentBroadcast.scala:189)\r\n\tat
scala.runtime.java8.JFunction1$mcVI$sp.apply(JFunction1$mcVI$sp.java:23)\r\n\tat
scala.collection.immutable.List.foreach(List.scala:392)\r\n\tat
org.apache.spark.broadcast.TorrentBroadcast.readBlocks(TorrentBroadcast.scala:161)\r\n\tat
org.apache.spark.broadcast.TorrentBroadcast.$anonfun$readBroadcastBlock$4(TorrentBroadcast.scala:245)\r\n\tat
scala.Option.getOrElse(Option.scala:189)\r\n\tat
org.apache.spark.broadcast.TorrentBroadcast.$anonfun$readBroadcastBlock$2(TorrentBroadcast.scala:223)\r\n\tat
org.apache.spark.util.KeyLock.withLock(KeyLock.scala:64)\r\n\tat
org.apache.spark.broadcast.TorrentBroadcast.$anonfun$readBroadcastBlock$1(TorrentBroadcast.scala:218)\r\n\tat
org.apache.spark.util.Utils$.tryOrIOException(Utils.scala:1343)\r\n\t... 25
more\r\n",
"Accumulator Updates": [
{
"ID": 108750,
"Update": "185972",
"Internal": false,
"Count Failed Values": true
},
{
"ID": 108752,
"Update": "0",
"Internal": false,
"Count Failed Values": true
},
{
"ID": 108757,
"Update": "32768",
"Internal": false,
"Count Failed Values": true
},
{
"ID": 108759,
"Update": "0",
"Internal": false,
"Count Failed Values": true
},
{
"ID": 108760,
"Update": "1",
"Internal": false,
"Count Failed Values": true
},
{
"ID": 108761,
"Update": "0",
"Internal": false,
"Count Failed Values": true
},
{
"ID": 108762,
"Update": "0",
"Internal": false,
"Count Failed Values": true
},
{
"ID": 108763,
"Update": "21283988",
"Internal": false,
"Count Failed Values": true
},
{
"ID": 108764,
"Update": "0",
"Internal": false,
"Count Failed Values": true
},
{
"ID": 108765,
"Update": "0",
"Internal": false,
"Count Failed Values": true
}
]
},
"Task Info": {
"Task ID": 922051,
"Index": 2030,
"Attempt": 0,
"Launch Time": 1593809400002,
"Executor ID": "513",
"Host": "BN01AP9EB5816D8",
"Locality": "NODE_LOCAL",
"Speculative": false,
"Getting Result Time": 0,
"Finish Time": 1593809585998,
"Failed": true,
"Killed": false,
"Accumulables": []
},
"Task Executor Metrics": {
"JVMHeapMemory": 3186191224,
"JVMOffHeapMemory": 169078760,
"OnHeapExecutionMemory": 131072,
"OffHeapExecutionMemory": 0,
"OnHeapStorageMemory": 843822088,
"OffHeapStorageMemory": 0,
"OnHeapUnifiedMemory": 843953160,
"OffHeapUnifiedMemory": 0,
"DirectPoolMemory": 47740224,
"MappedPoolMemory": 0,
"ProcessTreeJVMVMemory": 0,
"ProcessTreeJVMRSSMemory": 0,
"ProcessTreePythonVMemory": 0,
"ProcessTreePythonRSSMemory": 0,
"ProcessTreeOtherVMemory": 0,
"ProcessTreeOtherRSSMemory": 0,
"MinorGCCount": 550,
"MinorGCTime": 28316,
"MajorGCCount": 28,
"MajorGCTime": 18669
},
"Task Metrics": {
"Executor Deserialize Time": 0,
"Executor Deserialize CPU Time": 0,
"Executor Run Time": 185972,
"Executor CPU Time": 0,
"Peak Execution Memory": 32768,
"Result Size": 0,
"JVM GC Time": 0,
"Result Serialization Time": 0,
"Memory Bytes Spilled": 0,
"Disk Bytes Spilled": 0,
"Shuffle Read Metrics": {
"Remote Blocks Fetched": 0,
"Local Blocks Fetched": 1,
"Fetch Wait Time": 0,
"Remote Bytes Read": 0,
"Remote Bytes Read To Disk": 0,
"Local Bytes Read": 21283988,
"Total Records Read": 0
},
"Shuffle Write Metrics": {
"Shuffle Bytes Written": 0,
"Shuffle Write Time": 0,
"Shuffle Records Written": 0
},
"Input Metrics": {
"Bytes Read": 0,
"Records Read": 0
},
"Output Metrics": {
"Bytes Written": 0,
"Records Written": 0
},
"Updated Blocks": []
}
}
{noformat}
was:
Currently, EventLoggingListener writes both "Stack Trace" and "Full Stack
Trace" in the TaskEndReason of ExceptionFailure to the event log. Both fields
contain the same info, and the former one is kept for backward compatibility
with Spark history before version 1.2.0. We can remove the 1st field in the
default setting and add one config to control whether to log the 1st field.
This will help reduce event log size significantly when lots of tasks fail due
to ExceptionFailure.
The sample json of current format as below:
{noformat}
{
"Event": "SparkListenerTaskEnd",
"Stage ID": 1237,
"Stage Attempt ID": 0,
"Task Type": "ShuffleMapTask",
"Task End Reason": {
"Reason": "ExceptionFailure",
"Class Name": "java.io.IOException",
"Description": "org.apache.spark.SparkException: Failed to get
broadcast_1405_piece10 of broadcast_1405",
"Stack Trace": [
{
"Declaring Class": "org.apache.spark.util.Utils$",
"Method Name": "tryOrIOException",
"File Name": "Utils.scala",
"Line Number": 1350
},
{
"Declaring Class": "org.apache.spark.broadcast.TorrentBroadcast",
"Method Name": "readBroadcastBlock",
"File Name": "TorrentBroadcast.scala",
"Line Number": 218
},
{
"Declaring Class": "org.apache.spark.broadcast.TorrentBroadcast",
"Method Name": "getValue",
"File Name": "TorrentBroadcast.scala",
"Line Number": 103
},
{
"Declaring Class": "org.apache.spark.broadcast.Broadcast",
"Method Name": "value",
"File Name": "Broadcast.scala",
"Line Number": 70
},
{
"Declaring Class":
"org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage9",
"Method Name": "wholestagecodegen_init_0_0$",
"File Name": "generated.java",
"Line Number": 466
},
{
"Declaring Class":
"org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage9",
"Method Name": "init",
"File Name": "generated.java",
"Line Number": 33
},
{
"Declaring Class":
"org.apache.spark.sql.execution.WholeStageCodegenExec",
"Method Name": "$anonfun$doExecute$4",
"File Name": "WholeStageCodegenExec.scala",
"Line Number": 750
},
{
"Declaring Class":
"org.apache.spark.sql.execution.WholeStageCodegenExec",
"Method Name": "$anonfun$doExecute$4$adapted",
"File Name": "WholeStageCodegenExec.scala",
"Line Number": 747
},
{
"Declaring Class": "org.apache.spark.rdd.RDD",
"Method Name": "$anonfun$mapPartitionsWithIndex$2",
"File Name": "RDD.scala",
"Line Number": 915
},
{
"Declaring Class": "org.apache.spark.rdd.RDD",
"Method Name": "$anonfun$mapPartitionsWithIndex$2$adapted",
"File Name": "RDD.scala",
"Line Number": 915
},
{
"Declaring Class": "org.apache.spark.rdd.MapPartitionsRDD",
"Method Name": "compute",
"File Name": "MapPartitionsRDD.scala",
"Line Number": 52
},
{
"Declaring Class": "org.apache.spark.rdd.RDD",
"Method Name": "computeOrReadCheckpoint",
"File Name": "RDD.scala",
"Line Number": 373
},
{
"Declaring Class": "org.apache.spark.rdd.RDD",
"Method Name": "iterator",
"File Name": "RDD.scala",
"Line Number": 337
},
{
"Declaring Class": "org.apache.spark.rdd.MapPartitionsRDD",
"Method Name": "compute",
"File Name": "MapPartitionsRDD.scala",
"Line Number": 52
},
{
"Declaring Class": "org.apache.spark.rdd.RDD",
"Method Name": "computeOrReadCheckpoint",
"File Name": "RDD.scala",
"Line Number": 373
},
{
"Declaring Class": "org.apache.spark.rdd.RDD",
"Method Name": "iterator",
"File Name": "RDD.scala",
"Line Number": 337
},
{
"Declaring Class": "org.apache.spark.shuffle.ShuffleWriteProcessor",
"Method Name": "write",
"File Name": "ShuffleWriteProcessor.scala",
"Line Number": 59
},
{
"Declaring Class": "org.apache.spark.scheduler.ShuffleMapTask",
"Method Name": "runTask",
"File Name": "ShuffleMapTask.scala",
"Line Number": 99
},
{
"Declaring Class": "org.apache.spark.scheduler.ShuffleMapTask",
"Method Name": "runTask",
"File Name": "ShuffleMapTask.scala",
"Line Number": 52
},
{
"Declaring Class": "org.apache.spark.scheduler.Task",
"Method Name": "run",
"File Name": "Task.scala",
"Line Number": 127
},
{
"Declaring Class": "org.apache.spark.executor.Executor$TaskRunner",
"Method Name": "$anonfun$run$3",
"File Name": "Executor.scala",
"Line Number": 464
},
{
"Declaring Class": "org.apache.spark.util.Utils$",
"Method Name": "tryWithSafeFinally",
"File Name": "Utils.scala",
"Line Number": 1377
},
{
"Declaring Class": "org.apache.spark.executor.Executor$TaskRunner",
"Method Name": "run",
"File Name": "Executor.scala",
"Line Number": 467
},
{
"Declaring Class": "java.util.concurrent.ThreadPoolExecutor",
"Method Name": "runWorker",
"File Name": "ThreadPoolExecutor.java",
"Line Number": 1142
},
{
"Declaring Class": "java.util.concurrent.ThreadPoolExecutor$Worker",
"Method Name": "run",
"File Name": "ThreadPoolExecutor.java",
"Line Number": 617
},
{
"Declaring Class": "java.lang.Thread",
"Method Name": "run",
"File Name": "Thread.java",
"Line Number": 745
}
],
"Full Stack Trace": "java.io.IOException: org.apache.spark.SparkException:
Failed to get broadcast_1405_piece10 of broadcast_1405\r\n\tat
org.apache.spark.util.Utils$.tryOrIOException(Utils.scala:1350)\r\n\tat
org.apache.spark.broadcast.TorrentBroadcast.readBroadcastBlock(TorrentBroadcast.scala:218)\r\n\tat
org.apache.spark.broadcast.TorrentBroadcast.getValue(TorrentBroadcast.scala:103)\r\n\tat
org.apache.spark.broadcast.Broadcast.value(Broadcast.scala:70)\r\n\tat
org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage9.wholestagecodegen_init_0_0$(generated.java:466)\r\n\tat
org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage9.init(generated.java:33)\r\n\tat
org.apache.spark.sql.execution.WholeStageCodegenExec.$anonfun$doExecute$4(WholeStageCodegenExec.scala:750)\r\n\tat
org.apache.spark.sql.execution.WholeStageCodegenExec.$anonfun$doExecute$4$adapted(WholeStageCodegenExec.scala:747)\r\n\tat
org.apache.spark.rdd.RDD.$anonfun$mapPartitionsWithIndex$2(RDD.scala:915)\r\n\tat
org.apache.spark.rdd.RDD.$anonfun$mapPartitionsWithIndex$2$adapted(RDD.scala:915)\r\n\tat
org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)\r\n\tat
org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:373)\r\n\tat
org.apache.spark.rdd.RDD.iterator(RDD.scala:337)\r\n\tat
org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)\r\n\tat
org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:373)\r\n\tat
org.apache.spark.rdd.RDD.iterator(RDD.scala:337)\r\n\tat
org.apache.spark.shuffle.ShuffleWriteProcessor.write(ShuffleWriteProcessor.scala:59)\r\n\tat
org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:99)\r\n\tat
org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:52)\r\n\tat
org.apache.spark.scheduler.Task.run(Task.scala:127)\r\n\tat
org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:464)\r\n\tat
org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1377)\r\n\tat
org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:467)\r\n\tat
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)\r\n\tat
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)\r\n\tat
java.lang.Thread.run(Thread.java:745)\r\nCaused by:
org.apache.spark.SparkException: Failed to get broadcast_1405_piece10 of
broadcast_1405\r\n\tat
org.apache.spark.broadcast.TorrentBroadcast.$anonfun$readBlocks$1(TorrentBroadcast.scala:189)\r\n\tat
scala.runtime.java8.JFunction1$mcVI$sp.apply(JFunction1$mcVI$sp.java:23)\r\n\tat
scala.collection.immutable.List.foreach(List.scala:392)\r\n\tat
org.apache.spark.broadcast.TorrentBroadcast.readBlocks(TorrentBroadcast.scala:161)\r\n\tat
org.apache.spark.broadcast.TorrentBroadcast.$anonfun$readBroadcastBlock$4(TorrentBroadcast.scala:245)\r\n\tat
scala.Option.getOrElse(Option.scala:189)\r\n\tat
org.apache.spark.broadcast.TorrentBroadcast.$anonfun$readBroadcastBlock$2(TorrentBroadcast.scala:223)\r\n\tat
org.apache.spark.util.KeyLock.withLock(KeyLock.scala:64)\r\n\tat
org.apache.spark.broadcast.TorrentBroadcast.$anonfun$readBroadcastBlock$1(TorrentBroadcast.scala:218)\r\n\tat
org.apache.spark.util.Utils$.tryOrIOException(Utils.scala:1343)\r\n\t... 25
more\r\n",
"Accumulator Updates": [
{
"ID": 108750,
"Update": "185972",
"Internal": false,
"Count Failed Values": true
},
{
"ID": 108752,
"Update": "0",
"Internal": false,
"Count Failed Values": true
},
{
"ID": 108757,
"Update": "32768",
"Internal": false,
"Count Failed Values": true
},
{
"ID": 108759,
"Update": "0",
"Internal": false,
"Count Failed Values": true
},
{
"ID": 108760,
"Update": "1",
"Internal": false,
"Count Failed Values": true
},
{
"ID": 108761,
"Update": "0",
"Internal": false,
"Count Failed Values": true
},
{
"ID": 108762,
"Update": "0",
"Internal": false,
"Count Failed Values": true
},
{
"ID": 108763,
"Update": "21283988",
"Internal": false,
"Count Failed Values": true
},
{
"ID": 108764,
"Update": "0",
"Internal": false,
"Count Failed Values": true
},
{
"ID": 108765,
"Update": "0",
"Internal": false,
"Count Failed Values": true
}
]
},
"Task Info": {
"Task ID": 922051,
"Index": 2030,
"Attempt": 0,
"Launch Time": 1593809400002,
"Executor ID": "513",
"Host": "BN01AP9EB5816D8",
"Locality": "NODE_LOCAL",
"Speculative": false,
"Getting Result Time": 0,
"Finish Time": 1593809585998,
"Failed": true,
"Killed": false,
"Accumulables": []
},
"Task Executor Metrics": {
"JVMHeapMemory": 3186191224,
"JVMOffHeapMemory": 169078760,
"OnHeapExecutionMemory": 131072,
"OffHeapExecutionMemory": 0,
"OnHeapStorageMemory": 843822088,
"OffHeapStorageMemory": 0,
"OnHeapUnifiedMemory": 843953160,
"OffHeapUnifiedMemory": 0,
"DirectPoolMemory": 47740224,
"MappedPoolMemory": 0,
"ProcessTreeJVMVMemory": 0,
"ProcessTreeJVMRSSMemory": 0,
"ProcessTreePythonVMemory": 0,
"ProcessTreePythonRSSMemory": 0,
"ProcessTreeOtherVMemory": 0,
"ProcessTreeOtherRSSMemory": 0,
"MinorGCCount": 550,
"MinorGCTime": 28316,
"MajorGCCount": 28,
"MajorGCTime": 18669
},
"Task Metrics": {
"Executor Deserialize Time": 0,
"Executor Deserialize CPU Time": 0,
"Executor Run Time": 185972,
"Executor CPU Time": 0,
"Peak Execution Memory": 32768,
"Result Size": 0,
"JVM GC Time": 0,
"Result Serialization Time": 0,
"Memory Bytes Spilled": 0,
"Disk Bytes Spilled": 0,
"Shuffle Read Metrics": {
"Remote Blocks Fetched": 0,
"Local Blocks Fetched": 1,
"Fetch Wait Time": 0,
"Remote Bytes Read": 0,
"Remote Bytes Read To Disk": 0,
"Local Bytes Read": 21283988,
"Total Records Read": 0
},
"Shuffle Write Metrics": {
"Shuffle Bytes Written": 0,
"Shuffle Write Time": 0,
"Shuffle Records Written": 0
},
"Input Metrics": {
"Bytes Read": 0,
"Records Read": 0
},
"Output Metrics": {
"Bytes Written": 0,
"Records Written": 0
},
"Updated Blocks": []
}
}
{noformat}
> [SHS] Remove old format of stacktrace in event log
> --------------------------------------------------
>
> Key: SPARK-32314
> URL: https://issues.apache.org/jira/browse/SPARK-32314
> Project: Spark
> Issue Type: Improvement
> Components: Spark Core
> Affects Versions: 3.0.0
> Reporter: Zhongwei Zhu
> Priority: Minor
>
> Currently, EventLoggingListener writes both "Stack Trace" and "Full Stack
> Trace" in the TaskEndReason of ExceptionFailure to the event log. Both fields
> contain the same info, and the former one is kept for backward compatibility
> with Spark history before version 1.2.0. We can remove the 1st field. This
> will help reduce event log size significantly when lots of tasks fail due to
> ExceptionFailure.
> The sample json of current format as below:
> {noformat}
> {
> "Event": "SparkListenerTaskEnd",
> "Stage ID": 1237,
> "Stage Attempt ID": 0,
> "Task Type": "ShuffleMapTask",
> "Task End Reason": {
> "Reason": "ExceptionFailure",
> "Class Name": "java.io.IOException",
> "Description": "org.apache.spark.SparkException: Failed to get
> broadcast_1405_piece10 of broadcast_1405",
> "Stack Trace": [
> {
> "Declaring Class": "org.apache.spark.util.Utils$",
> "Method Name": "tryOrIOException",
> "File Name": "Utils.scala",
> "Line Number": 1350
> },
> {
> "Declaring Class": "org.apache.spark.broadcast.TorrentBroadcast",
> "Method Name": "readBroadcastBlock",
> "File Name": "TorrentBroadcast.scala",
> "Line Number": 218
> },
> {
> "Declaring Class": "org.apache.spark.broadcast.TorrentBroadcast",
> "Method Name": "getValue",
> "File Name": "TorrentBroadcast.scala",
> "Line Number": 103
> },
> {
> "Declaring Class": "org.apache.spark.broadcast.Broadcast",
> "Method Name": "value",
> "File Name": "Broadcast.scala",
> "Line Number": 70
> },
> {
> "Declaring Class":
> "org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage9",
> "Method Name": "wholestagecodegen_init_0_0$",
> "File Name": "generated.java",
> "Line Number": 466
> },
> {
> "Declaring Class":
> "org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage9",
> "Method Name": "init",
> "File Name": "generated.java",
> "Line Number": 33
> },
> {
> "Declaring Class":
> "org.apache.spark.sql.execution.WholeStageCodegenExec",
> "Method Name": "$anonfun$doExecute$4",
> "File Name": "WholeStageCodegenExec.scala",
> "Line Number": 750
> },
> {
> "Declaring Class":
> "org.apache.spark.sql.execution.WholeStageCodegenExec",
> "Method Name": "$anonfun$doExecute$4$adapted",
> "File Name": "WholeStageCodegenExec.scala",
> "Line Number": 747
> },
> {
> "Declaring Class": "org.apache.spark.rdd.RDD",
> "Method Name": "$anonfun$mapPartitionsWithIndex$2",
> "File Name": "RDD.scala",
> "Line Number": 915
> },
> {
> "Declaring Class": "org.apache.spark.rdd.RDD",
> "Method Name": "$anonfun$mapPartitionsWithIndex$2$adapted",
> "File Name": "RDD.scala",
> "Line Number": 915
> },
> {
> "Declaring Class": "org.apache.spark.rdd.MapPartitionsRDD",
> "Method Name": "compute",
> "File Name": "MapPartitionsRDD.scala",
> "Line Number": 52
> },
> {
> "Declaring Class": "org.apache.spark.rdd.RDD",
> "Method Name": "computeOrReadCheckpoint",
> "File Name": "RDD.scala",
> "Line Number": 373
> },
> {
> "Declaring Class": "org.apache.spark.rdd.RDD",
> "Method Name": "iterator",
> "File Name": "RDD.scala",
> "Line Number": 337
> },
> {
> "Declaring Class": "org.apache.spark.rdd.MapPartitionsRDD",
> "Method Name": "compute",
> "File Name": "MapPartitionsRDD.scala",
> "Line Number": 52
> },
> {
> "Declaring Class": "org.apache.spark.rdd.RDD",
> "Method Name": "computeOrReadCheckpoint",
> "File Name": "RDD.scala",
> "Line Number": 373
> },
> {
> "Declaring Class": "org.apache.spark.rdd.RDD",
> "Method Name": "iterator",
> "File Name": "RDD.scala",
> "Line Number": 337
> },
> {
> "Declaring Class": "org.apache.spark.shuffle.ShuffleWriteProcessor",
> "Method Name": "write",
> "File Name": "ShuffleWriteProcessor.scala",
> "Line Number": 59
> },
> {
> "Declaring Class": "org.apache.spark.scheduler.ShuffleMapTask",
> "Method Name": "runTask",
> "File Name": "ShuffleMapTask.scala",
> "Line Number": 99
> },
> {
> "Declaring Class": "org.apache.spark.scheduler.ShuffleMapTask",
> "Method Name": "runTask",
> "File Name": "ShuffleMapTask.scala",
> "Line Number": 52
> },
> {
> "Declaring Class": "org.apache.spark.scheduler.Task",
> "Method Name": "run",
> "File Name": "Task.scala",
> "Line Number": 127
> },
> {
> "Declaring Class": "org.apache.spark.executor.Executor$TaskRunner",
> "Method Name": "$anonfun$run$3",
> "File Name": "Executor.scala",
> "Line Number": 464
> },
> {
> "Declaring Class": "org.apache.spark.util.Utils$",
> "Method Name": "tryWithSafeFinally",
> "File Name": "Utils.scala",
> "Line Number": 1377
> },
> {
> "Declaring Class": "org.apache.spark.executor.Executor$TaskRunner",
> "Method Name": "run",
> "File Name": "Executor.scala",
> "Line Number": 467
> },
> {
> "Declaring Class": "java.util.concurrent.ThreadPoolExecutor",
> "Method Name": "runWorker",
> "File Name": "ThreadPoolExecutor.java",
> "Line Number": 1142
> },
> {
> "Declaring Class": "java.util.concurrent.ThreadPoolExecutor$Worker",
> "Method Name": "run",
> "File Name": "ThreadPoolExecutor.java",
> "Line Number": 617
> },
> {
> "Declaring Class": "java.lang.Thread",
> "Method Name": "run",
> "File Name": "Thread.java",
> "Line Number": 745
> }
> ],
> "Full Stack Trace": "java.io.IOException:
> org.apache.spark.SparkException: Failed to get broadcast_1405_piece10 of
> broadcast_1405\r\n\tat
> org.apache.spark.util.Utils$.tryOrIOException(Utils.scala:1350)\r\n\tat
> org.apache.spark.broadcast.TorrentBroadcast.readBroadcastBlock(TorrentBroadcast.scala:218)\r\n\tat
>
> org.apache.spark.broadcast.TorrentBroadcast.getValue(TorrentBroadcast.scala:103)\r\n\tat
> org.apache.spark.broadcast.Broadcast.value(Broadcast.scala:70)\r\n\tat
> org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage9.wholestagecodegen_init_0_0$(generated.java:466)\r\n\tat
>
> org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage9.init(generated.java:33)\r\n\tat
>
> org.apache.spark.sql.execution.WholeStageCodegenExec.$anonfun$doExecute$4(WholeStageCodegenExec.scala:750)\r\n\tat
>
> org.apache.spark.sql.execution.WholeStageCodegenExec.$anonfun$doExecute$4$adapted(WholeStageCodegenExec.scala:747)\r\n\tat
>
> org.apache.spark.rdd.RDD.$anonfun$mapPartitionsWithIndex$2(RDD.scala:915)\r\n\tat
>
> org.apache.spark.rdd.RDD.$anonfun$mapPartitionsWithIndex$2$adapted(RDD.scala:915)\r\n\tat
>
> org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)\r\n\tat
> org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:373)\r\n\tat
> org.apache.spark.rdd.RDD.iterator(RDD.scala:337)\r\n\tat
> org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)\r\n\tat
> org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:373)\r\n\tat
> org.apache.spark.rdd.RDD.iterator(RDD.scala:337)\r\n\tat
> org.apache.spark.shuffle.ShuffleWriteProcessor.write(ShuffleWriteProcessor.scala:59)\r\n\tat
>
> org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:99)\r\n\tat
>
> org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:52)\r\n\tat
> org.apache.spark.scheduler.Task.run(Task.scala:127)\r\n\tat
> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:464)\r\n\tat
> org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1377)\r\n\tat
> org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:467)\r\n\tat
> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)\r\n\tat
>
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)\r\n\tat
> java.lang.Thread.run(Thread.java:745)\r\nCaused by:
> org.apache.spark.SparkException: Failed to get broadcast_1405_piece10 of
> broadcast_1405\r\n\tat
> org.apache.spark.broadcast.TorrentBroadcast.$anonfun$readBlocks$1(TorrentBroadcast.scala:189)\r\n\tat
>
> scala.runtime.java8.JFunction1$mcVI$sp.apply(JFunction1$mcVI$sp.java:23)\r\n\tat
> scala.collection.immutable.List.foreach(List.scala:392)\r\n\tat
> org.apache.spark.broadcast.TorrentBroadcast.readBlocks(TorrentBroadcast.scala:161)\r\n\tat
>
> org.apache.spark.broadcast.TorrentBroadcast.$anonfun$readBroadcastBlock$4(TorrentBroadcast.scala:245)\r\n\tat
> scala.Option.getOrElse(Option.scala:189)\r\n\tat
> org.apache.spark.broadcast.TorrentBroadcast.$anonfun$readBroadcastBlock$2(TorrentBroadcast.scala:223)\r\n\tat
> org.apache.spark.util.KeyLock.withLock(KeyLock.scala:64)\r\n\tat
> org.apache.spark.broadcast.TorrentBroadcast.$anonfun$readBroadcastBlock$1(TorrentBroadcast.scala:218)\r\n\tat
> org.apache.spark.util.Utils$.tryOrIOException(Utils.scala:1343)\r\n\t... 25
> more\r\n",
> "Accumulator Updates": [
> {
> "ID": 108750,
> "Update": "185972",
> "Internal": false,
> "Count Failed Values": true
> },
> {
> "ID": 108752,
> "Update": "0",
> "Internal": false,
> "Count Failed Values": true
> },
> {
> "ID": 108757,
> "Update": "32768",
> "Internal": false,
> "Count Failed Values": true
> },
> {
> "ID": 108759,
> "Update": "0",
> "Internal": false,
> "Count Failed Values": true
> },
> {
> "ID": 108760,
> "Update": "1",
> "Internal": false,
> "Count Failed Values": true
> },
> {
> "ID": 108761,
> "Update": "0",
> "Internal": false,
> "Count Failed Values": true
> },
> {
> "ID": 108762,
> "Update": "0",
> "Internal": false,
> "Count Failed Values": true
> },
> {
> "ID": 108763,
> "Update": "21283988",
> "Internal": false,
> "Count Failed Values": true
> },
> {
> "ID": 108764,
> "Update": "0",
> "Internal": false,
> "Count Failed Values": true
> },
> {
> "ID": 108765,
> "Update": "0",
> "Internal": false,
> "Count Failed Values": true
> }
> ]
> },
> "Task Info": {
> "Task ID": 922051,
> "Index": 2030,
> "Attempt": 0,
> "Launch Time": 1593809400002,
> "Executor ID": "513",
> "Host": "BN01AP9EB5816D8",
> "Locality": "NODE_LOCAL",
> "Speculative": false,
> "Getting Result Time": 0,
> "Finish Time": 1593809585998,
> "Failed": true,
> "Killed": false,
> "Accumulables": []
> },
> "Task Executor Metrics": {
> "JVMHeapMemory": 3186191224,
> "JVMOffHeapMemory": 169078760,
> "OnHeapExecutionMemory": 131072,
> "OffHeapExecutionMemory": 0,
> "OnHeapStorageMemory": 843822088,
> "OffHeapStorageMemory": 0,
> "OnHeapUnifiedMemory": 843953160,
> "OffHeapUnifiedMemory": 0,
> "DirectPoolMemory": 47740224,
> "MappedPoolMemory": 0,
> "ProcessTreeJVMVMemory": 0,
> "ProcessTreeJVMRSSMemory": 0,
> "ProcessTreePythonVMemory": 0,
> "ProcessTreePythonRSSMemory": 0,
> "ProcessTreeOtherVMemory": 0,
> "ProcessTreeOtherRSSMemory": 0,
> "MinorGCCount": 550,
> "MinorGCTime": 28316,
> "MajorGCCount": 28,
> "MajorGCTime": 18669
> },
> "Task Metrics": {
> "Executor Deserialize Time": 0,
> "Executor Deserialize CPU Time": 0,
> "Executor Run Time": 185972,
> "Executor CPU Time": 0,
> "Peak Execution Memory": 32768,
> "Result Size": 0,
> "JVM GC Time": 0,
> "Result Serialization Time": 0,
> "Memory Bytes Spilled": 0,
> "Disk Bytes Spilled": 0,
> "Shuffle Read Metrics": {
> "Remote Blocks Fetched": 0,
> "Local Blocks Fetched": 1,
> "Fetch Wait Time": 0,
> "Remote Bytes Read": 0,
> "Remote Bytes Read To Disk": 0,
> "Local Bytes Read": 21283988,
> "Total Records Read": 0
> },
> "Shuffle Write Metrics": {
> "Shuffle Bytes Written": 0,
> "Shuffle Write Time": 0,
> "Shuffle Records Written": 0
> },
> "Input Metrics": {
> "Bytes Read": 0,
> "Records Read": 0
> },
> "Output Metrics": {
> "Bytes Written": 0,
> "Records Written": 0
> },
> "Updated Blocks": []
> }
> }
> {noformat}
>
>
--
This message was sent by Atlassian Jira
(v8.3.4#803005)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]