[
https://issues.apache.org/jira/browse/SPARK-32314?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Zhongwei Zhu updated SPARK-32314:
---------------------------------
Summary: [SHS] Remove old format of stacktrace in event log (was: [SHS]
Add config to control whether log old format of stacktrace)
> [SHS] Remove old format of stacktrace in event log
> --------------------------------------------------
>
> Key: SPARK-32314
> URL: https://issues.apache.org/jira/browse/SPARK-32314
> Project: Spark
> Issue Type: Improvement
> Components: Spark Core
> Affects Versions: 3.0.0
> Reporter: Zhongwei Zhu
> Priority: Minor
>
> Currently, EventLoggingListener writes both "Stack Trace" and "Full Stack
> Trace" in the TaskEndReason of ExceptionFailure to the event log. Both fields
> contain the same info, and the former is kept for backward compatibility with
> Spark history before version 1.2.0. We can remove the first field by default
> and add a config to control whether to log it. This will help reduce the
> event log size significantly when lots of tasks fail due to
> ExceptionFailure.
>
> The sample json of current format as below:
>
> {noformat}
> {
> "Event": "SparkListenerTaskEnd",
> "Stage ID": 1237,
> "Stage Attempt ID": 0,
> "Task Type": "ShuffleMapTask",
> "Task End Reason": {
> "Reason": "ExceptionFailure",
> "Class Name": "java.io.IOException",
> "Description": "org.apache.spark.SparkException: Failed to get
> broadcast_1405_piece10 of broadcast_1405",
> "Stack Trace": [
> {
> "Declaring Class": "org.apache.spark.util.Utils$",
> "Method Name": "tryOrIOException",
> "File Name": "Utils.scala",
> "Line Number": 1350
> },
> {
> "Declaring Class": "org.apache.spark.broadcast.TorrentBroadcast",
> "Method Name": "readBroadcastBlock",
> "File Name": "TorrentBroadcast.scala",
> "Line Number": 218
> },
> {
> "Declaring Class": "org.apache.spark.broadcast.TorrentBroadcast",
> "Method Name": "getValue",
> "File Name": "TorrentBroadcast.scala",
> "Line Number": 103
> },
> {
> "Declaring Class": "org.apache.spark.broadcast.Broadcast",
> "Method Name": "value",
> "File Name": "Broadcast.scala",
> "Line Number": 70
> },
> {
> "Declaring Class":
> "org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage9",
> "Method Name": "wholestagecodegen_init_0_0$",
> "File Name": "generated.java",
> "Line Number": 466
> },
> {
> "Declaring Class":
> "org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage9",
> "Method Name": "init",
> "File Name": "generated.java",
> "Line Number": 33
> },
> {
> "Declaring Class":
> "org.apache.spark.sql.execution.WholeStageCodegenExec",
> "Method Name": "$anonfun$doExecute$4",
> "File Name": "WholeStageCodegenExec.scala",
> "Line Number": 750
> },
> {
> "Declaring Class":
> "org.apache.spark.sql.execution.WholeStageCodegenExec",
> "Method Name": "$anonfun$doExecute$4$adapted",
> "File Name": "WholeStageCodegenExec.scala",
> "Line Number": 747
> },
> {
> "Declaring Class": "org.apache.spark.rdd.RDD",
> "Method Name": "$anonfun$mapPartitionsWithIndex$2",
> "File Name": "RDD.scala",
> "Line Number": 915
> },
> {
> "Declaring Class": "org.apache.spark.rdd.RDD",
> "Method Name": "$anonfun$mapPartitionsWithIndex$2$adapted",
> "File Name": "RDD.scala",
> "Line Number": 915
> },
> {
> "Declaring Class": "org.apache.spark.rdd.MapPartitionsRDD",
> "Method Name": "compute",
> "File Name": "MapPartitionsRDD.scala",
> "Line Number": 52
> },
> {
> "Declaring Class": "org.apache.spark.rdd.RDD",
> "Method Name": "computeOrReadCheckpoint",
> "File Name": "RDD.scala",
> "Line Number": 373
> },
> {
> "Declaring Class": "org.apache.spark.rdd.RDD",
> "Method Name": "iterator",
> "File Name": "RDD.scala",
> "Line Number": 337
> },
> {
> "Declaring Class": "org.apache.spark.rdd.MapPartitionsRDD",
> "Method Name": "compute",
> "File Name": "MapPartitionsRDD.scala",
> "Line Number": 52
> },
> {
> "Declaring Class": "org.apache.spark.rdd.RDD",
> "Method Name": "computeOrReadCheckpoint",
> "File Name": "RDD.scala",
> "Line Number": 373
> },
> {
> "Declaring Class": "org.apache.spark.rdd.RDD",
> "Method Name": "iterator",
> "File Name": "RDD.scala",
> "Line Number": 337
> },
> {
> "Declaring Class": "org.apache.spark.shuffle.ShuffleWriteProcessor",
> "Method Name": "write",
> "File Name": "ShuffleWriteProcessor.scala",
> "Line Number": 59
> },
> {
> "Declaring Class": "org.apache.spark.scheduler.ShuffleMapTask",
> "Method Name": "runTask",
> "File Name": "ShuffleMapTask.scala",
> "Line Number": 99
> },
> {
> "Declaring Class": "org.apache.spark.scheduler.ShuffleMapTask",
> "Method Name": "runTask",
> "File Name": "ShuffleMapTask.scala",
> "Line Number": 52
> },
> {
> "Declaring Class": "org.apache.spark.scheduler.Task",
> "Method Name": "run",
> "File Name": "Task.scala",
> "Line Number": 127
> },
> {
> "Declaring Class": "org.apache.spark.executor.Executor$TaskRunner",
> "Method Name": "$anonfun$run$3",
> "File Name": "Executor.scala",
> "Line Number": 464
> },
> {
> "Declaring Class": "org.apache.spark.util.Utils$",
> "Method Name": "tryWithSafeFinally",
> "File Name": "Utils.scala",
> "Line Number": 1377
> },
> {
> "Declaring Class": "org.apache.spark.executor.Executor$TaskRunner",
> "Method Name": "run",
> "File Name": "Executor.scala",
> "Line Number": 467
> },
> {
> "Declaring Class": "java.util.concurrent.ThreadPoolExecutor",
> "Method Name": "runWorker",
> "File Name": "ThreadPoolExecutor.java",
> "Line Number": 1142
> },
> {
> "Declaring Class": "java.util.concurrent.ThreadPoolExecutor$Worker",
> "Method Name": "run",
> "File Name": "ThreadPoolExecutor.java",
> "Line Number": 617
> },
> {
> "Declaring Class": "java.lang.Thread",
> "Method Name": "run",
> "File Name": "Thread.java",
> "Line Number": 745
> }
> ],
> "Full Stack Trace": "java.io.IOException:
> org.apache.spark.SparkException: Failed to get broadcast_1405_piece10 of
> broadcast_1405\r\n\tat
> org.apache.spark.util.Utils$.tryOrIOException(Utils.scala:1350)\r\n\tat
> org.apache.spark.broadcast.TorrentBroadcast.readBroadcastBlock(TorrentBroadcast.scala:218)\r\n\tat
>
> org.apache.spark.broadcast.TorrentBroadcast.getValue(TorrentBroadcast.scala:103)\r\n\tat
> org.apache.spark.broadcast.Broadcast.value(Broadcast.scala:70)\r\n\tat
> org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage9.wholestagecodegen_init_0_0$(generated.java:466)\r\n\tat
>
> org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage9.init(generated.java:33)\r\n\tat
>
> org.apache.spark.sql.execution.WholeStageCodegenExec.$anonfun$doExecute$4(WholeStageCodegenExec.scala:750)\r\n\tat
>
> org.apache.spark.sql.execution.WholeStageCodegenExec.$anonfun$doExecute$4$adapted(WholeStageCodegenExec.scala:747)\r\n\tat
>
> org.apache.spark.rdd.RDD.$anonfun$mapPartitionsWithIndex$2(RDD.scala:915)\r\n\tat
>
> org.apache.spark.rdd.RDD.$anonfun$mapPartitionsWithIndex$2$adapted(RDD.scala:915)\r\n\tat
>
> org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)\r\n\tat
> org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:373)\r\n\tat
> org.apache.spark.rdd.RDD.iterator(RDD.scala:337)\r\n\tat
> org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)\r\n\tat
> org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:373)\r\n\tat
> org.apache.spark.rdd.RDD.iterator(RDD.scala:337)\r\n\tat
> org.apache.spark.shuffle.ShuffleWriteProcessor.write(ShuffleWriteProcessor.scala:59)\r\n\tat
>
> org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:99)\r\n\tat
>
> org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:52)\r\n\tat
> org.apache.spark.scheduler.Task.run(Task.scala:127)\r\n\tat
> org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:464)\r\n\tat
> org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1377)\r\n\tat
> org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:467)\r\n\tat
> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)\r\n\tat
>
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)\r\n\tat
> java.lang.Thread.run(Thread.java:745)\r\nCaused by:
> org.apache.spark.SparkException: Failed to get broadcast_1405_piece10 of
> broadcast_1405\r\n\tat
> org.apache.spark.broadcast.TorrentBroadcast.$anonfun$readBlocks$1(TorrentBroadcast.scala:189)\r\n\tat
>
> scala.runtime.java8.JFunction1$mcVI$sp.apply(JFunction1$mcVI$sp.java:23)\r\n\tat
> scala.collection.immutable.List.foreach(List.scala:392)\r\n\tat
> org.apache.spark.broadcast.TorrentBroadcast.readBlocks(TorrentBroadcast.scala:161)\r\n\tat
>
> org.apache.spark.broadcast.TorrentBroadcast.$anonfun$readBroadcastBlock$4(TorrentBroadcast.scala:245)\r\n\tat
> scala.Option.getOrElse(Option.scala:189)\r\n\tat
> org.apache.spark.broadcast.TorrentBroadcast.$anonfun$readBroadcastBlock$2(TorrentBroadcast.scala:223)\r\n\tat
> org.apache.spark.util.KeyLock.withLock(KeyLock.scala:64)\r\n\tat
> org.apache.spark.broadcast.TorrentBroadcast.$anonfun$readBroadcastBlock$1(TorrentBroadcast.scala:218)\r\n\tat
> org.apache.spark.util.Utils$.tryOrIOException(Utils.scala:1343)\r\n\t... 25
> more\r\n",
> "Accumulator Updates": [
> {
> "ID": 108750,
> "Update": "185972",
> "Internal": false,
> "Count Failed Values": true
> },
> {
> "ID": 108752,
> "Update": "0",
> "Internal": false,
> "Count Failed Values": true
> },
> {
> "ID": 108757,
> "Update": "32768",
> "Internal": false,
> "Count Failed Values": true
> },
> {
> "ID": 108759,
> "Update": "0",
> "Internal": false,
> "Count Failed Values": true
> },
> {
> "ID": 108760,
> "Update": "1",
> "Internal": false,
> "Count Failed Values": true
> },
> {
> "ID": 108761,
> "Update": "0",
> "Internal": false,
> "Count Failed Values": true
> },
> {
> "ID": 108762,
> "Update": "0",
> "Internal": false,
> "Count Failed Values": true
> },
> {
> "ID": 108763,
> "Update": "21283988",
> "Internal": false,
> "Count Failed Values": true
> },
> {
> "ID": 108764,
> "Update": "0",
> "Internal": false,
> "Count Failed Values": true
> },
> {
> "ID": 108765,
> "Update": "0",
> "Internal": false,
> "Count Failed Values": true
> }
> ]
> },
> "Task Info": {
> "Task ID": 922051,
> "Index": 2030,
> "Attempt": 0,
> "Launch Time": 1593809400002,
> "Executor ID": "513",
> "Host": "BN01AP9EB5816D8",
> "Locality": "NODE_LOCAL",
> "Speculative": false,
> "Getting Result Time": 0,
> "Finish Time": 1593809585998,
> "Failed": true,
> "Killed": false,
> "Accumulables": []
> },
> "Task Executor Metrics": {
> "JVMHeapMemory": 3186191224,
> "JVMOffHeapMemory": 169078760,
> "OnHeapExecutionMemory": 131072,
> "OffHeapExecutionMemory": 0,
> "OnHeapStorageMemory": 843822088,
> "OffHeapStorageMemory": 0,
> "OnHeapUnifiedMemory": 843953160,
> "OffHeapUnifiedMemory": 0,
> "DirectPoolMemory": 47740224,
> "MappedPoolMemory": 0,
> "ProcessTreeJVMVMemory": 0,
> "ProcessTreeJVMRSSMemory": 0,
> "ProcessTreePythonVMemory": 0,
> "ProcessTreePythonRSSMemory": 0,
> "ProcessTreeOtherVMemory": 0,
> "ProcessTreeOtherRSSMemory": 0,
> "MinorGCCount": 550,
> "MinorGCTime": 28316,
> "MajorGCCount": 28,
> "MajorGCTime": 18669
> },
> "Task Metrics": {
> "Executor Deserialize Time": 0,
> "Executor Deserialize CPU Time": 0,
> "Executor Run Time": 185972,
> "Executor CPU Time": 0,
> "Peak Execution Memory": 32768,
> "Result Size": 0,
> "JVM GC Time": 0,
> "Result Serialization Time": 0,
> "Memory Bytes Spilled": 0,
> "Disk Bytes Spilled": 0,
> "Shuffle Read Metrics": {
> "Remote Blocks Fetched": 0,
> "Local Blocks Fetched": 1,
> "Fetch Wait Time": 0,
> "Remote Bytes Read": 0,
> "Remote Bytes Read To Disk": 0,
> "Local Bytes Read": 21283988,
> "Total Records Read": 0
> },
> "Shuffle Write Metrics": {
> "Shuffle Bytes Written": 0,
> "Shuffle Write Time": 0,
> "Shuffle Records Written": 0
> },
> "Input Metrics": {
> "Bytes Read": 0,
> "Records Read": 0
> },
> "Output Metrics": {
> "Bytes Written": 0,
> "Records Written": 0
> },
> "Updated Blocks": []
> }
> }
> {noformat}
>
>
--
This message was sent by Atlassian Jira
(v8.3.4#803005)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]