[ https://issues.apache.org/jira/browse/SPARK-32314?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Apache Spark reassigned SPARK-32314: ------------------------------------ Assignee: Apache Spark > [SHS] Add config to control whether log old format of stacktrace > ---------------------------------------------------------------- > > Key: SPARK-32314 > URL: https://issues.apache.org/jira/browse/SPARK-32314 > Project: Spark > Issue Type: Improvement > Components: Spark Core > Affects Versions: 3.0.0 > Reporter: Zhongwei Zhu > Assignee: Apache Spark > Priority: Minor > > Currently, EventLoggingListener writes both "Stack Trace" and "Full Stack > Trace" in the TaskEndReason of ExceptionFailure to the event log. Both fields > contain the same info, and the former one is kept for backward compatibility with > Spark history before version 1.2.0. We can remove the 1st field in the default > setting and add a config to control whether to log the 1st field. This will help > reduce event log size significantly when lots of tasks fail due to > ExceptionFailure. > > A sample JSON of the current format is shown below: > > {noformat} > { > "Event": "SparkListenerTaskEnd", > "Stage ID": 1237, > "Stage Attempt ID": 0, > "Task Type": "ShuffleMapTask", > "Task End Reason": { > "Reason": "ExceptionFailure", > "Class Name": "java.io.IOException", > "Description": "org.apache.spark.SparkException: Failed to get > broadcast_1405_piece10 of broadcast_1405", > "Stack Trace": [ > { > "Declaring Class": "org.apache.spark.util.Utils$", > "Method Name": "tryOrIOException", > "File Name": "Utils.scala", > "Line Number": 1350 > }, > { > "Declaring Class": "org.apache.spark.broadcast.TorrentBroadcast", > "Method Name": "readBroadcastBlock", > "File Name": "TorrentBroadcast.scala", > "Line Number": 218 > }, > { > "Declaring Class": "org.apache.spark.broadcast.TorrentBroadcast", > "Method Name": "getValue", > "File Name": "TorrentBroadcast.scala", > "Line Number": 103 > }, > { > "Declaring Class": "org.apache.spark.broadcast.Broadcast", > "Method Name": "value", > "File Name": "Broadcast.scala", > "Line Number": 70 > }, > { > 
"Declaring Class": > "org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage9", > "Method Name": "wholestagecodegen_init_0_0$", > "File Name": "generated.java", > "Line Number": 466 > }, > { > "Declaring Class": > "org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage9", > "Method Name": "init", > "File Name": "generated.java", > "Line Number": 33 > }, > { > "Declaring Class": > "org.apache.spark.sql.execution.WholeStageCodegenExec", > "Method Name": "$anonfun$doExecute$4", > "File Name": "WholeStageCodegenExec.scala", > "Line Number": 750 > }, > { > "Declaring Class": > "org.apache.spark.sql.execution.WholeStageCodegenExec", > "Method Name": "$anonfun$doExecute$4$adapted", > "File Name": "WholeStageCodegenExec.scala", > "Line Number": 747 > }, > { > "Declaring Class": "org.apache.spark.rdd.RDD", > "Method Name": "$anonfun$mapPartitionsWithIndex$2", > "File Name": "RDD.scala", > "Line Number": 915 > }, > { > "Declaring Class": "org.apache.spark.rdd.RDD", > "Method Name": "$anonfun$mapPartitionsWithIndex$2$adapted", > "File Name": "RDD.scala", > "Line Number": 915 > }, > { > "Declaring Class": "org.apache.spark.rdd.MapPartitionsRDD", > "Method Name": "compute", > "File Name": "MapPartitionsRDD.scala", > "Line Number": 52 > }, > { > "Declaring Class": "org.apache.spark.rdd.RDD", > "Method Name": "computeOrReadCheckpoint", > "File Name": "RDD.scala", > "Line Number": 373 > }, > { > "Declaring Class": "org.apache.spark.rdd.RDD", > "Method Name": "iterator", > "File Name": "RDD.scala", > "Line Number": 337 > }, > { > "Declaring Class": "org.apache.spark.rdd.MapPartitionsRDD", > "Method Name": "compute", > "File Name": "MapPartitionsRDD.scala", > "Line Number": 52 > }, > { > "Declaring Class": "org.apache.spark.rdd.RDD", > "Method Name": "computeOrReadCheckpoint", > "File Name": "RDD.scala", > "Line Number": 373 > }, > { > "Declaring Class": "org.apache.spark.rdd.RDD", > "Method Name": "iterator", > 
"File Name": "RDD.scala", > "Line Number": 337 > }, > { > "Declaring Class": "org.apache.spark.shuffle.ShuffleWriteProcessor", > "Method Name": "write", > "File Name": "ShuffleWriteProcessor.scala", > "Line Number": 59 > }, > { > "Declaring Class": "org.apache.spark.scheduler.ShuffleMapTask", > "Method Name": "runTask", > "File Name": "ShuffleMapTask.scala", > "Line Number": 99 > }, > { > "Declaring Class": "org.apache.spark.scheduler.ShuffleMapTask", > "Method Name": "runTask", > "File Name": "ShuffleMapTask.scala", > "Line Number": 52 > }, > { > "Declaring Class": "org.apache.spark.scheduler.Task", > "Method Name": "run", > "File Name": "Task.scala", > "Line Number": 127 > }, > { > "Declaring Class": "org.apache.spark.executor.Executor$TaskRunner", > "Method Name": "$anonfun$run$3", > "File Name": "Executor.scala", > "Line Number": 464 > }, > { > "Declaring Class": "org.apache.spark.util.Utils$", > "Method Name": "tryWithSafeFinally", > "File Name": "Utils.scala", > "Line Number": 1377 > }, > { > "Declaring Class": "org.apache.spark.executor.Executor$TaskRunner", > "Method Name": "run", > "File Name": "Executor.scala", > "Line Number": 467 > }, > { > "Declaring Class": "java.util.concurrent.ThreadPoolExecutor", > "Method Name": "runWorker", > "File Name": "ThreadPoolExecutor.java", > "Line Number": 1142 > }, > { > "Declaring Class": "java.util.concurrent.ThreadPoolExecutor$Worker", > "Method Name": "run", > "File Name": "ThreadPoolExecutor.java", > "Line Number": 617 > }, > { > "Declaring Class": "java.lang.Thread", > "Method Name": "run", > "File Name": "Thread.java", > "Line Number": 745 > } > ], > "Full Stack Trace": "java.io.IOException: > org.apache.spark.SparkException: Failed to get broadcast_1405_piece10 of > broadcast_1405\r\n\tat > org.apache.spark.util.Utils$.tryOrIOException(Utils.scala:1350)\r\n\tat > org.apache.spark.broadcast.TorrentBroadcast.readBroadcastBlock(TorrentBroadcast.scala:218)\r\n\tat > > 
org.apache.spark.broadcast.TorrentBroadcast.getValue(TorrentBroadcast.scala:103)\r\n\tat > org.apache.spark.broadcast.Broadcast.value(Broadcast.scala:70)\r\n\tat > org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage9.wholestagecodegen_init_0_0$(generated.java:466)\r\n\tat > > org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage9.init(generated.java:33)\r\n\tat > > org.apache.spark.sql.execution.WholeStageCodegenExec.$anonfun$doExecute$4(WholeStageCodegenExec.scala:750)\r\n\tat > > org.apache.spark.sql.execution.WholeStageCodegenExec.$anonfun$doExecute$4$adapted(WholeStageCodegenExec.scala:747)\r\n\tat > > org.apache.spark.rdd.RDD.$anonfun$mapPartitionsWithIndex$2(RDD.scala:915)\r\n\tat > > org.apache.spark.rdd.RDD.$anonfun$mapPartitionsWithIndex$2$adapted(RDD.scala:915)\r\n\tat > > org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)\r\n\tat > org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:373)\r\n\tat > org.apache.spark.rdd.RDD.iterator(RDD.scala:337)\r\n\tat > org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)\r\n\tat > org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:373)\r\n\tat > org.apache.spark.rdd.RDD.iterator(RDD.scala:337)\r\n\tat > org.apache.spark.shuffle.ShuffleWriteProcessor.write(ShuffleWriteProcessor.scala:59)\r\n\tat > > org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:99)\r\n\tat > > org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:52)\r\n\tat > org.apache.spark.scheduler.Task.run(Task.scala:127)\r\n\tat > org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:464)\r\n\tat > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1377)\r\n\tat > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:467)\r\n\tat > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)\r\n\tat > > 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)\r\n\tat > java.lang.Thread.run(Thread.java:745)\r\nCaused by: > org.apache.spark.SparkException: Failed to get broadcast_1405_piece10 of > broadcast_1405\r\n\tat > org.apache.spark.broadcast.TorrentBroadcast.$anonfun$readBlocks$1(TorrentBroadcast.scala:189)\r\n\tat > > scala.runtime.java8.JFunction1$mcVI$sp.apply(JFunction1$mcVI$sp.java:23)\r\n\tat > scala.collection.immutable.List.foreach(List.scala:392)\r\n\tat > org.apache.spark.broadcast.TorrentBroadcast.readBlocks(TorrentBroadcast.scala:161)\r\n\tat > > org.apache.spark.broadcast.TorrentBroadcast.$anonfun$readBroadcastBlock$4(TorrentBroadcast.scala:245)\r\n\tat > scala.Option.getOrElse(Option.scala:189)\r\n\tat > org.apache.spark.broadcast.TorrentBroadcast.$anonfun$readBroadcastBlock$2(TorrentBroadcast.scala:223)\r\n\tat > org.apache.spark.util.KeyLock.withLock(KeyLock.scala:64)\r\n\tat > org.apache.spark.broadcast.TorrentBroadcast.$anonfun$readBroadcastBlock$1(TorrentBroadcast.scala:218)\r\n\tat > org.apache.spark.util.Utils$.tryOrIOException(Utils.scala:1343)\r\n\t... 
25 > more\r\n", > "Accumulator Updates": [ > { > "ID": 108750, > "Update": "185972", > "Internal": false, > "Count Failed Values": true > }, > { > "ID": 108752, > "Update": "0", > "Internal": false, > "Count Failed Values": true > }, > { > "ID": 108757, > "Update": "32768", > "Internal": false, > "Count Failed Values": true > }, > { > "ID": 108759, > "Update": "0", > "Internal": false, > "Count Failed Values": true > }, > { > "ID": 108760, > "Update": "1", > "Internal": false, > "Count Failed Values": true > }, > { > "ID": 108761, > "Update": "0", > "Internal": false, > "Count Failed Values": true > }, > { > "ID": 108762, > "Update": "0", > "Internal": false, > "Count Failed Values": true > }, > { > "ID": 108763, > "Update": "21283988", > "Internal": false, > "Count Failed Values": true > }, > { > "ID": 108764, > "Update": "0", > "Internal": false, > "Count Failed Values": true > }, > { > "ID": 108765, > "Update": "0", > "Internal": false, > "Count Failed Values": true > } > ] > }, > "Task Info": { > "Task ID": 922051, > "Index": 2030, > "Attempt": 0, > "Launch Time": 1593809400002, > "Executor ID": "513", > "Host": "BN01AP9EB5816D8", > "Locality": "NODE_LOCAL", > "Speculative": false, > "Getting Result Time": 0, > "Finish Time": 1593809585998, > "Failed": true, > "Killed": false, > "Accumulables": [] > }, > "Task Executor Metrics": { > "JVMHeapMemory": 3186191224, > "JVMOffHeapMemory": 169078760, > "OnHeapExecutionMemory": 131072, > "OffHeapExecutionMemory": 0, > "OnHeapStorageMemory": 843822088, > "OffHeapStorageMemory": 0, > "OnHeapUnifiedMemory": 843953160, > "OffHeapUnifiedMemory": 0, > "DirectPoolMemory": 47740224, > "MappedPoolMemory": 0, > "ProcessTreeJVMVMemory": 0, > "ProcessTreeJVMRSSMemory": 0, > "ProcessTreePythonVMemory": 0, > "ProcessTreePythonRSSMemory": 0, > "ProcessTreeOtherVMemory": 0, > "ProcessTreeOtherRSSMemory": 0, > "MinorGCCount": 550, > "MinorGCTime": 28316, > "MajorGCCount": 28, > "MajorGCTime": 18669 > }, > "Task Metrics": { > "Executor 
Deserialize Time": 0, > "Executor Deserialize CPU Time": 0, > "Executor Run Time": 185972, > "Executor CPU Time": 0, > "Peak Execution Memory": 32768, > "Result Size": 0, > "JVM GC Time": 0, > "Result Serialization Time": 0, > "Memory Bytes Spilled": 0, > "Disk Bytes Spilled": 0, > "Shuffle Read Metrics": { > "Remote Blocks Fetched": 0, > "Local Blocks Fetched": 1, > "Fetch Wait Time": 0, > "Remote Bytes Read": 0, > "Remote Bytes Read To Disk": 0, > "Local Bytes Read": 21283988, > "Total Records Read": 0 > }, > "Shuffle Write Metrics": { > "Shuffle Bytes Written": 0, > "Shuffle Write Time": 0, > "Shuffle Records Written": 0 > }, > "Input Metrics": { > "Bytes Read": 0, > "Records Read": 0 > }, > "Output Metrics": { > "Bytes Written": 0, > "Records Written": 0 > }, > "Updated Blocks": [] > } > } > {noformat} > > -- This message was sent by Atlassian Jira (v8.3.4#803005) --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org