AngersZhuuuu commented on a change in pull request #31522:
URL: https://github.com/apache/spark/pull/31522#discussion_r671580787
##########
File path:
sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/CustomWriteTaskStatsTrackerSuite.scala
##########
@@ -25,7 +25,7 @@ import org.apache.spark.sql.catalyst.InternalRow
class CustomWriteTaskStatsTrackerSuite extends SparkFunSuite {
def checkFinalStats(tracker: CustomWriteTaskStatsTracker, result: Map[String, Int]): Unit = {
- assert(tracker.getFinalStats().asInstanceOf[CustomWriteTaskStats].numRowsPerFile == result)
+ assert(tracker.getFinalStats(0).asInstanceOf[CustomWriteTaskStats].numRowsPerFile == result)
Review comment:
Done
##########
File path:
sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/BasicWriteTaskStatsTrackerSuite.scala
##########
@@ -73,7 +73,7 @@ class BasicWriteTaskStatsTrackerSuite extends SparkFunSuite {
}
private def finalStatus(tracker: BasicWriteTaskStatsTracker): BasicWriteTaskStats = {
- tracker.getFinalStats().asInstanceOf[BasicWriteTaskStats]
+ tracker.getFinalStats(0).asInstanceOf[BasicWriteTaskStats]
Review comment:
Done
##########
File path:
sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/WriteStatsTracker.scala
##########
@@ -103,5 +103,5 @@ trait WriteJobStatsTracker extends Serializable {
* to the expected derived type when implementing this method in a derived class.
* The framework will make sure to call this with the right arguments.
*/
- def processStats(stats: Seq[WriteTaskStats]): Unit
+ def processStats(stats: Seq[WriteTaskStats], jobCommitDuration: Long): Unit
Review comment:
Done
##########
File path:
sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/WriteStatsTracker.scala
##########
@@ -69,7 +69,7 @@ trait WriteTaskStatsTracker {
* @note This may only be called once. Further use of the object may lead to undefined behavior.
* @return An object of subtype of [[WriteTaskStats]], to be sent to the driver.
*/
- def getFinalStats(): WriteTaskStats
+ def getFinalStats(taskCommitTime: Long): WriteTaskStats
Review comment:
Done
##########
File path:
sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/BasicWriteStatsTracker.scala
##########
@@ -230,7 +239,11 @@ object BasicWriteJobStatsTracker {
NUM_FILES_KEY -> SQLMetrics.createMetric(sparkContext, "number of written files"),
NUM_OUTPUT_BYTES_KEY -> SQLMetrics.createSizeMetric(sparkContext, "written output"),
NUM_OUTPUT_ROWS_KEY -> SQLMetrics.createMetric(sparkContext, "number of output rows"),
- NUM_PARTS_KEY -> SQLMetrics.createMetric(sparkContext, "number of dynamic part")
+ NUM_PARTS_KEY -> SQLMetrics.createMetric(sparkContext, "number of dynamic part"),
+ DURATION_OF_TASK_COMMIT ->
+ SQLMetrics.createTimingMetric(sparkContext, "duration of task commit"),
+ DURATION_JOB_COMMIT->
+ SQLMetrics.createTimingMetric(sparkContext, "duration of committing the job")
Review comment:
Done
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]