marin-ma commented on code in PR #6475:
URL: https://github.com/apache/incubator-gluten/pull/6475#discussion_r1687659792
##########
backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxMetricsApi.scala:
##########
@@ -258,27 +258,33 @@ class VeloxMetricsApi extends MetricsApi with Logging {
sparkContext: SparkContext,
isSort: Boolean): Map[String, SQLMetric] = {
val baseMetrics = Map(
- "dataSize" -> SQLMetrics.createSizeMetric(sparkContext, "data size"),
"numPartitions" -> SQLMetrics.createMetric(sparkContext, "number of
partitions"),
+ "dataSize" -> SQLMetrics.createSizeMetric(sparkContext, "data size"),
"bytesSpilled" -> SQLMetrics.createSizeMetric(sparkContext, "shuffle
bytes spilled"),
- "splitBufferSize" -> SQLMetrics.createSizeMetric(sparkContext, "split
buffer size"),
- "splitTime" -> SQLMetrics.createNanoTimingMetric(sparkContext, "time to
split"),
- "spillTime" -> SQLMetrics.createNanoTimingMetric(sparkContext, "time to
spill"),
- "deserializeTime" -> SQLMetrics.createNanoTimingMetric(sparkContext,
"time to deserialize"),
"avgReadBatchNumRows" -> SQLMetrics
.createAverageMetric(sparkContext, "avg read batch num rows"),
"numInputRows" -> SQLMetrics.createMetric(sparkContext, "number of input
rows"),
"numOutputRows" -> SQLMetrics
.createMetric(sparkContext, "number of output rows"),
"inputBatches" -> SQLMetrics
- .createMetric(sparkContext, "number of input batches")
+ .createMetric(sparkContext, "number of input batches"),
+ "spillTime" -> SQLMetrics.createNanoTimingMetric(sparkContext, "time to
spill"),
+ "compressTime" -> SQLMetrics.createNanoTimingMetric(sparkContext, "time
to compress"),
+ "decompressTime" -> SQLMetrics.createNanoTimingMetric(sparkContext,
"time to decompress"),
+ "deserializeTime" -> SQLMetrics.createNanoTimingMetric(sparkContext,
"time to deserialize"),
+ // For hash shuffle writer, the peak bytes represents the maximum split
buffer size.
+ // For sort shuffle writer, the peak bytes represents the maximum
+ // row buffer + sort buffer size.
+ "peakBytes" -> SQLMetrics.createSizeMetric(sparkContext, "peak bytes
allocated")
)
if (isSort) {
- baseMetrics
+ baseMetrics ++ Map(
+ "sortTime" -> SQLMetrics.createNanoTimingMetric(sparkContext, "time to
shuffle sort"),
+ "c2rTime" -> SQLMetrics.createNanoTimingMetric(sparkContext, "time to
shuffle c2r")
Review Comment:
> however this may bring some trouble for analysis script
Checked with the analysis script. Except for `get_shuffle_stat`, the other
analyses (sar, emon, time breakdown, app comparison) work well.
Perhaps we can introduce a new metric, `shuffle write wall time`, to
capture the real elapsed time for both hash and sort shuffle, and modify the
analysis script to use that one.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]