gaborgsomogyi commented on a change in pull request #30336:
URL: https://github.com/apache/spark/pull/30336#discussion_r522313692
##########
File path:
sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/StreamingQueryStatisticsPage.scala
##########
@@ -199,49 +202,98 @@ private[ui] class StreamingQueryStatisticsPage(parent:
StreamingQueryTab)
"records")
graphUIDataForNumRowsDroppedByWatermark.generateDataJs(jsCollector)
- // scalastyle:off
- <tr>
- <td style="vertical-align: middle;">
- <div style="width: 160px;">
- <div><strong>Aggregated Number Of Total State Rows
{SparkUIUtils.tooltip("Aggregated number of total state rows.",
"right")}</strong></div>
- </div>
- </td>
- <td
class={"aggregated-num-total-state-rows-timeline"}>{graphUIDataForNumberTotalRows.generateTimelineHtml(jsCollector)}</td>
- <td
class={"aggregated-num-total-state-rows-histogram"}>{graphUIDataForNumberTotalRows.generateHistogramHtml(jsCollector)}</td>
- </tr>
- <tr>
- <td style="vertical-align: middle;">
- <div style="width: 160px;">
- <div><strong>Aggregated Number Of Updated State Rows
{SparkUIUtils.tooltip("Aggregated number of updated state rows.",
"right")}</strong></div>
- </div>
- </td>
- <td
class={"aggregated-num-updated-state-rows-timeline"}>{graphUIDataForNumberUpdatedRows.generateTimelineHtml(jsCollector)}</td>
- <td
class={"aggregated-num-updated-state-rows-histogram"}>{graphUIDataForNumberUpdatedRows.generateHistogramHtml(jsCollector)}</td>
- </tr>
- <tr>
- <td style="vertical-align: middle;">
- <div style="width: 160px;">
- <div><strong>Aggregated State Memory Used In Bytes
{SparkUIUtils.tooltip("Aggregated state memory used in bytes.",
"right")}</strong></div>
- </div>
- </td>
- <td
class={"aggregated-state-memory-used-bytes-timeline"}>{graphUIDataForMemoryUsedBytes.generateTimelineHtml(jsCollector)}</td>
- <td
class={"aggregated-state-memory-used-bytes-histogram"}>{graphUIDataForMemoryUsedBytes.generateHistogramHtml(jsCollector)}</td>
- </tr>
+ val result =
+ // scalastyle:off
<tr>
<td style="vertical-align: middle;">
<div style="width: 160px;">
- <div><strong>Aggregated Number Of State Rows Dropped By
Watermark {SparkUIUtils.tooltip("Aggregated number of state rows dropped by
watermark.", "right")}</strong></div>
+ <div><strong>Aggregated Number Of Total State Rows
{SparkUIUtils.tooltip("Aggregated number of total state rows.",
"right")}</strong></div>
</div>
</td>
- <td
class={"aggregated-num-state-rows-dropped-by-watermark-timeline"}>{graphUIDataForNumRowsDroppedByWatermark.generateTimelineHtml(jsCollector)}</td>
- <td
class={"aggregated-num-state-rows-dropped-by-watermark-histogram"}>{graphUIDataForNumRowsDroppedByWatermark.generateHistogramHtml(jsCollector)}</td>
+ <td
class={"aggregated-num-total-state-rows-timeline"}>{graphUIDataForNumberTotalRows.generateTimelineHtml(jsCollector)}</td>
+ <td
class={"aggregated-num-total-state-rows-histogram"}>{graphUIDataForNumberTotalRows.generateHistogramHtml(jsCollector)}</td>
</tr>
- // scalastyle:on
+ <tr>
+ <td style="vertical-align: middle;">
+ <div style="width: 160px;">
+ <div><strong>Aggregated Number Of Updated State Rows
{SparkUIUtils.tooltip("Aggregated number of updated state rows.",
"right")}</strong></div>
+ </div>
+ </td>
+ <td
class={"aggregated-num-updated-state-rows-timeline"}>{graphUIDataForNumberUpdatedRows.generateTimelineHtml(jsCollector)}</td>
+ <td
class={"aggregated-num-updated-state-rows-histogram"}>{graphUIDataForNumberUpdatedRows.generateHistogramHtml(jsCollector)}</td>
+ </tr>
+ <tr>
+ <td style="vertical-align: middle;">
+ <div style="width: 160px;">
+ <div><strong>Aggregated State Memory Used In Bytes
{SparkUIUtils.tooltip("Aggregated state memory used in bytes.",
"right")}</strong></div>
+ </div>
+ </td>
+ <td
class={"aggregated-state-memory-used-bytes-timeline"}>{graphUIDataForMemoryUsedBytes.generateTimelineHtml(jsCollector)}</td>
+ <td
class={"aggregated-state-memory-used-bytes-histogram"}>{graphUIDataForMemoryUsedBytes.generateHistogramHtml(jsCollector)}</td>
+ </tr>
+ <tr>
+ <td style="vertical-align: middle;">
+ <div style="width: 160px;">
+ <div><strong>Aggregated Number Of State Rows Dropped By
Watermark {SparkUIUtils.tooltip("Aggregated number of state rows dropped by
watermark.", "right")}</strong></div>
+ </div>
+ </td>
+ <td
class={"aggregated-num-state-rows-dropped-by-watermark-timeline"}>{graphUIDataForNumRowsDroppedByWatermark.generateTimelineHtml(jsCollector)}</td>
+ <td
class={"aggregated-num-state-rows-dropped-by-watermark-histogram"}>{graphUIDataForNumRowsDroppedByWatermark.generateHistogramHtml(jsCollector)}</td>
+ </tr>
+ // scalastyle:on
+
+ result ++= generateAggregatedCustomMetrics(query, minBatchTime,
maxBatchTime, jsCollector)
+ result
} else {
new NodeBuffer()
}
}
+ def generateAggregatedCustomMetrics(
+ query: StreamingQueryUIData,
+ minBatchTime: Long,
+ maxBatchTime: Long,
+ jsCollector: JsCollector): NodeBuffer = {
+ val result: NodeBuffer = new NodeBuffer
+
+ // This is made sure on caller side but put it here to be defensive
+ require(query.lastProgress.stateOperators.nonEmpty)
+ val disabledCustomMetrics =
Utils.stringToSeq(SQLConf.get.disabledStreamingCustomMetrics)
Review comment:
> I think it'd be better to do opposite, disable everything and let end
users enable it, as in many cases default metrics would be sufficient to get
the picture of current status. Custom metrics are more likely auxiliary.
That makes sense; I'll switch to the opt-in approach (disabled by default).
> So the ideal approach would be providing metric name with aggregation
method (sum, avg, min, max), but if it sounds to be complicated just for this
functionality, we could simply restrict to sum and explain it to config doc.
I think it's a good idea to have such functions, but the question is which
ones exactly.
Sum and max make sense, but I can't really imagine a use case for the other
two (avg and min). Maybe we can wait for real use cases and add aggregation
methods once the need is clear.
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]