Github user tgravescs commented on a diff in the pull request:
https://github.com/apache/spark/pull/21688#discussion_r219230988
--- Diff: core/src/main/resources/org/apache/spark/ui/static/stagepage.js
---
@@ -0,0 +1,926 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
// Register the page-wide jQuery AJAX handlers: $.unblockUI runs on "ajaxStop",
// and on "ajaxStart" the page is blocked with a "Loading Stage Page..." message.
$(document)
    .ajaxStop($.unblockUI)
    .ajaxStart(function showLoadingOverlay() {
        var overlayOptions = {message: '<h3>Loading Stage Page...</h3>'};
        $.blockUI(overlayOptions);
    });
+
// Register the "file-size" ordering type with DataTables. Values are converted
// with ConvertDurationString before being compared.
(function registerFileSizeOrdering() {
    // Three-way comparison: -1 when x sorts first, 1 when y sorts first, 0 otherwise.
    function threeWay(x, y) {
        if (x < y) {
            return -1;
        }
        if (x > y) {
            return 1;
        }
        return 0;
    }

    var fileSizeOrder = {
        "file-size-pre": ConvertDurationString,

        "file-size-asc": function (a, b) {
            var left = ConvertDurationString(a);
            var right = ConvertDurationString(b);
            return threeWay(left, right);
        },

        // Descending order is the ascending comparison with the operands swapped.
        "file-size-desc": function (a, b) {
            var left = ConvertDurationString(a);
            var right = ConvertDurationString(b);
            return threeWay(right, left);
        }
    };

    $.extend($.fn.dataTable.ext.type.order, fileSizeOrder);
})();
+
// Builds the REST API URL prefix for the stage shown on the current page.
//
// The URL is derived from document.baseURI, which is expected to have one of these shapes:
//   <base>/proxy/<appId>/stages/stage/?id=<stageId>&attempt=<n>
//   <base>/history/<appId>[/<attemptId>]/stages/stage/?id=<stageId>&attempt=<n>
//   e.g. https://domain:50509/history/application_1502220952225_59143/stages/stage/?id=0&attempt=0
// Any other URL falls back to location.origin and the appId argument.
//
// @param {string} appId application id; only used when the URL has neither a "proxy"
//     nor a "history" path segment.
// @returns {string} "<base>/api/v1/applications/<appId>[/<attemptId>]/stages/<stageId>";
//     <stageId> is "" when the URL carries no "id" query parameter.
function stageEndPoint(appId) {
    // Returns the value of the "id" query parameter, or "" when it is absent.
    // Looks the parameter up by name so it works without a trailing "&..."
    // and regardless of parameter order.
    function stageIdFromQuery(query) {
        var params = query.split('&');
        for (var i = 0; i < params.length; i++) {
            var eq = params[i].indexOf('=');
            var key = eq === -1 ? params[i] : params[i].substring(0, eq);
            if (key === "id") {
                return eq === -1 ? "" : params[i].substring(eq + 1);
            }
        }
        return "";
    }

    var uri = document.baseURI;
    var hashStart = uri.indexOf('#');
    if (hashStart !== -1) {
        uri = uri.substring(0, hashStart);
    }
    var queryStart = uri.indexOf('?');
    var path = queryStart === -1 ? uri : uri.substring(0, queryStart);
    var query = queryStart === -1 ? "" : uri.substring(queryStart + 1);
    var stageId = stageIdFromQuery(query);

    // Only the path is split, so the query string can never be mistaken for a path segment.
    var words = path.split('/');

    var ind = words.indexOf("proxy");
    if (ind > 0) {
        // Proxied UI: keep everything up to and including ".../proxy/<appId>".
        var proxiedAppId = words[ind + 1];
        var proxyBase = words.slice(0, ind + 2).join('/');
        return proxyBase + "/api/v1/applications/" + proxiedAppId + "/stages/" + stageId;
    }

    ind = words.indexOf("history");
    if (ind > 0) {
        // History server: the segment after the app id is an attempt id only when it is numeric.
        var historyAppId = words[ind + 1];
        var attemptId = words[ind + 2];
        var historyBase = words.slice(0, ind).join('/') + "/api/v1/applications/" + historyAppId;
        if (isNaN(attemptId) || attemptId === "0") {
            return historyBase + "/stages/" + stageId;
        }
        return historyBase + "/" + attemptId + "/stages/" + stageId;
    }

    return location.origin + "/api/v1/applications/" + appId + "/stages/" + stageId;
}
+
// Maps a task-metric key (the callers pass keys of the taskSummary response) to the
// label shown for that metric in the summary metrics table.
//
// @param {string} columnKey metric key such as "executorRunTime" or "shuffleReadMetrics"
// @returns {string} the display label, or "NA" when the key has no label
function getColumnNameForTaskMetricSummary(columnKey) {
    var labels = {
        "executorRunTime": "Duration",
        "jvmGcTime": "GC Time",
        "gettingResultTime": "Getting Result Time",
        "inputMetrics": "Input Size / Records",
        "outputMetrics": "Output Size / Records",
        "peakExecutionMemory": "Peak Execution Memory",
        "resultSerializationTime": "Result Serialization Time",
        "schedulerDelay": "Scheduler Delay",
        "diskBytesSpilled": "Shuffle spill (disk)",
        "memoryBytesSpilled": "Shuffle spill (memory)",
        "shuffleReadMetrics": "Shuffle Read Size / Records",
        "shuffleWriteMetrics": "Shuffle Write Size / Records",
        "executorDeserializeTime": "Task Deserialization Time"
    };

    // Only exact string keys match: this guards against inherited properties
    // (e.g. "toString") and against non-string keys being coerced to a matching string.
    if (typeof columnKey === "string" &&
        Object.prototype.hasOwnProperty.call(labels, columnKey)) {
        return labels[columnKey];
    }
    return "NA";
}
+
+$(document).ready(function () {
+ setDataTableDefaults();
+
+ $("#showAdditionalMetrics").append(
+ "<div><a id='taskMetric'>" +
+ "<span class='expand-input-rate-arrow arrow-closed'
id='arrowtoggle1'></span>" +
+ " Show Additional Metrics" +
+ "</a></div>" +
+ "<div class='container-fluid container-fluid-div'
id='toggle-metrics' hidden>" +
+ "<div><input type='checkbox' class='toggle-vis' id='box-0'
data-column='0'> Select All</div>" +
+ "<div id='scheduler_delay'><input type='checkbox'
class='toggle-vis' id='box-11' data-column='11'> Scheduler Delay</div>" +
+ "<div id='task_deserialization_time'><input type='checkbox'
class='toggle-vis' id='box-12' data-column='12'> Task Deserialization
Time</div>" +
+ "<div id='shuffle_read_blocked_time'><input type='checkbox'
class='toggle-vis' id='box-13' data-column='13'> Shuffle Read Blocked
Time</div>" +
+ "<div id='shuffle_remote_reads'><input type='checkbox'
class='toggle-vis' id='box-14' data-column='14'> Shuffle Remote Reads</div>" +
+ "<div id='result_serialization_time'><input type='checkbox'
class='toggle-vis' id='box-15' data-column='15'> Result Serialization
Time</div>" +
+ "<div id='getting_result_time'><input type='checkbox'
class='toggle-vis' id='box-16' data-column='16'> Getting Result Time</div>" +
+ "<div id='peak_execution_memory'><input type='checkbox'
class='toggle-vis' id='box-17' data-column='17'> Peak Execution Memory</div>" +
+ "</div>");
+
+ $('#scheduler_delay').attr("data-toggle", "tooltip")
+ .attr("data-placement", "bottom")
+ .attr("title", "Scheduler delay includes time to ship the task
from the scheduler to the executor, and time to send " +
+ "the task result from the executor to the scheduler. If
scheduler delay is large, consider decreasing the size of tasks or decreasing
the size of task results.");
+ $('#task_deserialization_time').attr("data-toggle", "tooltip")
+ .attr("data-placement", "bottom")
+ .attr("title", "Time spent deserializing the task closure on the
executor, including the time to read the broadcasted task.");
+ $('#shuffle_read_blocked_time').attr("data-toggle", "tooltip")
+ .attr("data-placement", "bottom")
+ .attr("title", "Time that the task spent blocked waiting for
shuffle data to be read from remote machines.");
+ $('#shuffle_remote_reads').attr("data-toggle", "tooltip")
+ .attr("data-placement", "bottom")
+ .attr("title", "Total shuffle bytes read from remote executors.
This is a subset of the shuffle read bytes; the remaining shuffle data is read
locally. ");
+ $('#result_serialization_time').attr("data-toggle", "tooltip")
+ .attr("data-placement", "bottom")
+ .attr("title", "Time spent serializing the task result on the
executor before sending it back to the driver.");
+ $('#getting_result_time').attr("data-toggle", "tooltip")
+ .attr("data-placement", "bottom")
+ .attr("title", "Time that the driver spends fetching task
results from workers. If this is large, consider decreasing the amount of data
returned from each task.");
+ $('#peak_execution_memory').attr("data-toggle", "tooltip")
+ .attr("data-placement", "bottom")
+ .attr("title", "Execution memory refers to the memory used by
internal data structures created during " +
+ "shuffles, aggregations and joins when Tungsten is
enabled. The value of this accumulator " +
+ "should be approximately the sum of the peak sizes across
all such data structures created " +
+ "in this task. For SQL jobs, this only tracks all unsafe
operators, broadcast joins, and " +
+ "external sort.");
+ $('#scheduler_delay').tooltip(true);
+ $('#task_deserialization_time').tooltip(true);
+ $('#shuffle_read_blocked_time').tooltip(true);
+ $('#shuffle_remote_reads').tooltip(true);
+ $('#result_serialization_time').tooltip(true);
+ $('#getting_result_time').tooltip(true);
+ $('#peak_execution_memory').tooltip(true);
+ tasksSummary = $("#active-tasks");
+ getStandAloneAppId(function (appId) {
+
+ var endPoint = stageEndPoint(appId);
+ $.getJSON(endPoint, function(response, status, jqXHR) {
+
+ var responseBody = response[0];
+ // prepare data for task aggregated metrics table
+ indices = Object.keys(responseBody.executorSummary);
+ var task_summary_table = [];
+ indices.forEach(function (ix) {
+ responseBody.executorSummary[ix].id = ix;
+ task_summary_table.push(responseBody.executorSummary[ix]);
+ });
+
+ // prepare data for accumulatorUpdates
+ var indices = Object.keys(responseBody.accumulatorUpdates);
+ var accumulator_table_all = [];
+ var accumulator_table = [];
+ indices.forEach(function (ix) {
+
accumulator_table_all.push(responseBody.accumulatorUpdates[ix]);
+ });
+
+ accumulator_table_all.forEach(function (x){
+ var name = (x.name).toString();
+ if(name.includes("internal.") == false){
+ accumulator_table.push(x);
+ }
+ });
+
+ // rendering the UI page
+ var data = {"executors": response};
+ $.get(createTemplateURI(appId, "stagespage"),
function(template) {
+
tasksSummary.append(Mustache.render($(template).filter("#stages-summary-template").html(),
data));
+
+ $("#taskMetric").click(function(){
+ $("#arrowtoggle1").toggleClass("arrow-open
arrow-closed");
+ $("#toggle-metrics").toggle();
+ });
+
+ $("#aggregatedMetrics").click(function(){
+ $("#arrowtoggle2").toggleClass("arrow-open
arrow-closed");
+ $("#toggle-aggregatedMetrics").toggle();
+ });
+
+ var task_metrics_table = [];
+ var stageAttemptId = getStageAttemptId();
+ var quantiles = "0,0.25,0.5,0.75,1.0";
+ $.getJSON(stageEndPoint(appId) +
"/"+stageAttemptId+"/taskSummary?quantiles="+quantiles,
function(taskMetricsResponse, status, jqXHR) {
+ var taskMetricIndices =
Object.keys(taskMetricsResponse);
+ taskMetricIndices.forEach(function (ix) {
+ var columnName =
getColumnNameForTaskMetricSummary(ix);
+ if (columnName == "Shuffle Read Size / Records") {
+ var row1 = {
+ "metric": columnName,
+ "data": taskMetricsResponse[ix]
+ };
+ var row2 = {
+ "metric": "Shuffle Read Blocked Time",
+ "data": taskMetricsResponse[ix]
+ };
+ var row3 = {
+ "metric": "Shuffle Remote Reads",
+ "data": taskMetricsResponse[ix]
+ };
+ task_metrics_table.push(row1);
+ task_metrics_table.push(row2);
+ task_metrics_table.push(row3);
+ }
+ else if (columnName != "NA") {
+ var row = {
+ "metric": columnName,
+ "data": taskMetricsResponse[ix]
+ };
+ task_metrics_table.push(row);
+ }
+ });
+
+ var taskMetricsTable = "#summary-metrics-table";
+ var task_conf = {
+ "data": task_metrics_table,
+ "columns": [
+ {data : 'metric'},
+ {
+ data: function (row, type) {
+ switch(row.metric) {
+ case 'Input Size / Records':
+ var str1arr =
extractDataFromArrayString(JSON.stringify(row.data.bytesRead));
+ var str2arr =
extractDataFromArrayString(JSON.stringify(row.data.recordsRead));
+ var str =
formatBytes(str1arr[0], type) + " / " + str2arr[0];
+ return str;
+ break;
+
+ case 'Output Size / Records':
+ var str1arr =
extractDataFromArrayString(JSON.stringify(row.data.bytesWritten));
+ var str2arr =
extractDataFromArrayString(JSON.stringify(row.data.recordsWritten));
+ var str =
formatBytes(str1arr[0], type) + " / " + str2arr[0];
+ return str;
+ break;
+
+ case 'Shuffle Read Size / Records':
+ var str1arr =
extractDataFromArrayString(JSON.stringify(row.data.readBytes));
+ var str2arr =
extractDataFromArrayString(JSON.stringify(row.data.readRecords));
+ var str =
formatBytes(str1arr[0], type) + " / " + str2arr[0];
+ return str;
+ break;
+
+ case 'Shuffle Read Blocked Time':
+ var str1arr =
extractDataFromArrayString(JSON.stringify(row.data.fetchWaitTime));
+ var str =
formatDuration(str1arr[0]);
+ return str;
+ break;
+
+ case 'Shuffle Remote Reads':
+ var str1arr =
extractDataFromArrayString(JSON.stringify(row.data.remoteBytesRead));
+ var str =
formatBytes(str1arr[0], type);
+ return str;
+ break;
+
+ case 'Shuffle Write Size /
Records':
+ var str1arr =
extractDataFromArrayString(JSON.stringify(row.data.writeBytes));
+ var str2arr =
extractDataFromArrayString(JSON.stringify(row.data.writeRecords));
+ var str =
formatBytes(str1arr[0], type) + " / " + str2arr[0];
+ return str;
+ break;
+
+ default:
+ return (row.metric == 'Peak
Execution Memory' || row.metric == 'Shuffle spill (memory)'
+ || row.metric ==
'Shuffle spill (disk)') ? formatBytes(row.data[0], type) :
(formatDuration(row.data[0]));
+
+ }
+ }
+ },
+ {
+ data: function (row, type) {
--- End diff --
Make this iterative if we can.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]