[GitHub] spark pull request #21688: [SPARK-21809] : Change Stage Page to use datatabl...

tgravescs Thu, 20 Sep 2018 09:25:07 -0700

GitHub user tgravescs commented on a diff in the pull request:

    https://github.com/apache/spark/pull/21688#discussion_r219230988
  
    --- Diff: core/src/main/resources/org/apache/spark/ui/static/stagepage.js ---
    @@ -0,0 +1,926 @@
    +/*
    + * Licensed to the Apache Software Foundation (ASF) under one or more
    + * contributor license agreements.  See the NOTICE file distributed with
    + * this work for additional information regarding copyright ownership.
    + * The ASF licenses this file to You under the Apache License, Version 2.0
    + * (the "License"); you may not use this file except in compliance with
    + * the License.  You may obtain a copy of the License at
    + *
    + *    http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +
    +$(document).ajaxStop($.unblockUI);
    +$(document).ajaxStart(function () {
    +    $.blockUI({message: '<h3>Loading Stage Page...</h3>'});
    +});
    +
    +$.extend( $.fn.dataTable.ext.type.order, {
    +    "file-size-pre": ConvertDurationString,
    +
    +    "file-size-asc": function ( a, b ) {
    +        a = ConvertDurationString( a );
    +        b = ConvertDurationString( b );
    +        return ((a < b) ? -1 : ((a > b) ? 1 : 0));
    +    },
    +
    +    "file-size-desc": function ( a, b ) {
    +        a = ConvertDurationString( a );
    +        b = ConvertDurationString( b );
    +        return ((a < b) ? 1 : ((a > b) ? -1 : 0));
    +    }
    +} );
    +
    +// This function only parses URLs that follow a specific format,
    +// e.g. https://domain:50509/history/application_1502220952225_59143/stages/stage/?id=0&attempt=0
    +function stageEndPoint(appId) {
    +    var words = document.baseURI.split('/');
    +    var words2 = document.baseURI.split('?');
    +    var ind = words.indexOf("proxy");
    +    if (ind > 0) {
    +        var appId = words[ind + 1];
    +        var stageIdLen = words2[1].indexOf('&');
    +        var stageId = words2[1].substr(3, stageIdLen - 3);
    +        var newBaseURI = words.slice(0, ind + 2).join('/');
    +        return newBaseURI + "/api/v1/applications/" + appId + "/stages/" + 
stageId;
    +    }
    +    ind = words.indexOf("history");
    +    if (ind > 0) {
    +        var appId = words[ind + 1];
    +        var attemptId = words[ind + 2];
    +        var stageIdLen = words2[1].indexOf('&');
    +        var stageId = words2[1].substr(3, stageIdLen - 3);
    +        var newBaseURI = words.slice(0, ind).join('/');
    +        if (isNaN(attemptId) || attemptId == "0") {
    +            return newBaseURI + "/api/v1/applications/" + appId + 
"/stages/" + stageId;
    +        } else {
    +            return newBaseURI + "/api/v1/applications/" + appId + "/" + 
attemptId + "/stages/" + stageId;
    +        }
    +    }
    +    var stageIdLen = words2[1].indexOf('&');
    +    var stageId = words2[1].substr(3, stageIdLen - 3);
    +    return location.origin + "/api/v1/applications/" + appId + "/stages/" 
+ stageId;
    +}
    +
    +function getColumnNameForTaskMetricSummary(columnKey) {
    +    switch(columnKey) {
    +        case "executorRunTime":
    +            return "Duration";
    +            break;
    +
    +        case "jvmGcTime":
    +            return "GC Time";
    +            break;
    +
    +        case "gettingResultTime":
    +            return "Getting Result Time";
    +            break;
    +
    +        case "inputMetrics":
    +            return "Input Size / Records";
    +            break;
    +
    +        case "outputMetrics":
    +            return "Output Size / Records";
    +            break;
    +
    +        case "peakExecutionMemory":
    +            return "Peak Execution Memory";
    +            break;
    +
    +        case "resultSerializationTime":
    +            return "Result Serialization Time";
    +            break;
    +
    +        case "schedulerDelay":
    +            return "Scheduler Delay";
    +            break;
    +
    +        case "diskBytesSpilled":
    +            return "Shuffle spill (disk)";
    +            break;
    +
    +        case "memoryBytesSpilled":
    +            return "Shuffle spill (memory)";
    +            break;
    +
    +        case "shuffleReadMetrics":
    +            return "Shuffle Read Size / Records";
    +            break;
    +
    +        case "shuffleWriteMetrics":
    +            return "Shuffle Write Size / Records";
    +            break;
    +
    +        case "executorDeserializeTime":
    +            return "Task Deserialization Time";
    +            break;
    +
    +        default:
    +            return "NA";
    +    }
    +}
    +
    +$(document).ready(function () {
    +    setDataTableDefaults();
    +
    +    $("#showAdditionalMetrics").append(
    +        "<div><a id='taskMetric'>" +
    +        "<span class='expand-input-rate-arrow arrow-closed' 
id='arrowtoggle1'></span>" +
    +        " Show Additional Metrics" +
    +        "</a></div>" +
    +        "<div class='container-fluid container-fluid-div' 
id='toggle-metrics' hidden>" +
    +        "<div><input type='checkbox' class='toggle-vis' id='box-0' 
data-column='0'> Select All</div>" +
    +        "<div id='scheduler_delay'><input type='checkbox' 
class='toggle-vis' id='box-11' data-column='11'> Scheduler Delay</div>" +
    +        "<div id='task_deserialization_time'><input type='checkbox' 
class='toggle-vis' id='box-12' data-column='12'> Task Deserialization 
Time</div>" +
    +        "<div id='shuffle_read_blocked_time'><input type='checkbox' 
class='toggle-vis' id='box-13' data-column='13'> Shuffle Read Blocked 
Time</div>" +
    +        "<div id='shuffle_remote_reads'><input type='checkbox' 
class='toggle-vis' id='box-14' data-column='14'> Shuffle Remote Reads</div>" +
    +        "<div id='result_serialization_time'><input type='checkbox' 
class='toggle-vis' id='box-15' data-column='15'> Result Serialization 
Time</div>" +
    +        "<div id='getting_result_time'><input type='checkbox' 
class='toggle-vis' id='box-16' data-column='16'> Getting Result Time</div>" +
    +        "<div id='peak_execution_memory'><input type='checkbox' 
class='toggle-vis' id='box-17' data-column='17'> Peak Execution Memory</div>" +
    +        "</div>");
    +
    +    $('#scheduler_delay').attr("data-toggle", "tooltip")
    +        .attr("data-placement", "bottom")
    +        .attr("title", "Scheduler delay includes time to ship the task 
from the scheduler to the executor, and time to send " +
    +            "the task result from the executor to the scheduler. If 
scheduler delay is large, consider decreasing the size of tasks or decreasing 
the size of task results.");
    +    $('#task_deserialization_time').attr("data-toggle", "tooltip")
    +        .attr("data-placement", "bottom")
    +        .attr("title", "Time spent deserializing the task closure on the 
executor, including the time to read the broadcasted task.");
    +    $('#shuffle_read_blocked_time').attr("data-toggle", "tooltip")
    +        .attr("data-placement", "bottom")
    +        .attr("title", "Time that the task spent blocked waiting for 
shuffle data to be read from remote machines.");
    +    $('#shuffle_remote_reads').attr("data-toggle", "tooltip")
    +        .attr("data-placement", "bottom")
    +        .attr("title", "Total shuffle bytes read from remote executors. 
This is a subset of the shuffle read bytes; the remaining shuffle data is read 
locally. ");
    +    $('#result_serialization_time').attr("data-toggle", "tooltip")
    +            .attr("data-placement", "bottom")
    +            .attr("title", "Time spent serializing the task result on the 
executor before sending it back to the driver.");
    +    $('#getting_result_time').attr("data-toggle", "tooltip")
    +            .attr("data-placement", "bottom")
    +            .attr("title", "Time that the driver spends fetching task 
results from workers. If this is large, consider decreasing the amount of data 
returned from each task.");
    +    $('#peak_execution_memory').attr("data-toggle", "tooltip")
    +            .attr("data-placement", "bottom")
    +            .attr("title", "Execution memory refers to the memory used by 
internal data structures created during " +
    +                "shuffles, aggregations and joins when Tungsten is 
enabled. The value of this accumulator " +
    +                "should be approximately the sum of the peak sizes across 
all such data structures created " +
    +                "in this task. For SQL jobs, this only tracks all unsafe 
operators, broadcast joins, and " +
    +                "external sort.");
    +    $('#scheduler_delay').tooltip(true);
    +    $('#task_deserialization_time').tooltip(true);
    +    $('#shuffle_read_blocked_time').tooltip(true);
    +    $('#shuffle_remote_reads').tooltip(true);
    +    $('#result_serialization_time').tooltip(true);
    +    $('#getting_result_time').tooltip(true);
    +    $('#peak_execution_memory').tooltip(true);
    +    tasksSummary = $("#active-tasks");
    +    getStandAloneAppId(function (appId) {
    +
    +        var endPoint = stageEndPoint(appId);
    +        $.getJSON(endPoint, function(response, status, jqXHR) {
    +
    +            var responseBody = response[0];
    +            // prepare data for task aggregated metrics table
    +            indices = Object.keys(responseBody.executorSummary);
    +            var task_summary_table = [];
    +            indices.forEach(function (ix) {
    +               responseBody.executorSummary[ix].id = ix;
    +               task_summary_table.push(responseBody.executorSummary[ix]);
    +            });
    +
    +            // prepare data for accumulatorUpdates
    +            var indices = Object.keys(responseBody.accumulatorUpdates);
    +            var accumulator_table_all = [];
    +            var accumulator_table = [];
    +            indices.forEach(function (ix) {
    +               
accumulator_table_all.push(responseBody.accumulatorUpdates[ix]);
    +            });
    +
    +            accumulator_table_all.forEach(function (x){
    +                var name = (x.name).toString();
    +                if(name.includes("internal.") == false){
    +                    accumulator_table.push(x);
    +                }
    +            });
    +
    +            // rendering the UI page
    +            var data = {"executors": response};
    +            $.get(createTemplateURI(appId, "stagespage"), 
function(template) {
    +                
tasksSummary.append(Mustache.render($(template).filter("#stages-summary-template").html(),
 data));
    +
    +                $("#taskMetric").click(function(){
    +                    $("#arrowtoggle1").toggleClass("arrow-open 
arrow-closed");
    +                    $("#toggle-metrics").toggle();
    +                });
    +
    +                $("#aggregatedMetrics").click(function(){
    +                    $("#arrowtoggle2").toggleClass("arrow-open 
arrow-closed");
    +                    $("#toggle-aggregatedMetrics").toggle();
    +                });
    +
    +                var task_metrics_table = [];
    +                var stageAttemptId = getStageAttemptId();
    +                var quantiles = "0,0.25,0.5,0.75,1.0";
    +                $.getJSON(stageEndPoint(appId) + 
"/"+stageAttemptId+"/taskSummary?quantiles="+quantiles, 
function(taskMetricsResponse, status, jqXHR) {
    +                    var taskMetricIndices = 
Object.keys(taskMetricsResponse);
    +                    taskMetricIndices.forEach(function (ix) {
    +                        var columnName = 
getColumnNameForTaskMetricSummary(ix);
    +                        if (columnName == "Shuffle Read Size / Records") {
    +                            var row1 = {
    +                                "metric": columnName,
    +                                "data": taskMetricsResponse[ix]
    +                            };
    +                            var row2 = {
    +                                "metric": "Shuffle Read Blocked Time",
    +                                "data": taskMetricsResponse[ix]
    +                            };
    +                            var row3 = {
    +                                "metric": "Shuffle Remote Reads",
    +                                "data": taskMetricsResponse[ix]
    +                            };
    +                            task_metrics_table.push(row1);
    +                            task_metrics_table.push(row2);
    +                            task_metrics_table.push(row3);
    +                        }
    +                        else if (columnName != "NA") {
    +                            var row = {
    +                                "metric": columnName,
    +                                "data": taskMetricsResponse[ix]
    +                            };
    +                            task_metrics_table.push(row);
    +                        }
    +                    });
    +
    +                    var taskMetricsTable = "#summary-metrics-table";
    +                    var task_conf = {
    +                        "data": task_metrics_table,
    +                        "columns": [
    +                            {data : 'metric'},
    +                            {
    +                                data: function (row, type) {
    +                                    switch(row.metric) {
    +                                        case 'Input Size / Records':
    +                                            var str1arr = 
extractDataFromArrayString(JSON.stringify(row.data.bytesRead));
    +                                            var str2arr = 
extractDataFromArrayString(JSON.stringify(row.data.recordsRead));
    +                                            var str = 
formatBytes(str1arr[0], type) + " / " + str2arr[0];
    +                                            return str;
    +                                            break;
    +
    +                                        case 'Output Size / Records':
    +                                            var str1arr = 
extractDataFromArrayString(JSON.stringify(row.data.bytesWritten));
    +                                            var str2arr = 
extractDataFromArrayString(JSON.stringify(row.data.recordsWritten));
    +                                            var str = 
formatBytes(str1arr[0], type) + " / " + str2arr[0];
    +                                            return str;
    +                                            break;
    +
    +                                        case 'Shuffle Read Size / Records':
    +                                            var str1arr = 
extractDataFromArrayString(JSON.stringify(row.data.readBytes));
    +                                            var str2arr = 
extractDataFromArrayString(JSON.stringify(row.data.readRecords));
    +                                            var str = 
formatBytes(str1arr[0], type) + " / " + str2arr[0];
    +                                            return str;
    +                                            break;
    +
    +                                        case 'Shuffle Read Blocked Time':
    +                                            var str1arr = 
extractDataFromArrayString(JSON.stringify(row.data.fetchWaitTime));
    +                                            var str = 
formatDuration(str1arr[0]);
    +                                            return str;
    +                                            break;
    +
    +                                        case 'Shuffle Remote Reads':
    +                                            var str1arr = 
extractDataFromArrayString(JSON.stringify(row.data.remoteBytesRead));
    +                                            var str = 
formatBytes(str1arr[0], type);
    +                                            return str;
    +                                            break;
    +
    +                                        case 'Shuffle Write Size / 
Records':
    +                                            var str1arr = 
extractDataFromArrayString(JSON.stringify(row.data.writeBytes));
    +                                            var str2arr = 
extractDataFromArrayString(JSON.stringify(row.data.writeRecords));
    +                                            var str = 
formatBytes(str1arr[0], type) + " / " + str2arr[0];
    +                                            return str;
    +                                            break;
    +
    +                                        default:
    +                                            return (row.metric == 'Peak 
Execution Memory' || row.metric == 'Shuffle spill (memory)'
    +                                                    || row.metric == 
'Shuffle spill (disk)') ? formatBytes(row.data[0], type) : 
(formatDuration(row.data[0]));
    +
    +                                    }
    +                                }
    +                            },
    +                            {
    +                                data: function (row, type) {
    --- End diff --
    
    Make this iterative if we can (i.e. generate the repeated column definitions in a loop instead of writing each one out).



---

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

[GitHub] spark pull request #21688: [SPARK-21809] : Change Stage Page to use datatabl...

Reply via email to