[GitHub] drill pull request #1241: DRILL-6364: Handle Cluster Info in WebUI when exis...
Github user kkhatua commented on a diff in the pull request: https://github.com/apache/drill/pull/1241#discussion_r185339738 --- Diff: exec/java-exec/src/main/resources/rest/index.ftl --- @@ -252,33 +255,129 @@ timeout = setTimeout(reloadStatus, refreshTime); } - function fillStatus(data,size) { - var status_map = (data.responseJSON); - for (i = 1; i <= size; i++) { -var address = $("#row-"+i).find("#address").contents().get(0).nodeValue; -address = address.trim(); -var port = $("#row-"+i).find("#port").html(); -var key = address+"-"+port; + function fillStatus(dataResponse,size) { + var status_map = (dataResponse.responseJSON); + //In case localhost has gone down (i.e. we don't know status from ZK) + if (typeof status_map == 'undefined') { +//Query other nodes for state details +for (j = 1; j <= size; j++) { + if ($("#row-"+j).find("#current").html() == "Current") { +continue; //Skip LocalHost + } + var address = $("#row-"+j).find("#address").contents().get(0).nodeValue.trim(); + var restPort = $("#row-"+j).find("#httpPort").contents().get(0).nodeValue.trim(); + var altStateUrl = location.protocol + "//" + address+":"+restPort + "/state"; + var goatResponse = $.getJSON(altStateUrl) +.done(function(stateDataJson) { +//Update Status & Buttons for alternate stateData +if (typeof status_map == 'undefined') { + status_map = (stateDataJson); //Update + updateStatusAndShutdown(stateDataJson); +} + }); + //Don't loop any more + if (typeof status_map != 'undefined') { +break; + } +} + } else { +updateStatusAndShutdown(status_map); + } + } + + function updateStatusAndShutdown(status_map) { +let bitMap = {}; +if (typeof status_map != 'undefined') { +for (var k in status_map) { + bitMap[k] = status_map[k]; +} +} +for (i = 1; i <= size; i++) { +let key = ""; +if ($("#row-"+i).find("#stateKey").length > 0) { //Check if newBit that has no stateKey --- End diff -- Made a switch to `currentRow` The stateKey is irrelevant and not needed any more. We actually weren't injecting it anymore, so the block was never executed. ---
[GitHub] drill pull request #1241: DRILL-6364: Handle Cluster Info in WebUI when exis...
Github user kkhatua commented on a diff in the pull request: https://github.com/apache/drill/pull/1241#discussion_r185135884 --- Diff: exec/java-exec/src/main/resources/rest/index.ftl --- @@ -103,14 +105,14 @@ ${drillbit.getState()} Not Available - -<#if ( model.shouldShowAdminInfo() && ( drillbit.isCurrent() || ( !model.isAuthEnabled() && location.protocol != "https" ))) > - -<#else> + + <#if ( model.shouldShowAdminInfo() || !model.isAuthEnabled() || drillbit.isCurrent() ) > --- End diff -- Yes, and (unfortunately), I can't check for 'https' protocol within freemarker. The check for this is taken care of at [line 317](https://github.com/kkhatua/drill/blob/ab3e8619c6259803eb362be290a3a3605839a194/exec/java-exec/src/main/resources/rest/index.ftl#L317) of `updateStatusAndShutdown` function, where I check for *HTTPS* and as a current Drillbit. ```javascript if (status_map[key] == "ONLINE") { $("#row-"+i).find("#status").text(status_map[key]); <#if ( model.shouldShowAdminInfo() || !model.isAuthEnabled() ) > if ( location.protocol != "https" || ($("#row-"+i).find("#current").html() == "Current") ) { $("#row-"+i).find("#shutdown").prop('disabled',false).css('opacity',1.0).css('cursor','pointer'); } ``` ---
[GitHub] drill pull request #1241: DRILL-6364: Handle Cluster Info in WebUI when exis...
Github user kkhatua commented on a diff in the pull request: https://github.com/apache/drill/pull/1241#discussion_r185134711 --- Diff: exec/java-exec/src/main/resources/rest/index.ftl --- @@ -328,9 +432,11 @@ //Iterates through all the nodes for update function reloadMetrics() { for (i = 1; i <= size; i++) { - var address = $("#row-"+i).find("#address").contents().get(0).nodeValue.trim(); - var httpPort = $("#row-"+i).find("#httpPort").contents().get(0).nodeValue.trim(); - updateMetricsHtml(address, httpPort, i); + if ( $("#row-"+i).find("#stateKey").length == 0 ) { --- End diff -- Very odd. The `stateKey` seems to disappear (or never appeared). Let me look at what happened. ---
[GitHub] drill pull request #1241: DRILL-6364: Handle Cluster Info in WebUI when exis...
Github user kkhatua commented on a diff in the pull request: https://github.com/apache/drill/pull/1241#discussion_r185131588 --- Diff: exec/java-exec/src/main/resources/rest/index.ftl --- @@ -328,9 +432,11 @@ //Iterates through all the nodes for update function reloadMetrics() { for (i = 1; i <= size; i++) { - var address = $("#row-"+i).find("#address").contents().get(0).nodeValue.trim(); - var httpPort = $("#row-"+i).find("#httpPort").contents().get(0).nodeValue.trim(); - updateMetricsHtml(address, httpPort, i); + if ( $("#row-"+i).find("#stateKey").length == 0 ) { +var address = $("#row-"+i).find("#address").contents().get(0).nodeValue.trim(); +var httpPort = $("#row-"+i).find("#httpPort").contents().get(0).nodeValue.trim(); +updateMetricsHtml(address, httpPort, i); --- End diff -- Ok. Logically, this seems to resolve to the same outcome, though. ---
[GitHub] drill pull request #1241: DRILL-6364: Handle Cluster Info in WebUI when exis...
Github user kkhatua commented on a diff in the pull request: https://github.com/apache/drill/pull/1241#discussion_r185131321 --- Diff: exec/java-exec/src/main/resources/rest/index.ftl --- @@ -252,33 +255,129 @@ timeout = setTimeout(reloadStatus, refreshTime); } - function fillStatus(data,size) { - var status_map = (data.responseJSON); - for (i = 1; i <= size; i++) { -var address = $("#row-"+i).find("#address").contents().get(0).nodeValue; -address = address.trim(); -var port = $("#row-"+i).find("#port").html(); -var key = address+"-"+port; + function fillStatus(dataResponse,size) { + var status_map = (dataResponse.responseJSON); + //In case localhost has gone down (i.e. we don't know status from ZK) + if (typeof status_map == 'undefined') { +//Query other nodes for state details +for (j = 1; j <= size; j++) { + if ($("#row-"+j).find("#current").html() == "Current") { +continue; //Skip LocalHost + } + var address = $("#row-"+j).find("#address").contents().get(0).nodeValue.trim(); + var restPort = $("#row-"+j).find("#httpPort").contents().get(0).nodeValue.trim(); + var altStateUrl = location.protocol + "//" + address+":"+restPort + "/state"; + var goatResponse = $.getJSON(altStateUrl) +.done(function(stateDataJson) { +//Update Status & Buttons for alternate stateData +if (typeof status_map == 'undefined') { + status_map = (stateDataJson); //Update + updateStatusAndShutdown(stateDataJson); +} + }); + //Don't loop any more + if (typeof status_map != 'undefined') { +break; + } +} + } else { +updateStatusAndShutdown(status_map); + } + } + + function updateStatusAndShutdown(status_map) { +let bitMap = {}; +if (typeof status_map != 'undefined') { +for (var k in status_map) { + bitMap[k] = status_map[k]; +} +} +for (i = 1; i <= size; i++) { +let key = ""; +if ($("#row-"+i).find("#stateKey").length > 0) { //Check if newBit that has no stateKey + key = $("#row-"+i).find("#stateKey").textContent; +} else { + let address = $("#row-"+i).find("#address").contents().get(0).nodeValue.trim(); + let port = $("#row-"+i).find("#httpPort").html(); + key = address+"-"+port; +} -if (status_map[key] == null) { +if (typeof status_map == 'undefined') { +$("#row-"+i).find("#status").text(nAText); + $("#row-"+i).find("#shutdown").prop('disabled',true).css('opacity',0.5); +$("#row-"+i).find("#queriesCount").text(""); +} else if (status_map[key] == null) { $("#row-"+i).find("#status").text("OFFLINE"); $("#row-"+i).find("#shutdown").prop('disabled',true).css('opacity',0.5); $("#row-"+i).find("#queriesCount").text(""); -} -else { +} else { if (status_map[key] == "ONLINE") { $("#row-"+i).find("#status").text(status_map[key]); - $("#row-"+i).find("#shutdown").prop('disabled',false).css('opacity',1.0); -} -else { +<#if ( model.shouldShowAdminInfo() || !model.isAuthEnabled() ) > +if ( location.protocol != "https" || ($("#row-"+i).find("#current").html() == "Current") ) { + $("#row-"+i).find("#shutdown").prop('disabled',false).css('opacity',1.0).css('cursor','pointer'); +} + +} else { if ($("#row-&quo
[GitHub] drill pull request #1241: DRILL-6364: Handle Cluster Info in WebUI when exis...
Github user kkhatua commented on a diff in the pull request: https://github.com/apache/drill/pull/1241#discussion_r185129153 --- Diff: exec/java-exec/src/main/resources/rest/index.ftl --- @@ -252,33 +255,129 @@ timeout = setTimeout(reloadStatus, refreshTime); } - function fillStatus(data,size) { - var status_map = (data.responseJSON); - for (i = 1; i <= size; i++) { -var address = $("#row-"+i).find("#address").contents().get(0).nodeValue; -address = address.trim(); -var port = $("#row-"+i).find("#port").html(); -var key = address+"-"+port; + function fillStatus(dataResponse,size) { + var status_map = (dataResponse.responseJSON); + //In case localhost has gone down (i.e. we don't know status from ZK) + if (typeof status_map == 'undefined') { +//Query other nodes for state details +for (j = 1; j <= size; j++) { + if ($("#row-"+j).find("#current").html() == "Current") { +continue; //Skip LocalHost + } + var address = $("#row-"+j).find("#address").contents().get(0).nodeValue.trim(); + var restPort = $("#row-"+j).find("#httpPort").contents().get(0).nodeValue.trim(); + var altStateUrl = location.protocol + "//" + address+":"+restPort + "/state"; + var goatResponse = $.getJSON(altStateUrl) +.done(function(stateDataJson) { +//Update Status & Buttons for alternate stateData +if (typeof status_map == 'undefined') { + status_map = (stateDataJson); //Update + updateStatusAndShutdown(stateDataJson); +} + }); + //Don't loop any more + if (typeof status_map != 'undefined') { +break; + } +} + } else { +updateStatusAndShutdown(status_map); + } + } + + function updateStatusAndShutdown(status_map) { +let bitMap = {}; +if (typeof status_map != 'undefined') { +for (var k in status_map) { + bitMap[k] = status_map[k]; +} +} +for (i = 1; i <= size; i++) { +let key = ""; +if ($("#row-"+i).find("#stateKey").length > 0) { //Check if newBit that has no stateKey + key = $("#row-"+i).find("#stateKey").textContent; +} else { + let address = $("#row-"+i).find("#address").contents().get(0).nodeValue.trim(); + let port = $("#row-"+i).find("#httpPort").html(); + key = address+"-"+port; +} -if (status_map[key] == null) { +if (typeof status_map == 'undefined') { +$("#row-"+i).find("#status").text(nAText); + $("#row-"+i).find("#shutdown").prop('disabled',true).css('opacity',0.5); +$("#row-"+i).find("#queriesCount").text(""); +} else if (status_map[key] == null) { $("#row-"+i).find("#status").text("OFFLINE"); $("#row-"+i).find("#shutdown").prop('disabled',true).css('opacity',0.5); $("#row-"+i).find("#queriesCount").text(""); -} -else { +} else { if (status_map[key] == "ONLINE") { $("#row-"+i).find("#status").text(status_map[key]); - $("#row-"+i).find("#shutdown").prop('disabled',false).css('opacity',1.0); -} -else { +<#if ( model.shouldShowAdminInfo() || !model.isAuthEnabled() ) > +if ( location.protocol != "https" || ($("#row-"+i).find("#current").html() == "Current") ) { + $("#row-"+i).find("#shutdown").prop('disabled',false).css('opacity',1.0).css('cursor','pointer'); +} + +} else { if ($("#row-&quo
[GitHub] drill pull request #1241: DRILL-6364: Handle Cluster Info in WebUI when exis...
Github user kkhatua commented on a diff in the pull request: https://github.com/apache/drill/pull/1241#discussion_r185128740 --- Diff: exec/java-exec/src/main/resources/rest/index.ftl --- @@ -252,33 +255,129 @@ timeout = setTimeout(reloadStatus, refreshTime); } - function fillStatus(data,size) { - var status_map = (data.responseJSON); - for (i = 1; i <= size; i++) { -var address = $("#row-"+i).find("#address").contents().get(0).nodeValue; -address = address.trim(); -var port = $("#row-"+i).find("#port").html(); -var key = address+"-"+port; + function fillStatus(dataResponse,size) { + var status_map = (dataResponse.responseJSON); + //In case localhost has gone down (i.e. we don't know status from ZK) + if (typeof status_map == 'undefined') { +//Query other nodes for state details +for (j = 1; j <= size; j++) { + if ($("#row-"+j).find("#current").html() == "Current") { +continue; //Skip LocalHost + } + var address = $("#row-"+j).find("#address").contents().get(0).nodeValue.trim(); + var restPort = $("#row-"+j).find("#httpPort").contents().get(0).nodeValue.trim(); + var altStateUrl = location.protocol + "//" + address+":"+restPort + "/state"; + var goatResponse = $.getJSON(altStateUrl) +.done(function(stateDataJson) { +//Update Status & Buttons for alternate stateData +if (typeof status_map == 'undefined') { + status_map = (stateDataJson); //Update + updateStatusAndShutdown(stateDataJson); +} + }); + //Don't loop any more + if (typeof status_map != 'undefined') { +break; + } +} + } else { +updateStatusAndShutdown(status_map); + } + } + + function updateStatusAndShutdown(status_map) { +let bitMap = {}; --- End diff -- `bit_map` is a local map which is originally a clone of `status_map` before we remove already-seen bits. The remainder of the map is passed on. `status_map` is a global variable, as it is shared by multiple AJAX calls, so we avoid using that in `listNewDrillbits` ---
[GitHub] drill pull request #1241: DRILL-6364: Handle Cluster Info in WebUI when exis...
Github user kkhatua commented on a diff in the pull request: https://github.com/apache/drill/pull/1241#discussion_r185127293 --- Diff: exec/java-exec/src/main/resources/rest/index.ftl --- @@ -252,33 +255,129 @@ timeout = setTimeout(reloadStatus, refreshTime); } - function fillStatus(data,size) { - var status_map = (data.responseJSON); - for (i = 1; i <= size; i++) { -var address = $("#row-"+i).find("#address").contents().get(0).nodeValue; -address = address.trim(); -var port = $("#row-"+i).find("#port").html(); -var key = address+"-"+port; + function fillStatus(dataResponse,size) { + var status_map = (dataResponse.responseJSON); + //In case localhost has gone down (i.e. we don't know status from ZK) + if (typeof status_map == 'undefined') { +//Query other nodes for state details +for (j = 1; j <= size; j++) { + if ($("#row-"+j).find("#current").html() == "Current") { +continue; //Skip LocalHost + } + var address = $("#row-"+j).find("#address").contents().get(0).nodeValue.trim(); + var restPort = $("#row-"+j).find("#httpPort").contents().get(0).nodeValue.trim(); + var altStateUrl = location.protocol + "//" + address+":"+restPort + "/state"; + var goatResponse = $.getJSON(altStateUrl) +.done(function(stateDataJson) { +//Update Status & Buttons for alternate stateData +if (typeof status_map == 'undefined') { + status_map = (stateDataJson); //Update + updateStatusAndShutdown(stateDataJson); +} + }); + //Don't loop any more + if (typeof status_map != 'undefined') { +break; + } +} + } else { +updateStatusAndShutdown(status_map); + } + } + + function updateStatusAndShutdown(status_map) { +let bitMap = {}; +if (typeof status_map != 'undefined') { +for (var k in status_map) { + bitMap[k] = status_map[k]; +} +} +for (i = 1; i <= size; i++) { +let key = ""; +if ($("#row-"+i).find("#stateKey").length > 0) { //Check if newBit that has no stateKey --- End diff -- I'll check this and get back to you. ---
[GitHub] drill pull request #1241: DRILL-6364: Handle Cluster Info in WebUI when exis...
Github user kkhatua commented on a diff in the pull request: https://github.com/apache/drill/pull/1241#discussion_r185127034 --- Diff: exec/java-exec/src/main/resources/rest/index.ftl --- @@ -252,33 +255,129 @@ timeout = setTimeout(reloadStatus, refreshTime); } - function fillStatus(data,size) { - var status_map = (data.responseJSON); - for (i = 1; i <= size; i++) { -var address = $("#row-"+i).find("#address").contents().get(0).nodeValue; -address = address.trim(); -var port = $("#row-"+i).find("#port").html(); -var key = address+"-"+port; + function fillStatus(dataResponse,size) { + var status_map = (dataResponse.responseJSON); + //In case localhost has gone down (i.e. we don't know status from ZK) + if (typeof status_map == 'undefined') { +//Query other nodes for state details +for (j = 1; j <= size; j++) { + if ($("#row-"+j).find("#current").html() == "Current") { +continue; //Skip LocalHost + } + var address = $("#row-"+j).find("#address").contents().get(0).nodeValue.trim(); + var restPort = $("#row-"+j).find("#httpPort").contents().get(0).nodeValue.trim(); + var altStateUrl = location.protocol + "//" + address+":"+restPort + "/state"; + var goatResponse = $.getJSON(altStateUrl) +.done(function(stateDataJson) { +//Update Status & Buttons for alternate stateData +if (typeof status_map == 'undefined') { + status_map = (stateDataJson); //Update + updateStatusAndShutdown(stateDataJson); +} + }); + //Don't loop any more + if (typeof status_map != 'undefined') { +break; + } +} + } else { --- End diff -- I tried that, but since the `.getJson` is an async call, the `.done` callback is the right way to handle. Otherwise, the response doesn't come back in time for the `updateStatusAndShutdown` to be called with a valid `status_map`. So, this is the best way to go. ---
[GitHub] drill pull request #1241: DRILL-6364: Handle Cluster Info in WebUI when exis...
Github user kkhatua commented on a diff in the pull request: https://github.com/apache/drill/pull/1241#discussion_r185126155 --- Diff: exec/java-exec/src/main/resources/rest/index.ftl --- @@ -252,33 +255,129 @@ timeout = setTimeout(reloadStatus, refreshTime); } - function fillStatus(data,size) { - var status_map = (data.responseJSON); - for (i = 1; i <= size; i++) { -var address = $("#row-"+i).find("#address").contents().get(0).nodeValue; -address = address.trim(); -var port = $("#row-"+i).find("#port").html(); -var key = address+"-"+port; + function fillStatus(dataResponse,size) { + var status_map = (dataResponse.responseJSON); + //In case localhost has gone down (i.e. we don't know status from ZK) + if (typeof status_map == 'undefined') { --- End diff -- Ok. A lot of functions need comments. I'll update even those that haven't been touched. ---
[GitHub] drill pull request #1241: DRILL-6364: Handle Cluster Info in WebUI when exis...
Github user kkhatua commented on a diff in the pull request: https://github.com/apache/drill/pull/1241#discussion_r185125707 --- Diff: exec/java-exec/src/main/resources/rest/index.ftl --- @@ -252,33 +255,129 @@ timeout = setTimeout(reloadStatus, refreshTime); } - function fillStatus(data,size) { - var status_map = (data.responseJSON); - for (i = 1; i <= size; i++) { -var address = $("#row-"+i).find("#address").contents().get(0).nodeValue; -address = address.trim(); -var port = $("#row-"+i).find("#port").html(); -var key = address+"-"+port; + function fillStatus(dataResponse,size) { + var status_map = (dataResponse.responseJSON); + //In case localhost has gone down (i.e. we don't know status from ZK) + if (typeof status_map == 'undefined') { +//Query other nodes for state details +for (j = 1; j <= size; j++) { + if ($("#row-"+j).find("#current").html() == "Current") { +continue; //Skip LocalHost + } + var address = $("#row-"+j).find("#address").contents().get(0).nodeValue.trim(); + var restPort = $("#row-"+j).find("#httpPort").contents().get(0).nodeValue.trim(); + var altStateUrl = location.protocol + "//" + address+":"+restPort + "/state"; + var goatResponse = $.getJSON(altStateUrl) --- End diff -- Unused variable name required for jQuery. I'll fix this with a better name :) ---
[GitHub] drill issue #1239: DRILL-143: CGroup Support for Drill-on-YARN
Github user kkhatua commented on the issue: https://github.com/apache/drill/pull/1239 Yes, I agree. Support folks gave me similar feedback, so I'll commit this change in the mapr distro _IFF_ there is a request for that. YARN is already a complex beast with numerous settings. Introducing caveats like this will only add to the confusion. ---
[GitHub] drill issue #1239: DRILL-143: CGroup Support for Drill-on-YARN
Github user kkhatua commented on the issue: https://github.com/apache/drill/pull/1239 @paul-rogers I went through with support on this and found that the issue is not specific to MapR. However, you make a strong argument in favor of letting YARN handle the CGroup management rather than Drill over-reaching. I've reverted the change to `yarn-drillbit.sh` in the latest commit. ---
[GitHub] drill issue #1042: DRILL-5261: Expose REST endpoint in zookeeper
Github user kkhatua commented on the issue: https://github.com/apache/drill/pull/1042 @xhochy I recently had a PR #1203 for DRILL-6289 and realized your PR's solution was a subset of that. I've already incorporated changes to the C++ client's protobuf as well. If it addresses the requirements you had for this JIRA, can you close the PR? If there is something missing, feel free to submit a new PR with the missing feature. ---
[GitHub] drill issue #1233: Updated with links to previous releases
Github user kkhatua commented on the issue: https://github.com/apache/drill/pull/1233 @arina-ielchiieva I'll change the PR as suggested by Parth. Since Bridget does the merges for _gh-pages_ repo, I'll ask her to close the PR. ---
[GitHub] drill issue #1241: DRILL-6364: Handle Cluster Info in WebUI when existing/ne...
Github user kkhatua commented on the issue: https://github.com/apache/drill/pull/1241 @sohami / @arina-ielchiieva can you review this? The change is not extensive and fairly straightforward. ---
[GitHub] drill issue #1241: DRILL-6364: Handle Cluster Info in WebUI when existing/ne...
Github user kkhatua commented on the issue: https://github.com/apache/drill/pull/1241 Screenshot of when UI node `kk127` goes down. The UI's javascript logic queries other Drillbits in the list (in this case, `kk128`) and discovers two new previously unseen Drillbits - `kk130` and `kk129`, discovered in the sequence in which they were discovered in the cluster. State changes are marked correctly, with shutdown buttons disabled. A prompt in the form of an orange refresh button near the Drillbit count indicates the need to refresh. Alternatively, one of the other nodes can be used for pop-out of a new WebUI. ![image](https://user-images.githubusercontent.com/4335237/39389539-681fed40-4a3e-11e8-92f7-6d5e717e0881.png) ---
[GitHub] drill pull request #1241: DRILL-6364: Handle Cluster Info in WebUI when exis...
GitHub user kkhatua opened a pull request: https://github.com/apache/drill/pull/1241 DRILL-6364: Handle Cluster Info in WebUI when existing/new bits restart As a follow up to DRILL-6289, the following improvements have been done: 1. When loading the page for the first time, the WebUI enables the shutdown button without actually checking the state of the Drillbits. The ideal behaviour should be to disable the button till the state is verified. **[Done]** _If a Drillbit is confirmed down (i.e. not in `/state` response), it is marked as OFFLINE and button is disabled._ 2. When shutting down the current Drillbit, the WebUI no more has access to the cluster state. The ideal behaviour here should be to fetch the state from any of the other Drillbits to update the status. **[Done]** _With the current Drillbit down, the other bits are requested for cluster state info and update accordingly._ 3. When a new, previously unseen Drillbit comes up, the WebUI will never render it because the table is statically generated during the first page load. The idea behaviour should be to append to the table on discovery of a new node. **[Done]** _The new Drillbit info is injected and a prompt appears to refresh the page to re-populate any missing info. This also works with feature (2) mentioned above._ The only Java code change was to have the state response carry the address and http-port as a tuple, instead of the user-port (which seems to be never used). You can merge this pull request into a Git repository by running: $ git pull https://github.com/kkhatua/drill DRILL-6364 Alternatively you can review and apply these changes as the patch at: https://github.com/apache/drill/pull/1241.patch To close this pull request, make a commit to your master/trunk branch with (at least) the following in the commit message: This closes #1241 commit ab3e8619c6259803eb362be290a3a3605839a194 Author: Kunal Khatua <kunal@...> Date: 2018-04-27T23:27:45Z DRILL-6364: Handle Cluster Info in WebUI when existing/new bits restart As a follow up to DRILL-6289, the following improvements have been done: 1. When loading the page for the first time, the WebUI enables the shutdown button without actually checking the state of the Drillbits. The ideal behaviour should be to disable the button till the state is verified. [Done] If a Drillbit is confirmed down (i.e. not in `/state` response), it is marked as OFFLINE and button is disabled. 2. When shutting down the current Drillbit, the WebUI no more has access to the cluster state. The ideal behaviour here should be to fetch the state from any of the other Drillbits to update the status. [Done] With the current Drillbit down, the other bits are requested for cluster state info and update accordingly. 3. When a new, previously unseen Drillbit comes up, the WebUI will never render it because the table is statically generated during the first page load. The idea behaviour should be to append to the table on discovery of a new node. [Done] The new Drillbit info is injected and a prompt appears to refresh the page to re-populate any missing info. This also works with feature (2) mentioned above. The only Java code change was to have the state response carry the address and http-port as a tuple, instead of the user-port (which is never used). ---
[GitHub] drill issue #1232: DRILL-6094: Decimal data type enhancements
Github user kkhatua commented on the issue: https://github.com/apache/drill/pull/1232 I would recommend we wait till we have all testing complete, since this is a big feature. ---
[GitHub] drill pull request #1239: DRILL-143: CGroup Support for Drill-on-YARN
Github user kkhatua commented on a diff in the pull request: https://github.com/apache/drill/pull/1239#discussion_r184153040 --- Diff: distribution/src/resources/yarn-drillbit.sh --- @@ -175,4 +209,11 @@ fi echo "`date` Starting drillbit on `hostname` under YARN, logging to $DRILLBIT_LOG_PATH" echo "`ulimit -a`" >> "$DRILLBIT_LOG_PATH" 2>&1 -"$DRILL_HOME/bin/runbit" exec +# Run in background +"$DRILL_HOME/bin/runbit" exec & --- End diff -- The process is momentarily in the background to capture the PID. We eventually wait for it. Are you saying that a process will not continue to run because it is in the background?? ---
[GitHub] drill issue #1239: DRILL-143: CGroup Support for Drill-on-YARN
Github user kkhatua commented on the issue: https://github.com/apache/drill/pull/1239 Thanks for that pointer, @paul-rogers ! I'll make the relevant changes and add to this commit. ---
[GitHub] drill issue #1233: Updated with links to previous releases
Github user kkhatua commented on the issue: https://github.com/apache/drill/pull/1233 Agreed. We do need a single place for showing the releases. I'll work with Bridget to change at both places. ---
[GitHub] drill pull request #1239: DRILL-143: CGroup Support for Drill-on-YARN
Github user kkhatua commented on a diff in the pull request: https://github.com/apache/drill/pull/1239#discussion_r183805056 --- Diff: distribution/src/resources/yarn-drillbit.sh --- @@ -110,6 +114,36 @@ # Enables Java GC logging. Passed from the drill.yarn.drillbit.log-gc # garbage collection option. +### Function to enforce CGroup (Refer local drillbit.sh) +check_and_enforce_cgroup(){ +dbitPid=$1; +kill -0 $dbitPid +if [ $? -gt 0 ]; then + echo "ERROR: Failed to add Drillbit to CGroup ( $DRILLBIT_CGROUP ) for 'cpu'. Ensure that the Drillbit ( pid=$dbitPid ) started up." >&2 + exit 1 +fi +SYS_CGROUP_DIR=${SYS_CGROUP_DIR:-"/sys/fs/cgroup"} +if [ -f $SYS_CGROUP_DIR/cpu/$DRILLBIT_CGROUP/cgroup.procs ]; then + echo $dbitPid > $SYS_CGROUP_DIR/cpu/$DRILLBIT_CGROUP/cgroup.procs + # Verify Enforcement + cgroupStatus=`grep -w $pid $SYS_CGROUP_DIR/cpu/${DRILLBIT_CGROUP}/cgroup.procs` + if [ -z "$cgroupStatus" ]; then --- End diff -- Fixed the changes. ---
[GitHub] drill pull request #1239: DRILL-143: CGroup Support for Drill-on-YARN
Github user kkhatua commented on a diff in the pull request: https://github.com/apache/drill/pull/1239#discussion_r183732780 --- Diff: distribution/src/resources/yarn-drillbit.sh --- @@ -110,6 +114,36 @@ # Enables Java GC logging. Passed from the drill.yarn.drillbit.log-gc # garbage collection option. +### Function to enforce CGroup (Refer local drillbit.sh) +check_and_enforce_cgroup(){ +dbitPid=$1; +kill -0 $dbitPid +if [ $? -gt 0 ]; then + echo "ERROR: Failed to add Drillbit to CGroup ( $DRILLBIT_CGROUP ) for 'cpu'. Ensure that the Drillbit ( pid=$dbitPid ) started up." >&2 + exit 1 +fi +SYS_CGROUP_DIR=${SYS_CGROUP_DIR:-"/sys/fs/cgroup"} +if [ -f $SYS_CGROUP_DIR/cpu/$DRILLBIT_CGROUP/cgroup.procs ]; then + echo $dbitPid > $SYS_CGROUP_DIR/cpu/$DRILLBIT_CGROUP/cgroup.procs + # Verify Enforcement + cgroupStatus=`grep -w $pid $SYS_CGROUP_DIR/cpu/${DRILLBIT_CGROUP}/cgroup.procs` + if [ -z "$cgroupStatus" ]; then --- End diff -- You're right. I seem to have missed negating the check. Since this check only affects publication of a message and not the actual application of the CGroup, we didn't catch it during testing. I'll fix this port and the original patch as well. ---
[GitHub] drill issue #1239: CGroup Support for Drill-on-YARN
Github user kkhatua commented on the issue: https://github.com/apache/drill/pull/1239 @Ben-Zvi please review. QA verified that Drill-on-YARN works with this patch. ---
[GitHub] drill pull request #1239: CGroup Support for Drill-on-YARN
GitHub user kkhatua opened a pull request: https://github.com/apache/drill/pull/1239 CGroup Support for Drill-on-YARN Original commit works for stand-alone Drill. During testing with Drill-on-YARN, it was discovered that while the environment is sourced, the DrillApplicationManager uses a different resource for spinning up the Drillbits. This commit applies the CGroups application logic for YARN specifically. You can merge this pull request into a Git repository by running: $ git pull https://github.com/kkhatua/drill DRILL-143_yarn Alternatively you can review and apply these changes as the patch at: https://github.com/apache/drill/pull/1239.patch To close this pull request, make a commit to your master/trunk branch with (at least) the following in the commit message: This closes #1239 commit d23b5d0674eb59743e6ff08310834f6baa8f21cc Author: Kunal Khatua <kunal@...> Date: 2018-04-23T17:31:21Z CGroup Support for Drill-on-YARN Initial patch ---
[GitHub] drill pull request #1233: Updated with links to previous releases
GitHub user kkhatua opened a pull request: https://github.com/apache/drill/pull/1233 Updated with links to previous releases Provides a link for visitors to see current and previous versions of Drill You can merge this pull request into a Git repository by running: $ git pull https://github.com/kkhatua/drill gh-pages Alternatively you can review and apply these changes as the patch at: https://github.com/apache/drill/pull/1233.patch To close this pull request, make a commit to your master/trunk branch with (at least) the following in the commit message: This closes #1233 commit 92a93a4d3f20ff64ab8dca322903c6d64d3a82f5 Author: Kunal Khatua <kkhatua@...> Date: 2018-04-20T22:25:57Z Updated with links to previous releases ---
[GitHub] drill issue #1229: DRILL-6333: Fixed Quotation marks
Github user kkhatua commented on the issue: https://github.com/apache/drill/pull/1229 Fixed it. I missed these because I detected the original issues when attempting to create javadocs for `java-exec` , `jdbc` and `jdbc-all` packages only. A search-and-replace in the entire Drill repo caught the rest. @Ben-Zvi / @vdiravka can you guys do a quick scan to review this? ---
[GitHub] drill issue #1229: DRILL-6333: Fixed Quotation marks
Github user kkhatua commented on the issue: https://github.com/apache/drill/pull/1229 I think we should. Let me walk through the entire project to be doubly sure. I'll correct them accordingly and resubmit a squashed commit.. Thanks for catching this, @vdiravka ! ---
[GitHub] drill pull request #1229: DRILL-6333: Fixed Quotation marks
GitHub user kkhatua opened a pull request: https://github.com/apache/drill/pull/1229 DRILL-6333: Fixed Quotation marks Initial step to making the source-code ready for Javadoc generation You can merge this pull request into a Git repository by running: $ git pull https://github.com/kkhatua/drill DRILL-6333 Alternatively you can review and apply these changes as the patch at: https://github.com/apache/drill/pull/1229.patch To close this pull request, make a commit to your master/trunk branch with (at least) the following in the commit message: This closes #1229 commit 82099e1bbeb455417b88d1dccaf7463fc0aecb88 Author: Kunal Khatua <kkhatua@...> Date: 2018-04-05T07:17:55Z DRILL-6333: Fixed Quotation marks Initial step to making the source-code ready for Javadoc generation ---
[GitHub] drill issue #1203: DRILL-6289: Cluster view should show more relevant inform...
Github user kkhatua commented on the issue: https://github.com/apache/drill/pull/1203 @sohami Rebased the commits on top of the latest master ( https://github.com/apache/drill/commit/931b43ec54bf47dcbb4aa9ae4499f37a8f21b408 ) ---
[GitHub] drill pull request #1203: DRILL-6289: Cluster view should show more relevant...
Github user kkhatua commented on a diff in the pull request: https://github.com/apache/drill/pull/1203#discussion_r182553045 --- Diff: common/src/main/java/org/apache/drill/exec/metrics/DrillMetrics.java --- @@ -18,6 +18,7 @@ package org.apache.drill.exec.metrics; import java.lang.management.ManagementFactory; +import java.lang.management.OperatingSystemMXBean; --- End diff -- Right. Fixed. ---
[GitHub] drill pull request #1203: DRILL-6289: Cluster view should show more relevant...
Github user kkhatua commented on a diff in the pull request: https://github.com/apache/drill/pull/1203#discussion_r182552999 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/server/Drillbit.java --- @@ -180,20 +180,22 @@ public void run() throws Exception { if (profileStoreProvider != storeProvider) { profileStoreProvider.start(); } -final DrillbitEndpoint md = engine.start(); +DrillbitEndpoint md = engine.start(); manager.start(md, engine.getController(), engine.getDataConnectionCreator(), coord, storeProvider, profileStoreProvider); -@SuppressWarnings("resource") -final DrillbitContext drillbitContext = manager.getContext(); +DrillbitContext drillbitContext = manager.getContext(); --- End diff -- Done. ---
[GitHub] drill pull request #1203: DRILL-6289: Cluster view should show more relevant...
Github user kkhatua commented on a diff in the pull request: https://github.com/apache/drill/pull/1203#discussion_r182553108 --- Diff: exec/java-exec/src/main/resources/rest/index.ftl --- @@ -79,11 +94,15 @@ ${drillbit.getState()} -<#if (model.shouldShowAdminInfo() || !model.isAuthEnabled()) && drillbit.isCurrent() > +Not Available - SHUTDOWN - +<#if (model.shouldShowAdminInfo() || !model.isAuthEnabled()) && (drillbit.isCurrent() || !model.isUserEncryptionEnabled()) > --- End diff -- Fixed ---
[GitHub] drill issue #1203: DRILL-6289: Cluster view should show more relevant inform...
Github user kkhatua commented on the issue: https://github.com/apache/drill/pull/1203 @sohami , I've answered your questions in the review. ---
[GitHub] drill pull request #1203: DRILL-6289: Cluster view should show more relevant...
Github user kkhatua commented on a diff in the pull request: https://github.com/apache/drill/pull/1203#discussion_r182502500 --- Diff: exec/java-exec/src/main/resources/rest/index.ftl --- @@ -283,20 +308,114 @@ alert(errorThrown); }, success: function(data) { - alert(data["response"]); + alert(data["response"]); button.prop('disabled',true).css('opacity',0.5); } }); } } - + + function remoteShutdown(button,host) { --- End diff -- Done. Combined into one. ---
[GitHub] drill pull request #1203: DRILL-6289: Cluster view should show more relevant...
Github user kkhatua commented on a diff in the pull request: https://github.com/apache/drill/pull/1203#discussion_r182502418 --- Diff: exec/java-exec/src/main/resources/rest/index.ftl --- @@ -79,9 +91,14 @@ ${drillbit.getState()} + <#if (model.shouldShowAdminInfo() || !model.isAuthEnabled()) && drillbit.isCurrent() > - SHUTDOWN + SHUTDOWN + +<#elseif (model.shouldShowAdminInfo() || !model.isAuthEnabled()) && !drillbit.isCurrent() > + --- End diff -- Done the change for checking for HTTPS. However, this commit just marginally fixes the UI, which (otherwise) should have a full revamp. I did attempt some of the details proposed in DRILL-6244, but the effort is not trivial. For an unsecured setup, there is little reason to restrict the ability to shutdown multiple nodes, because you can always go directly and shut down those nodes without authentication. This simply makes it convenient to have a single UI window to achieve the same outcome. ---
[GitHub] drill pull request #1203: DRILL-6289: Cluster view should show more relevant...
Github user kkhatua commented on a diff in the pull request: https://github.com/apache/drill/pull/1203#discussion_r182501277 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/server/Drillbit.java --- @@ -189,11 +189,16 @@ public void run() throws Exception { drillbitContext.getOptionManager().init(); javaPropertiesToSystemOptions(); manager.getContext().getRemoteFunctionRegistry().init(context.getConfig(), storeProvider, coord); -registrationHandle = coord.register(md); webServer.start(); - +//Discovering HTTP port (in case of port hunting) +if (webServer.isRunning()) { + int httpPort = getWebServerPort(); + registrationHandle = coord.register(md.toBuilder().setHttpPort(httpPort).build()); +} else { + //WebServer is not running + registrationHandle = coord.register(md); +} --- End diff -- Done. ---
[GitHub] drill pull request #1203: DRILL-6289: Cluster view should show more relevant...
Github user kkhatua commented on a diff in the pull request: https://github.com/apache/drill/pull/1203#discussion_r182501230 --- Diff: common/src/main/java/org/apache/drill/exec/metrics/DrillMetrics.java --- @@ -60,6 +61,7 @@ private static void registerSystemMetrics() { REGISTRY.registerAll(new BufferPoolMetricSet(ManagementFactory.getPlatformMBeanServer())); REGISTRY.registerAll(new MemoryUsageGaugeSet()); REGISTRY.registerAll(new ThreadStatesGaugeSet()); + REGISTRY.registerAll(new CpuGaugeSet(ManagementFactory.getOperatingSystemMXBean())); --- End diff -- Retained the GaugeSet to carry ProcessCPU and Uptime information as well. So, I've not replaced `registerAll`. ---
[GitHub] drill pull request #1203: DRILL-6289: Cluster view should show more relevant...
Github user kkhatua commented on a diff in the pull request: https://github.com/apache/drill/pull/1203#discussion_r182500652 --- Diff: exec/java-exec/src/main/resources/rest/index.ftl --- @@ -67,9 +73,15 @@ ${i} ${drillbit.getAddress()}<#if drillbit.isCurrent()> Current +<#else> + --- End diff -- The `shutdown()` method should be in the freemarker's IF block that checks for authentication. Any other method is benign and does not risk changing the state of the Drillbit. Also, this method simply will pop out a new window from which that Drillbit's UI can be accessed. This should help if someone wishes to navigate to that node to, say, review logs or even submit queries with that node as the foreman, without having to type the address (which might have a different HTTP port). ---
[GitHub] drill pull request #1203: DRILL-6289: Cluster view should show more relevant...
Github user kkhatua commented on a diff in the pull request: https://github.com/apache/drill/pull/1203#discussion_r182499705 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/server/rest/WebServer.java --- @@ -158,7 +158,13 @@ public void start() throws Exception { final int selectors = config.getInt(ExecConstants.HTTP_JETTY_SERVER_SELECTORS); final QueuedThreadPool threadPool = new QueuedThreadPool(2, 2, 6); embeddedJetty = new Server(threadPool); -embeddedJetty.setHandler(createServletContextHandler(authEnabled)); +ServletContextHandler webServerContext = createServletContextHandler(authEnabled); +//Allow for Other Drillbits to make REST calls +FilterHolder filterHolder = new FilterHolder(CrossOriginFilter.class); +filterHolder.setInitParameter("allowedOrigins", "*"); --- End diff -- Yes. CORS is basically one of the means to prevent DoS attacks. I've added additional filter that allows access only for `/status/metrics` path. ---
[GitHub] drill pull request #1203: DRILL-6289: Cluster view should show more relevant...
Github user kkhatua commented on a diff in the pull request: https://github.com/apache/drill/pull/1203#discussion_r182499262 --- Diff: exec/java-exec/src/main/resources/rest/index.ftl --- @@ -260,7 +285,7 @@ function fillQueryCount(row_id) { var requestPath = "/queriesCount"; var url = getRequestUrl(requestPath); - var result = $.ajax({ + var result = $.ajax({ --- End diff -- Fixed ---
[GitHub] drill pull request #1203: DRILL-6289: Cluster view should show more relevant...
Github user kkhatua commented on a diff in the pull request: https://github.com/apache/drill/pull/1203#discussion_r182499088 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/server/Drillbit.java --- @@ -180,7 +180,7 @@ public void run() throws Exception { if (profileStoreProvider != storeProvider) { profileStoreProvider.start(); } -final DrillbitEndpoint md = engine.start(); +DrillbitEndpoint md = engine.start(); --- End diff -- Removed since the scope is anyway within this method only. ---
[GitHub] drill issue #1204: DRILL-6318
Github user kkhatua commented on the issue: https://github.com/apache/drill/pull/1204 You could try re-pushing the last commit with a different commit ID (change a trailing text in the PR to have Git generate a different ID). I don't see an option within Travis to resubmit. ---
[GitHub] drill issue #1203: DRILL-6289: Cluster view should show more relevant inform...
Github user kkhatua commented on the issue: https://github.com/apache/drill/pull/1203 Forgot to mention, this also resolves DRILL-5261 because of the protobuf carrying the HTTP port info. @arina-ielchiieva / @sohami / @dvjyothsna , please have a review of the changes. ---
[GitHub] drill issue #1203: DRILL-6289: Cluster view should show more relevant inform...
Github user kkhatua commented on the issue: https://github.com/apache/drill/pull/1203 Updated with an additional set of changes. * Added CPU metrics (obtained from [OperatingSystemMXBean.getProcessCpuLoad()](https://docs.oracle.com/javase/7/docs/jre/api/management/extension/com/sun/management/OperatingSystemMXBean.html#getProcessCpuLoad()) ) * Added uptime information so that you know if a Drillbit ever has a different start time. * Grey-out a button (disabled, i.e.) if the node is offline or remote & requires HTTPS * Additional changes based on comments Screenshot ![image](https://user-images.githubusercontent.com/4335237/38833357-1f14b4d6-417a-11e8-98e4-1462628e64bc.png) ---
[GitHub] drill pull request #1203: DRILL-6289: Cluster view should show more relevant...
Github user kkhatua commented on a diff in the pull request: https://github.com/apache/drill/pull/1203#discussion_r181834064 --- Diff: exec/java-exec/src/main/resources/rest/index.ftl --- @@ -283,20 +308,114 @@ alert(errorThrown); }, success: function(data) { - alert(data["response"]); + alert(data["response"]); button.prop('disabled',true).css('opacity',0.5); } }); } } - + + function remoteShutdown(button,host) { + var url = location.protocol + "//" + host + "/gracefulShutdown"; + var result = $.ajax({ +type: 'POST', +url: url, +contentType : 'text/plain', +complete: function(data) { +alert(data.responseJSON["response"]); +button.prop('disabled',true).css('opacity',0.5); +} + }); + } + + + function popOutRemoteDbitUI(dbitHost, dbitPort) { --- End diff -- We can't put this and the remaining functions within the IF block since the localhost operations are still valid. i.e. an authenticated client on a specific Drillbit can ping that Drillbit for refresh of the metrics. Only shutdown function needs to be managed with the IF block ---
[GitHub] drill pull request #1203: DRILL-6289: Cluster view should show more relevant...
Github user kkhatua commented on a diff in the pull request: https://github.com/apache/drill/pull/1203#discussion_r181510944 --- Diff: common/src/main/java/org/apache/drill/exec/metrics/CpuGaugeSet.java --- @@ -0,0 +1,62 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.metrics; + +import java.lang.management.OperatingSystemMXBean; +import java.util.HashMap; +import java.util.Map; + +import com.codahale.metrics.Gauge; +import com.codahale.metrics.Metric; +import com.codahale.metrics.MetricSet; + +/** + * Creates a Cpu GaugeSet + */ +class CpuGaugeSet implements MetricSet { --- End diff -- I thought we might want to expand the GaugeSet to carry additional metrics like `ProcessCpuLoad` and `ProcessCpuTime` . Since we can shrink the {{SHUTDOWN}} button to a symbol, we do have some real-estate to provide the ProcessCPULoad information as well. Would it help to have that? https://docs.oracle.com/javase/7/docs/jre/api/management/extension/com/sun/management/OperatingSystemMXBean.html#getProcessCpuLoad() ---
[GitHub] drill issue #1203: DRILL-6289: Cluster view should show more relevant inform...
Github user kkhatua commented on the issue: https://github.com/apache/drill/pull/1203 Latest screenshot with the power button, ![image](https://user-images.githubusercontent.com/4335237/38701447-b9588b1a-3e52-11e8-957a-272e4e72e350.png) ---
[GitHub] drill issue #1200: DRILL-143: Support CGROUPs resource management
Github user kkhatua commented on the issue: https://github.com/apache/drill/pull/1200 @Ben-Zvi .. batch committers would be doing that. I've got the feed back that it helps to preserve the commit-&-comment history and the squashing can be done in pre-commit merge branch. @paul-rogers ? ---
[GitHub] drill issue #1203: DRILL-6289: Cluster view should show more relevant inform...
Github user kkhatua commented on the issue: https://github.com/apache/drill/pull/1203 Will add some new screenshots. In the meanwhile, I was thinking of reduce the 'SHUTDOWN' button to just the power Symbol, because we already have the 'STATUS' field. The single button does look odd in this case, but would be fine when the option appears for all Drillbits in an non-secured Drill cluster setup. Would that work, @arina-ielchiieva ? ---
[GitHub] drill pull request #1203: DRILL-6289: Cluster view should show more relevant...
Github user kkhatua commented on a diff in the pull request: https://github.com/apache/drill/pull/1203#discussion_r181161567 --- Diff: exec/java-exec/src/main/resources/rest/index.ftl --- @@ -52,12 +52,18 @@ # - Address + Address + Heap Memory Usage + Direct Memory Usage --- End diff -- Reduced it to `Estimated Direct Memory ACTIVELY in use (as percent of Peak Usage)` ---
[GitHub] drill pull request #1203: DRILL-6289: Cluster view should show more relevant...
Github user kkhatua commented on a diff in the pull request: https://github.com/apache/drill/pull/1203#discussion_r181157818 --- Diff: common/src/main/java/org/apache/drill/exec/metrics/CpuGaugeSet.java --- @@ -0,0 +1,62 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.metrics; + +import java.lang.management.OperatingSystemMXBean; +import java.util.HashMap; +import java.util.Map; + +import com.codahale.metrics.Gauge; +import com.codahale.metrics.Metric; +import com.codahale.metrics.MetricSet; + +/** + * Creates a Cpu GaugeSet + */ +class CpuGaugeSet implements MetricSet { + + private OperatingSystemMXBean osMXBean; + + CpuGaugeSet() {}; --- End diff -- It belongs to the same package where it is referenced, so i thought it didn't need public visibility. ---
[GitHub] drill pull request #1203: DRILL-6289: Cluster view should show more relevant...
Github user kkhatua commented on a diff in the pull request: https://github.com/apache/drill/pull/1203#discussion_r181156759 --- Diff: exec/java-exec/src/main/resources/rest/index.ftl --- @@ -79,9 +91,14 @@ ${drillbit.getState()} + --- End diff -- Missed catching that without an IDEA. Thanks ---
[GitHub] drill pull request #1203: DRILL-6289: Cluster view should show more relevant...
Github user kkhatua commented on a diff in the pull request: https://github.com/apache/drill/pull/1203#discussion_r181156606 --- Diff: exec/java-exec/src/main/resources/rest/index.ftl --- @@ -52,12 +52,18 @@ # - Address + Address + Heap Memory Usage + Direct Memory Usage --- End diff -- 1. The problem is that the Peak Usage can go up over time. Adding 'so far' indicates that this isn't a final value. 2. We could drop it, but this was meant to indicate less confusion for users wondering why the MaxDirectMemory value is not reflected here. It does. however, make it rather verbose. ---
[GitHub] drill pull request #1203: DRILL-6289: Cluster view should show more relevant...
Github user kkhatua commented on a diff in the pull request: https://github.com/apache/drill/pull/1203#discussion_r181155880 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/server/rest/WebServer.java --- @@ -296,12 +302,19 @@ private ConstraintSecurityHandler createSecurityHandler() { } public int getPort() { -if (embeddedJetty == null || embeddedJetty.getConnectors().length != 1) { +if (!isRunning()) { throw new UnsupportedOperationException("Http is not enabled"); } return ((ServerConnector)embeddedJetty.getConnectors()[0]).getPort(); } + public boolean isRunning() { --- End diff -- Agreed ---
[GitHub] drill pull request #1203: DRILL-6289: Cluster view should show more relevant...
Github user kkhatua commented on a diff in the pull request: https://github.com/apache/drill/pull/1203#discussion_r181155596 --- Diff: common/src/main/java/org/apache/drill/exec/metrics/CpuGaugeSet.java --- @@ -0,0 +1,62 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.metrics; + +import java.lang.management.OperatingSystemMXBean; +import java.util.HashMap; +import java.util.Map; + +import com.codahale.metrics.Gauge; +import com.codahale.metrics.Metric; +import com.codahale.metrics.MetricSet; + +/** + * Creates a Cpu GaugeSet + */ +class CpuGaugeSet implements MetricSet { + + private OperatingSystemMXBean osMXBean; + + CpuGaugeSet() {}; --- End diff -- I was originally thinking of making this private, so that the instance can only be created with the OS Bean. I might have left it as is, thinking the default behaviour could be without the need to explicitly pass the bean. I'll make it private. ---
[GitHub] drill pull request #1203: DRILL-6289: Cluster view should show more relevant...
Github user kkhatua commented on a diff in the pull request: https://github.com/apache/drill/pull/1203#discussion_r181154984 --- Diff: common/src/main/java/org/apache/drill/exec/metrics/CpuGaugeSet.java --- @@ -0,0 +1,62 @@ +/** --- End diff -- Never noticed that the license header is actually a javadoc comment. Will fix this. ---
[GitHub] drill issue #1203: DRILL-6289: Cluster view should show more relevant inform...
Github user kkhatua commented on the issue: https://github.com/apache/drill/pull/1203 @arina-ielchiieva / @dvjyothsna Updated the PR with changes based on the comments. Could you please review and let me know what else would need to be fixed. ---
[GitHub] drill issue #1200: DRILL-143: Support CGROUPs resource management
Github user kkhatua commented on the issue: https://github.com/apache/drill/pull/1200 @Ben-Zvi / @paul-rogers I've updated the PR with changes based on your comments. Waiting for a review. ---
[GitHub] drill issue #1168: DRILL-6246: Reduced the size of the jdbc-all jar file
Github user kkhatua commented on the issue: https://github.com/apache/drill/pull/1168 @sachouche have you had a chance to test the generated JDBC driver with Spotfire/SQuirreL ? ---
[GitHub] drill pull request #1205: 1.13.0
Github user kkhatua commented on a diff in the pull request: https://github.com/apache/drill/pull/1205#discussion_r180564452 --- Diff: exec/java-exec/src/main/resources/rest/query/query.ftl --- @@ -21,11 +21,12 @@ - + --- End diff -- There seems to be an unprintable character you might have accidentally introduced, and should be removed ---
[GitHub] drill pull request #1205: 1.13.0
Github user kkhatua commented on a diff in the pull request: https://github.com/apache/drill/pull/1205#discussion_r180564629 --- Diff: exec/java-exec/src/main/resources/rest/query/query_bak.ftl --- @@ -0,0 +1,124 @@ +<#-- Licensed to the Apache Software Foundation (ASF) under one or more contributor + license agreements. See the NOTICE file distributed with this work for additional + information regarding copyright ownership. The ASF licenses this file to + You under the Apache License, Version 2.0 (the "License"); you may not use + this file except in compliance with the License. You may obtain a copy of + the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required + by applicable law or agreed to in writing, software distributed under the + License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS + OF ANY KIND, either express or implied. See the License for the specific + language governing permissions and limitations under the License. --> + +<#include "*/generic.ftl"> +<#macro page_head> +<#if model?? && model> + + +
[GitHub] drill pull request #1205: 1.13.0
Github user kkhatua commented on a diff in the pull request: https://github.com/apache/drill/pull/1205#discussion_r180565011 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/client/DrillClient.java --- @@ -577,6 +581,8 @@ public Version getServerVersion() { checkArgument(type == QueryType.LOGICAL || type == QueryType.PHYSICAL || type == QueryType.SQL, String.format("Only query types %s, %s and %s are supported in this API", QueryType.LOGICAL, QueryType.PHYSICAL, QueryType.SQL)); +//bingxing.wang.log --- End diff -- Clean up needed. ---
[GitHub] drill pull request #1205: 1.13.0
Github user kkhatua commented on a diff in the pull request: https://github.com/apache/drill/pull/1205#discussion_r180564939 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/client/DrillClient.java --- @@ -170,6 +170,8 @@ public DrillClient(DrillConfig config, ClusterCoordinator coordinator, BufferAll this.reconnectTimes = config.getInt(ExecConstants.BIT_RETRY_TIMES); this.reconnectDelay = config.getInt(ExecConstants.BIT_RETRY_DELAY); this.supportComplexTypes = config.getBoolean(ExecConstants.CLIENT_SUPPORT_COMPLEX_TYPES); + +logger.debug("picasso: DrillClient(): ownsZkConnection:" + this.ownsZkConnection); --- End diff -- The log message is cryptic and not clear. ---
[GitHub] drill pull request #1203: DRILL-6289: Cluster view should show more relevant...
Github user kkhatua commented on a diff in the pull request: https://github.com/apache/drill/pull/1203#discussion_r180553686 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/service/ServiceEngine.java --- @@ -102,6 +105,7 @@ public DrillbitEndpoint start() throws DrillbitStartupException, UnknownHostExce DrillbitEndpoint partialEndpoint = DrillbitEndpoint.newBuilder() .setAddress(hostName) .setUserPort(userPort) +.setHttpPort(httpPort) --- End diff -- Thanks to @dvjyothsna , referred to an outdated PR #1042 for the endpoint registration after the web-server starts up. ---
[GitHub] drill pull request #1203: DRILL-6289: Cluster view should show more relevant...
Github user kkhatua commented on a diff in the pull request: https://github.com/apache/drill/pull/1203#discussion_r180296333 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/service/ServiceEngine.java --- @@ -102,6 +105,7 @@ public DrillbitEndpoint start() throws DrillbitStartupException, UnknownHostExce DrillbitEndpoint partialEndpoint = DrillbitEndpoint.newBuilder() .setAddress(hostName) .setUserPort(userPort) +.setHttpPort(httpPort) --- End diff -- That's tricky. The webserver doesn't come up until after the Endpoint is created and ready. One possible solution would be to perform the port hunt before the endpoint, and assign it to a newly instantiated webserver after the Endpoint is created. ---
[GitHub] drill pull request #1203: DRILL-6289: Cluster view should show more relevant...
Github user kkhatua commented on a diff in the pull request: https://github.com/apache/drill/pull/1203#discussion_r180248133 --- Diff: exec/java-exec/src/main/resources/rest/index.ftl --- @@ -289,14 +314,94 @@ }); } } - + + function remoteShutdown(button,host) { + var url = location.protocol + "//" + host + "/gracefulShutdown"; + var result = $.ajax({ +type: 'POST', +url: url, +contentType : 'text/plain', +complete: function(data) { +alert(data.responseJSON["response"]); +button.prop('disabled',true).css('opacity',0.5); +} + }); + } + function getRequestUrl(requestPath) { var protocol = location.protocol; var host = location.host; var url = protocol + "//" + host + requestPath; return url; } - + + //Iterates through all the nodes for update + function reloadMetrics() { + for (i = 1; i <= size; i++) { + var address = $("#row-"+i).find("#address").contents().get(0).nodeValue.trim(); + var httpPort = $("#row-"+i).find("#httpPort").contents().get(0).nodeValue.trim(); + updateMetricsHtml(address, httpPort, i); + } + } + + //Update memory + //TODO: HTTPS? + function updateMetricsHtml(drillbit,webport,idx) { +var result = $.ajax({ + type: 'GET', --- End diff -- Makes sense. I'll set the default values to Unknown rather than zero. ---
[GitHub] drill pull request #1203: DRILL-6289: Cluster view should show more relevant...
Github user kkhatua commented on a diff in the pull request: https://github.com/apache/drill/pull/1203#discussion_r180226226 --- Diff: exec/java-exec/src/main/resources/rest/index.ftl --- @@ -289,14 +314,94 @@ }); } } - + + function remoteShutdown(button,host) { + var url = location.protocol + "//" + host + "/gracefulShutdown"; + var result = $.ajax({ +type: 'POST', +url: url, +contentType : 'text/plain', +complete: function(data) { +alert(data.responseJSON["response"]); +button.prop('disabled',true).css('opacity',0.5); +} + }); + } + function getRequestUrl(requestPath) { var protocol = location.protocol; var host = location.host; var url = protocol + "//" + host + requestPath; return url; } - + + //Iterates through all the nodes for update + function reloadMetrics() { + for (i = 1; i <= size; i++) { + var address = $("#row-"+i).find("#address").contents().get(0).nodeValue.trim(); + var httpPort = $("#row-"+i).find("#httpPort").contents().get(0).nodeValue.trim(); + updateMetricsHtml(address, httpPort, i); + } + } + + //Update memory + //TODO: HTTPS? + function updateMetricsHtml(drillbit,webport,idx) { +var result = $.ajax({ + type: 'GET', --- End diff -- Good catch. I believe we'll get invalid certificate errors since the certificate exceptions most likely are not added. So the question I have then is whether we should disable making **any** remote HTTPS calls at all, or take a chance with the hope that the invalid certificate +might+ have been added? ---
[GitHub] drill issue #1203: DRILL-6289: Cluster view should show more relevant inform...
Github user kkhatua commented on the issue: https://github.com/apache/drill/pull/1203 @arina-ielchiieva You're correct about the inability to shutdown other Drillbits from the WebUI. But, from what @dvjyothsna explained, it seems that this was the result of a technical hurdle with HTTP**S** protocol. My understanding was that issuing shutdown to secure remote nodes won't work because of lack of certificates for those nodes, making it difficult for the remote Drillbit to authenticate the shutdown command. Keeping that in mind, the SHUTDOWN buttons would appear only for unsecured Drillbits, i.e. when Authentication is **_not_** enabled. https://github.com/kkhatua/drill/blob/dd507ea95a7d0ef4904092ef4fc43ee3b44ef058/exec/java-exec/src/main/resources/rest/index.ftl#L100 For setups requiring Authentication, the shutdown button would not appear. ---
[GitHub] drill issue #1203: DRILL-6289: Cluster view should show more relevant inform...
Github user kkhatua commented on the issue: https://github.com/apache/drill/pull/1203 @arina-ielchiieva could you please review this? ---
[GitHub] drill issue #1203: DRILL-6289: Cluster view should show more relevant inform...
Github user kkhatua commented on the issue: https://github.com/apache/drill/pull/1203 **PopOut to open in new window:** ![popout](https://user-images.githubusercontent.com/4335237/38448772-f198f47e-39bb-11e8-848f-19c15cb9c24a.png) Tooltip describing the new columns ![tooltipinfo](https://user-images.githubusercontent.com/4335237/38448769-ef047594-39bb-11e8-8697-8644d8130fe3.png) ---
[GitHub] drill issue #1203: DRILL-6289: Cluster view should show more relevant inform...
Github user kkhatua commented on the issue: https://github.com/apache/drill/pull/1203 **Snapshot:** ![screenshot](https://user-images.githubusercontent.com/4335237/38448762-e52ac3e8-39bb-11e8-9b42-5276bf3e3449.png) ---
[GitHub] drill pull request #1203: DRILL-6289: Cluster view should show more relevant...
GitHub user kkhatua opened a pull request: https://github.com/apache/drill/pull/1203 DRILL-6289: Cluster view should show more relevant information The commits make use of AJAX calls by the browser to all the Drillbits for status metrics, which are then displayed here. This works with unsecured Drill sessions, while secure setups will not be able to show the metrics because of certificate authentication. To achieve this, the HTTP port needed to be discoverable, for which changes to the protobuf were done to provide the HTTP port number as well. The Drillbit homepage has been extended to show the following: * Heap Memory in use * Direct Memory (actively) in use - Since we're not able to get the total memory held by Netty at the moment, but only what is currently allocated to running queries. The base memory shown is the total peak allocation by Drill for Direct, because the actual Direct memory held is not available. In an active stable system in production, this should be around 90%. * Average (System) Load Factor reported by the OS for that node. * Pill indicating the current Drillbit whose WebUI is being viewed (existing UX feature), with an option to load (in a new window) the UI from other Drillbits. Information such as the User, Data and Control port numbers don't help much during general cluster health, so it might be worth removing this information if more real-estate is needed. For now, we are retaining this. You can merge this pull request into a Git repository by running: $ git pull https://github.com/kkhatua/drill DRILL-6289 Alternatively you can review and apply these changes as the patch at: https://github.com/apache/drill/pull/1203.patch To close this pull request, make a commit to your master/trunk branch with (at least) the following in the commit message: This closes #1203 commit 94e8bbaefea7c6fe9b5729bcec3cba1dbbb3bc8f Author: Kunal Khatua <kkhatua@...> Date: 2018-03-26T05:03:12Z Protobuf change to carry HTTP port info commit 9b1513da949da870fc8d9a96472c4ca307e7d5e8 Author: Kunal Khatua <kkhatua@...> Date: 2018-04-05T22:21:47Z Allow CORS for access to remote Drillbit metrics Cross-origin resource sharing (CORS) is required to ensure that the WebServer is able serve REST calls for status pages. commit eb9ae7c36a26194b5a49685230e2a94282efee84 Author: Kunal Khatua <kkhatua@...> Date: 2018-04-05T21:21:18Z Addition of Bootstrap's Glyphicons As part of the Bootstrap's components, this meets Apache License criteria Remove popout.png commit 45e4831403997661c1df578a95e3c985f64ff4f8 Author: Kunal Khatua <kkhatua@...> Date: 2018-04-06T23:08:43Z Materialize relevant metrics 1. Heap memory (incl usage) 2. Heap memory (incl usage) 3. Average System Load (last 1 min) 4. Option to view from other nodes (pop out) 5. Added Glyphicons commit dd507ea95a7d0ef4904092ef4fc43ee3b44ef058 Author: Kunal Khatua <kkhatua@...> Date: 2018-03-30T06:24:54Z Update System Table and related tests 1. Updated System Table to show HTTP port 2. Updated unit tests ---
[GitHub] drill issue #1199: DRILL-6303: Provide a button to copy the Drillbit's JStac...
Github user kkhatua commented on the issue: https://github.com/apache/drill/pull/1199 @arina-ielchiieva I've made updates based on the review. ![image](https://user-images.githubusercontent.com/4335237/38391975-c2e2dc08-38da-11e8-814e-fc974143afaf.png) I've also skipped adding re-adding the Glyphicons package because we already have that as part of DRILL-6279 (PR #1197 ). ---
[GitHub] drill issue #1197: DRILL-6279: UI indicates operators that spilled in-memory...
Github user kkhatua commented on the issue: https://github.com/apache/drill/pull/1197 @arina-ielchiieva done the requested changes. ---
[GitHub] drill pull request #1197: DRILL-6279: UI indicates operators that spilled in...
Github user kkhatua commented on a diff in the pull request: https://github.com/apache/drill/pull/1197#discussion_r179557189 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/server/rest/profile/OperatorWrapper.java --- @@ -190,8 +223,59 @@ public void addSummary(TableBuilder tb, HashMap<String, Long> majorFragmentBusyT tb.appendFormattedInteger(recordSum); final ImmutablePair<OperatorProfile, Integer> peakMem = Collections.max(opList, Comparators.operatorPeakMemory); -tb.appendBytes(Math.round(memSum / size)); -tb.appendBytes(peakMem.getLeft().getPeakLocalMemoryAllocated()); + +//Inject spill-to-disk attributes +Map<String, String> avgSpillMap = null; +Map<String, String> maxSpillMap = null; +if (hasSpilledToDisk) { + avgSpillMap = new HashMap<>(); + //Average SpillCycle + float avgSpillCycle = (float) spillCycleSum/size; --- End diff -- My bad. I was thinking the sum was a long. I'll fix this. ---
[GitHub] drill pull request #1197: DRILL-6279: UI indicates operators that spilled in...
Github user kkhatua commented on a diff in the pull request: https://github.com/apache/drill/pull/1197#discussion_r179552510 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/server/rest/profile/OperatorWrapper.java --- @@ -38,7 +39,16 @@ * Wrapper class for profiles of ALL operator instances of the same operator type within a major fragment. */ public class OperatorWrapper { + //private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(OperatorWrapper.class); --- End diff -- Ok. I see a lot of other places where it is commented out. I'll uncomment it with an unused tag for the compler to ignore. Comes very handy in debugging when making changes ---
[GitHub] drill pull request #1197: DRILL-6279: UI indicates operators that spilled in...
Github user kkhatua commented on a diff in the pull request: https://github.com/apache/drill/pull/1197#discussion_r179552379 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/server/rest/profile/OperatorWrapper.java --- @@ -38,7 +39,16 @@ * Wrapper class for profiles of ALL operator instances of the same operator type within a major fragment. */ public class OperatorWrapper { + //private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(OperatorWrapper.class); + + private static final String HTML_ATTRIB_SPILLS = "spills"; + private static final String HTML_ATTRIB_CLASS = "class"; + private static final String HTML_ATTRIB_STYLE = "style"; + private static final String HTML_ATTRIB_TITLE = "title"; + private static final DecimalFormat DECIMAL_FORMATTER = new DecimalFormat("#.##"); private static final String UNKNOWN_OPERATOR = "UNKNOWN_OPERATOR"; + //-ve value to indicate absence of metric --- End diff -- Using a negative ("-ve") value constant. Will fix the comment ---
[GitHub] drill pull request #1197: DRILL-6279: UI indicates operators that spilled in...
Github user kkhatua commented on a diff in the pull request: https://github.com/apache/drill/pull/1197#discussion_r179552086 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/server/rest/profile/OperatorWrapper.java --- @@ -190,8 +223,59 @@ public void addSummary(TableBuilder tb, HashMap<String, Long> majorFragmentBusyT tb.appendFormattedInteger(recordSum); final ImmutablePair<OperatorProfile, Integer> peakMem = Collections.max(opList, Comparators.operatorPeakMemory); -tb.appendBytes(Math.round(memSum / size)); -tb.appendBytes(peakMem.getLeft().getPeakLocalMemoryAllocated()); + +//Inject spill-to-disk attributes +Map<String, String> avgSpillMap = null; +Map<String, String> maxSpillMap = null; +if (hasSpilledToDisk) { + avgSpillMap = new HashMap<>(); + //Average SpillCycle + float avgSpillCycle = (float) spillCycleSum/size; --- End diff -- Needed the value to be in decimal (formatter will limit it to 2 decimal spaces). ---
[GitHub] drill pull request #1197: DRILL-6279: UI indicates operators that spilled in...
Github user kkhatua commented on a diff in the pull request: https://github.com/apache/drill/pull/1197#discussion_r179551936 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/server/rest/profile/OperatorWrapper.java --- @@ -190,8 +223,59 @@ public void addSummary(TableBuilder tb, HashMap<String, Long> majorFragmentBusyT tb.appendFormattedInteger(recordSum); final ImmutablePair<OperatorProfile, Integer> peakMem = Collections.max(opList, Comparators.operatorPeakMemory); -tb.appendBytes(Math.round(memSum / size)); -tb.appendBytes(peakMem.getLeft().getPeakLocalMemoryAllocated()); + +//Inject spill-to-disk attributes +Map<String, String> avgSpillMap = null; +Map<String, String> maxSpillMap = null; +if (hasSpilledToDisk) { + avgSpillMap = new HashMap<>(); + //Average SpillCycle + float avgSpillCycle = (float) spillCycleSum/size; + avgSpillMap.put(HTML_ATTRIB_TITLE, DECIMAL_FORMATTER.format(avgSpillCycle) + " Spills on average"); --- End diff -- Agreed. Will fix it. ---
[GitHub] drill pull request #1197: DRILL-6279: UI indicates operators that spilled in...
Github user kkhatua commented on a diff in the pull request: https://github.com/apache/drill/pull/1197#discussion_r179551687 --- Diff: exec/java-exec/src/main/resources/rest/profile/profile.ftl --- @@ -372,6 +372,16 @@ table.sortable thead .sorting_desc { background-image: url("/static/img/black-de
[GitHub] drill issue #1200: DRILL-143: Support CGROUPs resource management
Github user kkhatua commented on the issue: https://github.com/apache/drill/pull/1200 @Ben-Zvi / @paul-rogers I discovered that the path `/cgroup` is unique only to my installation of the package on CentOS. The standard path is `/sys/fs/cgroup`. So, I've made changes that allow for users to specify the CGroups location. Without this, the latest commit complains as expected: ``` [root@kk127 ~]# /opt/mapr/drill/apache-drill-1.14.0-SNAPSHOT/bin/drillbit.sh restart Stopping drillbit .. Starting drillbit, logging to /var/log/drill/drillbit.out ERROR: CGroup drillcpu not found. Ensure that daemon is running, SYS_CGROUP_DIR is correctly set (currently, /sys/fs/cgroup ), and that the CGroup exists ``` The `drill-env.sh` also specifies that the enforcement is only for CPU. I'll put this in the documentation of the feature as well. ---
[GitHub] drill issue #1200: DRILL-143: Support CGROUPs resource management
Github user kkhatua commented on the issue: https://github.com/apache/drill/pull/1200 @paul-rogers very good point about the issue of the heartbeat thread competing with other Drillbit process threads for CPU. When writing up the documentation for this, we'll have to ensure that users also configure the parallelization to a reasonable level to ensure that CPU thrashing is minimized with CGroups enforced. ---
[GitHub] drill pull request #1200: DRILL-143: Support CGROUPs resource management
Github user kkhatua commented on a diff in the pull request: https://github.com/apache/drill/pull/1200#discussion_r179292376 --- Diff: distribution/src/resources/drillbit.sh --- @@ -127,6 +127,44 @@ check_before_start() fi } +check_after_start(){ +#check if the process is running +if [ -f $pid ]; then + dbitProc=$(ps -ef | grep `cat $pid` | grep Drillbit) + if [ -n "$dbitProc" ]; then +# Check and enforce for CGroup +if [ -n "$DRILLBIT_CGROUP" ]; then + check_and_enforce_cgroup `cat $pid` +fi + fi +fi +} + +check_and_enforce_cgroup(){ +dbitPid=$1; +#if [ $(`ps -o cgroup` | grep -c $DRILLBIT_CGROUP ) -eq 1 ]; then +if [ -f /cgroup/cpu/${DRILLBIT_CGROUP}/cgroup.procs ]; then + echo $dbitPid > /cgroup/cpu/${DRILLBIT_CGROUP}/cgroup.procs + # Verify Enforcement + cgroupStatus=`grep -w $pid /cgroup/cpu/${DRILLBIT_CGROUP}/cgroup.procs` + if [ -z "$cgroupStatus" ]; then +#Ref: https://www.kernel.org/doc/Documentation/scheduler/sched-bwc.txt +let cpu_quota=`cat /cgroup/cpu/${DRILLBIT_CGROUP}/cpu.cfs_quota_us` +let cpu_period=`cat /cgroup/cpu/${DRILLBIT_CGROUP}/cpu.cfs_period_us` +if [ $cpu_period -gt 0 ] && [ $cpu_quota -gt 0 ]; then + coresAllowed="(up to "`echo $(( 100 * $cpu_quota / $cpu_period )) | sed 's/..$/.&/'`" cores allowed)" +fi +echo "WARN: Drillbit's CPU resource usage will be managed under the CGroup : $DRILLBIT_CGROUP "$coresAllowed + else +echo "ERROR: Failed to add Drillbit to CGroup ( $DRILLBIT_CGROUP ) for resource usage management. Ensure that the cgroup manages CPU" +exit 1 + fi +else + echo "ERROR: cgroup $DRILLBIT_CGROUP does not found. Ensure that daemon is running and cgroup exists" + exit 1 --- End diff -- Good point. Ideally, we should prevent the Drillbit from starting up (or in this case, it should shut down), if CGroups couldn't be applied. My concern is that if CGroups is not being enforced, we're running a process that can (potentially) consume excess CPU resources. Should I shut down the Drillbit in such a scenario, or move on with just a WARN message? ---
[GitHub] drill pull request #1200: DRILL-143: Support CGROUPs resource management
Github user kkhatua commented on a diff in the pull request: https://github.com/apache/drill/pull/1200#discussion_r179287166 --- Diff: distribution/src/resources/drillbit.sh --- @@ -127,6 +127,44 @@ check_before_start() fi } +check_after_start(){ +#check if the process is running +if [ -f $pid ]; then + dbitProc=$(ps -ef | grep `cat $pid` | grep Drillbit) + if [ -n "$dbitProc" ]; then +# Check and enforce for CGroup +if [ -n "$DRILLBIT_CGROUP" ]; then + check_and_enforce_cgroup `cat $pid` +fi + fi +fi +} + +check_and_enforce_cgroup(){ +dbitPid=$1; +#if [ $(`ps -o cgroup` | grep -c $DRILLBIT_CGROUP ) -eq 1 ]; then +if [ -f /cgroup/cpu/${DRILLBIT_CGROUP}/cgroup.procs ]; then + echo $dbitPid > /cgroup/cpu/${DRILLBIT_CGROUP}/cgroup.procs + # Verify Enforcement + cgroupStatus=`grep -w $pid /cgroup/cpu/${DRILLBIT_CGROUP}/cgroup.procs` + if [ -z "$cgroupStatus" ]; then +#Ref: https://www.kernel.org/doc/Documentation/scheduler/sched-bwc.txt +let cpu_quota=`cat /cgroup/cpu/${DRILLBIT_CGROUP}/cpu.cfs_quota_us` +let cpu_period=`cat /cgroup/cpu/${DRILLBIT_CGROUP}/cpu.cfs_period_us` +if [ $cpu_period -gt 0 ] && [ $cpu_quota -gt 0 ]; then + coresAllowed="(up to "`echo $(( 100 * $cpu_quota / $cpu_period )) | sed 's/..$/.&/'`" cores allowed)" +fi +echo "WARN: Drillbit's CPU resource usage will be managed under the CGroup : $DRILLBIT_CGROUP "$coresAllowed --- End diff -- Thought that since the Drill process is being brought up in a restricted mode, it should appear as a WARN. I can switch it to an INFO because there is nothing compelling to use WARN. Will make the change on the variable within quotes. ---
[GitHub] drill pull request #1200: DRILL-143: Support CGROUPs resource management
Github user kkhatua commented on a diff in the pull request: https://github.com/apache/drill/pull/1200#discussion_r179286598 --- Diff: distribution/src/resources/drillbit.sh --- @@ -127,6 +127,44 @@ check_before_start() fi } +check_after_start(){ +#check if the process is running +if [ -f $pid ]; then + dbitProc=$(ps -ef | grep `cat $pid` | grep Drillbit) + if [ -n "$dbitProc" ]; then +# Check and enforce for CGroup +if [ -n "$DRILLBIT_CGROUP" ]; then + check_and_enforce_cgroup `cat $pid` +fi + fi +fi +} + +check_and_enforce_cgroup(){ +dbitPid=$1; +#if [ $(`ps -o cgroup` | grep -c $DRILLBIT_CGROUP ) -eq 1 ]; then +if [ -f /cgroup/cpu/${DRILLBIT_CGROUP}/cgroup.procs ]; then + echo $dbitPid > /cgroup/cpu/${DRILLBIT_CGROUP}/cgroup.procs + # Verify Enforcement + cgroupStatus=`grep -w $pid /cgroup/cpu/${DRILLBIT_CGROUP}/cgroup.procs` + if [ -z "$cgroupStatus" ]; then +#Ref: https://www.kernel.org/doc/Documentation/scheduler/sched-bwc.txt +let cpu_quota=`cat /cgroup/cpu/${DRILLBIT_CGROUP}/cpu.cfs_quota_us` --- End diff -- Ok. Will fix this ---
[GitHub] drill issue #1199: DRILL-6303: Provide a button to copy the Drillbit's JStac...
Github user kkhatua commented on the issue: https://github.com/apache/drill/pull/1199 @arina-ielchiieva I've added 2 snapshots and tested in Chrome. **Mouse Over**, showing tooltip ![mouseover](https://user-images.githubusercontent.com/4335237/38327176-74ac0aac-37fc-11e8-9a0e-d5b5ad638460.png) **On clicking**, the copy of the text to the clipboard, followed by the alert. ![mouseonclick](https://user-images.githubusercontent.com/4335237/38327185-79c0377a-37fc-11e8-955f-5dd9ad7bfaea.png) ---
[GitHub] drill pull request #1200: DRILL-143: Support CGROUPs resource management
Github user kkhatua commented on a diff in the pull request: https://github.com/apache/drill/pull/1200#discussion_r179159323 --- Diff: distribution/src/resources/drillbit.sh --- @@ -154,6 +192,7 @@ start_bit ( ) nohup nice -n $DRILL_NICENESS "$DRILL_HOME/bin/runbit" exec ${args[@]} >> "$logout" 2>&1 & echo $! > $pid sleep 1 + check_after_start --- End diff -- ð Will fix this, thanks! ---
[GitHub] drill pull request #1200: DRILL-143: Support CGROUPs resource management
Github user kkhatua commented on a diff in the pull request: https://github.com/apache/drill/pull/1200#discussion_r179158686 --- Diff: distribution/src/resources/drillbit.sh --- @@ -127,6 +127,44 @@ check_before_start() fi } +check_after_start(){ +#check if the process is running +if [ -f $pid ]; then + dbitProc=$(ps -ef | grep `cat $pid` | grep Drillbit) + if [ -n "$dbitProc" ]; then +# Check and enforce for CGroup +if [ -n "$DRILLBIT_CGROUP" ]; then + check_and_enforce_cgroup `cat $pid` +fi + fi +fi +} + +check_and_enforce_cgroup(){ +dbitPid=$1; +#if [ $(`ps -o cgroup` | grep -c $DRILLBIT_CGROUP ) -eq 1 ]; then --- End diff -- Thanks for catching that! ---
[GitHub] drill pull request #1200: DRILL-143: Support CGROUPs resource management
Github user kkhatua commented on a diff in the pull request: https://github.com/apache/drill/pull/1200#discussion_r179158585 --- Diff: distribution/src/resources/drillbit.sh --- @@ -127,6 +127,44 @@ check_before_start() fi } +check_after_start(){ +#check if the process is running +if [ -f $pid ]; then + dbitProc=$(ps -ef | grep `cat $pid` | grep Drillbit) --- End diff -- Agreed. I'll incorporate this change. ---
[GitHub] drill pull request #1200: DRILL-143: Support CGROUPs resource management
Github user kkhatua commented on a diff in the pull request: https://github.com/apache/drill/pull/1200#discussion_r179158191 --- Diff: distribution/src/resources/drill-env.sh --- @@ -86,6 +86,12 @@ #export DRILL_PID_DIR=${DRILL_PID_DIR:-$DRILL_HOME} +# CGroup to which the Drillbit belong when running as a daemon using --- End diff -- :) I did speculate about enforcement for other resources, but it seems that CPU is the primary resource that needs to be managed. Memory 'management' (by YARN, for e.g.) otherwise works without the need for CGroup. ---
[GitHub] drill pull request #1200: DRILL-143: Support CGROUPs resource management
Github user kkhatua commented on a diff in the pull request: https://github.com/apache/drill/pull/1200#discussion_r179156839 --- Diff: distribution/src/resources/drillbit.sh --- @@ -127,6 +127,44 @@ check_before_start() fi } +check_after_start(){ +#check if the process is running +if [ -f $pid ]; then + dbitProc=$(ps -ef | grep `cat $pid` | grep Drillbit) + if [ -n "$dbitProc" ]; then +# Check and enforce for CGroup +if [ -n "$DRILLBIT_CGROUP" ]; then + check_and_enforce_cgroup `cat $pid` +fi + fi +fi +} + +check_and_enforce_cgroup(){ +dbitPid=$1; +#if [ $(`ps -o cgroup` | grep -c $DRILLBIT_CGROUP ) -eq 1 ]; then +if [ -f /cgroup/cpu/${DRILLBIT_CGROUP}/cgroup.procs ]; then --- End diff -- I've looked to check for older versions, but wasn't sure which one aligned to which version and OS. It's easy to check if CGroups is running, but I don't see a guaranteed way of confirming specifically about that CGroup. Let me do some more research on this front. ---
[GitHub] drill issue #1201: DRILL-4091: Support for additional gis operations in gis ...
Github user kkhatua commented on the issue: https://github.com/apache/drill/pull/1201 @cgivre could you review this? ---
[GitHub] drill issue #1200: DRILL-143: Support CGROUPs resource management
Github user kkhatua commented on the issue: https://github.com/apache/drill/pull/1200 @paul-rogers / @Ben-Zvi , could you review this? ---
[GitHub] drill pull request #1200: DRILL-143: Support CGROUPs resource management
GitHub user kkhatua opened a pull request: https://github.com/apache/drill/pull/1200 DRILL-143: Support CGROUPs resource management Introduces the `DRILLBIT_CGROUP` option in defined in `drill-env.sh` The startup script checks if the specified CGroup (ver 2) is available and tries to apply it to the launched Drillbit JVM. This would benefit not just Drill-on-YARN usecases, but any setup that would like CGroups for enforcement of (cpu) resources management. e.g when Drillbit is configured to use `drillcpu` cgroup ``` [root@maprlabs ~]# /opt/mapr/drill/apache-drill-1.14.0-SNAPSHOT/bin/drillbit.sh restart Stopping drillbit .. Starting drillbit, logging to /var/log/drill/drillbit.out WARN: Drillbit's CPU resource usage will be managed under the CGroup : drillcpu (up to 4.00 cores allowed) ``` e.g. Non-existent CGroup `droolcpu` is used ``` [root@maprlabs ~]# /opt/mapr/drill/apache-drill-1.14.0-SNAPSHOT/bin/drillbit.sh restart Stopping drillbit .. Starting drillbit, logging to /var/log/drill/drillbit.out ERROR: cgroup droolcpu does not found. Ensure that daemon is running and cgroup exists ``` You can merge this pull request into a Git repository by running: $ git pull https://github.com/kkhatua/drill DRILL-143 Alternatively you can review and apply these changes as the patch at: https://github.com/apache/drill/pull/1200.patch To close this pull request, make a commit to your master/trunk branch with (at least) the following in the commit message: This closes #1200 commit e9a2551b315b4395c5227dad017f2b4340f41108 Author: Kunal Khatua <kkhatua@...> Date: 2018-04-03T05:35:53Z DRILL-143: Support CGROUPs resource management Introduces the DRILLBIT_CGROUP option in drill-env.sh. The startup script checks if the specified CGroup (ver 2) is available and tries to apply it to the launched Drillbit JVM. This would benefit not just Drill-on-YARN usecases, but any setup that would like CGroups for enforcement of (cpu) resources management. e.g when Drillbit is configured to use `drillcpu` cgroup ``` [root@maprlabs ~]# /opt/mapr/drill/apache-drill-1.14.0-SNAPSHOT/bin/drillbit.sh restart Stopping drillbit .. Starting drillbit, logging to /var/log/drill/drillbit.out WARN: Drillbit's CPU resource usage will be managed under the CGroup : drillcpu (up to 4.00 cores allowed) ``` e.g. Non-existent CGroup `droolcpu` is used ``` [root@kk127 ~]# /opt/mapr/drill/apache-drill-1.14.0-SNAPSHOT/bin/drillbit.sh restart Stopping drillbit .. Starting drillbit, logging to /var/log/drill/drillbit.out ERROR: cgroup droolcpu does not found. Ensure that daemon is running and cgroup exists ``` ---
[GitHub] drill pull request #1199: DRILL-6303: Provide a button to copy the Drillbit'...
GitHub user kkhatua opened a pull request: https://github.com/apache/drill/pull/1199 DRILL-6303: Provide a button to copy the Drillbit's JStack shown in /threads Provides a button in the web UI to copy the thread stack to the user's clipboard. You can merge this pull request into a Git repository by running: $ git pull https://github.com/kkhatua/drill DRILL-6303 Alternatively you can review and apply these changes as the patch at: https://github.com/apache/drill/pull/1199.patch To close this pull request, make a commit to your master/trunk branch with (at least) the following in the commit message: This closes #1199 commit 996266b13433538d411094fcaf650b10b6c17d1f Author: Kunal Khatua <kkhatua@...> Date: 2018-03-26T06:09:43Z DRILL-6303: Provide a button to copy the Drillbit's JStack shown in /threads Provides a button in the web UI to copy the thread stack to the user's clipboard. ---
[GitHub] drill issue #814: Add clickable localhost URL
Github user kkhatua commented on the issue: https://github.com/apache/drill/pull/814 @ebuildy no worries. It seems that the change was made. Could you close this JIRA? ---
[GitHub] drill issue #1126: DRILL-6179: Added pcapng-format support
Github user kkhatua commented on the issue: https://github.com/apache/drill/pull/1126 @amansinha100 / @parthchandra / @arina-ielchiieva is this ready for a review? ---
[GitHub] drill issue #1193: can drill support minio s3 storage
Github user kkhatua commented on the issue: https://github.com/apache/drill/pull/1193 @rongfengliang I'm not sure what commit this is? Are you asking a question about support for S3, because we already have a user-mailing list? The S3 storage plugin is already supported : https://drill.apache.org/docs/s3-storage-plugin/ So you should be able to use Drill to query minio S3 as well. ---