IMPALA-6347: Monitor queue depth size for outgoing RPCs for Reactor threads

On systems with slow networking, significant queuing can occur in the
Reactor threads. It would be good to quantify how much queuing occurred.

This patch extracts the OutboundTransfer queue size information from
KRPC and displays it on the /rpcz webpage.

Testing: Tested by running queries on both a local mini-cluster and a
remote cluster, and manually confirming from the webpages that connections
between the different nodes that exchange data with each other are listed
and that each shows its number of queued calls.

Change-Id: I84fea531e98d3e84fcc57bf7533655218bc91f4c
Reviewed-on: http://gerrit.cloudera.org:8080/9384
Reviewed-by: Sailesh Mukil <sail...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/8dcff3aa
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/8dcff3aa
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/8dcff3aa

Branch: refs/heads/master
Commit: 8dcff3aa41e7f252aa27c6ab1275712103ed5d2c
Parents: 994272b
Author: Sailesh Mukil <sail...@apache.org>
Authored: Wed Feb 21 11:57:29 2018 -0800
Committer: Impala Public Jenkins <impala-public-jenk...@gerrit.cloudera.org>
Committed: Wed Mar 7 03:03:29 2018 +0000

----------------------------------------------------------------------
 be/src/rpc/rpc-mgr.cc | 12 ++++++++++++
 www/rpcz.tmpl         | 37 ++++++++++++++++++++++++++++++++++++-
 2 files changed, 48 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/8dcff3aa/be/src/rpc/rpc-mgr.cc
----------------------------------------------------------------------
diff --git a/be/src/rpc/rpc-mgr.cc b/be/src/rpc/rpc-mgr.cc
index 4faddb7..b377d3d 100644
--- a/be/src/rpc/rpc-mgr.cc
+++ b/be/src/rpc/rpc-mgr.cc
@@ -205,10 +205,22 @@ void RpcMgr::ToJson(Document* document) {
   document->AddMember("num_inbound_calls_in_flight", 
num_inbound_calls_in_flight,
       document->GetAllocator());
 
+  // Add per connection metrics.
+  Value per_conn_metrics(kArrayType);
   int64_t num_outbound_calls_in_flight = 0;
   for (const RpcConnectionPB& conn : response.outbound_connections()) {
     num_outbound_calls_in_flight += conn.calls_in_flight().size();
+
+    // Add per connection metrics to an array.
+    Value per_conn_metrics_entry(kObjectType);
+    Value remote_ip_str(conn.remote_ip().c_str(), document->GetAllocator());
+    per_conn_metrics_entry.AddMember(
+        "remote_ip", remote_ip_str, document->GetAllocator());
+    per_conn_metrics_entry.AddMember(
+        "outbound_queue_size", conn.outbound_queue_size(), 
document->GetAllocator());
+    per_conn_metrics.PushBack(per_conn_metrics_entry, 
document->GetAllocator());
   }
+  document->AddMember("per_conn_metrics", per_conn_metrics, 
document->GetAllocator());
   document->AddMember("num_outbound_calls_in_flight", 
num_outbound_calls_in_flight,
       document->GetAllocator());
 

http://git-wip-us.apache.org/repos/asf/impala/blob/8dcff3aa/www/rpcz.tmpl
----------------------------------------------------------------------
diff --git a/www/rpcz.tmpl b/www/rpcz.tmpl
index 8ade9ef..fd2e3bd 100644
--- a/www/rpcz.tmpl
+++ b/www/rpcz.tmpl
@@ -86,8 +86,26 @@ under the License.
 </table>
 {{/services}}
 
+<h3>Per connection metrics for KRPC</h3>
+<table class="table table-bordered table-hover" id="per_conn_metrics">
+  <thead>
+    <tr>
+      <th>Remote IP</th>
+      <th>Outbound Queue Size (count)</th>
+    </tr>
+  </thead>
+  <tbody>
+    {{#per_conn_metrics}}
+    <tr>
+      <td>{{remote_ip}}</td>
+      <td>{{outbound_queue_size}}</td>
+    </tr>
+    {{/per_conn_metrics}}
+  </tbody>
+</table>
+
 {{?servers}}
-<h2>Impala RPC Services
+<h2>Impala Thrift RPC Services
   <button class="btn btn-warning btn-xs" onClick="reset_all();">
     Reset all
   </button>
@@ -207,6 +225,22 @@ function update_krpc_services(json) {
   }
 }
 
+function update_krpc_conn_metrics_datatable(json) {
+  var table = $('#per_conn_metrics').DataTable();
+  var rows = $.map(json["per_conn_metrics"], function(row) {
+    return [[row["remote_ip"], row["outbound_queue_size"]]];
+  });
+
+  table.clear().rows.add(rows).draw();
+}
+
+$(document).ready(function() {
+  table = $('#per_conn_metrics').DataTable({
+    "order": [[ 0, "asc" ]],
+    "pageLength": 50
+  });
+});
+
 function refresh() {
   var xhr = new XMLHttpRequest();
   xhr.responseType = 'text';
@@ -217,6 +251,7 @@ function refresh() {
     json = JSON.parse(blob);
     update_impala_services(json);
     update_krpc_services(json);
+    update_krpc_conn_metrics_datatable(json);
     document.getElementById("last-updated").textContent = new Date();
   }
 

Reply via email to