This is an automated email from the ASF dual-hosted git repository.
joemcdonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
The following commit(s) were added to refs/heads/master by this push:
new 7e934b1f4 IMPALA-11702: add resource metrics for kudu scanner to
profile
7e934b1f4 is described below
commit 7e934b1f40bee858d73cbec11a5711fc2a302362
Author: zhangyifan27 <[email protected]>
AuthorDate: Thu Nov 3 18:36:15 2022 +0800
IMPALA-11702: add resource metrics for kudu scanner to profile
This patch adds counters for Kudu scanner resource metrics[1] to the
profile. The counters are updated before each Kudu scanner is closed,
using the Kudu client's GetResourceMetrics interface[2].
The MaterializeTupleTime counter is also updated so that it only
includes time spent materializing tuples and evaluating predicates.
Tests
- Ran some queries manually and checked the profiles.
[1]
https://github.com/apache/kudu/blob/a2fdba62d2129d0d7d0c2d0f175e8b2a6e2d2650/src/kudu/tserver/tserver.proto#L398
[2]
https://github.com/apache/kudu/blob/892bda293f238fddec47423d5c0b5be9576581f1/src/kudu/client/client.h#L2967
Change-Id: If0e84e7756ce92af48fa55672157caafcd396547
Reviewed-on: http://gerrit.cloudera.org:8080/19201
Reviewed-by: Impala Public Jenkins <[email protected]>
Tested-by: Impala Public Jenkins <[email protected]>
---
be/src/exec/kudu/kudu-scan-node-base.cc | 28 ++++++++++++++++++++++++++++
be/src/exec/kudu/kudu-scan-node-base.h | 25 +++++++++++++++++++++++++
be/src/exec/kudu/kudu-scanner.cc | 23 +++++++++++++++++++----
3 files changed, 72 insertions(+), 4 deletions(-)
diff --git a/be/src/exec/kudu/kudu-scan-node-base.cc
b/be/src/exec/kudu/kudu-scan-node-base.cc
index 8bc1e9761..7a00175fa 100644
--- a/be/src/exec/kudu/kudu-scan-node-base.cc
+++ b/be/src/exec/kudu/kudu-scan-node-base.cc
@@ -45,6 +45,20 @@ using kudu::client::KuduTable;
namespace impala {
PROFILE_DECLARE_COUNTER(ScanRangesComplete);
+PROFILE_DEFINE_TIMER(KuduScannerTotalDurationTime, STABLE_LOW,
+ "Total time taken for all scan rpc requests to complete for Kudu
scanners.");
+PROFILE_DEFINE_TIMER(KuduScannerQueueDurationTime, STABLE_LOW,
+ "Total time taken between scan rpc requests being accepted and when they
were "
+ "handled by Kudu scanners.");
+PROFILE_DEFINE_TIMER(KuduScannerCpuUserTime, STABLE_LOW,
+ "Total elapsed CPU user time for all scan rpc requests for Kudu
scanners.");
+PROFILE_DEFINE_TIMER(KuduScannerCpuSysTime, STABLE_LOW,
+ "Total elapsed CPU system time for all scan rpc requests for Kudu
scanners.");
+PROFILE_DEFINE_COUNTER(KuduScannerCfileCacheHitBytes, STABLE_LOW, TUnit::BYTES,
+ "Number of bytes that were read from the block cache because of a hit for
Kudu "
+ "scanners.");
+PROFILE_DEFINE_COUNTER(KuduScannerCfileCacheMissBytes, STABLE_LOW,
TUnit::BYTES,
+ "Number of bytes that were read because of a block cache miss for Kudu
scanners.");
const string KuduScanNodeBase::KUDU_ROUND_TRIPS = "TotalKuduScanRoundTrips";
const string KuduScanNodeBase::KUDU_REMOTE_TOKENS = "KuduRemoteScanTokens";
@@ -69,12 +83,26 @@ KuduScanNodeBase::~KuduScanNodeBase() {
Status KuduScanNodeBase::Prepare(RuntimeState* state) {
RETURN_IF_ERROR(ScanNode::Prepare(state));
+ AddBytesReadCounters();
scan_ranges_complete_counter_ =
PROFILE_ScanRangesComplete.Instantiate(runtime_profile());
kudu_round_trips_ = ADD_COUNTER(runtime_profile(), KUDU_ROUND_TRIPS,
TUnit::UNIT);
kudu_remote_tokens_ = ADD_COUNTER(runtime_profile(), KUDU_REMOTE_TOKENS,
TUnit::UNIT);
kudu_client_time_ = ADD_TIMER(runtime_profile(), KUDU_CLIENT_TIME);
+ kudu_scanner_total_duration_time_ =
+ PROFILE_KuduScannerTotalDurationTime.Instantiate(runtime_profile());
+ kudu_scanner_queue_duration_time_ =
+ PROFILE_KuduScannerQueueDurationTime.Instantiate(runtime_profile());
+ kudu_scanner_cpu_user_time_ =
+ PROFILE_KuduScannerCpuUserTime.Instantiate(runtime_profile());
+ kudu_scanner_cpu_sys_time_ =
+ PROFILE_KuduScannerCpuSysTime.Instantiate(runtime_profile());
+ kudu_scanner_cfile_cache_hit_bytes_ =
+ PROFILE_KuduScannerCfileCacheHitBytes.Instantiate(runtime_profile());
+ kudu_scanner_cfile_cache_miss_bytes_ =
+ PROFILE_KuduScannerCfileCacheMissBytes.Instantiate(runtime_profile());
+
DCHECK(state->desc_tbl().GetTupleDescriptor(tuple_id_) != NULL);
tuple_desc_ = state->desc_tbl().GetTupleDescriptor(tuple_id_);
table_desc_ = static_cast<const
KuduTableDescriptor*>(tuple_desc_->table_desc());
diff --git a/be/src/exec/kudu/kudu-scan-node-base.h
b/be/src/exec/kudu/kudu-scan-node-base.h
index 79d8e593a..7d4dc093e 100644
--- a/be/src/exec/kudu/kudu-scan-node-base.h
+++ b/be/src/exec/kudu/kudu-scan-node-base.h
@@ -95,6 +95,13 @@ class KuduScanNodeBase : public ScanNode {
RuntimeProfile::Counter* kudu_round_trips_ = nullptr;
RuntimeProfile::Counter* kudu_remote_tokens_ = nullptr;
RuntimeProfile::Counter* kudu_client_time_ = nullptr;
+ RuntimeProfile::Counter* kudu_scanner_total_duration_time_ = nullptr;
+ RuntimeProfile::Counter* kudu_scanner_queue_duration_time_ = nullptr;
+ RuntimeProfile::Counter* kudu_scanner_cpu_user_time_ = nullptr;
+ RuntimeProfile::Counter* kudu_scanner_cpu_sys_time_ = nullptr;
+ RuntimeProfile::Counter* kudu_scanner_cfile_cache_hit_bytes_ = nullptr;
+ RuntimeProfile::Counter* kudu_scanner_cfile_cache_miss_bytes_ = nullptr;
+
static const std::string KUDU_ROUND_TRIPS;
static const std::string KUDU_REMOTE_TOKENS;
static const std::string KUDU_CLIENT_TIME;
@@ -102,5 +109,23 @@ class KuduScanNodeBase : public ScanNode {
kudu::client::KuduClient* kudu_client() { return client_.get(); }
RuntimeProfile::Counter* kudu_round_trips() const { return
kudu_round_trips_; }
RuntimeProfile::Counter* kudu_client_time() const { return
kudu_client_time_; }
+ RuntimeProfile::Counter* kudu_scanner_total_duration_time() const {
+ return kudu_scanner_total_duration_time_;
+ }
+ RuntimeProfile::Counter* kudu_scanner_queue_duration_time() const {
+ return kudu_scanner_queue_duration_time_;
+ }
+ RuntimeProfile::Counter* kudu_scanner_cpu_user_time() const {
+ return kudu_scanner_cpu_user_time_;
+ }
+ RuntimeProfile::Counter* kudu_scanner_cpu_sys_time() const {
+ return kudu_scanner_cpu_sys_time_;
+ }
+ RuntimeProfile::Counter* kudu_scanner_cfile_cache_hit_bytes() const {
+ return kudu_scanner_cfile_cache_hit_bytes_;
+ }
+ RuntimeProfile::Counter* kudu_scanner_cfile_cache_miss_bytes() const {
+ return kudu_scanner_cfile_cache_miss_bytes_;
+ }
};
} // namespace impala
diff --git a/be/src/exec/kudu/kudu-scanner.cc b/be/src/exec/kudu/kudu-scanner.cc
index 4a404bcaf..62353eea4 100644
--- a/be/src/exec/kudu/kudu-scanner.cc
+++ b/be/src/exec/kudu/kudu-scanner.cc
@@ -19,10 +19,13 @@
#include <string>
#include <vector>
+
+#include <kudu/client/resource_metrics.h>
#include <kudu/client/row_result.h>
#include <kudu/client/value.h>
#include <thrift/protocol/TDebugProtocol.h>
+#include "common/names.h"
#include "exec/exec-node.inline.h"
#include "exec/kudu/kudu-util.h"
#include "exprs/scalar-expr-evaluator.h"
@@ -48,14 +51,13 @@
#include "util/periodic-counter-updater.h"
#include "util/runtime-profile-counters.h"
-#include "common/names.h"
-
using kudu::client::KuduClient;
using kudu::client::KuduPredicate;
using kudu::client::KuduScanBatch;
using kudu::client::KuduSchema;
using kudu::client::KuduTable;
using kudu::client::KuduValue;
+using kudu::client::ResourceMetrics;
DEFINE_string(kudu_read_mode, "READ_LATEST", "(Advanced) Sets the Kudu scan
ReadMode. "
"Supported Kudu read modes are READ_LATEST and READ_AT_SNAPSHOT. Can be
overridden "
@@ -68,7 +70,6 @@ DECLARE_int32(kudu_operation_timeout_ms);
namespace impala {
-
KuduScanner::KuduScanner(KuduScanNodeBase* scan_node, RuntimeState* state)
: scan_node_(scan_node),
state_(state),
@@ -124,6 +125,7 @@ Status
KuduScanner::GetNextWithCountStarOptimization(RowBatch* row_batch, bool*
int64_t tuple_buffer_size;
uint8_t* tuple_buffer;
int capacity = 1;
+ SCOPED_TIMER(scan_node_->materialize_tuple_timer());
RETURN_IF_ERROR(row_batch->ResizeAndAllocateTupleBuffer(state_,
row_batch->tuple_data_pool(), row_batch->row_desc()->GetRowSize(),
&capacity,
&tuple_buffer_size, &tuple_buffer));
@@ -141,7 +143,6 @@ Status
KuduScanner::GetNextWithCountStarOptimization(RowBatch* row_batch, bool*
}
Status KuduScanner::GetNext(RowBatch* row_batch, bool* eos) {
- SCOPED_TIMER(scan_node_->materialize_tuple_timer());
// Optimized scanning for count(*), only write the NumRows
if (scan_node_->optimize_count_star()) {
return GetNextWithCountStarOptimization(row_batch, eos);
@@ -311,6 +312,19 @@ Status KuduScanner::OpenNextScanToken(const string&
scan_token, bool* eos) {
void KuduScanner::CloseCurrentClientScanner() {
DCHECK_NOTNULL(scanner_.get());
+
+ std::map<std::string, int64_t> metrics =
scanner_->GetResourceMetrics().Get();
+ COUNTER_ADD(scan_node_->bytes_read_counter(), metrics["bytes_read"]);
+ COUNTER_ADD(
+ scan_node_->kudu_scanner_total_duration_time(),
metrics["total_duration_nanos"]);
+ COUNTER_ADD(
+ scan_node_->kudu_scanner_queue_duration_time(),
metrics["queue_duration_nanos"]);
+ COUNTER_ADD(scan_node_->kudu_scanner_cpu_user_time(),
metrics["cpu_user_nanos"]);
+ COUNTER_ADD(scan_node_->kudu_scanner_cpu_sys_time(),
metrics["cpu_system_nanos"]);
+ COUNTER_ADD(
+ scan_node_->kudu_scanner_cfile_cache_hit_bytes(),
metrics["cfile_cache_hit_bytes"]);
+ COUNTER_ADD(scan_node_->kudu_scanner_cfile_cache_miss_bytes(),
+ metrics["cfile_cache_miss_bytes"]);
scanner_->Close();
scanner_.reset();
}
@@ -341,6 +355,7 @@ Status KuduScanner::HandleEmptyProjection(RowBatch*
row_batch) {
}
Status KuduScanner::DecodeRowsIntoRowBatch(RowBatch* row_batch, Tuple**
tuple_mem) {
+ SCOPED_TIMER(scan_node_->materialize_tuple_timer());
// Short-circuit for empty projection cases.
if (scan_node_->tuple_desc()->slots().empty()) {
return HandleEmptyProjection(row_batch);