This is an automated email from the ASF dual-hosted git repository.
wzhou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
The following commit(s) were added to refs/heads/master by this push:
new 086cf6b92 IMPALA-13054: Avoid revisiting children in QueryStateExpanded
086cf6b92 is described below
commit 086cf6b921f99359b9d490555163c51285499fa2
Author: Michael Smith <[email protected]>
AuthorDate: Thu May 2 15:55:34 2024 -0700
IMPALA-13054: Avoid revisiting children in QueryStateExpanded
Avoids revisiting children in QueryStateExpanded due to duplicate
recursion. Since process_exec_profile visits children recursively, we
only want immediate children from GetChildren, not all children from
GetAllChildren.
Adjusts HDFS_SCAN_NODE test to look back at top of stack, rather than
depend on a specific depth that was caused by calling GetAllChildren
from the root.
This was identified by running the Impala E2E test suite with workload
management enabled, where
test_nested_types.py::TestMaxNestingDepth::test_max_nesting_depth took
multiple hours to run and blocked the FinishUnregisterQuery for other
tests.
Testing:
- Manual test with enable_workload_mgmt=true running
test_max_nesting_depth.
Change-Id: Ia36d53e52095d487ffb3a162fc723f1e815bec11
Reviewed-on: http://gerrit.cloudera.org:8080/21392
Reviewed-by: Impala Public Jenkins <[email protected]>
Tested-by: Impala Public Jenkins <[email protected]>
---
be/src/service/query-state-record.cc | 34 +++++++++++++++++++++++-----------
1 file changed, 23 insertions(+), 11 deletions(-)
diff --git a/be/src/service/query-state-record.cc
b/be/src/service/query-state-record.cc
index 8c7458a03..d054cf644 100644
--- a/be/src/service/query-state-record.cc
+++ b/be/src/service/query-state-record.cc
@@ -184,6 +184,14 @@ int64_t EstimateSize(const QueryStateRecord* record) {
return size;
}
+static bool find_instance(RuntimeProfileBase* prof) {
+ return boost::algorithm::istarts_with(prof->name(), "Instance");
+}
+
+static bool find_averaged(RuntimeProfileBase* prof) {
+ return boost::algorithm::istarts_with(prof->name(), "Averaged Fragment");
+}
+
QueryStateExpanded::QueryStateExpanded(const ClientRequestState& exec_state,
const std::shared_ptr<QueryStateRecord> base) :
base_state(base ? move(base) : make_shared<QueryStateRecord>(exec_state)) {
@@ -253,9 +261,7 @@ QueryStateExpanded::QueryStateExpanded(const
ClientRequestState& exec_state,
map<string, int64_t> bytes_read_cache;
vector<RuntimeProfileBase*> prof_stack;
- using boost::algorithm::iequals;
-
- // Lambda function to recursively walk through a profile.
+ // Lambda function to recursively walk through a profile.
std::function<void(RuntimeProfileBase*)> process_exec_profile = [
&process_exec_profile, &host_scratch_bytes, &scanner_io_wait,
&prof_stack,
&bytes_read_cache, this](RuntimeProfileBase* profile) {
@@ -266,20 +272,26 @@ QueryStateExpanded::QueryStateExpanded(const
ClientRequestState& exec_state,
}
// Metrics from HDFS_SCAN_NODE entries.
- if (prof_stack.size() >= 3) {
- if (iequals(prof_stack[1]->name().substr(0, 8), "instance") &&
- iequals(prof_stack[2]->name().substr(0, 14), "hdfs_scan_node")) {
+ if (const string& scan = prof_stack.back()->name();
+ boost::algorithm::istarts_with(scan, "HDFS_SCAN_NODE")) {
+ // Find a parent instance. If none found, assume in Averaged Fragment.
+ if (auto it = find_if(prof_stack.begin()+1, prof_stack.end()-1,
find_instance);
+ it != prof_stack.end()-1) {
+ DCHECK(find_if(prof_stack.begin(), prof_stack.end(), find_averaged)
+ == prof_stack.end());
+ const string& inst = (*it)->name();
if (const auto& cntr = profile->GetCounter("ScannerIoWaitTime");
cntr != nullptr) {
- scanner_io_wait.emplace(StrCat(
- prof_stack[1]->name(), "::", prof_stack[2]->name()),
cntr->value());
+ scanner_io_wait.emplace(StrCat(inst, "::", scan), cntr->value());
}
if (const auto& cntr = profile->GetCounter("DataCacheHitBytes");
cntr != nullptr) {
- bytes_read_cache.emplace(StrCat(
- prof_stack[1]->name(), "::", prof_stack[2]->name()),
cntr->value());
+ bytes_read_cache.emplace(StrCat(inst, "::", scan), cntr->value());
}
+ } else {
+ DCHECK(find_if(prof_stack.begin(), prof_stack.end(), find_averaged)
+ != prof_stack.end());
}
}
@@ -290,7 +302,7 @@ QueryStateExpanded::QueryStateExpanded(const
ClientRequestState& exec_state,
// Recursively walk down through all child nodes.
vector<RuntimeProfileBase*> children;
- profile->GetAllChildren(&children);
+ profile->GetChildren(&children);
for (const auto& child : children) {
process_exec_profile(child);
}