This is an automated email from the ASF dual-hosted git repository. joemcdonnell pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
commit 1830abcc7d4f34ed7d6bb7cc42005a464e86cd3b Author: Riza Suminto <[email protected]> AuthorDate: Tue May 2 12:05:39 2023 -0700 IMPALA-12056: Let child queries unset REQUEST_POOL if auto-scaling 'Compute Stats' queries gets scheduled on the smallest executor group set since these queries don't do any real work. However their child queries also gets scheduled on the smallest executor group. This may not be ideal for cases where the child query does NDVs and Counts on a big wide table. This patch let child queries to unset REQUEST_POOL query option if that option is set by frontend planner rather than client. With REQUEST_POOL unset, child query can select the executor group that best-fit its workload. Testing: - Add test in test_query_cpu_count_divisor_default Change-Id: I6dc559aa161a27a7bd5d3034788cc6241490d3b5 Reviewed-on: http://gerrit.cloudera.org:8080/19832 Reviewed-by: Kurt Deschler <[email protected]> Reviewed-by: Wenzhe Zhou <[email protected]> Tested-by: Impala Public Jenkins <[email protected]> --- be/src/service/child-query.cc | 13 +++++++-- be/src/service/child-query.h | 2 +- common/thrift/Frontend.thrift | 4 +++ .../java/org/apache/impala/service/Frontend.java | 1 + tests/custom_cluster/test_executor_groups.py | 32 +++++++++++++++++++--- 5 files changed, 44 insertions(+), 8 deletions(-) diff --git a/be/src/service/child-query.cc b/be/src/service/child-query.cc index b3aa50137..cb557cbc0 100644 --- a/be/src/service/child-query.cc +++ b/be/src/service/child-query.cc @@ -45,7 +45,7 @@ Status ChildQuery::ExecAndFetch() { ImpalaServer::TUniqueIdToTHandleIdentifier(session_id, session_secret, &exec_stmt_req.sessionHandle.sessionId); exec_stmt_req.__set_statement(query_); - SetQueryOptions(parent_request_state_->exec_request().query_options, &exec_stmt_req); + SetQueryOptions(&exec_stmt_req); exec_stmt_req.confOverlay[PARENT_QUERY_OPT] = PrintId(parent_request_state_->query_id()); @@ -145,9 +145,10 @@ void PrintQueryOptionValue(const set<impala::TRuntimeFilterType::type>& filter_t val << filter_types; } -void ChildQuery::SetQueryOptions(const TQueryOptions& parent_options, - TExecuteStatementReq* exec_stmt_req) { +void ChildQuery::SetQueryOptions(TExecuteStatementReq* exec_stmt_req) { map<string, string> conf; + const TQueryOptions& parent_options = + parent_request_state_->exec_request().query_options; #define QUERY_OPT_FN(NAME, ENUM, LEVEL)\ if (parent_options.__isset.NAME) {\ stringstream val;\ @@ -161,6 +162,12 @@ void ChildQuery::SetQueryOptions(const TQueryOptions& parent_options, // Ignore debug actions on child queries because they may cause deadlock. map<string, string>::iterator it = conf.find("DEBUG_ACTION"); if (it != conf.end()) conf.erase(it); + + if (parent_request_state_->exec_request().request_pool_set_by_frontend) { + // Remove REQUEST_POOL if this option was set by Frontend auto-scaling. + it = conf.find("REQUEST_POOL"); + if (it != conf.end()) conf.erase(it); + } exec_stmt_req->__set_confOverlay(conf); } diff --git a/be/src/service/child-query.h b/be/src/service/child-query.h index 843f8b1ad..5e6c0ba0d 100644 --- a/be/src/service/child-query.h +++ b/be/src/service/child-query.h @@ -110,7 +110,7 @@ class ChildQuery { private: /// Sets the query options from the parent query in child's HS2 request. /// TODO: Consider moving this function into a more appropriate place. - void SetQueryOptions(const TQueryOptions& parent_options, + void SetQueryOptions( apache::hive::service::cli::thrift::TExecuteStatementReq* exec_stmt_req); /// Returns Status::Cancelled if this child query has been cancelled, otherwise OK. diff --git a/common/thrift/Frontend.thrift b/common/thrift/Frontend.thrift index 8c3f20797..89c46dd3d 100644 --- a/common/thrift/Frontend.thrift +++ b/common/thrift/Frontend.thrift @@ -641,6 +641,10 @@ struct TExecRequest { // Additional profile nodes to be displayed nested right under 'profile' field. 17: optional list<RuntimeProfile.TRuntimeProfileNode> profile_children + + // True if request pool is set by Frontend rather than user specifically setting it via + // REQUEST_POOL query option. + 18: optional bool request_pool_set_by_frontend = false } // Parameters to FeSupport.cacheJar(). diff --git a/fe/src/main/java/org/apache/impala/service/Frontend.java b/fe/src/main/java/org/apache/impala/service/Frontend.java index 8829beab9..448d6f9ba 100644 --- a/fe/src/main/java/org/apache/impala/service/Frontend.java +++ b/fe/src/main/java/org/apache/impala/service/Frontend.java @@ -2273,6 +2273,7 @@ public class Frontend { if (!default_executor_group) { String namePrefix = group_set.getExec_group_name_prefix(); req.query_options.setRequest_pool(namePrefix); + req.setRequest_pool_set_by_frontend(true); if (req.query_exec_request != null) { req.query_exec_request.query_ctx.setRequest_pool(namePrefix); } diff --git a/tests/custom_cluster/test_executor_groups.py b/tests/custom_cluster/test_executor_groups.py index 0cdbce478..74af1051d 100644 --- a/tests/custom_cluster/test_executor_groups.py +++ b/tests/custom_cluster/test_executor_groups.py @@ -41,6 +41,9 @@ CPU_TEST_QUERY = "select * from tpcds_parquet.store_sales where ss_item_sk = 1 l GROUPING_TEST_QUERY = ("select ss_item_sk from tpcds_parquet.store_sales" " group by (ss_item_sk) order by ss_item_sk limit 10") +# A query to test behavior of child queries. +COMPUTE_STATS_QUERY = "COMPUTE STATS tpcds_parquet.store_sales" + # Default query option to use for testing CPU requirement. CPU_DOP_OPTIONS = {'MT_DOP': '2', 'COMPUTE_PROCESSING_COST': 'true'} @@ -910,6 +913,15 @@ class TestExecutorGroups(CustomClusterTestSuite): "Memory and cpu limit checking is skipped."), "EffectiveParallelism: 7", "ExecutorGroupsConsidered: 1"]) + # Test that child queries follow REQUEST_POOL that was set by client. + # Two child queries should all run in root.large. + self._verify_total_admitted_queries("root.large", 1) + self._run_query_and_verify_profile(COMPUTE_STATS_QUERY, options, + ["ExecutorGroupsConsidered: 1", + "Verdict: Assign to first group because query is not auto-scalable"], + ["Executor Group:"]) + self._verify_total_admitted_queries("root.large", 3) + # Test setting REQUEST_POOL and disabling COMPUTE_PROCESSING_COST options['COMPUTE_PROCESSING_COST'] = 'false' options['REQUEST_POOL'] = 'root.large' @@ -923,6 +935,18 @@ class TestExecutorGroups(CustomClusterTestSuite): # Unset REQUEST_POOL. self.execute_query_expect_success(self.client, "SET REQUEST_POOL='';") + # Test that child queries unset REQUEST_POOL that was set by Frontend planner for + # parent query. One child queries should run in root.small, and another one in + # root.large. + self._verify_total_admitted_queries("root.small", 1) + self._verify_total_admitted_queries("root.large", 4) + self._run_query_and_verify_profile(COMPUTE_STATS_QUERY, CPU_DOP_OPTIONS, + ["ExecutorGroupsConsidered: 1", + "Verdict: Assign to first group because query is not auto-scalable"], + ["Executor Group:"]) + self._verify_total_admitted_queries("root.small", 2) + self._verify_total_admitted_queries("root.large", 5) + # Test that GROUPING_TEST_QUERY will get assigned to the small group. self._run_query_and_verify_profile(GROUPING_TEST_QUERY, CPU_DOP_OPTIONS, ["Executor Group: root.small-group", "ExecutorGroupsConsidered: 2", @@ -955,12 +979,12 @@ class TestExecutorGroups(CustomClusterTestSuite): ["Executor Group:"]) # Check resource pools on the Web queries site and admission site - self._verify_query_num_for_resource_pool("root.small", 2) + self._verify_query_num_for_resource_pool("root.small", 3) self._verify_query_num_for_resource_pool("root.tiny", 3) - self._verify_query_num_for_resource_pool("root.large", 2) - self._verify_total_admitted_queries("root.small", 2) + self._verify_query_num_for_resource_pool("root.large", 5) + self._verify_total_admitted_queries("root.small", 3) self._verify_total_admitted_queries("root.tiny", 3) - self._verify_total_admitted_queries("root.large", 2) + self._verify_total_admitted_queries("root.large", 5) self.client.close() @pytest.mark.execute_serially
