This is an automated email from the ASF dual-hosted git repository. stigahuang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
commit c2705fa480bfa33b48f2206d61ba13d3ee998dd5 Author: Riza Suminto <riza.sumi...@cloudera.com> AuthorDate: Tue Jun 24 15:16:27 2025 -0700 IMPALA-14076: Improve readability of workload management query This patch improves the readability of workload management's insert DML query profiles by: 1. Adding a newline between each entry in the VALUES clause. 2. Removing the analyzed query from the PLAN column in both tables. For the second one, a new query option HIDE_ANALYZED_QUERY is added. If this option is set to True, 'Analyzed query' will not be printed in the Plan section of the runtime profile. This is helpful for long SQL such as workload management's insert DML query. Testing: - Add explain test case for HIDE_ANALYZED_QUERY option. - Manually ran some queries in a minicluster with workload management enabled. Confirmed that both improvements appear in the DML runtime profile. Change-Id: I30576795dbc2af27a6879684f3757becfd8fc8d0 Reviewed-on: http://gerrit.cloudera.org:8080/23085 Reviewed-by: Impala Public Jenkins <impala-public-jenk...@cloudera.com> Tested-by: Impala Public Jenkins <impala-public-jenk...@cloudera.com> --- be/src/service/query-options.cc | 4 ++++ be/src/service/query-options.h | 3 ++- be/src/service/workload-management-worker.cc | 4 +++- common/thrift/ImpalaService.thrift | 4 ++++ common/thrift/Query.thrift | 3 +++ fe/src/main/java/org/apache/impala/planner/Planner.java | 5 +++-- .../functional-query/queries/QueryTest/explain-level2.test | 7 +++++++ .../functional-query/queries/QueryTest/explain-level3.test | 7 +++++++ 8 files changed, 33 insertions(+), 4 deletions(-) diff --git a/be/src/service/query-options.cc b/be/src/service/query-options.cc index c55d017f9..75edb21db 100644 --- a/be/src/service/query-options.cc +++ b/be/src/service/query-options.cc @@ -1394,6 +1394,10 @@ Status impala::SetQueryOption(TImpalaQueryOptions::type option, const string& va query_options->__set_mem_estimate_scale_for_spilling_operator(double_val); break; } + case 
TImpalaQueryOptions::HIDE_ANALYZED_QUERY: { + query_options->__set_hide_analyzed_query(IsTrue(value)); + break; + } default: string key = to_string(option); if (IsRemovedQueryOption(key)) { diff --git a/be/src/service/query-options.h b/be/src/service/query-options.h index 68c7737a8..e56885134 100644 --- a/be/src/service/query-options.h +++ b/be/src/service/query-options.h @@ -51,7 +51,7 @@ typedef std::unordered_map<string, beeswax::TQueryOptionLevel::type> // plus one. Thus, the second argument to the DCHECK has to be updated every // time we add or remove a query option to/from the enum TImpalaQueryOptions. constexpr unsigned NUM_QUERY_OPTIONS = - TImpalaQueryOptions::JSON_BINARY_FORMAT + 1; + TImpalaQueryOptions::HIDE_ANALYZED_QUERY + 1; #define QUERY_OPTS_TABLE \ DCHECK_EQ(_TImpalaQueryOptions_VALUES_TO_NAMES.size(), NUM_QUERY_OPTIONS); \ REMOVED_QUERY_OPT_FN(abort_on_default_limit_exceeded, ABORT_ON_DEFAULT_LIMIT_EXCEEDED) \ @@ -377,6 +377,7 @@ constexpr unsigned NUM_QUERY_OPTIONS = QUERY_OPT_FN(use_calcite_planner, USE_CALCITE_PLANNER, \ TQueryOptionLevel::ADVANCED) \ QUERY_OPT_FN(json_binary_format, JSON_BINARY_FORMAT, TQueryOptionLevel::REGULAR) \ + QUERY_OPT_FN(hide_analyzed_query, HIDE_ANALYZED_QUERY, TQueryOptionLevel::ADVANCED) \ ; /// Enforce practical limits on some query options to avoid undesired query state. 
diff --git a/be/src/service/workload-management-worker.cc b/be/src/service/workload-management-worker.cc index 37ef244cf..2c77c70c4 100644 --- a/be/src/service/workload-management-worker.cc +++ b/be/src/service/workload-management-worker.cc @@ -465,7 +465,7 @@ static const string _queryStateToSql( StringStreamPop sql; FieldParserContext ctx(rec, FLAGS_cluster_id, sql); - sql << "("; + sql << "\n("; for (const auto& field : FIELD_DEFINITIONS) { if (field.second.Include(target_schema_version)) { @@ -681,6 +681,8 @@ void ImpalaServer::WorkloadManagementWorker(const Version& target_schema_version if (!FLAGS_debug_actions.empty()) { insert_query_opts[TImpalaQueryOptions::DEBUG_ACTION] = FLAGS_debug_actions; } + // Hide analyzed query since it can be prohibitively long. + insert_query_opts[TImpalaQueryOptions::HIDE_ANALYZED_QUERY] = "true"; while (true) { // Exit this thread if a shutdown was initiated. diff --git a/common/thrift/ImpalaService.thrift b/common/thrift/ImpalaService.thrift index ef6a7efbc..d704319f9 100644 --- a/common/thrift/ImpalaService.thrift +++ b/common/thrift/ImpalaService.thrift @@ -1033,6 +1033,10 @@ enum TImpalaQueryOptions { // BASE64 - the json binary data is read as base64 encoded string. // RAWSTRING - the json binary data is read as raw string. JSON_BINARY_FORMAT = 192 + + // Hide analyzed query from runtime profile. This is useful if query is too large, + // such as INSERT INTO with hundreds of VALUES. + HIDE_ANALYZED_QUERY = 193 } // The summary of a DML statement. 
diff --git a/common/thrift/Query.thrift b/common/thrift/Query.thrift index 8acb92cdc..2274bc7fd 100644 --- a/common/thrift/Query.thrift +++ b/common/thrift/Query.thrift @@ -782,6 +782,9 @@ struct TQueryOptions { // See comment in ImpalaService.thrift 193: optional CatalogObjects.TJsonBinaryFormat json_binary_format = TJsonBinaryFormat.NONE; + + // See comment in ImpalaService.thrift + 194: optional bool hide_analyzed_query = false } // Impala currently has three types of sessions: Beeswax, HiveServer2 and external diff --git a/fe/src/main/java/org/apache/impala/planner/Planner.java b/fe/src/main/java/org/apache/impala/planner/Planner.java index 3b13a2eab..5c5db03c0 100644 --- a/fe/src/main/java/org/apache/impala/planner/Planner.java +++ b/fe/src/main/java/org/apache/impala/planner/Planner.java @@ -456,8 +456,9 @@ public class Planner { hasHeader = true; } - if (explainLevel.ordinal() >= TExplainLevel.EXTENDED.ordinal() && - queryStmt != null) { + if (explainLevel.ordinal() >= TExplainLevel.EXTENDED.ordinal() + && !request.query_ctx.client_request.query_options.hide_analyzed_query + && queryStmt != null) { // In extended explain include the analyzed query text showing implicit casts String queryText = queryStmt.toSql(SHOW_IMPLICIT_CASTS); String wrappedText = PrintUtils.wrapString("Analyzed query: " + queryText, 80); diff --git a/testdata/workloads/functional-query/queries/QueryTest/explain-level2.test b/testdata/workloads/functional-query/queries/QueryTest/explain-level2.test index 937939745..7a1e59155 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/explain-level2.test +++ b/testdata/workloads/functional-query/queries/QueryTest/explain-level2.test @@ -102,3 +102,10 @@ explain select count(*) from tpch.region row_regex:.* cache key: [0-9a-f][0-9a-f]*.* row_regex:.*\[.*TPlanNode\(.*\] ==== +---- QUERY +# Tests HIDE_ANALYZED_QUERY=TRUE +set HIDE_ANALYZED_QUERY=TRUE; +explain select count(*) from tpch.region +---- RESULTS: VERIFY_IS_NOT_IN +'Analyzed 
query:' +==== diff --git a/testdata/workloads/functional-query/queries/QueryTest/explain-level3.test b/testdata/workloads/functional-query/queries/QueryTest/explain-level3.test index bf953b209..c3811201b 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/explain-level3.test +++ b/testdata/workloads/functional-query/queries/QueryTest/explain-level3.test @@ -143,3 +143,10 @@ order by a ''yyyy-MM-dd')) a FROM tpcds_parquet.date_dim GROUP BY' 'month(to_timestamp(CAST(d_date_sk AS STRING), 'yyyy-MM-dd')) ORDER BY a ASC' ==== +---- QUERY +# Tests HIDE_ANALYZED_QUERY=TRUE +set HIDE_ANALYZED_QUERY=TRUE; +explain select count(*) from tpch.region +---- RESULTS: VERIFY_IS_NOT_IN +'Analyzed query:' +====