This is an automated email from the ASF dual-hosted git repository.

stigahuang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit c2705fa480bfa33b48f2206d61ba13d3ee998dd5
Author: Riza Suminto <riza.sumi...@cloudera.com>
AuthorDate: Tue Jun 24 15:16:27 2025 -0700

    IMPALA-14076: Improve readability of workload management query
    
    This patch improves the readability of workload management's insert DML
    query profiles by:
    1. Add a newline between each entry in the VALUES clause.
    2. Remove analyzed query from the PLAN column in both tables.
    
    For the second one, a new query option HIDE_ANALYZED_QUERY is added. If
    this option is set to true, 'Analyzed query' will not be printed in the
    Plan section of the runtime profile. This is helpful for long SQL such as
    workload management's insert DML query.
    
    Testing:
    - Add explain test case for HIDE_ANALYZED_QUERY option.
    - Manually ran some queries in a minicluster with workload management
      enabled. Confirmed that both improvements appear in the DML runtime
      profile.
    
    Change-Id: I30576795dbc2af27a6879684f3757becfd8fc8d0
    Reviewed-on: http://gerrit.cloudera.org:8080/23085
    Reviewed-by: Impala Public Jenkins <impala-public-jenk...@cloudera.com>
    Tested-by: Impala Public Jenkins <impala-public-jenk...@cloudera.com>
---
 be/src/service/query-options.cc                                    | 4 ++++
 be/src/service/query-options.h                                     | 3 ++-
 be/src/service/workload-management-worker.cc                       | 4 +++-
 common/thrift/ImpalaService.thrift                                 | 4 ++++
 common/thrift/Query.thrift                                         | 3 +++
 fe/src/main/java/org/apache/impala/planner/Planner.java            | 5 +++--
 .../functional-query/queries/QueryTest/explain-level2.test         | 7 +++++++
 .../functional-query/queries/QueryTest/explain-level3.test         | 7 +++++++
 8 files changed, 33 insertions(+), 4 deletions(-)

diff --git a/be/src/service/query-options.cc b/be/src/service/query-options.cc
index c55d017f9..75edb21db 100644
--- a/be/src/service/query-options.cc
+++ b/be/src/service/query-options.cc
@@ -1394,6 +1394,10 @@ Status impala::SetQueryOption(TImpalaQueryOptions::type 
option, const string& va
         
query_options->__set_mem_estimate_scale_for_spilling_operator(double_val);
         break;
       }
+      case TImpalaQueryOptions::HIDE_ANALYZED_QUERY: {
+        query_options->__set_hide_analyzed_query(IsTrue(value));
+        break;
+      }
       default:
         string key = to_string(option);
         if (IsRemovedQueryOption(key)) {
diff --git a/be/src/service/query-options.h b/be/src/service/query-options.h
index 68c7737a8..e56885134 100644
--- a/be/src/service/query-options.h
+++ b/be/src/service/query-options.h
@@ -51,7 +51,7 @@ typedef std::unordered_map<string, 
beeswax::TQueryOptionLevel::type>
 // plus one. Thus, the second argument to the DCHECK has to be updated every
 // time we add or remove a query option to/from the enum TImpalaQueryOptions.
 constexpr unsigned NUM_QUERY_OPTIONS =
-    TImpalaQueryOptions::JSON_BINARY_FORMAT + 1;
+    TImpalaQueryOptions::HIDE_ANALYZED_QUERY + 1;
 #define QUERY_OPTS_TABLE                                                       
          \
   DCHECK_EQ(_TImpalaQueryOptions_VALUES_TO_NAMES.size(), NUM_QUERY_OPTIONS);   
          \
   REMOVED_QUERY_OPT_FN(abort_on_default_limit_exceeded, 
ABORT_ON_DEFAULT_LIMIT_EXCEEDED) \
@@ -377,6 +377,7 @@ constexpr unsigned NUM_QUERY_OPTIONS =
   QUERY_OPT_FN(use_calcite_planner, USE_CALCITE_PLANNER,                       
          \
       TQueryOptionLevel::ADVANCED)                                             
          \
   QUERY_OPT_FN(json_binary_format, JSON_BINARY_FORMAT, 
TQueryOptionLevel::REGULAR)       \
+  QUERY_OPT_FN(hide_analyzed_query, HIDE_ANALYZED_QUERY, 
TQueryOptionLevel::ADVANCED)    \
   ;
 
 /// Enforce practical limits on some query options to avoid undesired query 
state.
diff --git a/be/src/service/workload-management-worker.cc 
b/be/src/service/workload-management-worker.cc
index 37ef244cf..2c77c70c4 100644
--- a/be/src/service/workload-management-worker.cc
+++ b/be/src/service/workload-management-worker.cc
@@ -465,7 +465,7 @@ static const string _queryStateToSql(
   StringStreamPop sql;
   FieldParserContext ctx(rec, FLAGS_cluster_id, sql);
 
-  sql << "(";
+  sql << "\n(";
 
   for (const auto& field : FIELD_DEFINITIONS) {
     if (field.second.Include(target_schema_version)) {
@@ -681,6 +681,8 @@ void ImpalaServer::WorkloadManagementWorker(const Version& 
target_schema_version
   if (!FLAGS_debug_actions.empty()) {
     insert_query_opts[TImpalaQueryOptions::DEBUG_ACTION] = FLAGS_debug_actions;
   }
+  // Hide analyzed query since it can be prohibitively long.
+  insert_query_opts[TImpalaQueryOptions::HIDE_ANALYZED_QUERY] = "true";
 
   while (true) {
     // Exit this thread if a shutdown was initiated.
diff --git a/common/thrift/ImpalaService.thrift 
b/common/thrift/ImpalaService.thrift
index ef6a7efbc..d704319f9 100644
--- a/common/thrift/ImpalaService.thrift
+++ b/common/thrift/ImpalaService.thrift
@@ -1033,6 +1033,10 @@ enum TImpalaQueryOptions {
   //   BASE64 - the json binary data is read as base64 encoded string.
   //   RAWSTRING - the json binary data is read as raw string.
   JSON_BINARY_FORMAT = 192
+
+  // Hide analyzed query from runtime profile. This is useful if query is too 
large,
+  // such as INSERT INTO with hundreds of VALUES.
+  HIDE_ANALYZED_QUERY = 193
 }
 
 // The summary of a DML statement.
diff --git a/common/thrift/Query.thrift b/common/thrift/Query.thrift
index 8acb92cdc..2274bc7fd 100644
--- a/common/thrift/Query.thrift
+++ b/common/thrift/Query.thrift
@@ -782,6 +782,9 @@ struct TQueryOptions {
   // See comment in ImpalaService.thrift
   193: optional CatalogObjects.TJsonBinaryFormat json_binary_format =
       TJsonBinaryFormat.NONE;
+
+  // See comment in ImpalaService.thrift
+  194: optional bool hide_analyzed_query = false
 }
 
 // Impala currently has three types of sessions: Beeswax, HiveServer2 and 
external
diff --git a/fe/src/main/java/org/apache/impala/planner/Planner.java 
b/fe/src/main/java/org/apache/impala/planner/Planner.java
index 3b13a2eab..5c5db03c0 100644
--- a/fe/src/main/java/org/apache/impala/planner/Planner.java
+++ b/fe/src/main/java/org/apache/impala/planner/Planner.java
@@ -456,8 +456,9 @@ public class Planner {
       hasHeader = true;
     }
 
-    if (explainLevel.ordinal() >= TExplainLevel.EXTENDED.ordinal() &&
-        queryStmt != null) {
+    if (explainLevel.ordinal() >= TExplainLevel.EXTENDED.ordinal()
+        && !request.query_ctx.client_request.query_options.hide_analyzed_query
+        && queryStmt != null) {
       // In extended explain include the analyzed query text showing implicit 
casts
       String queryText = queryStmt.toSql(SHOW_IMPLICIT_CASTS);
       String wrappedText = PrintUtils.wrapString("Analyzed query: " + 
queryText, 80);
diff --git 
a/testdata/workloads/functional-query/queries/QueryTest/explain-level2.test 
b/testdata/workloads/functional-query/queries/QueryTest/explain-level2.test
index 937939745..7a1e59155 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/explain-level2.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/explain-level2.test
@@ -102,3 +102,10 @@ explain select count(*) from tpch.region
 row_regex:.* cache key: [0-9a-f][0-9a-f]*.*
 row_regex:.*\[.*TPlanNode\(.*\]
 ====
+---- QUERY
+# Tests HIDE_ANALYZED_QUERY=TRUE
+set HIDE_ANALYZED_QUERY=TRUE;
+explain select count(*) from tpch.region
+---- RESULTS: VERIFY_IS_NOT_IN
+'Analyzed query:'
+====
diff --git 
a/testdata/workloads/functional-query/queries/QueryTest/explain-level3.test 
b/testdata/workloads/functional-query/queries/QueryTest/explain-level3.test
index bf953b209..c3811201b 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/explain-level3.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/explain-level3.test
@@ -143,3 +143,10 @@ order by a
 ''yyyy-MM-dd')) a FROM tpcds_parquet.date_dim GROUP BY'
 'month(to_timestamp(CAST(d_date_sk AS STRING), 'yyyy-MM-dd')) ORDER BY a ASC'
 ====
+---- QUERY
+# Tests HIDE_ANALYZED_QUERY=TRUE
+set HIDE_ANALYZED_QUERY=TRUE;
+explain select count(*) from tpch.region
+---- RESULTS: VERIFY_IS_NOT_IN
+'Analyzed query:'
+====

Reply via email to