This is an automated email from the ASF dual-hosted git repository.

dbecker pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 7c02ba952b1dc7b5e43c302f3bf76a18316ef417
Author: jasonmfehr <[email protected]>
AuthorDate: Wed Feb 26 13:17:28 2025 -0800

    IMPALA-13803: Fix hiveserver2_protocol_version Values in Workload Management
    
    The workload management tables sys.impala_query_live and
    sys.impala_query_log contain different values for the
    hiveserver2_protocol_version column. The live table contains a short
    string such as "V6" while the log table contains a much longer string
    with an unnecessary prefix such as "HIVE_CLI_SERVICE_PROTOCOL_V6".
    
    This patch modifies the value stored in the
    hiveserver2_protocol_version column in the sys.impala_query_log table
    to match the value stored in the sys.impala_query_live table and the
    query profile.
    
    Testing was accomplished by running the following test suites
    locally. These tests all call the 'assert_query' function of
    workload_management.py which checks the value of the
    hiveserver2_protocol_version column. A new test was added to
    test_query_live.py which uses HS2 as the client protocol.
      * tests/custom_cluster/test_query_live.py
      * tests/custom_cluster/test_query_log.py
      * tests/custom_cluster/test_workload_mgmt_init.py
    
    Change-Id: Idd8121d4fbf7abe12d313f3314377db6f1ec017a
    Reviewed-on: http://gerrit.cloudera.org:8080/22553
    Reviewed-by: Impala Public Jenkins <[email protected]>
    Tested-by: Impala Public Jenkins <[email protected]>
---
 be/src/exec/system-table-scanner.cc          |  2 +-
 be/src/service/query-state-record.cc         |  7 ++++++-
 be/src/service/query-state-record.h          |  5 +++++
 be/src/service/workload-management-worker.cc |  2 +-
 tests/custom_cluster/test_query_live.py      | 17 ++++++++++++++++-
 tests/util/workload_management.py            |  2 +-
 6 files changed, 30 insertions(+), 5 deletions(-)

diff --git a/be/src/exec/system-table-scanner.cc b/be/src/exec/system-table-scanner.cc
index 141aa77c6..e286c8353 100644
--- a/be/src/exec/system-table-scanner.cc
+++ b/be/src/exec/system-table-scanner.cc
@@ -200,7 +200,7 @@ Status QueryScanner::MaterializeNextTuple(
       case TQueryTableColumn::HIVESERVER2_PROTOCOL_VERSION:
         if (query.session_type == TSessionType::HIVESERVER2) {
           RETURN_IF_ERROR(WriteStringSlot(
-              Substitute("V$0", 1 + query.hiveserver2_protocol_version), pool, slot));
+              query.hiveserver2_protocol_version_formatted(), pool, slot));
         }
         break;
       case TQueryTableColumn::DB_USER:
diff --git a/be/src/service/query-state-record.cc b/be/src/service/query-state-record.cc
index 09d9b5720..1b91ed41d 100644
--- a/be/src/service/query-state-record.cc
+++ b/be/src/service/query-state-record.cc
@@ -26,9 +26,10 @@
 #include <vector>
 
 #include <boost/algorithm/string.hpp>
-
 #include <gutil/strings/numbers.h>
 #include <gutil/strings/strcat.h>
+#include <gutil/strings/substitute.h>
+
 #include "runtime/coordinator.h"
 #include "scheduling/admission-controller.h"
 #include "scheduling/scheduler.h"
@@ -414,6 +415,10 @@ bool QueryStateExpanded::events_timeline_empty() const {
       base_state->event_sequence.timestamps.empty();
 }
 
+string QueryStateExpanded::hiveserver2_protocol_version_formatted() const {
+  return strings::Substitute("V$0", 1 + hiveserver2_protocol_version);
+}
+
 bool PerHostPeakMemoryComparator(const pair<TNetworkAddress, PerHostState>& a,
     const pair<TNetworkAddress, PerHostState>& b) {
   return a.second.peak_memory_usage < b.second.peak_memory_usage;
diff --git a/be/src/service/query-state-record.h b/be/src/service/query-state-record.h
index 0dd02ca5b..5c3e5b46f 100644
--- a/be/src/service/query-state-record.h
+++ b/be/src/service/query-state-record.h
@@ -355,6 +355,11 @@ struct QueryStateExpanded {
   /// Events Timeline Iterator
   EventsTimelineIterator EventsTimeline() const;
 
+  /// Builds a shortened string containing the hiveserver2 protocol version of the Impala
+  /// client (if connected with the HS2 protocol). The returned string will have the
+  /// format 'V' followed by the version number, for example 'V7'.
+  std::string hiveserver2_protocol_version_formatted() const;
+
   // Source tables accessed by this query.
   std::vector<TTableName> tables;
 
diff --git a/be/src/service/workload-management-worker.cc b/be/src/service/workload-management-worker.cc
index 62d4126b8..870688db3 100644
--- a/be/src/service/workload-management-worker.cc
+++ b/be/src/service/workload-management-worker.cc
@@ -135,7 +135,7 @@ const std::array<FieldParser, NumQueryTableColumns> FIELD_PARSERS = {{
     {[](FieldParserContext& ctx) {
       ctx.sql << "'";
       if (ctx.record->session_type == TSessionType::HIVESERVER2) {
-        ctx.sql << ctx.record->hiveserver2_protocol_version;
+        ctx.sql << ctx.record->hiveserver2_protocol_version_formatted();
       }
       ctx.sql << "'";
     }},
diff --git a/tests/custom_cluster/test_query_live.py b/tests/custom_cluster/test_query_live.py
index 83e222ec9..a0868d997 100644
--- a/tests/custom_cluster/test_query_live.py
+++ b/tests/custom_cluster/test_query_live.py
@@ -83,12 +83,27 @@ class TestQueryLive(CustomClusterTestSuite):
         assert False, "did not find host {}".format(host)
     assert len(actual_hosts) == 0, "did not find all expected hosts"
 
+  @CustomClusterTestSuite.with_args(impalad_args="--enable_workload_mgmt "
+                                                 "--cluster_id=test_query_live",
+                                    catalogd_args="--enable_workload_mgmt",
+                                    disable_log_buffering=True)
+  def test_query_live_hs2(self):
+    """Asserts the query live table shows and allows filtering queries. Uses the hs2
+       client to connect to Impala."""
+    # Use a query that reads data from disk for the 1st one, as more representative and a
+    # better fit for assert_query.
+    result1 = self.hs2_client.execute("select * from functional.alltypes",
+        fetch_profile_after_close=True)
+    assert_query('sys.impala_query_live', self.hs2_client, 'test_query_live',
+                 result1.runtime_profile)
+
   @CustomClusterTestSuite.with_args(impalad_args="--enable_workload_mgmt "
                                                  "--cluster_id=test_query_live",
                                     catalogd_args="--enable_workload_mgmt",
                                     disable_log_buffering=True)
   def test_query_live(self):
-    """Asserts the query live table shows and allows filtering queries."""
+    """Asserts the query live table shows and allows filtering queries. Uses the default
+       client to connect to Impala."""
     # Use a query that reads data from disk for the 1st one, as more representative and a
     # better fit for assert_query.
     result1 = self.client.execute("select * from functional.alltypes",
diff --git a/tests/util/workload_management.py b/tests/util/workload_management.py
index a98c89fad..9b2f567db 100644
--- a/tests/util/workload_management.py
+++ b/tests/util/workload_management.py
@@ -144,7 +144,7 @@ def assert_query(query_tbl, client, expected_cluster_id="", raw_profile=None,
   if session_type.group(1) == "HIVESERVER2":
     hs2_ver = re.search(r'\n\s+HiveServer2 Protocol Version:\s+(.*)', profile_text)
     assert hs2_ver is not None
-    assert value == "HIVE_CLI_SERVICE_PROTOCOL_{0}".format(hs2_ver.group(1))
+    assert value == hs2_ver.group(1)
   else:
     assert value == ""
 

Reply via email to