This is an automated email from the ASF dual-hosted git repository. csringhofer pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
commit 4e3c9d09d8a466d45be6bea02b8fea997f826ceb Author: Balazs Hevele <[email protected]> AuthorDate: Wed Jan 28 11:23:10 2026 +0100 IMPALA-14673 impala-shell: profile format argument Selecting the format was already supported by Impala's HS2 Interface, but so far we only used the default STRING format in impala shell. Added a command line argument to set which format to use: impala-shell.sh --profile_format=Format It can also be changed at runtime as a shell option, in the shell: set profile_format=Format; Format can be string/base64/json. If not set, the default string format will be used. With base64 format, the output can be parsed with impala-profile-tool. Change-Id: I032a5d0436f123d737d46d0b8cecbae09888a789 Reviewed-on: http://gerrit.cloudera.org:8080/23884 Reviewed-by: Impala Public Jenkins <[email protected]> Tested-by: Impala Public Jenkins <[email protected]> --- shell/impala_shell/impala_client.py | 27 +++++++++++++--- shell/impala_shell/impala_shell.py | 12 ++++++-- shell/impala_shell/impala_shell_config_defaults.py | 1 + shell/impala_shell/option_parser.py | 4 +++ tests/shell/test_shell_commandline.py | 36 ++++++++++++++++++++++ 5 files changed, 74 insertions(+), 6 deletions(-) diff --git a/shell/impala_shell/impala_client.py b/shell/impala_shell/impala_client.py index 61a3f227c..c56e88b15 100644 --- a/shell/impala_shell/impala_client.py +++ b/shell/impala_shell/impala_client.py @@ -78,6 +78,7 @@ from impala_thrift_gen.TCLIService.TCLIService import ( TStatusCode, TTypeId, ) +from impala_thrift_gen.RuntimeProfile.ttypes import TRuntimeProfileFormat # Getters to extract HS2's representation of values to the display version. # An entry must be added to this map for each supported type. 
HS2's TColumn has many @@ -360,7 +361,7 @@ class ImpalaClient(object): False otherwise.""" raise NotImplementedError() - def get_runtime_profile(self, last_query_handle): + def get_runtime_profile(self, last_query_handle, profile_format="string"): # noqa: U100 """Get the runtime profile string from the server. Returns None if an error was encountered. If the query was retried, returns the profile of the failed attempt as well; the tuple (profile, failed_profile) is returned where 'profile' is @@ -947,15 +948,27 @@ class ImpalaHS2Client(ImpalaClient): finally: self._clear_current_query_handle() - def get_runtime_profile(self, last_query_handle): + def get_runtime_profile(self, last_query_handle, profile_format="string"): try: self._set_current_query_handle(last_query_handle) def GetRuntimeProfile(req): return self.imp_service.GetRuntimeProfile(req) + + # convert profile format from string to int (enum id) + profile_format_key = profile_format.upper() + if profile_format_key in TRuntimeProfileFormat._NAMES_TO_VALUES \ + and profile_format_key != "THRIFT": + profile_format_id = TRuntimeProfileFormat._NAMES_TO_VALUES[profile_format_key] + else: + err_msg = "Invalid profile format value {0}." + print(err_msg.format(profile_format), file=sys.stderr) + return None, None + # GetRuntimeProfile rpc is idempotent and so safe to retry. 
profile_req = TGetRuntimeProfileReq(last_query_handle, self.session_handle, + format=profile_format_id, include_query_attempts=True) resp = self._do_hs2_rpc(GetRuntimeProfile, profile_req, retry_on_error=True) self._check_hs2_rpc_status(resp.status) @@ -1240,7 +1253,8 @@ class StrictHS2Client(ImpalaHS2Client): def get_error_log(self, last_query_handle): return "" - def get_runtime_profile(self, last_query_handle): + def get_runtime_profile(self, last_query_handle, # noqa: U100 + profile_format="string"): # noqa: U100 return None, None def _populate_query_options(self): @@ -1389,7 +1403,12 @@ class ImpalaBeeswaxClient(ImpalaClient): lambda: self.imp_service.Cancel(last_query_handle), False) return rpc_status == RpcStatus.OK - def get_runtime_profile(self, last_query_handle): + def get_runtime_profile(self, last_query_handle, profile_format="string"): + if profile_format.upper() != "STRING": + err_msg = "Invalid profile format value {0}. Beeswax only supports string." + print(err_msg.format(profile_format), file=sys.stderr) + return None, None + profile, rpc_status = self._do_beeswax_rpc( lambda: self.imp_service.GetRuntimeProfile(last_query_handle)) if rpc_status == RpcStatus.OK and profile: diff --git a/shell/impala_shell/impala_shell.py b/shell/impala_shell/impala_shell.py index 7855fb794..12326b4e3 100644 --- a/shell/impala_shell/impala_shell.py +++ b/shell/impala_shell/impala_shell.py @@ -204,6 +204,7 @@ class ImpalaShell(cmd.Cmd, object): 'OUTPUT_FILE': (lambda x: None if x == '' else x, "output_file"), 'PROFILE_OUTPUT': (lambda x: None if x == '' else x, "profile_output"), 'VERTICAL': (lambda x: x in ImpalaShell.TRUE_STRINGS, "vertical"), + 'PROFILE_FORMAT': (lambda x: "string" if x == '' else x, "profile_format") } # Minimum time in seconds between two calls to get the exec summary. 
@@ -265,6 +266,7 @@ class ImpalaShell(cmd.Cmd, object): self.show_profiles = options.show_profiles self.profile_output = options.profile_output + self.profile_format = options.profile_format self.rpc_stdout = options.rpc_stdout self.rpc_file = options.rpc_file @@ -1243,6 +1245,12 @@ class ImpalaShell(cmd.Cmd, object): out_file = open(self.profile_output, 'a') query_profile_prefix = match_string_type("Query Runtime Profile:\n", profile) + if self.profile_format.upper() == "BASE64": + # Change prefix so that the output can be read by impala-profile-tool + timestamp = str(int(time.time())) + query_id = self.imp_client.get_query_id_str(self.last_query_handle) + query_profile_prefix = timestamp + " " + query_id + " " + if profile_display_mode == QueryAttemptDisplayModes.ALL: print(query_profile_prefix + profile, file=out_file) if failed_profile: @@ -1317,7 +1325,7 @@ class ImpalaShell(cmd.Cmd, object): return CmdStatus.ERROR profile, failed_profile = self.imp_client.get_runtime_profile( - self.last_query_handle) + self.last_query_handle, self.profile_format) return self.print_runtime_profile(profile, failed_profile, profile_display_mode) def do_select(self, args): @@ -1552,7 +1560,7 @@ class ImpalaShell(cmd.Cmd, object): self.imp_client.close_query(self.last_query_handle) if self.show_profiles: profile, retried_profile = self.imp_client.get_runtime_profile( - self.last_query_handle) + self.last_query_handle, self.profile_format) self.print_runtime_profile(profile, retried_profile) return CmdStatus.SUCCESS except QueryCancelledByShellException as e: diff --git a/shell/impala_shell/impala_shell_config_defaults.py b/shell/impala_shell/impala_shell_config_defaults.py index c52e4cfbd..9e914e5c9 100644 --- a/shell/impala_shell/impala_shell_config_defaults.py +++ b/shell/impala_shell/impala_shell_config_defaults.py @@ -46,6 +46,7 @@ impala_shell_defaults = { 'query': None, 'query_file': None, 'show_profiles': False, + 'profile_format': 'string', 'rpc_stdout': False, 
'rpc_file': None, 'ssl': False, diff --git a/shell/impala_shell/option_parser.py b/shell/impala_shell/option_parser.py index cb06bc502..abc75b4ef 100644 --- a/shell/impala_shell/option_parser.py +++ b/shell/impala_shell/option_parser.py @@ -209,6 +209,10 @@ def get_option_parser(defaults): help="If set, query profiles will be written to the " "given file. Profiles for multiple semicolon-terminated " "queries will be appended to the same file") + parser.add_option("--profile_format", dest="profile_format", default="STRING", + help="Query profile format. Valid inputs are " + "['string', 'base64', 'json']. Format base64 " + "is compatible with impala-profile-tool.") parser.add_option("--rpc_stdout", dest="rpc_stdout", action="store_true", help="Output hs2 rpc details to stdout. " diff --git a/tests/shell/test_shell_commandline.py b/tests/shell/test_shell_commandline.py index 50f7a88a7..a79b0260d 100644 --- a/tests/shell/test_shell_commandline.py +++ b/tests/shell/test_shell_commandline.py @@ -455,6 +455,42 @@ class TestImpalaShell(ImpalaTestSuite): assert len(re.findall(regex, lines)) == 1, \ "Could not detect profile in the file, file content: %s" % lines + def test_runtime_profile_format(self, vector): + if vector.get_value('strict_hs2_protocol'): + pytest.skip("Runtime profile is not supported in strict hs2 mode.") + + string_regex = re.compile(r"Query \(id=[a-z0-9:]+\):") + base64_regex = re.compile(r"\b[a-zA-Z0-9/+=]{64,}\b") + json_regex = re.compile(r"{\"contents\":.*}") + + # test default is string + args = ['-q', 'select 1; profile;'] + result_set = run_impala_shell_cmd(vector, args) + assert len(re.findall(string_regex, result_set.stdout)) == 1, \ + "Could not detect string profile by default, stdout: %s" % result_set.stdout + + # test string format explicitly + args = ['-q', 'select 1; profile;', '--profile_format=string'] + result_set = run_impala_shell_cmd(vector, args) + assert len(re.findall(string_regex, result_set.stdout)) == 1, \ + "Could not detect 
string profile by default, stdout: %s" % result_set.stdout + + # test json format + args = ['-q', 'select 1; profile;', '--profile_format=json'] + result_set = run_impala_shell_cmd(vector, args) + assert len(re.findall(string_regex, result_set.stdout)) == 0, \ + "Did not expect string profile with json format, stdout: %s" % result_set.stdout + assert len(re.findall(json_regex, result_set.stdout)) == 1, \ + "Could not detect json profile, stdout: %s" % result_set.stdout + + # test base64 format + args = ['-q', 'select 1; profile;', '--profile_format=base64'] + result_set = run_impala_shell_cmd(vector, args) + assert len(re.findall(string_regex, result_set.stdout)) == 0, \ + "Did not expect string profile with base64 format, stdout: %s" % result_set.stdout + assert len(re.findall(base64_regex, result_set.stdout)) == 1, \ + "Could not detect base64 profile, stdout: %s" % result_set.stdout + def test_runtime_profile_referenced_tables(self, vector, unique_database): if vector.get_value('strict_hs2_protocol'): pytest.skip("Runtime profile is not supported in strict hs2 mode.")
