This is an automated email from the ASF dual-hosted git repository.
stigahuang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
The following commit(s) were added to refs/heads/master by this push:
new 96ae16b60 IMPALA-13584: Add option to shows num row report in
impala-shell
96ae16b60 is described below
commit 96ae16b60bc37e8b38ab3d598c7e867ea1e1d427
Author: Riza Suminto <[email protected]>
AuthorDate: Thu Apr 24 09:34:00 2025 -0700
IMPALA-13584: Add option to shows num row report in impala-shell
In beeswax, all statements except USE print
'Fetched X row(s) in Ys', while in HS2 some statements (REFRESH,
INVALIDATE METADATA) do not print it. While these statements always
return 0 rows, the amount of time spent on the statement can be
useful.
This patch modifies impala-shell to let it print the elapsed time for
such a query, even if the query is not expected to return result metadata.
Added the --beeswax_compat_num_rows option to impala-shell. It defaults to
False. If this option is set (True), 'Fetched 0 row(s) in' will be
printed for all Impala protocols, just like beeswax. The one exception
is the USE query, which will remain silent.
Testing:
- Added test_beeswax_compat_num_rows in test_shell_interactive.py.
- Pass test_shell_interactive.py.
Change-Id: Id76ede98c514f73ff1dfa123a0d951e80e7508b4
Reviewed-on: http://gerrit.cloudera.org:8080/22813
Reviewed-by: Impala Public Jenkins <[email protected]>
Tested-by: Impala Public Jenkins <[email protected]>
---
shell/impala_client.py | 3 ++-
shell/impala_shell.py | 13 +++++++++++++
shell/option_parser.py | 6 ++++++
tests/shell/test_shell_interactive.py | 23 +++++++++++++++++++++++
4 files changed, 44 insertions(+), 1 deletion(-)
diff --git a/shell/impala_client.py b/shell/impala_client.py
index 8e2c2d1f6..49fd1b848 100755
--- a/shell/impala_client.py
+++ b/shell/impala_client.py
@@ -1395,8 +1395,9 @@ class ImpalaBeeswaxClient(ImpalaClient):
def expect_result_metadata(self, query_str, query_handle):
# Beeswax doesn't provide us this metadata; try to guess whether to expect
it based
# on the query string.
+ query_substr = query_str[:3].lower()
excluded_query_types = ['use']
- if True in set(map(query_str.startswith, excluded_query_types)):
+ if any(map(query_substr.startswith, excluded_query_types)):
return False
return True
diff --git a/shell/impala_shell.py b/shell/impala_shell.py
index 821b90f5b..feb655294 100755
--- a/shell/impala_shell.py
+++ b/shell/impala_shell.py
@@ -265,6 +265,9 @@ class ImpalaShell(cmd.Cmd, object):
# Tracks query handle of the last query executed. Used by the 'profile'
command.
self.last_query_handle = None
+ # Whether to print num rows report at the end of each query like beeswax,
or not.
+ self.beeswax_compat_num_rows = options.beeswax_compat_num_rows
+
# live_summary and live_progress are turned off in strict_hs2_protocol mode
if options.strict_hs2_protocol:
if options.live_summary:
@@ -1389,6 +1392,15 @@ class ImpalaShell(cmd.Cmd, object):
else:
return "Time elapsed: %2.2fs" % time_elapsed
+ def _format_no_result_set(self, query_str, start_time):
+ # Print the elapsed time except for USE.
+ if not (query_str[:3].lower().startswith('use')):
+ num_fetched_rows = 0 if self.beeswax_compat_num_rows else None
+ time_elapsed = time.time() - start_time
+ row_report = self._format_num_rows_report(
+ time_elapsed, num_fetched_rows=num_fetched_rows)
+ self._print_if_verbose(row_report)
+
def _execute_stmt(self, query_str, is_dml=False, print_web_link=False):
"""Executes 'query_str' with options self.set_query_options on the Impala
server.
The query is run to completion and close with any results, warnings,
errors or
@@ -1433,6 +1445,7 @@ class ImpalaShell(cmd.Cmd, object):
if not self.imp_client.expect_result_metadata(query_str,
self.last_query_handle):
# Close the query
self.imp_client.close_query(self.last_query_handle)
+ self._format_no_result_set(query_str, start_time)
return CmdStatus.SUCCESS
self._format_outputstream()
diff --git a/shell/option_parser.py b/shell/option_parser.py
index 6c4e90837..6784c8199 100755
--- a/shell/option_parser.py
+++ b/shell/option_parser.py
@@ -362,6 +362,12 @@ def get_option_parser(defaults):
dest="hs2_x_forward", default=None,
help="When using the hs2-http protocol, set this value in
the "
"X-Forwarded-For header. This is primarily for testing
purposes.")
+ parser.add_option("--beeswax_compat_num_rows",
dest="beeswax_compat_num_rows",
+ action="store_true",
+ help="If specified, always print num rows report at the
end of query "
+ "execution, even if query does not expect to fetch any
rows. "
+ "This is the default behavior when using beeswax protocol.
"
+ "Default to false for other Impala protocol.")
# add default values to the help text
for option in parser.option_list:
diff --git a/tests/shell/test_shell_interactive.py
b/tests/shell/test_shell_interactive.py
index 5114b02f4..94767cdbd 100755
--- a/tests/shell/test_shell_interactive.py
+++ b/tests/shell/test_shell_interactive.py
@@ -1070,6 +1070,29 @@ class TestImpalaShellInteractive(ImpalaTestSuite):
result = run_impala_shell_interactive(vector, query)
assert '| 1 ' in result.stdout
+ def test_beeswax_compat_num_rows(self, vector):
+ if vector.get_value('strict_hs2_protocol'):
+ pytest.skip("This test uses Impala specific query.")
+ queries = [
+ "INVALIDATE METADATA functional.alltypes",
+ "REFRESH functional.alltypessmall",
+ "USE functional_parquet",
+ ]
+ for query in queries:
+ for arg in [[], ['--beeswax_compat_num_rows']]:
+ result = run_impala_shell_interactive(vector, query, shell_args=arg)
+ row_report = 'Fetched 0 row(s) in'
+ if vector.get_value('protocol') != 'beeswax' and not arg:
+ row_report = 'Time elapsed: '
+ has_row_report = row_report in result.stderr
+ if query.startswith('USE'):
+ # USE query should remain silent.
+ assert not has_row_report, (
+ "Expected no '{0}' but one exist:\n{1}").format(row_report,
result.stderr)
+ else:
+ assert has_row_report, (
+ "Expected '{0}' but none exist:\n{1}").format(row_report,
result.stderr)
+
def test_shell_prompt(self, vector):
shell_cmd = get_shell_cmd(vector)
proc = spawn_shell(shell_cmd)