This is an automated email from the ASF dual-hosted git repository.

michaelsmith pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 14035065fac5b047fb8395b56d2c78000ba94dec
Author: Csaba Ringhofer <[email protected]>
AuthorDate: Tue May 16 16:18:46 2023 +0200

    IMPALA-12145: Fix profiles with non-ascii character in impala-shell (python2)

    As __future__.unicode_literals is imported in impala-shell
    concatenating an str with a literal leads to decoding the
    string with 'ascii' codec which fails if there are non-ascii
    characters. Converting the literal to str solves the issue.

    Testing:
    - added regression test + ran related EE tests

    Change-Id: I99b72dd262fc7c382e8baee1dce7592880c84de2
    Reviewed-on: http://gerrit.cloudera.org:8080/19893
    Reviewed-by: Impala Public Jenkins <[email protected]>
    Tested-by: Impala Public Jenkins <[email protected]>
---
 shell/impala_shell.py                 |  9 ++++++---
 shell/shell_output.py                 | 20 ++++++++++++++++++++
 tests/shell/test_shell_commandline.py | 10 ++++++++++
 3 files changed, 36 insertions(+), 3 deletions(-)

diff --git a/shell/impala_shell.py b/shell/impala_shell.py
index e531d11b8..ddf2e2145 100755
--- a/shell/impala_shell.py
+++ b/shell/impala_shell.py
@@ -45,7 +45,8 @@ from impala_client import ImpalaHS2Client, StrictHS2Client, \
 from impala_shell_config_defaults import impala_shell_defaults
 from option_parser import get_option_parser, get_config_from_file
 from shell_output import (DelimitedOutputFormatter, OutputStream, PrettyOutputFormatter,
-                          OverwritingStdErrOutputStream, VerticalOutputFormatter)
+                          OverwritingStdErrOutputStream, VerticalOutputFormatter,
+                          match_string_type)
 from subprocess import call
 from shell_exceptions import (RPCException, DisconnectedException, QueryStateException,
     QueryCancelledByShellException, MissingThriftMethodException)
@@ -1138,11 +1139,13 @@ class ImpalaShell(cmd.Cmd, object):
     """
     if self.show_profiles or status:
       if profile:
-        query_profile_prefix = "Query Runtime Profile:\n"
+        query_profile_prefix = match_string_type("Query Runtime Profile:\n", profile)
         if profile_display_mode == QueryAttemptDisplayModes.ALL:
           print(query_profile_prefix + profile)
           if failed_profile:
-            print("Failed Query Runtime Profile(s):\n" + failed_profile)
+            failed_profile_prefix = \
+                match_string_type("Failed Query Runtime Profile(s):\n", failed_profile)
+            print(failed_profile_prefix + failed_profile)
         elif profile_display_mode == QueryAttemptDisplayModes.LATEST:
           print(query_profile_prefix + profile)
         elif profile_display_mode == QueryAttemptDisplayModes.ORIGINAL:
diff --git a/shell/shell_output.py b/shell/shell_output.py
index b417ceacc..070f66910 100644
--- a/shell/shell_output.py
+++ b/shell/shell_output.py
@@ -29,6 +29,26 @@ except ImportError:
   from io import StringIO  # python 3


+def match_string_type(str_to_convert, reference_str):
+  """ Returns 'str_to_convert' converted to the same type as 'reference_str'.
+      Can handle only str and unicode. NOOP in Python 3.
+  """
+  if sys.version_info.major >= 3:
+    assert isinstance(reference_str, str)
+    assert isinstance(str_to_convert, str)
+    return str_to_convert
+
+  if type(str_to_convert) == type(reference_str):
+    return str_to_convert
+
+  if isinstance(reference_str, str):
+    assert isinstance(str_to_convert, unicode)
+    return str_to_convert.encode('UTF-8')
+  else:
+    assert isinstance(reference_str, str)
+    return str_to_convert.decode('UTF-8')
+
+
 class PrettyOutputFormatter(object):
   def __init__(self, prettytable):
     self.prettytable = prettytable
diff --git a/tests/shell/test_shell_commandline.py b/tests/shell/test_shell_commandline.py
index 9b209f86d..9bbbac41c 100644
--- a/tests/shell/test_shell_commandline.py
+++ b/tests/shell/test_shell_commandline.py
@@ -613,6 +613,16 @@ class TestImpalaShell(ImpalaTestSuite):
     assert 'UnicodeDecodeError' not in result.stderr
     assert RUSSIAN_CHARS.encode('utf-8') in result.stdout

+  def test_international_characters_profile(self, vector):
+    """IMPALA-12145: ensure we can handle international characters in the profile. """
+    if vector.get_value('strict_hs2_protocol'):
+      pytest.skip("Profile not supported in strict hs2 mode.")
+    text = RUSSIAN_CHARS.encode('utf-8')
+    args = ['-o', '/dev/null', '-p', '-q', "select '{0}'".format(text)]
+    result = run_impala_shell_cmd(vector, args)
+    assert 'UnicodeDecodeError' not in result.stderr
+    assert text in result.stdout
+
   def test_utf8_decoding_error_handling(self, vector):
     """IMPALA-10145,IMPALA-10299: Regression tests for elegantly handling malformed utf-8
     characters."""
