This is an automated email from the ASF dual-hosted git repository.
michaelsmith pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
The following commit(s) were added to refs/heads/master by this push:
new 09f15eea7 IMPALA-12517: Decode binary data with Python 3
09f15eea7 is described below
commit 09f15eea78e54464699cf7e42b4433d5b74e8231
Author: Michael Smith <[email protected]>
AuthorDate: Wed Oct 25 16:41:38 2023 -0700
IMPALA-12517: Decode binary data with Python 3
When impala-shell receives binary data with the HS2 protocol, it uses a
stringifier to decode it. In Python 3, 'str' on binary data wraps it in
"b'...'"; to get equivalent output to 'str' in Python 2, we need to
decode as UTF-8 and handle errors.
Adds a test case for how impala-shell formats binary data.
Change-Id: I9222cd1ac081a38ab2b37d58628faac0812695ec
Reviewed-on: http://gerrit.cloudera.org:8080/20624
Reviewed-by: Impala Public Jenkins <[email protected]>
Tested-by: Impala Public Jenkins <[email protected]>
---
shell/value_converter.py | 9 ++++++++-
tests/shell/test_shell_commandline.py | 25 +++++++++++++++++++++++++
2 files changed, 33 insertions(+), 1 deletion(-)
diff --git a/shell/value_converter.py b/shell/value_converter.py
index 92a4259c4..4c33fb004 100644
--- a/shell/value_converter.py
+++ b/shell/value_converter.py
@@ -17,6 +17,8 @@
from TCLIService.TCLIService import TTypeId
+import sys
+
class ValueConverter(object):
@@ -29,6 +31,11 @@ class ValueConverter(object):
class HS2ValueConverter(ValueConverter):
+ def __get_binary_converter(self):
+ if sys.version_info.major < 3:
+ return str
+ return lambda s: s.decode(errors='replace')
+
def __init__(self):
self.value_converters = {
TTypeId.BOOLEAN_TYPE: lambda b: 'true' if b else 'false',
@@ -36,7 +43,7 @@ class HS2ValueConverter(ValueConverter):
TTypeId.SMALLINT_TYPE: str,
TTypeId.INT_TYPE: str,
TTypeId.BIGINT_TYPE: str,
- TTypeId.BINARY_TYPE: str,
+ TTypeId.BINARY_TYPE: self.__get_binary_converter(),
TTypeId.FLOAT_TYPE: str,
TTypeId.DOUBLE_TYPE: str
}
diff --git a/tests/shell/test_shell_commandline.py b/tests/shell/test_shell_commandline.py
index 750a100c2..b0cf436fd 100644
--- a/tests/shell/test_shell_commandline.py
+++ b/tests/shell/test_shell_commandline.py
@@ -1185,6 +1185,31 @@ class TestImpalaShell(ImpalaTestSuite):
assert "| a | b |" in result.stdout, result.stdout
assert "| true | false |" in result.stdout, result.stdout
+ def test_binary_display(self, vector):
+ """Test that binary values are displayed correctly."""
+ query = "select binary_col from functional.binary_tbl"
+ result = run_impala_shell_cmd(vector, ['-q', query])
+ assert "| binary1 |" in result.stdout, result.stdout
+ assert "| NULL |" in result.stdout, result.stdout
+ assert "| |" in result.stdout, result.stdout
+ assert "| árvíztűrőtükörfúró |" in result.stdout, result.stdout
+ assert "| 你好hello |" in result.stdout, result.stdout
+ assert "| \x00\xef\xbf\xbd\x00\xef\xbf\xbd |" in result.stdout, \
+ result.stdout
+ assert '| \xef\xbf\xbdD3"\x11\x00 |' in result.stdout, result.stdout
+
+ def test_binary_as_string(self, vector):
+ query = """select cast(binary_col as string) from functional.binary_tbl
+ where string_col != "invalid utf8" """
+ result = run_impala_shell_cmd(vector, ['-q', query])
+ # Column length omitted because some strict HS2 protocol returns header "binary_col"
+ # while others return "cast(binary_col as string)".
+ assert "| binary1 " in result.stdout, result.stdout
+ assert "| NULL " in result.stdout, result.stdout
+ assert "| " in result.stdout, result.stdout
+ assert "| árvíztűrőtükörfúró " in result.stdout, result.stdout
+ assert "| 你好hello " in result.stdout, result.stdout
+
def test_null_values(self, vector):
"""Test that null values are displayed correctly."""
if vector.get_value('strict_hs2_protocol'):