This is an automated email from the ASF dual-hosted git repository.

michaelsmith pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git


The following commit(s) were added to refs/heads/master by this push:
     new 09f15eea7 IMPALA-12517: Decode binary data with Python 3
09f15eea7 is described below

commit 09f15eea78e54464699cf7e42b4433d5b74e8231
Author: Michael Smith <[email protected]>
AuthorDate: Wed Oct 25 16:41:38 2023 -0700

    IMPALA-12517: Decode binary data with Python 3
    
    When impala-shell receives binary data with the HS2 protocol, it uses a
    stringifier to decode it. In Python 3, 'str' on binary data wraps it in
    "b'...'"; to get equivalent output to 'str' in Python 2, we need to
    decode as UTF-8 and handle errors.
    
    Adds a test case for how impala-shell formats binary data.
    
    Change-Id: I9222cd1ac081a38ab2b37d58628faac0812695ec
    Reviewed-on: http://gerrit.cloudera.org:8080/20624
    Reviewed-by: Impala Public Jenkins <[email protected]>
    Tested-by: Impala Public Jenkins <[email protected]>
---
 shell/value_converter.py              |  9 ++++++++-
 tests/shell/test_shell_commandline.py | 25 +++++++++++++++++++++++++
 2 files changed, 33 insertions(+), 1 deletion(-)

diff --git a/shell/value_converter.py b/shell/value_converter.py
index 92a4259c4..4c33fb004 100644
--- a/shell/value_converter.py
+++ b/shell/value_converter.py
@@ -17,6 +17,8 @@
 
 from TCLIService.TCLIService import TTypeId
 
+import sys
+
 
 class ValueConverter(object):
 
@@ -29,6 +31,11 @@ class ValueConverter(object):
 
 class HS2ValueConverter(ValueConverter):
 
+  def __get_binary_converter(self):
+      if sys.version_info.major < 3:
+          return str
+      return lambda s: s.decode(errors='replace')
+
   def __init__(self):
       self.value_converters = {
           TTypeId.BOOLEAN_TYPE: lambda b: 'true' if b else 'false',
@@ -36,7 +43,7 @@ class HS2ValueConverter(ValueConverter):
           TTypeId.SMALLINT_TYPE: str,
           TTypeId.INT_TYPE: str,
           TTypeId.BIGINT_TYPE: str,
-          TTypeId.BINARY_TYPE: str,
+          TTypeId.BINARY_TYPE: self.__get_binary_converter(),
           TTypeId.FLOAT_TYPE: str,
           TTypeId.DOUBLE_TYPE: str
       }
diff --git a/tests/shell/test_shell_commandline.py b/tests/shell/test_shell_commandline.py
index 750a100c2..b0cf436fd 100644
--- a/tests/shell/test_shell_commandline.py
+++ b/tests/shell/test_shell_commandline.py
@@ -1185,6 +1185,31 @@ class TestImpalaShell(ImpalaTestSuite):
     assert "| a    | b     |" in result.stdout, result.stdout
     assert "| true | false |" in result.stdout, result.stdout
 
+  def test_binary_display(self, vector):
+    """Test that binary values are displayed correctly."""
+    query = "select binary_col from functional.binary_tbl"
+    result = run_impala_shell_cmd(vector, ['-q', query])
+    assert "| binary1            |" in result.stdout, result.stdout
+    assert "| NULL               |" in result.stdout, result.stdout
+    assert "|                    |" in result.stdout, result.stdout
+    assert "| árvíztűrőtükörfúró |" in result.stdout, result.stdout
+    assert "| 你好hello          |" in result.stdout, result.stdout
+    assert "| \x00\xef\xbf\xbd\x00\xef\xbf\xbd                 |" in result.stdout, \
+        result.stdout
+    assert '| \xef\xbf\xbdD3"\x11\x00              |' in result.stdout, result.stdout
+
+  def test_binary_as_string(self, vector):
+    query = """select cast(binary_col as string) from functional.binary_tbl
+               where string_col != "invalid utf8" """
+    result = run_impala_shell_cmd(vector, ['-q', query])
+    # Column length omitted because some strict HS2 protocol returns header "binary_col"
+    # while others return "cast(binary_col as string)".
+    assert "| binary1            " in result.stdout, result.stdout
+    assert "| NULL               " in result.stdout, result.stdout
+    assert "|                    " in result.stdout, result.stdout
+    assert "| árvíztűrőtükörfúró " in result.stdout, result.stdout
+    assert "| 你好hello          " in result.stdout, result.stdout
+
   def test_null_values(self, vector):
     """Test that null values are displayed correctly."""
     if vector.get_value('strict_hs2_protocol'):

Reply via email to