This is an automated email from the ASF dual-hosted git repository.

joemcdonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 4fea5260cf77bbbfa23457ff15907f5aef55266e
Author: Csaba Ringhofer <[email protected]>
AuthorDate: Mon May 5 19:54:49 2025 +0200

    IMPALA-14030: Fix buffer underflow when base64 decoding 0 length binaries
    
    The issue didn't cause problems under normal circumstances but ASAN
    tests caught it in JSON tests enabled in IMPALA-12927.
    
    Changed text parsing logic to skip base64 decoding for empty binaries.
    
    Also fixed Base64DecodeBufLen() with len=0 and added unit tests, though
    this function is not used with len=0 outside BE tests.
    
    Change-Id: I511cff8cec319d03d494a342f2cbb4a251cb893e
    Reviewed-on: http://gerrit.cloudera.org:8080/22855
    Reviewed-by: Riza Suminto <[email protected]>
    Reviewed-by: Zoltan Borok-Nagy <[email protected]>
    Tested-by: Impala Public Jenkins <[email protected]>
---
 be/src/exec/text-converter.inline.h |  2 +-
 be/src/util/coding-util-test.cc     | 10 ++++++++++
 be/src/util/coding-util.cc          |  6 ++++++
 3 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/be/src/exec/text-converter.inline.h 
b/be/src/exec/text-converter.inline.h
index 6e1474607..15df3e349 100644
--- a/be/src/exec/text-converter.inline.h
+++ b/be/src/exec/text-converter.inline.h
@@ -67,7 +67,7 @@ inline bool TextConverter::WriteSlot(const SlotDescriptor* 
slot_desc, Tuple* tup
           !(len != 0 && (copy_string || need_escape));
 
       bool base64_decode = false;
-      if (type.IsBinaryType() && decode_binary_) {
+      if (type.IsBinaryType() && decode_binary_ && len != 0) {
         base64_decode = true;
         reuse_data = false;
         int64_t out_len;
diff --git a/be/src/util/coding-util-test.cc b/be/src/util/coding-util-test.cc
index 7914de148..8a2f855c8 100644
--- a/be/src/util/coding-util-test.cc
+++ b/be/src/util/coding-util-test.cc
@@ -144,6 +144,16 @@ TEST(Base64Test, Basic) {
   TestBase64(string("a\0b\0", 4), "YQBiAA==");
 }
 
+TEST(Base64Test, TinyLength) {
+  string str = "====";
+  int64_t len;
+  // Length of 1 should fail as it is not divisible with 4.
+  EXPECT_FALSE(Base64DecodeBufLen(str.data(), 1, &len));
+  // Length of 0 is valid and should return 0 (regression test for 
IMPALA-14030).
+  EXPECT_TRUE(Base64DecodeBufLen(str.data()+2, 0, &len));
+  EXPECT_EQ(len, 0);
+}
+
 TEST(Base64Test, VariousInitialVariableValues) {
   TestBase64EncodeWithInitialValues(0, 0);
   TestBase64EncodeWithInitialValues(5, -10);
diff --git a/be/src/util/coding-util.cc b/be/src/util/coding-util.cc
index 7a7d1f6ae..f4578e4d4 100644
--- a/be/src/util/coding-util.cc
+++ b/be/src/util/coding-util.cc
@@ -191,10 +191,16 @@ bool Base64DecodeBufLen(const char* in, int64_t in_len, 
int64_t* out_max) {
   // producing one fewer byte of output. This is repeated if the 
second-to-last character
   // is '='. One more byte must be allocated to account for Base64Decode's 
null-padding
   // of its output.
+  if (UNLIKELY(in_len == 0)) {
+    *out_max = 0;
+    return true;
+  }
   if (UNLIKELY((in_len & 3) != 0)) return false;
   *out_max = 1 + 3 * (in_len / 4);
+  DCHECK_GE(in_len, 1);
   if (in[in_len - 1] == '=') {
     --(*out_max);
+    DCHECK_GE(in_len, 2);
     if (in[in_len - 2] == '=') {
       --(*out_max);
     }

Reply via email to