This is an automated email from the ASF dual-hosted git repository. joemcdonnell pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
commit 4fea5260cf77bbbfa23457ff15907f5aef55266e Author: Csaba Ringhofer <[email protected]> AuthorDate: Mon May 5 19:54:49 2025 +0200 IMPALA-14030: Fix buffer underflow when base64 decoding 0 length binaries The issue didn't cause problems under normal circumstances but ASAN tests caught it in JSON tests enabled in IMPALA-12927. Changed text parsing logic to skip base64 decoding for empty binaries. Also fixed Base64DecodeBufLen() with len=0 and added unit tests, though this function is not used with len=0 outside BE tests. Change-Id: I511cff8cec319d03d494a342f2cbb4a251cb893e Reviewed-on: http://gerrit.cloudera.org:8080/22855 Reviewed-by: Riza Suminto <[email protected]> Reviewed-by: Zoltan Borok-Nagy <[email protected]> Tested-by: Impala Public Jenkins <[email protected]> --- be/src/exec/text-converter.inline.h | 2 +- be/src/util/coding-util-test.cc | 10 ++++++++++ be/src/util/coding-util.cc | 6 ++++++ 3 files changed, 17 insertions(+), 1 deletion(-) diff --git a/be/src/exec/text-converter.inline.h b/be/src/exec/text-converter.inline.h index 6e1474607..15df3e349 100644 --- a/be/src/exec/text-converter.inline.h +++ b/be/src/exec/text-converter.inline.h @@ -67,7 +67,7 @@ inline bool TextConverter::WriteSlot(const SlotDescriptor* slot_desc, Tuple* tup !(len != 0 && (copy_string || need_escape)); bool base64_decode = false; - if (type.IsBinaryType() && decode_binary_) { + if (type.IsBinaryType() && decode_binary_ && len != 0) { base64_decode = true; reuse_data = false; int64_t out_len; diff --git a/be/src/util/coding-util-test.cc b/be/src/util/coding-util-test.cc index 7914de148..8a2f855c8 100644 --- a/be/src/util/coding-util-test.cc +++ b/be/src/util/coding-util-test.cc @@ -144,6 +144,16 @@ TEST(Base64Test, Basic) { TestBase64(string("a\0b\0", 4), "YQBiAA=="); } +TEST(Base64Test, TinyLength) { + string str = "===="; + int64_t len; + // Length of 1 should fail as it is not divisible with 4. + EXPECT_FALSE(Base64DecodeBufLen(str.data(), 1, &len)); + // Length of 0 is valid and should return 0 (regression test for IMPALA-14030). + EXPECT_TRUE(Base64DecodeBufLen(str.data()+2, 0, &len)); + EXPECT_EQ(len, 0); +} + TEST(Base64Test, VariousInitialVariableValues) { TestBase64EncodeWithInitialValues(0, 0); TestBase64EncodeWithInitialValues(5, -10); diff --git a/be/src/util/coding-util.cc b/be/src/util/coding-util.cc index 7a7d1f6ae..f4578e4d4 100644 --- a/be/src/util/coding-util.cc +++ b/be/src/util/coding-util.cc @@ -191,10 +191,16 @@ bool Base64DecodeBufLen(const char* in, int64_t in_len, int64_t* out_max) { // producing one fewer byte of output. This is repeated if the second-to-last character // is '='. One more byte must be allocated to account for Base64Decode's null-padding // of its output. + if (UNLIKELY(in_len == 0)) { + *out_max = 0; + return true; + } if (UNLIKELY((in_len & 3) != 0)) return false; *out_max = 1 + 3 * (in_len / 4); + DCHECK_GE(in_len, 1); if (in[in_len - 1] == '=') { --(*out_max); + DCHECK_GE(in_len, 2); if (in[in_len - 2] == '=') { --(*out_max); }
