This is an automated email from the ASF dual-hosted git repository.

stigahuang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit ef6dad694d131f600478798674ed460d2391e7ac
Author: Daniel Becker <[email protected]>
AuthorDate: Tue Apr 9 14:46:44 2024 +0200

    IMPALA-12986: Base64Encode fails if the 'out_len' output parameter is 
passed with certain values
    
    The Base64Encode function in coding-util.h with signature
    
      bool Base64Encode(const char* in, int64_t in_len, int64_t out_max,
          char* out, int64_t* out_len);
    
    fails if '*out_len', when passed to the function, contains a value that
    does not fit in a 32 bit integer.
    
    Internally we use the
    
      int sasl_encode64(const char *in, unsigned inlen, char *out, unsigned
          outmax, unsigned *outlen);
    
    function and explicitly cast 'out_len' to 'unsigned*'.
    
    The error is that the called sasl_encode64() function only sets the four
    lower bytes of '*out_len' (assuming that 'unsigned' is a 32 bit
    integer), and if the upper bytes are not all zero, the resulting value
    of '*out_len' will be incorrect.
    
    This change changes the type of 'out_len' from 'int64_t*' to 'unsigned*'
    to match the type that sasl_encode64() expects.
    
    Base64Decode() is also updated to use 'unsigned*'. Before this change it
    used an intermediate 32 bit local variable to avoid this issue.
    
    Testing:
     - added a regression test in coding-util-test.cc
    
    Change-Id: I35ae59fc9b3280f89ea4f7d95d27d2f21751001f
    Reviewed-on: http://gerrit.cloudera.org:8080/21271
    Reviewed-by: Impala Public Jenkins <[email protected]>
    Tested-by: Impala Public Jenkins <[email protected]>
---
 be/src/exec/text-converter.inline.h |  2 +-
 be/src/exprs/string-functions-ir.cc |  4 ++--
 be/src/util/coding-util-test.cc     | 33 +++++++++++++++++++++++++++++++--
 be/src/util/coding-util.cc          | 12 +++++-------
 be/src/util/coding-util.h           |  6 +++---
 be/src/util/runtime-profile.cc      |  2 +-
 6 files changed, 43 insertions(+), 16 deletions(-)

diff --git a/be/src/exec/text-converter.inline.h 
b/be/src/exec/text-converter.inline.h
index 6f4c74ff9..67c6f3eae 100644
--- a/be/src/exec/text-converter.inline.h
+++ b/be/src/exec/text-converter.inline.h
@@ -94,7 +94,7 @@ inline bool TextConverter::WriteSlot(const SlotDescriptor* 
slot_desc, Tuple* tup
             reinterpret_cast<char*>(slot);
         if (UNLIKELY(str.ptr == nullptr)) return false;
         if (base64_decode) {
-          int64_t out_len;
+          unsigned out_len;
           if(!Base64Decode(data, len, buffer_len, str.ptr, &out_len)) return 
false;
           DCHECK_LE(out_len, buffer_len);
           str.len = out_len;
diff --git a/be/src/exprs/string-functions-ir.cc 
b/be/src/exprs/string-functions-ir.cc
index f68e483db..dc9468328 100644
--- a/be/src/exprs/string-functions-ir.cc
+++ b/be/src/exprs/string-functions-ir.cc
@@ -1529,7 +1529,7 @@ StringVal StringFunctions::Base64Encode(FunctionContext* 
ctx, const StringVal& s
   }
   StringVal result(ctx, out_max);
   if (UNLIKELY(result.is_null)) return result;
-  int64_t out_len = 0;
+  unsigned out_len = 0;
   if (UNLIKELY(!impala::Base64Encode(
           reinterpret_cast<const char*>(str.ptr), str.len,
           out_max, reinterpret_cast<char*>(result.ptr), &out_len))) {
@@ -1558,7 +1558,7 @@ StringVal StringFunctions::Base64Decode(FunctionContext* 
ctx, const StringVal& s
   }
   StringVal result(ctx, out_max);
   if (UNLIKELY(result.is_null)) return result;
-  int64_t out_len = 0;
+  unsigned out_len = 0;
   if (UNLIKELY(!impala::Base64Decode(
           reinterpret_cast<const char*>(str.ptr), 
static_cast<int64_t>(str.len),
           out_max, reinterpret_cast<char*>(result.ptr), &out_len))) {
diff --git a/be/src/util/coding-util-test.cc b/be/src/util/coding-util-test.cc
index 4222d9c01..ad04222ae 100644
--- a/be/src/util/coding-util-test.cc
+++ b/be/src/util/coding-util-test.cc
@@ -58,9 +58,9 @@ void TestBase64(const string& input, const string& 
expected_encoded) {
   int64_t out_max = 0;
   EXPECT_TRUE(Base64DecodeBufLen(intermediate.c_str(), intermediate.size(), 
&out_max));
   string output(out_max, '\0');
-  int64_t out_len = 0;
+  unsigned out_len = 0;
   EXPECT_TRUE(Base64Decode(intermediate.c_str(), intermediate.size(),
-        out_max, const_cast<char*>(output.c_str()), &out_len));
+      out_max, const_cast<char*>(output.c_str()), &out_len));
   output.resize(out_len);
   EXPECT_EQ(input, output);
 
@@ -73,6 +73,28 @@ void TestBase64(const string& input, const string& 
expected_encoded) {
   EXPECT_EQ(intermediate, intermediate2);
 }
 
+// Test Base64 encoding when the variables in which the calculated maximal 
output size and
+// the actual output size are stored have specific initial values (regression 
test for
+// IMPALA-12986).
+void TestBase64EncodeWithInitialValues(int64_t initial_max_value,
+    int64_t initial_out_value) {
+  const string bytes = "abc\1\2\3";
+
+  int64_t base64_max_len = initial_max_value;
+  bool succ = Base64EncodeBufLen(bytes.size(), &base64_max_len);
+  EXPECT_TRUE(succ);
+
+  // 'base64_max_len' includes the null terminator.
+  string buf(base64_max_len - 1, '\0');
+  unsigned base64_len = initial_out_value;
+  succ = Base64Encode(bytes.c_str(), bytes.size(), base64_max_len, buf.data(),
+      &base64_len);
+  EXPECT_TRUE(succ);
+
+  const string expected = "YWJjAQID";
+  EXPECT_EQ(expected, buf);
+}
+
 // Test URL encoding. Check that the values that are put in are the
 // same that come out.
 TEST(UrlCodingTest, Basic) {
@@ -112,6 +134,13 @@ TEST(Base64Test, Basic) {
   TestBase64(string("a\0b\0", 4), "YQBiAA==");
 }
 
+TEST(Base64Test, VariousInitialVariableValues) {
+  TestBase64EncodeWithInitialValues(0, 0);
+  TestBase64EncodeWithInitialValues(5, -10);
+  // Test a value that doesn't fit in 32 bits.
+  TestBase64EncodeWithInitialValues(5, 88090617260393);
+}
+
 TEST(HtmlEscapingTest, Basic) {
   string before = "<html><body>&amp";
   stringstream after;
diff --git a/be/src/util/coding-util.cc b/be/src/util/coding-util.cc
index b093b1e7b..a78a3791d 100644
--- a/be/src/util/coding-util.cc
+++ b/be/src/util/coding-util.cc
@@ -123,13 +123,13 @@ bool Base64EncodeBufLen(int64_t in_len, int64_t* out_max) 
{
 }
 
 bool Base64Encode(const char* in, int64_t in_len, int64_t out_max, char* out,
-    int64_t* out_len) {
+    unsigned* out_len) {
   if (UNLIKELY(in_len < 0 || in_len > std::numeric_limits<unsigned>::max() ||
         out_max < 0 || out_max > std::numeric_limits<unsigned>::max())) {
     return false;
   }
   const int encode_result = sasl_encode64(in, static_cast<unsigned>(in_len), 
out,
-      static_cast<unsigned>(out_max), reinterpret_cast<unsigned*>(out_len));
+      static_cast<unsigned>(out_max), out_len);
   if (UNLIKELY(encode_result != SASL_OK || *out_len != out_max - 1)) return 
false;
   return true;
 }
@@ -142,7 +142,7 @@ void Base64Encode(const char* in, int64_t in_len, 
stringstream* out) {
   int64_t out_max = 0;
   if (UNLIKELY(!Base64EncodeBufLen(in_len, &out_max))) return;
   string result(out_max, '\0');
-  int64_t out_len = 0;
+  unsigned out_len = 0;
   if (UNLIKELY(!Base64Encode(in, in_len, out_max, 
const_cast<char*>(result.c_str()),
           &out_len))) {
     return;
@@ -198,12 +198,10 @@ bool Base64DecodeBufLen(const char* in, int64_t in_len, 
int64_t* out_max) {
 }
 
 bool Base64Decode(const char* in, int64_t in_len, int64_t out_max, char* out,
-    int64_t* out_len) {
-  uint32_t out_len_u32 = 0;
+    unsigned* out_len) {
   if (UNLIKELY((in_len & 3) != 0)) return false;
   const int decode_result = sasl_decode64(in, static_cast<unsigned>(in_len), 
out,
-      static_cast<unsigned>(out_max), &out_len_u32);
-  *out_len = out_len_u32;
+      static_cast<unsigned>(out_max), out_len);
   if (UNLIKELY(decode_result != SASL_OK || *out_len != out_max - 1)) return 
false;
   return true;
 }
diff --git a/be/src/util/coding-util.h b/be/src/util/coding-util.h
index 8716d0245..dd71679a3 100644
--- a/be/src/util/coding-util.h
+++ b/be/src/util/coding-util.h
@@ -48,9 +48,9 @@ bool Base64EncodeBufLen(int64_t in_len, int64_t* out_max);
 /// data, the space of size out_max should be allocated before calling this 
function.
 /// out_len saves the actual length of encoded string.
 bool Base64Encode(const char* in, int64_t in_len, int64_t out_max, char* out,
-    int64_t* out_len);
+    unsigned* out_len);
 
-/// Utility method to encode input as base-64 encoded.  This is not
+/// Utility method to encode input as base-64 encoded. This is not
 /// very performant (multiple string copies) and should not be used
 /// in a hot path.
 void Base64Encode(const std::vector<uint8_t>& in, std::string* out);
@@ -68,7 +68,7 @@ bool Base64DecodeBufLen(const char* in, int64_t in_len, 
int64_t* out_max);
 /// out_max should be allocated before calling this function. out_len saves 
the actual
 /// length of decoded string.
 bool Base64Decode(const char* in, int64_t in_len, int64_t out_max, char* out,
-    int64_t* out_len);
+    unsigned* out_len);
 
 /// Replaces &, < and > with &amp;, &lt; and &gt; respectively. This is
 /// not the full set of required encodings, but one that should be
diff --git a/be/src/util/runtime-profile.cc b/be/src/util/runtime-profile.cc
index 9a531f01e..f6b848c5d 100644
--- a/be/src/util/runtime-profile.cc
+++ b/be/src/util/runtime-profile.cc
@@ -1687,7 +1687,7 @@ Status RuntimeProfile::DeserializeFromArchiveString(
 
   vector<uint8_t> decoded_buffer;
   decoded_buffer.resize(decoded_max);
-  int64_t decoded_len;
+  unsigned decoded_len;
   if (!Base64Decode(archive_str.c_str(), archive_str.size(), decoded_max,
           reinterpret_cast<char*>(decoded_buffer.data()), &decoded_len)) {
     return Status("Error in DeserializeFromArchiveString: Base64Decode 
failed.");

Reply via email to