This is an automated email from the ASF dual-hosted git repository.

michaelsmith pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git


The following commit(s) were added to refs/heads/master by this push:
     new 5d7ca0712 IMPALA-13150: Possible buffer overflow in 
StringVal::CopyFrom()
5d7ca0712 is described below

commit 5d7ca0712af493eca6704a3fdfcfaf16bde46ed0
Author: Daniel Becker <[email protected]>
AuthorDate: Mon Jun 10 15:22:04 2024 +0200

    IMPALA-13150: Possible buffer overflow in StringVal::CopyFrom()
    
    In StringVal::CopyFrom(), we take the 'len' parameter as a size_t, which
    is usually a 64-bit unsigned integer. We pass it to the constructor of
    StringVal, which takes it as an int, which is usually a 32-bit signed
    integer. The constructor then allocates memory for the length using the
    int value, but afterwards in CopyFrom(), we copy the buffer with the
    size_t length. If size_t is indeed 64 bits and int is 32 bits, and the
    value is truncated, we may copy more bytes that what we have allocated
    for the destination.
    
    Note that in the constructor of StringVal it is checked whether the
    length is greater than 1GB, but if the value is truncated because of the
    type conversion, the check doesn't necessarily catch it as the truncated
    value may be small.
    
    This change fixes the problem by doing the length check with 64 bit
    integers in StringVal::CopyFrom().
    
    Testing:
     - added unit tests for StringVal::CopyFrom() in udf-test.cc.
    
    Change-Id: I6a1d03d65ec4339a0f33e69ff29abdd8cc3e3067
    Reviewed-on: http://gerrit.cloudera.org:8080/21501
    Reviewed-by: Impala Public Jenkins <[email protected]>
    Tested-by: Impala Public Jenkins <[email protected]>
---
 be/src/udf/udf-test.cc | 61 ++++++++++++++++++++++++++++++++++++++++++++++++++
 be/src/udf/udf.cc      | 41 +++++++++++++++++++--------------
 be/src/udf/udf.h       |  4 ++++
 3 files changed, 89 insertions(+), 17 deletions(-)

diff --git a/be/src/udf/udf-test.cc b/be/src/udf/udf-test.cc
index 9951db25a..fabf11055 100644
--- a/be/src/udf/udf-test.cc
+++ b/be/src/udf/udf-test.cc
@@ -337,4 +337,65 @@ TEST(UdfTest, MemTest) {
 
 }
 
+// Creates and manages a FunctionContext for a dummy UDF with no arguments.
+class FunctionCtxGuard {
+ public:
+  FunctionCtxGuard(): ctx_(UdfTestHarness::CreateTestContext(return_type, 
arg_types)) {}
+
+  ~FunctionCtxGuard() { UdfTestHarness::CloseContext(ctx_.get()); }
+
+  FunctionContext* getCtx() { return ctx_.get(); }
+ private:
+  static const FunctionContext::TypeDesc return_type;
+  static const std::vector<FunctionContext::TypeDesc> arg_types;
+
+  const std::unique_ptr<FunctionContext> ctx_;
+};
+const FunctionContext::TypeDesc FunctionCtxGuard::return_type {};
+const std::vector<FunctionContext::TypeDesc> FunctionCtxGuard::arg_types;
+
+bool StringValEqualsStr(const StringVal& string_val, const std::string& str) {
+  return str.length() == string_val.len &&
+      std::memcmp(str.c_str(), string_val.ptr, str.length()) == 0;
+}
+
+void CheckStringValCopyFromSucceeds(const std::string& str) {
+  FunctionCtxGuard ctx_guard;
+
+  const StringVal string_val = StringVal::CopyFrom(ctx_guard.getCtx(),
+      reinterpret_cast<const uint8_t*>(str.c_str()), str.length());
+  EXPECT_TRUE(StringValEqualsStr(string_val, str));
+  EXPECT_FALSE(string_val.is_null);
+  EXPECT_FALSE(ctx_guard.getCtx()->has_error());
+}
+
+void CheckStringValCopyFromFailsWithLength(uint64_t len) {
+  FunctionCtxGuard ctx_guard;
+  const StringVal string_val = StringVal::CopyFrom(ctx_guard.getCtx(), 
nullptr, len);
+  EXPECT_TRUE(string_val.is_null);
+  EXPECT_TRUE(ctx_guard.getCtx()->has_error());
+}
+
+TEST(UdfTest, TestStringValCopyFrom) {
+  const std::string empty_string = "";
+  const std::string small_string = "small";
+  const std::string longer_string = "This is a somewhat longer string.";
+
+  // Test that copying works correctly.
+  CheckStringValCopyFromSucceeds(empty_string);
+  CheckStringValCopyFromSucceeds(small_string);
+  CheckStringValCopyFromSucceeds(longer_string);
+
+  // Test that if a length bigger than StringVal::MAX_LENGTH is passed, the 
result is null
+  // and an error.
+  constexpr int len32 = StringVal::MAX_LENGTH + 1;
+  CheckStringValCopyFromFailsWithLength(len32);
+
+  // Test that if a length bigger than StringVal::MAX_LENGTH is passed as a 
size_t, but
+  // when truncated to int it is no longer too big, the result is still null 
and an error.
+  // Regression test for IMPALA-13150.
+  constexpr uint64_t len64 = (1UL << 40) + 1;
+  CheckStringValCopyFromFailsWithLength(len64);
+}
+
 IMPALA_TEST_MAIN();
diff --git a/be/src/udf/udf.cc b/be/src/udf/udf.cc
index dee3fc446..10390824f 100644
--- a/be/src/udf/udf.cc
+++ b/be/src/udf/udf.cc
@@ -525,26 +525,13 @@ void 
FunctionContextImpl::SetNonConstantArgs(NonConstantArgsVector&& non_constan
 // Note: this function crashes LLVM's JIT in expr-test if it's xcompiled. Do 
not move to
 // expr-ir.cc. This could probably use further investigation.
 StringVal::StringVal(FunctionContext* context, int str_len) noexcept : 
len(str_len),
-                                                                       
ptr(NULL) {
-  if (UNLIKELY(str_len > StringVal::MAX_LENGTH)) {
-    context->SetError("String length larger than allowed limit of "
-                      "1 GB character data.");
-    len = 0;
-    is_null = true;
-  } else {
-    ptr = context->impl()->AllocateForResults(str_len);
-    if (UNLIKELY(ptr == NULL && str_len > 0)) {
-#ifndef IMPALA_UDF_SDK_BUILD
-      assert(!context->impl()->state()->GetQueryStatus().ok());
-#endif
-      len = 0;
-      is_null = true;
-    }
-  }
+                                                                       
ptr(nullptr) {
+  AllocateStringValWithLenCheck(context, str_len, this);
 }
 
 StringVal StringVal::CopyFrom(FunctionContext* ctx, const uint8_t* buf, size_t 
len) noexcept {
-  StringVal result(ctx, len);
+  StringVal result;
+  AllocateStringValWithLenCheck(ctx, len, &result);
   if (LIKELY(!result.is_null)) {
     std::copy(buf, buf + len, result.ptr);
   }
@@ -573,6 +560,26 @@ bool StringVal::Resize(FunctionContext* ctx, int new_len) 
noexcept {
   return false;
 }
 
+void StringVal::AllocateStringValWithLenCheck(FunctionContext* ctx, uint64_t 
str_len,
+    StringVal* res) {
+  if (UNLIKELY(str_len > StringVal::MAX_LENGTH)) {
+    ctx->SetError("String length larger than allowed limit of 1 GB character 
data.");
+    res->len = 0;
+    res->is_null = true;
+  } else {
+    res->ptr = ctx->impl()->AllocateForResults(str_len);
+    if (UNLIKELY(res->ptr == nullptr && str_len > 0)) {
+#ifndef IMPALA_UDF_SDK_BUILD
+      assert(!ctx->impl()->state()->GetQueryStatus().ok());
+#endif
+      res->len = 0;
+      res->is_null = true;
+    } else {
+      res->len = str_len;
+    }
+  }
+}
+
 void StructVal::ReserveMemory(FunctionContext* ctx) {
   assert(ctx != nullptr);
   assert(num_children >= 0);
diff --git a/be/src/udf/udf.h b/be/src/udf/udf.h
index 6a6626c74..f997f7a07 100644
--- a/be/src/udf/udf.h
+++ b/be/src/udf/udf.h
@@ -710,6 +710,10 @@ struct StringVal : public AnyVal {
     return ptr == other.ptr || memcmp(ptr, other.ptr, len) == 0;
   }
   bool operator!=(const StringVal& other) const { return !(*this == other); }
+
+ private:
+  static void AllocateStringValWithLenCheck(FunctionContext* ctx, uint64_t 
str_len,
+      StringVal* res);
 };
 
 struct DecimalVal : public impala_udf::AnyVal {

Reply via email to