This is an automated email from the ASF dual-hosted git repository.

raulcd pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 8b83784406 GH-49159: [C++][Gandiva] Detect overflow in repeat() 
(#49160)
8b83784406 is described below

commit 8b837844064e1a2eaad09fa739a2d4eaa257a8d0
Author: Sutou Kouhei <[email protected]>
AuthorDate: Tue Feb 10 18:35:43 2026 +0900

    GH-49159: [C++][Gandiva] Detect overflow in repeat() (#49160)
    
    ### Rationale for this change
    
    `repeat()` can only generate output of at most `2147483647` bytes (the 
maximum 32-bit signed length). So output larger than `2147483647` must be rejected.
    
    ### What changes are included in this PR?
    
    Add overflow check in `repeat()`.
    
    ### Are these changes tested?
    
    Yes.
    
    ### Are there any user-facing changes?
    
    Yes.
    * GitHub Issue: #49159
    
    Lead-authored-by: Sutou Kouhei <[email protected]>
    Co-authored-by: Sutou Kouhei <[email protected]>
    Signed-off-by: Raúl Cumplido <[email protected]>
---
 cpp/src/gandiva/precompiled/string_ops.cc      | 7 ++++++-
 cpp/src/gandiva/precompiled/string_ops_test.cc | 7 +++++++
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/cpp/src/gandiva/precompiled/string_ops.cc 
b/cpp/src/gandiva/precompiled/string_ops.cc
index 7450018a55..0b787f461c 100644
--- a/cpp/src/gandiva/precompiled/string_ops.cc
+++ b/cpp/src/gandiva/precompiled/string_ops.cc
@@ -841,7 +841,12 @@ const char* repeat_utf8_int32(gdv_int64 context, const 
char* in, gdv_int32 in_le
     *out_len = 0;
     return "";
   }
-  *out_len = repeat_number * in_len;
+  if (ARROW_PREDICT_FALSE(
+          arrow::internal::MultiplyWithOverflow(repeat_number, in_len, 
out_len))) {
+    gdv_fn_context_set_error_msg(context, "Would overflow maximum output 
size");
+    *out_len = 0;
+    return "";
+  }
   char* ret = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, 
*out_len));
   if (ret == nullptr) {
     gdv_fn_context_set_error_msg(context, "Could not allocate memory for 
output string");
diff --git a/cpp/src/gandiva/precompiled/string_ops_test.cc 
b/cpp/src/gandiva/precompiled/string_ops_test.cc
index ca2b2b5785..e0248667e3 100644
--- a/cpp/src/gandiva/precompiled/string_ops_test.cc
+++ b/cpp/src/gandiva/precompiled/string_ops_test.cc
@@ -387,6 +387,13 @@ TEST(TestStringOps, TestRepeat) {
   EXPECT_EQ(std::string(out_str, out_len), "");
   EXPECT_THAT(ctx.get_error(), ::testing::HasSubstr("Repeat number can't be 
negative"));
   ctx.Reset();
+
+  out_str = repeat_utf8_int32(ctx_ptr, "aa", 2,
+                              std::numeric_limits<int32_t>::max() / 2 + 1, 
&out_len);
+  EXPECT_EQ(std::string(out_str, out_len), "");
+  EXPECT_THAT(ctx.get_error(),
+              ::testing::HasSubstr("Would overflow maximum output size"));
+  ctx.Reset();
 }
 
 TEST(TestStringOps, TestCastBoolToVarchar) {

Reply via email to