This is an automated email from the ASF dual-hosted git repository.

ravindra pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 699306f1a5 ARROW-16699: [C++][GANDIVA] Fix Concat_WS allocation bug (#13276)
699306f1a5 is described below

commit 699306f1a5186a89588c5148e9ce92712c867035
Author: Vinícius Roque <[email protected]>
AuthorDate: Fri Jun 3 10:24:03 2022 -0300

    ARROW-16699: [C++][GANDIVA] Fix Concat_WS allocation bug (#13276)
    
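    Fixes an output-buffer allocation issue in the concat_ws functions in
    version 22.0.0: the output length now counts one separator between each
    pair of valid inputs, and an empty result is returned without allocating.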
    
    Authored-by: ViniciusSouzaRoque <[email protected]>
    Signed-off-by: Pindikura Ravindra <[email protected]>
---
 cpp/src/gandiva/precompiled/string_ops.cc      | 57 ++++++++++++++++++++++++--
 cpp/src/gandiva/precompiled/string_ops_test.cc |  5 +++
 2 files changed, 58 insertions(+), 4 deletions(-)

diff --git a/cpp/src/gandiva/precompiled/string_ops.cc 
b/cpp/src/gandiva/precompiled/string_ops.cc
index 2cbb1f0110..c255b9a11c 100644
--- a/cpp/src/gandiva/precompiled/string_ops.cc
+++ b/cpp/src/gandiva/precompiled/string_ops.cc
@@ -2424,18 +2424,27 @@ const char* concat_ws_utf8_utf8(int64_t context, const 
char* separator,
                                 const char* word2, int32_t word2_len, bool 
word2_validity,
                                 bool* out_valid, int32_t* out_len) {
   *out_len = 0;
+  int numValidInput = 0;
   // If separator is null, always return null
   if (!separator_validity) {
     *out_len = 0;
     *out_valid = false;
     return "";
   }
-  *out_len += separator_len;
+
   if (word1_validity) {
     *out_len += word1_len;
+    numValidInput++;
   }
   if (word2_validity) {
     *out_len += word2_len;
+    numValidInput++;
+  }
+
+  *out_len += separator_len * (numValidInput > 1 ? numValidInput - 1 : 0);
+  if (*out_len == 0) {
+    *out_valid = true;
+    return "";
   }
 
   char* out = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, 
*out_len));
@@ -2467,20 +2476,33 @@ const char* concat_ws_utf8_utf8_utf8(
     const char* word2, int32_t word2_len, bool word2_validity, const char* 
word3,
     int32_t word3_len, bool word3_validity, bool* out_valid, int32_t* out_len) 
{
   *out_len = 0;
+  int numValidInput = 0;
+  // If separator is null, always return null
   if (!separator_validity) {
     *out_len = 0;
     *out_valid = false;
     return "";
   }
-  *out_len += separator_len * 2;
+
   if (word1_validity) {
     *out_len += word1_len;
+    numValidInput++;
   }
   if (word2_validity) {
     *out_len += word2_len;
+    numValidInput++;
   }
   if (word3_validity) {
     *out_len += word3_len;
+    numValidInput++;
+  }
+
+  *out_len += separator_len * (numValidInput > 1 ? numValidInput - 1 : 0);
+
+  if (*out_len == 0) {
+    *out_len = 0;
+    *out_valid = true;
+    return "";
   }
 
   char* out = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, 
*out_len));
@@ -2515,23 +2537,36 @@ const char* concat_ws_utf8_utf8_utf8_utf8(
     int32_t word3_len, bool word3_validity, const char* word4, int32_t 
word4_len,
     bool word4_validity, bool* out_valid, int32_t* out_len) {
   *out_len = 0;
+  int numValidInput = 0;
+  // If separator is null, always return null
   if (!separator_validity) {
     *out_len = 0;
     *out_valid = false;
     return "";
   }
-  *out_len += separator_len;
   if (word1_validity) {
     *out_len += word1_len;
+    numValidInput++;
   }
   if (word2_validity) {
     *out_len += word2_len;
+    numValidInput++;
   }
   if (word3_validity) {
     *out_len += word3_len;
+    numValidInput++;
   }
   if (word4_validity) {
     *out_len += word4_len;
+    numValidInput++;
+  }
+
+  *out_len += separator_len * (numValidInput > 1 ? numValidInput - 1 : 0);
+
+  if (*out_len == 0) {
+    *out_len = 0;
+    *out_valid = true;
+    return "";
   }
 
   char* out = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, 
*out_len));
@@ -2569,26 +2604,40 @@ const char* concat_ws_utf8_utf8_utf8_utf8_utf8(
     bool word4_validity, const char* word5, int32_t word5_len, bool 
word5_validity,
     bool* out_valid, int32_t* out_len) {
   *out_len = 0;
+  int numValidInput = 0;
+  // If separator is null, always return null
   if (!separator_validity) {
     *out_len = 0;
     *out_valid = false;
     return "";
   }
-  *out_len += separator_len;
   if (word1_validity) {
     *out_len += word1_len;
+    numValidInput++;
   }
   if (word2_validity) {
     *out_len += word2_len;
+    numValidInput++;
   }
   if (word3_validity) {
     *out_len += word3_len;
+    numValidInput++;
   }
   if (word4_validity) {
     *out_len += word4_len;
+    numValidInput++;
   }
   if (word5_validity) {
     *out_len += word5_len;
+    numValidInput++;
+  }
+
+  *out_len += separator_len * (numValidInput > 1 ? numValidInput - 1 : 0);
+
+  if (*out_len == 0) {
+    *out_len = 0;
+    *out_valid = true;
+    return "";
   }
 
   char* out = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, 
*out_len));
diff --git a/cpp/src/gandiva/precompiled/string_ops_test.cc 
b/cpp/src/gandiva/precompiled/string_ops_test.cc
index 2f8212d8b8..b84c51b3a6 100644
--- a/cpp/src/gandiva/precompiled/string_ops_test.cc
+++ b/cpp/src/gandiva/precompiled/string_ops_test.cc
@@ -2062,6 +2062,11 @@ TEST(TestStringOps, TestConcatWs) {
   EXPECT_EQ(std::string(out, out_len), "hey-hello");
   EXPECT_EQ(out_result, true);
 
+  out = concat_ws_utf8_utf8(ctx_ptr, "", 0, true, "", 0, true, "", 0, true, 
&out_result,
+                            &out_len);
+  EXPECT_EQ(std::string(out, out_len), "");
+  EXPECT_EQ(out_result, true);
+
   out = concat_ws_utf8_utf8(ctx_ptr, separator, sep_len, true, "", 0, true, 
word2,
                             word2_len, true, &out_result, &out_len);
   EXPECT_EQ(std::string(out, out_len), "-hello");

Reply via email to