This is an automated email from the ASF dual-hosted git repository.
ravindra pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 699306f1a5 ARROW-16699: [C++][GANDIVA] Fix Concat_WS allocation bug
(#13276)
699306f1a5 is described below
commit 699306f1a5186a89588c5148e9ce92712c867035
Author: Vinícius Roque <[email protected]>
AuthorDate: Fri Jun 3 10:24:03 2022 -0300
ARROW-16699: [C++][GANDIVA] Fix Concat_WS allocation bug (#13276)
Created to fix an allocation issue in version 22.0.0
Authored-by: ViniciusSouzaRoque <[email protected]>
Signed-off-by: Pindikura Ravindra <[email protected]>
---
cpp/src/gandiva/precompiled/string_ops.cc | 57 ++++++++++++++++++++++++--
cpp/src/gandiva/precompiled/string_ops_test.cc | 5 +++
2 files changed, 58 insertions(+), 4 deletions(-)
diff --git a/cpp/src/gandiva/precompiled/string_ops.cc
b/cpp/src/gandiva/precompiled/string_ops.cc
index 2cbb1f0110..c255b9a11c 100644
--- a/cpp/src/gandiva/precompiled/string_ops.cc
+++ b/cpp/src/gandiva/precompiled/string_ops.cc
@@ -2424,18 +2424,27 @@ const char* concat_ws_utf8_utf8(int64_t context, const
char* separator,
const char* word2, int32_t word2_len, bool
word2_validity,
bool* out_valid, int32_t* out_len) {
*out_len = 0;
+ int numValidInput = 0;
// If separator is null, always return null
if (!separator_validity) {
*out_len = 0;
*out_valid = false;
return "";
}
- *out_len += separator_len;
+
if (word1_validity) {
*out_len += word1_len;
+ numValidInput++;
}
if (word2_validity) {
*out_len += word2_len;
+ numValidInput++;
+ }
+
+ *out_len += separator_len * (numValidInput > 1 ? numValidInput - 1 : 0);
+ if (*out_len == 0) {
+ *out_valid = true;
+ return "";
}
char* out = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context,
*out_len));
@@ -2467,20 +2476,33 @@ const char* concat_ws_utf8_utf8_utf8(
const char* word2, int32_t word2_len, bool word2_validity, const char*
word3,
int32_t word3_len, bool word3_validity, bool* out_valid, int32_t* out_len)
{
*out_len = 0;
+ int numValidInput = 0;
+ // If separator is null, always return null
if (!separator_validity) {
*out_len = 0;
*out_valid = false;
return "";
}
- *out_len += separator_len * 2;
+
if (word1_validity) {
*out_len += word1_len;
+ numValidInput++;
}
if (word2_validity) {
*out_len += word2_len;
+ numValidInput++;
}
if (word3_validity) {
*out_len += word3_len;
+ numValidInput++;
+ }
+
+ *out_len += separator_len * (numValidInput > 1 ? numValidInput - 1 : 0);
+
+ if (*out_len == 0) {
+ *out_len = 0;
+ *out_valid = true;
+ return "";
}
char* out = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context,
*out_len));
@@ -2515,23 +2537,36 @@ const char* concat_ws_utf8_utf8_utf8_utf8(
int32_t word3_len, bool word3_validity, const char* word4, int32_t
word4_len,
bool word4_validity, bool* out_valid, int32_t* out_len) {
*out_len = 0;
+ int numValidInput = 0;
+ // If separator is null, always return null
if (!separator_validity) {
*out_len = 0;
*out_valid = false;
return "";
}
- *out_len += separator_len;
if (word1_validity) {
*out_len += word1_len;
+ numValidInput++;
}
if (word2_validity) {
*out_len += word2_len;
+ numValidInput++;
}
if (word3_validity) {
*out_len += word3_len;
+ numValidInput++;
}
if (word4_validity) {
*out_len += word4_len;
+ numValidInput++;
+ }
+
+ *out_len += separator_len * (numValidInput > 1 ? numValidInput - 1 : 0);
+
+ if (*out_len == 0) {
+ *out_len = 0;
+ *out_valid = true;
+ return "";
}
char* out = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context,
*out_len));
@@ -2569,26 +2604,40 @@ const char* concat_ws_utf8_utf8_utf8_utf8_utf8(
bool word4_validity, const char* word5, int32_t word5_len, bool
word5_validity,
bool* out_valid, int32_t* out_len) {
*out_len = 0;
+ int numValidInput = 0;
+ // If separator is null, always return null
if (!separator_validity) {
*out_len = 0;
*out_valid = false;
return "";
}
- *out_len += separator_len;
if (word1_validity) {
*out_len += word1_len;
+ numValidInput++;
}
if (word2_validity) {
*out_len += word2_len;
+ numValidInput++;
}
if (word3_validity) {
*out_len += word3_len;
+ numValidInput++;
}
if (word4_validity) {
*out_len += word4_len;
+ numValidInput++;
}
if (word5_validity) {
*out_len += word5_len;
+ numValidInput++;
+ }
+
+ *out_len += separator_len * (numValidInput > 1 ? numValidInput - 1 : 0);
+
+ if (*out_len == 0) {
+ *out_len = 0;
+ *out_valid = true;
+ return "";
}
char* out = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context,
*out_len));
diff --git a/cpp/src/gandiva/precompiled/string_ops_test.cc
b/cpp/src/gandiva/precompiled/string_ops_test.cc
index 2f8212d8b8..b84c51b3a6 100644
--- a/cpp/src/gandiva/precompiled/string_ops_test.cc
+++ b/cpp/src/gandiva/precompiled/string_ops_test.cc
@@ -2062,6 +2062,11 @@ TEST(TestStringOps, TestConcatWs) {
EXPECT_EQ(std::string(out, out_len), "hey-hello");
EXPECT_EQ(out_result, true);
+ out = concat_ws_utf8_utf8(ctx_ptr, "", 0, true, "", 0, true, "", 0, true,
&out_result,
+ &out_len);
+ EXPECT_EQ(std::string(out, out_len), "");
+ EXPECT_EQ(out_result, true);
+
out = concat_ws_utf8_utf8(ctx_ptr, separator, sep_len, true, "", 0, true,
word2,
word2_len, true, &out_result, &out_len);
EXPECT_EQ(std::string(out, out_len), "-hello");