Copilot commented on code in PR #49813:
URL: https://github.com/apache/arrow/pull/49813#discussion_r3285530035


##########
cpp/src/gandiva/precompiled/string_ops.cc:
##########
@@ -2444,182 +2462,180 @@ void concat_word(char* out_buf, int* out_idx, const 
char* in_buf, int in_len,
   *out_idx += in_len;
 }
 
-FORCE_INLINE
-const char* concat_ws_utf8_utf8(int64_t context, const char* separator,
-                                int32_t separator_len, bool separator_validity,
-                                const char* word1, int32_t word1_len, bool 
word1_validity,
-                                const char* word2, int32_t word2_len, bool 
word2_validity,
-                                bool* out_valid, int32_t* out_len) {
-  *out_len = 0;
-  int numValidInput = 0;
-  // If separator is null, always return null
-  if (!separator_validity) {
-    *out_len = 0;
-    *out_valid = false;
-    return "";
-  }
+// Helper structure to maintain state during safe length accumulation
+struct SafeLengthState {
+  int32_t total_len = 0;
+  int32_t num_valid = 0;
+  bool overflow = false;
+};
+
+// Helper to safely add a word length
+static inline bool safe_accumulate_word(int64_t context, SafeLengthState& 
state,
+                                        int32_t word_len, bool word_validity) {
+  if (!word_validity) return true;
 
-  if (word1_validity) {
-    *out_len += word1_len;
-    numValidInput++;
+  if (word_len < 0) {
+    gdv_fn_context_set_error_msg(context, "Invalid word length.");
+    return false;
   }
-  if (word2_validity) {
-    *out_len += word2_len;
-    numValidInput++;
+
+  int32_t temp = 0;
+  if (ARROW_PREDICT_FALSE(
+          arrow::internal::AddWithOverflow(state.total_len, word_len, &temp))) 
{
+    gdv_fn_context_set_error_msg(context, "Overflow in addition detected.");
+    state.overflow = true;
+    return false;
   }
+  state.total_len = temp;
+  state.num_valid++;
+  return true;
+}
 
-  *out_len += separator_len * (numValidInput > 1 ? numValidInput - 1 : 0);
-  if (*out_len == 0) {
-    *out_valid = true;
-    return "";
+// Helper to safely add separators based on number of valid words
+static inline bool safe_add_separators(int64_t context, SafeLengthState* state,
+                                       int32_t separator_len) {
+  if (state->num_valid <= 1) return true;
+
+  int32_t sep_total = 0;
+  int32_t temp = 0;
+
+  if (ARROW_PREDICT_FALSE(arrow::internal::MultiplyWithOverflow(
+          separator_len, state->num_valid - 1, &sep_total))) {
+    gdv_fn_context_set_error_msg(context, "Overflow in multiplication 
detected.");
+    state->overflow = true;
+    return false;
   }
 
-  char* out = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, 
*out_len));
-  if (out == nullptr) {
-    gdv_fn_context_set_error_msg(context, "Could not allocate memory for 
output string");
-    *out_len = 0;
-    *out_valid = false;
-    return "";
+  if (ARROW_PREDICT_FALSE(
+          arrow::internal::AddWithOverflow(state->total_len, sep_total, 
&temp))) {
+    gdv_fn_context_set_error_msg(context, "Overflow in addition detected.");
+    state->overflow = true;
+    return false;
   }
 
-  char* tmp = out;
-  int out_idx = 0;
-  bool seenAnyValidInput = false;
+  state->total_len = temp;
+  return true;
+}
 
-  concat_word(tmp, &out_idx, word1, word1_len, word1_validity, separator, 
separator_len,
-              &seenAnyValidInput);
-  concat_word(tmp, &out_idx, word2, word2_len, word2_validity, separator, 
separator_len,
-              &seenAnyValidInput);
+// Helper to handle overflow failure (sets output parameters and returns empty 
string)
+static inline const char* handle_overflow_failure(bool* out_valid, int32_t* 
out_len) {
+  *out_len = 0;
+  *out_valid = false;
+  return "";
+}
 
+// Helper to handle empty result (all words invalid)
+static inline const char* handle_empty_result(bool* out_valid, int32_t* 
out_len) {
+  *out_len = 0;
   *out_valid = true;
-  *out_len = out_idx;
-  return out;
+  return "";
 }
 
-FORCE_INLINE
-const char* concat_ws_utf8_utf8_utf8(
-    int64_t context, const char* separator, int32_t separator_len,
-    bool separator_validity, const char* word1, int32_t word1_len, bool 
word1_validity,
-    const char* word2, int32_t word2_len, bool word2_validity, const char* 
word3,
-    int32_t word3_len, bool word3_validity, bool* out_valid, int32_t* out_len) 
{
+struct WordArg {
+  const char* data;
+  int32_t len;
+  bool valid;
+};
+
+static inline const char* concat_ws_impl(int64_t context, const char* 
separator,
+                                         int32_t separator_len, bool 
separator_validity,
+                                         bool* out_valid, int32_t* out_len,
+                                         std::initializer_list<WordArg> words) 
{
   *out_len = 0;
-  int numValidInput = 0;
-  // If separator is null, always return null
+
+  // Separator validity check
   if (!separator_validity) {
-    *out_len = 0;
     *out_valid = false;
     return "";
   }
-
-  if (word1_validity) {
-    *out_len += word1_len;
-    numValidInput++;
-  }
-  if (word2_validity) {
-    *out_len += word2_len;
-    numValidInput++;
+  if (separator_len < 0) {
+    *out_valid = false;
+    return "";
   }
-  if (word3_validity) {
-    *out_len += word3_len;
-    numValidInput++;
+
+  SafeLengthState state;
+
+  // Accumulate all word lengths safely
+  for (const WordArg& w : words) {
+    if (!safe_accumulate_word(context, state, w.len, w.valid)) {
+      gdv_fn_context_set_error_msg(context, "Invalid word length or 
validity.");
+      *out_len = 0;
+      *out_valid = false;
+      return "";
+    }

Review Comment:
   In `concat_ws_impl`, when `safe_accumulate_word()` fails, it has already set
   a specific error message (e.g., "Invalid word length." or
   "Overflow in addition detected."). The unconditional call
   `gdv_fn_context_set_error_msg(context, "Invalid word length or validity.")`
   here overwrites that more precise message, making debugging harder. Consider
   removing this overwrite, setting a message only when none has been set yet,
   or branching on `state.overflow` so that overflow and negative-length
   failures are reported distinctly.



##########
cpp/src/gandiva/precompiled/string_ops.cc:
##########
@@ -2444,182 +2462,180 @@ void concat_word(char* out_buf, int* out_idx, const 
char* in_buf, int in_len,
   *out_idx += in_len;
 }
 
-FORCE_INLINE
-const char* concat_ws_utf8_utf8(int64_t context, const char* separator,
-                                int32_t separator_len, bool separator_validity,
-                                const char* word1, int32_t word1_len, bool 
word1_validity,
-                                const char* word2, int32_t word2_len, bool 
word2_validity,
-                                bool* out_valid, int32_t* out_len) {
-  *out_len = 0;
-  int numValidInput = 0;
-  // If separator is null, always return null
-  if (!separator_validity) {
-    *out_len = 0;
-    *out_valid = false;
-    return "";
-  }
+// Helper structure to maintain state during safe length accumulation
+struct SafeLengthState {
+  int32_t total_len = 0;
+  int32_t num_valid = 0;
+  bool overflow = false;
+};
+
+// Helper to safely add a word length
+static inline bool safe_accumulate_word(int64_t context, SafeLengthState& 
state,
+                                        int32_t word_len, bool word_validity) {
+  if (!word_validity) return true;
 
-  if (word1_validity) {
-    *out_len += word1_len;
-    numValidInput++;
+  if (word_len < 0) {
+    gdv_fn_context_set_error_msg(context, "Invalid word length.");
+    return false;
   }
-  if (word2_validity) {
-    *out_len += word2_len;
-    numValidInput++;
+
+  int32_t temp = 0;
+  if (ARROW_PREDICT_FALSE(
+          arrow::internal::AddWithOverflow(state.total_len, word_len, &temp))) 
{
+    gdv_fn_context_set_error_msg(context, "Overflow in addition detected.");
+    state.overflow = true;
+    return false;
   }
+  state.total_len = temp;
+  state.num_valid++;
+  return true;
+}
 
-  *out_len += separator_len * (numValidInput > 1 ? numValidInput - 1 : 0);
-  if (*out_len == 0) {
-    *out_valid = true;
-    return "";
+// Helper to safely add separators based on number of valid words
+static inline bool safe_add_separators(int64_t context, SafeLengthState* state,
+                                       int32_t separator_len) {
+  if (state->num_valid <= 1) return true;
+
+  int32_t sep_total = 0;
+  int32_t temp = 0;
+
+  if (ARROW_PREDICT_FALSE(arrow::internal::MultiplyWithOverflow(
+          separator_len, state->num_valid - 1, &sep_total))) {
+    gdv_fn_context_set_error_msg(context, "Overflow in multiplication 
detected.");
+    state->overflow = true;
+    return false;
   }
 
-  char* out = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, 
*out_len));
-  if (out == nullptr) {
-    gdv_fn_context_set_error_msg(context, "Could not allocate memory for 
output string");
-    *out_len = 0;
-    *out_valid = false;
-    return "";
+  if (ARROW_PREDICT_FALSE(
+          arrow::internal::AddWithOverflow(state->total_len, sep_total, 
&temp))) {
+    gdv_fn_context_set_error_msg(context, "Overflow in addition detected.");
+    state->overflow = true;
+    return false;
   }
 
-  char* tmp = out;
-  int out_idx = 0;
-  bool seenAnyValidInput = false;
+  state->total_len = temp;
+  return true;
+}
 
-  concat_word(tmp, &out_idx, word1, word1_len, word1_validity, separator, 
separator_len,
-              &seenAnyValidInput);
-  concat_word(tmp, &out_idx, word2, word2_len, word2_validity, separator, 
separator_len,
-              &seenAnyValidInput);
+// Helper to handle overflow failure (sets output parameters and returns empty 
string)
+static inline const char* handle_overflow_failure(bool* out_valid, int32_t* 
out_len) {
+  *out_len = 0;
+  *out_valid = false;
+  return "";
+}
 
+// Helper to handle empty result (all words invalid)
+static inline const char* handle_empty_result(bool* out_valid, int32_t* 
out_len) {
+  *out_len = 0;
   *out_valid = true;
-  *out_len = out_idx;
-  return out;
+  return "";
 }
 
-FORCE_INLINE
-const char* concat_ws_utf8_utf8_utf8(
-    int64_t context, const char* separator, int32_t separator_len,
-    bool separator_validity, const char* word1, int32_t word1_len, bool 
word1_validity,
-    const char* word2, int32_t word2_len, bool word2_validity, const char* 
word3,
-    int32_t word3_len, bool word3_validity, bool* out_valid, int32_t* out_len) 
{
+struct WordArg {
+  const char* data;
+  int32_t len;
+  bool valid;
+};
+
+static inline const char* concat_ws_impl(int64_t context, const char* 
separator,
+                                         int32_t separator_len, bool 
separator_validity,
+                                         bool* out_valid, int32_t* out_len,
+                                         std::initializer_list<WordArg> words) 
{
   *out_len = 0;
-  int numValidInput = 0;
-  // If separator is null, always return null
+
+  // Separator validity check
   if (!separator_validity) {
-    *out_len = 0;
     *out_valid = false;
     return "";
   }
-
-  if (word1_validity) {
-    *out_len += word1_len;
-    numValidInput++;
-  }
-  if (word2_validity) {
-    *out_len += word2_len;
-    numValidInput++;
+  if (separator_len < 0) {
+    *out_valid = false;
+    return "";
   }
-  if (word3_validity) {
-    *out_len += word3_len;
-    numValidInput++;
+
+  SafeLengthState state;
+
+  // Accumulate all word lengths safely
+  for (const WordArg& w : words) {
+    if (!safe_accumulate_word(context, state, w.len, w.valid)) {
+      gdv_fn_context_set_error_msg(context, "Invalid word length or 
validity.");
+      *out_len = 0;
+      *out_valid = false;
+      return "";
+    }
   }
 
-  *out_len += separator_len * (numValidInput > 1 ? numValidInput - 1 : 0);
+  // Add separator lengths
+  if (!safe_add_separators(context, &state, separator_len)) {
+    gdv_fn_context_set_error_msg(context, "Invalid separator.");

Review Comment:
   Similarly, `safe_add_separators()` sets specific overflow error messages
   (multiplication/addition). The call
   `gdv_fn_context_set_error_msg(context, "Invalid separator.")` here overwrites
   those details, even though the separator value itself may be fine and the
   failure is an overflow. Consider preserving the helper’s message (or
   branching on `state.overflow` to emit an overflow-specific message).
   



##########
cpp/src/gandiva/precompiled/string_ops.cc:
##########
@@ -2829,8 +2789,31 @@ const char* to_hex_binary(int64_t context, const char* 
text, int32_t text_len,
     return "";
   }
 
-  auto ret =
-      reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, text_len * 
2 + 1));
+  if (ARROW_PREDICT_FALSE(text_len < 0)) {
+    gdv_fn_context_set_error_msg(context, "Text length invalid(negative).");

Review Comment:
   The error message is missing a space: "Text length invalid(negative)." reads
   like a formatting typo. Consider changing it to
   "Text length invalid (negative)." (and updating the corresponding test
   assertion).
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to