vvellanki commented on code in PR #12803:
URL: https://github.com/apache/arrow/pull/12803#discussion_r844658304


##########
cpp/src/gandiva/precompiled/string_ops.cc:
##########
@@ -2393,152 +2393,213 @@ const char* byte_substr_binary_int32_int32(gdv_int64 
context, const char* text,
   return ret;
 }
 
+FORCE_INLINE
+void concat_word(char* out_buf, int* out_idx, const char* in_buf, int in_len,
+                 bool in_validity, const char* separator, int separator_len) {
+  if (!in_validity) {
+    return;
+  }
+
+  // input is valid
+  if (*out_idx != 0) {
+    // copy the separator and update *out_idx
+    memcpy(out_buf + *out_idx, separator, separator_len);
+    *out_idx += separator_len;
+  }
+  // copy the input and update *out_idx
+  memcpy(out_buf + *out_idx, in_buf, in_len);
+  *out_idx += in_len;
+}
+
 FORCE_INLINE
 const char* concat_ws_utf8_utf8(int64_t context, const char* separator,
-                                int32_t separator_len, const char* word1,
-                                int32_t word1_len, const char* word2, int32_t 
word2_len,
-                                int32_t* out_len) {
-  if (word1_len < 0 || word2_len < 0 || separator_len < 0) {
-    gdv_fn_context_set_error_msg(context, "All words can not be null.");
+                                int32_t separator_len, bool separator_validity,
+                                const char* word1, int32_t word1_len, bool 
word1_validity,
+                                const char* word2, int32_t word2_len, bool 
word2_validity,
+                                bool* out_valid, int32_t* out_len) {
+  *out_len = 0;
+  // If separator is null, always return null
+  if (!separator_validity) {
     *out_len = 0;
+    *out_valid = false;
     return "";
   }
+  *out_len += separator_len;
+  if (word1_validity) {
+    *out_len += word1_len;
+  }
+  if (word2_validity) {
+    *out_len += word2_len;
+  }
 
-  *out_len = word1_len + separator_len + word2_len;
   char* out = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, 
*out_len));
   if (out == nullptr) {
-    gdv_fn_context_set_error_msg(context, "Could not allocate memory for 
output string");
     *out_len = 0;
+    *out_valid = true;

Review Comment:
   Can you check the implementation of other Gandiva functions? I don't think an 
error in memory allocation should return a valid result. I believe an error 
should be raised for the memory allocation failure.
   
   Can you check with Projjal on this?



##########
cpp/src/gandiva/precompiled/string_ops.cc:
##########
@@ -2393,152 +2393,213 @@ const char* byte_substr_binary_int32_int32(gdv_int64 
context, const char* text,
   return ret;
 }
 
+FORCE_INLINE
+void concat_word(char* out_buf, int* out_idx, const char* in_buf, int in_len,
+                 bool in_validity, const char* separator, int separator_len) {
+  if (!in_validity) {
+    return;
+  }
+
+  // input is valid
+  if (*out_idx != 0) {
+    // copy the separator and update *out_idx
+    memcpy(out_buf + *out_idx, separator, separator_len);
+    *out_idx += separator_len;
+  }
+  // copy the input and update *out_idx
+  memcpy(out_buf + *out_idx, in_buf, in_len);
+  *out_idx += in_len;
+}
+
 FORCE_INLINE
 const char* concat_ws_utf8_utf8(int64_t context, const char* separator,
-                                int32_t separator_len, const char* word1,
-                                int32_t word1_len, const char* word2, int32_t 
word2_len,
-                                int32_t* out_len) {
-  if (word1_len < 0 || word2_len < 0 || separator_len < 0) {
-    gdv_fn_context_set_error_msg(context, "All words can not be null.");
+                                int32_t separator_len, bool separator_validity,
+                                const char* word1, int32_t word1_len, bool 
word1_validity,
+                                const char* word2, int32_t word2_len, bool 
word2_validity,
+                                bool* out_valid, int32_t* out_len) {
+  *out_len = 0;
+  // If separator is null, always return null
+  if (!separator_validity) {
     *out_len = 0;
+    *out_valid = false;
     return "";
   }
+  *out_len += separator_len;
+  if (word1_validity) {
+    *out_len += word1_len;
+  }
+  if (word2_validity) {
+    *out_len += word2_len;
+  }
 
-  *out_len = word1_len + separator_len + word2_len;
   char* out = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, 
*out_len));
   if (out == nullptr) {
-    gdv_fn_context_set_error_msg(context, "Could not allocate memory for 
output string");
     *out_len = 0;
+    *out_valid = true;
     return "";
   }
 
+  bool last = false;

Review Comment:
   Please remove unused local variables. We don't need `last` and `next` any more.



##########
cpp/src/gandiva/precompiled/string_ops.cc:
##########
@@ -2393,152 +2393,213 @@ const char* byte_substr_binary_int32_int32(gdv_int64 
context, const char* text,
   return ret;
 }
 
+FORCE_INLINE
+void concat_word(char* out_buf, int* out_idx, const char* in_buf, int in_len,
+                 bool in_validity, const char* separator, int separator_len) {
+  if (!in_validity) {
+    return;
+  }
+
+  // input is valid
+  if (*out_idx != 0) {
+    // copy the separator and update *out_idx
+    memcpy(out_buf + *out_idx, separator, separator_len);
+    *out_idx += separator_len;
+  }
+  // copy the input and update *out_idx
+  memcpy(out_buf + *out_idx, in_buf, in_len);
+  *out_idx += in_len;
+}
+
 FORCE_INLINE
 const char* concat_ws_utf8_utf8(int64_t context, const char* separator,
-                                int32_t separator_len, const char* word1,
-                                int32_t word1_len, const char* word2, int32_t 
word2_len,
-                                int32_t* out_len) {
-  if (word1_len < 0 || word2_len < 0 || separator_len < 0) {
-    gdv_fn_context_set_error_msg(context, "All words can not be null.");
+                                int32_t separator_len, bool separator_validity,
+                                const char* word1, int32_t word1_len, bool 
word1_validity,
+                                const char* word2, int32_t word2_len, bool 
word2_validity,
+                                bool* out_valid, int32_t* out_len) {
+  *out_len = 0;
+  // If separator is null, always return null
+  if (!separator_validity) {
     *out_len = 0;
+    *out_valid = false;
     return "";
   }
+  *out_len += separator_len;
+  if (word1_validity) {
+    *out_len += word1_len;
+  }
+  if (word2_validity) {
+    *out_len += word2_len;
+  }
 
-  *out_len = word1_len + separator_len + word2_len;
   char* out = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, 
*out_len));
   if (out == nullptr) {
-    gdv_fn_context_set_error_msg(context, "Could not allocate memory for 
output string");
     *out_len = 0;
+    *out_valid = true;
     return "";
   }
 
+  bool last = false;
+  bool next = false;
+
   char* tmp = out;
-  memcpy(tmp, word1, word1_len);
-  tmp += word1_len;
-  memcpy(tmp, separator, separator_len);
-  tmp += separator_len;
-  memcpy(tmp, word2, word2_len);
+  int out_tmp = 0;
+
+  concat_word(tmp, &out_tmp, word1, word1_len, word1_validity, separator, 
separator_len);
+  concat_word(tmp, &out_tmp, word2, word2_len, word2_validity, separator, 
separator_len);
 
+  *out_valid = true;
+  *out_len = out_tmp;
   return out;
 }
 
 FORCE_INLINE
-const char* concat_ws_utf8_utf8_utf8(int64_t context, const char* separator,
-                                     int32_t separator_len, const char* word1,
-                                     int32_t word1_len, const char* word2,
-                                     int32_t word2_len, const char* word3,
-                                     int32_t word3_len, int32_t* out_len) {
-  if (word1_len < 0 || word2_len < 0 || word3_len < 0 || separator_len < 0) {
-    gdv_fn_context_set_error_msg(context, "All words can not be null.");
+const char* concat_ws_utf8_utf8_utf8(
+    int64_t context, const char* separator, int32_t separator_len,
+    bool separator_validity, const char* word1, int32_t word1_len, bool 
word1_validity,
+    const char* word2, int32_t word2_len, bool word2_validity, const char* 
word3,
+    int32_t word3_len, bool word3_validity, bool* out_valid, int32_t* out_len) 
{
+  *out_len = 0;
+  if (!separator_validity) {
     *out_len = 0;
+    *out_valid = false;
     return "";
   }
+  *out_len += separator_len * 2;
+  if (word1_validity) {
+    *out_len += word1_len;
+  }
+  if (word2_validity) {
+    *out_len += word2_len;
+  }
+  if (word3_validity) {
+    *out_len += word3_len;
+  }
 
-  *out_len = word1_len + word2_len + word3_len + (2 * separator_len);
   char* out = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, 
*out_len));
   if (out == nullptr) {
-    gdv_fn_context_set_error_msg(context, "Could not allocate memory for 
output string");
     *out_len = 0;
+    *out_valid = true;
     return "";
   }
 
+  bool last = false;
+  bool next = false;
+
   char* tmp = out;
-  memcpy(tmp, word1, word1_len);
-  tmp += word1_len;
-  memcpy(tmp, separator, separator_len);
-  tmp += separator_len;
-  memcpy(tmp, word2, word2_len);
-  tmp += word2_len;
-  memcpy(tmp, separator, separator_len);
-  tmp += separator_len;
-  memcpy(tmp, word3, word3_len);
+  int out_tmp = 0;

Review Comment:
   Rename this to out_idx



##########
cpp/src/gandiva/precompiled/string_ops.cc:
##########
@@ -2393,152 +2393,213 @@ const char* byte_substr_binary_int32_int32(gdv_int64 
context, const char* text,
   return ret;
 }
 
+FORCE_INLINE
+void concat_word(char* out_buf, int* out_idx, const char* in_buf, int in_len,
+                 bool in_validity, const char* separator, int separator_len) {
+  if (!in_validity) {
+    return;
+  }
+
+  // input is valid
+  if (*out_idx != 0) {
+    // copy the separator and update *out_idx
+    memcpy(out_buf + *out_idx, separator, separator_len);
+    *out_idx += separator_len;
+  }
+  // copy the input and update *out_idx
+  memcpy(out_buf + *out_idx, in_buf, in_len);
+  *out_idx += in_len;
+}
+
 FORCE_INLINE
 const char* concat_ws_utf8_utf8(int64_t context, const char* separator,
-                                int32_t separator_len, const char* word1,
-                                int32_t word1_len, const char* word2, int32_t 
word2_len,
-                                int32_t* out_len) {
-  if (word1_len < 0 || word2_len < 0 || separator_len < 0) {
-    gdv_fn_context_set_error_msg(context, "All words can not be null.");
+                                int32_t separator_len, bool separator_validity,
+                                const char* word1, int32_t word1_len, bool 
word1_validity,
+                                const char* word2, int32_t word2_len, bool 
word2_validity,
+                                bool* out_valid, int32_t* out_len) {
+  *out_len = 0;
+  // If separator is null, always return null
+  if (!separator_validity) {
     *out_len = 0;
+    *out_valid = false;
     return "";
   }
+  *out_len += separator_len;
+  if (word1_validity) {
+    *out_len += word1_len;
+  }
+  if (word2_validity) {
+    *out_len += word2_len;
+  }
 
-  *out_len = word1_len + separator_len + word2_len;
   char* out = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, 
*out_len));
   if (out == nullptr) {
-    gdv_fn_context_set_error_msg(context, "Could not allocate memory for 
output string");
     *out_len = 0;
+    *out_valid = true;
     return "";
   }
 
+  bool last = false;
+  bool next = false;
+
   char* tmp = out;
-  memcpy(tmp, word1, word1_len);
-  tmp += word1_len;
-  memcpy(tmp, separator, separator_len);
-  tmp += separator_len;
-  memcpy(tmp, word2, word2_len);
+  int out_tmp = 0;
+
+  concat_word(tmp, &out_tmp, word1, word1_len, word1_validity, separator, 
separator_len);
+  concat_word(tmp, &out_tmp, word2, word2_len, word2_validity, separator, 
separator_len);
 
+  *out_valid = true;
+  *out_len = out_tmp;
   return out;
 }
 
 FORCE_INLINE
-const char* concat_ws_utf8_utf8_utf8(int64_t context, const char* separator,
-                                     int32_t separator_len, const char* word1,
-                                     int32_t word1_len, const char* word2,
-                                     int32_t word2_len, const char* word3,
-                                     int32_t word3_len, int32_t* out_len) {
-  if (word1_len < 0 || word2_len < 0 || word3_len < 0 || separator_len < 0) {
-    gdv_fn_context_set_error_msg(context, "All words can not be null.");
+const char* concat_ws_utf8_utf8_utf8(
+    int64_t context, const char* separator, int32_t separator_len,
+    bool separator_validity, const char* word1, int32_t word1_len, bool 
word1_validity,
+    const char* word2, int32_t word2_len, bool word2_validity, const char* 
word3,
+    int32_t word3_len, bool word3_validity, bool* out_valid, int32_t* out_len) 
{
+  *out_len = 0;
+  if (!separator_validity) {
     *out_len = 0;
+    *out_valid = false;
     return "";
   }
+  *out_len += separator_len * 2;
+  if (word1_validity) {
+    *out_len += word1_len;
+  }
+  if (word2_validity) {
+    *out_len += word2_len;
+  }
+  if (word3_validity) {
+    *out_len += word3_len;
+  }
 
-  *out_len = word1_len + word2_len + word3_len + (2 * separator_len);
   char* out = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, 
*out_len));
   if (out == nullptr) {
-    gdv_fn_context_set_error_msg(context, "Could not allocate memory for 
output string");
     *out_len = 0;
+    *out_valid = true;
     return "";
   }
 
+  bool last = false;

Review Comment:
   Please remove unused variables. The same comment applies to all variants of 
this function.



##########
cpp/src/gandiva/precompiled/string_ops.cc:
##########
@@ -2393,152 +2393,213 @@ const char* byte_substr_binary_int32_int32(gdv_int64 
context, const char* text,
   return ret;
 }
 
+FORCE_INLINE
+void concat_word(char* out_buf, int* out_idx, const char* in_buf, int in_len,
+                 bool in_validity, const char* separator, int separator_len) {
+  if (!in_validity) {
+    return;
+  }
+
+  // input is valid
+  if (*out_idx != 0) {
+    // copy the separator and update *out_idx
+    memcpy(out_buf + *out_idx, separator, separator_len);
+    *out_idx += separator_len;
+  }
+  // copy the input and update *out_idx
+  memcpy(out_buf + *out_idx, in_buf, in_len);
+  *out_idx += in_len;
+}
+
 FORCE_INLINE
 const char* concat_ws_utf8_utf8(int64_t context, const char* separator,
-                                int32_t separator_len, const char* word1,
-                                int32_t word1_len, const char* word2, int32_t 
word2_len,
-                                int32_t* out_len) {
-  if (word1_len < 0 || word2_len < 0 || separator_len < 0) {
-    gdv_fn_context_set_error_msg(context, "All words can not be null.");
+                                int32_t separator_len, bool separator_validity,
+                                const char* word1, int32_t word1_len, bool 
word1_validity,
+                                const char* word2, int32_t word2_len, bool 
word2_validity,
+                                bool* out_valid, int32_t* out_len) {
+  *out_len = 0;
+  // If separator is null, always return null
+  if (!separator_validity) {
     *out_len = 0;
+    *out_valid = false;
     return "";
   }
+  *out_len += separator_len;
+  if (word1_validity) {
+    *out_len += word1_len;
+  }
+  if (word2_validity) {
+    *out_len += word2_len;
+  }
 
-  *out_len = word1_len + separator_len + word2_len;
   char* out = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, 
*out_len));
   if (out == nullptr) {
-    gdv_fn_context_set_error_msg(context, "Could not allocate memory for 
output string");
     *out_len = 0;
+    *out_valid = true;
     return "";
   }
 
+  bool last = false;
+  bool next = false;
+
   char* tmp = out;
-  memcpy(tmp, word1, word1_len);
-  tmp += word1_len;
-  memcpy(tmp, separator, separator_len);
-  tmp += separator_len;
-  memcpy(tmp, word2, word2_len);
+  int out_tmp = 0;
+
+  concat_word(tmp, &out_tmp, word1, word1_len, word1_validity, separator, 
separator_len);
+  concat_word(tmp, &out_tmp, word2, word2_len, word2_validity, separator, 
separator_len);
 
+  *out_valid = true;
+  *out_len = out_tmp;
   return out;
 }
 
 FORCE_INLINE
-const char* concat_ws_utf8_utf8_utf8(int64_t context, const char* separator,
-                                     int32_t separator_len, const char* word1,
-                                     int32_t word1_len, const char* word2,
-                                     int32_t word2_len, const char* word3,
-                                     int32_t word3_len, int32_t* out_len) {
-  if (word1_len < 0 || word2_len < 0 || word3_len < 0 || separator_len < 0) {
-    gdv_fn_context_set_error_msg(context, "All words can not be null.");
+const char* concat_ws_utf8_utf8_utf8(
+    int64_t context, const char* separator, int32_t separator_len,
+    bool separator_validity, const char* word1, int32_t word1_len, bool 
word1_validity,
+    const char* word2, int32_t word2_len, bool word2_validity, const char* 
word3,
+    int32_t word3_len, bool word3_validity, bool* out_valid, int32_t* out_len) 
{
+  *out_len = 0;
+  if (!separator_validity) {
     *out_len = 0;
+    *out_valid = false;
     return "";
   }
+  *out_len += separator_len * 2;
+  if (word1_validity) {
+    *out_len += word1_len;
+  }
+  if (word2_validity) {
+    *out_len += word2_len;
+  }
+  if (word3_validity) {
+    *out_len += word3_len;
+  }
 
-  *out_len = word1_len + word2_len + word3_len + (2 * separator_len);
   char* out = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, 
*out_len));
   if (out == nullptr) {
-    gdv_fn_context_set_error_msg(context, "Could not allocate memory for 
output string");
     *out_len = 0;
+    *out_valid = true;

Review Comment:
   See the comment on the previous function. I believe this should return an 
error.



##########
cpp/src/gandiva/precompiled/string_ops.cc:
##########
@@ -2393,152 +2393,213 @@ const char* byte_substr_binary_int32_int32(gdv_int64 
context, const char* text,
   return ret;
 }
 
+FORCE_INLINE
+void concat_word(char* out_buf, int* out_idx, const char* in_buf, int in_len,
+                 bool in_validity, const char* separator, int separator_len) {
+  if (!in_validity) {
+    return;
+  }
+
+  // input is valid
+  if (*out_idx != 0) {
+    // copy the separator and update *out_idx
+    memcpy(out_buf + *out_idx, separator, separator_len);
+    *out_idx += separator_len;
+  }
+  // copy the input and update *out_idx
+  memcpy(out_buf + *out_idx, in_buf, in_len);
+  *out_idx += in_len;
+}
+
 FORCE_INLINE
 const char* concat_ws_utf8_utf8(int64_t context, const char* separator,
-                                int32_t separator_len, const char* word1,
-                                int32_t word1_len, const char* word2, int32_t 
word2_len,
-                                int32_t* out_len) {
-  if (word1_len < 0 || word2_len < 0 || separator_len < 0) {
-    gdv_fn_context_set_error_msg(context, "All words can not be null.");
+                                int32_t separator_len, bool separator_validity,
+                                const char* word1, int32_t word1_len, bool 
word1_validity,
+                                const char* word2, int32_t word2_len, bool 
word2_validity,
+                                bool* out_valid, int32_t* out_len) {
+  *out_len = 0;
+  // If separator is null, always return null
+  if (!separator_validity) {
     *out_len = 0;
+    *out_valid = false;
     return "";
   }
+  *out_len += separator_len;
+  if (word1_validity) {
+    *out_len += word1_len;
+  }
+  if (word2_validity) {
+    *out_len += word2_len;
+  }
 
-  *out_len = word1_len + separator_len + word2_len;
   char* out = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, 
*out_len));
   if (out == nullptr) {
-    gdv_fn_context_set_error_msg(context, "Could not allocate memory for 
output string");
     *out_len = 0;
+    *out_valid = true;
     return "";
   }
 
+  bool last = false;
+  bool next = false;
+
   char* tmp = out;
-  memcpy(tmp, word1, word1_len);
-  tmp += word1_len;
-  memcpy(tmp, separator, separator_len);
-  tmp += separator_len;
-  memcpy(tmp, word2, word2_len);
+  int out_tmp = 0;

Review Comment:
   Rename this to `out_idx`. This is the offset in the `out` array that is being 
written to.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to