naman1996 commented on a change in pull request #8231:
URL: https://github.com/apache/arrow/pull/8231#discussion_r501558799



##########
File path: cpp/src/gandiva/precompiled/string_ops.cc
##########
@@ -835,6 +847,66 @@ const char* replace_utf8_utf8_utf8(gdv_int64 context, 
const char* text,
                                              out_len);
 }
 
+FORCE_INLINE
+const char* split_part(gdv_int64 context, const char* text, gdv_int32 text_len,
+                       const char* delimiter, gdv_int32 delim_len, gdv_int32 
index,
+                       gdv_int32* out_len) {
+  if (index < 1) {
+    char error_message[100];
+    snprintf(error_message, sizeof(error_message),
+             "Index in split_part must be positive, value provided was %d", 
index);
+    gdv_fn_context_set_error_msg(context, error_message);
+    *out_len = 0;
+    return "";
+  }
+
+  if (delim_len == 0 || text_len == 0) {
+    // output will just be text if no delimiter is provided
+    *out_len = text_len;
+    return text;
+  }
+
+  int i = 0, match_no = 1;
+
+  while (i < text_len) {
+    // find the position where delimiter matched for the first time
+    int match_pos = match_string(text, text_len, i, delimiter, delim_len);
+    if (match_pos == -1 && match_no != index) {
+      // reached the end without finding a match.
+      *out_len = 0;
+      return "";
+    } else {
+      // Found a match. If the match number is index then return this match
+      if (match_no == index) {
+        int end_pos = match_pos - delim_len;
+
+        if (match_pos == -1) {
+          // end position should be last position of the string as we have the 
last
+          // delimiter
+          end_pos = text_len;
+        }
+
+        *out_len = end_pos - i;
+        char* out_str =
+            reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, 
*out_len));
+        if (out_str == nullptr) {
+          gdv_fn_context_set_error_msg(context,
+                                       "Could not allocate memory for output 
string");
+          *out_len = 0;

Review comment:
       added out_len 0 in the beginning

##########
File path: cpp/src/gandiva/precompiled/string_ops.cc
##########
@@ -40,6 +40,18 @@ gdv_int32 bit_length_binary(const gdv_binary input, 
gdv_int32 length) {
   return length * 8;
 }
 
+int match_string(const char* input, gdv_int32 input_len, gdv_int32 start_pos,

Review comment:
       added

##########
File path: cpp/src/gandiva/precompiled/string_ops.cc
##########
@@ -835,6 +847,66 @@ const char* replace_utf8_utf8_utf8(gdv_int64 context, 
const char* text,
                                              out_len);
 }
 
+FORCE_INLINE
+const char* split_part(gdv_int64 context, const char* text, gdv_int32 text_len,
+                       const char* delimiter, gdv_int32 delim_len, gdv_int32 
index,
+                       gdv_int32* out_len) {
+  if (index < 1) {
+    char error_message[100];
+    snprintf(error_message, sizeof(error_message),
+             "Index in split_part must be positive, value provided was %d", 
index);
+    gdv_fn_context_set_error_msg(context, error_message);
+    *out_len = 0;
+    return "";
+  }
+
+  if (delim_len == 0 || text_len == 0) {
+    // output will just be text if no delimiter is provided
+    *out_len = text_len;
+    return text;
+  }
+
+  int i = 0, match_no = 1;
+
+  while (i < text_len) {
+    // find the position where delimiter matched for the first time
+    int match_pos = match_string(text, text_len, i, delimiter, delim_len);
+    if (match_pos == -1 && match_no != index) {
+      // reached the end without finding a match.
+      *out_len = 0;
+      return "";
+    } else {
+      // Found a match. If the match number is index then return this match
+      if (match_no == index) {
+        int end_pos = match_pos - delim_len;
+
+        if (match_pos == -1) {
+          // end position should be last position of the string as we have the 
last
+          // delimiter
+          end_pos = text_len;
+        }
+
+        *out_len = end_pos - i;
+        char* out_str =
+            reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, 
*out_len));
+        if (out_str == nullptr) {
+          gdv_fn_context_set_error_msg(context,
+                                       "Could not allocate memory for output 
string");
+          *out_len = 0;

Review comment:
       I cannot refactor this output length out as we set this before trying to 
allocate memory. In rest of the places I have removed *out_len = 0 in the PR 
and added it in the beginning.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to