projjal commented on a change in pull request #10169:
URL: https://github.com/apache/arrow/pull/10169#discussion_r622097959



##########
File path: cpp/src/gandiva/precompiled/string_ops.cc
##########
@@ -1520,4 +1520,40 @@ const char* binary_string(gdv_int64 context, const char* 
text, gdv_int32 text_le
   return ret;
 }
 
+FORCE_INLINE
+const char* byte_substr(gdv_int64 context, const char* text, gdv_int32 
text_len,
+                        gdv_int32 offset, gdv_int32 length, gdv_int32* 
out_len) {
+  // the first offset position for a string is 1, so not consider offset == 0
+  // also, the length should be always a positive number
+  if (text_len == 0 || offset == 0 || length <= 0) {
+    *out_len = 0;
+    return "";
+  }
+
+  char* ret =
+      reinterpret_cast<gdv_binary>(gdv_fn_context_arena_malloc(context, 
text_len));
+
+  if (ret == nullptr) {
+    gdv_fn_context_set_error_msg(context, "Could not allocate memory for 
output string");
+    *out_len = 0;
+    return "";
+  }
+
+  int32_t startPos;
+  if (offset < 0) {

Review comment:
       Looks like it offset can be negative in which case it starts from the 
end. Can you add these cases in a comment at the top of the function

##########
File path: cpp/src/gandiva/precompiled/string_ops.cc
##########
@@ -1520,4 +1520,40 @@ const char* binary_string(gdv_int64 context, const char* 
text, gdv_int32 text_le
   return ret;
 }
 
+FORCE_INLINE
+const char* byte_substr(gdv_int64 context, const char* text, gdv_int32 
text_len,
+                        gdv_int32 offset, gdv_int32 length, gdv_int32* 
out_len) {
+  // the first offset position for a string is 1, so not consider offset == 0
+  // also, the length should be always a positive number
+  if (text_len == 0 || offset == 0 || length <= 0) {
+    *out_len = 0;
+    return "";
+  }
+
+  char* ret =
+      reinterpret_cast<gdv_binary>(gdv_fn_context_arena_malloc(context, 
text_len));
+
+  if (ret == nullptr) {
+    gdv_fn_context_set_error_msg(context, "Could not allocate memory for 
output string");
+    *out_len = 0;
+    return "";
+  }
+
+  int32_t startPos;
+  if (offset < 0) {
+    startPos = text_len + offset;

Review comment:
       You need to validate the offset or else startPos may go negative

##########
File path: cpp/src/gandiva/precompiled/string_ops.cc
##########
@@ -1520,4 +1520,40 @@ const char* binary_string(gdv_int64 context, const char* 
text, gdv_int32 text_le
   return ret;
 }
 
+FORCE_INLINE
+const char* byte_substr(gdv_int64 context, const char* text, gdv_int32 
text_len,
+                        gdv_int32 offset, gdv_int32 length, gdv_int32* 
out_len) {
+  // the first offset position for a string is 1, so not consider offset == 0
+  // also, the length should be always a positive number
+  if (text_len == 0 || offset == 0 || length <= 0) {
+    *out_len = 0;
+    return "";
+  }
+
+  char* ret =
+      reinterpret_cast<gdv_binary>(gdv_fn_context_arena_malloc(context, 
text_len));
+
+  if (ret == nullptr) {
+    gdv_fn_context_set_error_msg(context, "Could not allocate memory for 
output string");
+    *out_len = 0;
+    return "";
+  }
+
+  int32_t startPos;
+  if (offset < 0) {
+    startPos = text_len + offset;
+  } else {
+    startPos = offset - 1;

Review comment:
       Write a comment at the top of the function that offset is 1-indexed

##########
File path: cpp/src/gandiva/function_registry_string.cc
##########
@@ -216,6 +216,10 @@ std::vector<NativeFunction> GetStringFunctionRegistry() {
                      
"concat_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8",
                      NativeFunction::kNeedsContext),
 
+      NativeFunction("byte_substr", {"bytesubstring"},
+                     DataTypeVector{binary(), int32(), int32()}, binary(),
+                     kResultNullIfNull, "byte_substr", 
NativeFunction::kNeedsContext),

Review comment:
       nit: other function signature keep the argument types in the function 
name. Lets keep that convention




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to