vvellanki commented on a change in pull request #11287: URL: https://github.com/apache/arrow/pull/11287#discussion_r743392841
########## File path: cpp/src/gandiva/gdv_function_stubs.cc ########## @@ -794,6 +794,27 @@ const char* gdv_fn_initcap_utf8(int64_t context, const char* data, int32_t data_ *out_len = out_idx; return out; } + +GANDIVA_EXPORT +int32_t gd_fn_instr_utf8(int64_t context, const char* string, int32_t string_len, + const char* substring, int32_t substring_len) { Review comment: After this one is merged is fine ########## File path: cpp/src/gandiva/function_registry_string.cc ########## @@ -406,6 +406,10 @@ std::vector<NativeFunction> GetStringFunctionRegistry() { NativeFunction("split_part", {}, DataTypeVector{utf8(), utf8(), int32()}, utf8(), kResultNullIfNull, "split_part", + NativeFunction::kNeedsContext | NativeFunction::kCanReturnErrors), + + NativeFunction("instr", {}, DataTypeVector{utf8(), utf8()}, int32(), + kResultNullIfNull, "gd_fn_instr_utf8", Review comment: This is not going to allocate memory. So, I dont understand why it needs context ########## File path: cpp/src/gandiva/gdv_function_stubs.cc ########## @@ -794,6 +795,39 @@ const char* gdv_fn_initcap_utf8(int64_t context, const char* data, int32_t data_ *out_len = out_idx; return out; } + +GANDIVA_EXPORT +int32_t gdv_fn_instr_utf8(int64_t context, const char* string, int32_t string_len, + const char* substring, int32_t substring_len) { + if (string_len == 0 && substring_len == 0) { Review comment: This code is going to run for all the rows in the table. So, it is important to avoid if-statements in the code. How about the following: if (substring_len == 0) { return 1; } // I see instr('', '') returning 1. So, there is no need to check for string_len ########## File path: cpp/src/gandiva/gdv_function_stubs.cc ########## @@ -794,6 +795,39 @@ const char* gdv_fn_initcap_utf8(int64_t context, const char* data, int32_t data_ *out_len = out_idx; return out; } + +GANDIVA_EXPORT +int32_t gdv_fn_instr_utf8(int64_t context, const char* string, int32_t string_len, + const char* substring, int32_t substring_len) { + if (string_len == 0 && substring_len == 0) { + return 1; + } + + if (substring_len == 0) { + return 1; + } + + if (string_len == 0) { + return 0; + } + + int32_t match = 0; + int32_t pos = 0; + for (int i = 0; i < string_len; ++i) { + if (string[i] == substring[match]) { Review comment: This is incorrect - if string = "Hello World" and substring = "HW", this implementation will return 1 You should change this to: for(int i = 0; i < string_len; i++) { if (string[i] == substring[0] && stringMatches(string + 1, substring + 1, substring_len - 1)) { return (i + 1); } } return 0; stringMatches(const char *string, const char *substring, int substring_len) { for(int i = 0; i < substring_len; i++) { if (string[i] != substring[i]) { return false; } } return true; } -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org