projjal commented on a change in pull request #10059: URL: https://github.com/apache/arrow/pull/10059#discussion_r623623465
########## File path: cpp/src/gandiva/gdv_function_stubs.cc ########## @@ -42,6 +43,41 @@ bool gdv_fn_like_utf8_utf8(int64_t ptr, const char* data, int data_len, return (*holder)(std::string(data, data_len)); } +const char* gdv_fn_regexp_replace_utf8_utf8( + int64_t ptr, int64_t holder_ptr, const char* data, int32_t data_len, + const char* /*pattern*/, int32_t /*pattern_len*/, const char* replace_string, + int32_t replace_string_len, int32_t* out_length) { + std::string user_input(data, data_len); + std::string replace_input(replace_string, replace_string_len); + gandiva::ExecutionContext* context = reinterpret_cast<gandiva::ExecutionContext*>(ptr); + + gandiva::ReplaceHolder* holder = reinterpret_cast<gandiva::ReplaceHolder*>(holder_ptr); + bool was_replaced = (*holder)(context, user_input, replace_input, out_length); + + if (!was_replaced) { + return data; + } + + // This condition treats the case where the whole string is replaced by an empty string + if (*out_length == 0) { + return ""; + } + + char* result_buffer = + reinterpret_cast<char*>(gdv_fn_context_arena_malloc(ptr, *out_length)); + + if (result_buffer == nullptr) { + std::string err_msg = "Could not allocate memory for result"; + gdv_fn_context_set_error_msg(ptr, err_msg.data()); Review comment: The above still involves an allocation. 
Change this to: gdv_fn_context_set_error_msg(ptr, "Could not allocate memory for result"); ########## File path: java/gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/ProjectorTest.java ########## @@ -648,6 +648,66 @@ public void testRegex() throws GandivaException { eval.close(); } + @Test + public void testRegexpReplace() throws GandivaException { + + Field x = Field.nullable("x", new ArrowType.Utf8()); + Field replaceString = Field.nullable("replaceString", new ArrowType.Utf8()); + + Field retType = Field.nullable("c", new ArrowType.Utf8()); + + TreeNode cond = + TreeBuilder.makeFunction( + "regexp_replace", + Lists.newArrayList(TreeBuilder.makeField(x), TreeBuilder.makeStringLiteral("ana"), + TreeBuilder.makeField(replaceString)), + new ArrowType.Utf8()); + ExpressionTree expr = TreeBuilder.makeExpression(cond, retType); + Schema schema = new Schema(Lists.newArrayList(x, replaceString)); + Projector eval = Projector.make(schema, Lists.newArrayList(expr)); + + int numRows = 5; + byte[] validity = new byte[]{(byte) 15, 0}; + String[] valuesX = new String[]{"banana", "bananaana", "bananana", "anaana", "anaana"}; + String[] valuesReplace = new String[]{"", "", "", "c", ""}; Review comment: Please add some multi-character replacement strings to the test data too. ########## File path: cpp/src/gandiva/gdv_function_stubs.cc ########## @@ -42,6 +43,41 @@ bool gdv_fn_like_utf8_utf8(int64_t ptr, const char* data, int data_len, return (*holder)(std::string(data, data_len)); } +const char* gdv_fn_regexp_replace_utf8_utf8( + int64_t ptr, int64_t holder_ptr, const char* data, int32_t data_len, + const char* /*pattern*/, int32_t /*pattern_len*/, const char* replace_string, + int32_t replace_string_len, int32_t* out_length) { + std::string user_input(data, data_len); + std::string replace_input(replace_string, replace_string_len); + gandiva::ExecutionContext* context = reinterpret_cast<gandiva::ExecutionContext*>(ptr); + + gandiva::ReplaceHolder* holder = 
reinterpret_cast<gandiva::ReplaceHolder*>(holder_ptr); + bool was_replaced = (*holder)(context, user_input, replace_input, out_length); + + if (!was_replaced) { Review comment: Why not incorporate the following logic inside the holder operator() method too? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org