ViniciusSouzaRoque commented on code in PR #13015: URL: https://github.com/apache/arrow/pull/13015#discussion_r860938249
########## cpp/src/gandiva/tests/projector_test.cc: ########## @@ -2624,4 +2624,54 @@ TEST_F(TestProjector, TestNextDay) { // Validate results EXPECT_ARROW_ARRAY_EQUALS(exp, outputs.at(0)); } + +TEST_F(TestProjector, TestRegexpExtract) { + // schema for input fields + auto field0 = field("f0", arrow::utf8()); + auto field1 = field("f1", arrow::int32()); + auto schema = arrow::schema({field0, field1}); + + // output fields + auto field_extract = field("extract", arrow::utf8()); + + // The pattern to match this sequence: string string - number + std::string pattern(R"((\w+) (\w+) - (\d+))"); + auto literal = TreeExprBuilder::MakeStringLiteral(pattern); + auto node0 = TreeExprBuilder::MakeField(field0); + auto node1 = TreeExprBuilder::MakeField(field1); + + // Build expression + auto regexp_extract_func = TreeExprBuilder::MakeFunction( + "regexp_extract", {node0, literal, node1}, arrow::utf8()); + auto extract_expr = TreeExprBuilder::MakeExpression(regexp_extract_func, field_extract); + + std::shared_ptr<Projector> projector; + auto status = Projector::Make(schema, {extract_expr}, TestConfiguration(), &projector); + EXPECT_TRUE(status.ok()) << status.message(); + + // Create a row-batch with some sample data + int num_records = 7; + auto array0 = MakeArrowArrayUtf8( + {"John Doe - 124", "John Doe - 124", "John Doe - 124", "John Doe - 124", Review Comment: Can you add a test with a UTF-8 entry? E.g.: b路%$大 路%$大a 路%c$大 -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org