This is an automated email from the ASF dual-hosted git repository.

ravindra pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new a8479e9c25 ARROW-13052: [Gandiva][C++] Add regexp_extract function
a8479e9c25 is described below

commit a8479e9c252482438b6fc2bc0383ac5cf6a09d59
Author: Johnnathan <[email protected]>
AuthorDate: Wed May 11 15:36:53 2022 +0530

    ARROW-13052: [Gandiva][C++] Add regexp_extract function
    
    Implements the REGEXP_EXTRACT function based on [the Hive implementation](https://www.revisitclass.com/hadoop/regexp_extract-function-in-hive-with-examples/).
    
    Closes #13015 from Johnnathanalmeida/feature/add-regexp-extract
    
    Authored-by: Johnnathan <[email protected]>
    Signed-off-by: Pindikura Ravindra <[email protected]>
---
 cpp/src/gandiva/CMakeLists.txt                     |   6 +-
 cpp/src/gandiva/function_holder_registry.h         |   4 +-
 cpp/src/gandiva/function_registry_string.cc        |   6 +
 cpp/src/gandiva/gdv_string_function_stubs.cc       |  30 +-
 cpp/src/gandiva/like_holder.h                      |  68 --
 cpp/src/gandiva/like_holder_test.cc                | 317 ----------
 .../{like_holder.cc => regex_functions_holder.cc}  | 113 +++-
 .../{replace_holder.h => regex_functions_holder.h} |  62 ++
 cpp/src/gandiva/regex_functions_holder_test.cc     | 701 +++++++++++++++++++++
 cpp/src/gandiva/replace_holder.cc                  |  61 --
 cpp/src/gandiva/replace_holder_test.cc             | 129 ----
 cpp/src/gandiva/tests/projector_test.cc            |  50 ++
 12 files changed, 962 insertions(+), 585 deletions(-)

diff --git a/cpp/src/gandiva/CMakeLists.txt b/cpp/src/gandiva/CMakeLists.txt
index 71faf9a38e..46e819ccc6 100644
--- a/cpp/src/gandiva/CMakeLists.txt
+++ b/cpp/src/gandiva/CMakeLists.txt
@@ -87,11 +87,10 @@ set(SRC_FILES
     interval_holder.cc
     llvm_generator.cc
     llvm_types.cc
-    like_holder.cc
     literal_holder.cc
     projector.cc
     regex_util.cc
-    replace_holder.cc
+    regex_functions_holder.cc
     selection_vector.cc
     tree_expr_builder.cc
     to_date_holder.cc
@@ -236,8 +235,7 @@ add_gandiva_test(internals-test
                  lru_cache_test.cc
                  to_date_holder_test.cc
                  simple_arena_test.cc
-                 like_holder_test.cc
-                 replace_holder_test.cc
+                 regex_functions_holder_test.cc
                  decimal_type_util_test.cc
                  random_generator_holder_test.cc
                  hash_utils_test.cc
diff --git a/cpp/src/gandiva/function_holder_registry.h b/cpp/src/gandiva/function_holder_registry.h
index bddf32034f..97a03db347 100644
--- a/cpp/src/gandiva/function_holder_registry.h
+++ b/cpp/src/gandiva/function_holder_registry.h
@@ -25,10 +25,9 @@
 #include "arrow/status.h"
 #include "gandiva/function_holder.h"
 #include "gandiva/interval_holder.h"
-#include "gandiva/like_holder.h"
 #include "gandiva/node.h"
 #include "gandiva/random_generator_holder.h"
-#include "gandiva/replace_holder.h"
+#include "gandiva/regex_functions_holder.h"
 #include "gandiva/to_date_holder.h"
 
 namespace gandiva {
@@ -67,6 +66,7 @@ class FunctionHolderRegistry {
                                  {"random", 
LAMBDA_MAKER(RandomGeneratorHolder)},
                                  {"rand", LAMBDA_MAKER(RandomGeneratorHolder)},
                                  {"regexp_replace", 
LAMBDA_MAKER(ReplaceHolder)},
+                                 {"regexp_extract", 
LAMBDA_MAKER(ExtractHolder)},
                                  {"castintervalday", 
LAMBDA_MAKER(IntervalDaysHolder)},
                                  {"castintervalyear", 
LAMBDA_MAKER(IntervalYearsHolder)}};
     return maker_map;
diff --git a/cpp/src/gandiva/function_registry_string.cc b/cpp/src/gandiva/function_registry_string.cc
index 4890ec8838..c1b6ef1648 100644
--- a/cpp/src/gandiva/function_registry_string.cc
+++ b/cpp/src/gandiva/function_registry_string.cc
@@ -255,6 +255,12 @@ std::vector<NativeFunction> GetStringFunctionRegistry() {
                          NativeFunction::kNeedsFunctionHolder |
                          NativeFunction::kCanReturnErrors),
 
+      NativeFunction("regexp_extract", {}, DataTypeVector{utf8(), utf8(), 
int32()},
+                     utf8(), kResultNullIfNull, 
"gdv_fn_regexp_extract_utf8_utf8_int32",
+                     NativeFunction::kNeedsContext |
+                         NativeFunction::kNeedsFunctionHolder |
+                         NativeFunction::kCanReturnErrors),
+
       NativeFunction("concatOperator", {}, DataTypeVector{utf8(), utf8()}, 
utf8(),
                      kResultNullIfNull, "concatOperator_utf8_utf8",
                      NativeFunction::kNeedsContext),
diff --git a/cpp/src/gandiva/gdv_string_function_stubs.cc b/cpp/src/gandiva/gdv_string_function_stubs.cc
index 862c6e91cb..1948d3a3e1 100644
--- a/cpp/src/gandiva/gdv_string_function_stubs.cc
+++ b/cpp/src/gandiva/gdv_string_function_stubs.cc
@@ -31,9 +31,8 @@
 #include "gandiva/engine.h"
 #include "gandiva/exported_funcs.h"
 #include "gandiva/formatting_utils.h"
-#include "gandiva/like_holder.h"
 #include "gandiva/precompiled/types.h"
-#include "gandiva/replace_holder.h"
+#include "gandiva/regex_functions_holder.h"
 
 extern "C" {
 
@@ -68,6 +67,19 @@ const char* gdv_fn_regexp_replace_utf8_utf8(
                    out_length);
 }
 
+const char* gdv_fn_regexp_extract_utf8_utf8_int32(int64_t ptr, int64_t 
holder_ptr,
+                                                  const char* data, int32_t 
data_len,
+                                                  const char* /*pattern*/,
+                                                  int32_t /*pattern_len*/,
+                                                  int32_t extract_index,
+                                                  int32_t* out_length) {
+  gandiva::ExecutionContext* context = 
reinterpret_cast<gandiva::ExecutionContext*>(ptr);
+
+  gandiva::ExtractHolder* holder = 
reinterpret_cast<gandiva::ExtractHolder*>(holder_ptr);
+
+  return (*holder)(context, data, data_len, extract_index, out_length);
+}
+
 #define GDV_FN_CAST_VARLEN_TYPE_FROM_TYPE(IN_TYPE, CAST_NAME, ARROW_TYPE)      
   \
   GANDIVA_EXPORT                                                               
   \
   const char* gdv_fn_cast##CAST_NAME##_##IN_TYPE##_int64(                      
   \
@@ -495,6 +507,20 @@ void ExportedStringFunctions::AddMappings(Engine* engine) const {
       "gdv_fn_regexp_replace_utf8_utf8", types->i8_ptr_type() /*return_type*/, 
args,
       reinterpret_cast<void*>(gdv_fn_regexp_replace_utf8_utf8));
 
+  // gdv_fn_regexp_extract_utf8_utf8_int32
+  args = {types->i64_type(),       // int64_t ptr
+          types->i64_type(),       // int64_t holder_ptr
+          types->i8_ptr_type(),    // const char* data
+          types->i32_type(),       // int data_len
+          types->i8_ptr_type(),    // const char* pattern
+          types->i32_type(),       // int pattern_len
+          types->i32_type(),       // int32_t extract_index
+          types->i32_ptr_type()};  // int32_t* out_length
+
+  engine->AddGlobalMappingForFunc(
+      "gdv_fn_regexp_extract_utf8_utf8_int32", types->i8_ptr_type() 
/*return_type*/, args,
+      reinterpret_cast<void*>(gdv_fn_regexp_extract_utf8_utf8_int32));
+
   // gdv_fn_castVARCHAR_int32_int64
   args = {types->i64_type(),       // int64_t execution_context
           types->i32_type(),       // int32_t value
diff --git a/cpp/src/gandiva/like_holder.h b/cpp/src/gandiva/like_holder.h
deleted file mode 100644
index 73e58017de..0000000000
--- a/cpp/src/gandiva/like_holder.h
+++ /dev/null
@@ -1,68 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <memory>
-#include <string>
-
-#include <re2/re2.h>
-
-#include "arrow/status.h"
-
-#include "gandiva/function_holder.h"
-#include "gandiva/node.h"
-#include "gandiva/visibility.h"
-
-namespace gandiva {
-
-/// Function Holder for SQL 'like'
-class GANDIVA_EXPORT LikeHolder : public FunctionHolder {
- public:
-  ~LikeHolder() override = default;
-
-  static Status Make(const FunctionNode& node, std::shared_ptr<LikeHolder>* 
holder);
-
-  static Status Make(const std::string& sql_pattern, 
std::shared_ptr<LikeHolder>* holder);
-
-  static Status Make(const std::string& sql_pattern, const std::string& 
escape_char,
-                     std::shared_ptr<LikeHolder>* holder);
-
-  static Status Make(const std::string& sql_pattern, 
std::shared_ptr<LikeHolder>* holder,
-                     RE2::Options regex_op);
-
-  // Try and optimise a function node with a "like" pattern.
-  static const FunctionNode TryOptimize(const FunctionNode& node);
-
-  /// Return true if the data matches the pattern.
-  bool operator()(const std::string& data) { return RE2::FullMatch(data, 
regex_); }
-
- private:
-  explicit LikeHolder(const std::string& pattern) : pattern_(pattern), 
regex_(pattern) {}
-
-  LikeHolder(const std::string& pattern, RE2::Options regex_op)
-      : pattern_(pattern), regex_(pattern, regex_op) {}
-
-  std::string pattern_;  // posix pattern string, to help debugging
-  RE2 regex_;            // compiled regex for the pattern
-
-  static RE2 starts_with_regex_;  // pre-compiled pattern for matching 
starts_with
-  static RE2 ends_with_regex_;    // pre-compiled pattern for matching 
ends_with
-  static RE2 is_substr_regex_;    // pre-compiled pattern for matching 
is_substr
-};
-
-}  // namespace gandiva
diff --git a/cpp/src/gandiva/like_holder_test.cc b/cpp/src/gandiva/like_holder_test.cc
deleted file mode 100644
index 76a7754298..0000000000
--- a/cpp/src/gandiva/like_holder_test.cc
+++ /dev/null
@@ -1,317 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "gandiva/like_holder.h"
-#include "gandiva/regex_util.h"
-
-#include <memory>
-#include <vector>
-
-#include <gtest/gtest.h>
-
-namespace gandiva {
-
-class TestLikeHolder : public ::testing::Test {
- public:
-  RE2::Options regex_op;
-  FunctionNode BuildLike(std::string pattern) {
-    auto field = std::make_shared<FieldNode>(arrow::field("in", 
arrow::utf8()));
-    auto pattern_node =
-        std::make_shared<LiteralNode>(arrow::utf8(), LiteralHolder(pattern), 
false);
-    return FunctionNode("like", {field, pattern_node}, arrow::boolean());
-  }
-
-  FunctionNode BuildLike(std::string pattern, char escape_char) {
-    auto field = std::make_shared<FieldNode>(arrow::field("in", 
arrow::utf8()));
-    auto pattern_node =
-        std::make_shared<LiteralNode>(arrow::utf8(), LiteralHolder(pattern), 
false);
-    auto escape_char_node = std::make_shared<LiteralNode>(
-        arrow::int8(), LiteralHolder((int8_t)escape_char), false);
-    return FunctionNode("like", {field, pattern_node, escape_char_node},
-                        arrow::boolean());
-  }
-};
-
-TEST_F(TestLikeHolder, TestMatchAny) {
-  std::shared_ptr<LikeHolder> like_holder;
-
-  auto status = LikeHolder::Make("ab%", &like_holder, regex_op);
-  EXPECT_EQ(status.ok(), true) << status.message();
-
-  auto& like = *like_holder;
-  EXPECT_TRUE(like("ab"));
-  EXPECT_TRUE(like("abc"));
-  EXPECT_TRUE(like("abcd"));
-
-  EXPECT_FALSE(like("a"));
-  EXPECT_FALSE(like("cab"));
-}
-
-TEST_F(TestLikeHolder, TestMatchOne) {
-  std::shared_ptr<LikeHolder> like_holder;
-
-  auto status = LikeHolder::Make("ab_", &like_holder, regex_op);
-  EXPECT_EQ(status.ok(), true) << status.message();
-
-  auto& like = *like_holder;
-  EXPECT_TRUE(like("abc"));
-  EXPECT_TRUE(like("abd"));
-
-  EXPECT_FALSE(like("a"));
-  EXPECT_FALSE(like("abcd"));
-  EXPECT_FALSE(like("dabc"));
-}
-
-TEST_F(TestLikeHolder, TestPcreSpecial) {
-  std::shared_ptr<LikeHolder> like_holder;
-
-  auto status = LikeHolder::Make(".*ab_", &like_holder, regex_op);
-  EXPECT_EQ(status.ok(), true) << status.message();
-
-  auto& like = *like_holder;
-  EXPECT_TRUE(like(".*abc"));  // . and * aren't special in sql regex
-  EXPECT_FALSE(like("xxabc"));
-}
-
-TEST_F(TestLikeHolder, TestRegexEscape) {
-  std::string res;
-  auto status = RegexUtil::SqlLikePatternToPcre("#%hello#_abc_def##", '#', 
res);
-  EXPECT_TRUE(status.ok()) << status.message();
-
-  EXPECT_EQ(res, "%hello_abc.def#");
-}
-
-TEST_F(TestLikeHolder, TestDot) {
-  std::shared_ptr<LikeHolder> like_holder;
-
-  auto status = LikeHolder::Make("abc.", &like_holder, regex_op);
-  EXPECT_EQ(status.ok(), true) << status.message();
-
-  auto& like = *like_holder;
-  EXPECT_FALSE(like("abcd"));
-}
-
-TEST_F(TestLikeHolder, TestMatchSubString) {
-  std::shared_ptr<LikeHolder> like_holder;
-
-  auto status = LikeHolder::Make("%abc%", "\\", &like_holder);
-  EXPECT_EQ(status.ok(), true) << status.message();
-
-  auto& like = *like_holder;
-  EXPECT_TRUE(like("abc"));
-  EXPECT_FALSE(like("xxabdc"));
-
-  status = LikeHolder::Make("%ab-.^$*+?()[]{}|—/c\\%%", "\\", &like_holder);
-  EXPECT_EQ(status.ok(), true) << status.message();
-
-  auto& like_reserved_char = *like_holder;
-  EXPECT_TRUE(like_reserved_char("XXab-.^$*+?()[]{}|—/c%d"));
-  EXPECT_FALSE(like_reserved_char("xxad-.^$*+?()[]{}|—/c"));
-}
-
-TEST_F(TestLikeHolder, TestOptimise) {
-  // optimise for 'starts_with'
-  auto fnode = LikeHolder::TryOptimize(BuildLike("xy 123z%"));
-  EXPECT_EQ(fnode.descriptor()->name(), "starts_with");
-  EXPECT_EQ(fnode.ToString(), "bool starts_with((string) in, (const string) 
'xy 123z')");
-
-  // optimise for 'ends_with'
-  fnode = LikeHolder::TryOptimize(BuildLike("%xyz"));
-  EXPECT_EQ(fnode.descriptor()->name(), "ends_with");
-  EXPECT_EQ(fnode.ToString(), "bool ends_with((string) in, (const string) 
'xyz')");
-
-  // optimise for 'is_substr'
-  fnode = LikeHolder::TryOptimize(BuildLike("%abc%"));
-  EXPECT_EQ(fnode.descriptor()->name(), "is_substr");
-  EXPECT_EQ(fnode.ToString(), "bool is_substr((string) in, (const string) 
'abc')");
-
-  // optimise for 'is_substr with special characters'
-  fnode = LikeHolder::TryOptimize(BuildLike("%ab-c%"));
-  EXPECT_EQ(fnode.descriptor()->name(), "is_substr");
-  EXPECT_EQ(fnode.ToString(), "bool is_substr((string) in, (const string) 
'ab-c')");
-
-  // optimise for 'ends_with with special characters'
-  fnode = LikeHolder::TryOptimize(BuildLike("%ab-c"));
-  EXPECT_EQ(fnode.descriptor()->name(), "ends_with");
-  EXPECT_EQ(fnode.ToString(),
-            "bool ends_with((string) in, (const string) "
-            "'ab-c')");
-
-  // optimise for 'starts_with with special characters'
-  fnode = LikeHolder::TryOptimize(BuildLike("ab-c%"));
-  EXPECT_EQ(fnode.descriptor()->name(), "starts_with");
-  EXPECT_EQ(fnode.ToString(),
-            "bool starts_with((string) in, (const string) "
-            "'ab-c')");
-
-  // no optimisation for others.
-  fnode = LikeHolder::TryOptimize(BuildLike("xyz_"));
-  EXPECT_EQ(fnode.descriptor()->name(), "like");
-
-  fnode = LikeHolder::TryOptimize(BuildLike("_xyz"));
-  EXPECT_EQ(fnode.descriptor()->name(), "like");
-
-  fnode = LikeHolder::TryOptimize(BuildLike("_xyz_"));
-  EXPECT_EQ(fnode.descriptor()->name(), "like");
-
-  fnode = LikeHolder::TryOptimize(BuildLike("%xyz_"));
-  EXPECT_EQ(fnode.descriptor()->name(), "like");
-
-  fnode = LikeHolder::TryOptimize(BuildLike("x_yz%"));
-  EXPECT_EQ(fnode.descriptor()->name(), "like");
-
-  // no optimisation for escaped pattern.
-  fnode = LikeHolder::TryOptimize(BuildLike("\\%xyz", '\\'));
-  EXPECT_EQ(fnode.descriptor()->name(), "like");
-  EXPECT_EQ(fnode.ToString(),
-            "bool like((string) in, (const string) '\\%xyz', (const int8) 
\\)");
-}
-
-TEST_F(TestLikeHolder, TestMatchOneEscape) {
-  std::shared_ptr<LikeHolder> like_holder;
-
-  auto status = LikeHolder::Make("ab\\_", "\\", &like_holder);
-  EXPECT_EQ(status.ok(), true) << status.message();
-
-  auto& like = *like_holder;
-
-  EXPECT_TRUE(like("ab_"));
-
-  EXPECT_FALSE(like("abc"));
-  EXPECT_FALSE(like("abd"));
-  EXPECT_FALSE(like("a"));
-  EXPECT_FALSE(like("abcd"));
-  EXPECT_FALSE(like("dabc"));
-}
-
-TEST_F(TestLikeHolder, TestMatchManyEscape) {
-  std::shared_ptr<LikeHolder> like_holder;
-
-  auto status = LikeHolder::Make("ab\\%", "\\", &like_holder);
-  EXPECT_EQ(status.ok(), true) << status.message();
-
-  auto& like = *like_holder;
-
-  EXPECT_TRUE(like("ab%"));
-
-  EXPECT_FALSE(like("abc"));
-  EXPECT_FALSE(like("abd"));
-  EXPECT_FALSE(like("a"));
-  EXPECT_FALSE(like("abcd"));
-  EXPECT_FALSE(like("dabc"));
-}
-
-TEST_F(TestLikeHolder, TestMatchEscape) {
-  std::shared_ptr<LikeHolder> like_holder;
-
-  auto status = LikeHolder::Make("ab\\\\", "\\", &like_holder);
-  EXPECT_EQ(status.ok(), true) << status.message();
-
-  auto& like = *like_holder;
-
-  EXPECT_TRUE(like("ab\\"));
-
-  EXPECT_FALSE(like("abc"));
-}
-
-TEST_F(TestLikeHolder, TestEmptyEscapeChar) {
-  std::shared_ptr<LikeHolder> like_holder;
-
-  auto status = LikeHolder::Make("ab\\_", "", &like_holder);
-  EXPECT_EQ(status.ok(), true) << status.message();
-
-  auto& like = *like_holder;
-
-  EXPECT_TRUE(like("ab\\c"));
-  EXPECT_TRUE(like("ab\\_"));
-
-  EXPECT_FALSE(like("ab\\_d"));
-  EXPECT_FALSE(like("ab__"));
-}
-
-TEST_F(TestLikeHolder, TestMultipleEscapeChar) {
-  std::shared_ptr<LikeHolder> like_holder;
-
-  auto status = LikeHolder::Make("ab\\_", "\\\\", &like_holder);
-  EXPECT_EQ(status.ok(), false) << status.message();
-}
-class TestILikeHolder : public ::testing::Test {
- public:
-  RE2::Options regex_op;
-  FunctionNode BuildILike(std::string pattern) {
-    auto field = std::make_shared<FieldNode>(arrow::field("in", 
arrow::utf8()));
-    auto pattern_node =
-        std::make_shared<LiteralNode>(arrow::utf8(), LiteralHolder(pattern), 
false);
-    return FunctionNode("ilike", {field, pattern_node}, arrow::boolean());
-  }
-};
-
-TEST_F(TestILikeHolder, TestMatchAny) {
-  std::shared_ptr<LikeHolder> like_holder;
-
-  regex_op.set_case_sensitive(false);
-  auto status = LikeHolder::Make("ab%", &like_holder, regex_op);
-  EXPECT_EQ(status.ok(), true) << status.message();
-
-  auto& like = *like_holder;
-  EXPECT_TRUE(like("ab"));
-  EXPECT_TRUE(like("aBc"));
-  EXPECT_TRUE(like("ABCD"));
-
-  EXPECT_FALSE(like("a"));
-  EXPECT_FALSE(like("cab"));
-}
-
-TEST_F(TestILikeHolder, TestMatchOne) {
-  std::shared_ptr<LikeHolder> like_holder;
-
-  regex_op.set_case_sensitive(false);
-  auto status = LikeHolder::Make("Ab_", &like_holder, regex_op);
-  EXPECT_EQ(status.ok(), true) << status.message();
-
-  auto& like = *like_holder;
-  EXPECT_TRUE(like("abc"));
-  EXPECT_TRUE(like("aBd"));
-
-  EXPECT_FALSE(like("A"));
-  EXPECT_FALSE(like("Abcd"));
-  EXPECT_FALSE(like("DaBc"));
-}
-
-TEST_F(TestILikeHolder, TestPcreSpecial) {
-  std::shared_ptr<LikeHolder> like_holder;
-
-  regex_op.set_case_sensitive(false);
-  auto status = LikeHolder::Make(".*aB_", &like_holder, regex_op);
-  EXPECT_EQ(status.ok(), true) << status.message();
-
-  auto& like = *like_holder;
-  EXPECT_TRUE(like(".*Abc"));  // . and * aren't special in sql regex
-  EXPECT_FALSE(like("xxAbc"));
-}
-
-TEST_F(TestILikeHolder, TestDot) {
-  std::shared_ptr<LikeHolder> like_holder;
-
-  regex_op.set_case_sensitive(false);
-  auto status = LikeHolder::Make("aBc.", &like_holder, regex_op);
-  EXPECT_EQ(status.ok(), true) << status.message();
-
-  auto& like = *like_holder;
-  EXPECT_FALSE(like("abcd"));
-}
-}  // namespace gandiva
diff --git a/cpp/src/gandiva/like_holder.cc b/cpp/src/gandiva/regex_functions_holder.cc
similarity index 62%
rename from cpp/src/gandiva/like_holder.cc
rename to cpp/src/gandiva/regex_functions_holder.cc
index 3391c7ec16..b1e2e59cb2 100644
--- a/cpp/src/gandiva/like_holder.cc
+++ b/cpp/src/gandiva/regex_functions_holder.cc
@@ -15,13 +15,13 @@
 // specific language governing permissions and limitations
 // under the License.
 
-#include "gandiva/like_holder.h"
-
+#include "gandiva/regex_functions_holder.h"
 #include <regex>
 #include "gandiva/node.h"
 #include "gandiva/regex_util.h"
 
 namespace gandiva {
+
 RE2 LikeHolder::starts_with_regex_(R"(([^\.\*])*\.\*)");
 RE2 LikeHolder::ends_with_regex_(R"(\.\*([^\.\*])*)");
 RE2 LikeHolder::is_substr_regex_(R"(\.\*([^\.\*])*\.\*)");
@@ -163,4 +163,113 @@ Status LikeHolder::Make(const std::string& sql_pattern,
   *holder = lholder;
   return Status::OK();
 }
+
+Status ReplaceHolder::Make(const FunctionNode& node,
+                           std::shared_ptr<ReplaceHolder>* holder) {
+  ARROW_RETURN_IF(node.children().size() != 3,
+                  Status::Invalid("'replace' function requires three 
parameters"));
+
+  auto literal = dynamic_cast<LiteralNode*>(node.children().at(1).get());
+  ARROW_RETURN_IF(
+      literal == nullptr,
+      Status::Invalid("'replace' function requires a literal as the second 
parameter"));
+
+  auto literal_type = literal->return_type()->id();
+  ARROW_RETURN_IF(
+      !(literal_type == arrow::Type::STRING || literal_type == 
arrow::Type::BINARY),
+      Status::Invalid(
+          "'replace' function requires a string literal as the second 
parameter"));
+
+  return Make(arrow::util::get<std::string>(literal->holder()), holder);
+}
+
+Status ReplaceHolder::Make(const std::string& sql_pattern,
+                           std::shared_ptr<ReplaceHolder>* holder) {
+  auto lholder = std::shared_ptr<ReplaceHolder>(new 
ReplaceHolder(sql_pattern));
+  ARROW_RETURN_IF(!lholder->regex_.ok(),
+                  Status::Invalid("Building RE2 pattern '", sql_pattern, "' 
failed"));
+
+  *holder = lholder;
+  return Status::OK();
+}
+
+void ReplaceHolder::return_error(ExecutionContext* context, std::string& data,
+                                 std::string& replace_string) {
+  std::string err_msg = "Error replacing '" + replace_string + "' on the given 
string '" +
+                        data + "' for the given pattern: " + pattern_;
+  context->set_error_msg(err_msg.c_str());
+}
+
+Status ExtractHolder::Make(const FunctionNode& node,
+                           std::shared_ptr<ExtractHolder>* holder) {
+  ARROW_RETURN_IF(node.children().size() != 3,
+                  Status::Invalid("'extract' function requires three 
parameters"));
+
+  auto literal = dynamic_cast<LiteralNode*>(node.children().at(1).get());
+  ARROW_RETURN_IF(
+      literal == nullptr || 
!IsArrowStringLiteral(literal->return_type()->id()),
+      Status::Invalid("'extract' function requires a literal as the second 
parameter"));
+
+  return ExtractHolder::Make(arrow::util::get<std::string>(literal->holder()), 
holder);
+}
+
+Status ExtractHolder::Make(const std::string& sql_pattern,
+                           std::shared_ptr<ExtractHolder>* holder) {
+  auto lholder = std::shared_ptr<ExtractHolder>(new 
ExtractHolder(sql_pattern));
+  ARROW_RETURN_IF(!lholder->regex_.ok(),
+                  Status::Invalid("Building RE2 pattern '", sql_pattern, "' 
failed"));
+
+  *holder = lholder;
+  return Status::OK();
+}
+
+const char* ExtractHolder::operator()(ExecutionContext* ctx, const char* 
user_input,
+                                      int32_t user_input_len, int32_t 
extract_index,
+                                      int32_t* out_length) {
+  if (extract_index < 0 || extract_index >= num_groups_pattern_) {
+    ctx->set_error_msg("Index to extract out of range");
+    *out_length = 0;
+    return "";
+  }
+
+  std::string user_input_as_str(user_input, user_input_len);
+
+  // Create the vectors that will store the arguments to be captured by the 
regex
+  // groups.
+  std::vector<std::string> arguments_as_str(num_groups_pattern_);
+  std::vector<RE2::Arg> arguments(num_groups_pattern_);
+  std::vector<RE2::Arg*> arguments_ptrs(num_groups_pattern_);
+
+  for (int32_t i = 0; i < num_groups_pattern_; i++) {
+    // Bind argument to string from vector.
+    arguments[i] = &arguments_as_str[i];
+    // Save pointer to argument.
+    arguments_ptrs[i] = &arguments[i];
+  }
+
+  re2::StringPiece piece(user_input_as_str);
+  if (!RE2::FindAndConsumeN(&piece, regex_, arguments_ptrs.data(), 
num_groups_pattern_)) {
+    *out_length = 0;
+    return "";
+  }
+
+  auto out_str = arguments_as_str[extract_index];
+  *out_length = static_cast<int32_t>(out_str.size());
+
+  // This condition treats the case where the return is an empty string
+  if (*out_length == 0) {
+    return "";
+  }
+
+  char* result_buffer = 
reinterpret_cast<char*>(ctx->arena()->Allocate(*out_length));
+  if (result_buffer == NULLPTR) {
+    ctx->set_error_msg("Could not allocate memory for result");
+    *out_length = 0;
+    return "";
+  }
+
+  memcpy(result_buffer, out_str.data(), *out_length);
+  return result_buffer;
+}
+
 }  // namespace gandiva
diff --git a/cpp/src/gandiva/replace_holder.h b/cpp/src/gandiva/regex_functions_holder.h
similarity index 54%
rename from cpp/src/gandiva/replace_holder.h
rename to cpp/src/gandiva/regex_functions_holder.h
index 79150d7aa4..8e0fe44269 100644
--- a/cpp/src/gandiva/replace_holder.h
+++ b/cpp/src/gandiva/regex_functions_holder.h
@@ -30,6 +30,41 @@
 
 namespace gandiva {
 
+/// Function Holder for SQL 'like'
+class GANDIVA_EXPORT LikeHolder : public FunctionHolder {
+ public:
+  ~LikeHolder() override = default;
+
+  static Status Make(const FunctionNode& node, std::shared_ptr<LikeHolder>* 
holder);
+
+  static Status Make(const std::string& sql_pattern, 
std::shared_ptr<LikeHolder>* holder);
+
+  static Status Make(const std::string& sql_pattern, const std::string& 
escape_char,
+                     std::shared_ptr<LikeHolder>* holder);
+
+  static Status Make(const std::string& sql_pattern, 
std::shared_ptr<LikeHolder>* holder,
+                     RE2::Options regex_op);
+
+  // Try and optimise a function node with a "like" pattern.
+  static const FunctionNode TryOptimize(const FunctionNode& node);
+
+  /// Return true if the data matches the pattern.
+  bool operator()(const std::string& data) { return RE2::FullMatch(data, 
regex_); }
+
+ private:
+  explicit LikeHolder(const std::string& pattern) : pattern_(pattern), 
regex_(pattern) {}
+
+  LikeHolder(const std::string& pattern, RE2::Options regex_op)
+      : pattern_(pattern), regex_(pattern, regex_op) {}
+
+  std::string pattern_;  // posix pattern string, to help debugging
+  RE2 regex_;            // compiled regex for the pattern
+
+  static RE2 starts_with_regex_;  // pre-compiled pattern for matching 
starts_with
+  static RE2 ends_with_regex_;    // pre-compiled pattern for matching 
ends_with
+  static RE2 is_substr_regex_;    // pre-compiled pattern for matching 
is_substr
+};
+
 /// Function Holder for 'replace'
 class GANDIVA_EXPORT ReplaceHolder : public FunctionHolder {
  public:
@@ -94,4 +129,31 @@ class GANDIVA_EXPORT ReplaceHolder : public FunctionHolder {
   RE2 regex_;            // compiled regex for the pattern
 };
 
+/// Function Holder for 'regexp_extract' function
+class GANDIVA_EXPORT ExtractHolder : public FunctionHolder {
+ public:
+  ~ExtractHolder() override = default;
+
+  static Status Make(const FunctionNode& node, std::shared_ptr<ExtractHolder>* 
holder);
+
+  static Status Make(const std::string& sql_pattern,
+                     std::shared_ptr<ExtractHolder>* holder);
+
+  /// Extracts the matching text from a string using a regex
+  const char* operator()(ExecutionContext* ctx, const char* user_input,
+                         int32_t user_input_len, int32_t extract_index,
+                         int32_t* out_length);
+
+ private:
+  // The pattern must be enclosed inside an outside group to be able to catch 
the string
+  // piece that matched with the entire regex when the user define the group 
"0". It is
+  // used because the RE2 library does not provide that defined behavior by 
default.
+  explicit ExtractHolder(const std::string& pattern) : regex_("(" + pattern + 
")") {
+    num_groups_pattern_ = regex_.NumberOfCapturingGroups();
+  }
+
+  RE2 regex_;                   // compiled regex for the pattern
+  int32_t num_groups_pattern_;  // number of groups that user defined inside 
the regex
+};
+
 }  // namespace gandiva
diff --git a/cpp/src/gandiva/regex_functions_holder_test.cc b/cpp/src/gandiva/regex_functions_holder_test.cc
new file mode 100644
index 0000000000..584674a20f
--- /dev/null
+++ b/cpp/src/gandiva/regex_functions_holder_test.cc
@@ -0,0 +1,701 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "gandiva/regex_functions_holder.h"
+#include <gmock/gmock.h>
+#include <gtest/gtest.h>
+#include <memory>
+#include <vector>
+#include "gandiva/regex_util.h"
+
+namespace gandiva {
+
+class TestLikeHolder : public ::testing::Test {  // fixture for the LikeHolder tests
+ public:
+  RE2::Options regex_op;  // left at its defaults; handed to LikeHolder::Make by the tests
+  FunctionNode BuildLike(std::string pattern) {  // builds the node like(in, pattern) -> boolean
+    auto field = std::make_shared<FieldNode>(arrow::field("in", 
arrow::utf8()));
+    auto pattern_node =
+        std::make_shared<LiteralNode>(arrow::utf8(), LiteralHolder(pattern), 
false);
+    return FunctionNode("like", {field, pattern_node}, arrow::boolean());
+  }
+
+  FunctionNode BuildLike(std::string pattern, char escape_char) {  // same, plus an int8 escape-char literal
+    auto field = std::make_shared<FieldNode>(arrow::field("in", 
arrow::utf8()));
+    auto pattern_node =
+        std::make_shared<LiteralNode>(arrow::utf8(), LiteralHolder(pattern), 
false);
+    auto escape_char_node = std::make_shared<LiteralNode>(
+        arrow::int8(), LiteralHolder((int8_t)escape_char), false);
+    return FunctionNode("like", {field, pattern_node, escape_char_node},
+                        arrow::boolean());
+  }
+};
+
+TEST_F(TestLikeHolder, TestMatchAny) {  // "ab%": '%' matches any, possibly empty, suffix after "ab"
+  std::shared_ptr<LikeHolder> like_holder;
+
+  auto status = LikeHolder::Make("ab%", &like_holder, regex_op);
+  EXPECT_EQ(status.ok(), true) << status.message();  // NOTE(review): non-fatal, the dereference below still runs on failure
+
+  auto& like = *like_holder;
+  EXPECT_TRUE(like("ab"));
+  EXPECT_TRUE(like("abc"));
+  EXPECT_TRUE(like("abcd"));
+
+  EXPECT_FALSE(like("a"));
+  EXPECT_FALSE(like("cab"));  // the pattern has no leading '%', so "ab" must be at the start
+}
+
+TEST_F(TestLikeHolder, TestMatchOne) {  // "ab_": '_' matches exactly one character
+  std::shared_ptr<LikeHolder> like_holder;
+
+  auto status = LikeHolder::Make("ab_", &like_holder, regex_op);
+  EXPECT_EQ(status.ok(), true) << status.message();
+
+  auto& like = *like_holder;
+  EXPECT_TRUE(like("abc"));
+  EXPECT_TRUE(like("abd"));
+
+  EXPECT_FALSE(like("a"));
+  EXPECT_FALSE(like("abcd"));  // one character too many
+  EXPECT_FALSE(like("dabc"));
+}
+
+TEST_F(TestLikeHolder, TestPcreSpecial) {  // regex metacharacters in a LIKE pattern are matched literally
+  std::shared_ptr<LikeHolder> like_holder;
+
+  auto status = LikeHolder::Make(".*ab_", &like_holder, regex_op);
+  EXPECT_EQ(status.ok(), true) << status.message();
+
+  auto& like = *like_holder;
+  EXPECT_TRUE(like(".*abc"));  // . and * aren't special in sql regex
+  EXPECT_FALSE(like("xxabc"));  // would match if ".*" were treated as a regex wildcard
+}
+
+TEST_F(TestLikeHolder, TestRegexEscape) {  // converts a LIKE pattern that uses '#' as its escape character
+  std::string res;
+  auto status = RegexUtil::SqlLikePatternToPcre("#%hello#_abc_def##", '#', 
res);
+  EXPECT_TRUE(status.ok()) << status.message();
+
+  EXPECT_EQ(res, "%hello_abc.def#");  // "#%", "#_", "##" become literals; the bare '_' becomes '.'
+}
+
+TEST_F(TestLikeHolder, TestDot) {  // a '.' in a LIKE pattern is not an "any character" wildcard
+  std::shared_ptr<LikeHolder> like_holder;
+
+  auto status = LikeHolder::Make("abc.", &like_holder, regex_op);
+  EXPECT_EQ(status.ok(), true) << status.message();
+
+  auto& like = *like_holder;
+  EXPECT_FALSE(like("abcd"));
+}
+
+TEST_F(TestLikeHolder, TestMatchSubString) {  // "%...%" substring patterns, built with a backslash escape string
+  std::shared_ptr<LikeHolder> like_holder;
+
+  auto status = LikeHolder::Make("%abc%", "\\", &like_holder);
+  EXPECT_EQ(status.ok(), true) << status.message();
+
+  auto& like = *like_holder;
+  EXPECT_TRUE(like("abc"));
+  EXPECT_FALSE(like("xxabdc"));
+
+  status = LikeHolder::Make("%ab-.^$*+?()[]{}|—/c\\%%", "\\", &like_holder);  // regex-reserved characters plus an escaped '%'
+  EXPECT_EQ(status.ok(), true) << status.message();
+
+  auto& like_reserved_char = *like_holder;
+  EXPECT_TRUE(like_reserved_char("XXab-.^$*+?()[]{}|—/c%d"));
+  EXPECT_FALSE(like_reserved_char("xxad-.^$*+?()[]{}|—/c"));
+}
+
+TEST_F(TestLikeHolder, TestOptimise) {  // TryOptimize rewrites simple '%' patterns into cheaper functions
+  // optimise for 'starts_with'
+  auto fnode = LikeHolder::TryOptimize(BuildLike("xy 123z%"));
+  EXPECT_EQ(fnode.descriptor()->name(), "starts_with");
+  EXPECT_EQ(fnode.ToString(), "bool starts_with((string) in, (const string) 
'xy 123z')");
+
+  // optimise for 'ends_with'
+  fnode = LikeHolder::TryOptimize(BuildLike("%xyz"));
+  EXPECT_EQ(fnode.descriptor()->name(), "ends_with");
+  EXPECT_EQ(fnode.ToString(), "bool ends_with((string) in, (const string) 
'xyz')");
+
+  // optimise for 'is_substr'
+  fnode = LikeHolder::TryOptimize(BuildLike("%abc%"));
+  EXPECT_EQ(fnode.descriptor()->name(), "is_substr");
+  EXPECT_EQ(fnode.ToString(), "bool is_substr((string) in, (const string) 
'abc')");
+
+  // optimise for 'is_substr with special characters'
+  fnode = LikeHolder::TryOptimize(BuildLike("%ab-c%"));
+  EXPECT_EQ(fnode.descriptor()->name(), "is_substr");
+  EXPECT_EQ(fnode.ToString(), "bool is_substr((string) in, (const string) 
'ab-c')");
+
+  // optimise for 'ends_with with special characters'
+  fnode = LikeHolder::TryOptimize(BuildLike("%ab-c"));
+  EXPECT_EQ(fnode.descriptor()->name(), "ends_with");
+  EXPECT_EQ(fnode.ToString(),
+            "bool ends_with((string) in, (const string) "
+            "'ab-c')");
+
+  // optimise for 'starts_with with special characters'
+  fnode = LikeHolder::TryOptimize(BuildLike("ab-c%"));
+  EXPECT_EQ(fnode.descriptor()->name(), "starts_with");
+  EXPECT_EQ(fnode.ToString(),
+            "bool starts_with((string) in, (const string) "
+            "'ab-c')");
+
+  // no optimisation for others: every pattern below contains a '_' wildcard.
+  fnode = LikeHolder::TryOptimize(BuildLike("xyz_"));
+  EXPECT_EQ(fnode.descriptor()->name(), "like");
+
+  fnode = LikeHolder::TryOptimize(BuildLike("_xyz"));
+  EXPECT_EQ(fnode.descriptor()->name(), "like");
+
+  fnode = LikeHolder::TryOptimize(BuildLike("_xyz_"));
+  EXPECT_EQ(fnode.descriptor()->name(), "like");
+
+  fnode = LikeHolder::TryOptimize(BuildLike("%xyz_"));
+  EXPECT_EQ(fnode.descriptor()->name(), "like");
+
+  fnode = LikeHolder::TryOptimize(BuildLike("x_yz%"));
+  EXPECT_EQ(fnode.descriptor()->name(), "like");
+
+  // no optimisation for escaped pattern: the node keeps all three arguments.
+  fnode = LikeHolder::TryOptimize(BuildLike("\\%xyz", '\\'));
+  EXPECT_EQ(fnode.descriptor()->name(), "like");
+  EXPECT_EQ(fnode.ToString(),
+            "bool like((string) in, (const string) '\\%xyz', (const int8) 
\\)");
+}
+
+TEST_F(TestLikeHolder, TestMatchOneEscape) {  // an escaped '_' matches only a literal underscore
+  std::shared_ptr<LikeHolder> like_holder;
+
+  auto status = LikeHolder::Make("ab\\_", "\\", &like_holder);
+  EXPECT_EQ(status.ok(), true) << status.message();
+
+  auto& like = *like_holder;
+
+  EXPECT_TRUE(like("ab_"));
+
+  EXPECT_FALSE(like("abc"));  // would match if '_' were still a wildcard
+  EXPECT_FALSE(like("abd"));
+  EXPECT_FALSE(like("a"));
+  EXPECT_FALSE(like("abcd"));
+  EXPECT_FALSE(like("dabc"));
+}
+
+TEST_F(TestLikeHolder, TestMatchManyEscape) {  // an escaped '%' matches only a literal percent sign
+  std::shared_ptr<LikeHolder> like_holder;
+
+  auto status = LikeHolder::Make("ab\\%", "\\", &like_holder);
+  EXPECT_EQ(status.ok(), true) << status.message();
+
+  auto& like = *like_holder;
+
+  EXPECT_TRUE(like("ab%"));
+
+  EXPECT_FALSE(like("abc"));  // would match if '%' were still a wildcard
+  EXPECT_FALSE(like("abd"));
+  EXPECT_FALSE(like("a"));
+  EXPECT_FALSE(like("abcd"));
+  EXPECT_FALSE(like("dabc"));
+}
+
+TEST_F(TestLikeHolder, TestMatchEscape) {  // an escaped escape character matches one literal backslash
+  std::shared_ptr<LikeHolder> like_holder;
+
+  auto status = LikeHolder::Make("ab\\\\", "\\", &like_holder);
+  EXPECT_EQ(status.ok(), true) << status.message();
+
+  auto& like = *like_holder;
+
+  EXPECT_TRUE(like("ab\\"));
+
+  EXPECT_FALSE(like("abc"));
+}
+
+TEST_F(TestLikeHolder, TestEmptyEscapeChar) {  // with an empty escape string nothing is escaped
+  std::shared_ptr<LikeHolder> like_holder;
+
+  auto status = LikeHolder::Make("ab\\_", "", &like_holder);
+  EXPECT_EQ(status.ok(), true) << status.message();
+
+  auto& like = *like_holder;
+
+  EXPECT_TRUE(like("ab\\c"));  // the backslash is literal and '_' still matches any one character
+  EXPECT_TRUE(like("ab\\_"));
+
+  EXPECT_FALSE(like("ab\\_d"));
+  EXPECT_FALSE(like("ab__"));
+}
+
+TEST_F(TestLikeHolder, TestMultipleEscapeChar) {  // a two-character escape string is rejected by Make
+  std::shared_ptr<LikeHolder> like_holder;
+
+  auto status = LikeHolder::Make("ab\\_", "\\\\", &like_holder);
+  EXPECT_EQ(status.ok(), false) << status.message();
+}
+
+class TestILikeHolder : public ::testing::Test {  // fixture for the case-insensitive LikeHolder tests
+ public:
+  RE2::Options regex_op;  // each test switches this to case-insensitive before calling Make
+  FunctionNode BuildILike(std::string pattern) {  // builds the node ilike(in, pattern) -> boolean
+    auto field = std::make_shared<FieldNode>(arrow::field("in", 
arrow::utf8()));
+    auto pattern_node =
+        std::make_shared<LiteralNode>(arrow::utf8(), LiteralHolder(pattern), 
false);
+    return FunctionNode("ilike", {field, pattern_node}, arrow::boolean());
+  }
+};
+
+TEST_F(TestILikeHolder, TestMatchAny) {  // "ab%" with case-insensitive matching
+  std::shared_ptr<LikeHolder> like_holder;
+
+  regex_op.set_case_sensitive(false);  // this option is what makes the holder behave as ILIKE
+  auto status = LikeHolder::Make("ab%", &like_holder, regex_op);
+  EXPECT_EQ(status.ok(), true) << status.message();
+
+  auto& like = *like_holder;
+  EXPECT_TRUE(like("ab"));
+  EXPECT_TRUE(like("aBc"));
+  EXPECT_TRUE(like("ABCD"));
+
+  EXPECT_FALSE(like("a"));
+  EXPECT_FALSE(like("cab"));
+}
+
+TEST_F(TestILikeHolder, TestMatchOne) {  // "Ab_" with case-insensitive matching: '_' is still exactly one character
+  std::shared_ptr<LikeHolder> like_holder;
+
+  regex_op.set_case_sensitive(false);
+  auto status = LikeHolder::Make("Ab_", &like_holder, regex_op);
+  EXPECT_EQ(status.ok(), true) << status.message();
+
+  auto& like = *like_holder;
+  EXPECT_TRUE(like("abc"));
+  EXPECT_TRUE(like("aBd"));
+
+  EXPECT_FALSE(like("A"));
+  EXPECT_FALSE(like("Abcd"));
+  EXPECT_FALSE(like("DaBc"));
+}
+
+TEST_F(TestILikeHolder, TestPcreSpecial) {  // regex metacharacters stay literal with case-insensitive matching
+  std::shared_ptr<LikeHolder> like_holder;
+
+  regex_op.set_case_sensitive(false);
+  auto status = LikeHolder::Make(".*aB_", &like_holder, regex_op);
+  EXPECT_EQ(status.ok(), true) << status.message();
+
+  auto& like = *like_holder;
+  EXPECT_TRUE(like(".*Abc"));  // . and * aren't special in sql regex
+  EXPECT_FALSE(like("xxAbc"));
+}
+
+TEST_F(TestILikeHolder, TestDot) {  // a '.' is not a wildcard with case-insensitive matching either
+  std::shared_ptr<LikeHolder> like_holder;
+
+  regex_op.set_case_sensitive(false);
+  auto status = LikeHolder::Make("aBc.", &like_holder, regex_op);
+  EXPECT_EQ(status.ok(), true) << status.message();
+
+  auto& like = *like_holder;
+  EXPECT_FALSE(like("abcd"));
+}
+
+class TestReplaceHolder : public ::testing::Test {  // fixture for the ReplaceHolder tests
+ protected:
+  ExecutionContext execution_context_;  // passed to every replace call; has_error() is checked on it
+};
+
+TEST_F(TestReplaceHolder, TestMultipleReplace) {  // every non-overlapping "ana" is removed from the input
+  std::shared_ptr<ReplaceHolder> replace_holder;
+
+  auto status = ReplaceHolder::Make("ana", &replace_holder);
+  EXPECT_EQ(status.ok(), true) << status.message();
+
+  std::string input_string = "banana";
+  std::string replace_string;  // empty replacement, so each match is deleted
+  int32_t out_length = 0;
+
+  auto& replace = *replace_holder;
+  const char* ret =
+      replace(&execution_context_, input_string.c_str(),
+              static_cast<int32_t>(input_string.length()), 
replace_string.c_str(),
+              static_cast<int32_t>(replace_string.length()), &out_length);
+  std::string ret_as_str(ret, out_length);
+  EXPECT_EQ(out_length, 3);
+  EXPECT_EQ(ret_as_str, "bna");
+
+  input_string = "bananaana";
+
+  ret = replace(&execution_context_, input_string.c_str(),
+                static_cast<int32_t>(input_string.length()), 
replace_string.c_str(),
+                static_cast<int32_t>(replace_string.length()), &out_length);
+  ret_as_str = std::string(ret, out_length);
+  EXPECT_EQ(out_length, 3);
+  EXPECT_EQ(ret_as_str, "bna");
+
+  input_string = "bananana";
+
+  ret = replace(&execution_context_, input_string.c_str(),
+                static_cast<int32_t>(input_string.length()), 
replace_string.c_str(),
+                static_cast<int32_t>(replace_string.length()), &out_length);
+  ret_as_str = std::string(ret, out_length);
+  EXPECT_EQ(out_length, 2);
+  EXPECT_EQ(ret_as_str, "bn");
+
+  input_string = "anaana";  // consists only of matches, so the result is empty
+
+  ret = replace(&execution_context_, input_string.c_str(),
+                static_cast<int32_t>(input_string.length()), 
replace_string.c_str(),
+                static_cast<int32_t>(replace_string.length()), &out_length);
+  ret_as_str = std::string(ret, out_length);
+  EXPECT_EQ(out_length, 0);
+  EXPECT_FALSE(execution_context_.has_error());
+  EXPECT_EQ(ret_as_str, "");
+}
+
+TEST_F(TestReplaceHolder, TestNoMatchPattern) {  // the input comes back unchanged when the pattern never matches
+  std::shared_ptr<ReplaceHolder> replace_holder;
+
+  auto status = ReplaceHolder::Make("ana", &replace_holder);
+  EXPECT_EQ(status.ok(), true) << status.message();
+
+  std::string input_string = "apple";
+  std::string replace_string;
+  int32_t out_length = 0;
+
+  auto& replace = *replace_holder;
+  const char* ret =
+      replace(&execution_context_, input_string.c_str(),
+              static_cast<int32_t>(input_string.length()), 
replace_string.c_str(),
+              static_cast<int32_t>(replace_string.length()), &out_length);
+  std::string ret_as_string(ret, out_length);
+  EXPECT_EQ(out_length, 5);
+  EXPECT_EQ(ret_as_string, "apple");
+}
+
+TEST_F(TestReplaceHolder, TestReplaceSameSize) {  // replacing every "a" by "b" keeps the length at 10
+  std::shared_ptr<ReplaceHolder> replace_holder;
+
+  auto status = ReplaceHolder::Make("a", &replace_holder);
+  EXPECT_EQ(status.ok(), true) << status.message();
+
+  std::string input_string = "ananindeua";
+  std::string replace_string = "b";
+  int32_t out_length = 0;
+
+  auto& replace = *replace_holder;
+  const char* ret =
+      replace(&execution_context_, input_string.c_str(),
+              static_cast<int32_t>(input_string.length()), 
replace_string.c_str(),
+              static_cast<int32_t>(replace_string.length()), &out_length);
+  std::string ret_as_string(ret, out_length);
+  EXPECT_EQ(out_length, 10);
+  EXPECT_EQ(ret_as_string, "bnbnindeub");
+}
+
+TEST_F(TestReplaceHolder, TestReplaceInvalidPattern) {  // Make must fail for the pattern "+"
+  std::shared_ptr<ReplaceHolder> replace_holder;
+
+  auto status = ReplaceHolder::Make("+", &replace_holder);
+  EXPECT_EQ(status.ok(), false) << status.message();
+
+  execution_context_.Reset();
+}
+
+// Tests related to the REGEXP_EXTRACT function (ExtractHolder)
+class TestExtractHolder : public ::testing::Test {
+ protected:
+  ExecutionContext execution_context_;  // passed to every extract call; has_error() is checked on it
+};
+
+TEST_F(TestExtractHolder, TestSimpleExtract) {  // extraction by group index, index 0 being the whole match
+  std::shared_ptr<ExtractHolder> extract_holder;
+
+  // Pattern matching two groups of word characters separated by one space
+  auto status = ExtractHolder::Make(R"((\w+) (\w+))", &extract_holder);
+  EXPECT_EQ(status.ok(), true) << status.message();  // NOTE(review): non-fatal, the dereference below still runs on failure
+
+  std::string input_string = "John Doe";
+  int32_t extract_index = 2;  // Retrieve the surname
+  int32_t out_length = 0;
+
+  auto& extract = *extract_holder;
+  const char* ret =
+      extract(&execution_context_, input_string.c_str(),
+              static_cast<int32_t>(input_string.length()), extract_index, 
&out_length);
+  std::string ret_as_str(ret, out_length);
+  EXPECT_EQ(out_length, 3);
+  EXPECT_EQ(ret_as_str, "Doe");
+
+  input_string = "Ringo Beast";
+  extract_index = 1;  // Retrieve the first name
+
+  ret = extract(&execution_context_, input_string.c_str(),
+                static_cast<int32_t>(input_string.length()), extract_index, 
&out_length);
+  ret_as_str = std::string(ret, out_length);
+  EXPECT_EQ(out_length, 5);
+  EXPECT_EQ(ret_as_str, "Ringo");
+
+  input_string = "Paul Test";
+  extract_index = 0;  // Retrieve all match
+
+  ret = extract(&execution_context_, input_string.c_str(),
+                static_cast<int32_t>(input_string.length()), extract_index, 
&out_length);
+  ret_as_str = std::string(ret, out_length);
+  EXPECT_EQ(out_length, 9);
+  EXPECT_EQ(ret_as_str, "Paul Test");
+
+  status = ExtractHolder::Make(R"((\w+) (\w+) - (\d+))", &extract_holder);  // word, word, " - ", digits
+  EXPECT_EQ(status.ok(), true) << status.message();
+
+  auto& extract2 = *extract_holder;
+
+  input_string = "John Doe - 124";
+  extract_index = 0;  // Retrieve all match
+
+  ret = extract2(&execution_context_, input_string.c_str(),
+                 static_cast<int32_t>(input_string.length()), extract_index, 
&out_length);
+  ret_as_str = std::string(ret, out_length);
+  EXPECT_EQ(out_length, 14);
+  EXPECT_EQ(ret_as_str, "John Doe - 124");
+
+  input_string = "John Doe - 124 MoreString";
+  extract_index = 0;  // Retrieve all match (the trailing text is not part of it)
+
+  ret = extract2(&execution_context_, input_string.c_str(),
+                 static_cast<int32_t>(input_string.length()), extract_index, 
&out_length);
+  ret_as_str = std::string(ret, out_length);
+  EXPECT_EQ(out_length, 14);
+  EXPECT_EQ(ret_as_str, "John Doe - 124");
+
+  input_string = "MoreString John Doe - 124";
+  extract_index = 0;  // Retrieve all match (the leading text is not part of it)
+
+  ret = extract2(&execution_context_, input_string.c_str(),
+                 static_cast<int32_t>(input_string.length()), extract_index, 
&out_length);
+  ret_as_str = std::string(ret, out_length);
+  EXPECT_EQ(out_length, 14);
+  EXPECT_EQ(ret_as_str, "John Doe - 124");
+
+  // Pattern with two nested groups around a run of word characters (not only digits)
+  status = ExtractHolder::Make(R"(((\w+)))", &extract_holder);
+  EXPECT_EQ(status.ok(), true) << status.message();
+
+  auto& extract_numbers = *extract_holder;  // despite the name, this holder matches word characters
+
+  input_string = "路%$大a";  // only the ASCII letter is expected to match
+  extract_index = 0;  // Retrieve all matched string
+
+  ret = extract_numbers(&execution_context_, input_string.c_str(),
+                        static_cast<int32_t>(input_string.length()), 
extract_index,
+                        &out_length);
+  ret_as_str = std::string(ret, out_length);
+  EXPECT_EQ(out_length, 1);
+  EXPECT_EQ(ret_as_str, "a");
+
+  input_string = "b路%$大";
+  extract_index = 0;  // Retrieve all matched string
+
+  ret = extract_numbers(&execution_context_, input_string.c_str(),
+                        static_cast<int32_t>(input_string.length()), 
extract_index,
+                        &out_length);
+  ret_as_str = std::string(ret, out_length);
+  EXPECT_EQ(out_length, 1);
+  EXPECT_EQ(ret_as_str, "b");
+
+  input_string = "路%c$大";
+  extract_index = 0;  // Retrieve all matched string
+
+  ret = extract_numbers(&execution_context_, input_string.c_str(),
+                        static_cast<int32_t>(input_string.length()), 
extract_index,
+                        &out_length);
+  ret_as_str = std::string(ret, out_length);
+  EXPECT_EQ(out_length, 1);
+  EXPECT_EQ(ret_as_str, "c");
+
+  input_string = "路%c$大";
+  extract_index = 1;  // Retrieve the outer group written in the pattern
+
+  ret = extract_numbers(&execution_context_, input_string.c_str(),
+                        static_cast<int32_t>(input_string.length()), 
extract_index,
+                        &out_length);
+  ret_as_str = std::string(ret, out_length);
+  EXPECT_EQ(out_length, 1);
+  EXPECT_EQ(ret_as_str, "c");
+
+  input_string = "路%c$大";
+  extract_index = 2;  // Retrieve the inner group written in the pattern
+
+  ret = extract_numbers(&execution_context_, input_string.c_str(),
+                        static_cast<int32_t>(input_string.length()), 
extract_index,
+                        &out_length);
+  ret_as_str = std::string(ret, out_length);
+  EXPECT_EQ(out_length, 1);
+  EXPECT_EQ(ret_as_str, "c");
+
+  input_string = "路%c$大";
+  extract_index = 3;  // Out of range: the pattern only defines two groups
+
+  ret = extract_numbers(&execution_context_, input_string.c_str(),
+                        static_cast<int32_t>(input_string.length()), 
extract_index,
+                        &out_length);
+  ret_as_str = std::string(ret, out_length);
+  EXPECT_EQ(out_length, 0);
+  EXPECT_TRUE(execution_context_.has_error());
+}
+
+TEST_F(TestExtractHolder, TestNoMatches) {  // no match gives an empty result and no error
+  std::shared_ptr<ExtractHolder> extract_holder;
+
+  // Pattern matching two groups of word characters separated by one space
+  auto status = ExtractHolder::Make(R"((\w+) (\w+))", &extract_holder);
+  EXPECT_EQ(status.ok(), true) << status.message();
+
+  std::string input_string = "John";
+  int32_t extract_index = 2;  // The regex will not match with the input string
+  int32_t out_length = 0;
+
+  auto& extract = *extract_holder;
+  const char* ret =
+      extract(&execution_context_, input_string.c_str(),
+              static_cast<int32_t>(input_string.length()), extract_index, 
&out_length);
+  std::string ret_as_str(ret, out_length);
+  EXPECT_EQ(out_length, 0);
+  EXPECT_FALSE(execution_context_.has_error());
+
+  // Pattern to match only digits
+  status = ExtractHolder::Make(R"(\d+)", &extract_holder);
+  EXPECT_EQ(status.ok(), true) << status.message();
+
+  auto& extract_numbers = *extract_holder;
+
+  input_string = "12345";
+  extract_index = 0;  // Retrieve all matched string
+
+  ret = extract_numbers(&execution_context_, input_string.c_str(),
+                        static_cast<int32_t>(input_string.length()), 
extract_index,
+                        &out_length);
+  ret_as_str = std::string(ret, out_length);
+  EXPECT_EQ(out_length, 5);
+  EXPECT_EQ(ret_as_str, "12345");
+
+  input_string = "12345A";  // the trailing letter is left out of the match
+  extract_index = 0;  // Retrieve all matched string
+
+  ret = extract_numbers(&execution_context_, input_string.c_str(),
+                        static_cast<int32_t>(input_string.length()), 
extract_index,
+                        &out_length);
+  ret_as_str = std::string(ret, out_length);
+  EXPECT_EQ(out_length, 5);
+  EXPECT_FALSE(execution_context_.has_error());
+  EXPECT_EQ(ret_as_str, "12345");
+}
+
+TEST_F(TestExtractHolder, TestInvalidRange) {  // a negative or too-large group index sets an error on the context
+  std::shared_ptr<ExtractHolder> extract_holder;
+
+  // Pattern matching two groups of word characters separated by one space
+  auto status = ExtractHolder::Make(R"((\w+) (\w+))", &extract_holder);
+  EXPECT_EQ(status.ok(), true) << status.message();
+
+  std::string input_string = "John Doe";
+  int32_t extract_index = -1;
+  int32_t out_length = 0;
+
+  auto& extract = *extract_holder;
+  const char* ret =
+      extract(&execution_context_, input_string.c_str(),
+              static_cast<int32_t>(input_string.length()), extract_index, 
&out_length);
+  std::string ret_as_str(ret, out_length);
+  EXPECT_EQ(out_length, 0);
+  EXPECT_TRUE(execution_context_.has_error());
+
+  execution_context_.Reset();  // clear the error so the next check starts clean
+
+  // The test regex has two capturing groups, so the highest index
+  // allowed for the test regex is 2
+  extract_index = 3;
+
+  ret = extract(&execution_context_, input_string.c_str(),
+                static_cast<int32_t>(input_string.length()), extract_index, 
&out_length);
+  ret_as_str = std::string(ret, out_length);
+  EXPECT_EQ(out_length, 0);
+  EXPECT_TRUE(execution_context_.has_error());
+
+  execution_context_.Reset();
+}
+
+TEST_F(TestExtractHolder, TestExtractInvalidPattern) {  // Make must fail for the pattern "+"
+  std::shared_ptr<ExtractHolder> extract_holder;
+
+  auto status = ExtractHolder::Make("+", &extract_holder);
+  EXPECT_EQ(status.ok(), false) << status.message();
+
+  execution_context_.Reset();
+}
+
+TEST_F(TestExtractHolder, TestErrorWhileBuildingHolder) {  // Make(FunctionNode) rejects malformed regexp_extract nodes
+  std::shared_ptr<ExtractHolder> extract_holder;
+
+  // Create function with incorrect number of params (two instead of three)
+  auto field = std::make_shared<FieldNode>(arrow::field("in", arrow::utf8()));
+  auto pattern_node = std::make_shared<LiteralNode>(
+      arrow::utf8(), LiteralHolder(R"((\w+) (\w+))"), false);
+  auto function_node =
+      FunctionNode("regexp_extract", {field, pattern_node}, arrow::utf8());
+
+  auto status = ExtractHolder::Make(function_node, &extract_holder);
+  EXPECT_EQ(status.ok(), false);
+  EXPECT_THAT(status.message(),
+              ::testing::HasSubstr("'extract' function requires three 
parameters"));
+
+  execution_context_.Reset();
+
+  // Create function with non-utf8 literal parameter as pattern (an int32 literal)
+  field = std::make_shared<FieldNode>(arrow::field("in", arrow::utf8()));
+  pattern_node = std::make_shared<LiteralNode>(arrow::int32(), 
LiteralHolder(2), false);
+  auto index_node = std::make_shared<FieldNode>(arrow::field("idx", 
arrow::int32()));
+  function_node =
+      FunctionNode("regexp_extract", {field, pattern_node, index_node}, 
arrow::utf8());
+
+  status = ExtractHolder::Make(function_node, &extract_holder);
+  EXPECT_EQ(status.ok(), false);
+  EXPECT_THAT(status.message(),
+              ::testing::HasSubstr(
+                  "'extract' function requires a literal as the second 
parameter"));
+
+  execution_context_.Reset();
+
+  // Create function not using a literal parameter as pattern (a field node instead)
+  field = std::make_shared<FieldNode>(arrow::field("in", arrow::utf8()));
+  auto pattern_as_node =
+      std::make_shared<FieldNode>(arrow::field("pattern", arrow::utf8()));
+  index_node = std::make_shared<FieldNode>(arrow::field("idx", 
arrow::int32()));
+  function_node =
+      FunctionNode("regexp_extract", {field, pattern_as_node, index_node}, 
arrow::utf8());
+
+  status = ExtractHolder::Make(function_node, &extract_holder);
+  EXPECT_EQ(status.ok(), false);
+  EXPECT_THAT(status.message(),
+              ::testing::HasSubstr(
+                  "'extract' function requires a literal as the second 
parameter"));
+
+  execution_context_.Reset();
+}
+
+}  // namespace gandiva
diff --git a/cpp/src/gandiva/replace_holder.cc 
b/cpp/src/gandiva/replace_holder.cc
deleted file mode 100644
index 1bcbe13802..0000000000
--- a/cpp/src/gandiva/replace_holder.cc
+++ /dev/null
@@ -1,61 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "gandiva/replace_holder.h"
-
-#include "gandiva/node.h"
-#include "gandiva/regex_util.h"
-
-namespace gandiva {
-
-Status ReplaceHolder::Make(const FunctionNode& node,
-                           std::shared_ptr<ReplaceHolder>* holder) {
-  ARROW_RETURN_IF(node.children().size() != 3,
-                  Status::Invalid("'replace' function requires three 
parameters"));
-
-  auto literal = dynamic_cast<LiteralNode*>(node.children().at(1).get());
-  ARROW_RETURN_IF(
-      literal == nullptr,
-      Status::Invalid("'replace' function requires a literal as the second 
parameter"));
-
-  auto literal_type = literal->return_type()->id();
-  ARROW_RETURN_IF(
-      !(literal_type == arrow::Type::STRING || literal_type == 
arrow::Type::BINARY),
-      Status::Invalid(
-          "'replace' function requires a string literal as the second 
parameter"));
-
-  return Make(arrow::util::get<std::string>(literal->holder()), holder);
-}
-
-Status ReplaceHolder::Make(const std::string& sql_pattern,
-                           std::shared_ptr<ReplaceHolder>* holder) {
-  auto lholder = std::shared_ptr<ReplaceHolder>(new 
ReplaceHolder(sql_pattern));
-  ARROW_RETURN_IF(!lholder->regex_.ok(),
-                  Status::Invalid("Building RE2 pattern '", sql_pattern, "' 
failed"));
-
-  *holder = lholder;
-  return Status::OK();
-}
-
-void ReplaceHolder::return_error(ExecutionContext* context, std::string& data,
-                                 std::string& replace_string) {
-  std::string err_msg = "Error replacing '" + replace_string + "' on the given 
string '" +
-                        data + "' for the given pattern: " + pattern_;
-  context->set_error_msg(err_msg.c_str());
-}
-
-}  // namespace gandiva
diff --git a/cpp/src/gandiva/replace_holder_test.cc 
b/cpp/src/gandiva/replace_holder_test.cc
deleted file mode 100644
index b0830d4f00..0000000000
--- a/cpp/src/gandiva/replace_holder_test.cc
+++ /dev/null
@@ -1,129 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "gandiva/replace_holder.h"
-
-#include <gtest/gtest.h>
-
-#include <memory>
-#include <vector>
-
-namespace gandiva {
-
-class TestReplaceHolder : public ::testing::Test {
- protected:
-  ExecutionContext execution_context_;
-};
-
-TEST_F(TestReplaceHolder, TestMultipleReplace) {
-  std::shared_ptr<ReplaceHolder> replace_holder;
-
-  auto status = ReplaceHolder::Make("ana", &replace_holder);
-  EXPECT_EQ(status.ok(), true) << status.message();
-
-  std::string input_string = "banana";
-  std::string replace_string;
-  int32_t out_length = 0;
-
-  auto& replace = *replace_holder;
-  const char* ret =
-      replace(&execution_context_, input_string.c_str(),
-              static_cast<int32_t>(input_string.length()), 
replace_string.c_str(),
-              static_cast<int32_t>(replace_string.length()), &out_length);
-  std::string ret_as_str(ret, out_length);
-  EXPECT_EQ(out_length, 3);
-  EXPECT_EQ(ret_as_str, "bna");
-
-  input_string = "bananaana";
-
-  ret = replace(&execution_context_, input_string.c_str(),
-                static_cast<int32_t>(input_string.length()), 
replace_string.c_str(),
-                static_cast<int32_t>(replace_string.length()), &out_length);
-  ret_as_str = std::string(ret, out_length);
-  EXPECT_EQ(out_length, 3);
-  EXPECT_EQ(ret_as_str, "bna");
-
-  input_string = "bananana";
-
-  ret = replace(&execution_context_, input_string.c_str(),
-                static_cast<int32_t>(input_string.length()), 
replace_string.c_str(),
-                static_cast<int32_t>(replace_string.length()), &out_length);
-  ret_as_str = std::string(ret, out_length);
-  EXPECT_EQ(out_length, 2);
-  EXPECT_EQ(ret_as_str, "bn");
-
-  input_string = "anaana";
-
-  ret = replace(&execution_context_, input_string.c_str(),
-                static_cast<int32_t>(input_string.length()), 
replace_string.c_str(),
-                static_cast<int32_t>(replace_string.length()), &out_length);
-  ret_as_str = std::string(ret, out_length);
-  EXPECT_EQ(out_length, 0);
-  EXPECT_FALSE(execution_context_.has_error());
-  EXPECT_EQ(ret_as_str, "");
-}
-
-TEST_F(TestReplaceHolder, TestNoMatchPattern) {
-  std::shared_ptr<ReplaceHolder> replace_holder;
-
-  auto status = ReplaceHolder::Make("ana", &replace_holder);
-  EXPECT_EQ(status.ok(), true) << status.message();
-
-  std::string input_string = "apple";
-  std::string replace_string;
-  int32_t out_length = 0;
-
-  auto& replace = *replace_holder;
-  const char* ret =
-      replace(&execution_context_, input_string.c_str(),
-              static_cast<int32_t>(input_string.length()), 
replace_string.c_str(),
-              static_cast<int32_t>(replace_string.length()), &out_length);
-  std::string ret_as_string(ret, out_length);
-  EXPECT_EQ(out_length, 5);
-  EXPECT_EQ(ret_as_string, "apple");
-}
-
-TEST_F(TestReplaceHolder, TestReplaceSameSize) {
-  std::shared_ptr<ReplaceHolder> replace_holder;
-
-  auto status = ReplaceHolder::Make("a", &replace_holder);
-  EXPECT_EQ(status.ok(), true) << status.message();
-
-  std::string input_string = "ananindeua";
-  std::string replace_string = "b";
-  int32_t out_length = 0;
-
-  auto& replace = *replace_holder;
-  const char* ret =
-      replace(&execution_context_, input_string.c_str(),
-              static_cast<int32_t>(input_string.length()), 
replace_string.c_str(),
-              static_cast<int32_t>(replace_string.length()), &out_length);
-  std::string ret_as_string(ret, out_length);
-  EXPECT_EQ(out_length, 10);
-  EXPECT_EQ(ret_as_string, "bnbnindeub");
-}
-
-TEST_F(TestReplaceHolder, TestReplaceInvalidPattern) {
-  std::shared_ptr<ReplaceHolder> replace_holder;
-
-  auto status = ReplaceHolder::Make("+", &replace_holder);
-  EXPECT_EQ(status.ok(), false) << status.message();
-
-  execution_context_.Reset();
-}
-
-}  // namespace gandiva
diff --git a/cpp/src/gandiva/tests/projector_test.cc 
b/cpp/src/gandiva/tests/projector_test.cc
index 06c1dbdf08..93fda6973a 100644
--- a/cpp/src/gandiva/tests/projector_test.cc
+++ b/cpp/src/gandiva/tests/projector_test.cc
@@ -2624,4 +2624,54 @@ TEST_F(TestProjector, TestNextDay) {
   // Validate results
   EXPECT_ARROW_ARRAY_EQUALS(exp, outputs.at(0));
 }
+
+TEST_F(TestProjector, TestRegexpExtract) {
+  // schema for input fields
+  auto field0 = field("f0", arrow::utf8());
+  auto field1 = field("f1", arrow::int32());
+  auto schema = arrow::schema({field0, field1});
+
+  // output fields
+  auto field_extract = field("extract", arrow::utf8());
+
+  // The pattern to match this sequence: string string - number
+  std::string pattern(R"((\w+) (\w+) - (\d+))");
+  auto literal = TreeExprBuilder::MakeStringLiteral(pattern);
+  auto node0 = TreeExprBuilder::MakeField(field0);
+  auto node1 = TreeExprBuilder::MakeField(field1);
+
+  // Build expression
+  auto regexp_extract_func = TreeExprBuilder::MakeFunction(
+      "regexp_extract", {node0, literal, node1}, arrow::utf8());
+  auto extract_expr = TreeExprBuilder::MakeExpression(regexp_extract_func, field_extract);
+
+  std::shared_ptr<Projector> projector;
+  auto status = Projector::Make(schema, {extract_expr}, TestConfiguration(), &projector);
+  EXPECT_TRUE(status.ok()) << status.message();
+
+  // Create a row-batch with some sample data
+  int num_records = 7;
+  auto array0 = MakeArrowArrayUtf8(
+      {"John Doe - 124", "John Doe - 124", "John Doe - 124", "John Doe - 124",
+       "John Doe - 124 MoreString", "MoreString John Doe - 124", "stringthatdonotmatch"},
+      {true, true, true, true, true, true, true});
+  auto array1 = MakeArrowArrayInt32({1, 2, 3, 0, 0, 3, 0},
+                                    {true, true, true, true, true, true, true});
+  // expected output
+  auto exp_extract = MakeArrowArrayUtf8(
+      {"John", "Doe", "124", "John Doe - 124", "John Doe - 124", "124", ""},
+      {true, true, true, true, true, true, true});
+
+  // prepare input record batch
+  auto in = arrow::RecordBatch::Make(schema, num_records, {array0, array1});
+
+  // Evaluate expression
+  arrow::ArrayVector outputs;
+  status = projector->Evaluate(*in, pool_, &outputs);
+  EXPECT_TRUE(status.ok()) << status.message();
+
+  // Validate results
+  EXPECT_ARROW_ARRAY_EQUALS(exp_extract, outputs.at(0));
+}
+
 }  // namespace gandiva

Reply via email to