This is an automated email from the ASF dual-hosted git repository.

ravindra pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new f9f0e65e5a ARROW-17121: [Gandiva][C++] Adding mask function to Gandiva 
(#13647)
f9f0e65e5a is described below

commit f9f0e65e5a370bd26f49b12c5e8bb51218cd82a6
Author: palak-9202 <[email protected]>
AuthorDate: Fri Jul 22 21:57:18 2022 +0530

    ARROW-17121: [Gandiva][C++] Adding mask function to Gandiva (#13647)
    
    Add mask(str inp[, str uc-mask[, str lc-mask[, str num-mask]]]) function to 
Gandiva.
    
    By default, upper case letters are masked as 'X', lower case letters as 'x' and numbers as 'n'.
    Custom mask strings can be supplied through the optional parameters to override these defaults.
    
    Lead-authored-by: Palak Pariawala <[email protected]>
    Co-authored-by: palak-9202 <[email protected]>
    Signed-off-by: Pindikura Ravindra <[email protected]>
---
 cpp/src/gandiva/function_registry_string.cc |  16 ++-
 cpp/src/gandiva/gdv_function_stubs.cc       | 178 ++++++++++++++++++++++++++++
 cpp/src/gandiva/gdv_function_stubs.h        |  20 ++++
 cpp/src/gandiva/gdv_function_stubs_test.cc  |  70 +++++++++++
 cpp/src/gandiva/tests/projector_test.cc     | 158 ++++++++++++++++++++++++
 5 files changed, 441 insertions(+), 1 deletion(-)

diff --git a/cpp/src/gandiva/function_registry_string.cc 
b/cpp/src/gandiva/function_registry_string.cc
index a60ca3ecca..2bc6936d77 100644
--- a/cpp/src/gandiva/function_registry_string.cc
+++ b/cpp/src/gandiva/function_registry_string.cc
@@ -527,7 +527,21 @@ std::vector<NativeFunction> GetStringFunctionRegistry() {
 
       NativeFunction("mask_show_last_n", {}, DataTypeVector{utf8(), int32()}, 
utf8(),
                      kResultNullIfNull, "gdv_mask_show_last_n_utf8_int32",
-                     NativeFunction::kNeedsContext)};
+                     NativeFunction::kNeedsContext),
+
+      NativeFunction("mask", {}, DataTypeVector{utf8(), utf8(), utf8(), 
utf8()}, utf8(),
+                     kResultNullIfNull, "mask_utf8_utf8_utf8_utf8",
+                     NativeFunction::kNeedsContext),
+
+      NativeFunction("mask", {}, DataTypeVector{utf8(), utf8(), utf8()}, 
utf8(),
+                     kResultNullIfNull, "mask_utf8_utf8_utf8",
+                     NativeFunction::kNeedsContext),
+
+      NativeFunction("mask", {}, DataTypeVector{utf8(), utf8()}, utf8(),
+                     kResultNullIfNull, "mask_utf8_utf8", 
NativeFunction::kNeedsContext),
+
+      NativeFunction("mask", {}, DataTypeVector{utf8()}, utf8(), 
kResultNullIfNull,
+                     "mask_utf8", NativeFunction::kNeedsContext)};
   return string_fn_registry_;
 }
 
diff --git a/cpp/src/gandiva/gdv_function_stubs.cc 
b/cpp/src/gandiva/gdv_function_stubs.cc
index 1b84c706fe..40fb656bd4 100644
--- a/cpp/src/gandiva/gdv_function_stubs.cc
+++ b/cpp/src/gandiva/gdv_function_stubs.cc
@@ -556,6 +556,125 @@ const char* gdv_mask_last_n_utf8_int32(int64_t context, 
const char* data,
   return out;
 }
 
+GANDIVA_EXPORT
+const char* mask_utf8_utf8_utf8_utf8(int64_t context, const char* data, 
int32_t data_len,
+                                     const char* upper, int32_t upper_length,
+                                     const char* lower, int32_t lower_length,
+                                     const char* num, int32_t num_length,
+                                     int32_t* out_len) {
+  if (data_len <= 0) {
+    *out_len = 0;
+    return nullptr;
+  }
+
+  int32_t max_length =
+      std::max(upper_length, std::max(lower_length, num_length)) * data_len;
+  char* out = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, 
max_length));
+  if (out == nullptr) {
+    gdv_fn_context_set_error_msg(context, "Could not allocate memory for 
output string");
+    *out_len = 0;
+    return nullptr;
+  }
+
+  bool has_multi_byte = false;
+  for (int i = 0; i < data_len; i++) {
+    unsigned char char_single_byte = data[i];
+    if (char_single_byte > 127) {
+      // found a multi-byte utf-8 char
+      has_multi_byte = true;
+      break;
+    }
+  }
+
+  if (!has_multi_byte) {
+    int out_index = 0;
+    for (int i = 0; i < data_len; ++i) {
+      unsigned char char_single_byte = data[i];
+      if (char_single_byte >= 'A' && char_single_byte <= 'Z') {
+        memcpy(out + out_index, upper, upper_length);
+        out_index += upper_length;
+      } else if (char_single_byte >= 'a' && char_single_byte <= 'z') {
+        memcpy(out + out_index, lower, lower_length);
+        out_index += lower_length;
+      } else if (isdigit(char_single_byte)) {
+        memcpy(out + out_index, num, num_length);
+        out_index += num_length;
+      } else {
+        out[out_index] = char_single_byte;
+        out_index++;
+      }
+    }
+    *out_len = out_index;
+    return out;
+  }
+
+  utf8proc_int32_t utf8_char;
+  int bytes_read = 0;
+  int32_t out_index = 0;
+  while (bytes_read < data_len) {
+    auto char_len =
+        utf8proc_iterate(reinterpret_cast<const utf8proc_uint8_t*>(data + 
bytes_read),
+                         data_len, &utf8_char);
+    switch (utf8proc_category(utf8_char)) {
+      case UTF8PROC_CATEGORY_LU:
+        memcpy(out + out_index, upper, upper_length);
+        out_index += upper_length;
+        break;
+      case UTF8PROC_CATEGORY_LT:
+        memcpy(out + out_index, upper, upper_length);
+        out_index += upper_length;
+        break;
+      case UTF8PROC_CATEGORY_LL:
+        memcpy(out + out_index, lower, lower_length);
+        out_index += lower_length;
+        break;
+      case UTF8PROC_CATEGORY_LO:
+        memcpy(out + out_index, lower, lower_length);
+        out_index += lower_length;
+        break;
+      case UTF8PROC_CATEGORY_ND:
+        memcpy(out + out_index, num, num_length);
+        out_index += num_length;
+        break;
+      case UTF8PROC_CATEGORY_NL:
+        memcpy(out + out_index, num, num_length);
+        out_index += num_length;
+        break;
+      case UTF8PROC_CATEGORY_NO:
+        memcpy(out + out_index, num, num_length);
+        out_index += num_length;
+        break;
+      default:
+        memcpy(out + out_index, data + bytes_read, char_len);
+        out_index += static_cast<int>(char_len);
+        break;
+    }
+    bytes_read += static_cast<int>(char_len);
+  }
+  *out_len = out_index;
+  return out;
+}
+
+GANDIVA_EXPORT
+const char* mask_utf8_utf8_utf8(int64_t context, const char* in, int32_t 
length,
+                                const char* upper, int32_t upper_len, const 
char* lower,
+                                int32_t lower_len, int32_t* out_len) {
+  return mask_utf8_utf8_utf8_utf8(context, in, length, upper, upper_len, 
lower, lower_len,
+                                  "n", 1, out_len);
+}
+
+GANDIVA_EXPORT
+const char* mask_utf8_utf8(int64_t context, const char* in, int32_t length,
+                           const char* upper, int32_t upper_len, int32_t* 
out_len) {
+  return mask_utf8_utf8_utf8_utf8(context, in, length, upper, upper_len, "x", 
1, "n", 1,
+                                  out_len);
+}
+
+GANDIVA_EXPORT
+const char* mask_utf8(int64_t context, const char* in, int32_t length, 
int32_t* out_len) {
+  return mask_utf8_utf8_utf8_utf8(context, in, length, "X", 1, "x", 1, "n", 1, 
out_len);
+}
+
 int64_t gdv_fn_to_date_utf8_utf8(int64_t context_ptr, int64_t holder_ptr,
                                  const char* data, int data_len, bool 
in1_validity,
                                  const char* pattern, int pattern_len, bool 
in2_validity,
@@ -1052,6 +1171,7 @@ void ExportedStubFunctions::AddMappings(Engine* engine) 
const {
   engine->AddGlobalMappingForFunc("to_utc_timezone_timestamp",
                                   types->i64_type() /*return_type*/, args,
                                   
reinterpret_cast<void*>(to_utc_timezone_timestamp));
+
   // from_utc_timezone_timestamp
   args = {
       types->i64_type(),     // context
@@ -1080,5 +1200,63 @@ void ExportedStubFunctions::AddMappings(Engine* engine) 
const {
   engine->AddGlobalMappingForFunc(
       "gdv_mask_show_last_n_utf8_int32", types->i8_ptr_type() /*return_type*/, 
mask_args,
       reinterpret_cast<void*>(gdv_mask_show_last_n_utf8_int32));
+
+  // mask_utf8_utf8_utf8_utf8
+  args = {
+      types->i64_type(),     // context
+      types->i8_ptr_type(),  // data
+      types->i32_type(),     // data_len
+      types->i8_ptr_type(),  // upper
+      types->i32_type(),     // upper_len
+      types->i8_ptr_type(),  // lower
+      types->i32_type(),     // lower_len
+      types->i8_ptr_type(),  // num
+      types->i32_type(),     // num_len
+      types->i32_ptr_type()  // out_length
+  };
+
+  engine->AddGlobalMappingForFunc("mask_utf8_utf8_utf8_utf8",
+                                  types->i8_ptr_type() /*return_type*/, args,
+                                  
reinterpret_cast<void*>(mask_utf8_utf8_utf8_utf8));
+
+  // mask_utf8_utf8_utf8
+  args = {
+      types->i64_type(),     // context
+      types->i8_ptr_type(),  // data
+      types->i32_type(),     // data_len
+      types->i8_ptr_type(),  // upper
+      types->i32_type(),     // upper_len
+      types->i8_ptr_type(),  // lower
+      types->i32_type(),     // lower_len
+      types->i32_ptr_type()  // out_length
+  };
+
+  engine->AddGlobalMappingForFunc("mask_utf8_utf8_utf8",
+                                  types->i8_ptr_type() /*return_type*/, args,
+                                  
reinterpret_cast<void*>(mask_utf8_utf8_utf8));
+
+  // mask_utf8_utf8
+  args = {
+      types->i64_type(),     // context
+      types->i8_ptr_type(),  // data
+      types->i32_type(),     // data_len
+      types->i8_ptr_type(),  // upper
+      types->i32_type(),     // upper_len
+      types->i32_ptr_type()  // out_length
+  };
+
+  engine->AddGlobalMappingForFunc("mask_utf8_utf8", types->i8_ptr_type() 
/*return_type*/,
+                                  args, 
reinterpret_cast<void*>(mask_utf8_utf8));
+
+  // mask_utf8
+  args = {
+      types->i64_type(),     // context
+      types->i8_ptr_type(),  // data
+      types->i32_type(),     // data_len
+      types->i32_ptr_type()  // out_length
+  };
+
+  engine->AddGlobalMappingForFunc("mask_utf8", types->i8_ptr_type() 
/*return_type*/, args,
+                                  reinterpret_cast<void*>(mask_utf8));
 }
 }  // namespace gandiva
diff --git a/cpp/src/gandiva/gdv_function_stubs.h 
b/cpp/src/gandiva/gdv_function_stubs.h
index bc4dc24b46..5356a91f3c 100644
--- a/cpp/src/gandiva/gdv_function_stubs.h
+++ b/cpp/src/gandiva/gdv_function_stubs.h
@@ -361,4 +361,24 @@ GANDIVA_EXPORT
 const char* gdv_fn_substring_index(int64_t context, const char* txt, int32_t 
txt_len,
                                    const char* pat, int32_t pat_len, int32_t 
cnt,
                                    int32_t* out_len);
+
+GANDIVA_EXPORT
+const char* mask_utf8_utf8_utf8_utf8(int64_t context, const char* in, int32_t 
length,
+                                     const char* upper, int32_t upper_length,
+                                     const char* lower, int32_t lower_length,
+                                     const char* num, int32_t num_length,
+                                     int32_t* out_len);
+
+GANDIVA_EXPORT
+const char* mask_utf8_utf8_utf8(int64_t context, const char* in, int32_t 
length,
+                                const char* upper, int32_t upper_length,
+                                const char* lower, int32_t lower_length,
+                                int32_t* out_len);
+
+GANDIVA_EXPORT
+const char* mask_utf8_utf8(int64_t context, const char* in, int32_t length,
+                           const char* upper, int32_t upper_length, int32_t* 
out_len);
+
+GANDIVA_EXPORT
+const char* mask_utf8(int64_t context, const char* in, int32_t length, 
int32_t* out_len);
 }
diff --git a/cpp/src/gandiva/gdv_function_stubs_test.cc 
b/cpp/src/gandiva/gdv_function_stubs_test.cc
index b174738a31..b4b717c023 100644
--- a/cpp/src/gandiva/gdv_function_stubs_test.cc
+++ b/cpp/src/gandiva/gdv_function_stubs_test.cc
@@ -1271,4 +1271,74 @@ TEST(TestGdvFnStubs, TestShowLastN) {
   result = gdv_mask_show_last_n_utf8_int32(ctx_ptr, data.c_str(), data_len, 6, 
&out_len);
   EXPECT_EQ(expected, std::string(result, out_len));
 }
+
+TEST(TestGdvFnStubs, TestMask) {
+  gandiva::ExecutionContext ctx;
+  int64_t ctx_ptr = reinterpret_cast<int64_t>(&ctx);
+  int32_t out_len = 0;
+
+  std::string data = "AabbcÇdd-9202";
+  std::string expected = "XxxxxXxx-nnnn";
+  int32_t data_len = static_cast<int32_t>(data.length());
+  const char* result = mask_utf8_utf8_utf8_utf8(ctx_ptr, data.c_str(), 
data_len, "X", 1,
+                                                "x", 1, "n", 1, &out_len);
+  EXPECT_EQ(std::string(result, out_len), expected);
+  result = mask_utf8_utf8_utf8(ctx_ptr, data.c_str(), data_len, "X", 1, "x", 
1, &out_len);
+  EXPECT_EQ(std::string(result, out_len), expected);
+  result = mask_utf8_utf8(ctx_ptr, data.c_str(), data_len, "X", 1, &out_len);
+  EXPECT_EQ(std::string(result, out_len), expected);
+  result = mask_utf8(ctx_ptr, data.c_str(), data_len, &out_len);
+  EXPECT_EQ(std::string(result, out_len), expected);
+
+  data = "QwErTy:4)ß";
+  expected = "U-l-U-l-U-l-:#)l-";
+  data_len = static_cast<int32_t>(data.length());
+  result = mask_utf8_utf8_utf8_utf8(ctx_ptr, data.c_str(), data_len, "U-", 2, 
"l-", 2,
+                                    "#", 1, &out_len);
+  EXPECT_EQ(std::string(result, out_len), expected);
+  expected = "U-l-U-l-U-l-:n)l-";
+  result =
+      mask_utf8_utf8_utf8(ctx_ptr, data.c_str(), data_len, "U-", 2, "l-", 2, 
&out_len);
+  EXPECT_EQ(std::string(result, out_len), expected);
+  expected = "U-xU-xU-x:n)x";
+  result = mask_utf8_utf8(ctx_ptr, data.c_str(), data_len, "U-", 2, &out_len);
+  EXPECT_EQ(std::string(result, out_len), expected);
+  expected = "XxXxXx:n)x";
+  result = mask_utf8(ctx_ptr, data.c_str(), data_len, &out_len);
+  EXPECT_EQ(std::string(result, out_len), expected);
+
+  data = "";
+  expected = "";
+  data_len = static_cast<int32_t>(data.length());
+  result = mask_utf8_utf8_utf8_utf8(ctx_ptr, data.c_str(), data_len, "X", 1, 
"x", 2, "n",
+                                    1, &out_len);
+  EXPECT_EQ(std::string(result, out_len), expected);
+  result = mask_utf8_utf8_utf8(ctx_ptr, data.c_str(), data_len, "X", 1, "x", 
1, &out_len);
+  EXPECT_EQ(std::string(result, out_len), expected);
+  result = mask_utf8_utf8(ctx_ptr, data.c_str(), data_len, "X", 1, &out_len);
+  EXPECT_EQ(std::string(result, out_len), expected);
+  result = mask_utf8(ctx_ptr, data.c_str(), data_len, &out_len);
+  EXPECT_EQ(std::string(result, out_len), expected);
+
+  data = "QwErTy:4)ß";
+  expected = ":)";
+  data_len = static_cast<int32_t>(data.length());
+  result = mask_utf8_utf8_utf8_utf8(ctx_ptr, data.c_str(), data_len, "", 0, 
"", 0, "", 0,
+                                    &out_len);
+  EXPECT_EQ(std::string(result, out_len), expected);
+  expected = ":n)";
+  result = mask_utf8_utf8_utf8(ctx_ptr, data.c_str(), data_len, "", 0, "", 0, 
&out_len);
+  EXPECT_EQ(std::string(result, out_len), expected);
+  expected = "xxx:n)x";
+  result = mask_utf8_utf8(ctx_ptr, data.c_str(), data_len, "", 0, &out_len);
+  EXPECT_EQ(std::string(result, out_len), expected);
+
+  data = "hunny-BEE-5121";
+  expected = "*****-\?\?\?-####";
+  data_len = static_cast<int32_t>(data.length());
+  result = mask_utf8_utf8_utf8_utf8(ctx_ptr, data.c_str(), data_len, "\?", 1, 
"*", 1, "#",
+                                    1, &out_len);
+  EXPECT_EQ(std::string(result, out_len), expected);
+}
+
 }  // namespace gandiva
diff --git a/cpp/src/gandiva/tests/projector_test.cc 
b/cpp/src/gandiva/tests/projector_test.cc
index 55d2f2293f..65597b38f0 100644
--- a/cpp/src/gandiva/tests/projector_test.cc
+++ b/cpp/src/gandiva/tests/projector_test.cc
@@ -3226,4 +3226,162 @@ TEST_F(TestProjector, TestMaskShowFirstLastN) {
   EXPECT_ARROW_ARRAY_EQUALS(exp_show_last_n, outputs.at(1));
 }
 
+TEST_F(TestProjector, TestMaskAll) {
+  // schema for input fields
+  auto f0 = field("f0", arrow::utf8());
+  auto f1 = field("f1", arrow::utf8());
+  auto f2 = field("f2", arrow::utf8());
+  auto f3 = field("f3", arrow::utf8());
+  auto schema = arrow::schema({f0, f1, f2, f3});
+
+  // output fields
+  auto res_mask = field("output", arrow::utf8());
+
+  // Build expression
+  auto expr_mask = TreeExprBuilder::MakeExpression("mask", {f0, f1, f2, f3}, 
res_mask);
+
+  std::shared_ptr<Projector> projector;
+  auto status = Projector::Make(schema, {expr_mask}, TestConfiguration(), 
&projector);
+  EXPECT_TRUE(status.ok());
+
+  // Create a row-batch with some sample data
+  int num_records = 3;
+  auto array0 =
+      MakeArrowArrayUtf8({"AßÇçd-123", "A的Ççd-123", "AßÇçd-123"}, {true, true, 
true});
+  auto array1 = MakeArrowArrayUtf8({"X", "CAP", "Ç-"}, {true, true, true});
+  auto array2 = MakeArrowArrayUtf8({"x", "low", "l-"}, {true, true, true});
+  auto array3 = MakeArrowArrayUtf8({"n", "#", "[0-9]"}, {true, true, true});
+
+  // expected output
+  auto exp_mask = MakeArrowArrayUtf8(
+      {"XxXxx-nnn", "CAPlowCAPlowlow-###", "Ç-l-Ç-l-l--[0-9][0-9][0-9]"},
+      {true, true, true});
+
+  // prepare input record batch
+  auto in_batch =
+      arrow::RecordBatch::Make(schema, num_records, {array0, array1, array2, 
array3});
+
+  // Evaluate expression
+  arrow::ArrayVector outputs;
+  status = projector->Evaluate(*in_batch, pool_, &outputs);
+  EXPECT_TRUE(status.ok());
+
+  // Validate results
+  EXPECT_ARROW_ARRAY_EQUALS(exp_mask, outputs.at(0));
+}
+
+TEST_F(TestProjector, TestMaskUpperLower) {
+  // schema for input fields
+  auto f0 = field("f0", arrow::utf8());
+  auto f1 = field("f1", arrow::utf8());
+  auto f2 = field("f2", arrow::utf8());
+  auto schema = arrow::schema({f0, f1, f2});
+
+  // output fields
+  auto res_mask = field("output", arrow::utf8());
+
+  // Build expression
+  auto expr_mask = TreeExprBuilder::MakeExpression("mask", {f0, f1, f2}, 
res_mask);
+
+  std::shared_ptr<Projector> projector;
+  auto status = Projector::Make(schema, {expr_mask}, TestConfiguration(), 
&projector);
+  EXPECT_TRUE(status.ok());
+
+  // Create a row-batch with some sample data
+  int num_records = 3;
+  auto array0 =
+      MakeArrowArrayUtf8({"AßÇçd-123", "A的Ççd-123", "AßÇçd-123"}, {true, true, 
true});
+  auto array1 = MakeArrowArrayUtf8({"X", "CAP", "Ç-"}, {true, true, true});
+  auto array2 = MakeArrowArrayUtf8({"x", "low", "l-"}, {true, true, true});
+
+  // expected output
+  auto exp_mask = MakeArrowArrayUtf8(
+      {"XxXxx-nnn", "CAPlowCAPlowlow-nnn", "Ç-l-Ç-l-l--nnn"}, {true, true, 
true});
+
+  // prepare input record batch
+  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array0, 
array1, array2});
+
+  // Evaluate expression
+  arrow::ArrayVector outputs;
+  status = projector->Evaluate(*in_batch, pool_, &outputs);
+  EXPECT_TRUE(status.ok());
+
+  // Validate results
+  EXPECT_ARROW_ARRAY_EQUALS(exp_mask, outputs.at(0));
+}
+
+TEST_F(TestProjector, TestMaskUpper) {
+  // schema for input fields
+  auto f0 = field("f0", arrow::utf8());
+  auto f1 = field("f1", arrow::utf8());
+  auto schema = arrow::schema({f0, f1});
+
+  // output fields
+  auto res_mask = field("output", arrow::utf8());
+
+  // Build expression
+  auto expr_mask = TreeExprBuilder::MakeExpression("mask", {f0, f1}, res_mask);
+
+  std::shared_ptr<Projector> projector;
+  auto status = Projector::Make(schema, {expr_mask}, TestConfiguration(), 
&projector);
+  EXPECT_TRUE(status.ok());
+
+  // Create a row-batch with some sample data
+  int num_records = 3;
+  auto array0 =
+      MakeArrowArrayUtf8({"AßÇçd-123", "A的Ççd-123", "AßÇçd-123"}, {true, true, 
true});
+  auto array1 = MakeArrowArrayUtf8({"X", "CAP", "Ç-"}, {true, true, true});
+
+  // expected output
+  auto exp_mask = MakeArrowArrayUtf8({"XxXxx-nnn", "CAPxCAPxx-nnn", 
"Ç-xÇ-xx-nnn"},
+                                     {true, true, true});
+
+  // prepare input record batch
+  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array0, 
array1});
+
+  // Evaluate expression
+  arrow::ArrayVector outputs;
+  status = projector->Evaluate(*in_batch, pool_, &outputs);
+  EXPECT_TRUE(status.ok());
+
+  // Validate results
+  EXPECT_ARROW_ARRAY_EQUALS(exp_mask, outputs.at(0));
+}
+
+TEST_F(TestProjector, TestMaskDefault) {
+  // schema for input fields
+  auto f0 = field("f0", arrow::utf8());
+  auto schema = arrow::schema({f0});
+
+  // output fields
+  auto res_mask_default = field("output", arrow::utf8());
+
+  // Build expression
+  auto expr_mask = TreeExprBuilder::MakeExpression("mask", {f0}, 
res_mask_default);
+
+  std::shared_ptr<Projector> projector;
+  auto status = Projector::Make(schema, {expr_mask}, TestConfiguration(), 
&projector);
+  EXPECT_TRUE(status.ok());
+
+  // Create a row-batch with some sample data
+  int num_records = 3;
+  auto array0 =
+      MakeArrowArrayUtf8({"ABCcd-123", "A的Ççd-123", "abcd-Ⅷ"}, {true, true, 
true});
+
+  // expected output
+  auto exp_mask =
+      MakeArrowArrayUtf8({"XXXxx-nnn", "XxXxx-nnn", "xxxx-n"}, {true, true, 
true});
+
+  // prepare input record batch
+  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array0});
+
+  // Evaluate expression
+  arrow::ArrayVector outputs;
+  status = projector->Evaluate(*in_batch, pool_, &outputs);
+  EXPECT_TRUE(status.ok());
+
+  // Validate results
+  EXPECT_ARROW_ARRAY_EQUALS(exp_mask, outputs.at(0));
+}
+
 }  // namespace gandiva

Reply via email to