niyue commented on code in PR #39441:
URL: https://github.com/apache/arrow/pull/39441#discussion_r1443584894


##########
cpp/src/gandiva/regex_functions_holder.cc:
##########
@@ -275,4 +275,78 @@ const char* ExtractHolder::operator()(ExecutionContext* 
ctx, const char* user_in
   return result_buffer;
 }
 
+Result<std::shared_ptr<RegexpLikeHolder>> RegexpLikeHolder::Make(
+    const FunctionNode& node) {
+  ARROW_RETURN_IF(
+      node.children().size() < 2,
+      Status::Invalid("'regexp_like' function requires at least two 
parameters"));
+  auto pattern = dynamic_cast<LiteralNode*>(node.children().at(1).get());
+  ARROW_RETURN_IF(
+      pattern == nullptr,
+      Status::Invalid(
+          "'regexp_like' function requires a literal as the second 
parameter"));
+
+  auto pattern_type = pattern->return_type()->id();
+  ARROW_RETURN_IF(
+      !(pattern_type == arrow::Type::STRING || pattern_type == 
arrow::Type::BINARY),
+      Status::Invalid(
+          "'regexp_like' function requires a string literal as the second 
parameter"));
+
+  if (node.children().size() > 2) {
+    auto parameter = dynamic_cast<LiteralNode*>(node.children().at(2).get());
+    if (parameter != nullptr) {
+      auto parameter_type = parameter->return_type()->id();
+      ARROW_RETURN_IF(
+          !(parameter_type == arrow::Type::STRING ||
+            parameter_type == arrow::Type::BINARY),
+          Status::Invalid(
+              "'regexp_like' function requires a string literal as the third 
parameter"));
+      return RegexpLikeHolder::Make(std::get<std::string>(pattern->holder()), 
true,
+                                    
std::get<std::string>(parameter->holder()));
+    }
+  }
+  return RegexpLikeHolder::Make(std::get<std::string>(pattern->holder()), 
false, "");
+}
+
+Result<std::shared_ptr<RegexpLikeHolder>> RegexpLikeHolder::Make(
+    const std::string& regex_pattern, bool used_match_parameter,
+    const std::string& match_parameter) {
+  RE2::Options regex_op;
+  // set re2 use posix regex expression which also called ERE in postgre sql
+  regex_op.set_posix_syntax(true);
+  // oracle's regex_like will default treat source str as single line
+  regex_op.set_one_line(true);
+
+  if (used_match_parameter) {
+    for (auto& parameter : match_parameter) {
+      switch (parameter) {
+        case 'i':
+          regex_op.set_case_sensitive(false);
+          break;
+        case 'c':
+          regex_op.set_case_sensitive(true);
+          break;
+        case 'n':
+          regex_op.set_dot_nl(true);
+          break;
+        case 'm':
+          regex_op.set_one_line(false);
+          break;
+        default:
+          ARROW_RETURN_NOT_OK(Status::Invalid("Parameter only support 
'i','c','n','m'!"));

Review Comment:
   Nit pick: 
   1) I feel we should use `match parameter` instead of just `parameter` in the 
error message. 
   2) it will be better if we could return the specific invalid match parameter 
in the error
   
   We may use error message like `Invalid match parameter '{x}': Only 'i', 'c', 
'n', 'm' are allowed`



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to