likun666661 commented on code in PR #39441:
URL: https://github.com/apache/arrow/pull/39441#discussion_r1443894967
##########
cpp/src/gandiva/regex_functions_holder.cc:
##########
@@ -275,4 +275,78 @@ const char* ExtractHolder::operator()(ExecutionContext*
ctx, const char* user_in
return result_buffer;
}
+Result<std::shared_ptr<RegexpLikeHolder>> RegexpLikeHolder::Make(
+ const FunctionNode& node) {
+ ARROW_RETURN_IF(
+ node.children().size() < 2,
+ Status::Invalid("'regexp_like' function requires at least two
parameters"));
+ auto pattern = dynamic_cast<LiteralNode*>(node.children().at(1).get());
+ ARROW_RETURN_IF(
+ pattern == nullptr,
+ Status::Invalid(
+ "'regexp_like' function requires a literal as the second
parameter"));
+
+ auto pattern_type = pattern->return_type()->id();
+ ARROW_RETURN_IF(
+ !(pattern_type == arrow::Type::STRING || pattern_type ==
arrow::Type::BINARY),
+ Status::Invalid(
+ "'regexp_like' function requires a string literal as the second
parameter"));
+
+ if (node.children().size() > 2) {
+ auto parameter = dynamic_cast<LiteralNode*>(node.children().at(2).get());
+ if (parameter != nullptr) {
+ auto parameter_type = parameter->return_type()->id();
+ ARROW_RETURN_IF(
+ !(parameter_type == arrow::Type::STRING ||
+ parameter_type == arrow::Type::BINARY),
+ Status::Invalid(
+ "'regexp_like' function requires a string literal as the third
parameter"));
+ return RegexpLikeHolder::Make(std::get<std::string>(pattern->holder()),
true,
+
std::get<std::string>(parameter->holder()));
+ }
+ }
+ return RegexpLikeHolder::Make(std::get<std::string>(pattern->holder()),
false, "");
+}
+
+Result<std::shared_ptr<RegexpLikeHolder>> RegexpLikeHolder::Make(
+ const std::string& regex_pattern, bool used_match_parameter,
+ const std::string& match_parameter) {
+ RE2::Options regex_op;
+ // set re2 use posix regex expression which also called ERE in postgre sql
+ regex_op.set_posix_syntax(true);
+ // oracle's regex_like will default treat source str as single line
+ regex_op.set_one_line(true);
+
+ if (used_match_parameter) {
+ for (auto& parameter : match_parameter) {
+ switch (parameter) {
+ case 'i':
+ regex_op.set_case_sensitive(false);
+ break;
+ case 'c':
+ regex_op.set_case_sensitive(true);
+ break;
+ case 'n':
+ regex_op.set_dot_nl(true);
+ break;
+ case 'm':
+ regex_op.set_one_line(false);
+ break;
+ default:
+ ARROW_RETURN_NOT_OK(Status::Invalid("Parameter only support
'i','c','n','m'!"));
+ }
Review Comment:
In the Oracle documentation for `REGEXP_LIKE`, contradictory match parameters
are resolved in favor of the last one specified: "For example, if you specify
'ic', then Oracle uses case-sensitive matching."
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]