js8544 commented on code in PR #39441:
URL: https://github.com/apache/arrow/pull/39441#discussion_r1443621316
##########
cpp/src/gandiva/regex_functions_holder.cc:
##########
@@ -275,4 +275,78 @@ const char* ExtractHolder::operator()(ExecutionContext*
ctx, const char* user_in
return result_buffer;
}
+Result<std::shared_ptr<RegexpLikeHolder>> RegexpLikeHolder::Make(
+ const FunctionNode& node) {
+ ARROW_RETURN_IF(
+ node.children().size() < 2,
+ Status::Invalid("'regexp_like' function requires at least two
parameters"));
+ auto pattern = dynamic_cast<LiteralNode*>(node.children().at(1).get());
+ ARROW_RETURN_IF(
+ pattern == nullptr,
+ Status::Invalid(
+ "'regexp_like' function requires a literal as the second
parameter"));
+
+ auto pattern_type = pattern->return_type()->id();
+ ARROW_RETURN_IF(
+ !(pattern_type == arrow::Type::STRING || pattern_type ==
arrow::Type::BINARY),
+ Status::Invalid(
+ "'regexp_like' function requires a string literal as the second
parameter"));
+
+ if (node.children().size() > 2) {
+ auto parameter = dynamic_cast<LiteralNode*>(node.children().at(2).get());
+ if (parameter != nullptr) {
+ auto parameter_type = parameter->return_type()->id();
+ ARROW_RETURN_IF(
+ !(parameter_type == arrow::Type::STRING ||
+ parameter_type == arrow::Type::BINARY),
+ Status::Invalid(
+ "'regexp_like' function requires a string literal as the third
parameter"));
+ return RegexpLikeHolder::Make(std::get<std::string>(pattern->holder()),
true,
+
std::get<std::string>(parameter->holder()));
+ }
+ }
+ return RegexpLikeHolder::Make(std::get<std::string>(pattern->holder()),
false, "");
+}
+
+Result<std::shared_ptr<RegexpLikeHolder>> RegexpLikeHolder::Make(
+ const std::string& regex_pattern, bool used_match_parameter,
+ const std::string& match_parameter) {
+ RE2::Options regex_op;
+ // set re2 use posix regex expression which also called ERE in postgre sql
+ regex_op.set_posix_syntax(true);
+ // oracle's regex_like will default treat source str as single line
+ regex_op.set_one_line(true);
+
+ if (used_match_parameter) {
+ for (auto& parameter : match_parameter) {
+ switch (parameter) {
+ case 'i':
+ regex_op.set_case_sensitive(false);
+ break;
+ case 'c':
+ regex_op.set_case_sensitive(true);
+ break;
+ case 'n':
+ regex_op.set_dot_nl(true);
+ break;
+ case 'm':
+ regex_op.set_one_line(false);
+ break;
+ default:
+ ARROW_RETURN_NOT_OK(Status::Invalid("Parameter only support
'i','c','n','m'!"));
+ }
Review Comment:
What happens in Oracle if both `i` and `c` are present? Does it report an error?
##########
cpp/src/gandiva/regex_functions_holder.cc:
##########
@@ -275,4 +275,78 @@ const char* ExtractHolder::operator()(ExecutionContext*
ctx, const char* user_in
return result_buffer;
}
+Result<std::shared_ptr<RegexpLikeHolder>> RegexpLikeHolder::Make(
+ const FunctionNode& node) {
+ ARROW_RETURN_IF(
+ node.children().size() < 2,
+ Status::Invalid("'regexp_like' function requires at least two
parameters"));
+ auto pattern = dynamic_cast<LiteralNode*>(node.children().at(1).get());
Review Comment:
Although it's not prevalent in Gandiva, we typically prefer
`arrow::internal::checked_cast` over `dynamic_cast` for better performance.
##########
cpp/src/gandiva/regex_functions_holder.cc:
##########
@@ -275,4 +275,78 @@ const char* ExtractHolder::operator()(ExecutionContext*
ctx, const char* user_in
return result_buffer;
}
+Result<std::shared_ptr<RegexpLikeHolder>> RegexpLikeHolder::Make(
+ const FunctionNode& node) {
+ ARROW_RETURN_IF(
+ node.children().size() < 2,
+ Status::Invalid("'regexp_like' function requires at least two
parameters"));
+ auto pattern = dynamic_cast<LiteralNode*>(node.children().at(1).get());
+ ARROW_RETURN_IF(
+ pattern == nullptr,
+ Status::Invalid(
+ "'regexp_like' function requires a literal as the second
parameter"));
+
+ auto pattern_type = pattern->return_type()->id();
+ ARROW_RETURN_IF(
+ !(pattern_type == arrow::Type::STRING || pattern_type ==
arrow::Type::BINARY),
+ Status::Invalid(
+ "'regexp_like' function requires a string literal as the second
parameter"));
+
+ if (node.children().size() > 2) {
+ auto parameter = dynamic_cast<LiteralNode*>(node.children().at(2).get());
Review Comment:
Same here: prefer `arrow::internal::checked_cast` over `dynamic_cast`.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]