lsy3993 commented on code in PR #55074:
URL: https://github.com/apache/doris/pull/55074#discussion_r2321692274
##########
be/src/vec/functions/function_regexp.cpp:
##########
@@ -708,6 +709,192 @@ class FunctionRegexpFunctionality : public IFunction {
}
};
+struct ExecuteImpl { // Holds the per-row regex search helper defined below.
+ static int _execute_inner_loop(FunctionContext* context, const // Returns the 1-based position of the first non-empty match at or after `position`, or -1.
ColumnString* str_col, // column holding the subject string; row `index_now` is read
+ const ColumnString* pattern_col, const // column holding the pattern; only read when no cached regex exists
size_t index_now, // row index used for both columns
+ const int position) { // 1-based start position; values below 1 yield -1
+ re2::RE2* re = reinterpret_cast<re2::RE2*>( // try the regex stored in the THREAD_LOCAL function state first
+ context->get_function_state(FunctionContext::THREAD_LOCAL));
+ std::unique_ptr<re2::RE2> scoped_re; // owns a regex compiled for this call only; freed on return
+ if (re == nullptr) { // no cached regex: compile the pattern for this row
+ std::string error_str;
+ DCHECK(pattern_col);
+ const auto& pattern =
pattern_col->get_data_at(index_check_const(index_now, false)); // NOTE(review): index_check_const is defined elsewhere; with `false` it presumably returns index_now unchanged — confirm
+ bool st = StringFunctions::compile_regex(pattern, &error_str,
StringRef(), StringRef(),
+ scoped_re);
+ if (!st) { // compilation failed: record a warning, then abort via exception
+ context->add_warning(error_str.c_str());
+ throw Exception(Status::InvalidArgument(error_str));
+ return 0; // NOTE(review): unreachable, the throw above always exits first
+ }
+ re = scoped_re.get();
+ }
+
+ const auto& str = str_col->get_data_at(index_now);
+ // start from position
+ int pos = position - 1; // convert the 1-based position to a 0-based offset
+ if (pos < 0) { // position < 1 is reported as "no match"
+ return -1;
+ }
+ bool matched = false;
+ size_t actual_position = 0;
+ while (pos < str.size) { // NOTE(review): compares int with str.size (presumably unsigned — confirm); pos is >= 0 here. A start offset past the end skips the loop and yields -1.
+ auto str_pos = str.data + pos;
+ auto str_size = str.size - pos;
+ re2::StringPiece str_sp_current = re2::StringPiece(str_pos,
str_size); // view of the remaining suffix starting at pos
+ re2::StringPiece match;
+
+ bool success = re->Match(str_sp_current, 0, str_size,
re2::RE2::UNANCHORED, &match, 1); // NOTE(review): matching runs on the suffix, so anchors such as ^ presumably apply at pos, not at the start of the full string — confirm intended
+ if (!success) { // nothing matches in the remainder: stop searching
+ break;
+ }
+ if (match.empty()) { // empty match: advance the start by one and search again
+ pos += 1;
+ continue;
+ }
+
+ // find the result
+ size_t match_start = match.data() - str_sp_current.data(); // offset of the match inside the suffix
+ actual_position = pos + match_start + 1; // back to a 1-based position in the full string
+ matched = true;
+ break;
+ }
+
+ if (actual_position > static_cast<size_t>(INT_MAX)) { // the result must fit in the int return type
+ context->add_warning("Match position exceeds maximum int value");
+ throw Exception(Status::InvalidArgument("Match position exceeds
maximum int value"));
+ return -1; // NOTE(review): unreachable, the throw above always exits first
+ }
+ return matched ? static_cast<int>(actual_position) : -1; // -1 signals that no non-empty match was found
+ }
+};
+
+struct RegexpPositionTwoParamImpl {
+ static DataTypes get_variadic_argument_types() {
+ return {std::make_shared<DataTypeString>(),
std::make_shared<DataTypeString>()};
+ }
+
+ static void execute_impl(FunctionContext* context, ColumnPtr
argument_columns[],
+ size_t input_rows_count, ColumnInt32::Container&
result_data,
+ bool is_const_args) {
+ const auto* str_col =
check_and_get_column<ColumnString>(argument_columns[0].get());
+ const auto* pattern_col =
check_and_get_column<ColumnString>(argument_columns[1].get());
+
+ for (int i = 0; i < input_rows_count; ++i) {
Review Comment:
no need to modify
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]