dwdwqfwe commented on code in PR #51501:
URL: https://github.com/apache/doris/pull/51501#discussion_r2138045798


##########
be/src/vec/functions/function_regexp.cpp:
##########
@@ -50,6 +52,152 @@
 namespace doris::vectorized {
 #include "common/compile_check_begin.h"
 
+struct RegexpCountImpl {
+    static void execute_impl(FunctionContext* context, ColumnPtr 
argument_columns[],
+                             size_t input_rows_count, ColumnInt64::Container& 
result_data,
+                             NullMap& null_map) {
+        const auto* pattern = 
check_and_get_column<ColumnString>(argument_columns[1].get());
+        const auto* str = 
check_and_get_column<ColumnString>(argument_columns[0].get());
+
+        for (size_t i = 0; i < input_rows_count; ++i) {
+            if (null_map[i]) {
+                result_data[i] = 0;
+                continue;
+            }
+
+            result_data[i] = _execute_inner_loop(context, str, pattern, 
null_map, i);
+        }
+    }
+
+private:
+    static int64_t _execute_inner_loop(FunctionContext* context, const 
ColumnString* str,
+                                       const ColumnString* pattern, NullMap& 
null_map,
+                                       const size_t index_now) {
+        re2::RE2* re = reinterpret_cast<re2::RE2*>(
+                context->get_function_state(FunctionContext::THREAD_LOCAL));
+        std::unique_ptr<re2::RE2> scoped_re;

Review Comment:
   I used scoped_re here: "bool st = StringFunctions::compile_regex(pattern, 
&error_str, StringRef(), StringRef(),
                                                        scoped_re);"
   and I have checked that re is not nullptr at "if (!re) {
               std::string error_str;"



##########
be/src/vec/functions/function_regexp.cpp:
##########
@@ -50,6 +52,152 @@
 namespace doris::vectorized {
 #include "common/compile_check_begin.h"
 
+struct RegexpCountImpl {
+    static void execute_impl(FunctionContext* context, ColumnPtr 
argument_columns[],
+                             size_t input_rows_count, ColumnInt64::Container& 
result_data,
+                             NullMap& null_map) {
+        const auto* pattern = 
check_and_get_column<ColumnString>(argument_columns[1].get());
+        const auto* str = 
check_and_get_column<ColumnString>(argument_columns[0].get());
+
+        for (size_t i = 0; i < input_rows_count; ++i) {
+            if (null_map[i]) {
+                result_data[i] = 0;
+                continue;
+            }
+
+            result_data[i] = _execute_inner_loop(context, str, pattern, 
null_map, i);
+        }
+    }
+
+private:
+    static int64_t _execute_inner_loop(FunctionContext* context, const 
ColumnString* str,
+                                       const ColumnString* pattern, NullMap& 
null_map,
+                                       const size_t index_now) {
+        re2::RE2* re = reinterpret_cast<re2::RE2*>(
+                context->get_function_state(FunctionContext::THREAD_LOCAL));
+        std::unique_ptr<re2::RE2> scoped_re;
+
+        if (str->is_null_at(index_now) || pattern->is_null_at(index_now)) {
+            null_map[index_now] = true;
+            return 0;
+        }
+
+        const auto& str_data = str->get_data_at(index_now);
+        const auto& pattern_data = pattern->get_data_at(index_now);
+
+        if (!re) {
+            std::string error_str;
+            bool st = StringFunctions::compile_regex(pattern_data, &error_str, 
StringRef(),
+                                                     StringRef(), scoped_re);
+            if (!st) {
+                context->add_warning(error_str.c_str());
+                null_map[index_now] = true;
+                return 0;
+            }
+            re = scoped_re.get();
+        }
+
+        int64_t count = 0;
+        size_t pos = 0;
+        re2::StringPiece str_sp(str_data.data, str_data.size);
+
+        while (pos < str_data.size) {
+            re2::StringPiece current(str_data.data + pos, str_data.size - pos);
+            re2::StringPiece match;
+
+            if (!re->Match(current, 0, current.size(), re2::RE2::UNANCHORED, 
&match, 1)) {
+                break;
+            }
+
+            if (match.empty()) {
+                pos++;
+            } else {
+                count++;
+                pos += match.data() - current.data() + match.size();
+            }
+        }
+
+        return count;
+    }
+};
+
+class FunctionRegexpCount : public IFunction {
+public:
+    static constexpr auto name = "regexp_count";
+
+    static FunctionPtr create() { return 
std::make_shared<FunctionRegexpCount>(); }
+
+    String get_name() const override { return name; }
+
+    size_t get_number_of_arguments() const override { return 2; }
+
+    DataTypePtr get_return_type_impl(const DataTypes& arguments) const 
override {
+        auto int64_type = std::make_shared<DataTypeInt64>();
+        return make_nullable(std::static_pointer_cast<const 
IDataType>(int64_type));
+    }
+
+    Status open(FunctionContext* context, FunctionContext::FunctionStateScope 
scope) override {
+        if (scope == FunctionContext::THREAD_LOCAL && 
context->is_col_constant(1)) {
+            const auto pattern_col = context->get_constant_col(1)->column_ptr;
+            const auto& pattern = pattern_col->get_data_at(0);
+            if (pattern.size == 0) {
+                return Status::OK();
+            }
+            std::string error_str;
+            std::unique_ptr<re2::RE2> scoped_re;
+            bool st = StringFunctions::compile_regex(pattern, &error_str, 
StringRef(), StringRef(),
+                                                     scoped_re);
+            if (!st) {
+                context->set_error(error_str.c_str());
+                return Status::InvalidArgument(error_str);
+            }
+            std::shared_ptr<re2::RE2> re(scoped_re.release());
+            context->set_function_state(scope, 
std::static_pointer_cast<void>(re));
+        }
+        return Status::OK();
+    }
+
+    Status close(FunctionContext* context, FunctionContext::FunctionStateScope 
scope) override {
+        if (scope == FunctionContext::THREAD_LOCAL) {
+            auto ptr = context->get_function_state(scope);
+            if (ptr) {
+                delete reinterpret_cast<re2::RE2*>(ptr);

Review Comment:
   Yes, you are right. I did this out of habit, like adding a delete function 
for a class, and it is wrong here.



##########
be/src/vec/functions/function_regexp.cpp:
##########
@@ -50,6 +52,152 @@
 namespace doris::vectorized {
 #include "common/compile_check_begin.h"
 
+struct RegexpCountImpl {
+    static void execute_impl(FunctionContext* context, ColumnPtr 
argument_columns[],
+                             size_t input_rows_count, ColumnInt64::Container& 
result_data,
+                             NullMap& null_map) {
+        const auto* pattern = 
check_and_get_column<ColumnString>(argument_columns[1].get());
+        const auto* str = 
check_and_get_column<ColumnString>(argument_columns[0].get());
+
+        for (size_t i = 0; i < input_rows_count; ++i) {
+            if (null_map[i]) {
+                result_data[i] = 0;
+                continue;
+            }
+
+            result_data[i] = _execute_inner_loop(context, str, pattern, 
null_map, i);
+        }
+    }
+
+private:
+    static int64_t _execute_inner_loop(FunctionContext* context, const 
ColumnString* str,
+                                       const ColumnString* pattern, NullMap& 
null_map,
+                                       const size_t index_now) {
+        re2::RE2* re = reinterpret_cast<re2::RE2*>(
+                context->get_function_state(FunctionContext::THREAD_LOCAL));
+        std::unique_ptr<re2::RE2> scoped_re;
+
+        if (str->is_null_at(index_now) || pattern->is_null_at(index_now)) {
+            null_map[index_now] = true;
+            return 0;
+        }
+
+        const auto& str_data = str->get_data_at(index_now);
+        const auto& pattern_data = pattern->get_data_at(index_now);
+
+        if (!re) {
+            std::string error_str;
+            bool st = StringFunctions::compile_regex(pattern_data, &error_str, 
StringRef(),
+                                                     StringRef(), scoped_re);
+            if (!st) {
+                context->add_warning(error_str.c_str());
+                null_map[index_now] = true;
+                return 0;
+            }
+            re = scoped_re.get();
+        }
+
+        int64_t count = 0;
+        size_t pos = 0;
+        re2::StringPiece str_sp(str_data.data, str_data.size);

Review Comment:
   Yes, that's a mistake. I replaced it with `current` and forgot to delete it.



##########
be/src/vec/functions/function_regexp.cpp:
##########
@@ -50,6 +52,152 @@
 namespace doris::vectorized {
 #include "common/compile_check_begin.h"
 
+struct RegexpCountImpl {
+    static void execute_impl(FunctionContext* context, ColumnPtr 
argument_columns[],
+                             size_t input_rows_count, ColumnInt64::Container& 
result_data,
+                             NullMap& null_map) {
+        const auto* pattern = 
check_and_get_column<ColumnString>(argument_columns[1].get());
+        const auto* str = 
check_and_get_column<ColumnString>(argument_columns[0].get());
+
+        for (size_t i = 0; i < input_rows_count; ++i) {
+            if (null_map[i]) {
+                result_data[i] = 0;
+                continue;
+            }
+
+            result_data[i] = _execute_inner_loop(context, str, pattern, 
null_map, i);
+        }
+    }
+
+private:
+    static int64_t _execute_inner_loop(FunctionContext* context, const 
ColumnString* str,
+                                       const ColumnString* pattern, NullMap& 
null_map,
+                                       const size_t index_now) {
+        re2::RE2* re = reinterpret_cast<re2::RE2*>(
+                context->get_function_state(FunctionContext::THREAD_LOCAL));
+        std::unique_ptr<re2::RE2> scoped_re;
+
+        if (str->is_null_at(index_now) || pattern->is_null_at(index_now)) {
+            null_map[index_now] = true;
+            return 0;
+        }
+
+        const auto& str_data = str->get_data_at(index_now);
+        const auto& pattern_data = pattern->get_data_at(index_now);
+
+        if (!re) {
+            std::string error_str;
+            bool st = StringFunctions::compile_regex(pattern_data, &error_str, 
StringRef(),
+                                                     StringRef(), scoped_re);
+            if (!st) {
+                context->add_warning(error_str.c_str());
+                null_map[index_now] = true;
+                return 0;
+            }
+            re = scoped_re.get();
+        }
+
+        int64_t count = 0;
+        size_t pos = 0;
+        re2::StringPiece str_sp(str_data.data, str_data.size);
+
+        while (pos < str_data.size) {
+            re2::StringPiece current(str_data.data + pos, str_data.size - pos);
+            re2::StringPiece match;
+
+            if (!re->Match(current, 0, current.size(), re2::RE2::UNANCHORED, 
&match, 1)) {
+                break;
+            }
+
+            if (match.empty()) {
+                pos++;
+            } else {
+                count++;
+                pos += match.data() - current.data() + match.size();

Review Comment:
   OK, I will add it in the next PR.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to