zclllyybb commented on code in PR #61156:
URL: https://github.com/apache/doris/pull/61156#discussion_r2931248540
##########
be/src/exprs/function/function_regexp.cpp:
##########
@@ -551,55 +553,88 @@ struct RegexpReplaceOneImpl {
template <bool ReturnNull>
struct RegexpExtractImpl {
static constexpr auto name = ReturnNull ? "regexp_extract_or_null" :
"regexp_extract";
- // 3 args
- static void execute_impl(FunctionContext* context, ColumnPtr
argument_columns[],
- size_t input_rows_count, ColumnString::Chars&
result_data,
- ColumnString::Offsets& result_offset, NullMap&
null_map) {
- const auto* str_col =
check_and_get_column<ColumnString>(argument_columns[0].get());
- const auto* pattern_col =
check_and_get_column<ColumnString>(argument_columns[1].get());
- const auto* index_col =
check_and_get_column<ColumnInt64>(argument_columns[2].get());
- for (size_t i = 0; i < input_rows_count; ++i) {
- if (null_map[i]) {
- StringOP::push_null_string(i, result_data, result_offset,
null_map);
- continue;
- }
- const auto& index_data = index_col->get_int(i);
- if (index_data < 0) {
- ReturnNull ? StringOP::push_null_string(i, result_data,
result_offset, null_map)
- : StringOP::push_empty_string(i, result_data,
result_offset);
- continue;
- }
- _execute_inner_loop<false>(context, str_col, pattern_col,
index_data, result_data,
- result_offset, null_map, i);
+ static constexpr size_t num_args = 3;
+
+ static DataTypePtr return_type() { return
make_nullable(std::make_shared<DataTypeString>()); }
+
+ static Status execute(FunctionContext* context, Block& block, const
ColumnNumbers& arguments,
+ uint32_t result, size_t input_rows_count) {
+ bool col_const[3];
+ ColumnPtr argument_columns[3];
+ for (int i = 0; i < 3; ++i) {
+ col_const[i] =
is_column_const(*block.get_by_position(arguments[i]).column);
+ }
+ argument_columns[0] = col_const[0] ? static_cast<const ColumnConst&>(
+
*block.get_by_position(arguments[0]).column)
+ .convert_to_full_column()
+ :
block.get_by_position(arguments[0]).column;
+
+ auto result_null_map = ColumnUInt8::create(input_rows_count, 0);
+ auto result_data_column = ColumnString::create();
+ auto& result_data = result_data_column->get_chars();
+ auto& result_offset = result_data_column->get_offsets();
+ result_offset.resize(input_rows_count);
+ auto& null_map = result_null_map->get_data();
+
+ default_preprocess_parameter_columns(argument_columns, col_const, {1,
2}, block, arguments);
+
+ if (col_const[1] && col_const[2]) {
Review Comment:
Can this handle the case where column 0 is const but columns 1 and 2 are not? Do we have a test case for this?
##########
be/src/exprs/function/function_regexp.cpp:
##########
@@ -648,86 +683,178 @@ struct RegexpExtractImpl {
}
};
-struct RegexpExtractAllImpl {
- static constexpr auto name = "regexp_extract_all";
+// Output handler for existing string-formatted result: "['a','b']"
+struct RegexpExtractAllStringOutput {
+ static constexpr const char* func_name = "regexp_extract_all";
+ static DataTypePtr return_type() { return
make_nullable(std::make_shared<DataTypeString>()); }
- size_t get_number_of_arguments() const { return 2; }
+ ColumnString::Chars& result_data;
+ ColumnString::Offsets& result_offset;
- static void execute_impl(FunctionContext* context, ColumnPtr
argument_columns[],
- size_t input_rows_count, ColumnString::Chars&
result_data,
- ColumnString::Offsets& result_offset, NullMap&
null_map) {
- const auto* str_col =
check_and_get_column<ColumnString>(argument_columns[0].get());
- const auto* pattern_col =
check_and_get_column<ColumnString>(argument_columns[1].get());
- for (int i = 0; i < input_rows_count; ++i) {
- if (null_map[i]) {
- StringOP::push_null_string(i, result_data, result_offset,
null_map);
- continue;
+ void push_empty(size_t index) {
+ StringOP::push_empty_string(index, result_data, result_offset);
+ }
+ void push_null(size_t index, NullMap& null_map) {
+ StringOP::push_null_string(index, result_data, result_offset,
null_map);
+ }
+ void push_matches(size_t index, const std::vector<std::string>& matches) {
+ std::string res = "[";
+ for (size_t j = 0; j < matches.size(); ++j) {
+ if (j > 0) {
+ res += ",";
}
- _execute_inner_loop<false>(context, str_col, pattern_col,
result_data, result_offset,
- null_map, i);
+ res += "'" + matches[j] + "'";
}
+ res += "]";
+ StringOP::push_value_string(std::string_view(res), index, result_data,
result_offset);
Review Comment:
Could we push the values directly? Using `std::string` with `+` may lead to
multiple copies.
##########
be/src/exprs/function/function_regexp.cpp:
##########
@@ -551,55 +553,88 @@ struct RegexpReplaceOneImpl {
template <bool ReturnNull>
struct RegexpExtractImpl {
static constexpr auto name = ReturnNull ? "regexp_extract_or_null" :
"regexp_extract";
- // 3 args
- static void execute_impl(FunctionContext* context, ColumnPtr
argument_columns[],
- size_t input_rows_count, ColumnString::Chars&
result_data,
- ColumnString::Offsets& result_offset, NullMap&
null_map) {
- const auto* str_col =
check_and_get_column<ColumnString>(argument_columns[0].get());
- const auto* pattern_col =
check_and_get_column<ColumnString>(argument_columns[1].get());
- const auto* index_col =
check_and_get_column<ColumnInt64>(argument_columns[2].get());
- for (size_t i = 0; i < input_rows_count; ++i) {
- if (null_map[i]) {
- StringOP::push_null_string(i, result_data, result_offset,
null_map);
- continue;
- }
- const auto& index_data = index_col->get_int(i);
- if (index_data < 0) {
- ReturnNull ? StringOP::push_null_string(i, result_data,
result_offset, null_map)
- : StringOP::push_empty_string(i, result_data,
result_offset);
- continue;
- }
- _execute_inner_loop<false>(context, str_col, pattern_col,
index_data, result_data,
- result_offset, null_map, i);
+ static constexpr size_t num_args = 3;
+
+ static DataTypePtr return_type() { return
make_nullable(std::make_shared<DataTypeString>()); }
+
+ static Status execute(FunctionContext* context, Block& block, const
ColumnNumbers& arguments,
+ uint32_t result, size_t input_rows_count) {
+ bool col_const[3];
+ ColumnPtr argument_columns[3];
+ for (int i = 0; i < 3; ++i) {
+ col_const[i] =
is_column_const(*block.get_by_position(arguments[i]).column);
+ }
+ argument_columns[0] = col_const[0] ? static_cast<const ColumnConst&>(
+
*block.get_by_position(arguments[0]).column)
+ .convert_to_full_column()
+ :
block.get_by_position(arguments[0]).column;
+
+ auto result_null_map = ColumnUInt8::create(input_rows_count, 0);
+ auto result_data_column = ColumnString::create();
+ auto& result_data = result_data_column->get_chars();
+ auto& result_offset = result_data_column->get_offsets();
+ result_offset.resize(input_rows_count);
+ auto& null_map = result_null_map->get_data();
+
+ default_preprocess_parameter_columns(argument_columns, col_const, {1,
2}, block, arguments);
+
+ if (col_const[1] && col_const[2]) {
+ _execute_loop<true>(context, argument_columns, input_rows_count,
result_data,
+ result_offset, null_map);
+ } else {
+ _execute_loop<false>(context, argument_columns, input_rows_count,
result_data,
+ result_offset, null_map);
}
+
+ block.get_by_position(result).column =
+ ColumnNullable::create(std::move(result_data_column),
std::move(result_null_map));
+ return Status::OK();
}
- static void execute_impl_const_args(FunctionContext* context, ColumnPtr
argument_columns[],
- size_t input_rows_count,
ColumnString::Chars& result_data,
- ColumnString::Offsets& result_offset,
NullMap& null_map) {
+private:
+ template <bool Const>
+ static void _execute_loop(FunctionContext* context, ColumnPtr
argument_columns[],
+ size_t input_rows_count, ColumnString::Chars&
result_data,
+ ColumnString::Offsets& result_offset, NullMap&
null_map) {
const auto* str_col =
check_and_get_column<ColumnString>(argument_columns[0].get());
const auto* pattern_col =
check_and_get_column<ColumnString>(argument_columns[1].get());
const auto* index_col =
check_and_get_column<ColumnInt64>(argument_columns[2].get());
- const auto& index_data = index_col->get_int(0);
- if (index_data < 0) {
+ if constexpr (Const) {
+ const auto& index_data = index_col->get_int(0);
+ if (index_data < 0) {
+ for (size_t i = 0; i < input_rows_count; ++i) {
+ ReturnNull ? StringOP::push_null_string(i, result_data,
result_offset, null_map)
+ : StringOP::push_empty_string(i, result_data,
result_offset);
+ }
+ return;
+ }
for (size_t i = 0; i < input_rows_count; ++i) {
- ReturnNull ? StringOP::push_null_string(i, result_data,
result_offset, null_map)
- : StringOP::push_empty_string(i, result_data,
result_offset);
+ if (null_map[i]) {
Review Comment:
When will this be true?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]