This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch branch-1.2-lts
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-1.2-lts by this push:
new e762233277 [feature](string_functions) The 'split_part' function supports non-constant parameters (#18333)
e762233277 is described below
commit e762233277b488ca3ebc9dd3aff05b34b11536d0
Author: Jerry Hu <[email protected]>
AuthorDate: Mon Apr 3 20:38:06 2023 +0800
[feature](string_functions) The 'split_part' function supports non-constant parameters (#18333)
---
be/src/vec/functions/function_string.h | 155 ++++++++++-----------
.../string_functions/test_split_part.groovy | 42 +++++-
2 files changed, 114 insertions(+), 83 deletions(-)
diff --git a/be/src/vec/functions/function_string.h b/be/src/vec/functions/function_string.h
index c891164508..a1e0cb9d32 100644
--- a/be/src/vec/functions/function_string.h
+++ b/be/src/vec/functions/function_string.h
@@ -1253,48 +1253,45 @@ public:
auto& res_chars = res->get_chars();
res_offsets.resize(input_rows_count);
- ColumnPtr content_column =
-
block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
-
- if (auto* nullable = check_and_get_column<const
ColumnNullable>(*content_column)) {
- // Danger: Here must dispose the null map data first! Because
- // argument_columns[0]=nullable->get_nested_column_ptr(); will release the mem
- // of column nullable mem of null map
- VectorizedUtils::update_null_map(null_map->get_data(),
nullable->get_null_map_data());
- content_column = nullable->get_nested_column_ptr();
- }
-
- for (size_t i = 1; i <= 2; i++) {
- ColumnPtr columnPtr =
remove_nullable(block.get_by_position(arguments[i]).column);
-
- if (!is_column_const(*columnPtr)) {
- return Status::RuntimeError("Argument at index {} for function
{} must be constant",
- i + 1, get_name());
+ const size_t argument_size = arguments.size();
+ ColumnPtr argument_columns[argument_size];
+ for (size_t i = 0; i < argument_size; ++i) {
+ argument_columns[i] =
+
block.get_by_position(arguments[i]).column->convert_to_full_column_if_const();
+ if (auto* nullable = check_and_get_column<const
ColumnNullable>(*argument_columns[i])) {
+ // Danger: Here must dispose the null map data first! Because
+ // argument_columns[i]=nullable->get_nested_column_ptr(); will release the mem
+ // of column nullable mem of null map
+ VectorizedUtils::update_null_map(null_map->get_data(),
+
nullable->get_null_map_data());
+ argument_columns[i] = nullable->get_nested_column_ptr();
}
}
- auto str_col = assert_cast<const ColumnString*>(content_column.get());
+ auto str_col = assert_cast<const
ColumnString*>(argument_columns[0].get());
- const IColumn& delimiter_col =
*block.get_by_position(arguments[1]).column;
- const auto* delimiter_const = typeid_cast<const
ColumnConst*>(&delimiter_col);
- auto delimiter = delimiter_const->get_field().get<String>();
- int32_t delimiter_size = delimiter.size();
+ auto delimiter_col = assert_cast<const
ColumnString*>(argument_columns[1].get());
- const IColumn& part_num_col =
*block.get_by_position(arguments[2]).column;
- const auto* part_num_col_const = typeid_cast<const
ColumnConst*>(&part_num_col);
- auto part_number = part_num_col_const->get_field().get<Int32>();
+ auto part_num_col = assert_cast<const
ColumnInt32*>(argument_columns[2].get());
+ auto& part_num_col_data = part_num_col->get_data();
- if (part_number >= 0) {
- for (size_t i = 0; i < input_rows_count; ++i) {
- if (part_number == 0) {
- StringOP::push_null_string(i, res_chars, res_offsets,
null_map_data);
- continue;
- }
+ for (size_t i = 0; i < input_rows_count; ++i) {
+ if (part_num_col_data[i] == 0) {
+ StringOP::push_null_string(i, res_chars, res_offsets,
null_map_data);
+ continue;
+ }
- auto str = str_col->get_data_at(i);
- if (delimiter_size == 0) {
- StringOP::push_empty_string(i, res_chars, res_offsets);
- } else if (delimiter_size == 1) {
+ auto delimiter = delimiter_col->get_data_at(i);
+ auto delimiter_str = delimiter_col->get_data_at(i).to_string();
+ auto part_number = part_num_col_data[i];
+ auto str = str_col->get_data_at(i);
+ if (delimiter.size == 0) {
+ StringOP::push_empty_string(i, res_chars, res_offsets);
+ continue;
+ }
+
+ if (part_number > 0) {
+ if (delimiter.size == 1) {
// If delimiter is a char, use memchr to split
int32_t pre_offset = -1;
int32_t offset = -1;
@@ -1303,7 +1300,7 @@ public:
pre_offset = offset;
size_t n = str.size - offset - 1;
const char* pos = reinterpret_cast<const char*>(
- memchr(str.data + offset + 1, delimiter[0],
n));
+ memchr(str.data + offset + 1,
delimiter_str[0], n));
if (pos != nullptr) {
offset = pos - str.data;
num++;
@@ -1325,15 +1322,15 @@ public:
}
} else {
// If delimiter is a string, use memmem to split
- int32_t pre_offset = -delimiter_size;
- int32_t offset = pre_offset;
+ int32_t pre_offset = -delimiter.size;
+ int32_t offset = -delimiter.size;
int32_t num = 0;
while (num < part_number) {
pre_offset = offset;
- size_t n = str.size - offset - delimiter_size;
- char* pos = reinterpret_cast<char*>(
- memmem(str.data + offset + delimiter_size, n,
delimiter.c_str(),
- delimiter_size));
+ size_t n = str.size - offset - delimiter.size;
+ char* pos =
+ reinterpret_cast<char*>(memmem(str.data +
offset + delimiter.size,
+ n,
delimiter.data, delimiter.size));
if (pos != nullptr) {
offset = pos - str.data;
num++;
@@ -1347,60 +1344,54 @@ public:
if (num == part_number) {
StringOP::push_value_string(
std::string_view {reinterpret_cast<const
char*>(
- str.data +
pre_offset + delimiter_size),
- (size_t)offset - pre_offset
- delimiter_size},
+ str.data +
pre_offset + delimiter.size),
+ (size_t)offset - pre_offset
- delimiter.size},
i, res_chars, res_offsets);
} else {
StringOP::push_null_string(i, res_chars, res_offsets,
null_map_data);
}
}
- }
- } else {
- part_number = -part_number;
- for (size_t i = 0; i < input_rows_count; ++i) {
- if (delimiter_size == 0) {
- StringOP::push_empty_string(i, res_chars, res_offsets);
- } else {
- auto str = str_col->get_data_at(i);
- auto str_str = str.to_string();
- int32_t offset = str.size;
- int32_t pre_offset = offset;
- int32_t num = 0;
- auto substr = str_str;
- while (num <= part_number && offset >= 0) {
- offset = (int)substr.rfind(delimiter, offset);
- if (offset != -1) {
- if (++num == part_number) {
- break;
- }
- pre_offset = offset;
- offset = offset - 1;
- substr = str_str.substr(0, pre_offset);
- } else {
+ } else {
+ part_number = -part_number;
+ auto str_str = str.to_string();
+ int32_t offset = str.size;
+ int32_t pre_offset = offset;
+ int32_t num = 0;
+ auto substr = str_str;
+ while (num <= part_number && offset >= 0) {
+ offset = (int)substr.rfind(delimiter, offset);
+ if (offset != -1) {
+ if (++num == part_number) {
break;
}
+ pre_offset = offset;
+ offset = offset - 1;
+ substr = str_str.substr(0, pre_offset);
+ } else {
+ break;
}
- num = (offset == -1 && num != 0) ? num + 1 : num;
+ }
+ num = (offset == -1 && num != 0) ? num + 1 : num;
- if (num == part_number) {
- if (offset == -1) {
- StringOP::push_value_string(
- std::string_view {reinterpret_cast<const
char*>(str.data),
- (size_t)pre_offset},
- i, res_chars, res_offsets);
- } else {
- StringOP::push_value_string(
- std::string_view {str_str.substr(
- offset + delimiter_size,
- (size_t)pre_offset - offset -
delimiter_size)},
- i, res_chars, res_offsets);
- }
+ if (num == part_number) {
+ if (offset == -1) {
+ StringOP::push_value_string(
+ std::string_view {reinterpret_cast<const
char*>(str.data),
+ (size_t)pre_offset},
+ i, res_chars, res_offsets);
} else {
- StringOP::push_null_string(i, res_chars, res_offsets,
null_map_data);
+ StringOP::push_value_string(
+ std::string_view {str_str.substr(
+ offset + delimiter.size,
+ (size_t)pre_offset - offset -
delimiter.size)},
+ i, res_chars, res_offsets);
}
+ } else {
+ StringOP::push_null_string(i, res_chars, res_offsets,
null_map_data);
}
}
}
+
block.get_by_position(result).column =
ColumnNullable::create(std::move(res), std::move(null_map));
return Status::OK();
diff --git a/regression-test/suites/query_p0/sql_functions/string_functions/test_split_part.groovy b/regression-test/suites/query_p0/sql_functions/string_functions/test_split_part.groovy
index b808be2f8f..800cd0042c 100644
--- a/regression-test/suites/query_p0/sql_functions/string_functions/test_split_part.groovy
+++ b/regression-test/suites/query_p0/sql_functions/string_functions/test_split_part.groovy
@@ -25,6 +25,46 @@ suite("test_split_part") {
where
split_part("bCKHDX07at", "5.7.37", cast(name as int)) is not null;
"""
- exception "Argument at index 3 for function split_part must be constant"
}
+
+ qt_1 "select split_part(k8, '1', 1), k8, split_part(concat(k8, '12'), '1',
1) from test_query_db.test order by k8 limit 2;"
+
+ sql """
+ DROP TABLE IF EXISTS `test_split_part_non_const`;
+ """
+ sql """
+ CREATE TABLE `test_split_part_non_const`(
+ `id` LARGEINT,
+ `name` VARCHAR(20),
+ `age` SMALLINT,
+ `part` int,
+ `address` VARCHAR(100),
+ `test_t` string,
+ `date` DATE
+ )
+ DUPLICATE KEY (`id`,`name`)
+ DISTRIBUTED BY HASH (`id`) BUCKETS 3
+ PROPERTIES("replication_num" = "1");
+ """
+ sql """
+ INSERT INTO test_split_part_non_const VALUES (1,"kkk",18, -1,
"beijing","a,b,c,d,e,f","2022-06-28"),
+ (2, "kkk",18, -2, "shanghai","a,b,c,d,e,f","2022-07-28"),
+ (3, "kkk",20, -3, "beijing","a,b,c,d,e,f","2022-06-28"),
+ (4, "hhh",45, -4, "beijing","a,b,c,d,e,f","2022-05-28");
+ """
+ qt_non_const1 """
+ select *, split_part(test_t, ',', id) from test_split_part_non_const
order by id, name, age;
+ """
+
+ qt_non_const2 """
+ select *, split_part(test_t, ',c', id) from test_split_part_non_const
order by id, name, age;
+ """
+
+ qt_non_const3 """
+ select *, split_part(test_t, ',', part) from test_split_part_non_const
order by id, name, age;
+ """
+
+ qt_non_const4 """
+ select *, split_part(test_t, ',c', part) from test_split_part_non_const
order by id, name, age;
+ """
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]