This is an automated email from the ASF dual-hosted git repository.
eldenmoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 1f17551fb04 [improve](function) support collect_list with nested types
param (#47965)
1f17551fb04 is described below
commit 1f17551fb044d99dbfca25f417841fad75786315
Author: amory <[email protected]>
AuthorDate: Thu Feb 20 10:31:22 2025 +0800
[improve](function) support collect_list with nested types param (#47965)
Previously, collect_list did not support array/map/struct typed parameters;
using them produced the following error:
```
mysql> SELECT id, collect_list(kastr) FROM test_array_agg_complex GROUP BY
id ORDER BY id;
ERROR 1105 (HY000): errCode = 2, detailMessage =
(172.21.16.12)[INTERNAL_ERROR]Agg Function collect_list(array<text>) is not
implemented
```
After this change, collect_list can be used with array/map/struct parameters.
---
.../aggregate_function_collect.cpp | 8 +++
.../aggregate_function_collect.h | 70 ++++++++++++++++++++-
.../data/query_p0/aggregate/array_agg.out | Bin 8132 -> 37278 bytes
.../suites/query_p0/aggregate/array_agg.groovy | 16 +++++
4 files changed, 93 insertions(+), 1 deletion(-)
diff --git a/be/src/vec/aggregate_functions/aggregate_function_collect.cpp
b/be/src/vec/aggregate_functions/aggregate_function_collect.cpp
index 15806c739ed..c1abefec218 100644
--- a/be/src/vec/aggregate_functions/aggregate_function_collect.cpp
+++ b/be/src/vec/aggregate_functions/aggregate_function_collect.cpp
@@ -49,6 +49,11 @@ AggregateFunctionPtr do_create_agg_function_collect(bool
distinct, const DataTyp
AggregateFunctionCollectListData<T, HasLimit>, HasLimit,
std::false_type>>(
argument_types, result_is_nullable);
}
+ } else if (!distinct) {
+ // void type means support array/map/struct type for collect_list
+ return creator_without_type::create<AggregateFunctionCollect<
+ AggregateFunctionCollectListData<void, HasLimit>, HasLimit,
std::false_type>>(
+ argument_types, result_is_nullable);
}
return nullptr;
}
@@ -93,6 +98,9 @@ AggregateFunctionPtr
create_aggregate_function_collect_impl(const std::string& n
if constexpr (ShowNull::value) {
return do_create_agg_function_collect<void, HasLimit, ShowNull>(
distinct, argument_types, result_is_nullable);
+ } else {
+ return do_create_agg_function_collect<void, HasLimit, ShowNull>(
+ distinct, argument_types, result_is_nullable);
}
}
diff --git a/be/src/vec/aggregate_functions/aggregate_function_collect.h
b/be/src/vec/aggregate_functions/aggregate_function_collect.h
index 1b4eadf259d..755458d662a 100644
--- a/be/src/vec/aggregate_functions/aggregate_function_collect.h
+++ b/be/src/vec/aggregate_functions/aggregate_function_collect.h
@@ -194,6 +194,9 @@ struct AggregateFunctionCollectListData {
PaddedPODArray<ElementType> data;
Int64 max_size = -1;
+ AggregateFunctionCollectListData() {}
+ AggregateFunctionCollectListData(const DataTypes& argument_types) {}
+
size_t size() const { return data.size(); }
void add(const IColumn& column, size_t row_num) {
@@ -306,6 +309,67 @@ struct AggregateFunctionCollectListData<StringRef,
HasLimit> {
}
};
+template <typename HasLimit>
+struct AggregateFunctionCollectListData<void, HasLimit> {
+ using ElementType = StringRef;
+ using Self = AggregateFunctionCollectListData<void, HasLimit>;
+ MutableColumnPtr column_data;
+ Int64 max_size = -1;
+
+ AggregateFunctionCollectListData() {}
+ AggregateFunctionCollectListData(const DataTypes& argument_types) {
+ DataTypePtr column_type = argument_types[0];
+ column_data = column_type->create_column();
+ }
+
+ size_t size() const { return column_data->size(); }
+
+ void add(const IColumn& column, size_t row_num) {
column_data->insert_from(column, row_num); }
+
+ void merge(const AggregateFunctionCollectListData& rhs) {
+ if constexpr (HasLimit::value) {
+ if (max_size == -1) {
+ max_size = rhs.max_size;
+ }
+ max_size = rhs.max_size;
+
+ column_data->insert_range_from(
+ *rhs.column_data, 0,
+ std::min(assert_cast<size_t, TypeCheckOnRelease::DISABLE>(
+ static_cast<size_t>(max_size - size())),
+ rhs.size()));
+ } else {
+ column_data->insert_range_from(*rhs.column_data, 0, rhs.size());
+ }
+ }
+
+ void write(BufferWritable& buf) const {
+ const size_t size = column_data->size();
+ write_binary(size, buf);
+ for (size_t i = 0; i < size; i++) {
+ write_string_binary(column_data->get_data_at(i), buf);
+ }
+ write_var_int(max_size, buf);
+ }
+
+ void read(BufferReadable& buf) {
+ size_t size = 0;
+ read_binary(size, buf);
+ column_data->reserve(size);
+
+ StringRef s;
+ for (size_t i = 0; i < size; i++) {
+ read_string_binary(s, buf);
+ column_data->insert_data(s.data, s.size);
+ }
+ read_var_int(max_size, buf);
+ }
+
+ void reset() { column_data->clear(); }
+
+ void insert_result_into(IColumn& to) const {
to.insert_range_from(*column_data, 0, size()); }
+};
+
template <typename T>
struct AggregateFunctionArrayAggData {
using ElementType = T;
@@ -623,7 +687,11 @@ public:
new (place) Data();
}
} else {
- new (place) Data();
+ if constexpr (std::is_same_v<Data,
AggregateFunctionCollectListData<void, HasLimit>>) {
+ new (place) Data(argument_types);
+ } else {
+ new (place) Data();
+ }
}
}
diff --git a/regression-test/data/query_p0/aggregate/array_agg.out
b/regression-test/data/query_p0/aggregate/array_agg.out
index 62ffb5fcf47..4bdf7671da5 100644
Binary files a/regression-test/data/query_p0/aggregate/array_agg.out and
b/regression-test/data/query_p0/aggregate/array_agg.out differ
diff --git a/regression-test/suites/query_p0/aggregate/array_agg.groovy
b/regression-test/suites/query_p0/aggregate/array_agg.groovy
index 42fb3b131a4..6342baa8212 100644
--- a/regression-test/suites/query_p0/aggregate/array_agg.groovy
+++ b/regression-test/suites/query_p0/aggregate/array_agg.groovy
@@ -276,6 +276,22 @@ suite("array_agg") {
order_qt_sql_array_agg_array """ SELECT id, array_agg(kastr) FROM
test_array_agg_complex GROUP BY id ORDER BY id """
order_qt_sql_array_agg_map """ SELECT id, array_agg(km) FROM
test_array_agg_complex GROUP BY id ORDER BY id """
order_qt_sql_array_agg_struct """ SELECT id, array_agg(ks) FROM
test_array_agg_complex GROUP BY id ORDER BY id """
+ order_qt_sql_collect_list_array """ SELECT id, collect_list(kastr) FROM
test_array_agg_complex GROUP BY id ORDER BY id """
+ order_qt_sql_collect_list_map """ SELECT id, collect_list(km) FROM
test_array_agg_complex GROUP BY id ORDER BY id """
+ order_qt_sql_collect_list_struct """ SELECT id, collect_list(ks) FROM
test_array_agg_complex GROUP BY id ORDER BY id """
+ order_qt_sql_group_array_array """ SELECT group_array(kastr) FROM
test_array_agg_complex GROUP BY id ORDER BY id """
+ order_qt_sql_group_array_map """ SELECT group_array(km) FROM
test_array_agg_complex GROUP BY id ORDER BY id """
+ order_qt_sql_group_array_struct """ SELECT group_array(ks) FROM
test_array_agg_complex GROUP BY id ORDER BY id """
+ // add limit for param
+ order_qt_sql_array_agg_array_limit """ SELECT id, array_agg(kastr) FROM
test_array_agg_complex GROUP BY id ORDER BY id """
+ order_qt_sql_array_agg_map_limit """ SELECT id, array_agg(km) FROM
test_array_agg_complex GROUP BY id ORDER BY id """
+ order_qt_sql_array_agg_struct_limit """ SELECT id, array_agg(ks) FROM
test_array_agg_complex GROUP BY id ORDER BY id"""
+ order_qt_sql_collect_list_array_limit """ SELECT id, collect_list(kastr,
2) FROM test_array_agg_complex GROUP BY id ORDER BY id"""
+ order_qt_sql_collect_list_map_limit """ SELECT id, collect_list(km, 2)
FROM test_array_agg_complex GROUP BY id ORDER BY id"""
+ order_qt_sql_collect_list_struct_limit """ SELECT id, collect_list(ks, 3)
FROM test_array_agg_complex GROUP BY id ORDER BY id"""
+ order_qt_sql_group_array_array_limit """ SELECT group_array(kastr, 3) FROM
test_array_agg_complex GROUP BY id ORDER BY id"""
+ order_qt_sql_group_array_map_limit """ SELECT group_array(km, 7) FROM
test_array_agg_complex GROUP BY id ORDER BY id"""
+ order_qt_sql_group_array_struct_limit """ SELECT group_array(ks, 7) FROM
test_array_agg_complex GROUP BY id ORDER BY id"""
sql """ DROP TABLE IF EXISTS test_array_agg_ip;"""
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]