This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 45dfbeb8b3 [feature](agg) add the aggregation function 'array_agg' #23474
45dfbeb8b3 is described below
commit 45dfbeb8b313d1a7a290570e31af2ad7b2d2a39d
Author: xy <[email protected]>
AuthorDate: Thu Sep 7 10:21:55 2023 +0800
[feature](agg) add the aggregation function 'array_agg' #23474
This function takes a single argument, as in ARRAY_AGG(col), where col is the
column whose values you want to aggregate.
It aggregates the values in that column, including NULLs, into an array and
returns a value of the ARRAY data type.
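
A minimal usage sketch (illustrative only; the table `t` and column `c` below
are hypothetical, not part of this commit):

    -- given rows (1,'a'), (1,'b'), (1,NULL) in a table t(id INT, c VARCHAR NULL)
    SELECT id, array_agg(c) FROM t GROUP BY id;
    -- id 1 yields an ARRAY value such as ["a","b",NULL]; element order is not guaranteed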
---
.../aggregate_function_collect.cpp | 58 ++--
.../aggregate_function_collect.h | 317 ++++++++++++++++++++-
.../sql-functions/aggregate-functions/array-agg.md | 84 ++++++
.../sql-functions/aggregate-functions/array-agg.md | 84 ++++++
.../apache/doris/analysis/FunctionCallExpr.java | 3 +-
.../apache/doris/catalog/AggregateFunction.java | 3 +-
.../java/org/apache/doris/catalog/FunctionSet.java | 5 +
.../data/query_p0/aggregate/array_agg.out | 67 +++++
.../suites/query_p0/aggregate/array_agg.groovy | 195 +++++++++++++
9 files changed, 778 insertions(+), 38 deletions(-)
diff --git a/be/src/vec/aggregate_functions/aggregate_function_collect.cpp b/be/src/vec/aggregate_functions/aggregate_function_collect.cpp
index d6aab14411..2831f39aa3 100644
--- a/be/src/vec/aggregate_functions/aggregate_function_collect.cpp
+++ b/be/src/vec/aggregate_functions/aggregate_function_collect.cpp
@@ -27,21 +27,29 @@
namespace doris::vectorized {
-template <typename T, typename HasLimit>
+template <typename T, typename HasLimit, typename ShowNull>
AggregateFunctionPtr do_create_agg_function_collect(bool distinct, const DataTypes& argument_types,
                                                    const bool result_is_nullable) {
+    if (argument_types[0]->is_nullable()) {
+        if constexpr (ShowNull::value) {
+            return creator_without_type::create_ignore_nullable<AggregateFunctionCollect<
+                    AggregateFunctionArrayAggData<T>, std::false_type, std::true_type>>(
+                    argument_types, result_is_nullable);
+        }
+    }
+
    if (distinct) {
-        return creator_without_type::create<
-                AggregateFunctionCollect<AggregateFunctionCollectSetData<T, HasLimit>, HasLimit>>(
+        return creator_without_type::create<AggregateFunctionCollect<
+                AggregateFunctionCollectSetData<T, HasLimit>, HasLimit, std::false_type>>(
                argument_types, result_is_nullable);
    } else {
-        return creator_without_type::create<
-                AggregateFunctionCollect<AggregateFunctionCollectListData<T, HasLimit>, HasLimit>>(
+        return creator_without_type::create<AggregateFunctionCollect<
+                AggregateFunctionCollectListData<T, HasLimit>, HasLimit, std::false_type>>(
                argument_types, result_is_nullable);
    }
}
-template <typename HasLimit>
+template <typename HasLimit, typename ShowNull>
AggregateFunctionPtr create_aggregate_function_collect_impl(const std::string& name,
                                                            const DataTypes& argument_types,
                                                            const bool result_is_nullable) {
@@ -51,25 +59,25 @@ AggregateFunctionPtr create_aggregate_function_collect_impl(const std::string& n
    }
    WhichDataType which(remove_nullable(argument_types[0]));
-#define DISPATCH(TYPE)                                                                  \
-    if (which.idx == TypeIndex::TYPE)                                                   \
-        return do_create_agg_function_collect<TYPE, HasLimit>(distinct, argument_types, \
-                                                               result_is_nullable);
+#define DISPATCH(TYPE)                                                                            \
+    if (which.idx == TypeIndex::TYPE)                                                             \
+        return do_create_agg_function_collect<TYPE, HasLimit, ShowNull>(distinct, argument_types, \
+                                                                        result_is_nullable);
    FOR_NUMERIC_TYPES(DISPATCH)
    FOR_DECIMAL_TYPES(DISPATCH)
#undef DISPATCH
    if (which.is_date_or_datetime()) {
-        return do_create_agg_function_collect<Int64, HasLimit>(distinct, argument_types,
-                                                               result_is_nullable);
+        return do_create_agg_function_collect<Int64, HasLimit, ShowNull>(distinct, argument_types,
+                                                                         result_is_nullable);
    } else if (which.is_date_v2()) {
-        return do_create_agg_function_collect<UInt32, HasLimit>(distinct, argument_types,
-                                                                result_is_nullable);
+        return do_create_agg_function_collect<UInt32, HasLimit, ShowNull>(distinct, argument_types,
+                                                                          result_is_nullable);
    } else if (which.is_date_time_v2()) {
-        return do_create_agg_function_collect<UInt64, HasLimit>(distinct, argument_types,
-                                                                result_is_nullable);
+        return do_create_agg_function_collect<UInt64, HasLimit, ShowNull>(distinct, argument_types,
+                                                                          result_is_nullable);
    } else if (which.is_string()) {
-        return do_create_agg_function_collect<StringRef, HasLimit>(distinct, argument_types,
-                                                                   result_is_nullable);
+        return do_create_agg_function_collect<StringRef, HasLimit, ShowNull>(
+                distinct, argument_types, result_is_nullable);
    }
    LOG(WARNING) << fmt::format("unsupported input type {} for aggregate function {}",
@@ -81,12 +89,17 @@ AggregateFunctionPtr create_aggregate_function_collect(const std::string& name,
                                                        const DataTypes& argument_types,
                                                        const bool result_is_nullable) {
    if (argument_types.size() == 1) {
-        return create_aggregate_function_collect_impl<std::false_type>(name, argument_types,
-                                                                        result_is_nullable);
+        if (name == "array_agg") {
+            return create_aggregate_function_collect_impl<std::false_type, std::true_type>(
+                    name, argument_types, result_is_nullable);
+        } else {
+            return create_aggregate_function_collect_impl<std::false_type, std::false_type>(
+                    name, argument_types, result_is_nullable);
+        }
    }
    if (argument_types.size() == 2) {
-        return create_aggregate_function_collect_impl<std::true_type>(name, argument_types,
-                                                                      result_is_nullable);
+        return create_aggregate_function_collect_impl<std::true_type, std::false_type>(
+                name, argument_types, result_is_nullable);
    }
    LOG(WARNING) << fmt::format("number of parameters for aggregate function {}, should be 1 or 2",
                                name);
@@ -96,6 +109,7 @@ AggregateFunctionPtr create_aggregate_function_collect(const std::string& name,
void register_aggregate_function_collect_list(AggregateFunctionSimpleFactory& factory) {
    factory.register_function_both("collect_list", create_aggregate_function_collect);
    factory.register_function_both("collect_set", create_aggregate_function_collect);
+    factory.register_function_both("array_agg", create_aggregate_function_collect);
factory.register_alias("collect_list", "group_array");
factory.register_alias("collect_set", "group_uniq_array");
}
diff --git a/be/src/vec/aggregate_functions/aggregate_function_collect.h b/be/src/vec/aggregate_functions/aggregate_function_collect.h
index 4e774b887e..78aa01ac68 100644
--- a/be/src/vec/aggregate_functions/aggregate_function_collect.h
+++ b/be/src/vec/aggregate_functions/aggregate_function_collect.h
@@ -288,30 +288,173 @@ struct AggregateFunctionCollectListData<StringRef, HasLimit> {
    }
};
-template <typename Data, typename HasLimit>
+template <typename T>
+struct AggregateFunctionArrayAggData {
+    using ElementType = T;
+    using ColVecType = ColumnVectorOrDecimal<ElementType>;
+    MutableColumnPtr column_data;
+    ColVecType* nested_column;
+    NullMap* null_map;
+
+    AggregateFunctionArrayAggData(const DataTypes& argument_types) {
+        if constexpr (IsDecimalNumber<T>) {
+            DataTypePtr column_type = make_nullable(argument_types[0]);
+            column_data = column_type->create_column();
+            null_map = &(assert_cast<ColumnNullable&>(*column_data).get_null_map_data());
+            nested_column = assert_cast<ColVecType*>(
+                    assert_cast<ColumnNullable&>(*column_data).get_nested_column_ptr().get());
+        }
+    }
+
+    AggregateFunctionArrayAggData() {
+        if constexpr (!IsDecimalNumber<T>) {
+            column_data = ColumnNullable::create(ColVecType::create(), ColumnUInt8::create());
+            null_map = &(assert_cast<ColumnNullable&>(*column_data).get_null_map_data());
+            nested_column = assert_cast<ColVecType*>(
+                    assert_cast<ColumnNullable&>(*column_data).get_nested_column_ptr().get());
+        }
+    }
+
+    void add(const IColumn& column, size_t row_num) {
+        const auto& col = assert_cast<const ColumnNullable&>(column);
+        const auto& vec = assert_cast<const ColVecType&>(col.get_nested_column()).get_data();
+        null_map->push_back(col.get_null_map_data()[row_num]);
+        nested_column->get_data().push_back(vec[row_num]);
+        DCHECK(null_map->size() == nested_column->size());
+    }
+
+    void deserialize_and_merge(const IColumn& column, size_t row_num) {
+        auto& to_arr = assert_cast<const ColumnArray&>(column);
+        auto& to_nested_col = to_arr.get_data();
+        auto col_null = reinterpret_cast<const ColumnNullable*>(&to_nested_col);
+        const auto& vec = assert_cast<const ColVecType&>(col_null->get_nested_column()).get_data();
+        auto start = to_arr.get_offsets()[row_num - 1];
+        auto end = start + to_arr.get_offsets()[row_num] - to_arr.get_offsets()[row_num - 1];
+        for (auto i = start; i < end; ++i) {
+            null_map->push_back(col_null->get_null_map_data()[i]);
+            nested_column->get_data().push_back(vec[i]);
+        }
+    }
+
+    void reset() {
+        null_map->clear();
+        nested_column->clear();
+    }
+
+    void insert_result_into(IColumn& to) const {
+        auto& to_arr = assert_cast<ColumnArray&>(to);
+        auto& to_nested_col = to_arr.get_data();
+        auto col_null = reinterpret_cast<ColumnNullable*>(&to_nested_col);
+        auto& vec = assert_cast<ColVecType&>(col_null->get_nested_column()).get_data();
+        size_t num_rows = null_map->size();
+        auto& nested_column_data = nested_column->get_data();
+        for (size_t i = 0; i < num_rows; ++i) {
+            col_null->get_null_map_data().push_back((*null_map)[i]);
+            vec.push_back(nested_column_data[i]);
+        }
+        to_arr.get_offsets().push_back(to_nested_col.size());
+    }
+};
+
+template <>
+struct AggregateFunctionArrayAggData<StringRef> {
+    using ElementType = StringRef;
+    using ColVecType = ColumnString;
+    MutableColumnPtr column_data;
+    ColVecType* nested_column;
+    NullMap* null_map;
+
+    AggregateFunctionArrayAggData() {
+        column_data = ColumnNullable::create(ColVecType::create(), ColumnUInt8::create());
+        null_map = &(assert_cast<ColumnNullable&>(*column_data).get_null_map_data());
+        nested_column = assert_cast<ColVecType*>(
+                assert_cast<ColumnNullable&>(*column_data).get_nested_column_ptr().get());
+    }
+
+    void add(const IColumn& column, size_t row_num) {
+        const auto& col = assert_cast<const ColumnNullable&>(column);
+        const auto& vec = assert_cast<const ColVecType&>(col.get_nested_column());
+        null_map->push_back(col.get_null_map_data()[row_num]);
+        nested_column->insert_from(vec, row_num);
+        DCHECK(null_map->size() == nested_column->size());
+    }
+
+    void deserialize_and_merge(const IColumn& column, size_t row_num) {
+        auto& to_arr = assert_cast<const ColumnArray&>(column);
+        auto& to_nested_col = to_arr.get_data();
+        auto col_null = reinterpret_cast<const ColumnNullable*>(&to_nested_col);
+        const auto& vec = assert_cast<const ColVecType&>(col_null->get_nested_column());
+        auto start = to_arr.get_offsets()[row_num - 1];
+        auto end = start + to_arr.get_offsets()[row_num] - to_arr.get_offsets()[row_num - 1];
+        for (auto i = start; i < end; ++i) {
+            null_map->push_back(col_null->get_null_map_data()[i]);
+            nested_column->insert_from(vec, i);
+        }
+    }
+
+    void reset() {
+        null_map->clear();
+        nested_column->clear();
+    }
+
+    void insert_result_into(IColumn& to) const {
+        auto& to_arr = assert_cast<ColumnArray&>(to);
+        auto& to_nested_col = to_arr.get_data();
+        auto col_null = reinterpret_cast<ColumnNullable*>(&to_nested_col);
+        auto& vec = assert_cast<ColVecType&>(col_null->get_nested_column());
+        size_t num_rows = null_map->size();
+        for (size_t i = 0; i < num_rows; ++i) {
+            col_null->get_null_map_data().push_back((*null_map)[i]);
+            vec.insert_from(*nested_column, i);
+        }
+        to_arr.get_offsets().push_back(to_nested_col.size());
+    }
+};
+
+// ShowNull is only used to support array_agg, because array_agg needs to keep NULL values in its result
+// TODO: support order-by sorting for array_agg
+template <typename Data, typename HasLimit, typename ShowNull>
class AggregateFunctionCollect
-        : public IAggregateFunctionDataHelper<Data, AggregateFunctionCollect<Data, HasLimit>> {
+        : public IAggregateFunctionDataHelper<Data,
+                                              AggregateFunctionCollect<Data, HasLimit, ShowNull>> {
    using GenericType = AggregateFunctionCollectSetData<StringRef, HasLimit>;
    static constexpr bool ENABLE_ARENA = std::is_same_v<Data, GenericType>;
public:
+    using BaseHelper = IAggregateFunctionHelper<AggregateFunctionCollect<Data, HasLimit, ShowNull>>;
+
    AggregateFunctionCollect(const DataTypes& argument_types,
                             UInt64 max_size_ = std::numeric_limits<UInt64>::max())
-            : IAggregateFunctionDataHelper<Data, AggregateFunctionCollect<Data, HasLimit>>(
+            : IAggregateFunctionDataHelper<Data,
+                                           AggregateFunctionCollect<Data, HasLimit, ShowNull>>(
                      {argument_types}),
              return_type(argument_types[0]) {}
    std::string get_name() const override {
-        if constexpr (std::is_same_v<AggregateFunctionCollectListData<typename Data::ElementType,
-                                                                      HasLimit>,
-                                     Data>) {
+        if constexpr (ShowNull::value) {
+            return "array_agg";
+        } else if constexpr (std::is_same_v<AggregateFunctionCollectListData<
+                                                    typename Data::ElementType, HasLimit>,
+                                            Data>) {
            return "collect_list";
        } else {
            return "collect_set";
        }
    }
+    void create(AggregateDataPtr __restrict place) const override {
+        if constexpr (ShowNull::value) {
+            if constexpr (IsDecimalNumber<typename Data::ElementType>) {
+                new (place) Data(argument_types);
+            } else {
+                new (place) Data();
+            }
+        } else {
+            new (place) Data();
+        }
+    }
+
    DataTypePtr get_return_type() const override {
        return std::make_shared<DataTypeArray>(make_nullable(return_type));
    }
@@ -343,35 +486,181 @@ public:
auto& rhs_data = this->data(rhs);
if constexpr (ENABLE_ARENA) {
data.merge(rhs_data, arena);
- } else {
+ } else if constexpr (!ShowNull::value) {
data.merge(rhs_data);
}
}
    void serialize(ConstAggregateDataPtr __restrict place, BufferWritable& buf) const override {
-        this->data(place).write(buf);
+        if constexpr (!ShowNull::value) {
+            this->data(place).write(buf);
+        }
    }
    void deserialize(AggregateDataPtr __restrict place, BufferReadable& buf,
                     Arena*) const override {
-        this->data(place).read(buf);
+        if constexpr (!ShowNull::value) {
+            this->data(place).read(buf);
+        }
    }
    void insert_result_into(ConstAggregateDataPtr __restrict place, IColumn& to) const override {
        auto& to_arr = assert_cast<ColumnArray&>(to);
        auto& to_nested_col = to_arr.get_data();
-        if (to_nested_col.is_nullable()) {
+        if constexpr (ShowNull::value) {
+            DCHECK(to_nested_col.is_nullable());
+            this->data(place).insert_result_into(to);
+        } else {
+            if (to_nested_col.is_nullable()) {
+                auto col_null = reinterpret_cast<ColumnNullable*>(&to_nested_col);
+                this->data(place).insert_result_into(col_null->get_nested_column());
+                col_null->get_null_map_data().resize_fill(col_null->get_nested_column().size(), 0);
+            } else {
+                this->data(place).insert_result_into(to_nested_col);
+            }
+            to_arr.get_offsets().push_back(to_nested_col.size());
+        }
+    }
+
+    void serialize_without_key_to_column(ConstAggregateDataPtr __restrict place,
+                                         IColumn& to) const override {
+        if constexpr (ShowNull::value) {
+            this->data(place).insert_result_into(to);
+        } else {
+            return BaseHelper::serialize_without_key_to_column(place, to);
+        }
+    }
+
+    void deserialize_and_merge_from_column(AggregateDataPtr __restrict place, const IColumn& column,
+                                           Arena* arena) const override {
+        if constexpr (ShowNull::value) {
+            const size_t num_rows = column.size();
+            for (size_t i = 0; i != num_rows; ++i) {
+                this->data(place).deserialize_and_merge(column, i);
+            }
+        } else {
+            return BaseHelper::deserialize_and_merge_from_column(place, column, arena);
+        }
+    }
+
+    void deserialize_and_merge_vec(const AggregateDataPtr* places, size_t offset,
+                                   AggregateDataPtr rhs, const ColumnString* column, Arena* arena,
+                                   const size_t num_rows) const override {
+        if constexpr (ShowNull::value) {
+            for (size_t i = 0; i != num_rows; ++i) {
+                this->data(places[i]).deserialize_and_merge(*assert_cast<const IColumn*>(column),
+                                                            i);
+            }
+        } else {
+            return BaseHelper::deserialize_and_merge_vec(places, offset, rhs, column, arena,
+                                                         num_rows);
+        }
+    }
+
+    void deserialize_from_column(AggregateDataPtr places, const IColumn& column, Arena* arena,
+                                 size_t num_rows) const override {
+        if constexpr (ShowNull::value) {
+            for (size_t i = 0; i != num_rows; ++i) {
+                this->data(places).deserialize_and_merge(column, i);
+            }
+        } else {
+            return BaseHelper::deserialize_from_column(places, column, arena, num_rows);
+        }
+    }
+
+    void deserialize_and_merge_from_column_range(AggregateDataPtr __restrict place,
+                                                 const IColumn& column, size_t begin, size_t end,
+                                                 Arena* arena) const override {
+        if constexpr (ShowNull::value) {
+            DCHECK(end <= column.size() && begin <= end) << ", begin:" << begin << ", end:" << end
+                                                         << ", column.size():" << column.size();
+            for (size_t i = begin; i <= end; ++i) {
+                this->data(place).deserialize_and_merge(column, i);
+            }
+        } else {
+            return BaseHelper::deserialize_and_merge_from_column_range(place, column, begin, end,
+                                                                       arena);
+        }
+    }
+
+    void deserialize_and_merge_vec_selected(const AggregateDataPtr* places, size_t offset,
+                                            AggregateDataPtr rhs, const ColumnString* column,
+                                            Arena* arena, const size_t num_rows) const override {
+        if constexpr (ShowNull::value) {
+            for (size_t i = 0; i != num_rows; ++i) {
+                if (places[i]) {
+                    this->data(places[i]).deserialize_and_merge(
+                            *assert_cast<const IColumn*>(column), i);
+                }
+            }
+        } else {
+            return BaseHelper::deserialize_and_merge_vec_selected(places, offset, rhs, column,
+                                                                  arena, num_rows);
+        }
+    }
+
+    void serialize_to_column(const std::vector<AggregateDataPtr>& places, size_t offset,
+                             MutableColumnPtr& dst, const size_t num_rows) const override {
+        if constexpr (ShowNull::value) {
+            for (size_t i = 0; i != num_rows; ++i) {
+                Data& data_ = this->data(places[i] + offset);
+                data_.insert_result_into(*dst);
+            }
+        } else {
+            return BaseHelper::serialize_to_column(places, offset, dst, num_rows);
+        }
+    }
+
+    void streaming_agg_serialize_to_column(const IColumn** columns, MutableColumnPtr& dst,
+                                           const size_t num_rows, Arena* arena) const override {
+        if constexpr (ShowNull::value) {
+            auto& to_arr = assert_cast<ColumnArray&>(*dst);
+            auto& to_nested_col = to_arr.get_data();
+            DCHECK(num_rows == columns[0]->size());
            auto col_null = reinterpret_cast<ColumnNullable*>(&to_nested_col);
-            this->data(place).insert_result_into(col_null->get_nested_column());
-            col_null->get_null_map_data().resize_fill(col_null->get_nested_column().size(), 0);
+            const auto& col_src = assert_cast<const ColumnNullable&>(*(columns[0]));
+
+            for (size_t i = 0; i < num_rows; ++i) {
+                col_null->get_null_map_data().push_back(col_src.get_null_map_data()[i]);
+                if constexpr (std::is_same_v<StringRef, typename Data::ElementType>) {
+                    auto& vec = assert_cast<ColumnString&>(col_null->get_nested_column());
+                    const auto& vec_src =
+                            assert_cast<const ColumnString&>(col_src.get_nested_column());
+                    vec.insert_from(vec_src, i);
+                } else {
+                    using ColVecType = ColumnVectorOrDecimal<typename Data::ElementType>;
+                    auto& vec = assert_cast<ColVecType&>(col_null->get_nested_column()).get_data();
+                    auto& vec_src =
+                            assert_cast<const ColVecType&>(col_src.get_nested_column()).get_data();
+                    vec.push_back(vec_src[i]);
+                }
+                to_arr.get_offsets().push_back(to_nested_col.size());
+            }
+
        } else {
-            this->data(place).insert_result_into(to_nested_col);
+            return BaseHelper::streaming_agg_serialize_to_column(columns, dst, num_rows, arena);
+        }
+    }
+
+    [[nodiscard]] MutableColumnPtr create_serialize_column() const override {
+        if constexpr (ShowNull::value) {
+            return get_return_type()->create_column();
+        } else {
+            return ColumnString::create();
+        }
+    }
+
+    [[nodiscard]] DataTypePtr get_serialized_type() const override {
+        if constexpr (ShowNull::value) {
+            return std::make_shared<DataTypeArray>(make_nullable(return_type));
+        } else {
+            return IAggregateFunction::get_serialized_type();
        }
-        to_arr.get_offsets().push_back(to_nested_col.size());
    }

private:
    DataTypePtr return_type;
+    using IAggregateFunction::argument_types;
};
} // namespace doris::vectorized
\ No newline at end of file
diff --git a/docs/en/docs/sql-manual/sql-functions/aggregate-functions/array-agg.md b/docs/en/docs/sql-manual/sql-functions/aggregate-functions/array-agg.md
new file mode 100644
index 0000000000..de61da2862
--- /dev/null
+++ b/docs/en/docs/sql-manual/sql-functions/aggregate-functions/array-agg.md
@@ -0,0 +1,84 @@
+---
+{
+ "title": "ARRAY_AGG",
+ "language": "en"
+}
+---
+
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied. See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+## ARRAY_AGG
+
+### description
+
+#### Syntax
+
+`ARRAY_AGG(col)`
+
+Aggregates the values in a column, including NULL values, into an array. It can be used to collapse multiple rows into a single row.
+
+### notice
+
+- The order of the elements in an array is not guaranteed.
+- Returns the array produced by the aggregation. The element type of the array is the same as the type of `col`.
+
+### example
+
+```sql
+mysql> select * from test_doris_array_agg;
++------+------+
+| c1   | c2   |
++------+------+
+|    1 | a    |
+|    1 | b    |
+|    2 | c    |
+|    2 | NULL |
+|    3 | NULL |
++------+------+
+
+mysql> select c1, array_agg(c2) from test_doris_array_agg group by c1;
++------+-----------------+
+| c1   | array_agg(`c2`) |
++------+-----------------+
+|    1 | ["a","b"]       |
+|    2 | [NULL,"c"]      |
+|    3 | [NULL]          |
++------+-----------------+
+```
+
+### keywords
+
+ARRAY_AGG
diff --git a/docs/zh-CN/docs/sql-manual/sql-functions/aggregate-functions/array-agg.md b/docs/zh-CN/docs/sql-manual/sql-functions/aggregate-functions/array-agg.md
new file mode 100644
index 0000000000..34549ab059
--- /dev/null
+++ b/docs/zh-CN/docs/sql-manual/sql-functions/aggregate-functions/array-agg.md
@@ -0,0 +1,84 @@
+---
+{
+ "title": "ARRAY_AGG",
+ "language": "zh-CN"
+}
+---
+
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied. See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+## ARRAY_AGG
+
+### description
+
+#### Syntax
+
+`ARRAY_AGG(col)`
+
+将一列中的值(包括空值 null)串联成一个数组,可以用于多行转一行(行转列)。
+
+### notice
+
+- 数组中元素不保证顺序。
+- 返回转换生成的数组。数组中的元素类型与 `col` 类型一致。
+
+### example
+
+```sql
+mysql> select * from test_doris_array_agg;
++------+------+
+| c1   | c2   |
++------+------+
+|    1 | a    |
+|    1 | b    |
+|    2 | c    |
+|    2 | NULL |
+|    3 | NULL |
++------+------+
+
+mysql> select c1, array_agg(c2) from test_doris_array_agg group by c1;
++------+-----------------+
+| c1   | array_agg(`c2`) |
++------+-----------------+
+|    1 | ["a","b"]       |
+|    2 | [NULL,"c"]      |
+|    3 | [NULL]          |
++------+-----------------+
+```
+
+### keywords
+
+ARRAY_AGG
diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java
index 53e3458c3e..855f2036d8 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java
@@ -1566,7 +1566,8 @@ public class FunctionCallExpr extends Expr {
}
if (fnName.getFunction().equalsIgnoreCase("collect_list")
- || fnName.getFunction().equalsIgnoreCase("collect_set")) {
+ || fnName.getFunction().equalsIgnoreCase("collect_set")
+ || fnName.getFunction().equalsIgnoreCase("array_agg")) {
fn.setReturnType(new ArrayType(getChild(0).type));
}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/AggregateFunction.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/AggregateFunction.java
index b646c7ef98..2dceb302b7 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/AggregateFunction.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/AggregateFunction.java
@@ -54,7 +54,8 @@ public class AggregateFunction extends Function {
            FunctionSet.INTERSECT_COUNT, FunctionSet.ORTHOGONAL_BITMAP_UNION_COUNT,
            FunctionSet.COUNT, "approx_count_distinct", "ndv", FunctionSet.BITMAP_UNION_INT,
            FunctionSet.BITMAP_UNION_COUNT, "ndv_no_finalize", FunctionSet.WINDOW_FUNNEL, FunctionSet.RETENTION,
-            FunctionSet.SEQUENCE_MATCH, FunctionSet.SEQUENCE_COUNT, FunctionSet.MAP_AGG, FunctionSet.BITMAP_AGG);
+            FunctionSet.SEQUENCE_MATCH, FunctionSet.SEQUENCE_COUNT, FunctionSet.MAP_AGG, FunctionSet.BITMAP_AGG,
+            FunctionSet.ARRAY_AGG);
    public static ImmutableSet<String> ALWAYS_NULLABLE_AGGREGATE_FUNCTION_NAME_SET =
            ImmutableSet.of("stddev_samp", "variance_samp", "var_samp", "percentile_approx");
diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java
index cde3cecf17..e1c62cc739 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java
@@ -572,6 +572,8 @@ public class FunctionSet<T> {
public static final String GROUP_ARRAY = "group_array";
+ public static final String ARRAY_AGG = "array_agg";
+
// Populate all the aggregate builtins in the catalog.
    // null symbols indicate the function does not need that step of the evaluation.
// An empty symbol indicates a TODO for the BE to implement the function.
@@ -1402,6 +1404,9 @@ public class FunctionSet<T> {
AggregateFunction.createBuiltin(GROUP_ARRAY,
Lists.newArrayList(t, Type.INT), new ArrayType(t),
t, "", "", "", "", "", true, false, true, true));
+        addBuiltin(AggregateFunction.createBuiltin(ARRAY_AGG, Lists.newArrayList(t), new ArrayType(t), t, "", "", "", "", "",
+                true, false, true, true));
+
        //first_value/last_value for array
        addBuiltin(AggregateFunction.createAnalyticBuiltin("first_value",
                Lists.newArrayList(new ArrayType(t)), new ArrayType(t), Type.ARRAY,
diff --git a/regression-test/data/query_p0/aggregate/array_agg.out b/regression-test/data/query_p0/aggregate/array_agg.out
new file mode 100644
index 0000000000..ee7260205a
--- /dev/null
+++ b/regression-test/data/query_p0/aggregate/array_agg.out
@@ -0,0 +1,67 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !sql1 --
+["LC", "LB", "alex"]
+["LC", "LB", "LA"]
+["LC", NULL, "LA"]
+["LC", "LB", "LA"]
+[NULL, "LC", "LB", "LA"]
+[NULL, "LC", "LC", "LC", "LC"]
+[NULL, "LC", "LC", "LC", "LC"]
+
+-- !sql2 --
+["alex", NULL, "LC", "LC", "LC", "LC"]
+["LB"]
+["LC"]
+["LA"]
+["LB"]
+["LC"]
+["LA"]
+["LC"]
+["LA"]
+["LB"]
+["LC"]
+["LA"]
+["LB"]
+[NULL, "LC"]
+[NULL, "LC", "LC"]
+[NULL, "LC", "LC"]
+
+-- !sql3 --
+["LC", "LB", "alex", "LC", "LB", "LA", "LC", NULL, "LA", "LC", "LB", "LA", NULL, "LC", "LB", "LA", NULL, "LC", "LC", "LC", "LC", NULL, "LC", "LC", "LC", "LC"]
+
+-- !sql4 --
+["V1_3", "V1_2", NULL, "V2_3", "V2_2", "V2_1", "V3_3", NULL, "V3_1", "V4_3", "V4_2", "V4_1", "V5_3", "V5_3", "V5_2", "V5_1", "V6_3", NULL, "V6_3", NULL, "V6_3", "V7_3", NULL, "V7_3", NULL, "V7_3"]
+
+-- !sql5 --
+1 [2, 1, NULL]
+2 [5, 5, 4]
+3 [NULL, 6, 6]
+4 [6, 6, 5]
+5 [NULL, 5, 6]
+6 [NULL, NULL, NULL, NULL, NULL]
+7 [NULL, NULL, NULL, NULL, NULL]
+
+-- !sql6 --
+[""]
+
+-- !sql7 --
+["LC", "LB", "alex"]
+["LC", "LB", "LA"]
+["LC", NULL, "LA"]
+["LC", "LB", "LA"]
+["LC", "LB", "LA"]
+[""]
+[""]
+
+-- !sql8 --
+[NULL]
+
+-- !sql9 --
+1 [1.11, NULL, NULL]
+2 [1.21, NULL, 1.23]
+3 [1.24, 1.23, 1.21]
+4 [1.22, NULL, 1.22]
+5 [NULL, NULL, NULL]
+7 [NULL]
+8 [NULL]
+
diff --git a/regression-test/suites/query_p0/aggregate/array_agg.groovy b/regression-test/suites/query_p0/aggregate/array_agg.groovy
new file mode 100644
index 0000000000..1463eed647
--- /dev/null
+++ b/regression-test/suites/query_p0/aggregate/array_agg.groovy
@@ -0,0 +1,195 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("array_agg") {
+ sql "DROP TABLE IF EXISTS `test_array_agg`;"
+ sql "DROP TABLE IF EXISTS `test_array_agg_int`;"
+ sql "DROP TABLE IF EXISTS `test_array_agg_decimal`;"
+ sql """
+ CREATE TABLE `test_array_agg` (
+ `id` int(11) NOT NULL,
+ `label_name` varchar(32) default null,
+        `value_field` string default null
+ ) ENGINE=OLAP
+ DUPLICATE KEY(`id`)
+ COMMENT 'OLAP'
+ DISTRIBUTED BY HASH(`id`) BUCKETS 1
+ PROPERTIES (
+ "replication_allocation" = "tag.location.default: 1",
+ "storage_format" = "V2",
+ "light_schema_change" = "true",
+ "disable_auto_compaction" = "false",
+ "enable_single_replica_compaction" = "false"
+ );
+ """
+
+ sql """
+ CREATE TABLE `test_array_agg_int` (
+ `id` int(11) NOT NULL,
+ `label_name` varchar(32) default null,
+ `value_field` string default null,
+ `age` int(11) default null
+ ) ENGINE=OLAP
+ DUPLICATE KEY(`id`)
+ COMMENT 'OLAP'
+ DISTRIBUTED BY HASH(`id`) BUCKETS 1
+ PROPERTIES (
+ "replication_allocation" = "tag.location.default: 1",
+ "storage_format" = "V2",
+ "light_schema_change" = "true",
+ "disable_auto_compaction" = "false",
+ "enable_single_replica_compaction" = "false"
+ );
+ """
+
+ sql """
+ CREATE TABLE `test_array_agg_decimal` (
+ `id` int(11) NOT NULL,
+ `label_name` varchar(32) default null,
+ `value_field` string default null,
+ `age` int(11) default null,
+ `o_totalprice` DECIMAL(15, 2) default NULL,
+ `label_name_not_null` varchar(32) not null
+ )ENGINE=OLAP
+ DUPLICATE KEY(`id`)
+ COMMENT 'OLAP'
+ DISTRIBUTED BY HASH(`id`) BUCKETS 1
+ PROPERTIES (
+ "replication_allocation" = "tag.location.default: 1",
+ "storage_format" = "V2",
+ "light_schema_change" = "true",
+ "disable_auto_compaction" = "false",
+ "enable_single_replica_compaction" = "false"
+ );
+ """
+
+ sql """
+ insert into `test_array_agg` values
+ (1, "alex",NULL),
+ (1, "LB", "V1_2"),
+ (1, "LC", "V1_3"),
+ (2, "LA", "V2_1"),
+ (2, "LB", "V2_2"),
+ (2, "LC", "V2_3"),
+ (3, "LA", "V3_1"),
+ (3, NULL, NULL),
+ (3, "LC", "V3_3"),
+ (4, "LA", "V4_1"),
+ (4, "LB", "V4_2"),
+ (4, "LC", "V4_3"),
+ (5, "LA", "V5_1"),
+ (5, "LB", "V5_2"),
+ (5, "LC", "V5_3"),
+ (5, NULL, "V5_3"),
+ (6, "LC", "V6_3"),
+ (6, "LC", NULL),
+ (6, "LC", "V6_3"),
+ (6, "LC", NULL),
+ (6, NULL, "V6_3"),
+ (7, "LC", "V7_3"),
+ (7, "LC", NULL),
+ (7, "LC", "V7_3"),
+ (7, "LC", NULL),
+ (7, NULL, "V7_3");
+ """
+
+ sql """
+ insert into `test_array_agg_int` values
+ (1, "alex",NULL,NULL),
+ (1, "LB", "V1_2",1),
+ (1, "LC", "V1_3",2),
+ (2, "LA", "V2_1",4),
+ (2, "LB", "V2_2",5),
+ (2, "LC", "V2_3",5),
+ (3, "LA", "V3_1",6),
+ (3, NULL, NULL,6),
+ (3, "LC", "V3_3",NULL),
+ (4, "LA", "V4_1",5),
+ (4, "LB", "V4_2",6),
+ (4, "LC", "V4_3",6),
+ (5, "LA", "V5_1",6),
+ (5, "LB", "V5_2",5),
+ (5, "LC", "V5_3",NULL),
+ (6, "LC", "V6_3",NULL),
+ (6, "LC", NULL,NULL),
+ (6, "LC", "V6_3",NULL),
+ (6, "LC", NULL,NULL),
+ (6, NULL, "V6_3",NULL),
+ (7, "LC", "V7_3",NULL),
+ (7, "LC", NULL,NULL),
+ (7, "LC", "V7_3",NULL),
+ (7, "LC", NULL,NULL),
+ (7, NULL, "V7_3",NULL);
+ """
+
+ sql """
+ insert into `test_array_agg_decimal` values
+ (1, "alex",NULL,NULL,NULL,"alex"),
+ (1, "LB", "V1_2",1,NULL,"alexxing"),
+ (1, "LC", "V1_3",2,1.11,"alexcoco"),
+ (2, "LA", "V2_1",4,1.23,"alex662"),
+ (2, "LB", "",5,NULL,""),
+ (2, "LC", "",5,1.21,"alexcoco1"),
+ (3, "LA", "V3_1",6,1.21,"alexcoco2"),
+ (3, NULL, NULL,6,1.23,"alexcoco3"),
+ (3, "LC", "V3_3",NULL,1.24,"alexcoco662"),
+ (4, "LA", "",5,1.22,"alexcoco662"),
+ (4, "LB", "V4_2",6,NULL,"alexcoco662"),
+ (4, "LC", "V4_3",6,1.22,"alexcoco662"),
+ (5, "LA", "V5_1",6,NULL,"alexcoco662"),
+ (5, "LB", "V5_2",5,NULL,"alexcoco662"),
+ (5, "LC", "V5_3",NULL,NULL,"alexcoco662"),
+ (7, "", NULL,NULL,NULL,"alexcoco1"),
+ (8, "", NULL,0,NULL,"alexcoco2");
+ """
+
+ qt_sql1 """
+    SELECT array_agg(`label_name`) FROM `test_array_agg` GROUP BY `id` order by id;
+ """
+ qt_sql2 """
+    SELECT array_agg(label_name) FROM `test_array_agg` GROUP BY value_field order by value_field;
+ """
+ qt_sql3 """
+ SELECT array_agg(`label_name`) FROM `test_array_agg`;
+ """
+ qt_sql4 """
+ SELECT array_agg(`value_field`) FROM `test_array_agg`;
+ """
+ qt_sql5 """
+ SELECT id, array_agg(age) FROM test_array_agg_int GROUP BY id order by id;
+ """
+
+ qt_sql6 """
+ select array_agg(label_name) from test_array_agg_decimal where id=7;
+ """
+
+ qt_sql7 """
+    select array_agg(label_name) from test_array_agg_decimal group by id order by id;
+ """
+
+ qt_sql8 """
+ select array_agg(age) from test_array_agg_decimal where id=7;
+ """
+
+ qt_sql9 """
+    select id,array_agg(o_totalprice) from test_array_agg_decimal group by id order by id;
+ """
+
+ sql "DROP TABLE `test_array_agg`"
+ sql "DROP TABLE `test_array_agg_int`"
+ sql "DROP TABLE `test_array_agg_decimal`"
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]