This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch branch-4.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-4.0 by this push:
new d846408bfdb branch-4.0: [function](agg) support agg function of group_array_union #57166 (#57596)
d846408bfdb is described below
commit d846408bfdb7b5da532b9bdf687d8c341657ca8c
Author: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Tue Nov 4 21:22:30 2025 +0800
branch-4.0: [function](agg) support agg function of group_array_union #57166 (#57596)
Cherry-picked from #57166
Co-authored-by: zhangstar333 <[email protected]>
---
be/src/exprs/hybrid_set.h | 53 ++-
.../aggregate_function_group_array_intersect.cpp | 66 ---
.../aggregate_function_group_array_intersect.h | 507 ---------------------
.../aggregate_function_group_array_set_op.cpp | 158 +++++++
.../aggregate_function_group_array_set_op.h | 480 +++++++++++++++++++
.../aggregate_function_simple_factory.cpp | 4 +-
.../agg_group_array_intersect_test.cpp | 38 +-
.../doris/catalog/BuiltinAggregateFunctions.java | 2 +
.../expressions/functions/agg/GroupArrayUnion.java | 87 ++++
.../visitor/AggregateFunctionVisitor.java | 5 +
.../query_p0/aggregate/group_array_intersect.out | 95 ++++
.../aggregate/group_array_intersect.groovy | 32 ++
12 files changed, 930 insertions(+), 597 deletions(-)
diff --git a/be/src/exprs/hybrid_set.h b/be/src/exprs/hybrid_set.h
index 4fa96511421..f4445276937 100644
--- a/be/src/exprs/hybrid_set.h
+++ b/be/src/exprs/hybrid_set.h
@@ -221,6 +221,9 @@ public:
// use in vectorize execute engine
virtual void insert(void* data, size_t) = 0;
+ virtual void insert_range_from(const vectorized::ColumnPtr& column, size_t
start,
+ size_t end) = 0;
+
virtual void insert_fixed_len(const vectorized::ColumnPtr& column, size_t
start) = 0;
virtual void insert(HybridSetBase* set) {
@@ -291,8 +294,16 @@ public:
void insert(void* data, size_t /*unused*/) override { insert(data); }
void insert_fixed_len(const vectorized::ColumnPtr& column, size_t start)
override {
- const auto size = column->size();
+ insert_range_from(column, start, column->size());
+ }
+ void insert_range_from(const vectorized::ColumnPtr& column, size_t start,
size_t end) override {
+ if (end > column->size()) {
+ throw doris::Exception(doris::ErrorCode::INTERNAL_ERROR,
+ "Parameters start = {}, end = {}, are out
of bound in "
+ "HybridSet::insert_range_from method
(data.size() = {}).",
+ start, end, column->size());
+ }
if (column->is_nullable()) {
const auto* nullable = assert_cast<const
vectorized::ColumnNullable*>(column.get());
const auto& col = nullable->get_nested_column();
@@ -301,7 +312,7 @@ public:
.get_data();
const ElementType* data = (ElementType*)col.get_raw_data().data;
- for (size_t i = start; i < size; i++) {
+ for (size_t i = start; i < end; i++) {
if (!nullmap[i]) {
_set.insert(*(data + i));
} else {
@@ -310,7 +321,7 @@ public:
}
} else {
const ElementType* data =
(ElementType*)column->get_raw_data().data;
- for (size_t i = start; i < size; i++) {
+ for (size_t i = start; i < end; i++) {
_set.insert(*(data + i));
}
}
@@ -448,6 +459,16 @@ public:
}
void insert_fixed_len(const vectorized::ColumnPtr& column, size_t start)
override {
+ insert_range_from(column, start, column->size());
+ }
+
+ void insert_range_from(const vectorized::ColumnPtr& column, size_t start,
size_t end) override {
+ if (end > column->size()) {
+ throw doris::Exception(doris::ErrorCode::INTERNAL_ERROR,
+ "Parameters start = {}, end = {}, are out
of bound in "
+ "StringSet::insert_range_from method
(data.size() = {}).",
+ start, end, column->size());
+ }
if (column->is_nullable()) {
const auto* nullable = assert_cast<const
vectorized::ColumnNullable*>(column.get());
const auto& nullmap =
@@ -456,19 +477,19 @@ public:
if (nullable->get_nested_column().is_column_string64()) {
_insert_fixed_len_string(assert_cast<const
vectorized::ColumnString64&>(
nullable->get_nested_column()),
- nullmap.data(), start,
nullmap.size());
+ nullmap.data(), start, end);
} else {
_insert_fixed_len_string(
assert_cast<const
vectorized::ColumnString&>(nullable->get_nested_column()),
- nullmap.data(), start, nullmap.size());
+ nullmap.data(), start, end);
}
} else {
if (column->is_column_string64()) {
_insert_fixed_len_string(assert_cast<const
vectorized::ColumnString64&>(*column),
- nullptr, start, column->size());
+ nullptr, start, end);
} else {
_insert_fixed_len_string(assert_cast<const
vectorized::ColumnString&>(*column),
- nullptr, start, column->size());
+ nullptr, start, end);
}
}
}
@@ -618,6 +639,16 @@ public:
}
void insert_fixed_len(const vectorized::ColumnPtr& column, size_t start)
override {
+ insert_range_from(column, start, column->size());
+ }
+
+ void insert_range_from(const vectorized::ColumnPtr& column, size_t start,
size_t end) override {
+ if (end > column->size()) {
+ throw doris::Exception(doris::ErrorCode::INTERNAL_ERROR,
+ "Parameters start = {}, end = {}, are out
of bound in "
+ "StringSet::insert_range_from method
(data.size() = {}).",
+ start, end, column->size());
+ }
if (column->is_nullable()) {
const auto* nullable = assert_cast<const
vectorized::ColumnNullable*>(column.get());
const auto& nullmap =
@@ -626,19 +657,19 @@ public:
if (nullable->get_nested_column().is_column_string64()) {
_insert_fixed_len_string(assert_cast<const
vectorized::ColumnString64&>(
nullable->get_nested_column()),
- nullmap.data(), start,
nullmap.size());
+ nullmap.data(), start, end);
} else {
_insert_fixed_len_string(
assert_cast<const
vectorized::ColumnString&>(nullable->get_nested_column()),
- nullmap.data(), start, nullmap.size());
+ nullmap.data(), start, end);
}
} else {
if (column->is_column_string64()) {
_insert_fixed_len_string(assert_cast<const
vectorized::ColumnString64&>(*column),
- nullptr, start, column->size());
+ nullptr, start, end);
} else {
_insert_fixed_len_string(assert_cast<const
vectorized::ColumnString&>(*column),
- nullptr, start, column->size());
+ nullptr, start, end);
}
}
}
diff --git a/be/src/vec/aggregate_functions/aggregate_function_group_array_intersect.cpp b/be/src/vec/aggregate_functions/aggregate_function_group_array_intersect.cpp
deleted file mode 100644
index 697e5776824..00000000000
--- a/be/src/vec/aggregate_functions/aggregate_function_group_array_intersect.cpp
+++ /dev/null
@@ -1,66 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-// This file is copied from
-//
https://github.com/ClickHouse/ClickHouse/blob/master/src/AggregateFunctions/AggregateFunctionGroupArrayIntersect.cpp
-// and modified by Doris
-
-#include "vec/aggregate_functions/aggregate_function_group_array_intersect.h"
-
-#include "vec/aggregate_functions/factory_helpers.h"
-#include "vec/aggregate_functions/helpers.h"
-
-namespace doris::vectorized {
-#include "common/compile_check_begin.h"
-
-inline AggregateFunctionPtr
create_aggregate_function_group_array_intersect_impl(
- const std::string& name, const DataTypes& argument_types, const bool
result_is_nullable,
- const AggregateFunctionAttr& attr) {
- const auto& nested_type = remove_nullable(
- dynamic_cast<const
DataTypeArray&>(*(argument_types[0])).get_nested_type());
- AggregateFunctionPtr res = creator_with_type_list<
- TYPE_TINYINT, TYPE_SMALLINT, TYPE_INT, TYPE_BIGINT, TYPE_LARGEINT,
TYPE_DATEV2,
-
TYPE_DATETIMEV2>::create<AggregateFunctionGroupArrayIntersect>(argument_types,
-
result_is_nullable,
-
attr);
-
- if (!res) {
- res = AggregateFunctionPtr(new
AggregateFunctionGroupArrayIntersectGeneric(argument_types));
- }
- return res;
-}
-
-AggregateFunctionPtr create_aggregate_function_group_array_intersect(
- const std::string& name, const DataTypes& argument_types, const bool
result_is_nullable,
- const AggregateFunctionAttr& attr) {
- assert_arity_range(name, argument_types, 1, 1);
- const DataTypePtr& argument_type = remove_nullable(argument_types[0]);
-
- if (argument_type->get_primitive_type() != TYPE_ARRAY) {
- throw Exception(ErrorCode::INVALID_ARGUMENT,
- "Aggregate function groupArrayIntersect accepts only
array type argument. "
- "Provided argument type: " +
- argument_type->get_name());
- }
- return create_aggregate_function_group_array_intersect_impl(name,
{argument_type},
-
result_is_nullable, attr);
-}
-
-void
register_aggregate_function_group_array_intersect(AggregateFunctionSimpleFactory&
factory) {
- factory.register_function_both("group_array_intersect",
-
create_aggregate_function_group_array_intersect);
-}
-} // namespace doris::vectorized
diff --git a/be/src/vec/aggregate_functions/aggregate_function_group_array_intersect.h b/be/src/vec/aggregate_functions/aggregate_function_group_array_intersect.h
deleted file mode 100644
index dd0982902ea..00000000000
--- a/be/src/vec/aggregate_functions/aggregate_function_group_array_intersect.h
+++ /dev/null
@@ -1,507 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-// This file is copied from
-//
https://github.com/ClickHouse/ClickHouse/blob/master/src/AggregateFunctions/AggregateFunctionGroupArrayIntersect.cpp
-// and modified by Doris
-
-#include <memory>
-
-#include "exprs/hybrid_set.h"
-#include "vec/aggregate_functions/aggregate_function.h"
-#include "vec/aggregate_functions/aggregate_function_simple_factory.h"
-#include "vec/columns/column_array.h"
-#include "vec/common/assert_cast.h"
-#include "vec/core/field.h"
-#include "vec/data_types/data_type_array.h"
-#include "vec/data_types/data_type_date_or_datetime_v2.h"
-#include "vec/data_types/data_type_string.h"
-
-namespace doris::vectorized {
-#include "common/compile_check_begin.h"
-class Arena;
-class BufferReadable;
-class BufferWritable;
-} // namespace doris::vectorized
-
-namespace doris::vectorized {
-
-template <PrimitiveType T>
-class NullableNumericOrDateSet
- : public HybridSet<T == TYPE_BOOLEAN ? TYPE_TINYINT : T,
- DynamicContainer<typename PrimitiveTypeTraits<
- T == TYPE_BOOLEAN ? TYPE_TINYINT :
T>::CppType>> {
-public:
- NullableNumericOrDateSet()
- : HybridSet < T
- == TYPE_BOOLEAN
- ? TYPE_TINYINT
- : T,
- DynamicContainer < typename PrimitiveTypeTraits < T == TYPE_BOOLEAN ?
TYPE_TINYINT
- : T >
::CppType >> (true) {}
-
- void change_contain_null_value(bool target_value) { this->_contain_null =
target_value; }
-};
-
-template <PrimitiveType T>
-struct AggregateFunctionGroupArrayIntersectData {
- using ColVecType = typename PrimitiveTypeTraits<T>::ColumnType;
- using NullableNumericOrDateSetType = NullableNumericOrDateSet<T>;
- using Set = std::unique_ptr<NullableNumericOrDateSetType>;
-
- AggregateFunctionGroupArrayIntersectData()
- : value(std::make_unique<NullableNumericOrDateSetType>()) {}
-
- Set value;
- bool init = false;
-
- void reset() {
- init = false;
- value = std::make_unique<NullableNumericOrDateSetType>();
- }
-
- void process_col_data(auto& column_data, size_t offset, size_t arr_size,
Set& set) {
- const bool is_column_data_nullable = column_data.is_nullable();
-
- const ColumnNullable* col_null = nullptr;
- const ColVecType* nested_column_data = nullptr;
-
- if (is_column_data_nullable) {
- const auto* const_col_data = &column_data;
- col_null = static_cast<const ColumnNullable*>(const_col_data);
- nested_column_data = &assert_cast<const ColVecType&,
TypeCheckOnRelease::DISABLE>(
- col_null->get_nested_column());
- } else {
- nested_column_data = &static_cast<const ColVecType&>(column_data);
- }
-
- if (!init) {
- for (size_t i = 0; i < arr_size; ++i) {
- const bool is_null_element =
- is_column_data_nullable && col_null->is_null_at(offset
+ i);
- const typename PrimitiveTypeTraits<T>::ColumnItemType*
src_data =
- is_null_element ? nullptr :
&(nested_column_data->get_element(offset + i));
-
- set->insert(src_data);
- }
- init = true;
- } else if (!set->empty()) {
- Set new_set = std::make_unique<NullableNumericOrDateSetType>();
-
- for (size_t i = 0; i < arr_size; ++i) {
- const bool is_null_element =
- is_column_data_nullable && col_null->is_null_at(offset
+ i);
- const typename PrimitiveTypeTraits<T>::ColumnItemType*
src_data =
- is_null_element ? nullptr :
&(nested_column_data->get_element(offset + i));
-
- if ((!is_null_element && set->find(src_data)) ||
- (set->contain_null() && is_null_element)) {
- new_set->insert(src_data);
- }
- }
- set = std::move(new_set);
- }
- }
-};
-
-/// Puts all values to the hybrid set. Returns an array of unique values.
Implemented for numeric/date types.
-template <PrimitiveType T>
-class AggregateFunctionGroupArrayIntersect
- : public
IAggregateFunctionDataHelper<AggregateFunctionGroupArrayIntersectData<T>,
-
AggregateFunctionGroupArrayIntersect<T>>,
- UnaryExpression,
- NotNullableAggregateFunction {
-private:
- using State = AggregateFunctionGroupArrayIntersectData<T>;
- DataTypePtr argument_type;
-
-public:
- AggregateFunctionGroupArrayIntersect(const DataTypes& argument_types_)
- :
IAggregateFunctionDataHelper<AggregateFunctionGroupArrayIntersectData<T>,
-
AggregateFunctionGroupArrayIntersect<T>>(
- argument_types_),
- argument_type(argument_types_[0]) {}
-
- AggregateFunctionGroupArrayIntersect(const DataTypes& argument_types_,
- const bool result_is_nullable)
- :
IAggregateFunctionDataHelper<AggregateFunctionGroupArrayIntersectData<T>,
-
AggregateFunctionGroupArrayIntersect<T>>(
- argument_types_),
- argument_type(argument_types_[0]) {}
-
- String get_name() const override { return "group_array_intersect"; }
-
- DataTypePtr get_return_type() const override { return argument_type; }
-
- void reset(AggregateDataPtr __restrict place) const override {
this->data(place).reset(); }
-
- void add(AggregateDataPtr __restrict place, const IColumn** columns,
ssize_t row_num,
- Arena&) const override {
- auto& data = this->data(place);
- auto& set = data.value;
-
- const bool col_is_nullable = (*columns[0]).is_nullable();
- const ColumnArray& column =
- col_is_nullable
- ? assert_cast<const ColumnArray&,
TypeCheckOnRelease::DISABLE>(
- assert_cast<const ColumnNullable&,
TypeCheckOnRelease::DISABLE>(
- *columns[0])
- .get_nested_column())
- : assert_cast<const ColumnArray&,
TypeCheckOnRelease::DISABLE>(*columns[0]);
-
- const auto& offsets = column.get_offsets();
- const auto offset = offsets[row_num - 1];
- const auto arr_size = offsets[row_num] - offset;
- const auto& column_data = column.get_data();
-
- data.process_col_data(column_data, offset, arr_size, set);
- }
-
- void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs,
- Arena&) const override {
- auto& data = this->data(place);
- auto& set = data.value;
- auto& rhs_set = this->data(rhs).value;
-
- if (!this->data(rhs).init) {
- return;
- }
-
- auto& init = data.init;
- if (!init) {
- set->change_contain_null_value(rhs_set->contain_null());
- HybridSetBase::IteratorBase* it = rhs_set->begin();
- while (it->has_next()) {
- const void* value = it->get_value();
- set->insert(value);
- it->next();
- }
- init = true;
- } else if (!set->empty()) {
- auto create_new_set = [](auto& lhs_val, auto& rhs_val) {
- typename State::Set new_set =
- std::make_unique<typename
State::NullableNumericOrDateSetType>();
- HybridSetBase::IteratorBase* it = lhs_val->begin();
- while (it->has_next()) {
- const void* value = it->get_value();
- if ((rhs_val->find(value))) {
- new_set->insert(value);
- }
- it->next();
- }
- new_set->change_contain_null_value(lhs_val->contain_null() &&
- rhs_val->contain_null());
- return new_set;
- };
- auto new_set = rhs_set->size() < set->size() ?
create_new_set(rhs_set, set)
- : create_new_set(set,
rhs_set);
- set = std::move(new_set);
- }
- }
-
- void serialize(ConstAggregateDataPtr __restrict place, BufferWritable&
buf) const override {
- auto& data = this->data(place);
- auto& set = data.value;
- auto& init = data.init;
- const bool is_set_contain_null = set->contain_null();
-
- buf.write_binary(is_set_contain_null);
- buf.write_binary(init);
- buf.write_var_uint(set->size());
- HybridSetBase::IteratorBase* it = set->begin();
-
- while (it->has_next()) {
- const typename PrimitiveTypeTraits<T>::CppType* value_ptr =
- static_cast<const typename
PrimitiveTypeTraits<T>::CppType*>(it->get_value());
- buf.write_binary((*value_ptr));
- it->next();
- }
- }
-
- void deserialize(AggregateDataPtr __restrict place, BufferReadable& buf,
- Arena&) const override {
- auto& data = this->data(place);
- bool is_set_contain_null;
-
- buf.read_binary(is_set_contain_null);
- data.value->change_contain_null_value(is_set_contain_null);
- buf.read_binary(data.init);
- UInt64 size;
- buf.read_var_uint(size);
-
- typename PrimitiveTypeTraits<T>::CppType element;
- for (UInt64 i = 0; i < size; ++i) {
- buf.read_binary(element);
- data.value->insert(static_cast<void*>(&element));
- }
- }
-
- void insert_result_into(ConstAggregateDataPtr __restrict place, IColumn&
to) const override {
- ColumnArray& arr_to = assert_cast<ColumnArray&>(to);
- ColumnArray::Offsets64& offsets_to = arr_to.get_offsets();
- auto& to_nested_col = arr_to.get_data();
- const bool is_nullable = to_nested_col.is_nullable();
-
- auto insert_values = [](typename State::ColVecType& nested_col, auto&
set,
- bool is_nullable = false, ColumnNullable*
col_null = nullptr) {
- size_t old_size = nested_col.get_data().size();
- size_t res_size = set->size();
- size_t i = 0;
-
- if (is_nullable && set->contain_null()) {
- col_null->insert_data(nullptr, 0);
- res_size += 1;
- i = 1;
- }
-
- nested_col.get_data().resize(old_size + res_size);
-
- HybridSetBase::IteratorBase* it = set->begin();
- while (it->has_next()) {
- const auto value =
- *reinterpret_cast<const typename
PrimitiveTypeTraits<T>::ColumnItemType*>(
- it->get_value());
- nested_col.get_data()[old_size + i] = value;
- if (is_nullable) {
- col_null->get_null_map_data().push_back(0);
- }
- it->next();
- ++i;
- }
- };
-
- const auto& set = this->data(place).value;
- if (is_nullable) {
- auto col_null = reinterpret_cast<ColumnNullable*>(&to_nested_col);
- auto& nested_col =
- assert_cast<typename
State::ColVecType&>(col_null->get_nested_column());
- offsets_to.push_back(offsets_to.back() + set->size() +
(set->contain_null() ? 1 : 0));
- insert_values(nested_col, set, true, col_null);
- } else {
- auto& nested_col = static_cast<typename
State::ColVecType&>(to_nested_col);
- offsets_to.push_back(offsets_to.back() + set->size());
- insert_values(nested_col, set);
- }
- }
-};
-
-/// Generic implementation, it uses serialized representation as object
descriptor.
-class NullableStringSet : public StringValueSet<DynamicContainer<StringRef>> {
-public:
- NullableStringSet() : StringValueSet<DynamicContainer<StringRef>>(true) {}
-
- void change_contain_null_value(bool target_value) { this->_contain_null =
target_value; }
-};
-
-struct AggregateFunctionGroupArrayIntersectGenericData {
- using Set = std::unique_ptr<NullableStringSet>;
-
- AggregateFunctionGroupArrayIntersectGenericData()
- : value(std::make_unique<NullableStringSet>()) {}
- Set value;
- bool init = false;
-
- void reset() {
- init = false;
- value = std::make_unique<NullableStringSet>();
- }
-};
-
-/** Template parameter with true value should be used for columns that store
their elements in memory continuously.
- * For such columns group_array_intersect() can be implemented more
efficiently (especially for small numeric arrays).
- */
-class AggregateFunctionGroupArrayIntersectGeneric
- : public
IAggregateFunctionDataHelper<AggregateFunctionGroupArrayIntersectGenericData,
-
AggregateFunctionGroupArrayIntersectGeneric> {
-private:
- using State = AggregateFunctionGroupArrayIntersectGenericData;
- DataTypePtr input_data_type;
-
-public:
- AggregateFunctionGroupArrayIntersectGeneric(const DataTypes&
input_data_type_)
- :
IAggregateFunctionDataHelper<AggregateFunctionGroupArrayIntersectGenericData,
-
AggregateFunctionGroupArrayIntersectGeneric>(
- input_data_type_),
- input_data_type(input_data_type_[0]) {}
-
- String get_name() const override { return "group_array_intersect"; }
-
- DataTypePtr get_return_type() const override { return input_data_type; }
-
- void reset(AggregateDataPtr __restrict place) const override {
this->data(place).reset(); }
-
- void add(AggregateDataPtr __restrict place, const IColumn** columns,
ssize_t row_num,
- Arena& arena) const override {
- auto& data = this->data(place);
- auto& init = data.init;
- auto& set = data.value;
-
- const bool col_is_nullable = (*columns[0]).is_nullable();
- const ColumnArray& column =
- col_is_nullable
- ? assert_cast<const ColumnArray&,
TypeCheckOnRelease::DISABLE>(
- assert_cast<const ColumnNullable&,
TypeCheckOnRelease::DISABLE>(
- *columns[0])
- .get_nested_column())
- : assert_cast<const ColumnArray&,
TypeCheckOnRelease::DISABLE>(*columns[0]);
-
- const auto nested_column_data = column.get_data_ptr();
- const auto& offsets = column.get_offsets();
- const auto offset = offsets[row_num - 1];
- const auto arr_size = offsets[row_num] - offset;
- const auto& column_data = column.get_data();
- const bool is_column_data_nullable = column_data.is_nullable();
- const ColumnNullable* col_null = nullptr;
-
- if (is_column_data_nullable) {
- col_null = static_cast<const ColumnNullable*>(&column_data);
- }
-
- auto process_element = [&](size_t i) {
- const bool is_null_element =
- is_column_data_nullable && col_null->is_null_at(offset +
i);
-
- StringRef src = nested_column_data->get_data_at(offset + i);
-
- src.data = is_null_element ? nullptr : arena.insert(src.data,
src.size);
- return src;
- };
-
- if (!init) {
- for (size_t i = 0; i < arr_size; ++i) {
- StringRef src = process_element(i);
- set->insert((void*)src.data, src.size);
- }
- init = true;
- } else if (!set->empty()) {
- typename State::Set new_set =
std::make_unique<NullableStringSet>();
-
- for (size_t i = 0; i < arr_size; ++i) {
- StringRef src = process_element(i);
- if ((set->find(src.data, src.size) && src.data != nullptr) ||
- (set->contain_null() && src.data == nullptr)) {
- new_set->insert((void*)src.data, src.size);
- }
- }
- set = std::move(new_set);
- }
- }
-
- void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs,
- Arena&) const override {
- auto& data = this->data(place);
- auto& set = data.value;
- auto& rhs_set = this->data(rhs).value;
-
- if (!this->data(rhs).init) {
- return;
- }
-
- auto& init = data.init;
- if (!init) {
- set->change_contain_null_value(rhs_set->contain_null());
- HybridSetBase::IteratorBase* it = rhs_set->begin();
- while (it->has_next()) {
- const auto* value = reinterpret_cast<const
StringRef*>(it->get_value());
- set->insert((void*)(value->data), value->size);
- it->next();
- }
- init = true;
- } else if (!set->empty()) {
- auto create_new_set = [](auto& lhs_val, auto& rhs_val) {
- typename State::Set new_set =
std::make_unique<NullableStringSet>();
- HybridSetBase::IteratorBase* it = lhs_val->begin();
- while (it->has_next()) {
- const auto* value = reinterpret_cast<const
StringRef*>(it->get_value());
- if (rhs_val->find(value)) {
- new_set->insert((void*)value->data, value->size);
- }
- it->next();
- }
- new_set->change_contain_null_value(lhs_val->contain_null() &&
- rhs_val->contain_null());
- return new_set;
- };
- auto new_set = rhs_set->size() < set->size() ?
create_new_set(rhs_set, set)
- : create_new_set(set,
rhs_set);
- set = std::move(new_set);
- }
- }
-
- void serialize(ConstAggregateDataPtr __restrict place, BufferWritable&
buf) const override {
- auto& data = this->data(place);
- auto& set = data.value;
- auto& init = data.init;
- const bool is_set_contain_null = set->contain_null();
-
- buf.write_binary(is_set_contain_null);
- buf.write_binary(init);
- buf.write_var_uint(set->size());
-
- HybridSetBase::IteratorBase* it = set->begin();
- while (it->has_next()) {
- const auto* value = reinterpret_cast<const
StringRef*>(it->get_value());
- buf.write_binary(*value);
- it->next();
- }
- }
-
- void deserialize(AggregateDataPtr __restrict place, BufferReadable& buf,
- Arena& arena) const override {
- auto& data = this->data(place);
- bool is_set_contain_null;
-
- buf.read_binary(is_set_contain_null);
- data.value->change_contain_null_value(is_set_contain_null);
- buf.read_binary(data.init);
- UInt64 size;
- buf.read_var_uint(size);
-
- StringRef element;
- for (UInt64 i = 0; i < size; ++i) {
- element = buf.read_binary_into(arena);
- data.value->insert((void*)element.data, element.size);
- }
- }
-
- void insert_result_into(ConstAggregateDataPtr __restrict place, IColumn&
to) const override {
- auto& arr_to = assert_cast<ColumnArray&>(to);
- ColumnArray::Offsets64& offsets_to = arr_to.get_offsets();
- auto& data_to = arr_to.get_data();
- auto col_null = reinterpret_cast<ColumnNullable*>(&data_to);
-
- const auto& set = this->data(place).value;
- auto res_size = set->size();
-
- if (set->contain_null()) {
- col_null->insert_data(nullptr, 0);
- res_size += 1;
- }
-
- offsets_to.push_back(offsets_to.back() + res_size);
-
- HybridSetBase::IteratorBase* it = set->begin();
- while (it->has_next()) {
- const auto* value = reinterpret_cast<const
StringRef*>(it->get_value());
- data_to.insert_data(value->data, value->size);
- it->next();
- }
- }
-};
-
-} // namespace doris::vectorized
-
-#include "common/compile_check_end.h"
diff --git a/be/src/vec/aggregate_functions/aggregate_function_group_array_set_op.cpp b/be/src/vec/aggregate_functions/aggregate_function_group_array_set_op.cpp
new file mode 100644
index 00000000000..b0eaf53179f
--- /dev/null
+++ b/be/src/vec/aggregate_functions/aggregate_function_group_array_set_op.cpp
@@ -0,0 +1,158 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+//
https://github.com/ClickHouse/ClickHouse/blob/master/src/AggregateFunctions/AggregateFunctionGroupArrayIntersect.cpp
+// and modified by Doris
+
+#include "vec/aggregate_functions/aggregate_function_group_array_set_op.h"
+
+#include <glog/logging.h>
+
+#include "runtime/define_primitive_type.h"
+#include "runtime/primitive_type.h"
+#include "vec/aggregate_functions/aggregate_function.h"
+#include "vec/aggregate_functions/factory_helpers.h"
+#include "vec/aggregate_functions/helpers.h"
+
+namespace doris::vectorized {
+#include "common/compile_check_begin.h"
+
+template <template <PrimitiveType> class ImplNumericData, typename
ImplStringData>
+inline AggregateFunctionPtr create_aggregate_function_group_array_impl(
+ const DataTypes& argument_types, const bool result_is_nullable,
+ const AggregateFunctionAttr& attr) {
+ const auto& nested_type = remove_nullable(
+ assert_cast<const
DataTypeArray&>(*(argument_types[0])).get_nested_type());
+
+ switch (nested_type->get_primitive_type()) {
+ case doris::PrimitiveType::TYPE_BOOLEAN:
+ return creator_without_type::create<
+
AggregateFunctionGroupArraySetOp<ImplNumericData<TYPE_BOOLEAN>>>(
+ argument_types, result_is_nullable, attr);
+ case PrimitiveType::TYPE_TINYINT:
+ return creator_without_type::create<
+
AggregateFunctionGroupArraySetOp<ImplNumericData<TYPE_TINYINT>>>(
+ argument_types, result_is_nullable, attr);
+ case PrimitiveType::TYPE_SMALLINT:
+ return creator_without_type::create<
+
AggregateFunctionGroupArraySetOp<ImplNumericData<TYPE_SMALLINT>>>(
+ argument_types, result_is_nullable, attr);
+ case PrimitiveType::TYPE_INT:
+ return creator_without_type::create<
+ AggregateFunctionGroupArraySetOp<ImplNumericData<TYPE_INT>>>(
+ argument_types, result_is_nullable, attr);
+ case PrimitiveType::TYPE_BIGINT:
+ return creator_without_type::create<
+
AggregateFunctionGroupArraySetOp<ImplNumericData<TYPE_BIGINT>>>(
+ argument_types, result_is_nullable, attr);
+ case PrimitiveType::TYPE_LARGEINT:
+ return creator_without_type::create<
+
AggregateFunctionGroupArraySetOp<ImplNumericData<TYPE_LARGEINT>>>(
+ argument_types, result_is_nullable, attr);
+ case PrimitiveType::TYPE_DATEV2:
+ return creator_without_type::create<
+
AggregateFunctionGroupArraySetOp<ImplNumericData<TYPE_DATEV2>>>(
+ argument_types, result_is_nullable, attr);
+ case PrimitiveType::TYPE_DATETIMEV2:
+ return creator_without_type::create<
+
AggregateFunctionGroupArraySetOp<ImplNumericData<TYPE_DATETIMEV2>>>(
+ argument_types, result_is_nullable, attr);
+ case PrimitiveType::TYPE_DOUBLE:
+ return creator_without_type::create<
+
AggregateFunctionGroupArraySetOp<ImplNumericData<TYPE_DOUBLE>>>(
+ argument_types, result_is_nullable, attr);
+ case PrimitiveType::TYPE_FLOAT:
+ return creator_without_type::create<
+ AggregateFunctionGroupArraySetOp<ImplNumericData<TYPE_FLOAT>>>(
+ argument_types, result_is_nullable, attr);
+ case PrimitiveType::TYPE_DECIMAL32:
+ return creator_without_type::create<
+
AggregateFunctionGroupArraySetOp<ImplNumericData<TYPE_DECIMAL32>>>(
+ argument_types, result_is_nullable, attr);
+ case PrimitiveType::TYPE_DECIMAL64:
+ return creator_without_type::create<
+
AggregateFunctionGroupArraySetOp<ImplNumericData<TYPE_DECIMAL64>>>(
+ argument_types, result_is_nullable, attr);
+ case PrimitiveType::TYPE_DECIMAL128I:
+ return creator_without_type::create<
+
AggregateFunctionGroupArraySetOp<ImplNumericData<TYPE_DECIMAL128I>>>(
+ argument_types, result_is_nullable, attr);
+ case PrimitiveType::TYPE_DECIMAL256:
+ return creator_without_type::create<
+
AggregateFunctionGroupArraySetOp<ImplNumericData<TYPE_DECIMAL256>>>(
+ argument_types, result_is_nullable, attr);
+ case PrimitiveType::TYPE_IPV4:
+ return creator_without_type::create<
+ AggregateFunctionGroupArraySetOp<ImplNumericData<TYPE_IPV4>>>(
+ argument_types, result_is_nullable, attr);
+ case PrimitiveType::TYPE_IPV6:
+ return creator_without_type::create<
+ AggregateFunctionGroupArraySetOp<ImplNumericData<TYPE_IPV6>>>(
+ argument_types, result_is_nullable, attr);
+ case PrimitiveType::TYPE_STRING:
+ case PrimitiveType::TYPE_VARCHAR:
+ case PrimitiveType::TYPE_CHAR:
+ return
creator_without_type::create<AggregateFunctionGroupArraySetOp<ImplStringData>>(
+ argument_types, result_is_nullable, attr);
+ default:
+ LOG(WARNING) << " got invalid of nested type: " <<
nested_type->get_name();
+ return nullptr;
+ }
+}
+
+/// Factory entry point for `group_array_intersect`.
+///
+/// Checks the arity through `assert_arity_range(name, argument_types, 1, 1)`, then requires
+/// the argument type returned by `remove_nullable` to be TYPE_ARRAY; any other type raises
+/// an INVALID_ARGUMENT exception. The concrete function is built by
+/// `create_aggregate_function_group_array_impl` with the intersect data implementations.
+AggregateFunctionPtr create_aggregate_function_group_array_intersect(
+        const std::string& name, const DataTypes& argument_types, const bool result_is_nullable,
+        const AggregateFunctionAttr& attr) {
+    assert_arity_range(name, argument_types, 1, 1);
+
+    const DataTypePtr array_type = remove_nullable(argument_types[0]);
+    const bool is_array = array_type->get_primitive_type() == TYPE_ARRAY;
+    if (!is_array) {
+        throw Exception(
+                ErrorCode::INVALID_ARGUMENT,
+                "Aggregate function group_array_intersect accepts only array type argument. "
+                "Provided argument type: " +
+                        array_type->get_name());
+    }
+
+    return create_aggregate_function_group_array_impl<GroupArrayNumericIntersectData,
+                                                      GroupArrayStringIntersectData>(
+            {array_type}, result_is_nullable, attr);
+}
+
+/// Factory entry point for `group_array_union`.
+///
+/// Checks the arity through `assert_arity_range(name, argument_types, 1, 1)`, then requires
+/// the argument type returned by `remove_nullable` to be TYPE_ARRAY; any other type raises
+/// an INVALID_ARGUMENT exception. The concrete function is built by
+/// `create_aggregate_function_group_array_impl` with the union data implementations.
+AggregateFunctionPtr create_aggregate_function_group_array_union(
+        const std::string& name, const DataTypes& argument_types, const bool result_is_nullable,
+        const AggregateFunctionAttr& attr) {
+    assert_arity_range(name, argument_types, 1, 1);
+
+    const DataTypePtr array_type = remove_nullable(argument_types[0]);
+    const bool is_array = array_type->get_primitive_type() == TYPE_ARRAY;
+    if (!is_array) {
+        throw Exception(ErrorCode::INVALID_ARGUMENT,
+                        "Aggregate function group_array_union accepts only array type argument. "
+                        "Provided argument type: " +
+                                array_type->get_name());
+    }
+
+    return create_aggregate_function_group_array_impl<GroupArrayNumericUnionData,
+                                                      GroupArrayStringUnionData>(
+            {array_type}, result_is_nullable, attr);
+}
+
+// Registers both array set-operation aggregates with `factory`:
+// "group_array_intersect" and "group_array_union", each bound to its creator function
+// through `register_function_both`.
+void
register_aggregate_function_group_array_set_op(AggregateFunctionSimpleFactory&
factory) {
+ factory.register_function_both("group_array_intersect",
+
create_aggregate_function_group_array_intersect);
+ factory.register_function_both("group_array_union",
+
create_aggregate_function_group_array_union);
+}
+} // namespace doris::vectorized
diff --git
a/be/src/vec/aggregate_functions/aggregate_function_group_array_set_op.h
b/be/src/vec/aggregate_functions/aggregate_function_group_array_set_op.h
new file mode 100644
index 00000000000..dc7831b96e6
--- /dev/null
+++ b/be/src/vec/aggregate_functions/aggregate_function_group_array_set_op.h
@@ -0,0 +1,480 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+//
https://github.com/ClickHouse/ClickHouse/blob/master/src/AggregateFunctions/AggregateFunctionGroupArrayIntersect.cpp
+// and modified by Doris
+
+#include <memory>
+#include <string>
+
+#include "exprs/hybrid_set.h"
+#include "vec/aggregate_functions/aggregate_function.h"
+#include "vec/aggregate_functions/aggregate_function_simple_factory.h"
+#include "vec/columns/column_array.h"
+#include "vec/columns/column_decimal.h"
+#include "vec/common/assert_cast.h"
+#include "vec/core/field.h"
+#include "vec/data_types/data_type_array.h"
+#include "vec/data_types/data_type_date_or_datetime_v2.h"
+#include "vec/data_types/data_type_nullable.h"
+#include "vec/data_types/data_type_number.h"
+#include "vec/data_types/data_type_string.h"
+namespace doris::vectorized {
+#include "common/compile_check_begin.h"
+class Arena;
+class BufferReadable;
+class BufferWritable;
+} // namespace doris::vectorized
+
+namespace doris::vectorized {
+
+/// HybridSet used by the numeric/date set-operation states.
+///
+/// TYPE_BOOLEAN is stored as TYPE_TINYINT; every other primitive type is stored as-is.
+/// The base set is always constructed with `true`, and the `_contain_null` flag can be
+/// overwritten directly through `change_contain_null_value`.
+template <PrimitiveType T>
+class NullableNumericOrDateSet
+        : public HybridSet<T == TYPE_BOOLEAN ? TYPE_TINYINT : T,
+                           DynamicContainer<typename PrimitiveTypeTraits<
+                                   T == TYPE_BOOLEAN ? TYPE_TINYINT : T>::CppType>> {
+    // Named once so the constructor does not have to repeat the whole base template-id.
+    static constexpr PrimitiveType StoredType = T == TYPE_BOOLEAN ? TYPE_TINYINT : T;
+    using StoredCppType = typename PrimitiveTypeTraits<StoredType>::CppType;
+    using BaseSet = HybridSet<StoredType, DynamicContainer<StoredCppType>>;
+
+public:
+    NullableNumericOrDateSet() : BaseSet(true) {}
+
+    /// Overwrites the inherited `_contain_null` flag with `target_value`.
+    void change_contain_null_value(bool target_value) { this->_contain_null = target_value; }
+};
+
+/// State shared by the numeric `group_array_intersect` / `group_array_union` aggregates.
+///
+/// Owns the accumulated set plus an `init` flag, and implements everything that does not
+/// depend on the set operation itself: reset, (de)serialization and writing the result.
+template <PrimitiveType T>
+struct GroupArraySetOpNumericBaseData {
+    using ColVecType = typename PrimitiveTypeTraits<T>::ColumnType;
+    using CppType = typename PrimitiveTypeTraits<T>::CppType;
+    using ColumnItemType = typename PrimitiveTypeTraits<T>::ColumnItemType;
+    using NullableNumericOrDateSetType = NullableNumericOrDateSet<T>;
+    using Set = std::unique_ptr<NullableNumericOrDateSetType>;
+
+    GroupArraySetOpNumericBaseData() : set(std::make_unique<NullableNumericOrDateSetType>()) {}
+
+    Set set;
+    bool init = false;
+
+    /// Clears `init` and replaces the set with a freshly constructed one.
+    void reset() {
+        init = false;
+        set = std::make_unique<NullableNumericOrDateSetType>();
+    }
+
+    /// Layout: contain_null flag, init flag, var-uint element count, then every element.
+    void serialize(BufferWritable& buf) const {
+        const bool has_null = set->contain_null();
+        buf.write_binary(has_null);
+        buf.write_binary(init);
+        buf.write_var_uint(set->size());
+
+        for (HybridSetBase::IteratorBase* cursor = set->begin(); cursor->has_next();
+             cursor->next()) {
+            buf.write_binary(*static_cast<const CppType*>(cursor->get_value()));
+        }
+    }
+
+    /// Reads back what `serialize` wrote, inserting the elements into the current set.
+    void deserialize(BufferReadable& buf) {
+        bool has_null;
+        buf.read_binary(has_null);
+        set->change_contain_null_value(has_null);
+        buf.read_binary(init);
+
+        UInt64 count;
+        buf.read_var_uint(count);
+
+        CppType item;
+        for (UInt64 remaining = count; remaining > 0; --remaining) {
+            buf.read_binary(item);
+            set->insert(static_cast<void*>(&item));
+        }
+    }
+
+    /// Appends one array row to `to`: an optional leading NULL followed by the set's values.
+    /// `to` must be an array column whose nested column is nullable.
+    void insert_result_into(IColumn& to) const {
+        auto& array_col = assert_cast<ColumnArray&>(to);
+        ColumnArray::Offsets64& offsets = array_col.get_offsets();
+        auto& nested_any = array_col.get_data();
+        DCHECK(nested_any.is_nullable())
+                << "should be array(nullable(column)) " << to.dump_structure();
+
+        auto* nullable_col = assert_cast<ColumnNullable*>(&nested_any);
+        auto& values_col = assert_cast<ColVecType&>(nullable_col->get_nested_column());
+        offsets.push_back(offsets.back() + set->size() + (set->contain_null() ? 1 : 0));
+
+        // Size of the value column before this row; captured before any insertion.
+        const size_t first_slot = values_col.get_data().size();
+        size_t total = set->size();
+        size_t written = 0;
+        if (set->contain_null()) {
+            // insert_default() takes the first slot of the row, so values start one later.
+            nullable_col->insert_default();
+            total += 1;
+            written = 1;
+        }
+        values_col.get_data().resize(first_slot + total);
+
+        for (HybridSetBase::IteratorBase* cursor = set->begin(); cursor->has_next();
+             cursor->next()) {
+            values_col.get_data()[first_slot + written] =
+                    *reinterpret_cast<const ColumnItemType*>(cursor->get_value());
+            nullable_col->get_null_map_data().push_back(0);
+            ++written;
+        }
+    }
+};
+
+/// Numeric/date state for `group_array_intersect`.
+template <PrimitiveType T>
+struct GroupArrayNumericIntersectData : public GroupArraySetOpNumericBaseData<T> {
+    using Base = GroupArraySetOpNumericBaseData<T>;
+    using Set = Base::Set;
+    using NullableNumericOrDateSetType = Base::NullableNumericOrDateSetType;
+    using ColVecType = Base::ColVecType;
+
+    static std::string get_name() { return "group_array_intersect"; }
+
+    /// Folds one array row in: `arr_size` elements of `column_data` starting at `offset`.
+    void process_col_data(const auto& column_data, size_t offset, size_t arr_size) {
+        if (!this->init) {
+            // Nothing folded in yet: the row's range seeds the set.
+            this->set->insert_range_from(column_data, offset, offset + arr_size);
+            this->init = true;
+            return;
+        }
+        if (this->set->empty()) {
+            // The set already reports empty; it is left untouched.
+            return;
+        }
+
+        // The survivors are collected in a fresh set that then replaces the current one.
+        Set survivors = std::make_unique<NullableNumericOrDateSetType>();
+        const auto& nullable_col = assert_cast<const ColumnNullable&>(*column_data);
+        const ColVecType& values =
+                assert_cast<const ColVecType&>(nullable_col.get_nested_column());
+        for (size_t row = offset; row < offset + arr_size; ++row) {
+            const typename PrimitiveTypeTraits<T>::ColumnItemType* item = nullptr;
+            bool keep;
+            if (nullable_col.is_null_at(row)) {
+                // NULL survives only if the current set holds NULL; `item` stays nullptr.
+                keep = this->set->contain_null();
+            } else {
+                item = &(values.get_element(row));
+                keep = this->set->find(item);
+            }
+            if (keep) {
+                survivors->insert(item);
+            }
+        }
+        this->set = std::move(survivors);
+    }
+
+    /// Intersects this state with another state's set.
+    void merge(const auto& rhs_set) {
+        if (!this->init) {
+            // Nothing folded in yet: take over the other side's contents.
+            this->set->change_contain_null_value(rhs_set->contain_null());
+            for (HybridSetBase::IteratorBase* cursor = rhs_set->begin(); cursor->has_next();
+                 cursor->next()) {
+                this->set->insert(cursor->get_value());
+            }
+            this->init = true;
+            return;
+        }
+        if (this->set->empty()) {
+            return;
+        }
+
+        // Keeps the values of `walked` that `probed` also finds; NULL is kept only if
+        // both sides hold it.
+        const auto keep_common = [](const auto& walked, const auto& probed) {
+            Set common = std::make_unique<NullableNumericOrDateSetType>();
+            for (HybridSetBase::IteratorBase* cursor = walked->begin(); cursor->has_next();
+                 cursor->next()) {
+                const void* item = cursor->get_value();
+                if (probed->find(item)) {
+                    common->insert(item);
+                }
+            }
+            common->change_contain_null_value(walked->contain_null() &&
+                                              probed->contain_null());
+            return common;
+        };
+
+        // Iterate over the smaller side and probe the larger one.
+        if (rhs_set->size() < this->set->size()) {
+            this->set = keep_common(rhs_set, this->set);
+        } else {
+            this->set = keep_common(this->set, rhs_set);
+        }
+    }
+};
+
+/// Numeric/date state for `group_array_union`.
+template <PrimitiveType T>
+struct GroupArrayNumericUnionData : public GroupArraySetOpNumericBaseData<T> {
+    using Base = GroupArraySetOpNumericBaseData<T>;
+    using Set = Base::Set;
+
+    static std::string get_name() { return "group_array_union"; }
+
+    /// Adds the row's range [offset, offset + arr_size) of `column_data` to the set.
+    void process_col_data(const auto& column_data, size_t offset, size_t arr_size) {
+        const size_t range_end = offset + arr_size;
+        this->set->insert_range_from(column_data, offset, range_end);
+        this->init = true;
+    }
+
+    /// Adds every value of `rhs_set`; the result holds NULL if either side does.
+    void merge(const auto& rhs_set) {
+        this->init = true;
+        const bool either_has_null = this->set->contain_null() || rhs_set->contain_null();
+        this->set->change_contain_null_value(either_has_null);
+        for (HybridSetBase::IteratorBase* cursor = rhs_set->begin(); cursor->has_next();
+             cursor->next()) {
+            this->set->insert(cursor->get_value());
+        }
+    }
+};
+
+/// String set used by the string set-operation states. The base StringSet is always
+/// constructed with `true`, and `_contain_null` can be overwritten directly.
+class NullableStringSet : public StringSet<DynamicContainer<std::string>> {
+    using BaseSet = StringSet<DynamicContainer<std::string>>;
+
+public:
+    NullableStringSet() : BaseSet(true) {}
+
+    /// Overwrites the inherited `_contain_null` flag with `target_value`.
+    void change_contain_null_value(bool target_value) { _contain_null = target_value; }
+};
+
+/// State shared by the string `group_array_intersect` / `group_array_union` aggregates:
+/// the accumulated set, an `init` flag, and the operation-independent plumbing.
+struct GroupArraySetOpStringBaseData {
+    using Set = std::unique_ptr<NullableStringSet>;
+
+    GroupArraySetOpStringBaseData() : set(std::make_unique<NullableStringSet>()) {}
+    Set set;
+    bool init = false;
+
+    /// Clears `init` and replaces the set with a freshly constructed one.
+    void reset() {
+        init = false;
+        set = std::make_unique<NullableStringSet>();
+    }
+
+    /// Layout: contain_null flag, init flag, var-uint element count, then every string.
+    void serialize(BufferWritable& buf) const {
+        const bool has_null = set->contain_null();
+        buf.write_binary(has_null);
+        buf.write_binary(init);
+        buf.write_var_uint(set->size());
+
+        for (HybridSetBase::IteratorBase* cursor = set->begin(); cursor->has_next();
+             cursor->next()) {
+            const auto* text = reinterpret_cast<const std::string*>(cursor->get_value());
+            buf.write_binary(*text);
+        }
+    }
+
+    /// Reads back what `serialize` wrote, inserting the strings into the current set.
+    void deserialize(BufferReadable& buf) {
+        bool has_null;
+        buf.read_binary(has_null);
+        set->change_contain_null_value(has_null);
+        buf.read_binary(init);
+
+        UInt64 count;
+        buf.read_var_uint(count);
+
+        StringRef item;
+        for (UInt64 remaining = count; remaining > 0; --remaining) {
+            buf.read_binary(item);
+            set->insert((void*)item.data, item.size);
+        }
+    }
+
+    /// Appends one array row to `to`: an optional leading NULL followed by the set's strings.
+    void insert_result_into(IColumn& to) const {
+        auto& array_col = assert_cast<ColumnArray&>(to);
+        ColumnArray::Offsets64& offsets = array_col.get_offsets();
+        auto& nested = array_col.get_data();
+        auto* nullable_col = assert_cast<ColumnNullable*>(&nested);
+
+        auto produced = set->size();
+        if (set->contain_null()) {
+            nullable_col->insert_default();
+            produced += 1;
+        }
+        offsets.push_back(offsets.back() + produced);
+
+        for (HybridSetBase::IteratorBase* cursor = set->begin(); cursor->has_next();
+             cursor->next()) {
+            const auto* text = reinterpret_cast<const std::string*>(cursor->get_value());
+            nested.insert_data(text->data(), text->size());
+        }
+    }
+};
+
+/// String state for `group_array_intersect`.
+struct GroupArrayStringIntersectData : public GroupArraySetOpStringBaseData {
+    using Base = GroupArraySetOpStringBaseData;
+    using Set = Base::Set;
+
+    static std::string get_name() { return "group_array_intersect"; }
+
+    /// Folds one array row in: `arr_size` elements of `column_data` starting at `offset`.
+    /// `column_data` must be a nullable column wrapping a ColumnString.
+    void process_col_data(const auto& column_data, size_t offset, size_t arr_size) {
+        const auto* col_null = assert_cast<const ColumnNullable*>(column_data.get());
+        const auto& nested_column_data =
+                assert_cast<const ColumnString&>(col_null->get_nested_column());
+
+        if (!init) {
+            // Nothing folded in yet: the row seeds the set.
+            for (size_t i = 0; i < arr_size; ++i) {
+                if (col_null->is_null_at(offset + i)) {
+                    set->insert(nullptr);
+                } else {
+                    auto src = nested_column_data.get_data_at(offset + i);
+                    set->insert((void*)src.data, src.size);
+                }
+            }
+            init = true;
+        } else if (!this->set->empty()) {
+            // The survivors are collected in a fresh set that then replaces the current one.
+            Set new_set = std::make_unique<NullableStringSet>();
+            for (size_t i = 0; i < arr_size; ++i) {
+                if (col_null->is_null_at(offset + i)) {
+                    // A NULL element may only match a stored NULL. It must never be looked
+                    // up by value: the nested slot behind a NULL holds a placeholder that
+                    // could equal a real stored string (e.g. "") and leak into the result.
+                    if (this->set->contain_null()) {
+                        new_set->insert(nullptr);
+                    }
+                    continue;
+                }
+                auto src = nested_column_data.get_data_at(offset + i);
+                if (this->set->find((void*)src.data, src.size)) {
+                    new_set->insert((void*)src.data, src.size);
+                }
+            }
+            set = std::move(new_set);
+        }
+    }
+
+    /// Intersects this state with another state's set.
+    void merge(const auto& rhs_set) {
+        if (!this->init) {
+            // Nothing folded in yet: take over the other side's contents.
+            this->set->change_contain_null_value(rhs_set->contain_null());
+            HybridSetBase::IteratorBase* it = rhs_set->begin();
+            while (it->has_next()) {
+                const void* value = it->get_value();
+                this->set->insert(value);
+                it->next();
+            }
+            this->init = true;
+        } else if (!this->set->empty()) {
+            // Keeps the values of `lhs_val` that `rhs_val` also finds; NULL is kept only
+            // if both sides hold it.
+            auto create_new_set = [](auto& lhs_val, auto& rhs_val) {
+                Set new_set = std::make_unique<NullableStringSet>();
+                HybridSetBase::IteratorBase* it = lhs_val->begin();
+                while (it->has_next()) {
+                    const void* value = it->get_value();
+                    if ((rhs_val->find(value))) {
+                        new_set->insert(value);
+                    }
+                    it->next();
+                }
+                new_set->change_contain_null_value(lhs_val->contain_null() &&
+                                                   rhs_val->contain_null());
+                return new_set;
+            };
+            // Iterate over the smaller side and probe the larger one.
+            auto new_set = rhs_set->size() < this->set->size()
+                                   ? create_new_set(rhs_set, this->set)
+                                   : create_new_set(this->set, rhs_set);
+            this->set = std::move(new_set);
+        }
+    }
+};
+
+/// String state for `group_array_union`.
+struct GroupArrayStringUnionData : public GroupArraySetOpStringBaseData {
+    using Base = GroupArraySetOpStringBaseData;
+    using Set = Base::Set;
+
+    static std::string get_name() { return "group_array_union"; }
+
+    /// Adds the row's `arr_size` elements of `column_data`, starting at `offset`, to the set.
+    void process_col_data(const auto& column_data, size_t offset, size_t arr_size) {
+        const auto* nullable_col = assert_cast<const ColumnNullable*>(column_data.get());
+        const auto& strings =
+                assert_cast<const ColumnString&>(nullable_col->get_nested_column());
+
+        for (size_t row = offset; row < offset + arr_size; ++row) {
+            if (nullable_col->is_null_at(row)) {
+                set->insert(nullptr);
+                continue;
+            }
+            auto src = strings.get_data_at(row);
+            set->insert((void*)src.data, src.size);
+        }
+        init = true;
+    }
+
+    /// Adds every value of `rhs_set`; the result holds NULL if either side does.
+    void merge(const auto& rhs_set) {
+        this->init = true;
+        const bool either_has_null = this->set->contain_null() || rhs_set->contain_null();
+        this->set->change_contain_null_value(either_has_null);
+        for (HybridSetBase::IteratorBase* cursor = rhs_set->begin(); cursor->has_next();
+             cursor->next()) {
+            this->set->insert(cursor->get_value());
+        }
+    }
+};
+
+/// Aggregate function that folds array rows into the hybrid set held by `ImplData` and
+/// returns the accumulated values as one array. `ImplData` supplies the set operation
+/// (`process_col_data` / `merge`) and the function name.
+template <typename ImplData>
+class AggregateFunctionGroupArraySetOp
+        : public IAggregateFunctionDataHelper<ImplData,
+                                              AggregateFunctionGroupArraySetOp<ImplData>>,
+          UnaryExpression,
+          NotNullableAggregateFunction {
+private:
+    DataTypePtr argument_type;
+
+public:
+    AggregateFunctionGroupArraySetOp(const DataTypes& argument_types_)
+            : IAggregateFunctionDataHelper<ImplData, AggregateFunctionGroupArraySetOp>(
+                      argument_types_),
+              argument_type(argument_types_[0]) {}
+
+    // `result_is_nullable` is accepted but not used; construction matches the overload above.
+    AggregateFunctionGroupArraySetOp(const DataTypes& argument_types_,
+                                     const bool result_is_nullable)
+            : AggregateFunctionGroupArraySetOp(argument_types_) {}
+
+    String get_name() const override { return ImplData::get_name(); }
+
+    // The result has the same type as the (single) argument.
+    DataTypePtr get_return_type() const override { return argument_type; }
+
+    void reset(AggregateDataPtr __restrict place) const override { this->data(place).reset(); }
+
+    /// Feeds row `row_num` of `columns[0]` (an array column, optionally wrapped in a
+    /// nullable column) into the state at `place`.
+    void add(AggregateDataPtr __restrict place, const IColumn** columns, ssize_t row_num,
+             Arena& arena) const override {
+        // Unwrap the outer nullable column, if any, to reach the array column.
+        const IColumn* input = columns[0];
+        if (input->is_nullable()) {
+            input = &assert_cast<const ColumnNullable&, TypeCheckOnRelease::DISABLE>(*input)
+                             .get_nested_column();
+        }
+        const auto& array_col =
+                assert_cast<const ColumnArray&, TypeCheckOnRelease::DISABLE>(*input);
+
+        const auto& offsets = array_col.get_offsets();
+        // NOTE(review): for row 0 this reads offsets[-1]; presumably the offsets
+        // container tolerates that and yields 0 - confirm.
+        const auto begin = offsets[row_num - 1];
+        const auto length = offsets[row_num] - begin;
+        const auto& elements = array_col.get_data_ptr();
+        DCHECK(elements->is_nullable())
+                << "should be array(nullable(column)) " << array_col.dump_structure();
+        this->data(place).process_col_data(elements, begin, length);
+    }
+
+    /// Merges `rhs` into `place`; a `rhs` whose `init` flag is unset is ignored.
+    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs,
+               Arena&) const override {
+        const auto& other = this->data(rhs);
+        if (other.init) {
+            this->data(place).merge(other.set);
+        }
+    }
+
+    void serialize(ConstAggregateDataPtr __restrict place, BufferWritable& buf) const override {
+        this->data(place).serialize(buf);
+    }
+
+    void deserialize(AggregateDataPtr __restrict place, BufferReadable& buf,
+                     Arena& arena) const override {
+        this->data(place).deserialize(buf);
+    }
+
+    void insert_result_into(ConstAggregateDataPtr __restrict place, IColumn& to) const override {
+        this->data(place).insert_result_into(to);
+    }
+};
+
+} // namespace doris::vectorized
+
+#include "common/compile_check_end.h"
diff --git
a/be/src/vec/aggregate_functions/aggregate_function_simple_factory.cpp
b/be/src/vec/aggregate_functions/aggregate_function_simple_factory.cpp
index 91d6fe83c21..2fd696f8415 100644
--- a/be/src/vec/aggregate_functions/aggregate_function_simple_factory.cpp
+++ b/be/src/vec/aggregate_functions/aggregate_function_simple_factory.cpp
@@ -51,7 +51,7 @@ void
register_aggregate_function_stddev_variance_pop(AggregateFunctionSimpleFact
void
register_aggregate_function_stddev_variance_samp(AggregateFunctionSimpleFactory&
factory);
void register_aggregate_function_topn(AggregateFunctionSimpleFactory& factory);
void
register_aggregate_function_approx_count_distinct(AggregateFunctionSimpleFactory&
factory);
-void
register_aggregate_function_group_array_intersect(AggregateFunctionSimpleFactory&
factory);
+void
register_aggregate_function_group_array_set_op(AggregateFunctionSimpleFactory&
factory);
void register_aggregate_function_group_concat(AggregateFunctionSimpleFactory&
factory);
void register_aggregate_function_percentile(AggregateFunctionSimpleFactory&
factory);
void
register_aggregate_function_percentile_old(AggregateFunctionSimpleFactory&
factory);
@@ -95,7 +95,7 @@ AggregateFunctionSimpleFactory&
AggregateFunctionSimpleFactory::instance() {
register_aggregate_function_uniq_distribute_key(instance);
register_aggregate_function_bit(instance);
register_aggregate_function_bitmap(instance);
- register_aggregate_function_group_array_intersect(instance);
+ register_aggregate_function_group_array_set_op(instance);
register_aggregate_function_group_concat(instance);
register_aggregate_function_quantile_state(instance);
register_aggregate_function_combinator_distinct(instance);
diff --git a/be/test/vec/aggregate_functions/agg_group_array_intersect_test.cpp
b/be/test/vec/aggregate_functions/agg_group_array_intersect_test.cpp
index 3edfd122821..9d5008e7d6f 100644
--- a/be/test/vec/aggregate_functions/agg_group_array_intersect_test.cpp
+++ b/be/test/vec/aggregate_functions/agg_group_array_intersect_test.cpp
@@ -36,7 +36,7 @@
const int agg_test_batch_size = 2;
namespace doris::vectorized {
-void
register_aggregate_function_group_array_intersect(AggregateFunctionSimpleFactory&
factory);
+void
register_aggregate_function_group_array_set_op(AggregateFunctionSimpleFactory&
factory);
template <PrimitiveType T>
void sort_numeric_array(Array& array) {
@@ -68,17 +68,22 @@ void validate_numeric_test(MutableColumnPtr& test_col_data)
{
nested_column->insert_value((typename
PrimitiveTypeTraits<T>::ColumnItemType)11);
nested_column->insert_value((typename
PrimitiveTypeTraits<T>::ColumnItemType)2);
nested_column->insert_value((typename
PrimitiveTypeTraits<T>::ColumnItemType)3);
+ auto null_map_column = ColumnUInt8::create();
+ null_map_column->get_data().resize_fill(nested_column->size(), 0);
auto offsets_column = ColumnArray::ColumnOffsets::create();
offsets_column->insert(vectorized::Field::create_field<TYPE_BIGINT>(3));
offsets_column->insert(vectorized::Field::create_field<TYPE_BIGINT>(6));
- test_col_data = ColumnArray::create(std::move(nested_column),
std::move(offsets_column));
+ // array nested column should be nullable
+ test_col_data = ColumnArray::create(
+ ColumnNullable::create(std::move(nested_column),
std::move(null_map_column)),
+ std::move(offsets_column));
EXPECT_EQ(test_col_data->size(), 2);
// Prepare test function and parameters.
AggregateFunctionSimpleFactory factory;
- register_aggregate_function_group_array_intersect(factory);
+ register_aggregate_function_group_array_set_op(factory);
const auto nested =
T == TYPE_DATEV2
? std::dynamic_pointer_cast<const
IDataType>(std::make_shared<DataTypeDateV2>())
@@ -104,8 +109,11 @@ void validate_numeric_test(MutableColumnPtr&
test_col_data) {
}
// Check result.
- ColumnArray ans(PrimitiveTypeTraits<T>::ColumnType::create(),
- ColumnArray::ColumnOffsets::create());
+ auto nested_result_column = PrimitiveTypeTraits<T>::ColumnType::create();
+ auto null_map_result_column = ColumnUInt8::create();
+ auto nullable_nested_result_column =
ColumnNullable::create(std::move(nested_result_column),
+
std::move(null_map_result_column));
+ ColumnArray ans(std::move(nullable_nested_result_column),
ColumnArray::ColumnOffsets::create());
agg_function->insert_result_into(place, ans);
Field actual_field;
ans.get(0, actual_field);
@@ -153,7 +161,7 @@ void validate_numeric_nullable_test(MutableColumnPtr&
test_col_data) {
// Prepare test function and parameters.
AggregateFunctionSimpleFactory factory;
- register_aggregate_function_group_array_intersect(factory);
+ register_aggregate_function_group_array_set_op(factory);
const auto nested =
T == TYPE_DATEV2
@@ -244,19 +252,23 @@ TEST(AggGroupArrayIntersectTest, string_test) {
nested_column->insert_data("aaaa", 4);
nested_column->insert_data("b", 1);
nested_column->insert_data("c", 1);
+ auto null_map_column = ColumnUInt8::create();
+ null_map_column->get_data().resize_fill(nested_column->size(), 0);
auto offsets_column = ColumnArray::ColumnOffsets::create();
offsets_column->insert(vectorized::Field::create_field<TYPE_BIGINT>(3));
offsets_column->insert(vectorized::Field::create_field<TYPE_BIGINT>(6));
- auto column_array_string =
- ColumnArray::create(std::move(nested_column),
std::move(offsets_column));
+ // array nested column should be nullable
+ auto column_array_string = ColumnArray::create(
+ ColumnNullable::create(std::move(nested_column),
std::move(null_map_column)),
+ std::move(offsets_column));
EXPECT_EQ(column_array_string->size(), 2);
// Prepare test function and parameters.
AggregateFunctionSimpleFactory factory;
- register_aggregate_function_group_array_intersect(factory);
+ register_aggregate_function_group_array_set_op(factory);
DataTypePtr data_type_array_string(
std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>()));
DataTypes data_types = {data_type_array_string};
@@ -275,7 +287,11 @@ TEST(AggGroupArrayIntersectTest, string_test) {
}
// Check result.
- ColumnArray ans(ColumnString::create(),
ColumnArray::ColumnOffsets::create());
+ auto nested_result_column = ColumnString::create();
+ auto null_map_result_column = ColumnUInt8::create();
+ auto nullable_nested_result_column =
ColumnNullable::create(std::move(nested_result_column),
+
std::move(null_map_result_column));
+ ColumnArray ans(std::move(nullable_nested_result_column),
ColumnArray::ColumnOffsets::create());
agg_function->insert_result_into(place, ans);
Field actual_field;
ans.get(0, actual_field);
@@ -321,7 +337,7 @@ TEST(AggGroupArrayIntersectTest, string_nullable_test) {
// Prepare test function and parameters.
AggregateFunctionSimpleFactory factory;
- register_aggregate_function_group_array_intersect(factory);
+ register_aggregate_function_group_array_set_op(factory);
DataTypePtr data_type_array_string(
std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>()));
DataTypes data_types = {data_type_array_string};
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinAggregateFunctions.java
b/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinAggregateFunctions.java
index bf68560e5c3..eb15275058a 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinAggregateFunctions.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinAggregateFunctions.java
@@ -39,6 +39,7 @@ import
org.apache.doris.nereids.trees.expressions.functions.agg.CountByEnum;
import org.apache.doris.nereids.trees.expressions.functions.agg.Covar;
import org.apache.doris.nereids.trees.expressions.functions.agg.CovarSamp;
import
org.apache.doris.nereids.trees.expressions.functions.agg.GroupArrayIntersect;
+import
org.apache.doris.nereids.trees.expressions.functions.agg.GroupArrayUnion;
import org.apache.doris.nereids.trees.expressions.functions.agg.GroupBitAnd;
import org.apache.doris.nereids.trees.expressions.functions.agg.GroupBitOr;
import org.apache.doris.nereids.trees.expressions.functions.agg.GroupBitXor;
@@ -128,6 +129,7 @@ public class BuiltinAggregateFunctions implements
FunctionHelper {
agg(Covar.class, "covar", "covar_pop"),
agg(CovarSamp.class, "covar_samp"),
agg(GroupArrayIntersect.class, "group_array_intersect"),
+ agg(GroupArrayUnion.class, "group_array_union"),
agg(GroupBitAnd.class, "group_bit_and"),
agg(GroupBitOr.class, "group_bit_or"),
agg(GroupBitXor.class, "group_bit_xor"),
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/agg/GroupArrayUnion.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/agg/GroupArrayUnion.java
new file mode 100644
index 00000000000..a760b18ef37
--- /dev/null
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/agg/GroupArrayUnion.java
@@ -0,0 +1,87 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.nereids.trees.expressions.functions.agg;
+
+import org.apache.doris.catalog.FunctionSignature;
+import org.apache.doris.nereids.trees.expressions.Expression;
+import
org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature;
+import org.apache.doris.nereids.trees.expressions.literal.ArrayLiteral;
+import org.apache.doris.nereids.trees.expressions.shape.UnaryExpression;
+import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
+import org.apache.doris.nereids.types.ArrayType;
+import org.apache.doris.nereids.types.coercion.AnyDataType;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.ImmutableList;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * AggregateFunction 'group_array_union'.
+ * Takes a single array argument; for empty input the result is an empty array
+ * literal (see {@link #resultForEmptyInput()}).
+ */
+public class GroupArrayUnion extends NotNullableAggregateFunction
+ implements UnaryExpression, ExplicitlyCastableSignature {
+
+ // Single signature: one array argument, built with AnyDataType(0) as element type and
+ // retArgType(0) as return type (presumably "returns the type of argument 0" - confirm).
+ public static final List<FunctionSignature> SIGNATURES = ImmutableList.of(
+ FunctionSignature.retArgType(0)
+ .args(ArrayType.of(new AnyDataType(0))));
+
+ /**
+ * constructor with 1 argument.
+ */
+ public GroupArrayUnion(Expression arg) {
+ super("group_array_union", arg);
+ }
+
+ /**
+ * constructor with 2 arguments.
+ * The distinct flag is accepted but not forwarded: false is always passed to the
+ * super constructor (presumably because duplicates do not change a union - confirm).
+ */
+ public GroupArrayUnion(boolean distinct, Expression arg) {
+ super("group_array_union", false, arg);
+ }
+
+ /** constructor for withChildren and reuse signature */
+ private GroupArrayUnion(AggregateFunctionParams functionParams) {
+ super(functionParams);
+ }
+
+ /**
+ * withDistinctAndChildren.
+ * Requires exactly one child and builds a new instance from
+ * getFunctionParams(distinct, children).
+ */
+ @Override
+ public AggregateFunction withDistinctAndChildren(boolean distinct,
List<Expression> children) {
+ Preconditions.checkArgument(children.size() == 1);
+ return new GroupArrayUnion(getFunctionParams(distinct, children));
+ }
+
+ @Override
+ public <R, C> R accept(ExpressionVisitor<R, C> visitor, C context) {
+ return visitor.visitGroupArrayUnion(this, context);
+ }
+
+ @Override
+ public List<FunctionSignature> getSignatures() {
+ return SIGNATURES;
+ }
+
+ // Result for empty input: an empty array literal carrying this function's data type.
+ @Override
+ public Expression resultForEmptyInput() {
+ return new ArrayLiteral(new ArrayList<>(), this.getDataType());
+ }
+}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/AggregateFunctionVisitor.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/AggregateFunctionVisitor.java
index fa98c577428..28af6ecda11 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/AggregateFunctionVisitor.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/AggregateFunctionVisitor.java
@@ -40,6 +40,7 @@ import
org.apache.doris.nereids.trees.expressions.functions.agg.CountByEnum;
import org.apache.doris.nereids.trees.expressions.functions.agg.Covar;
import org.apache.doris.nereids.trees.expressions.functions.agg.CovarSamp;
import
org.apache.doris.nereids.trees.expressions.functions.agg.GroupArrayIntersect;
+import
org.apache.doris.nereids.trees.expressions.functions.agg.GroupArrayUnion;
import org.apache.doris.nereids.trees.expressions.functions.agg.GroupBitAnd;
import org.apache.doris.nereids.trees.expressions.functions.agg.GroupBitOr;
import org.apache.doris.nereids.trees.expressions.functions.agg.GroupBitXor;
@@ -202,6 +203,10 @@ public interface AggregateFunctionVisitor<R, C> {
return visitAggregateFunction(groupArrayIntersect, context);
}
+ // Default handling for group_array_union: delegate to visitAggregateFunction.
+ default R visitGroupArrayUnion(GroupArrayUnion groupArrayUnion, C context)
{
+ return visitAggregateFunction(groupArrayUnion, context);
+ }
+
default R visitGroupBitAnd(GroupBitAnd groupBitAnd, C context) {
return visitNullableAggregateFunction(groupBitAnd, context);
}
diff --git a/regression-test/data/query_p0/aggregate/group_array_intersect.out
b/regression-test/data/query_p0/aggregate/group_array_intersect.out
index 7778d24b703..07b64d8de3e 100644
--- a/regression-test/data/query_p0/aggregate/group_array_intersect.out
+++ b/regression-test/data/query_p0/aggregate/group_array_intersect.out
@@ -73,6 +73,80 @@
-- !groupby_3 --
18 ["a", "b", "c", "d", "e", "f"]
+-- !int_1_union --
+[null, 12, 13]
+
+-- !int_2_union --
+[null, 12, 13]
+
+-- !int_3_union --
+[null, 0]
+
+-- !int_4_union --
+[null]
+
+-- !int_5_union --
+[6, 7, 8]
+
+-- !int_6_union --
+[null]
+
+-- !int_7_union --
+[null, 12, 13]
+
+-- !int_8_union --
+[null]
+
+-- !int_9_union --
+[null, 12, 13]
+
+-- !float_1_union --
+[6.3, 7.3]
+
+-- !float_2_union --
+[6.3, 7.3, 8.3]
+
+-- !float_3_union --
+[6.3, 7.3, 8.3, 9.3]
+
+-- !datetimev2_1_union --
+["2024-03-23 00:00:00.000", "2024-03-24 00:00:00.000", "2024-03-25
00:00:00.000"]
+
+-- !datetimev2_2_union --
+["2024-03-23 00:00:00.000", "2024-03-24 00:00:00.000", "2024-03-25
00:00:00.000"]
+
+-- !datev2_1_union --
+["2024-03-25", "2024-03-29", "2024-05-23"]
+
+-- !datev2_2_union --
+[null, "2024-03-29", "2024-05-23"]
+
+-- !string_1_union --
+[null, "a"]
+
+-- !string_2_union --
+[null, "a", "b", "c", "d", "e", "f"]
+
+-- !bigint_union --
+[1234567890123456, 2333333333333333]
+
+-- !decimal_union --
+[1.34000, 2.00123, 2.00189]
+
+-- !groupby_1_union --
+0 [0]
+1 [1, 2, 3, 4, 5]
+
+-- !groupby_2_union --
+18 ["a", "b", "c", "d", "e", "f"]
+19 ["a", "aa", "b", "bb", "c", "cc", "d", "dd", "f", "ff"]
+20 [null, "a"]
+21 [null]
+22 ["x", "y"]
+
+-- !groupby_3_union --
+18 ["a", "b", "c", "d", "e", "f"]
+
-- !notnull_1 --
[]
@@ -94,3 +168,24 @@
-- !notnull_7 --
[]
+-- !notnull_11_union --
+[null, 6.6, 7.7]
+
+-- !notnull_12_union --
+[6, 7, 8] [6.6, 7.7, 8.8]
+
+-- !notnull_13_union --
+[6.6, 7.7, 8.8]
+
+-- !notnull_14_union --
+["a", "aa", "b", "bb", "c", "cc", "d", "dd", "e", "f", "ff"]
+
+-- !notnull_15_union --
+[6, 7] [6.6, 7.7] ["a"]
+
+-- !notnull_16_union --
+["a", "aa", "b", "bb", "c", "cc", "d", "dd", "e", "f", "ff", "x", "y"]
+
+-- !notnull_17_union --
+[null, 1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8]
+
diff --git a/regression-test/suites/query_p0/aggregate/group_array_intersect.groovy b/regression-test/suites/query_p0/aggregate/group_array_intersect.groovy
index 6141dea3da8..7492e533455 100644
--- a/regression-test/suites/query_p0/aggregate/group_array_intersect.groovy
+++ b/regression-test/suites/query_p0/aggregate/group_array_intersect.groovy
@@ -72,6 +72,29 @@ suite("group_array_intersect") {
qt_groupby_2 """select id, array_sort(group_array_intersect(c_array_string)) from group_array_intersect_test where c_array_string is not null group by id order by id;"""
qt_groupby_3 """select id, array_sort(group_array_intersect(c_array_string)) from group_array_intersect_test where id = 18 group by id order by id;"""
+ qt_int_1_union """select array_sort(group_array_union(c_array_int)) from group_array_intersect_test where id in (6, 12);"""
+ qt_int_2_union """select array_sort(group_array_union(c_array_int)) from group_array_intersect_test where id in (14, 12);"""
+ qt_int_3_union """select array_sort(group_array_union(c_array_int)) from group_array_intersect_test where id in (0, 6);"""
+ qt_int_4_union """select array_sort(group_array_union(c_array_int)) from group_array_intersect_test where id in (13);"""
+ qt_int_5_union """select array_sort(group_array_union(c_array_int)) from group_array_intersect_test where id in (2, 5);"""
+ qt_int_6_union """select array_sort(group_array_union(c_array_int)) from group_array_intersect_test where id in (6, 13);"""
+ qt_int_7_union """select array_sort(group_array_union(c_array_int)) from group_array_intersect_test where id in (12);"""
+ qt_int_8_union """select array_sort(group_array_union(c_array_int)) from group_array_intersect_test where id in (6, 7);"""
+ qt_int_9_union """select array_sort(group_array_union(c_array_int)) from group_array_intersect_test where id in (9, 12);"""
+ qt_float_1_union """select array_sort(group_array_union(c_array_float)) from group_array_intersect_test where id = 7;"""
+ qt_float_2_union """select array_sort(group_array_union(c_array_float)) from group_array_intersect_test where id between 7 and 8;"""
+ qt_float_3_union """select array_sort(group_array_union(c_array_float)) from group_array_intersect_test where id in (7, 9);"""
+ qt_datetimev2_1_union """select array_sort(group_array_union(c_array_datetimev2)) from group_array_intersect_test;"""
+ qt_datetimev2_2_union """select array_sort(group_array_union(c_array_datetimev2)) from group_array_intersect_test where id in (10, 11);"""
+ qt_datev2_1_union """select array_sort(group_array_union(c_array_datev2)) from group_array_intersect_test where id in (15, 16);"""
+ qt_datev2_2_union """select array_sort(group_array_union(c_array_datev2)) from group_array_intersect_test where id in (15, 17);"""
+ qt_string_1_union """select array_sort(group_array_union(c_array_string)) from group_array_intersect_test where id in (17, 20);"""
+ qt_string_2_union """select array_sort(group_array_union(c_array_string)) from group_array_intersect_test where id in (18, 20);"""
+ qt_bigint_union """select array_sort(group_array_union(c_array_bigint)) from group_array_intersect_test where id in (23, 24);"""
+ qt_decimal_union """select array_sort(group_array_union(c_array_decimal)) from group_array_intersect_test where id in (25, 26);"""
+ qt_groupby_1_union """select id, array_sort(group_array_union(c_array_int)) from group_array_intersect_test where id <= 1 group by id order by id;"""
+ qt_groupby_2_union """select id, array_sort(group_array_union(c_array_string)) from group_array_intersect_test where c_array_string is not null group by id order by id;"""
+ qt_groupby_3_union """select id, array_sort(group_array_union(c_array_string)) from group_array_intersect_test where id = 18 group by id order by id;"""
sql "DROP TABLE IF EXISTS `group_array_intersect_test_not_null`;"
sql """
@@ -104,4 +127,13 @@ suite("group_array_intersect") {
qt_notnull_5 """select array_sort(group_array_intersect(c_array_int)), array_sort(group_array_intersect(c_array_float)), array_sort(group_array_intersect(c_array_string)) from group_array_intersect_test_not_null where id between 3 and 4;"""
qt_notnull_6 """select array_sort(group_array_intersect(c_array_string)) from group_array_intersect_test_not_null where id between 1 and 5;"""
qt_notnull_7 """select array_sort(group_array_intersect(c_array_float)) from group_array_intersect_test_not_null where id between 1 and 5;"""
+
+ qt_notnull_11_union """select array_sort(group_array_union(c_array_float)) from group_array_intersect_test_not_null where array_size(c_array_float) between 1 and 2;"""
+ qt_notnull_12_union """select array_sort(group_array_union(c_array_int)), array_sort(group_array_union(c_array_float)) from group_array_intersect_test_not_null where id between 2 and 3;"""
+ qt_notnull_13_union """select array_sort(group_array_union(c_array_float)) from group_array_intersect_test_not_null where array_size(c_array_float) between 2 and 3;"""
+ qt_notnull_14_union """select array_sort(group_array_union(c_array_string)) from group_array_intersect_test_not_null where id between 1 and 2;"""
+ qt_notnull_15_union """select array_sort(group_array_union(c_array_int)), array_sort(group_array_union(c_array_float)), array_sort(group_array_union(c_array_string)) from group_array_intersect_test_not_null where id between 3 and 4;"""
+ qt_notnull_16_union """select array_sort(group_array_union(c_array_string)) from group_array_intersect_test_not_null where id between 1 and 5;"""
+ qt_notnull_17_union """select array_sort(group_array_union(c_array_float)) from group_array_intersect_test_not_null where id between 1 and 5;"""
+
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]