This is an automated email from the ASF dual-hosted git repository.
lihaopeng pushed a commit to branch branch-1.2-lts
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-1.2-lts by this push:
new 103ffb779e [fix](executor) collect_list supports second parameter in
1.2.4.1 (#19707)
103ffb779e is described below
commit 103ffb779e1f559d323f37c07c5b8deee16d24cb
Author: Mryange <[email protected]>
AuthorDate: Tue May 16 19:03:46 2023 +0800
[fix](executor) collect_list supports second parameter in 1.2.4.1 (#19707)
---
.../aggregate_function_collect.cpp | 101 ++--
.../aggregate_function_collect.h | 202 ++++---
.../apache/doris/analysis/FunctionCallExpr.java | 5 +
.../java/org/apache/doris/catalog/FunctionSet.java | 18 +
.../aggregate_functions/test_aggregate_collect.out | 77 ++-
.../test_aggregate_group_array.out | 27 +
.../test_aggregate_collect.groovy | 608 ++++++++++++++++++++-
7 files changed, 896 insertions(+), 142 deletions(-)
diff --git a/be/src/vec/aggregate_functions/aggregate_function_collect.cpp
b/be/src/vec/aggregate_functions/aggregate_function_collect.cpp
index b0e713c87f..276f4c34bd 100644
--- a/be/src/vec/aggregate_functions/aggregate_function_collect.cpp
+++ b/be/src/vec/aggregate_functions/aggregate_function_collect.cpp
@@ -18,75 +18,82 @@
#include "vec/aggregate_functions/aggregate_function_collect.h"
#include "vec/aggregate_functions/aggregate_function_simple_factory.h"
+#include "vec/aggregate_functions/helpers.h"
namespace doris::vectorized {
-template <typename T>
-AggregateFunctionPtr create_agg_function_collect(bool distinct, const
DataTypes& argument_types) {
+#define FOR_DECIMAL_TYPES(M) \
+ M(Decimal32) \
+ M(Decimal64) \
+ M(Decimal128) \
+ M(Decimal128I)
+
+template <typename T, typename HasLimit>
+AggregateFunctionPtr do_create_agg_function_collect(bool distinct,
+ const DataTypePtr&
argument_type) {
if (distinct) {
return AggregateFunctionPtr(
- new
AggregateFunctionCollect<AggregateFunctionCollectSetData<T>>(argument_types));
+ new
AggregateFunctionCollect<AggregateFunctionCollectSetData<T, HasLimit>,
+ HasLimit>(argument_type));
} else {
return AggregateFunctionPtr(
- new
AggregateFunctionCollect<AggregateFunctionCollectListData<T>>(argument_types));
+ new
AggregateFunctionCollect<AggregateFunctionCollectListData<T, HasLimit>,
+ HasLimit>(argument_type));
}
}
-AggregateFunctionPtr create_aggregate_function_collect(const std::string& name,
- const DataTypes&
argument_types,
- const Array& parameters,
- const bool
result_is_nullable) {
- if (argument_types.size() != 1) {
- LOG(WARNING) << fmt::format("Illegal number {} of argument for
aggregate function {}",
- argument_types.size(), name);
- return nullptr;
- }
-
+template <typename HasLimit>
+AggregateFunctionPtr create_aggregate_function_collect_impl(const std::string&
name,
+ const DataTypePtr&
argument_type,
+ const Array&,
const bool) {
bool distinct = false;
if (name == "collect_set") {
distinct = true;
}
- WhichDataType type(argument_types[0]);
- if (type.is_uint8()) {
- return create_agg_function_collect<UInt8>(distinct, argument_types);
- } else if (type.is_int8()) {
- return create_agg_function_collect<Int8>(distinct, argument_types);
- } else if (type.is_int16()) {
- return create_agg_function_collect<Int16>(distinct, argument_types);
- } else if (type.is_int32()) {
- return create_agg_function_collect<Int32>(distinct, argument_types);
- } else if (type.is_int64()) {
- return create_agg_function_collect<Int64>(distinct, argument_types);
- } else if (type.is_int128()) {
- return create_agg_function_collect<Int128>(distinct, argument_types);
- } else if (type.is_float32()) {
- return create_agg_function_collect<Float32>(distinct, argument_types);
- } else if (type.is_float64()) {
- return create_agg_function_collect<Float64>(distinct, argument_types);
- } else if (type.is_decimal32()) {
- return create_agg_function_collect<Decimal32>(distinct,
argument_types);
- } else if (type.is_decimal64()) {
- return create_agg_function_collect<Decimal64>(distinct,
argument_types);
- } else if (type.is_decimal128()) {
- return create_agg_function_collect<Decimal128>(distinct,
argument_types);
- } else if (type.is_decimal128i()) {
- return create_agg_function_collect<Decimal128I>(distinct,
argument_types);
- } else if (type.is_date()) {
- return create_agg_function_collect<Int64>(distinct, argument_types);
- } else if (type.is_date_time()) {
- return create_agg_function_collect<Int64>(distinct, argument_types);
- } else if (type.is_string()) {
- return create_agg_function_collect<StringRef>(distinct,
argument_types);
+ WhichDataType which(argument_type);
+#define DISPATCH(TYPE) \
+ if (which.idx == TypeIndex::TYPE) \
+ return do_create_agg_function_collect<TYPE, HasLimit>(distinct,
argument_type);
+ FOR_NUMERIC_TYPES(DISPATCH)
+ FOR_DECIMAL_TYPES(DISPATCH)
+#undef DISPATCH
+ if (which.is_date_or_datetime()) {
+ return do_create_agg_function_collect<Int64, HasLimit>(distinct,
argument_type);
+ } else if (which.is_date_v2()) {
+ return do_create_agg_function_collect<UInt32, HasLimit>(distinct,
argument_type);
+ } else if (which.is_date_time_v2()) {
+ return do_create_agg_function_collect<UInt64, HasLimit>(distinct,
argument_type);
+ } else if (which.is_string()) {
+ return do_create_agg_function_collect<StringRef, HasLimit>(distinct,
argument_type);
}
LOG(WARNING) << fmt::format("unsupported input type {} for aggregate
function {}",
- argument_types[0]->get_name(), name);
+ argument_type->get_name(), name);
+ return nullptr;
+}
+
+AggregateFunctionPtr create_aggregate_function_collect(const std::string& name,
+ const DataTypes&
argument_types,
+ const Array&,
+ const bool
result_is_nullable) {
+ if (argument_types.size() == 1) {
+ return create_aggregate_function_collect_impl<std::false_type>(name,
argument_types[0], {},
+ true);
+ }
+ if (argument_types.size() == 2) {
+ return create_aggregate_function_collect_impl<std::true_type>(name,
argument_types[0], {},
+ true);
+ }
+ LOG(WARNING) << fmt::format("number of parameters for aggregate function
{}, should be 1 or 2",
+ name);
return nullptr;
}
void register_aggregate_function_collect_list(AggregateFunctionSimpleFactory&
factory) {
factory.register_function("collect_list",
create_aggregate_function_collect);
factory.register_function("collect_set",
create_aggregate_function_collect);
+ factory.register_alias("collect_list", "group_array");
+ factory.register_alias("collect_set", "group_uniq_array");
}
-} // namespace doris::vectorized
+} // namespace doris::vectorized
\ No newline at end of file
diff --git a/be/src/vec/aggregate_functions/aggregate_function_collect.h
b/be/src/vec/aggregate_functions/aggregate_function_collect.h
index e78fdfe082..5bd60170bb 100644
--- a/be/src/vec/aggregate_functions/aggregate_function_collect.h
+++ b/be/src/vec/aggregate_functions/aggregate_function_collect.h
@@ -17,6 +17,8 @@
#pragma once
+#include <algorithm>
+
#include "common/status.h"
#include "vec/aggregate_functions/aggregate_function.h"
#include "vec/aggregate_functions/key_holder_helpers.h"
@@ -31,122 +33,180 @@
namespace doris::vectorized {
-template <typename T>
+template <typename T, typename HasLimit>
struct AggregateFunctionCollectSetData {
using ElementType = T;
using ColVecType = ColumnVectorOrDecimal<ElementType>;
using ElementNativeType = typename NativeType<T>::Type;
+ using SelfType = AggregateFunctionCollectSetData;
using Set = HashSetWithStackMemory<ElementNativeType,
DefaultHash<ElementNativeType>, 4>;
- Set set;
+ Set data_set;
+ Int64 max_size = -1;
+
+ size_t size() const { return data_set.size(); }
void add(const IColumn& column, size_t row_num) {
- const auto& vec = assert_cast<const ColVecType&>(column).get_data();
- set.insert(vec[row_num]);
+ data_set.insert(assert_cast<const
ColVecType&>(column).get_data()[row_num]);
+ }
+
+ void merge(const SelfType& rhs) {
+ if constexpr (HasLimit::value) {
+ for (auto& rhs_elem : rhs.data_set) {
+ if (size() >= max_size) {
+ return;
+ }
+ data_set.insert(rhs_elem.get_value());
+ }
+ } else {
+ data_set.merge(rhs.data_set);
+ }
}
- void merge(const AggregateFunctionCollectSetData& rhs) {
set.merge(rhs.set); }
- void write(BufferWritable& buf) const { set.write(buf); }
- void read(BufferReadable& buf) { set.read(buf); }
- void reset() { set.clear(); }
+
+ void write(BufferWritable& buf) const { data_set.write(buf); }
+
+ void read(BufferReadable& buf) { data_set.read(buf); }
+
void insert_result_into(IColumn& to) const {
auto& vec = assert_cast<ColVecType&>(to).get_data();
- vec.reserve(set.size());
- for (auto item : set) {
+ vec.reserve(size());
+ for (const auto& item : data_set) {
vec.push_back(item.key);
}
}
+
+ void reset() { data_set.clear(); }
};
-template <>
-struct AggregateFunctionCollectSetData<StringRef> {
+template <typename HasLimit>
+struct AggregateFunctionCollectSetData<StringRef, HasLimit> {
using ElementType = StringRef;
using ColVecType = ColumnString;
+ using SelfType = AggregateFunctionCollectSetData<ElementType, HasLimit>;
using Set = HashSetWithSavedHashWithStackMemory<ElementType,
DefaultHash<ElementType>, 4>;
- Set set;
+ Set data_set;
+ Int64 max_size = -1;
+
+ size_t size() const { return data_set.size(); }
void add(const IColumn& column, size_t row_num, Arena* arena) {
Set::LookupResult it;
bool inserted;
auto key_holder = get_key_holder<true>(column, row_num, *arena);
- set.emplace(key_holder, it, inserted);
+ data_set.emplace(key_holder, it, inserted);
}
- void merge(const AggregateFunctionCollectSetData& rhs, Arena* arena) {
- Set::LookupResult it;
+ void merge(const SelfType& rhs, Arena* arena) {
bool inserted;
- for (const auto& elem : rhs.set) {
- set.emplace(ArenaKeyHolder {elem.get_value(), *arena}, it,
inserted);
+ Set::LookupResult it;
+ for (auto& rhs_elem : rhs.data_set) {
+ if constexpr (HasLimit::value) {
+ if (size() >= max_size) {
+ return;
+ }
+ }
+ assert(arena != nullptr);
+ data_set.emplace(ArenaKeyHolder {rhs_elem.get_value(), *arena},
it, inserted);
}
}
+
void write(BufferWritable& buf) const {
- write_var_uint(set.size(), buf);
- for (const auto& elem : set) {
+ write_var_uint(size(), buf);
+ for (const auto& elem : data_set) {
write_string_binary(elem.get_value(), buf);
}
}
- void read(BufferReadable& buf) {
- UInt64 rows;
- read_var_uint(rows, buf);
+ void read(BufferReadable& buf) {
+ UInt64 size;
+ read_var_uint(size, buf);
StringRef ref;
- for (size_t i = 0; i < rows; ++i) {
+ for (size_t i = 0; i < size; ++i) {
read_string_binary(ref, buf);
- set.insert(ref);
+ data_set.insert(ref);
}
}
- void reset() { set.clear(); }
+
void insert_result_into(IColumn& to) const {
auto& vec = assert_cast<ColVecType&>(to);
- vec.reserve(set.size());
- for (const auto& item : set) {
+ vec.reserve(size());
+ for (const auto& item : data_set) {
vec.insert_data(item.key.data, item.key.size);
}
}
+
+ void reset() { data_set.clear(); }
};
-template <typename T>
+template <typename T, typename HasLimit>
struct AggregateFunctionCollectListData {
using ElementType = T;
using ColVecType = ColumnVectorOrDecimal<ElementType>;
+ using SelfType = AggregateFunctionCollectListData<ElementType, HasLimit>;
PaddedPODArray<ElementType> data;
+ Int64 max_size = -1;
+
+ size_t size() const { return data.size(); }
void add(const IColumn& column, size_t row_num) {
const auto& vec = assert_cast<const ColVecType&>(column).get_data();
data.push_back(vec[row_num]);
}
- void merge(const AggregateFunctionCollectListData& rhs) {
- data.insert(rhs.data.begin(), rhs.data.end());
+
+ void merge(const SelfType& rhs) {
+ if constexpr (HasLimit::value) {
+ for (auto& rhs_elem : rhs.data) {
+ if (size() >= max_size) {
+ return;
+ }
+ data.push_back(rhs_elem);
+ }
+ } else {
+ data.insert(rhs.data.begin(), rhs.data.end());
+ }
}
+
void write(BufferWritable& buf) const {
- write_var_uint(data.size(), buf);
- buf.write(data.raw_data(), data.size() * sizeof(ElementType));
+ write_var_uint(size(), buf);
+ buf.write(data.raw_data(), size() * sizeof(ElementType));
}
+
void read(BufferReadable& buf) {
UInt64 rows = 0;
read_var_uint(rows, buf);
data.resize(rows);
buf.read(reinterpret_cast<char*>(data.data()), rows *
sizeof(ElementType));
}
+
void reset() { data.clear(); }
+
void insert_result_into(IColumn& to) const {
auto& vec = assert_cast<ColVecType&>(to).get_data();
size_t old_size = vec.size();
- vec.resize(old_size + data.size());
- memcpy(vec.data() + old_size, data.data(), data.size() *
sizeof(ElementType));
+ vec.resize(old_size + size());
+ memcpy(vec.data() + old_size, data.data(), size() *
sizeof(ElementType));
}
};
-template <>
-struct AggregateFunctionCollectListData<StringRef> {
+template <typename HasLimit>
+struct AggregateFunctionCollectListData<StringRef, HasLimit> {
using ElementType = StringRef;
using ColVecType = ColumnString;
MutableColumnPtr data;
+ Int64 max_size = -1;
+
+ AggregateFunctionCollectListData() { data = ColVecType::create(); }
- AggregateFunctionCollectListData<ElementType>() { data =
ColVecType::create(); }
+ size_t size() const { return data->size(); }
void add(const IColumn& column, size_t row_num) {
data->insert_from(column, row_num); }
void merge(const AggregateFunctionCollectListData& rhs) {
- data->insert_range_from(*rhs.data, 0, rhs.data->size());
+ if constexpr (HasLimit::value) {
+ data->insert_range_from(*rhs.data, 0,
+ std::min(static_cast<size_t>(max_size -
size()), rhs.size()));
+ } else {
+ data->insert_range_from(*rhs.data, 0, rhs.size());
+ }
}
void write(BufferWritable& buf) const {
@@ -177,24 +237,27 @@ struct AggregateFunctionCollectListData<StringRef> {
void insert_result_into(IColumn& to) const {
auto& to_str = assert_cast<ColVecType&>(to);
- to_str.insert_range_from(*data, 0, data->size());
+ to_str.insert_range_from(*data, 0, size());
}
};
-template <typename Data>
-class AggregateFunctionCollect final
- : public IAggregateFunctionDataHelper<Data,
AggregateFunctionCollect<Data>> {
-public:
- static constexpr bool alloc_memory_in_arena =
- std::is_same_v<Data, AggregateFunctionCollectSetData<StringRef>>;
+template <typename Data, typename HasLimit>
+class AggregateFunctionCollect
+ : public IAggregateFunctionDataHelper<Data,
AggregateFunctionCollect<Data, HasLimit>> {
+ using GenericType = AggregateFunctionCollectSetData<StringRef, HasLimit>;
- AggregateFunctionCollect(const DataTypes& argument_types_)
- : IAggregateFunctionDataHelper<Data,
AggregateFunctionCollect<Data>>(argument_types_,
-
{}),
- _argument_type(argument_types_[0]) {}
+ static constexpr bool ENABLE_ARENA = std::is_same_v<Data, GenericType>;
+
+public:
+ AggregateFunctionCollect(const DataTypePtr& argument_type,
+ UInt64 max_size_ =
std::numeric_limits<UInt64>::max())
+ : IAggregateFunctionDataHelper<Data,
AggregateFunctionCollect<Data, HasLimit>>(
+ {argument_type},{}),
+ return_type(argument_type) {}
std::string get_name() const override {
- if constexpr (std::is_same_v<AggregateFunctionCollectListData<typename
Data::ElementType>,
+ if constexpr (std::is_same_v<AggregateFunctionCollectListData<typename
Data::ElementType,
+
HasLimit>,
Data>) {
return "collect_list";
} else {
@@ -203,27 +266,38 @@ public:
}
DataTypePtr get_return_type() const override {
- return std::make_shared<DataTypeArray>(make_nullable(_argument_type));
+ return std::make_shared<DataTypeArray>(make_nullable(return_type));
}
+ bool allocates_memory_in_arena() const override { return ENABLE_ARENA; }
+
void add(AggregateDataPtr __restrict place, const IColumn** columns,
size_t row_num,
Arena* arena) const override {
- assert(!columns[0]->is_null_at(row_num));
- if constexpr (alloc_memory_in_arena) {
- this->data(place).add(*columns[0], row_num, arena);
+ auto& data = this->data(place);
+ if constexpr (HasLimit::value) {
+ if (data.max_size == -1) {
+ data.max_size =
+ (UInt64) static_cast<const
ColumnInt32*>(columns[1])->get_element(row_num);
+ }
+ if (data.size() >= data.max_size) {
+ return;
+ }
+ }
+ if constexpr (ENABLE_ARENA) {
+ data.add(*columns[0], row_num, arena);
} else {
- this->data(place).add(*columns[0], row_num);
+ data.add(*columns[0], row_num);
}
}
- void reset(AggregateDataPtr place) const override {
this->data(place).reset(); }
-
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs,
Arena* arena) const override {
- if constexpr (alloc_memory_in_arena) {
- this->data(place).merge(this->data(rhs), arena);
+ auto& data = this->data(place);
+ auto& rhs_data = this->data(rhs);
+ if constexpr (ENABLE_ARENA) {
+ data.merge(rhs_data, arena);
} else {
- this->data(place).merge(this->data(rhs));
+ data.merge(rhs_data);
}
}
@@ -249,10 +323,8 @@ public:
to_arr.get_offsets().push_back(to_nested_col.size());
}
- bool allocates_memory_in_arena() const override { return
alloc_memory_in_arena; }
-
private:
- DataTypePtr _argument_type;
+ DataTypePtr return_type;
};
-} // namespace doris::vectorized
+} // namespace doris::vectorized
\ No newline at end of file
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java
b/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java
index 2500e67b7e..8ef4b36c88 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java
@@ -1307,6 +1307,11 @@ public class FunctionCallExpr extends Expr {
fn.setReturnType(new ArrayType(getChild(0).type));
}
+ if (fnName.getFunction().equalsIgnoreCase("group_uniq_array")
+ || fnName.getFunction().equalsIgnoreCase("group_array")) {
+ fn.setReturnType(new ArrayType(getChild(0).type));
+ }
+
if (fnName.getFunction().equalsIgnoreCase("from_unixtime")
|| fnName.getFunction().equalsIgnoreCase("date_format")) {
// if has only one child, it has default time format: yyyy-MM-dd
HH:mm:ss.SSSSSS
diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java
b/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java
index 9b6ea9b16b..b6c69c7708 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java
@@ -1405,6 +1405,10 @@ public class FunctionSet<T> {
public static final String SEQUENCE_COUNT = "sequence_count";
+ public static final String GROUP_UNIQ_ARRAY = "group_uniq_array";
+
+ public static final String GROUP_ARRAY = "group_array";
+
// Populate all the aggregate builtins in the catalog.
// null symbols indicate the function does not need that step of the
evaluation.
// An empty symbol indicates a TODO for the BE to implement the function.
@@ -2587,6 +2591,10 @@ public class FunctionSet<T> {
"", "", "", "", "", true, false, true, true));
addBuiltin(AggregateFunction.createBuiltin(COLLECT_SET,
Lists.newArrayList(t), new ArrayType(t), t,
"", "", "", "", "", true, false, true, true));
+ addBuiltin(AggregateFunction.createBuiltin(COLLECT_LIST,
Lists.newArrayList(t, Type.INT), new ArrayType(t), t,
+ "", "", "", "", "", true, false, true, true));
+ addBuiltin(AggregateFunction.createBuiltin(COLLECT_SET,
Lists.newArrayList(t, Type.INT), new ArrayType(t), t,
+ "", "", "", "", "", true, false, true, true));
addBuiltin(
AggregateFunction.createBuiltin("topn_array",
Lists.newArrayList(t, Type.INT), new ArrayType(t), t,
"", "", "", "", "", true, false, true, true));
@@ -2605,6 +2613,16 @@ public class FunctionSet<T> {
.createBuiltin("topn_weighted",
Lists.newArrayList(t, Type.BIGINT, Type.INT, Type.INT),
new ArrayType(t), t,
"", "", "", "", "", true, false, true,
true));
+ addBuiltin(AggregateFunction.createBuiltin(GROUP_UNIQ_ARRAY,
Lists.newArrayList(t), new ArrayType(t), t,
+ "", "", "", "", "", true, false, true, true));
+ addBuiltin(
+ AggregateFunction.createBuiltin(GROUP_UNIQ_ARRAY,
Lists.newArrayList(t, Type.INT), new ArrayType(t),
+ t, "", "", "", "", "", true, false, true, true));
+ addBuiltin(AggregateFunction.createBuiltin(GROUP_ARRAY,
Lists.newArrayList(t), new ArrayType(t), t,
+ "", "", "", "", "", true, false, true, true));
+ addBuiltin(
+ AggregateFunction.createBuiltin(GROUP_ARRAY,
Lists.newArrayList(t, Type.INT), new ArrayType(t),
+ t, "", "", "", "", "", true, false, true, true));
}
// Avg
diff --git
a/regression-test/data/query_p0/sql_functions/aggregate_functions/test_aggregate_collect.out
b/regression-test/data/query_p0/sql_functions/aggregate_functions/test_aggregate_collect.out
index f26cf1146b..5e75187e66 100644
---
a/regression-test/data/query_p0/sql_functions/aggregate_functions/test_aggregate_collect.out
+++
b/regression-test/data/query_p0/sql_functions/aggregate_functions/test_aggregate_collect.out
@@ -1,20 +1,83 @@
-- This file is automatically generated. You should know what you did if you
want to edit this
-- !select --
-1 ['hello', 'hello'] [2022-07-04, 2022-07-04] [1.23, 1.23]
-2 \N \N \N
+1 \N \N \N \N \N \N \N \N \N
\N \N \N \N \N \N \N not null
+1 false 10 20 30 4444444444444 55555555555 0.1
0.222 3333.33 c varchar1 string1 2022-12-01 2022-12-01
2022-12-01T22:23:23 2022-12-01T22:23:24.999999 not null
+1 \N \N \N \N \N \N \N \N \N
\N \N \N \N \N \N \N not null
+1 false 11 21 33 4444444444444 55555555555 0.1
0.222 3333.33 c varchar1 string1 2022-12-01 2022-12-01
2022-12-01T22:23:23 2022-12-01T22:23:24.999999 not null
+1 \N \N \N \N \N \N \N \N \N
\N \N \N \N \N \N \N not null
+1 true 11 12 13 1444444444444 1555555555 1.1
1.222 13333.33 d varchar2 string2 2022-12-02
2022-12-02 2022-12-02T22:23:23 2022-12-02T22:23:24.999999 not null
+2 \N \N \N \N \N \N \N \N \N
\N \N \N \N \N \N \N not null
+2 false 21 22 23 2444444444444 255555555 2.1
2.222 23333.33 f varchar3 string3 2022-12-03
2022-12-03 2022-12-03T22:23:23 2022-12-03T22:23:24.999999 not null
+2 \N \N \N \N \N \N \N \N \N
\N \N \N \N \N \N \N not null
+2 true 31 32 33 3444444444444 3555555555 3.1
3.222 33333.33 l varchar3 string3 2022-12-03
2022-12-03 2022-12-03T22:23:23 2022-12-03T22:23:24.999999 not null
+2 \N \N \N \N \N \N \N \N \N
\N \N \N \N \N \N \N not null
+2 false 10 20 30 944444444444 9555555555 9.1
9.222 93333.33 p varchar9 string9 2022-12-09
2022-12-09 2022-12-09T22:23:23 2022-12-09T22:23:24.999999 not null
-- !select --
-1 ['hello'] [2022-07-04] [1.23]
-2 \N \N \N
+\N \N \N \N \N \N \N \N \N \N
\N \N \N \N \N \N ['not null']
+[1] [11] [12] [13] [1444444444444] [1555555555] [1.1] [1.222]
[13333.33] ['d'] ['varchar2'] ['string2'] [2022-12-02]
[2022-12-02] [2022-12-02 22:23:23] [2022-12-02 22:23:24.999999] ['not
null']
+[0] [21] [22] [23] [2444444444444] [255555555] [2.1] [2.222]
[23333.33] ['f'] ['varchar3'] ['string3'] [2022-12-03]
[2022-12-03] [2022-12-03 22:23:23] [2022-12-03 22:23:24.999999] ['not
null']
+[0] [10] [20] [30] [4444444444444, 944444444444] [9555555555,
55555555555] [0.1, 9.1] [9.222, 0.222] [3333.33, 93333.33]
['c', 'p'] ['varchar1', 'varchar9'] ['string9', 'string1']
[2022-12-09, 2022-12-01] [2022-12-01, 2022-12-09] [2022-12-09
22:23:23, 2022-12-01 22:23:23] [2022-12-09 22:23:24.999999, 2022-12-01
22:23:24.999999] ['not null']
+[0, 1] [11, 31] [21, 32] [33] [4444444444444, 3444444444444]
[3555555555, 55555555555] [0.1, 3.1] [3.222, 0.222] [3333.33,
33333.33] ['c', 'l'] ['varchar3', 'varchar1'] ['string1',
'string3'] [2022-12-03, 2022-12-01] [2022-12-01, 2022-12-03]
[2022-12-03 22:23:23, 2022-12-01 22:23:23] [2022-12-03 22:23:24.999999,
2022-12-01 22:23:24.999999] ['not null']
-- !select --
-['hello', 'hello'] ['hello', 'hello', 'hello', 'hello']
+[0, 0, 1, 0, 1, 0] [10, 11, 11, 21, 31, 10] [20, 21, 12, 22, 32,
20] [30, 33, 13, 23, 33, 30] [4444444444444, 4444444444444,
1444444444444, 2444444444444, 3444444444444, 944444444444] [55555555555,
55555555555, 1555555555, 255555555, 3555555555, 9555555555] [0.1, 0.1,
1.1, 2.1, 3.1, 9.1] [0.222, 0.222, 1.222, 2.222, 3.222, 9.222] [3333.33,
3333.33, 13333.33, 23333.33, 33333.33, 93333.33] ['c', 'c', 'd', 'f', 'l',
'p'] ['varchar1', 'varchar1', 'varchar2', 'varchar3', 'varchar3', 'varchar9
[...]
-- !select --
-['hello'] ['hello']
+1 1 1 1 1 3 1 2 1 1
1 1 1 2 1 1 1
-- !select --
-1 [1, 2, 1, 2] ['hello'] [2022-07-04, 2022-07-04] [1.23]
['hello', 'hello', 'hello', 'hello']
+1 1 1 1 1 3 1 2 1 1
1 1 1 2 1 1 1
+
+-- !select --
+1 [0, 1] [21, 11, 31, 10] [21, 12, 20, 22, 32] [30, 13, 23,
33] [2444444444444, 4444444444444, 3444444444444, 1444444444444,
944444444444] [1555555555, 3555555555, 9555555555, 255555555, 55555555555]
[0.1, 1.1, 3.1, 9.1, 2.1] [3.222, 2.222, 9.222, 1.222, 0.222]
[23333.33, 3333.33, 33333.33, 93333.33, 13333.33] ['c', 'p', 'l', 'd',
'f'] ['varchar3', 'varchar1', 'varchar2', 'varchar9'] ['string9',
'string2', 'string1', 'string3'] [2022-12-03, 2022-12-09, 2022-12-02,
2022-12-01] [2022-12-01 [...]
+
+-- !select --
+1 1 1 1 1 1 3 1 2 1
1 1 1 1 2 1 1 1
+
+-- !select --
+1 [0, 0, 1, 0, 1, 0] [10, 11, 11, 21, 31, 10] [20, 21, 12,
22, 32, 20] [30, 33, 13, 23, 33, 30] [4444444444444,
4444444444444, 1444444444444, 2444444444444, 3444444444444, 944444444444]
[55555555555, 55555555555, 1555555555, 255555555, 3555555555, 9555555555]
[0.1, 0.1, 1.1, 2.1, 3.1, 9.1] [0.222, 0.222, 1.222, 2.222, 3.222, 9.222]
[3333.33, 3333.33, 13333.33, 23333.33, 33333.33, 93333.33] ['c', 'c', 'd',
'f', 'l', 'p'] ['varchar1', 'varchar1', 'varchar2', 'varchar3', 'varchar3',
'varcha [...]
+
+-- !select --
+1 1 1 1 1 1 3 1 2 1
1 1 1 1 2 1 1 1
+
+-- !select --
+1 \N \N \N \N \N \N \N \N \N
\N \N \N \N \N \N \N not null
+1 false 10 20 30 4444444444444 55555555555 0.1
0.222 3333.33 c varchar1 string1 2022-12-01 2022-12-01
2022-12-01T22:23:23 2022-12-01T22:23:24.999999 not null
+1 \N \N \N \N \N \N \N \N \N
\N \N \N \N \N \N \N not null
+1 false 11 21 33 4444444444444 55555555555 0.1
0.222 3333.33 c varchar1 string1 2022-12-01 2022-12-01
2022-12-01T22:23:23 2022-12-01T22:23:24.999999 not null
+1 \N \N \N \N \N \N \N \N \N
\N \N \N \N \N \N \N not null
+1 true 11 12 13 1444444444444 1555555555 1.1
1.222 13333.33 d varchar2 string2 2022-12-02
2022-12-02 2022-12-02T22:23:23 2022-12-02T22:23:24.999999 not null
+2 \N \N \N \N \N \N \N \N \N
\N \N \N \N \N \N \N not null
+2 false 21 22 23 2444444444444 255555555 2.1
2.222 23333.33 f varchar3 string3 2022-12-03
2022-12-03 2022-12-03T22:23:23 2022-12-03T22:23:24.999999 not null
+2 \N \N \N \N \N \N \N \N \N
\N \N \N \N \N \N \N not null
+2 true 31 32 33 3444444444444 3555555555 3.1
3.222 33333.33 l varchar3 string3 2022-12-03
2022-12-03 2022-12-03T22:23:23 2022-12-03T22:23:24.999999 not null
+2 \N \N \N \N \N \N \N \N \N
\N \N \N \N \N \N \N not null
+2 false 10 20 30 944444444444 9555555555 9.1
9.222 93333.33 p varchar9 string9 2022-12-09
2022-12-09 2022-12-09T22:23:23 2022-12-09T22:23:24.999999 not null
+
+-- !select --
+[0, 1] [21, 11, 31, 10] [21, 12, 20, 22, 32] [30, 13, 23, 33]
[2444444444444, 4444444444444, 3444444444444, 1444444444444, 944444444444]
[1555555555, 3555555555, 9555555555, 255555555, 55555555555] [0.1, 1.1, 3.1,
9.1, 2.1] [3.222, 2.222, 9.222, 1.222, 0.222] [23333.33, 3333.33,
33333.33, 93333.33, 13333.33] ['c', 'p', 'l', 'd', 'f']
['varchar3', 'varchar1', 'varchar2', 'varchar9'] ['string9', 'string2',
'string1', 'string3'] [2022-12-03, 2022-12-09, 2022-12-02, 2022-12-01]
[2022-12-01, [...]
+
+-- !select --
+[0, 0, 1, 0, 1, 0] [10, 11, 11, 21, 31, 10] [20, 21, 12, 22, 32,
20] [30, 33, 13, 23, 33, 30] [4444444444444, 4444444444444,
1444444444444, 2444444444444, 3444444444444, 944444444444] [55555555555,
55555555555, 1555555555, 255555555, 3555555555, 9555555555] [0.1, 0.1,
1.1, 2.1, 3.1, 9.1] [0.222, 0.222, 1.222, 2.222, 3.222, 9.222] [3333.33,
3333.33, 13333.33, 23333.33, 33333.33, 93333.33] ['c', 'c', 'd', 'f', 'l',
'p'] ['varchar1', 'varchar1', 'varchar2', 'varchar3', 'varchar3', 'varchar9
[...]
+
+-- !select --
+1 1 1 1 1 3 1 2 1 1
1 1 1 2 1 1 1
+
+-- !select --
+1 1 1 1 1 3 1 2 1 1
1 1 1 2 1 1 1
+
+-- !select --
+1 [0, 1] [21, 11, 31, 10] [21, 12, 20, 22, 32] [30, 13, 23,
33] [2444444444444, 4444444444444, 3444444444444, 1444444444444,
944444444444] [1555555555, 3555555555, 9555555555, 255555555, 55555555555]
[0.1, 1.1, 3.1, 9.1, 2.1] [3.222, 2.222, 9.222, 1.222, 0.222]
[23333.33, 3333.33, 33333.33, 93333.33, 13333.33] ['c', 'p', 'l', 'd',
'f'] ['varchar3', 'varchar1', 'varchar2', 'varchar9'] ['string9',
'string2', 'string1', 'string3'] [2022-12-03, 2022-12-09, 2022-12-02,
2022-12-01] [2022-12-01 [...]
+
+-- !select --
+1 1 1 1 1 1 3 1 2 1
1 1 1 1 2 1 1 1
+
+-- !select --
+1 [0, 0, 1, 0, 1, 0] [10, 11, 11, 21, 31, 10] [20, 21, 12,
22, 32, 20] [30, 33, 13, 23, 33, 30] [4444444444444,
4444444444444, 1444444444444, 2444444444444, 3444444444444, 944444444444]
[55555555555, 55555555555, 1555555555, 255555555, 3555555555, 9555555555]
[0.1, 0.1, 1.1, 2.1, 3.1, 9.1] [0.222, 0.222, 1.222, 2.222, 3.222, 9.222]
[3333.33, 3333.33, 13333.33, 23333.33, 33333.33, 93333.33] ['c', 'c', 'd',
'f', 'l', 'p'] ['varchar1', 'varchar1', 'varchar2', 'varchar3', 'varchar3',
'varcha [...]
+
+-- !select --
+1 1 1 1 1 1 3 1 2 1
1 1 1 1 2 1 1 1
-- !select43 --
[10, 8]
diff --git
a/regression-test/data/query_p0/sql_functions/aggregate_functions/test_aggregate_group_array.out
b/regression-test/data/query_p0/sql_functions/aggregate_functions/test_aggregate_group_array.out
new file mode 100644
index 0000000000..9addf1e4bf
--- /dev/null
+++
b/regression-test/data/query_p0/sql_functions/aggregate_functions/test_aggregate_group_array.out
@@ -0,0 +1,27 @@
+-- This file is automatically generated. You should know what you did if you
want to edit this
+-- !select --
+1 \N \N \N \N \N \N \N \N \N
\N \N \N \N \N \N \N not null
+1 false 10 20 30 4444444444444 55555555555 0.1
0.222 3333.33 c varchar1 string1 2022-12-01 2022-12-01
2022-12-01T22:23:23 2022-12-01T22:23:24.999999 not null
+1 \N \N \N \N \N \N \N \N \N
\N \N \N \N \N \N \N not null
+1 false 11 21 33 4444444444444 55555555555 0.1
0.222 3333.33 c varchar1 string1 2022-12-01 2022-12-01
2022-12-01T22:23:23 2022-12-01T22:23:24.999999 not null
+1 \N \N \N \N \N \N \N \N \N
\N \N \N \N \N \N \N not null
+1 true 11 12 13 1444444444444 1555555555 1.1
1.222 13333.33 d varchar2 string2 2022-12-02
2022-12-02 2022-12-02T22:23:23 2022-12-02T22:23:24.999999 not null
+2 \N \N \N \N \N \N \N \N \N
\N \N \N \N \N \N \N not null
+2 false 21 22 23 2444444444444 255555555 2.1
2.222 23333.33 f varchar3 string3 2022-12-03
2022-12-03 2022-12-03T22:23:23 2022-12-03T22:23:24.999999 not null
+2 \N \N \N \N \N \N \N \N \N
\N \N \N \N \N \N \N not null
+2 true 31 32 33 3444444444444 3555555555 3.1
3.222 33333.33 l varchar3 string3 2022-12-03
2022-12-03 2022-12-03T22:23:23 2022-12-03T22:23:24.999999 not null
+2 \N \N \N \N \N \N \N \N \N
\N \N \N \N \N \N \N not null
+2 false 10 20 30 944444444444 9555555555 9.1
9.222 93333.33 p varchar9 string9 2022-12-09
2022-12-09 2022-12-09T22:23:23 2022-12-09T22:23:24.999999 not null
+
+-- !select --
+[0, 1] [21, 11, 31, 10] [21, 12, 20, 22, 32] [30, 13, 23, 33]
[2444444444444, 4444444444444, 3444444444444, 1444444444444, 944444444444]
[1555555555, 3555555555, 9555555555, 255555555, 55555555555] [0.1, 1.1, 3.1,
9.1, 2.1] [3.222, 2.222, 9.222, 1.222, 0.222] [23333.33, 3333.33,
33333.33, 93333.33, 13333.33] ['c', 'p', 'l', 'd', 'f']
['varchar3', 'varchar1', 'varchar2', 'varchar9'] ['string9', 'string2',
'string1', 'string3'] [2022-12-03, 2022-12-09, 2022-12-02, 2022-12-01]
[2022-12-01, [...]
+
+-- !select --
+[0] [10] [20] [30] [4444444444444] [1555555555, 255555555,
55555555555] [0.1] [1.222, 0.222] [3333.33] ['c'] ['varchar1']
['string1'] [2022-12-01] [2022-12-01, 2022-12-02] [2022-12-01
22:23:23] [2022-12-01 22:23:24.999999] ['not null']
+
+-- !select --
+1 [0, 1] [21, 11, 31, 10] [21, 12, 20, 22, 32] [30, 13, 23,
33] [2444444444444, 4444444444444, 3444444444444, 1444444444444,
944444444444] [1555555555, 3555555555, 9555555555, 255555555, 55555555555]
[0.1, 1.1, 3.1, 9.1, 2.1] [3.222, 2.222, 9.222, 1.222, 0.222]
[23333.33, 3333.33, 33333.33, 93333.33, 13333.33] ['c', 'p', 'l', 'd',
'f'] ['varchar3', 'varchar1', 'varchar2', 'varchar9'] ['string9',
'string2', 'string1', 'string3'] [2022-12-03, 2022-12-09, 2022-12-02,
2022-12-01] [2022-12-01 [...]
+
+-- !select --
+1 [0] [10] [20] [30] [4444444444444] [1555555555, 255555555,
55555555555] [0.1] [1.222, 0.222] [3333.33] ['c'] ['varchar1']
['string1'] [2022-12-01] [2022-12-01, 2022-12-02] [2022-12-01
22:23:23] [2022-12-01 22:23:24.999999] ['not null']
+
diff --git
a/regression-test/suites/query_p0/sql_functions/aggregate_functions/test_aggregate_collect.groovy
b/regression-test/suites/query_p0/sql_functions/aggregate_functions/test_aggregate_collect.groovy
index a53e795064..24eb2f7865 100644
---
a/regression-test/suites/query_p0/sql_functions/aggregate_functions/test_aggregate_collect.groovy
+++
b/regression-test/suites/query_p0/sql_functions/aggregate_functions/test_aggregate_collect.groovy
@@ -16,37 +16,599 @@
// under the License.
suite("test_aggregate_collect") {
+
sql "set enable_vectorized_engine = true"
def tableName = "collect_test"
- def tableCTAS = "collect_test_ctas"
+ def tableCTAS1 = "collect_set_test_ctas1"
+ def tableCTAS2 = "collect_set_test_ctas2"
+ def tableCTAS3 = "collect_list_test_ctas3"
+ def tableCTAS4 = "collect_list_test_ctas4"
+
+
+ sql "DROP TABLE IF EXISTS ${tableName}"
+ sql "DROP TABLE IF EXISTS ${tableCTAS1}"
+ sql "DROP TABLE IF EXISTS ${tableCTAS2}"
+ sql "DROP TABLE IF EXISTS ${tableCTAS3}"
+ sql "DROP TABLE IF EXISTS ${tableCTAS4}"
+
+ sql """
+ CREATE TABLE IF NOT EXISTS ${tableName} (
+ c_id INT,
+ c_bool BOOLEAN,
+ c_tinyint TINYINT,
+ c_smallint SMALLINT,
+ c_int INT,
+ c_bigint BIGINT,
+ c_largeint LARGEINT,
+ c_float FLOAT,
+ c_double DOUBLE,
+ c_decimal DECIMAL(9, 2),
+ c_char CHAR,
+ c_varchar VARCHAR(10),
+ c_string STRING,
+ c_date DATE,
+ c_datev2 DATEV2,
+ c_date_time DATETIME,
+ c_date_timev2 DATETIMEV2(6),
+ c_string_not_null VARCHAR(10) NOT NULL
+ )
+ DISTRIBUTED BY HASH(c_int) BUCKETS 1
+ PROPERTIES (
+ "replication_num" = "1"
+ )
+ """
+
+ sql """
+ INSERT INTO ${tableName} values
+ (1, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL,
+ NULL, NULL, NULL, NULL, 'not null'),
+ (1, false, 10, 20, 30, 4444444444444, 55555555555, 0.1, 0.222,
3333.33, 'c', 'varchar1', 'string1',
+ '2022-12-01', '2022-12-01', '2022-12-01 22:23:23', '2022-12-01
22:23:24.999999', 'not null')
+ """
+
+ sql """
+ INSERT INTO ${tableName} values
+ (1, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL,
+ NULL, NULL, NULL, NULL, 'not null'),
+ (1, false, 11, 21, 33, 4444444444444, 55555555555, 0.1, 0.222,
3333.33, 'c', 'varchar1', 'string1',
+ '2022-12-01', '2022-12-01', '2022-12-01 22:23:23', '2022-12-01
22:23:24.999999', 'not null')
+ """
+
+ sql """
+ INSERT INTO ${tableName} values
+ (1, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL,
+ NULL, NULL, NULL, NULL, 'not null'),
+ (1, true, 11, 12, 13, 1444444444444, 1555555555, 1.1, 1.222,
13333.33, 'd', 'varchar2', 'string2',
+ '2022-12-02', '2022-12-02', '2022-12-02 22:23:23', '2022-12-02
22:23:24.999999', 'not null')
+ """
+
+ sql """
+ INSERT INTO ${tableName} values
+ (2, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL,
+ NULL, NULL, NULL, NULL, 'not null'),
+ (2, false, 21, 22, 23, 2444444444444, 255555555, 2.1, 2.222,
23333.33, 'f', 'varchar3', 'string3',
+ '2022-12-03', '2022-12-03', '2022-12-03 22:23:23', '2022-12-03
22:23:24.999999', 'not null')
+ """
+
+ sql """
+ INSERT INTO ${tableName} values
+ (2, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL,
+ NULL, NULL, NULL, NULL, 'not null'),
+ (2, true, 31, 32, 33, 3444444444444, 3555555555, 3.1, 3.222,
33333.33, 'l', 'varchar3', 'string3',
+ '2022-12-03', '2022-12-03', '2022-12-03 22:23:23', '2022-12-03
22:23:24.999999', 'not null')
+ """
+
+ sql """
+ INSERT INTO ${tableName} values
+ (2, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL,
+ NULL, NULL, NULL, NULL, 'not null'),
+ (2, false, 10, 20, 30, 944444444444, 9555555555, 9.1, 9.222,
93333.33, 'p', 'varchar9', 'string9',
+ '2022-12-09', '2022-12-09', '2022-12-09 22:23:23', '2022-12-09
22:23:24.999999', 'not null')
+ """
+
+ qt_select """
+ SELECT * FROM ${tableName}
+ """
+
+ qt_select """
+ SELECT
+ collect_set(c_bool),
+ collect_set(c_tinyint),
+ collect_set(c_smallint),
+ collect_set(c_int),
+ collect_set(c_bigint),
+ collect_set(c_largeint),
+ collect_set(c_float),
+ collect_set(c_double),
+ collect_set(c_decimal),
+ collect_set(c_char),
+ collect_set(c_varchar),
+ collect_set(c_string),
+ collect_set(c_date),
+ collect_set(c_datev2),
+ collect_set(c_date_time),
+ collect_set(c_date_timev2),
+ collect_set(c_string_not_null)
+ FROM
+ ${tableName}
+ group by c_int order by c_int
+ """
+
+ qt_select """
+ SELECT
+ collect_list(c_bool),
+ collect_list(c_tinyint),
+ collect_list(c_smallint),
+ collect_list(c_int),
+ collect_list(c_bigint),
+ collect_list(c_largeint),
+ collect_list(c_float),
+ collect_list(c_double),
+ collect_list(c_decimal),
+ collect_list(c_char),
+ collect_list(c_varchar),
+ collect_list(c_string),
+ collect_list(c_date),
+ collect_list(c_datev2),
+ collect_list(c_date_time),
+ collect_list(c_date_timev2),
+ collect_list(c_string_not_null)
+ FROM
+ ${tableName}
+ """
+
+ qt_select """
+ SELECT
+ size(collect_set(c_bool,1)),
+ size(collect_set(c_tinyint,1)),
+ size(collect_set(c_smallint,1)),
+ size(collect_set(c_int,1)),
+ size(collect_set(c_bigint,1)),
+ size(collect_set(c_largeint,3)),
+ size(collect_set(c_float,1)),
+ size(collect_set(c_double,2)),
+ size(collect_set(c_decimal,1)),
+ size(collect_set(c_char,1)),
+ size(collect_set(c_varchar,1)),
+ size(collect_set(c_string,1)),
+ size(collect_set(c_date,1)),
+ size(collect_set(c_datev2,2)),
+ size(collect_set(c_date_time,1)),
+ size(collect_set(c_date_timev2,1)),
+ size(collect_set(c_string_not_null,1))
+ FROM
+ ${tableName}
+ """
+
+ qt_select """
+ SELECT
+ size(collect_list(c_bool,1)),
+ size(collect_list(c_tinyint,1)),
+ size(collect_list(c_smallint,1)),
+ size(collect_list(c_int,1)),
+ size(collect_list(c_bigint,1)),
+ size(collect_list(c_largeint,3)),
+ size(collect_list(c_float,1)),
+ size(collect_list(c_double,2)),
+ size(collect_list(c_decimal,1)),
+ size(collect_list(c_char,1)),
+ size(collect_list(c_varchar,1)),
+ size(collect_list(c_string,1)),
+ size(collect_list(c_date,1)),
+ size(collect_list(c_datev2,2)),
+ size(collect_list(c_date_time,1)),
+ size(collect_list(c_date_timev2,1)),
+ size(collect_list(c_string_not_null,1))
+ FROM
+ ${tableName}
+ """
+
+ sql """
+ CREATE TABLE ${tableCTAS1} PROPERTIES("replication_num" = "1") AS
+ SELECT
+ 1,
+ collect_set(c_bool),
+ collect_set(c_tinyint),
+ collect_set(c_smallint),
+ collect_set(c_int),
+ collect_set(c_bigint),
+ collect_set(c_largeint),
+ collect_set(c_float),
+ collect_set(c_double),
+ collect_set(c_decimal),
+ collect_set(c_char),
+ collect_set(c_varchar),
+ collect_set(c_string),
+ collect_set(c_date),
+ collect_set(c_datev2),
+ collect_set(c_date_time),
+ collect_set(c_date_timev2),
+ collect_set(c_string_not_null)
+ FROM
+ ${tableName}
+ """
+
+
+ sql """
+ CREATE TABLE ${tableCTAS2} PROPERTIES("replication_num" = "1") AS
+ SELECT
+ 1,
+ size(collect_set(c_bool,1)),
+ size(collect_set(c_tinyint,1)),
+ size(collect_set(c_smallint,1)),
+ size(collect_set(c_int,1)),
+ size(collect_set(c_bigint,1)),
+ size(collect_set(c_largeint,3)),
+ size(collect_set(c_float,1)),
+ size(collect_set(c_double,2)),
+ size(collect_set(c_decimal,1)),
+ size(collect_set(c_char,1)),
+ size(collect_set(c_varchar,1)),
+ size(collect_set(c_string,1)),
+ size(collect_set(c_date,1)),
+ size(collect_set(c_datev2,2)),
+ size(collect_set(c_date_time,1)),
+ size(collect_set(c_date_timev2,1)),
+ size(collect_set(c_string_not_null,1))
+ FROM
+ ${tableName}
+ """
+
+ sql """
+ CREATE TABLE ${tableCTAS3} PROPERTIES("replication_num" = "1") AS
+ SELECT
+ 1,
+ collect_list(c_bool),
+ collect_list(c_tinyint),
+ collect_list(c_smallint),
+ collect_list(c_int),
+ collect_list(c_bigint),
+ collect_list(c_largeint),
+ collect_list(c_float),
+ collect_list(c_double),
+ collect_list(c_decimal),
+ collect_list(c_char),
+ collect_list(c_varchar),
+ collect_list(c_string),
+ collect_list(c_date),
+ collect_list(c_datev2),
+ collect_list(c_date_time),
+ collect_list(c_date_timev2),
+ collect_list(c_string_not_null)
+ FROM
+ ${tableName}
+ """
+
+ sql """
+ CREATE TABLE ${tableCTAS4} PROPERTIES("replication_num" = "1") AS
+ SELECT
+ 1,
+ size(collect_list(c_bool,1)),
+ size(collect_list(c_tinyint,1)),
+ size(collect_list(c_smallint,1)),
+ size(collect_list(c_int,1)),
+ size(collect_list(c_bigint,1)),
+ size(collect_list(c_largeint,3)),
+ size(collect_list(c_float,1)),
+ size(collect_list(c_double,2)),
+ size(collect_list(c_decimal,1)),
+ size(collect_list(c_char,1)),
+ size(collect_list(c_varchar,1)),
+ size(collect_list(c_string,1)),
+ size(collect_list(c_date,1)),
+ size(collect_list(c_datev2,2)),
+ size(collect_list(c_date_time,1)),
+ size(collect_list(c_date_timev2,1)),
+ size(collect_list(c_string_not_null,1))
+ FROM
+ ${tableName}
+ """
+
+ qt_select "SELECT * FROM ${tableCTAS1}"
+ qt_select "SELECT * FROM ${tableCTAS2}"
+ qt_select "SELECT * FROM ${tableCTAS3}"
+ qt_select "SELECT * FROM ${tableCTAS4}"
+
sql "DROP TABLE IF EXISTS ${tableName}"
- sql "DROP TABLE IF EXISTS ${tableCTAS}"
- sql """
- CREATE TABLE IF NOT EXISTS ${tableName} (
- c_int INT,
- c_string VARCHAR(10),
- c_date Date,
- c_decimal DECIMAL(10, 2),
- c_string_not_null VARCHAR(10) NOT NULL
+ sql "DROP TABLE IF EXISTS ${tableCTAS1}"
+ sql "DROP TABLE IF EXISTS ${tableCTAS2}"
+ sql "DROP TABLE IF EXISTS ${tableCTAS3}"
+ sql "DROP TABLE IF EXISTS ${tableCTAS4}"
+
+ def tableName_11 = "group_uniq_array_test"
+ def tableCTAS1_11 = "group_uniq_array_test_ctas1"
+ def tableCTAS2_11 = "group_uniq_array_test_ctas2"
+ def tableCTAS3_11 = "group_array_test_ctas3"
+ def tableCTAS4_11 = "group_array_test_ctas4"
+
+ sql "DROP TABLE IF EXISTS ${tableName_11}"
+ sql "DROP TABLE IF EXISTS ${tableCTAS1_11}"
+ sql "DROP TABLE IF EXISTS ${tableCTAS2_11}"
+ sql "DROP TABLE IF EXISTS ${tableCTAS3_11}"
+ sql "DROP TABLE IF EXISTS ${tableCTAS4_11}"
+
+ sql """
+ CREATE TABLE IF NOT EXISTS ${tableName_11} (
+ c_id INT,
+ c_bool BOOLEAN,
+ c_tinyint TINYINT,
+ c_smallint SMALLINT,
+ c_int INT,
+ c_bigint BIGINT,
+ c_largeint LARGEINT,
+ c_float FLOAT,
+ c_double DOUBLE,
+ c_decimal DECIMAL(9, 2),
+ c_char CHAR,
+ c_varchar VARCHAR(10),
+ c_string STRING,
+ c_date DATE,
+ c_datev2 DATEV2,
+ c_date_time DATETIME,
+ c_date_timev2 DATETIMEV2(6),
+ c_string_not_null VARCHAR(10) NOT NULL
)
DISTRIBUTED BY HASH(c_int) BUCKETS 1
PROPERTIES (
"replication_num" = "1"
- )
+ )
+ """
+
+ sql """
+ INSERT INTO ${tableName_11} values
+ (1, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL,
+ NULL, NULL, NULL, NULL, 'not null'),
+ (1, false, 10, 20, 30, 4444444444444, 55555555555, 0.1, 0.222,
3333.33, 'c', 'varchar1', 'string1',
+ '2022-12-01', '2022-12-01', '2022-12-01 22:23:23', '2022-12-01
22:23:24.999999', 'not null')
+ """
+
+ sql """
+ INSERT INTO ${tableName_11} values
+ (1, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL,
+ NULL, NULL, NULL, NULL, 'not null'),
+ (1, false, 11, 21, 33, 4444444444444, 55555555555, 0.1, 0.222,
3333.33, 'c', 'varchar1', 'string1',
+ '2022-12-01', '2022-12-01', '2022-12-01 22:23:23', '2022-12-01
22:23:24.999999', 'not null')
+ """
+
+ sql """
+ INSERT INTO ${tableName_11} values
+ (1, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL,
+ NULL, NULL, NULL, NULL, 'not null'),
+ (1, true, 11, 12, 13, 1444444444444, 1555555555, 1.1, 1.222,
13333.33, 'd', 'varchar2', 'string2',
+ '2022-12-02', '2022-12-02', '2022-12-02 22:23:23', '2022-12-02
22:23:24.999999', 'not null')
+ """
+
+ sql """
+ INSERT INTO ${tableName_11} values
+ (2, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL,
+ NULL, NULL, NULL, NULL, 'not null'),
+ (2, false, 21, 22, 23, 2444444444444, 255555555, 2.1, 2.222,
23333.33, 'f', 'varchar3', 'string3',
+ '2022-12-03', '2022-12-03', '2022-12-03 22:23:23', '2022-12-03
22:23:24.999999', 'not null')
+ """
+
+ sql """
+ INSERT INTO ${tableName_11} values
+ (2, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL,
+ NULL, NULL, NULL, NULL, 'not null'),
+ (2, true, 31, 32, 33, 3444444444444, 3555555555, 3.1, 3.222,
33333.33, 'l', 'varchar3', 'string3',
+ '2022-12-03', '2022-12-03', '2022-12-03 22:23:23', '2022-12-03
22:23:24.999999', 'not null')
"""
- sql "INSERT INTO ${tableName} values(1,'hello','2022-07-04',1.23,'hello'),
(2,NULL,NULL,NULL,'hello')"
- sql "INSERT INTO ${tableName} values(1,'hello','2022-07-04',1.23,'hello'),
(2,NULL,NULL,NULL,'hello')"
- qt_select "select
c_int,collect_list(c_string),collect_list(c_date),collect_list(c_decimal) from
${tableName} group by c_int order by c_int"
- qt_select "select
c_int,collect_set(c_string),collect_set(c_date),collect_set(c_decimal) from
${tableName} group by c_int order by c_int"
+ sql """
+ INSERT INTO ${tableName_11} values
+ (2, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL,
+ NULL, NULL, NULL, NULL, 'not null'),
+ (2, false, 10, 20, 30, 944444444444, 9555555555, 9.1, 9.222,
93333.33, 'p', 'varchar9', 'string9',
+ '2022-12-09', '2022-12-09', '2022-12-09 22:23:23', '2022-12-09
22:23:24.999999', 'not null')
+ """
+
+ qt_select """
+ SELECT * FROM ${tableName_11}
+ """
- // test without GROUP BY
- qt_select "select collect_list(c_string),collect_list(c_string_not_null)
from ${tableName}"
- qt_select "select collect_set(c_string),collect_set(c_string_not_null)
from ${tableName}"
+ qt_select """
+ SELECT
+ group_uniq_array(c_bool),
+ group_uniq_array(c_tinyint),
+ group_uniq_array(c_smallint),
+ group_uniq_array(c_int),
+ group_uniq_array(c_bigint),
+ group_uniq_array(c_largeint),
+ group_uniq_array(c_float),
+ group_uniq_array(c_double),
+ group_uniq_array(c_decimal),
+ group_uniq_array(c_char),
+ group_uniq_array(c_varchar),
+ group_uniq_array(c_string),
+ group_uniq_array(c_date),
+ group_uniq_array(c_datev2),
+ group_uniq_array(c_date_time),
+ group_uniq_array(c_date_timev2),
+ group_uniq_array(c_string_not_null)
+ FROM
+ ${tableName_11}
+ """
+
+ qt_select """
+ SELECT
+ group_array(c_bool),
+ group_array(c_tinyint),
+ group_array(c_smallint),
+ group_array(c_int),
+ group_array(c_bigint),
+ group_array(c_largeint),
+ group_array(c_float),
+ group_array(c_double),
+ group_array(c_decimal),
+ group_array(c_char),
+ group_array(c_varchar),
+ group_array(c_string),
+ group_array(c_date),
+ group_array(c_datev2),
+ group_array(c_date_time),
+ group_array(c_date_timev2),
+ group_array(c_string_not_null)
+ FROM
+ ${tableName_11}
+ """
- sql """ CREATE TABLE ${tableCTAS} PROPERTIES("replication_num" = "1") AS
SELECT
1,collect_list(c_int),collect_set(c_string),collect_list(c_date),collect_set(c_decimal),collect_list(c_string_not_null)
FROM ${tableName} """
- qt_select "SELECT * from ${tableCTAS}"
+ qt_select """
+ SELECT
+ size(group_uniq_array(c_bool,1)),
+ size(group_uniq_array(c_tinyint,1)),
+ size(group_uniq_array(c_smallint,1)),
+ size(group_uniq_array(c_int,1)),
+ size(group_uniq_array(c_bigint,1)),
+ size(group_uniq_array(c_largeint,3)),
+ size(group_uniq_array(c_float,1)),
+ size(group_uniq_array(c_double,2)),
+ size(group_uniq_array(c_decimal,1)),
+ size(group_uniq_array(c_char,1)),
+ size(group_uniq_array(c_varchar,1)),
+ size(group_uniq_array(c_string,1)),
+ size(group_uniq_array(c_date,1)),
+ size(group_uniq_array(c_datev2,2)),
+ size(group_uniq_array(c_date_time,1)),
+ size(group_uniq_array(c_date_timev2,1)),
+ size(group_uniq_array(c_string_not_null,1))
+ FROM
+ ${tableName_11}
+ """
+
+ qt_select """
+ SELECT
+ size(group_array(c_bool,1)),
+ size(group_array(c_tinyint,1)),
+ size(group_array(c_smallint,1)),
+ size(group_array(c_int,1)),
+ size(group_array(c_bigint,1)),
+ size(group_array(c_largeint,3)),
+ size(group_array(c_float,1)),
+ size(group_array(c_double,2)),
+ size(group_array(c_decimal,1)),
+ size(group_array(c_char,1)),
+ size(group_array(c_varchar,1)),
+ size(group_array(c_string,1)),
+ size(group_array(c_date,1)),
+ size(group_array(c_datev2,2)),
+ size(group_array(c_date_time,1)),
+ size(group_array(c_date_timev2,1)),
+ size(group_array(c_string_not_null,1))
+ FROM
+ ${tableName_11}
+ """
+
+ sql """
+ CREATE TABLE ${tableCTAS1_11} PROPERTIES("replication_num" = "1") AS
+ SELECT
+ 1,
+ group_uniq_array(c_bool),
+ group_uniq_array(c_tinyint),
+ group_uniq_array(c_smallint),
+ group_uniq_array(c_int),
+ group_uniq_array(c_bigint),
+ group_uniq_array(c_largeint),
+ group_uniq_array(c_float),
+ group_uniq_array(c_double),
+ group_uniq_array(c_decimal),
+ group_uniq_array(c_char),
+ group_uniq_array(c_varchar),
+ group_uniq_array(c_string),
+ group_uniq_array(c_date),
+ group_uniq_array(c_datev2),
+ group_uniq_array(c_date_time),
+ group_uniq_array(c_date_timev2),
+ group_uniq_array(c_string_not_null)
+ FROM
+ ${tableName_11}
+ """
+
+ sql """
+ CREATE TABLE ${tableCTAS2_11} PROPERTIES("replication_num" = "1") AS
+ SELECT
+ 1,
+ size(group_uniq_array(c_bool,1)),
+ size(group_uniq_array(c_tinyint,1)),
+ size(group_uniq_array(c_smallint,1)),
+ size(group_uniq_array(c_int,1)),
+ size(group_uniq_array(c_bigint,1)),
+ size(group_uniq_array(c_largeint,3)),
+ size(group_uniq_array(c_float,1)),
+ size(group_uniq_array(c_double,2)),
+ size(group_uniq_array(c_decimal,1)),
+ size(group_uniq_array(c_char,1)),
+ size(group_uniq_array(c_varchar,1)),
+ size(group_uniq_array(c_string,1)),
+ size(group_uniq_array(c_date,1)),
+ size(group_uniq_array(c_datev2,2)),
+ size(group_uniq_array(c_date_time,1)),
+ size(group_uniq_array(c_date_timev2,1)),
+ size(group_uniq_array(c_string_not_null,1))
+ FROM
+ ${tableName_11}
+ """
+
+ sql """
+ CREATE TABLE ${tableCTAS3_11} PROPERTIES("replication_num" = "1") AS
+ SELECT
+ 1,
+ group_array(c_bool),
+ group_array(c_tinyint),
+ group_array(c_smallint),
+ group_array(c_int),
+ group_array(c_bigint),
+ group_array(c_largeint),
+ group_array(c_float),
+ group_array(c_double),
+ group_array(c_decimal),
+ group_array(c_char),
+ group_array(c_varchar),
+ group_array(c_string),
+ group_array(c_date),
+ group_array(c_datev2),
+ group_array(c_date_time),
+ group_array(c_date_timev2),
+ group_array(c_string_not_null)
+ FROM
+ ${tableName_11}
+ """
+
+ sql """
+ CREATE TABLE ${tableCTAS4_11} PROPERTIES("replication_num" = "1") AS
+ SELECT
+ 1,
+ size(group_array(c_bool,1)),
+ size(group_array(c_tinyint,1)),
+ size(group_array(c_smallint,1)),
+ size(group_array(c_int,1)),
+ size(group_array(c_bigint,1)),
+ size(group_array(c_largeint,3)),
+ size(group_array(c_float,1)),
+ size(group_array(c_double,2)),
+ size(group_array(c_decimal,1)),
+ size(group_array(c_char,1)),
+ size(group_array(c_varchar,1)),
+ size(group_array(c_string,1)),
+ size(group_array(c_date,1)),
+ size(group_array(c_datev2,2)),
+ size(group_array(c_date_time,1)),
+ size(group_array(c_date_timev2,1)),
+ size(group_array(c_string_not_null,1))
+ FROM
+ ${tableName_11}
+ """
+
+ qt_select "SELECT * FROM ${tableCTAS1_11}"
+ qt_select "SELECT * FROM ${tableCTAS2_11}"
+ qt_select "SELECT * FROM ${tableCTAS3_11}"
+ qt_select "SELECT * FROM ${tableCTAS4_11}"
+
+ sql "DROP TABLE IF EXISTS ${tableName_11}"
+ sql "DROP TABLE IF EXISTS ${tableCTAS1_11}"
+ sql "DROP TABLE IF EXISTS ${tableCTAS2_11}"
+ sql "DROP TABLE IF EXISTS ${tableCTAS3_11}"
+ sql "DROP TABLE IF EXISTS ${tableCTAS4_11}"
// topn_array
def tableName_12 = "topn_array"
@@ -67,8 +629,8 @@ suite("test_aggregate_collect") {
sql "INSERT INTO ${tableName_12} values(1,10,'2022-11-1',6.8754576),
(2,8,'2022-11-3',0.576), (2,10,'2022-11-2',1.234) ,(3,10,'2022-11-2',0.576)
,(5,29,'2022-11-2',6.8754576) ,(6,8,'2022-11-1',6.8754576)"
qt_select43 "select topn_array(level,2) from ${tableName_12}"
- qt_select44 "select topn_array(level,2,100) from ${tableName_12}"
- qt_select45 "select topn_array(dt,2,100) from ${tableName_12}"
- qt_select46 "select topn_array(num,2,100) from ${tableName_12}"
- sql "DROP TABLE IF EXISTS ${tableName_12}"
+ qt_select44 "select topn_array(level,2,100) from ${tableName_12}"
+ qt_select45 "select topn_array(dt,2,100) from ${tableName_12}"
+ qt_select46 "select topn_array(num,2,100) from ${tableName_12}"
+ sql "DROP TABLE IF EXISTS ${tableName_12}"
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]