This is an automated email from the ASF dual-hosted git repository.
lihaopeng pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 374303186c [Vectorized](function) support topn_array function (#13869)
374303186c is described below
commit 374303186c996342c1c1bbbefd01e13a2cc5112f
Author: zhangstar333 <[email protected]>
AuthorDate: Wed Nov 2 19:49:23 2022 +0800
[Vectorized](function) support topn_array function (#13869)
---
.../aggregate_function_topn.cpp | 61 ++++++++++
.../aggregate_functions/aggregate_function_topn.h | 133 +++++++++++++++++----
be/test/vec/aggregate_functions/agg_test.cpp | 2 +-
.../aggregate_functions/vec_window_funnel_test.cpp | 7 +-
.../aggregate-functions/topn_array.md | 61 ++++++++++
docs/sidebars.json | 1 +
.../aggregate-functions/topn_array.md | 60 ++++++++++
.../java/org/apache/doris/catalog/FunctionSet.java | 7 ++
.../main/java/org/apache/doris/catalog/Type.java | 2 +
.../test_select_stddev_variance_window.out | 132 ++++++++++----------
.../aggregate_functions/test_aggregate_collect.out | 12 ++
.../test_select_stddev_variance_window.out | 132 ++++++++++----------
.../test_select_stddev_variance_window.groovy | 12 +-
.../test_aggregate_collect.groovy | 24 ++++
.../test_select_stddev_variance_window.groovy | 12 +-
15 files changed, 482 insertions(+), 176 deletions(-)
diff --git a/be/src/vec/aggregate_functions/aggregate_function_topn.cpp
b/be/src/vec/aggregate_functions/aggregate_function_topn.cpp
index 19f52fbff8..74bb154d6a 100644
--- a/be/src/vec/aggregate_functions/aggregate_function_topn.cpp
+++ b/be/src/vec/aggregate_functions/aggregate_function_topn.cpp
@@ -17,6 +17,8 @@
#include <vec/aggregate_functions/aggregate_function_topn.h>
+#include "vec/aggregate_functions/helpers.h"
+#include "vec/core/types.h"
namespace doris::vectorized {
AggregateFunctionPtr create_aggregate_function_topn(const std::string& name,
@@ -38,8 +40,67 @@ AggregateFunctionPtr create_aggregate_function_topn(const
std::string& name,
return nullptr;
}
+template <bool has_default_param>
+AggregateFunctionPtr create_topn_array(const DataTypes& argument_types) {
+ auto type = argument_types[0].get();
+ if (type->is_nullable()) {
+ type = assert_cast<const
DataTypeNullable*>(type)->get_nested_type().get();
+ }
+
+ WhichDataType which(*type);
+
+#define DISPATCH(TYPE)
\
+ if (which.idx == TypeIndex::TYPE)
\
+ return AggregateFunctionPtr(
\
+ new AggregateFunctionTopNArray<
\
+ AggregateFunctionTopNImplArray<TYPE,
has_default_param>, TYPE>( \
+ argument_types));
+ FOR_NUMERIC_TYPES(DISPATCH)
+#undef DISPATCH
+ if (which.is_string_or_fixed_string()) {
+ return AggregateFunctionPtr(new AggregateFunctionTopNArray<
+
AggregateFunctionTopNImplArray<std::string, has_default_param>,
+ std::string>(argument_types));
+ }
+ if (which.is_decimal()) {
+ return AggregateFunctionPtr(
+ new AggregateFunctionTopNArray<
+ AggregateFunctionTopNImplArray<Decimal128,
has_default_param>, Decimal128>(
+ argument_types));
+ }
+ if (which.is_date_or_datetime() || which.is_date_time_v2()) {
+ return AggregateFunctionPtr(
+ new AggregateFunctionTopNArray<
+ AggregateFunctionTopNImplArray<Int64,
has_default_param>, Int64>(
+ argument_types));
+ }
+ if (which.is_date_v2()) {
+ return AggregateFunctionPtr(
+ new AggregateFunctionTopNArray<
+ AggregateFunctionTopNImplArray<UInt32,
has_default_param>, UInt32>(
+ argument_types));
+ }
+
+ LOG(WARNING) << fmt::format("Illegal argument type for aggregate function
topn_array is: {}",
+ type->get_name());
+ return nullptr;
+}
+
+AggregateFunctionPtr create_aggregate_function_topn_array(const std::string&
name,
+ const DataTypes&
argument_types,
+ const Array&
parameters,
+ const bool
result_is_nullable) {
+ bool has_default_param = (argument_types.size() == 3);
+ if (has_default_param) {
+ return create_topn_array<true>(argument_types);
+ } else {
+ return create_topn_array<false>(argument_types);
+ }
+}
+
void register_aggregate_function_topn(AggregateFunctionSimpleFactory& factory)
{
factory.register_function("topn", create_aggregate_function_topn);
+ factory.register_function("topn_array",
create_aggregate_function_topn_array);
}
} // namespace doris::vectorized
\ No newline at end of file
diff --git a/be/src/vec/aggregate_functions/aggregate_function_topn.h
b/be/src/vec/aggregate_functions/aggregate_function_topn.h
index ae9fdf322d..b7aa6ae14c 100644
--- a/be/src/vec/aggregate_functions/aggregate_function_topn.h
+++ b/be/src/vec/aggregate_functions/aggregate_function_topn.h
@@ -21,28 +21,31 @@
#include <rapidjson/stringbuffer.h>
#include <rapidjson/writer.h>
+#include <type_traits>
#include <unordered_map>
#include "vec/aggregate_functions/aggregate_function.h"
-#include "vec/aggregate_functions/aggregate_function_group_concat.h"
#include "vec/aggregate_functions/aggregate_function_simple_factory.h"
-#include "vec/aggregate_functions/helpers.h"
-#include "vec/data_types/data_type_date_time.h"
-#include "vec/data_types/data_type_decimal.h"
-#include "vec/data_types/data_type_number.h"
+#include "vec/columns/column_array.h"
+#include "vec/columns/column_string.h"
+#include "vec/columns/column_vector.h"
+#include "vec/data_types/data_type_array.h"
#include "vec/data_types/data_type_string.h"
#include "vec/io/io_helper.h"
namespace doris::vectorized {
// space-saving algorithm
+template <typename T>
struct AggregateFunctionTopNData {
+ using ColVecType =
+ std::conditional_t<IsDecimalNumber<T>, ColumnDecimal<Decimal128>,
ColumnVector<T>>;
void set_paramenters(int input_top_num, int space_expand_rate = 50) {
top_num = input_top_num;
capacity = (uint64_t)top_num * space_expand_rate;
}
- void add(const std::string& value) {
+ void add(const T& value) {
auto it = counter_map.find(value);
if (it != counter_map.end()) {
it->second++;
@@ -93,13 +96,13 @@ struct AggregateFunctionTopNData {
}
}
- std::vector<std::pair<uint64_t, std::string>> get_remain_vector() const {
- std::vector<std::pair<uint64_t, std::string>> counter_vector;
+ std::vector<std::pair<uint64_t, T>> get_remain_vector() const {
+ std::vector<std::pair<uint64_t, T>> counter_vector;
for (auto it : counter_map) {
counter_vector.emplace_back(it.second, it.first);
}
std::sort(counter_vector.begin(), counter_vector.end(),
- std::greater<std::pair<uint64_t, std::string>>());
+ std::greater<std::pair<uint64_t, T>>());
return counter_vector;
}
@@ -127,7 +130,7 @@ struct AggregateFunctionTopNData {
read_binary(element_number, buf);
counter_map.clear();
- std::pair<std::string, uint64_t> element;
+ std::pair<T, uint64_t> element;
for (auto i = 0; i < element_number; i++) {
read_binary(element.first, buf);
read_binary(element.second, buf);
@@ -152,11 +155,24 @@ struct AggregateFunctionTopNData {
return buffer.GetString();
}
+ void insert_result_into(IColumn& to) const {
+ auto counter_vector = get_remain_vector();
+ for (int i = 0; i < std::min((int)counter_vector.size(), top_num);
i++) {
+ const auto& element = counter_vector[i];
+ if constexpr (std::is_same_v<T, std::string>) {
+
static_cast<ColumnString&>(to).insert_data(element.second.c_str(),
+
element.second.length());
+ } else {
+
static_cast<ColVecType&>(to).get_data().push_back(element.second);
+ }
+ }
+ }
+
void reset() { counter_map.clear(); }
int top_num = 0;
uint64_t capacity = 0;
- phmap::flat_hash_map<std::string, uint64_t> counter_map;
+ phmap::flat_hash_map<T, uint64_t> counter_map;
};
struct StringDataImplTopN {
@@ -170,8 +186,8 @@ struct StringDataImplTopN {
template <typename DataHelper>
struct AggregateFunctionTopNImplInt {
- static void add(AggregateFunctionTopNData& __restrict place, const
IColumn** columns,
- size_t row_num) {
+ static void add(AggregateFunctionTopNData<std::string>& __restrict place,
+ const IColumn** columns, size_t row_num) {
place.set_paramenters(static_cast<const
ColumnInt32*>(columns[1])->get_element(row_num));
place.add(DataHelper::to_string(*columns[0], row_num));
}
@@ -179,27 +195,49 @@ struct AggregateFunctionTopNImplInt {
template <typename DataHelper>
struct AggregateFunctionTopNImplIntInt {
- static void add(AggregateFunctionTopNData& __restrict place, const
IColumn** columns,
- size_t row_num) {
+ static void add(AggregateFunctionTopNData<std::string>& __restrict place,
+ const IColumn** columns, size_t row_num) {
place.set_paramenters(static_cast<const
ColumnInt32*>(columns[1])->get_element(row_num),
static_cast<const
ColumnInt32*>(columns[2])->get_element(row_num));
place.add(DataHelper::to_string(*columns[0], row_num));
}
};
-//base function
-template <typename Impl>
-class AggregateFunctionTopN final
- : public IAggregateFunctionDataHelper<AggregateFunctionTopNData,
- AggregateFunctionTopN<Impl>> {
-public:
- AggregateFunctionTopN(const DataTypes& argument_types_)
- : IAggregateFunctionDataHelper<AggregateFunctionTopNData,
AggregateFunctionTopN<Impl>>(
- argument_types_, {}) {}
+template <typename T, bool has_default_param>
+struct AggregateFunctionTopNImplArray {
+ using ColVecType =
+ std::conditional_t<IsDecimalNumber<T>, ColumnDecimal<Decimal128>,
ColumnVector<T>>;
+ static void add(AggregateFunctionTopNData<T>& __restrict place, const
IColumn** columns,
+ size_t row_num) {
+ if constexpr (has_default_param) {
+ place.set_paramenters(
+ static_cast<const
ColumnInt32*>(columns[1])->get_element(row_num),
+ static_cast<const
ColumnInt32*>(columns[2])->get_element(row_num));
- String get_name() const override { return "topn"; }
+ } else {
+ place.set_paramenters(
+ static_cast<const
ColumnInt32*>(columns[1])->get_element(row_num));
+ }
+ if constexpr (std::is_same_v<T, std::string>) {
+ StringRef ref = static_cast<const
ColumnString&>(*columns[0]).get_data_at(row_num);
+ place.add(std::string(ref.data, ref.size));
+ } else {
+ T val = static_cast<const
ColVecType&>(*columns[0]).get_data()[row_num];
+ place.add(val);
+ }
+ }
+};
- DataTypePtr get_return_type() const override { return
std::make_shared<DataTypeString>(); }
+//base function
+template <typename Impl, typename T>
+class AggregateFunctionTopNBase
+ : public IAggregateFunctionDataHelper<AggregateFunctionTopNData<T>,
+ AggregateFunctionTopNBase<Impl,
T>> {
+public:
+ AggregateFunctionTopNBase(const DataTypes& argument_types_)
+ : IAggregateFunctionDataHelper<AggregateFunctionTopNData<T>,
+ AggregateFunctionTopNBase<Impl,
T>>(argument_types_,
+
{}) {}
void add(AggregateDataPtr __restrict place, const IColumn** columns,
size_t row_num,
Arena*) const override {
@@ -221,6 +259,18 @@ public:
Arena*) const override {
this->data(place).read(buf);
}
+};
+
+//topn function return string
+template <typename Impl, typename T = std::string>
+class AggregateFunctionTopN final : public AggregateFunctionTopNBase<Impl, T> {
+public:
+ AggregateFunctionTopN(const DataTypes& argument_types_)
+ : AggregateFunctionTopNBase<Impl, T>(argument_types_) {}
+
+ String get_name() const override { return "topn"; }
+
+ DataTypePtr get_return_type() const override { return
std::make_shared<DataTypeString>(); }
void insert_result_into(ConstAggregateDataPtr __restrict place, IColumn&
to) const override {
std::string result = this->data(place).get();
@@ -228,4 +278,35 @@ public:
}
};
+//topn function return array
+template <typename Impl, typename T>
+class AggregateFunctionTopNArray final : public
AggregateFunctionTopNBase<Impl, T> {
+public:
+ AggregateFunctionTopNArray(const DataTypes& argument_types_)
+ : AggregateFunctionTopNBase<Impl, T>(argument_types_),
+ _argument_type(argument_types_[0]) {}
+
+ String get_name() const override { return "topn_array"; }
+
+ DataTypePtr get_return_type() const override {
+ return std::make_shared<DataTypeArray>(make_nullable(_argument_type));
+ }
+
+ void insert_result_into(ConstAggregateDataPtr __restrict place, IColumn&
to) const override {
+ auto& to_arr = assert_cast<ColumnArray&>(to);
+ auto& to_nested_col = to_arr.get_data();
+ if (to_nested_col.is_nullable()) {
+ auto col_null = reinterpret_cast<ColumnNullable*>(&to_nested_col);
+
this->data(place).insert_result_into(col_null->get_nested_column());
+
col_null->get_null_map_data().resize_fill(col_null->get_nested_column().size(),
0);
+ } else {
+ this->data(place).insert_result_into(to_nested_col);
+ }
+ to_arr.get_offsets().push_back(to_nested_col.size());
+ }
+
+private:
+ DataTypePtr _argument_type;
+};
+
} // namespace doris::vectorized
diff --git a/be/test/vec/aggregate_functions/agg_test.cpp
b/be/test/vec/aggregate_functions/agg_test.cpp
index b9840fb099..951b1b8e3b 100644
--- a/be/test/vec/aggregate_functions/agg_test.cpp
+++ b/be/test/vec/aggregate_functions/agg_test.cpp
@@ -89,7 +89,7 @@ TEST(AggTest, topn_test) {
agg_function->add(place, const_cast<const IColumn**>(columns), i,
nullptr);
}
- std::string result =
reinterpret_cast<AggregateFunctionTopNData*>(place)->get();
+ std::string result =
reinterpret_cast<AggregateFunctionTopNData<std::string>*>(place)->get();
std::string expect_result =
"{\"1\":2048,\"2\":683,\"3\":341,\"4\":205,\"5\":137,\"6\":97,\"7\":73,\"8\":57,\"9\":"
"46,\"10\":37}";
diff --git a/be/test/vec/aggregate_functions/vec_window_funnel_test.cpp
b/be/test/vec/aggregate_functions/vec_window_funnel_test.cpp
index 904d5ce80c..50bf612fd2 100644
--- a/be/test/vec/aggregate_functions/vec_window_funnel_test.cpp
+++ b/be/test/vec/aggregate_functions/vec_window_funnel_test.cpp
@@ -17,16 +17,14 @@
#include <gtest/gtest.h>
-#include "common/logging.h"
#include "gtest/gtest.h"
#include "vec/aggregate_functions/aggregate_function.h"
#include "vec/aggregate_functions/aggregate_function_simple_factory.h"
-#include "vec/aggregate_functions/aggregate_function_topn.h"
#include "vec/columns/column_vector.h"
#include "vec/data_types/data_type.h"
+#include "vec/data_types/data_type_date_time.h"
#include "vec/data_types/data_type_number.h"
#include "vec/data_types/data_type_string.h"
-
namespace doris::vectorized {
void register_aggregate_function_window_funnel(AggregateFunctionSimpleFactory&
factory);
@@ -43,8 +41,7 @@ public:
std::make_shared<DataTypeInt64>(),
std::make_shared<DataTypeString>(),
std::make_shared<DataTypeDateTime>(),
std::make_shared<DataTypeUInt8>(),
std::make_shared<DataTypeUInt8>(),
std::make_shared<DataTypeUInt8>(),
- std::make_shared<DataTypeUInt8>(),
- };
+ std::make_shared<DataTypeUInt8>()};
Array array;
agg_function = factory.get("window_funnel", data_types, array, false);
EXPECT_NE(agg_function, nullptr);
diff --git
a/docs/en/docs/sql-manual/sql-functions/aggregate-functions/topn_array.md
b/docs/en/docs/sql-manual/sql-functions/aggregate-functions/topn_array.md
new file mode 100644
index 0000000000..d266287638
--- /dev/null
+++ b/docs/en/docs/sql-manual/sql-functions/aggregate-functions/topn_array.md
@@ -0,0 +1,61 @@
+---
+{
+ "title": "topn_array",
+ "language": "en"
+}
+---
+
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied. See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+## topn_array
+### description
+#### Syntax
+
+`ARRAY<T> topn_array(expr, INT top_num[, INT space_expand_rate])`
+
+The topn function uses the Space-Saving algorithm to calculate the top_num
frequent items in expr, and the result is the
+frequent items and their occurrence times, which is an approximation
+
+The space_expand_rate parameter is optional and is used to set the number of
counters used in the Space-Saving algorithm
+```
+counter numbers = top_num * space_expand_rate
+```
+The higher value of space_expand_rate, the more accurate result will be. The
default value is 50
+
+### example
+```
+mysql> select topn_array(k3,3) from baseall;
++--------------------------+
+| topn_array(`k3`, 3) |
++--------------------------+
+| [3021, 2147483647, 5014] |
++--------------------------+
+1 row in set (0.02 sec)
+
+mysql> select topn_array(k3,3,100) from baseall;
++--------------------------+
+| topn_array(`k3`, 3, 100) |
++--------------------------+
+| [3021, 2147483647, 5014] |
++--------------------------+
+1 row in set (0.02 sec)
+```
+### keywords
+TOPN, TOPN_ARRAY
\ No newline at end of file
diff --git a/docs/sidebars.json b/docs/sidebars.json
index 0333af1eeb..ebf2da3ec4 100644
--- a/docs/sidebars.json
+++ b/docs/sidebars.json
@@ -430,6 +430,7 @@
"sql-manual/sql-functions/aggregate-functions/percentile",
"sql-manual/sql-functions/aggregate-functions/hll_union_agg",
"sql-manual/sql-functions/aggregate-functions/topn",
+
"sql-manual/sql-functions/aggregate-functions/topn_array",
"sql-manual/sql-functions/aggregate-functions/count",
"sql-manual/sql-functions/aggregate-functions/sum",
"sql-manual/sql-functions/aggregate-functions/max_by",
diff --git
a/docs/zh-CN/docs/sql-manual/sql-functions/aggregate-functions/topn_array.md
b/docs/zh-CN/docs/sql-manual/sql-functions/aggregate-functions/topn_array.md
new file mode 100644
index 0000000000..8b4985c25d
--- /dev/null
+++ b/docs/zh-CN/docs/sql-manual/sql-functions/aggregate-functions/topn_array.md
@@ -0,0 +1,60 @@
+---
+{
+ "title": "topn_array",
+ "language": "zh-CN"
+}
+---
+
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied. See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+## topn_array
+### description
+#### Syntax
+
+`ARRAY<T> topn_array(expr, INT top_num[, INT space_expand_rate])`
+
+该topn_array函数使用Space-Saving算法计算expr中的top_num个频繁项,结果为频繁项及其出现次数,该结果为近似值
+
+space_expand_rate参数是可选项,该值用来设置Space-Saving算法中使用的counter个数
+```
+counter numbers = top_num * space_expand_rate
+```
+space_expand_rate的值越大,结果越准确,默认值为50
+
+### example
+```
+mysql> select topn_array(k3,3) from baseall;
++--------------------------+
+| topn_array(`k3`, 3) |
++--------------------------+
+| [3021, 2147483647, 5014] |
++--------------------------+
+1 row in set (0.02 sec)
+
+mysql> select topn_array(k3,3,100) from baseall;
++--------------------------+
+| topn_array(`k3`, 3, 100) |
++--------------------------+
+| [3021, 2147483647, 5014] |
++--------------------------+
+1 row in set (0.02 sec)
+```
+### keywords
+TOPN, TOPN_ARRAY
diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java
b/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java
index 6f9154cac8..acbb0a5a18 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java
@@ -2466,6 +2466,13 @@ public class FunctionSet<T> {
"", "", "", "", "", true, false, true, true));
addBuiltin(AggregateFunction.createBuiltin(COLLECT_SET,
Lists.newArrayList(t), new ArrayType(t), t,
"", "", "", "", "", true, false, true, true));
+ addBuiltin(
+ AggregateFunction.createBuiltin("topn_array",
Lists.newArrayList(t, Type.INT), new ArrayType(t), t,
+ "", "", "", "", "", true, false, true, true));
+ addBuiltin(
+ AggregateFunction
+ .createBuiltin("topn_array", Lists.newArrayList(t,
Type.INT, Type.INT), new ArrayType(t), t,
+ "", "", "", "", "", true, false, true,
true));
}
// Avg
diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Type.java
b/fe/fe-core/src/main/java/org/apache/doris/catalog/Type.java
index 3f6fd5ef0e..be8217e42b 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Type.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Type.java
@@ -158,6 +158,8 @@ public abstract class Type {
arraySubTypes.add(DECIMALV2);
arraySubTypes.add(DATE);
arraySubTypes.add(DATETIME);
+ arraySubTypes.add(DATEV2);
+ arraySubTypes.add(DATETIMEV2);
arraySubTypes.add(CHAR);
arraySubTypes.add(VARCHAR);
arraySubTypes.add(STRING);
diff --git
a/regression-test/data/correctness_p0/test_select_stddev_variance_window.out
b/regression-test/data/correctness_p0/test_select_stddev_variance_window.out
index b3a9ee95e5..24fd821c9a 100644
--- a/regression-test/data/correctness_p0/test_select_stddev_variance_window.out
+++ b/regression-test/data/correctness_p0/test_select_stddev_variance_window.out
@@ -918,55 +918,55 @@
15 1991.8000000000002
-- !select_default --
-1 5069.4970703125
-2 1991.0
-3 1991.0
-4 1991.0
-5 5069.4970703125
-6 5069.4970703125
-7 1991.0
-8 5069.4970703125
-9 11224.505859375
-10 5068.5966796875
-11 17379.5
-12 11223.8056640625
-13 17379.0
-14 23534.203125
-15 1991.7000732421875
+1 1989.0
+2 1987.5
+3 1987.5
+4 1987.5
+5 1989.0
+6 1989.0
+7 1987.5
+8 1989.0
+9 1990.0
+10 1989.0
+11 1991.0
+12 1123.0
+13 255.0
+14 1123.0
+15 1990.0
-- !select_default --
1 \N
2 \N
3 1986.0
-4 1989.0
+4 1987.5
5 1989.0
-6 1989.0
-7 1990.7999267578125
-8 29689.203125
-9 29688.802734375
-10 1990.7999267578125
-11 29689.40234375
+6 1987.0
+7 1989.0
+8 1989.0
+9 1985.0
+10 1989.0
+11 1991.0
12 1991.0
-13 29689.40234375
-14 29689.40234375
-15 1990.7999267578125
+13 1991.0
+14 1991.0
+15 1989.0
-- !select_default --
-1 1989.0
-2 1989.0
-3 1990.7999267578125
-4 1990.39990234375
-5 29689.203125
-6 23533.6015625
-7 1991.0
-8 17379.0
-9 17379.0
-10 17379.0
-11 17379.5
-12 17379.0
-13 17379.0
-14 23534.203125
-15 1991.7000732421875
+1 1987.0
+2 1987.5
+3 1989.0
+4 1987.5
+5 1989.0
+6 1987.0
+7 1989.0
+8 1989.0
+9 1989.0
+10 1991.0
+11 1991.0
+12 1991.0
+13 255.0
+14 1123.0
+15 1990.0
-- !select_default --
1 1989.0
@@ -986,36 +986,36 @@
15 1992.0
-- !select_default --
-1 5069.4970703125
-2 1991.0
-3 5068.5966796875
-4 11223.8056640625
-5 11224.505859375
-6 17379.5
-7 17379.0
-8 1991.7000732421875
-9 1991.9000244140625
-10 23534.203125
-11 1992.0
-12 29515.802734375
-13 255.0
+1 1989.0
+2 1987.5
+3 1989.0
+4 1123.0
+5 1990.0
+6 1991.0
+7 255.0
+8 1990.0
+9 1991.0
+10 1123.0
+11 1990.5
+12 255.0
+13 -16256.0
14 255.0
15 1992.0
-- !select_default --
1 1989.0
2 1986.0
-3 1989.0
-4 1990.7999267578125
-5 1989.0
-6 29689.203125
-7 1990.39990234375
-8 23533.6015625
-9 17379.0
-10 1991.0
-11 11223.8056640625
-12 11223.8056640625
-13 5068.5966796875
-14 1991.0
-15 5069.4970703125
+3 1987.5
+4 1989.0
+5 1987.0
+6 1989.0
+7 1987.5
+8 1987.0
+9 1989.0
+10 1989.0
+11 1989.0
+12 1990.0
+13 1989.0
+14 1987.5
+15 1989.0
diff --git
a/regression-test/data/query_p0/sql_functions/aggregate_functions/test_aggregate_collect.out
b/regression-test/data/query_p0/sql_functions/aggregate_functions/test_aggregate_collect.out
index d501683f7f..f26cf1146b 100644
---
a/regression-test/data/query_p0/sql_functions/aggregate_functions/test_aggregate_collect.out
+++
b/regression-test/data/query_p0/sql_functions/aggregate_functions/test_aggregate_collect.out
@@ -16,3 +16,15 @@
-- !select --
1 [1, 2, 1, 2] ['hello'] [2022-07-04, 2022-07-04] [1.23]
['hello', 'hello', 'hello', 'hello']
+-- !select43 --
+[10, 8]
+
+-- !select44 --
+[10, 8]
+
+-- !select45 --
+[2022-11-02, 2022-11-01]
+
+-- !select46 --
+[6.8754576, 0.576]
+
diff --git
a/regression-test/data/query_p0/sql_functions/window_functions/test_select_stddev_variance_window.out
b/regression-test/data/query_p0/sql_functions/window_functions/test_select_stddev_variance_window.out
index b3a9ee95e5..24fd821c9a 100644
---
a/regression-test/data/query_p0/sql_functions/window_functions/test_select_stddev_variance_window.out
+++
b/regression-test/data/query_p0/sql_functions/window_functions/test_select_stddev_variance_window.out
@@ -918,55 +918,55 @@
15 1991.8000000000002
-- !select_default --
-1 5069.4970703125
-2 1991.0
-3 1991.0
-4 1991.0
-5 5069.4970703125
-6 5069.4970703125
-7 1991.0
-8 5069.4970703125
-9 11224.505859375
-10 5068.5966796875
-11 17379.5
-12 11223.8056640625
-13 17379.0
-14 23534.203125
-15 1991.7000732421875
+1 1989.0
+2 1987.5
+3 1987.5
+4 1987.5
+5 1989.0
+6 1989.0
+7 1987.5
+8 1989.0
+9 1990.0
+10 1989.0
+11 1991.0
+12 1123.0
+13 255.0
+14 1123.0
+15 1990.0
-- !select_default --
1 \N
2 \N
3 1986.0
-4 1989.0
+4 1987.5
5 1989.0
-6 1989.0
-7 1990.7999267578125
-8 29689.203125
-9 29688.802734375
-10 1990.7999267578125
-11 29689.40234375
+6 1987.0
+7 1989.0
+8 1989.0
+9 1985.0
+10 1989.0
+11 1991.0
12 1991.0
-13 29689.40234375
-14 29689.40234375
-15 1990.7999267578125
+13 1991.0
+14 1991.0
+15 1989.0
-- !select_default --
-1 1989.0
-2 1989.0
-3 1990.7999267578125
-4 1990.39990234375
-5 29689.203125
-6 23533.6015625
-7 1991.0
-8 17379.0
-9 17379.0
-10 17379.0
-11 17379.5
-12 17379.0
-13 17379.0
-14 23534.203125
-15 1991.7000732421875
+1 1987.0
+2 1987.5
+3 1989.0
+4 1987.5
+5 1989.0
+6 1987.0
+7 1989.0
+8 1989.0
+9 1989.0
+10 1991.0
+11 1991.0
+12 1991.0
+13 255.0
+14 1123.0
+15 1990.0
-- !select_default --
1 1989.0
@@ -986,36 +986,36 @@
15 1992.0
-- !select_default --
-1 5069.4970703125
-2 1991.0
-3 5068.5966796875
-4 11223.8056640625
-5 11224.505859375
-6 17379.5
-7 17379.0
-8 1991.7000732421875
-9 1991.9000244140625
-10 23534.203125
-11 1992.0
-12 29515.802734375
-13 255.0
+1 1989.0
+2 1987.5
+3 1989.0
+4 1123.0
+5 1990.0
+6 1991.0
+7 255.0
+8 1990.0
+9 1991.0
+10 1123.0
+11 1990.5
+12 255.0
+13 -16256.0
14 255.0
15 1992.0
-- !select_default --
1 1989.0
2 1986.0
-3 1989.0
-4 1990.7999267578125
-5 1989.0
-6 29689.203125
-7 1990.39990234375
-8 23533.6015625
-9 17379.0
-10 1991.0
-11 11223.8056640625
-12 11223.8056640625
-13 5068.5966796875
-14 1991.0
-15 5069.4970703125
+3 1987.5
+4 1989.0
+5 1987.0
+6 1989.0
+7 1987.5
+8 1987.0
+9 1989.0
+10 1989.0
+11 1989.0
+12 1990.0
+13 1989.0
+14 1987.5
+15 1989.0
diff --git
a/regression-test/suites/correctness_p0/test_select_stddev_variance_window.groovy
b/regression-test/suites/correctness_p0/test_select_stddev_variance_window.groovy
index 390418ddcf..311ae350c8 100644
---
a/regression-test/suites/correctness_p0/test_select_stddev_variance_window.groovy
+++
b/regression-test/suites/correctness_p0/test_select_stddev_variance_window.groovy
@@ -144,12 +144,12 @@ suite("test_select_stddev_variance_window") {
qt_select_default "select k1, percentile(k2,0.8) over (partition by k6
order by k1 rows between current row and unbounded following) from ${tableName}
order by k1;"
qt_select_default "select k1, percentile(k2,0.8) over (partition by k6
order by k1) from ${tableName} order by k1;"
- qt_select_default "select k1, percentile_approx(k2,0.8,4096) over
(partition by k6 order by k1 rows between 3 preceding and unbounded following)
from ${tableName} order by k1;"
- qt_select_default "select k1, percentile_approx(k2,0.8,4096) over
(partition by k6 order by k1 rows between 3 preceding and 1 preceding) from
${tableName} order by k1;"
- qt_select_default "select k1, percentile_approx(k2,0.8,4096) over
(partition by k6 order by k1 rows between 3 preceding and 1 following) from
${tableName} order by k1;"
- qt_select_default "select k1, percentile_approx(k2,0.8,4096) over
(partition by k6 order by k1 rows between current row and current row) from
${tableName} order by k1;"
- qt_select_default "select k1, percentile_approx(k2,0.8,4096) over
(partition by k6 order by k1 rows between current row and unbounded following)
from ${tableName} order by k1;"
- qt_select_default "select k1, percentile_approx(k2,0.8,4096) over
(partition by k6 order by k1) from ${tableName} order by k1;"
+ qt_select_default "select k1, percentile_approx(k2,0.5,4096) over
(partition by k6 order by k1 rows between 3 preceding and unbounded following)
from ${tableName} order by k1;"
+ qt_select_default "select k1, percentile_approx(k2,0.5,4096) over
(partition by k6 order by k1 rows between 3 preceding and 1 preceding) from
${tableName} order by k1;"
+ qt_select_default "select k1, percentile_approx(k2,0.5,4096) over
(partition by k6 order by k1 rows between 3 preceding and 1 following) from
${tableName} order by k1;"
+ qt_select_default "select k1, percentile_approx(k2,0.5,4096) over
(partition by k6 order by k1 rows between current row and current row) from
${tableName} order by k1;"
+ qt_select_default "select k1, percentile_approx(k2,0.5,4096) over
(partition by k6 order by k1 rows between current row and unbounded following)
from ${tableName} order by k1;"
+ qt_select_default "select k1, percentile_approx(k2,0.5,4096) over
(partition by k6 order by k1) from ${tableName} order by k1;"
}
diff --git
a/regression-test/suites/query_p0/sql_functions/aggregate_functions/test_aggregate_collect.groovy
b/regression-test/suites/query_p0/sql_functions/aggregate_functions/test_aggregate_collect.groovy
index cd7e61d408..a53e795064 100644
---
a/regression-test/suites/query_p0/sql_functions/aggregate_functions/test_aggregate_collect.groovy
+++
b/regression-test/suites/query_p0/sql_functions/aggregate_functions/test_aggregate_collect.groovy
@@ -47,4 +47,28 @@ suite("test_aggregate_collect") {
sql """ CREATE TABLE ${tableCTAS} PROPERTIES("replication_num" = "1") AS
SELECT
1,collect_list(c_int),collect_set(c_string),collect_list(c_date),collect_set(c_decimal),collect_list(c_string_not_null)
FROM ${tableName} """
qt_select "SELECT * from ${tableCTAS}"
+
+ // topn_array
+ def tableName_12 = "topn_array"
+
+ sql "DROP TABLE IF EXISTS ${tableName_12}"
+ sql """
+ CREATE TABLE IF NOT EXISTS ${tableName_12} (
+ id int,
+ level int,
+ dt datev2,
+ num decimal(27,9)
+ )
+ DISTRIBUTED BY HASH(id) BUCKETS 1
+ PROPERTIES (
+ "replication_num" = "1"
+ )
+ """
+ sql "INSERT INTO ${tableName_12} values(1,10,'2022-11-1',6.8754576),
(2,8,'2022-11-3',0.576), (2,10,'2022-11-2',1.234) ,(3,10,'2022-11-2',0.576)
,(5,29,'2022-11-2',6.8754576) ,(6,8,'2022-11-1',6.8754576)"
+
+ qt_select43 "select topn_array(level,2) from ${tableName_12}"
+ qt_select44 "select topn_array(level,2,100) from ${tableName_12}"
+ qt_select45 "select topn_array(dt,2,100) from ${tableName_12}"
+ qt_select46 "select topn_array(num,2,100) from ${tableName_12}"
+ sql "DROP TABLE IF EXISTS ${tableName_12}"
}
diff --git
a/regression-test/suites/query_p0/sql_functions/window_functions/test_select_stddev_variance_window.groovy
b/regression-test/suites/query_p0/sql_functions/window_functions/test_select_stddev_variance_window.groovy
index b81f400dad..fb319a4192 100644
---
a/regression-test/suites/query_p0/sql_functions/window_functions/test_select_stddev_variance_window.groovy
+++
b/regression-test/suites/query_p0/sql_functions/window_functions/test_select_stddev_variance_window.groovy
@@ -147,12 +147,12 @@ suite("test_select_stddev_variance_window") {
qt_select_default "select k1, percentile(k2,0.8) over (partition by k6
order by k1 rows between current row and unbounded following) from ${tableName}
order by k1;"
qt_select_default "select k1, percentile(k2,0.8) over (partition by k6
order by k1) from ${tableName} order by k1;"
- qt_select_default "select k1, percentile_approx(k2,0.8,4096) over
(partition by k6 order by k1 rows between 3 preceding and unbounded following)
from ${tableName} order by k1;"
- qt_select_default "select k1, percentile_approx(k2,0.8,4096) over
(partition by k6 order by k1 rows between 3 preceding and 1 preceding) from
${tableName} order by k1;"
- qt_select_default "select k1, percentile_approx(k2,0.8,4096) over
(partition by k6 order by k1 rows between 3 preceding and 1 following) from
${tableName} order by k1;"
- qt_select_default "select k1, percentile_approx(k2,0.8,4096) over
(partition by k6 order by k1 rows between current row and current row) from
${tableName} order by k1;"
- qt_select_default "select k1, percentile_approx(k2,0.8,4096) over
(partition by k6 order by k1 rows between current row and unbounded following)
from ${tableName} order by k1;"
- qt_select_default "select k1, percentile_approx(k2,0.8,4096) over
(partition by k6 order by k1) from ${tableName} order by k1;"
+ qt_select_default "select k1, percentile_approx(k2,0.5,4096) over
(partition by k6 order by k1 rows between 3 preceding and unbounded following)
from ${tableName} order by k1;"
+ qt_select_default "select k1, percentile_approx(k2,0.5,4096) over
(partition by k6 order by k1 rows between 3 preceding and 1 preceding) from
${tableName} order by k1;"
+ qt_select_default "select k1, percentile_approx(k2,0.5,4096) over
(partition by k6 order by k1 rows between 3 preceding and 1 following) from
${tableName} order by k1;"
+ qt_select_default "select k1, percentile_approx(k2,0.5,4096) over
(partition by k6 order by k1 rows between current row and current row) from
${tableName} order by k1;"
+ qt_select_default "select k1, percentile_approx(k2,0.5,4096) over
(partition by k6 order by k1 rows between current row and unbounded following)
from ${tableName} order by k1;"
+ qt_select_default "select k1, percentile_approx(k2,0.5,4096) over
(partition by k6 order by k1) from ${tableName} order by k1;"
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]