[doris] branch master updated: [Vectorized](function) support topn_array function (#13869)

lihaopeng Wed, 02 Nov 2022 04:49:35 -0700

This is an automated email from the ASF dual-hosted git repository.

lihaopeng pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git



The following commit(s) were added to refs/heads/master by this push:
     new 374303186c [Vectorized](function) support topn_array function (#13869)
374303186c is described below

commit 374303186c996342c1c1bbbefd01e13a2cc5112f
Author: zhangstar333 <[email protected]>
AuthorDate: Wed Nov 2 19:49:23 2022 +0800

    [Vectorized](function) support topn_array function (#13869)
---
 .../aggregate_function_topn.cpp                    |  61 ++++++++++
 .../aggregate_functions/aggregate_function_topn.h  | 133 +++++++++++++++++----
 be/test/vec/aggregate_functions/agg_test.cpp       |   2 +-
 .../aggregate_functions/vec_window_funnel_test.cpp |   7 +-
 .../aggregate-functions/topn_array.md              |  61 ++++++++++
 docs/sidebars.json                                 |   1 +
 .../aggregate-functions/topn_array.md              |  60 ++++++++++
 .../java/org/apache/doris/catalog/FunctionSet.java |   7 ++
 .../main/java/org/apache/doris/catalog/Type.java   |   2 +
 .../test_select_stddev_variance_window.out         | 132 ++++++++++----------
 .../aggregate_functions/test_aggregate_collect.out |  12 ++
 .../test_select_stddev_variance_window.out         | 132 ++++++++++----------
 .../test_select_stddev_variance_window.groovy      |  12 +-
 .../test_aggregate_collect.groovy                  |  24 ++++
 .../test_select_stddev_variance_window.groovy      |  12 +-
 15 files changed, 482 insertions(+), 176 deletions(-)

diff --git a/be/src/vec/aggregate_functions/aggregate_function_topn.cpp 
b/be/src/vec/aggregate_functions/aggregate_function_topn.cpp
index 19f52fbff8..74bb154d6a 100644
--- a/be/src/vec/aggregate_functions/aggregate_function_topn.cpp
+++ b/be/src/vec/aggregate_functions/aggregate_function_topn.cpp
@@ -17,6 +17,8 @@
 
 #include <vec/aggregate_functions/aggregate_function_topn.h>
 
+#include "vec/aggregate_functions/helpers.h"
+#include "vec/core/types.h"
 namespace doris::vectorized {
 
 AggregateFunctionPtr create_aggregate_function_topn(const std::string& name,
@@ -38,8 +40,67 @@ AggregateFunctionPtr create_aggregate_function_topn(const 
std::string& name,
     return nullptr;
 }
 
+template <bool has_default_param>
+AggregateFunctionPtr create_topn_array(const DataTypes& argument_types) {
+    auto type = argument_types[0].get();
+    if (type->is_nullable()) {
+        type = assert_cast<const 
DataTypeNullable*>(type)->get_nested_type().get();
+    }
+
+    WhichDataType which(*type);
+
+#define DISPATCH(TYPE)                                                         
         \
+    if (which.idx == TypeIndex::TYPE)                                          
         \
+        return AggregateFunctionPtr(                                           
         \
+                new AggregateFunctionTopNArray<                                
         \
+                        AggregateFunctionTopNImplArray<TYPE, 
has_default_param>, TYPE>( \
+                        argument_types));
+    FOR_NUMERIC_TYPES(DISPATCH)
+#undef DISPATCH
+    if (which.is_string_or_fixed_string()) {
+        return AggregateFunctionPtr(new AggregateFunctionTopNArray<
+                                    
AggregateFunctionTopNImplArray<std::string, has_default_param>,
+                                    std::string>(argument_types));
+    }
+    if (which.is_decimal()) {
+        return AggregateFunctionPtr(
+                new AggregateFunctionTopNArray<
+                        AggregateFunctionTopNImplArray<Decimal128, 
has_default_param>, Decimal128>(
+                        argument_types));
+    }
+    if (which.is_date_or_datetime() || which.is_date_time_v2()) {
+        return AggregateFunctionPtr(
+                new AggregateFunctionTopNArray<
+                        AggregateFunctionTopNImplArray<Int64, 
has_default_param>, Int64>(
+                        argument_types));
+    }
+    if (which.is_date_v2()) {
+        return AggregateFunctionPtr(
+                new AggregateFunctionTopNArray<
+                        AggregateFunctionTopNImplArray<UInt32, 
has_default_param>, UInt32>(
+                        argument_types));
+    }
+
+    LOG(WARNING) << fmt::format("Illegal argument  type for aggregate function 
topn_array is: {}",
+                                type->get_name());
+    return nullptr;
+}
+
+AggregateFunctionPtr create_aggregate_function_topn_array(const std::string& 
name,
+                                                          const DataTypes& 
argument_types,
+                                                          const Array& 
parameters,
+                                                          const bool 
result_is_nullable) {
+    bool has_default_param = (argument_types.size() == 3);
+    if (has_default_param) {
+        return create_topn_array<true>(argument_types);
+    } else {
+        return create_topn_array<false>(argument_types);
+    }
+}
+
 void register_aggregate_function_topn(AggregateFunctionSimpleFactory& factory) 
{
     factory.register_function("topn", create_aggregate_function_topn);
+    factory.register_function("topn_array", 
create_aggregate_function_topn_array);
 }
 
 } // namespace doris::vectorized
\ No newline at end of file
diff --git a/be/src/vec/aggregate_functions/aggregate_function_topn.h 
b/be/src/vec/aggregate_functions/aggregate_function_topn.h
index ae9fdf322d..b7aa6ae14c 100644
--- a/be/src/vec/aggregate_functions/aggregate_function_topn.h
+++ b/be/src/vec/aggregate_functions/aggregate_function_topn.h
@@ -21,28 +21,31 @@
 #include <rapidjson/stringbuffer.h>
 #include <rapidjson/writer.h>
 
+#include <type_traits>
 #include <unordered_map>
 
 #include "vec/aggregate_functions/aggregate_function.h"
-#include "vec/aggregate_functions/aggregate_function_group_concat.h"
 #include "vec/aggregate_functions/aggregate_function_simple_factory.h"
-#include "vec/aggregate_functions/helpers.h"
-#include "vec/data_types/data_type_date_time.h"
-#include "vec/data_types/data_type_decimal.h"
-#include "vec/data_types/data_type_number.h"
+#include "vec/columns/column_array.h"
+#include "vec/columns/column_string.h"
+#include "vec/columns/column_vector.h"
+#include "vec/data_types/data_type_array.h"
 #include "vec/data_types/data_type_string.h"
 #include "vec/io/io_helper.h"
 
 namespace doris::vectorized {
 
 // space-saving algorithm
+template <typename T>
 struct AggregateFunctionTopNData {
+    using ColVecType =
+            std::conditional_t<IsDecimalNumber<T>, ColumnDecimal<Decimal128>, 
ColumnVector<T>>;
     void set_paramenters(int input_top_num, int space_expand_rate = 50) {
         top_num = input_top_num;
         capacity = (uint64_t)top_num * space_expand_rate;
     }
 
-    void add(const std::string& value) {
+    void add(const T& value) {
         auto it = counter_map.find(value);
         if (it != counter_map.end()) {
             it->second++;
@@ -93,13 +96,13 @@ struct AggregateFunctionTopNData {
         }
     }
 
-    std::vector<std::pair<uint64_t, std::string>> get_remain_vector() const {
-        std::vector<std::pair<uint64_t, std::string>> counter_vector;
+    std::vector<std::pair<uint64_t, T>> get_remain_vector() const {
+        std::vector<std::pair<uint64_t, T>> counter_vector;
         for (auto it : counter_map) {
             counter_vector.emplace_back(it.second, it.first);
         }
         std::sort(counter_vector.begin(), counter_vector.end(),
-                  std::greater<std::pair<uint64_t, std::string>>());
+                  std::greater<std::pair<uint64_t, T>>());
         return counter_vector;
     }
 
@@ -127,7 +130,7 @@ struct AggregateFunctionTopNData {
         read_binary(element_number, buf);
 
         counter_map.clear();
-        std::pair<std::string, uint64_t> element;
+        std::pair<T, uint64_t> element;
         for (auto i = 0; i < element_number; i++) {
             read_binary(element.first, buf);
             read_binary(element.second, buf);
@@ -152,11 +155,24 @@ struct AggregateFunctionTopNData {
         return buffer.GetString();
     }
 
+    void insert_result_into(IColumn& to) const {
+        auto counter_vector = get_remain_vector();
+        for (int i = 0; i < std::min((int)counter_vector.size(), top_num); 
i++) {
+            const auto& element = counter_vector[i];
+            if constexpr (std::is_same_v<T, std::string>) {
+                
static_cast<ColumnString&>(to).insert_data(element.second.c_str(),
+                                                           
element.second.length());
+            } else {
+                
static_cast<ColVecType&>(to).get_data().push_back(element.second);
+            }
+        }
+    }
+
     void reset() { counter_map.clear(); }
 
     int top_num = 0;
     uint64_t capacity = 0;
-    phmap::flat_hash_map<std::string, uint64_t> counter_map;
+    phmap::flat_hash_map<T, uint64_t> counter_map;
 };
 
 struct StringDataImplTopN {
@@ -170,8 +186,8 @@ struct StringDataImplTopN {
 
 template <typename DataHelper>
 struct AggregateFunctionTopNImplInt {
-    static void add(AggregateFunctionTopNData& __restrict place, const 
IColumn** columns,
-                    size_t row_num) {
+    static void add(AggregateFunctionTopNData<std::string>& __restrict place,
+                    const IColumn** columns, size_t row_num) {
         place.set_paramenters(static_cast<const 
ColumnInt32*>(columns[1])->get_element(row_num));
         place.add(DataHelper::to_string(*columns[0], row_num));
     }
@@ -179,27 +195,49 @@ struct AggregateFunctionTopNImplInt {
 
 template <typename DataHelper>
 struct AggregateFunctionTopNImplIntInt {
-    static void add(AggregateFunctionTopNData& __restrict place, const 
IColumn** columns,
-                    size_t row_num) {
+    static void add(AggregateFunctionTopNData<std::string>& __restrict place,
+                    const IColumn** columns, size_t row_num) {
         place.set_paramenters(static_cast<const 
ColumnInt32*>(columns[1])->get_element(row_num),
                               static_cast<const 
ColumnInt32*>(columns[2])->get_element(row_num));
         place.add(DataHelper::to_string(*columns[0], row_num));
     }
 };
 
-//base function
-template <typename Impl>
-class AggregateFunctionTopN final
-        : public IAggregateFunctionDataHelper<AggregateFunctionTopNData,
-                                              AggregateFunctionTopN<Impl>> {
-public:
-    AggregateFunctionTopN(const DataTypes& argument_types_)
-            : IAggregateFunctionDataHelper<AggregateFunctionTopNData, 
AggregateFunctionTopN<Impl>>(
-                      argument_types_, {}) {}
+template <typename T, bool has_default_param>
+struct AggregateFunctionTopNImplArray {
+    using ColVecType =
+            std::conditional_t<IsDecimalNumber<T>, ColumnDecimal<Decimal128>, 
ColumnVector<T>>;
+    static void add(AggregateFunctionTopNData<T>& __restrict place, const 
IColumn** columns,
+                    size_t row_num) {
+        if constexpr (has_default_param) {
+            place.set_paramenters(
+                    static_cast<const 
ColumnInt32*>(columns[1])->get_element(row_num),
+                    static_cast<const 
ColumnInt32*>(columns[2])->get_element(row_num));
 
-    String get_name() const override { return "topn"; }
+        } else {
+            place.set_paramenters(
+                    static_cast<const 
ColumnInt32*>(columns[1])->get_element(row_num));
+        }
+        if constexpr (std::is_same_v<T, std::string>) {
+            StringRef ref = static_cast<const 
ColumnString&>(*columns[0]).get_data_at(row_num);
+            place.add(std::string(ref.data, ref.size));
+        } else {
+            T val = static_cast<const 
ColVecType&>(*columns[0]).get_data()[row_num];
+            place.add(val);
+        }
+    }
+};
 
-    DataTypePtr get_return_type() const override { return 
std::make_shared<DataTypeString>(); }
+//base function
+template <typename Impl, typename T>
+class AggregateFunctionTopNBase
+        : public IAggregateFunctionDataHelper<AggregateFunctionTopNData<T>,
+                                              AggregateFunctionTopNBase<Impl, 
T>> {
+public:
+    AggregateFunctionTopNBase(const DataTypes& argument_types_)
+            : IAggregateFunctionDataHelper<AggregateFunctionTopNData<T>,
+                                           AggregateFunctionTopNBase<Impl, 
T>>(argument_types_,
+                                                                               
{}) {}
 
     void add(AggregateDataPtr __restrict place, const IColumn** columns, 
size_t row_num,
              Arena*) const override {
@@ -221,6 +259,18 @@ public:
                      Arena*) const override {
         this->data(place).read(buf);
     }
+};
+
+//topn function return string
+template <typename Impl, typename T = std::string>
+class AggregateFunctionTopN final : public AggregateFunctionTopNBase<Impl, T> {
+public:
+    AggregateFunctionTopN(const DataTypes& argument_types_)
+            : AggregateFunctionTopNBase<Impl, T>(argument_types_) {}
+
+    String get_name() const override { return "topn"; }
+
+    DataTypePtr get_return_type() const override { return 
std::make_shared<DataTypeString>(); }
 
     void insert_result_into(ConstAggregateDataPtr __restrict place, IColumn& 
to) const override {
         std::string result = this->data(place).get();
@@ -228,4 +278,35 @@ public:
     }
 };
 
+//topn function return array
+template <typename Impl, typename T>
+class AggregateFunctionTopNArray final : public 
AggregateFunctionTopNBase<Impl, T> {
+public:
+    AggregateFunctionTopNArray(const DataTypes& argument_types_)
+            : AggregateFunctionTopNBase<Impl, T>(argument_types_),
+              _argument_type(argument_types_[0]) {}
+
+    String get_name() const override { return "topn_array"; }
+
+    DataTypePtr get_return_type() const override {
+        return std::make_shared<DataTypeArray>(make_nullable(_argument_type));
+    }
+
+    void insert_result_into(ConstAggregateDataPtr __restrict place, IColumn& 
to) const override {
+        auto& to_arr = assert_cast<ColumnArray&>(to);
+        auto& to_nested_col = to_arr.get_data();
+        if (to_nested_col.is_nullable()) {
+            auto col_null = reinterpret_cast<ColumnNullable*>(&to_nested_col);
+            
this->data(place).insert_result_into(col_null->get_nested_column());
+            
col_null->get_null_map_data().resize_fill(col_null->get_nested_column().size(), 
0);
+        } else {
+            this->data(place).insert_result_into(to_nested_col);
+        }
+        to_arr.get_offsets().push_back(to_nested_col.size());
+    }
+
+private:
+    DataTypePtr _argument_type;
+};
+
 } // namespace doris::vectorized
diff --git a/be/test/vec/aggregate_functions/agg_test.cpp 
b/be/test/vec/aggregate_functions/agg_test.cpp
index b9840fb099..951b1b8e3b 100644
--- a/be/test/vec/aggregate_functions/agg_test.cpp
+++ b/be/test/vec/aggregate_functions/agg_test.cpp
@@ -89,7 +89,7 @@ TEST(AggTest, topn_test) {
         agg_function->add(place, const_cast<const IColumn**>(columns), i, 
nullptr);
     }
 
-    std::string result = 
reinterpret_cast<AggregateFunctionTopNData*>(place)->get();
+    std::string result = 
reinterpret_cast<AggregateFunctionTopNData<std::string>*>(place)->get();
     std::string expect_result =
             
"{\"1\":2048,\"2\":683,\"3\":341,\"4\":205,\"5\":137,\"6\":97,\"7\":73,\"8\":57,\"9\":"
             "46,\"10\":37}";
diff --git a/be/test/vec/aggregate_functions/vec_window_funnel_test.cpp 
b/be/test/vec/aggregate_functions/vec_window_funnel_test.cpp
index 904d5ce80c..50bf612fd2 100644
--- a/be/test/vec/aggregate_functions/vec_window_funnel_test.cpp
+++ b/be/test/vec/aggregate_functions/vec_window_funnel_test.cpp
@@ -17,16 +17,14 @@
 
 #include <gtest/gtest.h>
 
-#include "common/logging.h"
 #include "gtest/gtest.h"
 #include "vec/aggregate_functions/aggregate_function.h"
 #include "vec/aggregate_functions/aggregate_function_simple_factory.h"
-#include "vec/aggregate_functions/aggregate_function_topn.h"
 #include "vec/columns/column_vector.h"
 #include "vec/data_types/data_type.h"
+#include "vec/data_types/data_type_date_time.h"
 #include "vec/data_types/data_type_number.h"
 #include "vec/data_types/data_type_string.h"
-
 namespace doris::vectorized {
 
 void register_aggregate_function_window_funnel(AggregateFunctionSimpleFactory& 
factory);
@@ -43,8 +41,7 @@ public:
                 std::make_shared<DataTypeInt64>(),    
std::make_shared<DataTypeString>(),
                 std::make_shared<DataTypeDateTime>(), 
std::make_shared<DataTypeUInt8>(),
                 std::make_shared<DataTypeUInt8>(),    
std::make_shared<DataTypeUInt8>(),
-                std::make_shared<DataTypeUInt8>(),
-        };
+                std::make_shared<DataTypeUInt8>()};
         Array array;
         agg_function = factory.get("window_funnel", data_types, array, false);
         EXPECT_NE(agg_function, nullptr);
diff --git 
a/docs/en/docs/sql-manual/sql-functions/aggregate-functions/topn_array.md 
b/docs/en/docs/sql-manual/sql-functions/aggregate-functions/topn_array.md
new file mode 100644
index 0000000000..d266287638
--- /dev/null
+++ b/docs/en/docs/sql-manual/sql-functions/aggregate-functions/topn_array.md
@@ -0,0 +1,61 @@
+---
+{
+    "title": "topn_array",
+    "language": "en"
+}
+---
+
+<!-- 
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+## topn_array
+### description
+#### Syntax
+
+`ARRAY<T> topn_array(expr, INT top_num[, INT space_expand_rate])`
+
+The topn function uses the Space-Saving algorithm to calculate the top_num 
frequent items in expr, and the result is the 
+frequent items and their occurrence times, which is an approximation
+
+The space_expand_rate parameter is optional and is used to set the number of 
counters used in the Space-Saving algorithm
+```
+counter numbers = top_num * space_expand_rate
+```
+The higher value of space_expand_rate, the more accurate result will be. The 
default value is 50
+
+### example
+```
+mysql> select topn_array(k3,3) from baseall;
++--------------------------+
+| topn_array(`k3`, 3)      |
++--------------------------+
+| [3021, 2147483647, 5014] |
++--------------------------+
+1 row in set (0.02 sec)
+
+mysql> select topn_array(k3,3,100) from baseall;
++--------------------------+
+| topn_array(`k3`, 3, 100) |
++--------------------------+
+| [3021, 2147483647, 5014] |
++--------------------------+
+1 row in set (0.02 sec)
+```
+### keywords
+TOPN, TOPN_ARRAY
\ No newline at end of file
diff --git a/docs/sidebars.json b/docs/sidebars.json
index 0333af1eeb..ebf2da3ec4 100644
--- a/docs/sidebars.json
+++ b/docs/sidebars.json
@@ -430,6 +430,7 @@
                                 
"sql-manual/sql-functions/aggregate-functions/percentile",
                                 
"sql-manual/sql-functions/aggregate-functions/hll_union_agg",
                                 
"sql-manual/sql-functions/aggregate-functions/topn",
+                                
"sql-manual/sql-functions/aggregate-functions/topn_array",
                                 
"sql-manual/sql-functions/aggregate-functions/count",
                                 
"sql-manual/sql-functions/aggregate-functions/sum",
                                 
"sql-manual/sql-functions/aggregate-functions/max_by",
diff --git 
a/docs/zh-CN/docs/sql-manual/sql-functions/aggregate-functions/topn_array.md 
b/docs/zh-CN/docs/sql-manual/sql-functions/aggregate-functions/topn_array.md
new file mode 100644
index 0000000000..8b4985c25d
--- /dev/null
+++ b/docs/zh-CN/docs/sql-manual/sql-functions/aggregate-functions/topn_array.md
@@ -0,0 +1,60 @@
+---
+{
+    "title": "topn_array",
+    "language": "zh-CN"
+}
+---
+
+<!-- 
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+## topn_array
+### description
+#### Syntax
+
+`ARRAY<T> topn_array(expr, INT top_num[, INT space_expand_rate])`
+
+该topn_array函数使用Space-Saving算法计算expr中的top_num个频繁项，结果为频繁项及其出现次数，该结果为近似值
+
+space_expand_rate参数是可选项，该值用来设置Space-Saving算法中使用的counter个数
+```
+counter numbers = top_num * space_expand_rate
+```
+space_expand_rate的值越大，结果越准确，默认值为50
+
+### example
+```
+mysql> select topn_array(k3,3) from baseall;
++--------------------------+
+| topn_array(`k3`, 3)      |
++--------------------------+
+| [3021, 2147483647, 5014] |
++--------------------------+
+1 row in set (0.02 sec)
+
+mysql> select topn_array(k3,3,100) from baseall;
++--------------------------+
+| topn_array(`k3`, 3, 100) |
++--------------------------+
+| [3021, 2147483647, 5014] |
++--------------------------+
+1 row in set (0.02 sec)
+```
+### keywords
+TOPN, TOPN_ARRAY
diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java 
b/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java
index 6f9154cac8..acbb0a5a18 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java
@@ -2466,6 +2466,13 @@ public class FunctionSet<T> {
                     "", "", "", "", "", true, false, true, true));
             addBuiltin(AggregateFunction.createBuiltin(COLLECT_SET, 
Lists.newArrayList(t), new ArrayType(t), t,
                     "", "", "", "", "", true, false, true, true));
+            addBuiltin(
+                    AggregateFunction.createBuiltin("topn_array", 
Lists.newArrayList(t, Type.INT), new ArrayType(t), t,
+                            "", "", "", "", "", true, false, true, true));
+            addBuiltin(
+                    AggregateFunction
+                            .createBuiltin("topn_array", Lists.newArrayList(t, 
Type.INT, Type.INT), new ArrayType(t), t,
+                                    "", "", "", "", "", true, false, true, 
true));
         }
 
         // Avg
diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Type.java 
b/fe/fe-core/src/main/java/org/apache/doris/catalog/Type.java
index 3f6fd5ef0e..be8217e42b 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Type.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Type.java
@@ -158,6 +158,8 @@ public abstract class Type {
         arraySubTypes.add(DECIMALV2);
         arraySubTypes.add(DATE);
         arraySubTypes.add(DATETIME);
+        arraySubTypes.add(DATEV2);
+        arraySubTypes.add(DATETIMEV2);
         arraySubTypes.add(CHAR);
         arraySubTypes.add(VARCHAR);
         arraySubTypes.add(STRING);
diff --git 
a/regression-test/data/correctness_p0/test_select_stddev_variance_window.out 
b/regression-test/data/correctness_p0/test_select_stddev_variance_window.out
index b3a9ee95e5..24fd821c9a 100644
--- a/regression-test/data/correctness_p0/test_select_stddev_variance_window.out
+++ b/regression-test/data/correctness_p0/test_select_stddev_variance_window.out
@@ -918,55 +918,55 @@
 15     1991.8000000000002
 
 -- !select_default --
-1      5069.4970703125
-2      1991.0
-3      1991.0
-4      1991.0
-5      5069.4970703125
-6      5069.4970703125
-7      1991.0
-8      5069.4970703125
-9      11224.505859375
-10     5068.5966796875
-11     17379.5
-12     11223.8056640625
-13     17379.0
-14     23534.203125
-15     1991.7000732421875
+1      1989.0
+2      1987.5
+3      1987.5
+4      1987.5
+5      1989.0
+6      1989.0
+7      1987.5
+8      1989.0
+9      1990.0
+10     1989.0
+11     1991.0
+12     1123.0
+13     255.0
+14     1123.0
+15     1990.0
 
 -- !select_default --
 1      \N
 2      \N
 3      1986.0
-4      1989.0
+4      1987.5
 5      1989.0
-6      1989.0
-7      1990.7999267578125
-8      29689.203125
-9      29688.802734375
-10     1990.7999267578125
-11     29689.40234375
+6      1987.0
+7      1989.0
+8      1989.0
+9      1985.0
+10     1989.0
+11     1991.0
 12     1991.0
-13     29689.40234375
-14     29689.40234375
-15     1990.7999267578125
+13     1991.0
+14     1991.0
+15     1989.0
 
 -- !select_default --
-1      1989.0
-2      1989.0
-3      1990.7999267578125
-4      1990.39990234375
-5      29689.203125
-6      23533.6015625
-7      1991.0
-8      17379.0
-9      17379.0
-10     17379.0
-11     17379.5
-12     17379.0
-13     17379.0
-14     23534.203125
-15     1991.7000732421875
+1      1987.0
+2      1987.5
+3      1989.0
+4      1987.5
+5      1989.0
+6      1987.0
+7      1989.0
+8      1989.0
+9      1989.0
+10     1991.0
+11     1991.0
+12     1991.0
+13     255.0
+14     1123.0
+15     1990.0
 
 -- !select_default --
 1      1989.0
@@ -986,36 +986,36 @@
 15     1992.0
 
 -- !select_default --
-1      5069.4970703125
-2      1991.0
-3      5068.5966796875
-4      11223.8056640625
-5      11224.505859375
-6      17379.5
-7      17379.0
-8      1991.7000732421875
-9      1991.9000244140625
-10     23534.203125
-11     1992.0
-12     29515.802734375
-13     255.0
+1      1989.0
+2      1987.5
+3      1989.0
+4      1123.0
+5      1990.0
+6      1991.0
+7      255.0
+8      1990.0
+9      1991.0
+10     1123.0
+11     1990.5
+12     255.0
+13     -16256.0
 14     255.0
 15     1992.0
 
 -- !select_default --
 1      1989.0
 2      1986.0
-3      1989.0
-4      1990.7999267578125
-5      1989.0
-6      29689.203125
-7      1990.39990234375
-8      23533.6015625
-9      17379.0
-10     1991.0
-11     11223.8056640625
-12     11223.8056640625
-13     5068.5966796875
-14     1991.0
-15     5069.4970703125
+3      1987.5
+4      1989.0
+5      1987.0
+6      1989.0
+7      1987.5
+8      1987.0
+9      1989.0
+10     1989.0
+11     1989.0
+12     1990.0
+13     1989.0
+14     1987.5
+15     1989.0
 
diff --git 
a/regression-test/data/query_p0/sql_functions/aggregate_functions/test_aggregate_collect.out
 
b/regression-test/data/query_p0/sql_functions/aggregate_functions/test_aggregate_collect.out
index d501683f7f..f26cf1146b 100644
--- 
a/regression-test/data/query_p0/sql_functions/aggregate_functions/test_aggregate_collect.out
+++ 
b/regression-test/data/query_p0/sql_functions/aggregate_functions/test_aggregate_collect.out
@@ -16,3 +16,15 @@
 -- !select --
 1      [1, 2, 1, 2]    ['hello']       [2022-07-04, 2022-07-04]        [1.23]  
['hello', 'hello', 'hello', 'hello']
 
+-- !select43 --
+[10, 8]
+
+-- !select44 --
+[10, 8]
+
+-- !select45 --
+[2022-11-02, 2022-11-01]
+
+-- !select46 --
+[6.8754576, 0.576]
+
diff --git 
a/regression-test/data/query_p0/sql_functions/window_functions/test_select_stddev_variance_window.out
 
b/regression-test/data/query_p0/sql_functions/window_functions/test_select_stddev_variance_window.out
index b3a9ee95e5..24fd821c9a 100644
--- 
a/regression-test/data/query_p0/sql_functions/window_functions/test_select_stddev_variance_window.out
+++ 
b/regression-test/data/query_p0/sql_functions/window_functions/test_select_stddev_variance_window.out
@@ -918,55 +918,55 @@
 15     1991.8000000000002
 
 -- !select_default --
-1      5069.4970703125
-2      1991.0
-3      1991.0
-4      1991.0
-5      5069.4970703125
-6      5069.4970703125
-7      1991.0
-8      5069.4970703125
-9      11224.505859375
-10     5068.5966796875
-11     17379.5
-12     11223.8056640625
-13     17379.0
-14     23534.203125
-15     1991.7000732421875
+1      1989.0
+2      1987.5
+3      1987.5
+4      1987.5
+5      1989.0
+6      1989.0
+7      1987.5
+8      1989.0
+9      1990.0
+10     1989.0
+11     1991.0
+12     1123.0
+13     255.0
+14     1123.0
+15     1990.0
 
 -- !select_default --
 1      \N
 2      \N
 3      1986.0
-4      1989.0
+4      1987.5
 5      1989.0
-6      1989.0
-7      1990.7999267578125
-8      29689.203125
-9      29688.802734375
-10     1990.7999267578125
-11     29689.40234375
+6      1987.0
+7      1989.0
+8      1989.0
+9      1985.0
+10     1989.0
+11     1991.0
 12     1991.0
-13     29689.40234375
-14     29689.40234375
-15     1990.7999267578125
+13     1991.0
+14     1991.0
+15     1989.0
 
 -- !select_default --
-1      1989.0
-2      1989.0
-3      1990.7999267578125
-4      1990.39990234375
-5      29689.203125
-6      23533.6015625
-7      1991.0
-8      17379.0
-9      17379.0
-10     17379.0
-11     17379.5
-12     17379.0
-13     17379.0
-14     23534.203125
-15     1991.7000732421875
+1      1987.0
+2      1987.5
+3      1989.0
+4      1987.5
+5      1989.0
+6      1987.0
+7      1989.0
+8      1989.0
+9      1989.0
+10     1991.0
+11     1991.0
+12     1991.0
+13     255.0
+14     1123.0
+15     1990.0
 
 -- !select_default --
 1      1989.0
@@ -986,36 +986,36 @@
 15     1992.0
 
 -- !select_default --
-1      5069.4970703125
-2      1991.0
-3      5068.5966796875
-4      11223.8056640625
-5      11224.505859375
-6      17379.5
-7      17379.0
-8      1991.7000732421875
-9      1991.9000244140625
-10     23534.203125
-11     1992.0
-12     29515.802734375
-13     255.0
+1      1989.0
+2      1987.5
+3      1989.0
+4      1123.0
+5      1990.0
+6      1991.0
+7      255.0
+8      1990.0
+9      1991.0
+10     1123.0
+11     1990.5
+12     255.0
+13     -16256.0
 14     255.0
 15     1992.0
 
 -- !select_default --
 1      1989.0
 2      1986.0
-3      1989.0
-4      1990.7999267578125
-5      1989.0
-6      29689.203125
-7      1990.39990234375
-8      23533.6015625
-9      17379.0
-10     1991.0
-11     11223.8056640625
-12     11223.8056640625
-13     5068.5966796875
-14     1991.0
-15     5069.4970703125
+3      1987.5
+4      1989.0
+5      1987.0
+6      1989.0
+7      1987.5
+8      1987.0
+9      1989.0
+10     1989.0
+11     1989.0
+12     1990.0
+13     1989.0
+14     1987.5
+15     1989.0
 
diff --git 
a/regression-test/suites/correctness_p0/test_select_stddev_variance_window.groovy
 
b/regression-test/suites/correctness_p0/test_select_stddev_variance_window.groovy
index 390418ddcf..311ae350c8 100644
--- 
a/regression-test/suites/correctness_p0/test_select_stddev_variance_window.groovy
+++ 
b/regression-test/suites/correctness_p0/test_select_stddev_variance_window.groovy
@@ -144,12 +144,12 @@ suite("test_select_stddev_variance_window") {
     qt_select_default  "select k1, percentile(k2,0.8) over (partition by k6 
order by k1 rows between current row and unbounded following) from ${tableName} 
order by k1;"
     qt_select_default  "select k1, percentile(k2,0.8) over (partition by k6 
order by k1) from ${tableName} order by k1;"
 
-    qt_select_default  "select k1, percentile_approx(k2,0.8,4096) over 
(partition by k6 order by k1 rows between 3 preceding and unbounded following) 
from ${tableName} order by k1;"
-    qt_select_default  "select k1, percentile_approx(k2,0.8,4096) over 
(partition by k6 order by k1 rows between 3 preceding and 1 preceding) from  
${tableName} order by k1;"
-    qt_select_default  "select k1, percentile_approx(k2,0.8,4096) over 
(partition by k6 order by k1 rows between 3 preceding and 1 following) from  
${tableName} order by k1;"
-    qt_select_default  "select k1, percentile_approx(k2,0.8,4096) over 
(partition by k6 order by k1 rows between current row and current row) from  
${tableName} order by k1;"
-    qt_select_default  "select k1, percentile_approx(k2,0.8,4096) over 
(partition by k6 order by k1 rows between current row and unbounded following) 
from ${tableName} order by k1;"
-    qt_select_default  "select k1, percentile_approx(k2,0.8,4096) over 
(partition by k6 order by k1) from ${tableName} order by k1;"
+    qt_select_default  "select k1, percentile_approx(k2,0.5,4096) over 
(partition by k6 order by k1 rows between 3 preceding and unbounded following) 
from ${tableName} order by k1;"
+    qt_select_default  "select k1, percentile_approx(k2,0.5,4096) over 
(partition by k6 order by k1 rows between 3 preceding and 1 preceding) from  
${tableName} order by k1;"
+    qt_select_default  "select k1, percentile_approx(k2,0.5,4096) over 
(partition by k6 order by k1 rows between 3 preceding and 1 following) from  
${tableName} order by k1;"
+    qt_select_default  "select k1, percentile_approx(k2,0.5,4096) over 
(partition by k6 order by k1 rows between current row and current row) from  
${tableName} order by k1;"
+    qt_select_default  "select k1, percentile_approx(k2,0.5,4096) over 
(partition by k6 order by k1 rows between current row and unbounded following) 
from ${tableName} order by k1;"
+    qt_select_default  "select k1, percentile_approx(k2,0.5,4096) over 
(partition by k6 order by k1) from ${tableName} order by k1;"
 }
 
 
diff --git 
a/regression-test/suites/query_p0/sql_functions/aggregate_functions/test_aggregate_collect.groovy
 
b/regression-test/suites/query_p0/sql_functions/aggregate_functions/test_aggregate_collect.groovy
index cd7e61d408..a53e795064 100644
--- 
a/regression-test/suites/query_p0/sql_functions/aggregate_functions/test_aggregate_collect.groovy
+++ 
b/regression-test/suites/query_p0/sql_functions/aggregate_functions/test_aggregate_collect.groovy
@@ -47,4 +47,28 @@ suite("test_aggregate_collect") {
 
     sql """ CREATE TABLE ${tableCTAS} PROPERTIES("replication_num" = "1") AS 
SELECT 
1,collect_list(c_int),collect_set(c_string),collect_list(c_date),collect_set(c_decimal),collect_list(c_string_not_null)
 FROM ${tableName} """
     qt_select "SELECT * from ${tableCTAS}"
+
+    // topn_array
+    def tableName_12 = "topn_array"
+
+    sql "DROP TABLE IF EXISTS ${tableName_12}"
+    sql """
+        CREATE TABLE IF NOT EXISTS ${tableName_12} (
+            id int,
+                 level int,
+            dt datev2,
+            num decimal(27,9)
+        )
+        DISTRIBUTED BY HASH(id) BUCKETS 1
+        PROPERTIES (
+          "replication_num" = "1"
+        ) 
+        """
+    sql "INSERT INTO ${tableName_12} values(1,10,'2022-11-1',6.8754576), 
(2,8,'2022-11-3',0.576), (2,10,'2022-11-2',1.234) ,(3,10,'2022-11-2',0.576) 
,(5,29,'2022-11-2',6.8754576) ,(6,8,'2022-11-1',6.8754576)"
+
+    qt_select43 "select topn_array(level,2) from ${tableName_12}"
+    qt_select44 "select topn_array(level,2,100) from ${tableName_12}" 
+    qt_select45 "select topn_array(dt,2,100) from ${tableName_12}"  
+    qt_select46 "select topn_array(num,2,100) from ${tableName_12}"  
+    sql "DROP TABLE IF EXISTS ${tableName_12}"    
 }
diff --git 
a/regression-test/suites/query_p0/sql_functions/window_functions/test_select_stddev_variance_window.groovy
 
b/regression-test/suites/query_p0/sql_functions/window_functions/test_select_stddev_variance_window.groovy
index b81f400dad..fb319a4192 100644
--- 
a/regression-test/suites/query_p0/sql_functions/window_functions/test_select_stddev_variance_window.groovy
+++ 
b/regression-test/suites/query_p0/sql_functions/window_functions/test_select_stddev_variance_window.groovy
@@ -147,12 +147,12 @@ suite("test_select_stddev_variance_window") {
     qt_select_default  "select k1, percentile(k2,0.8) over (partition by k6 
order by k1 rows between current row and unbounded following) from ${tableName} 
order by k1;"
     qt_select_default  "select k1, percentile(k2,0.8) over (partition by k6 
order by k1) from ${tableName} order by k1;"
 
-    qt_select_default  "select k1, percentile_approx(k2,0.8,4096) over 
(partition by k6 order by k1 rows between 3 preceding and unbounded following) 
from ${tableName} order by k1;"
-    qt_select_default  "select k1, percentile_approx(k2,0.8,4096) over 
(partition by k6 order by k1 rows between 3 preceding and 1 preceding) from  
${tableName} order by k1;"
-    qt_select_default  "select k1, percentile_approx(k2,0.8,4096) over 
(partition by k6 order by k1 rows between 3 preceding and 1 following) from  
${tableName} order by k1;"
-    qt_select_default  "select k1, percentile_approx(k2,0.8,4096) over 
(partition by k6 order by k1 rows between current row and current row) from  
${tableName} order by k1;"
-    qt_select_default  "select k1, percentile_approx(k2,0.8,4096) over 
(partition by k6 order by k1 rows between current row and unbounded following) 
from ${tableName} order by k1;"
-    qt_select_default  "select k1, percentile_approx(k2,0.8,4096) over 
(partition by k6 order by k1) from ${tableName} order by k1;"
+    qt_select_default  "select k1, percentile_approx(k2,0.5,4096) over 
(partition by k6 order by k1 rows between 3 preceding and unbounded following) 
from ${tableName} order by k1;"
+    qt_select_default  "select k1, percentile_approx(k2,0.5,4096) over 
(partition by k6 order by k1 rows between 3 preceding and 1 preceding) from  
${tableName} order by k1;"
+    qt_select_default  "select k1, percentile_approx(k2,0.5,4096) over 
(partition by k6 order by k1 rows between 3 preceding and 1 following) from  
${tableName} order by k1;"
+    qt_select_default  "select k1, percentile_approx(k2,0.5,4096) over 
(partition by k6 order by k1 rows between current row and current row) from  
${tableName} order by k1;"
+    qt_select_default  "select k1, percentile_approx(k2,0.5,4096) over 
(partition by k6 order by k1 rows between current row and unbounded following) 
from ${tableName} order by k1;"
+    qt_select_default  "select k1, percentile_approx(k2,0.5,4096) over 
(partition by k6 order by k1) from ${tableName} order by k1;"
 }
 
 


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

[doris] branch master updated: [Vectorized](function) support topn_array function (#13869)

Reply via email to