This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.1 by this push:
     new dc8102bbf4c branch-2.1: [Bug](distinct) fix distinct function with 
over return error result #51875 (#52313)
dc8102bbf4c is described below

commit dc8102bbf4c32108c4b394fbec7d41e27a46f17d
Author: github-actions[bot] 
<41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Mon Jun 30 09:45:50 2025 +0800

    branch-2.1: [Bug](distinct) fix distinct function with over return error 
result #51875 (#52313)
    
    Cherry-picked from #51875
    
    Co-authored-by: zhangstar333 <[email protected]>
---
 .../aggregate_function_distinct.h                  |  13 ++++++++++++
 .../test_aggregate_window_functions.out            | Bin 21094 -> 21093 bytes
 .../sql_functions/window_functions/test_sum.out    | Bin 143 -> 242 bytes
 .../sql_functions/window_functions/test_sum.groovy |  22 +++++++++++++++++++++
 4 files changed, 35 insertions(+)

diff --git a/be/src/vec/aggregate_functions/aggregate_function_distinct.h 
b/be/src/vec/aggregate_functions/aggregate_function_distinct.h
index 33ea4e17dc0..9eb213ef123 100644
--- a/be/src/vec/aggregate_functions/aggregate_function_distinct.h
+++ b/be/src/vec/aggregate_functions/aggregate_function_distinct.h
@@ -64,6 +64,8 @@ struct AggregateFunctionDistinctSingleNumericData {
     using Self = AggregateFunctionDistinctSingleNumericData<T, stable>;
     Container data;
 
+    void clear() { data.clear(); }
+
     void add(const IColumn** columns, size_t /* columns_num */, size_t 
row_num, Arena*) {
         const auto& vec = assert_cast<const 
ColumnVector<T>&>(*columns[0]).get_data();
         if constexpr (stable) {
@@ -122,6 +124,8 @@ struct AggregateFunctionDistinctGenericData {
     using Self = AggregateFunctionDistinctGenericData;
     Container data;
 
+    void clear() { data.clear(); }
+
     void merge(const Self& rhs, Arena* arena) {
         DCHECK(!stable);
         if constexpr (!stable) {
@@ -315,6 +319,15 @@ public:
         nested_func->add_batch_single_place(arguments[0]->size(), 
get_nested_place(place),
                                             arguments_raw.data(), &arena);
         nested_func->insert_result_into(get_nested_place(place), to);
+        // For a distinct aggregate function, the real calculation happens in 
add_batch_single_place, as the last step of insert_result_into.
+        // When a distinct aggregate is combined with an over() window function, 
insert_result_into is called many times with different rows,
+        // so we must clear the data here so that it does not affect the next 
insert_result_into call.
+        this->data(place).clear();
+    }
+
+    void reset(AggregateDataPtr place) const override {
+        this->data(place).clear();
+        nested_func->reset(get_nested_place(place));
     }
 
     size_t size_of_data() const override { return prefix_size + 
nested_func->size_of_data(); }
diff --git 
a/regression-test/data/nereids_p0/sql_functions/aggregate_functions/test_aggregate_window_functions.out
 
b/regression-test/data/nereids_p0/sql_functions/aggregate_functions/test_aggregate_window_functions.out
index 03569f1aedf..e478e7afec6 100644
Binary files 
a/regression-test/data/nereids_p0/sql_functions/aggregate_functions/test_aggregate_window_functions.out
 and 
b/regression-test/data/nereids_p0/sql_functions/aggregate_functions/test_aggregate_window_functions.out
 differ
diff --git 
a/regression-test/data/query_p0/sql_functions/window_functions/test_sum.out 
b/regression-test/data/query_p0/sql_functions/window_functions/test_sum.out
index 9185f64fa6e..84f4bccb2d1 100644
Binary files 
a/regression-test/data/query_p0/sql_functions/window_functions/test_sum.out and 
b/regression-test/data/query_p0/sql_functions/window_functions/test_sum.out 
differ
diff --git 
a/regression-test/suites/query_p0/sql_functions/window_functions/test_sum.groovy
 
b/regression-test/suites/query_p0/sql_functions/window_functions/test_sum.groovy
index 3611400568d..e61f586181a 100644
--- 
a/regression-test/suites/query_p0/sql_functions/window_functions/test_sum.groovy
+++ 
b/regression-test/suites/query_p0/sql_functions/window_functions/test_sum.groovy
@@ -21,5 +21,27 @@ suite("test_sum") {
                       (partition by k1 order by k3 range between current row 
and unbounded following) as w 
                   from test_query_db.test order by k1, w
               """
+
+    sql "create database if not exists multi_db"
+    sql "use multi_db"
+    sql "DROP TABLE IF EXISTS multi"
+    sql """
+        CREATE TABLE multi (
+            id int,
+            v1 int,
+            v2 varchar
+            ) ENGINE = OLAP
+            DUPLICATE KEY(id) COMMENT 'OLAP'
+            DISTRIBUTED BY HASH(id) BUCKETS 2
+            PROPERTIES (
+            "replication_allocation" = "tag.location.default: 1"
+            );
+        """ 
+    sql """
+        insert into multi values (1, 1, 'a'),(1, 1, 'a'), (2, 1, 'a'), (3, 1, 
'a');
+        """ 
+    qt_sql_window_muti1 """   select multi_distinct_group_concat(v2) over() 
from multi; """
+    qt_sql_window_muti2 """   select multi_distinct_sum(v1) over() from multi; 
"""
+    qt_sql_window_muti3 """   select multi_distinct_count(v1) over() from 
multi; """
 }
 


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to