This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.1 by this push:
new dc8102bbf4c branch-2.1: [Bug](distinct) fix distinct function with
over return error result #51875 (#52313)
dc8102bbf4c is described below
commit dc8102bbf4c32108c4b394fbec7d41e27a46f17d
Author: github-actions[bot]
<41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Mon Jun 30 09:45:50 2025 +0800
branch-2.1: [Bug](distinct) fix distinct function with over return error
result #51875 (#52313)
Cherry-picked from #51875
Co-authored-by: zhangstar333 <[email protected]>
---
.../aggregate_function_distinct.h | 13 ++++++++++++
.../test_aggregate_window_functions.out | Bin 21094 -> 21093 bytes
.../sql_functions/window_functions/test_sum.out | Bin 143 -> 242 bytes
.../sql_functions/window_functions/test_sum.groovy | 22 +++++++++++++++++++++
4 files changed, 35 insertions(+)
diff --git a/be/src/vec/aggregate_functions/aggregate_function_distinct.h
b/be/src/vec/aggregate_functions/aggregate_function_distinct.h
index 33ea4e17dc0..9eb213ef123 100644
--- a/be/src/vec/aggregate_functions/aggregate_function_distinct.h
+++ b/be/src/vec/aggregate_functions/aggregate_function_distinct.h
@@ -64,6 +64,8 @@ struct AggregateFunctionDistinctSingleNumericData {
using Self = AggregateFunctionDistinctSingleNumericData<T, stable>;
Container data;
+ void clear() { data.clear(); }
+
void add(const IColumn** columns, size_t /* columns_num */, size_t
row_num, Arena*) {
const auto& vec = assert_cast<const
ColumnVector<T>&>(*columns[0]).get_data();
if constexpr (stable) {
@@ -122,6 +124,8 @@ struct AggregateFunctionDistinctGenericData {
using Self = AggregateFunctionDistinctGenericData;
Container data;
+ void clear() { data.clear(); }
+
void merge(const Self& rhs, Arena* arena) {
DCHECK(!stable);
if constexpr (!stable) {
@@ -315,6 +319,15 @@ public:
nested_func->add_batch_single_place(arguments[0]->size(),
get_nested_place(place),
arguments_raw.data(), &arena);
nested_func->insert_result_into(get_nested_place(place), to);
+ // For a distinct aggregate, the real computation is done by
add_batch_single_place, deferred to the last step in insert_result_into.
+ // When a distinct aggregate is used together with an over() window,
insert_result_into is called many times, each with different rows,
+ // so the data must be cleared here so that it does not affect the next
insert_result_into call.
+ this->data(place).clear();
+ }
+
+ void reset(AggregateDataPtr place) const override {
+ this->data(place).clear();
+ nested_func->reset(get_nested_place(place));
}
size_t size_of_data() const override { return prefix_size +
nested_func->size_of_data(); }
diff --git
a/regression-test/data/nereids_p0/sql_functions/aggregate_functions/test_aggregate_window_functions.out
b/regression-test/data/nereids_p0/sql_functions/aggregate_functions/test_aggregate_window_functions.out
index 03569f1aedf..e478e7afec6 100644
Binary files
a/regression-test/data/nereids_p0/sql_functions/aggregate_functions/test_aggregate_window_functions.out
and
b/regression-test/data/nereids_p0/sql_functions/aggregate_functions/test_aggregate_window_functions.out
differ
diff --git
a/regression-test/data/query_p0/sql_functions/window_functions/test_sum.out
b/regression-test/data/query_p0/sql_functions/window_functions/test_sum.out
index 9185f64fa6e..84f4bccb2d1 100644
Binary files
a/regression-test/data/query_p0/sql_functions/window_functions/test_sum.out and
b/regression-test/data/query_p0/sql_functions/window_functions/test_sum.out
differ
diff --git
a/regression-test/suites/query_p0/sql_functions/window_functions/test_sum.groovy
b/regression-test/suites/query_p0/sql_functions/window_functions/test_sum.groovy
index 3611400568d..e61f586181a 100644
---
a/regression-test/suites/query_p0/sql_functions/window_functions/test_sum.groovy
+++
b/regression-test/suites/query_p0/sql_functions/window_functions/test_sum.groovy
@@ -21,5 +21,27 @@ suite("test_sum") {
(partition by k1 order by k3 range between current row
and unbounded following) as w
from test_query_db.test order by k1, w
"""
+
+ sql "create database if not exists multi_db"
+ sql "use multi_db"
+ sql "DROP TABLE IF EXISTS multi"
+ sql """
+ CREATE TABLE multi (
+ id int,
+ v1 int,
+ v2 varchar
+ ) ENGINE = OLAP
+ DUPLICATE KEY(id) COMMENT 'OLAP'
+ DISTRIBUTED BY HASH(id) BUCKETS 2
+ PROPERTIES (
+ "replication_allocation" = "tag.location.default: 1"
+ );
+ """
+ sql """
+ insert into multi values (1, 1, 'a'),(1, 1, 'a'), (2, 1, 'a'), (3, 1,
'a');
+ """
+ qt_sql_window_muti1 """ select multi_distinct_group_concat(v2) over()
from multi; """
+ qt_sql_window_muti2 """ select multi_distinct_sum(v1) over() from multi;
"""
+ qt_sql_window_muti3 """ select multi_distinct_count(v1) over() from
multi; """
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]