This is an automated email from the ASF dual-hosted git repository.

morrysnow pushed a commit to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-3.1 by this push:
     new c3fd70c8d80 branch-3.1: [enhance](agg) Make aggfuncs that not support 
multi_distinct can be split in splitMultiDistinct (#58973)
c3fd70c8d80 is described below

commit c3fd70c8d80e0d8a8e29de7b426f24db306299c4
Author: feiniaofeiafei <[email protected]>
AuthorDate: Fri Dec 19 14:23:28 2025 +0800

    branch-3.1: [enhance](agg) Make aggfuncs that not support multi_distinct 
can be split in splitMultiDistinct (#58973)
    
    1. Before this PR, a query like the following was split with a CTE to 
compute the aggregates, which is unnecessary:
    select count(distinct a),sum(distinct a) from t1;
    After this PR, the query is no longer split.
    2. Before this PR, a query like the following reported an error:
    select array_agg(distinct a), array_agg(distinct b) from t1;
    
    errCode = 2, detailMessage = array_agg(DISTINCT a#1) can't support multi 
distinct.
    
    After this PR, the query is split with a CTE and computed, and no error is 
reported.
---
 .../nereids/rules/rewrite/SplitMultiDistinct.java  | 16 +++++++-----
 .../nereids/trees/plans/algebra/Aggregate.java     | 14 ++++++++++
 .../distinct_split/disitinct_split.out             | 30 ++++++++++++++++++++++
 .../data/nereids_syntax_p0/analyze_agg.out         |  3 +++
 .../distinct_split/disitinct_split.groovy          | 10 ++++++++
 .../suites/nereids_syntax_p0/analyze_agg.groovy    |  5 +---
 6 files changed, 67 insertions(+), 11 deletions(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/SplitMultiDistinct.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/SplitMultiDistinct.java
index 6adb3270b5b..437d3a186f6 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/SplitMultiDistinct.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/SplitMultiDistinct.java
@@ -47,10 +47,8 @@ import com.google.common.collect.ImmutableList;
 
 import java.util.ArrayList;
 import java.util.HashMap;
-import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
-import java.util.Set;
 import java.util.stream.Collectors;
 
 /**
@@ -216,23 +214,27 @@ public class SplitMultiDistinct extends 
DefaultPlanRewriter<DistinctSplitContext
         if (agg.getSourceRepeat().isPresent()) {
             return false;
         }
-        Set<Expression> distinctFunc = new HashSet<>();
+        if (agg.distinctFuncNum() < 2 || agg.getDistinctArguments().size() < 
2) {
+            return false;
+        }
         boolean distinctMultiColumns = false;
+        boolean hasNotSupportMultiDistinctFunc = false;
         for (NamedExpression namedExpression : agg.getOutputExpressions()) {
             if (!(namedExpression instanceof Alias) || 
!(namedExpression.child(0) instanceof AggregateFunction)) {
                 continue;
             }
             AggregateFunction aggFunc = (AggregateFunction) 
namedExpression.child(0);
-            if (aggFunc instanceof SupportMultiDistinct && 
aggFunc.isDistinct()) {
+            if (aggFunc.isDistinct()) {
+                hasNotSupportMultiDistinctFunc = hasNotSupportMultiDistinctFunc
+                        || !(aggFunc instanceof SupportMultiDistinct);
                 aliases.add((Alias) namedExpression);
-                distinctFunc.add(aggFunc);
                 distinctMultiColumns = distinctMultiColumns || 
isDistinctMultiColumns(aggFunc);
             } else {
                 otherAggFuncs.add((Alias) namedExpression);
             }
         }
-        if (distinctFunc.size() <= 1) {
-            return false;
+        if (hasNotSupportMultiDistinctFunc) {
+            return true;
         }
         // when this aggregate is not distinctMultiColumns, and group by 
expressions is not empty
         // e.g. sql1: select count(distinct a), count(distinct b) from t1 
group by c;
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/algebra/Aggregate.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/algebra/Aggregate.java
index 7a283c740e5..371dd43f91d 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/algebra/Aggregate.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/algebra/Aggregate.java
@@ -106,4 +106,18 @@ public interface Aggregate<CHILD_TYPE extends Plan> 
extends UnaryPlan<CHILD_TYPE
         return getOutputExpressions().stream().allMatch(e -> e instanceof Slot)
                 && getGroupByExpressions().stream().allMatch(e -> e instanceof 
Slot);
     }
+
+    /**
+     * distinctFuncNum
+     * @return number of distinct aggregate functions
+     */
+    default int distinctFuncNum() {
+        int num = 0;
+        for (AggregateFunction aggFunc : getAggregateFunctions()) {
+            if (aggFunc.isDistinct()) {
+                ++num;
+            }
+        }
+        return num;
+    }
 }
diff --git 
a/regression-test/data/nereids_rules_p0/distinct_split/disitinct_split.out 
b/regression-test/data/nereids_rules_p0/distinct_split/disitinct_split.out
index ede0fb5259c..ceed693e8cc 100644
--- a/regression-test/data/nereids_rules_p0/distinct_split/disitinct_split.out
+++ b/regression-test/data/nereids_rules_p0/distinct_split/disitinct_split.out
@@ -501,3 +501,33 @@ PhysicalResultSink
 -- !null_hash --
 1      \N      0       0.0
 
+-- !array_agg_nogby --
+[2]    [1]
+
+-- !array_agg_gby --
+[2]    [1]
+[2]    [1]
+
+-- !array_agg_and_other --
+[2]    2
+
+-- !not_split_cte_when_same_col --
+3      2       1.5
+
+-- !not_split_cte_when_same_col_shape --
+PhysicalResultSink
+--hashAgg[DISTINCT_LOCAL]
+----hashAgg[GLOBAL]
+------hashAgg[LOCAL]
+--------PhysicalOlapScan[test_distinct_multi]
+
+-- !not_split_cte_when_same_col_gby --
+3      2       1.5
+
+-- !not_split_cte_when_same_col_gby__shape --
+PhysicalResultSink
+--hashAgg[DISTINCT_LOCAL]
+----hashAgg[GLOBAL]
+------hashAgg[LOCAL]
+--------PhysicalOlapScan[test_distinct_multi]
+
diff --git a/regression-test/data/nereids_syntax_p0/analyze_agg.out 
b/regression-test/data/nereids_syntax_p0/analyze_agg.out
index 8316c4aefe2..b0428c9e56b 100644
--- a/regression-test/data/nereids_syntax_p0/analyze_agg.out
+++ b/regression-test/data/nereids_syntax_p0/analyze_agg.out
@@ -1,6 +1,9 @@
 -- This file is automatically generated. You should know what you did if you 
want to edit this
 -- !sql --
 
+-- !multi_agg_distinct_func --
+0      \N
+
 -- !test_sum0 --
 0      0
 0      3
diff --git 
a/regression-test/suites/nereids_rules_p0/distinct_split/disitinct_split.groovy 
b/regression-test/suites/nereids_rules_p0/distinct_split/disitinct_split.groovy
index 9efe6ce487a..569c086112c 100644
--- 
a/regression-test/suites/nereids_rules_p0/distinct_split/disitinct_split.groovy
+++ 
b/regression-test/suites/nereids_rules_p0/distinct_split/disitinct_split.groovy
@@ -213,4 +213,14 @@ suite("distinct_split") {
     sql "create table test_distinct_multi_null_hash(a int, b int, c int, d 
varchar(10), e date) distributed by hash(a) properties('replication_num'='1');"
     sql "insert into test_distinct_multi_null_hash 
values(1,null,null,null,'2024-12-08');"
     qt_null_hash "SELECT a, b, count(distinct c,e), count(distinct 
concat(d,e))/count(distinct e) FROM test_distinct_multi_null_hash where e = 
'2024-12-08' GROUP BY a, b;"
+
+    // test agg function not support multi_distinct
+    sql "select array_agg(distinct b), array_agg(distinct a) from 
test_distinct_multi"
+    qt_array_agg_nogby "select array_agg(distinct b), array_agg(distinct a) 
from test_distinct_multi where a=1 and b=2"
+    qt_array_agg_gby "select array_agg(distinct b), array_agg(distinct a) from 
test_distinct_multi where a=1 and b=2 group by c"
+    qt_array_agg_and_other "select array_agg(distinct b), count(distinct a) 
from test_distinct_multi where b=2"
+    qt_not_split_cte_when_same_col "select sum(distinct a), count(distinct 
a),avg(distinct a) from test_distinct_multi"
+    qt_not_split_cte_when_same_col_shape "explain shape plan select 
sum(distinct a), count(distinct a),avg(distinct a) from test_distinct_multi"
+    order_qt_not_split_cte_when_same_col_gby "select sum(distinct a), 
count(distinct a),avg(distinct a) from test_distinct_multi group by b"
+    qt_not_split_cte_when_same_col_gby__shape "explain shape plan select 
sum(distinct a), count(distinct a),avg(distinct a) from test_distinct_multi 
group by b"
 }
diff --git a/regression-test/suites/nereids_syntax_p0/analyze_agg.groovy 
b/regression-test/suites/nereids_syntax_p0/analyze_agg.groovy
index cf93cad471c..5903b9e474e 100644
--- a/regression-test/suites/nereids_syntax_p0/analyze_agg.groovy
+++ b/regression-test/suites/nereids_syntax_p0/analyze_agg.groovy
@@ -70,10 +70,7 @@ suite("analyze_agg") {
                  tt2.c;
     """
 
-    test {
-        sql "select count(distinct t2.b), variance(distinct t2.c) from t2"
-        exception "variance(DISTINCT c#2) can't support multi distinct."
-    }
+    qt_multi_agg_distinct_func "select count(distinct t2.b), variance(distinct 
t2.c) from t2"
 
     // should not bind g /g in group by again, otherwise will throw exception
     sql "select g / g as nu, sum(c) from t2 group by nu"


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to