This is an automated email from the ASF dual-hosted git repository.

huajianlan pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 8a1ac334e4d [fix](nereids) change default agg strategy to 
multi_distinct when not have statistics (#55855)
8a1ac334e4d is described below

commit 8a1ac334e4d75bea2f7d37602189fb9d55e499f8
Author: feiniaofeiafei <[email protected]>
AuthorDate: Fri Sep 12 16:49:06 2025 +0800

    [fix](nereids) change default agg strategy to multi_distinct when not have 
statistics (#55855)
    
    Related PR: #54079
    
    Problem Summary:
    Using the multi-distinct strategy instead of multi-stage/CTE splitting
    strategy for AGG in scenarios without statistical information,
    consistent with the default strategy before refactor(#54079), can
    achieve better performance in scenarios without statistical information.
---
 .../rules/rewrite/DistinctAggStrategySelector.java |   5 ++-
 .../rules/rewrite/DistinctAggregateRewriter.java   |   6 ++--
 .../nereids/trees/plans/algebra/Aggregate.java     |  13 +++++++
 .../rewrite/DistinctAggregateRewriterTest.java     |  38 +++++++++++++++++++++
 .../rules/rewrite/SplitMultiDistinctTest.java      |  15 ++++++++
 .../adjust_nullable/test_adjust_nullable.out       | Bin 1121 -> 489 bytes
 .../agg_skew_rewrite/agg_skew_rewrite.out          | Bin 8320 -> 8202 bytes
 .../nereids_rules_p0/agg_strategy/agg_strategy.out | Bin 21517 -> 21077 bytes
 .../distinct_agg_strategy_selector.out             | Bin 4760 -> 4377 bytes
 .../agg_strategy/test_variables.out                | Bin 6275 -> 6211 bytes
 .../distinct_split/disitinct_split.out             | Bin 13518 -> 13158 bytes
 .../merge_aggregate/merge_aggregate.out            | Bin 7977 -> 7803 bytes
 .../data/shape_check/clickbench/query10.out        | Bin 467 -> 415 bytes
 .../data/shape_check/clickbench/query11.out        | Bin 508 -> 472 bytes
 .../data/shape_check/clickbench/query12.out        | Bin 508 -> 472 bytes
 .../data/shape_check/clickbench/query14.out        | Bin 504 -> 468 bytes
 .../data/shape_check/clickbench/query23.out        | Bin 567 -> 531 bytes
 .../data/shape_check/clickbench/query9.out         | Bin 448 -> 414 bytes
 .../tpch_sf1000/nostats_rf_prune/q16.out           | Bin 1200 -> 1148 bytes
 .../shape_check/tpch_sf1000/shape_no_stats/q16.out | Bin 1200 -> 1148 bytes
 .../distinct_agg_strategy_selector.groovy          |   2 +-
 .../mv/agg_variety/agg_variety.groovy              |   1 +
 22 files changed, 76 insertions(+), 4 deletions(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/DistinctAggStrategySelector.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/DistinctAggStrategySelector.java
index 5b4e4092e54..3b9caa40908 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/DistinctAggStrategySelector.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/DistinctAggStrategySelector.java
@@ -151,9 +151,12 @@ public class DistinctAggStrategySelector extends 
DefaultPlanRewriter<DistinctSel
                 }
             }
         } else {
-            if 
(AggregateUtils.hasUnknownStatistics(agg.getGroupByExpressions(), childStats)) {
+            if (agg.hasSkewHint()) {
                 return false;
             }
+            if 
(AggregateUtils.hasUnknownStatistics(agg.getGroupByExpressions(), childStats)) {
+                return true;
+            }
             // The joint ndv of Group by key is high, so multi_distinct is not 
selected;
             if (aggStats.getRowCount() >= row * 
AggregateUtils.LOW_CARDINALITY_THRESHOLD) {
                 return false;
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/DistinctAggregateRewriter.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/DistinctAggregateRewriter.java
index 2b780497368..e34d56d383c 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/DistinctAggregateRewriter.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/DistinctAggregateRewriter.java
@@ -41,6 +41,7 @@ import org.apache.doris.qe.ConnectContext;
 import org.apache.doris.statistics.ColumnStatistic;
 import org.apache.doris.statistics.Statistics;
 
+import com.google.common.annotations.VisibleForTesting;
 import com.google.common.collect.ImmutableList;
 import com.google.common.collect.ImmutableSet;
 
@@ -89,7 +90,8 @@ public class DistinctAggregateRewriter implements 
RewriteRuleFactory {
         );
     }
 
-    private boolean shouldUseMultiDistinct(LogicalAggregate<? extends Plan> 
aggregate) {
+    @VisibleForTesting
+    boolean shouldUseMultiDistinct(LogicalAggregate<? extends Plan> aggregate) 
{
         // count(distinct a,b) cannot use multi_distinct
         if (AggregateUtils.containsCountDistinctMultiExpr(aggregate)) {
             return false;
@@ -111,7 +113,7 @@ public class DistinctAggregateRewriter implements 
RewriteRuleFactory {
         // has unknown statistics, split to bottom and top agg
         if 
(AggregateUtils.hasUnknownStatistics(aggregate.getGroupByExpressions(), 
aggChildStats)
                 || AggregateUtils.hasUnknownStatistics(dstArgs, 
aggChildStats)) {
-            return false;
+            return true;
         }
 
         double gbyNdv = aggStats.getRowCount();
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/algebra/Aggregate.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/algebra/Aggregate.java
index 2d3058e434a..99ab10f982b 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/algebra/Aggregate.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/algebra/Aggregate.java
@@ -167,4 +167,17 @@ public interface Aggregate<CHILD_TYPE extends Plan> 
extends UnaryPlan<CHILD_TYPE
                 && !getGroupByExpressions().isEmpty()
                 && !(new 
HashSet<>(getGroupByExpressions()).containsAll(distinctArguments));
     }
+
+    /**
+     * hasSkewHint
+     * @return true if there is at least one skew hint
+     */
+    default boolean hasSkewHint() {
+        for (AggregateFunction aggFunc : getAggregateFunctions()) {
+            if (aggFunc.isSkew()) {
+                return true;
+            }
+        }
+        return false;
+    }
 }
diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/DistinctAggregateRewriterTest.java
 
b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/DistinctAggregateRewriterTest.java
index 1664cc18dfc..fcab4e2ae8c 100644
--- 
a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/DistinctAggregateRewriterTest.java
+++ 
b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/DistinctAggregateRewriterTest.java
@@ -19,13 +19,18 @@ package org.apache.doris.nereids.rules.rewrite;
 
 import 
org.apache.doris.nereids.trees.expressions.functions.agg.AggregateFunction;
 import org.apache.doris.nereids.trees.expressions.functions.agg.Count;
+import 
org.apache.doris.nereids.trees.expressions.functions.agg.MultiDistinctCount;
 import 
org.apache.doris.nereids.trees.expressions.functions.agg.MultiDistinctGroupConcat;
 import org.apache.doris.nereids.trees.expressions.functions.agg.Sum0;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.If;
+import org.apache.doris.nereids.trees.plans.Plan;
+import org.apache.doris.nereids.trees.plans.logical.LogicalAggregate;
 import org.apache.doris.nereids.util.MemoPatternMatchSupported;
 import org.apache.doris.nereids.util.PlanChecker;
 import org.apache.doris.utframe.TestWithFeService;
 
+import mockit.Mock;
+import mockit.MockUp;
 import org.junit.jupiter.api.Test;
 
 public class DistinctAggregateRewriterTest extends TestWithFeService 
implements MemoPatternMatchSupported {
@@ -39,8 +44,18 @@ public class DistinctAggregateRewriterTest extends 
TestWithFeService implements
         
connectContext.getSessionVariable().setDisableNereidsRules("PRUNE_EMPTY_PARTITION");
     }
 
+    private void applyMock() {
+        new MockUp<DistinctAggregateRewriter>() {
+            @Mock
+            boolean shouldUseMultiDistinct(LogicalAggregate<? extends Plan> 
aggregate) {
+                return false;
+            }
+        };
+    }
+
     @Test
     void testSplitSingleDistinctAgg() {
+        applyMock();
         PlanChecker.from(connectContext)
                 .analyze("select b, count(distinct a) from 
test.distinct_agg_split_t group by b")
                 .rewrite()
@@ -59,6 +74,7 @@ public class DistinctAggregateRewriterTest extends 
TestWithFeService implements
 
     @Test
     void testSplitSingleDistinctAggOtherFunctionCount() {
+        applyMock();
         PlanChecker.from(connectContext)
                 .analyze("select b, count(distinct a), count(a) from 
test.distinct_agg_split_t group by b")
                 .rewrite()
@@ -77,6 +93,7 @@ public class DistinctAggregateRewriterTest extends 
TestWithFeService implements
 
     @Test
     void testSplitSingleDistinctWithOtherAgg() {
+        applyMock();
         PlanChecker.from(connectContext)
                 .analyze("select b, count(distinct a), sum(c) from 
test.distinct_agg_split_t group by b")
                 .rewrite()
@@ -93,6 +110,7 @@ public class DistinctAggregateRewriterTest extends 
TestWithFeService implements
 
     @Test
     void testNotSplitWhenNoGroupBy() {
+        applyMock();
         PlanChecker.from(connectContext)
                 .analyze("select count(distinct a) from 
test.distinct_agg_split_t")
                 .rewrite()
@@ -102,6 +120,7 @@ public class DistinctAggregateRewriterTest extends 
TestWithFeService implements
 
     @Test
     void testSplitWhenNoGroupByHasGroupConcatDistinctOrderBy() {
+        applyMock();
         PlanChecker.from(connectContext)
                 .analyze("select group_concat(distinct a, '' order by b) from 
test.distinct_agg_split_t")
                 .rewrite()
@@ -113,6 +132,7 @@ public class DistinctAggregateRewriterTest extends 
TestWithFeService implements
 
     @Test
     void testSplitWhenNoGroupByHasGroupConcatDistinct() {
+        applyMock();
         PlanChecker.from(connectContext)
                 .analyze("select group_concat(distinct a, '') from 
test.distinct_agg_split_t")
                 .rewrite()
@@ -124,6 +144,7 @@ public class DistinctAggregateRewriterTest extends 
TestWithFeService implements
 
     @Test
     void testMultiExprDistinct() {
+        applyMock();
         PlanChecker.from(connectContext)
                 .analyze("select b, sum(a), count(distinct a,c) from 
test.distinct_agg_split_t group by b")
                 .rewrite()
@@ -142,6 +163,7 @@ public class DistinctAggregateRewriterTest extends 
TestWithFeService implements
 
     @Test
     void testNotSplitWhenNoDistinct() {
+        applyMock();
         PlanChecker.from(connectContext)
                 .analyze("select b, sum(a), count(c) from 
test.distinct_agg_split_t group by b")
                 .rewrite()
@@ -151,6 +173,7 @@ public class DistinctAggregateRewriterTest extends 
TestWithFeService implements
 
     @Test
     void testSplitWithComplexExpression() {
+        applyMock();
         PlanChecker.from(connectContext)
                 .analyze("select b, count(distinct a + 1) from 
test.distinct_agg_split_t group by b")
                 .rewrite()
@@ -161,4 +184,19 @@ public class DistinctAggregateRewriterTest extends 
TestWithFeService implements
                         ).when(agg -> agg.getGroupByExpressions().size() == 1
                                 && 
agg.getGroupByExpressions().get(0).toSql().equals("b")));
     }
+
+    @Test
+    void testMultiDistinct() {
+        connectContext.getSessionVariable().setAggPhase(2);
+        PlanChecker.from(connectContext)
+                .analyze("select b, count(distinct a), sum(c) from 
test.distinct_agg_split_t group by b")
+                .rewrite()
+                .printlnTree()
+                .matches(
+                        logicalAggregate().when(agg -> 
agg.getGroupByExpressions().size() == 1
+                                && 
agg.getGroupByExpressions().get(0).toSql().equals("b")
+                                && 
agg.getAggregateFunctions().stream().noneMatch(AggregateFunction::isDistinct)
+                                && 
agg.getAggregateFunctions().stream().anyMatch(f -> f instanceof 
MultiDistinctCount)
+                        ));
+    }
 }
diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/SplitMultiDistinctTest.java
 
b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/SplitMultiDistinctTest.java
index 870124d2a49..fe8af6fee86 100644
--- 
a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/SplitMultiDistinctTest.java
+++ 
b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/SplitMultiDistinctTest.java
@@ -199,4 +199,19 @@ public class SplitMultiDistinctTest extends 
TestWithFeService implements MemoPat
             );
         });
     }
+
+    @Test
+    void multiSumWithGby() {
+        String sql = "select sum(distinct b), sum(distinct a) from 
test_distinct_multi group by c";
+        PlanChecker.from(connectContext).checkExplain(sql, planner -> {
+            Plan plan = planner.getOptimizedPlan();
+            MatchingUtils.assertMatches(plan,
+                    physicalResultSink(
+                            physicalDistribute(
+                                    physicalProject(
+                                            physicalHashAggregate(
+                                                    physicalDistribute(
+                                                            
physicalHashAggregate(any())))))));
+        });
+    }
 }
diff --git 
a/regression-test/data/nereids_rules_p0/adjust_nullable/test_adjust_nullable.out
 
b/regression-test/data/nereids_rules_p0/adjust_nullable/test_adjust_nullable.out
index 09626347b11..05ca7f01e20 100644
Binary files 
a/regression-test/data/nereids_rules_p0/adjust_nullable/test_adjust_nullable.out
 and 
b/regression-test/data/nereids_rules_p0/adjust_nullable/test_adjust_nullable.out
 differ
diff --git 
a/regression-test/data/nereids_rules_p0/agg_skew_rewrite/agg_skew_rewrite.out 
b/regression-test/data/nereids_rules_p0/agg_skew_rewrite/agg_skew_rewrite.out
index 360052e0f2a..813a08ce40d 100644
Binary files 
a/regression-test/data/nereids_rules_p0/agg_skew_rewrite/agg_skew_rewrite.out 
and 
b/regression-test/data/nereids_rules_p0/agg_skew_rewrite/agg_skew_rewrite.out 
differ
diff --git 
a/regression-test/data/nereids_rules_p0/agg_strategy/agg_strategy.out 
b/regression-test/data/nereids_rules_p0/agg_strategy/agg_strategy.out
index e63c5951f4b..a405272b3d3 100644
Binary files 
a/regression-test/data/nereids_rules_p0/agg_strategy/agg_strategy.out and 
b/regression-test/data/nereids_rules_p0/agg_strategy/agg_strategy.out differ
diff --git 
a/regression-test/data/nereids_rules_p0/agg_strategy/distinct_agg_strategy_selector.out
 
b/regression-test/data/nereids_rules_p0/agg_strategy/distinct_agg_strategy_selector.out
index ea5361dc350..05ef347e89b 100644
Binary files 
a/regression-test/data/nereids_rules_p0/agg_strategy/distinct_agg_strategy_selector.out
 and 
b/regression-test/data/nereids_rules_p0/agg_strategy/distinct_agg_strategy_selector.out
 differ
diff --git 
a/regression-test/data/nereids_rules_p0/agg_strategy/test_variables.out 
b/regression-test/data/nereids_rules_p0/agg_strategy/test_variables.out
index 4c9866e9568..0a009cc77df 100644
Binary files 
a/regression-test/data/nereids_rules_p0/agg_strategy/test_variables.out and 
b/regression-test/data/nereids_rules_p0/agg_strategy/test_variables.out differ
diff --git 
a/regression-test/data/nereids_rules_p0/distinct_split/disitinct_split.out 
b/regression-test/data/nereids_rules_p0/distinct_split/disitinct_split.out
index a8bdae2200e..735f924821a 100644
Binary files 
a/regression-test/data/nereids_rules_p0/distinct_split/disitinct_split.out and 
b/regression-test/data/nereids_rules_p0/distinct_split/disitinct_split.out 
differ
diff --git 
a/regression-test/data/nereids_rules_p0/merge_aggregate/merge_aggregate.out 
b/regression-test/data/nereids_rules_p0/merge_aggregate/merge_aggregate.out
index 51002fa7feb..f66de4badaa 100644
Binary files 
a/regression-test/data/nereids_rules_p0/merge_aggregate/merge_aggregate.out and 
b/regression-test/data/nereids_rules_p0/merge_aggregate/merge_aggregate.out 
differ
diff --git a/regression-test/data/shape_check/clickbench/query10.out 
b/regression-test/data/shape_check/clickbench/query10.out
index ae9174ce1c1..c7840564369 100644
Binary files a/regression-test/data/shape_check/clickbench/query10.out and 
b/regression-test/data/shape_check/clickbench/query10.out differ
diff --git a/regression-test/data/shape_check/clickbench/query11.out 
b/regression-test/data/shape_check/clickbench/query11.out
index 856e55187f9..4b5e4486d3f 100644
Binary files a/regression-test/data/shape_check/clickbench/query11.out and 
b/regression-test/data/shape_check/clickbench/query11.out differ
diff --git a/regression-test/data/shape_check/clickbench/query12.out 
b/regression-test/data/shape_check/clickbench/query12.out
index d47a7e129e3..10928363a83 100644
Binary files a/regression-test/data/shape_check/clickbench/query12.out and 
b/regression-test/data/shape_check/clickbench/query12.out differ
diff --git a/regression-test/data/shape_check/clickbench/query14.out 
b/regression-test/data/shape_check/clickbench/query14.out
index 54afcc6268c..35eedce41b9 100644
Binary files a/regression-test/data/shape_check/clickbench/query14.out and 
b/regression-test/data/shape_check/clickbench/query14.out differ
diff --git a/regression-test/data/shape_check/clickbench/query23.out 
b/regression-test/data/shape_check/clickbench/query23.out
index 5c6ed877934..76a91b3ad49 100644
Binary files a/regression-test/data/shape_check/clickbench/query23.out and 
b/regression-test/data/shape_check/clickbench/query23.out differ
diff --git a/regression-test/data/shape_check/clickbench/query9.out 
b/regression-test/data/shape_check/clickbench/query9.out
index b35cb2e2a80..dcece9f0ce7 100644
Binary files a/regression-test/data/shape_check/clickbench/query9.out and 
b/regression-test/data/shape_check/clickbench/query9.out differ
diff --git 
a/regression-test/data/shape_check/tpch_sf1000/nostats_rf_prune/q16.out 
b/regression-test/data/shape_check/tpch_sf1000/nostats_rf_prune/q16.out
index 1ed947eacab..7b04caaf3e0 100644
Binary files 
a/regression-test/data/shape_check/tpch_sf1000/nostats_rf_prune/q16.out and 
b/regression-test/data/shape_check/tpch_sf1000/nostats_rf_prune/q16.out differ
diff --git 
a/regression-test/data/shape_check/tpch_sf1000/shape_no_stats/q16.out 
b/regression-test/data/shape_check/tpch_sf1000/shape_no_stats/q16.out
index 1ed947eacab..7b04caaf3e0 100644
Binary files 
a/regression-test/data/shape_check/tpch_sf1000/shape_no_stats/q16.out and 
b/regression-test/data/shape_check/tpch_sf1000/shape_no_stats/q16.out differ
diff --git 
a/regression-test/suites/nereids_rules_p0/agg_strategy/distinct_agg_strategy_selector.groovy
 
b/regression-test/suites/nereids_rules_p0/agg_strategy/distinct_agg_strategy_selector.groovy
index bd317546768..406a409390b 100644
--- 
a/regression-test/suites/nereids_rules_p0/agg_strategy/distinct_agg_strategy_selector.groovy
+++ 
b/regression-test/suites/nereids_rules_p0/agg_strategy/distinct_agg_strategy_selector.groovy
@@ -38,6 +38,6 @@ suite("distinct_agg_strategy_selector") {
     sql "drop stats t1000"
     qt_no_stats_should_use_cte """explain shape plan
     select count(distinct a_1) , count(distinct b_5) from t1000;"""
-    qt_no_stats_should_use_cte_with_group_by """explain shape plan
+    qt_no_stats_should_use_multi_distinct """explain shape plan
     select count(distinct d_20) , count(distinct b_5) from t1000 group by 
a_1;"""
 }
\ No newline at end of file
diff --git 
a/regression-test/suites/nereids_rules_p0/mv/agg_variety/agg_variety.groovy 
b/regression-test/suites/nereids_rules_p0/mv/agg_variety/agg_variety.groovy
index 27a8b693dfe..d12442a26f1 100644
--- a/regression-test/suites/nereids_rules_p0/mv/agg_variety/agg_variety.groovy
+++ b/regression-test/suites/nereids_rules_p0/mv/agg_variety/agg_variety.groovy
@@ -21,6 +21,7 @@ suite("agg_variety") {
     sql "use ${db}"
     sql "set runtime_filter_mode=OFF";
     sql "SET ignore_shape_nodes='PhysicalDistribute,PhysicalProject'"
+    sql "set pre_materialized_view_rewrite_strategy = TRY_IN_RBO"
 
     sql """
     drop table if exists orders


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to