This is an automated email from the ASF dual-hosted git repository.
huajianlan pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 8a1ac334e4d [fix](nereids) change default agg strategy to
multi_distinct when not have statistics (#55855)
8a1ac334e4d is described below
commit 8a1ac334e4d75bea2f7d37602189fb9d55e499f8
Author: feiniaofeiafei <[email protected]>
AuthorDate: Fri Sep 12 16:49:06 2025 +0800
[fix](nereids) change default agg strategy to multi_distinct when not have
statistics (#55855)
Related PR: #54079
Problem Summary:
Using the multi-distinct strategy instead of multi-stage/CTE splitting
strategy for AGG in scenarios without statistical information,
consistent with the default strategy before refactor(#54079), can
achieve better performance in scenarios without statistical information.
---
.../rules/rewrite/DistinctAggStrategySelector.java | 5 ++-
.../rules/rewrite/DistinctAggregateRewriter.java | 6 ++--
.../nereids/trees/plans/algebra/Aggregate.java | 13 +++++++
.../rewrite/DistinctAggregateRewriterTest.java | 38 +++++++++++++++++++++
.../rules/rewrite/SplitMultiDistinctTest.java | 15 ++++++++
.../adjust_nullable/test_adjust_nullable.out | Bin 1121 -> 489 bytes
.../agg_skew_rewrite/agg_skew_rewrite.out | Bin 8320 -> 8202 bytes
.../nereids_rules_p0/agg_strategy/agg_strategy.out | Bin 21517 -> 21077 bytes
.../distinct_agg_strategy_selector.out | Bin 4760 -> 4377 bytes
.../agg_strategy/test_variables.out | Bin 6275 -> 6211 bytes
.../distinct_split/disitinct_split.out | Bin 13518 -> 13158 bytes
.../merge_aggregate/merge_aggregate.out | Bin 7977 -> 7803 bytes
.../data/shape_check/clickbench/query10.out | Bin 467 -> 415 bytes
.../data/shape_check/clickbench/query11.out | Bin 508 -> 472 bytes
.../data/shape_check/clickbench/query12.out | Bin 508 -> 472 bytes
.../data/shape_check/clickbench/query14.out | Bin 504 -> 468 bytes
.../data/shape_check/clickbench/query23.out | Bin 567 -> 531 bytes
.../data/shape_check/clickbench/query9.out | Bin 448 -> 414 bytes
.../tpch_sf1000/nostats_rf_prune/q16.out | Bin 1200 -> 1148 bytes
.../shape_check/tpch_sf1000/shape_no_stats/q16.out | Bin 1200 -> 1148 bytes
.../distinct_agg_strategy_selector.groovy | 2 +-
.../mv/agg_variety/agg_variety.groovy | 1 +
22 files changed, 76 insertions(+), 4 deletions(-)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/DistinctAggStrategySelector.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/DistinctAggStrategySelector.java
index 5b4e4092e54..3b9caa40908 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/DistinctAggStrategySelector.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/DistinctAggStrategySelector.java
@@ -151,9 +151,12 @@ public class DistinctAggStrategySelector extends
DefaultPlanRewriter<DistinctSel
}
}
} else {
- if
(AggregateUtils.hasUnknownStatistics(agg.getGroupByExpressions(), childStats)) {
+ if (agg.hasSkewHint()) {
return false;
}
+ if
(AggregateUtils.hasUnknownStatistics(agg.getGroupByExpressions(), childStats)) {
+ return true;
+ }
// The joint ndv of Group by key is high, so multi_distinct is not
selected;
if (aggStats.getRowCount() >= row *
AggregateUtils.LOW_CARDINALITY_THRESHOLD) {
return false;
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/DistinctAggregateRewriter.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/DistinctAggregateRewriter.java
index 2b780497368..e34d56d383c 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/DistinctAggregateRewriter.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/DistinctAggregateRewriter.java
@@ -41,6 +41,7 @@ import org.apache.doris.qe.ConnectContext;
import org.apache.doris.statistics.ColumnStatistic;
import org.apache.doris.statistics.Statistics;
+import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
@@ -89,7 +90,8 @@ public class DistinctAggregateRewriter implements
RewriteRuleFactory {
);
}
- private boolean shouldUseMultiDistinct(LogicalAggregate<? extends Plan>
aggregate) {
+ @VisibleForTesting
+ boolean shouldUseMultiDistinct(LogicalAggregate<? extends Plan> aggregate)
{
// count(distinct a,b) cannot use multi_distinct
if (AggregateUtils.containsCountDistinctMultiExpr(aggregate)) {
return false;
@@ -111,7 +113,7 @@ public class DistinctAggregateRewriter implements
RewriteRuleFactory {
// has unknown statistics, split to bottom and top agg
if
(AggregateUtils.hasUnknownStatistics(aggregate.getGroupByExpressions(),
aggChildStats)
|| AggregateUtils.hasUnknownStatistics(dstArgs,
aggChildStats)) {
- return false;
+ return true;
}
double gbyNdv = aggStats.getRowCount();
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/algebra/Aggregate.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/algebra/Aggregate.java
index 2d3058e434a..99ab10f982b 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/algebra/Aggregate.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/algebra/Aggregate.java
@@ -167,4 +167,17 @@ public interface Aggregate<CHILD_TYPE extends Plan>
extends UnaryPlan<CHILD_TYPE
&& !getGroupByExpressions().isEmpty()
&& !(new
HashSet<>(getGroupByExpressions()).containsAll(distinctArguments));
}
+
+ /**
+ * hasSkewHint
+ * @return true if there is at least one skew hint
+ */
+ default boolean hasSkewHint() {
+ for (AggregateFunction aggFunc : getAggregateFunctions()) {
+ if (aggFunc.isSkew()) {
+ return true;
+ }
+ }
+ return false;
+ }
}
diff --git
a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/DistinctAggregateRewriterTest.java
b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/DistinctAggregateRewriterTest.java
index 1664cc18dfc..fcab4e2ae8c 100644
---
a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/DistinctAggregateRewriterTest.java
+++
b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/DistinctAggregateRewriterTest.java
@@ -19,13 +19,18 @@ package org.apache.doris.nereids.rules.rewrite;
import
org.apache.doris.nereids.trees.expressions.functions.agg.AggregateFunction;
import org.apache.doris.nereids.trees.expressions.functions.agg.Count;
+import
org.apache.doris.nereids.trees.expressions.functions.agg.MultiDistinctCount;
import
org.apache.doris.nereids.trees.expressions.functions.agg.MultiDistinctGroupConcat;
import org.apache.doris.nereids.trees.expressions.functions.agg.Sum0;
import org.apache.doris.nereids.trees.expressions.functions.scalar.If;
+import org.apache.doris.nereids.trees.plans.Plan;
+import org.apache.doris.nereids.trees.plans.logical.LogicalAggregate;
import org.apache.doris.nereids.util.MemoPatternMatchSupported;
import org.apache.doris.nereids.util.PlanChecker;
import org.apache.doris.utframe.TestWithFeService;
+import mockit.Mock;
+import mockit.MockUp;
import org.junit.jupiter.api.Test;
public class DistinctAggregateRewriterTest extends TestWithFeService
implements MemoPatternMatchSupported {
@@ -39,8 +44,18 @@ public class DistinctAggregateRewriterTest extends
TestWithFeService implements
connectContext.getSessionVariable().setDisableNereidsRules("PRUNE_EMPTY_PARTITION");
}
+ private void applyMock() {
+ new MockUp<DistinctAggregateRewriter>() {
+ @Mock
+ boolean shouldUseMultiDistinct(LogicalAggregate<? extends Plan>
aggregate) {
+ return false;
+ }
+ };
+ }
+
@Test
void testSplitSingleDistinctAgg() {
+ applyMock();
PlanChecker.from(connectContext)
.analyze("select b, count(distinct a) from
test.distinct_agg_split_t group by b")
.rewrite()
@@ -59,6 +74,7 @@ public class DistinctAggregateRewriterTest extends
TestWithFeService implements
@Test
void testSplitSingleDistinctAggOtherFunctionCount() {
+ applyMock();
PlanChecker.from(connectContext)
.analyze("select b, count(distinct a), count(a) from
test.distinct_agg_split_t group by b")
.rewrite()
@@ -77,6 +93,7 @@ public class DistinctAggregateRewriterTest extends
TestWithFeService implements
@Test
void testSplitSingleDistinctWithOtherAgg() {
+ applyMock();
PlanChecker.from(connectContext)
.analyze("select b, count(distinct a), sum(c) from
test.distinct_agg_split_t group by b")
.rewrite()
@@ -93,6 +110,7 @@ public class DistinctAggregateRewriterTest extends
TestWithFeService implements
@Test
void testNotSplitWhenNoGroupBy() {
+ applyMock();
PlanChecker.from(connectContext)
.analyze("select count(distinct a) from
test.distinct_agg_split_t")
.rewrite()
@@ -102,6 +120,7 @@ public class DistinctAggregateRewriterTest extends
TestWithFeService implements
@Test
void testSplitWhenNoGroupByHasGroupConcatDistinctOrderBy() {
+ applyMock();
PlanChecker.from(connectContext)
.analyze("select group_concat(distinct a, '' order by b) from
test.distinct_agg_split_t")
.rewrite()
@@ -113,6 +132,7 @@ public class DistinctAggregateRewriterTest extends
TestWithFeService implements
@Test
void testSplitWhenNoGroupByHasGroupConcatDistinct() {
+ applyMock();
PlanChecker.from(connectContext)
.analyze("select group_concat(distinct a, '') from
test.distinct_agg_split_t")
.rewrite()
@@ -124,6 +144,7 @@ public class DistinctAggregateRewriterTest extends
TestWithFeService implements
@Test
void testMultiExprDistinct() {
+ applyMock();
PlanChecker.from(connectContext)
.analyze("select b, sum(a), count(distinct a,c) from
test.distinct_agg_split_t group by b")
.rewrite()
@@ -142,6 +163,7 @@ public class DistinctAggregateRewriterTest extends
TestWithFeService implements
@Test
void testNotSplitWhenNoDistinct() {
+ applyMock();
PlanChecker.from(connectContext)
.analyze("select b, sum(a), count(c) from
test.distinct_agg_split_t group by b")
.rewrite()
@@ -151,6 +173,7 @@ public class DistinctAggregateRewriterTest extends
TestWithFeService implements
@Test
void testSplitWithComplexExpression() {
+ applyMock();
PlanChecker.from(connectContext)
.analyze("select b, count(distinct a + 1) from
test.distinct_agg_split_t group by b")
.rewrite()
@@ -161,4 +184,19 @@ public class DistinctAggregateRewriterTest extends
TestWithFeService implements
).when(agg -> agg.getGroupByExpressions().size() == 1
&&
agg.getGroupByExpressions().get(0).toSql().equals("b")));
}
+
+ @Test
+ void testMultiDistinct() {
+ connectContext.getSessionVariable().setAggPhase(2);
+ PlanChecker.from(connectContext)
+ .analyze("select b, count(distinct a), sum(c) from
test.distinct_agg_split_t group by b")
+ .rewrite()
+ .printlnTree()
+ .matches(
+ logicalAggregate().when(agg ->
agg.getGroupByExpressions().size() == 1
+ &&
agg.getGroupByExpressions().get(0).toSql().equals("b")
+ &&
agg.getAggregateFunctions().stream().noneMatch(AggregateFunction::isDistinct)
+ &&
agg.getAggregateFunctions().stream().anyMatch(f -> f instanceof
MultiDistinctCount)
+ ));
+ }
}
diff --git
a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/SplitMultiDistinctTest.java
b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/SplitMultiDistinctTest.java
index 870124d2a49..fe8af6fee86 100644
---
a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/SplitMultiDistinctTest.java
+++
b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/SplitMultiDistinctTest.java
@@ -199,4 +199,19 @@ public class SplitMultiDistinctTest extends
TestWithFeService implements MemoPat
);
});
}
+
+ @Test
+ void multiSumWithGby() {
+ String sql = "select sum(distinct b), sum(distinct a) from
test_distinct_multi group by c";
+ PlanChecker.from(connectContext).checkExplain(sql, planner -> {
+ Plan plan = planner.getOptimizedPlan();
+ MatchingUtils.assertMatches(plan,
+ physicalResultSink(
+ physicalDistribute(
+ physicalProject(
+ physicalHashAggregate(
+ physicalDistribute(
+
physicalHashAggregate(any())))))));
+ });
+ }
}
diff --git
a/regression-test/data/nereids_rules_p0/adjust_nullable/test_adjust_nullable.out
b/regression-test/data/nereids_rules_p0/adjust_nullable/test_adjust_nullable.out
index 09626347b11..05ca7f01e20 100644
Binary files
a/regression-test/data/nereids_rules_p0/adjust_nullable/test_adjust_nullable.out
and
b/regression-test/data/nereids_rules_p0/adjust_nullable/test_adjust_nullable.out
differ
diff --git
a/regression-test/data/nereids_rules_p0/agg_skew_rewrite/agg_skew_rewrite.out
b/regression-test/data/nereids_rules_p0/agg_skew_rewrite/agg_skew_rewrite.out
index 360052e0f2a..813a08ce40d 100644
Binary files
a/regression-test/data/nereids_rules_p0/agg_skew_rewrite/agg_skew_rewrite.out
and
b/regression-test/data/nereids_rules_p0/agg_skew_rewrite/agg_skew_rewrite.out
differ
diff --git
a/regression-test/data/nereids_rules_p0/agg_strategy/agg_strategy.out
b/regression-test/data/nereids_rules_p0/agg_strategy/agg_strategy.out
index e63c5951f4b..a405272b3d3 100644
Binary files
a/regression-test/data/nereids_rules_p0/agg_strategy/agg_strategy.out and
b/regression-test/data/nereids_rules_p0/agg_strategy/agg_strategy.out differ
diff --git
a/regression-test/data/nereids_rules_p0/agg_strategy/distinct_agg_strategy_selector.out
b/regression-test/data/nereids_rules_p0/agg_strategy/distinct_agg_strategy_selector.out
index ea5361dc350..05ef347e89b 100644
Binary files
a/regression-test/data/nereids_rules_p0/agg_strategy/distinct_agg_strategy_selector.out
and
b/regression-test/data/nereids_rules_p0/agg_strategy/distinct_agg_strategy_selector.out
differ
diff --git
a/regression-test/data/nereids_rules_p0/agg_strategy/test_variables.out
b/regression-test/data/nereids_rules_p0/agg_strategy/test_variables.out
index 4c9866e9568..0a009cc77df 100644
Binary files
a/regression-test/data/nereids_rules_p0/agg_strategy/test_variables.out and
b/regression-test/data/nereids_rules_p0/agg_strategy/test_variables.out differ
diff --git
a/regression-test/data/nereids_rules_p0/distinct_split/disitinct_split.out
b/regression-test/data/nereids_rules_p0/distinct_split/disitinct_split.out
index a8bdae2200e..735f924821a 100644
Binary files
a/regression-test/data/nereids_rules_p0/distinct_split/disitinct_split.out and
b/regression-test/data/nereids_rules_p0/distinct_split/disitinct_split.out
differ
diff --git
a/regression-test/data/nereids_rules_p0/merge_aggregate/merge_aggregate.out
b/regression-test/data/nereids_rules_p0/merge_aggregate/merge_aggregate.out
index 51002fa7feb..f66de4badaa 100644
Binary files
a/regression-test/data/nereids_rules_p0/merge_aggregate/merge_aggregate.out and
b/regression-test/data/nereids_rules_p0/merge_aggregate/merge_aggregate.out
differ
diff --git a/regression-test/data/shape_check/clickbench/query10.out
b/regression-test/data/shape_check/clickbench/query10.out
index ae9174ce1c1..c7840564369 100644
Binary files a/regression-test/data/shape_check/clickbench/query10.out and
b/regression-test/data/shape_check/clickbench/query10.out differ
diff --git a/regression-test/data/shape_check/clickbench/query11.out
b/regression-test/data/shape_check/clickbench/query11.out
index 856e55187f9..4b5e4486d3f 100644
Binary files a/regression-test/data/shape_check/clickbench/query11.out and
b/regression-test/data/shape_check/clickbench/query11.out differ
diff --git a/regression-test/data/shape_check/clickbench/query12.out
b/regression-test/data/shape_check/clickbench/query12.out
index d47a7e129e3..10928363a83 100644
Binary files a/regression-test/data/shape_check/clickbench/query12.out and
b/regression-test/data/shape_check/clickbench/query12.out differ
diff --git a/regression-test/data/shape_check/clickbench/query14.out
b/regression-test/data/shape_check/clickbench/query14.out
index 54afcc6268c..35eedce41b9 100644
Binary files a/regression-test/data/shape_check/clickbench/query14.out and
b/regression-test/data/shape_check/clickbench/query14.out differ
diff --git a/regression-test/data/shape_check/clickbench/query23.out
b/regression-test/data/shape_check/clickbench/query23.out
index 5c6ed877934..76a91b3ad49 100644
Binary files a/regression-test/data/shape_check/clickbench/query23.out and
b/regression-test/data/shape_check/clickbench/query23.out differ
diff --git a/regression-test/data/shape_check/clickbench/query9.out
b/regression-test/data/shape_check/clickbench/query9.out
index b35cb2e2a80..dcece9f0ce7 100644
Binary files a/regression-test/data/shape_check/clickbench/query9.out and
b/regression-test/data/shape_check/clickbench/query9.out differ
diff --git
a/regression-test/data/shape_check/tpch_sf1000/nostats_rf_prune/q16.out
b/regression-test/data/shape_check/tpch_sf1000/nostats_rf_prune/q16.out
index 1ed947eacab..7b04caaf3e0 100644
Binary files
a/regression-test/data/shape_check/tpch_sf1000/nostats_rf_prune/q16.out and
b/regression-test/data/shape_check/tpch_sf1000/nostats_rf_prune/q16.out differ
diff --git
a/regression-test/data/shape_check/tpch_sf1000/shape_no_stats/q16.out
b/regression-test/data/shape_check/tpch_sf1000/shape_no_stats/q16.out
index 1ed947eacab..7b04caaf3e0 100644
Binary files
a/regression-test/data/shape_check/tpch_sf1000/shape_no_stats/q16.out and
b/regression-test/data/shape_check/tpch_sf1000/shape_no_stats/q16.out differ
diff --git
a/regression-test/suites/nereids_rules_p0/agg_strategy/distinct_agg_strategy_selector.groovy
b/regression-test/suites/nereids_rules_p0/agg_strategy/distinct_agg_strategy_selector.groovy
index bd317546768..406a409390b 100644
---
a/regression-test/suites/nereids_rules_p0/agg_strategy/distinct_agg_strategy_selector.groovy
+++
b/regression-test/suites/nereids_rules_p0/agg_strategy/distinct_agg_strategy_selector.groovy
@@ -38,6 +38,6 @@ suite("distinct_agg_strategy_selector") {
sql "drop stats t1000"
qt_no_stats_should_use_cte """explain shape plan
select count(distinct a_1) , count(distinct b_5) from t1000;"""
- qt_no_stats_should_use_cte_with_group_by """explain shape plan
+ qt_no_stats_should_use_multi_distinct """explain shape plan
select count(distinct d_20) , count(distinct b_5) from t1000 group by
a_1;"""
}
\ No newline at end of file
diff --git
a/regression-test/suites/nereids_rules_p0/mv/agg_variety/agg_variety.groovy
b/regression-test/suites/nereids_rules_p0/mv/agg_variety/agg_variety.groovy
index 27a8b693dfe..d12442a26f1 100644
--- a/regression-test/suites/nereids_rules_p0/mv/agg_variety/agg_variety.groovy
+++ b/regression-test/suites/nereids_rules_p0/mv/agg_variety/agg_variety.groovy
@@ -21,6 +21,7 @@ suite("agg_variety") {
sql "use ${db}"
sql "set runtime_filter_mode=OFF";
sql "SET ignore_shape_nodes='PhysicalDistribute,PhysicalProject'"
+ sql "set pre_materialized_view_rewrite_strategy = TRY_IN_RBO"
sql """
drop table if exists orders
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]