This is an automated email from the ASF dual-hosted git repository.
morrysnow pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 1470d37c42 [enhancement](Nereids) choose aggregate phase by
group-by-key unique property (#18423)
1470d37c42 is described below
commit 1470d37c42037abde0744f62406b3f307fab6fe5
Author: minghong <[email protected]>
AuthorDate: Mon Apr 10 16:30:51 2023 +0800
[enhancement](Nereids) choose aggregate phase by group-by-key unique
property (#18423)
when group-by-keys does not contain unique column
1. with out distinct: we prefer two phase aggregate to one phase aggregate
2. with distinct: we prefer three phase aggregate to two phase aggregate
---
.../rules/implementation/AggregateStrategies.java | 25 ++++++++++++++++++++++
.../org/apache/doris/statistics/Statistics.java | 18 ++++++++++++++++
.../rewrite/logical/AggregateStrategiesTest.java | 4 ++++
.../nereids_syntax_p0/aggregate_strategies.groovy | 2 +-
4 files changed, 48 insertions(+), 1 deletion(-)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/implementation/AggregateStrategies.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/implementation/AggregateStrategies.java
index 0e10be6af1..a65440379d 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/implementation/AggregateStrategies.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/implementation/AggregateStrategies.java
@@ -64,6 +64,7 @@ import
org.apache.doris.nereids.trees.plans.physical.PhysicalStorageLayerAggrega
import
org.apache.doris.nereids.trees.plans.physical.PhysicalStorageLayerAggregate.PushDownAggOp;
import org.apache.doris.nereids.util.ExpressionUtils;
import org.apache.doris.qe.ConnectContext;
+import org.apache.doris.statistics.Statistics;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
@@ -303,6 +304,20 @@ public class AggregateStrategies implements
ImplementationRuleFactory {
}
}
+ private boolean aggregateOnUniqueColumn(
+ LogicalAggregate<? extends Plan> logicalAgg) {
+ if (logicalAgg.child() instanceof GroupPlan) {
+ Statistics childStats = ((GroupPlan)
logicalAgg.child()).getGroup().getStatistics();
+ if (childStats != null) {
+ return logicalAgg.getGroupByExpressions().stream().anyMatch(
+ expression ->
+ childStats.almostUniqueExpression(expression)
+ );
+ }
+ }
+ return false;
+ }
+
/**
* sql: select count(*) from tbl group by id
*
@@ -331,6 +346,11 @@ public class AggregateStrategies implements
ImplementationRuleFactory {
*/
private List<PhysicalHashAggregate<Plan>> onePhaseAggregateWithoutDistinct(
LogicalAggregate<? extends Plan> logicalAgg, ConnectContext
connectContext) {
+ if (!logicalAgg.getGroupByExpressions().isEmpty()
+ && !aggregateOnUniqueColumn(logicalAgg)) {
+ // twoPhaseAggregate beats onePhaseAggregate
+ return null;
+ }
RequireProperties requireGather =
RequireProperties.of(PhysicalProperties.GATHER);
AggregateParam inputToResultParam = AggregateParam.localResult();
List<NamedExpression> newOutput =
ExpressionUtils.rewriteDownShortCircuit(
@@ -757,6 +777,11 @@ public class AggregateStrategies implements
ImplementationRuleFactory {
*/
private List<PhysicalHashAggregate<? extends Plan>>
twoPhaseAggregateWithDistinct(
LogicalAggregate<? extends Plan> logicalAgg, ConnectContext
connectContext) {
+ if (!logicalAgg.getGroupByExpressions().isEmpty()
+ && !aggregateOnUniqueColumn(logicalAgg)) {
+ // threePhaseAggregate beats twoPhaseAggregate
+ return null;
+ }
Set<AggregateFunction> aggregateFunctions =
logicalAgg.getAggregateFunctions();
Set<Expression> distinctArguments = aggregateFunctions.stream()
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java
b/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java
index e9c85a7cb2..977a6da2c1 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java
@@ -17,6 +17,7 @@
package org.apache.doris.statistics;
+import org.apache.doris.nereids.stats.ExpressionEstimation;
import org.apache.doris.nereids.stats.StatsMathUtil;
import org.apache.doris.nereids.trees.expressions.Expression;
@@ -184,4 +185,21 @@ public class Statistics {
}
return zero;
}
+
+ public boolean almostUniqueExpression(Expression expr) {
+ ExpressionEstimation estimator = new ExpressionEstimation();
+ double ndvErrorThreshold = 0.9;
+ ColumnStatistic colStats = expr.accept(estimator, this);
+ if (colStats.ndv > colStats.count * ndvErrorThreshold) {
+ return true;
+ }
+ return false;
+ }
+
+ public boolean isStatsUnknown(Expression expr) {
+ ExpressionEstimation estimator = new ExpressionEstimation();
+ ColumnStatistic colStats = expr.accept(estimator, this);
+ return colStats.isUnKnown;
+ }
+
}
diff --git
a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/logical/AggregateStrategiesTest.java
b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/logical/AggregateStrategiesTest.java
index 33c5c1f909..dc919eee98 100644
---
a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/logical/AggregateStrategiesTest.java
+++
b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/logical/AggregateStrategiesTest.java
@@ -250,7 +250,11 @@ public class AggregateStrategiesTest implements
MemoPatternMatchSupported {
);
}
+ // TODO aggregate estimation is not accurate enough.
+ // we choose 3Phase as RBO. Re-open this case when we could compare cost
between 2phase and 3phase.
@Test
+ @Disabled
+ @Developing("reopen this case when we could choose agg phase by CBO")
public void distinctWithNormalAggregateFunctionApply2PhaseRule() {
Slot id = rStudent.getOutput().get(0);
Slot name = rStudent.getOutput().get(2).toSlot();
diff --git
a/regression-test/suites/nereids_syntax_p0/aggregate_strategies.groovy
b/regression-test/suites/nereids_syntax_p0/aggregate_strategies.groovy
index ec1d5355fd..d7336017e9 100644
--- a/regression-test/suites/nereids_syntax_p0/aggregate_strategies.groovy
+++ b/regression-test/suites/nereids_syntax_p0/aggregate_strategies.groovy
@@ -82,7 +82,7 @@ suite("aggregate_strategies") {
explain {
sql """
select
-
/*+SET_VAR(disable_nereids_rules='ONE_PHASE_AGGREGATE_SINGLE_DISTINCT_TO_MULTI,TWO_PHASE_AGGREGATE_SINGLE_DISTINCT_TO_MULTI,THREE_PHASE_AGGREGATE_WITH_DISTINCT')*/
+
/*+SET_VAR(disable_nereids_rules='ONE_PHASE_AGGREGATE_SINGLE_DISTINCT_TO_MULTI,TWO_PHASE_AGGREGATE_SINGLE_DISTINCT_TO_MULTI,THREE_PHASE_AGGREGATE_WITH_DISTINCT,
FOUR_PHASE_AGGREGATE_WITH_DISTINCT')*/
count(distinct id)
from $tableName
"""
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]