This is an automated email from the ASF dual-hosted git repository.

morrysnow pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 1470d37c42 [enhancement](Nereids) choose aggregate phase by 
group-by-key unique property (#18423)
1470d37c42 is described below

commit 1470d37c42037abde0744f62406b3f307fab6fe5
Author: minghong <[email protected]>
AuthorDate: Mon Apr 10 16:30:51 2023 +0800

    [enhancement](Nereids) choose aggregate phase by group-by-key unique 
property (#18423)
    
    when group-by-keys does not contain unique column
    1. with out distinct: we prefer two phase aggregate to one phase aggregate
    2. with distinct: we prefer three phase aggregate to two phase aggregate
---
 .../rules/implementation/AggregateStrategies.java  | 25 ++++++++++++++++++++++
 .../org/apache/doris/statistics/Statistics.java    | 18 ++++++++++++++++
 .../rewrite/logical/AggregateStrategiesTest.java   |  4 ++++
 .../nereids_syntax_p0/aggregate_strategies.groovy  |  2 +-
 4 files changed, 48 insertions(+), 1 deletion(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/implementation/AggregateStrategies.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/implementation/AggregateStrategies.java
index 0e10be6af1..a65440379d 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/implementation/AggregateStrategies.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/implementation/AggregateStrategies.java
@@ -64,6 +64,7 @@ import 
org.apache.doris.nereids.trees.plans.physical.PhysicalStorageLayerAggrega
 import 
org.apache.doris.nereids.trees.plans.physical.PhysicalStorageLayerAggregate.PushDownAggOp;
 import org.apache.doris.nereids.util.ExpressionUtils;
 import org.apache.doris.qe.ConnectContext;
+import org.apache.doris.statistics.Statistics;
 
 import com.google.common.base.Preconditions;
 import com.google.common.collect.ImmutableList;
@@ -303,6 +304,20 @@ public class AggregateStrategies implements 
ImplementationRuleFactory {
         }
     }
 
+    private boolean aggregateOnUniqueColumn(
+            LogicalAggregate<? extends Plan> logicalAgg) {
+        if (logicalAgg.child() instanceof GroupPlan) {
+            Statistics childStats = ((GroupPlan) 
logicalAgg.child()).getGroup().getStatistics();
+            if (childStats != null) {
+                return logicalAgg.getGroupByExpressions().stream().anyMatch(
+                        expression ->
+                            childStats.almostUniqueExpression(expression)
+                );
+            }
+        }
+        return false;
+    }
+
     /**
      * sql: select count(*) from tbl group by id
      *
@@ -331,6 +346,11 @@ public class AggregateStrategies implements 
ImplementationRuleFactory {
      */
     private List<PhysicalHashAggregate<Plan>> onePhaseAggregateWithoutDistinct(
             LogicalAggregate<? extends Plan> logicalAgg, ConnectContext 
connectContext) {
+        if (!logicalAgg.getGroupByExpressions().isEmpty()
+                && !aggregateOnUniqueColumn(logicalAgg)) {
+            // twoPhaseAggregate beats onePhaseAggregate
+            return null;
+        }
         RequireProperties requireGather = 
RequireProperties.of(PhysicalProperties.GATHER);
         AggregateParam inputToResultParam = AggregateParam.localResult();
         List<NamedExpression> newOutput = 
ExpressionUtils.rewriteDownShortCircuit(
@@ -757,6 +777,11 @@ public class AggregateStrategies implements 
ImplementationRuleFactory {
      */
     private List<PhysicalHashAggregate<? extends Plan>> 
twoPhaseAggregateWithDistinct(
             LogicalAggregate<? extends Plan> logicalAgg, ConnectContext 
connectContext) {
+        if (!logicalAgg.getGroupByExpressions().isEmpty()
+                && !aggregateOnUniqueColumn(logicalAgg)) {
+            // threePhaseAggregate beats twoPhaseAggregate
+            return null;
+        }
         Set<AggregateFunction> aggregateFunctions = 
logicalAgg.getAggregateFunctions();
 
         Set<Expression> distinctArguments = aggregateFunctions.stream()
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java
index e9c85a7cb2..977a6da2c1 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java
@@ -17,6 +17,7 @@
 
 package org.apache.doris.statistics;
 
+import org.apache.doris.nereids.stats.ExpressionEstimation;
 import org.apache.doris.nereids.stats.StatsMathUtil;
 import org.apache.doris.nereids.trees.expressions.Expression;
 
@@ -184,4 +185,21 @@ public class Statistics {
         }
         return zero;
     }
+
+    public boolean almostUniqueExpression(Expression expr) {
+        ExpressionEstimation estimator = new ExpressionEstimation();
+        double ndvErrorThreshold = 0.9;
+        ColumnStatistic colStats = expr.accept(estimator, this);
+        if (colStats.ndv > colStats.count * ndvErrorThreshold) {
+            return true;
+        }
+        return false;
+    }
+
+    public boolean isStatsUnknown(Expression expr) {
+        ExpressionEstimation estimator = new ExpressionEstimation();
+        ColumnStatistic colStats = expr.accept(estimator, this);
+        return colStats.isUnKnown;
+    }
+
 }
diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/logical/AggregateStrategiesTest.java
 
b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/logical/AggregateStrategiesTest.java
index 33c5c1f909..dc919eee98 100644
--- 
a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/logical/AggregateStrategiesTest.java
+++ 
b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/logical/AggregateStrategiesTest.java
@@ -250,7 +250,11 @@ public class AggregateStrategiesTest implements 
MemoPatternMatchSupported {
                 );
     }
 
+    // TODO aggregate estimation is not accurate enough.
+    //  we choose 3Phase as RBO. Re-open this case when we could compare cost 
between 2phase and 3phase.
     @Test
+    @Disabled
+    @Developing("reopen this case when we could choose agg phase by CBO")
     public void distinctWithNormalAggregateFunctionApply2PhaseRule() {
         Slot id = rStudent.getOutput().get(0);
         Slot name = rStudent.getOutput().get(2).toSlot();
diff --git 
a/regression-test/suites/nereids_syntax_p0/aggregate_strategies.groovy 
b/regression-test/suites/nereids_syntax_p0/aggregate_strategies.groovy
index ec1d5355fd..d7336017e9 100644
--- a/regression-test/suites/nereids_syntax_p0/aggregate_strategies.groovy
+++ b/regression-test/suites/nereids_syntax_p0/aggregate_strategies.groovy
@@ -82,7 +82,7 @@ suite("aggregate_strategies") {
         explain {
             sql """
             select
-                
/*+SET_VAR(disable_nereids_rules='ONE_PHASE_AGGREGATE_SINGLE_DISTINCT_TO_MULTI,TWO_PHASE_AGGREGATE_SINGLE_DISTINCT_TO_MULTI,THREE_PHASE_AGGREGATE_WITH_DISTINCT')*/
+                
/*+SET_VAR(disable_nereids_rules='ONE_PHASE_AGGREGATE_SINGLE_DISTINCT_TO_MULTI,TWO_PHASE_AGGREGATE_SINGLE_DISTINCT_TO_MULTI,THREE_PHASE_AGGREGATE_WITH_DISTINCT,
 FOUR_PHASE_AGGREGATE_WITH_DISTINCT')*/
                 count(distinct id)
                 from $tableName
             """


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to