This is an automated email from the ASF dual-hosted git repository.

jakevin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 844b7c8cba1 [enhancement](Nereids): check stats unreliable when 
deriving stats (#26103)
844b7c8cba1 is described below

commit 844b7c8cba1ac4747b4d2c1071ad9a8c0ddac819
Author: 谢健 <[email protected]>
AuthorDate: Mon Oct 30 17:21:42 2023 +0800

    [enhancement](Nereids): check stats unreliable when deriving stats (#26103)
    
    check stats unreliable when deriving stats
---
 .../java/org/apache/doris/nereids/PlanContext.java | 10 ++++++-
 .../org/apache/doris/nereids/cost/CostModelV1.java | 31 +++-------------------
 .../java/org/apache/doris/nereids/memo/Group.java  | 10 ++++++-
 .../doris/nereids/stats/StatsCalculator.java       |  4 +++
 .../trees/plans/physical/AbstractPhysicalJoin.java |  8 ++++++
 .../trees/plans/physical/PhysicalHashJoin.java     |  7 -----
 6 files changed, 33 insertions(+), 37 deletions(-)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/PlanContext.java 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/PlanContext.java
index 847d18afdc8..07a2cf4eed2 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/PlanContext.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/PlanContext.java
@@ -33,9 +33,10 @@ import java.util.List;
 public class PlanContext {
 
     private final List<Statistics> childrenStats;
-    private Statistics planStats;
+    private final Statistics planStats;
     private final int arity;
     private boolean isBroadcastJoin = false;
+    private final boolean isStatsReliable;
 
     /**
      * Constructor for PlanContext.
@@ -43,15 +44,18 @@ public class PlanContext {
     public PlanContext(GroupExpression groupExpression) {
         this.arity = groupExpression.arity();
         this.planStats = groupExpression.getOwnerGroup().getStatistics();
+        this.isStatsReliable = 
groupExpression.getOwnerGroup().isStatsReliable();
         this.childrenStats = new ArrayList<>(groupExpression.arity());
         for (int i = 0; i < groupExpression.arity(); i++) {
             childrenStats.add(groupExpression.childStatistics(i));
         }
     }
 
+    // This is used in GraphSimplifier
     public PlanContext(Statistics planStats, List<Statistics> childrenStats) {
         this.planStats = planStats;
         this.childrenStats = childrenStats;
+        this.isStatsReliable = false;
         this.arity = this.childrenStats.size();
     }
 
@@ -71,6 +75,10 @@ public class PlanContext {
         return planStats;
     }
 
+    public boolean isStatsReliable() {
+        return isStatsReliable;
+    }
+
     /**
      * Get child statistics.
      */
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/cost/CostModelV1.java 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/cost/CostModelV1.java
index a2364eb0258..8c2b467920e 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/cost/CostModelV1.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/cost/CostModelV1.java
@@ -22,7 +22,6 @@ import org.apache.doris.nereids.properties.DistributionSpec;
 import org.apache.doris.nereids.properties.DistributionSpecGather;
 import org.apache.doris.nereids.properties.DistributionSpecHash;
 import org.apache.doris.nereids.properties.DistributionSpecReplicated;
-import org.apache.doris.nereids.trees.expressions.Slot;
 import org.apache.doris.nereids.trees.plans.Plan;
 import org.apache.doris.nereids.trees.plans.physical.PhysicalAssertNumRows;
 import 
org.apache.doris.nereids.trees.plans.physical.PhysicalDeferMaterializeOlapScan;
@@ -303,7 +302,7 @@ class CostModelV1 extends PlanVisitor<Cost, PlanContext> {
             }
             // TODO: since the outputs rows may expand a lot, penalty on it 
will cause bc never be chosen.
             // will refine this in next generation cost model.
-            if (isStatsUnknown(physicalHashJoin, buildStats, probeStats)) {
+            if (!context.isStatsReliable()) {
                 // forbid broadcast join when stats is unknown
                 return CostV1.of(rightRowCount * buildSideFactor + 1 / 
leftRowCount,
                         rightRowCount,
@@ -315,7 +314,7 @@ class CostModelV1 extends PlanVisitor<Cost, PlanContext> {
                     0
             );
         }
-        if (isStatsUnknown(physicalHashJoin, buildStats, probeStats)) {
+        if (!context.isStatsReliable()) {
             return CostV1.of(rightRowCount + 1 / leftRowCount,
                     rightRowCount,
                     0);
@@ -326,30 +325,6 @@ class CostModelV1 extends PlanVisitor<Cost, PlanContext> {
         );
     }
 
-    private boolean isStatsUnknown(PhysicalHashJoin<? extends Plan, ? extends 
Plan> join,
-            Statistics build, Statistics probe) {
-        for (Slot slot : join.getConditionSlot()) {
-            if ((build.columnStatistics().containsKey(slot) && 
!build.columnStatistics().get(slot).isUnKnown)
-                    || (probe.columnStatistics().containsKey(slot) && 
!probe.columnStatistics().get(slot).isUnKnown)) {
-                continue;
-            }
-            return true;
-        }
-        return false;
-    }
-
-    private boolean isStatsUnknown(PhysicalNestedLoopJoin<? extends Plan, ? 
extends Plan> join,
-            Statistics build, Statistics probe) {
-        for (Slot slot : join.getConditionSlot()) {
-            if ((build.columnStatistics().containsKey(slot) && 
!build.columnStatistics().get(slot).isUnKnown)
-                    || (probe.columnStatistics().containsKey(slot) && 
!probe.columnStatistics().get(slot).isUnKnown)) {
-                continue;
-            }
-            return true;
-        }
-        return false;
-    }
-
     @Override
     public Cost visitPhysicalNestedLoopJoin(
             PhysicalNestedLoopJoin<? extends Plan, ? extends Plan> 
nestedLoopJoin,
@@ -358,7 +333,7 @@ class CostModelV1 extends PlanVisitor<Cost, PlanContext> {
         Preconditions.checkState(context.arity() == 2);
         Statistics leftStatistics = context.getChildStatistics(0);
         Statistics rightStatistics = context.getChildStatistics(1);
-        if (isStatsUnknown(nestedLoopJoin, leftStatistics, rightStatistics)) {
+        if (!context.isStatsReliable()) {
             return CostV1.of(rightStatistics.getRowCount() + 1 / 
leftStatistics.getRowCount(),
                     rightStatistics.getRowCount(),
                     0);
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/memo/Group.java 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/memo/Group.java
index 750de4fa8f2..8009156e134 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/memo/Group.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/memo/Group.java
@@ -59,7 +59,7 @@ public class Group {
     private final List<GroupExpression> logicalExpressions = 
Lists.newArrayList();
     private final List<GroupExpression> physicalExpressions = 
Lists.newArrayList();
     private final List<GroupExpression> enforcers = Lists.newArrayList();
-
+    private boolean isStatsReliable = true;
     private LogicalProperties logicalProperties;
 
     // Map of cost lower bounds
@@ -119,6 +119,14 @@ public class Group {
         return groupExpression;
     }
 
+    public void setStatsReliable(boolean statsReliable) {
+        this.isStatsReliable = statsReliable;
+    }
+
+    public boolean isStatsReliable() {
+        return isStatsReliable;
+    }
+
     public void addLogicalExpression(GroupExpression groupExpression) {
         groupExpression.setOwnerGroup(this);
         logicalExpressions.add(groupExpression);
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
index 6ee5c642b90..0fad8ea1d1a 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
@@ -221,8 +221,12 @@ public class StatsCalculator extends 
DefaultPlanVisitor<Statistics, Void> {
         Plan plan = groupExpression.getPlan();
         Statistics newStats = plan.accept(this, null);
         newStats.enforceValid();
+
         // We ensure that the rowCount remains unchanged in order to make the 
cost of each plan comparable.
         if (groupExpression.getOwnerGroup().getStatistics() == null) {
+            boolean isReliable = 
groupExpression.getPlan().getExpressions().stream()
+                    .noneMatch(e -> 
newStats.isInputSlotsUnknown(e.getInputSlots()));
+            groupExpression.getOwnerGroup().setStatsReliable(isReliable);
             groupExpression.getOwnerGroup().setStatistics(newStats);
             groupExpression.setEstOutputRowCount(newStats.getRowCount());
         } else {
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/AbstractPhysicalJoin.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/AbstractPhysicalJoin.java
index acd2e381392..c6bdd7b6b20 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/AbstractPhysicalJoin.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/AbstractPhysicalJoin.java
@@ -35,6 +35,7 @@ import org.apache.doris.statistics.Statistics;
 
 import com.google.common.collect.ImmutableList;
 import com.google.common.collect.ImmutableList.Builder;
+import com.google.common.collect.ImmutableSet;
 import com.google.common.collect.Lists;
 import org.json.JSONObject;
 
@@ -42,7 +43,9 @@ import java.util.Collection;
 import java.util.List;
 import java.util.Objects;
 import java.util.Optional;
+import java.util.Set;
 import java.util.stream.Collectors;
+import java.util.stream.Stream;
 
 /**
  * Abstract class for all physical join node.
@@ -217,6 +220,11 @@ public abstract class AbstractPhysicalJoin<
                 .build();
     }
 
+    public Set<Slot> getConditionSlot() {
+        return Stream.concat(hashJoinConjuncts.stream(), 
otherJoinConjuncts.stream())
+                .flatMap(expr -> 
expr.getInputSlots().stream()).collect(ImmutableSet.toImmutableSet());
+    }
+
     @Override
     public String toString() {
         List<Object> args = Lists.newArrayList("type", joinType,
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalHashJoin.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalHashJoin.java
index a6bd8cfdd68..b60afd67308 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalHashJoin.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalHashJoin.java
@@ -43,7 +43,6 @@ import org.apache.doris.statistics.Statistics;
 import org.apache.doris.thrift.TRuntimeFilterType;
 
 import com.google.common.base.Preconditions;
-import com.google.common.collect.ImmutableSet;
 import com.google.common.collect.Lists;
 import com.google.common.collect.Sets;
 
@@ -52,7 +51,6 @@ import java.util.Map;
 import java.util.Optional;
 import java.util.Set;
 import java.util.stream.Collectors;
-import java.util.stream.Stream;
 
 /**
  * Physical hash join plan.
@@ -246,11 +244,6 @@ public class PhysicalHashJoin<
         return pushedDown;
     }
 
-    public Set<Slot> getConditionSlot() {
-        return Stream.concat(hashJoinConjuncts.stream(), 
otherJoinConjuncts.stream())
-                .flatMap(expr -> 
expr.getInputSlots().stream()).collect(ImmutableSet.toImmutableSet());
-    }
-
     @Override
     public String shapeInfo() {
         StringBuilder builder = new StringBuilder();


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to