This is an automated email from the ASF dual-hosted git repository.
jakevin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 844b7c8cba1 [enhancement](Nereids): check stats unreliable when
deriving stats (#26103)
844b7c8cba1 is described below
commit 844b7c8cba1ac4747b4d2c1071ad9a8c0ddac819
Author: 谢健 <[email protected]>
AuthorDate: Mon Oct 30 17:21:42 2023 +0800
[enhancement](Nereids): check stats unreliable when deriving stats (#26103)
check stats unreliable when deriving stats
---
.../java/org/apache/doris/nereids/PlanContext.java | 10 ++++++-
.../org/apache/doris/nereids/cost/CostModelV1.java | 31 +++-------------------
.../java/org/apache/doris/nereids/memo/Group.java | 10 ++++++-
.../doris/nereids/stats/StatsCalculator.java | 4 +++
.../trees/plans/physical/AbstractPhysicalJoin.java | 8 ++++++
.../trees/plans/physical/PhysicalHashJoin.java | 7 -----
6 files changed, 33 insertions(+), 37 deletions(-)
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/PlanContext.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/PlanContext.java
index 847d18afdc8..07a2cf4eed2 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/PlanContext.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/PlanContext.java
@@ -33,9 +33,10 @@ import java.util.List;
public class PlanContext {
private final List<Statistics> childrenStats;
- private Statistics planStats;
+ private final Statistics planStats;
private final int arity;
private boolean isBroadcastJoin = false;
+ private final boolean isStatsReliable;
/**
* Constructor for PlanContext.
@@ -43,15 +44,18 @@ public class PlanContext {
public PlanContext(GroupExpression groupExpression) {
this.arity = groupExpression.arity();
this.planStats = groupExpression.getOwnerGroup().getStatistics();
+ this.isStatsReliable =
groupExpression.getOwnerGroup().isStatsReliable();
this.childrenStats = new ArrayList<>(groupExpression.arity());
for (int i = 0; i < groupExpression.arity(); i++) {
childrenStats.add(groupExpression.childStatistics(i));
}
}
+ // This is used in GraphSimplifier
public PlanContext(Statistics planStats, List<Statistics> childrenStats) {
this.planStats = planStats;
this.childrenStats = childrenStats;
+ this.isStatsReliable = false;
this.arity = this.childrenStats.size();
}
@@ -71,6 +75,10 @@ public class PlanContext {
return planStats;
}
+ public boolean isStatsReliable() {
+ return isStatsReliable;
+ }
+
/**
* Get child statistics.
*/
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/cost/CostModelV1.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/cost/CostModelV1.java
index a2364eb0258..8c2b467920e 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/cost/CostModelV1.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/cost/CostModelV1.java
@@ -22,7 +22,6 @@ import org.apache.doris.nereids.properties.DistributionSpec;
import org.apache.doris.nereids.properties.DistributionSpecGather;
import org.apache.doris.nereids.properties.DistributionSpecHash;
import org.apache.doris.nereids.properties.DistributionSpecReplicated;
-import org.apache.doris.nereids.trees.expressions.Slot;
import org.apache.doris.nereids.trees.plans.Plan;
import org.apache.doris.nereids.trees.plans.physical.PhysicalAssertNumRows;
import
org.apache.doris.nereids.trees.plans.physical.PhysicalDeferMaterializeOlapScan;
@@ -303,7 +302,7 @@ class CostModelV1 extends PlanVisitor<Cost, PlanContext> {
}
// TODO: since the outputs rows may expand a lot, penalty on it
will cause bc never be chosen.
// will refine this in next generation cost model.
- if (isStatsUnknown(physicalHashJoin, buildStats, probeStats)) {
+ if (!context.isStatsReliable()) {
// forbid broadcast join when stats is unknown
return CostV1.of(rightRowCount * buildSideFactor + 1 /
leftRowCount,
rightRowCount,
@@ -315,7 +314,7 @@ class CostModelV1 extends PlanVisitor<Cost, PlanContext> {
0
);
}
- if (isStatsUnknown(physicalHashJoin, buildStats, probeStats)) {
+ if (!context.isStatsReliable()) {
return CostV1.of(rightRowCount + 1 / leftRowCount,
rightRowCount,
0);
@@ -326,30 +325,6 @@ class CostModelV1 extends PlanVisitor<Cost, PlanContext> {
);
}
- private boolean isStatsUnknown(PhysicalHashJoin<? extends Plan, ? extends
Plan> join,
- Statistics build, Statistics probe) {
- for (Slot slot : join.getConditionSlot()) {
- if ((build.columnStatistics().containsKey(slot) &&
!build.columnStatistics().get(slot).isUnKnown)
- || (probe.columnStatistics().containsKey(slot) &&
!probe.columnStatistics().get(slot).isUnKnown)) {
- continue;
- }
- return true;
- }
- return false;
- }
-
- private boolean isStatsUnknown(PhysicalNestedLoopJoin<? extends Plan, ?
extends Plan> join,
- Statistics build, Statistics probe) {
- for (Slot slot : join.getConditionSlot()) {
- if ((build.columnStatistics().containsKey(slot) &&
!build.columnStatistics().get(slot).isUnKnown)
- || (probe.columnStatistics().containsKey(slot) &&
!probe.columnStatistics().get(slot).isUnKnown)) {
- continue;
- }
- return true;
- }
- return false;
- }
-
@Override
public Cost visitPhysicalNestedLoopJoin(
PhysicalNestedLoopJoin<? extends Plan, ? extends Plan>
nestedLoopJoin,
@@ -358,7 +333,7 @@ class CostModelV1 extends PlanVisitor<Cost, PlanContext> {
Preconditions.checkState(context.arity() == 2);
Statistics leftStatistics = context.getChildStatistics(0);
Statistics rightStatistics = context.getChildStatistics(1);
- if (isStatsUnknown(nestedLoopJoin, leftStatistics, rightStatistics)) {
+ if (!context.isStatsReliable()) {
return CostV1.of(rightStatistics.getRowCount() + 1 /
leftStatistics.getRowCount(),
rightStatistics.getRowCount(),
0);
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/memo/Group.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/memo/Group.java
index 750de4fa8f2..8009156e134 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/memo/Group.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/memo/Group.java
@@ -59,7 +59,7 @@ public class Group {
private final List<GroupExpression> logicalExpressions =
Lists.newArrayList();
private final List<GroupExpression> physicalExpressions =
Lists.newArrayList();
private final List<GroupExpression> enforcers = Lists.newArrayList();
-
+ private boolean isStatsReliable = true;
private LogicalProperties logicalProperties;
// Map of cost lower bounds
@@ -119,6 +119,14 @@ public class Group {
return groupExpression;
}
+ public void setStatsReliable(boolean statsReliable) {
+ this.isStatsReliable = statsReliable;
+ }
+
+ public boolean isStatsReliable() {
+ return isStatsReliable;
+ }
+
public void addLogicalExpression(GroupExpression groupExpression) {
groupExpression.setOwnerGroup(this);
logicalExpressions.add(groupExpression);
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
index 6ee5c642b90..0fad8ea1d1a 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
@@ -221,8 +221,12 @@ public class StatsCalculator extends
DefaultPlanVisitor<Statistics, Void> {
Plan plan = groupExpression.getPlan();
Statistics newStats = plan.accept(this, null);
newStats.enforceValid();
+
// We ensure that the rowCount remains unchanged in order to make the
cost of each plan comparable.
if (groupExpression.getOwnerGroup().getStatistics() == null) {
+ boolean isReliable =
groupExpression.getPlan().getExpressions().stream()
+ .noneMatch(e ->
newStats.isInputSlotsUnknown(e.getInputSlots()));
+ groupExpression.getOwnerGroup().setStatsReliable(isReliable);
groupExpression.getOwnerGroup().setStatistics(newStats);
groupExpression.setEstOutputRowCount(newStats.getRowCount());
} else {
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/AbstractPhysicalJoin.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/AbstractPhysicalJoin.java
index acd2e381392..c6bdd7b6b20 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/AbstractPhysicalJoin.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/AbstractPhysicalJoin.java
@@ -35,6 +35,7 @@ import org.apache.doris.statistics.Statistics;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableList.Builder;
+import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Lists;
import org.json.JSONObject;
@@ -42,7 +43,9 @@ import java.util.Collection;
import java.util.List;
import java.util.Objects;
import java.util.Optional;
+import java.util.Set;
import java.util.stream.Collectors;
+import java.util.stream.Stream;
/**
* Abstract class for all physical join node.
@@ -217,6 +220,11 @@ public abstract class AbstractPhysicalJoin<
.build();
}
+ public Set<Slot> getConditionSlot() {
+ return Stream.concat(hashJoinConjuncts.stream(),
otherJoinConjuncts.stream())
+ .flatMap(expr ->
expr.getInputSlots().stream()).collect(ImmutableSet.toImmutableSet());
+ }
+
@Override
public String toString() {
List<Object> args = Lists.newArrayList("type", joinType,
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalHashJoin.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalHashJoin.java
index a6bd8cfdd68..b60afd67308 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalHashJoin.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalHashJoin.java
@@ -43,7 +43,6 @@ import org.apache.doris.statistics.Statistics;
import org.apache.doris.thrift.TRuntimeFilterType;
import com.google.common.base.Preconditions;
-import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
@@ -52,7 +51,6 @@ import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.stream.Collectors;
-import java.util.stream.Stream;
/**
* Physical hash join plan.
@@ -246,11 +244,6 @@ public class PhysicalHashJoin<
return pushedDown;
}
- public Set<Slot> getConditionSlot() {
- return Stream.concat(hashJoinConjuncts.stream(),
otherJoinConjuncts.stream())
- .flatMap(expr ->
expr.getInputSlots().stream()).collect(ImmutableSet.toImmutableSet());
- }
-
@Override
public String shapeInfo() {
StringBuilder builder = new StringBuilder();
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]