This is an automated email from the ASF dual-hosted git repository.
kxiao pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.0 by this push:
new ff786d05fb4 [pick](Branch2.0) generate left deep tree when stats is
unknown (#25702)
ff786d05fb4 is described below
commit ff786d05fb4d05aee444a267883fb7998f8ea91f
Author: 谢健 <[email protected]>
AuthorDate: Sun Oct 22 00:46:43 2023 +0800
[pick](Branch2.0) generate left deep tree when stats is unknown (#25702)
---
.../org/apache/doris/nereids/cost/CostModelV1.java | 43 +++++++++++++++++++++-
.../apache/doris/nereids/stats/JoinEstimation.java | 17 ++++++++-
.../trees/plans/physical/PhysicalHashJoin.java | 8 ++++
.../plans/physical/PhysicalNestedLoopJoin.java | 8 ++++
4 files changed, 74 insertions(+), 2 deletions(-)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/cost/CostModelV1.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/cost/CostModelV1.java
index aa8f4d6cc7c..2aca6017b7b 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/cost/CostModelV1.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/cost/CostModelV1.java
@@ -22,6 +22,7 @@ import org.apache.doris.nereids.properties.DistributionSpec;
import org.apache.doris.nereids.properties.DistributionSpecGather;
import org.apache.doris.nereids.properties.DistributionSpecHash;
import org.apache.doris.nereids.properties.DistributionSpecReplicated;
+import org.apache.doris.nereids.trees.expressions.Slot;
import org.apache.doris.nereids.trees.plans.Plan;
import org.apache.doris.nereids.trees.plans.physical.PhysicalAssertNumRows;
import
org.apache.doris.nereids.trees.plans.physical.PhysicalDeferMaterializeOlapScan;
@@ -311,17 +312,53 @@ class CostModelV1 extends PlanVisitor<Cost, PlanContext> {
}
// TODO: since the outputs rows may expand a lot, penalty on it
will cause bc never be chosen.
// will refine this in next generation cost model.
+ if (isStatsUnknown(physicalHashJoin, buildStats, probeStats)) {
+ // forbid broadcast join when stats is unknown
+ return CostV1.of(rightRowCount * buildSideFactor + 1 /
leftRowCount,
+ rightRowCount,
+ 0
+ );
+ }
return CostV1.of(leftRowCount + rightRowCount * buildSideFactor +
outputRowCount * probeSideFactor,
rightRowCount,
0
);
}
+ if (isStatsUnknown(physicalHashJoin, buildStats, probeStats)) {
+ return CostV1.of(rightRowCount + 1 / leftRowCount,
+ rightRowCount,
+ 0);
+ }
return CostV1.of(leftRowCount + rightRowCount + outputRowCount,
rightRowCount,
0
);
}
+ private boolean isStatsUnknown(PhysicalHashJoin<? extends Plan, ? extends
Plan> join,
+ Statistics build, Statistics probe) {
+ for (Slot slot : join.getConditionSlot()) {
+ if ((build.columnStatistics().containsKey(slot) &&
!build.columnStatistics().get(slot).isUnKnown)
+ || (probe.columnStatistics().containsKey(slot) &&
!probe.columnStatistics().get(slot).isUnKnown)) {
+ continue;
+ }
+ return true;
+ }
+ return false;
+ }
+
+ private boolean isStatsUnknown(PhysicalNestedLoopJoin<? extends Plan, ?
extends Plan> join,
+ Statistics build, Statistics probe) {
+ for (Slot slot : join.getConditionSlot()) {
+ if ((build.columnStatistics().containsKey(slot) &&
!build.columnStatistics().get(slot).isUnKnown)
+ || (probe.columnStatistics().containsKey(slot) &&
!probe.columnStatistics().get(slot).isUnKnown)) {
+ continue;
+ }
+ return true;
+ }
+ return false;
+ }
+
@Override
public Cost visitPhysicalNestedLoopJoin(
PhysicalNestedLoopJoin<? extends Plan, ? extends Plan>
nestedLoopJoin,
@@ -330,7 +367,11 @@ class CostModelV1 extends PlanVisitor<Cost, PlanContext> {
Preconditions.checkState(context.arity() == 2);
Statistics leftStatistics = context.getChildStatistics(0);
Statistics rightStatistics = context.getChildStatistics(1);
-
+ if (isStatsUnknown(nestedLoopJoin, leftStatistics, rightStatistics)) {
+ return CostV1.of(rightStatistics.getRowCount() + 1 /
leftStatistics.getRowCount(),
+ rightStatistics.getRowCount(),
+ 0);
+ }
return CostV1.of(
leftStatistics.getRowCount() * rightStatistics.getRowCount(),
rightStatistics.getRowCount(),
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/JoinEstimation.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/JoinEstimation.java
index ef4575e3308..0498d68d793 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/JoinEstimation.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/JoinEstimation.java
@@ -147,6 +147,21 @@ public class JoinEstimation {
}
private static Statistics estimateNestLoopJoin(Statistics leftStats,
Statistics rightStats, Join join) {
+ if (hashJoinConditionContainsUnknownColumnStats(leftStats, rightStats,
join)) {
+ double rowCount = (leftStats.getRowCount() +
rightStats.getRowCount());
+ // We do more like the nested loop join with one rows than inner
join
+ if (leftStats.getRowCount() == 1 || rightStats.getRowCount() == 1)
{
+ rowCount *= 0.99;
+ } else {
+ rowCount *= 1.01;
+ }
+ rowCount = Math.max(1, rowCount);
+ return new StatisticsBuilder()
+ .setRowCount(rowCount)
+ .putColumnStatistics(leftStats.columnStatistics())
+ .putColumnStatistics(rightStats.columnStatistics())
+ .build();
+ }
return new StatisticsBuilder()
.setRowCount(Math.max(1, leftStats.getRowCount() *
rightStats.getRowCount()))
.putColumnStatistics(leftStats.columnStatistics())
@@ -156,7 +171,7 @@ public class JoinEstimation {
private static Statistics estimateInnerJoin(Statistics leftStats,
Statistics rightStats, Join join) {
if (hashJoinConditionContainsUnknownColumnStats(leftStats, rightStats,
join)) {
- double rowCount = Math.max(leftStats.getRowCount(),
rightStats.getRowCount());
+ double rowCount = leftStats.getRowCount() +
rightStats.getRowCount();
rowCount = Math.max(1, rowCount);
return new StatisticsBuilder()
.setRowCount(rowCount)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalHashJoin.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalHashJoin.java
index 84dc09b2a37..357667f748c 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalHashJoin.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalHashJoin.java
@@ -24,6 +24,7 @@ import org.apache.doris.nereids.properties.PhysicalProperties;
import org.apache.doris.nereids.trees.expressions.ExprId;
import org.apache.doris.nereids.trees.expressions.Expression;
import org.apache.doris.nereids.trees.expressions.MarkJoinSlotReference;
+import org.apache.doris.nereids.trees.expressions.Slot;
import org.apache.doris.nereids.trees.plans.AbstractPlan;
import org.apache.doris.nereids.trees.plans.JoinHint;
import org.apache.doris.nereids.trees.plans.JoinType;
@@ -34,6 +35,7 @@ import org.apache.doris.nereids.util.Utils;
import org.apache.doris.statistics.Statistics;
import com.google.common.base.Preconditions;
+import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Lists;
import java.util.Comparator;
@@ -41,6 +43,7 @@ import java.util.List;
import java.util.Optional;
import java.util.Set;
import java.util.stream.Collectors;
+import java.util.stream.Stream;
/**
* Physical hash join plan.
@@ -215,6 +218,11 @@ public class PhysicalHashJoin<
}
}
+ public Set<Slot> getConditionSlot() {
+ return Stream.concat(hashJoinConjuncts.stream(),
otherJoinConjuncts.stream())
+ .flatMap(expr ->
expr.getInputSlots().stream()).collect(ImmutableSet.toImmutableSet());
+ }
+
@Override
public String shapeInfo() {
StringBuilder builder = new StringBuilder();
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalNestedLoopJoin.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalNestedLoopJoin.java
index 3b92830b6a0..76b4859fb13 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalNestedLoopJoin.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalNestedLoopJoin.java
@@ -22,6 +22,7 @@ import org.apache.doris.nereids.properties.LogicalProperties;
import org.apache.doris.nereids.properties.PhysicalProperties;
import org.apache.doris.nereids.trees.expressions.Expression;
import org.apache.doris.nereids.trees.expressions.MarkJoinSlotReference;
+import org.apache.doris.nereids.trees.expressions.Slot;
import org.apache.doris.nereids.trees.plans.JoinHint;
import org.apache.doris.nereids.trees.plans.JoinType;
import org.apache.doris.nereids.trees.plans.Plan;
@@ -31,11 +32,13 @@ import org.apache.doris.nereids.util.Utils;
import org.apache.doris.statistics.Statistics;
import com.google.common.base.Preconditions;
+import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Sets;
import java.util.List;
import java.util.Optional;
import java.util.Set;
+import java.util.stream.Stream;
/**
* Use nested loop algorithm to do join.
@@ -169,6 +172,11 @@ public class PhysicalNestedLoopJoin<
return bitMapRuntimeFilterConditions.isEmpty();
}
+ public Set<Slot> getConditionSlot() {
+ return Stream.concat(hashJoinConjuncts.stream(),
otherJoinConjuncts.stream())
+ .flatMap(expr ->
expr.getInputSlots().stream()).collect(ImmutableSet.toImmutableSet());
+ }
+
@Override
public String shapeInfo() {
StringBuilder builder = new StringBuilder("NestedLoopJoin");
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]