This is an automated email from the ASF dual-hosted git repository.
jakevin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 2c3bc65fae9 [feature](Nereids): generate left deep tree when stats is
unknown (#25620)
2c3bc65fae9 is described below
commit 2c3bc65fae93987c2ffa702538071f7d7c6204a5
Author: 谢健 <[email protected]>
AuthorDate: Mon Oct 23 15:41:55 2023 +0800
[feature](Nereids): generate left deep tree when stats is unknown (#25620)
generate left deep tree when stats is unknown
---
.../org/apache/doris/nereids/cost/CostModelV1.java | 43 +++++++++++++++++++++-
.../apache/doris/nereids/stats/JoinEstimation.java | 17 ++++++++-
.../trees/plans/physical/PhysicalHashJoin.java | 7 ++++
.../plans/physical/PhysicalNestedLoopJoin.java | 8 ++++
4 files changed, 73 insertions(+), 2 deletions(-)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/cost/CostModelV1.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/cost/CostModelV1.java
index 2e19fa1fb67..a2364eb0258 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/cost/CostModelV1.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/cost/CostModelV1.java
@@ -22,6 +22,7 @@ import org.apache.doris.nereids.properties.DistributionSpec;
import org.apache.doris.nereids.properties.DistributionSpecGather;
import org.apache.doris.nereids.properties.DistributionSpecHash;
import org.apache.doris.nereids.properties.DistributionSpecReplicated;
+import org.apache.doris.nereids.trees.expressions.Slot;
import org.apache.doris.nereids.trees.plans.Plan;
import org.apache.doris.nereids.trees.plans.physical.PhysicalAssertNumRows;
import
org.apache.doris.nereids.trees.plans.physical.PhysicalDeferMaterializeOlapScan;
@@ -302,17 +303,53 @@ class CostModelV1 extends PlanVisitor<Cost, PlanContext> {
}
// TODO: since the outputs rows may expand a lot, penalty on it
will cause bc never be chosen.
// will refine this in next generation cost model.
+ if (isStatsUnknown(physicalHashJoin, buildStats, probeStats)) {
+ // forbid broadcast join when stats is unknown
+ return CostV1.of(rightRowCount * buildSideFactor + 1 /
leftRowCount,
+ rightRowCount,
+ 0
+ );
+ }
return CostV1.of(leftRowCount + rightRowCount * buildSideFactor +
outputRowCount * probeSideFactor,
rightRowCount,
0
);
}
+ if (isStatsUnknown(physicalHashJoin, buildStats, probeStats)) {
+ return CostV1.of(rightRowCount + 1 / leftRowCount,
+ rightRowCount,
+ 0);
+ }
return CostV1.of(leftRowCount + rightRowCount + outputRowCount,
rightRowCount,
0
);
}
+ private boolean isStatsUnknown(PhysicalHashJoin<? extends Plan, ? extends
Plan> join,
+ Statistics build, Statistics probe) { // returns true when at least one join-condition slot has no known column statistics in either input
+ for (Slot slot : join.getConditionSlot()) { // iterate over the slots reported by the join's getConditionSlot()
+ if ((build.columnStatistics().containsKey(slot) && // known on the build side: present in the map and not flagged isUnKnown
!build.columnStatistics().get(slot).isUnKnown)
+ || (probe.columnStatistics().containsKey(slot) && // or known on the probe side, by the same test
!probe.columnStatistics().get(slot).isUnKnown)) {
+ continue; // this slot has known stats on at least one side; check the next slot
+ }
+ return true; // on each side the slot is either missing from the map or flagged isUnKnown
+ }
+ return false; // every condition slot passed the check (also the result when there are no condition slots)
+ }
+
+ private boolean isStatsUnknown(PhysicalNestedLoopJoin<? extends Plan, ? // nested-loop-join overload; returns true when some join-condition slot has no known column statistics
extends Plan> join,
+ Statistics build, Statistics probe) { // visitPhysicalNestedLoopJoin passes (left, right) child statistics here; the check below treats both arguments symmetrically
+ for (Slot slot : join.getConditionSlot()) { // iterate over the slots reported by the join's getConditionSlot()
+ if ((build.columnStatistics().containsKey(slot) && // known in the first statistics: present in the map and not flagged isUnKnown
!build.columnStatistics().get(slot).isUnKnown)
+ || (probe.columnStatistics().containsKey(slot) && // or known in the second statistics, by the same test
!probe.columnStatistics().get(slot).isUnKnown)) {
+ continue; // this slot has known stats in at least one input; check the next slot
+ }
+ return true; // in each input the slot is either missing from the map or flagged isUnKnown
+ }
+ return false; // every condition slot passed the check (also the result when there are no condition slots)
+ }
+
@Override
public Cost visitPhysicalNestedLoopJoin(
PhysicalNestedLoopJoin<? extends Plan, ? extends Plan>
nestedLoopJoin,
@@ -321,7 +358,11 @@ class CostModelV1 extends PlanVisitor<Cost, PlanContext> {
Preconditions.checkState(context.arity() == 2);
Statistics leftStatistics = context.getChildStatistics(0);
Statistics rightStatistics = context.getChildStatistics(1);
-
+ if (isStatsUnknown(nestedLoopJoin, leftStatistics, rightStatistics)) {
+ return CostV1.of(rightStatistics.getRowCount() + 1 /
leftStatistics.getRowCount(),
+ rightStatistics.getRowCount(),
+ 0);
+ }
return CostV1.of(
leftStatistics.getRowCount() * rightStatistics.getRowCount(),
rightStatistics.getRowCount(),
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/JoinEstimation.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/JoinEstimation.java
index 656c58c26bc..44def4bcaa9 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/JoinEstimation.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/JoinEstimation.java
@@ -147,6 +147,21 @@ public class JoinEstimation {
}
private static Statistics estimateNestLoopJoin(Statistics leftStats,
Statistics rightStats, Join join) {
+ if (hashJoinConditionContainsUnknownColumnStats(leftStats, rightStats,
join)) {
+ double rowCount = (leftStats.getRowCount() +
rightStats.getRowCount());
+ // We do more like the nested loop join with one rows than inner
join
+ if (leftStats.getRowCount() == 1 || rightStats.getRowCount() == 1)
{
+ rowCount *= 0.99;
+ } else {
+ rowCount *= 1.01;
+ }
+ rowCount = Math.max(1, rowCount);
+ return new StatisticsBuilder()
+ .setRowCount(rowCount)
+ .putColumnStatistics(leftStats.columnStatistics())
+ .putColumnStatistics(rightStats.columnStatistics())
+ .build();
+ }
return new StatisticsBuilder()
.setRowCount(Math.max(1, leftStats.getRowCount() *
rightStats.getRowCount()))
.putColumnStatistics(leftStats.columnStatistics())
@@ -156,7 +171,7 @@ public class JoinEstimation {
private static Statistics estimateInnerJoin(Statistics leftStats,
Statistics rightStats, Join join) {
if (hashJoinConditionContainsUnknownColumnStats(leftStats, rightStats,
join)) {
- double rowCount = Math.max(leftStats.getRowCount(),
rightStats.getRowCount());
+ double rowCount = leftStats.getRowCount() +
rightStats.getRowCount();
rowCount = Math.max(1, rowCount);
return new StatisticsBuilder()
.setRowCount(rowCount)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalHashJoin.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalHashJoin.java
index b60afd67308..a6bd8cfdd68 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalHashJoin.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalHashJoin.java
@@ -43,6 +43,7 @@ import org.apache.doris.statistics.Statistics;
import org.apache.doris.thrift.TRuntimeFilterType;
import com.google.common.base.Preconditions;
+import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
@@ -51,6 +52,7 @@ import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.stream.Collectors;
+import java.util.stream.Stream;
/**
* Physical hash join plan.
@@ -244,6 +246,11 @@ public class PhysicalHashJoin<
return pushedDown;
}
+ public Set<Slot> getConditionSlot() { // returns the input slots of every hash-join conjunct and every other-join conjunct as one immutable set
+ return Stream.concat(hashJoinConjuncts.stream(), // hash conjuncts first, then the remaining join conjuncts
otherJoinConjuncts.stream())
+ .flatMap(expr -> // flatten each conjunct into its input slots; a slot used by several conjuncts appears once in the resulting set
expr.getInputSlots().stream()).collect(ImmutableSet.toImmutableSet());
+ }
+
@Override
public String shapeInfo() {
StringBuilder builder = new StringBuilder();
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalNestedLoopJoin.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalNestedLoopJoin.java
index d2dca0d2e73..9071b137ca6 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalNestedLoopJoin.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalNestedLoopJoin.java
@@ -22,6 +22,7 @@ import org.apache.doris.nereids.properties.LogicalProperties;
import org.apache.doris.nereids.properties.PhysicalProperties;
import org.apache.doris.nereids.trees.expressions.Expression;
import org.apache.doris.nereids.trees.expressions.MarkJoinSlotReference;
+import org.apache.doris.nereids.trees.expressions.Slot;
import org.apache.doris.nereids.trees.plans.JoinHint;
import org.apache.doris.nereids.trees.plans.JoinType;
import org.apache.doris.nereids.trees.plans.Plan;
@@ -31,11 +32,13 @@ import org.apache.doris.nereids.util.MutableState;
import org.apache.doris.statistics.Statistics;
import com.google.common.base.Preconditions;
+import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Sets;
import java.util.List;
import java.util.Optional;
import java.util.Set;
+import java.util.stream.Stream;
/**
* Use nested loop algorithm to do join.
@@ -173,6 +176,11 @@ public class PhysicalNestedLoopJoin<
return bitMapRuntimeFilterConditions.isEmpty();
}
+ public Set<Slot> getConditionSlot() { // returns the input slots of every conjunct in hashJoinConjuncts and otherJoinConjuncts as one immutable set
+ return Stream.concat(hashJoinConjuncts.stream(), // both conjunct lists are read as a single stream
otherJoinConjuncts.stream())
+ .flatMap(expr -> // flatten each conjunct into its input slots; a slot used by several conjuncts appears once in the resulting set
expr.getInputSlots().stream()).collect(ImmutableSet.toImmutableSet());
+ }
+
@Override
public String shapeInfo() {
StringBuilder builder = new StringBuilder("NestedLoopJoin");
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]