This is an automated email from the ASF dual-hosted git repository.
englefly pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new c5a53e0caa [tpch](nereids) estimate cost with unknown column stats
#19046
c5a53e0caa is described below
commit c5a53e0caa4424fe56f3766c1407c458b8a7c56a
Author: minghong <[email protected]>
AuthorDate: Thu May 11 19:03:11 2023 +0800
[tpch](nereids) estimate cost with unknown column stats #19046
Make Nereids generate more reasonable plans when only the table row count
is available and column stats are unknown.
TODO: the plans for q5 and q7 are still not good, because of the column
correlation between ps_suppkey and ps_partkey.
---
.../apache/doris/nereids/stats/JoinEstimation.java | 40 ++++++++++++++++++++++
1 file changed, 40 insertions(+)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/JoinEstimation.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/JoinEstimation.java
index 7afe007612..ac8545a1bd 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/JoinEstimation.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/JoinEstimation.java
@@ -21,6 +21,7 @@ import org.apache.doris.common.Pair;
import org.apache.doris.nereids.exceptions.AnalysisException;
import org.apache.doris.nereids.trees.expressions.EqualTo;
import org.apache.doris.nereids.trees.expressions.Expression;
+import org.apache.doris.nereids.trees.expressions.Slot;
import org.apache.doris.nereids.trees.plans.JoinType;
import org.apache.doris.nereids.trees.plans.algebra.Join;
import org.apache.doris.nereids.util.ExpressionUtils;
@@ -51,7 +52,32 @@ public class JoinEstimation {
}
}
+ private static boolean
hashJoinConditionContainsUnknownColumnStats(Statistics leftStats,
+ Statistics rightStats, Join join) {
+ for (Expression expr : join.getHashJoinConjuncts()) {
+ for (Slot slot : expr.getInputSlots()) {
+ ColumnStatistic colStats =
leftStats.findColumnStatistics(slot);
+ if (colStats == null) {
+ colStats = rightStats.findColumnStatistics(slot);
+ }
+ if (colStats == null || colStats.isUnKnown) {
+ return true;
+ }
+ }
+ }
+ return false;
+ }
+
private static Statistics estimateInnerJoin(Statistics leftStats,
Statistics rightStats, Join join) {
+ if (hashJoinConditionContainsUnknownColumnStats(leftStats, rightStats,
join)) {
+ double rowCount = Math.max(leftStats.getRowCount(),
rightStats.getRowCount());
+ rowCount = Math.max(1, rowCount);
+ return new StatisticsBuilder()
+ .setRowCount(rowCount)
+ .putColumnStatistics(leftStats.columnStatistics())
+ .putColumnStatistics(rightStats.columnStatistics())
+ .build();
+ }
/*
* When we estimate filter A=B,
* if any side of equation, A or B, is almost unique, the confidence
level of estimation is high.
@@ -174,6 +200,20 @@ public class JoinEstimation {
}
private static Statistics estimateSemiOrAnti(Statistics leftStats,
Statistics rightStats, Join join) {
+ if (hashJoinConditionContainsUnknownColumnStats(leftStats, rightStats,
join)) {
+ if (join.getJoinType().isLeftSemiOrAntiJoin()) {
+ return new
StatisticsBuilder().setRowCount(leftStats.getRowCount())
+ .putColumnStatistics(leftStats.columnStatistics())
+ .putColumnStatistics(rightStats.columnStatistics())
+ .build();
+ } else {
+ //right semi or anti
+ return new
StatisticsBuilder().setRowCount(rightStats.getRowCount())
+ .putColumnStatistics(leftStats.columnStatistics())
+ .putColumnStatistics(rightStats.columnStatistics())
+ .build();
+ }
+ }
double rowCount = Double.POSITIVE_INFINITY;
for (Expression conjunct : join.getHashJoinConjuncts()) {
double eqRowCount =
estimateSemiOrAntiRowCountBySlotsEqual(leftStats, rightStats,
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]