This is an automated email from the ASF dual-hosted git repository.

englefly pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new c5a53e0caa [tpch](nereids) estimate cost with unknown column stats 
#19046
c5a53e0caa is described below

commit c5a53e0caa4424fe56f3766c1407c458b8a7c56a
Author: minghong <[email protected]>
AuthorDate: Thu May 11 19:03:11 2023 +0800

    [tpch](nereids) estimate cost with unknown column stats #19046
    
    Make Nereids generate more reasonable plans when the table row count is
    known but column stats are not.
    TODO: the plans for q5 and q7 are still not good, because of the
    correlation between columns ps_suppkey and ps_partkey.
---
 .../apache/doris/nereids/stats/JoinEstimation.java | 40 ++++++++++++++++++++++
 1 file changed, 40 insertions(+)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/JoinEstimation.java 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/JoinEstimation.java
index 7afe007612..ac8545a1bd 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/JoinEstimation.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/JoinEstimation.java
@@ -21,6 +21,7 @@ import org.apache.doris.common.Pair;
 import org.apache.doris.nereids.exceptions.AnalysisException;
 import org.apache.doris.nereids.trees.expressions.EqualTo;
 import org.apache.doris.nereids.trees.expressions.Expression;
+import org.apache.doris.nereids.trees.expressions.Slot;
 import org.apache.doris.nereids.trees.plans.JoinType;
 import org.apache.doris.nereids.trees.plans.algebra.Join;
 import org.apache.doris.nereids.util.ExpressionUtils;
@@ -51,7 +52,32 @@ public class JoinEstimation {
         }
     }
 
+    private static boolean 
hashJoinConditionContainsUnknownColumnStats(Statistics leftStats,
+            Statistics rightStats, Join join) {
+        for (Expression expr : join.getHashJoinConjuncts()) {
+            for (Slot slot : expr.getInputSlots()) {
+                ColumnStatistic colStats = 
leftStats.findColumnStatistics(slot);
+                if (colStats == null) {
+                    colStats = rightStats.findColumnStatistics(slot);
+                }
+                if (colStats == null || colStats.isUnKnown) {
+                    return true;
+                }
+            }
+        }
+        return false;
+    }
+
     private static Statistics estimateInnerJoin(Statistics leftStats, 
Statistics rightStats, Join join) {
+        if (hashJoinConditionContainsUnknownColumnStats(leftStats, rightStats, 
join)) {
+            double rowCount = Math.max(leftStats.getRowCount(), 
rightStats.getRowCount());
+            rowCount = Math.max(1, rowCount);
+            return new StatisticsBuilder()
+                .setRowCount(rowCount)
+                .putColumnStatistics(leftStats.columnStatistics())
+                .putColumnStatistics(rightStats.columnStatistics())
+                .build();
+        }
         /*
          * When we estimate filter A=B,
          * if any side of equation, A or B, is almost unique, the confidence 
level of estimation is high.
@@ -174,6 +200,20 @@ public class JoinEstimation {
     }
 
     private static Statistics estimateSemiOrAnti(Statistics leftStats, 
Statistics rightStats, Join join) {
+        if (hashJoinConditionContainsUnknownColumnStats(leftStats, rightStats, 
join)) {
+            if (join.getJoinType().isLeftSemiOrAntiJoin()) {
+                return new 
StatisticsBuilder().setRowCount(leftStats.getRowCount())
+                        .putColumnStatistics(leftStats.columnStatistics())
+                        .putColumnStatistics(rightStats.columnStatistics())
+                        .build();
+            } else {
+                //right semi or anti
+                return new 
StatisticsBuilder().setRowCount(rightStats.getRowCount())
+                        .putColumnStatistics(leftStats.columnStatistics())
+                        .putColumnStatistics(rightStats.columnStatistics())
+                        .build();
+            }
+        }
         double rowCount = Double.POSITIVE_INFINITY;
         for (Expression conjunct : join.getHashJoinConjuncts()) {
             double eqRowCount = 
estimateSemiOrAntiRowCountBySlotsEqual(leftStats, rightStats,


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to