This is an automated email from the ASF dual-hosted git repository.

morrysnow pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.0 by this push:
     new 9a76cd651f6 [opt](nereids) if column stats are unknown, 10-20 table-join optimization use cascading instead of dphyp
9a76cd651f6 is described below

commit 9a76cd651f63859dde1380186c7173e1c80a7a48
Author: minghong <[email protected]>
AuthorDate: Sun Feb 4 10:42:27 2024 +0800

    [opt](nereids) if column stats are unknown, 10-20 table-join optimization use cascading instead of dphyp
    
    pick from master #29902
---
 .../java/org/apache/doris/nereids/StatementContext.java   | 15 +++++++++++++++
 .../org/apache/doris/nereids/jobs/executor/Optimizer.java | 13 ++++++++++++-
 .../org/apache/doris/nereids/stats/StatsCalculator.java   |  7 +++++++
 3 files changed, 34 insertions(+), 1 deletion(-)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/StatementContext.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/StatementContext.java
index 9db05187cbb..9ea5ed09759 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/StatementContext.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/StatementContext.java
@@ -73,6 +73,13 @@ public class StatementContext {
     private boolean isDpHyp = false;
     private boolean isOtherJoinReorder = false;
 
+    // hasUnknownColStats true if any column stats in the tables used by this sql is unknown
+    // the algorithm to derive plan when column stats are unknown is implemented in cascading framework, not in dphyper.
+    // And hence, when column stats are unknown, even if the tables used by a sql is more than
+    // MAX_TABLE_COUNT_USE_CASCADES_JOIN_REORDER, join reorder should choose cascading framework.
+    // Thus hasUnknownColStats has higher priority than isDpHyp
+    private boolean hasUnknownColStats = false;
+
     private final IdGenerator<ExprId> exprIdGenerator = ExprId.createGenerator();
     private final IdGenerator<ObjectId> objectIdGenerator = ObjectId.createGenerator();
     private final IdGenerator<RelationId> relationIdGenerator = RelationId.createGenerator();
@@ -255,4 +262,12 @@ public class StatementContext {
     public void addJoinFilters(Collection<Expression> newJoinFilters) {
         this.joinFilters.addAll(newJoinFilters);
     }
+
+    public boolean isHasUnknownColStats() {
+        return hasUnknownColStats;
+    }
+
+    public void setHasUnknownColStats(boolean hasUnknownColStats) {
+        this.hasUnknownColStats = hasUnknownColStats;
+    }
 }
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Optimizer.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Optimizer.java
index 84295ec6886..f92aaed66fb 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Optimizer.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Optimizer.java
@@ -55,11 +55,22 @@ public class Optimizer {
                 cascadesContext.getCurrentJobContext()));
         cascadesContext.getJobScheduler().executeJobPool(cascadesContext);
         serializeStatUsed(cascadesContext.getConnectContext());
+        boolean optimizeWithUnknownColStats = false;
+        if (ConnectContext.get() != null && ConnectContext.get().getStatementContext() != null) {
+            if (ConnectContext.get().getStatementContext().isHasUnknownColStats()) {
+                optimizeWithUnknownColStats = true;
+            }
+        }
         // DPHyp optimize
+        int maxTableCount = getSessionVariable().getMaxTableCountUseCascadesJoinReorder();
+        if (optimizeWithUnknownColStats) {
+            // if column stats are unknown, 10~20 table-join is optimized by cascading framework
+            maxTableCount = 2 * maxTableCount;
+        }
         int maxJoinCount = cascadesContext.getMemo().countMaxContinuousJoin();
         cascadesContext.getStatementContext().setMaxContinuousJoin(maxJoinCount);
         boolean isDpHyp = getSessionVariable().enableDPHypOptimizer
-                || maxJoinCount > getSessionVariable().getMaxTableCountUseCascadesJoinReorder();
+                || maxJoinCount > maxTableCount;
         cascadesContext.getStatementContext().setDpHyp(isDpHyp);
         cascadesContext.getStatementContext().setOtherJoinReorder(false);
         if (!getSessionVariable().isDisableJoinReorder() && isDpHyp
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
index eabb8b14fb9..cd806db5cf2 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
@@ -624,6 +624,7 @@ public class StatsCalculator extends DefaultPlanVisitor<Statistics, Void> {
         Map<Expression, ColumnStatistic> columnStatisticMap = new HashMap<>();
         TableIf table = catalogRelation.getTable();
         double rowCount = catalogRelation.getTable().estimatedRowCount();
+        boolean hasUnknownCol = false;
         for (SlotReference slotReference : slotSet) {
             String colName = slotReference.getName();
             boolean shouldIgnoreThisCol = StatisticConstants.shouldIgnoreCol(table, slotReference.getColumn().get());
@@ -645,13 +646,19 @@ public class StatsCalculator extends DefaultPlanVisitor<Statistics, Void> {
             }
             if (!cache.isUnKnown) {
                 rowCount = Math.max(rowCount, cache.count);
+            } else {
+                hasUnknownCol = true;
             }
             if (ConnectContext.get() != null && ConnectContext.get().getSessionVariable().enableStats) {
                 columnStatisticMap.put(slotReference, cache);
             } else {
                 columnStatisticMap.put(slotReference, ColumnStatistic.UNKNOWN);
+                hasUnknownCol = true;
             }
         }
+        if (hasUnknownCol && ConnectContext.get() != null && ConnectContext.get().getStatementContext() != null) {
+            ConnectContext.get().getStatementContext().setHasUnknownColStats(true);
+        }
         Statistics stats = new Statistics(rowCount, columnStatisticMap);
         stats = normalizeCatalogRelationColumnStatsRowCount(stats);
         return stats;


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to