This is an automated email from the ASF dual-hosted git repository.
morrysnow pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.0 by this push:
new 9a76cd651f6 [opt](nereids) if column stats are unknown, 10-20
table-join optimization use cascading instead of dphyp
9a76cd651f6 is described below
commit 9a76cd651f63859dde1380186c7173e1c80a7a48
Author: minghong <[email protected]>
AuthorDate: Sun Feb 4 10:42:27 2024 +0800
[opt](nereids) if column stats are unknown, 10-20 table-join optimization
use cascading instead of dphyp
pick from master #29902
---
.../java/org/apache/doris/nereids/StatementContext.java | 15 +++++++++++++++
.../org/apache/doris/nereids/jobs/executor/Optimizer.java | 13 ++++++++++++-
.../org/apache/doris/nereids/stats/StatsCalculator.java | 7 +++++++
3 files changed, 34 insertions(+), 1 deletion(-)
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/StatementContext.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/StatementContext.java
index 9db05187cbb..9ea5ed09759 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/StatementContext.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/StatementContext.java
@@ -73,6 +73,13 @@ public class StatementContext {
private boolean isDpHyp = false;
private boolean isOtherJoinReorder = false;
+ // hasUnknownColStats true if any column stats in the tables used by this sql is unknown
+ // the algorithm to derive plan when column stats are unknown is implemented in cascading framework, not in dphyper.
+ // And hence, when column stats are unknown, even if the tables used by a sql is more than
+ // MAX_TABLE_COUNT_USE_CASCADES_JOIN_REORDER, join reorder should choose cascading framework.
+ // Thus hasUnknownColStats has higher priority than isDpHyp
+ private boolean hasUnknownColStats = false;
+
private final IdGenerator<ExprId> exprIdGenerator = ExprId.createGenerator();
private final IdGenerator<ObjectId> objectIdGenerator = ObjectId.createGenerator();
private final IdGenerator<RelationId> relationIdGenerator = RelationId.createGenerator();
@@ -255,4 +262,12 @@ public class StatementContext {
public void addJoinFilters(Collection<Expression> newJoinFilters) {
this.joinFilters.addAll(newJoinFilters);
}
+
+ public boolean isHasUnknownColStats() {
+ return hasUnknownColStats;
+ }
+
+ public void setHasUnknownColStats(boolean hasUnknownColStats) {
+ this.hasUnknownColStats = hasUnknownColStats;
+ }
}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Optimizer.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Optimizer.java
index 84295ec6886..f92aaed66fb 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Optimizer.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Optimizer.java
@@ -55,11 +55,22 @@ public class Optimizer {
cascadesContext.getCurrentJobContext()));
cascadesContext.getJobScheduler().executeJobPool(cascadesContext);
serializeStatUsed(cascadesContext.getConnectContext());
+ boolean optimizeWithUnknownColStats = false;
+ if (ConnectContext.get() != null && ConnectContext.get().getStatementContext() != null) {
+ if (ConnectContext.get().getStatementContext().isHasUnknownColStats()) {
+ optimizeWithUnknownColStats = true;
+ }
+ }
// DPHyp optimize
+ int maxTableCount = getSessionVariable().getMaxTableCountUseCascadesJoinReorder();
+ if (optimizeWithUnknownColStats) {
+ // if column stats are unknown, 10~20 table-join is optimized by cascading framework
+ maxTableCount = 2 * maxTableCount;
+ }
int maxJoinCount = cascadesContext.getMemo().countMaxContinuousJoin();
cascadesContext.getStatementContext().setMaxContinuousJoin(maxJoinCount);
boolean isDpHyp = getSessionVariable().enableDPHypOptimizer
- || maxJoinCount > getSessionVariable().getMaxTableCountUseCascadesJoinReorder();
+ || maxJoinCount > maxTableCount;
cascadesContext.getStatementContext().setDpHyp(isDpHyp);
cascadesContext.getStatementContext().setOtherJoinReorder(false);
if (!getSessionVariable().isDisableJoinReorder() && isDpHyp
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
index eabb8b14fb9..cd806db5cf2 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
@@ -624,6 +624,7 @@ public class StatsCalculator extends DefaultPlanVisitor<Statistics, Void> {
Map<Expression, ColumnStatistic> columnStatisticMap = new HashMap<>();
TableIf table = catalogRelation.getTable();
double rowCount = catalogRelation.getTable().estimatedRowCount();
+ boolean hasUnknownCol = false;
for (SlotReference slotReference : slotSet) {
String colName = slotReference.getName();
boolean shouldIgnoreThisCol = StatisticConstants.shouldIgnoreCol(table, slotReference.getColumn().get());
@@ -645,13 +646,19 @@ public class StatsCalculator extends DefaultPlanVisitor<Statistics, Void> {
}
if (!cache.isUnKnown) {
rowCount = Math.max(rowCount, cache.count);
+ } else {
+ hasUnknownCol = true;
}
if (ConnectContext.get() != null && ConnectContext.get().getSessionVariable().enableStats) {
columnStatisticMap.put(slotReference, cache);
} else {
columnStatisticMap.put(slotReference, ColumnStatistic.UNKNOWN);
+ hasUnknownCol = true;
}
}
+ if (hasUnknownCol && ConnectContext.get() != null && ConnectContext.get().getStatementContext() != null) {
+ ConnectContext.get().getStatementContext().setHasUnknownColStats(true);
+ }
Statistics stats = new Statistics(rowCount, columnStatisticMap);
stats = normalizeCatalogRelationColumnStatsRowCount(stats);
return stats;
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]